santiago a révisé ce gist . Aller à la révision
1 file changed, 154 insertions
mastobook_to_readeck.php(fichier créé)
@@ -0,0 +1,154 @@ | |||
1 | + | #!/usr/bin/php | |
2 | + | <?php | |
3 | + | ||
/*
 * SETTINGS -- fill these in before the first run.
 *
 * Run this script from cron to regularly import the links found in your
 * Mastodon / snac bookmarks into your Readeck instance.
 */

// Server the bookmarks are read from: bare host name (the script prepends
// "https://") and the bearer token sent with the request.
$MASTODON_HOST  = 'go.lema.org';
$MASTODON_TOKEN = 'xxxx';

// Readeck instance the links are sent to: bare host name and bearer token.
$READECK_HOST  = 'read.lema.org';
$READECK_TOKEN = 'xxxx';

// A linked page is ignored when its tag-stripped body, measured with
// strlen(), is shorter than this.
$MINIMUM_TEXT_SIZE = 500;
17 | + | ||
18 | + | ||
//-----------------------------
// FETCH MASTODON BOOKMARKS
//-----------------------------
// Performs one authenticated GET on the bookmarks endpoint and decodes the
// JSON body into $bookmarks. The script stops on a transport error, on a
// non-2xx HTTP status, or when the body is not a usable JSON array.
// NOTE(review): a single request is made; if the server paginates its
// answer, the following pages are not fetched.
echo "# Fetching mastodon / snac bookmarks...\n";
$ch = curl_init("https://$MASTODON_HOST/api/v1/bookmarks");
curl_setopt_array($ch, [
    CURLOPT_RETURNTRANSFER => true,
    CURLOPT_HTTPHEADER => [
        "Authorization: Bearer $MASTODON_TOKEN",
        "Accept: application/json",
    ],
]);

$bookmarksJson = curl_exec($ch);
// Read the diagnostics before the handle is released.
$fetchError = curl_error($ch);
$fetchStatus = curl_getinfo($ch, CURLINFO_HTTP_CODE);
curl_close($ch);

if ($bookmarksJson === false) {
    die("❌ Failed to fetch Mastodon bookmarks: $fetchError\n");
}
if ($fetchStatus < 200 || $fetchStatus >= 300) {
    die("❌ Mastodon returned HTTP $fetchStatus while fetching bookmarks.\n");
}

$bookmarks = json_decode($bookmarksJson, true);
// An API error is a JSON object carrying an "error" key; it decodes to an
// array too, so it has to be rejected explicitly.
if (!is_array($bookmarks) || isset($bookmarks['error'])) {
    die("❌ Failed to parse Mastodon bookmarks.\n");
}

echo "Found bookmarks:".count($bookmarks)."\n";
39 | + | ||
//-----------------------------
// FIND VALID URLs in posts
//-----------------------------

/**
 * Returns the first http(s) URL found in the text of a status.
 *
 * @param string $html HTML "content" field of a bookmarked status.
 * @return string|null The URL, or null when the text contains none.
 */
function firstUrlInStatusHtml(string $html): ?string
{
    // strip_tags() removes tags without leaving a separator, which would glue
    // a URL to the first word of the next line or paragraph. Turn those
    // boundaries into spaces first.
    $spaced = preg_replace('/<br\s*\/?>|<\/p>/i', ' ', $html);
    if ($spaced === null) {
        $spaced = $html; // PCRE failure: fall back to the raw markup
    }

    // Entities are decoded only after the tags are gone, so that an escaped
    // "&lt;" can never be mistaken for markup. This restores "&" in query strings.
    $text = html_entity_decode(strip_tags($spaced), ENT_QUOTES | ENT_HTML5, 'UTF-8');

    if (preg_match('/https?:\/\/[^\s"<]+/', $text, $match) !== 1) {
        return null;
    }

    return $match[0];
}

// Always defined, even when no bookmark yields a usable URL.
$links = [];

foreach ($bookmarks as $status) {
    if (!isset($status['content'])) {
        continue;
    }

    // Only the first URL of each post is considered.
    $oneLink = firstUrlInStatusHtml($status['content']);
    if ($oneLink === null) {
        continue;
    }

    if (filter_var($oneLink, FILTER_VALIDATE_URL)) {
        $links[] = $oneLink;
    } else {
        // This happens for example if URL has an emoji at the end
        echo "INVALID URL: $oneLink\n";
    }
}

echo "Valid URLS:".count($links)."\n";

print_r($links);
66 | + | ||
67 | + | ||
//-----------------------------
// SEND LINKS TO READECK
//-----------------------------

// Endpoint every link is POSTed to.
$apiUrl = "https://$READECK_HOST/api/bookmarks";

// One handle is prepared here and reused for each link sent below.
$ch = curl_init();
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_POST, true);

$headers = [
    "Authorization: Bearer $READECK_TOKEN",
    'Accept: application/json',
    'Content-Type: application/json',
];

// Archive of the links that were already sent, kept next to this script.
$alreadySentDir = __DIR__ . "/already_sent";

// Without this directory nothing can be remembered and every run would send
// every link again, so a failure to create it is fatal.
if (!is_dir($alreadySentDir) && !mkdir($alreadySentDir, 0755, true)) { // recursive mkdir
    die("❌ Cannot create archive directory $alreadySentDir\n");
}
89 | + | ||
90 | + | ||
91 | + | ||
// Options that are the same for every link are set once, outside the loop.
curl_setopt($ch, CURLOPT_URL, $apiUrl);
curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);

// For each link: skip it if already archived, skip it if the page is too
// small, otherwise POST it to Readeck and archive it once Readeck accepted it.
foreach ($links as $link) {

    // READECK will accept several times the same URL !
    // Make sure we don't send it several times by keeping an archive here:
    // one marker file per URL, named after the md5 of the URL.
    $filePath = $alreadySentDir . '/' . md5($link) . '.txt';

    if (file_exists($filePath)) {
        echo "ℹ️ Already sent: $link\n";
        continue;
    }

    // Download the page ourselves to measure how much text it carries.
    $content = file_get_contents($link);
    if ($content === false) {
        echo "❌ Failed to fetch $link\n";
        continue;
    }
    $textLength = strlen(strip_tags($content));

    if ($textLength < $MINIMUM_TEXT_SIZE) {
        echo "⚠️ Skipping $link\ncontent too small ($textLength chars < $MINIMUM_TEXT_SIZE )\n";
        continue;
    }

    echo "🟢 Will add to Readeck $link\nLength: $textLength\n";

    // not passing title here, since we don't have it
    $payload = json_encode([
        "labels" => ["automasto"],
        "url" => $link,
    ]);
    curl_setopt($ch, CURLOPT_POSTFIELDS, $payload);

    $response = curl_exec($ch);

    if ($response === false) {
        echo "❌ Error adding $link: " . curl_error($ch) . "\n";
        continue;
    }

    $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    if ($httpCode < 200 || $httpCode >= 300) {
        // Rejected (bad token, server error, ...): the link is NOT archived,
        // so it is tried again on the next run.
        echo "⚠️ Server returned status $httpCode for $link\n";
        continue;
    }

    // Archive the link only now that Readeck has accepted it.
    if (file_put_contents($filePath, $link) === false) {
        echo "⚠️ Could not write $filePath; $link may be sent again on the next run\n";
    }

    // The decoded value is not needed, only whether the body was valid JSON.
    json_decode($response, true);
    if (json_last_error() === JSON_ERROR_NONE) {
        echo "✅ [$httpCode] Successfully added: $link\n";
    } else {
        echo "⚠️ Response is not valid JSON for $link: $response\n";
    }
}

curl_close($ch);
Plus récent
Plus ancien