mastobook_to_readeck.php
· 4.0 KiB · PHP
Исходник
#!/usr/bin/php
<?php
//-----------------------------
// Mastodon / snac bookmarks -> Readeck importer.
//
// Reads the bookmarks of a Mastodon-compatible account, takes the first
// URL found in each bookmarked post and submits it to a Readeck instance.
// Put this in a cron to regularly import all your mastodon / snac
// bookmarks in your readeck instance.
//
// CREDENTIALS (fill these in)
//-----------------------------
$MASTODON_TOKEN = 'xxxx';
$MASTODON_HOST = 'go.lema.org';
$READECK_TOKEN = 'xxxx';
$READECK_HOST = 'read.lema.org';
// Pages whose tag-stripped body is shorter than this (measured with
// strlen(), i.e. in bytes) are ignored.
$MINIMUM_TEXT_SIZE = 500;

//-----------------------------
// FETCH MASTODON BOOKMARKS
//-----------------------------
echo "# Fetching mastodon / snac bookmarks...\n";
$ch = curl_init("https://$MASTODON_HOST/api/v1/bookmarks");
curl_setopt_array($ch, [
    CURLOPT_RETURNTRANSFER => true,
    CURLOPT_HTTPHEADER => [
        "Authorization: Bearer $MASTODON_TOKEN",
        "Accept: application/json",
    ],
]);
$bookmarksJson = curl_exec($ch);
if ($bookmarksJson === false) {
    // Transport-level failure (DNS, TLS, timeout, ...): nothing to parse.
    die("❌ Failed to fetch Mastodon bookmarks: " . curl_error($ch) . "\n");
}
$mastodonStatus = curl_getinfo($ch, CURLINFO_HTTP_CODE);
if ($mastodonStatus < 200 || $mastodonStatus >= 300) {
    // An error response can still be valid JSON; do not mistake it for bookmarks.
    die("❌ Mastodon returned status $mastodonStatus while fetching bookmarks.\n");
}
$bookmarks = json_decode($bookmarksJson, true);
if (!is_array($bookmarks)) {
    die("❌ Failed to parse Mastodon bookmarks.\n");
}
echo "Found bookmarks:".count($bookmarks)."\n";

//-----------------------------
// FIND VALID URLs in posts
//-----------------------------
// Must start as an empty array: when no post contains a usable URL the
// count() and foreach below would otherwise run on an undefined variable.
$links = [];
foreach ($bookmarks as $status) {
    if (!is_array($status) || !isset($status['content']) || !is_string($status['content'])) {
        continue;
    }
    $text = strip_tags($status['content']);
    // Only the first URL of each post is considered.
    if (preg_match('/https?:\/\/[^\s"<]+/', $text, $match) !== 1) {
        continue;
    }
    $oneLink = $match[0];
    if (filter_var($oneLink, FILTER_VALIDATE_URL)) {
        $links[] = $oneLink;
    } else {
        // This happens for example if URL has an emoji at the end
        echo "INVALID URL: $oneLink\n";
    }
}
echo "Valid URLS:".count($links)."\n";
print_r($links);

//-----------------------------
// SEND LINKS TO READECK
//-----------------------------
$apiUrl = "https://$READECK_HOST/api/bookmarks";
$headers = [
    "Authorization: Bearer $READECK_TOKEN",
    'Accept: application/json',
    'Content-Type: application/json',
];
// One handle is reused for every request; only the POST body changes per link.
$ch = curl_init();
curl_setopt_array($ch, [
    CURLOPT_URL => $apiUrl,
    CURLOPT_RETURNTRANSFER => true,
    CURLOPT_POST => true,
    CURLOPT_HTTPHEADER => $headers,
]);

$alreadySentDir = __DIR__ . "/already_sent";
if (!is_dir($alreadySentDir) && !mkdir($alreadySentDir, 0755, true)) { // recursive mkdir
    // Without the archive directory every run would resubmit every link.
    die("❌ Could not create directory $alreadySentDir\n");
}

foreach ($links as $link) {
    // READECK will accept several times the same URL !
    // Make sure we don't send it several times by keeping an archive here
    // (one marker file per link, named after the md5 of the URL).
    $filePath = $alreadySentDir . "/" . md5($link) . ".txt";
    if (file_exists($filePath)) {
        echo "ℹ️ Already sent: $link\n";
        continue;
    }

    // Download the page ourselves to filter out links with too little text.
    $html = file_get_contents($link);
    if ($html === false) {
        echo "❌ Failed to fetch $link\n";
        continue;
    }
    $plainText = strip_tags($html);
    if (strlen($plainText) < $MINIMUM_TEXT_SIZE) {
        echo "⚠️ Skipping $link\ncontent too small (".strlen($plainText)." chars < $MINIMUM_TEXT_SIZE )\n";
        continue;
    }
    echo "🟢 Will add to Readeck $link\nLength: " . strlen($plainText)."\n";

    //not passing title here, since we don't have it
    $payload = json_encode([
        "labels" => ["automasto"],
        "url" => $link,
    ]);
    curl_setopt($ch, CURLOPT_POSTFIELDS, $payload);
    $response = curl_exec($ch);
    if (curl_errno($ch)) {
        echo "❌ Error adding $link: " . curl_error($ch) . "\n";
        continue;
    }

    $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    $accepted = $httpCode >= 200 && $httpCode < 300;
    if ($accepted) {
        // Record the link only once the server accepted it, so that an
        // auth failure or server error is retried on the next run instead
        // of silently losing the bookmark.
        if (file_put_contents($filePath, $link) === false) {
            echo "⚠️ Could not write $filePath, $link may be sent again\n";
        }
    }

    json_decode($response, true);
    if (json_last_error() !== JSON_ERROR_NONE) {
        echo "⚠️ Response is not valid JSON for $link: $response\n";
    } elseif ($accepted) {
        echo "✅ [$httpCode] Successfully added: $link\n";
    } else {
        echo "⚠️ Server returned status $httpCode for $link\n";
    }
}
curl_close($ch);
#!/usr/bin/php
<?php
//-----------------------------
// Mastodon / snac bookmarks -> Readeck importer.
//
// Reads the bookmarks of a Mastodon-compatible account, takes the first
// URL found in each bookmarked post and submits it to a Readeck instance.
// Put this in a cron to regularly import all your mastodon / snac
// bookmarks in your readeck instance.
//
// CREDENTIALS (fill these in)
//-----------------------------
$MASTODON_TOKEN = 'xxxx';
$MASTODON_HOST = 'go.lema.org';
$READECK_TOKEN = 'xxxx';
$READECK_HOST = 'read.lema.org';
// Pages whose tag-stripped body is shorter than this (measured with
// strlen(), i.e. in bytes) are ignored.
$MINIMUM_TEXT_SIZE = 500;

//-----------------------------
// FETCH MASTODON BOOKMARKS
//-----------------------------
echo "# Fetching mastodon / snac bookmarks...\n";
$ch = curl_init("https://$MASTODON_HOST/api/v1/bookmarks");
curl_setopt_array($ch, [
    CURLOPT_RETURNTRANSFER => true,
    CURLOPT_HTTPHEADER => [
        "Authorization: Bearer $MASTODON_TOKEN",
        "Accept: application/json",
    ],
]);
$bookmarksJson = curl_exec($ch);
if ($bookmarksJson === false) {
    // Transport-level failure (DNS, TLS, timeout, ...): nothing to parse.
    die("❌ Failed to fetch Mastodon bookmarks: " . curl_error($ch) . "\n");
}
$mastodonStatus = curl_getinfo($ch, CURLINFO_HTTP_CODE);
if ($mastodonStatus < 200 || $mastodonStatus >= 300) {
    // An error response can still be valid JSON; do not mistake it for bookmarks.
    die("❌ Mastodon returned status $mastodonStatus while fetching bookmarks.\n");
}
$bookmarks = json_decode($bookmarksJson, true);
if (!is_array($bookmarks)) {
    die("❌ Failed to parse Mastodon bookmarks.\n");
}
echo "Found bookmarks:".count($bookmarks)."\n";

//-----------------------------
// FIND VALID URLs in posts
//-----------------------------
// Must start as an empty array: when no post contains a usable URL the
// count() and foreach below would otherwise run on an undefined variable.
$links = [];
foreach ($bookmarks as $status) {
    if (!is_array($status) || !isset($status['content']) || !is_string($status['content'])) {
        continue;
    }
    $text = strip_tags($status['content']);
    // Only the first URL of each post is considered.
    if (preg_match('/https?:\/\/[^\s"<]+/', $text, $match) !== 1) {
        continue;
    }
    $oneLink = $match[0];
    if (filter_var($oneLink, FILTER_VALIDATE_URL)) {
        $links[] = $oneLink;
    } else {
        // This happens for example if URL has an emoji at the end
        echo "INVALID URL: $oneLink\n";
    }
}
echo "Valid URLS:".count($links)."\n";
print_r($links);

//-----------------------------
// SEND LINKS TO READECK
//-----------------------------
$apiUrl = "https://$READECK_HOST/api/bookmarks";
$headers = [
    "Authorization: Bearer $READECK_TOKEN",
    'Accept: application/json',
    'Content-Type: application/json',
];
// One handle is reused for every request; only the POST body changes per link.
$ch = curl_init();
curl_setopt_array($ch, [
    CURLOPT_URL => $apiUrl,
    CURLOPT_RETURNTRANSFER => true,
    CURLOPT_POST => true,
    CURLOPT_HTTPHEADER => $headers,
]);

$alreadySentDir = __DIR__ . "/already_sent";
if (!is_dir($alreadySentDir) && !mkdir($alreadySentDir, 0755, true)) { // recursive mkdir
    // Without the archive directory every run would resubmit every link.
    die("❌ Could not create directory $alreadySentDir\n");
}

foreach ($links as $link) {
    // READECK will accept several times the same URL !
    // Make sure we don't send it several times by keeping an archive here
    // (one marker file per link, named after the md5 of the URL).
    $filePath = $alreadySentDir . "/" . md5($link) . ".txt";
    if (file_exists($filePath)) {
        echo "ℹ️ Already sent: $link\n";
        continue;
    }

    // Download the page ourselves to filter out links with too little text.
    $html = file_get_contents($link);
    if ($html === false) {
        echo "❌ Failed to fetch $link\n";
        continue;
    }
    $plainText = strip_tags($html);
    if (strlen($plainText) < $MINIMUM_TEXT_SIZE) {
        echo "⚠️ Skipping $link\ncontent too small (".strlen($plainText)." chars < $MINIMUM_TEXT_SIZE )\n";
        continue;
    }
    echo "🟢 Will add to Readeck $link\nLength: " . strlen($plainText)."\n";

    //not passing title here, since we don't have it
    $payload = json_encode([
        "labels" => ["automasto"],
        "url" => $link,
    ]);
    curl_setopt($ch, CURLOPT_POSTFIELDS, $payload);
    $response = curl_exec($ch);
    if (curl_errno($ch)) {
        echo "❌ Error adding $link: " . curl_error($ch) . "\n";
        continue;
    }

    $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    $accepted = $httpCode >= 200 && $httpCode < 300;
    if ($accepted) {
        // Record the link only once the server accepted it, so that an
        // auth failure or server error is retried on the next run instead
        // of silently losing the bookmark.
        if (file_put_contents($filePath, $link) === false) {
            echo "⚠️ Could not write $filePath, $link may be sent again\n";
        }
    }

    json_decode($response, true);
    if (json_last_error() !== JSON_ERROR_NONE) {
        echo "⚠️ Response is not valid JSON for $link: $response\n";
    } elseif ($accepted) {
        echo "✅ [$httpCode] Successfully added: $link\n";
    } else {
        echo "⚠️ Server returned status $httpCode for $link\n";
    }
}
curl_close($ch);