mastobook_to_readeck.php
· 4.3 KiB · PHP
Brut
#!/usr/bin/php
<?php
//-----------------------------
// CREDENTIALS
//-----------------------------
// Each credential can be overridden with an environment variable of the same
// name; the literal is only the fallback used when that variable is unset or empty.
// NOTE(review): the fallback tokens are stored in clear text in this file —
// consider rotating them and supplying them through the environment only.
$MASTODON_TOKEN = getenv('MASTODON_TOKEN') ?: '8beea62e32b336e5d934d06a21b0b996';
$MASTODON_HOST = getenv('MASTODON_HOST') ?: 'go.lema.org';
$READECK_TOKEN = getenv('READECK_TOKEN') ?: 'LDJb4YbGKe6Fp8cSygpuw5LjmwkgGTAbFbP77TQtYwe1hFZ4';
$READECK_HOST = getenv('READECK_HOST') ?: 'read.lema.org';
// Pages whose tag-stripped body is shorter than this many bytes are not sent to Readeck.
$MINIMUM_TEXT_SIZE = 500;
//-----------------------------
// FETCH MASTODON BOOKMARKS
//-----------------------------
// Requests /api/v1/bookmarks from $MASTODON_HOST with the bearer token and
// decodes the JSON answer into $bookmarks. The script stops here when the
// transfer fails, the server answers with a non-2xx status, or the body does
// not decode to an array.
echo "# Fetching mastodon / snac bookmarks...\n";
date_default_timezone_set('America/Sao_Paulo');
echo date('Y-m-d H:i:s')."\n";

$ch = curl_init("https://$MASTODON_HOST/api/v1/bookmarks");
curl_setopt_array($ch, [
    CURLOPT_RETURNTRANSFER => true,
    // Without timeouts a stalled server would hang the script indefinitely.
    CURLOPT_CONNECTTIMEOUT => 15,
    CURLOPT_TIMEOUT => 60,
    CURLOPT_HTTPHEADER => [
        "Authorization: Bearer $MASTODON_TOKEN",
        "Accept: application/json",
    ],
]);

$bookmarksJson = curl_exec($ch);
if ($bookmarksJson === false) {
    // Transport-level failure (DNS, TLS, timeout, ...): report the cURL reason.
    die("❌ Failed to fetch Mastodon bookmarks: " . curl_error($ch) . "\n");
}

// An error body such as {"error": "..."} is valid JSON and decodes to an array,
// so the HTTP status has to be checked before the payload is trusted.
$bookmarksStatus = curl_getinfo($ch, CURLINFO_HTTP_CODE);
if ($bookmarksStatus < 200 || $bookmarksStatus >= 300) {
    die("❌ Mastodon returned HTTP status $bookmarksStatus.\n");
}

$bookmarks = json_decode($bookmarksJson, true);
if (!is_array($bookmarks)) {
    die("❌ Failed to parse Mastodon bookmarks.\n");
}
echo "Found bookmarks:".count($bookmarks)."\n";
//-----------------------------
// FIND VALID URLs in posts
//-----------------------------
// Builds $links: at most one URL (the first one found) per bookmarked post,
// kept only when filter_var() accepts it, without duplicates.
// Initialised up front so count()/foreach below work even when nothing matches.
$links = [];
foreach ($bookmarks as $status) {
    if (!isset($status['content']) || !is_string($status['content'])) {
        continue;
    }

    // strip_tags() inserts no whitespace, so text following a line break or a
    // paragraph end would be glued onto a URL that ends the paragraph.
    $spaced = preg_replace('~<br\s*/?>|</p>~i', ' ', $status['content']);
    if ($spaced === null) {
        $spaced = $status['content'];
    }

    // The post body is HTML: decode entities so that e.g. "&amp;" inside a
    // query string becomes "&" again.
    // NOTE: links containing "&" therefore hash differently from archive
    // entries written by earlier versions and may be submitted once more.
    $content = html_entity_decode(strip_tags($spaced), ENT_QUOTES | ENT_HTML5, 'UTF-8');

    // Only the first link of a post is used.
    if (preg_match('/https?:\/\/[^\s"<]+/', $content, $match) !== 1) {
        continue;
    }
    $oneLink = $match[0];

    if (!filter_var($oneLink, FILTER_VALIDATE_URL)) {
        // This happens for example if URL has an emoji at the end
        echo "INVALID URL: $oneLink\n";
        continue;
    }

    // The same link bookmarked through several posts is processed once.
    if (!in_array($oneLink, $links, true)) {
        $links[] = $oneLink;
    }
}
echo "Valid URLS:".count($links)."\n";
print_r($links);
//-----------------------------
// SEND LINKS TO READECK
//-----------------------------
// For every collected link: skip it when a marker file exists in already_sent/,
// download the page to check that it holds enough text, then POST it to the
// Readeck bookmarks API. A marker file is written only after Readeck answered
// with a 2xx status, so failed submissions are retried on the next run.
$apiUrl = "https://$READECK_HOST/api/bookmarks";

$headers = [
    "Authorization: Bearer $READECK_TOKEN",
    'Accept: application/json',
    'Content-Type: application/json',
];

// Handle used only to download the linked pages. It is deliberately separate
// from the Readeck handle: sharing one handle would send the Readeck
// Authorization header and the previous POST body to third-party sites.
$pageHandle = curl_init();
curl_setopt_array($pageHandle, [
    CURLOPT_RETURNTRANSFER => true,
    CURLOPT_USERAGENT => 'Mozilla/5.0',
    // Follow redirects so shortened or moved links are measured on the final page.
    CURLOPT_FOLLOWLOCATION => true,
    CURLOPT_MAXREDIRS => 5,
    CURLOPT_CONNECTTIMEOUT => 15,
    CURLOPT_TIMEOUT => 60,
]);

// Handle used only for the Readeck API; the body is set per link in the loop.
$ch = curl_init();
curl_setopt_array($ch, [
    CURLOPT_URL => $apiUrl,
    CURLOPT_RETURNTRANSFER => true,
    CURLOPT_POST => true,
    CURLOPT_USERAGENT => 'Mozilla/5.0',
    CURLOPT_HTTPHEADER => $headers,
    CURLOPT_CONNECTTIMEOUT => 15,
    CURLOPT_TIMEOUT => 60,
]);

$alreadySentDir = __DIR__ . "/already_sent";
if (!is_dir($alreadySentDir) && !mkdir($alreadySentDir, 0755, true) && !is_dir($alreadySentDir)) {
    // Without the archive directory every run would resubmit every link.
    die("❌ Cannot create archive directory $alreadySentDir\n");
}

foreach ((isset($links) ? $links : []) as $link) {
    // READECK will accept several times the same URL !
    // Make sure we don't send it several times by keeping an archive here
    $hash = md5($link);
    $filePath = $alreadySentDir . "/{$hash}.txt";
    if (file_exists($filePath)) {
        echo "ℹ️ Already sent: $link\n";
        continue;
    }

    // First check if page has content
    curl_setopt($pageHandle, CURLOPT_URL, $link);
    $content = curl_exec($pageHandle);
    if ($content === false) {
        echo "❌ Failed to fetch $link\n";
        continue;
    }

    $plainText = strip_tags($content);
    $plainLength = strlen($plainText);
    if ($plainLength < $MINIMUM_TEXT_SIZE) {
        echo "⚠️ Skipping $link\ncontent too small (".$plainLength." chars < $MINIMUM_TEXT_SIZE )\n";
        continue;
    }
    echo "🟢 Will add to Readeck $link\nLength: " . $plainLength."\n";

    //not passing title here, since we don't have it
    $payload = json_encode([
        "labels" => ["automasto"],
        "url" => $link,
    ]);
    curl_setopt($ch, CURLOPT_POSTFIELDS, $payload);

    $response = curl_exec($ch);
    if ($response === false) {
        echo "❌ Error adding $link: " . curl_error($ch) . "\n";
        continue;
    }

    $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    $accepted = $httpCode >= 200 && $httpCode < 300;
    if ($accepted) {
        // Archive the link only once Readeck accepted it; a rejected request
        // (e.g. bad token, server error) must stay eligible for a later run.
        if (file_put_contents($filePath, $link) === false) {
            echo "⚠️ Could not write archive file $filePath\n";
        }
    }

    json_decode($response, true);
    if (json_last_error() !== JSON_ERROR_NONE) {
        echo "⚠️ Response is not valid JSON for $link: $response\n";
    } elseif ($accepted) {
        echo "✅ [$httpCode] Successfully added: $link\n";
    } else {
        echo "⚠️ Server returned status $httpCode for $link\n";
    }
}
curl_close($pageHandle);
curl_close($ch);
1 | #!/usr/bin/php |
2 | <?php |
3 | |
4 | //----------------------------- |
5 | // CREDENTIALS |
6 | //----------------------------- |
7 | |
8 | $MASTODON_TOKEN = '8beea62e32b336e5d934d06a21b0b996'; |
9 | $MASTODON_HOST = 'go.lema.org'; |
10 | |
11 | $READECK_TOKEN = 'LDJb4YbGKe6Fp8cSygpuw5LjmwkgGTAbFbP77TQtYwe1hFZ4'; |
12 | $READECK_HOST = 'read.lema.org'; |
13 | |
14 | $MINIMUM_TEXT_SIZE = 500; // article with less characters of content will be ignored |
15 | |
16 | |
17 | |
18 | |
19 | //----------------------------- |
20 | // FETCH MASTODON BOOKMARKS |
21 | //----------------------------- |
22 | echo "# Fetching mastodon / snac bookmarks...\n"; |
23 | date_default_timezone_set('America/Sao_Paulo'); |
24 | echo date('Y-m-d H:i:s')."\n"; |
25 | |
26 | $ch = curl_init("https://$MASTODON_HOST/api/v1/bookmarks"); |
27 | curl_setopt_array($ch, [ |
28 | CURLOPT_RETURNTRANSFER => true, |
29 | CURLOPT_HTTPHEADER => [ |
30 | "Authorization: Bearer $MASTODON_TOKEN", |
31 | "Accept: application/json" |
32 | ] |
33 | ]); |
34 | |
35 | $bookmarksJson = curl_exec($ch); |
36 | $bookmarks = json_decode($bookmarksJson, true); |
37 | if (!is_array($bookmarks)) { |
38 | die("❌ Failed to parse Mastodon bookmarks.\n"); |
39 | } |
40 | |
41 | echo "Found bookmarks:".count($bookmarks)."\n"; |
42 | |
43 | //----------------------------- |
44 | // FIND VALID URLs in posts |
45 | //----------------------------- |
46 | |
47 | foreach ($bookmarks as $status) { |
48 | if (!isset($status['content'])) { |
49 | continue; |
50 | } |
51 | $content = strip_tags($status['content']); |
52 | preg_match_all('/https?:\/\/[^\s"<]+/', $content, $matches); |
53 | if (!empty($matches[0])) { |
54 | |
55 | $oneLink = $matches[0][0]; |
56 | if (filter_var($oneLink, FILTER_VALIDATE_URL)) { |
57 | $links[] = $oneLink; |
58 | } else { |
59 | // This happens for example if URL has an emoji at the end |
60 | echo "INVALID URL: $oneLink\n"; |
61 | } |
62 | } |
63 | } |
64 | |
65 | |
66 | echo "Valid URLS:".count($links)."\n"; |
67 | |
68 | print_r($links); |
69 | |
70 | |
71 | //----------------------------- |
72 | // SEND LINKS TO READECK |
73 | //----------------------------- |
74 | |
75 | $apiUrl = "https://$READECK_HOST/api/bookmarks"; |
76 | |
77 | $ch = curl_init(); |
78 | curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); |
79 | curl_setopt($ch, CURLOPT_POST, true); |
80 | curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0'); |
81 | |
82 | $headers = [ |
83 | "Authorization: Bearer $READECK_TOKEN", |
84 | 'Accept: application/json', |
85 | 'Content-Type: application/json' |
86 | ]; |
87 | |
88 | $alreadySentDir = __DIR__ . "/already_sent"; |
89 | |
90 | if (!is_dir($alreadySentDir)) { |
91 | mkdir($alreadySentDir, 0755, true); // recursive mkdir |
92 | } |
93 | |
94 | |
95 | |
96 | foreach ($links as $link) { |
97 | |
98 | // READECK will accept several times the same URL ! |
99 | // Make sure we don't send it several times by keeping an archive here |
100 | $hash = md5($link); |
101 | $filePath = __DIR__ . "/already_sent/{$hash}.txt"; |
102 | |
103 | if (file_exists($filePath)) { |
104 | echo "ℹ️ Already sent: $link\n"; |
105 | continue; |
106 | } |
107 | |
108 | |
109 | $options = [ |
110 | 'http' => [ |
111 | 'method' => 'GET', |
112 | 'header' => "User-Agent: Mozilla/5.0\r\n" |
113 | ] |
114 | ]; |
115 | |
116 | // First check if page has content |
117 | //$ch = curl_init($link);; |
118 | curl_setopt($ch, CURLOPT_URL, $link); |
119 | curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); |
120 | $content = curl_exec($ch); |
121 | |
122 | if ($content === false) { |
123 | echo "❌ Failed to fetch $link\n"; |
124 | continue; |
125 | } |
126 | $plainText = strip_tags($content); |
127 | |
128 | if (strlen($plainText) < $MINIMUM_TEXT_SIZE) { |
129 | echo "⚠️ Skipping $link\ncontent too small (".strlen($plainText)." chars < $MINIMUM_TEXT_SIZE )\n"; |
130 | continue; |
131 | } |
132 | |
133 | echo "🟢 Will add to Readeck $link\nLength: " . strlen($plainText)."\n"; |
134 | |
135 | //not passing title here, since we don't have it |
136 | $payload = json_encode([ |
137 | "labels" => ["automasto"], |
138 | "url" => $link |
139 | ]); |
140 | |
141 | curl_setopt($ch, CURLOPT_URL, $apiUrl); |
142 | curl_setopt($ch, CURLOPT_POSTFIELDS, $payload); |
143 | curl_setopt($ch, CURLOPT_HTTPHEADER, $headers); |
144 | |
145 | $response = curl_exec($ch); |
146 | $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE); |
147 | |
148 | |
149 | if (curl_errno($ch)) { |
150 | echo "❌ Error adding $link: " . curl_error($ch) . "\n"; |
151 | } else { |
152 | |
153 | // Store already sent file only if connection worked |
154 | file_put_contents($filePath, $link); |
155 | |
156 | $json = json_decode($response, true); |
157 | if (json_last_error() === JSON_ERROR_NONE) { |
158 | if ($httpCode >= 200 && $httpCode < 300) { |
159 | echo "✅ [$httpCode] Successfully added: $link\n"; |
160 | } else { |
161 | echo "⚠️ Server returned status $httpCode for $link\n"; |
162 | } |
163 | } else { |
164 | echo "⚠️ Response is not valid JSON for $link: $response\n"; |
165 | } |
166 | } |
167 | |
168 | } |
169 | |
170 | curl_close($ch); |
171 |