最後活躍 1747349217

修訂 94e202ff5b20fa32055eeaf6e6292aafed4ea7c0

mastobook_to_readeck.php 原始檔案
#!/usr/bin/php
<?php

//-----------------------------
// CREDENTIALS
//-----------------------------
// Each credential can be overridden with an environment variable of the same
// name (e.g. MASTODON_TOKEN=... ./mastobook_to_readeck.php); the literal is
// only the fallback used when the variable is unset or empty.
// NOTE(review): access tokens stored in a source file are exposed to anyone
// who can read it - prefer the environment variables and rotate these tokens.

$MASTODON_TOKEN = getenv('MASTODON_TOKEN') ?: '8beea62e32b336e5d934d06a21b0b996';
$MASTODON_HOST = getenv('MASTODON_HOST') ?: 'go.lema.org';

$READECK_TOKEN = getenv('READECK_TOKEN') ?: 'LDJb4YbGKe6Fp8cSygpuw5LjmwkgGTAbFbP77TQtYwe1hFZ4';
$READECK_HOST = getenv('READECK_HOST') ?: 'read.lema.org';

// Pages whose tag-stripped body is shorter than this are ignored.
// The length is measured with strlen(), i.e. in bytes, not characters.
$MINIMUM_TEXT_SIZE = 500;

//-----------------------------
// FETCH MASTODON BOOKMARKS
//-----------------------------
// Downloads the bookmark list of the account identified by $MASTODON_TOKEN
// and decodes it into $bookmarks (array of statuses). The script stops with
// a message if the request, the HTTP status or the JSON decoding fails.
echo "# Fetching mastodon / snac bookmarks...\n";
date_default_timezone_set('America/Sao_Paulo');
echo date('Y-m-d H:i:s')."\n";

// NOTE(review): a single request is made and no "limit" or pagination is
// handled, so presumably only the first page of bookmarks is seen - confirm
// this is intended.
$ch = curl_init("https://$MASTODON_HOST/api/v1/bookmarks");
curl_setopt_array($ch, [
    CURLOPT_RETURNTRANSFER => true,
    CURLOPT_HTTPHEADER => [
        "Authorization: Bearer $MASTODON_TOKEN",
        "Accept: application/json",
    ],
]);

$bookmarksJson = curl_exec($ch);
if ($bookmarksJson === false) {
    // Transport-level failure (DNS, TLS, connection refused, ...): report the
    // real cause instead of a misleading "failed to parse".
    die("❌ Failed to fetch Mastodon bookmarks: " . curl_error($ch) . "\n");
}

// An error reply (e.g. 401 with {"error": "..."}) is valid JSON and would
// otherwise be counted as a one-element bookmark list.
$bookmarksHttpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
if ($bookmarksHttpCode < 200 || $bookmarksHttpCode >= 300) {
    die("❌ Mastodon returned HTTP status $bookmarksHttpCode while fetching bookmarks.\n");
}

$bookmarks = json_decode($bookmarksJson, true);
if (!is_array($bookmarks)) {
    die("❌ Failed to parse Mastodon bookmarks.\n");
}

echo "Found bookmarks:".count($bookmarks)."\n";

//-----------------------------
// FIND VALID URLs in posts
//-----------------------------

/**
 * Returns the first http(s) URL found in the HTML content of a status.
 *
 * Tags are stripped first, then HTML entities are decoded so that a query
 * string written as "?a=1&amp;b=2" in the markup comes back as "?a=1&b=2".
 *
 * @param string $html HTML content of one status
 * @return string|null the first URL, or null when the text contains none
 */
function extractFirstUrl(string $html): ?string
{
    $text = html_entity_decode(strip_tags($html), ENT_QUOTES | ENT_HTML5, 'UTF-8');

    if (preg_match('/https?:\/\/[^\s"<]+/', $text, $match) !== 1) {
        return null;
    }

    return $match[0];
}

// Must be defined even when no bookmark yields a link: count() below (and any
// later loop over $links) would otherwise run on an undefined variable.
$links = [];

foreach ($bookmarks ?? [] as $status) {
    if (!isset($status['content'])) {
        continue;
    }

    // Only the first URL of each post is considered.
    $oneLink = extractFirstUrl($status['content']);
    if ($oneLink === null) {
        continue;
    }

    if (filter_var($oneLink, FILTER_VALIDATE_URL)) {
        $links[] = $oneLink;
    } else {
        // This happens for example if URL has an emoji at the end
        echo "INVALID URL: $oneLink\n";
    }
}


echo "Valid URLS:".count($links)."\n";

print_r($links);


//-----------------------------
// SEND LINKS TO READECK
//-----------------------------
// For every link not sent before: download the page, skip it when its text is
// shorter than $MINIMUM_TEXT_SIZE, otherwise create a Readeck bookmark
// labelled "automasto" and remember the link in the already_sent directory.

$apiUrl = "https://$READECK_HOST/api/bookmarks";

$headers = [
    "Authorization: Bearer $READECK_TOKEN",
    'Accept: application/json',
    'Content-Type: application/json',
];

// Handle used ONLY to download the candidate pages. It is kept separate from
// the Readeck handle so that third-party sites receive a plain GET and never
// see the Readeck bearer token or a previous JSON payload.
$pageCh = curl_init();
curl_setopt_array($pageCh, [
    CURLOPT_RETURNTRANSFER => true,
    CURLOPT_USERAGENT => 'Mozilla/5.0',
    // Judge the final page, not the short body of a 301/302 response.
    CURLOPT_FOLLOWLOCATION => true,
    CURLOPT_MAXREDIRS => 5,
    // A single unresponsive site must not block the whole run.
    CURLOPT_CONNECTTIMEOUT => 10,
    CURLOPT_TIMEOUT => 30,
]);

// Handle used ONLY for the Readeck API.
$ch = curl_init($apiUrl);
curl_setopt_array($ch, [
    CURLOPT_RETURNTRANSFER => true,
    CURLOPT_POST => true,
    CURLOPT_USERAGENT => 'Mozilla/5.0',
    CURLOPT_HTTPHEADER => $headers,
]);

$alreadySentDir = __DIR__ . "/already_sent";

// Without this directory nothing can be remembered and every run would send
// every link again, so stop instead of continuing. The second is_dir() covers
// a directory created concurrently between the check and mkdir().
if (!is_dir($alreadySentDir) && !mkdir($alreadySentDir, 0755, true) && !is_dir($alreadySentDir)) {
    die("❌ Could not create directory $alreadySentDir\n");
}

foreach ($links ?? [] as $link) {

    // READECK will accept several times the same URL !
    // Make sure we don't send it several times by keeping an archive here
    $hash = md5($link);
    $filePath = "$alreadySentDir/{$hash}.txt";

    if (file_exists($filePath)) {
        echo "ℹ️ Already sent: $link\n";
        continue;
    }

    // First check if page has content
    curl_setopt($pageCh, CURLOPT_URL, $link);
    $content = curl_exec($pageCh);

    if ($content === false) {
        echo "❌ Failed to fetch $link\n";
        continue;
    }

    // Length in bytes of the page once its tags are removed.
    $plainTextLength = strlen(strip_tags($content));

    if ($plainTextLength < $MINIMUM_TEXT_SIZE) {
        echo "⚠️ Skipping $link\ncontent too small ($plainTextLength chars < $MINIMUM_TEXT_SIZE )\n";
        continue;
    }

    echo "🟢 Will add to Readeck $link\nLength: $plainTextLength\n";

    //not passing title here, since we don't have it
    $payload = json_encode([
        "labels" => ["automasto"],
        "url" => $link,
    ]);

    curl_setopt($ch, CURLOPT_POSTFIELDS, $payload);

    $response = curl_exec($ch);

    if ($response === false) {
        echo "❌ Error adding $link: " . curl_error($ch) . "\n";
        continue;
    }

    $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    $accepted = $httpCode >= 200 && $httpCode < 300;

    // Remember the link only when Readeck accepted it; after an error status
    // (bad token, server failure, ...) it must be retried on the next run.
    if ($accepted && file_put_contents($filePath, $link) === false) {
        echo "⚠️ Could not record $link in $filePath\n";
    }

    json_decode($response, true);
    if (json_last_error() !== JSON_ERROR_NONE) {
        echo "⚠️ Response is not valid JSON for $link: $response\n";
    } elseif ($accepted) {
        echo "✅ [$httpCode] Successfully added: $link\n";
    } else {
        echo "⚠️ Server returned status $httpCode for $link\n";
    }

}

curl_close($pageCh);
curl_close($ch);
171