summaryrefslogtreecommitdiff
path: root/rss.c
diff options
context:
space:
mode:
Diffstat (limited to 'rss.c')
-rw-r--r--rss.c274
1 files changed, 274 insertions, 0 deletions
diff --git a/rss.c b/rss.c
new file mode 100644
index 0000000..f72dac4
--- /dev/null
+++ b/rss.c
@@ -0,0 +1,274 @@
1/* snac - A simple, minimalistic ActivityPub instance */
2/* copyright (c) 2025 grunfink et al. / MIT license */
3
4#include "xs.h"
5#include "xs_html.h"
6#include "xs_regex.h"
7#include "xs_time.h"
8#include "xs_match.h"
9#include "xs_curl.h"
10#include "xs_openssl.h"
11#include "xs_json.h"
12
13#include "snac.h"
14
15xs_str *rss_from_timeline(snac *user, const xs_list *timeline,
16 const char *title, const char *link, const char *desc)
17/* converts a timeline to rss */
18{
19 xs_html *rss = xs_html_tag("rss",
20 xs_html_attr("xmlns:content", "http:/" "/purl.org/rss/1.0/modules/content/"),
21 xs_html_attr("version", "2.0"),
22 xs_html_attr("xmlns:atom", "http:/" "/www.w3.org/2005/Atom"));
23
24 xs_html *channel = xs_html_tag("channel",
25 xs_html_tag("title",
26 xs_html_text(title)),
27 xs_html_tag("language",
28 xs_html_text("en")),
29 xs_html_tag("link",
30 xs_html_text(link)),
31 xs_html_sctag("atom:link",
32 xs_html_attr("href", link),
33 xs_html_attr("rel", "self"),
34 xs_html_attr("type", "application/rss+xml")),
35 xs_html_tag("generator",
36 xs_html_text(USER_AGENT)),
37 xs_html_tag("description",
38 xs_html_text(desc)));
39
40 xs_html_add(rss, channel);
41
42 int cnt = 0;
43 const char *v;
44
45 xs_list_foreach(timeline, v) {
46 xs *msg = NULL;
47
48 if (user) {
49 if (!valid_status(timeline_get_by_md5(user, v, &msg)))
50 continue;
51 }
52 else {
53 if (!valid_status(object_get_by_md5(v, &msg)))
54 continue;
55 }
56
57 const char *id = xs_dict_get(msg, "id");
58 const char *content = xs_dict_get(msg, "content");
59 const char *published = xs_dict_get(msg, "published");
60
61 if (user && !xs_startswith(id, user->actor))
62 continue;
63
64 if (!id || !content || !published)
65 continue;
66
67 /* create a title with the first line of the content */
68 xs *title = xs_replace(content, "<br>", "\n");
69 title = xs_regex_replace_i(title, "<[^>]+>", " ");
70 title = xs_regex_replace_i(title, "&[^;]+;", " ");
71 int i;
72
73 for (i = 0; title[i] && title[i] != '\n' && i < 50; i++);
74
75 if (title[i] != '\0') {
76 title[i] = '\0';
77 title = xs_str_cat(title, "...");
78 }
79
80 title = xs_strip_i(title);
81
82 /* convert the date */
83 time_t t = xs_parse_iso_date(published, 0);
84 xs *rss_date = xs_str_utctime(t, "%a, %d %b %Y %T +0000");
85
86 /* if it's the first one, add it to the header */
87 if (cnt == 0)
88 xs_html_add(channel,
89 xs_html_tag("lastBuildDate",
90 xs_html_text(rss_date)));
91
92 xs_html_add(channel,
93 xs_html_tag("item",
94 xs_html_tag("title",
95 xs_html_text(title)),
96 xs_html_tag("link",
97 xs_html_text(id)),
98 xs_html_tag("guid",
99 xs_html_text(id)),
100 xs_html_tag("pubDate",
101 xs_html_text(rss_date)),
102 xs_html_tag("description",
103 xs_html_text(content))));
104
105 cnt++;
106 }
107
108 return xs_html_render_s(rss, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
109}
110
111
112void rss_to_timeline(snac *user, const char *url)
113/* reads an RSS and inserts all ActivityPub posts into the user's timeline */
114{
115 if (!xs_startswith(url, "https:/") && !xs_startswith(url, "http:/"))
116 return;
117
118 xs *hdrs = xs_dict_new();
119 hdrs = xs_dict_set(hdrs, "accept", "application/rss+xml");
120 hdrs = xs_dict_set(hdrs, "user-agent", USER_AGENT);
121
122 /* get the RSS metadata */
123 xs *md5 = xs_md5_hex(url, strlen(url));
124 xs *rss_md_fn = xs_fmt("%s/rss", user->basedir);
125 mkdirx(rss_md_fn);
126 rss_md_fn = xs_str_cat(rss_md_fn, "/", md5, ".json");
127
128 xs *rss_md = NULL;
129 const char *etag = NULL;
130
131 FILE *f;
132 if ((f = fopen(rss_md_fn, "r")) != NULL) {
133 rss_md = xs_json_load(f);
134 fclose(f);
135
136 etag = xs_dict_get(rss_md, "etag");
137
138 if (xs_is_string(etag))
139 hdrs = xs_dict_set(hdrs, "if-none-match", etag);
140 }
141
142 if (rss_md == NULL)
143 rss_md = xs_dict_new();
144
145 xs *payload = NULL;
146 int status;
147 int p_size;
148
149 xs *rsp = xs_http_request("GET", url, hdrs, NULL, 0, &status, &payload, &p_size, 0);
150
151 snac_log(user, xs_fmt("parsing RSS %s %d", url, status));
152
153 if (!valid_status(status) || !xs_is_string(payload))
154 return;
155
156 /* not an RSS? done */
157 const char *ctype = xs_dict_get(rsp, "content-type");
158 if (!xs_is_string(ctype) || xs_str_in(ctype, "application/rss+xml") == -1)
159 return;
160
161 /* yes, parsing is done with regexes (now I have two problems blah blah blah) */
162 xs *links = xs_regex_select(payload, "<link>[^<]+</link>");
163 const char *link;
164
165 xs_list_foreach(links, link) {
166 xs *l = xs_replace(link, "<link>", "");
167 char *p = strchr(l, '<');
168
169 if (p == NULL)
170 continue;
171 *p = '\0';
172
173 /* skip this same URL */
174 if (strcmp(l, url) == 0)
175 continue;
176
177 /* skip crap */
178 if (!xs_startswith(l, "https:/") && !xs_startswith(l, "http:/"))
179 continue;
180
181 snac_debug(user, 1, xs_fmt("RSS link: %s", l));
182
183 if (timeline_here(user, l)) {
184 snac_debug(user, 1, xs_fmt("RSS entry already in timeline %s", l));
185 continue;
186 }
187
188 /* special trick for Mastodon: convert from the alternate format */
189 if (strchr(l, '@') != NULL) {
190 xs *l2 = xs_split(l, "/");
191
192 if (xs_list_len(l2) == 5) {
193 const char *uid = xs_list_get(l2, 3);
194 if (*uid == '@') {
195 xs *guessed_id = xs_fmt("https:/" "/%s/users/%s/statuses/%s",
196 xs_list_get(l2, 2), uid + 1, xs_list_get(l2, -1));
197
198 if (timeline_here(user, guessed_id)) {
199 snac_debug(user, 1, xs_fmt("RSS entry already in timeline (alt) %s", guessed_id));
200 continue;
201 }
202 }
203 }
204 }
205
206 xs *obj = NULL;
207
208 if (!valid_status(object_get(l, &obj))) {
209 /* object is not here: bring it */
210 if (!valid_status(activitypub_request(user, l, &obj)))
211 continue;
212 }
213
214 if (xs_is_dict(obj)) {
215 const char *id = xs_dict_get(obj, "id");
216 const char *type = xs_dict_get(obj, "type");
217 const char *attr_to = get_atto(obj);
218
219 if (!xs_is_string(id) || !xs_is_string(type) || !xs_is_string(attr_to))
220 continue;
221
222 if (!xs_match(type, POSTLIKE_OBJECT_TYPE))
223 continue;
224
225 if (timeline_here(user, id)) {
226 snac_debug(user, 1, xs_fmt("RSS entry already in timeline (id) %s", id));
227 continue;
228 }
229
230 enqueue_actor_refresh(user, attr_to, 0);
231
232 timeline_add(user, id, obj);
233
234 snac_log(user, xs_fmt("new '%s' (RSS) %s %s", type, attr_to, id));
235 }
236 }
237
238 /* update the RSS metadata */
239 etag = xs_dict_get(rsp, "etag");
240
241 if (xs_is_string(etag)) {
242 rss_md = xs_dict_set(rss_md, "etag", etag);
243 rss_md = xs_dict_set(rss_md, "url", url);
244 if ((f = fopen(rss_md_fn, "w")) != NULL) {
245 xs_json_dump(rss_md, 4, f);
246 fclose(f);
247 }
248 }
249}
250
251
252void rss_poll_hashtags(void)
253/* parses all RSS from all users */
254{
255 xs *list = user_list();
256 const char *uid;
257
258 xs_list_foreach(list, uid) {
259 snac user;
260
261 if (user_open(&user, uid)) {
262 const xs_list *rss = xs_dict_get(user.config, "followed_hashtags");
263
264 if (xs_is_list(rss)) {
265 const char *url;
266
267 xs_list_foreach(rss, url)
268 rss_to_timeline(&user, url);
269 }
270
271 user_free(&user);
272 }
273 }
274}