/* snac - A simple, minimalistic ActivityPub instance */
/* copyright (c) 2025 grunfink et al. / MIT license */

#include "xs.h"
#include "xs_html.h"
#include "xs_regex.h"
#include "xs_time.h"
#include "xs_match.h"
#include "xs_curl.h"
#include "xs_openssl.h"
#include "xs_json.h"

#include "snac.h"

static xs_str *rss_item_title(const char *content)
/* builds a one-line, tag-free title from the HTML content of a post.
   content must be a valid string; the caller owns the returned string */
{
    /* titles longer than this many bytes are cut and suffixed with "..." */
    enum { RSS_TITLE_MAX_BYTES = 50 };

    /* turn explicit line breaks into newlines, so that the scan
       below stops at the end of the first line */
    xs_str *t = xs_replace(content, "<br>", "\n");

    /* blank out the remaining markup and character entities */
    t = xs_regex_replace_i(t, "<[^>]+>", " ");
    t = xs_regex_replace_i(t, "&[^;]+;", " ");

    int i;

    for (i = 0; t[i] && t[i] != '\n' && i < RSS_TITLE_MAX_BYTES; i++);

    if (t[i] != '\0') {
        /* do not cut in the middle of a multi-byte UTF-8 sequence:
           step back while standing on a continuation byte (10xxxxxx) */
        while (i > 0 && ((unsigned char)t[i] & 0xc0) == 0x80)
            i--;

        t[i] = '\0';
        t = xs_str_cat(t, "...");
    }

    return xs_strip_i(t);
}


xs_str *rss_from_timeline(snac *user, const xs_list *timeline,
                          const char *title, const char *link, const char *desc)
/* converts a timeline to rss.
   timeline is iterated as a list of md5 identifiers. If user is not NULL,
   entries are read with timeline_get_by_md5() and only those whose id
   starts with user->actor are emitted; if user is NULL, entries are read
   with object_get_by_md5() and no such filter is applied.
   title, link and desc fill the channel header.
   Returns the rendered document */
{
    xs_html *rss = xs_html_tag("rss",
        xs_html_attr("xmlns:content", "http:/" "/purl.org/rss/1.0/modules/content/"),
        xs_html_attr("version",       "2.0"),
        xs_html_attr("xmlns:atom",    "http:/" "/www.w3.org/2005/Atom"));

    xs_html *channel = xs_html_tag("channel",
        xs_html_tag("title",
            xs_html_text(title)),
        xs_html_tag("language",
            xs_html_text("en")),
        xs_html_tag("link",
            xs_html_text(link)),
        xs_html_sctag("atom:link",
            xs_html_attr("href", link),
            xs_html_attr("rel",  "self"),
            xs_html_attr("type", "application/rss+xml")),
        xs_html_tag("generator",
            xs_html_text(USER_AGENT)),
        xs_html_tag("description",
            xs_html_text(desc)));

    xs_html_add(rss, channel);

    int cnt = 0;
    const char *v;

    xs_list_foreach(timeline, v) {
        xs *msg = NULL;

        if (user) {
            if (!valid_status(timeline_get_by_md5(user, v, &msg)))
                continue;
        }
        else {
            if (!valid_status(object_get_by_md5(v, &msg)))
                continue;
        }

        const char *id        = xs_dict_get(msg, "id");
        const char *content   = xs_dict_get(msg, "content");
        const char *published = xs_dict_get(msg, "published");

        /* validate the fields before any of them is used as a string
           (a missing key or a non-string value skips the entry) */
        if (!xs_is_string(id) || !xs_is_string(content) || !xs_is_string(published))
            continue;

        /* only the posts authored by this user */
        if (user && !xs_startswith(id, user->actor))
            continue;

        /* create a title with the first line of the content */
        xs *item_title = rss_item_title(content);

        /* convert the date */
        time_t t = xs_parse_iso_date(published, 0);
        xs *rss_date = xs_str_utctime(t, "%a, %d %b %Y %T +0000");

        /* if it's the first one, add it to the header */
        if (cnt == 0)
            xs_html_add(channel,
                xs_html_tag("lastBuildDate",
                    xs_html_text(rss_date)));

        xs_html_add(channel,
            xs_html_tag("item",
                xs_html_tag("title",
                    xs_html_text(item_title)),
                xs_html_tag("link",
                    xs_html_text(id)),
                xs_html_tag("guid",
                    xs_html_text(id)),
                xs_html_tag("pubDate",
                    xs_html_text(rss_date)),
                xs_html_tag("description",
                    xs_html_text(content))));

        cnt++;
    }

    /* NOTE(review): the second argument is only a newline, so no XML
       declaration is passed here -- confirm this is intended */
    return xs_html_render_s(rss, "\n");
}


void rss_to_timeline(snac *user, const char *url)
/* reads an RSS and inserts all ActivityPub posts into the user's timeline.
   Only http / https URLs are processed. The ETag of the last response is
   stored in <basedir>/rss/<md5 of url>.json and sent back as if-none-match
   on the next request */
{
    if (!xs_startswith(url, "https:/") && !xs_startswith(url, "http:/"))
        return;

    xs *hdrs = xs_dict_new();
    hdrs = xs_dict_set(hdrs, "accept",     "application/rss+xml");
    hdrs = xs_dict_set(hdrs, "user-agent", USER_AGENT);

    /* get the RSS metadata */
    xs *md5 = xs_md5_hex(url, strlen(url));
    xs *rss_md_fn = xs_fmt("%s/rss", user->basedir);
    mkdirx(rss_md_fn);
    rss_md_fn = xs_str_cat(rss_md_fn, "/", md5, ".json");

    xs *loaded = NULL;  /* whatever the metadata file contained, if anything */
    xs *rss_md = NULL;  /* the metadata dict that will be written back */
    const char *etag = NULL;

    FILE *f;
    if ((f = fopen(rss_md_fn, "r")) != NULL) {
        loaded = xs_json_load(f);
        fclose(f);
    }

    if (xs_is_dict(loaded)) {
        /* take over the cached metadata; 'loaded' no longer owns it */
        rss_md = loaded;
        loaded = NULL;

        etag = xs_dict_get(rss_md, "etag");

        if (xs_is_string(etag))
            hdrs = xs_dict_set(hdrs, "if-none-match", etag);
    }
    else {
        /* no file, or not a usable dict: start with empty metadata */
        rss_md = xs_dict_new();
    }

    xs *payload = NULL;
    int status;
    int p_size;

    xs *rsp = xs_http_request("GET", url, hdrs, NULL, 0, &status, &payload, &p_size, 0);

    snac_log(user, xs_fmt("parsing RSS %s %d", url, status));

    if (!valid_status(status) || !xs_is_string(payload))
        return;

    /* not an RSS? done */
    const char *ctype = xs_dict_get(rsp, "content-type");

    if (!xs_is_string(ctype) || xs_str_in(ctype, "application/rss+xml") == -1)
        return;

    /* yes, parsing is done with regexes (now I have two problems blah blah blah) */
    xs *links = xs_regex_select(payload, "<link>[^<]+</link>");
    const char *link;

    xs_list_foreach(links, link) {
        /* drop the opening tag... */
        xs *l = xs_replace(link, "<link>", "");
        char *p = strchr(l, '<');

        if (p == NULL)
            continue;

        /* ...and cut at the closing one */
        *p = '\0';

        /* skip this same URL */
        if (strcmp(l, url) == 0)
            continue;

        /* skip crap */
        if (!xs_startswith(l, "https:/") && !xs_startswith(l, "http:/"))
            continue;

        snac_debug(user, 1, xs_fmt("RSS link: %s", l));

        if (timeline_here(user, l)) {
            snac_debug(user, 1, xs_fmt("RSS entry already in timeline %s", l));
            continue;
        }

        /* special trick for Mastodon: convert from the alternate format
           (scheme://host/@uid/status) to the users/.../statuses/... one */
        if (strchr(l, '@') != NULL) {
            xs *l2 = xs_split(l, "/");

            if (xs_list_len(l2) == 5) {
                const char *uid = xs_list_get(l2, 3);

                if (*uid == '@') {
                    xs *guessed_id = xs_fmt("https:/" "/%s/users/%s/statuses/%s",
                        xs_list_get(l2, 2), uid + 1, xs_list_get(l2, -1));

                    if (timeline_here(user, guessed_id)) {
                        snac_debug(user, 1, xs_fmt("RSS entry already in timeline (alt) %s", guessed_id));
                        continue;
                    }
                }
            }
        }

        xs *obj = NULL;

        if (!valid_status(object_get(l, &obj))) {
            /* object is not here: bring it */
            if (!valid_status(activitypub_request(user, l, &obj)))
                continue;
        }

        if (xs_is_dict(obj)) {
            const char *id      = xs_dict_get(obj, "id");
            const char *type    = xs_dict_get(obj, "type");
            const char *attr_to = get_atto(obj);

            if (!xs_is_string(id) || !xs_is_string(type) || !xs_is_string(attr_to))
                continue;

            if (!xs_match(type, POSTLIKE_OBJECT_TYPE))
                continue;

            /* the object id may differ from the link in the feed */
            if (timeline_here(user, id)) {
                snac_debug(user, 1, xs_fmt("RSS entry already in timeline (id) %s", id));
                continue;
            }

            if (!valid_status(actor_request(user, attr_to, NULL)))
                continue;

            timeline_add(user, id, obj);
        }
    }

    /* update the RSS metadata */
    etag = xs_dict_get(rsp, "etag");

    if (xs_is_string(etag)) {
        rss_md = xs_dict_set(rss_md, "etag", etag);

        if ((f = fopen(rss_md_fn, "w")) != NULL) {
            xs_json_dump(rss_md, 4, f);
            fclose(f);
        }
    }
}


void rss_poll_hashtags(void)
/* parses all RSS from all users: every string entry in each user's
   "followed_hashtags" configuration list is handed to rss_to_timeline(),
   which ignores the ones that are not http / https URLs */
{
    xs *list = user_list();
    const char *uid;

    xs_list_foreach(list, uid) {
        snac user;

        if (user_open(&user, uid)) {
            const xs_list *rss = xs_dict_get(user.config, "followed_hashtags");

            if (xs_is_list(rss)) {
                const char *url;

                xs_list_foreach(rss, url) {
                    /* the list comes from a config file: skip non-strings */
                    if (xs_is_string(url))
                        rss_to_timeline(&user, url);
                }
            }

            user_free(&user);
        }
    }
}