diff options
Diffstat (limited to '')
| -rw-r--r-- | rss.c | 40 |
1 files changed, 38 insertions, 2 deletions
| @@ -7,6 +7,8 @@ | |||
| 7 | #include "xs_time.h" | 7 | #include "xs_time.h" |
| 8 | #include "xs_match.h" | 8 | #include "xs_match.h" |
| 9 | #include "xs_curl.h" | 9 | #include "xs_curl.h" |
| 10 | #include "xs_openssl.h" | ||
| 11 | #include "xs_json.h" | ||
| 10 | 12 | ||
| 11 | #include "snac.h" | 13 | #include "snac.h" |
| 12 | 14 | ||
| @@ -117,12 +119,37 @@ void rss_to_timeline(snac *user, const char *url) | |||
| 117 | hdrs = xs_dict_set(hdrs, "accept", "application/rss+xml"); | 119 | hdrs = xs_dict_set(hdrs, "accept", "application/rss+xml"); |
| 118 | hdrs = xs_dict_set(hdrs, "user-agent", USER_AGENT); | 120 | hdrs = xs_dict_set(hdrs, "user-agent", USER_AGENT); |
| 119 | 121 | ||
| 122 | /* get the RSS metadata */ | ||
| 123 | xs *md5 = xs_md5_hex(url, strlen(url)); | ||
| 124 | xs *rss_md_fn = xs_fmt("%s/rss", user->basedir); | ||
| 125 | mkdirx(rss_md_fn); | ||
| 126 | rss_md_fn = xs_str_cat(rss_md_fn, "/", md5, ".json"); | ||
| 127 | |||
| 128 | xs *rss_md = NULL; | ||
| 129 | const char *etag = NULL; | ||
| 130 | |||
| 131 | FILE *f; | ||
| 132 | if ((f = fopen(rss_md_fn, "r")) != NULL) { | ||
| 133 | rss_md = xs_json_load(f); | ||
| 134 | fclose(f); | ||
| 135 | |||
| 136 | etag = xs_dict_get(rss_md, "etag"); | ||
| 137 | |||
| 138 | if (xs_is_string(etag)) | ||
| 139 | hdrs = xs_dict_set(hdrs, "if-none-match", etag); | ||
| 140 | } | ||
| 141 | |||
| 142 | if (rss_md == NULL) | ||
| 143 | rss_md = xs_dict_new(); | ||
| 144 | |||
| 120 | xs *payload = NULL; | 145 | xs *payload = NULL; |
| 121 | int status; | 146 | int status; |
| 122 | int p_size; | 147 | int p_size; |
| 123 | 148 | ||
| 124 | xs *rsp = xs_http_request("GET", url, hdrs, NULL, 0, &status, &payload, &p_size, 0); | 149 | xs *rsp = xs_http_request("GET", url, hdrs, NULL, 0, &status, &payload, &p_size, 0); |
| 125 | 150 | ||
| 151 | snac_log(user, xs_fmt("parsing RSS %s %d", url, status)); | ||
| 152 | |||
| 126 | if (!valid_status(status) || !xs_is_string(payload)) | 153 | if (!valid_status(status) || !xs_is_string(payload)) |
| 127 | return; | 154 | return; |
| 128 | 155 | ||
| @@ -131,8 +158,6 @@ void rss_to_timeline(snac *user, const char *url) | |||
| 131 | if (!xs_is_string(ctype) || xs_str_in(ctype, "application/rss+xml") == -1) | 158 | if (!xs_is_string(ctype) || xs_str_in(ctype, "application/rss+xml") == -1) |
| 132 | return; | 159 | return; |
| 133 | 160 | ||
| 134 | snac_log(user, xs_fmt("parsing RSS %s", url)); | ||
| 135 | |||
| 136 | /* yes, parsing is done with regexes (now I have two problems blah blah blah) */ | 161 | /* yes, parsing is done with regexes (now I have two problems blah blah blah) */ |
| 137 | xs *links = xs_regex_select(payload, "<link>[^<]+</link>"); | 162 | xs *links = xs_regex_select(payload, "<link>[^<]+</link>"); |
| 138 | const char *link; | 163 | const char *link; |
| @@ -208,6 +233,17 @@ void rss_to_timeline(snac *user, const char *url) | |||
| 208 | timeline_add(user, id, obj); | 233 | timeline_add(user, id, obj); |
| 209 | } | 234 | } |
| 210 | } | 235 | } |
| 236 | |||
| 237 | /* update the RSS metadata */ | ||
| 238 | etag = xs_dict_get(rsp, "etag"); | ||
| 239 | |||
| 240 | if (xs_is_string(etag)) { | ||
| 241 | rss_md = xs_dict_set(rss_md, "etag", etag); | ||
| 242 | if ((f = fopen(rss_md_fn, "w")) != NULL) { | ||
| 243 | xs_json_dump(rss_md, 4, f); | ||
| 244 | fclose(f); | ||
| 245 | } | ||
| 246 | } | ||
| 211 | } | 247 | } |
| 212 | 248 | ||
| 213 | 249 | ||