diff options
| author | 2022-09-20 07:15:39 +0200 | |
|---|---|---|
| committer | 2022-09-20 07:15:39 +0200 | |
| commit | b97f4c7a90125812b6d17d5b9f2450708e53a4ba (patch) | |
| tree | 9d4b37157b80c315b66aa97637f948715c7f476a /xs_json.h | |
| parent | New function user_list(). (diff) | |
| download | snac2-b97f4c7a90125812b6d17d5b9f2450708e53a4ba.tar.gz snac2-b97f4c7a90125812b6d17d5b9f2450708e53a4ba.tar.xz snac2-b97f4c7a90125812b6d17d5b9f2450708e53a4ba.zip | |
Improved Unicode surrogate pairs parsing in xs_json.
Diffstat (limited to '')
| -rw-r--r-- | xs_json.h | 27 |
1 file changed, 21 insertions, 6 deletions
| @@ -229,7 +229,7 @@ d_char *_xs_json_loads_lexer(const char **json, js_type *t) | |||
| 229 | 229 | ||
| 230 | while ((c = *s) != '"' && c != '\0') { | 230 | while ((c = *s) != '"' && c != '\0') { |
| 231 | char tmp[5]; | 231 | char tmp[5]; |
| 232 | int i; | 232 | int cp, i; |
| 233 | 233 | ||
| 234 | if (c == '\\') { | 234 | if (c == '\\') { |
| 235 | s++; | 235 | s++; |
| @@ -240,15 +240,30 @@ d_char *_xs_json_loads_lexer(const char **json, js_type *t) | |||
| 240 | case 't': c = '\t'; break; | 240 | case 't': c = '\t'; break; |
| 241 | case 'u': /* Unicode codepoint as an hex char */ | 241 | case 'u': /* Unicode codepoint as an hex char */ |
| 242 | s++; | 242 | s++; |
| 243 | tmp[0] = (char)*s; s++; | 243 | memcpy(tmp, s, 4); |
| 244 | tmp[1] = (char)*s; s++; | 244 | s += 3; |
| 245 | tmp[2] = (char)*s; s++; | ||
| 246 | tmp[3] = (char)*s; | ||
| 247 | tmp[4] = '\0'; | 245 | tmp[4] = '\0'; |
| 248 | 246 | ||
| 247 | xs_debug(); | ||
| 249 | sscanf(tmp, "%04x", &i); | 248 | sscanf(tmp, "%04x", &i); |
| 250 | 249 | ||
| 251 | v = xs_utf8_enc(v, i); | 250 | if (i >= 0xd800 && i <= 0xdfff) { |
| 251 | /* it's a surrogate pair */ | ||
| 252 | cp = (i & 0x3ff) << 10; | ||
| 253 | |||
| 254 | /* skip to the next value */ | ||
| 255 | s += 3; | ||
| 256 | memcpy(tmp, s, 4); | ||
| 257 | s += 3; | ||
| 258 | |||
| 259 | sscanf(tmp, "%04x", &i); | ||
| 260 | cp |= (i & 0x3ff); | ||
| 261 | cp += 0x10000; | ||
| 262 | } | ||
| 263 | else | ||
| 264 | cp = i; | ||
| 265 | |||
| 266 | v = xs_utf8_enc(v, cp); | ||
| 252 | c = '\0'; | 267 | c = '\0'; |
| 253 | 268 | ||
| 254 | break; | 269 | break; |