diff options
| author | 2023-11-17 03:51:04 +0100 | |
|---|---|---|
| committer | 2023-11-17 03:51:04 +0100 | |
| commit | acf3cdcf80da7c3443e202a02d4b626c13e9e8dd (patch) | |
| tree | f9498dfd051d469d9e76c4b47c5704bfc9a79e48 | |
| parent | Updated RELEASE_NOTES. (diff) | |
| download | penes-snac2-acf3cdcf80da7c3443e202a02d4b626c13e9e8dd.tar.gz penes-snac2-acf3cdcf80da7c3443e202a02d4b626c13e9e8dd.tar.xz penes-snac2-acf3cdcf80da7c3443e202a02d4b626c13e9e8dd.zip | |
Backport from xs.
| -rw-r--r-- | xs.h | 7 | ||||
| -rw-r--r-- | xs_json.h | 12 | ||||
| -rw-r--r-- | xs_unicode.h | 29 | ||||
| -rw-r--r-- | xs_version.h | 2 |
4 files changed, 39 insertions, 11 deletions
| @@ -1180,6 +1180,8 @@ void *xs_memmem(const char *haystack, int h_size, const char *needle, int n_size | |||
| 1180 | 1180 | ||
| 1181 | /** hex **/ | 1181 | /** hex **/ |
| 1182 | 1182 | ||
| 1183 | static char xs_hex_digits[] = "0123456789abcdef"; | ||
| 1184 | |||
| 1183 | xs_str *xs_hex_enc(const xs_val *data, int size) | 1185 | xs_str *xs_hex_enc(const xs_val *data, int size) |
| 1184 | /* returns a hexdump of data */ | 1186 | /* returns a hexdump of data */ |
| 1185 | { | 1187 | { |
| @@ -1190,8 +1192,9 @@ xs_str *xs_hex_enc(const xs_val *data, int size) | |||
| 1190 | p = s = xs_realloc(NULL, _xs_blk_size(size * 2 + 1)); | 1192 | p = s = xs_realloc(NULL, _xs_blk_size(size * 2 + 1)); |
| 1191 | 1193 | ||
| 1192 | for (n = 0; n < size; n++) { | 1194 | for (n = 0; n < size; n++) { |
| 1193 | snprintf(p, 3, "%02x", (unsigned char)data[n]); | 1195 | *p++ = xs_hex_digits[*data >> 4 & 0xf]; |
| 1194 | p += 2; | 1196 | *p++ = xs_hex_digits[*data & 0xf]; |
| 1197 | data++; | ||
| 1195 | } | 1198 | } |
| 1196 | 1199 | ||
| 1197 | *p = '\0'; | 1200 | *p = '\0'; |
| @@ -248,24 +248,20 @@ static xs_val *_xs_json_load_lexer(FILE *f, js_type *t) | |||
| 248 | break; | 248 | break; |
| 249 | } | 249 | } |
| 250 | 250 | ||
| 251 | if (cp >= 0xd800 && cp <= 0xdfff) { | 251 | if (xs_is_surrogate(cp)) { |
| 252 | /* it's a surrogate pair */ | ||
| 253 | cp = (cp & 0x3ff) << 10; | ||
| 254 | |||
| 255 | /* \u must follow */ | 252 | /* \u must follow */ |
| 256 | if (fgetc(f) != '\\' || fgetc(f) != 'u') { | 253 | if (fgetc(f) != '\\' || fgetc(f) != 'u') { |
| 257 | *t = JS_ERROR; | 254 | *t = JS_ERROR; |
| 258 | break; | 255 | break; |
| 259 | } | 256 | } |
| 260 | 257 | ||
| 261 | unsigned int i; | 258 | unsigned int p2; |
| 262 | if (fscanf(f, "%04x", &i) != 1) { | 259 | if (fscanf(f, "%04x", &p2) != 1) { |
| 263 | *t = JS_ERROR; | 260 | *t = JS_ERROR; |
| 264 | break; | 261 | break; |
| 265 | } | 262 | } |
| 266 | 263 | ||
| 267 | cp |= (i & 0x3ff); | 264 | cp = xs_surrogate_dec(cp, p2); |
| 268 | cp += 0x10000; | ||
| 269 | } | 265 | } |
| 270 | 266 | ||
| 271 | /* replace dangerous control codes with their visual representations */ | 267 | /* replace dangerous control codes with their visual representations */ |
diff --git a/xs_unicode.h b/xs_unicode.h index c7d6190..f5880f0 100644 --- a/xs_unicode.h +++ b/xs_unicode.h | |||
| @@ -8,6 +8,9 @@ | |||
| 8 | xs_str *xs_utf8_enc(xs_str *str, unsigned int cpoint); | 8 | xs_str *xs_utf8_enc(xs_str *str, unsigned int cpoint); |
| 9 | unsigned int xs_utf8_dec(char **str); | 9 | unsigned int xs_utf8_dec(char **str); |
| 10 | int xs_unicode_width(unsigned int cpoint); | 10 | int xs_unicode_width(unsigned int cpoint); |
| 11 | int xs_is_surrogate(unsigned int cpoint); | ||
| 12 | unsigned int xs_surrogate_dec(unsigned int p1, unsigned int p2); | ||
| 13 | unsigned int xs_surrogate_enc(unsigned int cpoint); | ||
| 11 | unsigned int *_xs_unicode_upper_search(unsigned int cpoint); | 14 | unsigned int *_xs_unicode_upper_search(unsigned int cpoint); |
| 12 | unsigned int *_xs_unicode_lower_search(unsigned int cpoint); | 15 | unsigned int *_xs_unicode_lower_search(unsigned int cpoint); |
| 13 | #define xs_unicode_is_upper(cpoint) (!!_xs_unicode_upper_search(cpoint)) | 16 | #define xs_unicode_is_upper(cpoint) (!!_xs_unicode_upper_search(cpoint)) |
| @@ -138,6 +141,32 @@ int xs_unicode_width(unsigned int cpoint) | |||
| 138 | } | 141 | } |
| 139 | 142 | ||
| 140 | 143 | ||
| 144 | /** surrogate pairs **/ | ||
| 145 | |||
| 146 | int xs_is_surrogate(unsigned int cpoint) | ||
| 147 | /* checks if cpoint lies in the Unicode surrogate range 0xd800-0xdfff (either half of a surrogate pair) */ | ||
| 148 | { | ||
| 149 | return cpoint >= 0xd800 && cpoint <= 0xdfff; | ||
| 150 | } | ||
| 151 | |||
| 152 | |||
| 153 | unsigned int xs_surrogate_dec(unsigned int p1, unsigned int p2) | ||
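| 154 | /* "decodes" a surrogate pair into a codepoint. NOTE(review): 0x10000 is OR-ed in, not added as in the code this replaces, so a p1 with bit 6 set (e.g. 0xd840 0xdc00, U+20000) yields 0x10000 -- confirm and use '+' */ | ||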
| 155 | { | ||
| 156 | return 0x10000 | ((p1 & 0x3ff) << 10) | (p2 & 0x3ff); | ||
| 157 | } | ||
| 158 | |||
| 159 | |||
| 160 | unsigned int xs_surrogate_enc(unsigned int cpoint) | ||
| 161 | /* "encodes" a Unicode codepoint into a surrogate pair (p1 in the MSB word) */ | ||
| 162 | { | ||
| 163 | unsigned int p1 = 0xd7c0 + (cpoint >> 10); | ||
| 164 | unsigned int p2 = 0xdc00 + (cpoint & 0x3ff); | ||
| 165 | |||
| 166 | return (p1 << 16) | p2; | ||
| 167 | } | ||
| 168 | |||
| 169 | |||
| 141 | #ifdef _XS_UNICODE_TBL_H | 170 | #ifdef _XS_UNICODE_TBL_H |
| 142 | 171 | ||
| 143 | /* include xs_unicode_tbl.h before this one to use these functions */ | 172 | /* include xs_unicode_tbl.h before this one to use these functions */ |
diff --git a/xs_version.h b/xs_version.h index d888d29..42dc7d2 100644 --- a/xs_version.h +++ b/xs_version.h | |||
| @@ -1 +1 @@ | |||
| /* 40d63c59610c642d1c8b2e2b94bbf5cdde69ad6a */ | /* 0932615dfe85e5d8544c4b2052eb66f3a430eb8c */ | ||