summary refs log tree commit diff
diff options
context:
space:
mode:
author [Gravatar] default 2024-08-23 17:22:10 +0200
committer [Gravatar] default 2024-08-23 17:22:10 +0200
commit 8586e44de92c827d6a19a7700121c8b21d3687b1 (patch)
tree a0cc5a181a851f58a1cdea505bf1096970eb3e8d
parent Updated TODO. (diff)
download penes-snac2-8586e44de92c827d6a19a7700121c8b21d3687b1.tar.gz
penes-snac2-8586e44de92c827d6a19a7700121c8b21d3687b1.tar.xz
penes-snac2-8586e44de92c827d6a19a7700121c8b21d3687b1.zip
Some optimizations.
-rw-r--r-- xs_json.h 7
-rw-r--r-- xs_unicode.h 116
-rw-r--r-- xs_version.h 2
3 files changed, 120 insertions, 5 deletions
diff --git a/xs_json.h b/xs_json.h
index a4112b0..3a91de9 100644
--- a/xs_json.h
+++ b/xs_json.h
@@ -208,6 +208,7 @@ static xs_val *_xs_json_load_lexer(FILE *f, js_type *t)
208{ 208{
209 int c; 209 int c;
210 xs_val *v = NULL; 210 xs_val *v = NULL;
211 int offset;
211 212
212 *t = JS_ERROR; 213 *t = JS_ERROR;
213 214
@@ -236,6 +237,7 @@ static xs_val *_xs_json_load_lexer(FILE *f, js_type *t)
236 *t = JS_STRING; 237 *t = JS_STRING;
237 238
238 v = xs_str_new(NULL); 239 v = xs_str_new(NULL);
240 offset = 0;
239 241
240 while ((c = fgetc(f)) != '"' && c != EOF && *t != JS_ERROR) { 242 while ((c = fgetc(f)) != '"' && c != EOF && *t != JS_ERROR) {
241 if (c == '\\') { 243 if (c == '\\') {
@@ -274,11 +276,12 @@ static xs_val *_xs_json_load_lexer(FILE *f, js_type *t)
274 break; 276 break;
275 } 277 }
276 278
277 v = xs_utf8_cat(v, cp); 279 v = xs_utf8_insert(v, cp, &offset);
278 } 280 }
279 else { 281 else {
280 char cc = c; 282 char cc = c;
281 v = xs_append_m(v, &cc, 1); 283 v = xs_insert_m(v, offset, &cc, 1);
284 offset++;
282 } 285 }
283 } 286 }
284 287
diff --git a/xs_unicode.h b/xs_unicode.h
index 2e9a754..a5a1dcb 100644
--- a/xs_unicode.h
+++ b/xs_unicode.h
@@ -9,6 +9,7 @@
9 unsigned int xs_utf8_dec(const char **str); 9 unsigned int xs_utf8_dec(const char **str);
10 int xs_unicode_width(unsigned int cpoint); 10 int xs_unicode_width(unsigned int cpoint);
11 int xs_is_surrogate(unsigned int cpoint); 11 int xs_is_surrogate(unsigned int cpoint);
12 int xs_is_diacritic(unsigned int cpoint);
12 unsigned int xs_surrogate_dec(unsigned int p1, unsigned int p2); 13 unsigned int xs_surrogate_dec(unsigned int p1, unsigned int p2);
13 unsigned int xs_surrogate_enc(unsigned int cpoint); 14 unsigned int xs_surrogate_enc(unsigned int cpoint);
14 unsigned int *_xs_unicode_upper_search(unsigned int cpoint); 15 unsigned int *_xs_unicode_upper_search(unsigned int cpoint);
@@ -22,7 +23,12 @@
22 int xs_unicode_is_alpha(unsigned int cpoint); 23 int xs_unicode_is_alpha(unsigned int cpoint);
23 24
24#ifdef _XS_H 25#ifdef _XS_H
26 xs_str *xs_utf8_insert(xs_str *str, unsigned int cpoint, int *offset);
25 xs_str *xs_utf8_cat(xs_str *str, unsigned int cpoint); 27 xs_str *xs_utf8_cat(xs_str *str, unsigned int cpoint);
28 xs_str *xs_utf8_to_upper(const char *str);
29 xs_str *xs_utf8_to_lower(const char *str);
30 xs_str *xs_utf8_to_nfd(const char *str);
31 xs_str *xs_utf8_to_nfc(const char *str);
26#endif 32#endif
27 33
28#ifdef XS_IMPLEMENTATION 34#ifdef XS_IMPLEMENTATION
@@ -144,6 +150,12 @@ int xs_unicode_width(unsigned int cpoint)
144} 150}
145 151
146 152
153int xs_is_diacritic(unsigned int cpoint)
154{
155 return cpoint >= 0x300 && cpoint <= 0x36f;
156}
157
158
147/** surrogate pairs **/ 159/** surrogate pairs **/
148 160
149int xs_is_surrogate(unsigned int cpoint) 161int xs_is_surrogate(unsigned int cpoint)
@@ -172,14 +184,27 @@ unsigned int xs_surrogate_enc(unsigned int cpoint)
172 184
173#ifdef _XS_H 185#ifdef _XS_H
174 186
175xs_str *xs_utf8_cat(xs_str *str, unsigned int cpoint) 187xs_str *xs_utf8_insert(xs_str *str, unsigned int cpoint, int *offset)
176/* encodes an Unicode codepoint to utf-8 into str */ 188/* encodes an Unicode codepoint to utf-8 into str */
177{ 189{
178 char tmp[4]; 190 char tmp[4];
179 191
180 int c = xs_utf8_enc(tmp, cpoint); 192 int c = xs_utf8_enc(tmp, cpoint);
181 193
182 return xs_append_m(str, tmp, c); 194 str = xs_insert_m(str, *offset, tmp, c);
195
196 *offset += c;
197
198 return str;
199}
200
201
202xs_str *xs_utf8_cat(xs_str *str, unsigned int cpoint)
203/* encodes an Unicode codepoint to utf-8 into str */
204{
205 int offset = strlen(str);
206
207 return xs_utf8_insert(str, cpoint, &offset);
183} 208}
184 209
185#endif /* _XS_H */ 210#endif /* _XS_H */
@@ -232,6 +257,9 @@ unsigned int *_xs_unicode_lower_search(unsigned int cpoint)
232unsigned int xs_unicode_to_lower(unsigned int cpoint) 257unsigned int xs_unicode_to_lower(unsigned int cpoint)
233/* returns the cpoint to lowercase */ 258/* returns the cpoint to lowercase */
234{ 259{
260 if (cpoint < 0x80)
261 return tolower(cpoint);
262
235 unsigned int *p = _xs_unicode_upper_search(cpoint); 263 unsigned int *p = _xs_unicode_upper_search(cpoint);
236 264
237 return p == NULL ? cpoint : p[1]; 265 return p == NULL ? cpoint : p[1];
@@ -241,6 +269,9 @@ unsigned int xs_unicode_to_lower(unsigned int cpoint)
241unsigned int xs_unicode_to_upper(unsigned int cpoint) 269unsigned int xs_unicode_to_upper(unsigned int cpoint)
242/* returns the cpoint to uppercase */ 270/* returns the cpoint to uppercase */
243{ 271{
272 if (cpoint < 0x80)
273 return toupper(cpoint);
274
244 unsigned int *p = _xs_unicode_lower_search(cpoint); 275 unsigned int *p = _xs_unicode_lower_search(cpoint);
245 276
246 return p == NULL ? cpoint : p[0]; 277 return p == NULL ? cpoint : p[0];
@@ -317,6 +348,87 @@ int xs_unicode_is_alpha(unsigned int cpoint)
317} 348}
318 349
319 350
351#ifdef _XS_H
352
353xs_str *xs_utf8_to_upper(const char *str)
354{
355 xs_str *s = xs_str_new(NULL);
356 unsigned int cpoint;
357 int offset = 0;
358
359 while ((cpoint = xs_utf8_dec(&str))) {
360 cpoint = xs_unicode_to_upper(cpoint);
361 s = xs_utf8_insert(s, cpoint, &offset);
362 }
363
364 return s;
365}
366
367
368xs_str *xs_utf8_to_lower(const char *str)
369{
370 xs_str *s = xs_str_new(NULL);
371 unsigned int cpoint;
372 int offset = 0;
373
374 while ((cpoint = xs_utf8_dec(&str))) {
375 cpoint = xs_unicode_to_lower(cpoint);
376 s = xs_utf8_insert(s, cpoint, &offset);
377 }
378
379 return s;
380}
381
382
383xs_str *xs_utf8_to_nfd(const char *str)
384{
385 xs_str *s = xs_str_new(NULL);
386 unsigned int cpoint;
387 int offset = 0;
388
389 while ((cpoint = xs_utf8_dec(&str))) {
390 unsigned int base;
391 unsigned int diac;
392
393 if (xs_unicode_nfd(cpoint, &base, &diac)) {
394 s = xs_utf8_insert(s, base, &offset);
395 s = xs_utf8_insert(s, diac, &offset);
396 }
397 else
398 s = xs_utf8_insert(s, cpoint, &offset);
399 }
400
401 return s;
402}
403
404
405xs_str *xs_utf8_to_nfc(const char *str)
406{
407 xs_str *s = xs_str_new(NULL);
408 unsigned int cpoint;
409 unsigned int base = 0;
410 int offset = 0;
411
412 while ((cpoint = xs_utf8_dec(&str))) {
413 if (xs_is_diacritic(cpoint)) {
414 if (xs_unicode_nfc(base, cpoint, &base))
415 continue;
416 }
417
418 if (base)
419 s = xs_utf8_insert(s, base, &offset);
420
421 base = cpoint;
422 }
423
424 if (base)
425 s = xs_utf8_insert(s, base, &offset);
426
427 return s;
428}
429
430#endif /* _XS_H */
431
320#endif /* _XS_UNICODE_TBL_H */ 432#endif /* _XS_UNICODE_TBL_H */
321 433
322#endif /* XS_IMPLEMENTATION */ 434#endif /* XS_IMPLEMENTATION */
diff --git a/xs_version.h b/xs_version.h
index 4318c7e..ce88558 100644
--- a/xs_version.h
+++ b/xs_version.h
@@ -1 +1 @@
/* c6eca9593f9b3d6791cba600e5950f682fdb36cf 2024-08-12T16:08:37+02:00 */ /* cc9ebd36ae640e4701277327fbba9996143076f6 2024-08-23T17:17:08+02:00 */