summary | refs | log | tree | commit | diff
path: root/xs_unicode.h
diff options
context:
space:
mode:
Diffstat (limited to 'xs_unicode.h')
-rw-r--r-- xs_unicode.h 116
1 files changed, 114 insertions, 2 deletions
diff --git a/xs_unicode.h b/xs_unicode.h
index 2e9a754..a5a1dcb 100644
--- a/xs_unicode.h
+++ b/xs_unicode.h
@@ -9,6 +9,7 @@
/* codepoint-level helpers; none of these need the xs_str type */
unsigned int xs_utf8_dec(const char **str);
int xs_unicode_width(unsigned int cpoint);
int xs_is_surrogate(unsigned int cpoint);
int xs_is_diacritic(unsigned int cpoint);
unsigned int xs_surrogate_dec(unsigned int p1, unsigned int p2);
unsigned int xs_surrogate_enc(unsigned int cpoint);
unsigned int *_xs_unicode_upper_search(unsigned int cpoint);
@@ -22,7 +23,12 @@
int xs_unicode_is_alpha(unsigned int cpoint);

#ifdef _XS_H
/* string-level helpers: they return xs_str, so they are only declared
   when _XS_H is defined (presumably the include guard of xs.h) */
xs_str *xs_utf8_insert(xs_str *str, unsigned int cpoint, int *offset);
xs_str *xs_utf8_cat(xs_str *str, unsigned int cpoint);
xs_str *xs_utf8_to_upper(const char *str);
xs_str *xs_utf8_to_lower(const char *str);
xs_str *xs_utf8_to_nfd(const char *str);
xs_str *xs_utf8_to_nfc(const char *str);
#endif
27 33
28#ifdef XS_IMPLEMENTATION 34#ifdef XS_IMPLEMENTATION
@@ -144,6 +150,12 @@ int xs_unicode_width(unsigned int cpoint)
144} 150}
145 151
146 152
int xs_is_diacritic(unsigned int cpoint)
/* returns non-zero if cpoint lies in the range U+0300 to U+036F
   (the Combining Diacritical Marks block); combining marks that
   live in other blocks are NOT reported as diacritics */
{
    return cpoint >= 0x300 && cpoint <= 0x36f;
}
157
158
147/** surrogate pairs **/ 159/** surrogate pairs **/
148 160
149int xs_is_surrogate(unsigned int cpoint) 161int xs_is_surrogate(unsigned int cpoint)
@@ -172,14 +184,27 @@ unsigned int xs_surrogate_enc(unsigned int cpoint)
172 184
173#ifdef _XS_H 185#ifdef _XS_H
174 186
175xs_str *xs_utf8_cat(xs_str *str, unsigned int cpoint) 187xs_str *xs_utf8_insert(xs_str *str, unsigned int cpoint, int *offset)
176/* encodes an Unicode codepoint to utf-8 into str */ 188/* encodes an Unicode codepoint to utf-8 into str */
177{ 189{
178 char tmp[4]; 190 char tmp[4];
179 191
180 int c = xs_utf8_enc(tmp, cpoint); 192 int c = xs_utf8_enc(tmp, cpoint);
181 193
182 return xs_append_m(str, tmp, c); 194 str = xs_insert_m(str, *offset, tmp, c);
195
196 *offset += c;
197
198 return str;
199}
200
201
202xs_str *xs_utf8_cat(xs_str *str, unsigned int cpoint)
203/* encodes an Unicode codepoint to utf-8 into str */
204{
205 int offset = strlen(str);
206
207 return xs_utf8_insert(str, cpoint, &offset);
183} 208}
184 209
185#endif /* _XS_H */ 210#endif /* _XS_H */
@@ -232,6 +257,9 @@ unsigned int *_xs_unicode_lower_search(unsigned int cpoint)
232unsigned int xs_unicode_to_lower(unsigned int cpoint) 257unsigned int xs_unicode_to_lower(unsigned int cpoint)
233/* returns the cpoint to lowercase */ 258/* returns the cpoint to lowercase */
234{ 259{
260 if (cpoint < 0x80)
261 return tolower(cpoint);
262
235 unsigned int *p = _xs_unicode_upper_search(cpoint); 263 unsigned int *p = _xs_unicode_upper_search(cpoint);
236 264
237 return p == NULL ? cpoint : p[1]; 265 return p == NULL ? cpoint : p[1];
@@ -241,6 +269,9 @@ unsigned int xs_unicode_to_lower(unsigned int cpoint)
241unsigned int xs_unicode_to_upper(unsigned int cpoint) 269unsigned int xs_unicode_to_upper(unsigned int cpoint)
242/* returns the cpoint to uppercase */ 270/* returns the cpoint to uppercase */
243{ 271{
272 if (cpoint < 0x80)
273 return toupper(cpoint);
274
244 unsigned int *p = _xs_unicode_lower_search(cpoint); 275 unsigned int *p = _xs_unicode_lower_search(cpoint);
245 276
246 return p == NULL ? cpoint : p[0]; 277 return p == NULL ? cpoint : p[0];
@@ -317,6 +348,87 @@ int xs_unicode_is_alpha(unsigned int cpoint)
317} 348}
318 349
319 350
351#ifdef _XS_H
352
353xs_str *xs_utf8_to_upper(const char *str)
354{
355 xs_str *s = xs_str_new(NULL);
356 unsigned int cpoint;
357 int offset = 0;
358
359 while ((cpoint = xs_utf8_dec(&str))) {
360 cpoint = xs_unicode_to_upper(cpoint);
361 s = xs_utf8_insert(s, cpoint, &offset);
362 }
363
364 return s;
365}
366
367
368xs_str *xs_utf8_to_lower(const char *str)
369{
370 xs_str *s = xs_str_new(NULL);
371 unsigned int cpoint;
372 int offset = 0;
373
374 while ((cpoint = xs_utf8_dec(&str))) {
375 cpoint = xs_unicode_to_lower(cpoint);
376 s = xs_utf8_insert(s, cpoint, &offset);
377 }
378
379 return s;
380}
381
382
383xs_str *xs_utf8_to_nfd(const char *str)
384{
385 xs_str *s = xs_str_new(NULL);
386 unsigned int cpoint;
387 int offset = 0;
388
389 while ((cpoint = xs_utf8_dec(&str))) {
390 unsigned int base;
391 unsigned int diac;
392
393 if (xs_unicode_nfd(cpoint, &base, &diac)) {
394 s = xs_utf8_insert(s, base, &offset);
395 s = xs_utf8_insert(s, diac, &offset);
396 }
397 else
398 s = xs_utf8_insert(s, cpoint, &offset);
399 }
400
401 return s;
402}
403
404
405xs_str *xs_utf8_to_nfc(const char *str)
406{
407 xs_str *s = xs_str_new(NULL);
408 unsigned int cpoint;
409 unsigned int base = 0;
410 int offset = 0;
411
412 while ((cpoint = xs_utf8_dec(&str))) {
413 if (xs_is_diacritic(cpoint)) {
414 if (xs_unicode_nfc(base, cpoint, &base))
415 continue;
416 }
417
418 if (base)
419 s = xs_utf8_insert(s, base, &offset);
420
421 base = cpoint;
422 }
423
424 if (base)
425 s = xs_utf8_insert(s, base, &offset);
426
427 return s;
428}
429
430#endif /* _XS_H */
431
320#endif /* _XS_UNICODE_TBL_H */ 432#endif /* _XS_UNICODE_TBL_H */
321 433
322#endif /* XS_IMPLEMENTATION */ 434#endif /* XS_IMPLEMENTATION */