summaryrefslogtreecommitdiff
path: root/xs_unicode.h
diff options
context:
space:
mode:
Diffstat (limited to 'xs_unicode.h')
-rw-r--r--xs_unicode.h87
1 files changed, 68 insertions, 19 deletions
diff --git a/xs_unicode.h b/xs_unicode.h
index 6f78d58..2f081ad 100644
--- a/xs_unicode.h
+++ b/xs_unicode.h
@@ -5,42 +5,91 @@
5#define _XS_UNICODE_H 5#define _XS_UNICODE_H
6 6
7 xs_str *xs_utf8_enc(xs_str *str, unsigned int cpoint); 7 xs_str *xs_utf8_enc(xs_str *str, unsigned int cpoint);
8 char *xs_utf8_dec(const char *str, unsigned int *cpoint);
8 9
9 10
10#ifdef XS_IMPLEMENTATION 11#ifdef XS_IMPLEMENTATION
11 12
12/** utf-8 **/ 13
char *_xs_utf8_enc(char buf[4], unsigned int cpoint)
/* encodes an Unicode codepoint to utf-8 into buf and returns the new
   position (one past the last byte stored). From 1 to 4 bytes are
   written and no terminating NUL is added. Codepoints above U+10FFFF
   are not representable in utf-8 and are encoded as U+FFFD (the
   replacement character) instead */
{
    unsigned char *p = (unsigned char *)buf;

    /* out of the Unicode range: the lead byte of a 4 byte sequence
       only has room for 3 payload bits, so bigger values would
       spill into the marker bits and generate an invalid sequence */
    if (cpoint > 0x10ffff)
        cpoint = 0xfffd;

    if (cpoint < 0x80) /* 1 byte char */
        *p++ = cpoint & 0xff;
    else {
        if (cpoint < 0x800) /* 2 byte char */
            *p++ = 0xc0 | (cpoint >> 6);
        else {
            if (cpoint < 0x10000) /* 3 byte char */
                *p++ = 0xe0 | (cpoint >> 12);
            else { /* 4 byte char */
                *p++ = 0xf0 | (cpoint >> 18);
                *p++ = 0x80 | ((cpoint >> 12) & 0x3f);
            }

            /* second-to-last continuation byte, shared by 3 and 4 byte chars */
            *p++ = 0x80 | ((cpoint >> 6) & 0x3f);
        }

        /* last continuation byte, shared by all multibyte chars */
        *p++ = 0x80 | (cpoint & 0x3f);
    }

    return (char *)p;
}
40
13 41
14xs_str *xs_utf8_enc(xs_str *str, unsigned int cpoint) 42xs_str *xs_utf8_enc(xs_str *str, unsigned int cpoint)
15/* encodes an Unicode codepoint to utf8 */ 43/* encodes an Unicode codepoint to utf-8 into str */
16{ 44{
17 unsigned char tmp[4]; 45 char tmp[4], *p;
18 int n = 0; 46
47 p = _xs_utf8_enc(tmp, cpoint);
19 48
20 if (cpoint < 0x80) 49 return xs_append_m(str, tmp, p - tmp);
21 tmp[n++] = cpoint & 0xff; 50}
51
52
char *xs_utf8_dec(const char *str, unsigned int *cpoint)
/* decodes an utf-8 char inside str into cpoint and returns the next
   position. The first byte is always consumed (even if it is a NUL,
   which is decoded as codepoint 0). If that byte is not a valid lead
   byte, or a continuation byte is missing, cpoint is set to U+FFFD
   (the replacement character); in the second case the returned
   position is that of the offending byte, so it is not skipped */
{
    const unsigned char *p = (const unsigned char *)str;
    unsigned int c = *p++;
    int cb = 0; /* number of continuation bytes expected */

    if ((c & 0x80) == 0) { /* 1 byte char */
        *cpoint = c;
    }
    else
    if ((c & 0xe0) == 0xc0) { /* 2 byte char */
        *cpoint = (c & 0x1f) << 6;
        cb = 1;
    }
    else
    if ((c & 0xf0) == 0xe0) { /* 3 byte char */
        *cpoint = (c & 0x0f) << 12;
        cb = 2;
    }
    else
    if ((c & 0xf8) == 0xf0) { /* 4 byte char */
        *cpoint = (c & 0x07) << 18;
        cb = 3;
    }
    else {
        /* stray continuation byte (10xxxxxx) or invalid lead byte
           (11111xxx): never leave cpoint unset */
        *cpoint = 0xfffd;
    }

    /* process the continuation bytes */
    while (cb--) {
        if ((*p & 0xc0) == 0x80) {
            /* cb has already been decremented, so it's the number
               of 6 bit groups still to come after this one */
            *cpoint |= (unsigned int)(*p++ & 0x3f) << (cb * 6);
        }
        else {
            /* truncated sequence */
            *cpoint = 0xfffd;
            break;
        }
    }

    return (char *)p;
}
43 91
92
44#endif /* XS_IMPLEMENTATION */ 93#endif /* XS_IMPLEMENTATION */
45 94
46#endif /* _XS_UNICODE_H */ 95#endif /* _XS_UNICODE_H */