summaryrefslogtreecommitdiff
path: root/xs_unicode.h
diff options
context:
space:
mode:
Diffstat (limited to 'xs_unicode.h')
-rw-r--r-- xs_unicode.h 51
1 files changed, 44 insertions, 7 deletions
diff --git a/xs_unicode.h b/xs_unicode.h
index 48cd660..35cd9f7 100644
--- a/xs_unicode.h
+++ b/xs_unicode.h
@@ -4,8 +4,10 @@
4 4
5#define _XS_UNICODE_H 5#define _XS_UNICODE_H
6 6
7 int _xs_utf8_enc(char buf[4], unsigned int cpoint);
7 xs_str *xs_utf8_enc(xs_str *str, unsigned int cpoint); 8 xs_str *xs_utf8_enc(xs_str *str, unsigned int cpoint);
8 unsigned int xs_utf8_dec(char **str); 9 unsigned int xs_utf8_dec(char **str);
10 int xs_unicode_width(unsigned int cpoint);
9 unsigned int *_xs_unicode_upper_search(unsigned int cpoint); 11 unsigned int *_xs_unicode_upper_search(unsigned int cpoint);
10 unsigned int *_xs_unicode_lower_search(unsigned int cpoint); 12 unsigned int *_xs_unicode_lower_search(unsigned int cpoint);
11 #define xs_unicode_is_upper(cpoint) (!!_xs_unicode_upper_search(cpoint)) 13 #define xs_unicode_is_upper(cpoint) (!!_xs_unicode_upper_search(cpoint))
@@ -18,8 +20,8 @@
18#ifdef XS_IMPLEMENTATION 20#ifdef XS_IMPLEMENTATION
19 21
20 22
21char *_xs_utf8_enc(char buf[4], unsigned int cpoint) 23int _xs_utf8_enc(char buf[4], unsigned int cpoint)
22/* encodes an Unicode codepoint to utf-8 into buf and returns the new position */ 24/* encodes an Unicode codepoint to utf-8 into buf and returns the size in bytes */
23{ 25{
24 unsigned char *p = (unsigned char *)buf; 26 unsigned char *p = (unsigned char *)buf;
25 27
@@ -42,18 +44,18 @@ char *_xs_utf8_enc(char buf[4], unsigned int cpoint)
42 *p++ = 0x80 | (cpoint & 0x3f); 44 *p++ = 0x80 | (cpoint & 0x3f);
43 } 45 }
44 46
45 return (char *)p; 47 return p - (unsigned char *)buf;
46} 48}
47 49
48 50
49xs_str *xs_utf8_enc(xs_str *str, unsigned int cpoint) 51xs_str *xs_utf8_enc(xs_str *str, unsigned int cpoint)
50/* encodes an Unicode codepoint to utf-8 into str */ 52/* encodes an Unicode codepoint to utf-8 into str */
51{ 53{
52 char tmp[4], *p; 54 char tmp[4];
53 55
54 p = _xs_utf8_enc(tmp, cpoint); 56 int c = _xs_utf8_enc(tmp, cpoint);
55 57
56 return xs_append_m(str, tmp, p - tmp); 58 return xs_append_m(str, tmp, c);
57} 59}
58 60
59 61
@@ -99,9 +101,44 @@ unsigned int xs_utf8_dec(char **str)
99} 101}
100 102
101 103
104/* intentionally dead simple */
105
static const unsigned int xs_unicode_width_table[] = {
    /* rows of { first, last, width }; rows must stay sorted by codepoint
       and must not overlap, because the lookup below stops at the first
       row that starts above the codepoint being searched */
    0x300,   0x36f,   0,    /* diacritics */
    0x1100,  0x11ff,  2,    /* Hangul */
    0x2e80,  0xa4cf,  2,    /* CJK */
    0xac00,  0xd7a3,  2,    /* more Hangul */
    0xe000,  0xf8ff,  0,    /* private use */
    0xf900,  0xfaff,  2,    /* CJK compatibility */
    0xff00,  0xff60,  2,    /* full width things */
    0xffdf,  0xffe6,  2,    /* full width things */
    0x1f200, 0x1ffff, 2,    /* emojis */
    0x20000, 0x2fffd, 2,    /* more CJK */
    0x30000, 0x3fffd, 2     /* CJK in plane 3 (wide by default per UAX #11) */
};

int xs_unicode_width(unsigned int cpoint)
/* returns the width in columns of a Unicode codepoint (somewhat simplified):
   the width stored in the table row that contains cpoint, 1 if cpoint lies
   before the first row or in a gap between rows, and 0 if it lies past the
   last row */
{
    const unsigned int *p = xs_unicode_width_table;
    const unsigned int *e = p + sizeof(xs_unicode_width_table) /
                                sizeof(xs_unicode_width_table[0]);

    for (; p < e; p += 3) {
        /* the table is sorted, so a row starting above cpoint means
           no row can contain it: plain single-column character */
        if (cpoint < p[0])
            return 1;

        /* cpoint >= p[0] is already known here */
        if (cpoint <= p[1])
            return (int)p[2];
    }

    /* beyond the last range in the table */
    return 0;
}
137
138
102#ifdef _XS_UNICODE_TBL_H 139#ifdef _XS_UNICODE_TBL_H
103 140
104/* include xs_unicode_tbl.h before to use these functions */ 141/* include xs_unicode_tbl.h before this one to use these functions */
105 142
106static int int_cmp(const void *p1, const void *p2) 143static int int_cmp(const void *p1, const void *p2)
107{ 144{