diff options
Diffstat (limited to 'xs_unicode.h')
| -rw-r--r-- | xs_unicode.h | 51 |
1 file changed, 44 insertions, 7 deletions
diff --git a/xs_unicode.h b/xs_unicode.h index 48cd660..35cd9f7 100644 --- a/xs_unicode.h +++ b/xs_unicode.h | |||
| @@ -4,8 +4,10 @@ | |||
| 4 | 4 | ||
| 5 | #define _XS_UNICODE_H | 5 | #define _XS_UNICODE_H |
| 6 | 6 | ||
| 7 | int _xs_utf8_enc(char buf[4], unsigned int cpoint); | ||
| 7 | xs_str *xs_utf8_enc(xs_str *str, unsigned int cpoint); | 8 | xs_str *xs_utf8_enc(xs_str *str, unsigned int cpoint); |
| 8 | unsigned int xs_utf8_dec(char **str); | 9 | unsigned int xs_utf8_dec(char **str); |
| 10 | int xs_unicode_width(unsigned int cpoint); | ||
| 9 | unsigned int *_xs_unicode_upper_search(unsigned int cpoint); | 11 | unsigned int *_xs_unicode_upper_search(unsigned int cpoint); |
| 10 | unsigned int *_xs_unicode_lower_search(unsigned int cpoint); | 12 | unsigned int *_xs_unicode_lower_search(unsigned int cpoint); |
| 11 | #define xs_unicode_is_upper(cpoint) (!!_xs_unicode_upper_search(cpoint)) | 13 | #define xs_unicode_is_upper(cpoint) (!!_xs_unicode_upper_search(cpoint)) |
| @@ -18,8 +20,8 @@ | |||
| 18 | #ifdef XS_IMPLEMENTATION | 20 | #ifdef XS_IMPLEMENTATION |
| 19 | 21 | ||
| 20 | 22 | ||
| 21 | char *_xs_utf8_enc(char buf[4], unsigned int cpoint) | 23 | int _xs_utf8_enc(char buf[4], unsigned int cpoint) |
| 22 | /* encodes an Unicode codepoint to utf-8 into buf and returns the new position */ | 24 | /* encodes an Unicode codepoint to utf-8 into buf and returns the size in bytes */ |
| 23 | { | 25 | { |
| 24 | unsigned char *p = (unsigned char *)buf; | 26 | unsigned char *p = (unsigned char *)buf; |
| 25 | 27 | ||
| @@ -42,18 +44,18 @@ char *_xs_utf8_enc(char buf[4], unsigned int cpoint) | |||
| 42 | *p++ = 0x80 | (cpoint & 0x3f); | 44 | *p++ = 0x80 | (cpoint & 0x3f); |
| 43 | } | 45 | } |
| 44 | 46 | ||
| 45 | return (char *)p; | 47 | return p - (unsigned char *)buf; |
| 46 | } | 48 | } |
| 47 | 49 | ||
| 48 | 50 | ||
| 49 | xs_str *xs_utf8_enc(xs_str *str, unsigned int cpoint) | 51 | xs_str *xs_utf8_enc(xs_str *str, unsigned int cpoint) |
| 50 | /* encodes an Unicode codepoint to utf-8 into str */ | 52 | /* encodes an Unicode codepoint to utf-8 into str */ |
| 51 | { | 53 | { |
| 52 | char tmp[4], *p; | 54 | char tmp[4]; |
| 53 | 55 | ||
| 54 | p = _xs_utf8_enc(tmp, cpoint); | 56 | int c = _xs_utf8_enc(tmp, cpoint); |
| 55 | 57 | ||
| 56 | return xs_append_m(str, tmp, p - tmp); | 58 | return xs_append_m(str, tmp, c); |
| 57 | } | 59 | } |
| 58 | 60 | ||
| 59 | 61 | ||
| @@ -99,9 +101,44 @@ unsigned int xs_utf8_dec(char **str) | |||
| 99 | } | 101 | } |
| 100 | 102 | ||
| 101 | 103 | ||
| 104 | /* intentionally dead simple */ | ||
| 105 | |||
| 106 | static unsigned int xs_unicode_width_table[] = { | ||
| 107 | 0x300, 0x36f, 0, /* diacritics */ | ||
| 108 | 0x1100, 0x11ff, 2, /* Hangul */ | ||
| 109 | 0x2e80, 0xa4cf, 2, /* CJK */ | ||
| 110 | 0xac00, 0xd7a3, 2, /* more Hangul */ | ||
| 111 | 0xe000, 0xf8ff, 0, /* private use */ | ||
| 112 | 0xf900, 0xfaff, 2, /* CJK compatibility */ | ||
| 113 | 0xff00, 0xff60, 2, /* full width things */ | ||
| 114 | 0xffdf, 0xffe6, 2, /* full width things */ | ||
| 115 | 0x1f200, 0x1ffff, 2, /* emojis */ | ||
| 116 | 0x20000, 0x2fffd, 2 /* more CJK */ | ||
| 117 | }; | ||
| 118 | |||
| 119 | int xs_unicode_width(unsigned int cpoint) | ||
| 120 | /* returns the width in columns of a Unicode codepoint (somewhat simplified) */ | ||
| 121 | { | ||
| 122 | unsigned int *p = xs_unicode_width_table; | ||
| 123 | unsigned int *e = p + sizeof(xs_unicode_width_table) / sizeof(unsigned int); | ||
| 124 | |||
| 125 | while (p < e) { | ||
| 126 | if (cpoint < p[0]) | ||
| 127 | return 1; | ||
| 128 | |||
| 129 | if (cpoint >= p[0] && cpoint <= p[1]) | ||
| 130 | return p[2]; | ||
| 131 | |||
| 132 | p += 3; | ||
| 133 | } | ||
| 134 | |||
| 135 | return 0; | ||
| 136 | } | ||
| 137 | |||
| 138 | |||
| 102 | #ifdef _XS_UNICODE_TBL_H | 139 | #ifdef _XS_UNICODE_TBL_H |
| 103 | 140 | ||
| 104 | /* include xs_unicode_tbl.h before to use these functions */ | 141 | /* include xs_unicode_tbl.h before this one to use these functions */ |
| 105 | 142 | ||
| 106 | static int int_cmp(const void *p1, const void *p2) | 143 | static int int_cmp(const void *p1, const void *p2) |
| 107 | { | 144 | { |