diff options
| author | 2023-12-27 12:54:38 +0100 | |
|---|---|---|
| committer | 2023-12-27 12:54:38 +0100 | |
| commit | bf435af788d387b3d97fd744e3b1f6a73795beb8 (patch) | |
| tree | 6d193edd88ef3818bffd9278ddab0248e1108ef3 | |
| parent | Also log the job_fifo len in status.txt. (diff) | |
| download | snac2-bf435af788d387b3d97fd744e3b1f6a73795beb8.tar.gz snac2-bf435af788d387b3d97fd744e3b1f6a73795beb8.tar.xz snac2-bf435af788d387b3d97fd744e3b1f6a73795beb8.zip | |
Backport from xs.
Diffstat (limited to '')
| -rw-r--r-- | xs_hex.h | 138 | ||||
| -rw-r--r-- | xs_socket.h | 43 | ||||
| -rw-r--r-- | xs_unicode.h | 192 | ||||
| -rw-r--r-- | xs_version.h | 2 |
4 files changed, 247 insertions, 128 deletions
| @@ -4,65 +4,129 @@ | |||
| 4 | 4 | ||
| 5 | #define _XS_HEX_H | 5 | #define _XS_HEX_H |
| 6 | 6 | ||
| 7 | xs_str *xs_hex_enc(const xs_val *data, int size); | 7 | int xs_is_hex_digit(char str); |
| 8 | xs_val *xs_hex_dec(const xs_str *hex, int *size); | 8 | void xs_hex_enc_1(char **dst, const char **src); |
| 9 | int xs_is_hex(const char *str); | 9 | int xs_hex_dec_1(char **dst, const char **src); |
| 10 | char *_xs_hex_enc(char *dst, const char *src, int src_size); | ||
| 11 | char *_xs_hex_dec(char *dst, const char *src, int src_size); | ||
| 12 | |||
| 13 | #ifdef _XS_H | ||
| 14 | xs_str *xs_hex_enc(const xs_val *data, int size); | ||
| 15 | xs_val *xs_hex_dec(const xs_str *hex, int *size); | ||
| 16 | int xs_is_hex(const char *str); | ||
| 17 | #endif /* _XS_H */ | ||
| 18 | |||
| 10 | 19 | ||
| 11 | #ifdef XS_IMPLEMENTATION | 20 | #ifdef XS_IMPLEMENTATION |
| 12 | 21 | ||
| 22 | #include <string.h> | ||
| 23 | |||
| 13 | /** hex **/ | 24 | /** hex **/ |
| 14 | 25 | ||
| 15 | static char rev_hex_digits[] = "fedcba9876543210FEDCBA"; | 26 | static char rev_hex_digits[] = "fedcba9876543210FEDCBA"; |
| 16 | 27 | ||
| 17 | xs_str *xs_hex_enc(const xs_val *data, int size) | 28 | int xs_is_hex_digit(char str) |
| 18 | /* returns an hexdump of data */ | 29 | /* checks if the char is an hex digit */ |
| 19 | { | 30 | { |
| 20 | xs_str *s; | 31 | return strchr(rev_hex_digits, str) != NULL; |
| 21 | char *p; | 32 | } |
| 22 | int n; | ||
| 23 | 33 | ||
| 24 | p = s = xs_realloc(NULL, _xs_blk_size(size * 2 + 1)); | ||
| 25 | 34 | ||
| 26 | for (n = 0; n < size; n++) { | 35 | void xs_hex_enc_1(char **dst, const char **src) |
| 27 | *p++ = rev_hex_digits[0xf - (*data >> 4 & 0xf)]; | 36 | /* encodes one character into two hex digits */ |
| 28 | *p++ = rev_hex_digits[0xf - (*data & 0xf)]; | 37 | { |
| 29 | data++; | 38 | const char *i = *src; |
| 39 | char *o = *dst; | ||
| 40 | |||
| 41 | *o++ = rev_hex_digits[0xf - (*i >> 4 & 0xf)]; | ||
| 42 | *o++ = rev_hex_digits[0xf - (*i & 0xf)]; | ||
| 43 | |||
| 44 | *src = i + 1; | ||
| 45 | *dst = o; | ||
| 46 | } | ||
| 47 | |||
| 48 | |||
| 49 | int xs_hex_dec_1(char **dst, const char **src) | ||
| 50 | /* decodes two hex digits (returns 0 on error) */ | ||
| 51 | { | ||
| 52 | const char *i = *src; | ||
| 53 | char *o = *dst; | ||
| 54 | |||
| 55 | char *d1 = strchr(rev_hex_digits, *i++); | ||
| 56 | char *d2 = strchr(rev_hex_digits, *i++); | ||
| 57 | |||
| 58 | if (!d1 || !d2) { | ||
| 59 | /* decoding error */ | ||
| 60 | return 0; | ||
| 30 | } | 61 | } |
| 31 | 62 | ||
| 32 | *p = '\0'; | 63 | *o++ = (0xf - ((d1 - rev_hex_digits) & 0xf)) << 4 | |
| 64 | (0xf - ((d2 - rev_hex_digits) & 0xf)); | ||
| 33 | 65 | ||
| 34 | return s; | 66 | *src = i; |
| 67 | *dst = o; | ||
| 68 | return 1; | ||
| 35 | } | 69 | } |
| 36 | 70 | ||
| 37 | 71 | ||
| 38 | xs_val *xs_hex_dec(const xs_str *hex, int *size) | 72 | char *_xs_hex_enc(char *dst, const char *src, int src_size) |
| 39 | /* decodes an hexdump into data */ | 73 | /* hex-encodes the src buffer into dst, which has enough size */ |
| 40 | { | 74 | { |
| 41 | int sz = strlen(hex); | 75 | const char *e = src + src_size; |
| 42 | xs_val *s = NULL; | ||
| 43 | char *p; | ||
| 44 | int n; | ||
| 45 | 76 | ||
| 46 | if (sz % 2) | 77 | while (src < e) |
| 47 | return NULL; | 78 | xs_hex_enc_1(&dst, &src); |
| 48 | 79 | ||
| 49 | p = s = xs_realloc(NULL, _xs_blk_size(sz / 2 + 1)); | 80 | return dst; |
| 81 | } | ||
| 50 | 82 | ||
| 51 | for (n = 0; n < sz; n += 2) { | ||
| 52 | char *d1 = strchr(rev_hex_digits, *hex++); | ||
| 53 | char *d2 = strchr(rev_hex_digits, *hex++); | ||
| 54 | 83 | ||
| 55 | if (!d1 || !d2) { | 84 | char *_xs_hex_dec(char *dst, const char *src, int src_size) |
| 56 | /* decoding error */ | 85 | /* hex-decodes the src string into dst, which has enough size. |
| 57 | return xs_free(s); | 86 | returns NULL on decoding errors or the final position of dst */ |
| 58 | } | 87 | { |
| 88 | if (src_size % 2) | ||
| 89 | return NULL; | ||
| 59 | 90 | ||
| 60 | *p++ = (0xf - ((d1 - rev_hex_digits) & 0xf)) << 4 | | 91 | const char *e = src + src_size; |
| 61 | (0xf - ((d2 - rev_hex_digits) & 0xf)); | 92 | |
| 93 | while (src < e) { | ||
| 94 | if (!xs_hex_dec_1(&dst, &src)) | ||
| 95 | return NULL; | ||
| 62 | } | 96 | } |
| 63 | 97 | ||
| 64 | *p = '\0'; | 98 | return dst; |
| 99 | } | ||
| 100 | |||
| 101 | |||
| 102 | #ifdef _XS_H | ||
| 103 | |||
| 104 | xs_str *xs_hex_enc(const xs_val *data, int size) | ||
| 105 | /* returns an hexdump of data */ | ||
| 106 | { | ||
| 107 | xs_str *s = xs_realloc(NULL, _xs_blk_size(size * 2 + 1)); | ||
| 108 | |||
| 109 | char *q = _xs_hex_enc(s, data, size); | ||
| 110 | |||
| 111 | *q = '\0'; | ||
| 112 | |||
| 113 | return s; | ||
| 114 | } | ||
| 115 | |||
| 116 | |||
| 117 | xs_val *xs_hex_dec(const xs_str *hex, int *size) | ||
| 118 | /* decodes an hexdump into data */ | ||
| 119 | { | ||
| 120 | int sz = strlen(hex); | ||
| 121 | xs_val *s = NULL; | ||
| 122 | |||
| 65 | *size = sz / 2; | 123 | *size = sz / 2; |
| 124 | s = xs_realloc(NULL, _xs_blk_size(*size + 1)); | ||
| 125 | |||
| 126 | if (!_xs_hex_dec(s, hex, sz)) | ||
| 127 | return xs_free(s); | ||
| 128 | |||
| 129 | s[*size] = '\0'; | ||
| 66 | 130 | ||
| 67 | return s; | 131 | return s; |
| 68 | } | 132 | } |
| @@ -71,14 +135,18 @@ xs_val *xs_hex_dec(const xs_str *hex, int *size) | |||
| 71 | int xs_is_hex(const char *str) | 135 | int xs_is_hex(const char *str) |
| 72 | /* returns 1 if str is an hex string */ | 136 | /* returns 1 if str is an hex string */ |
| 73 | { | 137 | { |
| 138 | if (strlen(str) % 2) | ||
| 139 | return 0; | ||
| 140 | |||
| 74 | while (*str) { | 141 | while (*str) { |
| 75 | if (strchr(rev_hex_digits, *str++) == NULL) | 142 | if (!xs_is_hex_digit(*str++)) |
| 76 | return 0; | 143 | return 0; |
| 77 | } | 144 | } |
| 78 | 145 | ||
| 79 | return 1; | 146 | return 1; |
| 80 | } | 147 | } |
| 81 | 148 | ||
| 149 | #endif /* _XS_H */ | ||
| 82 | 150 | ||
| 83 | #endif /* XS_IMPLEMENTATION */ | 151 | #endif /* XS_IMPLEMENTATION */ |
| 84 | 152 | ||
diff --git a/xs_socket.h b/xs_socket.h index eea2f2d..ab67a6b 100644 --- a/xs_socket.h +++ b/xs_socket.h | |||
| @@ -7,9 +7,13 @@ | |||
| 7 | int xs_socket_timeout(int s, double rto, double sto); | 7 | int xs_socket_timeout(int s, double rto, double sto); |
| 8 | int xs_socket_server(const char *addr, const char *serv); | 8 | int xs_socket_server(const char *addr, const char *serv); |
| 9 | FILE *xs_socket_accept(int rs); | 9 | FILE *xs_socket_accept(int rs); |
| 10 | xs_str *xs_socket_peername(int s); | 10 | int _xs_socket_peername(int s, char *buf, int buf_size); |
| 11 | int xs_socket_connect(const char *addr, const char *serv); | 11 | int xs_socket_connect(const char *addr, const char *serv); |
| 12 | 12 | ||
| 13 | #ifdef _XS_H | ||
| 14 | xs_str *xs_socket_peername(int s); | ||
| 15 | #endif | ||
| 16 | |||
| 13 | 17 | ||
| 14 | #ifdef XS_IMPLEMENTATION | 18 | #ifdef XS_IMPLEMENTATION |
| 15 | 19 | ||
| @@ -17,6 +21,9 @@ int xs_socket_connect(const char *addr, const char *serv); | |||
| 17 | #include <netdb.h> | 21 | #include <netdb.h> |
| 18 | #include <netinet/in.h> | 22 | #include <netinet/in.h> |
| 19 | #include <arpa/inet.h> | 23 | #include <arpa/inet.h> |
| 24 | #include <string.h> | ||
| 25 | #include <stdlib.h> | ||
| 26 | #include <unistd.h> | ||
| 20 | 27 | ||
| 21 | 28 | ||
| 22 | int xs_socket_timeout(int s, double rto, double sto) | 29 | int xs_socket_timeout(int s, double rto, double sto) |
| @@ -100,34 +107,28 @@ FILE *xs_socket_accept(int rs) | |||
| 100 | } | 107 | } |
| 101 | 108 | ||
| 102 | 109 | ||
| 103 | xs_str *xs_socket_peername(int s) | 110 | int _xs_socket_peername(int s, char *buf, int buf_size) |
| 104 | /* returns the remote address as a string */ | 111 | /* fill the buffer with the socket peername */ |
| 105 | { | 112 | { |
| 106 | xs_str *ip = NULL; | ||
| 107 | struct sockaddr_storage addr; | 113 | struct sockaddr_storage addr; |
| 108 | socklen_t slen = sizeof(addr); | 114 | socklen_t slen = sizeof(addr); |
| 115 | const char *p = NULL; | ||
| 109 | 116 | ||
| 110 | if (getpeername(s, (struct sockaddr *)&addr, &slen) != -1) { | 117 | if (getpeername(s, (struct sockaddr *)&addr, &slen) != -1) { |
| 111 | char buf[1024]; | ||
| 112 | const char *p = NULL; | ||
| 113 | |||
| 114 | if (addr.ss_family == AF_INET) { | 118 | if (addr.ss_family == AF_INET) { |
| 115 | struct sockaddr_in *sa = (struct sockaddr_in *)&addr; | 119 | struct sockaddr_in *sa = (struct sockaddr_in *)&addr; |
| 116 | 120 | ||
| 117 | p = inet_ntop(AF_INET, &sa->sin_addr, buf, sizeof(buf)); | 121 | p = inet_ntop(AF_INET, &sa->sin_addr, buf, buf_size); |
| 118 | } | 122 | } |
| 119 | else | 123 | else |
| 120 | if (addr.ss_family == AF_INET6) { | 124 | if (addr.ss_family == AF_INET6) { |
| 121 | struct sockaddr_in6 *sa = (struct sockaddr_in6 *)&addr; | 125 | struct sockaddr_in6 *sa = (struct sockaddr_in6 *)&addr; |
| 122 | 126 | ||
| 123 | p = inet_ntop(AF_INET6, &sa->sin6_addr, buf, sizeof(buf)); | 127 | p = inet_ntop(AF_INET6, &sa->sin6_addr, buf, buf_size); |
| 124 | } | 128 | } |
| 125 | |||
| 126 | if (p != NULL) | ||
| 127 | ip = xs_str_new(p); | ||
| 128 | } | 129 | } |
| 129 | 130 | ||
| 130 | return ip; | 131 | return p != NULL; |
| 131 | } | 132 | } |
| 132 | 133 | ||
| 133 | 134 | ||
| @@ -195,6 +196,22 @@ int xs_socket_connect(const char *addr, const char *serv) | |||
| 195 | } | 196 | } |
| 196 | 197 | ||
| 197 | 198 | ||
| 199 | #ifdef _XS_H | ||
| 200 | |||
| 201 | xs_str *xs_socket_peername(int s) | ||
| 202 | /* returns the remote address as a string */ | ||
| 203 | { | ||
| 204 | char buf[2028]; | ||
| 205 | xs_str *p = NULL; | ||
| 206 | |||
| 207 | if (_xs_socket_peername(s, buf, sizeof(buf))) | ||
| 208 | p = xs_str_new(buf); | ||
| 209 | |||
| 210 | return p; | ||
| 211 | } | ||
| 212 | |||
| 213 | #endif /* _XS_H */ | ||
| 214 | |||
| 198 | #endif /* XS_IMPLEMENTATION */ | 215 | #endif /* XS_IMPLEMENTATION */ |
| 199 | 216 | ||
| 200 | #endif /* _XS_SOCKET_H */ | 217 | #endif /* _XS_SOCKET_H */ |
diff --git a/xs_unicode.h b/xs_unicode.h index f5880f0..c666479 100644 --- a/xs_unicode.h +++ b/xs_unicode.h | |||
| @@ -5,7 +5,6 @@ | |||
| 5 | #define _XS_UNICODE_H | 5 | #define _XS_UNICODE_H |
| 6 | 6 | ||
| 7 | int _xs_utf8_enc(char buf[4], unsigned int cpoint); | 7 | int _xs_utf8_enc(char buf[4], unsigned int cpoint); |
| 8 | xs_str *xs_utf8_enc(xs_str *str, unsigned int cpoint); | ||
| 9 | unsigned int xs_utf8_dec(char **str); | 8 | unsigned int xs_utf8_dec(char **str); |
| 10 | int xs_unicode_width(unsigned int cpoint); | 9 | int xs_unicode_width(unsigned int cpoint); |
| 11 | int xs_is_surrogate(unsigned int cpoint); | 10 | int xs_is_surrogate(unsigned int cpoint); |
| @@ -21,13 +20,20 @@ | |||
| 21 | int xs_unicode_nfc(unsigned int base, unsigned int diac, unsigned int *cpoint); | 20 | int xs_unicode_nfc(unsigned int base, unsigned int diac, unsigned int *cpoint); |
| 22 | int xs_unicode_is_alpha(unsigned int cpoint); | 21 | int xs_unicode_is_alpha(unsigned int cpoint); |
| 23 | 22 | ||
| 23 | #ifdef _XS_H | ||
| 24 | xs_str *xs_utf8_enc(xs_str *str, unsigned int cpoint); | ||
| 25 | #endif | ||
| 26 | |||
| 24 | #ifdef XS_IMPLEMENTATION | 27 | #ifdef XS_IMPLEMENTATION |
| 25 | 28 | ||
| 29 | #ifndef countof | ||
| 30 | #define countof(a) (sizeof((a)) / sizeof((*a))) | ||
| 31 | #endif | ||
| 26 | 32 | ||
| 27 | int _xs_utf8_enc(char buf[4], unsigned int cpoint) | 33 | int _xs_utf8_enc(char buf[4], unsigned int cpoint) |
| 28 | /* encodes an Unicode codepoint to utf-8 into buf and returns the size in bytes */ | 34 | /* encodes an Unicode codepoint to utf-8 into buf and returns the size in bytes */ |
| 29 | { | 35 | { |
| 30 | unsigned char *p = (unsigned char *)buf; | 36 | char *p = buf; |
| 31 | 37 | ||
| 32 | if (cpoint < 0x80) /* 1 byte char */ | 38 | if (cpoint < 0x80) /* 1 byte char */ |
| 33 | *p++ = cpoint & 0xff; | 39 | *p++ = cpoint & 0xff; |
| @@ -48,27 +54,16 @@ int _xs_utf8_enc(char buf[4], unsigned int cpoint) | |||
| 48 | *p++ = 0x80 | (cpoint & 0x3f); | 54 | *p++ = 0x80 | (cpoint & 0x3f); |
| 49 | } | 55 | } |
| 50 | 56 | ||
| 51 | return p - (unsigned char *)buf; | 57 | return p - buf; |
| 52 | } | ||
| 53 | |||
| 54 | |||
| 55 | xs_str *xs_utf8_enc(xs_str *str, unsigned int cpoint) | ||
| 56 | /* encodes an Unicode codepoint to utf-8 into str */ | ||
| 57 | { | ||
| 58 | char tmp[4]; | ||
| 59 | |||
| 60 | int c = _xs_utf8_enc(tmp, cpoint); | ||
| 61 | |||
| 62 | return xs_append_m(str, tmp, c); | ||
| 63 | } | 58 | } |
| 64 | 59 | ||
| 65 | 60 | ||
| 66 | unsigned int xs_utf8_dec(char **str) | 61 | unsigned int xs_utf8_dec(char **str) |
| 67 | /* decodes an utf-8 char inside str and updates the pointer */ | 62 | /* decodes an utf-8 char inside str and updates the pointer */ |
| 68 | { | 63 | { |
| 69 | unsigned char *p = (unsigned char *)*str; | 64 | char *p = *str; |
| 70 | unsigned int cpoint = 0; | 65 | unsigned int cpoint = 0; |
| 71 | int c = *p++; | 66 | unsigned char c = *p++; |
| 72 | int cb = 0; | 67 | int cb = 0; |
| 73 | 68 | ||
| 74 | if ((c & 0x80) == 0) { /* 1 byte char */ | 69 | if ((c & 0x80) == 0) { /* 1 byte char */ |
| @@ -91,30 +86,19 @@ unsigned int xs_utf8_dec(char **str) | |||
| 91 | } | 86 | } |
| 92 | 87 | ||
| 93 | /* process the continuation bytes */ | 88 | /* process the continuation bytes */ |
| 94 | while (cb--) { | 89 | while (cb > 0 && *p && (*p & 0xc0) == 0x80) |
| 95 | if ((*p & 0xc0) == 0x80) | 90 | cpoint |= (*p++ & 0x3f) << (--cb * 6); |
| 96 | cpoint |= (*p++ & 0x3f) << (cb * 6); | ||
| 97 | else { | ||
| 98 | cpoint = 0xfffd; | ||
| 99 | break; | ||
| 100 | } | ||
| 101 | } | ||
| 102 | 91 | ||
| 103 | *str = (char *)p; | 92 | /* incomplete or broken? */ |
| 104 | return cpoint; | 93 | if (cb) |
| 105 | } | 94 | cpoint = 0xfffd; |
| 106 | |||
| 107 | |||
| 108 | static int int_range_cmp(const void *p1, const void *p2) | ||
| 109 | { | ||
| 110 | const unsigned int *a = p1; | ||
| 111 | const unsigned int *b = p2; | ||
| 112 | 95 | ||
| 113 | return *a < b[0] ? -1 : *a > b[1] ? 1 : 0; | 96 | *str = p; |
| 97 | return cpoint; | ||
| 114 | } | 98 | } |
| 115 | 99 | ||
| 116 | 100 | ||
| 117 | /* intentionally dead simple */ | 101 | /** Unicode character width: intentionally dead simple **/ |
| 118 | 102 | ||
| 119 | static unsigned int xs_unicode_width_table[] = { | 103 | static unsigned int xs_unicode_width_table[] = { |
| 120 | 0x300, 0x36f, 0, /* diacritics */ | 104 | 0x300, 0x36f, 0, /* diacritics */ |
| @@ -132,12 +116,23 @@ static unsigned int xs_unicode_width_table[] = { | |||
| 132 | int xs_unicode_width(unsigned int cpoint) | 116 | int xs_unicode_width(unsigned int cpoint) |
| 133 | /* returns the width in columns of a Unicode codepoint (somewhat simplified) */ | 117 | /* returns the width in columns of a Unicode codepoint (somewhat simplified) */ |
| 134 | { | 118 | { |
| 135 | unsigned int *r = bsearch(&cpoint, xs_unicode_width_table, | 119 | int b = 0; |
| 136 | sizeof(xs_unicode_width_table) / (sizeof(unsigned int) * 3), | 120 | int t = countof(xs_unicode_width_table) / 3 - 1; |
| 137 | sizeof(unsigned int) * 3, | 121 | |
| 138 | int_range_cmp); | 122 | while (t >= b) { |
| 123 | int n = (b + t) / 2; | ||
| 124 | unsigned int *p = &xs_unicode_width_table[n * 3]; | ||
| 125 | |||
| 126 | if (cpoint < p[0]) | ||
| 127 | t = n - 1; | ||
| 128 | else | ||
| 129 | if (cpoint > p[1]) | ||
| 130 | b = n + 1; | ||
| 131 | else | ||
| 132 | return p[2]; | ||
| 133 | } | ||
| 139 | 134 | ||
| 140 | return r ? r[2] : 1; | 135 | return 1; |
| 141 | } | 136 | } |
| 142 | 137 | ||
| 143 | 138 | ||
| @@ -167,38 +162,56 @@ unsigned int xs_surrogate_enc(unsigned int cpoint) | |||
| 167 | } | 162 | } |
| 168 | 163 | ||
| 169 | 164 | ||
| 170 | #ifdef _XS_UNICODE_TBL_H | 165 | #ifdef _XS_H |
| 171 | |||
| 172 | /* include xs_unicode_tbl.h before this one to use these functions */ | ||
| 173 | 166 | ||
| 174 | static int int_cmp(const void *p1, const void *p2) | 167 | xs_str *xs_utf8_enc(xs_str *str, unsigned int cpoint) |
| 168 | /* encodes an Unicode codepoint to utf-8 into str */ | ||
| 175 | { | 169 | { |
| 176 | const unsigned int *a = p1; | 170 | char tmp[4]; |
| 177 | const unsigned int *b = p2; | 171 | |
| 172 | int c = _xs_utf8_enc(tmp, cpoint); | ||
| 178 | 173 | ||
| 179 | return *a < *b ? -1 : *a > *b ? 1 : 0; | 174 | return xs_append_m(str, tmp, c); |
| 180 | } | 175 | } |
| 181 | 176 | ||
| 177 | #endif /* _XS_H */ | ||
| 178 | |||
| 179 | |||
| 180 | #ifdef _XS_UNICODE_TBL_H | ||
| 181 | |||
| 182 | /* include xs_unicode_tbl.h before this one to use these functions */ | ||
| 182 | 183 | ||
| 183 | unsigned int *_xs_unicode_upper_search(unsigned int cpoint) | 184 | unsigned int *_xs_unicode_upper_search(unsigned int cpoint) |
| 184 | /* searches for an uppercase codepoint in the case fold table */ | 185 | /* searches for an uppercase codepoint in the case fold table */ |
| 185 | { | 186 | { |
| 186 | return bsearch(&cpoint, xs_unicode_case_fold_table, | 187 | int b = 0; |
| 187 | sizeof(xs_unicode_case_fold_table) / (sizeof(unsigned int) * 2), | 188 | int t = countof(xs_unicode_case_fold_table) / 2 + 1; |
| 188 | sizeof(unsigned int) * 2, | 189 | |
| 189 | int_cmp); | 190 | while (t >= b) { |
| 191 | int n = (b + t) / 2; | ||
| 192 | unsigned int *p = &xs_unicode_case_fold_table[n * 2]; | ||
| 193 | |||
| 194 | if (cpoint < p[0]) | ||
| 195 | t = n - 1; | ||
| 196 | else | ||
| 197 | if (cpoint > p[0]) | ||
| 198 | b = n + 1; | ||
| 199 | else | ||
| 200 | return p; | ||
| 201 | } | ||
| 202 | |||
| 203 | return NULL; | ||
| 190 | } | 204 | } |
| 191 | 205 | ||
| 192 | 206 | ||
| 193 | unsigned int *_xs_unicode_lower_search(unsigned int cpoint) | 207 | unsigned int *_xs_unicode_lower_search(unsigned int cpoint) |
| 194 | /* searches for a lowercase codepoint in the case fold table */ | 208 | /* searches for a lowercase codepoint in the case fold table */ |
| 195 | { | 209 | { |
| 196 | unsigned int *p = xs_unicode_case_fold_table + 1; | 210 | unsigned int *p = xs_unicode_case_fold_table; |
| 197 | unsigned int *e = xs_unicode_case_fold_table + | 211 | unsigned int *e = p + countof(xs_unicode_case_fold_table); |
| 198 | sizeof(xs_unicode_case_fold_table) / sizeof(unsigned int); | ||
| 199 | 212 | ||
| 200 | while (p < e) { | 213 | while (p < e) { |
| 201 | if (cpoint == *p) | 214 | if (cpoint == p[1]) |
| 202 | return p; | 215 | return p; |
| 203 | 216 | ||
| 204 | p += 2; | 217 | p += 2; |
| @@ -208,38 +221,49 @@ unsigned int *_xs_unicode_lower_search(unsigned int cpoint) | |||
| 208 | } | 221 | } |
| 209 | 222 | ||
| 210 | 223 | ||
| 211 | unsigned int xs_unicode_to_upper(unsigned int cpoint) | 224 | unsigned int xs_unicode_to_lower(unsigned int cpoint) |
| 212 | /* returns the cpoint to uppercase */ | 225 | /* returns the cpoint to lowercase */ |
| 213 | { | 226 | { |
| 214 | unsigned int *p = _xs_unicode_lower_search(cpoint); | 227 | unsigned int *p = _xs_unicode_upper_search(cpoint); |
| 215 | 228 | ||
| 216 | return p == NULL ? cpoint : p[-1]; | 229 | return p == NULL ? cpoint : p[1]; |
| 217 | } | 230 | } |
| 218 | 231 | ||
| 219 | 232 | ||
| 220 | unsigned int xs_unicode_to_lower(unsigned int cpoint) | 233 | unsigned int xs_unicode_to_upper(unsigned int cpoint) |
| 221 | /* returns the cpoint to lowercase */ | 234 | /* returns the cpoint to uppercase */ |
| 222 | { | 235 | { |
| 223 | unsigned int *p = _xs_unicode_upper_search(cpoint); | 236 | unsigned int *p = _xs_unicode_lower_search(cpoint); |
| 224 | 237 | ||
| 225 | return p == NULL ? cpoint : p[1]; | 238 | return p == NULL ? cpoint : p[0]; |
| 226 | } | 239 | } |
| 227 | 240 | ||
| 228 | 241 | ||
| 229 | int xs_unicode_nfd(unsigned int cpoint, unsigned int *base, unsigned int *diac) | 242 | int xs_unicode_nfd(unsigned int cpoint, unsigned int *base, unsigned int *diac) |
| 230 | /* applies unicode Normalization Form D */ | 243 | /* applies unicode Normalization Form D */ |
| 231 | { | 244 | { |
| 232 | unsigned int *r = bsearch(&cpoint, xs_unicode_nfd_table, | 245 | int b = 0; |
| 233 | sizeof(xs_unicode_nfd_table) / (sizeof(unsigned int) * 3), | 246 | int t = countof(xs_unicode_nfd_table) / 3 - 1; |
| 234 | sizeof(unsigned int) * 3, | 247 | |
| 235 | int_cmp); | 248 | while (t >= b) { |
| 236 | 249 | int n = (b + t) / 2; | |
| 237 | if (r != NULL) { | 250 | unsigned int *p = &xs_unicode_nfd_table[n * 3]; |
| 238 | *base = r[1]; | 251 | |
| 239 | *diac = r[2]; | 252 | int c = cpoint - p[0]; |
| 253 | |||
| 254 | if (c < 0) | ||
| 255 | t = n - 1; | ||
| 256 | else | ||
| 257 | if (c > 0) | ||
| 258 | b = n + 1; | ||
| 259 | else { | ||
| 260 | *base = p[1]; | ||
| 261 | *diac = p[2]; | ||
| 262 | return 1; | ||
| 263 | } | ||
| 240 | } | 264 | } |
| 241 | 265 | ||
| 242 | return !!r; | 266 | return 0; |
| 243 | } | 267 | } |
| 244 | 268 | ||
| 245 | 269 | ||
| @@ -247,8 +271,7 @@ int xs_unicode_nfc(unsigned int base, unsigned int diac, unsigned int *cpoint) | |||
| 247 | /* applies unicode Normalization Form C */ | 271 | /* applies unicode Normalization Form C */ |
| 248 | { | 272 | { |
| 249 | unsigned int *p = xs_unicode_nfd_table; | 273 | unsigned int *p = xs_unicode_nfd_table; |
| 250 | unsigned int *e = xs_unicode_nfd_table + | 274 | unsigned int *e = p + countof(xs_unicode_nfd_table); |
| 251 | sizeof(xs_unicode_nfd_table) / sizeof(unsigned int); | ||
| 252 | 275 | ||
| 253 | while (p < e) { | 276 | while (p < e) { |
| 254 | if (p[1] == base && p[2] == diac) { | 277 | if (p[1] == base && p[2] == diac) { |
| @@ -266,12 +289,23 @@ int xs_unicode_nfc(unsigned int base, unsigned int diac, unsigned int *cpoint) | |||
| 266 | int xs_unicode_is_alpha(unsigned int cpoint) | 289 | int xs_unicode_is_alpha(unsigned int cpoint) |
| 267 | /* checks if a codepoint is an alpha (i.e. a letter) */ | 290 | /* checks if a codepoint is an alpha (i.e. a letter) */ |
| 268 | { | 291 | { |
| 269 | unsigned int *r = bsearch(&cpoint, xs_unicode_alpha_table, | 292 | int b = 0; |
| 270 | sizeof(xs_unicode_alpha_table) / (sizeof(unsigned int) * 2), | 293 | int t = countof(xs_unicode_alpha_table) / 2 - 1; |
| 271 | sizeof(unsigned int) * 2, | 294 | |
| 272 | int_range_cmp); | 295 | while (t >= b) { |
| 296 | int n = (b + t) / 2; | ||
| 297 | unsigned int *p = &xs_unicode_alpha_table[n * 2]; | ||
| 298 | |||
| 299 | if (cpoint < p[0]) | ||
| 300 | t = n - 1; | ||
| 301 | else | ||
| 302 | if (cpoint > p[1]) | ||
| 303 | b = n + 1; | ||
| 304 | else | ||
| 305 | return 1; | ||
| 306 | } | ||
| 273 | 307 | ||
| 274 | return !!r; | 308 | return 0; |
| 275 | } | 309 | } |
| 276 | 310 | ||
| 277 | 311 | ||
diff --git a/xs_version.h b/xs_version.h index f25a017..4f4dc13 100644 --- a/xs_version.h +++ b/xs_version.h | |||
| @@ -1 +1 @@ | |||
| /* 3582ff265e19407df1d532eb1d90c372fe22ca62 2023-12-08T06:10:40+01:00 */ | /* fd50c72456b717bb235eec8fe5f712da5f695f2b 2023-12-27T12:51:14+01:00 */ | ||