diff options
| -rw-r--r-- | src/CanonData.zig | 4 | ||||
| -rw-r--r-- | src/CaseData.zig | 6 | ||||
| -rw-r--r-- | src/CombiningData.zig | 4 | ||||
| -rw-r--r-- | src/CompatData.zig | 2 | ||||
| -rw-r--r-- | src/FoldData.zig | 10 | ||||
| -rw-r--r-- | src/GenCatData.zig | 2 | ||||
| -rw-r--r-- | src/GraphemeData.zig | 6 | ||||
| -rw-r--r-- | src/HangulData.zig | 2 | ||||
| -rw-r--r-- | src/NormPropsData.zig | 6 | ||||
| -rw-r--r-- | src/PropsData.zig | 24 | ||||
| -rw-r--r-- | src/WidthData.zig | 2 |
11 files changed, 35 insertions, 33 deletions
diff --git a/src/CanonData.zig b/src/CanonData.zig index be2b381..05b9017 100644 --- a/src/CanonData.zig +++ b/src/CanonData.zig | |||
| @@ -56,11 +56,11 @@ pub fn deinit(self: *Self) void { | |||
| 56 | } | 56 | } |
| 57 | 57 | ||
| 58 | /// Returns canonical decomposition for `cp`. | 58 | /// Returns canonical decomposition for `cp`. |
| 59 | pub inline fn toNfd(self: Self, cp: u21) []const u21 { | 59 | pub fn toNfd(self: Self, cp: u21) []const u21 { |
| 60 | return self.nfd[cp]; | 60 | return self.nfd[cp]; |
| 61 | } | 61 | } |
| 62 | 62 | ||
| 63 | // Returns the primary composite for the codepoints in `cps`. | 63 | // Returns the primary composite for the codepoints in `cps`. |
| 64 | pub inline fn toNfc(self: Self, cps: [2]u21) ?u21 { | 64 | pub fn toNfc(self: Self, cps: [2]u21) ?u21 { |
| 65 | return self.nfc.get(cps); | 65 | return self.nfc.get(cps); |
| 66 | } | 66 | } |
diff --git a/src/CaseData.zig b/src/CaseData.zig index 260637a..3a35fdd 100644 --- a/src/CaseData.zig +++ b/src/CaseData.zig | |||
| @@ -81,7 +81,7 @@ pub fn deinit(self: *const Self) void { | |||
| 81 | } | 81 | } |
| 82 | 82 | ||
| 83 | // Returns true if `cp` is either upper, lower, or title case. | 83 | // Returns true if `cp` is either upper, lower, or title case. |
| 84 | pub inline fn isCased(self: Self, cp: u21) bool { | 84 | pub fn isCased(self: Self, cp: u21) bool { |
| 85 | return self.prop_s2[self.prop_s1[cp >> 8] + (cp & 0xff)] & 4 == 4; | 85 | return self.prop_s2[self.prop_s1[cp >> 8] + (cp & 0xff)] & 4 == 4; |
| 86 | } | 86 | } |
| 87 | 87 | ||
| @@ -109,7 +109,7 @@ test "isUpperStr" { | |||
| 109 | } | 109 | } |
| 110 | 110 | ||
| 111 | /// Returns uppercase mapping for `cp`. | 111 | /// Returns uppercase mapping for `cp`. |
| 112 | pub inline fn toUpper(self: Self, cp: u21) u21 { | 112 | pub fn toUpper(self: Self, cp: u21) u21 { |
| 113 | return self.case_map[cp][0]; | 113 | return self.case_map[cp][0]; |
| 114 | } | 114 | } |
| 115 | 115 | ||
| @@ -167,7 +167,7 @@ test "isLowerStr" { | |||
| 167 | } | 167 | } |
| 168 | 168 | ||
| 169 | /// Returns lowercase mapping for `cp`. | 169 | /// Returns lowercase mapping for `cp`. |
| 170 | pub inline fn toLower(self: Self, cp: u21) u21 { | 170 | pub fn toLower(self: Self, cp: u21) u21 { |
| 171 | return self.case_map[cp][1]; | 171 | return self.case_map[cp][1]; |
| 172 | } | 172 | } |
| 173 | 173 | ||
diff --git a/src/CombiningData.zig b/src/CombiningData.zig index 16b923f..44140f8 100644 --- a/src/CombiningData.zig +++ b/src/CombiningData.zig | |||
| @@ -39,11 +39,11 @@ pub fn deinit(self: *const Self) void { | |||
| 39 | } | 39 | } |
| 40 | 40 | ||
| 41 | /// Returns the canonical combining class for a code point. | 41 | /// Returns the canonical combining class for a code point. |
| 42 | pub inline fn ccc(self: Self, cp: u21) u8 { | 42 | pub fn ccc(self: Self, cp: u21) u8 { |
| 43 | return self.s2[self.s1[cp >> 8] + (cp & 0xff)]; | 43 | return self.s2[self.s1[cp >> 8] + (cp & 0xff)]; |
| 44 | } | 44 | } |
| 45 | 45 | ||
| 46 | /// True if `cp` is a starter code point, not a combining character. | 46 | /// True if `cp` is a starter code point, not a combining character. |
| 47 | pub inline fn isStarter(self: Self, cp: u21) bool { | 47 | pub fn isStarter(self: Self, cp: u21) bool { |
| 48 | return self.s2[self.s1[cp >> 8] + (cp & 0xff)] == 0; | 48 | return self.s2[self.s1[cp >> 8] + (cp & 0xff)] == 0; |
| 49 | } | 49 | } |
diff --git a/src/CompatData.zig b/src/CompatData.zig index 3346a06..cf184a1 100644 --- a/src/CompatData.zig +++ b/src/CompatData.zig | |||
| @@ -45,6 +45,6 @@ pub fn deinit(self: *const Self) void { | |||
| 45 | } | 45 | } |
| 46 | 46 | ||
| 47 | /// Returns compatibility decomposition for `cp`. | 47 | /// Returns compatibility decomposition for `cp`. |
| 48 | pub inline fn toNfkd(self: Self, cp: u21) []u21 { | 48 | pub fn toNfkd(self: Self, cp: u21) []u21 { |
| 49 | return self.nfkd[cp]; | 49 | return self.nfkd[cp]; |
| 50 | } | 50 | } |
diff --git a/src/FoldData.zig b/src/FoldData.zig index 892f78f..b7bbbd1 100644 --- a/src/FoldData.zig +++ b/src/FoldData.zig | |||
| @@ -3,9 +3,11 @@ const builtin = @import("builtin"); | |||
| 3 | const compress = std.compress; | 3 | const compress = std.compress; |
| 4 | const mem = std.mem; | 4 | const mem = std.mem; |
| 5 | 5 | ||
| 6 | const cwcf_max = 0x1e950; | ||
| 7 | |||
| 6 | allocator: mem.Allocator, | 8 | allocator: mem.Allocator, |
| 7 | cutoff: u21 = undefined, | 9 | cutoff: u21 = undefined, |
| 8 | cwcf: [0x10ffff]bool = [_]bool{false} ** 0x10ffff, | 10 | cwcf: [cwcf_max]bool = [_]bool{false} ** cwcf_max, |
| 9 | multiple_start: u21 = undefined, | 11 | multiple_start: u21 = undefined, |
| 10 | stage1: []u8 = undefined, | 12 | stage1: []u8 = undefined, |
| 11 | stage2: []u8 = undefined, | 13 | stage2: []u8 = undefined, |
| @@ -54,7 +56,7 @@ pub fn deinit(self: *const Self) void { | |||
| 54 | } | 56 | } |
| 55 | 57 | ||
| 56 | /// Returns the case fold for `cp`. | 58 | /// Returns the case fold for `cp`. |
| 57 | pub inline fn caseFold(self: Self, cp: u21, buf: []u21) []const u21 { | 59 | pub fn caseFold(self: Self, cp: u21, buf: []u21) []const u21 { |
| 58 | if (cp >= self.cutoff) return &.{}; | 60 | if (cp >= self.cutoff) return &.{}; |
| 59 | 61 | ||
| 60 | const stage1_val = self.stage1[cp >> 8]; | 62 | const stage1_val = self.stage1[cp >> 8]; |
| @@ -80,6 +82,6 @@ pub inline fn caseFold(self: Self, cp: u21, buf: []u21) []const u21 { | |||
| 80 | } | 82 | } |
| 81 | 83 | ||
| 82 | /// Returns true when caseFold(NFD(`cp`)) != NFD(`cp`). | 84 | /// Returns true when caseFold(NFD(`cp`)) != NFD(`cp`). |
| 83 | pub inline fn changesWhenCaseFolded(self: Self, cp: u21) bool { | 85 | pub fn changesWhenCaseFolded(self: Self, cp: u21) bool { |
| 84 | return self.cwcf[cp]; | 86 | return cp < cwcf_max and self.cwcf[cp]; |
| 85 | } | 87 | } |
diff --git a/src/GenCatData.zig b/src/GenCatData.zig index 454c45a..5ce9fb6 100644 --- a/src/GenCatData.zig +++ b/src/GenCatData.zig | |||
| @@ -80,7 +80,7 @@ pub fn deinit(self: *const Self) void { | |||
| 80 | } | 80 | } |
| 81 | 81 | ||
| 82 | /// Lookup the General Category for `cp`. | 82 | /// Lookup the General Category for `cp`. |
| 83 | pub inline fn gc(self: Self, cp: u21) Gc { | 83 | pub fn gc(self: Self, cp: u21) Gc { |
| 84 | return @enumFromInt(self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]]); | 84 | return @enumFromInt(self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]]); |
| 85 | } | 85 | } |
| 86 | 86 | ||
diff --git a/src/GraphemeData.zig b/src/GraphemeData.zig index 1710870..0ed79f2 100644 --- a/src/GraphemeData.zig +++ b/src/GraphemeData.zig | |||
| @@ -73,16 +73,16 @@ pub fn deinit(self: *const Self) void { | |||
| 73 | } | 73 | } |
| 74 | 74 | ||
| 75 | /// Lookup the grapheme break property for a code point. | 75 | /// Lookup the grapheme break property for a code point. |
| 76 | pub inline fn gbp(self: Self, cp: u21) Gbp { | 76 | pub fn gbp(self: Self, cp: u21) Gbp { |
| 77 | return @enumFromInt(self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]] >> 4); | 77 | return @enumFromInt(self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]] >> 4); |
| 78 | } | 78 | } |
| 79 | 79 | ||
| 80 | /// Lookup the indic syllable type for a code point. | 80 | /// Lookup the indic syllable type for a code point. |
| 81 | pub inline fn indic(self: Self, cp: u21) Indic { | 81 | pub fn indic(self: Self, cp: u21) Indic { |
| 82 | return @enumFromInt((self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]] >> 1) & 0x7); | 82 | return @enumFromInt((self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]] >> 1) & 0x7); |
| 83 | } | 83 | } |
| 84 | 84 | ||
| 85 | /// Lookup whether a code point is flagged as emoji. | 85 | /// Lookup whether a code point is flagged as emoji. |
| 86 | pub inline fn isEmoji(self: Self, cp: u21) bool { | 86 | pub fn isEmoji(self: Self, cp: u21) bool { |
| 87 | return self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]] & 1 == 1; | 87 | return self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]] & 1 == 1; |
| 88 | } | 88 | } |
diff --git a/src/HangulData.zig b/src/HangulData.zig index 5eee427..048841d 100644 --- a/src/HangulData.zig +++ b/src/HangulData.zig | |||
| @@ -48,6 +48,6 @@ pub fn deinit(self: *const Self) void { | |||
| 48 | } | 48 | } |
| 49 | 49 | ||
| 50 | /// Returns the Hangul syllable type for `cp`. | 50 | /// Returns the Hangul syllable type for `cp`. |
| 51 | pub inline fn syllable(self: Self, cp: u21) Syllable { | 51 | pub fn syllable(self: Self, cp: u21) Syllable { |
| 52 | return @enumFromInt(self.s2[self.s1[cp >> 8] + (cp & 0xff)]); | 52 | return @enumFromInt(self.s2[self.s1[cp >> 8] + (cp & 0xff)]); |
| 53 | } | 53 | } |
diff --git a/src/NormPropsData.zig b/src/NormPropsData.zig index 899bb8f..5861860 100644 --- a/src/NormPropsData.zig +++ b/src/NormPropsData.zig | |||
| @@ -39,16 +39,16 @@ pub fn deinit(self: *const Self) void { | |||
| 39 | } | 39 | } |
| 40 | 40 | ||
| 41 | /// Returns true if `cp` is already in NFD form. | 41 | /// Returns true if `cp` is already in NFD form. |
| 42 | pub inline fn isNfd(self: Self, cp: u21) bool { | 42 | pub fn isNfd(self: Self, cp: u21) bool { |
| 43 | return self.s2[self.s1[cp >> 8] + (cp & 0xff)] & 1 == 0; | 43 | return self.s2[self.s1[cp >> 8] + (cp & 0xff)] & 1 == 0; |
| 44 | } | 44 | } |
| 45 | 45 | ||
| 46 | /// Returns true if `cp` is already in NFKD form. | 46 | /// Returns true if `cp` is already in NFKD form. |
| 47 | pub inline fn isNfkd(self: Self, cp: u21) bool { | 47 | pub fn isNfkd(self: Self, cp: u21) bool { |
| 48 | return self.s2[self.s1[cp >> 8] + (cp & 0xff)] & 2 == 0; | 48 | return self.s2[self.s1[cp >> 8] + (cp & 0xff)] & 2 == 0; |
| 49 | } | 49 | } |
| 50 | 50 | ||
| 51 | /// Returns true if `cp` is not allowed in any normalized form. | 51 | /// Returns true if `cp` is not allowed in any normalized form. |
| 52 | pub inline fn isFcx(self: Self, cp: u21) bool { | 52 | pub fn isFcx(self: Self, cp: u21) bool { |
| 53 | return self.s2[self.s1[cp >> 8] + (cp & 0xff)] & 4 == 4; | 53 | return self.s2[self.s1[cp >> 8] + (cp & 0xff)] & 4 == 4; |
| 54 | } | 54 | } |
diff --git a/src/PropsData.zig b/src/PropsData.zig index f6c8370..b77bf30 100644 --- a/src/PropsData.zig +++ b/src/PropsData.zig | |||
| @@ -81,62 +81,62 @@ pub fn deinit(self: *const Self) void { | |||
| 81 | } | 81 | } |
| 82 | 82 | ||
| 83 | /// True if `cp` is a mathematical symbol. | 83 | /// True if `cp` is a mathematical symbol. |
| 84 | pub inline fn isMath(self: Self, cp: u21) bool { | 84 | pub fn isMath(self: Self, cp: u21) bool { |
| 85 | return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 1 == 1; | 85 | return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 1 == 1; |
| 86 | } | 86 | } |
| 87 | 87 | ||
| 88 | /// True if `cp` is an alphabetic character. | 88 | /// True if `cp` is an alphabetic character. |
| 89 | pub inline fn isAlphabetic(self: Self, cp: u21) bool { | 89 | pub fn isAlphabetic(self: Self, cp: u21) bool { |
| 90 | return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 2 == 2; | 90 | return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 2 == 2; |
| 91 | } | 91 | } |
| 92 | 92 | ||
| 93 | /// True if `cp` is a valid identifier start character. | 93 | /// True if `cp` is a valid identifier start character. |
| 94 | pub inline fn isIdStart(self: Self, cp: u21) bool { | 94 | pub fn isIdStart(self: Self, cp: u21) bool { |
| 95 | return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 4 == 4; | 95 | return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 4 == 4; |
| 96 | } | 96 | } |
| 97 | 97 | ||
| 98 | /// True if `cp` is a valid identifier continuation character. | 98 | /// True if `cp` is a valid identifier continuation character. |
| 99 | pub inline fn isIdContinue(self: Self, cp: u21) bool { | 99 | pub fn isIdContinue(self: Self, cp: u21) bool { |
| 100 | return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 8 == 8; | 100 | return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 8 == 8; |
| 101 | } | 101 | } |
| 102 | 102 | ||
| 103 | /// True if `cp` is a valid extended identifier start character. | 103 | /// True if `cp` is a valid extended identifier start character. |
| 104 | pub inline fn isXidStart(self: Self, cp: u21) bool { | 104 | pub fn isXidStart(self: Self, cp: u21) bool { |
| 105 | return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 16 == 16; | 105 | return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 16 == 16; |
| 106 | } | 106 | } |
| 107 | 107 | ||
| 108 | /// True if `cp` is a valid extended identifier continuation character. | 108 | /// True if `cp` is a valid extended identifier continuation character. |
| 109 | pub inline fn isXidContinue(self: Self, cp: u21) bool { | 109 | pub fn isXidContinue(self: Self, cp: u21) bool { |
| 110 | return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 32 == 32; | 110 | return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 32 == 32; |
| 111 | } | 111 | } |
| 112 | 112 | ||
| 113 | /// True if `cp` is a whitespace character. | 113 | /// True if `cp` is a whitespace character. |
| 114 | pub inline fn isWhitespace(self: Self, cp: u21) bool { | 114 | pub fn isWhitespace(self: Self, cp: u21) bool { |
| 115 | return self.props_s2[self.props_s1[cp >> 8] + (cp & 0xff)] & 1 == 1; | 115 | return self.props_s2[self.props_s1[cp >> 8] + (cp & 0xff)] & 1 == 1; |
| 116 | } | 116 | } |
| 117 | 117 | ||
| 118 | /// True if `cp` is a hexadecimal digit. | 118 | /// True if `cp` is a hexadecimal digit. |
| 119 | pub inline fn isHexDigit(self: Self, cp: u21) bool { | 119 | pub fn isHexDigit(self: Self, cp: u21) bool { |
| 120 | return self.props_s2[self.props_s1[cp >> 8] + (cp & 0xff)] & 2 == 2; | 120 | return self.props_s2[self.props_s1[cp >> 8] + (cp & 0xff)] & 2 == 2; |
| 121 | } | 121 | } |
| 122 | 122 | ||
| 123 | /// True if `cp` is a diacritic mark. | 123 | /// True if `cp` is a diacritic mark. |
| 124 | pub inline fn isDiacritic(self: Self, cp: u21) bool { | 124 | pub fn isDiacritic(self: Self, cp: u21) bool { |
| 125 | return self.props_s2[self.props_s1[cp >> 8] + (cp & 0xff)] & 4 == 4; | 125 | return self.props_s2[self.props_s1[cp >> 8] + (cp & 0xff)] & 4 == 4; |
| 126 | } | 126 | } |
| 127 | 127 | ||
| 128 | /// True if `cp` is numeric. | 128 | /// True if `cp` is numeric. |
| 129 | pub inline fn isNumeric(self: Self, cp: u21) bool { | 129 | pub fn isNumeric(self: Self, cp: u21) bool { |
| 130 | return self.num_s2[self.num_s1[cp >> 8] + (cp & 0xff)] & 1 == 1; | 130 | return self.num_s2[self.num_s1[cp >> 8] + (cp & 0xff)] & 1 == 1; |
| 131 | } | 131 | } |
| 132 | 132 | ||
| 133 | /// True if `cp` is a digit. | 133 | /// True if `cp` is a digit. |
| 134 | pub inline fn isDigit(self: Self, cp: u21) bool { | 134 | pub fn isDigit(self: Self, cp: u21) bool { |
| 135 | return self.num_s2[self.num_s1[cp >> 8] + (cp & 0xff)] & 2 == 2; | 135 | return self.num_s2[self.num_s1[cp >> 8] + (cp & 0xff)] & 2 == 2; |
| 136 | } | 136 | } |
| 137 | 137 | ||
| 138 | /// True if `cp` is decimal. | 138 | /// True if `cp` is decimal. |
| 139 | pub inline fn isDecimal(self: Self, cp: u21) bool { | 139 | pub fn isDecimal(self: Self, cp: u21) bool { |
| 140 | return self.num_s2[self.num_s1[cp >> 8] + (cp & 0xff)] & 4 == 4; | 140 | return self.num_s2[self.num_s1[cp >> 8] + (cp & 0xff)] & 4 == 4; |
| 141 | } | 141 | } |
| 142 | 142 | ||
diff --git a/src/WidthData.zig b/src/WidthData.zig index cf31b7f..1f1ae6f 100644 --- a/src/WidthData.zig +++ b/src/WidthData.zig | |||
| @@ -52,7 +52,7 @@ pub fn deinit(self: *const Self) void { | |||
| 52 | /// 3, where BACKSPACE and DELETE return -1 and 3-em-dash returns 3. C0/C1 | 52 | /// 3, where BACKSPACE and DELETE return -1 and 3-em-dash returns 3. C0/C1 |
| 53 | /// control codes return 0. If `cjk` is true, ambiguous code points return 2, | 53 | /// control codes return 0. If `cjk` is true, ambiguous code points return 2, |
| 54 | /// otherwise they return 1. | 54 | /// otherwise they return 1. |
| 55 | pub inline fn codePointWidth(self: Self, cp: u21) i3 { | 55 | pub fn codePointWidth(self: Self, cp: u21) i3 { |
| 56 | return self.s2[self.s1[cp >> 8] + (cp & 0xff)]; | 56 | return self.s2[self.s1[cp >> 8] + (cp & 0xff)]; |
| 57 | } | 57 | } |
| 58 | 58 | ||