summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/CanonData.zig4
-rw-r--r--src/CaseData.zig6
-rw-r--r--src/CombiningData.zig4
-rw-r--r--src/CompatData.zig2
-rw-r--r--src/FoldData.zig10
-rw-r--r--src/GenCatData.zig2
-rw-r--r--src/GraphemeData.zig6
-rw-r--r--src/HangulData.zig2
-rw-r--r--src/NormPropsData.zig6
-rw-r--r--src/PropsData.zig24
-rw-r--r--src/WidthData.zig2
11 files changed, 35 insertions, 33 deletions
diff --git a/src/CanonData.zig b/src/CanonData.zig
index be2b381..05b9017 100644
--- a/src/CanonData.zig
+++ b/src/CanonData.zig
@@ -56,11 +56,11 @@ pub fn deinit(self: *Self) void {
56} 56}
57 57
58/// Returns canonical decomposition for `cp`. 58/// Returns canonical decomposition for `cp`.
59pub inline fn toNfd(self: Self, cp: u21) []const u21 { 59pub fn toNfd(self: Self, cp: u21) []const u21 {
60 return self.nfd[cp]; 60 return self.nfd[cp];
61} 61}
62 62
63// Returns the primary composite for the codepoints in `cp`. 63// Returns the primary composite for the codepoints in `cp`.
64pub inline fn toNfc(self: Self, cps: [2]u21) ?u21 { 64pub fn toNfc(self: Self, cps: [2]u21) ?u21 {
65 return self.nfc.get(cps); 65 return self.nfc.get(cps);
66} 66}
diff --git a/src/CaseData.zig b/src/CaseData.zig
index 260637a..3a35fdd 100644
--- a/src/CaseData.zig
+++ b/src/CaseData.zig
@@ -81,7 +81,7 @@ pub fn deinit(self: *const Self) void {
81} 81}
82 82
83// Returns true if `cp` is either upper, lower, or title case. 83// Returns true if `cp` is either upper, lower, or title case.
84pub inline fn isCased(self: Self, cp: u21) bool { 84pub fn isCased(self: Self, cp: u21) bool {
85 return self.prop_s2[self.prop_s1[cp >> 8] + (cp & 0xff)] & 4 == 4; 85 return self.prop_s2[self.prop_s1[cp >> 8] + (cp & 0xff)] & 4 == 4;
86} 86}
87 87
@@ -109,7 +109,7 @@ test "isUpperStr" {
109} 109}
110 110
111/// Returns uppercase mapping for `cp`. 111/// Returns uppercase mapping for `cp`.
112pub inline fn toUpper(self: Self, cp: u21) u21 { 112pub fn toUpper(self: Self, cp: u21) u21 {
113 return self.case_map[cp][0]; 113 return self.case_map[cp][0];
114} 114}
115 115
@@ -167,7 +167,7 @@ test "isLowerStr" {
167} 167}
168 168
169/// Returns lowercase mapping for `cp`. 169/// Returns lowercase mapping for `cp`.
170pub inline fn toLower(self: Self, cp: u21) u21 { 170pub fn toLower(self: Self, cp: u21) u21 {
171 return self.case_map[cp][1]; 171 return self.case_map[cp][1];
172} 172}
173 173
diff --git a/src/CombiningData.zig b/src/CombiningData.zig
index 16b923f..44140f8 100644
--- a/src/CombiningData.zig
+++ b/src/CombiningData.zig
@@ -39,11 +39,11 @@ pub fn deinit(self: *const Self) void {
39} 39}
40 40
41/// Returns the canonical combining class for a code point. 41/// Returns the canonical combining class for a code point.
42pub inline fn ccc(self: Self, cp: u21) u8 { 42pub fn ccc(self: Self, cp: u21) u8 {
43 return self.s2[self.s1[cp >> 8] + (cp & 0xff)]; 43 return self.s2[self.s1[cp >> 8] + (cp & 0xff)];
44} 44}
45 45
46/// True if `cp` is a starter code point, not a combining character. 46/// True if `cp` is a starter code point, not a combining character.
47pub inline fn isStarter(self: Self, cp: u21) bool { 47pub fn isStarter(self: Self, cp: u21) bool {
48 return self.s2[self.s1[cp >> 8] + (cp & 0xff)] == 0; 48 return self.s2[self.s1[cp >> 8] + (cp & 0xff)] == 0;
49} 49}
diff --git a/src/CompatData.zig b/src/CompatData.zig
index 3346a06..cf184a1 100644
--- a/src/CompatData.zig
+++ b/src/CompatData.zig
@@ -45,6 +45,6 @@ pub fn deinit(self: *const Self) void {
45} 45}
46 46
47/// Returns compatibility decomposition for `cp`. 47/// Returns compatibility decomposition for `cp`.
48pub inline fn toNfkd(self: Self, cp: u21) []u21 { 48pub fn toNfkd(self: Self, cp: u21) []u21 {
49 return self.nfkd[cp]; 49 return self.nfkd[cp];
50} 50}
diff --git a/src/FoldData.zig b/src/FoldData.zig
index 892f78f..b7bbbd1 100644
--- a/src/FoldData.zig
+++ b/src/FoldData.zig
@@ -3,9 +3,11 @@ const builtin = @import("builtin");
3const compress = std.compress; 3const compress = std.compress;
4const mem = std.mem; 4const mem = std.mem;
5 5
6const cwcf_max = 0x1e950;
7
6allocator: mem.Allocator, 8allocator: mem.Allocator,
7cutoff: u21 = undefined, 9cutoff: u21 = undefined,
8cwcf: [0x10ffff]bool = [_]bool{false} ** 0x10ffff, 10cwcf: [cwcf_max]bool = [_]bool{false} ** cwcf_max,
9multiple_start: u21 = undefined, 11multiple_start: u21 = undefined,
10stage1: []u8 = undefined, 12stage1: []u8 = undefined,
11stage2: []u8 = undefined, 13stage2: []u8 = undefined,
@@ -54,7 +56,7 @@ pub fn deinit(self: *const Self) void {
54} 56}
55 57
56/// Returns the case fold for `cp`. 58/// Returns the case fold for `cp`.
57pub inline fn caseFold(self: Self, cp: u21, buf: []u21) []const u21 { 59pub fn caseFold(self: Self, cp: u21, buf: []u21) []const u21 {
58 if (cp >= self.cutoff) return &.{}; 60 if (cp >= self.cutoff) return &.{};
59 61
60 const stage1_val = self.stage1[cp >> 8]; 62 const stage1_val = self.stage1[cp >> 8];
@@ -80,6 +82,6 @@ pub inline fn caseFold(self: Self, cp: u21, buf: []u21) []const u21 {
80} 82}
81 83
82/// Returns true when caseFold(NFD(`cp`)) != NFD(`cp`). 84/// Returns true when caseFold(NFD(`cp`)) != NFD(`cp`).
83pub inline fn changesWhenCaseFolded(self: Self, cp: u21) bool { 85pub fn changesWhenCaseFolded(self: Self, cp: u21) bool {
84 return self.cwcf[cp]; 86 return cp < cwcf_max and self.cwcf[cp];
85} 87}
diff --git a/src/GenCatData.zig b/src/GenCatData.zig
index 454c45a..5ce9fb6 100644
--- a/src/GenCatData.zig
+++ b/src/GenCatData.zig
@@ -80,7 +80,7 @@ pub fn deinit(self: *const Self) void {
80} 80}
81 81
82/// Lookup the General Category for `cp`. 82/// Lookup the General Category for `cp`.
83pub inline fn gc(self: Self, cp: u21) Gc { 83pub fn gc(self: Self, cp: u21) Gc {
84 return @enumFromInt(self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]]); 84 return @enumFromInt(self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]]);
85} 85}
86 86
diff --git a/src/GraphemeData.zig b/src/GraphemeData.zig
index 1710870..0ed79f2 100644
--- a/src/GraphemeData.zig
+++ b/src/GraphemeData.zig
@@ -73,16 +73,16 @@ pub fn deinit(self: *const Self) void {
73} 73}
74 74
75/// Lookup the grapheme break property for a code point. 75/// Lookup the grapheme break property for a code point.
76pub inline fn gbp(self: Self, cp: u21) Gbp { 76pub fn gbp(self: Self, cp: u21) Gbp {
77 return @enumFromInt(self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]] >> 4); 77 return @enumFromInt(self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]] >> 4);
78} 78}
79 79
80/// Lookup the indic syllable type for a code point. 80/// Lookup the indic syllable type for a code point.
81pub inline fn indic(self: Self, cp: u21) Indic { 81pub fn indic(self: Self, cp: u21) Indic {
82 return @enumFromInt((self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]] >> 1) & 0x7); 82 return @enumFromInt((self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]] >> 1) & 0x7);
83} 83}
84 84
85/// Lookup the indic syllable type for a code point. 85/// Lookup the indic syllable type for a code point.
86pub inline fn isEmoji(self: Self, cp: u21) bool { 86pub fn isEmoji(self: Self, cp: u21) bool {
87 return self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]] & 1 == 1; 87 return self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]] & 1 == 1;
88} 88}
diff --git a/src/HangulData.zig b/src/HangulData.zig
index 5eee427..048841d 100644
--- a/src/HangulData.zig
+++ b/src/HangulData.zig
@@ -48,6 +48,6 @@ pub fn deinit(self: *const Self) void {
48} 48}
49 49
50/// Returns the Hangul syllable type for `cp`. 50/// Returns the Hangul syllable type for `cp`.
51pub inline fn syllable(self: Self, cp: u21) Syllable { 51pub fn syllable(self: Self, cp: u21) Syllable {
52 return @enumFromInt(self.s2[self.s1[cp >> 8] + (cp & 0xff)]); 52 return @enumFromInt(self.s2[self.s1[cp >> 8] + (cp & 0xff)]);
53} 53}
diff --git a/src/NormPropsData.zig b/src/NormPropsData.zig
index 899bb8f..5861860 100644
--- a/src/NormPropsData.zig
+++ b/src/NormPropsData.zig
@@ -39,16 +39,16 @@ pub fn deinit(self: *const Self) void {
39} 39}
40 40
41/// Returns true if `cp` is already in NFD form. 41/// Returns true if `cp` is already in NFD form.
42pub inline fn isNfd(self: Self, cp: u21) bool { 42pub fn isNfd(self: Self, cp: u21) bool {
43 return self.s2[self.s1[cp >> 8] + (cp & 0xff)] & 1 == 0; 43 return self.s2[self.s1[cp >> 8] + (cp & 0xff)] & 1 == 0;
44} 44}
45 45
46/// Returns true if `cp` is already in NFKD form. 46/// Returns true if `cp` is already in NFKD form.
47pub inline fn isNfkd(self: Self, cp: u21) bool { 47pub fn isNfkd(self: Self, cp: u21) bool {
48 return self.s2[self.s1[cp >> 8] + (cp & 0xff)] & 2 == 0; 48 return self.s2[self.s1[cp >> 8] + (cp & 0xff)] & 2 == 0;
49} 49}
50 50
51/// Returns true if `cp` is not allowed in any normalized form. 51/// Returns true if `cp` is not allowed in any normalized form.
52pub inline fn isFcx(self: Self, cp: u21) bool { 52pub fn isFcx(self: Self, cp: u21) bool {
53 return self.s2[self.s1[cp >> 8] + (cp & 0xff)] & 4 == 4; 53 return self.s2[self.s1[cp >> 8] + (cp & 0xff)] & 4 == 4;
54} 54}
diff --git a/src/PropsData.zig b/src/PropsData.zig
index f6c8370..b77bf30 100644
--- a/src/PropsData.zig
+++ b/src/PropsData.zig
@@ -81,62 +81,62 @@ pub fn deinit(self: *const Self) void {
81} 81}
82 82
83/// True if `cp` is a mathematical symbol. 83/// True if `cp` is a mathematical symbol.
84pub inline fn isMath(self: Self, cp: u21) bool { 84pub fn isMath(self: Self, cp: u21) bool {
85 return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 1 == 1; 85 return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 1 == 1;
86} 86}
87 87
88/// True if `cp` is an alphabetic character. 88/// True if `cp` is an alphabetic character.
89pub inline fn isAlphabetic(self: Self, cp: u21) bool { 89pub fn isAlphabetic(self: Self, cp: u21) bool {
90 return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 2 == 2; 90 return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 2 == 2;
91} 91}
92 92
93/// True if `cp` is a valid identifier start character. 93/// True if `cp` is a valid identifier start character.
94pub inline fn isIdStart(self: Self, cp: u21) bool { 94pub fn isIdStart(self: Self, cp: u21) bool {
95 return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 4 == 4; 95 return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 4 == 4;
96} 96}
97 97
98/// True if `cp` is a valid identifier continuation character. 98/// True if `cp` is a valid identifier continuation character.
99pub inline fn isIdContinue(self: Self, cp: u21) bool { 99pub fn isIdContinue(self: Self, cp: u21) bool {
100 return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 8 == 8; 100 return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 8 == 8;
101} 101}
102 102
103/// True if `cp` is a valid extended identifier start character. 103/// True if `cp` is a valid extended identifier start character.
104pub inline fn isXidStart(self: Self, cp: u21) bool { 104pub fn isXidStart(self: Self, cp: u21) bool {
105 return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 16 == 16; 105 return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 16 == 16;
106} 106}
107 107
108/// True if `cp` is a valid extended identifier continuation character. 108/// True if `cp` is a valid extended identifier continuation character.
109pub inline fn isXidContinue(self: Self, cp: u21) bool { 109pub fn isXidContinue(self: Self, cp: u21) bool {
110 return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 32 == 32; 110 return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 32 == 32;
111} 111}
112 112
113/// True if `cp` is a whitespace character. 113/// True if `cp` is a whitespace character.
114pub inline fn isWhitespace(self: Self, cp: u21) bool { 114pub fn isWhitespace(self: Self, cp: u21) bool {
115 return self.props_s2[self.props_s1[cp >> 8] + (cp & 0xff)] & 1 == 1; 115 return self.props_s2[self.props_s1[cp >> 8] + (cp & 0xff)] & 1 == 1;
116} 116}
117 117
118/// True if `cp` is a hexadecimal digit. 118/// True if `cp` is a hexadecimal digit.
119pub inline fn isHexDigit(self: Self, cp: u21) bool { 119pub fn isHexDigit(self: Self, cp: u21) bool {
120 return self.props_s2[self.props_s1[cp >> 8] + (cp & 0xff)] & 2 == 2; 120 return self.props_s2[self.props_s1[cp >> 8] + (cp & 0xff)] & 2 == 2;
121} 121}
122 122
123/// True if `cp` is a diacritic mark. 123/// True if `cp` is a diacritic mark.
124pub inline fn isDiacritic(self: Self, cp: u21) bool { 124pub fn isDiacritic(self: Self, cp: u21) bool {
125 return self.props_s2[self.props_s1[cp >> 8] + (cp & 0xff)] & 4 == 4; 125 return self.props_s2[self.props_s1[cp >> 8] + (cp & 0xff)] & 4 == 4;
126} 126}
127 127
128/// True if `cp` is numeric. 128/// True if `cp` is numeric.
129pub inline fn isNumeric(self: Self, cp: u21) bool { 129pub fn isNumeric(self: Self, cp: u21) bool {
130 return self.num_s2[self.num_s1[cp >> 8] + (cp & 0xff)] & 1 == 1; 130 return self.num_s2[self.num_s1[cp >> 8] + (cp & 0xff)] & 1 == 1;
131} 131}
132 132
133/// True if `cp` is a digit. 133/// True if `cp` is a digit.
134pub inline fn isDigit(self: Self, cp: u21) bool { 134pub fn isDigit(self: Self, cp: u21) bool {
135 return self.num_s2[self.num_s1[cp >> 8] + (cp & 0xff)] & 2 == 2; 135 return self.num_s2[self.num_s1[cp >> 8] + (cp & 0xff)] & 2 == 2;
136} 136}
137 137
138/// True if `cp` is decimal. 138/// True if `cp` is decimal.
139pub inline fn isDecimal(self: Self, cp: u21) bool { 139pub fn isDecimal(self: Self, cp: u21) bool {
140 return self.num_s2[self.num_s1[cp >> 8] + (cp & 0xff)] & 4 == 4; 140 return self.num_s2[self.num_s1[cp >> 8] + (cp & 0xff)] & 4 == 4;
141} 141}
142 142
diff --git a/src/WidthData.zig b/src/WidthData.zig
index cf31b7f..1f1ae6f 100644
--- a/src/WidthData.zig
+++ b/src/WidthData.zig
@@ -52,7 +52,7 @@ pub fn deinit(self: *const Self) void {
52/// 3, where BACKSPACE and DELETE return -1 and 3-em-dash returns 3. C0/C1 52/// 3, where BACKSPACE and DELETE return -1 and 3-em-dash returns 3. C0/C1
53/// control codes return 0. If `cjk` is true, ambiguous code points return 2, 53/// control codes return 0. If `cjk` is true, ambiguous code points return 2,
54/// otherwise they return 1. 54/// otherwise they return 1.
55pub inline fn codePointWidth(self: Self, cp: u21) i3 { 55pub fn codePointWidth(self: Self, cp: u21) i3 {
56 return self.s2[self.s1[cp >> 8] + (cp & 0xff)]; 56 return self.s2[self.s1[cp >> 8] + (cp & 0xff)];
57} 57}
58 58