summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorGravatar Jose Colon Rodriguez2024-02-15 10:21:30 -0400
committerGravatar Jose Colon Rodriguez2024-02-15 10:21:30 -0400
commit5d9080f1eac3147754ade3d50e64f6def4b6eb25 (patch)
treea6de7e9881a8743026ee5e2fd63b12ec0a3e1773 /src
parentCombinedd Indic ifs (diff)
downloadzg-5d9080f1eac3147754ade3d50e64f6def4b6eb25.tar.gz
zg-5d9080f1eac3147754ade3d50e64f6def4b6eb25.tar.xz
zg-5d9080f1eac3147754ade3d50e64f6def4b6eb25.zip
New single byte props table
Diffstat (limited to 'src')
-rw-r--r--src/Grapheme.zig58
1 files changed, 31 insertions, 27 deletions
diff --git a/src/Grapheme.zig b/src/Grapheme.zig
index d868ef7..ba90b24 100644
--- a/src/Grapheme.zig
+++ b/src/Grapheme.zig
@@ -6,11 +6,7 @@ const unicode = std.unicode;
6const ziglyph = @import("ziglyph"); 6const ziglyph = @import("ziglyph");
7const CodePoint = @import("CodePoint.zig"); 7const CodePoint = @import("CodePoint.zig");
8const CodePointIterator = CodePoint.CodePointIterator; 8const CodePointIterator = CodePoint.CodePointIterator;
9// const emoji = ziglyph.emoji;
10// const gbp = ziglyph.grapheme_break;
11const gbp = @import("gbp"); 9const gbp = @import("gbp");
12const emoji = @import("emoji");
13const indic = @import("indic");
14 10
15pub const Grapheme = @This(); 11pub const Grapheme = @This();
16 12
@@ -83,7 +79,10 @@ pub const GraphemeIterator = struct {
83 79
84// Predicates 80// Predicates
85inline fn isBreaker(cp: u21) bool { 81inline fn isBreaker(cp: u21) bool {
86 return cp == '\x0d' or cp == '\x0a' or gbp.stage_3[gbp.stage_2[gbp.stage_1[cp >> 8] + (cp & 0xff)]] == .control; 82 // Extract relevant properties.
83 const cp_props_byte = gbp.stage_3[gbp.stage_2[gbp.stage_1[cp >> 8] + (cp & 0xff)]];
84 const cp_gbp_prop: gbp.Gbp = @enumFromInt(cp_props_byte >> 4);
85 return cp == '\x0d' or cp == '\x0a' or cp_gbp_prop == .Control;
87} 86}
88 87
89inline fn isIgnorable(cp: u21) bool { 88inline fn isIgnorable(cp: u21) bool {
@@ -170,13 +169,20 @@ pub fn graphemeBreak(
170 cp2: u21, 169 cp2: u21,
171 state: *u3, 170 state: *u3,
172) bool { 171) bool {
172 // Extract relevant properties.
173 const cp1_props_byte = gbp.stage_3[gbp.stage_2[gbp.stage_1[cp1 >> 8] + (cp1 & 0xff)]];
174 const cp1_gbp_prop: gbp.Gbp = @enumFromInt(cp1_props_byte >> 4);
175 const cp1_indic_prop: gbp.Indic = @enumFromInt((cp1_props_byte >> 1) & 0x7);
176 const cp1_is_emoji = cp1_props_byte & 1 == 1;
177
178 const cp2_props_byte = gbp.stage_3[gbp.stage_2[gbp.stage_1[cp2 >> 8] + (cp2 & 0xff)]];
179 const cp2_gbp_prop: gbp.Gbp = @enumFromInt(cp2_props_byte >> 4);
180 const cp2_indic_prop: gbp.Indic = @enumFromInt((cp2_props_byte >> 1) & 0x7);
181 const cp2_is_emoji = cp2_props_byte & 1 == 1;
182
173 // GB11: Emoji Extend* ZWJ x Emoji 183 // GB11: Emoji Extend* ZWJ x Emoji
174 const cp1_is_emoji = emoji.stage_2[emoji.stage_1[cp1 >> 8] + (cp1 & 0xff)];
175 const cp2_is_emoji = emoji.stage_2[emoji.stage_1[cp2 >> 8] + (cp2 & 0xff)];
176 if (!hasXpic(state) and cp1_is_emoji) setXpic(state); 184 if (!hasXpic(state) and cp1_is_emoji) setXpic(state);
177 // GB9c: Indic Conjunct Break 185 // GB9c: Indic Conjunct Break
178 const cp1_indic_prop = indic.stage_3[indic.stage_2[indic.stage_1[cp1 >> 8] + (cp1 & 0xff)]];
179 const cp2_indic_prop = indic.stage_3[indic.stage_2[indic.stage_1[cp2 >> 8] + (cp2 & 0xff)]];
180 if (!hasIndic(state) and cp1_indic_prop == .Consonant) setIndic(state); 186 if (!hasIndic(state) and cp1_indic_prop == .Consonant) setIndic(state);
181 187
182 // GB3: CR x LF 188 // GB3: CR x LF
@@ -186,37 +192,35 @@ pub fn graphemeBreak(
186 if (isBreaker(cp1)) return true; 192 if (isBreaker(cp1)) return true;
187 193
188 // GB6: Hangul L x (L|V|LV|VT) 194 // GB6: Hangul L x (L|V|LV|VT)
189 const cp1_gbp_prop = gbp.stage_3[gbp.stage_2[gbp.stage_1[cp1 >> 8] + (cp1 & 0xff)]]; 195 if (cp1_gbp_prop == .L) {
190 const cp2_gbp_prop = gbp.stage_3[gbp.stage_2[gbp.stage_1[cp2 >> 8] + (cp2 & 0xff)]]; 196 if (cp2_gbp_prop == .L or
191 if (cp1_gbp_prop == .hangul_l) { 197 cp2_gbp_prop == .V or
192 if (cp2_gbp_prop == .hangul_l or 198 cp2_gbp_prop == .LV or
193 cp2_gbp_prop == .hangul_v or 199 cp2_gbp_prop == .LVT) return false;
194 cp2_gbp_prop == .hangul_lv or
195 cp2_gbp_prop == .hangul_lvt) return false;
196 } 200 }
197 201
198 // GB7: Hangul (LV | V) x (V | T) 202 // GB7: Hangul (LV | V) x (V | T)
199 if (cp1_gbp_prop == .hangul_lv or cp1_gbp_prop == .hangul_v) { 203 if (cp1_gbp_prop == .LV or cp1_gbp_prop == .V) {
200 if (cp2_gbp_prop == .hangul_v or 204 if (cp2_gbp_prop == .V or
201 cp2_gbp_prop == .hangul_t) return false; 205 cp2_gbp_prop == .T) return false;
202 } 206 }
203 207
204 // GB8: Hangul (LVT | T) x T 208 // GB8: Hangul (LVT | T) x T
205 if (cp1_gbp_prop == .hangul_lvt or cp1_gbp_prop == .hangul_t) { 209 if (cp1_gbp_prop == .LVT or cp1_gbp_prop == .T) {
206 if (cp2_gbp_prop == .hangul_t) return false; 210 if (cp2_gbp_prop == .T) return false;
207 } 211 }
208 212
209 // GB9: x (Extend | ZWJ) 213 // GB9: x (Extend | ZWJ)
210 if (cp2_gbp_prop == .extend or cp2_gbp_prop == .zwj) return false; 214 if (cp2_gbp_prop == .Extend or cp2_gbp_prop == .ZWJ) return false;
211 215
212 // GB9a: x SpacingMark 216 // GB9a: x SpacingMark
213 if (cp2_gbp_prop == .spacing) return false; 217 if (cp2_gbp_prop == .SpacingMark) return false;
214 218
215 // GB9b: Prepend x 219 // GB9b: Prepend x
216 if (cp1_gbp_prop == .prepend and !isBreaker(cp2)) return false; 220 if (cp1_gbp_prop == .Prepend and !isBreaker(cp2)) return false;
217 221
218 // GB12, GB13: RI x RI 222 // GB12, GB13: RI x RI
219 if (cp1_gbp_prop == .regional and cp2_gbp_prop == .regional) { 223 if (cp1_gbp_prop == .Regional_Indicator and cp2_gbp_prop == .Regional_Indicator) {
220 if (hasRegional(state)) { 224 if (hasRegional(state)) {
221 unsetRegional(state); 225 unsetRegional(state);
222 return true; 226 return true;
@@ -228,7 +232,7 @@ pub fn graphemeBreak(
228 232
229 // GB11: Emoji Extend* ZWJ x Emoji 233 // GB11: Emoji Extend* ZWJ x Emoji
230 if (hasXpic(state) and 234 if (hasXpic(state) and
231 cp1_gbp_prop == .zwj and 235 cp1_gbp_prop == .ZWJ and
232 cp2_is_emoji) 236 cp2_is_emoji)
233 { 237 {
234 unsetXpic(state); 238 unsetXpic(state);
@@ -251,7 +255,7 @@ pub fn graphemeBreak(
251 } 255 }
252 256
253 if (hasIndic(state) and 257 if (hasIndic(state) and
254 (cp1_indic_prop == .Linker or cp1_gbp_prop == .zwj) and 258 (cp1_indic_prop == .Linker or cp1_gbp_prop == .ZWJ) and
255 cp2_indic_prop == .Consonant) 259 cp2_indic_prop == .Consonant)
256 { 260 {
257 unsetIndic(state); 261 unsetIndic(state);