summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Jose Colon Rodriguez2024-02-14 21:44:23 -0400
committerGravatar Jose Colon Rodriguez2024-02-14 21:44:23 -0400
commit99a12fda4b1c3343231516b4b041e81367206d49 (patch)
treea61aedef6eaaebf3ec9c17e31d052ed63e393a89
parentgbp and indic direct array access (diff)
downloadzg-99a12fda4b1c3343231516b4b041e81367206d49.tar.gz
zg-99a12fda4b1c3343231516b4b041e81367206d49.tar.xz
zg-99a12fda4b1c3343231516b4b041e81367206d49.zip
emoji direct array access
-rw-r--r--codegen/emoji.zig15
-rw-r--r--src/Grapheme.zig6
2 files changed, 6 insertions, 15 deletions
diff --git a/codegen/emoji.zig b/codegen/emoji.zig
index acad0ca..6dbb74f 100644
--- a/codegen/emoji.zig
+++ b/codegen/emoji.zig
@@ -66,28 +66,17 @@ pub fn main() !void {
66 var out_buf = std.io.bufferedWriter(out_file.writer()); 66 var out_buf = std.io.bufferedWriter(out_file.writer());
67 const writer = out_buf.writer(); 67 const writer = out_buf.writer();
68 68
69 try writer.print("const stage_1 = [{}]u16{{", .{stage1.items.len}); 69 try writer.print("pub const stage_1 = [{}]u16{{", .{stage1.items.len});
70 for (stage1.items) |v| { 70 for (stage1.items) |v| {
71 _ = try writer.print("{},", .{v}); 71 _ = try writer.print("{},", .{v});
72 } 72 }
73 try writer.writeAll("};\n"); 73 try writer.writeAll("};\n");
74 74
75 try writer.print("const stage_2 = [{}]bool{{", .{stage2.items.len}); 75 try writer.print("pub const stage_2 = [{}]bool{{", .{stage2.items.len});
76 for (stage2.items) |v| { 76 for (stage2.items) |v| {
77 _ = try writer.print("{},", .{v}); 77 _ = try writer.print("{},", .{v});
78 } 78 }
79 try writer.writeAll("};\n"); 79 try writer.writeAll("};\n");
80 80
81 const code =
82 \\pub inline fn isExtendedPictographic(cp: u21) bool {
83 \\ const stage_1_index = cp >> 8;
84 \\ const stage_2_index = stage_1[stage_1_index] + (cp & 0xff);
85 \\ return stage_2[stage_2_index];
86 \\}
87 \\
88 ;
89
90 try writer.writeAll(code);
91
92 try out_buf.flush(); 81 try out_buf.flush();
93} 82}
diff --git a/src/Grapheme.zig b/src/Grapheme.zig
index 56eecbe..9c87364 100644
--- a/src/Grapheme.zig
+++ b/src/Grapheme.zig
@@ -171,7 +171,9 @@ pub fn graphemeBreak(
171 state: *u3, 171 state: *u3,
172) bool { 172) bool {
173 // GB11: Emoji Extend* ZWJ x Emoji 173 // GB11: Emoji Extend* ZWJ x Emoji
174 if (!hasXpic(state) and emoji.isExtendedPictographic(cp1)) setXpic(state); 174 const cp1_is_emoji = emoji.stage_2[emoji.stage_1[cp1 >> 8] + (cp1 & 0xff)];
175 const cp2_is_emoji = emoji.stage_2[emoji.stage_1[cp2 >> 8] + (cp2 & 0xff)];
176 if (!hasXpic(state) and cp1_is_emoji) setXpic(state);
175 // GB9c: Indic Conjunct Break 177 // GB9c: Indic Conjunct Break
176 const cp1_indic_prop = indic.stage_3[indic.stage_2[indic.stage_1[cp1 >> 8] + (cp1 & 0xff)]]; 178 const cp1_indic_prop = indic.stage_3[indic.stage_2[indic.stage_1[cp1 >> 8] + (cp1 & 0xff)]];
177 const cp2_indic_prop = indic.stage_3[indic.stage_2[indic.stage_1[cp2 >> 8] + (cp2 & 0xff)]]; 179 const cp2_indic_prop = indic.stage_3[indic.stage_2[indic.stage_1[cp2 >> 8] + (cp2 & 0xff)]];
@@ -227,7 +229,7 @@ pub fn graphemeBreak(
227 // GB11: Emoji Extend* ZWJ x Emoji 229 // GB11: Emoji Extend* ZWJ x Emoji
228 if (hasXpic(state) and 230 if (hasXpic(state) and
229 cp1_gbp_prop == .zwj and 231 cp1_gbp_prop == .zwj and
230 emoji.isExtendedPictographic(cp2)) 232 cp2_is_emoji)
231 { 233 {
232 unsetXpic(state); 234 unsetXpic(state);
233 return false; 235 return false;