diff options
| author | 2024-02-14 21:44:23 -0400 | |
|---|---|---|
| committer | 2024-02-14 21:44:23 -0400 | |
| commit | 99a12fda4b1c3343231516b4b041e81367206d49 (patch) | |
| tree | a61aedef6eaaebf3ec9c17e31d052ed63e393a89 | |
| parent | gbp and indic direct array access (diff) | |
| download | zg-99a12fda4b1c3343231516b4b041e81367206d49.tar.gz zg-99a12fda4b1c3343231516b4b041e81367206d49.tar.xz zg-99a12fda4b1c3343231516b4b041e81367206d49.zip | |
emoji direct array access
| -rw-r--r-- | codegen/emoji.zig | 15 | ||||
| -rw-r--r-- | src/Grapheme.zig | 6 |
2 files changed, 6 insertions, 15 deletions
diff --git a/codegen/emoji.zig b/codegen/emoji.zig
index acad0ca..6dbb74f 100644
--- a/codegen/emoji.zig
+++ b/codegen/emoji.zig
| @@ -66,28 +66,17 @@ pub fn main() !void { | |||
| 66 | var out_buf = std.io.bufferedWriter(out_file.writer()); | 66 | var out_buf = std.io.bufferedWriter(out_file.writer()); |
| 67 | const writer = out_buf.writer(); | 67 | const writer = out_buf.writer(); |
| 68 | 68 | ||
| 69 | try writer.print("const stage_1 = [{}]u16{{", .{stage1.items.len}); | 69 | try writer.print("pub const stage_1 = [{}]u16{{", .{stage1.items.len}); |
| 70 | for (stage1.items) |v| { | 70 | for (stage1.items) |v| { |
| 71 | _ = try writer.print("{},", .{v}); | 71 | _ = try writer.print("{},", .{v}); |
| 72 | } | 72 | } |
| 73 | try writer.writeAll("};\n"); | 73 | try writer.writeAll("};\n"); |
| 74 | 74 | ||
| 75 | try writer.print("const stage_2 = [{}]bool{{", .{stage2.items.len}); | 75 | try writer.print("pub const stage_2 = [{}]bool{{", .{stage2.items.len}); |
| 76 | for (stage2.items) |v| { | 76 | for (stage2.items) |v| { |
| 77 | _ = try writer.print("{},", .{v}); | 77 | _ = try writer.print("{},", .{v}); |
| 78 | } | 78 | } |
| 79 | try writer.writeAll("};\n"); | 79 | try writer.writeAll("};\n"); |
| 80 | 80 | ||
| 81 | const code = | ||
| 82 | \\pub inline fn isExtendedPictographic(cp: u21) bool { | ||
| 83 | \\ const stage_1_index = cp >> 8; | ||
| 84 | \\ const stage_2_index = stage_1[stage_1_index] + (cp & 0xff); | ||
| 85 | \\ return stage_2[stage_2_index]; | ||
| 86 | \\} | ||
| 87 | \\ | ||
| 88 | ; | ||
| 89 | |||
| 90 | try writer.writeAll(code); | ||
| 91 | |||
| 92 | try out_buf.flush(); | 81 | try out_buf.flush(); |
| 93 | } | 82 | } |
diff --git a/src/Grapheme.zig b/src/Grapheme.zig
index 56eecbe..9c87364 100644
--- a/src/Grapheme.zig
+++ b/src/Grapheme.zig
| @@ -171,7 +171,9 @@ pub fn graphemeBreak( | |||
| 171 | state: *u3, | 171 | state: *u3, |
| 172 | ) bool { | 172 | ) bool { |
| 173 | // GB11: Emoji Extend* ZWJ x Emoji | 173 | // GB11: Emoji Extend* ZWJ x Emoji |
| 174 | if (!hasXpic(state) and emoji.isExtendedPictographic(cp1)) setXpic(state); | 174 | const cp1_is_emoji = emoji.stage_2[emoji.stage_1[cp1 >> 8] + (cp1 & 0xff)]; |
| 175 | const cp2_is_emoji = emoji.stage_2[emoji.stage_1[cp2 >> 8] + (cp2 & 0xff)]; | ||
| 176 | if (!hasXpic(state) and cp1_is_emoji) setXpic(state); | ||
| 175 | // GB9c: Indic Conjunct Break | 177 | // GB9c: Indic Conjunct Break |
| 176 | const cp1_indic_prop = indic.stage_3[indic.stage_2[indic.stage_1[cp1 >> 8] + (cp1 & 0xff)]]; | 178 | const cp1_indic_prop = indic.stage_3[indic.stage_2[indic.stage_1[cp1 >> 8] + (cp1 & 0xff)]]; |
| 177 | const cp2_indic_prop = indic.stage_3[indic.stage_2[indic.stage_1[cp2 >> 8] + (cp2 & 0xff)]]; | 179 | const cp2_indic_prop = indic.stage_3[indic.stage_2[indic.stage_1[cp2 >> 8] + (cp2 & 0xff)]]; |
| @@ -227,7 +229,7 @@ pub fn graphemeBreak( | |||
| 227 | // GB11: Emoji Extend* ZWJ x Emoji | 229 | // GB11: Emoji Extend* ZWJ x Emoji |
| 228 | if (hasXpic(state) and | 230 | if (hasXpic(state) and |
| 229 | cp1_gbp_prop == .zwj and | 231 | cp1_gbp_prop == .zwj and |
| 230 | emoji.isExtendedPictographic(cp2)) | 232 | cp2_is_emoji) |
| 231 | { | 233 | { |
| 232 | unsetXpic(state); | 234 | unsetXpic(state); |
| 233 | return false; | 235 | return false; |