summary | refs | log | tree | commit | diff
path: root/src/Grapheme.zig
diff options
context:
space:
mode:
author: Jose Colon Rodriguez, 2024-02-17 09:50:50 -0400
committer: Jose Colon Rodriguez, 2024-02-17 09:50:50 -0400
commit: 6c1a88471fc6444ee93d6ca0c64d0953a0d857ac (patch)
tree: c9ac886559bd1117b75482ab690364a5e792ad2c /src/Grapheme.zig
parent: isAsciiOnly SIMD tweaks (diff)
download: zg-6c1a88471fc6444ee93d6ca0c64d0953a0d857ac.tar.gz
zg-6c1a88471fc6444ee93d6ca0c64d0953a0d857ac.tar.xz
zg-6c1a88471fc6444ee93d6ca0c64d0953a0d857ac.zip
GraphemeIterator ASCII optimization 3x faster
Diffstat (limited to 'src/Grapheme.zig')
-rw-r--r-- src/Grapheme.zig 79
1 files changed, 42 insertions, 37 deletions
diff --git a/src/Grapheme.zig b/src/Grapheme.zig
index 888fcd4..6892a2a 100644
--- a/src/Grapheme.zig
+++ b/src/Grapheme.zig
@@ -45,9 +45,14 @@ pub const GraphemeIterator = struct {
45 pub fn next(self: *Self) ?Grapheme { 45 pub fn next(self: *Self) ?Grapheme {
46 self.advance(); 46 self.advance();
47 47
48 // If at end 48 // If no more
49 if (self.buf[0] == null) return null; 49 if (self.buf[0] == null) return null;
50 // If last one
50 if (self.buf[1] == null) return Grapheme{ .len = self.buf[0].?.len, .offset = self.buf[0].?.offset }; 51 if (self.buf[1] == null) return Grapheme{ .len = self.buf[0].?.len, .offset = self.buf[0].?.offset };
52 // If ASCII
53 if (self.buf[0].?.code != '\r' and self.buf[0].?.code < 128 and self.buf[1].?.code < 128) {
54 return Grapheme{ .len = self.buf[0].?.len, .offset = self.buf[0].?.offset };
55 }
51 56
52 const gc_start = self.buf[0].?.offset; 57 const gc_start = self.buf[0].?.offset;
53 var gc_len: usize = self.buf[0].?.len; 58 var gc_len: usize = self.buf[0].?.len;
@@ -89,42 +94,6 @@ fn isIgnorable(cp: u21) bool {
89 return cp_gbp_prop == .extend or cp_gbp_prop == .spacing or cp == '\u{200d}'; 94 return cp_gbp_prop == .extend or cp_gbp_prop == .spacing or cp == '\u{200d}';
90} 95}
91 96
92test "Segmentation comptime GraphemeIterator" {
93 const want = [_][]const u8{ "H", "é", "l", "l", "o" };
94
95 comptime {
96 const src = "Héllo";
97 var ct_iter = GraphemeIterator.init(src);
98 var i = 0;
99 while (ct_iter.next()) |grapheme| : (i += 1) {
100 try std.testing.expect(grapheme.eql(src, want[i]));
101 }
102 }
103}
104
105test "Segmentation ZWJ and ZWSP emoji sequences" {
106 const seq_1 = "\u{1F43B}\u{200D}\u{2744}\u{FE0F}";
107 const seq_2 = "\u{1F43B}\u{200D}\u{2744}\u{FE0F}";
108 const with_zwj = seq_1 ++ "\u{200D}" ++ seq_2;
109 const with_zwsp = seq_1 ++ "\u{200B}" ++ seq_2;
110 const no_joiner = seq_1 ++ seq_2;
111
112 var ct_iter = GraphemeIterator.init(with_zwj);
113 var i: usize = 0;
114 while (ct_iter.next()) |_| : (i += 1) {}
115 try std.testing.expectEqual(@as(usize, 1), i);
116
117 ct_iter = GraphemeIterator.init(with_zwsp);
118 i = 0;
119 while (ct_iter.next()) |_| : (i += 1) {}
120 try std.testing.expectEqual(@as(usize, 3), i);
121
122 ct_iter = GraphemeIterator.init(no_joiner);
123 i = 0;
124 while (ct_iter.next()) |_| : (i += 1) {}
125 try std.testing.expectEqual(@as(usize, 2), i);
126}
127
128// Grapheme break state. 97// Grapheme break state.
129// Extended Pictographic (emoji) 98// Extended Pictographic (emoji)
130fn hasXpic(state: *const u3) bool { 99fn hasXpic(state: *const u3) bool {
@@ -322,3 +291,39 @@ test "Segmentation GraphemeIterator" {
322 } 291 }
323 } 292 }
324} 293}
294
295test "Segmentation comptime GraphemeIterator" {
296 const want = [_][]const u8{ "H", "é", "l", "l", "o" };
297
298 comptime {
299 const src = "Héllo";
300 var ct_iter = GraphemeIterator.init(src);
301 var i = 0;
302 while (ct_iter.next()) |grapheme| : (i += 1) {
303 try std.testing.expect(grapheme.eql(src, want[i]));
304 }
305 }
306}
307
308test "Segmentation ZWJ and ZWSP emoji sequences" {
309 const seq_1 = "\u{1F43B}\u{200D}\u{2744}\u{FE0F}";
310 const seq_2 = "\u{1F43B}\u{200D}\u{2744}\u{FE0F}";
311 const with_zwj = seq_1 ++ "\u{200D}" ++ seq_2;
312 const with_zwsp = seq_1 ++ "\u{200B}" ++ seq_2;
313 const no_joiner = seq_1 ++ seq_2;
314
315 var ct_iter = GraphemeIterator.init(with_zwj);
316 var i: usize = 0;
317 while (ct_iter.next()) |_| : (i += 1) {}
318 try std.testing.expectEqual(@as(usize, 1), i);
319
320 ct_iter = GraphemeIterator.init(with_zwsp);
321 i = 0;
322 while (ct_iter.next()) |_| : (i += 1) {}
323 try std.testing.expectEqual(@as(usize, 3), i);
324
325 ct_iter = GraphemeIterator.init(no_joiner);
326 i = 0;
327 while (ct_iter.next()) |_| : (i += 1) {}
328 try std.testing.expectEqual(@as(usize, 2), i);
329}