summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Sam Atman2024-11-02 10:31:28 -0400
committerGravatar Sam Atman2024-11-02 10:31:28 -0400
commitbf319e504e5476d9b0d2dec3e2f8d81ef6439ce4 (patch)
tree0424b9f9e54972837652042dc858dfe5ba12b5de
parentReplace deprecated uses of std.mem.split (diff)
downloadzg-bf319e504e5476d9b0d2dec3e2f8d81ef6439ce4.tar.gz
zg-bf319e504e5476d9b0d2dec3e2f8d81ef6439ce4.tar.xz
zg-bf319e504e5476d9b0d2dec3e2f8d81ef6439ce4.zip
Add peek() to Grapheme.Iterator
This does the expected thing: it returns the next ?Grapheme while leaving the iterator's state unchanged.
-rw-r--r--src/grapheme.zig69
-rw-r--r--src/unicode_tests.zig26
2 files changed, 95 insertions, 0 deletions
diff --git a/src/grapheme.zig b/src/grapheme.zig
index 911c856..7538f5b 100644
--- a/src/grapheme.zig
+++ b/src/grapheme.zig
@@ -77,6 +77,75 @@ pub const Iterator = struct {
77 77
78 return Grapheme{ .len = gc_len, .offset = gc_start }; 78 return Grapheme{ .len = gc_len, .offset = gc_start };
79 } 79 }
80
/// Returns the next grapheme cluster without consuming it, or `null` when
/// no code points remain.
///
/// The iterator is advanced internally to measure the cluster, but
/// `cp_iter`, `buf[0]` and `buf[1]` are restored to their entry values
/// before the function returns, so repeated calls yield the same result.
pub fn peek(self: *Self) ?Grapheme {
    // Snapshot the state that `advance()` mutates and restore it on every
    // exit path with a single `defer`, instead of repeating the restore
    // before each `return`.
    const saved_cp_iter = self.cp_iter;
    const s0 = self.buf[0];
    const s1 = self.buf[1];
    defer {
        self.cp_iter = saved_cp_iter;
        self.buf[0] = s0;
        self.buf[1] = s1;
    }

    self.advance();

    // If no more
    const first = self.buf[0] orelse return null;

    const gc_start = first.offset;
    var gc_len: u8 = first.len;

    // If last one
    const second = self.buf[1] orelse
        return Grapheme{ .len = gc_len, .offset = gc_start };

    // If ASCII: a non-CR code point below 128 followed by another code
    // point below 128 is returned on its own, skipping the break lookup.
    if (first.code != '\r' and first.code < 128 and second.code < 128) {
        return Grapheme{ .len = gc_len, .offset = gc_start };
    }

    var state = State{};

    if (graphemeBreak(first.code, second.code, self.data, &state)) {
        return Grapheme{ .len = gc_len, .offset = gc_start };
    }

    // No break after the first code point: keep absorbing code points
    // until a break is reported or the input is exhausted.
    while (true) {
        self.advance();
        const cp = self.buf[0] orelse break;

        gc_len += cp.len;

        if (graphemeBreak(
            cp.code,
            if (self.buf[1]) |ncp| ncp.code else 0,
            self.data,
            &state,
        )) break;
    }

    return Grapheme{ .len = gc_len, .offset = gc_start };
}
80}; 149};
81 150
82// Predicates 151// Predicates
diff --git a/src/unicode_tests.zig b/src/unicode_tests.zig
index 691ccfb..245c03f 100644
--- a/src/unicode_tests.zig
+++ b/src/unicode_tests.zig
@@ -7,11 +7,37 @@ const mem = std.mem;
7const testing = std.testing; 7const testing = std.testing;
8const unicode = std.unicode; 8const unicode = std.unicode;
9 9
10const grapheme = @import("grapheme");
10const Grapheme = @import("grapheme").Grapheme; 11const Grapheme = @import("grapheme").Grapheme;
11const GraphemeData = @import("grapheme").GraphemeData; 12const GraphemeData = @import("grapheme").GraphemeData;
12const GraphemeIterator = @import("grapheme").Iterator; 13const GraphemeIterator = @import("grapheme").Iterator;
13const Normalize = @import("Normalize"); 14const Normalize = @import("Normalize");
14 15
// Reference every declaration of the imported `grapheme` module at compile
// time via `testing.refAllDecls`.
// NOTE(review): presumably this forces the module's declarations to be
// analyzed in this test build — confirm that is the intent.
16comptime {
17 testing.refAllDecls(grapheme);
18}
// Checks that `peek()` reports the same grapheme the following `next()`
// returns, that consecutive peeks agree, and that both yield `null` once the
// input is exhausted.
test "Iterator.peek" {
    const text = "aΔ👨🏻‍🌾→";
    const data = try GraphemeData.init(testing.allocator);
    defer data.deinit();

    var it = grapheme.Iterator.init(text, &data);

    // First grapheme: a peek followed by next must agree, and it is "a".
    const first_peeked = it.peek().?;
    const first_taken = it.next().?;
    try testing.expectEqual(first_peeked, first_taken);
    try testing.expectEqualStrings("a", first_peeked.bytes(text));

    // Second grapheme: peeking twice must not move the iterator.
    const second_peek_a = it.peek().?;
    const second_peek_b = it.peek().?;
    try testing.expectEqual(second_peek_a, second_peek_b);
    const second_taken = it.next().?;
    try testing.expectEqual(second_peek_b, second_taken);

    // Third and fourth graphemes: peek, then next, and compare.
    const third_peeked = it.peek();
    const third_taken = it.next();
    try testing.expectEqual(third_peeked, third_taken);

    const fourth_peeked = it.peek();
    const fourth_taken = it.next();
    try testing.expectEqual(fourth_peeked, fourth_taken);

    // Exhausted: peek keeps returning null, and next agrees with it.
    try testing.expectEqual(null, it.peek());
    try testing.expectEqual(null, it.peek());

    const end_peeked = it.peek();
    const end_taken = it.next();
    try testing.expectEqual(end_peeked, end_taken);
}
40
15test "Unicode normalization tests" { 41test "Unicode normalization tests" {
16 var arena = heap.ArenaAllocator.init(testing.allocator); 42 var arena = heap.ArenaAllocator.init(testing.allocator);
17 defer arena.deinit(); 43 defer arena.deinit();