diff options
| author | 2024-11-02 10:31:28 -0400 | |
|---|---|---|
| committer | 2024-11-02 10:31:28 -0400 | |
| commit | bf319e504e5476d9b0d2dec3e2f8d81ef6439ce4 (patch) | |
| tree | 0424b9f9e54972837652042dc858dfe5ba12b5de | |
| parent | Replace deprecated uses of std.mem.split (diff) | |
| download | zg-bf319e504e5476d9b0d2dec3e2f8d81ef6439ce4.tar.gz zg-bf319e504e5476d9b0d2dec3e2f8d81ef6439ce4.tar.xz zg-bf319e504e5476d9b0d2dec3e2f8d81ef6439ce4.zip | |
Add peek() to Grapheme.Iterator
peek() returns the next ?Grapheme, just as next() would, but leaves the
iteration state unchanged.
| -rw-r--r-- | src/grapheme.zig | 69 | ||||
| -rw-r--r-- | src/unicode_tests.zig | 26 |
2 files changed, 95 insertions, 0 deletions
diff --git a/src/grapheme.zig b/src/grapheme.zig index 911c856..7538f5b 100644 --- a/src/grapheme.zig +++ b/src/grapheme.zig | |||
| @@ -77,6 +77,75 @@ pub const Iterator = struct { | |||
| 77 | 77 | ||
| 78 | return Grapheme{ .len = gc_len, .offset = gc_start }; | 78 | return Grapheme{ .len = gc_len, .offset = gc_start }; |
| 79 | } | 79 | } |
| 80 | |||
/// Returns the upcoming grapheme cluster without consuming it, or `null`
/// when no code points remain.
///
/// The iterator is advanced internally to measure the cluster, so `self`
/// must be mutable; `cp_iter`, `buf[0]` and `buf[1]` are put back to their
/// entry values before the function returns, whichever exit path is taken.
pub fn peek(self: *Self) ?Grapheme {
    // Snapshot the three pieces of state this function modifies and restore
    // them from a single place. Every return value below is computed from
    // locals, so running the restore at scope exit cannot affect the result.
    const saved_cp_iter = self.cp_iter;
    const saved_buf0 = self.buf[0];
    const saved_buf1 = self.buf[1];
    defer {
        self.cp_iter = saved_cp_iter;
        self.buf[0] = saved_buf0;
        self.buf[1] = saved_buf1;
    }

    self.advance();

    // Nothing left to iterate.
    const first = self.buf[0] orelse return null;

    const gc_start = first.offset;
    // NOTE(review): the length is accumulated in a u8, so a cluster longer
    // than 255 bytes would overflow the addition in the loop below. This is
    // unchanged from the previous implementation of this function.
    var gc_len: u8 = first.len;

    // Last code point of the input: it forms a cluster on its own.
    const second = self.buf[1] orelse
        return Grapheme{ .len = gc_len, .offset = gc_start };

    // Fast path: two adjacent ASCII code points are returned as a one-code-point
    // cluster without consulting the break rules, except that a leading '\r'
    // falls through to the general rules.
    if (first.code != '\r' and first.code < 128 and second.code < 128) {
        return Grapheme{ .len = gc_len, .offset = gc_start };
    }

    var state = State{};

    if (graphemeBreak(first.code, second.code, self.data, &state)) {
        return Grapheme{ .len = gc_len, .offset = gc_start };
    }

    // No break after the first code point: keep absorbing code points until
    // the break rules report a boundary or the input is exhausted.
    while (true) {
        self.advance();
        const current = self.buf[0] orelse break;

        gc_len += current.len;

        // At end of input there is no following code point; 0 is passed in
        // its place.
        const following_code = if (self.buf[1]) |ncp| ncp.code else 0;
        if (graphemeBreak(current.code, following_code, self.data, &state)) break;
    }

    return Grapheme{ .len = gc_len, .offset = gc_start };
}
| 80 | }; | 149 | }; |
| 81 | 150 | ||
| 82 | // Predicates | 151 | // Predicates |
diff --git a/src/unicode_tests.zig b/src/unicode_tests.zig index 691ccfb..245c03f 100644 --- a/src/unicode_tests.zig +++ b/src/unicode_tests.zig | |||
| @@ -7,11 +7,37 @@ const mem = std.mem; | |||
| 7 | const testing = std.testing; | 7 | const testing = std.testing; |
| 8 | const unicode = std.unicode; | 8 | const unicode = std.unicode; |
| 9 | 9 | ||
| 10 | const grapheme = @import("grapheme"); | ||
| 10 | const Grapheme = @import("grapheme").Grapheme; | 11 | const Grapheme = @import("grapheme").Grapheme; |
| 11 | const GraphemeData = @import("grapheme").GraphemeData; | 12 | const GraphemeData = @import("grapheme").GraphemeData; |
| 12 | const GraphemeIterator = @import("grapheme").Iterator; | 13 | const GraphemeIterator = @import("grapheme").Iterator; |
| 13 | const Normalize = @import("Normalize"); | 14 | const Normalize = @import("Normalize"); |
| 14 | 15 | ||
// Reference every declaration of the grapheme module from this test file.
comptime {
    testing.refAllDecls(grapheme);
}

// Checks that `peek()` reports the same grapheme as the following `next()`,
// that repeated peeks do not advance the iterator, and that both return
// `null` once the input is exhausted.
test "Iterator.peek" {
    // "a", GREEK CAPITAL DELTA, a farmer emoji sequence (man + skin-tone
    // modifier + ZERO WIDTH JOINER + sheaf of rice) and RIGHTWARDS ARROW.
    // The code points are spelled as escapes so the invisible joiner cannot
    // be lost in an editor or a copy/paste; without it the emoji would split
    // into two clusters and the exhaustion checks below would fail.
    const farmer = "\u{1F468}\u{1F3FB}\u{200D}\u{1F33E}";
    const peek_seq = "a\u{394}" ++ farmer ++ "\u{2192}";

    const data = try GraphemeData.init(testing.allocator);
    defer data.deinit();

    var iter = GraphemeIterator.init(peek_seq, &data);

    // First grapheme: peek and next agree.
    const peek_a = iter.peek().?;
    const next_a = iter.next().?;
    try testing.expectEqual(peek_a, next_a);
    try testing.expectEqualStrings("a", peek_a.bytes(peek_seq));

    // Peeking twice in a row must not advance the iterator.
    const peek_d1 = iter.peek().?;
    const peek_d2 = iter.peek().?;
    try testing.expectEqual(peek_d1, peek_d2);
    const next_d = iter.next().?;
    try testing.expectEqual(peek_d2, next_d);

    // The multi-code-point emoji sequence is peeked as a single cluster.
    const peek_farmer = iter.peek().?;
    const next_farmer = iter.next().?;
    try testing.expectEqual(peek_farmer, next_farmer);
    try testing.expectEqualStrings(farmer, next_farmer.bytes(peek_seq));

    // Final grapheme.
    const peek_arrow = iter.peek().?;
    const next_arrow = iter.next().?;
    try testing.expectEqual(peek_arrow, next_arrow);
    try testing.expectEqualStrings("\u{2192}", next_arrow.bytes(peek_seq));

    // Exhausted: peek stays null however often it is called, and so does next.
    try testing.expectEqual(@as(?Grapheme, null), iter.peek());
    try testing.expectEqual(@as(?Grapheme, null), iter.peek());
    try testing.expectEqual(iter.peek(), iter.next());
}
| 40 | |||
| 15 | test "Unicode normalization tests" { | 41 | test "Unicode normalization tests" { |
| 16 | var arena = heap.ArenaAllocator.init(testing.allocator); | 42 | var arena = heap.ArenaAllocator.init(testing.allocator); |
| 17 | defer arena.deinit(); | 43 | defer arena.deinit(); |