diff options
| author | 2024-11-02 10:31:28 -0400 | |
|---|---|---|
| committer | 2024-11-02 10:31:28 -0400 | |
| commit | bf319e504e5476d9b0d2dec3e2f8d81ef6439ce4 (patch) | |
| tree | 0424b9f9e54972837652042dc858dfe5ba12b5de /src/grapheme.zig | |
| parent | Replace deprecated uses of std.mem.split (diff) | |
| download | zg-bf319e504e5476d9b0d2dec3e2f8d81ef6439ce4.tar.gz zg-bf319e504e5476d9b0d2dec3e2f8d81ef6439ce4.tar.xz zg-bf319e504e5476d9b0d2dec3e2f8d81ef6439ce4.zip | |
Add peek() to Grapheme.Iterator
This does the expected thing: it returns the next ?Grapheme without
mutating the iteration state.
Diffstat (limited to 'src/grapheme.zig')
| -rw-r--r-- | src/grapheme.zig | 69 |
1 files changed, 69 insertions, 0 deletions
diff --git a/src/grapheme.zig b/src/grapheme.zig index 911c856..7538f5b 100644 --- a/src/grapheme.zig +++ b/src/grapheme.zig | |||
| @@ -77,6 +77,75 @@ pub const Iterator = struct { | |||
| 77 | 77 | ||
| 78 | return Grapheme{ .len = gc_len, .offset = gc_start }; | 78 | return Grapheme{ .len = gc_len, .offset = gc_start }; |
| 79 | } | 79 | } |
| 80 | |||
/// Returns the next grapheme cluster without consuming it, or `null` when
/// `advance()` leaves no code point in `buf[0]`.
///
/// The look-ahead temporarily advances the iterator. `cp_iter`, `buf[0]`
/// and `buf[1]` are put back to their entry values on every exit path, so
/// the caller observes no change to the iteration state.
pub fn peek(self: *Self) ?Grapheme {
    // Snapshot the fields this function restores on exit.
    const saved_cp_iter = self.cp_iter;
    const saved_first = self.buf[0];
    const saved_second = self.buf[1];
    // A single restore shared by all return paths. Every value returned
    // below is built from local copies, so it does not depend on the
    // restored fields.
    defer {
        self.cp_iter = saved_cp_iter;
        self.buf[0] = saved_first;
        self.buf[1] = saved_second;
    }

    self.advance();

    // If no more
    const first = self.buf[0] orelse return null;

    // If last one: the remaining code point is returned on its own.
    const second = self.buf[1] orelse {
        return Grapheme{ .len = first.len, .offset = first.offset };
    };

    // If ASCII: skip the break rules when both code points are below 128
    // and the first is not '\r' (presumably so that CR LF still goes
    // through graphemeBreak; confirm against the break rules).
    if (first.code != '\r' and first.code < 128 and second.code < 128) {
        return Grapheme{ .len = first.len, .offset = first.offset };
    }

    const gc_start = first.offset;
    // NOTE(review): gc_len is u8 and is accumulated with `+=` below; a
    // cluster longer than 255 bytes would overflow. Confirm against the
    // type of Grapheme.len before widening.
    var gc_len: u8 = first.len;
    var state = State{};

    // A break directly after the first code point: single-code-point cluster.
    if (graphemeBreak(first.code, second.code, self.data, &state)) {
        return Grapheme{ .len = gc_len, .offset = gc_start };
    }

    // Keep extending the cluster until a break is reported or input ends.
    while (true) {
        self.advance();
        const cp = self.buf[0] orelse break;

        gc_len += cp.len;

        // At end of input there is no following code point; 0 is passed
        // in its place.
        const next_code = if (self.buf[1]) |ncp| ncp.code else 0;
        if (graphemeBreak(cp.code, next_code, self.data, &state)) break;
    }

    return Grapheme{ .len = gc_len, .offset = gc_start };
}
| 80 | }; | 149 | }; |
| 81 | 150 | ||
| 82 | // Predicates | 151 | // Predicates |