diff options
| author | 2025-05-12 12:57:04 -0400 | |
|---|---|---|
| committer | 2025-05-15 15:31:15 -0400 | |
| commit | 04123c2280088acbe4501bbe4c314ca64ff27dab (patch) | |
| tree | 4161453ecdc6b1679ebf6f092107e7a31e2e6180 | |
| parent | Refactor in unicode_tests (diff) | |
| download | zg-04123c2280088acbe4501bbe4c314ca64ff27dab.tar.gz zg-04123c2280088acbe4501bbe4c314ca64ff27dab.tar.xz zg-04123c2280088acbe4501bbe4c314ca64ff27dab.zip | |
Vastly simplify peek()
Idiomatic Zig takes a while, what can I say (yes, I wrote the first one).
| -rw-r--r-- | src/Graphemes.zig | 63 |
1 files changed, 3 insertions, 60 deletions
diff --git a/src/Graphemes.zig b/src/Graphemes.zig index 7bf328a..1ce1ea6 100644 --- a/src/Graphemes.zig +++ b/src/Graphemes.zig | |||
| @@ -99,7 +99,7 @@ pub const Gbp = enum { | |||
| 99 | 99 | ||
| 100 | /// `Grapheme` represents a Unicode grapheme cluster by its length and offset in the source bytes. | 100 | /// `Grapheme` represents a Unicode grapheme cluster by its length and offset in the source bytes. |
| 101 | pub const Grapheme = struct { | 101 | pub const Grapheme = struct { |
| 102 | len: u8, | 102 | len: u32, |
| 103 | offset: u32, | 103 | offset: u32, |
| 104 | 104 | ||
| 105 | /// `bytes` returns the slice of bytes that correspond to | 105 | /// `bytes` returns the slice of bytes that correspond to |
| @@ -173,69 +173,12 @@ pub const Iterator = struct { | |||
| 173 | const saved_cp_iter = self.cp_iter; | 173 | const saved_cp_iter = self.cp_iter; |
| 174 | const s0 = self.buf[0]; | 174 | const s0 = self.buf[0]; |
| 175 | const s1 = self.buf[1]; | 175 | const s1 = self.buf[1]; |
| 176 | 176 | defer { | |
| 177 | self.advance(); | ||
| 178 | |||
| 179 | // If no more | ||
| 180 | if (self.buf[0] == null) { | ||
| 181 | self.cp_iter = saved_cp_iter; | ||
| 182 | self.buf[0] = s0; | ||
| 183 | self.buf[1] = s1; | ||
| 184 | return null; | ||
| 185 | } | ||
| 186 | // If last one | ||
| 187 | if (self.buf[1] == null) { | ||
| 188 | const len = self.buf[0].?.len; | ||
| 189 | const offset = self.buf[0].?.offset; | ||
| 190 | self.cp_iter = saved_cp_iter; | 177 | self.cp_iter = saved_cp_iter; |
| 191 | self.buf[0] = s0; | 178 | self.buf[0] = s0; |
| 192 | self.buf[1] = s1; | 179 | self.buf[1] = s1; |
| 193 | return Grapheme{ .len = len, .offset = offset }; | ||
| 194 | } | 180 | } |
| 195 | // If ASCII | 181 | return self.next(); |
| 196 | if (self.buf[0].?.code != '\r' and self.buf[0].?.code < 128 and self.buf[1].?.code < 128) { | ||
| 197 | const len = self.buf[0].?.len; | ||
| 198 | const offset = self.buf[0].?.offset; | ||
| 199 | self.cp_iter = saved_cp_iter; | ||
| 200 | self.buf[0] = s0; | ||
| 201 | self.buf[1] = s1; | ||
| 202 | return Grapheme{ .len = len, .offset = offset }; | ||
| 203 | } | ||
| 204 | |||
| 205 | const gc_start = self.buf[0].?.offset; | ||
| 206 | var gc_len: u8 = self.buf[0].?.len; | ||
| 207 | var state = State{}; | ||
| 208 | |||
| 209 | if (graphemeBreak( | ||
| 210 | self.buf[0].?.code, | ||
| 211 | self.buf[1].?.code, | ||
| 212 | self.data, | ||
| 213 | &state, | ||
| 214 | )) { | ||
| 215 | self.cp_iter = saved_cp_iter; | ||
| 216 | self.buf[0] = s0; | ||
| 217 | self.buf[1] = s1; | ||
| 218 | return Grapheme{ .len = gc_len, .offset = gc_start }; | ||
| 219 | } | ||
| 220 | |||
| 221 | while (true) { | ||
| 222 | self.advance(); | ||
| 223 | if (self.buf[0] == null) break; | ||
| 224 | |||
| 225 | gc_len += self.buf[0].?.len; | ||
| 226 | |||
| 227 | if (graphemeBreak( | ||
| 228 | self.buf[0].?.code, | ||
| 229 | if (self.buf[1]) |ncp| ncp.code else 0, | ||
| 230 | self.data, | ||
| 231 | &state, | ||
| 232 | )) break; | ||
| 233 | } | ||
| 234 | self.cp_iter = saved_cp_iter; | ||
| 235 | self.buf[0] = s0; | ||
| 236 | self.buf[1] = s1; | ||
| 237 | |||
| 238 | return Grapheme{ .len = gc_len, .offset = gc_start }; | ||
| 239 | } | 182 | } |
| 240 | }; | 183 | }; |
| 241 | 184 | ||