summary refs log tree commit diff
diff options
context:
space:
mode:
author Sam Atman 2025-05-12 12:57:04 -0400
committer Sam Atman 2025-05-15 15:31:15 -0400
commit 04123c2280088acbe4501bbe4c314ca64ff27dab (patch)
tree 4161453ecdc6b1679ebf6f092107e7a31e2e6180
parent Refactor in unicode_tests (diff)
download zg-04123c2280088acbe4501bbe4c314ca64ff27dab.tar.gz
zg-04123c2280088acbe4501bbe4c314ca64ff27dab.tar.xz
zg-04123c2280088acbe4501bbe4c314ca64ff27dab.zip
Vastly simplify peek()
Idiomatic Zig takes a while, what can I say (yes I wrote the first one).
-rw-r--r-- src/Graphemes.zig 63
1 files changed, 3 insertions, 60 deletions
diff --git a/src/Graphemes.zig b/src/Graphemes.zig
index 7bf328a..1ce1ea6 100644
--- a/src/Graphemes.zig
+++ b/src/Graphemes.zig
@@ -99,7 +99,7 @@ pub const Gbp = enum {
99 99
100/// `Grapheme` represents a Unicode grapheme cluster by its length and offset in the source bytes. 100/// `Grapheme` represents a Unicode grapheme cluster by its length and offset in the source bytes.
101pub const Grapheme = struct { 101pub const Grapheme = struct {
102 len: u8, 102 len: u32,
103 offset: u32, 103 offset: u32,
104 104
105 /// `bytes` returns the slice of bytes that correspond to 105 /// `bytes` returns the slice of bytes that correspond to
@@ -173,69 +173,12 @@ pub const Iterator = struct {
173 const saved_cp_iter = self.cp_iter; 173 const saved_cp_iter = self.cp_iter;
174 const s0 = self.buf[0]; 174 const s0 = self.buf[0];
175 const s1 = self.buf[1]; 175 const s1 = self.buf[1];
176 176 defer {
177 self.advance();
178
179 // If no more
180 if (self.buf[0] == null) {
181 self.cp_iter = saved_cp_iter;
182 self.buf[0] = s0;
183 self.buf[1] = s1;
184 return null;
185 }
186 // If last one
187 if (self.buf[1] == null) {
188 const len = self.buf[0].?.len;
189 const offset = self.buf[0].?.offset;
190 self.cp_iter = saved_cp_iter; 177 self.cp_iter = saved_cp_iter;
191 self.buf[0] = s0; 178 self.buf[0] = s0;
192 self.buf[1] = s1; 179 self.buf[1] = s1;
193 return Grapheme{ .len = len, .offset = offset };
194 } 180 }
195 // If ASCII 181 return self.next();
196 if (self.buf[0].?.code != '\r' and self.buf[0].?.code < 128 and self.buf[1].?.code < 128) {
197 const len = self.buf[0].?.len;
198 const offset = self.buf[0].?.offset;
199 self.cp_iter = saved_cp_iter;
200 self.buf[0] = s0;
201 self.buf[1] = s1;
202 return Grapheme{ .len = len, .offset = offset };
203 }
204
205 const gc_start = self.buf[0].?.offset;
206 var gc_len: u8 = self.buf[0].?.len;
207 var state = State{};
208
209 if (graphemeBreak(
210 self.buf[0].?.code,
211 self.buf[1].?.code,
212 self.data,
213 &state,
214 )) {
215 self.cp_iter = saved_cp_iter;
216 self.buf[0] = s0;
217 self.buf[1] = s1;
218 return Grapheme{ .len = gc_len, .offset = gc_start };
219 }
220
221 while (true) {
222 self.advance();
223 if (self.buf[0] == null) break;
224
225 gc_len += self.buf[0].?.len;
226
227 if (graphemeBreak(
228 self.buf[0].?.code,
229 if (self.buf[1]) |ncp| ncp.code else 0,
230 self.data,
231 &state,
232 )) break;
233 }
234 self.cp_iter = saved_cp_iter;
235 self.buf[0] = s0;
236 self.buf[1] = s1;
237
238 return Grapheme{ .len = gc_len, .offset = gc_start };
239 } 182 }
240}; 183};
241 184