From 04123c2280088acbe4501bbe4c314ca64ff27dab Mon Sep 17 00:00:00 2001 From: Sam Atman Date: Mon, 12 May 2025 12:57:04 -0400 Subject: Vastly simplify peek() Idiomatic Zig takes a while, what can I say (yes I wrote the first one). --- src/Graphemes.zig | 63 +++---------------------------------------------------- 1 file changed, 3 insertions(+), 60 deletions(-) (limited to 'src') diff --git a/src/Graphemes.zig b/src/Graphemes.zig index 7bf328a..1ce1ea6 100644 --- a/src/Graphemes.zig +++ b/src/Graphemes.zig @@ -99,7 +99,7 @@ pub const Gbp = enum { /// `Grapheme` represents a Unicode grapheme cluster by its length and offset in the source bytes. pub const Grapheme = struct { - len: u8, + len: u32, offset: u32, /// `bytes` returns the slice of bytes that correspond to @@ -173,69 +173,12 @@ pub const Iterator = struct { const saved_cp_iter = self.cp_iter; const s0 = self.buf[0]; const s1 = self.buf[1]; - - self.advance(); - - // If no more - if (self.buf[0] == null) { - self.cp_iter = saved_cp_iter; - self.buf[0] = s0; - self.buf[1] = s1; - return null; - } - // If last one - if (self.buf[1] == null) { - const len = self.buf[0].?.len; - const offset = self.buf[0].?.offset; + defer { self.cp_iter = saved_cp_iter; self.buf[0] = s0; self.buf[1] = s1; - return Grapheme{ .len = len, .offset = offset }; } - // If ASCII - if (self.buf[0].?.code != '\r' and self.buf[0].?.code < 128 and self.buf[1].?.code < 128) { - const len = self.buf[0].?.len; - const offset = self.buf[0].?.offset; - self.cp_iter = saved_cp_iter; - self.buf[0] = s0; - self.buf[1] = s1; - return Grapheme{ .len = len, .offset = offset }; - } - - const gc_start = self.buf[0].?.offset; - var gc_len: u8 = self.buf[0].?.len; - var state = State{}; - - if (graphemeBreak( - self.buf[0].?.code, - self.buf[1].?.code, - self.data, - &state, - )) { - self.cp_iter = saved_cp_iter; - self.buf[0] = s0; - self.buf[1] = s1; - return Grapheme{ .len = gc_len, .offset = gc_start }; - } - - while (true) { - self.advance(); - if 
(self.buf[0] == null) break; - - gc_len += self.buf[0].?.len; - - if (graphemeBreak( - self.buf[0].?.code, - if (self.buf[1]) |ncp| ncp.code else 0, - self.data, - &state, - )) break; - } - self.cp_iter = saved_cp_iter; - self.buf[0] = s0; - self.buf[1] = s1; - - return Grapheme{ .len = gc_len, .offset = gc_start }; + return self.next(); } }; -- cgit v1.2.3