From 7e4cc4641e966a1aa32fa021a9476af3b9813485 Mon Sep 17 00:00:00 2001 From: Sam Atman Date: Tue, 13 May 2025 13:38:28 -0400 Subject: Various small iterator improvements --- src/code_point.zig | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 51 insertions(+), 4 deletions(-) (limited to 'src/code_point.zig') diff --git a/src/code_point.zig b/src/code_point.zig index a319d36..b3c9aea 100644 --- a/src/code_point.zig +++ b/src/code_point.zig @@ -48,12 +48,22 @@ pub fn decodeAtCursor(bytes: []const u8, cursor: *u32) ?CodePoint { }; // Multibyte +<<<<<<< HEAD // Second: var class: u4 = @intCast(u8dfa[byte]); var st: u32 = state_dfa[class]; if (st == RUNE_REJECT or cursor.* == bytes.len) { @branchHint(.cold); // First one is never a truncation +||||||| parent of ad4b046 (Various small iterator improvements) + // Return replacement if we don' have a complete codepoint remaining. Consumes only one byte + if (cp.len > bytes.len) { + // Unicode replacement code point. +======= + // Return replacement if we don't have a complete codepoint remaining. Consumes only one byte. + if (cp.len > bytes.len) { + // Unicode replacement code point. +>>>>>>> ad4b046 (Various small iterator improvements) return .{ .code = 0xfffd, .len = 1, @@ -158,10 +168,19 @@ pub const Iterator = struct { return decodeAtCursor(self.bytes, &self.i); } - pub fn peek(self: *Iterator) ?CodePoint { - const saved_i = self.i; - defer self.i = saved_i; - return self.next(); + pub fn peek(iter: *Iterator) ?CodePoint { + const saved_i = iter.i; + defer iter.i = saved_i; + return iter.next(); + } + + /// Create a backward iterator at this point. It will repeat + /// the last CodePoint seen. + pub fn reverseIterator(iter: *Iterator) ReverseIterator { + if (iter.i == iter.bytes.len) { + return .init(iter.bytes); + } + return .{ .i = iter.i, .bytes = iter.bytes }; } }; @@ -266,6 +285,17 @@ pub const ReverseIterator = struct { defer iter.i = saved_i; return iter.prev(); } + + /// Create a forward iterator at this point. It will repeat the + /// last CodePoint seen. + pub fn forwardIterator(iter: *ReverseIterator) Iterator { + if (iter.i) |i| { + var fwd: Iterator = .{ .i = i, .bytes = iter.bytes }; + _ = fwd.next(); + return fwd; + } + return .{ .i = 0, .bytes = iter.bytes }; + } }; inline fn followbyte(b: u8) bool { @@ -410,6 +440,23 @@ test ReverseIterator { try testing.expectEqual(@as(?CodePoint, null), r_iter.prev()); try testing.expectEqual(@as(?CodePoint, null), r_iter.prev()); } + { + var r_iter: ReverseIterator = .init("123"); + try testing.expectEqual(@as(u21, '3'), r_iter.prev().?.code); + try testing.expectEqual(@as(u21, '2'), r_iter.prev().?.code); + try testing.expectEqual(@as(u21, '1'), r_iter.prev().?.code); + var iter = r_iter.forwardIterator(); + try testing.expectEqual(@as(u21, '1'), iter.next().?.code); + try testing.expectEqual(@as(u21, '2'), iter.next().?.code); + try testing.expectEqual(@as(u21, '3'), iter.next().?.code); + r_iter = iter.reverseIterator(); + try testing.expectEqual(@as(u21, '3'), r_iter.prev().?.code); + try testing.expectEqual(@as(u21, '2'), r_iter.prev().?.code); + iter = r_iter.forwardIterator(); + r_iter = iter.reverseIterator(); + try testing.expectEqual(@as(u21, '2'), iter.next().?.code); + try testing.expectEqual(@as(u21, '2'), r_iter.prev().?.code); + } } const std = @import("std"); -- cgit v1.2.3