From 75dedc6aec86c1a4f43d0d7cd120abf68b5baeb1 Mon Sep 17 00:00:00 2001
From: Sam Atman
Date: Fri, 9 May 2025 12:58:23 -0400
Subject: Add reverse CodePoint iterator

---
 src/code_point.zig | 77 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 76 insertions(+), 1 deletion(-)

(limited to 'src/code_point.zig')

diff --git a/src/code_point.zig b/src/code_point.zig
index fe7ad6e..a319d36 100644
--- a/src/code_point.zig
+++ b/src/code_point.zig
@@ -233,6 +233,48 @@ const class_mask: [12]u8 = .{
     0,
 };
 
+/// Walks a byte string from its end towards its start, one code point per step.
+pub const ReverseIterator = struct {
+    /// The string being iterated.
+    bytes: []const u8,
+    /// Index of the last byte not yet consumed, or null once exhausted.
+    i: ?u32,
+
+    /// Starts an iterator on the last byte of `str`. An empty `str` gives an
+    /// iterator that is already exhausted. `str.len - 1` must fit in a u32.
+    pub fn init(str: []const u8) ReverseIterator {
+        return .{
+            .bytes = str,
+            .i = if (str.len == 0) null else @intCast(str.len - 1),
+        };
+    }
+
+    /// Backs up to the nearest non-follow byte (or index 0) and returns what
+    /// `decode` yields from there; returns null once the iterator is exhausted.
+    pub fn prev(iter: *ReverseIterator) ?CodePoint {
+        var i_prev = iter.i orelse return null;
+
+        // Skip follow bytes; stop at index 0 even if it is a follow byte.
+        while (i_prev > 0 and followbyte(iter.bytes[i_prev])) i_prev -= 1;
+
+        iter.i = if (i_prev > 0) i_prev - 1 else null;
+
+        return decode(iter.bytes[i_prev..], i_prev);
+    }
+
+    /// Returns what `prev` would return, then restores the saved position.
+    pub fn peek(iter: *ReverseIterator) ?CodePoint {
+        const saved_i = iter.i;
+        defer iter.i = saved_i;
+        return iter.prev();
+    }
+};
+
+/// Reports whether `b` is in 0x80...0xbf, the UTF-8 continuation-byte range.
+inline fn followbyte(b: u8) bool {
+    return 0x80 <= b and b <= 0xbf;
+}
+
 test "decode" {
     const bytes = "🌩️";
     const res = decode(bytes, 0);
@@ -246,7 +288,7 @@ test "decode" {
     }
 }
 
-test "peek" {
+test Iterator {
     var iter = Iterator{ .bytes = "Hi" };
 
     try expectEqual(@as(u21, 'H'), iter.next().?.code);
@@ -346,6 +388,39 @@ test "truncation" {
     }
 }
 
+test ReverseIterator {
+    {
+        // An empty string has nothing to yield.
+        var r_iter: ReverseIterator = .init("");
+        try testing.expectEqual(@as(?CodePoint, null), r_iter.peek());
+        try testing.expectEqual(@as(?CodePoint, null), r_iter.prev());
+    }
+    {
+        var r_iter: ReverseIterator = .init("ABC");
+        try testing.expectEqual(@as(u21, 'C'), r_iter.prev().?.code);
+        try testing.expectEqual(@as(u21, 'B'), r_iter.peek().?.code);
+        try testing.expectEqual(@as(u21, 'B'), r_iter.prev().?.code);
+        try testing.expectEqual(@as(u21, 'A'), r_iter.prev().?.code);
+        try testing.expectEqual(@as(?CodePoint, null), r_iter.peek());
+        try testing.expectEqual(@as(?CodePoint, null), r_iter.prev());
+        try testing.expectEqual(@as(?CodePoint, null), r_iter.prev());
+    }
+    {
+        var r_iter: ReverseIterator = .init("∅δq🦾ă");
+        try testing.expectEqual(@as(u21, 'ă'), r_iter.prev().?.code);
+        try testing.expectEqual(@as(u21, '🦾'), r_iter.prev().?.code);
+        try testing.expectEqual(@as(u21, 'q'), r_iter.prev().?.code);
+        try testing.expectEqual(@as(u21, 'δ'), r_iter.peek().?.code);
+        try testing.expectEqual(@as(u21, 'δ'), r_iter.prev().?.code);
+        try testing.expectEqual(@as(u21, '∅'), r_iter.peek().?.code);
+        try testing.expectEqual(@as(u21, '∅'), r_iter.peek().?.code);
+        try testing.expectEqual(@as(u21, '∅'), r_iter.prev().?.code);
+        try testing.expectEqual(@as(?CodePoint, null), r_iter.peek());
+        try testing.expectEqual(@as(?CodePoint, null), r_iter.prev());
+        try testing.expectEqual(@as(?CodePoint, null), r_iter.prev());
+    }
+}
+
 const std = @import("std");
 const testing = std.testing;
 const expect = testing.expect;
-- 
cgit v1.2.3