From 736b4ccce2384c8f96e63d9c49ab4d6aee1d65a5 Mon Sep 17 00:00:00 2001 From: Sam Atman Date: Thu, 15 May 2025 10:57:33 -0400 Subject: wordAtIndex passes conformance I removed the initAtIndex functions from the public vocabulary, because the last couple of days of sweat and blood prove that it's hard to use correctly. That's probably it for WordBreak, now to fix the overlong bug on v0.14 and get this integrated with the new reverse grapheme iterator. --- src/unicode_tests.zig | 76 ++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 63 insertions(+), 13 deletions(-) (limited to 'src/unicode_tests.zig') diff --git a/src/unicode_tests.zig b/src/unicode_tests.zig index ef459bf..8b02e98 100644 --- a/src/unicode_tests.zig +++ b/src/unicode_tests.zig @@ -222,32 +222,58 @@ test "Segmentation Word Iterator" { try want.append(Word{ .len = gc_len, .offset = bytes_index }); bytes_index += cp_index; } + const this_str = all_bytes.items; + { - var iter = wb.iterator(all_bytes.items); + var iter = wb.iterator(this_str); var peeked: ?Word = iter.peek(); // Check. - for (want.items, 1..) |want_word, i| { + for (want.items, 1..) |want_word, idx| { const got_word = (iter.next()).?; std.testing.expectEqualStrings( - want_word.bytes(all_bytes.items), - got_word.bytes(all_bytes.items), + want_word.bytes(this_str), + got_word.bytes(this_str), ) catch |err| { - debug.print("Error on line {d}, #{d}\n", .{ line_iter.line, i }); + debug.print("Error on line {d}, #{d}\n", .{ line_iter.line, idx }); return err; }; std.testing.expectEqualStrings( - peeked.?.bytes(all_bytes.items), - got_word.bytes(all_bytes.items), + peeked.?.bytes(this_str), + got_word.bytes(this_str), ) catch |err| { - debug.print("Peek != word on line {d} #{d}\n", .{ line_iter.line, i }); + debug.print("Peek != word on line {d} #{d}\n", .{ line_iter.line, idx }); return err; }; + var r_iter = iter.reverseIterator(); + const if_r_word = r_iter.prev(); + if (if_r_word) |r_word| { + std.testing.expectEqualStrings( + want_word.bytes(this_str), + r_word.bytes(this_str), + ) catch |err| { + debug.print("Reversal Error on line {d}, #{d}\n", .{ line_iter.line, idx }); + return err; + }; + } else { + try testing.expect(false); + } + for (got_word.offset..got_word.offset + got_word.len) |i| { + const this_word = wb.wordAtIndex(this_str, i); + std.testing.expectEqualSlices( + u8, + got_word.bytes(this_str), + this_word.bytes(this_str), + ) catch |err| { + debug.print("Wrong word on line {d} #{d} offset {d}\n", .{ line_iter.line, idx + 1, i }); + return err; + }; + } peeked = iter.peek(); } } { - var r_iter = wb.reverseIterator(all_bytes.items); + var r_iter = wb.reverseIterator(this_str); var peeked: ?Word = r_iter.peek(); var idx = want.items.len - 1; @@ -256,19 +282,43 @@ test "Segmentation Word Iterator" { const got_word = r_iter.prev().?; std.testing.expectEqualSlices( u8, - want_word.bytes(all_bytes.items), - got_word.bytes(all_bytes.items), + want_word.bytes(this_str), + got_word.bytes(this_str), ) catch |err| { debug.print("Error on line {d}, #{d}\n", .{ line_iter.line, idx + 1 }); return err; }; std.testing.expectEqualStrings( - peeked.?.bytes(all_bytes.items), - got_word.bytes(all_bytes.items), + peeked.?.bytes(this_str), + got_word.bytes(this_str), ) catch |err| { debug.print("Peek != word on line {d} #{d}\n", .{ line_iter.line, idx + 1 }); return err; }; + var f_iter = r_iter.forwardIterator(); + const if_f_word = f_iter.next(); + if (if_f_word) |f_word| { + std.testing.expectEqualStrings( + want_word.bytes(this_str), + f_word.bytes(this_str), + ) catch |err| { + debug.print("Reversal Error on line {d}, #{d}\n", .{ line_iter.line, idx }); + return err; + }; + } else { + try testing.expect(false); + } + for (got_word.offset..got_word.offset + got_word.len) |i| { + const this_word = wb.wordAtIndex(this_str, i); + std.testing.expectEqualSlices( + u8, + got_word.bytes(this_str), + this_word.bytes(this_str), + ) catch |err| { + debug.print("Wrong word on line {d} #{d} offset {d}\n", .{ line_iter.line, idx + 1, i }); + return err; + }; + } peeked = r_iter.peek(); if (idx == 0) break; } -- cgit v1.2.3