Hooked up break test, some bugs squashed

The handling of ignorables is really different, because they 'adhere' to the future of the iteration, not the past.
author: Sam Atman 2025-05-13 17:19:56 -0400
committer: Sam Atman 2025-05-15 15:32:38 -0400
commit: 5cc8c1875a21bfb398e6685b03a29d6ba1cbf74a (patch)
tree: f46287fbc0d92238644c23d0b176354567b647d1 /src/unicode_tests.zig
parent: Reverse Word Iterator (diff)
download: zg-5cc8c1875a21bfb398e6685b03a29d6ba1cbf74a.tar.gz
zg-5cc8c1875a21bfb398e6685b03a29d6ba1cbf74a.tar.xz
zg-5cc8c1875a21bfb398e6685b03a29d6ba1cbf74a.zip
1 file changed, 34 insertions, 15 deletions
diff --git a/src/unicode_tests.zig b/src/unicode_tests.zig
index 59f0c6f..8661bfd 100644
--- a/src/unicode_tests.zig
+++ b/src/unicode_tests.zig
@@ -195,7 +195,7 @@ test "Segmentation Word Iterator" {
            line = line[0..final];
        }
        // Iterate over fields.
-        var want = std.ArrayList(Grapheme).init(allocator);
+        var want = std.ArrayList(Word).init(allocator);
        defer want.deinit();
        var all_bytes = std.ArrayList(u8).init(allocator);
@@ -219,22 +219,40 @@ test "Segmentation Word Iterator" {
                gc_len += len;
            }
-            try want.append(Grapheme{ .len = gc_len, .offset = bytes_index });
+            try want.append(Word{ .len = gc_len, .offset = bytes_index });
            bytes_index += cp_index;
        }
+        {
-        var iter = wb.iterator(all_bytes.items);
+            var iter = wb.iterator(all_bytes.items);
-        // Check.
+            // Check.
-        for (want.items, 1..) |want_word, i| {
+            for (want.items, 1..) |want_word, i| {
-            const got_word = (iter.next()).?;
+                const got_word = (iter.next()).?;
-            std.testing.expectEqualStrings(
+                std.testing.expectEqualStrings(
-                want_word.bytes(all_bytes.items),
+                    want_word.bytes(all_bytes.items),
-                got_word.bytes(all_bytes.items),
+                    got_word.bytes(all_bytes.items),
-            ) catch |err| {
+                ) catch |err| {
-                debug.print("Error on line {d}, #{d}\n", .{ line_iter.line, i });
+                    debug.print("Error on line {d}, #{d}\n", .{ line_iter.line, i });
-                return err;
+                    return err;
-            };
+                };
+            }
+        }
+        {
+            var r_iter = wb.reverseIterator(all_bytes.items);
+            var idx = want.items.len - 1;
+            while (true) : (idx -= 1) {
+                const want_word = want.items[idx];
+                const got_word = r_iter.prev().?;
+                std.testing.expectEqualSlices(
+                    u8,
+                    want_word.bytes(all_bytes.items),
+                    got_word.bytes(all_bytes.items),
+                ) catch |err| {
+                    debug.print("Error on line {d}, #{d}\n", .{ line_iter.line, idx + 1 });
+                    return err;
+                };
+                if (idx == 0) break;
+            }
        }
    }
 }
@@ -277,3 +295,4 @@ const GraphemeIterator = @import("Graphemes").Iterator;
 const Normalize = @import("Normalize");
 const WordBreak = @import("WordBreak");
+const Word = WordBreak.Word;
author	Sam Atman	2025-05-13 17:19:56 -0400
committer	Sam Atman	2025-05-15 15:32:38 -0400
commit	5cc8c1875a21bfb398e6685b03a29d6ba1cbf74a (patch)
tree	f46287fbc0d92238644c23d0b176354567b647d1 /src/unicode_tests.zig
parent	Reverse Word Iterator (diff)
download	zg-5cc8c1875a21bfb398e6685b03a29d6ba1cbf74a.tar.gz zg-5cc8c1875a21bfb398e6685b03a29d6ba1cbf74a.tar.xz zg-5cc8c1875a21bfb398e6685b03a29d6ba1cbf74a.zip

diff --git a/src/unicode_tests.zig b/src/unicode_tests.zig
index 59f0c6f..8661bfd 100644
--- a/src/unicode_tests.zig
+++ b/src/unicode_tests.zig
@@ -195,7 +195,7 @@ test "Segmentation Word Iterator" {
195	line = line[0..final];	195	line = line[0..final];
196	}	196	}
197	// Iterate over fields.	197	// Iterate over fields.
198	var want = std.ArrayList(Grapheme).init(allocator);	198	var want = std.ArrayList(Word).init(allocator);
199	defer want.deinit();	199	defer want.deinit();
200		200
201	var all_bytes = std.ArrayList(u8).init(allocator);	201	var all_bytes = std.ArrayList(u8).init(allocator);
@@ -219,22 +219,40 @@ test "Segmentation Word Iterator" {
219	gc_len += len;	219	gc_len += len;
220	}	220	}
221		221
222	try want.append(Grapheme{ .len = gc_len, .offset = bytes_index });	222	try want.append(Word{ .len = gc_len, .offset = bytes_index });
223	bytes_index += cp_index;	223	bytes_index += cp_index;
224	}	224	}
225		225	{
226	var iter = wb.iterator(all_bytes.items);	226	var iter = wb.iterator(all_bytes.items);
227		227
228	// Check.	228	// Check.
229	for (want.items, 1..) |want_word, i| {	229	for (want.items, 1..) |want_word, i| {
230	const got_word = (iter.next()).?;	230	const got_word = (iter.next()).?;
231	std.testing.expectEqualStrings(	231	std.testing.expectEqualStrings(
232	want_word.bytes(all_bytes.items),	232	want_word.bytes(all_bytes.items),
233	got_word.bytes(all_bytes.items),	233	got_word.bytes(all_bytes.items),
234	) catch |err| {	234	) catch |err| {
235	debug.print("Error on line {d}, #{d}\n", .{ line_iter.line, i });	235	debug.print("Error on line {d}, #{d}\n", .{ line_iter.line, i });
236	return err;	236	return err;
237	};	237	};
		238	}
		239	}
		240	{
		241	var r_iter = wb.reverseIterator(all_bytes.items);
		242	var idx = want.items.len - 1;
		243	while (true) : (idx -= 1) {
		244	const want_word = want.items[idx];
		245	const got_word = r_iter.prev().?;
		246	std.testing.expectEqualSlices(
		247	u8,
		248	want_word.bytes(all_bytes.items),
		249	got_word.bytes(all_bytes.items),
		250	) catch |err| {
		251	debug.print("Error on line {d}, #{d}\n", .{ line_iter.line, idx + 1 });
		252	return err;
		253	};
		254	if (idx == 0) break;
		255	}
238	}	256	}
239	}	257	}
240	}	258	}
@@ -277,3 +295,4 @@ const GraphemeIterator = @import("Graphemes").Iterator;
277	const Normalize = @import("Normalize");	295	const Normalize = @import("Normalize");
278		296
279	const WordBreak = @import("WordBreak");	297	const WordBreak = @import("WordBreak");
		298	const Word = WordBreak.Word;