diff options
| author | 2025-05-13 17:19:56 -0400 | |
|---|---|---|
| committer | 2025-05-15 15:32:38 -0400 | |
| commit | 5cc8c1875a21bfb398e6685b03a29d6ba1cbf74a (patch) | |
| tree | f46287fbc0d92238644c23d0b176354567b647d1 /src/unicode_tests.zig | |
| parent | Reverse Word Iterator (diff) | |
| download | zg-5cc8c1875a21bfb398e6685b03a29d6ba1cbf74a.tar.gz zg-5cc8c1875a21bfb398e6685b03a29d6ba1cbf74a.tar.xz zg-5cc8c1875a21bfb398e6685b03a29d6ba1cbf74a.zip | |
Hooked up break test, some bugs squashed
The handling of ignorables is really different, because they 'adhere'
to the future of the iteration, not the past.
Diffstat (limited to 'src/unicode_tests.zig')
| -rw-r--r-- | src/unicode_tests.zig | 49 |
1 file changed, 34 insertions, 15 deletions
diff --git a/src/unicode_tests.zig b/src/unicode_tests.zig index 59f0c6f..8661bfd 100644 --- a/src/unicode_tests.zig +++ b/src/unicode_tests.zig | |||
| @@ -195,7 +195,7 @@ test "Segmentation Word Iterator" { | |||
| 195 | line = line[0..final]; | 195 | line = line[0..final]; |
| 196 | } | 196 | } |
| 197 | // Iterate over fields. | 197 | // Iterate over fields. |
| 198 | var want = std.ArrayList(Grapheme).init(allocator); | 198 | var want = std.ArrayList(Word).init(allocator); |
| 199 | defer want.deinit(); | 199 | defer want.deinit(); |
| 200 | 200 | ||
| 201 | var all_bytes = std.ArrayList(u8).init(allocator); | 201 | var all_bytes = std.ArrayList(u8).init(allocator); |
| @@ -219,22 +219,40 @@ test "Segmentation Word Iterator" { | |||
| 219 | gc_len += len; | 219 | gc_len += len; |
| 220 | } | 220 | } |
| 221 | 221 | ||
| 222 | try want.append(Grapheme{ .len = gc_len, .offset = bytes_index }); | 222 | try want.append(Word{ .len = gc_len, .offset = bytes_index }); |
| 223 | bytes_index += cp_index; | 223 | bytes_index += cp_index; |
| 224 | } | 224 | } |
| 225 | 225 | { | |
| 226 | var iter = wb.iterator(all_bytes.items); | 226 | var iter = wb.iterator(all_bytes.items); |
| 227 | 227 | ||
| 228 | // Check. | 228 | // Check. |
| 229 | for (want.items, 1..) |want_word, i| { | 229 | for (want.items, 1..) |want_word, i| { |
| 230 | const got_word = (iter.next()).?; | 230 | const got_word = (iter.next()).?; |
| 231 | std.testing.expectEqualStrings( | 231 | std.testing.expectEqualStrings( |
| 232 | want_word.bytes(all_bytes.items), | 232 | want_word.bytes(all_bytes.items), |
| 233 | got_word.bytes(all_bytes.items), | 233 | got_word.bytes(all_bytes.items), |
| 234 | ) catch |err| { | 234 | ) catch |err| { |
| 235 | debug.print("Error on line {d}, #{d}\n", .{ line_iter.line, i }); | 235 | debug.print("Error on line {d}, #{d}\n", .{ line_iter.line, i }); |
| 236 | return err; | 236 | return err; |
| 237 | }; | 237 | }; |
| 238 | } | ||
| 239 | } | ||
| 240 | { | ||
| 241 | var r_iter = wb.reverseIterator(all_bytes.items); | ||
| 242 | var idx = want.items.len - 1; | ||
| 243 | while (true) : (idx -= 1) { | ||
| 244 | const want_word = want.items[idx]; | ||
| 245 | const got_word = r_iter.prev().?; | ||
| 246 | std.testing.expectEqualSlices( | ||
| 247 | u8, | ||
| 248 | want_word.bytes(all_bytes.items), | ||
| 249 | got_word.bytes(all_bytes.items), | ||
| 250 | ) catch |err| { | ||
| 251 | debug.print("Error on line {d}, #{d}\n", .{ line_iter.line, idx + 1 }); | ||
| 252 | return err; | ||
| 253 | }; | ||
| 254 | if (idx == 0) break; | ||
| 255 | } | ||
| 238 | } | 256 | } |
| 239 | } | 257 | } |
| 240 | } | 258 | } |
| @@ -277,3 +295,4 @@ const GraphemeIterator = @import("Graphemes").Iterator; | |||
| 277 | const Normalize = @import("Normalize"); | 295 | const Normalize = @import("Normalize"); |
| 278 | 296 | ||
| 279 | const WordBreak = @import("WordBreak"); | 297 | const WordBreak = @import("WordBreak"); |
| 298 | const Word = WordBreak.Word; | ||