diff options
| author | 2025-05-15 10:57:33 -0400 | |
|---|---|---|
| committer | 2025-05-15 15:32:43 -0400 | |
| commit | 736b4ccce2384c8f96e63d9c49ab4d6aee1d65a5 (patch) | |
| tree | 09cdc6762a519cd2f20efacfa4d1af082f983e85 /src/unicode_tests.zig | |
| parent | Rewrite wordAtIndex to use iterator flipping (diff) | |
| download | zg-736b4ccce2384c8f96e63d9c49ab4d6aee1d65a5.tar.gz zg-736b4ccce2384c8f96e63d9c49ab4d6aee1d65a5.tar.xz zg-736b4ccce2384c8f96e63d9c49ab4d6aee1d65a5.zip | |
wordAtIndex passes conformance
I removed the initAtIndex functions from the public vocabulary, because
the last couple of days of sweat and blood prove that they're hard to use
correctly.
That's probably it for WordBreak; now to fix the overlong bug on v0.14
and get this integrated with the new reverse grapheme iterator.
Diffstat (limited to 'src/unicode_tests.zig')
| -rw-r--r-- | src/unicode_tests.zig | 76 |
1 files changed, 63 insertions, 13 deletions
diff --git a/src/unicode_tests.zig b/src/unicode_tests.zig index ef459bf..8b02e98 100644 --- a/src/unicode_tests.zig +++ b/src/unicode_tests.zig | |||
| @@ -222,32 +222,58 @@ test "Segmentation Word Iterator" { | |||
| 222 | try want.append(Word{ .len = gc_len, .offset = bytes_index }); | 222 | try want.append(Word{ .len = gc_len, .offset = bytes_index }); |
| 223 | bytes_index += cp_index; | 223 | bytes_index += cp_index; |
| 224 | } | 224 | } |
| 225 | const this_str = all_bytes.items; | ||
| 226 | |||
| 225 | { | 227 | { |
| 226 | var iter = wb.iterator(all_bytes.items); | 228 | var iter = wb.iterator(this_str); |
| 227 | var peeked: ?Word = iter.peek(); | 229 | var peeked: ?Word = iter.peek(); |
| 228 | 230 | ||
| 229 | // Check. | 231 | // Check. |
| 230 | for (want.items, 1..) |want_word, i| { | 232 | for (want.items, 1..) |want_word, idx| { |
| 231 | const got_word = (iter.next()).?; | 233 | const got_word = (iter.next()).?; |
| 232 | std.testing.expectEqualStrings( | 234 | std.testing.expectEqualStrings( |
| 233 | want_word.bytes(all_bytes.items), | 235 | want_word.bytes(this_str), |
| 234 | got_word.bytes(all_bytes.items), | 236 | got_word.bytes(this_str), |
| 235 | ) catch |err| { | 237 | ) catch |err| { |
| 236 | debug.print("Error on line {d}, #{d}\n", .{ line_iter.line, i }); | 238 | debug.print("Error on line {d}, #{d}\n", .{ line_iter.line, idx }); |
| 237 | return err; | 239 | return err; |
| 238 | }; | 240 | }; |
| 239 | std.testing.expectEqualStrings( | 241 | std.testing.expectEqualStrings( |
| 240 | peeked.?.bytes(all_bytes.items), | 242 | peeked.?.bytes(this_str), |
| 241 | got_word.bytes(all_bytes.items), | 243 | got_word.bytes(this_str), |
| 242 | ) catch |err| { | 244 | ) catch |err| { |
| 243 | debug.print("Peek != word on line {d} #{d}\n", .{ line_iter.line, i }); | 245 | debug.print("Peek != word on line {d} #{d}\n", .{ line_iter.line, idx }); |
| 244 | return err; | 246 | return err; |
| 245 | }; | 247 | }; |
| 248 | var r_iter = iter.reverseIterator(); | ||
| 249 | const if_r_word = r_iter.prev(); | ||
| 250 | if (if_r_word) |r_word| { | ||
| 251 | std.testing.expectEqualStrings( | ||
| 252 | want_word.bytes(this_str), | ||
| 253 | r_word.bytes(this_str), | ||
| 254 | ) catch |err| { | ||
| 255 | debug.print("Reversal Error on line {d}, #{d}\n", .{ line_iter.line, idx }); | ||
| 256 | return err; | ||
| 257 | }; | ||
| 258 | } else { | ||
| 259 | try testing.expect(false); | ||
| 260 | } | ||
| 261 | for (got_word.offset..got_word.offset + got_word.len) |i| { | ||
| 262 | const this_word = wb.wordAtIndex(this_str, i); | ||
| 263 | std.testing.expectEqualSlices( | ||
| 264 | u8, | ||
| 265 | got_word.bytes(this_str), | ||
| 266 | this_word.bytes(this_str), | ||
| 267 | ) catch |err| { | ||
| 268 | debug.print("Wrong word on line {d} #{d} offset {d}\n", .{ line_iter.line, idx + 1, i }); | ||
| 269 | return err; | ||
| 270 | }; | ||
| 271 | } | ||
| 246 | peeked = iter.peek(); | 272 | peeked = iter.peek(); |
| 247 | } | 273 | } |
| 248 | } | 274 | } |
| 249 | { | 275 | { |
| 250 | var r_iter = wb.reverseIterator(all_bytes.items); | 276 | var r_iter = wb.reverseIterator(this_str); |
| 251 | var peeked: ?Word = r_iter.peek(); | 277 | var peeked: ?Word = r_iter.peek(); |
| 252 | var idx = want.items.len - 1; | 278 | var idx = want.items.len - 1; |
| 253 | 279 | ||
| @@ -256,19 +282,43 @@ test "Segmentation Word Iterator" { | |||
| 256 | const got_word = r_iter.prev().?; | 282 | const got_word = r_iter.prev().?; |
| 257 | std.testing.expectEqualSlices( | 283 | std.testing.expectEqualSlices( |
| 258 | u8, | 284 | u8, |
| 259 | want_word.bytes(all_bytes.items), | 285 | want_word.bytes(this_str), |
| 260 | got_word.bytes(all_bytes.items), | 286 | got_word.bytes(this_str), |
| 261 | ) catch |err| { | 287 | ) catch |err| { |
| 262 | debug.print("Error on line {d}, #{d}\n", .{ line_iter.line, idx + 1 }); | 288 | debug.print("Error on line {d}, #{d}\n", .{ line_iter.line, idx + 1 }); |
| 263 | return err; | 289 | return err; |
| 264 | }; | 290 | }; |
| 265 | std.testing.expectEqualStrings( | 291 | std.testing.expectEqualStrings( |
| 266 | peeked.?.bytes(all_bytes.items), | 292 | peeked.?.bytes(this_str), |
| 267 | got_word.bytes(all_bytes.items), | 293 | got_word.bytes(this_str), |
| 268 | ) catch |err| { | 294 | ) catch |err| { |
| 269 | debug.print("Peek != word on line {d} #{d}\n", .{ line_iter.line, idx + 1 }); | 295 | debug.print("Peek != word on line {d} #{d}\n", .{ line_iter.line, idx + 1 }); |
| 270 | return err; | 296 | return err; |
| 271 | }; | 297 | }; |
| 298 | var f_iter = r_iter.forwardIterator(); | ||
| 299 | const if_f_word = f_iter.next(); | ||
| 300 | if (if_f_word) |f_word| { | ||
| 301 | std.testing.expectEqualStrings( | ||
| 302 | want_word.bytes(this_str), | ||
| 303 | f_word.bytes(this_str), | ||
| 304 | ) catch |err| { | ||
| 305 | debug.print("Reversal Error on line {d}, #{d}\n", .{ line_iter.line, idx }); | ||
| 306 | return err; | ||
| 307 | }; | ||
| 308 | } else { | ||
| 309 | try testing.expect(false); | ||
| 310 | } | ||
| 311 | for (got_word.offset..got_word.offset + got_word.len) |i| { | ||
| 312 | const this_word = wb.wordAtIndex(this_str, i); | ||
| 313 | std.testing.expectEqualSlices( | ||
| 314 | u8, | ||
| 315 | got_word.bytes(this_str), | ||
| 316 | this_word.bytes(this_str), | ||
| 317 | ) catch |err| { | ||
| 318 | debug.print("Wrong word on line {d} #{d} offset {d}\n", .{ line_iter.line, idx + 1, i }); | ||
| 319 | return err; | ||
| 320 | }; | ||
| 321 | } | ||
| 272 | peeked = r_iter.peek(); | 322 | peeked = r_iter.peek(); |
| 273 | if (idx == 0) break; | 323 | if (idx == 0) break; |
| 274 | } | 324 | } |