summaryrefslogtreecommitdiff
path: root/src/unicode_tests.zig
diff options
context:
space:
mode:
authorGravatar Sam Atman2025-05-15 10:57:33 -0400
committerGravatar Sam Atman2025-05-15 15:32:43 -0400
commit736b4ccce2384c8f96e63d9c49ab4d6aee1d65a5 (patch)
tree09cdc6762a519cd2f20efacfa4d1af082f983e85 /src/unicode_tests.zig
parentRewrite wordAtIndex to use iterator flipping (diff)
downloadzg-736b4ccce2384c8f96e63d9c49ab4d6aee1d65a5.tar.gz
zg-736b4ccce2384c8f96e63d9c49ab4d6aee1d65a5.tar.xz
zg-736b4ccce2384c8f96e63d9c49ab4d6aee1d65a5.zip
wordAtIndex passes conformance
I removed the initAtIndex functions from the public vocabulary, because the last couple of days of sweat and blood prove that they're hard to use correctly. That's probably it for WordBreak; now to fix the overlong bug on v0.14 and get this integrated with the new reverse grapheme iterator.
Diffstat (limited to 'src/unicode_tests.zig')
-rw-r--r--src/unicode_tests.zig76
1 files changed, 63 insertions, 13 deletions
diff --git a/src/unicode_tests.zig b/src/unicode_tests.zig
index ef459bf..8b02e98 100644
--- a/src/unicode_tests.zig
+++ b/src/unicode_tests.zig
@@ -222,32 +222,58 @@ test "Segmentation Word Iterator" {
222 try want.append(Word{ .len = gc_len, .offset = bytes_index }); 222 try want.append(Word{ .len = gc_len, .offset = bytes_index });
223 bytes_index += cp_index; 223 bytes_index += cp_index;
224 } 224 }
225 const this_str = all_bytes.items;
226
225 { 227 {
226 var iter = wb.iterator(all_bytes.items); 228 var iter = wb.iterator(this_str);
227 var peeked: ?Word = iter.peek(); 229 var peeked: ?Word = iter.peek();
228 230
229 // Check. 231 // Check.
230 for (want.items, 1..) |want_word, i| { 232 for (want.items, 1..) |want_word, idx| {
231 const got_word = (iter.next()).?; 233 const got_word = (iter.next()).?;
232 std.testing.expectEqualStrings( 234 std.testing.expectEqualStrings(
233 want_word.bytes(all_bytes.items), 235 want_word.bytes(this_str),
234 got_word.bytes(all_bytes.items), 236 got_word.bytes(this_str),
235 ) catch |err| { 237 ) catch |err| {
236 debug.print("Error on line {d}, #{d}\n", .{ line_iter.line, i }); 238 debug.print("Error on line {d}, #{d}\n", .{ line_iter.line, idx });
237 return err; 239 return err;
238 }; 240 };
239 std.testing.expectEqualStrings( 241 std.testing.expectEqualStrings(
240 peeked.?.bytes(all_bytes.items), 242 peeked.?.bytes(this_str),
241 got_word.bytes(all_bytes.items), 243 got_word.bytes(this_str),
242 ) catch |err| { 244 ) catch |err| {
243 debug.print("Peek != word on line {d} #{d}\n", .{ line_iter.line, i }); 245 debug.print("Peek != word on line {d} #{d}\n", .{ line_iter.line, idx });
244 return err; 246 return err;
245 }; 247 };
248 var r_iter = iter.reverseIterator();
249 const if_r_word = r_iter.prev();
250 if (if_r_word) |r_word| {
251 std.testing.expectEqualStrings(
252 want_word.bytes(this_str),
253 r_word.bytes(this_str),
254 ) catch |err| {
255 debug.print("Reversal Error on line {d}, #{d}\n", .{ line_iter.line, idx });
256 return err;
257 };
258 } else {
259 try testing.expect(false);
260 }
261 for (got_word.offset..got_word.offset + got_word.len) |i| {
262 const this_word = wb.wordAtIndex(this_str, i);
263 std.testing.expectEqualSlices(
264 u8,
265 got_word.bytes(this_str),
266 this_word.bytes(this_str),
267 ) catch |err| {
268 debug.print("Wrong word on line {d} #{d} offset {d}\n", .{ line_iter.line, idx + 1, i });
269 return err;
270 };
271 }
246 peeked = iter.peek(); 272 peeked = iter.peek();
247 } 273 }
248 } 274 }
249 { 275 {
250 var r_iter = wb.reverseIterator(all_bytes.items); 276 var r_iter = wb.reverseIterator(this_str);
251 var peeked: ?Word = r_iter.peek(); 277 var peeked: ?Word = r_iter.peek();
252 var idx = want.items.len - 1; 278 var idx = want.items.len - 1;
253 279
@@ -256,19 +282,43 @@ test "Segmentation Word Iterator" {
256 const got_word = r_iter.prev().?; 282 const got_word = r_iter.prev().?;
257 std.testing.expectEqualSlices( 283 std.testing.expectEqualSlices(
258 u8, 284 u8,
259 want_word.bytes(all_bytes.items), 285 want_word.bytes(this_str),
260 got_word.bytes(all_bytes.items), 286 got_word.bytes(this_str),
261 ) catch |err| { 287 ) catch |err| {
262 debug.print("Error on line {d}, #{d}\n", .{ line_iter.line, idx + 1 }); 288 debug.print("Error on line {d}, #{d}\n", .{ line_iter.line, idx + 1 });
263 return err; 289 return err;
264 }; 290 };
265 std.testing.expectEqualStrings( 291 std.testing.expectEqualStrings(
266 peeked.?.bytes(all_bytes.items), 292 peeked.?.bytes(this_str),
267 got_word.bytes(all_bytes.items), 293 got_word.bytes(this_str),
268 ) catch |err| { 294 ) catch |err| {
269 debug.print("Peek != word on line {d} #{d}\n", .{ line_iter.line, idx + 1 }); 295 debug.print("Peek != word on line {d} #{d}\n", .{ line_iter.line, idx + 1 });
270 return err; 296 return err;
271 }; 297 };
298 var f_iter = r_iter.forwardIterator();
299 const if_f_word = f_iter.next();
300 if (if_f_word) |f_word| {
301 std.testing.expectEqualStrings(
302 want_word.bytes(this_str),
303 f_word.bytes(this_str),
304 ) catch |err| {
305 debug.print("Reversal Error on line {d}, #{d}\n", .{ line_iter.line, idx });
306 return err;
307 };
308 } else {
309 try testing.expect(false);
310 }
311 for (got_word.offset..got_word.offset + got_word.len) |i| {
312 const this_word = wb.wordAtIndex(this_str, i);
313 std.testing.expectEqualSlices(
314 u8,
315 got_word.bytes(this_str),
316 this_word.bytes(this_str),
317 ) catch |err| {
318 debug.print("Wrong word on line {d} #{d} offset {d}\n", .{ line_iter.line, idx + 1, i });
319 return err;
320 };
321 }
272 peeked = r_iter.peek(); 322 peeked = r_iter.peek();
273 if (idx == 0) break; 323 if (idx == 0) break;
274 } 324 }