summary refs log tree commit diff
path: root/src/unicode_tests.zig
diff options
context:
space:
mode:
author: Sam Atman, 2025-05-13 17:19:56 -0400
committer: Sam Atman, 2025-05-15 15:32:38 -0400
commit 5cc8c1875a21bfb398e6685b03a29d6ba1cbf74a (patch)
tree f46287fbc0d92238644c23d0b176354567b647d1 /src/unicode_tests.zig
parent Reverse Word Iterator (diff)
download zg-5cc8c1875a21bfb398e6685b03a29d6ba1cbf74a.tar.gz
zg-5cc8c1875a21bfb398e6685b03a29d6ba1cbf74a.tar.xz
zg-5cc8c1875a21bfb398e6685b03a29d6ba1cbf74a.zip
Hooked up break test, some bugs squashed
The handling of ignorables is really different, because they 'adhere' to the future of the iteration, not the past.
Diffstat (limited to 'src/unicode_tests.zig')
-rw-r--r-- src/unicode_tests.zig 49
1 files changed, 34 insertions, 15 deletions
diff --git a/src/unicode_tests.zig b/src/unicode_tests.zig
index 59f0c6f..8661bfd 100644
--- a/src/unicode_tests.zig
+++ b/src/unicode_tests.zig
@@ -195,7 +195,7 @@ test "Segmentation Word Iterator" {
195 line = line[0..final]; 195 line = line[0..final];
196 } 196 }
197 // Iterate over fields. 197 // Iterate over fields.
198 var want = std.ArrayList(Grapheme).init(allocator); 198 var want = std.ArrayList(Word).init(allocator);
199 defer want.deinit(); 199 defer want.deinit();
200 200
201 var all_bytes = std.ArrayList(u8).init(allocator); 201 var all_bytes = std.ArrayList(u8).init(allocator);
@@ -219,22 +219,40 @@ test "Segmentation Word Iterator" {
219 gc_len += len; 219 gc_len += len;
220 } 220 }
221 221
222 try want.append(Grapheme{ .len = gc_len, .offset = bytes_index }); 222 try want.append(Word{ .len = gc_len, .offset = bytes_index });
223 bytes_index += cp_index; 223 bytes_index += cp_index;
224 } 224 }
225 225 {
226 var iter = wb.iterator(all_bytes.items); 226 var iter = wb.iterator(all_bytes.items);
227 227
228 // Check. 228 // Check.
229 for (want.items, 1..) |want_word, i| { 229 for (want.items, 1..) |want_word, i| {
230 const got_word = (iter.next()).?; 230 const got_word = (iter.next()).?;
231 std.testing.expectEqualStrings( 231 std.testing.expectEqualStrings(
232 want_word.bytes(all_bytes.items), 232 want_word.bytes(all_bytes.items),
233 got_word.bytes(all_bytes.items), 233 got_word.bytes(all_bytes.items),
234 ) catch |err| { 234 ) catch |err| {
235 debug.print("Error on line {d}, #{d}\n", .{ line_iter.line, i }); 235 debug.print("Error on line {d}, #{d}\n", .{ line_iter.line, i });
236 return err; 236 return err;
237 }; 237 };
238 }
239 }
240 {
241 var r_iter = wb.reverseIterator(all_bytes.items);
242 var idx = want.items.len - 1;
243 while (true) : (idx -= 1) {
244 const want_word = want.items[idx];
245 const got_word = r_iter.prev().?;
246 std.testing.expectEqualSlices(
247 u8,
248 want_word.bytes(all_bytes.items),
249 got_word.bytes(all_bytes.items),
250 ) catch |err| {
251 debug.print("Error on line {d}, #{d}\n", .{ line_iter.line, idx + 1 });
252 return err;
253 };
254 if (idx == 0) break;
255 }
238 } 256 }
239 } 257 }
240} 258}
@@ -277,3 +295,4 @@ const GraphemeIterator = @import("Graphemes").Iterator;
277const Normalize = @import("Normalize"); 295const Normalize = @import("Normalize");
278 296
279const WordBreak = @import("WordBreak"); 297const WordBreak = @import("WordBreak");
298const Word = WordBreak.Word;