diff options
| author | 2025-05-15 23:20:50 -0400 | |
|---|---|---|
| committer | 2025-05-15 23:20:50 -0400 | |
| commit | 713c01c22c7c4051cfc2bd83811fd969b1ccaddc (patch) | |
| tree | ef316295fb9d42bde1121b1284312731b57946c8 /src/unicode_tests.zig | |
| parent | Merge commit 'b5d955f' into develop-next (diff) | |
| download | zg-713c01c22c7c4051cfc2bd83811fd969b1ccaddc.tar.gz zg-713c01c22c7c4051cfc2bd83811fd969b1ccaddc.tar.xz zg-713c01c22c7c4051cfc2bd83811fd969b1ccaddc.zip | |
Merge Grapheme Segmentation Iterator Tests
Diffstat (limited to 'src/unicode_tests.zig')
| -rw-r--r-- | src/unicode_tests.zig | 113 |
1 file changed, 34 insertions, 79 deletions
diff --git a/src/unicode_tests.zig b/src/unicode_tests.zig index 0204b92..7139d4c 100644 --- a/src/unicode_tests.zig +++ b/src/unicode_tests.zig | |||
| @@ -162,89 +162,44 @@ test "Segmentation GraphemeIterator" { | |||
| 162 | bytes_index += cp_index; | 162 | bytes_index += cp_index; |
| 163 | } | 163 | } |
| 164 | 164 | ||
| 165 | var iter = graph.iterator(all_bytes.items); | 165 | { |
| 166 | 166 | var iter = graph.iterator(all_bytes.items); | |
| 167 | // Check. | ||
| 168 | for (want.items) |want_gc| { | ||
| 169 | const got_gc = (iter.next()).?; | ||
| 170 | try std.testing.expectEqualStrings( | ||
| 171 | want_gc.bytes(all_bytes.items), | ||
| 172 | got_gc.bytes(all_bytes.items), | ||
| 173 | ); | ||
| 174 | } | ||
| 175 | } | ||
| 176 | } | ||
| 177 | |||
| 178 | test "Segmentation ReverseGraphemeIterator" { | ||
| 179 | const allocator = std.testing.allocator; | ||
| 180 | var file = try std.fs.cwd().openFile("data/unicode/auxiliary/GraphemeBreakTest.txt", .{}); | ||
| 181 | defer file.close(); | ||
| 182 | var buf_reader = std.io.bufferedReader(file.reader()); | ||
| 183 | var input_stream = buf_reader.reader(); | ||
| 184 | |||
| 185 | const data = try Graphemes.init(allocator); | ||
| 186 | defer data.deinit(allocator); | ||
| 187 | |||
| 188 | var buf: [4096]u8 = undefined; | ||
| 189 | var line_no: usize = 1; | ||
| 190 | |||
| 191 | while (try input_stream.readUntilDelimiterOrEof(&buf, '\n')) |raw| : (line_no += 1) { | ||
| 192 | // Skip comments or empty lines. | ||
| 193 | if (raw.len == 0 or raw[0] == '#' or raw[0] == '@') continue; | ||
| 194 | |||
| 195 | // Clean up. | ||
| 196 | var line = std.mem.trimLeft(u8, raw, "÷ "); | ||
| 197 | if (std.mem.indexOf(u8, line, " ÷\t#")) |octo| { | ||
| 198 | line = line[0..octo]; | ||
| 199 | } | ||
| 200 | // Iterate over fields. | ||
| 201 | var want = std.ArrayList(Grapheme).init(allocator); | ||
| 202 | defer want.deinit(); | ||
| 203 | |||
| 204 | var all_bytes = std.ArrayList(u8).init(allocator); | ||
| 205 | defer all_bytes.deinit(); | ||
| 206 | |||
| 207 | var graphemes = std.mem.splitSequence(u8, line, " ÷ "); | ||
| 208 | var bytes_index: u32 = 0; | ||
| 209 | |||
| 210 | while (graphemes.next()) |field| { | ||
| 211 | var code_points = std.mem.splitScalar(u8, field, ' '); | ||
| 212 | var cp_buf: [4]u8 = undefined; | ||
| 213 | var cp_index: u32 = 0; | ||
| 214 | var gc_len: u8 = 0; | ||
| 215 | 167 | ||
| 216 | while (code_points.next()) |code_point| { | 168 | // Check. |
| 217 | if (std.mem.eql(u8, code_point, "×")) continue; | 169 | for (want.items) |want_gc| { |
| 218 | const cp: u21 = try std.fmt.parseInt(u21, code_point, 16); | 170 | const got_gc = (iter.next()).?; |
| 219 | const len = try unicode.utf8Encode(cp, &cp_buf); | 171 | try std.testing.expectEqualStrings( |
| 220 | try all_bytes.appendSlice(cp_buf[0..len]); | 172 | want_gc.bytes(all_bytes.items), |
| 221 | cp_index += len; | 173 | got_gc.bytes(all_bytes.items), |
| 222 | gc_len += len; | 174 | ); |
| 223 | } | 175 | } |
| 224 | |||
| 225 | try want.append(Grapheme{ .len = gc_len, .offset = bytes_index }); | ||
| 226 | bytes_index += cp_index; | ||
| 227 | } | 176 | } |
| 177 | { | ||
| 178 | var iter = graph.reverseIterator(all_bytes.items); | ||
| 228 | 179 | ||
| 229 | // std.debug.print("\nline {}: {s}\n", .{ line_no, all_bytes.items }); | 180 | // Check. |
| 230 | var iter = data.reverseIterator(all_bytes.items); | 181 | var i: usize = want.items.len; |
| 231 | 182 | while (i > 0) { | |
| 232 | // Check. | 183 | i -= 1; |
| 233 | var i: usize = want.items.len; | 184 | const want_gc = want.items[i]; |
| 234 | while (i > 0) { | 185 | const got_gc = iter.prev() orelse { |
| 235 | i -= 1; | 186 | std.debug.print( |
| 236 | const want_gc = want.items[i]; | 187 | "line {d} grapheme {d}: expected {any} found null\n", |
| 237 | const got_gc = iter.prev() orelse { | 188 | .{ line_iter.line, i, want_gc }, |
| 238 | std.debug.print("line {d} grapheme {d}: expected {any} found null\n", .{ line_no, i, want_gc }); | 189 | ); |
| 239 | return error.TestExpectedEqual; | 190 | return error.TestExpectedEqual; |
| 240 | }; | 191 | }; |
| 241 | std.testing.expectEqualStrings( | 192 | std.testing.expectEqualStrings( |
| 242 | want_gc.bytes(all_bytes.items), | 193 | want_gc.bytes(all_bytes.items), |
| 243 | got_gc.bytes(all_bytes.items), | 194 | got_gc.bytes(all_bytes.items), |
| 244 | ) catch |err| { | 195 | ) catch |err| { |
| 245 | std.debug.print("line {d} grapheme {d}: expected {any} found {any}\n", .{ line_no, i, want_gc, got_gc }); | 196 | std.debug.print( |
| 246 | return err; | 197 | "line {d} grapheme {d}: expected {any} found {any}\n", |
| 247 | }; | 198 | .{ line_iter.line, i, want_gc, got_gc }, |
| 199 | ); | ||
| 200 | return err; | ||
| 201 | }; | ||
| 202 | } | ||
| 248 | } | 203 | } |
| 249 | } | 204 | } |
| 250 | } | 205 | } |