summary | refs | log | tree | commit | diff
path: root/src/unicode_tests.zig
diff options
context:
space:
mode:
author: Sam Atman, 2025-05-15 23:20:50 -0400
committer: Sam Atman, 2025-05-15 23:20:50 -0400
commit: 713c01c22c7c4051cfc2bd83811fd969b1ccaddc (patch)
tree: ef316295fb9d42bde1121b1284312731b57946c8 /src/unicode_tests.zig
parent: Merge commit 'b5d955f' into develop-next (diff)
download: zg-713c01c22c7c4051cfc2bd83811fd969b1ccaddc.tar.gz
zg-713c01c22c7c4051cfc2bd83811fd969b1ccaddc.tar.xz
zg-713c01c22c7c4051cfc2bd83811fd969b1ccaddc.zip
Merge Grapheme Segmentation Iterator Tests
Diffstat (limited to 'src/unicode_tests.zig')
-rw-r--r-- src/unicode_tests.zig | 113
1 file changed, 34 insertions, 79 deletions
diff --git a/src/unicode_tests.zig b/src/unicode_tests.zig
index 0204b92..7139d4c 100644
--- a/src/unicode_tests.zig
+++ b/src/unicode_tests.zig
@@ -162,89 +162,44 @@ test "Segmentation GraphemeIterator" {
162 bytes_index += cp_index; 162 bytes_index += cp_index;
163 } 163 }
164 164
165 var iter = graph.iterator(all_bytes.items); 165 {
166 166 var iter = graph.iterator(all_bytes.items);
167 // Check.
168 for (want.items) |want_gc| {
169 const got_gc = (iter.next()).?;
170 try std.testing.expectEqualStrings(
171 want_gc.bytes(all_bytes.items),
172 got_gc.bytes(all_bytes.items),
173 );
174 }
175 }
176}
177
178test "Segmentation ReverseGraphemeIterator" {
179 const allocator = std.testing.allocator;
180 var file = try std.fs.cwd().openFile("data/unicode/auxiliary/GraphemeBreakTest.txt", .{});
181 defer file.close();
182 var buf_reader = std.io.bufferedReader(file.reader());
183 var input_stream = buf_reader.reader();
184
185 const data = try Graphemes.init(allocator);
186 defer data.deinit(allocator);
187
188 var buf: [4096]u8 = undefined;
189 var line_no: usize = 1;
190
191 while (try input_stream.readUntilDelimiterOrEof(&buf, '\n')) |raw| : (line_no += 1) {
192 // Skip comments or empty lines.
193 if (raw.len == 0 or raw[0] == '#' or raw[0] == '@') continue;
194
195 // Clean up.
196 var line = std.mem.trimLeft(u8, raw, "÷ ");
197 if (std.mem.indexOf(u8, line, " ÷\t#")) |octo| {
198 line = line[0..octo];
199 }
200 // Iterate over fields.
201 var want = std.ArrayList(Grapheme).init(allocator);
202 defer want.deinit();
203
204 var all_bytes = std.ArrayList(u8).init(allocator);
205 defer all_bytes.deinit();
206
207 var graphemes = std.mem.splitSequence(u8, line, " ÷ ");
208 var bytes_index: u32 = 0;
209
210 while (graphemes.next()) |field| {
211 var code_points = std.mem.splitScalar(u8, field, ' ');
212 var cp_buf: [4]u8 = undefined;
213 var cp_index: u32 = 0;
214 var gc_len: u8 = 0;
215 167
216 while (code_points.next()) |code_point| { 168 // Check.
217 if (std.mem.eql(u8, code_point, "×")) continue; 169 for (want.items) |want_gc| {
218 const cp: u21 = try std.fmt.parseInt(u21, code_point, 16); 170 const got_gc = (iter.next()).?;
219 const len = try unicode.utf8Encode(cp, &cp_buf); 171 try std.testing.expectEqualStrings(
220 try all_bytes.appendSlice(cp_buf[0..len]); 172 want_gc.bytes(all_bytes.items),
221 cp_index += len; 173 got_gc.bytes(all_bytes.items),
222 gc_len += len; 174 );
223 } 175 }
224
225 try want.append(Grapheme{ .len = gc_len, .offset = bytes_index });
226 bytes_index += cp_index;
227 } 176 }
177 {
178 var iter = graph.reverseIterator(all_bytes.items);
228 179
229 // std.debug.print("\nline {}: {s}\n", .{ line_no, all_bytes.items }); 180 // Check.
230 var iter = data.reverseIterator(all_bytes.items); 181 var i: usize = want.items.len;
231 182 while (i > 0) {
232 // Check. 183 i -= 1;
233 var i: usize = want.items.len; 184 const want_gc = want.items[i];
234 while (i > 0) { 185 const got_gc = iter.prev() orelse {
235 i -= 1; 186 std.debug.print(
236 const want_gc = want.items[i]; 187 "line {d} grapheme {d}: expected {any} found null\n",
237 const got_gc = iter.prev() orelse { 188 .{ line_iter.line, i, want_gc },
238 std.debug.print("line {d} grapheme {d}: expected {any} found null\n", .{ line_no, i, want_gc }); 189 );
239 return error.TestExpectedEqual; 190 return error.TestExpectedEqual;
240 }; 191 };
241 std.testing.expectEqualStrings( 192 std.testing.expectEqualStrings(
242 want_gc.bytes(all_bytes.items), 193 want_gc.bytes(all_bytes.items),
243 got_gc.bytes(all_bytes.items), 194 got_gc.bytes(all_bytes.items),
244 ) catch |err| { 195 ) catch |err| {
245 std.debug.print("line {d} grapheme {d}: expected {any} found {any}\n", .{ line_no, i, want_gc, got_gc }); 196 std.debug.print(
246 return err; 197 "line {d} grapheme {d}: expected {any} found {any}\n",
247 }; 198 .{ line_iter.line, i, want_gc, got_gc },
199 );
200 return err;
201 };
202 }
248 } 203 }
249 } 204 }
250} 205}