summaryrefslogtreecommitdiff
path: root/src/unicode_tests.zig
diff options
context:
space:
mode:
Diffstat (limited to 'src/unicode_tests.zig')
-rw-r--r--src/unicode_tests.zig74
1 files changed, 74 insertions, 0 deletions
diff --git a/src/unicode_tests.zig b/src/unicode_tests.zig
index 2249007..828559a 100644
--- a/src/unicode_tests.zig
+++ b/src/unicode_tests.zig
@@ -219,3 +219,77 @@ test "Segmentation GraphemeIterator" {
219 } 219 }
220 } 220 }
221} 221}
222
// Checks the reverse grapheme iterator against GraphemeBreakTest.txt.
//
// Every data line is split on " ÷ " into fields, each field being one expected
// grapheme cluster written as hex code points (joined by "×" inside the field).
// The code points are UTF-8 encoded into one byte string, the reverse iterator
// is run over that string, and its output is compared with the expected
// clusters from last to first. Once all expected clusters are consumed, the
// iterator must report exhaustion.
test "Segmentation ReverseGraphemeIterator" {
    const allocator = std.testing.allocator;
    var file = try std.fs.cwd().openFile("data/unicode/auxiliary/GraphemeBreakTest.txt", .{});
    defer file.close();
    var buf_reader = std.io.bufferedReader(file.reader());
    var input_stream = buf_reader.reader();

    const data = try Graphemes.init(allocator);
    defer data.deinit(allocator);

    var buf: [4096]u8 = undefined;
    var line_no: usize = 1;

    while (try input_stream.readUntilDelimiterOrEof(&buf, '\n')) |raw| : (line_no += 1) {
        // Skip comments or empty lines.
        if (raw.len == 0 or raw[0] == '#' or raw[0] == '@') continue;

        // Drop the leading "÷ " marker and everything from the trailing
        // " ÷\t#" comment onwards, leaving only the code point fields.
        var line = std.mem.trimLeft(u8, raw, "÷ ");
        if (std.mem.indexOf(u8, line, " ÷\t#")) |octo| {
            line = line[0..octo];
        }

        // Expected clusters, in forward order.
        var want = std.ArrayList(Grapheme).init(allocator);
        defer want.deinit();

        // UTF-8 encoding of every code point on the line, concatenated.
        var all_bytes = std.ArrayList(u8).init(allocator);
        defer all_bytes.deinit();

        var graphemes = std.mem.splitSequence(u8, line, " ÷ ");
        var bytes_index: u32 = 0;

        while (graphemes.next()) |field| {
            var code_points = std.mem.splitScalar(u8, field, ' ');
            var cp_buf: [4]u8 = undefined;
            // Byte length of the current cluster.
            var gc_len: u8 = 0;

            while (code_points.next()) |code_point| {
                // "×" only marks a non-break position inside the field.
                if (std.mem.eql(u8, code_point, "×")) continue;
                const cp: u21 = try std.fmt.parseInt(u21, code_point, 16);
                const len = try unicode.utf8Encode(cp, &cp_buf);
                try all_bytes.appendSlice(cp_buf[0..len]);
                gc_len += len;
            }

            try want.append(Grapheme{ .len = gc_len, .offset = bytes_index });
            bytes_index += gc_len;
        }

        var iter = data.reverseIterator(all_bytes.items);

        // Walk the expected clusters from last to first, matching each one
        // against the next value produced by the reverse iterator.
        var i: usize = want.items.len;
        while (i > 0) {
            i -= 1;
            const want_gc = want.items[i];
            const got_gc = iter.prev() orelse {
                std.debug.print("line {d} grapheme {d}: expected {any} found null\n", .{ line_no, i, want_gc });
                return error.TestExpectedEqual;
            };
            std.testing.expectEqualStrings(
                want_gc.bytes(all_bytes.items),
                got_gc.bytes(all_bytes.items),
            ) catch |err| {
                std.debug.print("line {d} grapheme {d}: expected {any} found {any}\n", .{ line_no, i, want_gc, got_gc });
                return err;
            };
        }

        // All expected clusters were consumed, so anything the iterator still
        // yields is a spurious extra cluster.
        if (iter.prev()) |extra_gc| {
            std.debug.print("line {d}: expected end of iteration found {any}\n", .{ line_no, extra_gc });
            return error.TestExpectedEqual;
        }
    }
}