diff options
Diffstat (limited to 'src/unicode_tests.zig')
| -rw-r--r-- | src/unicode_tests.zig | 42 |
1 files changed, 34 insertions, 8 deletions
diff --git a/src/unicode_tests.zig b/src/unicode_tests.zig index 448ce41..245c03f 100644 --- a/src/unicode_tests.zig +++ b/src/unicode_tests.zig | |||
| @@ -7,11 +7,37 @@ const mem = std.mem; | |||
| 7 | const testing = std.testing; | 7 | const testing = std.testing; |
| 8 | const unicode = std.unicode; | 8 | const unicode = std.unicode; |
| 9 | 9 | ||
| 10 | const grapheme = @import("grapheme"); | ||
| 10 | const Grapheme = @import("grapheme").Grapheme; | 11 | const Grapheme = @import("grapheme").Grapheme; |
| 11 | const GraphemeData = @import("grapheme").GraphemeData; | 12 | const GraphemeData = @import("grapheme").GraphemeData; |
| 12 | const GraphemeIterator = @import("grapheme").Iterator; | 13 | const GraphemeIterator = @import("grapheme").Iterator; |
| 13 | const Normalize = @import("Normalize"); | 14 | const Normalize = @import("Normalize"); |
| 14 | 15 | ||
| 16 | comptime { | ||
| 17 | testing.refAllDecls(grapheme); | ||
| 18 | } | ||
| 19 | test "Iterator.peek" { | ||
| 20 | const peek_seq = "aΔ👨🏻🌾→"; | ||
| 21 | const data = try GraphemeData.init(std.testing.allocator); | ||
| 22 | defer data.deinit(); | ||
| 23 | |||
| 24 | var iter = grapheme.Iterator.init(peek_seq, &data); | ||
| 25 | const peek_a = iter.peek().?; | ||
| 26 | const next_a = iter.next().?; | ||
| 27 | try std.testing.expectEqual(peek_a, next_a); | ||
| 28 | try std.testing.expectEqualStrings("a", peek_a.bytes(peek_seq)); | ||
| 29 | const peek_d1 = iter.peek().?; | ||
| 30 | const peek_d2 = iter.peek().?; | ||
| 31 | try std.testing.expectEqual(peek_d1, peek_d2); | ||
| 32 | const next_d = iter.next().?; | ||
| 33 | try std.testing.expectEqual(peek_d2, next_d); | ||
| 34 | try std.testing.expectEqual(iter.peek(), iter.next()); | ||
| 35 | try std.testing.expectEqual(iter.peek(), iter.next()); | ||
| 36 | try std.testing.expectEqual(null, iter.peek()); | ||
| 37 | try std.testing.expectEqual(null, iter.peek()); | ||
| 38 | try std.testing.expectEqual(iter.peek(), iter.next()); | ||
| 39 | } | ||
| 40 | |||
| 15 | test "Unicode normalization tests" { | 41 | test "Unicode normalization tests" { |
| 16 | var arena = heap.ArenaAllocator.init(testing.allocator); | 42 | var arena = heap.ArenaAllocator.init(testing.allocator); |
| 17 | defer arena.deinit(); | 43 | defer arena.deinit(); |
| @@ -35,7 +61,7 @@ test "Unicode normalization tests" { | |||
| 35 | // Skip comments or empty lines. | 61 | // Skip comments or empty lines. |
| 36 | if (line.len == 0 or line[0] == '#' or line[0] == '@') continue; | 62 | if (line.len == 0 or line[0] == '#' or line[0] == '@') continue; |
| 37 | // Iterate over fields. | 63 | // Iterate over fields. |
| 38 | var fields = mem.split(u8, line, ";"); | 64 | var fields = mem.splitScalar(u8, line, ';'); |
| 39 | var field_index: usize = 0; | 65 | var field_index: usize = 0; |
| 40 | var input: []u8 = undefined; | 66 | var input: []u8 = undefined; |
| 41 | defer allocator.free(input); | 67 | defer allocator.free(input); |
| @@ -45,7 +71,7 @@ test "Unicode normalization tests" { | |||
| 45 | var i_buf = std.ArrayList(u8).init(allocator); | 71 | var i_buf = std.ArrayList(u8).init(allocator); |
| 46 | defer i_buf.deinit(); | 72 | defer i_buf.deinit(); |
| 47 | 73 | ||
| 48 | var i_fields = mem.split(u8, field, " "); | 74 | var i_fields = mem.splitScalar(u8, field, ' '); |
| 49 | while (i_fields.next()) |s| { | 75 | while (i_fields.next()) |s| { |
| 50 | const icp = try fmt.parseInt(u21, s, 16); | 76 | const icp = try fmt.parseInt(u21, s, 16); |
| 51 | const len = try unicode.utf8Encode(icp, &cp_buf); | 77 | const len = try unicode.utf8Encode(icp, &cp_buf); |
| @@ -59,7 +85,7 @@ test "Unicode normalization tests" { | |||
| 59 | var w_buf = std.ArrayList(u8).init(allocator); | 85 | var w_buf = std.ArrayList(u8).init(allocator); |
| 60 | defer w_buf.deinit(); | 86 | defer w_buf.deinit(); |
| 61 | 87 | ||
| 62 | var w_fields = mem.split(u8, field, " "); | 88 | var w_fields = mem.splitScalar(u8, field, ' '); |
| 63 | while (w_fields.next()) |s| { | 89 | while (w_fields.next()) |s| { |
| 64 | const wcp = try fmt.parseInt(u21, s, 16); | 90 | const wcp = try fmt.parseInt(u21, s, 16); |
| 65 | const len = try unicode.utf8Encode(wcp, &cp_buf); | 91 | const len = try unicode.utf8Encode(wcp, &cp_buf); |
| @@ -76,7 +102,7 @@ test "Unicode normalization tests" { | |||
| 76 | var w_buf = std.ArrayList(u8).init(allocator); | 102 | var w_buf = std.ArrayList(u8).init(allocator); |
| 77 | defer w_buf.deinit(); | 103 | defer w_buf.deinit(); |
| 78 | 104 | ||
| 79 | var w_fields = mem.split(u8, field, " "); | 105 | var w_fields = mem.splitScalar(u8, field, ' '); |
| 80 | while (w_fields.next()) |s| { | 106 | while (w_fields.next()) |s| { |
| 81 | const wcp = try fmt.parseInt(u21, s, 16); | 107 | const wcp = try fmt.parseInt(u21, s, 16); |
| 82 | const len = try unicode.utf8Encode(wcp, &cp_buf); | 108 | const len = try unicode.utf8Encode(wcp, &cp_buf); |
| @@ -93,7 +119,7 @@ test "Unicode normalization tests" { | |||
| 93 | var w_buf = std.ArrayList(u8).init(allocator); | 119 | var w_buf = std.ArrayList(u8).init(allocator); |
| 94 | defer w_buf.deinit(); | 120 | defer w_buf.deinit(); |
| 95 | 121 | ||
| 96 | var w_fields = mem.split(u8, field, " "); | 122 | var w_fields = mem.splitScalar(u8, field, ' '); |
| 97 | while (w_fields.next()) |s| { | 123 | while (w_fields.next()) |s| { |
| 98 | const wcp = try fmt.parseInt(u21, s, 16); | 124 | const wcp = try fmt.parseInt(u21, s, 16); |
| 99 | const len = try unicode.utf8Encode(wcp, &cp_buf); | 125 | const len = try unicode.utf8Encode(wcp, &cp_buf); |
| @@ -110,7 +136,7 @@ test "Unicode normalization tests" { | |||
| 110 | var w_buf = std.ArrayList(u8).init(allocator); | 136 | var w_buf = std.ArrayList(u8).init(allocator); |
| 111 | defer w_buf.deinit(); | 137 | defer w_buf.deinit(); |
| 112 | 138 | ||
| 113 | var w_fields = mem.split(u8, field, " "); | 139 | var w_fields = mem.splitScalar(u8, field, ' '); |
| 114 | while (w_fields.next()) |s| { | 140 | while (w_fields.next()) |s| { |
| 115 | const wcp = try fmt.parseInt(u21, s, 16); | 141 | const wcp = try fmt.parseInt(u21, s, 16); |
| 116 | const len = try unicode.utf8Encode(wcp, &cp_buf); | 142 | const len = try unicode.utf8Encode(wcp, &cp_buf); |
| @@ -158,11 +184,11 @@ test "Segmentation GraphemeIterator" { | |||
| 158 | var all_bytes = std.ArrayList(u8).init(allocator); | 184 | var all_bytes = std.ArrayList(u8).init(allocator); |
| 159 | defer all_bytes.deinit(); | 185 | defer all_bytes.deinit(); |
| 160 | 186 | ||
| 161 | var graphemes = std.mem.split(u8, line, " ÷ "); | 187 | var graphemes = std.mem.splitSequence(u8, line, " ÷ "); |
| 162 | var bytes_index: u32 = 0; | 188 | var bytes_index: u32 = 0; |
| 163 | 189 | ||
| 164 | while (graphemes.next()) |field| { | 190 | while (graphemes.next()) |field| { |
| 165 | var code_points = std.mem.split(u8, field, " "); | 191 | var code_points = std.mem.splitScalar(u8, field, ' '); |
| 166 | var cp_buf: [4]u8 = undefined; | 192 | var cp_buf: [4]u8 = undefined; |
| 167 | var cp_index: u32 = 0; | 193 | var cp_index: u32 = 0; |
| 168 | var gc_len: u8 = 0; | 194 | var gc_len: u8 = 0; |