diff options
| author | 2025-04-30 11:58:19 -0400 | |
|---|---|---|
| committer | 2025-04-30 11:58:19 -0400 | |
| commit | 1be5e46490e061761b4b97dff5c6acb2181d6fe9 (patch) | |
| tree | 77a1edcdedd7afae7428e92feba37d2bb1035b22 | |
| parent | Add general tests step (diff) | |
| download | zg-1be5e46490e061761b4b97dff5c6acb2181d6fe9.tar.gz zg-1be5e46490e061761b4b97dff5c6acb2181d6fe9.tar.xz zg-1be5e46490e061761b4b97dff5c6acb2181d6fe9.zip | |
Factor out 'Data' for grapheme and DisplayWidth
In the process of refactoring the whole library, so that it doesn't
expose anything called "Data" separately from user functionality.
| -rw-r--r-- | .gitignore | 1 | ||||
| -rw-r--r-- | NEWS.md | 46 | ||||
| -rw-r--r-- | build.zig | 74 | ||||
| -rw-r--r-- | src/DisplayWidth.zig | 240 | ||||
| -rw-r--r-- | src/GraphemeData.zig | 12 | ||||
| -rw-r--r-- | src/Normalize.zig | 29 | ||||
| -rw-r--r-- | src/WidthData.zig | 32 | ||||
| -rw-r--r-- | src/grapheme.zig | 109 | ||||
| -rw-r--r-- | src/unicode_tests.zig | 10 |
9 files changed, 384 insertions, 169 deletions
| @@ -11,3 +11,4 @@ | |||
| 11 | 11 | ||
| 12 | .zig-cache/ | 12 | .zig-cache/ |
| 13 | zig-out/ | 13 | zig-out/ |
| 14 | notes.md | ||
| @@ -0,0 +1,46 @@ | |||
| 1 | # News | ||
| 2 | |||
| 3 | ## zg v0.14.0 Release Notes | ||
| 4 | |||
| 5 | This is the first minor point release since Sam Atman (me) took over | ||
| 6 | maintenance of `zg` from the inimitable José Colon, aka | ||
| 7 | @dude_the_builder. | ||
| 8 | |||
| 9 | As it's a fairly complex project, I'm adding a NEWS.md so that users | ||
| 10 | have a place to check for changes. | ||
| 11 | |||
| 12 | ### Data is Unmanaged | ||
| 13 | |||
| 14 | This is the biggest change. As of `v0.14`, all structs which need | ||
| 15 | heap allocation no longer have a copy of their allocator. It was felt | ||
| 16 | that this was redundant, especially when several such structures were | ||
| 17 | in use, and it reflects a general trend in the standard library toward | ||
| 18 | fewer managed data structures. | ||
| 19 | |||
| 20 | Getting up to speed is a matter of passing the allocator to `deinit`. | ||
| 21 | |||
| 22 | This change comes courtesy of [lch361](https://lch361.net), in his | ||
| 23 | first contribution to the repo. Thanks Lich! | ||
| 24 | |||
| 25 | ### Grapheme Iterator Creation | ||
| 26 | |||
| 27 | This is a modest streamlining of how a grapheme iterator is created. | ||
| 28 | |||
| 29 | Before: | ||
| 30 | |||
| 31 | ```zig | ||
| 32 | const gd = try grapheme.GraphemeData.init(allocator); | ||
| 33 | defer gd.deinit(); | ||
| 34 | var iter = grapheme.Iterator.init("🤘🏻some rad string! 🤘🏿", &gd); | ||
| 35 | ``` | ||
| 36 | |||
| 37 | Now: | ||
| 38 | |||
| 39 | ```zig | ||
| 40 | const gd = try grapheme.GraphemeData.init(allocator); | ||
| 41 | defer gd.deinit(allocator); | ||
| 42 | var iter = gd.iterator("🤘🏻some rad string! 🤘🏿"); | ||
| 43 | ``` | ||
| 44 | |||
| 45 | You can still make an iterator with `grapheme.Iterator.init`, but the | ||
| 46 | second argument has to be `&gd.gd`. | ||
| @@ -191,33 +191,18 @@ pub fn build(b: *std.Build) void { | |||
| 191 | }); | 191 | }); |
| 192 | const code_point_tr = b.addRunArtifact(code_point_t); | 192 | const code_point_tr = b.addRunArtifact(code_point_t); |
| 193 | 193 | ||
| 194 | // Grapheme clusters | 194 | // Graphemes |
| 195 | const grapheme_data = b.createModule(.{ | 195 | const graphemes = b.addModule("Graphemes", .{ |
| 196 | .root_source_file = b.path("src/GraphemeData.zig"), | ||
| 197 | .target = target, | ||
| 198 | .optimize = optimize, | ||
| 199 | }); | ||
| 200 | grapheme_data.addAnonymousImport("gbp", .{ .root_source_file = gbp_gen_out }); | ||
| 201 | |||
| 202 | const grapheme_data_t = b.addTest(.{ | ||
| 203 | .name = "grapheme_data", | ||
| 204 | .root_module = grapheme_data, | ||
| 205 | .target = target, | ||
| 206 | .optimize = optimize, | ||
| 207 | }); | ||
| 208 | const grapheme_data_tr = b.addRunArtifact(grapheme_data_t); | ||
| 209 | |||
| 210 | const grapheme = b.addModule("grapheme", .{ | ||
| 211 | .root_source_file = b.path("src/grapheme.zig"), | 196 | .root_source_file = b.path("src/grapheme.zig"), |
| 212 | .target = target, | 197 | .target = target, |
| 213 | .optimize = optimize, | 198 | .optimize = optimize, |
| 214 | }); | 199 | }); |
| 215 | grapheme.addImport("code_point", code_point); | 200 | graphemes.addAnonymousImport("gbp", .{ .root_source_file = gbp_gen_out }); |
| 216 | grapheme.addImport("GraphemeData", grapheme_data); | 201 | graphemes.addImport("code_point", code_point); |
| 217 | 202 | ||
| 218 | const grapheme_t = b.addTest(.{ | 203 | const grapheme_t = b.addTest(.{ |
| 219 | .name = "grapheme", | 204 | .name = "Graphemes", |
| 220 | .root_module = grapheme, | 205 | .root_module = graphemes, |
| 221 | .target = target, | 206 | .target = target, |
| 222 | .optimize = optimize, | 207 | .optimize = optimize, |
| 223 | }); | 208 | }); |
| @@ -239,31 +224,15 @@ pub fn build(b: *std.Build) void { | |||
| 239 | const ascii_tr = b.addRunArtifact(ascii_t); | 224 | const ascii_tr = b.addRunArtifact(ascii_t); |
| 240 | 225 | ||
| 241 | // Fixed pitch font display width | 226 | // Fixed pitch font display width |
| 242 | const width_data = b.createModule(.{ | ||
| 243 | .root_source_file = b.path("src/WidthData.zig"), | ||
| 244 | .target = target, | ||
| 245 | .optimize = optimize, | ||
| 246 | }); | ||
| 247 | width_data.addAnonymousImport("dwp", .{ .root_source_file = dwp_gen_out }); | ||
| 248 | width_data.addImport("GraphemeData", grapheme_data); | ||
| 249 | |||
| 250 | const width_data_t = b.addTest(.{ | ||
| 251 | .name = "width_data", | ||
| 252 | .root_module = width_data, | ||
| 253 | .target = target, | ||
| 254 | .optimize = optimize, | ||
| 255 | }); | ||
| 256 | const width_data_tr = b.addRunArtifact(width_data_t); | ||
| 257 | |||
| 258 | const display_width = b.addModule("DisplayWidth", .{ | 227 | const display_width = b.addModule("DisplayWidth", .{ |
| 259 | .root_source_file = b.path("src/DisplayWidth.zig"), | 228 | .root_source_file = b.path("src/DisplayWidth.zig"), |
| 260 | .target = target, | 229 | .target = target, |
| 261 | .optimize = optimize, | 230 | .optimize = optimize, |
| 262 | }); | 231 | }); |
| 232 | display_width.addAnonymousImport("dwp", .{ .root_source_file = dwp_gen_out }); | ||
| 263 | display_width.addImport("ascii", ascii); | 233 | display_width.addImport("ascii", ascii); |
| 264 | display_width.addImport("code_point", code_point); | 234 | display_width.addImport("code_point", code_point); |
| 265 | display_width.addImport("grapheme", grapheme); | 235 | display_width.addImport("Graphemes", graphemes); |
| 266 | display_width.addImport("DisplayWidthData", width_data); | ||
| 267 | display_width.addOptions("options", options); // For testing | 236 | display_width.addOptions("options", options); // For testing |
| 268 | 237 | ||
| 269 | const display_width_t = b.addTest(.{ | 238 | const display_width_t = b.addTest(.{ |
| @@ -361,6 +330,14 @@ pub fn build(b: *std.Build) void { | |||
| 361 | norm_data.addImport("HangulData", hangul_data); | 330 | norm_data.addImport("HangulData", hangul_data); |
| 362 | norm_data.addImport("NormPropsData", normp_data); | 331 | norm_data.addImport("NormPropsData", normp_data); |
| 363 | 332 | ||
| 333 | const norm_data_t = b.addTest(.{ | ||
| 334 | .name = "norm_data", | ||
| 335 | .root_module = norm_data, | ||
| 336 | .target = target, | ||
| 337 | .optimize = optimize, | ||
| 338 | }); | ||
| 339 | const norm_data_tr = b.addRunArtifact(norm_data_t); | ||
| 340 | |||
| 364 | const norm = b.addModule("Normalize", .{ | 341 | const norm = b.addModule("Normalize", .{ |
| 365 | .root_source_file = b.path("src/Normalize.zig"), | 342 | .root_source_file = b.path("src/Normalize.zig"), |
| 366 | .target = target, | 343 | .target = target, |
| @@ -370,13 +347,13 @@ pub fn build(b: *std.Build) void { | |||
| 370 | norm.addImport("code_point", code_point); | 347 | norm.addImport("code_point", code_point); |
| 371 | norm.addImport("NormData", norm_data); | 348 | norm.addImport("NormData", norm_data); |
| 372 | 349 | ||
| 373 | const norm_data_t = b.addTest(.{ | 350 | const norm_t = b.addTest(.{ |
| 374 | .name = "norm_data", | 351 | .name = "norm", |
| 375 | .root_module = norm_data, | 352 | .root_module = norm, |
| 376 | .target = target, | 353 | .target = target, |
| 377 | .optimize = optimize, | 354 | .optimize = optimize, |
| 378 | }); | 355 | }); |
| 379 | const norm_data_tr = b.addRunArtifact(norm_data_t); | 356 | const norm_tr = b.addRunArtifact(norm_t); |
| 380 | 357 | ||
| 381 | // General Category | 358 | // General Category |
| 382 | const gencat_data = b.addModule("GenCatData", .{ | 359 | const gencat_data = b.addModule("GenCatData", .{ |
| @@ -478,18 +455,14 @@ pub fn build(b: *std.Build) void { | |||
| 478 | .target = target, | 455 | .target = target, |
| 479 | .optimize = optimize, | 456 | .optimize = optimize, |
| 480 | }); | 457 | }); |
| 481 | unicode_tests.root_module.addImport("grapheme", grapheme); | 458 | unicode_tests.root_module.addImport("grapheme", graphemes); |
| 482 | unicode_tests.root_module.addImport("Normalize", norm); | 459 | unicode_tests.root_module.addImport("Normalize", norm); |
| 483 | 460 | ||
| 484 | const run_unicode_tests = b.addRunArtifact(unicode_tests); | 461 | const run_unicode_tests = b.addRunArtifact(unicode_tests); |
| 485 | 462 | ||
| 486 | const unicode_test_step = b.step("unicode-test", "Run Unicode tests"); | 463 | const test_step = b.step("test", "Run all module tests"); |
| 487 | unicode_test_step.dependOn(&run_unicode_tests.step); | 464 | test_step.dependOn(&run_unicode_tests.step); |
| 488 | |||
| 489 | const test_step = b.step("test", "Run general tests"); | ||
| 490 | test_step.dependOn(&code_point_tr.step); | 465 | test_step.dependOn(&code_point_tr.step); |
| 491 | test_step.dependOn(&grapheme_data_tr.step); | ||
| 492 | test_step.dependOn(&width_data_tr.step); | ||
| 493 | test_step.dependOn(&display_width_tr.step); | 466 | test_step.dependOn(&display_width_tr.step); |
| 494 | test_step.dependOn(&grapheme_tr.step); | 467 | test_step.dependOn(&grapheme_tr.step); |
| 495 | test_step.dependOn(&ascii_tr.step); | 468 | test_step.dependOn(&ascii_tr.step); |
| @@ -499,6 +472,7 @@ pub fn build(b: *std.Build) void { | |||
| 499 | test_step.dependOn(&hangul_data_tr.step); | 472 | test_step.dependOn(&hangul_data_tr.step); |
| 500 | test_step.dependOn(&normp_data_tr.step); | 473 | test_step.dependOn(&normp_data_tr.step); |
| 501 | test_step.dependOn(&norm_data_tr.step); | 474 | test_step.dependOn(&norm_data_tr.step); |
| 475 | test_step.dependOn(&norm_tr.step); | ||
| 502 | test_step.dependOn(&gencat_data_tr.step); | 476 | test_step.dependOn(&gencat_data_tr.step); |
| 503 | test_step.dependOn(&case_fold_tr.step); | 477 | test_step.dependOn(&case_fold_tr.step); |
| 504 | test_step.dependOn(&case_data_tr.step); | 478 | test_step.dependOn(&case_data_tr.step); |
diff --git a/src/DisplayWidth.zig b/src/DisplayWidth.zig index 8631bd4..11ec59e 100644 --- a/src/DisplayWidth.zig +++ b/src/DisplayWidth.zig | |||
| @@ -2,38 +2,131 @@ const std = @import("std"); | |||
| 2 | const builtin = @import("builtin"); | 2 | const builtin = @import("builtin"); |
| 3 | const options = @import("options"); | 3 | const options = @import("options"); |
| 4 | const ArrayList = std.ArrayList; | 4 | const ArrayList = std.ArrayList; |
| 5 | const compress = std.compress; | ||
| 5 | const mem = std.mem; | 6 | const mem = std.mem; |
| 6 | const simd = std.simd; | 7 | const simd = std.simd; |
| 7 | const testing = std.testing; | 8 | const testing = std.testing; |
| 8 | 9 | ||
| 9 | const ascii = @import("ascii"); | 10 | const ascii = @import("ascii"); |
| 10 | const CodePointIterator = @import("code_point").Iterator; | 11 | const CodePointIterator = @import("code_point").Iterator; |
| 11 | const GraphemeIterator = @import("grapheme").Iterator; | ||
| 12 | pub const DisplayWidthData = @import("DisplayWidthData"); | 12 | pub const DisplayWidthData = @import("DisplayWidthData"); |
| 13 | 13 | ||
| 14 | data: *const DisplayWidthData, | 14 | const Graphemes = @import("Graphemes"); |
| 15 | 15 | ||
| 16 | const Self = @This(); | 16 | g_data: Graphemes, |
| 17 | s1: []u16 = undefined, | ||
| 18 | s2: []i4 = undefined, | ||
| 19 | owns_gdata: bool, | ||
| 20 | |||
| 21 | const DisplayWidth = @This(); | ||
| 22 | |||
| 23 | pub fn init(allocator: mem.Allocator) mem.Allocator.Error!DisplayWidth { | ||
| 24 | var dw: DisplayWidth = try DisplayWidth.setup(allocator); | ||
| 25 | errdefer { | ||
| 26 | allocator.free(dw.s1); | ||
| 27 | allocator.free(dw.s2); | ||
| 28 | } | ||
| 29 | dw.owns_gdata = true; | ||
| 30 | dw.g_data = try Graphemes.init(allocator); | ||
| 31 | errdefer dw.g_data.deinit(allocator); | ||
| 32 | return dw; | ||
| 33 | } | ||
| 34 | |||
| 35 | pub fn initWithGraphemeData(allocator: mem.Allocator, g_data: Graphemes) mem.Allocator.Error!DisplayWidth { | ||
| 36 | var dw = try DisplayWidth.setup(allocator); | ||
| 37 | dw.g_data = g_data; | ||
| 38 | dw.owns_gdata = false; | ||
| 39 | return dw; | ||
| 40 | } | ||
| 41 | |||
| 42 | // Sets up the DisplayWidthData, leaving the GraphemeData undefined. | ||
| 43 | fn setup(allocator: mem.Allocator) mem.Allocator.Error!DisplayWidth { | ||
| 44 | const decompressor = compress.flate.inflate.decompressor; | ||
| 45 | const in_bytes = @embedFile("dwp"); | ||
| 46 | var in_fbs = std.io.fixedBufferStream(in_bytes); | ||
| 47 | var in_decomp = decompressor(.raw, in_fbs.reader()); | ||
| 48 | var reader = in_decomp.reader(); | ||
| 49 | |||
| 50 | const endian = builtin.cpu.arch.endian(); | ||
| 51 | |||
| 52 | var dw: DisplayWidth = undefined; | ||
| 53 | |||
| 54 | const stage_1_len: u16 = reader.readInt(u16, endian) catch unreachable; | ||
| 55 | dw.s1 = try allocator.alloc(u16, stage_1_len); | ||
| 56 | errdefer allocator.free(dw.s1); | ||
| 57 | for (0..stage_1_len) |i| dw.s1[i] = reader.readInt(u16, endian) catch unreachable; | ||
| 58 | |||
| 59 | const stage_2_len: u16 = reader.readInt(u16, endian) catch unreachable; | ||
| 60 | dw.s2 = try allocator.alloc(i4, stage_2_len); | ||
| 61 | errdefer allocator.free(dw.s2); | ||
| 62 | for (0..stage_2_len) |i| dw.s2[i] = @intCast(reader.readInt(i8, endian) catch unreachable); | ||
| 63 | |||
| 64 | return dw; | ||
| 65 | } | ||
| 66 | |||
| 67 | pub fn deinit(dw: *const DisplayWidth, allocator: mem.Allocator) void { | ||
| 68 | allocator.free(dw.s1); | ||
| 69 | allocator.free(dw.s2); | ||
| 70 | if (dw.owns_gdata) dw.g_data.deinit(allocator); | ||
| 71 | } | ||
| 72 | |||
| 73 | /// codePointWidth returns the number of cells `cp` requires when rendered | ||
| 74 | /// in a fixed-pitch font (i.e. a terminal screen). This can range from -1 to | ||
| 75 | /// 3, where BACKSPACE and DELETE return -1 and 3-em-dash returns 3. C0/C1 | ||
| 76 | /// control codes return 0. If `cjk` is true, ambiguous code points return 2, | ||
| 77 | /// otherwise they return 1. | ||
| 78 | pub fn codePointWidth(dw: DisplayWidth, cp: u21) i4 { | ||
| 79 | return dw.s2[dw.s1[cp >> 8] + (cp & 0xff)]; | ||
| 80 | } | ||
| 81 | |||
| 82 | test "codePointWidth" { | ||
| 83 | const dw = try DisplayWidth.init(std.testing.allocator); | ||
| 84 | defer dw.deinit(std.testing.allocator); | ||
| 85 | try testing.expectEqual(@as(i4, 0), dw.codePointWidth(0x0000)); // null | ||
| 86 | try testing.expectEqual(@as(i4, -1), dw.codePointWidth(0x8)); // \b | ||
| 87 | try testing.expectEqual(@as(i4, -1), dw.codePointWidth(0x7f)); // DEL | ||
| 88 | try testing.expectEqual(@as(i4, 0), dw.codePointWidth(0x0005)); // Cf | ||
| 89 | try testing.expectEqual(@as(i4, 0), dw.codePointWidth(0x0007)); // \a BEL | ||
| 90 | try testing.expectEqual(@as(i4, 0), dw.codePointWidth(0x000A)); // \n LF | ||
| 91 | try testing.expectEqual(@as(i4, 0), dw.codePointWidth(0x000B)); // \v VT | ||
| 92 | try testing.expectEqual(@as(i4, 0), dw.codePointWidth(0x000C)); // \f FF | ||
| 93 | try testing.expectEqual(@as(i4, 0), dw.codePointWidth(0x000D)); // \r CR | ||
| 94 | try testing.expectEqual(@as(i4, 0), dw.codePointWidth(0x000E)); // SQ | ||
| 95 | try testing.expectEqual(@as(i4, 0), dw.codePointWidth(0x000F)); // SI | ||
| 96 | |||
| 97 | try testing.expectEqual(@as(i4, 0), dw.codePointWidth(0x070F)); // Cf | ||
| 98 | try testing.expectEqual(@as(i4, 1), dw.codePointWidth(0x0603)); // Cf Arabic | ||
| 99 | |||
| 100 | try testing.expectEqual(@as(i4, 1), dw.codePointWidth(0x00AD)); // soft-hyphen | ||
| 101 | try testing.expectEqual(@as(i4, 2), dw.codePointWidth(0x2E3A)); // two-em dash | ||
| 102 | try testing.expectEqual(@as(i4, 3), dw.codePointWidth(0x2E3B)); // three-em dash | ||
| 103 | |||
| 104 | try testing.expectEqual(@as(i4, 1), dw.codePointWidth(0x00BD)); // ambiguous halfwidth | ||
| 105 | |||
| 106 | try testing.expectEqual(@as(i4, 1), dw.codePointWidth('é')); | ||
| 107 | try testing.expectEqual(@as(i4, 2), dw.codePointWidth('😊')); | ||
| 108 | try testing.expectEqual(@as(i4, 2), dw.codePointWidth('统')); | ||
| 109 | } | ||
| 17 | 110 | ||
| 18 | /// strWidth returns the total display width of `str` as the number of cells | 111 | /// strWidth returns the total display width of `str` as the number of cells |
| 19 | /// required in a fixed-pitch font (i.e. a terminal screen). | 112 | /// required in a fixed-pitch font (i.e. a terminal screen). |
| 20 | pub fn strWidth(self: Self, str: []const u8) usize { | 113 | pub fn strWidth(dw: DisplayWidth, str: []const u8) usize { |
| 21 | var total: isize = 0; | 114 | var total: isize = 0; |
| 22 | 115 | ||
| 23 | // ASCII fast path | 116 | // ASCII fast path |
| 24 | if (ascii.isAsciiOnly(str)) { | 117 | if (ascii.isAsciiOnly(str)) { |
| 25 | for (str) |b| total += self.data.codePointWidth(b); | 118 | for (str) |b| total += dw.codePointWidth(b); |
| 26 | return @intCast(@max(0, total)); | 119 | return @intCast(@max(0, total)); |
| 27 | } | 120 | } |
| 28 | 121 | ||
| 29 | var giter = GraphemeIterator.init(str, &self.data.g_data); | 122 | var giter = dw.g_data.iterator(str); |
| 30 | 123 | ||
| 31 | while (giter.next()) |gc| { | 124 | while (giter.next()) |gc| { |
| 32 | var cp_iter = CodePointIterator{ .bytes = gc.bytes(str) }; | 125 | var cp_iter = CodePointIterator{ .bytes = gc.bytes(str) }; |
| 33 | var gc_total: isize = 0; | 126 | var gc_total: isize = 0; |
| 34 | 127 | ||
| 35 | while (cp_iter.next()) |cp| { | 128 | while (cp_iter.next()) |cp| { |
| 36 | var w = self.data.codePointWidth(cp.code); | 129 | var w = dw.codePointWidth(cp.code); |
| 37 | 130 | ||
| 38 | if (w != 0) { | 131 | if (w != 0) { |
| 39 | // Handle text emoji sequence. | 132 | // Handle text emoji sequence. |
| @@ -58,41 +151,40 @@ pub fn strWidth(self: Self, str: []const u8) usize { | |||
| 58 | } | 151 | } |
| 59 | 152 | ||
| 60 | test "strWidth" { | 153 | test "strWidth" { |
| 61 | const data = try DisplayWidthData.init(testing.allocator); | 154 | const dw = try DisplayWidth.init(testing.allocator); |
| 62 | defer data.deinit(testing.allocator); | 155 | defer dw.deinit(testing.allocator); |
| 63 | const self = Self{ .data = &data }; | ||
| 64 | const c0 = options.c0_width orelse 0; | 156 | const c0 = options.c0_width orelse 0; |
| 65 | 157 | ||
| 66 | try testing.expectEqual(@as(usize, 5), self.strWidth("Hello\r\n")); | 158 | try testing.expectEqual(@as(usize, 5), dw.strWidth("Hello\r\n")); |
| 67 | try testing.expectEqual(@as(usize, 1), self.strWidth("\u{0065}\u{0301}")); | 159 | try testing.expectEqual(@as(usize, 1), dw.strWidth("\u{0065}\u{0301}")); |
| 68 | try testing.expectEqual(@as(usize, 2), self.strWidth("\u{1F476}\u{1F3FF}\u{0308}\u{200D}\u{1F476}\u{1F3FF}")); | 160 | try testing.expectEqual(@as(usize, 2), dw.strWidth("\u{1F476}\u{1F3FF}\u{0308}\u{200D}\u{1F476}\u{1F3FF}")); |
| 69 | try testing.expectEqual(@as(usize, 8), self.strWidth("Hello 😊")); | 161 | try testing.expectEqual(@as(usize, 8), dw.strWidth("Hello 😊")); |
| 70 | try testing.expectEqual(@as(usize, 8), self.strWidth("Héllo 😊")); | 162 | try testing.expectEqual(@as(usize, 8), dw.strWidth("Héllo 😊")); |
| 71 | try testing.expectEqual(@as(usize, 8), self.strWidth("Héllo :)")); | 163 | try testing.expectEqual(@as(usize, 8), dw.strWidth("Héllo :)")); |
| 72 | try testing.expectEqual(@as(usize, 8), self.strWidth("Héllo 🇪🇸")); | 164 | try testing.expectEqual(@as(usize, 8), dw.strWidth("Héllo 🇪🇸")); |
| 73 | try testing.expectEqual(@as(usize, 2), self.strWidth("\u{26A1}")); // Lone emoji | 165 | try testing.expectEqual(@as(usize, 2), dw.strWidth("\u{26A1}")); // Lone emoji |
| 74 | try testing.expectEqual(@as(usize, 1), self.strWidth("\u{26A1}\u{FE0E}")); // Text sequence | 166 | try testing.expectEqual(@as(usize, 1), dw.strWidth("\u{26A1}\u{FE0E}")); // Text sequence |
| 75 | try testing.expectEqual(@as(usize, 2), self.strWidth("\u{26A1}\u{FE0F}")); // Presentation sequence | 167 | try testing.expectEqual(@as(usize, 2), dw.strWidth("\u{26A1}\u{FE0F}")); // Presentation sequence |
| 76 | try testing.expectEqual(@as(usize, 1), self.strWidth("\u{2764}")); // Default text presentation | 168 | try testing.expectEqual(@as(usize, 1), dw.strWidth("\u{2764}")); // Default text presentation |
| 77 | try testing.expectEqual(@as(usize, 1), self.strWidth("\u{2764}\u{FE0E}")); // Default text presentation with VS15 selector | 169 | try testing.expectEqual(@as(usize, 1), dw.strWidth("\u{2764}\u{FE0E}")); // Default text presentation with VS15 selector |
| 78 | try testing.expectEqual(@as(usize, 2), self.strWidth("\u{2764}\u{FE0F}")); // Default text presentation with VS16 selector | 170 | try testing.expectEqual(@as(usize, 2), dw.strWidth("\u{2764}\u{FE0F}")); // Default text presentation with VS16 selector |
| 79 | const expect_bs: usize = if (c0 == 0) 0 else 1 + c0; | 171 | const expect_bs: usize = if (c0 == 0) 0 else 1 + c0; |
| 80 | try testing.expectEqual(expect_bs, self.strWidth("A\x08")); // Backspace | 172 | try testing.expectEqual(expect_bs, dw.strWidth("A\x08")); // Backspace |
| 81 | try testing.expectEqual(expect_bs, self.strWidth("\x7FA")); // DEL | 173 | try testing.expectEqual(expect_bs, dw.strWidth("\x7FA")); // DEL |
| 82 | const expect_long_del: usize = if (c0 == 0) 0 else 1 + (c0 * 3); | 174 | const expect_long_del: usize = if (c0 == 0) 0 else 1 + (c0 * 3); |
| 83 | try testing.expectEqual(expect_long_del, self.strWidth("\x7FA\x08\x08")); // never less than 0 | 175 | try testing.expectEqual(expect_long_del, dw.strWidth("\x7FA\x08\x08")); // never less than 0 |
| 84 | 176 | ||
| 85 | // wcwidth Python lib tests. See: https://github.com/jquast/wcwidth/blob/master/tests/test_core.py | 177 | // wcwidth Python lib tests. See: https://github.com/jquast/wcwidth/blob/master/tests/test_core.py |
| 86 | const empty = ""; | 178 | const empty = ""; |
| 87 | try testing.expectEqual(@as(usize, 0), self.strWidth(empty)); | 179 | try testing.expectEqual(@as(usize, 0), dw.strWidth(empty)); |
| 88 | const with_null = "hello\x00world"; | 180 | const with_null = "hello\x00world"; |
| 89 | try testing.expectEqual(@as(usize, 10 + c0), self.strWidth(with_null)); | 181 | try testing.expectEqual(@as(usize, 10 + c0), dw.strWidth(with_null)); |
| 90 | const hello_jp = "コンニチハ, セカイ!"; | 182 | const hello_jp = "コンニチハ, セカイ!"; |
| 91 | try testing.expectEqual(@as(usize, 19), self.strWidth(hello_jp)); | 183 | try testing.expectEqual(@as(usize, 19), dw.strWidth(hello_jp)); |
| 92 | const control = "\x1b[0m"; | 184 | const control = "\x1b[0m"; |
| 93 | try testing.expectEqual(@as(usize, 3 + c0), self.strWidth(control)); | 185 | try testing.expectEqual(@as(usize, 3 + c0), dw.strWidth(control)); |
| 94 | const balinese = "\u{1B13}\u{1B28}\u{1B2E}\u{1B44}"; | 186 | const balinese = "\u{1B13}\u{1B28}\u{1B2E}\u{1B44}"; |
| 95 | try testing.expectEqual(@as(usize, 3), self.strWidth(balinese)); | 187 | try testing.expectEqual(@as(usize, 3), dw.strWidth(balinese)); |
| 96 | 188 | ||
| 97 | // These commented out tests require a new specification for complex scripts. | 189 | // These commented out tests require a new specification for complex scripts. |
| 98 | // See: https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf | 190 | // See: https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf |
| @@ -106,17 +198,17 @@ test "strWidth" { | |||
| 106 | // try testing.expectEqual(@as(usize, 3), strWidth(kannada_1)); | 198 | // try testing.expectEqual(@as(usize, 3), strWidth(kannada_1)); |
| 107 | // The following passes but as a mere coincidence. | 199 | // The following passes but as a mere coincidence. |
| 108 | const kannada_2 = "\u{0cb0}\u{0cbc}\u{0ccd}\u{0c9a}"; | 200 | const kannada_2 = "\u{0cb0}\u{0cbc}\u{0ccd}\u{0c9a}"; |
| 109 | try testing.expectEqual(@as(usize, 2), self.strWidth(kannada_2)); | 201 | try testing.expectEqual(@as(usize, 2), dw.strWidth(kannada_2)); |
| 110 | 202 | ||
| 111 | // From Rust https://github.com/jameslanska/unicode-display-width | 203 | // From Rust https://github.com/jameslanska/unicode-display-width |
| 112 | try testing.expectEqual(@as(usize, 15), self.strWidth("🔥🗡🍩👩🏻🚀⏰💃🏼🔦👍🏻")); | 204 | try testing.expectEqual(@as(usize, 15), dw.strWidth("🔥🗡🍩👩🏻🚀⏰💃🏼🔦👍🏻")); |
| 113 | try testing.expectEqual(@as(usize, 2), self.strWidth("🦀")); | 205 | try testing.expectEqual(@as(usize, 2), dw.strWidth("🦀")); |
| 114 | try testing.expectEqual(@as(usize, 2), self.strWidth("👨👩👧👧")); | 206 | try testing.expectEqual(@as(usize, 2), dw.strWidth("👨👩👧👧")); |
| 115 | try testing.expectEqual(@as(usize, 2), self.strWidth("👩🔬")); | 207 | try testing.expectEqual(@as(usize, 2), dw.strWidth("👩🔬")); |
| 116 | try testing.expectEqual(@as(usize, 9), self.strWidth("sane text")); | 208 | try testing.expectEqual(@as(usize, 9), dw.strWidth("sane text")); |
| 117 | try testing.expectEqual(@as(usize, 9), self.strWidth("Ẓ̌á̲l͔̝̞̄̑͌g̖̘̘̔̔͢͞͝o̪̔T̢̙̫̈̍͞e̬͈͕͌̏͑x̺̍ṭ̓̓ͅ")); | 209 | try testing.expectEqual(@as(usize, 9), dw.strWidth("Ẓ̌á̲l͔̝̞̄̑͌g̖̘̘̔̔͢͞͝o̪̔T̢̙̫̈̍͞e̬͈͕͌̏͑x̺̍ṭ̓̓ͅ")); |
| 118 | try testing.expectEqual(@as(usize, 17), self.strWidth("슬라바 우크라이나")); | 210 | try testing.expectEqual(@as(usize, 17), dw.strWidth("슬라바 우크라이나")); |
| 119 | try testing.expectEqual(@as(usize, 1), self.strWidth("\u{378}")); | 211 | try testing.expectEqual(@as(usize, 1), dw.strWidth("\u{378}")); |
| 120 | } | 212 | } |
| 121 | 213 | ||
| 122 | /// centers `str` in a new string of width `total_width` (in display cells) using `pad` as padding. | 214 | /// centers `str` in a new string of width `total_width` (in display cells) using `pad` as padding. |
| @@ -124,17 +216,17 @@ test "strWidth" { | |||
| 124 | /// receive one additional pad. This makes sure the returned string fills the requested width. | 216 | /// receive one additional pad. This makes sure the returned string fills the requested width. |
| 125 | /// Caller must free returned bytes with `allocator`. | 217 | /// Caller must free returned bytes with `allocator`. |
| 126 | pub fn center( | 218 | pub fn center( |
| 127 | self: Self, | 219 | dw: DisplayWidth, |
| 128 | allocator: mem.Allocator, | 220 | allocator: mem.Allocator, |
| 129 | str: []const u8, | 221 | str: []const u8, |
| 130 | total_width: usize, | 222 | total_width: usize, |
| 131 | pad: []const u8, | 223 | pad: []const u8, |
| 132 | ) ![]u8 { | 224 | ) ![]u8 { |
| 133 | const str_width = self.strWidth(str); | 225 | const str_width = dw.strWidth(str); |
| 134 | if (str_width > total_width) return error.StrTooLong; | 226 | if (str_width > total_width) return error.StrTooLong; |
| 135 | if (str_width == total_width) return try allocator.dupe(u8, str); | 227 | if (str_width == total_width) return try allocator.dupe(u8, str); |
| 136 | 228 | ||
| 137 | const pad_width = self.strWidth(pad); | 229 | const pad_width = dw.strWidth(pad); |
| 138 | if (pad_width > total_width or str_width + pad_width > total_width) return error.PadTooLong; | 230 | if (pad_width > total_width or str_width + pad_width > total_width) return error.PadTooLong; |
| 139 | 231 | ||
| 140 | const margin_width = @divFloor((total_width - str_width), 2); | 232 | const margin_width = @divFloor((total_width - str_width), 2); |
| @@ -165,62 +257,61 @@ pub fn center( | |||
| 165 | 257 | ||
| 166 | test "center" { | 258 | test "center" { |
| 167 | const allocator = testing.allocator; | 259 | const allocator = testing.allocator; |
| 168 | const data = try DisplayWidthData.init(allocator); | 260 | const dw = try DisplayWidth.init(allocator); |
| 169 | defer data.deinit(allocator); | 261 | defer dw.deinit(allocator); |
| 170 | const self = Self{ .data = &data }; | ||
| 171 | 262 | ||
| 172 | // Input and width both have odd length | 263 | // Input and width both have odd length |
| 173 | var centered = try self.center(allocator, "abc", 9, "*"); | 264 | var centered = try dw.center(allocator, "abc", 9, "*"); |
| 174 | try testing.expectEqualSlices(u8, "***abc***", centered); | 265 | try testing.expectEqualSlices(u8, "***abc***", centered); |
| 175 | 266 | ||
| 176 | // Input and width both have even length | 267 | // Input and width both have even length |
| 177 | testing.allocator.free(centered); | 268 | testing.allocator.free(centered); |
| 178 | centered = try self.center(allocator, "w😊w", 10, "-"); | 269 | centered = try dw.center(allocator, "w😊w", 10, "-"); |
| 179 | try testing.expectEqualSlices(u8, "---w😊w---", centered); | 270 | try testing.expectEqualSlices(u8, "---w😊w---", centered); |
| 180 | 271 | ||
| 181 | // Input has even length, width has odd length | 272 | // Input has even length, width has odd length |
| 182 | testing.allocator.free(centered); | 273 | testing.allocator.free(centered); |
| 183 | centered = try self.center(allocator, "1234", 9, "-"); | 274 | centered = try dw.center(allocator, "1234", 9, "-"); |
| 184 | try testing.expectEqualSlices(u8, "--1234---", centered); | 275 | try testing.expectEqualSlices(u8, "--1234---", centered); |
| 185 | 276 | ||
| 186 | // Input has odd length, width has even length | 277 | // Input has odd length, width has even length |
| 187 | testing.allocator.free(centered); | 278 | testing.allocator.free(centered); |
| 188 | centered = try self.center(allocator, "123", 8, "-"); | 279 | centered = try dw.center(allocator, "123", 8, "-"); |
| 189 | try testing.expectEqualSlices(u8, "--123---", centered); | 280 | try testing.expectEqualSlices(u8, "--123---", centered); |
| 190 | 281 | ||
| 191 | // Input is the same length as the width | 282 | // Input is the same length as the width |
| 192 | testing.allocator.free(centered); | 283 | testing.allocator.free(centered); |
| 193 | centered = try self.center(allocator, "123", 3, "-"); | 284 | centered = try dw.center(allocator, "123", 3, "-"); |
| 194 | try testing.expectEqualSlices(u8, "123", centered); | 285 | try testing.expectEqualSlices(u8, "123", centered); |
| 195 | 286 | ||
| 196 | // Input is empty | 287 | // Input is empty |
| 197 | testing.allocator.free(centered); | 288 | testing.allocator.free(centered); |
| 198 | centered = try self.center(allocator, "", 3, "-"); | 289 | centered = try dw.center(allocator, "", 3, "-"); |
| 199 | try testing.expectEqualSlices(u8, "---", centered); | 290 | try testing.expectEqualSlices(u8, "---", centered); |
| 200 | 291 | ||
| 201 | // Input is empty and width is zero | 292 | // Input is empty and width is zero |
| 202 | testing.allocator.free(centered); | 293 | testing.allocator.free(centered); |
| 203 | centered = try self.center(allocator, "", 0, "-"); | 294 | centered = try dw.center(allocator, "", 0, "-"); |
| 204 | try testing.expectEqualSlices(u8, "", centered); | 295 | try testing.expectEqualSlices(u8, "", centered); |
| 205 | 296 | ||
| 206 | // Input is longer than the width, which is an error | 297 | // Input is longer than the width, which is an error |
| 207 | testing.allocator.free(centered); | 298 | testing.allocator.free(centered); |
| 208 | try testing.expectError(error.StrTooLong, self.center(allocator, "123", 2, "-")); | 299 | try testing.expectError(error.StrTooLong, dw.center(allocator, "123", 2, "-")); |
| 209 | } | 300 | } |
| 210 | 301 | ||
| 211 | /// padLeft returns a new string of width `total_width` (in display cells) using `pad` as padding | 302 | /// padLeft returns a new string of width `total_width` (in display cells) using `pad` as padding |
| 212 | /// on the left side. Caller must free returned bytes with `allocator`. | 303 | /// on the left side. Caller must free returned bytes with `allocator`. |
| 213 | pub fn padLeft( | 304 | pub fn padLeft( |
| 214 | self: Self, | 305 | dw: DisplayWidth, |
| 215 | allocator: mem.Allocator, | 306 | allocator: mem.Allocator, |
| 216 | str: []const u8, | 307 | str: []const u8, |
| 217 | total_width: usize, | 308 | total_width: usize, |
| 218 | pad: []const u8, | 309 | pad: []const u8, |
| 219 | ) ![]u8 { | 310 | ) ![]u8 { |
| 220 | const str_width = self.strWidth(str); | 311 | const str_width = dw.strWidth(str); |
| 221 | if (str_width > total_width) return error.StrTooLong; | 312 | if (str_width > total_width) return error.StrTooLong; |
| 222 | 313 | ||
| 223 | const pad_width = self.strWidth(pad); | 314 | const pad_width = dw.strWidth(pad); |
| 224 | if (pad_width > total_width or str_width + pad_width > total_width) return error.PadTooLong; | 315 | if (pad_width > total_width or str_width + pad_width > total_width) return error.PadTooLong; |
| 225 | 316 | ||
| 226 | const margin_width = total_width - str_width; | 317 | const margin_width = total_width - str_width; |
| @@ -244,32 +335,31 @@ pub fn padLeft( | |||
| 244 | 335 | ||
| 245 | test "padLeft" { | 336 | test "padLeft" { |
| 246 | const allocator = testing.allocator; | 337 | const allocator = testing.allocator; |
| 247 | const data = try DisplayWidthData.init(allocator); | 338 | const dw = try DisplayWidth.init(allocator); |
| 248 | defer data.deinit(allocator); | 339 | defer dw.deinit(allocator); |
| 249 | const self = Self{ .data = &data }; | ||
| 250 | 340 | ||
| 251 | var right_aligned = try self.padLeft(allocator, "abc", 9, "*"); | 341 | var right_aligned = try dw.padLeft(allocator, "abc", 9, "*"); |
| 252 | defer testing.allocator.free(right_aligned); | 342 | defer testing.allocator.free(right_aligned); |
| 253 | try testing.expectEqualSlices(u8, "******abc", right_aligned); | 343 | try testing.expectEqualSlices(u8, "******abc", right_aligned); |
| 254 | 344 | ||
| 255 | testing.allocator.free(right_aligned); | 345 | testing.allocator.free(right_aligned); |
| 256 | right_aligned = try self.padLeft(allocator, "w😊w", 10, "-"); | 346 | right_aligned = try dw.padLeft(allocator, "w😊w", 10, "-"); |
| 257 | try testing.expectEqualSlices(u8, "------w😊w", right_aligned); | 347 | try testing.expectEqualSlices(u8, "------w😊w", right_aligned); |
| 258 | } | 348 | } |
| 259 | 349 | ||
| 260 | /// padRight returns a new string of width `total_width` (in display cells) using `pad` as padding | 350 | /// padRight returns a new string of width `total_width` (in display cells) using `pad` as padding |
| 261 | /// on the right side. Caller must free returned bytes with `allocator`. | 351 | /// on the right side. Caller must free returned bytes with `allocator`. |
| 262 | pub fn padRight( | 352 | pub fn padRight( |
| 263 | self: Self, | 353 | dw: DisplayWidth, |
| 264 | allocator: mem.Allocator, | 354 | allocator: mem.Allocator, |
| 265 | str: []const u8, | 355 | str: []const u8, |
| 266 | total_width: usize, | 356 | total_width: usize, |
| 267 | pad: []const u8, | 357 | pad: []const u8, |
| 268 | ) ![]u8 { | 358 | ) ![]u8 { |
| 269 | const str_width = self.strWidth(str); | 359 | const str_width = dw.strWidth(str); |
| 270 | if (str_width > total_width) return error.StrTooLong; | 360 | if (str_width > total_width) return error.StrTooLong; |
| 271 | 361 | ||
| 272 | const pad_width = self.strWidth(pad); | 362 | const pad_width = dw.strWidth(pad); |
| 273 | if (pad_width > total_width or str_width + pad_width > total_width) return error.PadTooLong; | 363 | if (pad_width > total_width or str_width + pad_width > total_width) return error.PadTooLong; |
| 274 | 364 | ||
| 275 | const margin_width = total_width - str_width; | 365 | const margin_width = total_width - str_width; |
| @@ -294,16 +384,15 @@ pub fn padRight( | |||
| 294 | 384 | ||
| 295 | test "padRight" { | 385 | test "padRight" { |
| 296 | const allocator = testing.allocator; | 386 | const allocator = testing.allocator; |
| 297 | const data = try DisplayWidthData.init(allocator); | 387 | const dw = try DisplayWidth.init(allocator); |
| 298 | defer data.deinit(allocator); | 388 | defer dw.deinit(allocator); |
| 299 | const self = Self{ .data = &data }; | ||
| 300 | 389 | ||
| 301 | var left_aligned = try self.padRight(allocator, "abc", 9, "*"); | 390 | var left_aligned = try dw.padRight(allocator, "abc", 9, "*"); |
| 302 | defer testing.allocator.free(left_aligned); | 391 | defer testing.allocator.free(left_aligned); |
| 303 | try testing.expectEqualSlices(u8, "abc******", left_aligned); | 392 | try testing.expectEqualSlices(u8, "abc******", left_aligned); |
| 304 | 393 | ||
| 305 | testing.allocator.free(left_aligned); | 394 | testing.allocator.free(left_aligned); |
| 306 | left_aligned = try self.padRight(allocator, "w😊w", 10, "-"); | 395 | left_aligned = try dw.padRight(allocator, "w😊w", 10, "-"); |
| 307 | try testing.expectEqualSlices(u8, "w😊w------", left_aligned); | 396 | try testing.expectEqualSlices(u8, "w😊w------", left_aligned); |
| 308 | } | 397 | } |
| 309 | 398 | ||
| @@ -311,7 +400,7 @@ test "padRight" { | |||
| 311 | /// `threshold` defines how far the last column of the last word can be | 400 | /// `threshold` defines how far the last column of the last word can be |
| 312 | /// from the edge. Caller must free returned bytes with `allocator`. | 401 | /// from the edge. Caller must free returned bytes with `allocator`. |
| 313 | pub fn wrap( | 402 | pub fn wrap( |
| 314 | self: Self, | 403 | dw: DisplayWidth, |
| 315 | allocator: mem.Allocator, | 404 | allocator: mem.Allocator, |
| 316 | str: []const u8, | 405 | str: []const u8, |
| 317 | columns: usize, | 406 | columns: usize, |
| @@ -329,7 +418,7 @@ pub fn wrap( | |||
| 329 | while (word_iter.next()) |word| { | 418 | while (word_iter.next()) |word| { |
| 330 | try result.appendSlice(word); | 419 | try result.appendSlice(word); |
| 331 | try result.append(' '); | 420 | try result.append(' '); |
| 332 | line_width += self.strWidth(word) + 1; | 421 | line_width += dw.strWidth(word) + 1; |
| 333 | 422 | ||
| 334 | if (line_width > columns or columns - line_width <= threshold) { | 423 | if (line_width > columns or columns - line_width <= threshold) { |
| 335 | try result.append('\n'); | 424 | try result.append('\n'); |
| @@ -347,12 +436,11 @@ pub fn wrap( | |||
| 347 | 436 | ||
| 348 | test "wrap" { | 437 | test "wrap" { |
| 349 | const allocator = testing.allocator; | 438 | const allocator = testing.allocator; |
| 350 | const data = try DisplayWidthData.init(allocator); | 439 | const dw = try DisplayWidth.init(allocator); |
| 351 | defer data.deinit(allocator); | 440 | defer dw.deinit(allocator); |
| 352 | const self = Self{ .data = &data }; | ||
| 353 | 441 | ||
| 354 | const input = "The quick brown fox\r\njumped over the lazy dog!"; | 442 | const input = "The quick brown fox\r\njumped over the lazy dog!"; |
| 355 | const got = try self.wrap(allocator, input, 10, 3); | 443 | const got = try dw.wrap(allocator, input, 10, 3); |
| 356 | defer testing.allocator.free(got); | 444 | defer testing.allocator.free(got); |
| 357 | const want = "The quick \nbrown fox \njumped \nover the \nlazy dog!"; | 445 | const want = "The quick \nbrown fox \njumped \nover the \nlazy dog!"; |
| 358 | try testing.expectEqualStrings(want, got); | 446 | try testing.expectEqualStrings(want, got); |
diff --git a/src/GraphemeData.zig b/src/GraphemeData.zig index 6d3174d..df025cb 100644 --- a/src/GraphemeData.zig +++ b/src/GraphemeData.zig | |||
| @@ -36,7 +36,7 @@ s3: []u8 = undefined, | |||
| 36 | 36 | ||
| 37 | const Self = @This(); | 37 | const Self = @This(); |
| 38 | 38 | ||
| 39 | pub fn init(allocator: mem.Allocator) mem.Allocator.Error!Self { | 39 | pub inline fn init(allocator: mem.Allocator) mem.Allocator.Error!Self { |
| 40 | const decompressor = compress.flate.inflate.decompressor; | 40 | const decompressor = compress.flate.inflate.decompressor; |
| 41 | const in_bytes = @embedFile("gbp"); | 41 | const in_bytes = @embedFile("gbp"); |
| 42 | var in_fbs = std.io.fixedBufferStream(in_bytes); | 42 | var in_fbs = std.io.fixedBufferStream(in_bytes); |
| @@ -65,23 +65,23 @@ pub fn init(allocator: mem.Allocator) mem.Allocator.Error!Self { | |||
| 65 | return self; | 65 | return self; |
| 66 | } | 66 | } |
| 67 | 67 | ||
| 68 | pub fn deinit(self: *const Self, allocator: mem.Allocator) void { | 68 | pub inline fn deinit(self: *const Self, allocator: mem.Allocator) void { |
| 69 | allocator.free(self.s1); | 69 | allocator.free(self.s1); |
| 70 | allocator.free(self.s2); | 70 | allocator.free(self.s2); |
| 71 | allocator.free(self.s3); | 71 | allocator.free(self.s3); |
| 72 | } | 72 | } |
| 73 | 73 | ||
| 74 | /// Lookup the grapheme break property for a code point. | 74 | /// Lookup the grapheme break property for a code point. |
| 75 | pub fn gbp(self: Self, cp: u21) Gbp { | 75 | pub inline fn gbp(self: Self, cp: u21) Gbp { |
| 76 | return @enumFromInt(self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]] >> 4); | 76 | return @enumFromInt(self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]] >> 4); |
| 77 | } | 77 | } |
| 78 | 78 | ||
| 79 | /// Lookup the indic syllable type for a code point. | 79 | /// Lookup the indic syllable type for a code point. |
| 80 | pub fn indic(self: Self, cp: u21) Indic { | 80 | pub inline fn indic(self: Self, cp: u21) Indic { |
| 81 | return @enumFromInt((self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]] >> 1) & 0x7); | 81 | return @enumFromInt((self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]] >> 1) & 0x7); |
| 82 | } | 82 | } |
| 83 | 83 | ||
| 84 | /// Lookup the indic syllable type for a code point. | 84 | /// Lookup the emoji property for a code point. |
| 85 | pub fn isEmoji(self: Self, cp: u21) bool { | 85 | pub inline fn isEmoji(self: Self, cp: u21) bool { |
| 86 | return self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]] & 1 == 1; | 86 | return self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]] & 1 == 1; |
| 87 | } | 87 | } |
diff --git a/src/Normalize.zig b/src/Normalize.zig index a28b708..b738b27 100644 --- a/src/Normalize.zig +++ b/src/Normalize.zig | |||
| @@ -177,7 +177,7 @@ test "decompose" { | |||
| 177 | const allocator = testing.allocator; | 177 | const allocator = testing.allocator; |
| 178 | var data: NormData = undefined; | 178 | var data: NormData = undefined; |
| 179 | try NormData.init(&data, allocator); | 179 | try NormData.init(&data, allocator); |
| 180 | defer data.deinit(); | 180 | defer data.deinit(allocator); |
| 181 | var n = Self{ .norm_data = &data }; | 181 | var n = Self{ .norm_data = &data }; |
| 182 | 182 | ||
| 183 | var buf: [18]u21 = undefined; | 183 | var buf: [18]u21 = undefined; |
| @@ -307,11 +307,11 @@ test "nfd ASCII / no-alloc" { | |||
| 307 | const allocator = testing.allocator; | 307 | const allocator = testing.allocator; |
| 308 | var data: NormData = undefined; | 308 | var data: NormData = undefined; |
| 309 | try NormData.init(&data, allocator); | 309 | try NormData.init(&data, allocator); |
| 310 | defer data.deinit(); | 310 | defer data.deinit(allocator); |
| 311 | const n = Self{ .norm_data = &data }; | 311 | const n = Self{ .norm_data = &data }; |
| 312 | 312 | ||
| 313 | const result = try n.nfd(allocator, "Hello World!"); | 313 | const result = try n.nfd(allocator, "Hello World!"); |
| 314 | defer result.deinit(); | 314 | defer result.deinit(allocator); |
| 315 | 315 | ||
| 316 | try testing.expectEqualStrings("Hello World!", result.slice); | 316 | try testing.expectEqualStrings("Hello World!", result.slice); |
| 317 | } | 317 | } |
| @@ -320,11 +320,11 @@ test "nfd !ASCII / alloc" { | |||
| 320 | const allocator = testing.allocator; | 320 | const allocator = testing.allocator; |
| 321 | var data: NormData = undefined; | 321 | var data: NormData = undefined; |
| 322 | try NormData.init(&data, allocator); | 322 | try NormData.init(&data, allocator); |
| 323 | defer data.deinit(); | 323 | defer data.deinit(allocator); |
| 324 | const n = Self{ .norm_data = &data }; | 324 | const n = Self{ .norm_data = &data }; |
| 325 | 325 | ||
| 326 | const result = try n.nfd(allocator, "Héllo World! \u{3d3}"); | 326 | const result = try n.nfd(allocator, "Héllo World! \u{3d3}"); |
| 327 | defer result.deinit(); | 327 | defer result.deinit(allocator); |
| 328 | 328 | ||
| 329 | try testing.expectEqualStrings("He\u{301}llo World! \u{3d2}\u{301}", result.slice); | 329 | try testing.expectEqualStrings("He\u{301}llo World! \u{3d2}\u{301}", result.slice); |
| 330 | } | 330 | } |
| @@ -333,11 +333,11 @@ test "nfkd ASCII / no-alloc" { | |||
| 333 | const allocator = testing.allocator; | 333 | const allocator = testing.allocator; |
| 334 | var data: NormData = undefined; | 334 | var data: NormData = undefined; |
| 335 | try NormData.init(&data, allocator); | 335 | try NormData.init(&data, allocator); |
| 336 | defer data.deinit(); | 336 | defer data.deinit(allocator); |
| 337 | const n = Self{ .norm_data = &data }; | 337 | const n = Self{ .norm_data = &data }; |
| 338 | 338 | ||
| 339 | const result = try n.nfkd(allocator, "Hello World!"); | 339 | const result = try n.nfkd(allocator, "Hello World!"); |
| 340 | defer result.deinit(); | 340 | defer result.deinit(allocator); |
| 341 | 341 | ||
| 342 | try testing.expectEqualStrings("Hello World!", result.slice); | 342 | try testing.expectEqualStrings("Hello World!", result.slice); |
| 343 | } | 343 | } |
| @@ -346,11 +346,11 @@ test "nfkd !ASCII / alloc" { | |||
| 346 | const allocator = testing.allocator; | 346 | const allocator = testing.allocator; |
| 347 | var data: NormData = undefined; | 347 | var data: NormData = undefined; |
| 348 | try NormData.init(&data, allocator); | 348 | try NormData.init(&data, allocator); |
| 349 | defer data.deinit(); | 349 | defer data.deinit(allocator); |
| 350 | const n = Self{ .norm_data = &data }; | 350 | const n = Self{ .norm_data = &data }; |
| 351 | 351 | ||
| 352 | const result = try n.nfkd(allocator, "Héllo World! \u{3d3}"); | 352 | const result = try n.nfkd(allocator, "Héllo World! \u{3d3}"); |
| 353 | defer result.deinit(); | 353 | defer result.deinit(allocator); |
| 354 | 354 | ||
| 355 | try testing.expectEqualStrings("He\u{301}llo World! \u{3a5}\u{301}", result.slice); | 355 | try testing.expectEqualStrings("He\u{301}llo World! \u{3a5}\u{301}", result.slice); |
| 356 | } | 356 | } |
| @@ -546,11 +546,11 @@ test "nfc" { | |||
| 546 | const allocator = testing.allocator; | 546 | const allocator = testing.allocator; |
| 547 | var data: NormData = undefined; | 547 | var data: NormData = undefined; |
| 548 | try NormData.init(&data, allocator); | 548 | try NormData.init(&data, allocator); |
| 549 | defer data.deinit(); | 549 | defer data.deinit(allocator); |
| 550 | const n = Self{ .norm_data = &data }; | 550 | const n = Self{ .norm_data = &data }; |
| 551 | 551 | ||
| 552 | const result = try n.nfc(allocator, "Complex char: \u{3D2}\u{301}"); | 552 | const result = try n.nfc(allocator, "Complex char: \u{3D2}\u{301}"); |
| 553 | defer result.deinit(); | 553 | defer result.deinit(allocator); |
| 554 | 554 | ||
| 555 | try testing.expectEqualStrings("Complex char: \u{3D3}", result.slice); | 555 | try testing.expectEqualStrings("Complex char: \u{3D3}", result.slice); |
| 556 | } | 556 | } |
| @@ -559,11 +559,11 @@ test "nfkc" { | |||
| 559 | const allocator = testing.allocator; | 559 | const allocator = testing.allocator; |
| 560 | var data: NormData = undefined; | 560 | var data: NormData = undefined; |
| 561 | try NormData.init(&data, allocator); | 561 | try NormData.init(&data, allocator); |
| 562 | defer data.deinit(); | 562 | defer data.deinit(allocator); |
| 563 | const n = Self{ .norm_data = &data }; | 563 | const n = Self{ .norm_data = &data }; |
| 564 | 564 | ||
| 565 | const result = try n.nfkc(allocator, "Complex char: \u{03A5}\u{0301}"); | 565 | const result = try n.nfkc(allocator, "Complex char: \u{03A5}\u{0301}"); |
| 566 | defer result.deinit(); | 566 | defer result.deinit(allocator); |
| 567 | 567 | ||
| 568 | try testing.expectEqualStrings("Complex char: \u{038E}", result.slice); | 568 | try testing.expectEqualStrings("Complex char: \u{038E}", result.slice); |
| 569 | } | 569 | } |
| @@ -582,7 +582,7 @@ test "eql" { | |||
| 582 | const allocator = testing.allocator; | 582 | const allocator = testing.allocator; |
| 583 | var data: NormData = undefined; | 583 | var data: NormData = undefined; |
| 584 | try NormData.init(&data, allocator); | 584 | try NormData.init(&data, allocator); |
| 585 | defer data.deinit(); | 585 | defer data.deinit(allocator); |
| 586 | const n = Self{ .norm_data = &data }; | 586 | const n = Self{ .norm_data = &data }; |
| 587 | 587 | ||
| 588 | try testing.expect(try n.eql(allocator, "foé", "foe\u{0301}")); | 588 | try testing.expect(try n.eql(allocator, "foé", "foe\u{0301}")); |
| @@ -628,5 +628,4 @@ test "isLatin1Only" { | |||
| 628 | try testing.expect(isLatin1Only(latin1_only)); | 628 | try testing.expect(isLatin1Only(latin1_only)); |
| 629 | const not_latin1_only = "Héllo, World! \u{3d3}"; | 629 | const not_latin1_only = "Héllo, World! \u{3d3}"; |
| 630 | try testing.expect(!isLatin1Only(not_latin1_only)); | 630 | try testing.expect(!isLatin1Only(not_latin1_only)); |
| 631 | try testing.expect(false); | ||
| 632 | } | 631 | } |
diff --git a/src/WidthData.zig b/src/WidthData.zig index b07a679..ca7eaf0 100644 --- a/src/WidthData.zig +++ b/src/WidthData.zig | |||
| @@ -4,15 +4,36 @@ const compress = std.compress; | |||
| 4 | const mem = std.mem; | 4 | const mem = std.mem; |
| 5 | const testing = std.testing; | 5 | const testing = std.testing; |
| 6 | 6 | ||
| 7 | const GraphemeData = @import("GraphemeData"); | 7 | const Graphemes = @import("Graphemes"); |
| 8 | 8 | ||
| 9 | g_data: GraphemeData, | 9 | g_data: Graphemes, |
| 10 | s1: []u16 = undefined, | 10 | s1: []u16 = undefined, |
| 11 | s2: []i4 = undefined, | 11 | s2: []i4 = undefined, |
| 12 | owns_gdata: bool, | ||
| 12 | 13 | ||
| 13 | const Self = @This(); | 14 | const Self = @This(); |
| 14 | 15 | ||
| 15 | pub fn init(allocator: mem.Allocator) mem.Allocator.Error!Self { | 16 | pub fn init(allocator: mem.Allocator) mem.Allocator.Error!Self { |
| 17 | var self: Self = try Self.setup(allocator); | ||
| 18 | errdefer { | ||
| 19 | allocator.free(self.s1); | ||
| 20 | allocator.free(self.s2); | ||
| 21 | } | ||
| 22 | self.owns_gdata = true; | ||
| 23 | self.g_data = try Graphemes.init(allocator); | ||
| 24 | errdefer self.g_data.deinit(allocator); | ||
| 25 | return self; | ||
| 26 | } | ||
| 27 | |||
| 28 | pub fn initWithGraphemeData(allocator: mem.Allocator, g_data: Graphemes) mem.Allocator.Error!Self { | ||
| 29 | var self = try Self.setup(allocator); | ||
| 30 | self.g_data = g_data; | ||
| 31 | self.owns_gdata = false; | ||
| 32 | return self; | ||
| 33 | } | ||
| 34 | |||
| 35 | // Sets up the DisplayWidthData, leaving the GraphemeData undefined. | ||
| 36 | fn setup(allocator: mem.Allocator) mem.Allocator.Error!Self { | ||
| 16 | const decompressor = compress.flate.inflate.decompressor; | 37 | const decompressor = compress.flate.inflate.decompressor; |
| 17 | const in_bytes = @embedFile("dwp"); | 38 | const in_bytes = @embedFile("dwp"); |
| 18 | var in_fbs = std.io.fixedBufferStream(in_bytes); | 39 | var in_fbs = std.io.fixedBufferStream(in_bytes); |
| @@ -21,10 +42,7 @@ pub fn init(allocator: mem.Allocator) mem.Allocator.Error!Self { | |||
| 21 | 42 | ||
| 22 | const endian = builtin.cpu.arch.endian(); | 43 | const endian = builtin.cpu.arch.endian(); |
| 23 | 44 | ||
| 24 | var self = Self{ | 45 | var self: Self = undefined; |
| 25 | .g_data = try GraphemeData.init(allocator), | ||
| 26 | }; | ||
| 27 | errdefer self.g_data.deinit(allocator); | ||
| 28 | 46 | ||
| 29 | const stage_1_len: u16 = reader.readInt(u16, endian) catch unreachable; | 47 | const stage_1_len: u16 = reader.readInt(u16, endian) catch unreachable; |
| 30 | self.s1 = try allocator.alloc(u16, stage_1_len); | 48 | self.s1 = try allocator.alloc(u16, stage_1_len); |
| @@ -42,7 +60,7 @@ pub fn init(allocator: mem.Allocator) mem.Allocator.Error!Self { | |||
| 42 | pub fn deinit(self: *const Self, allocator: mem.Allocator) void { | 60 | pub fn deinit(self: *const Self, allocator: mem.Allocator) void { |
| 43 | allocator.free(self.s1); | 61 | allocator.free(self.s1); |
| 44 | allocator.free(self.s2); | 62 | allocator.free(self.s2); |
| 45 | self.g_data.deinit(allocator); | 63 | if (self.owns_gdata) self.g_data.deinit(allocator); |
| 46 | } | 64 | } |
| 47 | 65 | ||
| 48 | /// codePointWidth returns the number of cells `cp` requires when rendered | 66 | /// codePointWidth returns the number of cells `cp` requires when rendered |
diff --git a/src/grapheme.zig b/src/grapheme.zig index 25fd71d..79cd2c6 100644 --- a/src/grapheme.zig +++ b/src/grapheme.zig | |||
| @@ -1,10 +1,99 @@ | |||
| 1 | const std = @import("std"); | 1 | const std = @import("std"); |
| 2 | const builtin = @import("builtin"); | ||
| 2 | const mem = std.mem; | 3 | const mem = std.mem; |
| 4 | const Allocator = mem.Allocator; | ||
| 5 | const compress = std.compress; | ||
| 3 | const unicode = std.unicode; | 6 | const unicode = std.unicode; |
| 4 | 7 | ||
| 5 | const CodePoint = @import("code_point").CodePoint; | 8 | const CodePoint = @import("code_point").CodePoint; |
| 6 | const CodePointIterator = @import("code_point").Iterator; | 9 | const CodePointIterator = @import("code_point").Iterator; |
| 7 | pub const GraphemeData = @import("GraphemeData"); | 10 | |
| 11 | s1: []u16 = undefined, | ||
| 12 | s2: []u16 = undefined, | ||
| 13 | s3: []u8 = undefined, | ||
| 14 | |||
| 15 | const Graphemes = @This(); | ||
| 16 | |||
| 17 | pub inline fn init(allocator: mem.Allocator) mem.Allocator.Error!Graphemes { | ||
| 18 | const decompressor = compress.flate.inflate.decompressor; | ||
| 19 | const in_bytes = @embedFile("gbp"); | ||
| 20 | var in_fbs = std.io.fixedBufferStream(in_bytes); | ||
| 21 | var in_decomp = decompressor(.raw, in_fbs.reader()); | ||
| 22 | var reader = in_decomp.reader(); | ||
| 23 | |||
| 24 | const endian = builtin.cpu.arch.endian(); | ||
| 25 | |||
| 26 | var self = Graphemes{}; | ||
| 27 | |||
| 28 | const s1_len: u16 = reader.readInt(u16, endian) catch unreachable; | ||
| 29 | self.s1 = try allocator.alloc(u16, s1_len); | ||
| 30 | errdefer allocator.free(self.s1); | ||
| 31 | for (0..s1_len) |i| self.s1[i] = reader.readInt(u16, endian) catch unreachable; | ||
| 32 | |||
| 33 | const s2_len: u16 = reader.readInt(u16, endian) catch unreachable; | ||
| 34 | self.s2 = try allocator.alloc(u16, s2_len); | ||
| 35 | errdefer allocator.free(self.s2); | ||
| 36 | for (0..s2_len) |i| self.s2[i] = reader.readInt(u16, endian) catch unreachable; | ||
| 37 | |||
| 38 | const s3_len: u16 = reader.readInt(u16, endian) catch unreachable; | ||
| 39 | self.s3 = try allocator.alloc(u8, s3_len); | ||
| 40 | errdefer allocator.free(self.s3); | ||
| 41 | _ = reader.readAll(self.s3) catch unreachable; | ||
| 42 | |||
| 43 | return self; | ||
| 44 | } | ||
| 45 | |||
| 46 | pub fn deinit(graphemes: *const Graphemes, allocator: mem.Allocator) void { | ||
| 47 | allocator.free(graphemes.s1); | ||
| 48 | allocator.free(graphemes.s2); | ||
| 49 | allocator.free(graphemes.s3); | ||
| 50 | } | ||
| 51 | |||
| 52 | /// Lookup the grapheme break property for a code point. | ||
| 53 | pub fn gbp(graphemes: Graphemes, cp: u21) Gbp { | ||
| 54 | return @enumFromInt(graphemes.s3[graphemes.s2[graphemes.s1[cp >> 8] + (cp & 0xff)]] >> 4); | ||
| 55 | } | ||
| 56 | |||
| 57 | /// Lookup the indic syllable type for a code point. | ||
| 58 | pub fn indic(graphemes: Graphemes, cp: u21) Indic { | ||
| 59 | return @enumFromInt((graphemes.s3[graphemes.s2[graphemes.s1[cp >> 8] + (cp & 0xff)]] >> 1) & 0x7); | ||
| 60 | } | ||
| 61 | |||
| 62 | /// Lookup the emoji property for a code point. | ||
| 63 | pub fn isEmoji(graphemes: Graphemes, cp: u21) bool { | ||
| 64 | return graphemes.s3[graphemes.s2[graphemes.s1[cp >> 8] + (cp & 0xff)]] & 1 == 1; | ||
| 65 | } | ||
| 66 | |||
| 67 | pub fn iterator(graphemes: *const Graphemes, string: []const u8) Iterator { | ||
| 68 | return Iterator.init(string, graphemes); | ||
| 69 | } | ||
| 70 | |||
| 71 | /// Indic syllable type. | ||
| 72 | pub const Indic = enum { | ||
| 73 | none, | ||
| 74 | |||
| 75 | Consonant, | ||
| 76 | Extend, | ||
| 77 | Linker, | ||
| 78 | }; | ||
| 79 | |||
| 80 | /// Grapheme break property. | ||
| 81 | pub const Gbp = enum { | ||
| 82 | none, | ||
| 83 | Control, | ||
| 84 | CR, | ||
| 85 | Extend, | ||
| 86 | L, | ||
| 87 | LF, | ||
| 88 | LV, | ||
| 89 | LVT, | ||
| 90 | Prepend, | ||
| 91 | Regional_Indicator, | ||
| 92 | SpacingMark, | ||
| 93 | T, | ||
| 94 | V, | ||
| 95 | ZWJ, | ||
| 96 | }; | ||
| 8 | 97 | ||
| 9 | /// `Grapheme` represents a Unicode grapheme cluster by its length and offset in the source bytes. | 98 | /// `Grapheme` represents a Unicode grapheme cluster by its length and offset in the source bytes. |
| 10 | pub const Grapheme = struct { | 99 | pub const Grapheme = struct { |
| @@ -22,12 +111,12 @@ pub const Grapheme = struct { | |||
| 22 | pub const Iterator = struct { | 111 | pub const Iterator = struct { |
| 23 | buf: [2]?CodePoint = .{ null, null }, | 112 | buf: [2]?CodePoint = .{ null, null }, |
| 24 | cp_iter: CodePointIterator, | 113 | cp_iter: CodePointIterator, |
| 25 | data: *const GraphemeData, | 114 | data: *const Graphemes, |
| 26 | 115 | ||
| 27 | const Self = @This(); | 116 | const Self = @This(); |
| 28 | 117 | ||
| 29 | /// Assumes `str` is valid UTF-8. | 118 | /// Assumes `str` is valid UTF-8. |
| 30 | pub fn init(str: []const u8, data: *const GraphemeData) Self { | 119 | pub fn init(str: []const u8, data: *const Graphemes) Self { |
| 31 | var self = Self{ .cp_iter = .{ .bytes = str }, .data = data }; | 120 | var self = Self{ .cp_iter = .{ .bytes = str }, .data = data }; |
| 32 | self.advance(); | 121 | self.advance(); |
| 33 | return self; | 122 | return self; |
| @@ -149,7 +238,7 @@ pub const Iterator = struct { | |||
| 149 | }; | 238 | }; |
| 150 | 239 | ||
| 151 | // Predicates | 240 | // Predicates |
| 152 | fn isBreaker(cp: u21, data: *const GraphemeData) bool { | 241 | fn isBreaker(cp: u21, data: *const Graphemes) bool { |
| 153 | // Extract relevant properties. | 242 | // Extract relevant properties. |
| 154 | const cp_gbp_prop = data.gbp(cp); | 243 | const cp_gbp_prop = data.gbp(cp); |
| 155 | return cp == '\x0d' or cp == '\x0a' or cp_gbp_prop == .Control; | 244 | return cp == '\x0d' or cp == '\x0a' or cp_gbp_prop == .Control; |
| @@ -202,7 +291,7 @@ pub const State = struct { | |||
| 202 | pub fn graphemeBreak( | 291 | pub fn graphemeBreak( |
| 203 | cp1: u21, | 292 | cp1: u21, |
| 204 | cp2: u21, | 293 | cp2: u21, |
| 205 | data: *const GraphemeData, | 294 | data: *const Graphemes, |
| 206 | state: *State, | 295 | state: *State, |
| 207 | ) bool { | 296 | ) bool { |
| 208 | // Extract relevant properties. | 297 | // Extract relevant properties. |
| @@ -306,25 +395,25 @@ test "Segmentation ZWJ and ZWSP emoji sequences" { | |||
| 306 | const with_zwsp = seq_1 ++ "\u{200B}" ++ seq_2; | 395 | const with_zwsp = seq_1 ++ "\u{200B}" ++ seq_2; |
| 307 | const no_joiner = seq_1 ++ seq_2; | 396 | const no_joiner = seq_1 ++ seq_2; |
| 308 | 397 | ||
| 309 | const data = try GraphemeData.init(std.testing.allocator); | 398 | const graphemes = try Graphemes.init(std.testing.allocator); |
| 310 | defer data.deinit(std.testing.allocator); | 399 | defer graphemes.deinit(std.testing.allocator); |
| 311 | 400 | ||
| 312 | { | 401 | { |
| 313 | var iter = Iterator.init(with_zwj, &data); | 402 | var iter = graphemes.iterator(with_zwj); |
| 314 | var i: usize = 0; | 403 | var i: usize = 0; |
| 315 | while (iter.next()) |_| : (i += 1) {} | 404 | while (iter.next()) |_| : (i += 1) {} |
| 316 | try std.testing.expectEqual(@as(usize, 1), i); | 405 | try std.testing.expectEqual(@as(usize, 1), i); |
| 317 | } | 406 | } |
| 318 | 407 | ||
| 319 | { | 408 | { |
| 320 | var iter = Iterator.init(with_zwsp, &data); | 409 | var iter = graphemes.iterator(with_zwsp); |
| 321 | var i: usize = 0; | 410 | var i: usize = 0; |
| 322 | while (iter.next()) |_| : (i += 1) {} | 411 | while (iter.next()) |_| : (i += 1) {} |
| 323 | try std.testing.expectEqual(@as(usize, 3), i); | 412 | try std.testing.expectEqual(@as(usize, 3), i); |
| 324 | } | 413 | } |
| 325 | 414 | ||
| 326 | { | 415 | { |
| 327 | var iter = Iterator.init(no_joiner, &data); | 416 | var iter = graphemes.iterator(no_joiner); |
| 328 | var i: usize = 0; | 417 | var i: usize = 0; |
| 329 | while (iter.next()) |_| : (i += 1) {} | 418 | while (iter.next()) |_| : (i += 1) {} |
| 330 | try std.testing.expectEqual(@as(usize, 2), i); | 419 | try std.testing.expectEqual(@as(usize, 2), i); |
diff --git a/src/unicode_tests.zig b/src/unicode_tests.zig index 7236ff6..de1b9ec 100644 --- a/src/unicode_tests.zig +++ b/src/unicode_tests.zig | |||
| @@ -9,7 +9,7 @@ const unicode = std.unicode; | |||
| 9 | 9 | ||
| 10 | const grapheme = @import("grapheme"); | 10 | const grapheme = @import("grapheme"); |
| 11 | const Grapheme = @import("grapheme").Grapheme; | 11 | const Grapheme = @import("grapheme").Grapheme; |
| 12 | const GraphemeData = @import("grapheme").GraphemeData; | 12 | const Graphemes = @import("grapheme"); |
| 13 | const GraphemeIterator = @import("grapheme").Iterator; | 13 | const GraphemeIterator = @import("grapheme").Iterator; |
| 14 | const Normalize = @import("Normalize"); | 14 | const Normalize = @import("Normalize"); |
| 15 | 15 | ||
| @@ -18,10 +18,10 @@ comptime { | |||
| 18 | } | 18 | } |
| 19 | test "Iterator.peek" { | 19 | test "Iterator.peek" { |
| 20 | const peek_seq = "aΔ👨🏻🌾→"; | 20 | const peek_seq = "aΔ👨🏻🌾→"; |
| 21 | const data = try GraphemeData.init(std.testing.allocator); | 21 | const data = try Graphemes.init(std.testing.allocator); |
| 22 | defer data.deinit(std.testing.allocator); | 22 | defer data.deinit(std.testing.allocator); |
| 23 | 23 | ||
| 24 | var iter = grapheme.Iterator.init(peek_seq, &data); | 24 | var iter = data.iterator(peek_seq); |
| 25 | const peek_a = iter.peek().?; | 25 | const peek_a = iter.peek().?; |
| 26 | const next_a = iter.next().?; | 26 | const next_a = iter.next().?; |
| 27 | try std.testing.expectEqual(peek_a, next_a); | 27 | try std.testing.expectEqual(peek_a, next_a); |
| @@ -162,7 +162,7 @@ test "Segmentation GraphemeIterator" { | |||
| 162 | var buf_reader = std.io.bufferedReader(file.reader()); | 162 | var buf_reader = std.io.bufferedReader(file.reader()); |
| 163 | var input_stream = buf_reader.reader(); | 163 | var input_stream = buf_reader.reader(); |
| 164 | 164 | ||
| 165 | const data = try GraphemeData.init(allocator); | 165 | const data = try Graphemes.init(allocator); |
| 166 | defer data.deinit(allocator); | 166 | defer data.deinit(allocator); |
| 167 | 167 | ||
| 168 | var buf: [4096]u8 = undefined; | 168 | var buf: [4096]u8 = undefined; |
| @@ -207,7 +207,7 @@ test "Segmentation GraphemeIterator" { | |||
| 207 | } | 207 | } |
| 208 | 208 | ||
| 209 | // std.debug.print("\nline {}: {s}\n", .{ line_no, all_bytes.items }); | 209 | // std.debug.print("\nline {}: {s}\n", .{ line_no, all_bytes.items }); |
| 210 | var iter = GraphemeIterator.init(all_bytes.items, &data); | 210 | var iter = data.iterator(all_bytes.items); |
| 211 | 211 | ||
| 212 | // Check. | 212 | // Check. |
| 213 | for (want.items) |want_gc| { | 213 | for (want.items) |want_gc| { |