diff options
| author | 2025-04-30 11:58:19 -0400 | |
|---|---|---|
| committer | 2025-04-30 11:58:19 -0400 | |
| commit | 1be5e46490e061761b4b97dff5c6acb2181d6fe9 (patch) | |
| tree | 77a1edcdedd7afae7428e92feba37d2bb1035b22 /src | |
| parent | Add general tests step (diff) | |
| download | zg-1be5e46490e061761b4b97dff5c6acb2181d6fe9.tar.gz zg-1be5e46490e061761b4b97dff5c6acb2181d6fe9.tar.xz zg-1be5e46490e061761b4b97dff5c6acb2181d6fe9.zip | |
Factor out 'Data' for grapheme and DisplayWidth
This is part of an ongoing refactor of the whole library, so that it no
longer exposes anything called "Data" separately from user functionality.
Diffstat (limited to 'src')
| -rw-r--r-- | src/DisplayWidth.zig | 240 | ||||
| -rw-r--r-- | src/GraphemeData.zig | 12 | ||||
| -rw-r--r-- | src/Normalize.zig | 29 | ||||
| -rw-r--r-- | src/WidthData.zig | 32 | ||||
| -rw-r--r-- | src/grapheme.zig | 109 | ||||
| -rw-r--r-- | src/unicode_tests.zig | 10 |
6 files changed, 313 insertions, 119 deletions
diff --git a/src/DisplayWidth.zig b/src/DisplayWidth.zig index 8631bd4..11ec59e 100644 --- a/src/DisplayWidth.zig +++ b/src/DisplayWidth.zig | |||
| @@ -2,38 +2,131 @@ const std = @import("std"); | |||
| 2 | const builtin = @import("builtin"); | 2 | const builtin = @import("builtin"); |
| 3 | const options = @import("options"); | 3 | const options = @import("options"); |
| 4 | const ArrayList = std.ArrayList; | 4 | const ArrayList = std.ArrayList; |
| 5 | const compress = std.compress; | ||
| 5 | const mem = std.mem; | 6 | const mem = std.mem; |
| 6 | const simd = std.simd; | 7 | const simd = std.simd; |
| 7 | const testing = std.testing; | 8 | const testing = std.testing; |
| 8 | 9 | ||
| 9 | const ascii = @import("ascii"); | 10 | const ascii = @import("ascii"); |
| 10 | const CodePointIterator = @import("code_point").Iterator; | 11 | const CodePointIterator = @import("code_point").Iterator; |
| 11 | const GraphemeIterator = @import("grapheme").Iterator; | ||
| 12 | pub const DisplayWidthData = @import("DisplayWidthData"); | 12 | pub const DisplayWidthData = @import("DisplayWidthData"); |
| 13 | 13 | ||
| 14 | data: *const DisplayWidthData, | 14 | const Graphemes = @import("Graphemes"); |
| 15 | 15 | ||
| 16 | const Self = @This(); | 16 | g_data: Graphemes, |
| 17 | s1: []u16 = undefined, | ||
| 18 | s2: []i4 = undefined, | ||
| 19 | owns_gdata: bool, | ||
| 20 | |||
| 21 | const DisplayWidth = @This(); | ||
| 22 | |||
| 23 | pub fn init(allocator: mem.Allocator) mem.Allocator.Error!DisplayWidth { | ||
| 24 | var dw: DisplayWidth = try DisplayWidth.setup(allocator); | ||
| 25 | errdefer { | ||
| 26 | allocator.free(dw.s1); | ||
| 27 | allocator.free(dw.s2); | ||
| 28 | } | ||
| 29 | dw.owns_gdata = true; | ||
| 30 | dw.g_data = try Graphemes.init(allocator); | ||
| 31 | errdefer dw.g_data.deinit(allocator); | ||
| 32 | return dw; | ||
| 33 | } | ||
| 34 | |||
| 35 | pub fn initWithGraphemeData(allocator: mem.Allocator, g_data: Graphemes) mem.Allocator.Error!DisplayWidth { | ||
| 36 | var dw = try DisplayWidth.setup(allocator); | ||
| 37 | dw.g_data = g_data; | ||
| 38 | dw.owns_gdata = false; | ||
| 39 | return dw; | ||
| 40 | } | ||
| 41 | |||
| 42 | // Sets up the width tables (`s1`, `s2`), leaving `g_data` and `owns_gdata` undefined. | ||
| 43 | fn setup(allocator: mem.Allocator) mem.Allocator.Error!DisplayWidth { | ||
| 44 | const decompressor = compress.flate.inflate.decompressor; | ||
| 45 | const in_bytes = @embedFile("dwp"); | ||
| 46 | var in_fbs = std.io.fixedBufferStream(in_bytes); | ||
| 47 | var in_decomp = decompressor(.raw, in_fbs.reader()); | ||
| 48 | var reader = in_decomp.reader(); | ||
| 49 | |||
| 50 | const endian = builtin.cpu.arch.endian(); | ||
| 51 | |||
| 52 | var dw: DisplayWidth = undefined; | ||
| 53 | |||
| 54 | const stage_1_len: u16 = reader.readInt(u16, endian) catch unreachable; | ||
| 55 | dw.s1 = try allocator.alloc(u16, stage_1_len); | ||
| 56 | errdefer allocator.free(dw.s1); | ||
| 57 | for (0..stage_1_len) |i| dw.s1[i] = reader.readInt(u16, endian) catch unreachable; | ||
| 58 | |||
| 59 | const stage_2_len: u16 = reader.readInt(u16, endian) catch unreachable; | ||
| 60 | dw.s2 = try allocator.alloc(i4, stage_2_len); | ||
| 61 | errdefer allocator.free(dw.s2); | ||
| 62 | for (0..stage_2_len) |i| dw.s2[i] = @intCast(reader.readInt(i8, endian) catch unreachable); | ||
| 63 | |||
| 64 | return dw; | ||
| 65 | } | ||
| 66 | |||
| 67 | pub fn deinit(dw: *const DisplayWidth, allocator: mem.Allocator) void { | ||
| 68 | allocator.free(dw.s1); | ||
| 69 | allocator.free(dw.s2); | ||
| 70 | if (dw.owns_gdata) dw.g_data.deinit(allocator); | ||
| 71 | } | ||
| 72 | |||
| 73 | /// codePointWidth returns the number of cells `cp` requires when rendered | ||
| 74 | /// in a fixed-pitch font (i.e. a terminal screen). This can range from -1 to | ||
| 75 | /// 3, where BACKSPACE and DELETE return -1 and 3-em-dash returns 3. C0/C1 | ||
| 76 | /// control codes return 0. If `cjk` is true, ambiguous code points return 2, | ||
| 77 | /// otherwise they return 1. | ||
| 78 | pub fn codePointWidth(dw: DisplayWidth, cp: u21) i4 { | ||
| 79 | return dw.s2[dw.s1[cp >> 8] + (cp & 0xff)]; | ||
| 80 | } | ||
| 81 | |||
| 82 | test "codePointWidth" { | ||
| 83 | const dw = try DisplayWidth.init(std.testing.allocator); | ||
| 84 | defer dw.deinit(std.testing.allocator); | ||
| 85 | try testing.expectEqual(@as(i4, 0), dw.codePointWidth(0x0000)); // null | ||
| 86 | try testing.expectEqual(@as(i4, -1), dw.codePointWidth(0x8)); // \b | ||
| 87 | try testing.expectEqual(@as(i4, -1), dw.codePointWidth(0x7f)); // DEL | ||
| 88 | try testing.expectEqual(@as(i4, 0), dw.codePointWidth(0x0005)); // Cc (ENQ) | ||
| 89 | try testing.expectEqual(@as(i4, 0), dw.codePointWidth(0x0007)); // \a BEL | ||
| 90 | try testing.expectEqual(@as(i4, 0), dw.codePointWidth(0x000A)); // \n LF | ||
| 91 | try testing.expectEqual(@as(i4, 0), dw.codePointWidth(0x000B)); // \v VT | ||
| 92 | try testing.expectEqual(@as(i4, 0), dw.codePointWidth(0x000C)); // \f FF | ||
| 93 | try testing.expectEqual(@as(i4, 0), dw.codePointWidth(0x000D)); // \r CR | ||
| 94 | try testing.expectEqual(@as(i4, 0), dw.codePointWidth(0x000E)); // SO | ||
| 95 | try testing.expectEqual(@as(i4, 0), dw.codePointWidth(0x000F)); // SI | ||
| 96 | |||
| 97 | try testing.expectEqual(@as(i4, 0), dw.codePointWidth(0x070F)); // Cf | ||
| 98 | try testing.expectEqual(@as(i4, 1), dw.codePointWidth(0x0603)); // Cf Arabic | ||
| 99 | |||
| 100 | try testing.expectEqual(@as(i4, 1), dw.codePointWidth(0x00AD)); // soft-hyphen | ||
| 101 | try testing.expectEqual(@as(i4, 2), dw.codePointWidth(0x2E3A)); // two-em dash | ||
| 102 | try testing.expectEqual(@as(i4, 3), dw.codePointWidth(0x2E3B)); // three-em dash | ||
| 103 | |||
| 104 | try testing.expectEqual(@as(i4, 1), dw.codePointWidth(0x00BD)); // ambiguous halfwidth | ||
| 105 | |||
| 106 | try testing.expectEqual(@as(i4, 1), dw.codePointWidth('é')); | ||
| 107 | try testing.expectEqual(@as(i4, 2), dw.codePointWidth('😊')); | ||
| 108 | try testing.expectEqual(@as(i4, 2), dw.codePointWidth('统')); | ||
| 109 | } | ||
| 17 | 110 | ||
| 18 | /// strWidth returns the total display width of `str` as the number of cells | 111 | /// strWidth returns the total display width of `str` as the number of cells |
| 19 | /// required in a fixed-pitch font (i.e. a terminal screen). | 112 | /// required in a fixed-pitch font (i.e. a terminal screen). |
| 20 | pub fn strWidth(self: Self, str: []const u8) usize { | 113 | pub fn strWidth(dw: DisplayWidth, str: []const u8) usize { |
| 21 | var total: isize = 0; | 114 | var total: isize = 0; |
| 22 | 115 | ||
| 23 | // ASCII fast path | 116 | // ASCII fast path |
| 24 | if (ascii.isAsciiOnly(str)) { | 117 | if (ascii.isAsciiOnly(str)) { |
| 25 | for (str) |b| total += self.data.codePointWidth(b); | 118 | for (str) |b| total += dw.codePointWidth(b); |
| 26 | return @intCast(@max(0, total)); | 119 | return @intCast(@max(0, total)); |
| 27 | } | 120 | } |
| 28 | 121 | ||
| 29 | var giter = GraphemeIterator.init(str, &self.data.g_data); | 122 | var giter = dw.g_data.iterator(str); |
| 30 | 123 | ||
| 31 | while (giter.next()) |gc| { | 124 | while (giter.next()) |gc| { |
| 32 | var cp_iter = CodePointIterator{ .bytes = gc.bytes(str) }; | 125 | var cp_iter = CodePointIterator{ .bytes = gc.bytes(str) }; |
| 33 | var gc_total: isize = 0; | 126 | var gc_total: isize = 0; |
| 34 | 127 | ||
| 35 | while (cp_iter.next()) |cp| { | 128 | while (cp_iter.next()) |cp| { |
| 36 | var w = self.data.codePointWidth(cp.code); | 129 | var w = dw.codePointWidth(cp.code); |
| 37 | 130 | ||
| 38 | if (w != 0) { | 131 | if (w != 0) { |
| 39 | // Handle text emoji sequence. | 132 | // Handle text emoji sequence. |
| @@ -58,41 +151,40 @@ pub fn strWidth(self: Self, str: []const u8) usize { | |||
| 58 | } | 151 | } |
| 59 | 152 | ||
| 60 | test "strWidth" { | 153 | test "strWidth" { |
| 61 | const data = try DisplayWidthData.init(testing.allocator); | 154 | const dw = try DisplayWidth.init(testing.allocator); |
| 62 | defer data.deinit(testing.allocator); | 155 | defer dw.deinit(testing.allocator); |
| 63 | const self = Self{ .data = &data }; | ||
| 64 | const c0 = options.c0_width orelse 0; | 156 | const c0 = options.c0_width orelse 0; |
| 65 | 157 | ||
| 66 | try testing.expectEqual(@as(usize, 5), self.strWidth("Hello\r\n")); | 158 | try testing.expectEqual(@as(usize, 5), dw.strWidth("Hello\r\n")); |
| 67 | try testing.expectEqual(@as(usize, 1), self.strWidth("\u{0065}\u{0301}")); | 159 | try testing.expectEqual(@as(usize, 1), dw.strWidth("\u{0065}\u{0301}")); |
| 68 | try testing.expectEqual(@as(usize, 2), self.strWidth("\u{1F476}\u{1F3FF}\u{0308}\u{200D}\u{1F476}\u{1F3FF}")); | 160 | try testing.expectEqual(@as(usize, 2), dw.strWidth("\u{1F476}\u{1F3FF}\u{0308}\u{200D}\u{1F476}\u{1F3FF}")); |
| 69 | try testing.expectEqual(@as(usize, 8), self.strWidth("Hello 😊")); | 161 | try testing.expectEqual(@as(usize, 8), dw.strWidth("Hello 😊")); |
| 70 | try testing.expectEqual(@as(usize, 8), self.strWidth("Héllo 😊")); | 162 | try testing.expectEqual(@as(usize, 8), dw.strWidth("Héllo 😊")); |
| 71 | try testing.expectEqual(@as(usize, 8), self.strWidth("Héllo :)")); | 163 | try testing.expectEqual(@as(usize, 8), dw.strWidth("Héllo :)")); |
| 72 | try testing.expectEqual(@as(usize, 8), self.strWidth("Héllo 🇪🇸")); | 164 | try testing.expectEqual(@as(usize, 8), dw.strWidth("Héllo 🇪🇸")); |
| 73 | try testing.expectEqual(@as(usize, 2), self.strWidth("\u{26A1}")); // Lone emoji | 165 | try testing.expectEqual(@as(usize, 2), dw.strWidth("\u{26A1}")); // Lone emoji |
| 74 | try testing.expectEqual(@as(usize, 1), self.strWidth("\u{26A1}\u{FE0E}")); // Text sequence | 166 | try testing.expectEqual(@as(usize, 1), dw.strWidth("\u{26A1}\u{FE0E}")); // Text sequence |
| 75 | try testing.expectEqual(@as(usize, 2), self.strWidth("\u{26A1}\u{FE0F}")); // Presentation sequence | 167 | try testing.expectEqual(@as(usize, 2), dw.strWidth("\u{26A1}\u{FE0F}")); // Presentation sequence |
| 76 | try testing.expectEqual(@as(usize, 1), self.strWidth("\u{2764}")); // Default text presentation | 168 | try testing.expectEqual(@as(usize, 1), dw.strWidth("\u{2764}")); // Default text presentation |
| 77 | try testing.expectEqual(@as(usize, 1), self.strWidth("\u{2764}\u{FE0E}")); // Default text presentation with VS15 selector | 169 | try testing.expectEqual(@as(usize, 1), dw.strWidth("\u{2764}\u{FE0E}")); // Default text presentation with VS15 selector |
| 78 | try testing.expectEqual(@as(usize, 2), self.strWidth("\u{2764}\u{FE0F}")); // Default text presentation with VS16 selector | 170 | try testing.expectEqual(@as(usize, 2), dw.strWidth("\u{2764}\u{FE0F}")); // Default text presentation with VS16 selector |
| 79 | const expect_bs: usize = if (c0 == 0) 0 else 1 + c0; | 171 | const expect_bs: usize = if (c0 == 0) 0 else 1 + c0; |
| 80 | try testing.expectEqual(expect_bs, self.strWidth("A\x08")); // Backspace | 172 | try testing.expectEqual(expect_bs, dw.strWidth("A\x08")); // Backspace |
| 81 | try testing.expectEqual(expect_bs, self.strWidth("\x7FA")); // DEL | 173 | try testing.expectEqual(expect_bs, dw.strWidth("\x7FA")); // DEL |
| 82 | const expect_long_del: usize = if (c0 == 0) 0 else 1 + (c0 * 3); | 174 | const expect_long_del: usize = if (c0 == 0) 0 else 1 + (c0 * 3); |
| 83 | try testing.expectEqual(expect_long_del, self.strWidth("\x7FA\x08\x08")); // never less than 0 | 175 | try testing.expectEqual(expect_long_del, dw.strWidth("\x7FA\x08\x08")); // never less than 0 |
| 84 | 176 | ||
| 85 | // wcwidth Python lib tests. See: https://github.com/jquast/wcwidth/blob/master/tests/test_core.py | 177 | // wcwidth Python lib tests. See: https://github.com/jquast/wcwidth/blob/master/tests/test_core.py |
| 86 | const empty = ""; | 178 | const empty = ""; |
| 87 | try testing.expectEqual(@as(usize, 0), self.strWidth(empty)); | 179 | try testing.expectEqual(@as(usize, 0), dw.strWidth(empty)); |
| 88 | const with_null = "hello\x00world"; | 180 | const with_null = "hello\x00world"; |
| 89 | try testing.expectEqual(@as(usize, 10 + c0), self.strWidth(with_null)); | 181 | try testing.expectEqual(@as(usize, 10 + c0), dw.strWidth(with_null)); |
| 90 | const hello_jp = "コンニチハ, セカイ!"; | 182 | const hello_jp = "コンニチハ, セカイ!"; |
| 91 | try testing.expectEqual(@as(usize, 19), self.strWidth(hello_jp)); | 183 | try testing.expectEqual(@as(usize, 19), dw.strWidth(hello_jp)); |
| 92 | const control = "\x1b[0m"; | 184 | const control = "\x1b[0m"; |
| 93 | try testing.expectEqual(@as(usize, 3 + c0), self.strWidth(control)); | 185 | try testing.expectEqual(@as(usize, 3 + c0), dw.strWidth(control)); |
| 94 | const balinese = "\u{1B13}\u{1B28}\u{1B2E}\u{1B44}"; | 186 | const balinese = "\u{1B13}\u{1B28}\u{1B2E}\u{1B44}"; |
| 95 | try testing.expectEqual(@as(usize, 3), self.strWidth(balinese)); | 187 | try testing.expectEqual(@as(usize, 3), dw.strWidth(balinese)); |
| 96 | 188 | ||
| 97 | // These commented out tests require a new specification for complex scripts. | 189 | // These commented out tests require a new specification for complex scripts. |
| 98 | // See: https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf | 190 | // See: https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf |
| @@ -106,17 +198,17 @@ test "strWidth" { | |||
| 106 | // try testing.expectEqual(@as(usize, 3), strWidth(kannada_1)); | 198 | // try testing.expectEqual(@as(usize, 3), strWidth(kannada_1)); |
| 107 | // The following passes but as a mere coincidence. | 199 | // The following passes but as a mere coincidence. |
| 108 | const kannada_2 = "\u{0cb0}\u{0cbc}\u{0ccd}\u{0c9a}"; | 200 | const kannada_2 = "\u{0cb0}\u{0cbc}\u{0ccd}\u{0c9a}"; |
| 109 | try testing.expectEqual(@as(usize, 2), self.strWidth(kannada_2)); | 201 | try testing.expectEqual(@as(usize, 2), dw.strWidth(kannada_2)); |
| 110 | 202 | ||
| 111 | // From Rust https://github.com/jameslanska/unicode-display-width | 203 | // From Rust https://github.com/jameslanska/unicode-display-width |
| 112 | try testing.expectEqual(@as(usize, 15), self.strWidth("🔥🗡🍩👩🏻🚀⏰💃🏼🔦👍🏻")); | 204 | try testing.expectEqual(@as(usize, 15), dw.strWidth("🔥🗡🍩👩🏻🚀⏰💃🏼🔦👍🏻")); |
| 113 | try testing.expectEqual(@as(usize, 2), self.strWidth("🦀")); | 205 | try testing.expectEqual(@as(usize, 2), dw.strWidth("🦀")); |
| 114 | try testing.expectEqual(@as(usize, 2), self.strWidth("👨👩👧👧")); | 206 | try testing.expectEqual(@as(usize, 2), dw.strWidth("👨👩👧👧")); |
| 115 | try testing.expectEqual(@as(usize, 2), self.strWidth("👩🔬")); | 207 | try testing.expectEqual(@as(usize, 2), dw.strWidth("👩🔬")); |
| 116 | try testing.expectEqual(@as(usize, 9), self.strWidth("sane text")); | 208 | try testing.expectEqual(@as(usize, 9), dw.strWidth("sane text")); |
| 117 | try testing.expectEqual(@as(usize, 9), self.strWidth("Ẓ̌á̲l͔̝̞̄̑͌g̖̘̘̔̔͢͞͝o̪̔T̢̙̫̈̍͞e̬͈͕͌̏͑x̺̍ṭ̓̓ͅ")); | 209 | try testing.expectEqual(@as(usize, 9), dw.strWidth("Ẓ̌á̲l͔̝̞̄̑͌g̖̘̘̔̔͢͞͝o̪̔T̢̙̫̈̍͞e̬͈͕͌̏͑x̺̍ṭ̓̓ͅ")); |
| 118 | try testing.expectEqual(@as(usize, 17), self.strWidth("슬라바 우크라이나")); | 210 | try testing.expectEqual(@as(usize, 17), dw.strWidth("슬라바 우크라이나")); |
| 119 | try testing.expectEqual(@as(usize, 1), self.strWidth("\u{378}")); | 211 | try testing.expectEqual(@as(usize, 1), dw.strWidth("\u{378}")); |
| 120 | } | 212 | } |
| 121 | 213 | ||
| 122 | /// centers `str` in a new string of width `total_width` (in display cells) using `pad` as padding. | 214 | /// centers `str` in a new string of width `total_width` (in display cells) using `pad` as padding. |
| @@ -124,17 +216,17 @@ test "strWidth" { | |||
| 124 | /// receive one additional pad. This makes sure the returned string fills the requested width. | 216 | /// receive one additional pad. This makes sure the returned string fills the requested width. |
| 125 | /// Caller must free returned bytes with `allocator`. | 217 | /// Caller must free returned bytes with `allocator`. |
| 126 | pub fn center( | 218 | pub fn center( |
| 127 | self: Self, | 219 | dw: DisplayWidth, |
| 128 | allocator: mem.Allocator, | 220 | allocator: mem.Allocator, |
| 129 | str: []const u8, | 221 | str: []const u8, |
| 130 | total_width: usize, | 222 | total_width: usize, |
| 131 | pad: []const u8, | 223 | pad: []const u8, |
| 132 | ) ![]u8 { | 224 | ) ![]u8 { |
| 133 | const str_width = self.strWidth(str); | 225 | const str_width = dw.strWidth(str); |
| 134 | if (str_width > total_width) return error.StrTooLong; | 226 | if (str_width > total_width) return error.StrTooLong; |
| 135 | if (str_width == total_width) return try allocator.dupe(u8, str); | 227 | if (str_width == total_width) return try allocator.dupe(u8, str); |
| 136 | 228 | ||
| 137 | const pad_width = self.strWidth(pad); | 229 | const pad_width = dw.strWidth(pad); |
| 138 | if (pad_width > total_width or str_width + pad_width > total_width) return error.PadTooLong; | 230 | if (pad_width > total_width or str_width + pad_width > total_width) return error.PadTooLong; |
| 139 | 231 | ||
| 140 | const margin_width = @divFloor((total_width - str_width), 2); | 232 | const margin_width = @divFloor((total_width - str_width), 2); |
| @@ -165,62 +257,61 @@ pub fn center( | |||
| 165 | 257 | ||
| 166 | test "center" { | 258 | test "center" { |
| 167 | const allocator = testing.allocator; | 259 | const allocator = testing.allocator; |
| 168 | const data = try DisplayWidthData.init(allocator); | 260 | const dw = try DisplayWidth.init(allocator); |
| 169 | defer data.deinit(allocator); | 261 | defer dw.deinit(allocator); |
| 170 | const self = Self{ .data = &data }; | ||
| 171 | 262 | ||
| 172 | // Input and width both have odd length | 263 | // Input and width both have odd length |
| 173 | var centered = try self.center(allocator, "abc", 9, "*"); | 264 | var centered = try dw.center(allocator, "abc", 9, "*"); |
| 174 | try testing.expectEqualSlices(u8, "***abc***", centered); | 265 | try testing.expectEqualSlices(u8, "***abc***", centered); |
| 175 | 266 | ||
| 176 | // Input and width both have even length | 267 | // Input and width both have even length |
| 177 | testing.allocator.free(centered); | 268 | testing.allocator.free(centered); |
| 178 | centered = try self.center(allocator, "w😊w", 10, "-"); | 269 | centered = try dw.center(allocator, "w😊w", 10, "-"); |
| 179 | try testing.expectEqualSlices(u8, "---w😊w---", centered); | 270 | try testing.expectEqualSlices(u8, "---w😊w---", centered); |
| 180 | 271 | ||
| 181 | // Input has even length, width has odd length | 272 | // Input has even length, width has odd length |
| 182 | testing.allocator.free(centered); | 273 | testing.allocator.free(centered); |
| 183 | centered = try self.center(allocator, "1234", 9, "-"); | 274 | centered = try dw.center(allocator, "1234", 9, "-"); |
| 184 | try testing.expectEqualSlices(u8, "--1234---", centered); | 275 | try testing.expectEqualSlices(u8, "--1234---", centered); |
| 185 | 276 | ||
| 186 | // Input has odd length, width has even length | 277 | // Input has odd length, width has even length |
| 187 | testing.allocator.free(centered); | 278 | testing.allocator.free(centered); |
| 188 | centered = try self.center(allocator, "123", 8, "-"); | 279 | centered = try dw.center(allocator, "123", 8, "-"); |
| 189 | try testing.expectEqualSlices(u8, "--123---", centered); | 280 | try testing.expectEqualSlices(u8, "--123---", centered); |
| 190 | 281 | ||
| 191 | // Input is the same length as the width | 282 | // Input is the same length as the width |
| 192 | testing.allocator.free(centered); | 283 | testing.allocator.free(centered); |
| 193 | centered = try self.center(allocator, "123", 3, "-"); | 284 | centered = try dw.center(allocator, "123", 3, "-"); |
| 194 | try testing.expectEqualSlices(u8, "123", centered); | 285 | try testing.expectEqualSlices(u8, "123", centered); |
| 195 | 286 | ||
| 196 | // Input is empty | 287 | // Input is empty |
| 197 | testing.allocator.free(centered); | 288 | testing.allocator.free(centered); |
| 198 | centered = try self.center(allocator, "", 3, "-"); | 289 | centered = try dw.center(allocator, "", 3, "-"); |
| 199 | try testing.expectEqualSlices(u8, "---", centered); | 290 | try testing.expectEqualSlices(u8, "---", centered); |
| 200 | 291 | ||
| 201 | // Input is empty and width is zero | 292 | // Input is empty and width is zero |
| 202 | testing.allocator.free(centered); | 293 | testing.allocator.free(centered); |
| 203 | centered = try self.center(allocator, "", 0, "-"); | 294 | centered = try dw.center(allocator, "", 0, "-"); |
| 204 | try testing.expectEqualSlices(u8, "", centered); | 295 | try testing.expectEqualSlices(u8, "", centered); |
| 205 | 296 | ||
| 206 | // Input is longer than the width, which is an error | 297 | // Input is longer than the width, which is an error |
| 207 | testing.allocator.free(centered); | 298 | testing.allocator.free(centered); |
| 208 | try testing.expectError(error.StrTooLong, self.center(allocator, "123", 2, "-")); | 299 | try testing.expectError(error.StrTooLong, dw.center(allocator, "123", 2, "-")); |
| 209 | } | 300 | } |
| 210 | 301 | ||
| 211 | /// padLeft returns a new string of width `total_width` (in display cells) using `pad` as padding | 302 | /// padLeft returns a new string of width `total_width` (in display cells) using `pad` as padding |
| 212 | /// on the left side. Caller must free returned bytes with `allocator`. | 303 | /// on the left side. Caller must free returned bytes with `allocator`. |
| 213 | pub fn padLeft( | 304 | pub fn padLeft( |
| 214 | self: Self, | 305 | dw: DisplayWidth, |
| 215 | allocator: mem.Allocator, | 306 | allocator: mem.Allocator, |
| 216 | str: []const u8, | 307 | str: []const u8, |
| 217 | total_width: usize, | 308 | total_width: usize, |
| 218 | pad: []const u8, | 309 | pad: []const u8, |
| 219 | ) ![]u8 { | 310 | ) ![]u8 { |
| 220 | const str_width = self.strWidth(str); | 311 | const str_width = dw.strWidth(str); |
| 221 | if (str_width > total_width) return error.StrTooLong; | 312 | if (str_width > total_width) return error.StrTooLong; |
| 222 | 313 | ||
| 223 | const pad_width = self.strWidth(pad); | 314 | const pad_width = dw.strWidth(pad); |
| 224 | if (pad_width > total_width or str_width + pad_width > total_width) return error.PadTooLong; | 315 | if (pad_width > total_width or str_width + pad_width > total_width) return error.PadTooLong; |
| 225 | 316 | ||
| 226 | const margin_width = total_width - str_width; | 317 | const margin_width = total_width - str_width; |
| @@ -244,32 +335,31 @@ pub fn padLeft( | |||
| 244 | 335 | ||
| 245 | test "padLeft" { | 336 | test "padLeft" { |
| 246 | const allocator = testing.allocator; | 337 | const allocator = testing.allocator; |
| 247 | const data = try DisplayWidthData.init(allocator); | 338 | const dw = try DisplayWidth.init(allocator); |
| 248 | defer data.deinit(allocator); | 339 | defer dw.deinit(allocator); |
| 249 | const self = Self{ .data = &data }; | ||
| 250 | 340 | ||
| 251 | var right_aligned = try self.padLeft(allocator, "abc", 9, "*"); | 341 | var right_aligned = try dw.padLeft(allocator, "abc", 9, "*"); |
| 252 | defer testing.allocator.free(right_aligned); | 342 | defer testing.allocator.free(right_aligned); |
| 253 | try testing.expectEqualSlices(u8, "******abc", right_aligned); | 343 | try testing.expectEqualSlices(u8, "******abc", right_aligned); |
| 254 | 344 | ||
| 255 | testing.allocator.free(right_aligned); | 345 | testing.allocator.free(right_aligned); |
| 256 | right_aligned = try self.padLeft(allocator, "w😊w", 10, "-"); | 346 | right_aligned = try dw.padLeft(allocator, "w😊w", 10, "-"); |
| 257 | try testing.expectEqualSlices(u8, "------w😊w", right_aligned); | 347 | try testing.expectEqualSlices(u8, "------w😊w", right_aligned); |
| 258 | } | 348 | } |
| 259 | 349 | ||
| 260 | /// padRight returns a new string of width `total_width` (in display cells) using `pad` as padding | 350 | /// padRight returns a new string of width `total_width` (in display cells) using `pad` as padding |
| 261 | /// on the right side. Caller must free returned bytes with `allocator`. | 351 | /// on the right side. Caller must free returned bytes with `allocator`. |
| 262 | pub fn padRight( | 352 | pub fn padRight( |
| 263 | self: Self, | 353 | dw: DisplayWidth, |
| 264 | allocator: mem.Allocator, | 354 | allocator: mem.Allocator, |
| 265 | str: []const u8, | 355 | str: []const u8, |
| 266 | total_width: usize, | 356 | total_width: usize, |
| 267 | pad: []const u8, | 357 | pad: []const u8, |
| 268 | ) ![]u8 { | 358 | ) ![]u8 { |
| 269 | const str_width = self.strWidth(str); | 359 | const str_width = dw.strWidth(str); |
| 270 | if (str_width > total_width) return error.StrTooLong; | 360 | if (str_width > total_width) return error.StrTooLong; |
| 271 | 361 | ||
| 272 | const pad_width = self.strWidth(pad); | 362 | const pad_width = dw.strWidth(pad); |
| 273 | if (pad_width > total_width or str_width + pad_width > total_width) return error.PadTooLong; | 363 | if (pad_width > total_width or str_width + pad_width > total_width) return error.PadTooLong; |
| 274 | 364 | ||
| 275 | const margin_width = total_width - str_width; | 365 | const margin_width = total_width - str_width; |
| @@ -294,16 +384,15 @@ pub fn padRight( | |||
| 294 | 384 | ||
| 295 | test "padRight" { | 385 | test "padRight" { |
| 296 | const allocator = testing.allocator; | 386 | const allocator = testing.allocator; |
| 297 | const data = try DisplayWidthData.init(allocator); | 387 | const dw = try DisplayWidth.init(allocator); |
| 298 | defer data.deinit(allocator); | 388 | defer dw.deinit(allocator); |
| 299 | const self = Self{ .data = &data }; | ||
| 300 | 389 | ||
| 301 | var left_aligned = try self.padRight(allocator, "abc", 9, "*"); | 390 | var left_aligned = try dw.padRight(allocator, "abc", 9, "*"); |
| 302 | defer testing.allocator.free(left_aligned); | 391 | defer testing.allocator.free(left_aligned); |
| 303 | try testing.expectEqualSlices(u8, "abc******", left_aligned); | 392 | try testing.expectEqualSlices(u8, "abc******", left_aligned); |
| 304 | 393 | ||
| 305 | testing.allocator.free(left_aligned); | 394 | testing.allocator.free(left_aligned); |
| 306 | left_aligned = try self.padRight(allocator, "w😊w", 10, "-"); | 395 | left_aligned = try dw.padRight(allocator, "w😊w", 10, "-"); |
| 307 | try testing.expectEqualSlices(u8, "w😊w------", left_aligned); | 396 | try testing.expectEqualSlices(u8, "w😊w------", left_aligned); |
| 308 | } | 397 | } |
| 309 | 398 | ||
| @@ -311,7 +400,7 @@ test "padRight" { | |||
| 311 | /// `threshold` defines how far the last column of the last word can be | 400 | /// `threshold` defines how far the last column of the last word can be |
| 312 | /// from the edge. Caller must free returned bytes with `allocator`. | 401 | /// from the edge. Caller must free returned bytes with `allocator`. |
| 313 | pub fn wrap( | 402 | pub fn wrap( |
| 314 | self: Self, | 403 | dw: DisplayWidth, |
| 315 | allocator: mem.Allocator, | 404 | allocator: mem.Allocator, |
| 316 | str: []const u8, | 405 | str: []const u8, |
| 317 | columns: usize, | 406 | columns: usize, |
| @@ -329,7 +418,7 @@ pub fn wrap( | |||
| 329 | while (word_iter.next()) |word| { | 418 | while (word_iter.next()) |word| { |
| 330 | try result.appendSlice(word); | 419 | try result.appendSlice(word); |
| 331 | try result.append(' '); | 420 | try result.append(' '); |
| 332 | line_width += self.strWidth(word) + 1; | 421 | line_width += dw.strWidth(word) + 1; |
| 333 | 422 | ||
| 334 | if (line_width > columns or columns - line_width <= threshold) { | 423 | if (line_width > columns or columns - line_width <= threshold) { |
| 335 | try result.append('\n'); | 424 | try result.append('\n'); |
| @@ -347,12 +436,11 @@ pub fn wrap( | |||
| 347 | 436 | ||
| 348 | test "wrap" { | 437 | test "wrap" { |
| 349 | const allocator = testing.allocator; | 438 | const allocator = testing.allocator; |
| 350 | const data = try DisplayWidthData.init(allocator); | 439 | const dw = try DisplayWidth.init(allocator); |
| 351 | defer data.deinit(allocator); | 440 | defer dw.deinit(allocator); |
| 352 | const self = Self{ .data = &data }; | ||
| 353 | 441 | ||
| 354 | const input = "The quick brown fox\r\njumped over the lazy dog!"; | 442 | const input = "The quick brown fox\r\njumped over the lazy dog!"; |
| 355 | const got = try self.wrap(allocator, input, 10, 3); | 443 | const got = try dw.wrap(allocator, input, 10, 3); |
| 356 | defer testing.allocator.free(got); | 444 | defer testing.allocator.free(got); |
| 357 | const want = "The quick \nbrown fox \njumped \nover the \nlazy dog!"; | 445 | const want = "The quick \nbrown fox \njumped \nover the \nlazy dog!"; |
| 358 | try testing.expectEqualStrings(want, got); | 446 | try testing.expectEqualStrings(want, got); |
diff --git a/src/GraphemeData.zig b/src/GraphemeData.zig index 6d3174d..df025cb 100644 --- a/src/GraphemeData.zig +++ b/src/GraphemeData.zig | |||
| @@ -36,7 +36,7 @@ s3: []u8 = undefined, | |||
| 36 | 36 | ||
| 37 | const Self = @This(); | 37 | const Self = @This(); |
| 38 | 38 | ||
| 39 | pub fn init(allocator: mem.Allocator) mem.Allocator.Error!Self { | 39 | pub inline fn init(allocator: mem.Allocator) mem.Allocator.Error!Self { |
| 40 | const decompressor = compress.flate.inflate.decompressor; | 40 | const decompressor = compress.flate.inflate.decompressor; |
| 41 | const in_bytes = @embedFile("gbp"); | 41 | const in_bytes = @embedFile("gbp"); |
| 42 | var in_fbs = std.io.fixedBufferStream(in_bytes); | 42 | var in_fbs = std.io.fixedBufferStream(in_bytes); |
| @@ -65,23 +65,23 @@ pub fn init(allocator: mem.Allocator) mem.Allocator.Error!Self { | |||
| 65 | return self; | 65 | return self; |
| 66 | } | 66 | } |
| 67 | 67 | ||
| 68 | pub fn deinit(self: *const Self, allocator: mem.Allocator) void { | 68 | pub inline fn deinit(self: *const Self, allocator: mem.Allocator) void { |
| 69 | allocator.free(self.s1); | 69 | allocator.free(self.s1); |
| 70 | allocator.free(self.s2); | 70 | allocator.free(self.s2); |
| 71 | allocator.free(self.s3); | 71 | allocator.free(self.s3); |
| 72 | } | 72 | } |
| 73 | 73 | ||
| 74 | /// Lookup the grapheme break property for a code point. | 74 | /// Lookup the grapheme break property for a code point. |
| 75 | pub fn gbp(self: Self, cp: u21) Gbp { | 75 | pub inline fn gbp(self: Self, cp: u21) Gbp { |
| 76 | return @enumFromInt(self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]] >> 4); | 76 | return @enumFromInt(self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]] >> 4); |
| 77 | } | 77 | } |
| 78 | 78 | ||
| 79 | /// Lookup the indic syllable type for a code point. | 79 | /// Lookup the indic syllable type for a code point. |
| 80 | pub fn indic(self: Self, cp: u21) Indic { | 80 | pub inline fn indic(self: Self, cp: u21) Indic { |
| 81 | return @enumFromInt((self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]] >> 1) & 0x7); | 81 | return @enumFromInt((self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]] >> 1) & 0x7); |
| 82 | } | 82 | } |
| 83 | 83 | ||
| 84 | /// Lookup the indic syllable type for a code point. | 84 | /// Lookup the emoji property for a code point. |
| 85 | pub fn isEmoji(self: Self, cp: u21) bool { | 85 | pub inline fn isEmoji(self: Self, cp: u21) bool { |
| 86 | return self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]] & 1 == 1; | 86 | return self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]] & 1 == 1; |
| 87 | } | 87 | } |
diff --git a/src/Normalize.zig b/src/Normalize.zig index a28b708..b738b27 100644 --- a/src/Normalize.zig +++ b/src/Normalize.zig | |||
| @@ -177,7 +177,7 @@ test "decompose" { | |||
| 177 | const allocator = testing.allocator; | 177 | const allocator = testing.allocator; |
| 178 | var data: NormData = undefined; | 178 | var data: NormData = undefined; |
| 179 | try NormData.init(&data, allocator); | 179 | try NormData.init(&data, allocator); |
| 180 | defer data.deinit(); | 180 | defer data.deinit(allocator); |
| 181 | var n = Self{ .norm_data = &data }; | 181 | var n = Self{ .norm_data = &data }; |
| 182 | 182 | ||
| 183 | var buf: [18]u21 = undefined; | 183 | var buf: [18]u21 = undefined; |
| @@ -307,11 +307,11 @@ test "nfd ASCII / no-alloc" { | |||
| 307 | const allocator = testing.allocator; | 307 | const allocator = testing.allocator; |
| 308 | var data: NormData = undefined; | 308 | var data: NormData = undefined; |
| 309 | try NormData.init(&data, allocator); | 309 | try NormData.init(&data, allocator); |
| 310 | defer data.deinit(); | 310 | defer data.deinit(allocator); |
| 311 | const n = Self{ .norm_data = &data }; | 311 | const n = Self{ .norm_data = &data }; |
| 312 | 312 | ||
| 313 | const result = try n.nfd(allocator, "Hello World!"); | 313 | const result = try n.nfd(allocator, "Hello World!"); |
| 314 | defer result.deinit(); | 314 | defer result.deinit(allocator); |
| 315 | 315 | ||
| 316 | try testing.expectEqualStrings("Hello World!", result.slice); | 316 | try testing.expectEqualStrings("Hello World!", result.slice); |
| 317 | } | 317 | } |
| @@ -320,11 +320,11 @@ test "nfd !ASCII / alloc" { | |||
| 320 | const allocator = testing.allocator; | 320 | const allocator = testing.allocator; |
| 321 | var data: NormData = undefined; | 321 | var data: NormData = undefined; |
| 322 | try NormData.init(&data, allocator); | 322 | try NormData.init(&data, allocator); |
| 323 | defer data.deinit(); | 323 | defer data.deinit(allocator); |
| 324 | const n = Self{ .norm_data = &data }; | 324 | const n = Self{ .norm_data = &data }; |
| 325 | 325 | ||
| 326 | const result = try n.nfd(allocator, "Héllo World! \u{3d3}"); | 326 | const result = try n.nfd(allocator, "Héllo World! \u{3d3}"); |
| 327 | defer result.deinit(); | 327 | defer result.deinit(allocator); |
| 328 | 328 | ||
| 329 | try testing.expectEqualStrings("He\u{301}llo World! \u{3d2}\u{301}", result.slice); | 329 | try testing.expectEqualStrings("He\u{301}llo World! \u{3d2}\u{301}", result.slice); |
| 330 | } | 330 | } |
| @@ -333,11 +333,11 @@ test "nfkd ASCII / no-alloc" { | |||
| 333 | const allocator = testing.allocator; | 333 | const allocator = testing.allocator; |
| 334 | var data: NormData = undefined; | 334 | var data: NormData = undefined; |
| 335 | try NormData.init(&data, allocator); | 335 | try NormData.init(&data, allocator); |
| 336 | defer data.deinit(); | 336 | defer data.deinit(allocator); |
| 337 | const n = Self{ .norm_data = &data }; | 337 | const n = Self{ .norm_data = &data }; |
| 338 | 338 | ||
| 339 | const result = try n.nfkd(allocator, "Hello World!"); | 339 | const result = try n.nfkd(allocator, "Hello World!"); |
| 340 | defer result.deinit(); | 340 | defer result.deinit(allocator); |
| 341 | 341 | ||
| 342 | try testing.expectEqualStrings("Hello World!", result.slice); | 342 | try testing.expectEqualStrings("Hello World!", result.slice); |
| 343 | } | 343 | } |
| @@ -346,11 +346,11 @@ test "nfkd !ASCII / alloc" { | |||
| 346 | const allocator = testing.allocator; | 346 | const allocator = testing.allocator; |
| 347 | var data: NormData = undefined; | 347 | var data: NormData = undefined; |
| 348 | try NormData.init(&data, allocator); | 348 | try NormData.init(&data, allocator); |
| 349 | defer data.deinit(); | 349 | defer data.deinit(allocator); |
| 350 | const n = Self{ .norm_data = &data }; | 350 | const n = Self{ .norm_data = &data }; |
| 351 | 351 | ||
| 352 | const result = try n.nfkd(allocator, "Héllo World! \u{3d3}"); | 352 | const result = try n.nfkd(allocator, "Héllo World! \u{3d3}"); |
| 353 | defer result.deinit(); | 353 | defer result.deinit(allocator); |
| 354 | 354 | ||
| 355 | try testing.expectEqualStrings("He\u{301}llo World! \u{3a5}\u{301}", result.slice); | 355 | try testing.expectEqualStrings("He\u{301}llo World! \u{3a5}\u{301}", result.slice); |
| 356 | } | 356 | } |
| @@ -546,11 +546,11 @@ test "nfc" { | |||
| 546 | const allocator = testing.allocator; | 546 | const allocator = testing.allocator; |
| 547 | var data: NormData = undefined; | 547 | var data: NormData = undefined; |
| 548 | try NormData.init(&data, allocator); | 548 | try NormData.init(&data, allocator); |
| 549 | defer data.deinit(); | 549 | defer data.deinit(allocator); |
| 550 | const n = Self{ .norm_data = &data }; | 550 | const n = Self{ .norm_data = &data }; |
| 551 | 551 | ||
| 552 | const result = try n.nfc(allocator, "Complex char: \u{3D2}\u{301}"); | 552 | const result = try n.nfc(allocator, "Complex char: \u{3D2}\u{301}"); |
| 553 | defer result.deinit(); | 553 | defer result.deinit(allocator); |
| 554 | 554 | ||
| 555 | try testing.expectEqualStrings("Complex char: \u{3D3}", result.slice); | 555 | try testing.expectEqualStrings("Complex char: \u{3D3}", result.slice); |
| 556 | } | 556 | } |
| @@ -559,11 +559,11 @@ test "nfkc" { | |||
| 559 | const allocator = testing.allocator; | 559 | const allocator = testing.allocator; |
| 560 | var data: NormData = undefined; | 560 | var data: NormData = undefined; |
| 561 | try NormData.init(&data, allocator); | 561 | try NormData.init(&data, allocator); |
| 562 | defer data.deinit(); | 562 | defer data.deinit(allocator); |
| 563 | const n = Self{ .norm_data = &data }; | 563 | const n = Self{ .norm_data = &data }; |
| 564 | 564 | ||
| 565 | const result = try n.nfkc(allocator, "Complex char: \u{03A5}\u{0301}"); | 565 | const result = try n.nfkc(allocator, "Complex char: \u{03A5}\u{0301}"); |
| 566 | defer result.deinit(); | 566 | defer result.deinit(allocator); |
| 567 | 567 | ||
| 568 | try testing.expectEqualStrings("Complex char: \u{038E}", result.slice); | 568 | try testing.expectEqualStrings("Complex char: \u{038E}", result.slice); |
| 569 | } | 569 | } |
| @@ -582,7 +582,7 @@ test "eql" { | |||
| 582 | const allocator = testing.allocator; | 582 | const allocator = testing.allocator; |
| 583 | var data: NormData = undefined; | 583 | var data: NormData = undefined; |
| 584 | try NormData.init(&data, allocator); | 584 | try NormData.init(&data, allocator); |
| 585 | defer data.deinit(); | 585 | defer data.deinit(allocator); |
| 586 | const n = Self{ .norm_data = &data }; | 586 | const n = Self{ .norm_data = &data }; |
| 587 | 587 | ||
| 588 | try testing.expect(try n.eql(allocator, "foé", "foe\u{0301}")); | 588 | try testing.expect(try n.eql(allocator, "foé", "foe\u{0301}")); |
| @@ -628,5 +628,4 @@ test "isLatin1Only" { | |||
| 628 | try testing.expect(isLatin1Only(latin1_only)); | 628 | try testing.expect(isLatin1Only(latin1_only)); |
| 629 | const not_latin1_only = "Héllo, World! \u{3d3}"; | 629 | const not_latin1_only = "Héllo, World! \u{3d3}"; |
| 630 | try testing.expect(!isLatin1Only(not_latin1_only)); | 630 | try testing.expect(!isLatin1Only(not_latin1_only)); |
| 631 | try testing.expect(false); | ||
| 632 | } | 631 | } |
diff --git a/src/WidthData.zig b/src/WidthData.zig index b07a679..ca7eaf0 100644 --- a/src/WidthData.zig +++ b/src/WidthData.zig | |||
| @@ -4,15 +4,36 @@ const compress = std.compress; | |||
| 4 | const mem = std.mem; | 4 | const mem = std.mem; |
| 5 | const testing = std.testing; | 5 | const testing = std.testing; |
| 6 | 6 | ||
| 7 | const GraphemeData = @import("GraphemeData"); | 7 | const Graphemes = @import("Graphemes"); |
| 8 | 8 | ||
| 9 | g_data: GraphemeData, | 9 | g_data: Graphemes, |
| 10 | s1: []u16 = undefined, | 10 | s1: []u16 = undefined, |
| 11 | s2: []i4 = undefined, | 11 | s2: []i4 = undefined, |
| 12 | owns_gdata: bool, | ||
| 12 | 13 | ||
| 13 | const Self = @This(); | 14 | const Self = @This(); |
| 14 | 15 | ||
| 15 | pub fn init(allocator: mem.Allocator) mem.Allocator.Error!Self { | 16 | pub fn init(allocator: mem.Allocator) mem.Allocator.Error!Self { |
| 17 | var self: Self = try Self.setup(allocator); | ||
| 18 | errdefer { | ||
| 19 | allocator.free(self.s1); | ||
| 20 | allocator.free(self.s2); | ||
| 21 | } | ||
| 22 | self.owns_gdata = true; | ||
| 23 | self.g_data = try Graphemes.init(allocator); | ||
| 24 | errdefer self.g_data.deinit(allocator); | ||
| 25 | return self; | ||
| 26 | } | ||
| 27 | |||
| 28 | pub fn initWithGraphemeData(allocator: mem.Allocator, g_data: Graphemes) mem.Allocator.Error!Self { | ||
| 29 | var self = try Self.setup(allocator); | ||
| 30 | self.g_data = g_data; | ||
| 31 | self.owns_gdata = false; | ||
| 32 | return self; | ||
| 33 | } | ||
| 34 | |||
| | | 35 | // Sets up the display width tables (`s1`, `s2`), leaving `g_data` undefined. |
| 36 | fn setup(allocator: mem.Allocator) mem.Allocator.Error!Self { | ||
| 16 | const decompressor = compress.flate.inflate.decompressor; | 37 | const decompressor = compress.flate.inflate.decompressor; |
| 17 | const in_bytes = @embedFile("dwp"); | 38 | const in_bytes = @embedFile("dwp"); |
| 18 | var in_fbs = std.io.fixedBufferStream(in_bytes); | 39 | var in_fbs = std.io.fixedBufferStream(in_bytes); |
| @@ -21,10 +42,7 @@ pub fn init(allocator: mem.Allocator) mem.Allocator.Error!Self { | |||
| 21 | 42 | ||
| 22 | const endian = builtin.cpu.arch.endian(); | 43 | const endian = builtin.cpu.arch.endian(); |
| 23 | 44 | ||
| 24 | var self = Self{ | 45 | var self: Self = undefined; |
| 25 | .g_data = try GraphemeData.init(allocator), | ||
| 26 | }; | ||
| 27 | errdefer self.g_data.deinit(allocator); | ||
| 28 | 46 | ||
| 29 | const stage_1_len: u16 = reader.readInt(u16, endian) catch unreachable; | 47 | const stage_1_len: u16 = reader.readInt(u16, endian) catch unreachable; |
| 30 | self.s1 = try allocator.alloc(u16, stage_1_len); | 48 | self.s1 = try allocator.alloc(u16, stage_1_len); |
| @@ -42,7 +60,7 @@ pub fn init(allocator: mem.Allocator) mem.Allocator.Error!Self { | |||
| 42 | pub fn deinit(self: *const Self, allocator: mem.Allocator) void { | 60 | pub fn deinit(self: *const Self, allocator: mem.Allocator) void { |
| 43 | allocator.free(self.s1); | 61 | allocator.free(self.s1); |
| 44 | allocator.free(self.s2); | 62 | allocator.free(self.s2); |
| 45 | self.g_data.deinit(allocator); | 63 | if (self.owns_gdata) self.g_data.deinit(allocator); |
| 46 | } | 64 | } |
| 47 | 65 | ||
| 48 | /// codePointWidth returns the number of cells `cp` requires when rendered | 66 | /// codePointWidth returns the number of cells `cp` requires when rendered |
diff --git a/src/grapheme.zig b/src/grapheme.zig index 25fd71d..79cd2c6 100644 --- a/src/grapheme.zig +++ b/src/grapheme.zig | |||
| @@ -1,10 +1,99 @@ | |||
| 1 | const std = @import("std"); | 1 | const std = @import("std"); |
| 2 | const builtin = @import("builtin"); | ||
| 2 | const mem = std.mem; | 3 | const mem = std.mem; |
| 4 | const Allocator = mem.Allocator; | ||
| 5 | const compress = std.compress; | ||
| 3 | const unicode = std.unicode; | 6 | const unicode = std.unicode; |
| 4 | 7 | ||
| 5 | const CodePoint = @import("code_point").CodePoint; | 8 | const CodePoint = @import("code_point").CodePoint; |
| 6 | const CodePointIterator = @import("code_point").Iterator; | 9 | const CodePointIterator = @import("code_point").Iterator; |
| 7 | pub const GraphemeData = @import("GraphemeData"); | 10 | |
| 11 | s1: []u16 = undefined, | ||
| 12 | s2: []u16 = undefined, | ||
| 13 | s3: []u8 = undefined, | ||
| 14 | |||
| 15 | const Graphemes = @This(); | ||
| 16 | |||
| 17 | pub inline fn init(allocator: mem.Allocator) mem.Allocator.Error!Graphemes { | ||
| 18 | const decompressor = compress.flate.inflate.decompressor; | ||
| 19 | const in_bytes = @embedFile("gbp"); | ||
| 20 | var in_fbs = std.io.fixedBufferStream(in_bytes); | ||
| 21 | var in_decomp = decompressor(.raw, in_fbs.reader()); | ||
| 22 | var reader = in_decomp.reader(); | ||
| 23 | |||
| 24 | const endian = builtin.cpu.arch.endian(); | ||
| 25 | |||
| 26 | var self = Graphemes{}; | ||
| 27 | |||
| 28 | const s1_len: u16 = reader.readInt(u16, endian) catch unreachable; | ||
| 29 | self.s1 = try allocator.alloc(u16, s1_len); | ||
| 30 | errdefer allocator.free(self.s1); | ||
| 31 | for (0..s1_len) |i| self.s1[i] = reader.readInt(u16, endian) catch unreachable; | ||
| 32 | |||
| 33 | const s2_len: u16 = reader.readInt(u16, endian) catch unreachable; | ||
| 34 | self.s2 = try allocator.alloc(u16, s2_len); | ||
| 35 | errdefer allocator.free(self.s2); | ||
| 36 | for (0..s2_len) |i| self.s2[i] = reader.readInt(u16, endian) catch unreachable; | ||
| 37 | |||
| 38 | const s3_len: u16 = reader.readInt(u16, endian) catch unreachable; | ||
| 39 | self.s3 = try allocator.alloc(u8, s3_len); | ||
| 40 | errdefer allocator.free(self.s3); | ||
| 41 | _ = reader.readAll(self.s3) catch unreachable; | ||
| 42 | |||
| 43 | return self; | ||
| 44 | } | ||
| 45 | |||
| 46 | pub fn deinit(graphemes: *const Graphemes, allocator: mem.Allocator) void { | ||
| 47 | allocator.free(graphemes.s1); | ||
| 48 | allocator.free(graphemes.s2); | ||
| 49 | allocator.free(graphemes.s3); | ||
| 50 | } | ||
| 51 | |||
| 52 | /// Lookup the grapheme break property for a code point. | ||
| 53 | pub fn gbp(graphemes: Graphemes, cp: u21) Gbp { | ||
| 54 | return @enumFromInt(graphemes.s3[graphemes.s2[graphemes.s1[cp >> 8] + (cp & 0xff)]] >> 4); | ||
| 55 | } | ||
| 56 | |||
| 57 | /// Lookup the indic syllable type for a code point. | ||
| 58 | pub fn indic(graphemes: Graphemes, cp: u21) Indic { | ||
| 59 | return @enumFromInt((graphemes.s3[graphemes.s2[graphemes.s1[cp >> 8] + (cp & 0xff)]] >> 1) & 0x7); | ||
| 60 | } | ||
| 61 | |||
| 62 | /// Lookup the emoji property for a code point. | ||
| 63 | pub fn isEmoji(graphemes: Graphemes, cp: u21) bool { | ||
| 64 | return graphemes.s3[graphemes.s2[graphemes.s1[cp >> 8] + (cp & 0xff)]] & 1 == 1; | ||
| 65 | } | ||
| 66 | |||
| 67 | pub fn iterator(graphemes: *const Graphemes, string: []const u8) Iterator { | ||
| 68 | return Iterator.init(string, graphemes); | ||
| 69 | } | ||
| 70 | |||
| 71 | /// Indic syllable type. | ||
| 72 | pub const Indic = enum { | ||
| 73 | none, | ||
| 74 | |||
| 75 | Consonant, | ||
| 76 | Extend, | ||
| 77 | Linker, | ||
| 78 | }; | ||
| 79 | |||
| 80 | /// Grapheme break property. | ||
| 81 | pub const Gbp = enum { | ||
| 82 | none, | ||
| 83 | Control, | ||
| 84 | CR, | ||
| 85 | Extend, | ||
| 86 | L, | ||
| 87 | LF, | ||
| 88 | LV, | ||
| 89 | LVT, | ||
| 90 | Prepend, | ||
| 91 | Regional_Indicator, | ||
| 92 | SpacingMark, | ||
| 93 | T, | ||
| 94 | V, | ||
| 95 | ZWJ, | ||
| 96 | }; | ||
| 8 | 97 | ||
| 9 | /// `Grapheme` represents a Unicode grapheme cluster by its length and offset in the source bytes. | 98 | /// `Grapheme` represents a Unicode grapheme cluster by its length and offset in the source bytes. |
| 10 | pub const Grapheme = struct { | 99 | pub const Grapheme = struct { |
| @@ -22,12 +111,12 @@ pub const Grapheme = struct { | |||
| 22 | pub const Iterator = struct { | 111 | pub const Iterator = struct { |
| 23 | buf: [2]?CodePoint = .{ null, null }, | 112 | buf: [2]?CodePoint = .{ null, null }, |
| 24 | cp_iter: CodePointIterator, | 113 | cp_iter: CodePointIterator, |
| 25 | data: *const GraphemeData, | 114 | data: *const Graphemes, |
| 26 | 115 | ||
| 27 | const Self = @This(); | 116 | const Self = @This(); |
| 28 | 117 | ||
| 29 | /// Assumes `str` is valid UTF-8. | 118 | /// Assumes `str` is valid UTF-8. |
| 30 | pub fn init(str: []const u8, data: *const GraphemeData) Self { | 119 | pub fn init(str: []const u8, data: *const Graphemes) Self { |
| 31 | var self = Self{ .cp_iter = .{ .bytes = str }, .data = data }; | 120 | var self = Self{ .cp_iter = .{ .bytes = str }, .data = data }; |
| 32 | self.advance(); | 121 | self.advance(); |
| 33 | return self; | 122 | return self; |
| @@ -149,7 +238,7 @@ pub const Iterator = struct { | |||
| 149 | }; | 238 | }; |
| 150 | 239 | ||
| 151 | // Predicates | 240 | // Predicates |
| 152 | fn isBreaker(cp: u21, data: *const GraphemeData) bool { | 241 | fn isBreaker(cp: u21, data: *const Graphemes) bool { |
| 153 | // Extract relevant properties. | 242 | // Extract relevant properties. |
| 154 | const cp_gbp_prop = data.gbp(cp); | 243 | const cp_gbp_prop = data.gbp(cp); |
| 155 | return cp == '\x0d' or cp == '\x0a' or cp_gbp_prop == .Control; | 244 | return cp == '\x0d' or cp == '\x0a' or cp_gbp_prop == .Control; |
| @@ -202,7 +291,7 @@ pub const State = struct { | |||
| 202 | pub fn graphemeBreak( | 291 | pub fn graphemeBreak( |
| 203 | cp1: u21, | 292 | cp1: u21, |
| 204 | cp2: u21, | 293 | cp2: u21, |
| 205 | data: *const GraphemeData, | 294 | data: *const Graphemes, |
| 206 | state: *State, | 295 | state: *State, |
| 207 | ) bool { | 296 | ) bool { |
| 208 | // Extract relevant properties. | 297 | // Extract relevant properties. |
| @@ -306,25 +395,25 @@ test "Segmentation ZWJ and ZWSP emoji sequences" { | |||
| 306 | const with_zwsp = seq_1 ++ "\u{200B}" ++ seq_2; | 395 | const with_zwsp = seq_1 ++ "\u{200B}" ++ seq_2; |
| 307 | const no_joiner = seq_1 ++ seq_2; | 396 | const no_joiner = seq_1 ++ seq_2; |
| 308 | 397 | ||
| 309 | const data = try GraphemeData.init(std.testing.allocator); | 398 | const graphemes = try Graphemes.init(std.testing.allocator); |
| 310 | defer data.deinit(std.testing.allocator); | 399 | defer graphemes.deinit(std.testing.allocator); |
| 311 | 400 | ||
| 312 | { | 401 | { |
| 313 | var iter = Iterator.init(with_zwj, &data); | 402 | var iter = graphemes.iterator(with_zwj); |
| 314 | var i: usize = 0; | 403 | var i: usize = 0; |
| 315 | while (iter.next()) |_| : (i += 1) {} | 404 | while (iter.next()) |_| : (i += 1) {} |
| 316 | try std.testing.expectEqual(@as(usize, 1), i); | 405 | try std.testing.expectEqual(@as(usize, 1), i); |
| 317 | } | 406 | } |
| 318 | 407 | ||
| 319 | { | 408 | { |
| 320 | var iter = Iterator.init(with_zwsp, &data); | 409 | var iter = graphemes.iterator(with_zwsp); |
| 321 | var i: usize = 0; | 410 | var i: usize = 0; |
| 322 | while (iter.next()) |_| : (i += 1) {} | 411 | while (iter.next()) |_| : (i += 1) {} |
| 323 | try std.testing.expectEqual(@as(usize, 3), i); | 412 | try std.testing.expectEqual(@as(usize, 3), i); |
| 324 | } | 413 | } |
| 325 | 414 | ||
| 326 | { | 415 | { |
| 327 | var iter = Iterator.init(no_joiner, &data); | 416 | var iter = graphemes.iterator(no_joiner); |
| 328 | var i: usize = 0; | 417 | var i: usize = 0; |
| 329 | while (iter.next()) |_| : (i += 1) {} | 418 | while (iter.next()) |_| : (i += 1) {} |
| 330 | try std.testing.expectEqual(@as(usize, 2), i); | 419 | try std.testing.expectEqual(@as(usize, 2), i); |
diff --git a/src/unicode_tests.zig b/src/unicode_tests.zig index 7236ff6..de1b9ec 100644 --- a/src/unicode_tests.zig +++ b/src/unicode_tests.zig | |||
| @@ -9,7 +9,7 @@ const unicode = std.unicode; | |||
| 9 | 9 | ||
| 10 | const grapheme = @import("grapheme"); | 10 | const grapheme = @import("grapheme"); |
| 11 | const Grapheme = @import("grapheme").Grapheme; | 11 | const Grapheme = @import("grapheme").Grapheme; |
| 12 | const GraphemeData = @import("grapheme").GraphemeData; | 12 | const Graphemes = @import("grapheme"); |
| 13 | const GraphemeIterator = @import("grapheme").Iterator; | 13 | const GraphemeIterator = @import("grapheme").Iterator; |
| 14 | const Normalize = @import("Normalize"); | 14 | const Normalize = @import("Normalize"); |
| 15 | 15 | ||
| @@ -18,10 +18,10 @@ comptime { | |||
| 18 | } | 18 | } |
| 19 | test "Iterator.peek" { | 19 | test "Iterator.peek" { |
| 20 | const peek_seq = "aΔ👨🏻🌾→"; | 20 | const peek_seq = "aΔ👨🏻🌾→"; |
| 21 | const data = try GraphemeData.init(std.testing.allocator); | 21 | const data = try Graphemes.init(std.testing.allocator); |
| 22 | defer data.deinit(std.testing.allocator); | 22 | defer data.deinit(std.testing.allocator); |
| 23 | 23 | ||
| 24 | var iter = grapheme.Iterator.init(peek_seq, &data); | 24 | var iter = data.iterator(peek_seq); |
| 25 | const peek_a = iter.peek().?; | 25 | const peek_a = iter.peek().?; |
| 26 | const next_a = iter.next().?; | 26 | const next_a = iter.next().?; |
| 27 | try std.testing.expectEqual(peek_a, next_a); | 27 | try std.testing.expectEqual(peek_a, next_a); |
| @@ -162,7 +162,7 @@ test "Segmentation GraphemeIterator" { | |||
| 162 | var buf_reader = std.io.bufferedReader(file.reader()); | 162 | var buf_reader = std.io.bufferedReader(file.reader()); |
| 163 | var input_stream = buf_reader.reader(); | 163 | var input_stream = buf_reader.reader(); |
| 164 | 164 | ||
| 165 | const data = try GraphemeData.init(allocator); | 165 | const data = try Graphemes.init(allocator); |
| 166 | defer data.deinit(allocator); | 166 | defer data.deinit(allocator); |
| 167 | 167 | ||
| 168 | var buf: [4096]u8 = undefined; | 168 | var buf: [4096]u8 = undefined; |
| @@ -207,7 +207,7 @@ test "Segmentation GraphemeIterator" { | |||
| 207 | } | 207 | } |
| 208 | 208 | ||
| 209 | // std.debug.print("\nline {}: {s}\n", .{ line_no, all_bytes.items }); | 209 | // std.debug.print("\nline {}: {s}\n", .{ line_no, all_bytes.items }); |
| 210 | var iter = GraphemeIterator.init(all_bytes.items, &data); | 210 | var iter = data.iterator(all_bytes.items); |
| 211 | 211 | ||
| 212 | // Check. | 212 | // Check. |
| 213 | for (want.items) |want_gc| { | 213 | for (want.items) |want_gc| { |