diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/DisplayWidth.zig | 240 | ||||
| -rw-r--r-- | src/GraphemeData.zig | 12 | ||||
| -rw-r--r-- | src/Normalize.zig | 29 | ||||
| -rw-r--r-- | src/WidthData.zig | 32 | ||||
| -rw-r--r-- | src/grapheme.zig | 109 | ||||
| -rw-r--r-- | src/unicode_tests.zig | 10 |
6 files changed, 313 insertions, 119 deletions
diff --git a/src/DisplayWidth.zig b/src/DisplayWidth.zig index 8631bd4..11ec59e 100644 --- a/src/DisplayWidth.zig +++ b/src/DisplayWidth.zig | |||
| @@ -2,38 +2,131 @@ const std = @import("std"); | |||
| 2 | const builtin = @import("builtin"); | 2 | const builtin = @import("builtin"); |
| 3 | const options = @import("options"); | 3 | const options = @import("options"); |
| 4 | const ArrayList = std.ArrayList; | 4 | const ArrayList = std.ArrayList; |
| 5 | const compress = std.compress; | ||
| 5 | const mem = std.mem; | 6 | const mem = std.mem; |
| 6 | const simd = std.simd; | 7 | const simd = std.simd; |
| 7 | const testing = std.testing; | 8 | const testing = std.testing; |
| 8 | 9 | ||
| 9 | const ascii = @import("ascii"); | 10 | const ascii = @import("ascii"); |
| 10 | const CodePointIterator = @import("code_point").Iterator; | 11 | const CodePointIterator = @import("code_point").Iterator; |
| 11 | const GraphemeIterator = @import("grapheme").Iterator; | ||
| 12 | pub const DisplayWidthData = @import("DisplayWidthData"); | 12 | pub const DisplayWidthData = @import("DisplayWidthData"); |
| 13 | 13 | ||
| 14 | data: *const DisplayWidthData, | 14 | const Graphemes = @import("Graphemes"); |
| 15 | 15 | ||
| 16 | const Self = @This(); | 16 | g_data: Graphemes, |
| 17 | s1: []u16 = undefined, | ||
| 18 | s2: []i4 = undefined, | ||
| 19 | owns_gdata: bool, | ||
| 20 | |||
| 21 | const DisplayWidth = @This(); | ||
| 22 | |||
| 23 | pub fn init(allocator: mem.Allocator) mem.Allocator.Error!DisplayWidth { | ||
| 24 | var dw: DisplayWidth = try DisplayWidth.setup(allocator); | ||
| 25 | errdefer { | ||
| 26 | allocator.free(dw.s1); | ||
| 27 | allocator.free(dw.s2); | ||
| 28 | } | ||
| 29 | dw.owns_gdata = true; | ||
| 30 | dw.g_data = try Graphemes.init(allocator); | ||
| 31 | errdefer dw.g_data.deinit(allocator); | ||
| 32 | return dw; | ||
| 33 | } | ||
| 34 | |||
| 35 | pub fn initWithGraphemeData(allocator: mem.Allocator, g_data: Graphemes) mem.Allocator.Error!DisplayWidth { | ||
| 36 | var dw = try DisplayWidth.setup(allocator); | ||
| 37 | dw.g_data = g_data; | ||
| 38 | dw.owns_gdata = false; | ||
| 39 | return dw; | ||
| 40 | } | ||
| 41 | |||
| 42 | // Sets up the DisplayWidthData, leaving the GraphemeData undefined. | ||
| 43 | fn setup(allocator: mem.Allocator) mem.Allocator.Error!DisplayWidth { | ||
| 44 | const decompressor = compress.flate.inflate.decompressor; | ||
| 45 | const in_bytes = @embedFile("dwp"); | ||
| 46 | var in_fbs = std.io.fixedBufferStream(in_bytes); | ||
| 47 | var in_decomp = decompressor(.raw, in_fbs.reader()); | ||
| 48 | var reader = in_decomp.reader(); | ||
| 49 | |||
| 50 | const endian = builtin.cpu.arch.endian(); | ||
| 51 | |||
| 52 | var dw: DisplayWidth = undefined; | ||
| 53 | |||
| 54 | const stage_1_len: u16 = reader.readInt(u16, endian) catch unreachable; | ||
| 55 | dw.s1 = try allocator.alloc(u16, stage_1_len); | ||
| 56 | errdefer allocator.free(dw.s1); | ||
| 57 | for (0..stage_1_len) |i| dw.s1[i] = reader.readInt(u16, endian) catch unreachable; | ||
| 58 | |||
| 59 | const stage_2_len: u16 = reader.readInt(u16, endian) catch unreachable; | ||
| 60 | dw.s2 = try allocator.alloc(i4, stage_2_len); | ||
| 61 | errdefer allocator.free(dw.s2); | ||
| 62 | for (0..stage_2_len) |i| dw.s2[i] = @intCast(reader.readInt(i8, endian) catch unreachable); | ||
| 63 | |||
| 64 | return dw; | ||
| 65 | } | ||
| 66 | |||
| 67 | pub fn deinit(dw: *const DisplayWidth, allocator: mem.Allocator) void { | ||
| 68 | allocator.free(dw.s1); | ||
| 69 | allocator.free(dw.s2); | ||
| 70 | if (dw.owns_gdata) dw.g_data.deinit(allocator); | ||
| 71 | } | ||
| 72 | |||
| 73 | /// codePointWidth returns the number of cells `cp` requires when rendered | ||
| 74 | /// in a fixed-pitch font (i.e. a terminal screen). This can range from -1 to | ||
| 75 | /// 3, where BACKSPACE and DELETE return -1 and 3-em-dash returns 3. C0/C1 | ||
| 76 | /// control codes return 0. If `cjk` is true, ambiguous code points return 2, | ||
| 77 | /// otherwise they return 1. | ||
| 78 | pub fn codePointWidth(dw: DisplayWidth, cp: u21) i4 { | ||
| 79 | return dw.s2[dw.s1[cp >> 8] + (cp & 0xff)]; | ||
| 80 | } | ||
| 81 | |||
| 82 | test "codePointWidth" { | ||
| 83 | const dw = try DisplayWidth.init(std.testing.allocator); | ||
| 84 | defer dw.deinit(std.testing.allocator); | ||
| 85 | try testing.expectEqual(@as(i4, 0), dw.codePointWidth(0x0000)); // null | ||
| 86 | try testing.expectEqual(@as(i4, -1), dw.codePointWidth(0x8)); // \b | ||
| 87 | try testing.expectEqual(@as(i4, -1), dw.codePointWidth(0x7f)); // DEL | ||
| 88 | try testing.expectEqual(@as(i4, 0), dw.codePointWidth(0x0005)); // Cf | ||
| 89 | try testing.expectEqual(@as(i4, 0), dw.codePointWidth(0x0007)); // \a BEL | ||
| 90 | try testing.expectEqual(@as(i4, 0), dw.codePointWidth(0x000A)); // \n LF | ||
| 91 | try testing.expectEqual(@as(i4, 0), dw.codePointWidth(0x000B)); // \v VT | ||
| 92 | try testing.expectEqual(@as(i4, 0), dw.codePointWidth(0x000C)); // \f FF | ||
| 93 | try testing.expectEqual(@as(i4, 0), dw.codePointWidth(0x000D)); // \r CR | ||
| 94 | try testing.expectEqual(@as(i4, 0), dw.codePointWidth(0x000E)); // SQ | ||
| 95 | try testing.expectEqual(@as(i4, 0), dw.codePointWidth(0x000F)); // SI | ||
| 96 | |||
| 97 | try testing.expectEqual(@as(i4, 0), dw.codePointWidth(0x070F)); // Cf | ||
| 98 | try testing.expectEqual(@as(i4, 1), dw.codePointWidth(0x0603)); // Cf Arabic | ||
| 99 | |||
| 100 | try testing.expectEqual(@as(i4, 1), dw.codePointWidth(0x00AD)); // soft-hyphen | ||
| 101 | try testing.expectEqual(@as(i4, 2), dw.codePointWidth(0x2E3A)); // two-em dash | ||
| 102 | try testing.expectEqual(@as(i4, 3), dw.codePointWidth(0x2E3B)); // three-em dash | ||
| 103 | |||
| 104 | try testing.expectEqual(@as(i4, 1), dw.codePointWidth(0x00BD)); // ambiguous halfwidth | ||
| 105 | |||
| 106 | try testing.expectEqual(@as(i4, 1), dw.codePointWidth('é')); | ||
| 107 | try testing.expectEqual(@as(i4, 2), dw.codePointWidth('😊')); | ||
| 108 | try testing.expectEqual(@as(i4, 2), dw.codePointWidth('统')); | ||
| 109 | } | ||
| 17 | 110 | ||
| 18 | /// strWidth returns the total display width of `str` as the number of cells | 111 | /// strWidth returns the total display width of `str` as the number of cells |
| 19 | /// required in a fixed-pitch font (i.e. a terminal screen). | 112 | /// required in a fixed-pitch font (i.e. a terminal screen). |
| 20 | pub fn strWidth(self: Self, str: []const u8) usize { | 113 | pub fn strWidth(dw: DisplayWidth, str: []const u8) usize { |
| 21 | var total: isize = 0; | 114 | var total: isize = 0; |
| 22 | 115 | ||
| 23 | // ASCII fast path | 116 | // ASCII fast path |
| 24 | if (ascii.isAsciiOnly(str)) { | 117 | if (ascii.isAsciiOnly(str)) { |
| 25 | for (str) |b| total += self.data.codePointWidth(b); | 118 | for (str) |b| total += dw.codePointWidth(b); |
| 26 | return @intCast(@max(0, total)); | 119 | return @intCast(@max(0, total)); |
| 27 | } | 120 | } |
| 28 | 121 | ||
| 29 | var giter = GraphemeIterator.init(str, &self.data.g_data); | 122 | var giter = dw.g_data.iterator(str); |
| 30 | 123 | ||
| 31 | while (giter.next()) |gc| { | 124 | while (giter.next()) |gc| { |
| 32 | var cp_iter = CodePointIterator{ .bytes = gc.bytes(str) }; | 125 | var cp_iter = CodePointIterator{ .bytes = gc.bytes(str) }; |
| 33 | var gc_total: isize = 0; | 126 | var gc_total: isize = 0; |
| 34 | 127 | ||
| 35 | while (cp_iter.next()) |cp| { | 128 | while (cp_iter.next()) |cp| { |
| 36 | var w = self.data.codePointWidth(cp.code); | 129 | var w = dw.codePointWidth(cp.code); |
| 37 | 130 | ||
| 38 | if (w != 0) { | 131 | if (w != 0) { |
| 39 | // Handle text emoji sequence. | 132 | // Handle text emoji sequence. |
| @@ -58,41 +151,40 @@ pub fn strWidth(self: Self, str: []const u8) usize { | |||
| 58 | } | 151 | } |
| 59 | 152 | ||
| 60 | test "strWidth" { | 153 | test "strWidth" { |
| 61 | const data = try DisplayWidthData.init(testing.allocator); | 154 | const dw = try DisplayWidth.init(testing.allocator); |
| 62 | defer data.deinit(testing.allocator); | 155 | defer dw.deinit(testing.allocator); |
| 63 | const self = Self{ .data = &data }; | ||
| 64 | const c0 = options.c0_width orelse 0; | 156 | const c0 = options.c0_width orelse 0; |
| 65 | 157 | ||
| 66 | try testing.expectEqual(@as(usize, 5), self.strWidth("Hello\r\n")); | 158 | try testing.expectEqual(@as(usize, 5), dw.strWidth("Hello\r\n")); |
| 67 | try testing.expectEqual(@as(usize, 1), self.strWidth("\u{0065}\u{0301}")); | 159 | try testing.expectEqual(@as(usize, 1), dw.strWidth("\u{0065}\u{0301}")); |
| 68 | try testing.expectEqual(@as(usize, 2), self.strWidth("\u{1F476}\u{1F3FF}\u{0308}\u{200D}\u{1F476}\u{1F3FF}")); | 160 | try testing.expectEqual(@as(usize, 2), dw.strWidth("\u{1F476}\u{1F3FF}\u{0308}\u{200D}\u{1F476}\u{1F3FF}")); |
| 69 | try testing.expectEqual(@as(usize, 8), self.strWidth("Hello 😊")); | 161 | try testing.expectEqual(@as(usize, 8), dw.strWidth("Hello 😊")); |
| 70 | try testing.expectEqual(@as(usize, 8), self.strWidth("Héllo 😊")); | 162 | try testing.expectEqual(@as(usize, 8), dw.strWidth("Héllo 😊")); |
| 71 | try testing.expectEqual(@as(usize, 8), self.strWidth("Héllo :)")); | 163 | try testing.expectEqual(@as(usize, 8), dw.strWidth("Héllo :)")); |
| 72 | try testing.expectEqual(@as(usize, 8), self.strWidth("Héllo 🇪🇸")); | 164 | try testing.expectEqual(@as(usize, 8), dw.strWidth("Héllo 🇪🇸")); |
| 73 | try testing.expectEqual(@as(usize, 2), self.strWidth("\u{26A1}")); // Lone emoji | 165 | try testing.expectEqual(@as(usize, 2), dw.strWidth("\u{26A1}")); // Lone emoji |
| 74 | try testing.expectEqual(@as(usize, 1), self.strWidth("\u{26A1}\u{FE0E}")); // Text sequence | 166 | try testing.expectEqual(@as(usize, 1), dw.strWidth("\u{26A1}\u{FE0E}")); // Text sequence |
| 75 | try testing.expectEqual(@as(usize, 2), self.strWidth("\u{26A1}\u{FE0F}")); // Presentation sequence | 167 | try testing.expectEqual(@as(usize, 2), dw.strWidth("\u{26A1}\u{FE0F}")); // Presentation sequence |
| 76 | try testing.expectEqual(@as(usize, 1), self.strWidth("\u{2764}")); // Default text presentation | 168 | try testing.expectEqual(@as(usize, 1), dw.strWidth("\u{2764}")); // Default text presentation |
| 77 | try testing.expectEqual(@as(usize, 1), self.strWidth("\u{2764}\u{FE0E}")); // Default text presentation with VS15 selector | 169 | try testing.expectEqual(@as(usize, 1), dw.strWidth("\u{2764}\u{FE0E}")); // Default text presentation with VS15 selector |
| 78 | try testing.expectEqual(@as(usize, 2), self.strWidth("\u{2764}\u{FE0F}")); // Default text presentation with VS16 selector | 170 | try testing.expectEqual(@as(usize, 2), dw.strWidth("\u{2764}\u{FE0F}")); // Default text presentation with VS16 selector |
| 79 | const expect_bs: usize = if (c0 == 0) 0 else 1 + c0; | 171 | const expect_bs: usize = if (c0 == 0) 0 else 1 + c0; |
| 80 | try testing.expectEqual(expect_bs, self.strWidth("A\x08")); // Backspace | 172 | try testing.expectEqual(expect_bs, dw.strWidth("A\x08")); // Backspace |
| 81 | try testing.expectEqual(expect_bs, self.strWidth("\x7FA")); // DEL | 173 | try testing.expectEqual(expect_bs, dw.strWidth("\x7FA")); // DEL |
| 82 | const expect_long_del: usize = if (c0 == 0) 0 else 1 + (c0 * 3); | 174 | const expect_long_del: usize = if (c0 == 0) 0 else 1 + (c0 * 3); |
| 83 | try testing.expectEqual(expect_long_del, self.strWidth("\x7FA\x08\x08")); // never less than 0 | 175 | try testing.expectEqual(expect_long_del, dw.strWidth("\x7FA\x08\x08")); // never less than 0 |
| 84 | 176 | ||
| 85 | // wcwidth Python lib tests. See: https://github.com/jquast/wcwidth/blob/master/tests/test_core.py | 177 | // wcwidth Python lib tests. See: https://github.com/jquast/wcwidth/blob/master/tests/test_core.py |
| 86 | const empty = ""; | 178 | const empty = ""; |
| 87 | try testing.expectEqual(@as(usize, 0), self.strWidth(empty)); | 179 | try testing.expectEqual(@as(usize, 0), dw.strWidth(empty)); |
| 88 | const with_null = "hello\x00world"; | 180 | const with_null = "hello\x00world"; |
| 89 | try testing.expectEqual(@as(usize, 10 + c0), self.strWidth(with_null)); | 181 | try testing.expectEqual(@as(usize, 10 + c0), dw.strWidth(with_null)); |
| 90 | const hello_jp = "コンニチハ, セカイ!"; | 182 | const hello_jp = "コンニチハ, セカイ!"; |
| 91 | try testing.expectEqual(@as(usize, 19), self.strWidth(hello_jp)); | 183 | try testing.expectEqual(@as(usize, 19), dw.strWidth(hello_jp)); |
| 92 | const control = "\x1b[0m"; | 184 | const control = "\x1b[0m"; |
| 93 | try testing.expectEqual(@as(usize, 3 + c0), self.strWidth(control)); | 185 | try testing.expectEqual(@as(usize, 3 + c0), dw.strWidth(control)); |
| 94 | const balinese = "\u{1B13}\u{1B28}\u{1B2E}\u{1B44}"; | 186 | const balinese = "\u{1B13}\u{1B28}\u{1B2E}\u{1B44}"; |
| 95 | try testing.expectEqual(@as(usize, 3), self.strWidth(balinese)); | 187 | try testing.expectEqual(@as(usize, 3), dw.strWidth(balinese)); |
| 96 | 188 | ||
| 97 | // These commented out tests require a new specification for complex scripts. | 189 | // These commented out tests require a new specification for complex scripts. |
| 98 | // See: https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf | 190 | // See: https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf |
| @@ -106,17 +198,17 @@ test "strWidth" { | |||
| 106 | // try testing.expectEqual(@as(usize, 3), strWidth(kannada_1)); | 198 | // try testing.expectEqual(@as(usize, 3), strWidth(kannada_1)); |
| 107 | // The following passes but as a mere coincidence. | 199 | // The following passes but as a mere coincidence. |
| 108 | const kannada_2 = "\u{0cb0}\u{0cbc}\u{0ccd}\u{0c9a}"; | 200 | const kannada_2 = "\u{0cb0}\u{0cbc}\u{0ccd}\u{0c9a}"; |
| 109 | try testing.expectEqual(@as(usize, 2), self.strWidth(kannada_2)); | 201 | try testing.expectEqual(@as(usize, 2), dw.strWidth(kannada_2)); |
| 110 | 202 | ||
| 111 | // From Rust https://github.com/jameslanska/unicode-display-width | 203 | // From Rust https://github.com/jameslanska/unicode-display-width |
| 112 | try testing.expectEqual(@as(usize, 15), self.strWidth("🔥🗡🍩👩🏻‍🚀⏰💃🏼🔦👍🏻")); | 204 | try testing.expectEqual(@as(usize, 15), dw.strWidth("🔥🗡🍩👩🏻‍🚀⏰💃🏼🔦👍🏻")); |
| 113 | try testing.expectEqual(@as(usize, 2), self.strWidth("🔦")); | 205 | try testing.expectEqual(@as(usize, 2), dw.strWidth("🔦")); |
| 114 | try testing.expectEqual(@as(usize, 2), self.strWidth("👨‍👩‍👧‍👧")); | 206 | try testing.expectEqual(@as(usize, 2), dw.strWidth("👨‍👩‍👧‍👧")); |
| 115 | try testing.expectEqual(@as(usize, 2), self.strWidth("👩‍🔬")); | 207 | try testing.expectEqual(@as(usize, 2), dw.strWidth("👩‍🔬")); |
| 116 | try testing.expectEqual(@as(usize, 9), self.strWidth("sane text")); | 208 | try testing.expectEqual(@as(usize, 9), dw.strWidth("sane text")); |
| 117 | try testing.expectEqual(@as(usize, 9), self.strWidth("αΊΜΓ‘Μ²lΝΜΜΜΜΝgΜΜΜΜΜΝ’ΝΝoΜͺΜTΜ’ΜΜ«ΜΜΝeΜ¬ΝΝΝΜΝxΜΊΜαΉΜΜΝ ")); | 209 | try testing.expectEqual(@as(usize, 9), dw.strWidth("αΊΜΓ‘Μ²lΝΜΜΜΜΝgΜΜΜΜΜΝ’ΝΝoΜͺΜTΜ’ΜΜ«ΜΜΝeΜ¬ΝΝΝΜΝxΜΊΜαΉΜΜΝ ")); |
| 118 | try testing.expectEqual(@as(usize, 17), self.strWidth("슬라바 우크라이나")); | 210 | try testing.expectEqual(@as(usize, 17), dw.strWidth("슬라바 우크라이나")); |
| 119 | try testing.expectEqual(@as(usize, 1), self.strWidth("\u{378}")); | 211 | try testing.expectEqual(@as(usize, 1), dw.strWidth("\u{378}")); |
| 120 | } | 212 | } |
| 121 | 213 | ||
| 122 | /// centers `str` in a new string of width `total_width` (in display cells) using `pad` as padding. | 214 | /// centers `str` in a new string of width `total_width` (in display cells) using `pad` as padding. |
| @@ -124,17 +216,17 @@ test "strWidth" { | |||
| 124 | /// receive one additional pad. This makes sure the returned string fills the requested width. | 216 | /// receive one additional pad. This makes sure the returned string fills the requested width. |
| 125 | /// Caller must free returned bytes with `allocator`. | 217 | /// Caller must free returned bytes with `allocator`. |
| 126 | pub fn center( | 218 | pub fn center( |
| 127 | self: Self, | 219 | dw: DisplayWidth, |
| 128 | allocator: mem.Allocator, | 220 | allocator: mem.Allocator, |
| 129 | str: []const u8, | 221 | str: []const u8, |
| 130 | total_width: usize, | 222 | total_width: usize, |
| 131 | pad: []const u8, | 223 | pad: []const u8, |
| 132 | ) ![]u8 { | 224 | ) ![]u8 { |
| 133 | const str_width = self.strWidth(str); | 225 | const str_width = dw.strWidth(str); |
| 134 | if (str_width > total_width) return error.StrTooLong; | 226 | if (str_width > total_width) return error.StrTooLong; |
| 135 | if (str_width == total_width) return try allocator.dupe(u8, str); | 227 | if (str_width == total_width) return try allocator.dupe(u8, str); |
| 136 | 228 | ||
| 137 | const pad_width = self.strWidth(pad); | 229 | const pad_width = dw.strWidth(pad); |
| 138 | if (pad_width > total_width or str_width + pad_width > total_width) return error.PadTooLong; | 230 | if (pad_width > total_width or str_width + pad_width > total_width) return error.PadTooLong; |
| 139 | 231 | ||
| 140 | const margin_width = @divFloor((total_width - str_width), 2); | 232 | const margin_width = @divFloor((total_width - str_width), 2); |
| @@ -165,62 +257,61 @@ pub fn center( | |||
| 165 | 257 | ||
| 166 | test "center" { | 258 | test "center" { |
| 167 | const allocator = testing.allocator; | 259 | const allocator = testing.allocator; |
| 168 | const data = try DisplayWidthData.init(allocator); | 260 | const dw = try DisplayWidth.init(allocator); |
| 169 | defer data.deinit(allocator); | 261 | defer dw.deinit(allocator); |
| 170 | const self = Self{ .data = &data }; | ||
| 171 | 262 | ||
| 172 | // Input and width both have odd length | 263 | // Input and width both have odd length |
| 173 | var centered = try self.center(allocator, "abc", 9, "*"); | 264 | var centered = try dw.center(allocator, "abc", 9, "*"); |
| 174 | try testing.expectEqualSlices(u8, "***abc***", centered); | 265 | try testing.expectEqualSlices(u8, "***abc***", centered); |
| 175 | 266 | ||
| 176 | // Input and width both have even length | 267 | // Input and width both have even length |
| 177 | testing.allocator.free(centered); | 268 | testing.allocator.free(centered); |
| 178 | centered = try self.center(allocator, "w😊w", 10, "-"); | 269 | centered = try dw.center(allocator, "w😊w", 10, "-"); |
| 179 | try testing.expectEqualSlices(u8, "---w😊w---", centered); | 270 | try testing.expectEqualSlices(u8, "---w😊w---", centered); |
| 180 | 271 | ||
| 181 | // Input has even length, width has odd length | 272 | // Input has even length, width has odd length |
| 182 | testing.allocator.free(centered); | 273 | testing.allocator.free(centered); |
| 183 | centered = try self.center(allocator, "1234", 9, "-"); | 274 | centered = try dw.center(allocator, "1234", 9, "-"); |
| 184 | try testing.expectEqualSlices(u8, "--1234---", centered); | 275 | try testing.expectEqualSlices(u8, "--1234---", centered); |
| 185 | 276 | ||
| 186 | // Input has odd length, width has even length | 277 | // Input has odd length, width has even length |
| 187 | testing.allocator.free(centered); | 278 | testing.allocator.free(centered); |
| 188 | centered = try self.center(allocator, "123", 8, "-"); | 279 | centered = try dw.center(allocator, "123", 8, "-"); |
| 189 | try testing.expectEqualSlices(u8, "--123---", centered); | 280 | try testing.expectEqualSlices(u8, "--123---", centered); |
| 190 | 281 | ||
| 191 | // Input is the same length as the width | 282 | // Input is the same length as the width |
| 192 | testing.allocator.free(centered); | 283 | testing.allocator.free(centered); |
| 193 | centered = try self.center(allocator, "123", 3, "-"); | 284 | centered = try dw.center(allocator, "123", 3, "-"); |
| 194 | try testing.expectEqualSlices(u8, "123", centered); | 285 | try testing.expectEqualSlices(u8, "123", centered); |
| 195 | 286 | ||
| 196 | // Input is empty | 287 | // Input is empty |
| 197 | testing.allocator.free(centered); | 288 | testing.allocator.free(centered); |
| 198 | centered = try self.center(allocator, "", 3, "-"); | 289 | centered = try dw.center(allocator, "", 3, "-"); |
| 199 | try testing.expectEqualSlices(u8, "---", centered); | 290 | try testing.expectEqualSlices(u8, "---", centered); |
| 200 | 291 | ||
| 201 | // Input is empty and width is zero | 292 | // Input is empty and width is zero |
| 202 | testing.allocator.free(centered); | 293 | testing.allocator.free(centered); |
| 203 | centered = try self.center(allocator, "", 0, "-"); | 294 | centered = try dw.center(allocator, "", 0, "-"); |
| 204 | try testing.expectEqualSlices(u8, "", centered); | 295 | try testing.expectEqualSlices(u8, "", centered); |
| 205 | 296 | ||
| 206 | // Input is longer than the width, which is an error | 297 | // Input is longer than the width, which is an error |
| 207 | testing.allocator.free(centered); | 298 | testing.allocator.free(centered); |
| 208 | try testing.expectError(error.StrTooLong, self.center(allocator, "123", 2, "-")); | 299 | try testing.expectError(error.StrTooLong, dw.center(allocator, "123", 2, "-")); |
| 209 | } | 300 | } |
| 210 | 301 | ||
| 211 | /// padLeft returns a new string of width `total_width` (in display cells) using `pad` as padding | 302 | /// padLeft returns a new string of width `total_width` (in display cells) using `pad` as padding |
| 212 | /// on the left side. Caller must free returned bytes with `allocator`. | 303 | /// on the left side. Caller must free returned bytes with `allocator`. |
| 213 | pub fn padLeft( | 304 | pub fn padLeft( |
| 214 | self: Self, | 305 | dw: DisplayWidth, |
| 215 | allocator: mem.Allocator, | 306 | allocator: mem.Allocator, |
| 216 | str: []const u8, | 307 | str: []const u8, |
| 217 | total_width: usize, | 308 | total_width: usize, |
| 218 | pad: []const u8, | 309 | pad: []const u8, |
| 219 | ) ![]u8 { | 310 | ) ![]u8 { |
| 220 | const str_width = self.strWidth(str); | 311 | const str_width = dw.strWidth(str); |
| 221 | if (str_width > total_width) return error.StrTooLong; | 312 | if (str_width > total_width) return error.StrTooLong; |
| 222 | 313 | ||
| 223 | const pad_width = self.strWidth(pad); | 314 | const pad_width = dw.strWidth(pad); |
| 224 | if (pad_width > total_width or str_width + pad_width > total_width) return error.PadTooLong; | 315 | if (pad_width > total_width or str_width + pad_width > total_width) return error.PadTooLong; |
| 225 | 316 | ||
| 226 | const margin_width = total_width - str_width; | 317 | const margin_width = total_width - str_width; |
| @@ -244,32 +335,31 @@ pub fn padLeft( | |||
| 244 | 335 | ||
| 245 | test "padLeft" { | 336 | test "padLeft" { |
| 246 | const allocator = testing.allocator; | 337 | const allocator = testing.allocator; |
| 247 | const data = try DisplayWidthData.init(allocator); | 338 | const dw = try DisplayWidth.init(allocator); |
| 248 | defer data.deinit(allocator); | 339 | defer dw.deinit(allocator); |
| 249 | const self = Self{ .data = &data }; | ||
| 250 | 340 | ||
| 251 | var right_aligned = try self.padLeft(allocator, "abc", 9, "*"); | 341 | var right_aligned = try dw.padLeft(allocator, "abc", 9, "*"); |
| 252 | defer testing.allocator.free(right_aligned); | 342 | defer testing.allocator.free(right_aligned); |
| 253 | try testing.expectEqualSlices(u8, "******abc", right_aligned); | 343 | try testing.expectEqualSlices(u8, "******abc", right_aligned); |
| 254 | 344 | ||
| 255 | testing.allocator.free(right_aligned); | 345 | testing.allocator.free(right_aligned); |
| 256 | right_aligned = try self.padLeft(allocator, "w😊w", 10, "-"); | 346 | right_aligned = try dw.padLeft(allocator, "w😊w", 10, "-"); |
| 257 | try testing.expectEqualSlices(u8, "------w😊w", right_aligned); | 347 | try testing.expectEqualSlices(u8, "------w😊w", right_aligned); |
| 258 | } | 348 | } |
| 259 | 349 | ||
| 260 | /// padRight returns a new string of width `total_width` (in display cells) using `pad` as padding | 350 | /// padRight returns a new string of width `total_width` (in display cells) using `pad` as padding |
| 261 | /// on the right side. Caller must free returned bytes with `allocator`. | 351 | /// on the right side. Caller must free returned bytes with `allocator`. |
| 262 | pub fn padRight( | 352 | pub fn padRight( |
| 263 | self: Self, | 353 | dw: DisplayWidth, |
| 264 | allocator: mem.Allocator, | 354 | allocator: mem.Allocator, |
| 265 | str: []const u8, | 355 | str: []const u8, |
| 266 | total_width: usize, | 356 | total_width: usize, |
| 267 | pad: []const u8, | 357 | pad: []const u8, |
| 268 | ) ![]u8 { | 358 | ) ![]u8 { |
| 269 | const str_width = self.strWidth(str); | 359 | const str_width = dw.strWidth(str); |
| 270 | if (str_width > total_width) return error.StrTooLong; | 360 | if (str_width > total_width) return error.StrTooLong; |
| 271 | 361 | ||
| 272 | const pad_width = self.strWidth(pad); | 362 | const pad_width = dw.strWidth(pad); |
| 273 | if (pad_width > total_width or str_width + pad_width > total_width) return error.PadTooLong; | 363 | if (pad_width > total_width or str_width + pad_width > total_width) return error.PadTooLong; |
| 274 | 364 | ||
| 275 | const margin_width = total_width - str_width; | 365 | const margin_width = total_width - str_width; |
| @@ -294,16 +384,15 @@ pub fn padRight( | |||
| 294 | 384 | ||
| 295 | test "padRight" { | 385 | test "padRight" { |
| 296 | const allocator = testing.allocator; | 386 | const allocator = testing.allocator; |
| 297 | const data = try DisplayWidthData.init(allocator); | 387 | const dw = try DisplayWidth.init(allocator); |
| 298 | defer data.deinit(allocator); | 388 | defer dw.deinit(allocator); |
| 299 | const self = Self{ .data = &data }; | ||
| 300 | 389 | ||
| 301 | var left_aligned = try self.padRight(allocator, "abc", 9, "*"); | 390 | var left_aligned = try dw.padRight(allocator, "abc", 9, "*"); |
| 302 | defer testing.allocator.free(left_aligned); | 391 | defer testing.allocator.free(left_aligned); |
| 303 | try testing.expectEqualSlices(u8, "abc******", left_aligned); | 392 | try testing.expectEqualSlices(u8, "abc******", left_aligned); |
| 304 | 393 | ||
| 305 | testing.allocator.free(left_aligned); | 394 | testing.allocator.free(left_aligned); |
| 305 | left_aligned = try self.padRight(allocator, "w😊w", 10, "-"); | 395 | left_aligned = try dw.padRight(allocator, "w😊w", 10, "-"); |
| 306 | try testing.expectEqualSlices(u8, "w😊w------", left_aligned); | 396 | try testing.expectEqualSlices(u8, "w😊w------", left_aligned); |
| 308 | } | 397 | } |
| 309 | 398 | ||
| @@ -311,7 +400,7 @@ test "padRight" { | |||
| 311 | /// `threshold` defines how far the last column of the last word can be | 400 | /// `threshold` defines how far the last column of the last word can be |
| 312 | /// from the edge. Caller must free returned bytes with `allocator`. | 401 | /// from the edge. Caller must free returned bytes with `allocator`. |
| 313 | pub fn wrap( | 402 | pub fn wrap( |
| 314 | self: Self, | 403 | dw: DisplayWidth, |
| 315 | allocator: mem.Allocator, | 404 | allocator: mem.Allocator, |
| 316 | str: []const u8, | 405 | str: []const u8, |
| 317 | columns: usize, | 406 | columns: usize, |
| @@ -329,7 +418,7 @@ pub fn wrap( | |||
| 329 | while (word_iter.next()) |word| { | 418 | while (word_iter.next()) |word| { |
| 330 | try result.appendSlice(word); | 419 | try result.appendSlice(word); |
| 331 | try result.append(' '); | 420 | try result.append(' '); |
| 332 | line_width += self.strWidth(word) + 1; | 421 | line_width += dw.strWidth(word) + 1; |
| 333 | 422 | ||
| 334 | if (line_width > columns or columns - line_width <= threshold) { | 423 | if (line_width > columns or columns - line_width <= threshold) { |
| 335 | try result.append('\n'); | 424 | try result.append('\n'); |
| @@ -347,12 +436,11 @@ pub fn wrap( | |||
| 347 | 436 | ||
| 348 | test "wrap" { | 437 | test "wrap" { |
| 349 | const allocator = testing.allocator; | 438 | const allocator = testing.allocator; |
| 350 | const data = try DisplayWidthData.init(allocator); | 439 | const dw = try DisplayWidth.init(allocator); |
| 351 | defer data.deinit(allocator); | 440 | defer dw.deinit(allocator); |
| 352 | const self = Self{ .data = &data }; | ||
| 353 | 441 | ||
| 354 | const input = "The quick brown fox\r\njumped over the lazy dog!"; | 442 | const input = "The quick brown fox\r\njumped over the lazy dog!"; |
| 355 | const got = try self.wrap(allocator, input, 10, 3); | 443 | const got = try dw.wrap(allocator, input, 10, 3); |
| 356 | defer testing.allocator.free(got); | 444 | defer testing.allocator.free(got); |
| 357 | const want = "The quick \nbrown fox \njumped \nover the \nlazy dog!"; | 445 | const want = "The quick \nbrown fox \njumped \nover the \nlazy dog!"; |
| 358 | try testing.expectEqualStrings(want, got); | 446 | try testing.expectEqualStrings(want, got); |
diff --git a/src/GraphemeData.zig b/src/GraphemeData.zig index 6d3174d..df025cb 100644 --- a/src/GraphemeData.zig +++ b/src/GraphemeData.zig | |||
| @@ -36,7 +36,7 @@ s3: []u8 = undefined, | |||
| 36 | 36 | ||
| 37 | const Self = @This(); | 37 | const Self = @This(); |
| 38 | 38 | ||
| 39 | pub fn init(allocator: mem.Allocator) mem.Allocator.Error!Self { | 39 | pub inline fn init(allocator: mem.Allocator) mem.Allocator.Error!Self { |
| 40 | const decompressor = compress.flate.inflate.decompressor; | 40 | const decompressor = compress.flate.inflate.decompressor; |
| 41 | const in_bytes = @embedFile("gbp"); | 41 | const in_bytes = @embedFile("gbp"); |
| 42 | var in_fbs = std.io.fixedBufferStream(in_bytes); | 42 | var in_fbs = std.io.fixedBufferStream(in_bytes); |
| @@ -65,23 +65,23 @@ pub fn init(allocator: mem.Allocator) mem.Allocator.Error!Self { | |||
| 65 | return self; | 65 | return self; |
| 66 | } | 66 | } |
| 67 | 67 | ||
| 68 | pub fn deinit(self: *const Self, allocator: mem.Allocator) void { | 68 | pub inline fn deinit(self: *const Self, allocator: mem.Allocator) void { |
| 69 | allocator.free(self.s1); | 69 | allocator.free(self.s1); |
| 70 | allocator.free(self.s2); | 70 | allocator.free(self.s2); |
| 71 | allocator.free(self.s3); | 71 | allocator.free(self.s3); |
| 72 | } | 72 | } |
| 73 | 73 | ||
| 74 | /// Lookup the grapheme break property for a code point. | 74 | /// Lookup the grapheme break property for a code point. |
| 75 | pub fn gbp(self: Self, cp: u21) Gbp { | 75 | pub inline fn gbp(self: Self, cp: u21) Gbp { |
| 76 | return @enumFromInt(self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]] >> 4); | 76 | return @enumFromInt(self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]] >> 4); |
| 77 | } | 77 | } |
| 78 | 78 | ||
| 79 | /// Lookup the indic syllable type for a code point. | 79 | /// Lookup the indic syllable type for a code point. |
| 80 | pub fn indic(self: Self, cp: u21) Indic { | 80 | pub inline fn indic(self: Self, cp: u21) Indic { |
| 81 | return @enumFromInt((self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]] >> 1) & 0x7); | 81 | return @enumFromInt((self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]] >> 1) & 0x7); |
| 82 | } | 82 | } |
| 83 | 83 | ||
| 84 | /// Lookup the indic syllable type for a code point. | 84 | /// Lookup the emoji property for a code point. |
| 85 | pub fn isEmoji(self: Self, cp: u21) bool { | 85 | pub inline fn isEmoji(self: Self, cp: u21) bool { |
| 86 | return self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]] & 1 == 1; | 86 | return self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]] & 1 == 1; |
| 87 | } | 87 | } |
diff --git a/src/Normalize.zig b/src/Normalize.zig index a28b708..b738b27 100644 --- a/src/Normalize.zig +++ b/src/Normalize.zig | |||
| @@ -177,7 +177,7 @@ test "decompose" { | |||
| 177 | const allocator = testing.allocator; | 177 | const allocator = testing.allocator; |
| 178 | var data: NormData = undefined; | 178 | var data: NormData = undefined; |
| 179 | try NormData.init(&data, allocator); | 179 | try NormData.init(&data, allocator); |
| 180 | defer data.deinit(); | 180 | defer data.deinit(allocator); |
| 181 | var n = Self{ .norm_data = &data }; | 181 | var n = Self{ .norm_data = &data }; |
| 182 | 182 | ||
| 183 | var buf: [18]u21 = undefined; | 183 | var buf: [18]u21 = undefined; |
| @@ -307,11 +307,11 @@ test "nfd ASCII / no-alloc" { | |||
| 307 | const allocator = testing.allocator; | 307 | const allocator = testing.allocator; |
| 308 | var data: NormData = undefined; | 308 | var data: NormData = undefined; |
| 309 | try NormData.init(&data, allocator); | 309 | try NormData.init(&data, allocator); |
| 310 | defer data.deinit(); | 310 | defer data.deinit(allocator); |
| 311 | const n = Self{ .norm_data = &data }; | 311 | const n = Self{ .norm_data = &data }; |
| 312 | 312 | ||
| 313 | const result = try n.nfd(allocator, "Hello World!"); | 313 | const result = try n.nfd(allocator, "Hello World!"); |
| 314 | defer result.deinit(); | 314 | defer result.deinit(allocator); |
| 315 | 315 | ||
| 316 | try testing.expectEqualStrings("Hello World!", result.slice); | 316 | try testing.expectEqualStrings("Hello World!", result.slice); |
| 317 | } | 317 | } |
| @@ -320,11 +320,11 @@ test "nfd !ASCII / alloc" { | |||
| 320 | const allocator = testing.allocator; | 320 | const allocator = testing.allocator; |
| 321 | var data: NormData = undefined; | 321 | var data: NormData = undefined; |
| 322 | try NormData.init(&data, allocator); | 322 | try NormData.init(&data, allocator); |
| 323 | defer data.deinit(); | 323 | defer data.deinit(allocator); |
| 324 | const n = Self{ .norm_data = &data }; | 324 | const n = Self{ .norm_data = &data }; |
| 325 | 325 | ||
| 326 | const result = try n.nfd(allocator, "Héllo World! \u{3d3}"); | 326 | const result = try n.nfd(allocator, "Héllo World! \u{3d3}"); |
| 327 | defer result.deinit(); | 327 | defer result.deinit(allocator); |
| 328 | 328 | ||
| 329 | try testing.expectEqualStrings("He\u{301}llo World! \u{3d2}\u{301}", result.slice); | 329 | try testing.expectEqualStrings("He\u{301}llo World! \u{3d2}\u{301}", result.slice); |
| 330 | } | 330 | } |
| @@ -333,11 +333,11 @@ test "nfkd ASCII / no-alloc" { | |||
| 333 | const allocator = testing.allocator; | 333 | const allocator = testing.allocator; |
| 334 | var data: NormData = undefined; | 334 | var data: NormData = undefined; |
| 335 | try NormData.init(&data, allocator); | 335 | try NormData.init(&data, allocator); |
| 336 | defer data.deinit(); | 336 | defer data.deinit(allocator); |
| 337 | const n = Self{ .norm_data = &data }; | 337 | const n = Self{ .norm_data = &data }; |
| 338 | 338 | ||
| 339 | const result = try n.nfkd(allocator, "Hello World!"); | 339 | const result = try n.nfkd(allocator, "Hello World!"); |
| 340 | defer result.deinit(); | 340 | defer result.deinit(allocator); |
| 341 | 341 | ||
| 342 | try testing.expectEqualStrings("Hello World!", result.slice); | 342 | try testing.expectEqualStrings("Hello World!", result.slice); |
| 343 | } | 343 | } |
| @@ -346,11 +346,11 @@ test "nfkd !ASCII / alloc" { | |||
| 346 | const allocator = testing.allocator; | 346 | const allocator = testing.allocator; |
| 347 | var data: NormData = undefined; | 347 | var data: NormData = undefined; |
| 348 | try NormData.init(&data, allocator); | 348 | try NormData.init(&data, allocator); |
| 349 | defer data.deinit(); | 349 | defer data.deinit(allocator); |
| 350 | const n = Self{ .norm_data = &data }; | 350 | const n = Self{ .norm_data = &data }; |
| 351 | 351 | ||
| 352 | const result = try n.nfkd(allocator, "Héllo World! \u{3d3}"); | 352 | const result = try n.nfkd(allocator, "Héllo World! \u{3d3}"); |
| 353 | defer result.deinit(); | 353 | defer result.deinit(allocator); |
| 354 | 354 | ||
| 355 | try testing.expectEqualStrings("He\u{301}llo World! \u{3a5}\u{301}", result.slice); | 355 | try testing.expectEqualStrings("He\u{301}llo World! \u{3a5}\u{301}", result.slice); |
| 356 | } | 356 | } |
| @@ -546,11 +546,11 @@ test "nfc" { | |||
| 546 | const allocator = testing.allocator; | 546 | const allocator = testing.allocator; |
| 547 | var data: NormData = undefined; | 547 | var data: NormData = undefined; |
| 548 | try NormData.init(&data, allocator); | 548 | try NormData.init(&data, allocator); |
| 549 | defer data.deinit(); | 549 | defer data.deinit(allocator); |
| 550 | const n = Self{ .norm_data = &data }; | 550 | const n = Self{ .norm_data = &data }; |
| 551 | 551 | ||
| 552 | const result = try n.nfc(allocator, "Complex char: \u{3D2}\u{301}"); | 552 | const result = try n.nfc(allocator, "Complex char: \u{3D2}\u{301}"); |
| 553 | defer result.deinit(); | 553 | defer result.deinit(allocator); |
| 554 | 554 | ||
| 555 | try testing.expectEqualStrings("Complex char: \u{3D3}", result.slice); | 555 | try testing.expectEqualStrings("Complex char: \u{3D3}", result.slice); |
| 556 | } | 556 | } |
| @@ -559,11 +559,11 @@ test "nfkc" { | |||
| 559 | const allocator = testing.allocator; | 559 | const allocator = testing.allocator; |
| 560 | var data: NormData = undefined; | 560 | var data: NormData = undefined; |
| 561 | try NormData.init(&data, allocator); | 561 | try NormData.init(&data, allocator); |
| 562 | defer data.deinit(); | 562 | defer data.deinit(allocator); |
| 563 | const n = Self{ .norm_data = &data }; | 563 | const n = Self{ .norm_data = &data }; |
| 564 | 564 | ||
| 565 | const result = try n.nfkc(allocator, "Complex char: \u{03A5}\u{0301}"); | 565 | const result = try n.nfkc(allocator, "Complex char: \u{03A5}\u{0301}"); |
| 566 | defer result.deinit(); | 566 | defer result.deinit(allocator); |
| 567 | 567 | ||
| 568 | try testing.expectEqualStrings("Complex char: \u{038E}", result.slice); | 568 | try testing.expectEqualStrings("Complex char: \u{038E}", result.slice); |
| 569 | } | 569 | } |
| @@ -582,7 +582,7 @@ test "eql" { | |||
| 582 | const allocator = testing.allocator; | 582 | const allocator = testing.allocator; |
| 583 | var data: NormData = undefined; | 583 | var data: NormData = undefined; |
| 584 | try NormData.init(&data, allocator); | 584 | try NormData.init(&data, allocator); |
| 585 | defer data.deinit(); | 585 | defer data.deinit(allocator); |
| 586 | const n = Self{ .norm_data = &data }; | 586 | const n = Self{ .norm_data = &data }; |
| 587 | 587 | ||
| 588 | try testing.expect(try n.eql(allocator, "foé", "foe\u{0301}")); | 588 | try testing.expect(try n.eql(allocator, "foé", "foe\u{0301}")); |
| @@ -628,5 +628,4 @@ test "isLatin1Only" { | |||
| 628 | try testing.expect(isLatin1Only(latin1_only)); | 628 | try testing.expect(isLatin1Only(latin1_only)); |
| 629 | const not_latin1_only = "Héllo, World! \u{3d3}"; | 629 | const not_latin1_only = "Héllo, World! \u{3d3}"; |
| 630 | try testing.expect(!isLatin1Only(not_latin1_only)); | 630 | try testing.expect(!isLatin1Only(not_latin1_only)); |
| 631 | try testing.expect(false); | ||
| 632 | } | 631 | } |
diff --git a/src/WidthData.zig b/src/WidthData.zig index b07a679..ca7eaf0 100644 --- a/src/WidthData.zig +++ b/src/WidthData.zig | |||
| @@ -4,15 +4,36 @@ const compress = std.compress; | |||
| 4 | const mem = std.mem; | 4 | const mem = std.mem; |
| 5 | const testing = std.testing; | 5 | const testing = std.testing; |
| 6 | 6 | ||
| 7 | const GraphemeData = @import("GraphemeData"); | 7 | const Graphemes = @import("Graphemes"); |
| 8 | 8 | ||
| 9 | g_data: GraphemeData, | 9 | g_data: Graphemes, |
| 10 | s1: []u16 = undefined, | 10 | s1: []u16 = undefined, |
| 11 | s2: []i4 = undefined, | 11 | s2: []i4 = undefined, |
| 12 | owns_gdata: bool, | ||
| 12 | 13 | ||
| 13 | const Self = @This(); | 14 | const Self = @This(); |
| 14 | 15 | ||
| 15 | pub fn init(allocator: mem.Allocator) mem.Allocator.Error!Self { | 16 | pub fn init(allocator: mem.Allocator) mem.Allocator.Error!Self { |
| 17 | var self: Self = try Self.setup(allocator); | ||
| 18 | errdefer { | ||
| 19 | allocator.free(self.s1); | ||
| 20 | allocator.free(self.s2); | ||
| 21 | } | ||
| 22 | self.owns_gdata = true; | ||
| 23 | self.g_data = try Graphemes.init(allocator); | ||
| 24 | errdefer self.g_data.deinit(allocator); | ||
| 25 | return self; | ||
| 26 | } | ||
| 27 | |||
| 28 | pub fn initWithGraphemeData(allocator: mem.Allocator, g_data: Graphemes) mem.Allocator.Error!Self { | ||
| 29 | var self = try Self.setup(allocator); | ||
| 30 | self.g_data = g_data; | ||
| 31 | self.owns_gdata = false; | ||
| 32 | return self; | ||
| 33 | } | ||
| 34 | |||
| 35 | // Sets up the DisplayWidthData, leaving the GraphemeData undefined. | ||
| 36 | fn setup(allocator: mem.Allocator) mem.Allocator.Error!Self { | ||
| 16 | const decompressor = compress.flate.inflate.decompressor; | 37 | const decompressor = compress.flate.inflate.decompressor; |
| 17 | const in_bytes = @embedFile("dwp"); | 38 | const in_bytes = @embedFile("dwp"); |
| 18 | var in_fbs = std.io.fixedBufferStream(in_bytes); | 39 | var in_fbs = std.io.fixedBufferStream(in_bytes); |
| @@ -21,10 +42,7 @@ pub fn init(allocator: mem.Allocator) mem.Allocator.Error!Self { | |||
| 21 | 42 | ||
| 22 | const endian = builtin.cpu.arch.endian(); | 43 | const endian = builtin.cpu.arch.endian(); |
| 23 | 44 | ||
| 24 | var self = Self{ | 45 | var self: Self = undefined; |
| 25 | .g_data = try GraphemeData.init(allocator), | ||
| 26 | }; | ||
| 27 | errdefer self.g_data.deinit(allocator); | ||
| 28 | 46 | ||
| 29 | const stage_1_len: u16 = reader.readInt(u16, endian) catch unreachable; | 47 | const stage_1_len: u16 = reader.readInt(u16, endian) catch unreachable; |
| 30 | self.s1 = try allocator.alloc(u16, stage_1_len); | 48 | self.s1 = try allocator.alloc(u16, stage_1_len); |
| @@ -42,7 +60,7 @@ pub fn init(allocator: mem.Allocator) mem.Allocator.Error!Self { | |||
| 42 | pub fn deinit(self: *const Self, allocator: mem.Allocator) void { | 60 | pub fn deinit(self: *const Self, allocator: mem.Allocator) void { |
| 43 | allocator.free(self.s1); | 61 | allocator.free(self.s1); |
| 44 | allocator.free(self.s2); | 62 | allocator.free(self.s2); |
| 45 | self.g_data.deinit(allocator); | 63 | if (self.owns_gdata) self.g_data.deinit(allocator); |
| 46 | } | 64 | } |
| 47 | 65 | ||
| 48 | /// codePointWidth returns the number of cells `cp` requires when rendered | 66 | /// codePointWidth returns the number of cells `cp` requires when rendered |
diff --git a/src/grapheme.zig b/src/grapheme.zig index 25fd71d..79cd2c6 100644 --- a/src/grapheme.zig +++ b/src/grapheme.zig | |||
| @@ -1,10 +1,99 @@ | |||
| 1 | const std = @import("std"); | 1 | const std = @import("std"); |
| 2 | const builtin = @import("builtin"); | ||
| 2 | const mem = std.mem; | 3 | const mem = std.mem; |
| 4 | const Allocator = mem.Allocator; | ||
| 5 | const compress = std.compress; | ||
| 3 | const unicode = std.unicode; | 6 | const unicode = std.unicode; |
| 4 | 7 | ||
| 5 | const CodePoint = @import("code_point").CodePoint; | 8 | const CodePoint = @import("code_point").CodePoint; |
| 6 | const CodePointIterator = @import("code_point").Iterator; | 9 | const CodePointIterator = @import("code_point").Iterator; |
| 7 | pub const GraphemeData = @import("GraphemeData"); | 10 | |
| 11 | s1: []u16 = undefined, | ||
| 12 | s2: []u16 = undefined, | ||
| 13 | s3: []u8 = undefined, | ||
| 14 | |||
| 15 | const Graphemes = @This(); | ||
| 16 | |||
| 17 | pub inline fn init(allocator: mem.Allocator) mem.Allocator.Error!Graphemes { | ||
| 18 | const decompressor = compress.flate.inflate.decompressor; | ||
| 19 | const in_bytes = @embedFile("gbp"); | ||
| 20 | var in_fbs = std.io.fixedBufferStream(in_bytes); | ||
| 21 | var in_decomp = decompressor(.raw, in_fbs.reader()); | ||
| 22 | var reader = in_decomp.reader(); | ||
| 23 | |||
| 24 | const endian = builtin.cpu.arch.endian(); | ||
| 25 | |||
| 26 | var self = Graphemes{}; | ||
| 27 | |||
| 28 | const s1_len: u16 = reader.readInt(u16, endian) catch unreachable; | ||
| 29 | self.s1 = try allocator.alloc(u16, s1_len); | ||
| 30 | errdefer allocator.free(self.s1); | ||
| 31 | for (0..s1_len) |i| self.s1[i] = reader.readInt(u16, endian) catch unreachable; | ||
| 32 | |||
| 33 | const s2_len: u16 = reader.readInt(u16, endian) catch unreachable; | ||
| 34 | self.s2 = try allocator.alloc(u16, s2_len); | ||
| 35 | errdefer allocator.free(self.s2); | ||
| 36 | for (0..s2_len) |i| self.s2[i] = reader.readInt(u16, endian) catch unreachable; | ||
| 37 | |||
| 38 | const s3_len: u16 = reader.readInt(u16, endian) catch unreachable; | ||
| 39 | self.s3 = try allocator.alloc(u8, s3_len); | ||
| 40 | errdefer allocator.free(self.s3); | ||
| 41 | _ = reader.readAll(self.s3) catch unreachable; | ||
| 42 | |||
| 43 | return self; | ||
| 44 | } | ||
| 45 | |||
| 46 | pub fn deinit(graphemes: *const Graphemes, allocator: mem.Allocator) void { | ||
| 47 | allocator.free(graphemes.s1); | ||
| 48 | allocator.free(graphemes.s2); | ||
| 49 | allocator.free(graphemes.s3); | ||
| 50 | } | ||
| 51 | |||
| 52 | /// Lookup the grapheme break property for a code point. | ||
| 53 | pub fn gbp(graphemes: Graphemes, cp: u21) Gbp { | ||
| 54 | return @enumFromInt(graphemes.s3[graphemes.s2[graphemes.s1[cp >> 8] + (cp & 0xff)]] >> 4); | ||
| 55 | } | ||
| 56 | |||
| 57 | /// Lookup the indic syllable type for a code point. | ||
| 58 | pub fn indic(graphemes: Graphemes, cp: u21) Indic { | ||
| 59 | return @enumFromInt((graphemes.s3[graphemes.s2[graphemes.s1[cp >> 8] + (cp & 0xff)]] >> 1) & 0x7); | ||
| 60 | } | ||
| 61 | |||
| 62 | /// Lookup the emoji property for a code point. | ||
| 63 | pub fn isEmoji(graphemes: Graphemes, cp: u21) bool { | ||
| 64 | return graphemes.s3[graphemes.s2[graphemes.s1[cp >> 8] + (cp & 0xff)]] & 1 == 1; | ||
| 65 | } | ||
| 66 | |||
| 67 | pub fn iterator(graphemes: *const Graphemes, string: []const u8) Iterator { | ||
| 68 | return Iterator.init(string, graphemes); | ||
| 69 | } | ||
| 70 | |||
| 71 | /// Indic syllable type. | ||
| 72 | pub const Indic = enum { | ||
| 73 | none, | ||
| 74 | |||
| 75 | Consonant, | ||
| 76 | Extend, | ||
| 77 | Linker, | ||
| 78 | }; | ||
| 79 | |||
| 80 | /// Grapheme break property. | ||
| 81 | pub const Gbp = enum { | ||
| 82 | none, | ||
| 83 | Control, | ||
| 84 | CR, | ||
| 85 | Extend, | ||
| 86 | L, | ||
| 87 | LF, | ||
| 88 | LV, | ||
| 89 | LVT, | ||
| 90 | Prepend, | ||
| 91 | Regional_Indicator, | ||
| 92 | SpacingMark, | ||
| 93 | T, | ||
| 94 | V, | ||
| 95 | ZWJ, | ||
| 96 | }; | ||
| 8 | 97 | ||
| 9 | /// `Grapheme` represents a Unicode grapheme cluster by its length and offset in the source bytes. | 98 | /// `Grapheme` represents a Unicode grapheme cluster by its length and offset in the source bytes. |
| 10 | pub const Grapheme = struct { | 99 | pub const Grapheme = struct { |
| @@ -22,12 +111,12 @@ pub const Grapheme = struct { | |||
| 22 | pub const Iterator = struct { | 111 | pub const Iterator = struct { |
| 23 | buf: [2]?CodePoint = .{ null, null }, | 112 | buf: [2]?CodePoint = .{ null, null }, |
| 24 | cp_iter: CodePointIterator, | 113 | cp_iter: CodePointIterator, |
| 25 | data: *const GraphemeData, | 114 | data: *const Graphemes, |
| 26 | 115 | ||
| 27 | const Self = @This(); | 116 | const Self = @This(); |
| 28 | 117 | ||
| 29 | /// Assumes `src` is valid UTF-8. | 118 | /// Assumes `src` is valid UTF-8. |
| 30 | pub fn init(str: []const u8, data: *const GraphemeData) Self { | 119 | pub fn init(str: []const u8, data: *const Graphemes) Self { |
| 31 | var self = Self{ .cp_iter = .{ .bytes = str }, .data = data }; | 120 | var self = Self{ .cp_iter = .{ .bytes = str }, .data = data }; |
| 32 | self.advance(); | 121 | self.advance(); |
| 33 | return self; | 122 | return self; |
| @@ -149,7 +238,7 @@ pub const Iterator = struct { | |||
| 149 | }; | 238 | }; |
| 150 | 239 | ||
| 151 | // Predicates | 240 | // Predicates |
| 152 | fn isBreaker(cp: u21, data: *const GraphemeData) bool { | 241 | fn isBreaker(cp: u21, data: *const Graphemes) bool { |
| 153 | // Extract relevant properties. | 242 | // Extract relevant properties. |
| 154 | const cp_gbp_prop = data.gbp(cp); | 243 | const cp_gbp_prop = data.gbp(cp); |
| 155 | return cp == '\x0d' or cp == '\x0a' or cp_gbp_prop == .Control; | 244 | return cp == '\x0d' or cp == '\x0a' or cp_gbp_prop == .Control; |
| @@ -202,7 +291,7 @@ pub const State = struct { | |||
| 202 | pub fn graphemeBreak( | 291 | pub fn graphemeBreak( |
| 203 | cp1: u21, | 292 | cp1: u21, |
| 204 | cp2: u21, | 293 | cp2: u21, |
| 205 | data: *const GraphemeData, | 294 | data: *const Graphemes, |
| 206 | state: *State, | 295 | state: *State, |
| 207 | ) bool { | 296 | ) bool { |
| 208 | // Extract relevant properties. | 297 | // Extract relevant properties. |
| @@ -306,25 +395,25 @@ test "Segmentation ZWJ and ZWSP emoji sequences" { | |||
| 306 | const with_zwsp = seq_1 ++ "\u{200B}" ++ seq_2; | 395 | const with_zwsp = seq_1 ++ "\u{200B}" ++ seq_2; |
| 307 | const no_joiner = seq_1 ++ seq_2; | 396 | const no_joiner = seq_1 ++ seq_2; |
| 308 | 397 | ||
| 309 | const data = try GraphemeData.init(std.testing.allocator); | 398 | const graphemes = try Graphemes.init(std.testing.allocator); |
| 310 | defer data.deinit(std.testing.allocator); | 399 | defer graphemes.deinit(std.testing.allocator); |
| 311 | 400 | ||
| 312 | { | 401 | { |
| 313 | var iter = Iterator.init(with_zwj, &data); | 402 | var iter = graphemes.iterator(with_zwj); |
| 314 | var i: usize = 0; | 403 | var i: usize = 0; |
| 315 | while (iter.next()) |_| : (i += 1) {} | 404 | while (iter.next()) |_| : (i += 1) {} |
| 316 | try std.testing.expectEqual(@as(usize, 1), i); | 405 | try std.testing.expectEqual(@as(usize, 1), i); |
| 317 | } | 406 | } |
| 318 | 407 | ||
| 319 | { | 408 | { |
| 320 | var iter = Iterator.init(with_zwsp, &data); | 409 | var iter = graphemes.iterator(with_zwsp); |
| 321 | var i: usize = 0; | 410 | var i: usize = 0; |
| 322 | while (iter.next()) |_| : (i += 1) {} | 411 | while (iter.next()) |_| : (i += 1) {} |
| 323 | try std.testing.expectEqual(@as(usize, 3), i); | 412 | try std.testing.expectEqual(@as(usize, 3), i); |
| 324 | } | 413 | } |
| 325 | 414 | ||
| 326 | { | 415 | { |
| 327 | var iter = Iterator.init(no_joiner, &data); | 416 | var iter = graphemes.iterator(no_joiner); |
| 328 | var i: usize = 0; | 417 | var i: usize = 0; |
| 329 | while (iter.next()) |_| : (i += 1) {} | 418 | while (iter.next()) |_| : (i += 1) {} |
| 330 | try std.testing.expectEqual(@as(usize, 2), i); | 419 | try std.testing.expectEqual(@as(usize, 2), i); |
diff --git a/src/unicode_tests.zig b/src/unicode_tests.zig index 7236ff6..de1b9ec 100644 --- a/src/unicode_tests.zig +++ b/src/unicode_tests.zig | |||
| @@ -9,7 +9,7 @@ const unicode = std.unicode; | |||
| 9 | 9 | ||
| 10 | const grapheme = @import("grapheme"); | 10 | const grapheme = @import("grapheme"); |
| 11 | const Grapheme = @import("grapheme").Grapheme; | 11 | const Grapheme = @import("grapheme").Grapheme; |
| 12 | const GraphemeData = @import("grapheme").GraphemeData; | 12 | const Graphemes = @import("grapheme"); |
| 13 | const GraphemeIterator = @import("grapheme").Iterator; | 13 | const GraphemeIterator = @import("grapheme").Iterator; |
| 14 | const Normalize = @import("Normalize"); | 14 | const Normalize = @import("Normalize"); |
| 15 | 15 | ||
| @@ -18,10 +18,10 @@ comptime { | |||
| 18 | } | 18 | } |
| 19 | test "Iterator.peek" { | 19 | test "Iterator.peek" { |
| 20 | const peek_seq = "aΔ👨🏻‍🌾→"; | 20 | const peek_seq = "aΔ👨🏻‍🌾→"; |
| 21 | const data = try GraphemeData.init(std.testing.allocator); | 21 | const data = try Graphemes.init(std.testing.allocator); |
| 22 | defer data.deinit(std.testing.allocator); | 22 | defer data.deinit(std.testing.allocator); |
| 23 | 23 | ||
| 24 | var iter = grapheme.Iterator.init(peek_seq, &data); | 24 | var iter = data.iterator(peek_seq); |
| 25 | const peek_a = iter.peek().?; | 25 | const peek_a = iter.peek().?; |
| 26 | const next_a = iter.next().?; | 26 | const next_a = iter.next().?; |
| 27 | try std.testing.expectEqual(peek_a, next_a); | 27 | try std.testing.expectEqual(peek_a, next_a); |
| @@ -162,7 +162,7 @@ test "Segmentation GraphemeIterator" { | |||
| 162 | var buf_reader = std.io.bufferedReader(file.reader()); | 162 | var buf_reader = std.io.bufferedReader(file.reader()); |
| 163 | var input_stream = buf_reader.reader(); | 163 | var input_stream = buf_reader.reader(); |
| 164 | 164 | ||
| 165 | const data = try GraphemeData.init(allocator); | 165 | const data = try Graphemes.init(allocator); |
| 166 | defer data.deinit(allocator); | 166 | defer data.deinit(allocator); |
| 167 | 167 | ||
| 168 | var buf: [4096]u8 = undefined; | 168 | var buf: [4096]u8 = undefined; |
| @@ -207,7 +207,7 @@ test "Segmentation GraphemeIterator" { | |||
| 207 | } | 207 | } |
| 208 | 208 | ||
| 209 | // std.debug.print("\nline {}: {s}\n", .{ line_no, all_bytes.items }); | 209 | // std.debug.print("\nline {}: {s}\n", .{ line_no, all_bytes.items }); |
| 210 | var iter = GraphemeIterator.init(all_bytes.items, &data); | 210 | var iter = data.iterator(all_bytes.items); |
| 211 | 211 | ||
| 212 | // Chaeck. | 212 | // Chaeck. |
| 213 | for (want.items) |want_gc| { | 213 | for (want.items) |want_gc| { |