summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/DisplayWidth.zig240
-rw-r--r--src/GraphemeData.zig12
-rw-r--r--src/Normalize.zig29
-rw-r--r--src/WidthData.zig32
-rw-r--r--src/grapheme.zig109
-rw-r--r--src/unicode_tests.zig10
6 files changed, 313 insertions, 119 deletions
diff --git a/src/DisplayWidth.zig b/src/DisplayWidth.zig
index 8631bd4..11ec59e 100644
--- a/src/DisplayWidth.zig
+++ b/src/DisplayWidth.zig
@@ -2,38 +2,131 @@ const std = @import("std");
2const builtin = @import("builtin"); 2const builtin = @import("builtin");
3const options = @import("options"); 3const options = @import("options");
4const ArrayList = std.ArrayList; 4const ArrayList = std.ArrayList;
5const compress = std.compress;
5const mem = std.mem; 6const mem = std.mem;
6const simd = std.simd; 7const simd = std.simd;
7const testing = std.testing; 8const testing = std.testing;
8 9
9const ascii = @import("ascii"); 10const ascii = @import("ascii");
10const CodePointIterator = @import("code_point").Iterator; 11const CodePointIterator = @import("code_point").Iterator;
11const GraphemeIterator = @import("grapheme").Iterator;
12pub const DisplayWidthData = @import("DisplayWidthData"); 12pub const DisplayWidthData = @import("DisplayWidthData");
13 13
14data: *const DisplayWidthData, 14const Graphemes = @import("Graphemes");
15 15
16const Self = @This(); 16g_data: Graphemes,
17s1: []u16 = undefined,
18s2: []i4 = undefined,
19owns_gdata: bool,
20
21const DisplayWidth = @This();
22
/// Creates a `DisplayWidth` that loads its own grapheme data.
///
/// The width tables are loaded first via `setup`, then the grapheme data via
/// `Graphemes.init`. `owns_gdata` is set to `true`, so `deinit` also releases
/// the grapheme data. Returns an allocator error if any allocation fails; in
/// that case nothing allocated here is leaked.
pub fn init(allocator: mem.Allocator) mem.Allocator.Error!DisplayWidth {
    var dw: DisplayWidth = try DisplayWidth.setup(allocator);
    // If loading the grapheme data fails, give back the width tables.
    errdefer {
        allocator.free(dw.s1);
        allocator.free(dw.s2);
    }
    // Last fallible step: no errdefer is needed for `g_data` because nothing
    // after this line can return an error.
    dw.g_data = try Graphemes.init(allocator);
    dw.owns_gdata = true;
    return dw;
}
34
/// Creates a `DisplayWidth` that uses caller-provided grapheme data.
///
/// Only the width tables are allocated here. `owns_gdata` is set to `false`,
/// so `deinit` frees the width tables but leaves `g_data` alone.
pub fn initWithGraphemeData(allocator: mem.Allocator, g_data: Graphemes) mem.Allocator.Error!DisplayWidth {
    var result = try DisplayWidth.setup(allocator);
    result.owns_gdata = false;
    result.g_data = g_data;
    return result;
}
41
// Loads the two width lookup tables (`s1`, `s2`) from the embedded,
// deflate-compressed "dwp" blob. Only those two fields are populated:
// `g_data` and `owns_gdata` are left undefined and must be set by the caller
// before the value is used.
fn setup(allocator: mem.Allocator) mem.Allocator.Error!DisplayWidth {
    const decompressor = compress.flate.inflate.decompressor;
    const in_bytes = @embedFile("dwp");
    var in_fbs = std.io.fixedBufferStream(in_bytes);
    var in_decomp = decompressor(.raw, in_fbs.reader());
    var reader = in_decomp.reader();

    // Integers are read in the byte order of the CPU being compiled for.
    // NOTE(review): presumably the blob is generated with matching endianness — confirm.
    const endian = builtin.cpu.arch.endian();

    var dw: DisplayWidth = undefined;

    // Read failures on the embedded blob are treated as impossible
    // (`catch unreachable`); only allocation can fail with an error.
    const stage_1_len: u16 = reader.readInt(u16, endian) catch unreachable;
    dw.s1 = try allocator.alloc(u16, stage_1_len);
    // Needed because the stage-2 allocation below can still fail.
    errdefer allocator.free(dw.s1);
    for (dw.s1) |*entry| entry.* = reader.readInt(u16, endian) catch unreachable;

    const stage_2_len: u16 = reader.readInt(u16, endian) catch unreachable;
    dw.s2 = try allocator.alloc(i4, stage_2_len);
    // No errdefer for `s2`: nothing after this allocation can return an error.
    // Widths are stored one per byte (i8) and narrowed to i4 here.
    for (dw.s2) |*entry| entry.* = @intCast(reader.readInt(i8, endian) catch unreachable);

    return dw;
}
66
/// Releases the memory held by `dw`. `allocator` must be the allocator the
/// value was created with. The grapheme data is released only when
/// `owns_gdata` is set, i.e. when this value created it itself.
pub fn deinit(dw: *const DisplayWidth, allocator: mem.Allocator) void {
    if (dw.owns_gdata) {
        dw.g_data.deinit(allocator);
    }
    allocator.free(dw.s2);
    allocator.free(dw.s1);
}
72
/// Returns the number of cells `cp` occupies when rendered in a fixed-pitch
/// font (i.e. a terminal screen). Values range from -1 to 3: BACKSPACE and
/// DELETE yield -1, the three-em dash yields 3, and C0/C1 control codes
/// yield 0.
///
/// The result is a pure lookup in the two-stage table: `s1` is indexed by the
/// high bits of `cp` (`cp >> 8`) and supplies a base offset into `s2`, which
/// is then indexed by the low byte of `cp`.
/// NOTE(review): width of ambiguous code points (1 vs 2) is presumably fixed
/// when the table data is generated — confirm against the build options.
pub fn codePointWidth(dw: DisplayWidth, cp: u21) i4 {
    const block_base = dw.s1[cp >> 8];
    const low_byte = cp & 0xff;
    return dw.s2[block_base + low_byte];
}
81
// Spot-checks `codePointWidth` across control codes, format characters,
// wide dashes, an ambiguous-width code point, and a few non-ASCII literals.
test "codePointWidth" {
    const dw = try DisplayWidth.init(std.testing.allocator);
    defer dw.deinit(std.testing.allocator);

    // C0 controls and DEL.
    try testing.expectEqual(@as(i4, 0), dw.codePointWidth(0x0000)); // null
    try testing.expectEqual(@as(i4, -1), dw.codePointWidth(0x8)); // \b
    try testing.expectEqual(@as(i4, -1), dw.codePointWidth(0x7f)); // DEL
    try testing.expectEqual(@as(i4, 0), dw.codePointWidth(0x0005)); // ENQ
    try testing.expectEqual(@as(i4, 0), dw.codePointWidth(0x0007)); // \a BEL
    try testing.expectEqual(@as(i4, 0), dw.codePointWidth(0x000A)); // \n LF
    try testing.expectEqual(@as(i4, 0), dw.codePointWidth(0x000B)); // \v VT
    try testing.expectEqual(@as(i4, 0), dw.codePointWidth(0x000C)); // \f FF
    try testing.expectEqual(@as(i4, 0), dw.codePointWidth(0x000D)); // \r CR
    try testing.expectEqual(@as(i4, 0), dw.codePointWidth(0x000E)); // SO
    try testing.expectEqual(@as(i4, 0), dw.codePointWidth(0x000F)); // SI

    // Format characters.
    try testing.expectEqual(@as(i4, 0), dw.codePointWidth(0x070F)); // Cf
    try testing.expectEqual(@as(i4, 1), dw.codePointWidth(0x0603)); // Cf Arabic

    try testing.expectEqual(@as(i4, 1), dw.codePointWidth(0x00AD)); // soft-hyphen
    try testing.expectEqual(@as(i4, 2), dw.codePointWidth(0x2E3A)); // two-em dash
    try testing.expectEqual(@as(i4, 3), dw.codePointWidth(0x2E3B)); // three-em dash

    try testing.expectEqual(@as(i4, 1), dw.codePointWidth(0x00BD)); // ambiguous halfwidth

    // Non-ASCII literals (restored from mis-encoded text).
    try testing.expectEqual(@as(i4, 1), dw.codePointWidth('é'));
    try testing.expectEqual(@as(i4, 2), dw.codePointWidth('😊'));
    try testing.expectEqual(@as(i4, 2), dw.codePointWidth('统'));
}
17 110
18/// strWidth returns the total display width of `str` as the number of cells 111/// strWidth returns the total display width of `str` as the number of cells
19/// required in a fixed-pitch font (i.e. a terminal screen). 112/// required in a fixed-pitch font (i.e. a terminal screen).
20pub fn strWidth(self: Self, str: []const u8) usize { 113pub fn strWidth(dw: DisplayWidth, str: []const u8) usize {
21 var total: isize = 0; 114 var total: isize = 0;
22 115
23 // ASCII fast path 116 // ASCII fast path
24 if (ascii.isAsciiOnly(str)) { 117 if (ascii.isAsciiOnly(str)) {
25 for (str) |b| total += self.data.codePointWidth(b); 118 for (str) |b| total += dw.codePointWidth(b);
26 return @intCast(@max(0, total)); 119 return @intCast(@max(0, total));
27 } 120 }
28 121
29 var giter = GraphemeIterator.init(str, &self.data.g_data); 122 var giter = dw.g_data.iterator(str);
30 123
31 while (giter.next()) |gc| { 124 while (giter.next()) |gc| {
32 var cp_iter = CodePointIterator{ .bytes = gc.bytes(str) }; 125 var cp_iter = CodePointIterator{ .bytes = gc.bytes(str) };
33 var gc_total: isize = 0; 126 var gc_total: isize = 0;
34 127
35 while (cp_iter.next()) |cp| { 128 while (cp_iter.next()) |cp| {
36 var w = self.data.codePointWidth(cp.code); 129 var w = dw.codePointWidth(cp.code);
37 130
38 if (w != 0) { 131 if (w != 0) {
39 // Handle text emoji sequence. 132 // Handle text emoji sequence.
@@ -58,41 +151,40 @@ pub fn strWidth(self: Self, str: []const u8) usize {
58} 151}
59 152
60test "strWidth" { 153test "strWidth" {
61 const data = try DisplayWidthData.init(testing.allocator); 154 const dw = try DisplayWidth.init(testing.allocator);
62 defer data.deinit(testing.allocator); 155 defer dw.deinit(testing.allocator);
63 const self = Self{ .data = &data };
64 const c0 = options.c0_width orelse 0; 156 const c0 = options.c0_width orelse 0;
65 157
66 try testing.expectEqual(@as(usize, 5), self.strWidth("Hello\r\n")); 158 try testing.expectEqual(@as(usize, 5), dw.strWidth("Hello\r\n"));
67 try testing.expectEqual(@as(usize, 1), self.strWidth("\u{0065}\u{0301}")); 159 try testing.expectEqual(@as(usize, 1), dw.strWidth("\u{0065}\u{0301}"));
68 try testing.expectEqual(@as(usize, 2), self.strWidth("\u{1F476}\u{1F3FF}\u{0308}\u{200D}\u{1F476}\u{1F3FF}")); 160 try testing.expectEqual(@as(usize, 2), dw.strWidth("\u{1F476}\u{1F3FF}\u{0308}\u{200D}\u{1F476}\u{1F3FF}"));
69 try testing.expectEqual(@as(usize, 8), self.strWidth("Hello 😊")); 161 try testing.expectEqual(@as(usize, 8), dw.strWidth("Hello 😊"));
70 try testing.expectEqual(@as(usize, 8), self.strWidth("Héllo 😊")); 162 try testing.expectEqual(@as(usize, 8), dw.strWidth("Héllo 😊"));
71 try testing.expectEqual(@as(usize, 8), self.strWidth("Héllo :)")); 163 try testing.expectEqual(@as(usize, 8), dw.strWidth("Héllo :)"));
72 try testing.expectEqual(@as(usize, 8), self.strWidth("Héllo 🇪🇸")); 164 try testing.expectEqual(@as(usize, 8), dw.strWidth("Héllo 🇪🇸"));
73 try testing.expectEqual(@as(usize, 2), self.strWidth("\u{26A1}")); // Lone emoji 165 try testing.expectEqual(@as(usize, 2), dw.strWidth("\u{26A1}")); // Lone emoji
74 try testing.expectEqual(@as(usize, 1), self.strWidth("\u{26A1}\u{FE0E}")); // Text sequence 166 try testing.expectEqual(@as(usize, 1), dw.strWidth("\u{26A1}\u{FE0E}")); // Text sequence
75 try testing.expectEqual(@as(usize, 2), self.strWidth("\u{26A1}\u{FE0F}")); // Presentation sequence 167 try testing.expectEqual(@as(usize, 2), dw.strWidth("\u{26A1}\u{FE0F}")); // Presentation sequence
76 try testing.expectEqual(@as(usize, 1), self.strWidth("\u{2764}")); // Default text presentation 168 try testing.expectEqual(@as(usize, 1), dw.strWidth("\u{2764}")); // Default text presentation
77 try testing.expectEqual(@as(usize, 1), self.strWidth("\u{2764}\u{FE0E}")); // Default text presentation with VS15 selector 169 try testing.expectEqual(@as(usize, 1), dw.strWidth("\u{2764}\u{FE0E}")); // Default text presentation with VS15 selector
78 try testing.expectEqual(@as(usize, 2), self.strWidth("\u{2764}\u{FE0F}")); // Default text presentation with VS16 selector 170 try testing.expectEqual(@as(usize, 2), dw.strWidth("\u{2764}\u{FE0F}")); // Default text presentation with VS16 selector
79 const expect_bs: usize = if (c0 == 0) 0 else 1 + c0; 171 const expect_bs: usize = if (c0 == 0) 0 else 1 + c0;
80 try testing.expectEqual(expect_bs, self.strWidth("A\x08")); // Backspace 172 try testing.expectEqual(expect_bs, dw.strWidth("A\x08")); // Backspace
81 try testing.expectEqual(expect_bs, self.strWidth("\x7FA")); // DEL 173 try testing.expectEqual(expect_bs, dw.strWidth("\x7FA")); // DEL
82 const expect_long_del: usize = if (c0 == 0) 0 else 1 + (c0 * 3); 174 const expect_long_del: usize = if (c0 == 0) 0 else 1 + (c0 * 3);
83 try testing.expectEqual(expect_long_del, self.strWidth("\x7FA\x08\x08")); // never less than 0 175 try testing.expectEqual(expect_long_del, dw.strWidth("\x7FA\x08\x08")); // never less than 0
84 176
85 // wcwidth Python lib tests. See: https://github.com/jquast/wcwidth/blob/master/tests/test_core.py 177 // wcwidth Python lib tests. See: https://github.com/jquast/wcwidth/blob/master/tests/test_core.py
86 const empty = ""; 178 const empty = "";
87 try testing.expectEqual(@as(usize, 0), self.strWidth(empty)); 179 try testing.expectEqual(@as(usize, 0), dw.strWidth(empty));
88 const with_null = "hello\x00world"; 180 const with_null = "hello\x00world";
89 try testing.expectEqual(@as(usize, 10 + c0), self.strWidth(with_null)); 181 try testing.expectEqual(@as(usize, 10 + c0), dw.strWidth(with_null));
90 const hello_jp = "コンニチハ, セカイ!"; 182 const hello_jp = "コンニチハ, セカイ!";
91 try testing.expectEqual(@as(usize, 19), self.strWidth(hello_jp)); 183 try testing.expectEqual(@as(usize, 19), dw.strWidth(hello_jp));
92 const control = "\x1b[0m"; 184 const control = "\x1b[0m";
93 try testing.expectEqual(@as(usize, 3 + c0), self.strWidth(control)); 185 try testing.expectEqual(@as(usize, 3 + c0), dw.strWidth(control));
94 const balinese = "\u{1B13}\u{1B28}\u{1B2E}\u{1B44}"; 186 const balinese = "\u{1B13}\u{1B28}\u{1B2E}\u{1B44}";
95 try testing.expectEqual(@as(usize, 3), self.strWidth(balinese)); 187 try testing.expectEqual(@as(usize, 3), dw.strWidth(balinese));
96 188
97 // These commented out tests require a new specification for complex scripts. 189 // These commented out tests require a new specification for complex scripts.
98 // See: https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf 190 // See: https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf
@@ -106,17 +198,17 @@ test "strWidth" {
106 // try testing.expectEqual(@as(usize, 3), strWidth(kannada_1)); 198 // try testing.expectEqual(@as(usize, 3), strWidth(kannada_1));
107 // The following passes but as a mere coincidence. 199 // The following passes but as a mere coincidence.
108 const kannada_2 = "\u{0cb0}\u{0cbc}\u{0ccd}\u{0c9a}"; 200 const kannada_2 = "\u{0cb0}\u{0cbc}\u{0ccd}\u{0c9a}";
109 try testing.expectEqual(@as(usize, 2), self.strWidth(kannada_2)); 201 try testing.expectEqual(@as(usize, 2), dw.strWidth(kannada_2));
110 202
111 // From Rust https://github.com/jameslanska/unicode-display-width 203 // From Rust https://github.com/jameslanska/unicode-display-width
112 try testing.expectEqual(@as(usize, 15), self.strWidth("🔥🗡🍩👩🏻‍🚀⏰💃🏼🔦👍🏻")); 204 try testing.expectEqual(@as(usize, 15), dw.strWidth("🔥🗡🍩👩🏻‍🚀⏰💃🏼🔦👍🏻"));
113 try testing.expectEqual(@as(usize, 2), self.strWidth("🦀")); 205 try testing.expectEqual(@as(usize, 2), dw.strWidth("🦀"));
114 try testing.expectEqual(@as(usize, 2), self.strWidth("👨‍👩‍👧‍👧")); 206 try testing.expectEqual(@as(usize, 2), dw.strWidth("👨‍👩‍👧‍👧"));
115 try testing.expectEqual(@as(usize, 2), self.strWidth("👩‍🔬")); 207 try testing.expectEqual(@as(usize, 2), dw.strWidth("👩‍🔬"));
116 try testing.expectEqual(@as(usize, 9), self.strWidth("sane text")); 208 try testing.expectEqual(@as(usize, 9), dw.strWidth("sane text"));
117 try testing.expectEqual(@as(usize, 9), self.strWidth("αΊ’ΜŒΓ‘Μ²lΝ”ΜΜžΜ„Μ‘ΝŒgΜ–Μ˜Μ˜Μ”Μ”Ν’ΝžΝoΜͺΜ”TΜ’Μ™Μ«ΜˆΜΝžeΜ¬ΝˆΝ•ΝŒΜΝ‘x̺̍ṭ̓̓ͅ")); 209 try testing.expectEqual(@as(usize, 9), dw.strWidth("αΊ’ΜŒΓ‘Μ²lΝ”ΜΜžΜ„Μ‘ΝŒgΜ–Μ˜Μ˜Μ”Μ”Ν’ΝžΝoΜͺΜ”TΜ’Μ™Μ«ΜˆΜΝžeΜ¬ΝˆΝ•ΝŒΜΝ‘x̺̍ṭ̓̓ͅ"));
118 try testing.expectEqual(@as(usize, 17), self.strWidth("슬라바 우크라이나")); 210 try testing.expectEqual(@as(usize, 17), dw.strWidth("슬라바 우크라이나"));
119 try testing.expectEqual(@as(usize, 1), self.strWidth("\u{378}")); 211 try testing.expectEqual(@as(usize, 1), dw.strWidth("\u{378}"));
120} 212}
121 213
122/// centers `str` in a new string of width `total_width` (in display cells) using `pad` as padding. 214/// centers `str` in a new string of width `total_width` (in display cells) using `pad` as padding.
@@ -124,17 +216,17 @@ test "strWidth" {
124/// receive one additional pad. This makes sure the returned string fills the requested width. 216/// receive one additional pad. This makes sure the returned string fills the requested width.
125/// Caller must free returned bytes with `allocator`. 217/// Caller must free returned bytes with `allocator`.
126pub fn center( 218pub fn center(
127 self: Self, 219 dw: DisplayWidth,
128 allocator: mem.Allocator, 220 allocator: mem.Allocator,
129 str: []const u8, 221 str: []const u8,
130 total_width: usize, 222 total_width: usize,
131 pad: []const u8, 223 pad: []const u8,
132) ![]u8 { 224) ![]u8 {
133 const str_width = self.strWidth(str); 225 const str_width = dw.strWidth(str);
134 if (str_width > total_width) return error.StrTooLong; 226 if (str_width > total_width) return error.StrTooLong;
135 if (str_width == total_width) return try allocator.dupe(u8, str); 227 if (str_width == total_width) return try allocator.dupe(u8, str);
136 228
137 const pad_width = self.strWidth(pad); 229 const pad_width = dw.strWidth(pad);
138 if (pad_width > total_width or str_width + pad_width > total_width) return error.PadTooLong; 230 if (pad_width > total_width or str_width + pad_width > total_width) return error.PadTooLong;
139 231
140 const margin_width = @divFloor((total_width - str_width), 2); 232 const margin_width = @divFloor((total_width - str_width), 2);
@@ -165,62 +257,61 @@ pub fn center(
165 257
166test "center" { 258test "center" {
167 const allocator = testing.allocator; 259 const allocator = testing.allocator;
168 const data = try DisplayWidthData.init(allocator); 260 const dw = try DisplayWidth.init(allocator);
169 defer data.deinit(allocator); 261 defer dw.deinit(allocator);
170 const self = Self{ .data = &data };
171 262
172 // Input and width both have odd length 263 // Input and width both have odd length
173 var centered = try self.center(allocator, "abc", 9, "*"); 264 var centered = try dw.center(allocator, "abc", 9, "*");
174 try testing.expectEqualSlices(u8, "***abc***", centered); 265 try testing.expectEqualSlices(u8, "***abc***", centered);
175 266
176 // Input and width both have even length 267 // Input and width both have even length
177 testing.allocator.free(centered); 268 testing.allocator.free(centered);
178 centered = try self.center(allocator, "w😊w", 10, "-"); 269 centered = try dw.center(allocator, "w😊w", 10, "-");
179 try testing.expectEqualSlices(u8, "---w😊w---", centered); 270 try testing.expectEqualSlices(u8, "---w😊w---", centered);
180 271
181 // Input has even length, width has odd length 272 // Input has even length, width has odd length
182 testing.allocator.free(centered); 273 testing.allocator.free(centered);
183 centered = try self.center(allocator, "1234", 9, "-"); 274 centered = try dw.center(allocator, "1234", 9, "-");
184 try testing.expectEqualSlices(u8, "--1234---", centered); 275 try testing.expectEqualSlices(u8, "--1234---", centered);
185 276
186 // Input has odd length, width has even length 277 // Input has odd length, width has even length
187 testing.allocator.free(centered); 278 testing.allocator.free(centered);
188 centered = try self.center(allocator, "123", 8, "-"); 279 centered = try dw.center(allocator, "123", 8, "-");
189 try testing.expectEqualSlices(u8, "--123---", centered); 280 try testing.expectEqualSlices(u8, "--123---", centered);
190 281
191 // Input is the same length as the width 282 // Input is the same length as the width
192 testing.allocator.free(centered); 283 testing.allocator.free(centered);
193 centered = try self.center(allocator, "123", 3, "-"); 284 centered = try dw.center(allocator, "123", 3, "-");
194 try testing.expectEqualSlices(u8, "123", centered); 285 try testing.expectEqualSlices(u8, "123", centered);
195 286
196 // Input is empty 287 // Input is empty
197 testing.allocator.free(centered); 288 testing.allocator.free(centered);
198 centered = try self.center(allocator, "", 3, "-"); 289 centered = try dw.center(allocator, "", 3, "-");
199 try testing.expectEqualSlices(u8, "---", centered); 290 try testing.expectEqualSlices(u8, "---", centered);
200 291
201 // Input is empty and width is zero 292 // Input is empty and width is zero
202 testing.allocator.free(centered); 293 testing.allocator.free(centered);
203 centered = try self.center(allocator, "", 0, "-"); 294 centered = try dw.center(allocator, "", 0, "-");
204 try testing.expectEqualSlices(u8, "", centered); 295 try testing.expectEqualSlices(u8, "", centered);
205 296
206 // Input is longer than the width, which is an error 297 // Input is longer than the width, which is an error
207 testing.allocator.free(centered); 298 testing.allocator.free(centered);
208 try testing.expectError(error.StrTooLong, self.center(allocator, "123", 2, "-")); 299 try testing.expectError(error.StrTooLong, dw.center(allocator, "123", 2, "-"));
209} 300}
210 301
211/// padLeft returns a new string of width `total_width` (in display cells) using `pad` as padding 302/// padLeft returns a new string of width `total_width` (in display cells) using `pad` as padding
212/// on the left side. Caller must free returned bytes with `allocator`. 303/// on the left side. Caller must free returned bytes with `allocator`.
213pub fn padLeft( 304pub fn padLeft(
214 self: Self, 305 dw: DisplayWidth,
215 allocator: mem.Allocator, 306 allocator: mem.Allocator,
216 str: []const u8, 307 str: []const u8,
217 total_width: usize, 308 total_width: usize,
218 pad: []const u8, 309 pad: []const u8,
219) ![]u8 { 310) ![]u8 {
220 const str_width = self.strWidth(str); 311 const str_width = dw.strWidth(str);
221 if (str_width > total_width) return error.StrTooLong; 312 if (str_width > total_width) return error.StrTooLong;
222 313
223 const pad_width = self.strWidth(pad); 314 const pad_width = dw.strWidth(pad);
224 if (pad_width > total_width or str_width + pad_width > total_width) return error.PadTooLong; 315 if (pad_width > total_width or str_width + pad_width > total_width) return error.PadTooLong;
225 316
226 const margin_width = total_width - str_width; 317 const margin_width = total_width - str_width;
@@ -244,32 +335,31 @@ pub fn padLeft(
244 335
245test "padLeft" { 336test "padLeft" {
246 const allocator = testing.allocator; 337 const allocator = testing.allocator;
247 const data = try DisplayWidthData.init(allocator); 338 const dw = try DisplayWidth.init(allocator);
248 defer data.deinit(allocator); 339 defer dw.deinit(allocator);
249 const self = Self{ .data = &data };
250 340
251 var right_aligned = try self.padLeft(allocator, "abc", 9, "*"); 341 var right_aligned = try dw.padLeft(allocator, "abc", 9, "*");
252 defer testing.allocator.free(right_aligned); 342 defer testing.allocator.free(right_aligned);
253 try testing.expectEqualSlices(u8, "******abc", right_aligned); 343 try testing.expectEqualSlices(u8, "******abc", right_aligned);
254 344
255 testing.allocator.free(right_aligned); 345 testing.allocator.free(right_aligned);
256 right_aligned = try self.padLeft(allocator, "w😊w", 10, "-"); 346 right_aligned = try dw.padLeft(allocator, "w😊w", 10, "-");
257 try testing.expectEqualSlices(u8, "------w😊w", right_aligned); 347 try testing.expectEqualSlices(u8, "------w😊w", right_aligned);
258} 348}
259 349
260/// padRight returns a new string of width `total_width` (in display cells) using `pad` as padding 350/// padRight returns a new string of width `total_width` (in display cells) using `pad` as padding
261/// on the right side. Caller must free returned bytes with `allocator`. 351/// on the right side. Caller must free returned bytes with `allocator`.
262pub fn padRight( 352pub fn padRight(
263 self: Self, 353 dw: DisplayWidth,
264 allocator: mem.Allocator, 354 allocator: mem.Allocator,
265 str: []const u8, 355 str: []const u8,
266 total_width: usize, 356 total_width: usize,
267 pad: []const u8, 357 pad: []const u8,
268) ![]u8 { 358) ![]u8 {
269 const str_width = self.strWidth(str); 359 const str_width = dw.strWidth(str);
270 if (str_width > total_width) return error.StrTooLong; 360 if (str_width > total_width) return error.StrTooLong;
271 361
272 const pad_width = self.strWidth(pad); 362 const pad_width = dw.strWidth(pad);
273 if (pad_width > total_width or str_width + pad_width > total_width) return error.PadTooLong; 363 if (pad_width > total_width or str_width + pad_width > total_width) return error.PadTooLong;
274 364
275 const margin_width = total_width - str_width; 365 const margin_width = total_width - str_width;
@@ -294,16 +384,15 @@ pub fn padRight(
294 384
295test "padRight" { 385test "padRight" {
296 const allocator = testing.allocator; 386 const allocator = testing.allocator;
297 const data = try DisplayWidthData.init(allocator); 387 const dw = try DisplayWidth.init(allocator);
298 defer data.deinit(allocator); 388 defer dw.deinit(allocator);
299 const self = Self{ .data = &data };
300 389
301 var left_aligned = try self.padRight(allocator, "abc", 9, "*"); 390 var left_aligned = try dw.padRight(allocator, "abc", 9, "*");
302 defer testing.allocator.free(left_aligned); 391 defer testing.allocator.free(left_aligned);
303 try testing.expectEqualSlices(u8, "abc******", left_aligned); 392 try testing.expectEqualSlices(u8, "abc******", left_aligned);
304 393
305 testing.allocator.free(left_aligned); 394 testing.allocator.free(left_aligned);
306 left_aligned = try self.padRight(allocator, "w😊w", 10, "-"); 395 left_aligned = try dw.padRight(allocator, "w😊w", 10, "-");
307 try testing.expectEqualSlices(u8, "w😊w------", left_aligned); 396 try testing.expectEqualSlices(u8, "w😊w------", left_aligned);
308} 397}
309 398
@@ -311,7 +400,7 @@ test "padRight" {
311/// `threshold` defines how far the last column of the last word can be 400/// `threshold` defines how far the last column of the last word can be
312/// from the edge. Caller must free returned bytes with `allocator`. 401/// from the edge. Caller must free returned bytes with `allocator`.
313pub fn wrap( 402pub fn wrap(
314 self: Self, 403 dw: DisplayWidth,
315 allocator: mem.Allocator, 404 allocator: mem.Allocator,
316 str: []const u8, 405 str: []const u8,
317 columns: usize, 406 columns: usize,
@@ -329,7 +418,7 @@ pub fn wrap(
329 while (word_iter.next()) |word| { 418 while (word_iter.next()) |word| {
330 try result.appendSlice(word); 419 try result.appendSlice(word);
331 try result.append(' '); 420 try result.append(' ');
332 line_width += self.strWidth(word) + 1; 421 line_width += dw.strWidth(word) + 1;
333 422
334 if (line_width > columns or columns - line_width <= threshold) { 423 if (line_width > columns or columns - line_width <= threshold) {
335 try result.append('\n'); 424 try result.append('\n');
@@ -347,12 +436,11 @@ pub fn wrap(
347 436
348test "wrap" { 437test "wrap" {
349 const allocator = testing.allocator; 438 const allocator = testing.allocator;
350 const data = try DisplayWidthData.init(allocator); 439 const dw = try DisplayWidth.init(allocator);
351 defer data.deinit(allocator); 440 defer dw.deinit(allocator);
352 const self = Self{ .data = &data };
353 441
354 const input = "The quick brown fox\r\njumped over the lazy dog!"; 442 const input = "The quick brown fox\r\njumped over the lazy dog!";
355 const got = try self.wrap(allocator, input, 10, 3); 443 const got = try dw.wrap(allocator, input, 10, 3);
356 defer testing.allocator.free(got); 444 defer testing.allocator.free(got);
357 const want = "The quick \nbrown fox \njumped \nover the \nlazy dog!"; 445 const want = "The quick \nbrown fox \njumped \nover the \nlazy dog!";
358 try testing.expectEqualStrings(want, got); 446 try testing.expectEqualStrings(want, got);
diff --git a/src/GraphemeData.zig b/src/GraphemeData.zig
index 6d3174d..df025cb 100644
--- a/src/GraphemeData.zig
+++ b/src/GraphemeData.zig
@@ -36,7 +36,7 @@ s3: []u8 = undefined,
36 36
37const Self = @This(); 37const Self = @This();
38 38
39pub fn init(allocator: mem.Allocator) mem.Allocator.Error!Self { 39pub inline fn init(allocator: mem.Allocator) mem.Allocator.Error!Self {
40 const decompressor = compress.flate.inflate.decompressor; 40 const decompressor = compress.flate.inflate.decompressor;
41 const in_bytes = @embedFile("gbp"); 41 const in_bytes = @embedFile("gbp");
42 var in_fbs = std.io.fixedBufferStream(in_bytes); 42 var in_fbs = std.io.fixedBufferStream(in_bytes);
@@ -65,23 +65,23 @@ pub fn init(allocator: mem.Allocator) mem.Allocator.Error!Self {
65 return self; 65 return self;
66} 66}
67 67
68pub fn deinit(self: *const Self, allocator: mem.Allocator) void { 68pub inline fn deinit(self: *const Self, allocator: mem.Allocator) void {
69 allocator.free(self.s1); 69 allocator.free(self.s1);
70 allocator.free(self.s2); 70 allocator.free(self.s2);
71 allocator.free(self.s3); 71 allocator.free(self.s3);
72} 72}
73 73
74/// Lookup the grapheme break property for a code point. 74/// Lookup the grapheme break property for a code point.
75pub fn gbp(self: Self, cp: u21) Gbp { 75pub inline fn gbp(self: Self, cp: u21) Gbp {
76 return @enumFromInt(self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]] >> 4); 76 return @enumFromInt(self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]] >> 4);
77} 77}
78 78
79/// Lookup the indic syllable type for a code point. 79/// Lookup the indic syllable type for a code point.
80pub fn indic(self: Self, cp: u21) Indic { 80pub inline fn indic(self: Self, cp: u21) Indic {
81 return @enumFromInt((self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]] >> 1) & 0x7); 81 return @enumFromInt((self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]] >> 1) & 0x7);
82} 82}
83 83
84/// Lookup the indic syllable type for a code point. 84/// Lookup the emoji property for a code point.
85pub fn isEmoji(self: Self, cp: u21) bool { 85pub inline fn isEmoji(self: Self, cp: u21) bool {
86 return self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]] & 1 == 1; 86 return self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]] & 1 == 1;
87} 87}
diff --git a/src/Normalize.zig b/src/Normalize.zig
index a28b708..b738b27 100644
--- a/src/Normalize.zig
+++ b/src/Normalize.zig
@@ -177,7 +177,7 @@ test "decompose" {
177 const allocator = testing.allocator; 177 const allocator = testing.allocator;
178 var data: NormData = undefined; 178 var data: NormData = undefined;
179 try NormData.init(&data, allocator); 179 try NormData.init(&data, allocator);
180 defer data.deinit(); 180 defer data.deinit(allocator);
181 var n = Self{ .norm_data = &data }; 181 var n = Self{ .norm_data = &data };
182 182
183 var buf: [18]u21 = undefined; 183 var buf: [18]u21 = undefined;
@@ -307,11 +307,11 @@ test "nfd ASCII / no-alloc" {
307 const allocator = testing.allocator; 307 const allocator = testing.allocator;
308 var data: NormData = undefined; 308 var data: NormData = undefined;
309 try NormData.init(&data, allocator); 309 try NormData.init(&data, allocator);
310 defer data.deinit(); 310 defer data.deinit(allocator);
311 const n = Self{ .norm_data = &data }; 311 const n = Self{ .norm_data = &data };
312 312
313 const result = try n.nfd(allocator, "Hello World!"); 313 const result = try n.nfd(allocator, "Hello World!");
314 defer result.deinit(); 314 defer result.deinit(allocator);
315 315
316 try testing.expectEqualStrings("Hello World!", result.slice); 316 try testing.expectEqualStrings("Hello World!", result.slice);
317} 317}
@@ -320,11 +320,11 @@ test "nfd !ASCII / alloc" {
320 const allocator = testing.allocator; 320 const allocator = testing.allocator;
321 var data: NormData = undefined; 321 var data: NormData = undefined;
322 try NormData.init(&data, allocator); 322 try NormData.init(&data, allocator);
323 defer data.deinit(); 323 defer data.deinit(allocator);
324 const n = Self{ .norm_data = &data }; 324 const n = Self{ .norm_data = &data };
325 325
326 const result = try n.nfd(allocator, "Héllo World! \u{3d3}"); 326 const result = try n.nfd(allocator, "Héllo World! \u{3d3}");
327 defer result.deinit(); 327 defer result.deinit(allocator);
328 328
329 try testing.expectEqualStrings("He\u{301}llo World! \u{3d2}\u{301}", result.slice); 329 try testing.expectEqualStrings("He\u{301}llo World! \u{3d2}\u{301}", result.slice);
330} 330}
@@ -333,11 +333,11 @@ test "nfkd ASCII / no-alloc" {
333 const allocator = testing.allocator; 333 const allocator = testing.allocator;
334 var data: NormData = undefined; 334 var data: NormData = undefined;
335 try NormData.init(&data, allocator); 335 try NormData.init(&data, allocator);
336 defer data.deinit(); 336 defer data.deinit(allocator);
337 const n = Self{ .norm_data = &data }; 337 const n = Self{ .norm_data = &data };
338 338
339 const result = try n.nfkd(allocator, "Hello World!"); 339 const result = try n.nfkd(allocator, "Hello World!");
340 defer result.deinit(); 340 defer result.deinit(allocator);
341 341
342 try testing.expectEqualStrings("Hello World!", result.slice); 342 try testing.expectEqualStrings("Hello World!", result.slice);
343} 343}
@@ -346,11 +346,11 @@ test "nfkd !ASCII / alloc" {
346 const allocator = testing.allocator; 346 const allocator = testing.allocator;
347 var data: NormData = undefined; 347 var data: NormData = undefined;
348 try NormData.init(&data, allocator); 348 try NormData.init(&data, allocator);
349 defer data.deinit(); 349 defer data.deinit(allocator);
350 const n = Self{ .norm_data = &data }; 350 const n = Self{ .norm_data = &data };
351 351
352 const result = try n.nfkd(allocator, "Héllo World! \u{3d3}"); 352 const result = try n.nfkd(allocator, "Héllo World! \u{3d3}");
353 defer result.deinit(); 353 defer result.deinit(allocator);
354 354
355 try testing.expectEqualStrings("He\u{301}llo World! \u{3a5}\u{301}", result.slice); 355 try testing.expectEqualStrings("He\u{301}llo World! \u{3a5}\u{301}", result.slice);
356} 356}
@@ -546,11 +546,11 @@ test "nfc" {
546 const allocator = testing.allocator; 546 const allocator = testing.allocator;
547 var data: NormData = undefined; 547 var data: NormData = undefined;
548 try NormData.init(&data, allocator); 548 try NormData.init(&data, allocator);
549 defer data.deinit(); 549 defer data.deinit(allocator);
550 const n = Self{ .norm_data = &data }; 550 const n = Self{ .norm_data = &data };
551 551
552 const result = try n.nfc(allocator, "Complex char: \u{3D2}\u{301}"); 552 const result = try n.nfc(allocator, "Complex char: \u{3D2}\u{301}");
553 defer result.deinit(); 553 defer result.deinit(allocator);
554 554
555 try testing.expectEqualStrings("Complex char: \u{3D3}", result.slice); 555 try testing.expectEqualStrings("Complex char: \u{3D3}", result.slice);
556} 556}
@@ -559,11 +559,11 @@ test "nfkc" {
559 const allocator = testing.allocator; 559 const allocator = testing.allocator;
560 var data: NormData = undefined; 560 var data: NormData = undefined;
561 try NormData.init(&data, allocator); 561 try NormData.init(&data, allocator);
562 defer data.deinit(); 562 defer data.deinit(allocator);
563 const n = Self{ .norm_data = &data }; 563 const n = Self{ .norm_data = &data };
564 564
565 const result = try n.nfkc(allocator, "Complex char: \u{03A5}\u{0301}"); 565 const result = try n.nfkc(allocator, "Complex char: \u{03A5}\u{0301}");
566 defer result.deinit(); 566 defer result.deinit(allocator);
567 567
568 try testing.expectEqualStrings("Complex char: \u{038E}", result.slice); 568 try testing.expectEqualStrings("Complex char: \u{038E}", result.slice);
569} 569}
@@ -582,7 +582,7 @@ test "eql" {
582 const allocator = testing.allocator; 582 const allocator = testing.allocator;
583 var data: NormData = undefined; 583 var data: NormData = undefined;
584 try NormData.init(&data, allocator); 584 try NormData.init(&data, allocator);
585 defer data.deinit(); 585 defer data.deinit(allocator);
586 const n = Self{ .norm_data = &data }; 586 const n = Self{ .norm_data = &data };
587 587
588 try testing.expect(try n.eql(allocator, "foé", "foe\u{0301}")); 588 try testing.expect(try n.eql(allocator, "foé", "foe\u{0301}"));
@@ -628,5 +628,4 @@ test "isLatin1Only" {
628 try testing.expect(isLatin1Only(latin1_only)); 628 try testing.expect(isLatin1Only(latin1_only));
629 const not_latin1_only = "Héllo, World! \u{3d3}"; 629 const not_latin1_only = "Héllo, World! \u{3d3}";
630 try testing.expect(!isLatin1Only(not_latin1_only)); 630 try testing.expect(!isLatin1Only(not_latin1_only));
631 try testing.expect(false);
632} 631}
diff --git a/src/WidthData.zig b/src/WidthData.zig
index b07a679..ca7eaf0 100644
--- a/src/WidthData.zig
+++ b/src/WidthData.zig
@@ -4,15 +4,36 @@ const compress = std.compress;
4const mem = std.mem; 4const mem = std.mem;
5const testing = std.testing; 5const testing = std.testing;
6 6
7const GraphemeData = @import("GraphemeData"); 7const Graphemes = @import("Graphemes");
8 8
9g_data: GraphemeData, 9g_data: Graphemes,
10s1: []u16 = undefined, 10s1: []u16 = undefined,
11s2: []i4 = undefined, 11s2: []i4 = undefined,
12owns_gdata: bool,
12 13
13const Self = @This(); 14const Self = @This();
14 15
15pub fn init(allocator: mem.Allocator) mem.Allocator.Error!Self { 16pub fn init(allocator: mem.Allocator) mem.Allocator.Error!Self {
17 var self: Self = try Self.setup(allocator);
18 errdefer {
19 allocator.free(self.s1);
20 allocator.free(self.s2);
21 }
22 self.owns_gdata = true;
23 self.g_data = try Graphemes.init(allocator);
24 errdefer self.g_data.deinit(allocator);
25 return self;
26}
27
28pub fn initWithGraphemeData(allocator: mem.Allocator, g_data: Graphemes) mem.Allocator.Error!Self {
29 var self = try Self.setup(allocator);
30 self.g_data = g_data;
31 self.owns_gdata = false;
32 return self;
33}
34
35// Sets up the DisplayWidthData, leaving the GraphemeData undefined.
36fn setup(allocator: mem.Allocator) mem.Allocator.Error!Self {
16 const decompressor = compress.flate.inflate.decompressor; 37 const decompressor = compress.flate.inflate.decompressor;
17 const in_bytes = @embedFile("dwp"); 38 const in_bytes = @embedFile("dwp");
18 var in_fbs = std.io.fixedBufferStream(in_bytes); 39 var in_fbs = std.io.fixedBufferStream(in_bytes);
@@ -21,10 +42,7 @@ pub fn init(allocator: mem.Allocator) mem.Allocator.Error!Self {
21 42
22 const endian = builtin.cpu.arch.endian(); 43 const endian = builtin.cpu.arch.endian();
23 44
24 var self = Self{ 45 var self: Self = undefined;
25 .g_data = try GraphemeData.init(allocator),
26 };
27 errdefer self.g_data.deinit(allocator);
28 46
29 const stage_1_len: u16 = reader.readInt(u16, endian) catch unreachable; 47 const stage_1_len: u16 = reader.readInt(u16, endian) catch unreachable;
30 self.s1 = try allocator.alloc(u16, stage_1_len); 48 self.s1 = try allocator.alloc(u16, stage_1_len);
@@ -42,7 +60,7 @@ pub fn init(allocator: mem.Allocator) mem.Allocator.Error!Self {
42pub fn deinit(self: *const Self, allocator: mem.Allocator) void { 60pub fn deinit(self: *const Self, allocator: mem.Allocator) void {
43 allocator.free(self.s1); 61 allocator.free(self.s1);
44 allocator.free(self.s2); 62 allocator.free(self.s2);
45 self.g_data.deinit(allocator); 63 if (self.owns_gdata) self.g_data.deinit(allocator);
46} 64}
47 65
48/// codePointWidth returns the number of cells `cp` requires when rendered 66/// codePointWidth returns the number of cells `cp` requires when rendered
diff --git a/src/grapheme.zig b/src/grapheme.zig
index 25fd71d..79cd2c6 100644
--- a/src/grapheme.zig
+++ b/src/grapheme.zig
@@ -1,10 +1,99 @@
1const std = @import("std"); 1const std = @import("std");
2const builtin = @import("builtin");
2const mem = std.mem; 3const mem = std.mem;
4const Allocator = mem.Allocator;
5const compress = std.compress;
3const unicode = std.unicode; 6const unicode = std.unicode;
4 7
5const CodePoint = @import("code_point").CodePoint; 8const CodePoint = @import("code_point").CodePoint;
6const CodePointIterator = @import("code_point").Iterator; 9const CodePointIterator = @import("code_point").Iterator;
7pub const GraphemeData = @import("GraphemeData"); 10
11s1: []u16 = undefined,
12s2: []u16 = undefined,
13s3: []u8 = undefined,
14
15const Graphemes = @This();
16
17pub inline fn init(allocator: mem.Allocator) mem.Allocator.Error!Graphemes {
18 const decompressor = compress.flate.inflate.decompressor;
19 const in_bytes = @embedFile("gbp");
20 var in_fbs = std.io.fixedBufferStream(in_bytes);
21 var in_decomp = decompressor(.raw, in_fbs.reader());
22 var reader = in_decomp.reader();
23
24 const endian = builtin.cpu.arch.endian();
25
26 var self = Graphemes{};
27
28 const s1_len: u16 = reader.readInt(u16, endian) catch unreachable;
29 self.s1 = try allocator.alloc(u16, s1_len);
30 errdefer allocator.free(self.s1);
31 for (0..s1_len) |i| self.s1[i] = reader.readInt(u16, endian) catch unreachable;
32
33 const s2_len: u16 = reader.readInt(u16, endian) catch unreachable;
34 self.s2 = try allocator.alloc(u16, s2_len);
35 errdefer allocator.free(self.s2);
36 for (0..s2_len) |i| self.s2[i] = reader.readInt(u16, endian) catch unreachable;
37
38 const s3_len: u16 = reader.readInt(u16, endian) catch unreachable;
39 self.s3 = try allocator.alloc(u8, s3_len);
40 errdefer allocator.free(self.s3);
41 _ = reader.readAll(self.s3) catch unreachable;
42
43 return self;
44}
45
46pub fn deinit(graphemes: *const Graphemes, allocator: mem.Allocator) void {
47 allocator.free(graphemes.s1);
48 allocator.free(graphemes.s2);
49 allocator.free(graphemes.s3);
50}
51
52/// Lookup the grapheme break property for a code point.
53pub fn gbp(graphemes: Graphemes, cp: u21) Gbp {
54 return @enumFromInt(graphemes.s3[graphemes.s2[graphemes.s1[cp >> 8] + (cp & 0xff)]] >> 4);
55}
56
57/// Lookup the indic syllable type for a code point.
58pub fn indic(graphemes: Graphemes, cp: u21) Indic {
59 return @enumFromInt((graphemes.s3[graphemes.s2[graphemes.s1[cp >> 8] + (cp & 0xff)]] >> 1) & 0x7);
60}
61
62/// Lookup the emoji property for a code point.
63pub fn isEmoji(graphemes: Graphemes, cp: u21) bool {
64 return graphemes.s3[graphemes.s2[graphemes.s1[cp >> 8] + (cp & 0xff)]] & 1 == 1;
65}
66
67pub fn iterator(graphemes: *const Graphemes, string: []const u8) Iterator {
68 return Iterator.init(string, graphemes);
69}
70
71/// Indic syllable type.
72pub const Indic = enum {
73 none,
74
75 Consonant,
76 Extend,
77 Linker,
78};
79
80/// Grapheme break property.
81pub const Gbp = enum {
82 none,
83 Control,
84 CR,
85 Extend,
86 L,
87 LF,
88 LV,
89 LVT,
90 Prepend,
91 Regional_Indicator,
92 SpacingMark,
93 T,
94 V,
95 ZWJ,
96};
8 97
9/// `Grapheme` represents a Unicode grapheme cluster by its length and offset in the source bytes. 98/// `Grapheme` represents a Unicode grapheme cluster by its length and offset in the source bytes.
10pub const Grapheme = struct { 99pub const Grapheme = struct {
@@ -22,12 +111,12 @@ pub const Grapheme = struct {
22pub const Iterator = struct { 111pub const Iterator = struct {
23 buf: [2]?CodePoint = .{ null, null }, 112 buf: [2]?CodePoint = .{ null, null },
24 cp_iter: CodePointIterator, 113 cp_iter: CodePointIterator,
25 data: *const GraphemeData, 114 data: *const Graphemes,
26 115
27 const Self = @This(); 116 const Self = @This();
28 117
29 /// Assumes `src` is valid UTF-8. 118 /// Assumes `src` is valid UTF-8.
30 pub fn init(str: []const u8, data: *const GraphemeData) Self { 119 pub fn init(str: []const u8, data: *const Graphemes) Self {
31 var self = Self{ .cp_iter = .{ .bytes = str }, .data = data }; 120 var self = Self{ .cp_iter = .{ .bytes = str }, .data = data };
32 self.advance(); 121 self.advance();
33 return self; 122 return self;
@@ -149,7 +238,7 @@ pub const Iterator = struct {
149}; 238};
150 239
151// Predicates 240// Predicates
152fn isBreaker(cp: u21, data: *const GraphemeData) bool { 241fn isBreaker(cp: u21, data: *const Graphemes) bool {
153 // Extract relevant properties. 242 // Extract relevant properties.
154 const cp_gbp_prop = data.gbp(cp); 243 const cp_gbp_prop = data.gbp(cp);
155 return cp == '\x0d' or cp == '\x0a' or cp_gbp_prop == .Control; 244 return cp == '\x0d' or cp == '\x0a' or cp_gbp_prop == .Control;
@@ -202,7 +291,7 @@ pub const State = struct {
202pub fn graphemeBreak( 291pub fn graphemeBreak(
203 cp1: u21, 292 cp1: u21,
204 cp2: u21, 293 cp2: u21,
205 data: *const GraphemeData, 294 data: *const Graphemes,
206 state: *State, 295 state: *State,
207) bool { 296) bool {
208 // Extract relevant properties. 297 // Extract relevant properties.
@@ -306,25 +395,25 @@ test "Segmentation ZWJ and ZWSP emoji sequences" {
306 const with_zwsp = seq_1 ++ "\u{200B}" ++ seq_2; 395 const with_zwsp = seq_1 ++ "\u{200B}" ++ seq_2;
307 const no_joiner = seq_1 ++ seq_2; 396 const no_joiner = seq_1 ++ seq_2;
308 397
309 const data = try GraphemeData.init(std.testing.allocator); 398 const graphemes = try Graphemes.init(std.testing.allocator);
310 defer data.deinit(std.testing.allocator); 399 defer graphemes.deinit(std.testing.allocator);
311 400
312 { 401 {
313 var iter = Iterator.init(with_zwj, &data); 402 var iter = graphemes.iterator(with_zwj);
314 var i: usize = 0; 403 var i: usize = 0;
315 while (iter.next()) |_| : (i += 1) {} 404 while (iter.next()) |_| : (i += 1) {}
316 try std.testing.expectEqual(@as(usize, 1), i); 405 try std.testing.expectEqual(@as(usize, 1), i);
317 } 406 }
318 407
319 { 408 {
320 var iter = Iterator.init(with_zwsp, &data); 409 var iter = graphemes.iterator(with_zwsp);
321 var i: usize = 0; 410 var i: usize = 0;
322 while (iter.next()) |_| : (i += 1) {} 411 while (iter.next()) |_| : (i += 1) {}
323 try std.testing.expectEqual(@as(usize, 3), i); 412 try std.testing.expectEqual(@as(usize, 3), i);
324 } 413 }
325 414
326 { 415 {
327 var iter = Iterator.init(no_joiner, &data); 416 var iter = graphemes.iterator(no_joiner);
328 var i: usize = 0; 417 var i: usize = 0;
329 while (iter.next()) |_| : (i += 1) {} 418 while (iter.next()) |_| : (i += 1) {}
330 try std.testing.expectEqual(@as(usize, 2), i); 419 try std.testing.expectEqual(@as(usize, 2), i);
diff --git a/src/unicode_tests.zig b/src/unicode_tests.zig
index 7236ff6..de1b9ec 100644
--- a/src/unicode_tests.zig
+++ b/src/unicode_tests.zig
@@ -9,7 +9,7 @@ const unicode = std.unicode;
9 9
10const grapheme = @import("grapheme"); 10const grapheme = @import("grapheme");
11const Grapheme = @import("grapheme").Grapheme; 11const Grapheme = @import("grapheme").Grapheme;
12const GraphemeData = @import("grapheme").GraphemeData; 12const Graphemes = @import("grapheme");
13const GraphemeIterator = @import("grapheme").Iterator; 13const GraphemeIterator = @import("grapheme").Iterator;
14const Normalize = @import("Normalize"); 14const Normalize = @import("Normalize");
15 15
@@ -18,10 +18,10 @@ comptime {
18} 18}
19test "Iterator.peek" { 19test "Iterator.peek" {
20 const peek_seq = "aΔ👨🏻‍🌾→"; 20 const peek_seq = "aΔ👨🏻‍🌾→";
21 const data = try GraphemeData.init(std.testing.allocator); 21 const data = try Graphemes.init(std.testing.allocator);
22 defer data.deinit(std.testing.allocator); 22 defer data.deinit(std.testing.allocator);
23 23
24 var iter = grapheme.Iterator.init(peek_seq, &data); 24 var iter = data.iterator(peek_seq);
25 const peek_a = iter.peek().?; 25 const peek_a = iter.peek().?;
26 const next_a = iter.next().?; 26 const next_a = iter.next().?;
27 try std.testing.expectEqual(peek_a, next_a); 27 try std.testing.expectEqual(peek_a, next_a);
@@ -162,7 +162,7 @@ test "Segmentation GraphemeIterator" {
162 var buf_reader = std.io.bufferedReader(file.reader()); 162 var buf_reader = std.io.bufferedReader(file.reader());
163 var input_stream = buf_reader.reader(); 163 var input_stream = buf_reader.reader();
164 164
165 const data = try GraphemeData.init(allocator); 165 const data = try Graphemes.init(allocator);
166 defer data.deinit(allocator); 166 defer data.deinit(allocator);
167 167
168 var buf: [4096]u8 = undefined; 168 var buf: [4096]u8 = undefined;
@@ -207,7 +207,7 @@ test "Segmentation GraphemeIterator" {
207 } 207 }
208 208
209 // std.debug.print("\nline {}: {s}\n", .{ line_no, all_bytes.items }); 209 // std.debug.print("\nline {}: {s}\n", .{ line_no, all_bytes.items });
210 var iter = GraphemeIterator.init(all_bytes.items, &data); 210 var iter = data.iterator(all_bytes.items);
211 211
212 // Check. 212 // Check.
213 for (want.items) |want_gc| { 213 for (want.items) |want_gc| {