summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorGravatar Sam Atman2025-04-30 11:58:19 -0400
committerGravatar Sam Atman2025-04-30 11:58:19 -0400
commit1be5e46490e061761b4b97dff5c6acb2181d6fe9 (patch)
tree77a1edcdedd7afae7428e92feba37d2bb1035b22 /src
parentAdd general tests step (diff)
downloadzg-1be5e46490e061761b4b97dff5c6acb2181d6fe9.tar.gz
zg-1be5e46490e061761b4b97dff5c6acb2181d6fe9.tar.xz
zg-1be5e46490e061761b4b97dff5c6acb2181d6fe9.zip
Factor out 'Data' for grapheme and DisplayWidth
This is part of an ongoing refactor of the whole library, so that it no longer exposes anything called "Data" separately from user functionality.
Diffstat (limited to 'src')
-rw-r--r--src/DisplayWidth.zig240
-rw-r--r--src/GraphemeData.zig12
-rw-r--r--src/Normalize.zig29
-rw-r--r--src/WidthData.zig32
-rw-r--r--src/grapheme.zig109
-rw-r--r--src/unicode_tests.zig10
6 files changed, 313 insertions, 119 deletions
diff --git a/src/DisplayWidth.zig b/src/DisplayWidth.zig
index 8631bd4..11ec59e 100644
--- a/src/DisplayWidth.zig
+++ b/src/DisplayWidth.zig
@@ -2,38 +2,131 @@ const std = @import("std");
2const builtin = @import("builtin"); 2const builtin = @import("builtin");
3const options = @import("options"); 3const options = @import("options");
4const ArrayList = std.ArrayList; 4const ArrayList = std.ArrayList;
5const compress = std.compress;
5const mem = std.mem; 6const mem = std.mem;
6const simd = std.simd; 7const simd = std.simd;
7const testing = std.testing; 8const testing = std.testing;
8 9
9const ascii = @import("ascii"); 10const ascii = @import("ascii");
10const CodePointIterator = @import("code_point").Iterator; 11const CodePointIterator = @import("code_point").Iterator;
11const GraphemeIterator = @import("grapheme").Iterator;
12pub const DisplayWidthData = @import("DisplayWidthData"); 12pub const DisplayWidthData = @import("DisplayWidthData");
13 13
14data: *const DisplayWidthData, 14const Graphemes = @import("Graphemes");
15 15
16const Self = @This(); 16g_data: Graphemes,
17s1: []u16 = undefined,
18s2: []i4 = undefined,
19owns_gdata: bool,
20
21const DisplayWidth = @This();
22
23pub fn init(allocator: mem.Allocator) mem.Allocator.Error!DisplayWidth {
24 var dw: DisplayWidth = try DisplayWidth.setup(allocator);
25 errdefer {
26 allocator.free(dw.s1);
27 allocator.free(dw.s2);
28 }
29 dw.owns_gdata = true;
30 dw.g_data = try Graphemes.init(allocator);
31 errdefer dw.g_data.deinit(allocator);
32 return dw;
33}
34
35pub fn initWithGraphemeData(allocator: mem.Allocator, g_data: Graphemes) mem.Allocator.Error!DisplayWidth {
36 var dw = try DisplayWidth.setup(allocator);
37 dw.g_data = g_data;
38 dw.owns_gdata = false;
39 return dw;
40}
41
42// Sets up the DisplayWidthData, leaving the GraphemeData undefined.
43fn setup(allocator: mem.Allocator) mem.Allocator.Error!DisplayWidth {
44 const decompressor = compress.flate.inflate.decompressor;
45 const in_bytes = @embedFile("dwp");
46 var in_fbs = std.io.fixedBufferStream(in_bytes);
47 var in_decomp = decompressor(.raw, in_fbs.reader());
48 var reader = in_decomp.reader();
49
50 const endian = builtin.cpu.arch.endian();
51
52 var dw: DisplayWidth = undefined;
53
54 const stage_1_len: u16 = reader.readInt(u16, endian) catch unreachable;
55 dw.s1 = try allocator.alloc(u16, stage_1_len);
56 errdefer allocator.free(dw.s1);
57 for (0..stage_1_len) |i| dw.s1[i] = reader.readInt(u16, endian) catch unreachable;
58
59 const stage_2_len: u16 = reader.readInt(u16, endian) catch unreachable;
60 dw.s2 = try allocator.alloc(i4, stage_2_len);
61 errdefer allocator.free(dw.s2);
62 for (0..stage_2_len) |i| dw.s2[i] = @intCast(reader.readInt(i8, endian) catch unreachable);
63
64 return dw;
65}
66
67pub fn deinit(dw: *const DisplayWidth, allocator: mem.Allocator) void {
68 allocator.free(dw.s1);
69 allocator.free(dw.s2);
70 if (dw.owns_gdata) dw.g_data.deinit(allocator);
71}
72
73/// codePointWidth returns the number of cells `cp` requires when rendered
74/// in a fixed-pitch font (i.e. a terminal screen). This can range from -1 to
75/// 3, where BACKSPACE and DELETE return -1 and 3-em-dash returns 3. C0/C1
76/// control codes return 0. If `cjk` is true, ambiguous code points return 2,
77/// otherwise they return 1.
78pub fn codePointWidth(dw: DisplayWidth, cp: u21) i4 {
79 return dw.s2[dw.s1[cp >> 8] + (cp & 0xff)];
80}
81
82test "codePointWidth" {
83 const dw = try DisplayWidth.init(std.testing.allocator);
84 defer dw.deinit(std.testing.allocator);
85 try testing.expectEqual(@as(i4, 0), dw.codePointWidth(0x0000)); // null
86 try testing.expectEqual(@as(i4, -1), dw.codePointWidth(0x8)); // \b
87 try testing.expectEqual(@as(i4, -1), dw.codePointWidth(0x7f)); // DEL
88 try testing.expectEqual(@as(i4, 0), dw.codePointWidth(0x0005)); // Cf
89 try testing.expectEqual(@as(i4, 0), dw.codePointWidth(0x0007)); // \a BEL
90 try testing.expectEqual(@as(i4, 0), dw.codePointWidth(0x000A)); // \n LF
91 try testing.expectEqual(@as(i4, 0), dw.codePointWidth(0x000B)); // \v VT
92 try testing.expectEqual(@as(i4, 0), dw.codePointWidth(0x000C)); // \f FF
93 try testing.expectEqual(@as(i4, 0), dw.codePointWidth(0x000D)); // \r CR
94 try testing.expectEqual(@as(i4, 0), dw.codePointWidth(0x000E)); // SQ
95 try testing.expectEqual(@as(i4, 0), dw.codePointWidth(0x000F)); // SI
96
97 try testing.expectEqual(@as(i4, 0), dw.codePointWidth(0x070F)); // Cf
98 try testing.expectEqual(@as(i4, 1), dw.codePointWidth(0x0603)); // Cf Arabic
99
100 try testing.expectEqual(@as(i4, 1), dw.codePointWidth(0x00AD)); // soft-hyphen
101 try testing.expectEqual(@as(i4, 2), dw.codePointWidth(0x2E3A)); // two-em dash
102 try testing.expectEqual(@as(i4, 3), dw.codePointWidth(0x2E3B)); // three-em dash
103
104 try testing.expectEqual(@as(i4, 1), dw.codePointWidth(0x00BD)); // ambiguous halfwidth
105
106 try testing.expectEqual(@as(i4, 1), dw.codePointWidth('é'));
107 try testing.expectEqual(@as(i4, 2), dw.codePointWidth('😊'));
108 try testing.expectEqual(@as(i4, 2), dw.codePointWidth('统'));
109}
17 110
18/// strWidth returns the total display width of `str` as the number of cells 111/// strWidth returns the total display width of `str` as the number of cells
19/// required in a fixed-pitch font (i.e. a terminal screen). 112/// required in a fixed-pitch font (i.e. a terminal screen).
20pub fn strWidth(self: Self, str: []const u8) usize { 113pub fn strWidth(dw: DisplayWidth, str: []const u8) usize {
21 var total: isize = 0; 114 var total: isize = 0;
22 115
23 // ASCII fast path 116 // ASCII fast path
24 if (ascii.isAsciiOnly(str)) { 117 if (ascii.isAsciiOnly(str)) {
25 for (str) |b| total += self.data.codePointWidth(b); 118 for (str) |b| total += dw.codePointWidth(b);
26 return @intCast(@max(0, total)); 119 return @intCast(@max(0, total));
27 } 120 }
28 121
29 var giter = GraphemeIterator.init(str, &self.data.g_data); 122 var giter = dw.g_data.iterator(str);
30 123
31 while (giter.next()) |gc| { 124 while (giter.next()) |gc| {
32 var cp_iter = CodePointIterator{ .bytes = gc.bytes(str) }; 125 var cp_iter = CodePointIterator{ .bytes = gc.bytes(str) };
33 var gc_total: isize = 0; 126 var gc_total: isize = 0;
34 127
35 while (cp_iter.next()) |cp| { 128 while (cp_iter.next()) |cp| {
36 var w = self.data.codePointWidth(cp.code); 129 var w = dw.codePointWidth(cp.code);
37 130
38 if (w != 0) { 131 if (w != 0) {
39 // Handle text emoji sequence. 132 // Handle text emoji sequence.
@@ -58,41 +151,40 @@ pub fn strWidth(self: Self, str: []const u8) usize {
58} 151}
59 152
60test "strWidth" { 153test "strWidth" {
61 const data = try DisplayWidthData.init(testing.allocator); 154 const dw = try DisplayWidth.init(testing.allocator);
62 defer data.deinit(testing.allocator); 155 defer dw.deinit(testing.allocator);
63 const self = Self{ .data = &data };
64 const c0 = options.c0_width orelse 0; 156 const c0 = options.c0_width orelse 0;
65 157
66 try testing.expectEqual(@as(usize, 5), self.strWidth("Hello\r\n")); 158 try testing.expectEqual(@as(usize, 5), dw.strWidth("Hello\r\n"));
67 try testing.expectEqual(@as(usize, 1), self.strWidth("\u{0065}\u{0301}")); 159 try testing.expectEqual(@as(usize, 1), dw.strWidth("\u{0065}\u{0301}"));
68 try testing.expectEqual(@as(usize, 2), self.strWidth("\u{1F476}\u{1F3FF}\u{0308}\u{200D}\u{1F476}\u{1F3FF}")); 160 try testing.expectEqual(@as(usize, 2), dw.strWidth("\u{1F476}\u{1F3FF}\u{0308}\u{200D}\u{1F476}\u{1F3FF}"));
69 try testing.expectEqual(@as(usize, 8), self.strWidth("Hello 😊")); 161 try testing.expectEqual(@as(usize, 8), dw.strWidth("Hello 😊"));
70 try testing.expectEqual(@as(usize, 8), self.strWidth("Héllo 😊")); 162 try testing.expectEqual(@as(usize, 8), dw.strWidth("Héllo 😊"));
71 try testing.expectEqual(@as(usize, 8), self.strWidth("Héllo :)")); 163 try testing.expectEqual(@as(usize, 8), dw.strWidth("Héllo :)"));
72 try testing.expectEqual(@as(usize, 8), self.strWidth("Héllo 🇪🇸")); 164 try testing.expectEqual(@as(usize, 8), dw.strWidth("Héllo 🇪🇸"));
73 try testing.expectEqual(@as(usize, 2), self.strWidth("\u{26A1}")); // Lone emoji 165 try testing.expectEqual(@as(usize, 2), dw.strWidth("\u{26A1}")); // Lone emoji
74 try testing.expectEqual(@as(usize, 1), self.strWidth("\u{26A1}\u{FE0E}")); // Text sequence 166 try testing.expectEqual(@as(usize, 1), dw.strWidth("\u{26A1}\u{FE0E}")); // Text sequence
75 try testing.expectEqual(@as(usize, 2), self.strWidth("\u{26A1}\u{FE0F}")); // Presentation sequence 167 try testing.expectEqual(@as(usize, 2), dw.strWidth("\u{26A1}\u{FE0F}")); // Presentation sequence
76 try testing.expectEqual(@as(usize, 1), self.strWidth("\u{2764}")); // Default text presentation 168 try testing.expectEqual(@as(usize, 1), dw.strWidth("\u{2764}")); // Default text presentation
77 try testing.expectEqual(@as(usize, 1), self.strWidth("\u{2764}\u{FE0E}")); // Default text presentation with VS15 selector 169 try testing.expectEqual(@as(usize, 1), dw.strWidth("\u{2764}\u{FE0E}")); // Default text presentation with VS15 selector
78 try testing.expectEqual(@as(usize, 2), self.strWidth("\u{2764}\u{FE0F}")); // Default text presentation with VS16 selector 170 try testing.expectEqual(@as(usize, 2), dw.strWidth("\u{2764}\u{FE0F}")); // Default text presentation with VS16 selector
79 const expect_bs: usize = if (c0 == 0) 0 else 1 + c0; 171 const expect_bs: usize = if (c0 == 0) 0 else 1 + c0;
80 try testing.expectEqual(expect_bs, self.strWidth("A\x08")); // Backspace 172 try testing.expectEqual(expect_bs, dw.strWidth("A\x08")); // Backspace
81 try testing.expectEqual(expect_bs, self.strWidth("\x7FA")); // DEL 173 try testing.expectEqual(expect_bs, dw.strWidth("\x7FA")); // DEL
82 const expect_long_del: usize = if (c0 == 0) 0 else 1 + (c0 * 3); 174 const expect_long_del: usize = if (c0 == 0) 0 else 1 + (c0 * 3);
83 try testing.expectEqual(expect_long_del, self.strWidth("\x7FA\x08\x08")); // never less than 0 175 try testing.expectEqual(expect_long_del, dw.strWidth("\x7FA\x08\x08")); // never less than 0
84 176
85 // wcwidth Python lib tests. See: https://github.com/jquast/wcwidth/blob/master/tests/test_core.py 177 // wcwidth Python lib tests. See: https://github.com/jquast/wcwidth/blob/master/tests/test_core.py
86 const empty = ""; 178 const empty = "";
87 try testing.expectEqual(@as(usize, 0), self.strWidth(empty)); 179 try testing.expectEqual(@as(usize, 0), dw.strWidth(empty));
88 const with_null = "hello\x00world"; 180 const with_null = "hello\x00world";
89 try testing.expectEqual(@as(usize, 10 + c0), self.strWidth(with_null)); 181 try testing.expectEqual(@as(usize, 10 + c0), dw.strWidth(with_null));
90 const hello_jp = "コンニチハ, セカイ!"; 182 const hello_jp = "コンニチハ, セカイ!";
91 try testing.expectEqual(@as(usize, 19), self.strWidth(hello_jp)); 183 try testing.expectEqual(@as(usize, 19), dw.strWidth(hello_jp));
92 const control = "\x1b[0m"; 184 const control = "\x1b[0m";
93 try testing.expectEqual(@as(usize, 3 + c0), self.strWidth(control)); 185 try testing.expectEqual(@as(usize, 3 + c0), dw.strWidth(control));
94 const balinese = "\u{1B13}\u{1B28}\u{1B2E}\u{1B44}"; 186 const balinese = "\u{1B13}\u{1B28}\u{1B2E}\u{1B44}";
95 try testing.expectEqual(@as(usize, 3), self.strWidth(balinese)); 187 try testing.expectEqual(@as(usize, 3), dw.strWidth(balinese));
96 188
97 // These commented out tests require a new specification for complex scripts. 189 // These commented out tests require a new specification for complex scripts.
98 // See: https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf 190 // See: https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf
@@ -106,17 +198,17 @@ test "strWidth" {
106 // try testing.expectEqual(@as(usize, 3), strWidth(kannada_1)); 198 // try testing.expectEqual(@as(usize, 3), strWidth(kannada_1));
107 // The following passes but as a mere coincidence. 199 // The following passes but as a mere coincidence.
108 const kannada_2 = "\u{0cb0}\u{0cbc}\u{0ccd}\u{0c9a}"; 200 const kannada_2 = "\u{0cb0}\u{0cbc}\u{0ccd}\u{0c9a}";
109 try testing.expectEqual(@as(usize, 2), self.strWidth(kannada_2)); 201 try testing.expectEqual(@as(usize, 2), dw.strWidth(kannada_2));
110 202
111 // From Rust https://github.com/jameslanska/unicode-display-width 203 // From Rust https://github.com/jameslanska/unicode-display-width
112 try testing.expectEqual(@as(usize, 15), self.strWidth("🔥🗡🍩👩🏻‍🚀⏰💃🏼🔦👍🏻")); 204 try testing.expectEqual(@as(usize, 15), dw.strWidth("🔥🗡🍩👩🏻‍🚀⏰💃🏼🔦👍🏻"));
113 try testing.expectEqual(@as(usize, 2), self.strWidth("🦀")); 205 try testing.expectEqual(@as(usize, 2), dw.strWidth("🦀"));
114 try testing.expectEqual(@as(usize, 2), self.strWidth("👨‍👩‍👧‍👧")); 206 try testing.expectEqual(@as(usize, 2), dw.strWidth("👨‍👩‍👧‍👧"));
115 try testing.expectEqual(@as(usize, 2), self.strWidth("👩‍🔬")); 207 try testing.expectEqual(@as(usize, 2), dw.strWidth("👩‍🔬"));
116 try testing.expectEqual(@as(usize, 9), self.strWidth("sane text")); 208 try testing.expectEqual(@as(usize, 9), dw.strWidth("sane text"));
117 try testing.expectEqual(@as(usize, 9), self.strWidth("Ẓ̌á̲l͔̝̞̄̑͌g̖̘̘̔̔͢͞͝o̪̔T̢̙̫̈̍͞e̬͈͕͌̏͑x̺̍ṭ̓̓ͅ")); 209 try testing.expectEqual(@as(usize, 9), dw.strWidth("Ẓ̌á̲l͔̝̞̄̑͌g̖̘̘̔̔͢͞͝o̪̔T̢̙̫̈̍͞e̬͈͕͌̏͑x̺̍ṭ̓̓ͅ"));
118 try testing.expectEqual(@as(usize, 17), self.strWidth("슬라바 우크라이나")); 210 try testing.expectEqual(@as(usize, 17), dw.strWidth("슬라바 우크라이나"));
119 try testing.expectEqual(@as(usize, 1), self.strWidth("\u{378}")); 211 try testing.expectEqual(@as(usize, 1), dw.strWidth("\u{378}"));
120} 212}
121 213
122/// centers `str` in a new string of width `total_width` (in display cells) using `pad` as padding. 214/// centers `str` in a new string of width `total_width` (in display cells) using `pad` as padding.
@@ -124,17 +216,17 @@ test "strWidth" {
124/// receive one additional pad. This makes sure the returned string fills the requested width. 216/// receive one additional pad. This makes sure the returned string fills the requested width.
125/// Caller must free returned bytes with `allocator`. 217/// Caller must free returned bytes with `allocator`.
126pub fn center( 218pub fn center(
127 self: Self, 219 dw: DisplayWidth,
128 allocator: mem.Allocator, 220 allocator: mem.Allocator,
129 str: []const u8, 221 str: []const u8,
130 total_width: usize, 222 total_width: usize,
131 pad: []const u8, 223 pad: []const u8,
132) ![]u8 { 224) ![]u8 {
133 const str_width = self.strWidth(str); 225 const str_width = dw.strWidth(str);
134 if (str_width > total_width) return error.StrTooLong; 226 if (str_width > total_width) return error.StrTooLong;
135 if (str_width == total_width) return try allocator.dupe(u8, str); 227 if (str_width == total_width) return try allocator.dupe(u8, str);
136 228
137 const pad_width = self.strWidth(pad); 229 const pad_width = dw.strWidth(pad);
138 if (pad_width > total_width or str_width + pad_width > total_width) return error.PadTooLong; 230 if (pad_width > total_width or str_width + pad_width > total_width) return error.PadTooLong;
139 231
140 const margin_width = @divFloor((total_width - str_width), 2); 232 const margin_width = @divFloor((total_width - str_width), 2);
@@ -165,62 +257,61 @@ pub fn center(
165 257
166test "center" { 258test "center" {
167 const allocator = testing.allocator; 259 const allocator = testing.allocator;
168 const data = try DisplayWidthData.init(allocator); 260 const dw = try DisplayWidth.init(allocator);
169 defer data.deinit(allocator); 261 defer dw.deinit(allocator);
170 const self = Self{ .data = &data };
171 262
172 // Input and width both have odd length 263 // Input and width both have odd length
173 var centered = try self.center(allocator, "abc", 9, "*"); 264 var centered = try dw.center(allocator, "abc", 9, "*");
174 try testing.expectEqualSlices(u8, "***abc***", centered); 265 try testing.expectEqualSlices(u8, "***abc***", centered);
175 266
176 // Input and width both have even length 267 // Input and width both have even length
177 testing.allocator.free(centered); 268 testing.allocator.free(centered);
178 centered = try self.center(allocator, "w😊w", 10, "-"); 269 centered = try dw.center(allocator, "w😊w", 10, "-");
179 try testing.expectEqualSlices(u8, "---w😊w---", centered); 270 try testing.expectEqualSlices(u8, "---w😊w---", centered);
180 271
181 // Input has even length, width has odd length 272 // Input has even length, width has odd length
182 testing.allocator.free(centered); 273 testing.allocator.free(centered);
183 centered = try self.center(allocator, "1234", 9, "-"); 274 centered = try dw.center(allocator, "1234", 9, "-");
184 try testing.expectEqualSlices(u8, "--1234---", centered); 275 try testing.expectEqualSlices(u8, "--1234---", centered);
185 276
186 // Input has odd length, width has even length 277 // Input has odd length, width has even length
187 testing.allocator.free(centered); 278 testing.allocator.free(centered);
188 centered = try self.center(allocator, "123", 8, "-"); 279 centered = try dw.center(allocator, "123", 8, "-");
189 try testing.expectEqualSlices(u8, "--123---", centered); 280 try testing.expectEqualSlices(u8, "--123---", centered);
190 281
191 // Input is the same length as the width 282 // Input is the same length as the width
192 testing.allocator.free(centered); 283 testing.allocator.free(centered);
193 centered = try self.center(allocator, "123", 3, "-"); 284 centered = try dw.center(allocator, "123", 3, "-");
194 try testing.expectEqualSlices(u8, "123", centered); 285 try testing.expectEqualSlices(u8, "123", centered);
195 286
196 // Input is empty 287 // Input is empty
197 testing.allocator.free(centered); 288 testing.allocator.free(centered);
198 centered = try self.center(allocator, "", 3, "-"); 289 centered = try dw.center(allocator, "", 3, "-");
199 try testing.expectEqualSlices(u8, "---", centered); 290 try testing.expectEqualSlices(u8, "---", centered);
200 291
201 // Input is empty and width is zero 292 // Input is empty and width is zero
202 testing.allocator.free(centered); 293 testing.allocator.free(centered);
203 centered = try self.center(allocator, "", 0, "-"); 294 centered = try dw.center(allocator, "", 0, "-");
204 try testing.expectEqualSlices(u8, "", centered); 295 try testing.expectEqualSlices(u8, "", centered);
205 296
206 // Input is longer than the width, which is an error 297 // Input is longer than the width, which is an error
207 testing.allocator.free(centered); 298 testing.allocator.free(centered);
208 try testing.expectError(error.StrTooLong, self.center(allocator, "123", 2, "-")); 299 try testing.expectError(error.StrTooLong, dw.center(allocator, "123", 2, "-"));
209} 300}
210 301
211/// padLeft returns a new string of width `total_width` (in display cells) using `pad` as padding 302/// padLeft returns a new string of width `total_width` (in display cells) using `pad` as padding
212/// on the left side. Caller must free returned bytes with `allocator`. 303/// on the left side. Caller must free returned bytes with `allocator`.
213pub fn padLeft( 304pub fn padLeft(
214 self: Self, 305 dw: DisplayWidth,
215 allocator: mem.Allocator, 306 allocator: mem.Allocator,
216 str: []const u8, 307 str: []const u8,
217 total_width: usize, 308 total_width: usize,
218 pad: []const u8, 309 pad: []const u8,
219) ![]u8 { 310) ![]u8 {
220 const str_width = self.strWidth(str); 311 const str_width = dw.strWidth(str);
221 if (str_width > total_width) return error.StrTooLong; 312 if (str_width > total_width) return error.StrTooLong;
222 313
223 const pad_width = self.strWidth(pad); 314 const pad_width = dw.strWidth(pad);
224 if (pad_width > total_width or str_width + pad_width > total_width) return error.PadTooLong; 315 if (pad_width > total_width or str_width + pad_width > total_width) return error.PadTooLong;
225 316
226 const margin_width = total_width - str_width; 317 const margin_width = total_width - str_width;
@@ -244,32 +335,31 @@ pub fn padLeft(
244 335
245test "padLeft" { 336test "padLeft" {
246 const allocator = testing.allocator; 337 const allocator = testing.allocator;
247 const data = try DisplayWidthData.init(allocator); 338 const dw = try DisplayWidth.init(allocator);
248 defer data.deinit(allocator); 339 defer dw.deinit(allocator);
249 const self = Self{ .data = &data };
250 340
251 var right_aligned = try self.padLeft(allocator, "abc", 9, "*"); 341 var right_aligned = try dw.padLeft(allocator, "abc", 9, "*");
252 defer testing.allocator.free(right_aligned); 342 defer testing.allocator.free(right_aligned);
253 try testing.expectEqualSlices(u8, "******abc", right_aligned); 343 try testing.expectEqualSlices(u8, "******abc", right_aligned);
254 344
255 testing.allocator.free(right_aligned); 345 testing.allocator.free(right_aligned);
256 right_aligned = try self.padLeft(allocator, "w😊w", 10, "-"); 346 right_aligned = try dw.padLeft(allocator, "w😊w", 10, "-");
257 try testing.expectEqualSlices(u8, "------w😊w", right_aligned); 347 try testing.expectEqualSlices(u8, "------w😊w", right_aligned);
258} 348}
259 349
260/// padRight returns a new string of width `total_width` (in display cells) using `pad` as padding 350/// padRight returns a new string of width `total_width` (in display cells) using `pad` as padding
261/// on the right side. Caller must free returned bytes with `allocator`. 351/// on the right side. Caller must free returned bytes with `allocator`.
262pub fn padRight( 352pub fn padRight(
263 self: Self, 353 dw: DisplayWidth,
264 allocator: mem.Allocator, 354 allocator: mem.Allocator,
265 str: []const u8, 355 str: []const u8,
266 total_width: usize, 356 total_width: usize,
267 pad: []const u8, 357 pad: []const u8,
268) ![]u8 { 358) ![]u8 {
269 const str_width = self.strWidth(str); 359 const str_width = dw.strWidth(str);
270 if (str_width > total_width) return error.StrTooLong; 360 if (str_width > total_width) return error.StrTooLong;
271 361
272 const pad_width = self.strWidth(pad); 362 const pad_width = dw.strWidth(pad);
273 if (pad_width > total_width or str_width + pad_width > total_width) return error.PadTooLong; 363 if (pad_width > total_width or str_width + pad_width > total_width) return error.PadTooLong;
274 364
275 const margin_width = total_width - str_width; 365 const margin_width = total_width - str_width;
@@ -294,16 +384,15 @@ pub fn padRight(
294 384
295test "padRight" { 385test "padRight" {
296 const allocator = testing.allocator; 386 const allocator = testing.allocator;
297 const data = try DisplayWidthData.init(allocator); 387 const dw = try DisplayWidth.init(allocator);
298 defer data.deinit(allocator); 388 defer dw.deinit(allocator);
299 const self = Self{ .data = &data };
300 389
301 var left_aligned = try self.padRight(allocator, "abc", 9, "*"); 390 var left_aligned = try dw.padRight(allocator, "abc", 9, "*");
302 defer testing.allocator.free(left_aligned); 391 defer testing.allocator.free(left_aligned);
303 try testing.expectEqualSlices(u8, "abc******", left_aligned); 392 try testing.expectEqualSlices(u8, "abc******", left_aligned);
304 393
305 testing.allocator.free(left_aligned); 394 testing.allocator.free(left_aligned);
306 left_aligned = try self.padRight(allocator, "w😊w", 10, "-"); 395 left_aligned = try dw.padRight(allocator, "w😊w", 10, "-");
307 try testing.expectEqualSlices(u8, "w😊w------", left_aligned); 396 try testing.expectEqualSlices(u8, "w😊w------", left_aligned);
308} 397}
309 398
@@ -311,7 +400,7 @@ test "padRight" {
311/// `threshold` defines how far the last column of the last word can be 400/// `threshold` defines how far the last column of the last word can be
312/// from the edge. Caller must free returned bytes with `allocator`. 401/// from the edge. Caller must free returned bytes with `allocator`.
313pub fn wrap( 402pub fn wrap(
314 self: Self, 403 dw: DisplayWidth,
315 allocator: mem.Allocator, 404 allocator: mem.Allocator,
316 str: []const u8, 405 str: []const u8,
317 columns: usize, 406 columns: usize,
@@ -329,7 +418,7 @@ pub fn wrap(
329 while (word_iter.next()) |word| { 418 while (word_iter.next()) |word| {
330 try result.appendSlice(word); 419 try result.appendSlice(word);
331 try result.append(' '); 420 try result.append(' ');
332 line_width += self.strWidth(word) + 1; 421 line_width += dw.strWidth(word) + 1;
333 422
334 if (line_width > columns or columns - line_width <= threshold) { 423 if (line_width > columns or columns - line_width <= threshold) {
335 try result.append('\n'); 424 try result.append('\n');
@@ -347,12 +436,11 @@ pub fn wrap(
347 436
348test "wrap" { 437test "wrap" {
349 const allocator = testing.allocator; 438 const allocator = testing.allocator;
350 const data = try DisplayWidthData.init(allocator); 439 const dw = try DisplayWidth.init(allocator);
351 defer data.deinit(allocator); 440 defer dw.deinit(allocator);
352 const self = Self{ .data = &data };
353 441
354 const input = "The quick brown fox\r\njumped over the lazy dog!"; 442 const input = "The quick brown fox\r\njumped over the lazy dog!";
355 const got = try self.wrap(allocator, input, 10, 3); 443 const got = try dw.wrap(allocator, input, 10, 3);
356 defer testing.allocator.free(got); 444 defer testing.allocator.free(got);
357 const want = "The quick \nbrown fox \njumped \nover the \nlazy dog!"; 445 const want = "The quick \nbrown fox \njumped \nover the \nlazy dog!";
358 try testing.expectEqualStrings(want, got); 446 try testing.expectEqualStrings(want, got);
diff --git a/src/GraphemeData.zig b/src/GraphemeData.zig
index 6d3174d..df025cb 100644
--- a/src/GraphemeData.zig
+++ b/src/GraphemeData.zig
@@ -36,7 +36,7 @@ s3: []u8 = undefined,
36 36
37const Self = @This(); 37const Self = @This();
38 38
39pub fn init(allocator: mem.Allocator) mem.Allocator.Error!Self { 39pub inline fn init(allocator: mem.Allocator) mem.Allocator.Error!Self {
40 const decompressor = compress.flate.inflate.decompressor; 40 const decompressor = compress.flate.inflate.decompressor;
41 const in_bytes = @embedFile("gbp"); 41 const in_bytes = @embedFile("gbp");
42 var in_fbs = std.io.fixedBufferStream(in_bytes); 42 var in_fbs = std.io.fixedBufferStream(in_bytes);
@@ -65,23 +65,23 @@ pub fn init(allocator: mem.Allocator) mem.Allocator.Error!Self {
65 return self; 65 return self;
66} 66}
67 67
68pub fn deinit(self: *const Self, allocator: mem.Allocator) void { 68pub inline fn deinit(self: *const Self, allocator: mem.Allocator) void {
69 allocator.free(self.s1); 69 allocator.free(self.s1);
70 allocator.free(self.s2); 70 allocator.free(self.s2);
71 allocator.free(self.s3); 71 allocator.free(self.s3);
72} 72}
73 73
74/// Lookup the grapheme break property for a code point. 74/// Lookup the grapheme break property for a code point.
75pub fn gbp(self: Self, cp: u21) Gbp { 75pub inline fn gbp(self: Self, cp: u21) Gbp {
76 return @enumFromInt(self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]] >> 4); 76 return @enumFromInt(self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]] >> 4);
77} 77}
78 78
79/// Lookup the indic syllable type for a code point. 79/// Lookup the indic syllable type for a code point.
80pub fn indic(self: Self, cp: u21) Indic { 80pub inline fn indic(self: Self, cp: u21) Indic {
81 return @enumFromInt((self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]] >> 1) & 0x7); 81 return @enumFromInt((self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]] >> 1) & 0x7);
82} 82}
83 83
84/// Lookup the indic syllable type for a code point. 84/// Lookup the emoji property for a code point.
85pub fn isEmoji(self: Self, cp: u21) bool { 85pub inline fn isEmoji(self: Self, cp: u21) bool {
86 return self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]] & 1 == 1; 86 return self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]] & 1 == 1;
87} 87}
diff --git a/src/Normalize.zig b/src/Normalize.zig
index a28b708..b738b27 100644
--- a/src/Normalize.zig
+++ b/src/Normalize.zig
@@ -177,7 +177,7 @@ test "decompose" {
177 const allocator = testing.allocator; 177 const allocator = testing.allocator;
178 var data: NormData = undefined; 178 var data: NormData = undefined;
179 try NormData.init(&data, allocator); 179 try NormData.init(&data, allocator);
180 defer data.deinit(); 180 defer data.deinit(allocator);
181 var n = Self{ .norm_data = &data }; 181 var n = Self{ .norm_data = &data };
182 182
183 var buf: [18]u21 = undefined; 183 var buf: [18]u21 = undefined;
@@ -307,11 +307,11 @@ test "nfd ASCII / no-alloc" {
307 const allocator = testing.allocator; 307 const allocator = testing.allocator;
308 var data: NormData = undefined; 308 var data: NormData = undefined;
309 try NormData.init(&data, allocator); 309 try NormData.init(&data, allocator);
310 defer data.deinit(); 310 defer data.deinit(allocator);
311 const n = Self{ .norm_data = &data }; 311 const n = Self{ .norm_data = &data };
312 312
313 const result = try n.nfd(allocator, "Hello World!"); 313 const result = try n.nfd(allocator, "Hello World!");
314 defer result.deinit(); 314 defer result.deinit(allocator);
315 315
316 try testing.expectEqualStrings("Hello World!", result.slice); 316 try testing.expectEqualStrings("Hello World!", result.slice);
317} 317}
@@ -320,11 +320,11 @@ test "nfd !ASCII / alloc" {
320 const allocator = testing.allocator; 320 const allocator = testing.allocator;
321 var data: NormData = undefined; 321 var data: NormData = undefined;
322 try NormData.init(&data, allocator); 322 try NormData.init(&data, allocator);
323 defer data.deinit(); 323 defer data.deinit(allocator);
324 const n = Self{ .norm_data = &data }; 324 const n = Self{ .norm_data = &data };
325 325
326 const result = try n.nfd(allocator, "Héllo World! \u{3d3}"); 326 const result = try n.nfd(allocator, "Héllo World! \u{3d3}");
327 defer result.deinit(); 327 defer result.deinit(allocator);
328 328
329 try testing.expectEqualStrings("He\u{301}llo World! \u{3d2}\u{301}", result.slice); 329 try testing.expectEqualStrings("He\u{301}llo World! \u{3d2}\u{301}", result.slice);
330} 330}
@@ -333,11 +333,11 @@ test "nfkd ASCII / no-alloc" {
333 const allocator = testing.allocator; 333 const allocator = testing.allocator;
334 var data: NormData = undefined; 334 var data: NormData = undefined;
335 try NormData.init(&data, allocator); 335 try NormData.init(&data, allocator);
336 defer data.deinit(); 336 defer data.deinit(allocator);
337 const n = Self{ .norm_data = &data }; 337 const n = Self{ .norm_data = &data };
338 338
339 const result = try n.nfkd(allocator, "Hello World!"); 339 const result = try n.nfkd(allocator, "Hello World!");
340 defer result.deinit(); 340 defer result.deinit(allocator);
341 341
342 try testing.expectEqualStrings("Hello World!", result.slice); 342 try testing.expectEqualStrings("Hello World!", result.slice);
343} 343}
@@ -346,11 +346,11 @@ test "nfkd !ASCII / alloc" {
346 const allocator = testing.allocator; 346 const allocator = testing.allocator;
347 var data: NormData = undefined; 347 var data: NormData = undefined;
348 try NormData.init(&data, allocator); 348 try NormData.init(&data, allocator);
349 defer data.deinit(); 349 defer data.deinit(allocator);
350 const n = Self{ .norm_data = &data }; 350 const n = Self{ .norm_data = &data };
351 351
352 const result = try n.nfkd(allocator, "Héllo World! \u{3d3}"); 352 const result = try n.nfkd(allocator, "Héllo World! \u{3d3}");
353 defer result.deinit(); 353 defer result.deinit(allocator);
354 354
355 try testing.expectEqualStrings("He\u{301}llo World! \u{3a5}\u{301}", result.slice); 355 try testing.expectEqualStrings("He\u{301}llo World! \u{3a5}\u{301}", result.slice);
356} 356}
@@ -546,11 +546,11 @@ test "nfc" {
546 const allocator = testing.allocator; 546 const allocator = testing.allocator;
547 var data: NormData = undefined; 547 var data: NormData = undefined;
548 try NormData.init(&data, allocator); 548 try NormData.init(&data, allocator);
549 defer data.deinit(); 549 defer data.deinit(allocator);
550 const n = Self{ .norm_data = &data }; 550 const n = Self{ .norm_data = &data };
551 551
552 const result = try n.nfc(allocator, "Complex char: \u{3D2}\u{301}"); 552 const result = try n.nfc(allocator, "Complex char: \u{3D2}\u{301}");
553 defer result.deinit(); 553 defer result.deinit(allocator);
554 554
555 try testing.expectEqualStrings("Complex char: \u{3D3}", result.slice); 555 try testing.expectEqualStrings("Complex char: \u{3D3}", result.slice);
556} 556}
@@ -559,11 +559,11 @@ test "nfkc" {
559 const allocator = testing.allocator; 559 const allocator = testing.allocator;
560 var data: NormData = undefined; 560 var data: NormData = undefined;
561 try NormData.init(&data, allocator); 561 try NormData.init(&data, allocator);
562 defer data.deinit(); 562 defer data.deinit(allocator);
563 const n = Self{ .norm_data = &data }; 563 const n = Self{ .norm_data = &data };
564 564
565 const result = try n.nfkc(allocator, "Complex char: \u{03A5}\u{0301}"); 565 const result = try n.nfkc(allocator, "Complex char: \u{03A5}\u{0301}");
566 defer result.deinit(); 566 defer result.deinit(allocator);
567 567
568 try testing.expectEqualStrings("Complex char: \u{038E}", result.slice); 568 try testing.expectEqualStrings("Complex char: \u{038E}", result.slice);
569} 569}
@@ -582,7 +582,7 @@ test "eql" {
582 const allocator = testing.allocator; 582 const allocator = testing.allocator;
583 var data: NormData = undefined; 583 var data: NormData = undefined;
584 try NormData.init(&data, allocator); 584 try NormData.init(&data, allocator);
585 defer data.deinit(); 585 defer data.deinit(allocator);
586 const n = Self{ .norm_data = &data }; 586 const n = Self{ .norm_data = &data };
587 587
588 try testing.expect(try n.eql(allocator, "foé", "foe\u{0301}")); 588 try testing.expect(try n.eql(allocator, "foé", "foe\u{0301}"));
@@ -628,5 +628,4 @@ test "isLatin1Only" {
628 try testing.expect(isLatin1Only(latin1_only)); 628 try testing.expect(isLatin1Only(latin1_only));
629 const not_latin1_only = "Héllo, World! \u{3d3}"; 629 const not_latin1_only = "Héllo, World! \u{3d3}";
630 try testing.expect(!isLatin1Only(not_latin1_only)); 630 try testing.expect(!isLatin1Only(not_latin1_only));
631 try testing.expect(false);
632} 631}
diff --git a/src/WidthData.zig b/src/WidthData.zig
index b07a679..ca7eaf0 100644
--- a/src/WidthData.zig
+++ b/src/WidthData.zig
@@ -4,15 +4,36 @@ const compress = std.compress;
4const mem = std.mem; 4const mem = std.mem;
5const testing = std.testing; 5const testing = std.testing;
6 6
7const GraphemeData = @import("GraphemeData"); 7const Graphemes = @import("Graphemes");
8 8
9g_data: GraphemeData, 9g_data: Graphemes,
10s1: []u16 = undefined, 10s1: []u16 = undefined,
11s2: []i4 = undefined, 11s2: []i4 = undefined,
12owns_gdata: bool,
12 13
13const Self = @This(); 14const Self = @This();
14 15
15pub fn init(allocator: mem.Allocator) mem.Allocator.Error!Self { 16pub fn init(allocator: mem.Allocator) mem.Allocator.Error!Self {
17 var self: Self = try Self.setup(allocator);
18 errdefer {
19 allocator.free(self.s1);
20 allocator.free(self.s2);
21 }
22 self.owns_gdata = true;
23 self.g_data = try Graphemes.init(allocator);
24 errdefer self.g_data.deinit(allocator);
25 return self;
26}
27
28pub fn initWithGraphemeData(allocator: mem.Allocator, g_data: Graphemes) mem.Allocator.Error!Self {
29 var self = try Self.setup(allocator);
30 self.g_data = g_data;
31 self.owns_gdata = false;
32 return self;
33}
34
35// Sets up the DisplayWidthData, leaving the GraphemeData undefined.
36fn setup(allocator: mem.Allocator) mem.Allocator.Error!Self {
16 const decompressor = compress.flate.inflate.decompressor; 37 const decompressor = compress.flate.inflate.decompressor;
17 const in_bytes = @embedFile("dwp"); 38 const in_bytes = @embedFile("dwp");
18 var in_fbs = std.io.fixedBufferStream(in_bytes); 39 var in_fbs = std.io.fixedBufferStream(in_bytes);
@@ -21,10 +42,7 @@ pub fn init(allocator: mem.Allocator) mem.Allocator.Error!Self {
21 42
22 const endian = builtin.cpu.arch.endian(); 43 const endian = builtin.cpu.arch.endian();
23 44
24 var self = Self{ 45 var self: Self = undefined;
25 .g_data = try GraphemeData.init(allocator),
26 };
27 errdefer self.g_data.deinit(allocator);
28 46
29 const stage_1_len: u16 = reader.readInt(u16, endian) catch unreachable; 47 const stage_1_len: u16 = reader.readInt(u16, endian) catch unreachable;
30 self.s1 = try allocator.alloc(u16, stage_1_len); 48 self.s1 = try allocator.alloc(u16, stage_1_len);
@@ -42,7 +60,7 @@ pub fn init(allocator: mem.Allocator) mem.Allocator.Error!Self {
42pub fn deinit(self: *const Self, allocator: mem.Allocator) void { 60pub fn deinit(self: *const Self, allocator: mem.Allocator) void {
43 allocator.free(self.s1); 61 allocator.free(self.s1);
44 allocator.free(self.s2); 62 allocator.free(self.s2);
45 self.g_data.deinit(allocator); 63 if (self.owns_gdata) self.g_data.deinit(allocator);
46} 64}
47 65
48/// codePointWidth returns the number of cells `cp` requires when rendered 66/// codePointWidth returns the number of cells `cp` requires when rendered
diff --git a/src/grapheme.zig b/src/grapheme.zig
index 25fd71d..79cd2c6 100644
--- a/src/grapheme.zig
+++ b/src/grapheme.zig
@@ -1,10 +1,99 @@
1const std = @import("std"); 1const std = @import("std");
2const builtin = @import("builtin");
2const mem = std.mem; 3const mem = std.mem;
4const Allocator = mem.Allocator;
5const compress = std.compress;
3const unicode = std.unicode; 6const unicode = std.unicode;
4 7
5const CodePoint = @import("code_point").CodePoint; 8const CodePoint = @import("code_point").CodePoint;
6const CodePointIterator = @import("code_point").Iterator; 9const CodePointIterator = @import("code_point").Iterator;
7pub const GraphemeData = @import("GraphemeData"); 10
11s1: []u16 = undefined,
12s2: []u16 = undefined,
13s3: []u8 = undefined,
14
15const Graphemes = @This();
16
17pub inline fn init(allocator: mem.Allocator) mem.Allocator.Error!Graphemes {
18 const decompressor = compress.flate.inflate.decompressor;
19 const in_bytes = @embedFile("gbp");
20 var in_fbs = std.io.fixedBufferStream(in_bytes);
21 var in_decomp = decompressor(.raw, in_fbs.reader());
22 var reader = in_decomp.reader();
23
24 const endian = builtin.cpu.arch.endian();
25
26 var self = Graphemes{};
27
28 const s1_len: u16 = reader.readInt(u16, endian) catch unreachable;
29 self.s1 = try allocator.alloc(u16, s1_len);
30 errdefer allocator.free(self.s1);
31 for (0..s1_len) |i| self.s1[i] = reader.readInt(u16, endian) catch unreachable;
32
33 const s2_len: u16 = reader.readInt(u16, endian) catch unreachable;
34 self.s2 = try allocator.alloc(u16, s2_len);
35 errdefer allocator.free(self.s2);
36 for (0..s2_len) |i| self.s2[i] = reader.readInt(u16, endian) catch unreachable;
37
38 const s3_len: u16 = reader.readInt(u16, endian) catch unreachable;
39 self.s3 = try allocator.alloc(u8, s3_len);
40 errdefer allocator.free(self.s3);
41 _ = reader.readAll(self.s3) catch unreachable;
42
43 return self;
44}
45
46pub fn deinit(graphemes: *const Graphemes, allocator: mem.Allocator) void {
47 allocator.free(graphemes.s1);
48 allocator.free(graphemes.s2);
49 allocator.free(graphemes.s3);
50}
51
52/// Lookup the grapheme break property for a code point.
53pub fn gbp(graphemes: Graphemes, cp: u21) Gbp {
54 return @enumFromInt(graphemes.s3[graphemes.s2[graphemes.s1[cp >> 8] + (cp & 0xff)]] >> 4);
55}
56
57/// Lookup the indic syllable type for a code point.
58pub fn indic(graphemes: Graphemes, cp: u21) Indic {
59 return @enumFromInt((graphemes.s3[graphemes.s2[graphemes.s1[cp >> 8] + (cp & 0xff)]] >> 1) & 0x7);
60}
61
62/// Lookup the emoji property for a code point.
63pub fn isEmoji(graphemes: Graphemes, cp: u21) bool {
64 return graphemes.s3[graphemes.s2[graphemes.s1[cp >> 8] + (cp & 0xff)]] & 1 == 1;
65}
66
67pub fn iterator(graphemes: *const Graphemes, string: []const u8) Iterator {
68 return Iterator.init(string, graphemes);
69}
70
71/// Indic syllable type.
72pub const Indic = enum {
73 none,
74
75 Consonant,
76 Extend,
77 Linker,
78};
79
80/// Grapheme break property.
81pub const Gbp = enum {
82 none,
83 Control,
84 CR,
85 Extend,
86 L,
87 LF,
88 LV,
89 LVT,
90 Prepend,
91 Regional_Indicator,
92 SpacingMark,
93 T,
94 V,
95 ZWJ,
96};
8 97
9/// `Grapheme` represents a Unicode grapheme cluster by its length and offset in the source bytes. 98/// `Grapheme` represents a Unicode grapheme cluster by its length and offset in the source bytes.
10pub const Grapheme = struct { 99pub const Grapheme = struct {
@@ -22,12 +111,12 @@ pub const Grapheme = struct {
22pub const Iterator = struct { 111pub const Iterator = struct {
23 buf: [2]?CodePoint = .{ null, null }, 112 buf: [2]?CodePoint = .{ null, null },
24 cp_iter: CodePointIterator, 113 cp_iter: CodePointIterator,
25 data: *const GraphemeData, 114 data: *const Graphemes,
26 115
27 const Self = @This(); 116 const Self = @This();
28 117
29 /// Assumes `src` is valid UTF-8. 118 /// Assumes `src` is valid UTF-8.
30 pub fn init(str: []const u8, data: *const GraphemeData) Self { 119 pub fn init(str: []const u8, data: *const Graphemes) Self {
31 var self = Self{ .cp_iter = .{ .bytes = str }, .data = data }; 120 var self = Self{ .cp_iter = .{ .bytes = str }, .data = data };
32 self.advance(); 121 self.advance();
33 return self; 122 return self;
@@ -149,7 +238,7 @@ pub const Iterator = struct {
149}; 238};
150 239
151// Predicates 240// Predicates
152fn isBreaker(cp: u21, data: *const GraphemeData) bool { 241fn isBreaker(cp: u21, data: *const Graphemes) bool {
153 // Extract relevant properties. 242 // Extract relevant properties.
154 const cp_gbp_prop = data.gbp(cp); 243 const cp_gbp_prop = data.gbp(cp);
155 return cp == '\x0d' or cp == '\x0a' or cp_gbp_prop == .Control; 244 return cp == '\x0d' or cp == '\x0a' or cp_gbp_prop == .Control;
@@ -202,7 +291,7 @@ pub const State = struct {
202pub fn graphemeBreak( 291pub fn graphemeBreak(
203 cp1: u21, 292 cp1: u21,
204 cp2: u21, 293 cp2: u21,
205 data: *const GraphemeData, 294 data: *const Graphemes,
206 state: *State, 295 state: *State,
207) bool { 296) bool {
208 // Extract relevant properties. 297 // Extract relevant properties.
@@ -306,25 +395,25 @@ test "Segmentation ZWJ and ZWSP emoji sequences" {
306 const with_zwsp = seq_1 ++ "\u{200B}" ++ seq_2; 395 const with_zwsp = seq_1 ++ "\u{200B}" ++ seq_2;
307 const no_joiner = seq_1 ++ seq_2; 396 const no_joiner = seq_1 ++ seq_2;
308 397
309 const data = try GraphemeData.init(std.testing.allocator); 398 const graphemes = try Graphemes.init(std.testing.allocator);
310 defer data.deinit(std.testing.allocator); 399 defer graphemes.deinit(std.testing.allocator);
311 400
312 { 401 {
313 var iter = Iterator.init(with_zwj, &data); 402 var iter = graphemes.iterator(with_zwj);
314 var i: usize = 0; 403 var i: usize = 0;
315 while (iter.next()) |_| : (i += 1) {} 404 while (iter.next()) |_| : (i += 1) {}
316 try std.testing.expectEqual(@as(usize, 1), i); 405 try std.testing.expectEqual(@as(usize, 1), i);
317 } 406 }
318 407
319 { 408 {
320 var iter = Iterator.init(with_zwsp, &data); 409 var iter = graphemes.iterator(with_zwsp);
321 var i: usize = 0; 410 var i: usize = 0;
322 while (iter.next()) |_| : (i += 1) {} 411 while (iter.next()) |_| : (i += 1) {}
323 try std.testing.expectEqual(@as(usize, 3), i); 412 try std.testing.expectEqual(@as(usize, 3), i);
324 } 413 }
325 414
326 { 415 {
327 var iter = Iterator.init(no_joiner, &data); 416 var iter = graphemes.iterator(no_joiner);
328 var i: usize = 0; 417 var i: usize = 0;
329 while (iter.next()) |_| : (i += 1) {} 418 while (iter.next()) |_| : (i += 1) {}
330 try std.testing.expectEqual(@as(usize, 2), i); 419 try std.testing.expectEqual(@as(usize, 2), i);
diff --git a/src/unicode_tests.zig b/src/unicode_tests.zig
index 7236ff6..de1b9ec 100644
--- a/src/unicode_tests.zig
+++ b/src/unicode_tests.zig
@@ -9,7 +9,7 @@ const unicode = std.unicode;
9 9
10const grapheme = @import("grapheme"); 10const grapheme = @import("grapheme");
11const Grapheme = @import("grapheme").Grapheme; 11const Grapheme = @import("grapheme").Grapheme;
12const GraphemeData = @import("grapheme").GraphemeData; 12const Graphemes = @import("grapheme");
13const GraphemeIterator = @import("grapheme").Iterator; 13const GraphemeIterator = @import("grapheme").Iterator;
14const Normalize = @import("Normalize"); 14const Normalize = @import("Normalize");
15 15
@@ -18,10 +18,10 @@ comptime {
18} 18}
19test "Iterator.peek" { 19test "Iterator.peek" {
20 const peek_seq = "aΔ👨🏻‍🌾→"; 20 const peek_seq = "aΔ👨🏻‍🌾→";
21 const data = try GraphemeData.init(std.testing.allocator); 21 const data = try Graphemes.init(std.testing.allocator);
22 defer data.deinit(std.testing.allocator); 22 defer data.deinit(std.testing.allocator);
23 23
24 var iter = grapheme.Iterator.init(peek_seq, &data); 24 var iter = data.iterator(peek_seq);
25 const peek_a = iter.peek().?; 25 const peek_a = iter.peek().?;
26 const next_a = iter.next().?; 26 const next_a = iter.next().?;
27 try std.testing.expectEqual(peek_a, next_a); 27 try std.testing.expectEqual(peek_a, next_a);
@@ -162,7 +162,7 @@ test "Segmentation GraphemeIterator" {
162 var buf_reader = std.io.bufferedReader(file.reader()); 162 var buf_reader = std.io.bufferedReader(file.reader());
163 var input_stream = buf_reader.reader(); 163 var input_stream = buf_reader.reader();
164 164
165 const data = try GraphemeData.init(allocator); 165 const data = try Graphemes.init(allocator);
166 defer data.deinit(allocator); 166 defer data.deinit(allocator);
167 167
168 var buf: [4096]u8 = undefined; 168 var buf: [4096]u8 = undefined;
@@ -207,7 +207,7 @@ test "Segmentation GraphemeIterator" {
207 } 207 }
208 208
209 // std.debug.print("\nline {}: {s}\n", .{ line_no, all_bytes.items }); 209 // std.debug.print("\nline {}: {s}\n", .{ line_no, all_bytes.items });
210 var iter = GraphemeIterator.init(all_bytes.items, &data); 210 var iter = data.iterator(all_bytes.items);
211 211
212 // Chaeck. 212 // Chaeck.
213 for (want.items) |want_gc| { 213 for (want.items) |want_gc| {