summaryrefslogtreecommitdiff
path: root/src/DisplayWidth.zig
diff options
context:
space:
mode:
authorGravatar Jose Colon Rodriguez2024-02-26 12:24:42 -0400
committerGravatar Jose Colon Rodriguez2024-02-26 12:24:42 -0400
commit836a4b6e63ac4bd7beb406cb20edf23f0bd342a9 (patch)
tree5f806a29594a9cb227aaa4d131209e10ff25aeee /src/DisplayWidth.zig
parentReplaced ccc_map with table. 20ms faster (diff)
downloadzg-836a4b6e63ac4bd7beb406cb20edf23f0bd342a9.tar.gz
zg-836a4b6e63ac4bd7beb406cb20edf23f0bd342a9.tar.xz
zg-836a4b6e63ac4bd7beb406cb20edf23f0bd342a9.zip
Using separate data struct model.
Diffstat (limited to 'src/DisplayWidth.zig')
-rw-r--r--src/DisplayWidth.zig351
1 files changed, 351 insertions, 0 deletions
diff --git a/src/DisplayWidth.zig b/src/DisplayWidth.zig
new file mode 100644
index 0000000..85d04a0
--- /dev/null
+++ b/src/DisplayWidth.zig
@@ -0,0 +1,351 @@
1const std = @import("std");
2const builtin = @import("builtin");
3const ArrayList = std.ArrayList;
4const mem = std.mem;
5const simd = std.simd;
6const testing = std.testing;
7
8const ascii = @import("ascii");
9const CodePointIterator = @import("code_point").Iterator;
10const GraphemeIterator = @import("grapheme").Iterator;
11pub const Data = @import("DisplayWidthData");
12
13data: *Data,
14
15const Self = @This();
16
/// strWidth returns the total display width of `str` as the number of cells
/// required in a fixed-pitch font (i.e. a terminal screen).
///
/// Each grapheme cluster contributes the width of its first code point whose
/// width is non-zero. If that code point is immediately followed by a
/// variation selector, the selector decides the width instead:
/// U+FE0E (text presentation) yields 1 cell and U+FE0F (emoji presentation)
/// yields 2 cells.
///
/// Individual code points may report a negative width (the tests in this file
/// rely on this for backspace and DEL), so the running total is signed and is
/// clamped to zero before it is returned.
pub fn strWidth(self: Self, str: []const u8) usize {
    var total: isize = 0;

    // ASCII fast path: every byte is its own code point and its own grapheme.
    if (ascii.isAsciiOnly(str)) {
        for (str) |b| total += self.data.codePointWidth(b);
        return @intCast(@max(0, total));
    }

    var giter = GraphemeIterator.init(str, &self.data.g_data);

    while (giter.next()) |gc| {
        var cp_iter = CodePointIterator{ .bytes = gc.bytes(str) };
        var gc_total: isize = 0;

        while (cp_iter.next()) |cp| {
            const cp_width: isize = self.data.codePointWidth(cp.code);

            // Skip zero-width code points until the cluster's first one
            // with a non-zero width is found.
            if (cp_width == 0) continue;

            gc_total = cp_width;

            // A variation selector right after that code point overrides the
            // width taken from the table.
            if (cp_iter.next()) |next_cp| {
                switch (next_cp.code) {
                    0xFE0E => gc_total = 1, // emoji text sequence
                    0xFE0F => gc_total = 2, // emoji presentation sequence
                    else => {},
                }
            }

            // Only the first non-zero-width code point counts for the cluster.
            break;
        }

        total += gc_total;
    }

    return @intCast(@max(0, total));
}
57
test "strWidth" {
    var data = try Data.init(testing.allocator);
    defer data.deinit();
    const self = Self{ .data = &data };

    // Each entry pairs an input with the number of cells it should occupy.
    const Case = struct { width: usize, text: []const u8 };
    const cases = [_]Case{
        .{ .width = 5, .text = "Hello\r\n" },
        .{ .width = 1, .text = "\u{0065}\u{0301}" },
        .{ .width = 2, .text = "\u{1F476}\u{1F3FF}\u{0308}\u{200D}\u{1F476}\u{1F3FF}" },
        .{ .width = 8, .text = "Hello 😊" },
        .{ .width = 8, .text = "Héllo 😊" },
        .{ .width = 8, .text = "Héllo :)" },
        .{ .width = 8, .text = "Héllo 🇪🇸" },
        .{ .width = 2, .text = "\u{26A1}" }, // Lone emoji
        .{ .width = 1, .text = "\u{26A1}\u{FE0E}" }, // Text sequence
        .{ .width = 2, .text = "\u{26A1}\u{FE0F}" }, // Presentation sequence
        .{ .width = 0, .text = "A\x08" }, // Backspace
        .{ .width = 0, .text = "\x7FA" }, // DEL
        .{ .width = 0, .text = "\x7FA\x08\x08" }, // Never less than 0

        // wcwidth Python lib tests. See: https://github.com/jquast/wcwidth/blob/master/tests/test_core.py
        .{ .width = 0, .text = "" }, // Empty
        .{ .width = 10, .text = "hello\x00world" }, // With NUL
        .{ .width = 19, .text = "コンニチハ, セカイ!" }, // Japanese hello
        .{ .width = 3, .text = "\x1b[0m" }, // Control sequence
        .{ .width = 3, .text = "\u{1B13}\u{1B28}\u{1B2E}\u{1B44}" }, // Balinese

        // These commented out tests require a new specification for complex scripts.
        // See: https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf
        // Jamo:       .{ .width = 3, .text = "\u{1100}\u{1160}" },
        // Devanagari: .{ .width = 3, .text = "\u{0915}\u{094D}\u{0937}\u{093F}" },
        // Tamil:      .{ .width = 5, .text = "\u{0b95}\u{0bcd}\u{0bb7}\u{0bcc}" },
        // Kannada 1:  .{ .width = 3, .text = "\u{0cb0}\u{0ccd}\u{0c9d}\u{0cc8}" },
        // The following (Kannada 2) passes but as a mere coincidence.
        .{ .width = 2, .text = "\u{0cb0}\u{0cbc}\u{0ccd}\u{0c9a}" },

        // From Rust https://github.com/jameslanska/unicode-display-width
        .{ .width = 15, .text = "🔥🗡🍩👩🏻‍🚀⏰💃🏼🔦👍🏻" },
        .{ .width = 2, .text = "🦀" },
        .{ .width = 2, .text = "👨‍👩‍👧‍👧" },
        .{ .width = 2, .text = "👩‍🔬" },
        .{ .width = 9, .text = "sane text" },
        .{ .width = 9, .text = "Ẓ̌á̲l͔̝̞̄̑͌g̖̘̘̔̔͢͞͝o̪̔T̢̙̫̈̍͞e̬͈͕͌̏͑x̺̍ṭ̓̓ͅ" },
        .{ .width = 17, .text = "슬라바 우크라이나" },
        .{ .width = 1, .text = "\u{378}" },
    };

    for (cases) |case| {
        try testing.expectEqual(case.width, self.strWidth(case.text));
    }
}
113
/// Centers `str` in a newly allocated string that is `total_width` display
/// cells wide, using repetitions of `pad` as padding on both sides.
///
/// If the width of `str` and `total_width` have different parity, the right
/// side of `str` receives one additional pad. This makes sure the returned
/// string fills the requested width, including the case where only a single
/// cell is left over (all of it goes to the right side).
///
/// Errors:
/// - `error.StrTooLong`: `str` is wider than `total_width`.
/// - `error.PadTooLong`: `pad` is wider than the space available on one side.
/// - `error.ZeroWidthPad`: padding is required but `pad` occupies no cells
///   (for example an empty `pad`), so no number of repetitions could fill
///   the gap.
/// - any error returned by `allocator`.
///
/// Caller must free returned bytes with `allocator`.
pub fn center(
    self: Self,
    allocator: mem.Allocator,
    str: []const u8,
    total_width: usize,
    pad: []const u8,
) ![]u8 {
    const str_width = self.strWidth(str);
    if (str_width > total_width) return error.StrTooLong;
    if (str_width == total_width) return try allocator.dupe(u8, str);

    const pad_width = self.strWidth(pad);
    // Guard the divisions below: a pad without display width cannot fill
    // anything and would otherwise cause a division by zero.
    if (pad_width == 0) return error.ZeroWidthPad;

    // Cells that remain once `str` is placed; always >= 1 at this point.
    const free_width = total_width - str_width;
    if (pad_width > free_width) return error.PadTooLong;

    // Cells available on each side. This is zero when exactly one cell is
    // free; that lone cell is covered by the extra right-hand pad, so the pad
    // is only "too long" when a real margin exists and the pad overflows it.
    const margin_width = free_width / 2;
    if (margin_width != 0 and pad_width > margin_width) return error.PadTooLong;

    const extra_pad: usize = if (total_width % 2 != str_width % 2) 1 else 0;
    const left_pads = margin_width / pad_width;
    const right_pads = left_pads + extra_pad;

    const result = try allocator.alloc(u8, (left_pads + right_pads) * pad.len + str.len);
    var bytes_index: usize = 0;

    for (0..left_pads) |_| {
        @memcpy(result[bytes_index..][0..pad.len], pad);
        bytes_index += pad.len;
    }

    @memcpy(result[bytes_index..][0..str.len], str);
    bytes_index += str.len;

    for (0..right_pads) |_| {
        @memcpy(result[bytes_index..][0..pad.len], pad);
        bytes_index += pad.len;
    }

    return result;
}
157
test "center" {
    const allocator = testing.allocator;
    var data = try Data.init(allocator);
    defer data.deinit();
    const self = Self{ .data = &data };

    const Case = struct {
        str: []const u8,
        total_width: usize,
        pad: []const u8,
        want: []const u8,
    };
    const cases = [_]Case{
        // Input and width both have odd length
        .{ .str = "abc", .total_width = 9, .pad = "*", .want = "***abc***" },
        // Input and width both have even length
        .{ .str = "w😊w", .total_width = 10, .pad = "-", .want = "---w😊w---" },
        // Input has even length, width has odd length
        .{ .str = "1234", .total_width = 9, .pad = "-", .want = "--1234---" },
        // Input has odd length, width has even length
        .{ .str = "123", .total_width = 8, .pad = "-", .want = "--123---" },
        // Input is the same length as the width
        .{ .str = "123", .total_width = 3, .pad = "-", .want = "123" },
        // Input is empty
        .{ .str = "", .total_width = 3, .pad = "-", .want = "---" },
        // Input is empty and width is zero
        .{ .str = "", .total_width = 0, .pad = "-", .want = "" },
    };

    for (cases) |case| {
        const centered = try self.center(allocator, case.str, case.total_width, case.pad);
        // Released on every exit from this iteration, so a failing expectation
        // does not additionally leak the buffer.
        defer allocator.free(centered);
        try testing.expectEqualSlices(u8, case.want, centered);
    }

    // Input is longer than the width, which is an error
    try testing.expectError(error.StrTooLong, self.center(allocator, "123", 2, "-"));
}
202
/// padLeft returns a new string of width `total_width` (in display cells)
/// using `pad` as padding on the left side, so `str` ends up right-aligned.
///
/// When `str` already fills `total_width` exactly, a copy of `str` is
/// returned unchanged, matching what `center` does for the same input.
///
/// Errors:
/// - `error.StrTooLong`: `str` is wider than `total_width`.
/// - `error.PadTooLong`: `pad` is wider than the space left of `str`.
/// - `error.ZeroWidthPad`: padding is required but `pad` occupies no cells
///   (for example an empty `pad`).
/// - any error returned by `allocator`.
///
/// Caller must free returned bytes with `allocator`.
pub fn padLeft(
    self: Self,
    allocator: mem.Allocator,
    str: []const u8,
    total_width: usize,
    pad: []const u8,
) ![]u8 {
    const str_width = self.strWidth(str);
    if (str_width > total_width) return error.StrTooLong;
    // Nothing to pad: without this early return every non-zero-width pad
    // would be reported as too long for the zero-cell margin.
    if (str_width == total_width) return try allocator.dupe(u8, str);

    const pad_width = self.strWidth(pad);
    // Guard the division below against a pad that has no display width.
    if (pad_width == 0) return error.ZeroWidthPad;

    const margin_width = total_width - str_width;
    if (pad_width > margin_width) return error.PadTooLong;

    // Whole pads that fit in the margin; any remainder is left unfilled.
    const pads = margin_width / pad_width;
    const pad_bytes = pads * pad.len;

    const result = try allocator.alloc(u8, pad_bytes + str.len);

    for (0..pads) |i| {
        @memcpy(result[i * pad.len ..][0..pad.len], pad);
    }
    @memcpy(result[pad_bytes..][0..str.len], str);

    return result;
}
236
test "padLeft" {
    const allocator = testing.allocator;
    var data = try Data.init(allocator);
    defer data.deinit();
    const self = Self{ .data = &data };

    // Each result lives in its own scope with its own `defer`, so a failure
    // in a later call can never free an earlier buffer a second time.
    {
        const right_aligned = try self.padLeft(allocator, "abc", 9, "*");
        defer allocator.free(right_aligned);
        try testing.expectEqualSlices(u8, "******abc", right_aligned);
    }

    {
        const right_aligned = try self.padLeft(allocator, "w😊w", 10, "-");
        defer allocator.free(right_aligned);
        try testing.expectEqualSlices(u8, "------w😊w", right_aligned);
    }
}
251
/// padRight returns a new string of width `total_width` (in display cells)
/// using `pad` as padding on the right side, so `str` ends up left-aligned.
///
/// When `str` already fills `total_width` exactly, a copy of `str` is
/// returned unchanged, matching what `center` does for the same input.
///
/// Errors:
/// - `error.StrTooLong`: `str` is wider than `total_width`.
/// - `error.PadTooLong`: `pad` is wider than the space right of `str`.
/// - `error.ZeroWidthPad`: padding is required but `pad` occupies no cells
///   (for example an empty `pad`).
/// - any error returned by `allocator`.
///
/// Caller must free returned bytes with `allocator`.
pub fn padRight(
    self: Self,
    allocator: mem.Allocator,
    str: []const u8,
    total_width: usize,
    pad: []const u8,
) ![]u8 {
    const str_width = self.strWidth(str);
    if (str_width > total_width) return error.StrTooLong;
    // Nothing to pad: without this early return every non-zero-width pad
    // would be reported as too long for the zero-cell margin.
    if (str_width == total_width) return try allocator.dupe(u8, str);

    const pad_width = self.strWidth(pad);
    // Guard the division below against a pad that has no display width.
    if (pad_width == 0) return error.ZeroWidthPad;

    const margin_width = total_width - str_width;
    if (pad_width > margin_width) return error.PadTooLong;

    // Whole pads that fit in the margin; any remainder is left unfilled.
    const pads = margin_width / pad_width;

    const result = try allocator.alloc(u8, str.len + pads * pad.len);

    @memcpy(result[0..str.len], str);

    for (0..pads) |i| {
        @memcpy(result[str.len + i * pad.len ..][0..pad.len], pad);
    }

    return result;
}
286
test "padRight" {
    const allocator = testing.allocator;
    var data = try Data.init(allocator);
    defer data.deinit();
    const self = Self{ .data = &data };

    // Each result lives in its own scope with its own `defer`, so a failure
    // in a later call can never free an earlier buffer a second time.
    {
        const left_aligned = try self.padRight(allocator, "abc", 9, "*");
        defer allocator.free(left_aligned);
        try testing.expectEqualSlices(u8, "abc******", left_aligned);
    }

    {
        const left_aligned = try self.padRight(allocator, "w😊w", 10, "-");
        defer allocator.free(left_aligned);
        try testing.expectEqualSlices(u8, "w😊w------", left_aligned);
    }
}
301
/// Wraps a string approximately at the given number of columns per line.
/// `threshold` defines how far the last column of the last word can be
/// from the edge.
///
/// The input is split on `\r` and `\n` and then on spaces; the resulting
/// words are re-flowed, so existing line breaks and runs of spaces are not
/// preserved. Every word is followed by a single space, and a `\n` is
/// inserted once the current line exceeds `columns` or comes within
/// `threshold` cells of it. Trailing padding (a space and, if present, a
/// newline) is stripped from the end of the result. Input without any words
/// yields an empty string.
///
/// Caller must free returned bytes with `allocator`.
pub fn wrap(
    self: Self,
    allocator: mem.Allocator,
    str: []const u8,
    columns: usize,
    threshold: usize,
) ![]u8 {
    var result = ArrayList(u8).init(allocator);
    defer result.deinit();

    var line_iter = mem.tokenizeAny(u8, str, "\r\n");
    var line_width: usize = 0;

    while (line_iter.next()) |line| {
        var word_iter = mem.tokenizeScalar(u8, line, ' ');

        while (word_iter.next()) |word| {
            try result.appendSlice(word);
            try result.append(' ');
            line_width += self.strWidth(word) + 1;

            // The first operand short-circuits, so the subtraction can
            // never underflow.
            if (line_width > columns or columns - line_width <= threshold) {
                try result.append('\n');
                line_width = 0;
            }
        }
    }

    // Remove the trailing newline and space, but only the ones that are
    // actually there: the output ends in " \n" when the last word triggered a
    // break, in " " when it did not, and is empty when there were no words.
    // Words never contain spaces or newlines, so this cannot eat word bytes.
    var end = result.items.len;
    if (end > 0 and result.items[end - 1] == '\n') end -= 1;
    if (end > 0 and result.items[end - 1] == ' ') end -= 1;
    result.shrinkRetainingCapacity(end);

    return try result.toOwnedSlice();
}
339
test "wrap" {
    var data = try Data.init(testing.allocator);
    defer data.deinit();
    const self = Self{ .data = &data };

    // Two input lines are re-flowed to roughly 10 columns with a threshold of 3.
    const wrapped = try self.wrap(
        testing.allocator,
        "The quick brown fox\r\njumped over the lazy dog!",
        10,
        3,
    );
    defer testing.allocator.free(wrapped);

    try testing.expectEqualStrings(
        "The quick \nbrown fox \njumped \nover the \nlazy dog!",
        wrapped,
    );
}