summaryrefslogtreecommitdiff
path: root/src/display_width.zig
diff options
context:
space:
mode:
authorGravatar Jose Colon Rodriguez2024-02-26 12:24:42 -0400
committerGravatar Jose Colon Rodriguez2024-02-26 12:24:42 -0400
commit836a4b6e63ac4bd7beb406cb20edf23f0bd342a9 (patch)
tree5f806a29594a9cb227aaa4d131209e10ff25aeee /src/display_width.zig
parentReplaced ccc_map with table. 20ms faster (diff)
downloadzg-836a4b6e63ac4bd7beb406cb20edf23f0bd342a9.tar.gz
zg-836a4b6e63ac4bd7beb406cb20edf23f0bd342a9.tar.xz
zg-836a4b6e63ac4bd7beb406cb20edf23f0bd342a9.zip
Using separate data struct model.
Diffstat (limited to 'src/display_width.zig')
-rw-r--r--src/display_width.zig360
1 files changed, 0 insertions, 360 deletions
diff --git a/src/display_width.zig b/src/display_width.zig
deleted file mode 100644
index a916cac..0000000
--- a/src/display_width.zig
+++ /dev/null
@@ -1,360 +0,0 @@
1const std = @import("std");
2const simd = std.simd;
3const mem = std.mem;
4const testing = std.testing;
5
6const ascii = @import("ascii");
7const CodePointIterator = @import("code_point").Iterator;
8const dwp = @import("dwp");
9const GraphemeIterator = @import("grapheme").Iterator;
10
/// Returns the number of terminal cells `cp` occupies when rendered in a
/// fixed-pitch font. The value is read from the two-stage `dwp` lookup table
/// and ranges from -1 to 3: BACKSPACE and DELETE yield -1, the three-em dash
/// yields 3, and C0/C1 control codes yield 0.
///
/// There is no `cjk` switch on this function; ambiguous-width code points
/// simply get whatever the table stores for them (the tests in this file
/// expect 1 for U+00BD).
pub fn codePointWidth(cp: u21) i3 {
    // Stage 1 is indexed by the high bits of the code point and gives the
    // offset of a 256-entry block inside stage 2.
    // NOTE(review): assumes `dwp.stage_1` has an entry for every `cp >> 8`
    // the caller can pass — confirm against the generated table.
    const block_offset = dwp.stage_1[cp >> 8];
    // The low 8 bits select the entry within that block.
    const index_in_block = cp & 0xff;
    return dwp.stage_2[block_offset + index_in_block];
}
19
test "codePointWidth" {
    const Case = struct { cp: u21, want: i3 };

    // Each entry pairs a code point with the width the lookup must report.
    const cases = [_]Case{
        .{ .cp = 0x0000, .want = 0 }, // null
        .{ .cp = 0x8, .want = -1 }, // \b
        .{ .cp = 0x7f, .want = -1 }, // DEL
        .{ .cp = 0x0005, .want = 0 }, // Cf
        .{ .cp = 0x0007, .want = 0 }, // \a BEL
        .{ .cp = 0x000A, .want = 0 }, // \n LF
        .{ .cp = 0x000B, .want = 0 }, // \v VT
        .{ .cp = 0x000C, .want = 0 }, // \f FF
        .{ .cp = 0x000D, .want = 0 }, // \r CR
        .{ .cp = 0x000E, .want = 0 }, // SO
        .{ .cp = 0x000F, .want = 0 }, // SI

        .{ .cp = 0x070F, .want = 0 }, // Cf
        .{ .cp = 0x0603, .want = 1 }, // Cf Arabic

        .{ .cp = 0x00AD, .want = 1 }, // soft-hyphen
        .{ .cp = 0x2E3A, .want = 2 }, // two-em dash
        .{ .cp = 0x2E3B, .want = 3 }, // three-em dash

        .{ .cp = 0x00BD, .want = 1 }, // ambiguous halfwidth

        .{ .cp = 'é', .want = 1 },
        .{ .cp = '😊', .want = 2 },
        .{ .cp = '统', .want = 2 },
    };

    for (cases) |case| {
        try testing.expectEqual(case.want, codePointWidth(case.cp));
    }
}
46
/// Returns the display width of `str`: the number of cells it needs in a
/// fixed-pitch font (i.e. a terminal screen). Negative-width code points
/// (such as backspace) reduce the running total, but the returned value is
/// never below zero.
pub fn strWidth(str: []const u8) usize {
    var total: isize = 0;

    // Pure ASCII input needs no grapheme segmentation: sum per-byte widths.
    if (ascii.isAsciiOnly(str)) {
        for (str) |byte| total += codePointWidth(byte);
        return @intCast(@max(0, total));
    }

    var graphemes = GraphemeIterator.init(str);

    while (graphemes.next()) |grapheme| {
        var code_points = CodePointIterator{ .bytes = grapheme.bytes(str) };

        // A grapheme cluster is as wide as its first code point whose width
        // is non-zero; a cluster made only of zero-width code points is 0.
        const cluster_width: isize = while (code_points.next()) |cp| {
            const w = codePointWidth(cp.code);
            if (w == 0) continue;

            // A following U+FE0E (text variation selector) requests text
            // presentation, which occupies a single cell.
            if (code_points.next()) |following| {
                if (following.code == 0xFE0E) break 1;
            }

            break w;
        } else 0;

        total += cluster_width;
    }

    return @intCast(@max(0, total));
}
87
test "strWidth" {
    const Case = struct { input: []const u8, want: usize };

    const cases = [_]Case{
        .{ .input = "Hello\r\n", .want = 5 },
        .{ .input = "\u{0065}\u{0301}", .want = 1 },
        .{ .input = "\u{1F476}\u{1F3FF}\u{0308}\u{200D}\u{1F476}\u{1F3FF}", .want = 2 },
        .{ .input = "Hello 😊", .want = 8 },
        .{ .input = "Héllo 😊", .want = 8 },
        .{ .input = "Héllo :)", .want = 8 },
        .{ .input = "Héllo 🇪🇸", .want = 8 },
        .{ .input = "\u{26A1}", .want = 2 }, // Lone emoji
        .{ .input = "\u{26A1}\u{FE0E}", .want = 1 }, // Text sequence
        .{ .input = "\u{26A1}\u{FE0F}", .want = 2 }, // Presentation sequence
        .{ .input = "A\x08", .want = 0 }, // Backspace
        .{ .input = "\x7FA", .want = 0 }, // DEL
        .{ .input = "\x7FA\x08\x08", .want = 0 }, // never less than 0

        // wcwidth Python lib tests. See: https://github.com/jquast/wcwidth/blob/master/tests/test_core.py
        .{ .input = "", .want = 0 }, // empty
        .{ .input = "hello\x00world", .want = 10 }, // embedded null
        .{ .input = "コンニチハ, セカイ!", .want = 19 }, // Japanese hello world
        .{ .input = "\x1b[0m", .want = 3 }, // control sequence
        .{ .input = "\u{1B13}\u{1B28}\u{1B2E}\u{1B44}", .want = 3 }, // Balinese

        // The disabled cases below require a new specification for complex scripts.
        // See: https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf
        // .{ .input = "\u{1100}\u{1160}", .want = 3 }, // Jamo
        // .{ .input = "\u{0915}\u{094D}\u{0937}\u{093F}", .want = 3 }, // Devanagari
        // .{ .input = "\u{0b95}\u{0bcd}\u{0bb7}\u{0bcc}", .want = 5 }, // Tamil
        // .{ .input = "\u{0cb0}\u{0ccd}\u{0c9d}\u{0cc8}", .want = 3 }, // Kannada 1
        // The following passes, but only by coincidence.
        .{ .input = "\u{0cb0}\u{0cbc}\u{0ccd}\u{0c9a}", .want = 2 }, // Kannada 2

        // From Rust https://github.com/jameslanska/unicode-display-width
        .{ .input = "🔥🗡🍩👩🏻‍🚀⏰💃🏼🔦👍🏻", .want = 15 },
        .{ .input = "🦀", .want = 2 },
        .{ .input = "👨‍👩‍👧‍👧", .want = 2 },
        .{ .input = "👩‍🔬", .want = 2 },
        .{ .input = "sane text", .want = 9 },
        .{ .input = "Ẓ̌á̲l͔̝̞̄̑͌g̖̘̘̔̔͢͞͝o̪̔T̢̙̫̈̍͞e̬͈͕͌̏͑x̺̍ṭ̓̓ͅ", .want = 9 },
        .{ .input = "슬라바 우크라이나", .want = 17 },
        .{ .input = "\u{378}", .want = 1 },
    };

    for (cases) |case| {
        try testing.expectEqual(case.want, strWidth(case.input));
    }
}
139
/// Centers `str` in a new string of at most `total_width` display cells,
/// using repetitions of `pad` as padding on both sides.
///
/// The free space (`total_width` minus the width of `str`) is split in two;
/// when it is odd, the right side gets the extra cell. Each side receives as
/// many whole copies of `pad` as fit into its share, so with a one-cell `pad`
/// the result is exactly `total_width` wide, and with a wider `pad` it may be
/// narrower but never wider.
///
/// Errors:
/// - `error.StrTooLong` if `str` is wider than `total_width`.
/// - `error.ZeroWidthPad` if `pad` has no display width (it could never fill
///   the margin; previously this divided by zero).
/// - `error.PadTooLong` if one copy of `pad` does not fit in the left margin.
/// - Any allocation error from `allocator`.
///
/// Caller must free returned bytes with `allocator`.
pub fn center(
    allocator: mem.Allocator,
    str: []const u8,
    total_width: usize,
    pad: []const u8,
) ![]u8 {
    const str_width = strWidth(str);
    if (str_width > total_width) return error.StrTooLong;
    if (str_width == total_width) return try allocator.dupe(u8, str);

    const pad_width = strWidth(pad);
    if (pad_width == 0) return error.ZeroWidthPad;

    // Split the free cells; the right side takes the odd one out.
    const free_width = total_width - str_width;
    const left_width = free_width / 2;
    const right_width = free_width - left_width;

    // The left margin is the smaller one, so a pad that does not fit there
    // covers every "pad too long" case.
    if (pad_width > left_width) return error.PadTooLong;

    // Whole pads per side. Computing each side from its own cell budget keeps
    // a multi-cell pad from overshooting `total_width`.
    const left_pads = left_width / pad_width;
    const right_pads = right_width / pad_width;

    const result = try allocator.alloc(u8, (left_pads + right_pads) * pad.len + str.len);
    var offset: usize = 0;

    for (0..left_pads) |_| {
        @memcpy(result[offset..][0..pad.len], pad);
        offset += pad.len;
    }

    @memcpy(result[offset..][0..str.len], str);
    offset += str.len;

    for (0..right_pads) |_| {
        @memcpy(result[offset..][0..pad.len], pad);
        offset += pad.len;
    }

    return result;
}
182
test "center" {
    const allocator = std.testing.allocator;

    const Case = struct {
        str: []const u8,
        width: usize,
        pad: []const u8,
        want: []const u8,
    };

    const cases = [_]Case{
        // Input and width both have odd length
        .{ .str = "abc", .width = 9, .pad = "*", .want = "***abc***" },
        // Input and width both have even length
        .{ .str = "w😊w", .width = 10, .pad = "-", .want = "---w😊w---" },
        // Input has even length, width has odd length
        .{ .str = "1234", .width = 9, .pad = "-", .want = "--1234---" },
        // Input has odd length, width has even length
        .{ .str = "123", .width = 8, .pad = "-", .want = "--123---" },
        // Input is the same length as the width
        .{ .str = "123", .width = 3, .pad = "-", .want = "123" },
        // Input is empty
        .{ .str = "", .width = 3, .pad = "-", .want = "---" },
        // Input is empty and width is zero
        .{ .str = "", .width = 0, .pad = "-", .want = "" },
    };

    for (cases) |case| {
        const centered = try center(allocator, case.str, case.width, case.pad);
        // Per-iteration defer: the buffer is released even when the
        // expectation below fails, so a failure is not masked by a leak.
        defer allocator.free(centered);
        try testing.expectEqualSlices(u8, case.want, centered);
    }

    // Input is longer than the width, which is an error
    try testing.expectError(error.StrTooLong, center(allocator, "123", 2, "-"));
}
224
/// Returns a new string of at most `total_width` display cells consisting of
/// repetitions of `pad` followed by `str` (i.e. `str` is right-aligned).
///
/// As many whole copies of `pad` as fit into the free space are used, so with
/// a one-cell `pad` the result is exactly `total_width` wide. If `str`
/// already fills `total_width`, a copy of `str` is returned unchanged.
///
/// Errors:
/// - `error.StrTooLong` if `str` is wider than `total_width`.
/// - `error.ZeroWidthPad` if `pad` has no display width (it could never fill
///   the margin; previously this divided by zero).
/// - `error.PadTooLong` if one copy of `pad` does not fit in the free space.
/// - Any allocation error from `allocator`.
///
/// Caller must free returned bytes with `allocator`.
pub fn padLeft(
    allocator: std.mem.Allocator,
    str: []const u8,
    total_width: usize,
    pad: []const u8,
) ![]u8 {
    const str_width = strWidth(str);
    if (str_width > total_width) return error.StrTooLong;
    // Nothing to pad: hand back a copy, matching what `center` does.
    if (str_width == total_width) return try allocator.dupe(u8, str);

    const pad_width = strWidth(pad);
    if (pad_width == 0) return error.ZeroWidthPad;

    const margin_width = total_width - str_width;
    if (pad_width > margin_width) return error.PadTooLong;

    const pads = margin_width / pad_width;

    const result = try allocator.alloc(u8, pads * pad.len + str.len);
    var offset: usize = 0;

    for (0..pads) |_| {
        @memcpy(result[offset..][0..pad.len], pad);
        offset += pad.len;
    }

    @memcpy(result[offset..][0..str.len], str);

    return result;
}
257
test "padLeft" {
    const allocator = std.testing.allocator;

    // Each case owns its buffer in its own scope. Reusing one mutable
    // variable with both a `defer` and a manual free would release the first
    // buffer twice if the second `padLeft` call returned an error.
    {
        const right_aligned = try padLeft(allocator, "abc", 9, "*");
        defer allocator.free(right_aligned);
        try testing.expectEqualSlices(u8, "******abc", right_aligned);
    }

    {
        const right_aligned = try padLeft(allocator, "w😊w", 10, "-");
        defer allocator.free(right_aligned);
        try testing.expectEqualSlices(u8, "------w😊w", right_aligned);
    }
}
269
/// Returns a new string of at most `total_width` display cells consisting of
/// `str` followed by repetitions of `pad` (i.e. `str` is left-aligned).
///
/// As many whole copies of `pad` as fit into the free space are used, so with
/// a one-cell `pad` the result is exactly `total_width` wide. If `str`
/// already fills `total_width`, a copy of `str` is returned unchanged.
///
/// Errors:
/// - `error.StrTooLong` if `str` is wider than `total_width`.
/// - `error.ZeroWidthPad` if `pad` has no display width (it could never fill
///   the margin; previously this divided by zero).
/// - `error.PadTooLong` if one copy of `pad` does not fit in the free space.
/// - Any allocation error from `allocator`.
///
/// Caller must free returned bytes with `allocator`.
pub fn padRight(
    allocator: std.mem.Allocator,
    str: []const u8,
    total_width: usize,
    pad: []const u8,
) ![]u8 {
    const str_width = strWidth(str);
    if (str_width > total_width) return error.StrTooLong;
    // Nothing to pad: hand back a copy, matching what `center` does.
    if (str_width == total_width) return try allocator.dupe(u8, str);

    const pad_width = strWidth(pad);
    if (pad_width == 0) return error.ZeroWidthPad;

    const margin_width = total_width - str_width;
    if (pad_width > margin_width) return error.PadTooLong;

    const pads = margin_width / pad_width;

    const result = try allocator.alloc(u8, pads * pad.len + str.len);

    @memcpy(result[0..str.len], str);
    var offset: usize = str.len;

    for (0..pads) |_| {
        @memcpy(result[offset..][0..pad.len], pad);
        offset += pad.len;
    }

    return result;
}
303
test "padRight" {
    const allocator = std.testing.allocator;

    // Each case owns its buffer in its own scope. Reusing one mutable
    // variable with both a `defer` and a manual free would release the first
    // buffer twice if the second `padRight` call returned an error.
    {
        const left_aligned = try padRight(allocator, "abc", 9, "*");
        defer allocator.free(left_aligned);
        try testing.expectEqualSlices(u8, "abc******", left_aligned);
    }

    {
        const left_aligned = try padRight(allocator, "w😊w", 10, "-");
        defer allocator.free(left_aligned);
        try testing.expectEqualSlices(u8, "w😊w------", left_aligned);
    }
}
315
/// Wraps `str` approximately at `columns` display cells per line.
///
/// The input is split into words on spaces, carriage returns and newlines
/// (existing line breaks are not preserved). Every word is written followed
/// by a single space; a newline is inserted after a word once the current
/// line is wider than `columns` or its remaining room is at most `threshold`
/// cells. Wrapped lines therefore end in a space before the newline. The
/// separator after the very last word is removed, and empty or
/// whitespace-only input yields an empty string.
///
/// Returns any allocation error from `allocator`.
/// Caller must free returned bytes with `allocator`.
pub fn wrap(
    allocator: std.mem.Allocator,
    str: []const u8,
    columns: usize,
    threshold: usize,
) ![]u8 {
    var result = std.ArrayList(u8).init(allocator);
    defer result.deinit();

    var line_iter = mem.tokenizeAny(u8, str, "\r\n");
    // Deliberately not reset per input line: input line breaks act like spaces.
    var line_width: usize = 0;

    while (line_iter.next()) |line| {
        var word_iter = mem.tokenizeScalar(u8, line, ' ');

        while (word_iter.next()) |word| {
            try result.appendSlice(word);
            try result.append(' ');
            line_width += strWidth(word) + 1;

            // The first test short-circuits, so the subtraction cannot underflow.
            if (line_width > columns or columns - line_width <= threshold) {
                try result.append('\n');
                line_width = 0;
            }
        }
    }

    // Strip only the separators that were actually appended after the last
    // word. Popping two bytes unconditionally would fail on empty output and
    // would cut off the last character of the final word whenever no newline
    // followed it.
    var end = result.items.len;
    if (end > 0 and result.items[end - 1] == '\n') end -= 1;
    if (end > 0 and result.items[end - 1] == ' ') end -= 1;
    result.shrinkRetainingCapacity(end);

    return try result.toOwnedSlice();
}
352
test "wrap" {
    // `const`: the allocator handle is never mutated, and newer Zig compilers
    // reject an unmutated `var`.
    const allocator = std.testing.allocator;

    const input = "The quick brown fox\r\njumped over the lazy dog!";
    const want = "The quick \nbrown fox \njumped \nover the \nlazy dog!";

    const got = try wrap(allocator, input, 10, 3);
    defer allocator.free(got);

    try testing.expectEqualStrings(want, got);
}