summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorGravatar Jose Colon Rodriguez2024-02-18 17:04:22 -0400
committerGravatar Jose Colon Rodriguez2024-02-18 17:04:22 -0400
commit4eb86edf5dea66edebdc41ab2cccf3bf30295520 (patch)
tree05845ade9a04353bbbaaef35f4840d6d4ebca15d /src
parentUsing argsWithAllocator for Windows (diff)
downloadzg-4eb86edf5dea66edebdc41ab2cccf3bf30295520.tar.gz
zg-4eb86edf5dea66edebdc41ab2cccf3bf30295520.tar.xz
zg-4eb86edf5dea66edebdc41ab2cccf3bf30295520.zip
center, padLeft, padRight, and wrap
Diffstat (limited to 'src')
-rw-r--r--src/ascii.zig33
-rw-r--r--src/display_width.zig293
2 files changed, 281 insertions, 45 deletions
diff --git a/src/ascii.zig b/src/ascii.zig
new file mode 100644
index 0000000..6c28f25
--- /dev/null
+++ b/src/ascii.zig
@@ -0,0 +1,33 @@
1const std = @import("std");
2const simd = std.simd;
3const testing = std.testing;
4
/// Reports whether every byte of `str` lies in the ASCII range (0 through 127).
/// When the target suggests a SIMD vector length, the input is scanned one
/// vector-sized chunk at a time; otherwise a plain byte loop is used.
pub fn isAsciiOnly(str: []const u8) bool {
    const chunk_len = simd.suggestVectorLength(u8) orelse {
        // No vector support suggested for this target: scalar scan only.
        for (str) |byte| {
            if (byte > 127) return false;
        }
        return true;
    };

    const Chunk = @Vector(chunk_len, u8);
    const ascii_max: Chunk = @splat(127);
    var offset: usize = 0;

    // Whole chunks: a single lane above 127 means a non-ASCII byte is present.
    while (str.len - offset >= chunk_len) : (offset += chunk_len) {
        const chunk: Chunk = str[offset..][0..chunk_len].*;
        if (@reduce(.Or, chunk > ascii_max)) return false;
    }

    // Remaining bytes that do not fill a whole chunk.
    for (str[offset..]) |byte| {
        if (byte > 127) return false;
    }
    return true;
}
27
test "isAsciiOnly" {
    // One accepted and one rejected input; they differ only in the 'é'.
    const cases = [_]struct { input: []const u8, want: bool }{
        .{ .input = "Hello, World! 0123456789 !@#$%^&*()_-=+", .want = true },
        .{ .input = "Héllo, World! 0123456789 !@#$%^&*()_-=+", .want = false },
    };

    for (cases) |case| {
        try testing.expect(isAsciiOnly(case.input) == case.want);
    }
}
diff --git a/src/display_width.zig b/src/display_width.zig
index 71483ca..a916cac 100644
--- a/src/display_width.zig
+++ b/src/display_width.zig
@@ -1,10 +1,12 @@
1const std = @import("std"); 1const std = @import("std");
2const simd = std.simd; 2const simd = std.simd;
3const mem = std.mem;
3const testing = std.testing; 4const testing = std.testing;
4 5
6const ascii = @import("ascii");
5const CodePointIterator = @import("code_point").Iterator; 7const CodePointIterator = @import("code_point").Iterator;
6const GraphemeIterator = @import("grapheme").Iterator;
7const dwp = @import("dwp"); 8const dwp = @import("dwp");
9const GraphemeIterator = @import("grapheme").Iterator;
8 10
9/// codePointWidth returns the number of cells `cp` requires when rendered 11/// codePointWidth returns the number of cells `cp` requires when rendered
10/// in a fixed-pitch font (i.e. a terminal screen). This can range from -1 to 12/// in a fixed-pitch font (i.e. a terminal screen). This can range from -1 to
@@ -15,27 +17,31 @@ pub fn codePointWidth(cp: u21) i3 {
15 return dwp.stage_2[dwp.stage_1[cp >> 8] + (cp & 0xff)]; 17 return dwp.stage_2[dwp.stage_1[cp >> 8] + (cp & 0xff)];
16} 18}
17 19
18// Returns true if `str` only contains ASCII bytes. Uses SIMD if possible. 20test "codePointWidth" {
19fn isAsciiOnly(str: []const u8) bool { 21 try testing.expectEqual(@as(i3, 0), codePointWidth(0x0000)); // null
20 const vec_len = simd.suggestVectorLength(u8) orelse return for (str) |b| { 22 try testing.expectEqual(@as(i3, -1), codePointWidth(0x8)); // \b
21 if (b > 127) break false; 23 try testing.expectEqual(@as(i3, -1), codePointWidth(0x7f)); // DEL
22 } else true; 24 try testing.expectEqual(@as(i3, 0), codePointWidth(0x0005)); // Cf
25 try testing.expectEqual(@as(i3, 0), codePointWidth(0x0007)); // \a BEL
26 try testing.expectEqual(@as(i3, 0), codePointWidth(0x000A)); // \n LF
27 try testing.expectEqual(@as(i3, 0), codePointWidth(0x000B)); // \v VT
28 try testing.expectEqual(@as(i3, 0), codePointWidth(0x000C)); // \f FF
29 try testing.expectEqual(@as(i3, 0), codePointWidth(0x000D)); // \r CR
30 try testing.expectEqual(@as(i3, 0), codePointWidth(0x000E)); // SQ
31 try testing.expectEqual(@as(i3, 0), codePointWidth(0x000F)); // SI
23 32
24 const Vec = @Vector(vec_len, u8); 33 try testing.expectEqual(@as(i3, 0), codePointWidth(0x070F)); // Cf
25 var remaining = str; 34 try testing.expectEqual(@as(i3, 1), codePointWidth(0x0603)); // Cf Arabic
26 35
27 while (true) { 36 try testing.expectEqual(@as(i3, 1), codePointWidth(0x00AD)); // soft-hyphen
28 if (remaining.len < vec_len) return for (remaining) |b| { 37 try testing.expectEqual(@as(i3, 2), codePointWidth(0x2E3A)); // two-em dash
29 if (b > 127) break false; 38 try testing.expectEqual(@as(i3, 3), codePointWidth(0x2E3B)); // three-em dash
30 } else true;
31 39
32 const v1 = remaining[0..vec_len].*; 40 try testing.expectEqual(@as(i3, 1), codePointWidth(0x00BD)); // ambiguous halfwidth
33 const v2: Vec = @splat(127);
34 if (@reduce(.Or, v1 > v2)) return false;
35 remaining = remaining[vec_len..];
36 }
37 41
38 return true; 42 try testing.expectEqual(@as(i3, 1), codePointWidth('é'));
43 try testing.expectEqual(@as(i3, 2), codePointWidth('😊'));
44 try testing.expectEqual(@as(i3, 2), codePointWidth('统'));
39} 45}
40 46
41/// strWidth returns the total display width of `str` as the number of cells 47/// strWidth returns the total display width of `str` as the number of cells
@@ -44,7 +50,7 @@ pub fn strWidth(str: []const u8) usize {
44 var total: isize = 0; 50 var total: isize = 0;
45 51
46 // ASCII fast path 52 // ASCII fast path
47 if (isAsciiOnly(str)) { 53 if (ascii.isAsciiOnly(str)) {
48 for (str) |b| total += codePointWidth(b); 54 for (str) |b| total += codePointWidth(b);
49 return @intCast(@max(0, total)); 55 return @intCast(@max(0, total));
50 } 56 }
@@ -79,32 +85,7 @@ pub fn strWidth(str: []const u8) usize {
79 return @intCast(@max(0, total)); 85 return @intCast(@max(0, total));
80} 86}
81 87
82test "display_width Width" { 88test "strWidth" {
83 try testing.expectEqual(@as(i3, 0), codePointWidth(0x0000)); // null
84 try testing.expectEqual(@as(i3, -1), codePointWidth(0x8)); // \b
85 try testing.expectEqual(@as(i3, -1), codePointWidth(0x7f)); // DEL
86 try testing.expectEqual(@as(i3, 0), codePointWidth(0x0005)); // Cf
87 try testing.expectEqual(@as(i3, 0), codePointWidth(0x0007)); // \a BEL
88 try testing.expectEqual(@as(i3, 0), codePointWidth(0x000A)); // \n LF
89 try testing.expectEqual(@as(i3, 0), codePointWidth(0x000B)); // \v VT
90 try testing.expectEqual(@as(i3, 0), codePointWidth(0x000C)); // \f FF
91 try testing.expectEqual(@as(i3, 0), codePointWidth(0x000D)); // \r CR
92 try testing.expectEqual(@as(i3, 0), codePointWidth(0x000E)); // SQ
93 try testing.expectEqual(@as(i3, 0), codePointWidth(0x000F)); // SI
94
95 try testing.expectEqual(@as(i3, 0), codePointWidth(0x070F)); // Cf
96 try testing.expectEqual(@as(i3, 1), codePointWidth(0x0603)); // Cf Arabic
97
98 try testing.expectEqual(@as(i3, 1), codePointWidth(0x00AD)); // soft-hyphen
99 try testing.expectEqual(@as(i3, 2), codePointWidth(0x2E3A)); // two-em dash
100 try testing.expectEqual(@as(i3, 3), codePointWidth(0x2E3B)); // three-em dash
101
102 try testing.expectEqual(@as(i3, 1), codePointWidth(0x00BD)); // ambiguous halfwidth
103
104 try testing.expectEqual(@as(i3, 1), codePointWidth('é'));
105 try testing.expectEqual(@as(i3, 2), codePointWidth('😊'));
106 try testing.expectEqual(@as(i3, 2), codePointWidth('统'));
107
108 try testing.expectEqual(@as(usize, 5), strWidth("Hello\r\n")); 89 try testing.expectEqual(@as(usize, 5), strWidth("Hello\r\n"));
109 try testing.expectEqual(@as(usize, 1), strWidth("\u{0065}\u{0301}")); 90 try testing.expectEqual(@as(usize, 1), strWidth("\u{0065}\u{0301}"));
110 try testing.expectEqual(@as(usize, 2), strWidth("\u{1F476}\u{1F3FF}\u{0308}\u{200D}\u{1F476}\u{1F3FF}")); 91 try testing.expectEqual(@as(usize, 2), strWidth("\u{1F476}\u{1F3FF}\u{0308}\u{200D}\u{1F476}\u{1F3FF}"));
@@ -155,3 +136,225 @@ test "display_width Width" {
155 try testing.expectEqual(@as(usize, 17), strWidth("슬라바 우크라이나")); 136 try testing.expectEqual(@as(usize, 17), strWidth("슬라바 우크라이나"));
156 try testing.expectEqual(@as(usize, 1), strWidth("\u{378}")); 137 try testing.expectEqual(@as(usize, 1), strWidth("\u{378}"));
157} 138}
139
/// Centers `str` in a new string of width `total_width` (in display cells),
/// using `pad` as padding on both sides.
///
/// As many whole copies of `pad` as fit into the free width are used. When
/// that count is odd, the right side receives the additional pad. With a
/// single-cell `pad` the result always fills `total_width` exactly; with a
/// wider `pad` the result may fall short by fewer than `pad`'s width in cells,
/// but it never exceeds `total_width`.
///
/// If `str` already has width `total_width`, a copy of `str` is returned and
/// `pad` is not examined.
///
/// Errors:
/// - `error.StrTooLong` if `str` is wider than `total_width`.
/// - `error.ZeroWidthPad` if `pad` has no display width (e.g. it is empty).
/// - `error.PadTooLong` if `pad` is wider than half of the free width
///   (rounded down), i.e. it does not fit at least once on each side.
/// - Allocation failures from `allocator` are propagated.
///
/// Caller must free returned bytes with `allocator`.
pub fn center(
    allocator: mem.Allocator,
    str: []const u8,
    total_width: usize,
    pad: []const u8,
) ![]u8 {
    const str_width = strWidth(str);
    if (str_width > total_width) return error.StrTooLong;
    if (str_width == total_width) return try allocator.dupe(u8, str);

    const pad_width = strWidth(pad);
    // A pad without display width can never fill the margin, and it would be
    // the divisor in the pad count below.
    if (pad_width == 0) return error.ZeroWidthPad;

    const free_width = total_width - str_width;
    // Each side must have room for at least one whole pad.
    if (pad_width > free_width / 2) return error.PadTooLong;

    // Count pads from the free width rather than from parity, so multi-cell
    // pads neither overflow nor needlessly underfill the requested width.
    const pads = free_width / pad_width;
    const left_pads = pads / 2;
    const right_pads = pads - left_pads;

    const result = try allocator.alloc(u8, pads * pad.len + str.len);
    var offset: usize = 0;

    for (0..left_pads) |_| {
        @memcpy(result[offset..][0..pad.len], pad);
        offset += pad.len;
    }

    @memcpy(result[offset..][0..str.len], str);
    offset += str.len;

    for (0..right_pads) |_| {
        @memcpy(result[offset..][0..pad.len], pad);
        offset += pad.len;
    }

    return result;
}
182
test "center" {
    const allocator = testing.allocator;

    const cases = [_]struct {
        str: []const u8,
        total_width: usize,
        pad: []const u8,
        want: []const u8,
    }{
        // Input and width both have odd length
        .{ .str = "abc", .total_width = 9, .pad = "*", .want = "***abc***" },
        // Input and width both have even length
        .{ .str = "w😊w", .total_width = 10, .pad = "-", .want = "---w😊w---" },
        // Input has even length, width has odd length
        .{ .str = "1234", .total_width = 9, .pad = "-", .want = "--1234---" },
        // Input has odd length, width has even length
        .{ .str = "123", .total_width = 8, .pad = "-", .want = "--123---" },
        // Input is the same length as the width
        .{ .str = "123", .total_width = 3, .pad = "-", .want = "123" },
        // Input is empty
        .{ .str = "", .total_width = 3, .pad = "-", .want = "---" },
        // Input is empty and width is zero
        .{ .str = "", .total_width = 0, .pad = "-", .want = "" },
    };

    for (cases) |case| {
        const centered = try center(allocator, case.str, case.total_width, case.pad);
        // Free on every exit path so a failed expectation does not also
        // show up as a leak in the testing allocator's report.
        defer allocator.free(centered);
        try testing.expectEqualSlices(u8, case.want, centered);
    }

    // Input is longer than the width, which is an error
    try testing.expectError(error.StrTooLong, center(allocator, "123", 2, "-"));
}
224
/// padLeft returns a new string of width `total_width` (in display cells)
/// consisting of `str` preceded by as many whole copies of `pad` as fit into
/// the remaining width.
///
/// If `str` already has width `total_width`, a copy of `str` is returned and
/// `pad` is not examined.
///
/// Errors:
/// - `error.StrTooLong` if `str` is wider than `total_width`.
/// - `error.ZeroWidthPad` if `pad` has no display width (e.g. it is empty).
/// - `error.PadTooLong` if `pad` is wider than the remaining width.
/// - Allocation failures from `allocator` are propagated.
///
/// Caller must free returned bytes with `allocator`.
pub fn padLeft(
    allocator: std.mem.Allocator,
    str: []const u8,
    total_width: usize,
    pad: []const u8,
) ![]u8 {
    const str_width = strWidth(str);
    if (str_width > total_width) return error.StrTooLong;
    // Nothing to pad: hand back a copy instead of rejecting the pad.
    if (str_width == total_width) return try allocator.dupe(u8, str);

    const pad_width = strWidth(pad);
    // A pad without display width can never fill the margin, and it would be
    // the divisor in the pad count below.
    if (pad_width == 0) return error.ZeroWidthPad;

    const margin_width = total_width - str_width;
    if (pad_width > margin_width) return error.PadTooLong;

    const pads = margin_width / pad_width;
    const pad_bytes = pads * pad.len;

    const result = try allocator.alloc(u8, pad_bytes + str.len);

    // `pad.len` is non-zero here because `pad` has a non-zero display width.
    var offset: usize = 0;
    while (offset < pad_bytes) : (offset += pad.len) {
        @memcpy(result[offset..][0..pad.len], pad);
    }

    @memcpy(result[pad_bytes..], str);

    return result;
}
257
test "padLeft" {
    const allocator = testing.allocator;

    // Each result lives in its own scope with its own `defer`, so no buffer
    // is ever freed twice, even when a later call returns an error.
    {
        const right_aligned = try padLeft(allocator, "abc", 9, "*");
        defer allocator.free(right_aligned);
        try testing.expectEqualSlices(u8, "******abc", right_aligned);
    }

    {
        const right_aligned = try padLeft(allocator, "w😊w", 10, "-");
        defer allocator.free(right_aligned);
        try testing.expectEqualSlices(u8, "------w😊w", right_aligned);
    }
}
269
/// padRight returns a new string of width `total_width` (in display cells)
/// consisting of `str` followed by as many whole copies of `pad` as fit into
/// the remaining width.
///
/// If `str` already has width `total_width`, a copy of `str` is returned and
/// `pad` is not examined.
///
/// Errors:
/// - `error.StrTooLong` if `str` is wider than `total_width`.
/// - `error.ZeroWidthPad` if `pad` has no display width (e.g. it is empty).
/// - `error.PadTooLong` if `pad` is wider than the remaining width.
/// - Allocation failures from `allocator` are propagated.
///
/// Caller must free returned bytes with `allocator`.
pub fn padRight(
    allocator: std.mem.Allocator,
    str: []const u8,
    total_width: usize,
    pad: []const u8,
) ![]u8 {
    const str_width = strWidth(str);
    if (str_width > total_width) return error.StrTooLong;
    // Nothing to pad: hand back a copy instead of rejecting the pad.
    if (str_width == total_width) return try allocator.dupe(u8, str);

    const pad_width = strWidth(pad);
    // A pad without display width can never fill the margin, and it would be
    // the divisor in the pad count below.
    if (pad_width == 0) return error.ZeroWidthPad;

    const margin_width = total_width - str_width;
    if (pad_width > margin_width) return error.PadTooLong;

    const pads = margin_width / pad_width;

    const result = try allocator.alloc(u8, str.len + pads * pad.len);

    @memcpy(result[0..str.len], str);

    // `pad.len` is non-zero here because `pad` has a non-zero display width.
    var offset: usize = str.len;
    while (offset < result.len) : (offset += pad.len) {
        @memcpy(result[offset..][0..pad.len], pad);
    }

    return result;
}
303
test "padRight" {
    const allocator = testing.allocator;

    // Each result lives in its own scope with its own `defer`, so no buffer
    // is ever freed twice, even when a later call returns an error.
    {
        const left_aligned = try padRight(allocator, "abc", 9, "*");
        defer allocator.free(left_aligned);
        try testing.expectEqualSlices(u8, "abc******", left_aligned);
    }

    {
        const left_aligned = try padRight(allocator, "w😊w", 10, "-");
        defer allocator.free(left_aligned);
        try testing.expectEqualSlices(u8, "w😊w------", left_aligned);
    }
}
315
/// Wraps a string approximately at the given number of columns per line.
///
/// `str` is split into words on spaces, carriage returns and line feeds, so
/// line breaks already present in the input are not preserved. Each word is
/// written followed by a single space. A line feed is inserted after a word
/// once the running line width (in display cells, counting that space)
/// exceeds `columns` or comes within `threshold` cells of it. Lines in the
/// output therefore keep the space before their line feed.
///
/// `threshold` defines how far the last column of the last word can be
/// from the edge.
///
/// The separator after the final word (its space and, if one was emitted,
/// its line feed) is removed. Input without any words yields an empty slice.
///
/// Caller must free returned bytes with `allocator`.
pub fn wrap(
    allocator: std.mem.Allocator,
    str: []const u8,
    columns: usize,
    threshold: usize,
) ![]u8 {
    var result = std.ArrayList(u8).init(allocator);
    defer result.deinit();

    var line_iter = mem.tokenizeAny(u8, str, "\r\n");
    var line_width: usize = 0;

    while (line_iter.next()) |line| {
        var word_iter = mem.tokenizeScalar(u8, line, ' ');

        while (word_iter.next()) |word| {
            try result.appendSlice(word);
            try result.append(' ');
            line_width += strWidth(word) + 1;

            // The first operand guards the subtraction against underflow.
            if (line_width > columns or columns - line_width <= threshold) {
                try result.append('\n');
                line_width = 0;
            }
        }
    }

    // Remove only the separator that actually follows the final word: an
    // optional line feed, then the space. Checking each byte keeps the last
    // word intact when no line feed was emitted, and makes empty output safe.
    var end = result.items.len;
    if (end > 0 and result.items[end - 1] == '\n') end -= 1;
    if (end > 0 and result.items[end - 1] == ' ') end -= 1;
    result.shrinkRetainingCapacity(end);

    return try result.toOwnedSlice();
}
352
test "wrap" {
    // The "\r\n" in the input is treated as a word separator; wrapped lines
    // keep the space that precedes each inserted line feed.
    const want = "The quick \nbrown fox \njumped \nover the \nlazy dog!";

    const got = try wrap(
        testing.allocator,
        "The quick brown fox\r\njumped over the lazy dog!",
        10,
        3,
    );
    defer testing.allocator.free(got);

    try testing.expectEqualStrings(want, got);
}