summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/display_width.zig 111
-rw-r--r-- src/main.zig 14
2 files changed, 120 insertions, 5 deletions
diff --git a/src/display_width.zig b/src/display_width.zig
new file mode 100644
index 0000000..e06aa8f
--- /dev/null
+++ b/src/display_width.zig
@@ -0,0 +1,111 @@
1const std = @import("std");
2const testing = std.testing;
3
4const CodePointIterator = @import("CodePoint").CodePointIterator;
5const GraphemeIterator = @import("Grapheme").GraphemeIterator;
6const dwp = @import("dwp");
7
/// Returns the number of cells `cp` occupies when rendered in a fixed-pitch
/// font (i.e. on a terminal screen).
///
/// The width is read from the two-stage `dwp` lookup table: the high bits of
/// `cp` (`cp >> 8`) select a block offset in `dwp.stage_1`, and the low eight
/// bits select the entry inside that block of `dwp.stage_2`.
///
/// Results range from -1 to 3: BACKSPACE and DELETE yield -1, the
/// three-em dash yields 3, and C0/C1 control codes yield 0.
///
/// NOTE(review): no range check is performed on `cp` here; presumably
/// `dwp.stage_1` only covers code points up to U+10FFFF, so larger `u21`
/// values would index past it. Confirm against the table generator.
pub fn codePointWidth(cp: u21) i3 {
    const block_offset = dwp.stage_1[cp >> 8];
    const index_in_block = cp & 0xff;
    return dwp.stage_2[block_offset + index_in_block];
}
16
/// Returns the display width of `str` in fixed-pitch cells.
///
/// `str` is walked one grapheme cluster at a time. Each cluster contributes
/// the width of its first code point whose `codePointWidth` is non-zero, or
/// nothing if every code point in the cluster is zero-width. If that code
/// point is immediately followed by U+FE0E (the text presentation selector),
/// the cluster counts as a single cell instead.
///
/// Widths are summed as signed values, so a negative `codePointWidth` pulls
/// the running total down; the final result is clamped and is never less
/// than zero.
fn strWidth(str: []const u8) usize {
    var total: isize = 0;
    var giter = GraphemeIterator.init(str);

    while (giter.next()) |gc| {
        var cp_iter = CodePointIterator{ .bytes = str[gc.offset..][0..gc.len] };

        // The cluster's width is decided by its first non-zero-width code
        // point, so stop scanning as soon as that one has been found rather
        // than decoding (and discarding) the rest of the cluster.
        const gc_width: isize = while (cp_iter.next()) |cp| {
            const w = codePointWidth(cp.code);
            if (w == 0) continue;

            // Look one code point ahead: an emoji text sequence (base
            // followed by U+FE0E) is rendered narrow. The look-ahead
            // consumes that code point, which is fine because nothing
            // after the deciding code point affects the width.
            if (cp_iter.next()) |ncp| {
                if (ncp.code == 0xFE0E) break 1;
            }

            break w;
        } else 0;

        total += gc_width;
    }

    // Negative contributions must never drive the result below zero.
    return if (total > 0) @intCast(total) else 0;
}
45
46test "display_width Width" {
47 try testing.expectEqual(@as(i3, 0), codePointWidth(0x0000)); // null
48 try testing.expectEqual(@as(i3, -1), codePointWidth(0x8)); // \b
49 try testing.expectEqual(@as(i3, -1), codePointWidth(0x7f)); // DEL
50 try testing.expectEqual(@as(i3, 0), codePointWidth(0x0005)); // ENQ (Cc control)
51 try testing.expectEqual(@as(i3, 0), codePointWidth(0x0007)); // \a BEL
52 try testing.expectEqual(@as(i3, 0), codePointWidth(0x000A)); // \n LF
53 try testing.expectEqual(@as(i3, 0), codePointWidth(0x000B)); // \v VT
54 try testing.expectEqual(@as(i3, 0), codePointWidth(0x000C)); // \f FF
55 try testing.expectEqual(@as(i3, 0), codePointWidth(0x000D)); // \r CR
56 try testing.expectEqual(@as(i3, 0), codePointWidth(0x000E)); // SO
57 try testing.expectEqual(@as(i3, 0), codePointWidth(0x000F)); // SI
58
59 try testing.expectEqual(@as(i3, 0), codePointWidth(0x070F)); // Cf
60 try testing.expectEqual(@as(i3, 1), codePointWidth(0x0603)); // Cf Arabic
61
62 try testing.expectEqual(@as(i3, 1), codePointWidth(0x00AD)); // soft-hyphen
63 try testing.expectEqual(@as(i3, 2), codePointWidth(0x2E3A)); // two-em dash
64 try testing.expectEqual(@as(i3, 3), codePointWidth(0x2E3B)); // three-em dash
65
66 try testing.expectEqual(@as(i3, 1), codePointWidth(0x00BD)); // ambiguous halfwidth
67
68 try testing.expectEqual(@as(i3, 1), codePointWidth('é'));
69 try testing.expectEqual(@as(i3, 2), codePointWidth('😊'));
70 try testing.expectEqual(@as(i3, 2), codePointWidth('统'));
71
72 try testing.expectEqual(@as(usize, 5), strWidth("Hello\r\n"));
73 try testing.expectEqual(@as(usize, 1), strWidth("\u{0065}\u{0301}"));
74 try testing.expectEqual(@as(usize, 2), strWidth("\u{1F476}\u{1F3FF}\u{0308}\u{200D}\u{1F476}\u{1F3FF}"));
75 try testing.expectEqual(@as(usize, 8), strWidth("Hello 😊"));
76 try testing.expectEqual(@as(usize, 8), strWidth("Héllo 😊"));
77 try testing.expectEqual(@as(usize, 8), strWidth("Héllo :)"));
78 try testing.expectEqual(@as(usize, 8), strWidth("Héllo 🇪🇸"));
79 try testing.expectEqual(@as(usize, 2), strWidth("\u{26A1}")); // Lone emoji
80 try testing.expectEqual(@as(usize, 1), strWidth("\u{26A1}\u{FE0E}")); // Text sequence
81 try testing.expectEqual(@as(usize, 2), strWidth("\u{26A1}\u{FE0F}")); // Presentation sequence
82 try testing.expectEqual(@as(usize, 0), strWidth("A\x08")); // Backspace
83 try testing.expectEqual(@as(usize, 0), strWidth("\x7FA")); // DEL
84 try testing.expectEqual(@as(usize, 0), strWidth("\x7FA\x08\x08")); // never less than 0
85
86 // Cases taken from the wcwidth Python library tests. See: https://github.com/jquast/wcwidth/blob/master/tests/test_core.py
87 const empty = "";
88 try testing.expectEqual(@as(usize, 0), strWidth(empty));
89 const with_null = "hello\x00world";
90 try testing.expectEqual(@as(usize, 10), strWidth(with_null));
91 const hello_jp = "コンニチハ, セカイ!";
92 try testing.expectEqual(@as(usize, 19), strWidth(hello_jp));
93 const control = "\x1b[0m";
94 try testing.expectEqual(@as(usize, 3), strWidth(control));
95 const balinese = "\u{1B13}\u{1B28}\u{1B2E}\u{1B44}";
96 try testing.expectEqual(@as(usize, 3), strWidth(balinese));
97
98 // These commented-out tests require a new specification for complex scripts.
99 // See: https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf
100 // const jamo = "\u{1100}\u{1160}";
101 // try testing.expectEqual(@as(usize, 3), strWidth(jamo));
102 // const devanagari = "\u{0915}\u{094D}\u{0937}\u{093F}";
103 // try testing.expectEqual(@as(usize, 3), strWidth(devanagari));
104 // const tamil = "\u{0b95}\u{0bcd}\u{0bb7}\u{0bcc}";
105 // try testing.expectEqual(@as(usize, 5), strWidth(tamil));
106 // const kannada_1 = "\u{0cb0}\u{0ccd}\u{0c9d}\u{0cc8}";
107 // try testing.expectEqual(@as(usize, 3), strWidth(kannada_1));
108 // The following passes, but only by coincidence.
109 const kannada_2 = "\u{0cb0}\u{0cbc}\u{0ccd}\u{0c9a}";
110 try testing.expectEqual(@as(usize, 2), strWidth(kannada_2));
111}
diff --git a/src/main.zig b/src/main.zig
index fe49300..3e65c7b 100644
--- a/src/main.zig
+++ b/src/main.zig
@@ -1,7 +1,10 @@
1const std = @import("std"); 1const std = @import("std");
2 2
3// const GraphemeIterator = @import("ziglyph").GraphemeIterator; 3// const GraphemeIterator = @import("ziglyph").GraphemeIterator;
4const GraphemeIterator = @import("Grapheme").GraphemeIterator; 4// const GraphemeIterator = @import("Grapheme").GraphemeIterator;
5// const codePointWidth = @import("ziglyph").display_width.codePointWidth;
6const codePointWidth = @import("display_width").codePointWidth;
7const CodePointIterator = @import("CodePoint").CodePointIterator;
5 8
6pub fn main() !void { 9pub fn main() !void {
7 var gpa = std.heap.GeneralPurposeAllocator(.{}){}; 10 var gpa = std.heap.GeneralPurposeAllocator(.{}){};
@@ -11,14 +14,15 @@ pub fn main() !void {
11 const input = try std.fs.cwd().readFileAlloc(allocator, "lang_mix.txt", std.math.maxInt(u32)); 14 const input = try std.fs.cwd().readFileAlloc(allocator, "lang_mix.txt", std.math.maxInt(u32));
12 defer allocator.free(input); 15 defer allocator.free(input);
13 16
14 var result: usize = 0; 17 var result: isize = 0;
15 var iter = GraphemeIterator.init(input); 18 // var iter = GraphemeIterator.init(input);
19 var iter = CodePointIterator{ .bytes = input };
16 20
17 var timer = try std.time.Timer.start(); 21 var timer = try std.time.Timer.start();
18 22
19 // for (0..50) |_| { 23 // for (0..50) |_| {
20 while (iter.next()) |_| result += 1; 24 while (iter.next()) |cp| result += codePointWidth(@intCast(cp.code));
21 iter.cp_iter.i = 0; 25 // iter.cp_iter.i = 0;
22 // } 26 // }
23 27
24 std.debug.print("result: {}, took: {}\n", .{ result, timer.lap() / std.time.ns_per_ms }); 28 std.debug.print("result: {}, took: {}\n", .{ result, timer.lap() / std.time.ns_per_ms });