summaryrefslogtreecommitdiff
path: root/src/WidthData.zig
diff options
context:
space:
mode:
Diffstat (limited to 'src/WidthData.zig')
-rw-r--r--src/WidthData.zig82
1 files changed, 82 insertions, 0 deletions
diff --git a/src/WidthData.zig b/src/WidthData.zig
new file mode 100644
index 0000000..32f8658
--- /dev/null
+++ b/src/WidthData.zig
@@ -0,0 +1,82 @@
1const std = @import("std");
2const builtin = @import("builtin");
3const compress = std.compress;
4const mem = std.mem;
5const testing = std.testing;
6
7const GraphemeData = @import("GraphemeData");
8
9allocator: mem.Allocator,
10g_data: GraphemeData,
11s1: []u16 = undefined,
12s2: []i3 = undefined,
13
14const Self = @This();
15
/// Builds the two-stage width lookup tables from the embedded,
/// deflate-compressed "dwp" payload.
///
/// Payload layout as consumed here: a u16 count followed by that many u16
/// stage-1 entries, then a u16 count followed by that many i8 stage-2
/// entries, each narrowed to i3. All integers are read using the target
/// CPU's native byte order.
///
/// Every allocation goes through `allocator`. On success the caller owns the
/// returned value and must release it with `deinit`. On failure everything
/// acquired so far is released before the error is propagated.
pub fn init(allocator: mem.Allocator) !Self {
    const decompressor = compress.deflate.decompressor;
    const in_bytes = @embedFile("dwp");
    var in_fbs = std.io.fixedBufferStream(in_bytes);
    var in_decomp = try decompressor(allocator, in_fbs.reader(), null);
    defer in_decomp.deinit();
    var reader = in_decomp.reader();

    // NOTE(review): native endianness is only correct if the payload was
    // produced with the same byte order as the target — confirm against the
    // generator when cross-compiling.
    const endian = builtin.cpu.arch.endian();

    var self = Self{
        .allocator = allocator,
        .g_data = try GraphemeData.init(allocator),
    };
    // From here on any failing `try` must undo what was already acquired,
    // otherwise the grapheme data and the tables would leak.
    errdefer self.g_data.deinit();

    const stage_1_len: u16 = try reader.readInt(u16, endian);
    self.s1 = try allocator.alloc(u16, stage_1_len);
    errdefer allocator.free(self.s1);
    for (self.s1) |*entry| entry.* = try reader.readInt(u16, endian);

    const stage_2_len: u16 = try reader.readInt(u16, endian);
    self.s2 = try allocator.alloc(i3, stage_2_len);
    errdefer allocator.free(self.s2);
    // The narrowing cast relies on every stored width fitting in i3 (-4..3).
    for (self.s2) |*entry| entry.* = @intCast(try reader.readInt(i8, endian));

    return self;
}
41
/// Frees the stage-1 and stage-2 tables with the allocator stored at
/// construction time, then tears down the embedded grapheme data.
pub fn deinit(self: *Self) void {
    const gpa = self.allocator;
    gpa.free(self.s1);
    gpa.free(self.s2);
    self.g_data.deinit();
}
47
/// Returns the number of cells `cp` occupies when rendered in a fixed-pitch
/// font such as a terminal screen. Per the accompanying test the result
/// ranges from -1 (BACKSPACE, DELETE) to 3 (three-em dash), with C0 control
/// codes such as LF and CR yielding 0.
///
/// The lookup is two-staged: the high bits of `cp` (`cp >> 8`) select a
/// block start in `s1`, and the low 8 bits select the entry inside that
/// block in `s2`.
pub inline fn codePointWidth(self: Self, cp: u21) i3 {
    const block_start = self.s1[cp >> 8];
    const offset_in_block = cp & 0xff;
    return self.s2[block_start + offset_in_block];
}
56
test "codePointWidth" {
    // codePointWidth is a method on Self, so the lookup tables have to be
    // loaded before it can be called; testing.allocator reports any leak.
    var width_data = try init(testing.allocator);
    defer width_data.deinit();

    try testing.expectEqual(@as(i3, 0), width_data.codePointWidth(0x0000)); // null
    try testing.expectEqual(@as(i3, -1), width_data.codePointWidth(0x8)); // \b
    try testing.expectEqual(@as(i3, -1), width_data.codePointWidth(0x7f)); // DEL
    try testing.expectEqual(@as(i3, 0), width_data.codePointWidth(0x0005)); // ENQ
    try testing.expectEqual(@as(i3, 0), width_data.codePointWidth(0x0007)); // \a BEL
    try testing.expectEqual(@as(i3, 0), width_data.codePointWidth(0x000A)); // \n LF
    try testing.expectEqual(@as(i3, 0), width_data.codePointWidth(0x000B)); // \v VT
    try testing.expectEqual(@as(i3, 0), width_data.codePointWidth(0x000C)); // \f FF
    try testing.expectEqual(@as(i3, 0), width_data.codePointWidth(0x000D)); // \r CR
    try testing.expectEqual(@as(i3, 0), width_data.codePointWidth(0x000E)); // SO
    try testing.expectEqual(@as(i3, 0), width_data.codePointWidth(0x000F)); // SI

    try testing.expectEqual(@as(i3, 0), width_data.codePointWidth(0x070F)); // Cf
    try testing.expectEqual(@as(i3, 1), width_data.codePointWidth(0x0603)); // Cf Arabic

    try testing.expectEqual(@as(i3, 1), width_data.codePointWidth(0x00AD)); // soft-hyphen
    try testing.expectEqual(@as(i3, 2), width_data.codePointWidth(0x2E3A)); // two-em dash
    try testing.expectEqual(@as(i3, 3), width_data.codePointWidth(0x2E3B)); // three-em dash

    try testing.expectEqual(@as(i3, 1), width_data.codePointWidth(0x00BD)); // ambiguous halfwidth

    try testing.expectEqual(@as(i3, 1), width_data.codePointWidth('é'));
    try testing.expectEqual(@as(i3, 2), width_data.codePointWidth('😊'));
    try testing.expectEqual(@as(i3, 2), width_data.codePointWidth('统'));
}