summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorGravatar Sam Atman2025-04-30 12:02:17 -0400
committerGravatar Sam Atman2025-04-30 12:02:17 -0400
commit7a212f5ec5aabf016d17d3ed28649e7982b810ef (patch)
treec6b06b0a0afb0ed2ba18f147d9ee200e5eee09a1 /src
parentFactor out 'Data' for grapheme and DisplayWidth (diff)
downloadzg-7a212f5ec5aabf016d17d3ed28649e7982b810ef.tar.gz
zg-7a212f5ec5aabf016d17d3ed28649e7982b810ef.tar.xz
zg-7a212f5ec5aabf016d17d3ed28649e7982b810ef.zip
grapheme now Graphemes, Data files gone
Diffstat (limited to 'src')
-rw-r--r--src/GraphemeData.zig87
-rw-r--r--src/Graphemes.zig (renamed from src/grapheme.zig)0
-rw-r--r--src/WidthData.zig102
-rw-r--r--src/unicode_tests.zig8
4 files changed, 4 insertions, 193 deletions
diff --git a/src/GraphemeData.zig b/src/GraphemeData.zig
deleted file mode 100644
index df025cb..0000000
--- a/src/GraphemeData.zig
+++ /dev/null
@@ -1,87 +0,0 @@
1const std = @import("std");
2const builtin = @import("builtin");
3const compress = std.compress;
4const mem = std.mem;
5
6/// Indic syllable type, as returned by `indic` (decoded from bits 1-3 of a stage-3 table byte).
7pub const Indic = enum { // declaration order is significant: values are produced with @enumFromInt
8 none,
9
10 Consonant,
11 Extend,
12 Linker,
13};
14
15/// Grapheme break property, as returned by `gbp` (decoded from the high 4 bits of a stage-3 table byte).
16pub const Gbp = enum { // declaration order is significant: values are produced with @enumFromInt
17 none,
18 Control,
19 CR,
20 Extend,
21 L,
22 LF,
23 LV,
24 LVT,
25 Prepend,
26 Regional_Indicator,
27 SpacingMark,
28 T,
29 V,
30 ZWJ,
31};
32
33s1: []u16 = undefined, // stage 1: indexed by `cp >> 8`; the value is a base offset into `s2`
34s2: []u16 = undefined, // stage 2: indexed by `s1[cp >> 8] + (cp & 0xff)`; the value indexes `s3`
35s3: []u8 = undefined, // stage 3: packed byte; Gbp in bits 4-7, Indic in bits 1-3, emoji flag in bit 0
36
37const Self = @This(); // this file is itself the struct type
38
39pub inline fn init(allocator: mem.Allocator) mem.Allocator.Error!Self { // builds the three lookup tables; release them with `deinit` using the same allocator
40 const decompressor = compress.flate.inflate.decompressor;
41 const in_bytes = @embedFile("gbp"); // table data is embedded at compile time
42 var in_fbs = std.io.fixedBufferStream(in_bytes);
43 var in_decomp = decompressor(.raw, in_fbs.reader()); // `.raw` container selected for the inflate stream
44 var reader = in_decomp.reader();
45
46 const endian = builtin.cpu.arch.endian(); // integers are read in the target CPU's byte order
47
48 var self = Self{};
49
50 const s1_len: u16 = reader.readInt(u16, endian) catch unreachable; // each table is preceded by its u16 length; read failures are asserted impossible
51 self.s1 = try allocator.alloc(u16, s1_len); // allocation failure is the only error this function returns
52 errdefer allocator.free(self.s1); // frees s1 if a later allocation fails
53 for (0..s1_len) |i| self.s1[i] = reader.readInt(u16, endian) catch unreachable;
54
55 const s2_len: u16 = reader.readInt(u16, endian) catch unreachable;
56 self.s2 = try allocator.alloc(u16, s2_len);
57 errdefer allocator.free(self.s2); // frees s2 if the s3 allocation fails
58 for (0..s2_len) |i| self.s2[i] = reader.readInt(u16, endian) catch unreachable;
59
60 const s3_len: u16 = reader.readInt(u16, endian) catch unreachable;
61 self.s3 = try allocator.alloc(u8, s3_len);
62 errdefer allocator.free(self.s3); // NOTE(review): no `try` follows in this function, so this errdefer cannot run as written
63 _ = reader.readAll(self.s3) catch unreachable; // the byte count returned by readAll is discarded
64
65 return self;
66}
67
68pub inline fn deinit(self: *const Self, allocator: mem.Allocator) void { // frees all three tables; `allocator` must be the one given to `init`
69 allocator.free(self.s1);
70 allocator.free(self.s2);
71 allocator.free(self.s3);
72}
73
74/// Looks up the grapheme break property for a code point (the high 4 bits of its stage-3 byte).
75pub inline fn gbp(self: Self, cp: u21) Gbp { // no range check on `cp` here; assumes the tables cover every index reached — TODO confirm
76 return @enumFromInt(self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]] >> 4);
77}
78
79/// Looks up the Indic syllable type for a code point (bits 1-3 of its stage-3 byte).
80pub inline fn indic(self: Self, cp: u21) Indic { // no range check on `cp` here; assumes the tables cover every index reached — TODO confirm
81 return @enumFromInt((self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]] >> 1) & 0x7);
82}
83
84/// Looks up the emoji property for a code point (bit 0 of its stage-3 byte).
85pub inline fn isEmoji(self: Self, cp: u21) bool { // no range check on `cp` here; assumes the tables cover every index reached — TODO confirm
86 return self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]] & 1 == 1;
87}
diff --git a/src/grapheme.zig b/src/Graphemes.zig
index 79cd2c6..79cd2c6 100644
--- a/src/grapheme.zig
+++ b/src/Graphemes.zig
diff --git a/src/WidthData.zig b/src/WidthData.zig
deleted file mode 100644
index ca7eaf0..0000000
--- a/src/WidthData.zig
+++ /dev/null
@@ -1,102 +0,0 @@
1const std = @import("std");
2const builtin = @import("builtin");
3const compress = std.compress;
4const mem = std.mem;
5const testing = std.testing;
6
7const Graphemes = @import("Graphemes");
8
9g_data: Graphemes, // grapheme data; owned or borrowed according to `owns_gdata`
10s1: []u16 = undefined, // stage 1: indexed by `cp >> 8`; the value is a base offset into `s2`
11s2: []i4 = undefined, // stage 2: width stored at `s1[cp >> 8] + (cp & 0xff)`
12owns_gdata: bool, // when true, `deinit` also calls `g_data.deinit`
13
14const Self = @This(); // this file is itself the struct type
15
16pub fn init(allocator: mem.Allocator) mem.Allocator.Error!Self { // builds the width tables and an owned Graphemes instance; release with `deinit`
17 var self: Self = try Self.setup(allocator); // fills s1 and s2 only
18 errdefer { // frees the width tables if Graphemes.init fails below
19 allocator.free(self.s1);
20 allocator.free(self.s2);
21 }
22 self.owns_gdata = true; // `deinit` will release g_data as well
23 self.g_data = try Graphemes.init(allocator);
24 errdefer self.g_data.deinit(allocator); // NOTE(review): no `try` follows in this function, so this errdefer cannot run as written
25 return self;
26}
27
28pub fn initWithGraphemeData(allocator: mem.Allocator, g_data: Graphemes) mem.Allocator.Error!Self { // like `init`, but stores the caller's `g_data` instead of creating one
29 var self = try Self.setup(allocator); // fills s1 and s2 only
30 self.g_data = g_data;
31 self.owns_gdata = false; // `deinit` will not call g_data.deinit; the caller keeps that responsibility
32 return self;
33}
34
35// Allocates and fills the width tables `s1` and `s2`, leaving `g_data` and `owns_gdata` undefined for the caller to set.
36fn setup(allocator: mem.Allocator) mem.Allocator.Error!Self {
37 const decompressor = compress.flate.inflate.decompressor;
38 const in_bytes = @embedFile("dwp"); // table data is embedded at compile time
39 var in_fbs = std.io.fixedBufferStream(in_bytes);
40 var in_decomp = decompressor(.raw, in_fbs.reader()); // `.raw` container selected for the inflate stream
41 var reader = in_decomp.reader();
42
43 const endian = builtin.cpu.arch.endian(); // integers are read in the target CPU's byte order
44
45 var self: Self = undefined; // only s1 and s2 are assigned below
46
47 const stage_1_len: u16 = reader.readInt(u16, endian) catch unreachable; // each table is preceded by its u16 length; read failures are asserted impossible
48 self.s1 = try allocator.alloc(u16, stage_1_len);
49 errdefer allocator.free(self.s1); // frees s1 if the s2 allocation fails
50 for (0..stage_1_len) |i| self.s1[i] = reader.readInt(u16, endian) catch unreachable;
51
52 const stage_2_len: u16 = reader.readInt(u16, endian) catch unreachable;
53 self.s2 = try allocator.alloc(i4, stage_2_len);
54 errdefer allocator.free(self.s2); // NOTE(review): no `try` follows in this function, so this errdefer cannot run as written
55 for (0..stage_2_len) |i| self.s2[i] = @intCast(reader.readInt(i8, endian) catch unreachable); // widths are read as i8 and narrowed to i4, so each must fit in -8..7
56
57 return self;
58}
59
60pub fn deinit(self: *const Self, allocator: mem.Allocator) void { // frees the width tables; `allocator` must be the one used to build them
61 allocator.free(self.s1);
62 allocator.free(self.s2);
63 if (self.owns_gdata) self.g_data.deinit(allocator); // g_data is released only when this instance created it
64}
65
66/// codePointWidth returns the number of cells `cp` requires when rendered
67/// in a fixed-pitch font (i.e. a terminal screen). This can range from -1 to
68/// 3, where BACKSPACE and DELETE return -1 and 3-em-dash returns 3. C0/C1
69/// control codes return 0. If `cjk` is enabled, ambiguous code points return 2,
70/// otherwise they return 1. NOTE(review): `cjk` is not a parameter of this function; presumably it is a build-time setting reflected in the embedded table — confirm.
71pub fn codePointWidth(self: Self, cp: u21) i4 { // plain two-stage table lookup; no range check on `cp` here
72 return self.s2[self.s1[cp >> 8] + (cp & 0xff)];
73}
74
75test "codePointWidth" { // spot-checks widths for control codes, format characters, dashes, and wide characters
76 const wd = try Self.init(std.testing.allocator);
77 defer wd.deinit(std.testing.allocator); // release the tables and the owned grapheme data when the test ends
78 try testing.expectEqual(@as(i4, 0), wd.codePointWidth(0x0000)); // null
79 try testing.expectEqual(@as(i4, -1), wd.codePointWidth(0x8)); // \b
80 try testing.expectEqual(@as(i4, -1), wd.codePointWidth(0x7f)); // DEL
81 try testing.expectEqual(@as(i4, 0), wd.codePointWidth(0x0005)); // Cf
82 try testing.expectEqual(@as(i4, 0), wd.codePointWidth(0x0007)); // \a BEL
83 try testing.expectEqual(@as(i4, 0), wd.codePointWidth(0x000A)); // \n LF
84 try testing.expectEqual(@as(i4, 0), wd.codePointWidth(0x000B)); // \v VT
85 try testing.expectEqual(@as(i4, 0), wd.codePointWidth(0x000C)); // \f FF
86 try testing.expectEqual(@as(i4, 0), wd.codePointWidth(0x000D)); // \r CR
87 try testing.expectEqual(@as(i4, 0), wd.codePointWidth(0x000E)); // SQ
88 try testing.expectEqual(@as(i4, 0), wd.codePointWidth(0x000F)); // SI
89
90 try testing.expectEqual(@as(i4, 0), wd.codePointWidth(0x070F)); // Cf
91 try testing.expectEqual(@as(i4, 1), wd.codePointWidth(0x0603)); // Cf Arabic
92
93 try testing.expectEqual(@as(i4, 1), wd.codePointWidth(0x00AD)); // soft-hyphen
94 try testing.expectEqual(@as(i4, 2), wd.codePointWidth(0x2E3A)); // two-em dash
95 try testing.expectEqual(@as(i4, 3), wd.codePointWidth(0x2E3B)); // three-em dash
96
97 try testing.expectEqual(@as(i4, 1), wd.codePointWidth(0x00BD)); // ambiguous halfwidth
98
99 try testing.expectEqual(@as(i4, 1), wd.codePointWidth('é'));
100 try testing.expectEqual(@as(i4, 2), wd.codePointWidth('😊'));
101 try testing.expectEqual(@as(i4, 2), wd.codePointWidth('统'));
102}
diff --git a/src/unicode_tests.zig b/src/unicode_tests.zig
index de1b9ec..3cb5df5 100644
--- a/src/unicode_tests.zig
+++ b/src/unicode_tests.zig
@@ -7,10 +7,10 @@ const mem = std.mem;
7const testing = std.testing; 7const testing = std.testing;
8const unicode = std.unicode; 8const unicode = std.unicode;
9 9
10const grapheme = @import("grapheme"); 10const grapheme = @import("Graphemes");
11const Grapheme = @import("grapheme").Grapheme; 11const Grapheme = @import("Graphemes").Grapheme;
12const Graphemes = @import("grapheme"); 12const Graphemes = @import("Graphemes");
13const GraphemeIterator = @import("grapheme").Iterator; 13const GraphemeIterator = @import("Graphemes").Iterator;
14const Normalize = @import("Normalize"); 14const Normalize = @import("Normalize");
15 15
16comptime { 16comptime {