summaryrefslogtreecommitdiff
path: root/src/PropsData.zig
diff options
context:
space:
mode:
authorGravatar Sam Atman2025-04-30 15:32:34 -0400
committerGravatar Sam Atman2025-04-30 15:32:34 -0400
commit958c13ba442e7077a50d7163fdeb9bba378f95c2 (patch)
tree0727fd03ea2344ebbad842daa05b55ea0a143a6c /src/PropsData.zig
parentRemove FoldData, make CaseFolding (diff)
downloadzg-958c13ba442e7077a50d7163fdeb9bba378f95c2.tar.gz
zg-958c13ba442e7077a50d7163fdeb9bba378f95c2.tar.xz
zg-958c13ba442e7077a50d7163fdeb9bba378f95c2.zip
Rest of the Renamings
These get different names, but don't otherwise change.
Diffstat (limited to 'src/PropsData.zig')
-rw-r--r--src/PropsData.zig163
1 files changed, 0 insertions, 163 deletions
diff --git a/src/PropsData.zig b/src/PropsData.zig
deleted file mode 100644
index 46920be..0000000
--- a/src/PropsData.zig
+++ /dev/null
@@ -1,163 +0,0 @@
1const std = @import("std");
2const builtin = @import("builtin");
3const compress = std.compress;
4const mem = std.mem;
5const testing = std.testing;
6
/// Stage-1 index for the core-property lookup: indexed by `cp >> 8`; the
/// entry is added to the low 8 bits of `cp` to form an index into `core_s2`.
/// Filled by `init` (from the embedded "core_props" data) and freed by `deinit`;
/// `undefined` until then.
7core_s1: []u16 = undefined,
/// Stage-2 flag bytes for the core-property lookup. Bit masks tested by the
/// accessors: 1 = math, 2 = alphabetic, 4 = id start, 8 = id continue,
/// 16 = xid start, 32 = xid continue.
8core_s2: []u8 = undefined,
/// Stage-1 index for the property-list lookup (embedded "props" data);
/// same addressing scheme as `core_s1`, pointing into `props_s2`.
9props_s1: []u16 = undefined,
/// Stage-2 flag bytes for the property-list lookup. Bit masks tested by the
/// accessors: 1 = whitespace, 2 = hex digit, 4 = diacritic.
10props_s2: []u8 = undefined,
/// Stage-1 index for the numeric-type lookup (embedded "numeric" data);
/// same addressing scheme as `core_s1`, pointing into `num_s2`.
11num_s1: []u16 = undefined,
/// Stage-2 flag bytes for the numeric-type lookup. Bit masks tested by the
/// accessors: 1 = numeric, 2 = digit, 4 = decimal.
12num_s2: []u8 = undefined,
13
// This file is itself the struct type; `Self` names it for method signatures.
14const Self = @This();
15
/// One decoded pair of lookup tables read from a single embedded data file.
const Stages = struct {
    s1: []u16,
    s2: []u8,

    /// Releases both tables. `allocator` must be the one that allocated them.
    fn free(self: Stages, allocator: mem.Allocator) void {
        allocator.free(self.s1);
        allocator.free(self.s2);
    }
};

/// Inflates `compressed` (a raw deflate stream) and decodes its payload:
/// a `u16` stage-1 length, that many `u16` stage-1 entries, a `u16` stage-2
/// length, then that many stage-2 bytes.
///
/// Caller owns both returned slices. On any error nothing allocated here is
/// leaked. A stream that ends early yields `error.EndOfStream` instead of
/// leaving part of a table uninitialized.
fn loadStages(allocator: mem.Allocator, compressed: []const u8) !Stages {
    // NOTE(review): integers are read in the *target's* native byte order, so
    // the embedded data must have been written with that same order — confirm
    // against the generator when cross-compiling.
    const endian = builtin.cpu.arch.endian();

    var fbs = std.io.fixedBufferStream(compressed);
    var decomp = compress.flate.inflate.decompressor(.raw, fbs.reader());
    var reader = decomp.reader();

    const s1_len = try reader.readInt(u16, endian);
    const s1 = try allocator.alloc(u16, s1_len);
    errdefer allocator.free(s1);
    for (s1) |*entry| entry.* = try reader.readInt(u16, endian);

    const s2_len = try reader.readInt(u16, endian);
    const s2 = try allocator.alloc(u8, s2_len);
    errdefer allocator.free(s2);
    // `readNoEof` fails on a short read; `readAll` would return the short
    // count, which the previous code discarded.
    try reader.readNoEof(s2);

    return .{ .s1 = s1, .s2 = s2 };
}

/// Builds the property tables from the three embedded, deflate-compressed
/// data files ("core_props", "props" and "numeric").
///
/// All six tables are allocated with `allocator`; release them with
/// `deinit`, passing the same allocator. Returns an allocation or
/// decompression/read error if any table cannot be fully decoded, in which
/// case everything allocated so far is freed.
pub fn init(allocator: mem.Allocator) !Self {
    // Process DerivedCoreProperties.txt
    const core = try loadStages(allocator, @embedFile("core_props"));
    errdefer core.free(allocator);

    // Process PropList.txt
    const props = try loadStages(allocator, @embedFile("props"));
    errdefer props.free(allocator);

    // Process DerivedNumericType.txt
    const num = try loadStages(allocator, @embedFile("numeric"));

    return Self{
        .core_s1 = core.s1,
        .core_s2 = core.s2,
        .props_s1 = props.s1,
        .props_s2 = props.s2,
        .num_s1 = num.s1,
        .num_s2 = num.s2,
    };
}
72
/// Frees all six lookup tables. `allocator` must be the allocator that was
/// passed to `init`.
pub fn deinit(self: *const Self, allocator: mem.Allocator) void {
    // The frees are independent of one another, so group them by element type.
    for ([_][]u16{ self.core_s1, self.props_s1, self.num_s1 }) |stage_1| {
        allocator.free(stage_1);
    }
    for ([_][]u8{ self.core_s2, self.props_s2, self.num_s2 }) |stage_2| {
        allocator.free(stage_2);
    }
}
81
/// Reports whether `cp` is a mathematical symbol (mask 1 of its
/// core-property flag byte).
pub fn isMath(self: Self, cp: u21) bool {
    // Stage 1 is keyed by the high bits of `cp`; the low 8 bits select the
    // entry relative to the stage-1 value.
    const base = self.core_s1[cp >> 8];
    const flags = self.core_s2[base + (cp & 0xff)];
    return flags & 1 != 0;
}
86
/// Reports whether `cp` is an alphabetic character (mask 2 of its
/// core-property flag byte).
pub fn isAlphabetic(self: Self, cp: u21) bool {
    // Stage 1 is keyed by the high bits of `cp`; the low 8 bits select the
    // entry relative to the stage-1 value.
    const base = self.core_s1[cp >> 8];
    const flags = self.core_s2[base + (cp & 0xff)];
    return flags & 2 != 0;
}
91
/// Reports whether `cp` may start an identifier (mask 4 of its
/// core-property flag byte).
pub fn isIdStart(self: Self, cp: u21) bool {
    // Stage 1 is keyed by the high bits of `cp`; the low 8 bits select the
    // entry relative to the stage-1 value.
    const base = self.core_s1[cp >> 8];
    const flags = self.core_s2[base + (cp & 0xff)];
    return flags & 4 != 0;
}
96
/// Reports whether `cp` may continue an identifier (mask 8 of its
/// core-property flag byte).
pub fn isIdContinue(self: Self, cp: u21) bool {
    // Stage 1 is keyed by the high bits of `cp`; the low 8 bits select the
    // entry relative to the stage-1 value.
    const base = self.core_s1[cp >> 8];
    const flags = self.core_s2[base + (cp & 0xff)];
    return flags & 8 != 0;
}
101
/// Reports whether `cp` may start an extended identifier (mask 16 of its
/// core-property flag byte).
pub fn isXidStart(self: Self, cp: u21) bool {
    // Stage 1 is keyed by the high bits of `cp`; the low 8 bits select the
    // entry relative to the stage-1 value.
    const base = self.core_s1[cp >> 8];
    const flags = self.core_s2[base + (cp & 0xff)];
    return flags & 16 != 0;
}
106
/// Reports whether `cp` may continue an extended identifier (mask 32 of its
/// core-property flag byte).
pub fn isXidContinue(self: Self, cp: u21) bool {
    // Stage 1 is keyed by the high bits of `cp`; the low 8 bits select the
    // entry relative to the stage-1 value.
    const base = self.core_s1[cp >> 8];
    const flags = self.core_s2[base + (cp & 0xff)];
    return flags & 32 != 0;
}
111
/// Reports whether `cp` is a whitespace character (mask 1 of its
/// property-list flag byte).
pub fn isWhitespace(self: Self, cp: u21) bool {
    // Stage 1 is keyed by the high bits of `cp`; the low 8 bits select the
    // entry relative to the stage-1 value.
    const base = self.props_s1[cp >> 8];
    const flags = self.props_s2[base + (cp & 0xff)];
    return flags & 1 != 0;
}
116
/// Reports whether `cp` is a hexadecimal digit (mask 2 of its
/// property-list flag byte).
pub fn isHexDigit(self: Self, cp: u21) bool {
    // Stage 1 is keyed by the high bits of `cp`; the low 8 bits select the
    // entry relative to the stage-1 value.
    const base = self.props_s1[cp >> 8];
    const flags = self.props_s2[base + (cp & 0xff)];
    return flags & 2 != 0;
}
121
/// Reports whether `cp` is a diacritic mark (mask 4 of its
/// property-list flag byte).
pub fn isDiacritic(self: Self, cp: u21) bool {
    // Stage 1 is keyed by the high bits of `cp`; the low 8 bits select the
    // entry relative to the stage-1 value.
    const base = self.props_s1[cp >> 8];
    const flags = self.props_s2[base + (cp & 0xff)];
    return flags & 4 != 0;
}
126
/// Reports whether `cp` is numeric (mask 1 of its numeric-type flag byte).
pub fn isNumeric(self: Self, cp: u21) bool {
    // Stage 1 is keyed by the high bits of `cp`; the low 8 bits select the
    // entry relative to the stage-1 value.
    const base = self.num_s1[cp >> 8];
    const flags = self.num_s2[base + (cp & 0xff)];
    return flags & 1 != 0;
}
131
/// Reports whether `cp` is a digit (mask 2 of its numeric-type flag byte).
pub fn isDigit(self: Self, cp: u21) bool {
    // Stage 1 is keyed by the high bits of `cp`; the low 8 bits select the
    // entry relative to the stage-1 value.
    const base = self.num_s1[cp >> 8];
    const flags = self.num_s2[base + (cp & 0xff)];
    return flags & 2 != 0;
}
136
/// Reports whether `cp` is decimal (mask 4 of its numeric-type flag byte).
pub fn isDecimal(self: Self, cp: u21) bool {
    // Stage 1 is keyed by the high bits of `cp`; the low 8 bits select the
    // entry relative to the stage-1 value.
    const base = self.num_s1[cp >> 8];
    const flags = self.num_s2[base + (cp & 0xff)];
    return flags & 4 != 0;
}
141
// Smoke test: loads the embedded tables and spot-checks a few code points
// against each family of property accessors.
test "Props" {
    const props = try init(testing.allocator);
    defer props.deinit(testing.allocator);

    // Hex digits: upper case, lower case and decimal digits qualify; 'z' does not.
    for ([_]u21{ 'F', 'a', '8' }) |cp| {
        try testing.expect(props.isHexDigit(cp));
    }
    try testing.expect(!props.isHexDigit('z'));

    // Property-list and core-property checks.
    try testing.expect(props.isDiacritic('\u{301}'));
    try testing.expect(props.isAlphabetic('A'));
    try testing.expect(!props.isAlphabetic('3'));
    try testing.expect(props.isMath('+'));

    // Numeric types: positive cases.
    try testing.expect(props.isNumeric('\u{277f}'));
    try testing.expect(props.isDigit('\u{2070}'));
    try testing.expect(props.isDecimal('3'));

    // Numeric types: negative cases.
    try testing.expect(!props.isNumeric('1'));
    try testing.expect(!props.isDigit('2'));
    try testing.expect(!props.isDecimal('g'));
}