summary | refs | log | tree | commit | diff
path: root/src/Properties.zig
diff options
context:
space:
mode:
author: Sam Atman, 2026-02-04 18:36:18 -0500
committer: Sam Atman, 2026-02-04 18:36:18 -0500
commit: e476250ea9326b2550847b301c265115ff375a31 (patch)
tree: cf627ced47cecce80020b7a1f30aa51852c0c59b /src/Properties.zig
parent: Normalization and case folding (diff)
download: zg-e476250ea9326b2550847b301c265115ff375a31.tar.gz
zg-e476250ea9326b2550847b301c265115ff375a31.tar.xz
zg-e476250ea9326b2550847b301c265115ff375a31.zip
Rest of the 'easy' stuff
This gets us up to feature parity with Jacob's work. I want to eliminate that last allocation using the comptime hash map, and then see about eliminating allocations from case comparisons as well. That should just about do it.
Diffstat (limited to 'src/Properties.zig')
-rw-r--r-- src/Properties.zig 195
1 files changed, 63 insertions, 132 deletions
diff --git a/src/Properties.zig b/src/Properties.zig
index 432d176..f8c7cfc 100644
--- a/src/Properties.zig
+++ b/src/Properties.zig
@@ -1,177 +1,108 @@
1//! Properties module 1//! Properties module
2 2
3core_s1: []u16 = undefined, 3const Data = struct {
4core_s2: []u8 = undefined, 4 core_s1: []const u16 = undefined,
5props_s1: []u16 = undefined, 5 core_s2: []const u8 = undefined,
6props_s2: []u8 = undefined, 6 props_s1: []const u16 = undefined,
7num_s1: []u16 = undefined, 7 props_s2: []const u8 = undefined,
8num_s2: []u8 = undefined, 8 num_s1: []const u16 = undefined,
9 9 num_s2: []const u8 = undefined,
10const Properties = @This(); 10};
11 11
12pub fn init(allocator: Allocator) Allocator.Error!Properties { 12const properties = properties: {
13 var props = Properties{}; 13 const core_props = @import("core_props");
14 try props.setup(allocator); 14 const props_data = @import("props");
15 return props; 15 const numeric = @import("numeric");
16} 16 break :properties Data{
17 17 .core_s1 = &core_props.s1,
18pub fn setup(props: *Properties, allocator: Allocator) Allocator.Error!void { 18 .core_s2 = &core_props.s2,
19 props.setupInner(allocator) catch |err| { 19 .props_s1 = &props_data.s1,
20 switch (err) { 20 .props_s2 = &props_data.s2,
21 error.OutOfMemory => |e| return e, 21 .num_s1 = &numeric.s1,
22 else => unreachable, 22 .num_s2 = &numeric.s2,
23 }
24 }; 23 };
25} 24};
26
27inline fn setupInner(props: *Properties, allocator: Allocator) !void {
28 const endian = builtin.cpu.arch.endian();
29
30 // Process DerivedCoreProperties.txt
31 const core_bytes = @embedFile("core_props");
32 var core_fbs = std.io.fixedBufferStream(core_bytes);
33 var core_reader = core_fbs.reader();
34
35 const core_stage_1_len: u16 = try core_reader.readInt(u16, endian);
36 props.core_s1 = try allocator.alloc(u16, core_stage_1_len);
37 errdefer allocator.free(props.core_s1);
38 for (0..core_stage_1_len) |i| props.core_s1[i] = try core_reader.readInt(u16, endian);
39
40 const core_stage_2_len: u16 = try core_reader.readInt(u16, endian);
41 props.core_s2 = try allocator.alloc(u8, core_stage_2_len);
42 errdefer allocator.free(props.core_s2);
43 _ = try core_reader.readAll(props.core_s2);
44
45 // Process PropList.txt
46 const props_bytes = @embedFile("props");
47 var props_fbs = std.io.fixedBufferStream(props_bytes);
48 var props_reader = props_fbs.reader();
49
50 const stage_1_len: u16 = try props_reader.readInt(u16, endian);
51 props.props_s1 = try allocator.alloc(u16, stage_1_len);
52 errdefer allocator.free(props.props_s1);
53 for (0..stage_1_len) |i| props.props_s1[i] = try props_reader.readInt(u16, endian);
54
55 const stage_2_len: u16 = try props_reader.readInt(u16, endian);
56 props.props_s2 = try allocator.alloc(u8, stage_2_len);
57 errdefer allocator.free(props.props_s2);
58 _ = try props_reader.readAll(props.props_s2);
59
60 // Process DerivedNumericType.txt
61 const num_bytes = @embedFile("numeric");
62 var num_fbs = std.io.fixedBufferStream(num_bytes);
63 var num_reader = num_fbs.reader();
64
65 const num_stage_1_len: u16 = try num_reader.readInt(u16, endian);
66 props.num_s1 = try allocator.alloc(u16, num_stage_1_len);
67 errdefer allocator.free(props.num_s1);
68 for (0..num_stage_1_len) |i| props.num_s1[i] = try num_reader.readInt(u16, endian);
69
70 const num_stage_2_len: u16 = try num_reader.readInt(u16, endian);
71 props.num_s2 = try allocator.alloc(u8, num_stage_2_len);
72 errdefer allocator.free(props.num_s2);
73 _ = try num_reader.readAll(props.num_s2);
74}
75 25
76pub fn deinit(self: *const Properties, allocator: Allocator) void { 26const Properties = @This();
77 allocator.free(self.core_s1);
78 allocator.free(self.core_s2);
79 allocator.free(self.props_s1);
80 allocator.free(self.props_s2);
81 allocator.free(self.num_s1);
82 allocator.free(self.num_s2);
83}
84 27
85/// True if `cp` is a mathematical symbol. 28/// True if `cp` is a mathematical symbol.
86pub fn isMath(self: Properties, cp: u21) bool { 29pub fn isMath(cp: u21) bool {
87 return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 1 == 1; 30 return properties.core_s2[properties.core_s1[cp >> 8] + (cp & 0xff)] & 1 == 1;
88} 31}
89 32
90/// True if `cp` is an alphabetic character. 33/// True if `cp` is an alphabetic character.
91pub fn isAlphabetic(self: Properties, cp: u21) bool { 34pub fn isAlphabetic(cp: u21) bool {
92 return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 2 == 2; 35 return properties.core_s2[properties.core_s1[cp >> 8] + (cp & 0xff)] & 2 == 2;
93} 36}
94 37
95/// True if `cp` is a valid identifier start character. 38/// True if `cp` is a valid identifier start character.
96pub fn isIdStart(self: Properties, cp: u21) bool { 39pub fn isIdStart(cp: u21) bool {
97 return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 4 == 4; 40 return properties.core_s2[properties.core_s1[cp >> 8] + (cp & 0xff)] & 4 == 4;
98} 41}
99 42
100/// True if `cp` is a valid identifier continuation character. 43/// True if `cp` is a valid identifier continuation character.
101pub fn isIdContinue(self: Properties, cp: u21) bool { 44pub fn isIdContinue(cp: u21) bool {
102 return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 8 == 8; 45 return properties.core_s2[properties.core_s1[cp >> 8] + (cp & 0xff)] & 8 == 8;
103} 46}
104 47
105/// True if `cp` is a valid extended identifier start character. 48/// True if `cp` is a valid extended identifier start character.
106pub fn isXidStart(self: Properties, cp: u21) bool { 49pub fn isXidStart(cp: u21) bool {
107 return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 16 == 16; 50 return properties.core_s2[properties.core_s1[cp >> 8] + (cp & 0xff)] & 16 == 16;
108} 51}
109 52
110/// True if `cp` is a valid extended identifier continuation character. 53/// True if `cp` is a valid extended identifier continuation character.
111pub fn isXidContinue(self: Properties, cp: u21) bool { 54pub fn isXidContinue(cp: u21) bool {
112 return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 32 == 32; 55 return properties.core_s2[properties.core_s1[cp >> 8] + (cp & 0xff)] & 32 == 32;
113} 56}
114 57
115/// True if `cp` is a whitespace character. 58/// True if `cp` is a whitespace character.
116pub fn isWhitespace(self: Properties, cp: u21) bool { 59pub fn isWhitespace(cp: u21) bool {
117 return self.props_s2[self.props_s1[cp >> 8] + (cp & 0xff)] & 1 == 1; 60 return properties.props_s2[properties.props_s1[cp >> 8] + (cp & 0xff)] & 1 == 1;
118} 61}
119 62
120/// True if `cp` is a hexadecimal digit. 63/// True if `cp` is a hexadecimal digit.
121pub fn isHexDigit(self: Properties, cp: u21) bool { 64pub fn isHexDigit(cp: u21) bool {
122 return self.props_s2[self.props_s1[cp >> 8] + (cp & 0xff)] & 2 == 2; 65 return properties.props_s2[properties.props_s1[cp >> 8] + (cp & 0xff)] & 2 == 2;
123} 66}
124 67
125/// True if `cp` is a diacritic mark. 68/// True if `cp` is a diacritic mark.
126pub fn isDiacritic(self: Properties, cp: u21) bool { 69pub fn isDiacritic(cp: u21) bool {
127 return self.props_s2[self.props_s1[cp >> 8] + (cp & 0xff)] & 4 == 4; 70 return properties.props_s2[properties.props_s1[cp >> 8] + (cp & 0xff)] & 4 == 4;
128} 71}
129 72
130/// True if `cp` is numeric. 73/// True if `cp` is numeric.
131pub fn isNumeric(self: Properties, cp: u21) bool { 74pub fn isNumeric(cp: u21) bool {
132 return self.num_s2[self.num_s1[cp >> 8] + (cp & 0xff)] & 1 == 1; 75 return properties.num_s2[properties.num_s1[cp >> 8] + (cp & 0xff)] & 1 == 1;
133} 76}
134 77
135/// True if `cp` is a digit. 78/// True if `cp` is a digit.
136pub fn isDigit(self: Properties, cp: u21) bool { 79pub fn isDigit(cp: u21) bool {
137 return self.num_s2[self.num_s1[cp >> 8] + (cp & 0xff)] & 2 == 2; 80 return properties.num_s2[properties.num_s1[cp >> 8] + (cp & 0xff)] & 2 == 2;
138} 81}
139 82
140/// True if `cp` is decimal. 83/// True if `cp` is decimal.
141pub fn isDecimal(self: Properties, cp: u21) bool { 84pub fn isDecimal(cp: u21) bool {
142 return self.num_s2[self.num_s1[cp >> 8] + (cp & 0xff)] & 4 == 4; 85 return properties.num_s2[properties.num_s1[cp >> 8] + (cp & 0xff)] & 4 == 4;
143} 86}
144 87
145test "Props" { 88test "Props" {
146 const self = try init(testing.allocator); 89 try testing.expect(Properties.isHexDigit('F'));
147 defer self.deinit(testing.allocator); 90 try testing.expect(Properties.isHexDigit('a'));
148 91 try testing.expect(Properties.isHexDigit('8'));
149 try testing.expect(self.isHexDigit('F')); 92 try testing.expect(!Properties.isHexDigit('z'));
150 try testing.expect(self.isHexDigit('a')); 93
151 try testing.expect(self.isHexDigit('8')); 94 try testing.expect(Properties.isDiacritic('\u{301}'));
152 try testing.expect(!self.isHexDigit('z')); 95 try testing.expect(Properties.isAlphabetic('A'));
153 96 try testing.expect(!Properties.isAlphabetic('3'));
154 try testing.expect(self.isDiacritic('\u{301}')); 97 try testing.expect(Properties.isMath('+'));
155 try testing.expect(self.isAlphabetic('A')); 98
156 try testing.expect(!self.isAlphabetic('3')); 99 try testing.expect(Properties.isNumeric('\u{277f}'));
157 try testing.expect(self.isMath('+')); 100 try testing.expect(Properties.isDigit('\u{2070}'));
158 101 try testing.expect(Properties.isDecimal('3'));
159 try testing.expect(self.isNumeric('\u{277f}')); 102
160 try testing.expect(self.isDigit('\u{2070}')); 103 try testing.expect(!Properties.isNumeric('1'));
161 try testing.expect(self.isDecimal('3')); 104 try testing.expect(!Properties.isDigit('2'));
162 105 try testing.expect(!Properties.isDecimal('g'));
163 try testing.expect(!self.isNumeric('1'));
164 try testing.expect(!self.isDigit('2'));
165 try testing.expect(!self.isDecimal('g'));
166}
167
168fn testAllocator(allocator: Allocator) !void {
169 var prop = try Properties.init(allocator);
170 prop.deinit(allocator);
171}
172
173test "Allocation failure" {
174 try testing.checkAllAllocationFailures(testing.allocator, testAllocator, .{});
175} 106}
176 107
177const std = @import("std"); 108const std = @import("std");