diff options
| author | 2026-02-04 18:36:18 -0500 | |
|---|---|---|
| committer | 2026-02-04 18:36:18 -0500 | |
| commit | e476250ea9326b2550847b301c265115ff375a31 (patch) | |
| tree | cf627ced47cecce80020b7a1f30aa51852c0c59b /src/Properties.zig | |
| parent | Normalization and case folding (diff) | |
| download | zg-e476250ea9326b2550847b301c265115ff375a31.tar.gz zg-e476250ea9326b2550847b301c265115ff375a31.tar.xz zg-e476250ea9326b2550847b301c265115ff375a31.zip | |
Rest of the 'easy' stuff
This gets us up to feature parity with Jacob's work. I want to
eliminate that last allocation using the comptime hash map, and then
see about eliminating allocations from case comparisons as well.
That should just about do it.
Diffstat (limited to 'src/Properties.zig')
| -rw-r--r-- | src/Properties.zig | 195 |
1 files changed, 63 insertions, 132 deletions
diff --git a/src/Properties.zig b/src/Properties.zig index 432d176..f8c7cfc 100644 --- a/src/Properties.zig +++ b/src/Properties.zig | |||
| @@ -1,177 +1,108 @@ | |||
| 1 | //! Properties module | 1 | //! Properties module |
| 2 | 2 | ||
| 3 | core_s1: []u16 = undefined, | 3 | const Data = struct { |
| 4 | core_s2: []u8 = undefined, | 4 | core_s1: []const u16 = undefined, |
| 5 | props_s1: []u16 = undefined, | 5 | core_s2: []const u8 = undefined, |
| 6 | props_s2: []u8 = undefined, | 6 | props_s1: []const u16 = undefined, |
| 7 | num_s1: []u16 = undefined, | 7 | props_s2: []const u8 = undefined, |
| 8 | num_s2: []u8 = undefined, | 8 | num_s1: []const u16 = undefined, |
| 9 | 9 | num_s2: []const u8 = undefined, | |
| 10 | const Properties = @This(); | 10 | }; |
| 11 | 11 | ||
| 12 | pub fn init(allocator: Allocator) Allocator.Error!Properties { | 12 | const properties = properties: { |
| 13 | var props = Properties{}; | 13 | const core_props = @import("core_props"); |
| 14 | try props.setup(allocator); | 14 | const props_data = @import("props"); |
| 15 | return props; | 15 | const numeric = @import("numeric"); |
| 16 | } | 16 | break :properties Data{ |
| 17 | 17 | .core_s1 = &core_props.s1, | |
| 18 | pub fn setup(props: *Properties, allocator: Allocator) Allocator.Error!void { | 18 | .core_s2 = &core_props.s2, |
| 19 | props.setupInner(allocator) catch |err| { | 19 | .props_s1 = &props_data.s1, |
| 20 | switch (err) { | 20 | .props_s2 = &props_data.s2, |
| 21 | error.OutOfMemory => |e| return e, | 21 | .num_s1 = &numeric.s1, |
| 22 | else => unreachable, | 22 | .num_s2 = &numeric.s2, |
| 23 | } | ||
| 24 | }; | 23 | }; |
| 25 | } | 24 | }; |
| 26 | |||
| 27 | inline fn setupInner(props: *Properties, allocator: Allocator) !void { | ||
| 28 | const endian = builtin.cpu.arch.endian(); | ||
| 29 | |||
| 30 | // Process DerivedCoreProperties.txt | ||
| 31 | const core_bytes = @embedFile("core_props"); | ||
| 32 | var core_fbs = std.io.fixedBufferStream(core_bytes); | ||
| 33 | var core_reader = core_fbs.reader(); | ||
| 34 | |||
| 35 | const core_stage_1_len: u16 = try core_reader.readInt(u16, endian); | ||
| 36 | props.core_s1 = try allocator.alloc(u16, core_stage_1_len); | ||
| 37 | errdefer allocator.free(props.core_s1); | ||
| 38 | for (0..core_stage_1_len) |i| props.core_s1[i] = try core_reader.readInt(u16, endian); | ||
| 39 | |||
| 40 | const core_stage_2_len: u16 = try core_reader.readInt(u16, endian); | ||
| 41 | props.core_s2 = try allocator.alloc(u8, core_stage_2_len); | ||
| 42 | errdefer allocator.free(props.core_s2); | ||
| 43 | _ = try core_reader.readAll(props.core_s2); | ||
| 44 | |||
| 45 | // Process PropList.txt | ||
| 46 | const props_bytes = @embedFile("props"); | ||
| 47 | var props_fbs = std.io.fixedBufferStream(props_bytes); | ||
| 48 | var props_reader = props_fbs.reader(); | ||
| 49 | |||
| 50 | const stage_1_len: u16 = try props_reader.readInt(u16, endian); | ||
| 51 | props.props_s1 = try allocator.alloc(u16, stage_1_len); | ||
| 52 | errdefer allocator.free(props.props_s1); | ||
| 53 | for (0..stage_1_len) |i| props.props_s1[i] = try props_reader.readInt(u16, endian); | ||
| 54 | |||
| 55 | const stage_2_len: u16 = try props_reader.readInt(u16, endian); | ||
| 56 | props.props_s2 = try allocator.alloc(u8, stage_2_len); | ||
| 57 | errdefer allocator.free(props.props_s2); | ||
| 58 | _ = try props_reader.readAll(props.props_s2); | ||
| 59 | |||
| 60 | // Process DerivedNumericType.txt | ||
| 61 | const num_bytes = @embedFile("numeric"); | ||
| 62 | var num_fbs = std.io.fixedBufferStream(num_bytes); | ||
| 63 | var num_reader = num_fbs.reader(); | ||
| 64 | |||
| 65 | const num_stage_1_len: u16 = try num_reader.readInt(u16, endian); | ||
| 66 | props.num_s1 = try allocator.alloc(u16, num_stage_1_len); | ||
| 67 | errdefer allocator.free(props.num_s1); | ||
| 68 | for (0..num_stage_1_len) |i| props.num_s1[i] = try num_reader.readInt(u16, endian); | ||
| 69 | |||
| 70 | const num_stage_2_len: u16 = try num_reader.readInt(u16, endian); | ||
| 71 | props.num_s2 = try allocator.alloc(u8, num_stage_2_len); | ||
| 72 | errdefer allocator.free(props.num_s2); | ||
| 73 | _ = try num_reader.readAll(props.num_s2); | ||
| 74 | } | ||
| 75 | 25 | ||
| 76 | pub fn deinit(self: *const Properties, allocator: Allocator) void { | 26 | const Properties = @This(); |
| 77 | allocator.free(self.core_s1); | ||
| 78 | allocator.free(self.core_s2); | ||
| 79 | allocator.free(self.props_s1); | ||
| 80 | allocator.free(self.props_s2); | ||
| 81 | allocator.free(self.num_s1); | ||
| 82 | allocator.free(self.num_s2); | ||
| 83 | } | ||
| 84 | 27 | ||
| 85 | /// True if `cp` is a mathematical symbol. | 28 | /// True if `cp` is a mathematical symbol. |
| 86 | pub fn isMath(self: Properties, cp: u21) bool { | 29 | pub fn isMath(cp: u21) bool { |
| 87 | return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 1 == 1; | 30 | return properties.core_s2[properties.core_s1[cp >> 8] + (cp & 0xff)] & 1 == 1; |
| 88 | } | 31 | } |
| 89 | 32 | ||
| 90 | /// True if `cp` is an alphabetic character. | 33 | /// True if `cp` is an alphabetic character. |
| 91 | pub fn isAlphabetic(self: Properties, cp: u21) bool { | 34 | pub fn isAlphabetic(cp: u21) bool { |
| 92 | return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 2 == 2; | 35 | return properties.core_s2[properties.core_s1[cp >> 8] + (cp & 0xff)] & 2 == 2; |
| 93 | } | 36 | } |
| 94 | 37 | ||
| 95 | /// True if `cp` is a valid identifier start character. | 38 | /// True if `cp` is a valid identifier start character. |
| 96 | pub fn isIdStart(self: Properties, cp: u21) bool { | 39 | pub fn isIdStart(cp: u21) bool { |
| 97 | return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 4 == 4; | 40 | return properties.core_s2[properties.core_s1[cp >> 8] + (cp & 0xff)] & 4 == 4; |
| 98 | } | 41 | } |
| 99 | 42 | ||
| 100 | /// True if `cp` is a valid identifier continuation character. | 43 | /// True if `cp` is a valid identifier continuation character. |
| 101 | pub fn isIdContinue(self: Properties, cp: u21) bool { | 44 | pub fn isIdContinue(cp: u21) bool { |
| 102 | return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 8 == 8; | 45 | return properties.core_s2[properties.core_s1[cp >> 8] + (cp & 0xff)] & 8 == 8; |
| 103 | } | 46 | } |
| 104 | 47 | ||
| 105 | /// True if `cp` is a valid extended identifier start character. | 48 | /// True if `cp` is a valid extended identifier start character. |
| 106 | pub fn isXidStart(self: Properties, cp: u21) bool { | 49 | pub fn isXidStart(cp: u21) bool { |
| 107 | return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 16 == 16; | 50 | return properties.core_s2[properties.core_s1[cp >> 8] + (cp & 0xff)] & 16 == 16; |
| 108 | } | 51 | } |
| 109 | 52 | ||
| 110 | /// True if `cp` is a valid extended identifier continuation character. | 53 | /// True if `cp` is a valid extended identifier continuation character. |
| 111 | pub fn isXidContinue(self: Properties, cp: u21) bool { | 54 | pub fn isXidContinue(cp: u21) bool { |
| 112 | return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 32 == 32; | 55 | return properties.core_s2[properties.core_s1[cp >> 8] + (cp & 0xff)] & 32 == 32; |
| 113 | } | 56 | } |
| 114 | 57 | ||
| 115 | /// True if `cp` is a whitespace character. | 58 | /// True if `cp` is a whitespace character. |
| 116 | pub fn isWhitespace(self: Properties, cp: u21) bool { | 59 | pub fn isWhitespace(cp: u21) bool { |
| 117 | return self.props_s2[self.props_s1[cp >> 8] + (cp & 0xff)] & 1 == 1; | 60 | return properties.props_s2[properties.props_s1[cp >> 8] + (cp & 0xff)] & 1 == 1; |
| 118 | } | 61 | } |
| 119 | 62 | ||
| 120 | /// True if `cp` is a hexadecimal digit. | 63 | /// True if `cp` is a hexadecimal digit. |
| 121 | pub fn isHexDigit(self: Properties, cp: u21) bool { | 64 | pub fn isHexDigit(cp: u21) bool { |
| 122 | return self.props_s2[self.props_s1[cp >> 8] + (cp & 0xff)] & 2 == 2; | 65 | return properties.props_s2[properties.props_s1[cp >> 8] + (cp & 0xff)] & 2 == 2; |
| 123 | } | 66 | } |
| 124 | 67 | ||
| 125 | /// True if `cp` is a diacritic mark. | 68 | /// True if `cp` is a diacritic mark. |
| 126 | pub fn isDiacritic(self: Properties, cp: u21) bool { | 69 | pub fn isDiacritic(cp: u21) bool { |
| 127 | return self.props_s2[self.props_s1[cp >> 8] + (cp & 0xff)] & 4 == 4; | 70 | return properties.props_s2[properties.props_s1[cp >> 8] + (cp & 0xff)] & 4 == 4; |
| 128 | } | 71 | } |
| 129 | 72 | ||
| 130 | /// True if `cp` is numeric. | 73 | /// True if `cp` is numeric. |
| 131 | pub fn isNumeric(self: Properties, cp: u21) bool { | 74 | pub fn isNumeric(cp: u21) bool { |
| 132 | return self.num_s2[self.num_s1[cp >> 8] + (cp & 0xff)] & 1 == 1; | 75 | return properties.num_s2[properties.num_s1[cp >> 8] + (cp & 0xff)] & 1 == 1; |
| 133 | } | 76 | } |
| 134 | 77 | ||
| 135 | /// True if `cp` is a digit. | 78 | /// True if `cp` is a digit. |
| 136 | pub fn isDigit(self: Properties, cp: u21) bool { | 79 | pub fn isDigit(cp: u21) bool { |
| 137 | return self.num_s2[self.num_s1[cp >> 8] + (cp & 0xff)] & 2 == 2; | 80 | return properties.num_s2[properties.num_s1[cp >> 8] + (cp & 0xff)] & 2 == 2; |
| 138 | } | 81 | } |
| 139 | 82 | ||
| 140 | /// True if `cp` is decimal. | 83 | /// True if `cp` is decimal. |
| 141 | pub fn isDecimal(self: Properties, cp: u21) bool { | 84 | pub fn isDecimal(cp: u21) bool { |
| 142 | return self.num_s2[self.num_s1[cp >> 8] + (cp & 0xff)] & 4 == 4; | 85 | return properties.num_s2[properties.num_s1[cp >> 8] + (cp & 0xff)] & 4 == 4; |
| 143 | } | 86 | } |
| 144 | 87 | ||
| 145 | test "Props" { | 88 | test "Props" { |
| 146 | const self = try init(testing.allocator); | 89 | try testing.expect(Properties.isHexDigit('F')); |
| 147 | defer self.deinit(testing.allocator); | 90 | try testing.expect(Properties.isHexDigit('a')); |
| 148 | 91 | try testing.expect(Properties.isHexDigit('8')); | |
| 149 | try testing.expect(self.isHexDigit('F')); | 92 | try testing.expect(!Properties.isHexDigit('z')); |
| 150 | try testing.expect(self.isHexDigit('a')); | 93 | |
| 151 | try testing.expect(self.isHexDigit('8')); | 94 | try testing.expect(Properties.isDiacritic('\u{301}')); |
| 152 | try testing.expect(!self.isHexDigit('z')); | 95 | try testing.expect(Properties.isAlphabetic('A')); |
| 153 | 96 | try testing.expect(!Properties.isAlphabetic('3')); | |
| 154 | try testing.expect(self.isDiacritic('\u{301}')); | 97 | try testing.expect(Properties.isMath('+')); |
| 155 | try testing.expect(self.isAlphabetic('A')); | 98 | |
| 156 | try testing.expect(!self.isAlphabetic('3')); | 99 | try testing.expect(Properties.isNumeric('\u{277f}')); |
| 157 | try testing.expect(self.isMath('+')); | 100 | try testing.expect(Properties.isDigit('\u{2070}')); |
| 158 | 101 | try testing.expect(Properties.isDecimal('3')); | |
| 159 | try testing.expect(self.isNumeric('\u{277f}')); | 102 | |
| 160 | try testing.expect(self.isDigit('\u{2070}')); | 103 | try testing.expect(!Properties.isNumeric('1')); |
| 161 | try testing.expect(self.isDecimal('3')); | 104 | try testing.expect(!Properties.isDigit('2')); |
| 162 | 105 | try testing.expect(!Properties.isDecimal('g')); | |
| 163 | try testing.expect(!self.isNumeric('1')); | ||
| 164 | try testing.expect(!self.isDigit('2')); | ||
| 165 | try testing.expect(!self.isDecimal('g')); | ||
| 166 | } | ||
| 167 | |||
| 168 | fn testAllocator(allocator: Allocator) !void { | ||
| 169 | var prop = try Properties.init(allocator); | ||
| 170 | prop.deinit(allocator); | ||
| 171 | } | ||
| 172 | |||
| 173 | test "Allocation failure" { | ||
| 174 | try testing.checkAllAllocationFailures(testing.allocator, testAllocator, .{}); | ||
| 175 | } | 106 | } |
| 176 | 107 | ||
| 177 | const std = @import("std"); | 108 | const std = @import("std"); |