diff options
Diffstat (limited to 'src/Properties.zig')
| -rw-r--r-- | src/Properties.zig | 195 |
1 file changed, 63 insertions, 132 deletions
diff --git a/src/Properties.zig b/src/Properties.zig index 432d176..f8c7cfc 100644 --- a/src/Properties.zig +++ b/src/Properties.zig | |||
| @@ -1,177 +1,108 @@ | |||
| 1 | //! Properties module | 1 | //! Properties module |
| 2 | 2 | ||
| 3 | core_s1: []u16 = undefined, | 3 | const Data = struct { |
| 4 | core_s2: []u8 = undefined, | 4 | core_s1: []const u16 = undefined, |
| 5 | props_s1: []u16 = undefined, | 5 | core_s2: []const u8 = undefined, |
| 6 | props_s2: []u8 = undefined, | 6 | props_s1: []const u16 = undefined, |
| 7 | num_s1: []u16 = undefined, | 7 | props_s2: []const u8 = undefined, |
| 8 | num_s2: []u8 = undefined, | 8 | num_s1: []const u16 = undefined, |
| 9 | 9 | num_s2: []const u8 = undefined, | |
| 10 | const Properties = @This(); | 10 | }; |
| 11 | 11 | ||
| 12 | pub fn init(allocator: Allocator) Allocator.Error!Properties { | 12 | const properties = properties: { |
| 13 | var props = Properties{}; | 13 | const core_props = @import("core_props"); |
| 14 | try props.setup(allocator); | 14 | const props_data = @import("props"); |
| 15 | return props; | 15 | const numeric = @import("numeric"); |
| 16 | } | 16 | break :properties Data{ |
| 17 | 17 | .core_s1 = &core_props.s1, | |
| 18 | pub fn setup(props: *Properties, allocator: Allocator) Allocator.Error!void { | 18 | .core_s2 = &core_props.s2, |
| 19 | props.setupInner(allocator) catch |err| { | 19 | .props_s1 = &props_data.s1, |
| 20 | switch (err) { | 20 | .props_s2 = &props_data.s2, |
| 21 | error.OutOfMemory => |e| return e, | 21 | .num_s1 = &numeric.s1, |
| 22 | else => unreachable, | 22 | .num_s2 = &numeric.s2, |
| 23 | } | ||
| 24 | }; | 23 | }; |
| 25 | } | 24 | }; |
| 26 | |||
| 27 | inline fn setupInner(props: *Properties, allocator: Allocator) !void { | ||
| 28 | const endian = builtin.cpu.arch.endian(); | ||
| 29 | |||
| 30 | // Process DerivedCoreProperties.txt | ||
| 31 | const core_bytes = @embedFile("core_props"); | ||
| 32 | var core_fbs = std.io.fixedBufferStream(core_bytes); | ||
| 33 | var core_reader = core_fbs.reader(); | ||
| 34 | |||
| 35 | const core_stage_1_len: u16 = try core_reader.readInt(u16, endian); | ||
| 36 | props.core_s1 = try allocator.alloc(u16, core_stage_1_len); | ||
| 37 | errdefer allocator.free(props.core_s1); | ||
| 38 | for (0..core_stage_1_len) |i| props.core_s1[i] = try core_reader.readInt(u16, endian); | ||
| 39 | |||
| 40 | const core_stage_2_len: u16 = try core_reader.readInt(u16, endian); | ||
| 41 | props.core_s2 = try allocator.alloc(u8, core_stage_2_len); | ||
| 42 | errdefer allocator.free(props.core_s2); | ||
| 43 | _ = try core_reader.readAll(props.core_s2); | ||
| 44 | |||
| 45 | // Process PropList.txt | ||
| 46 | const props_bytes = @embedFile("props"); | ||
| 47 | var props_fbs = std.io.fixedBufferStream(props_bytes); | ||
| 48 | var props_reader = props_fbs.reader(); | ||
| 49 | |||
| 50 | const stage_1_len: u16 = try props_reader.readInt(u16, endian); | ||
| 51 | props.props_s1 = try allocator.alloc(u16, stage_1_len); | ||
| 52 | errdefer allocator.free(props.props_s1); | ||
| 53 | for (0..stage_1_len) |i| props.props_s1[i] = try props_reader.readInt(u16, endian); | ||
| 54 | |||
| 55 | const stage_2_len: u16 = try props_reader.readInt(u16, endian); | ||
| 56 | props.props_s2 = try allocator.alloc(u8, stage_2_len); | ||
| 57 | errdefer allocator.free(props.props_s2); | ||
| 58 | _ = try props_reader.readAll(props.props_s2); | ||
| 59 | |||
| 60 | // Process DerivedNumericType.txt | ||
| 61 | const num_bytes = @embedFile("numeric"); | ||
| 62 | var num_fbs = std.io.fixedBufferStream(num_bytes); | ||
| 63 | var num_reader = num_fbs.reader(); | ||
| 64 | |||
| 65 | const num_stage_1_len: u16 = try num_reader.readInt(u16, endian); | ||
| 66 | props.num_s1 = try allocator.alloc(u16, num_stage_1_len); | ||
| 67 | errdefer allocator.free(props.num_s1); | ||
| 68 | for (0..num_stage_1_len) |i| props.num_s1[i] = try num_reader.readInt(u16, endian); | ||
| 69 | |||
| 70 | const num_stage_2_len: u16 = try num_reader.readInt(u16, endian); | ||
| 71 | props.num_s2 = try allocator.alloc(u8, num_stage_2_len); | ||
| 72 | errdefer allocator.free(props.num_s2); | ||
| 73 | _ = try num_reader.readAll(props.num_s2); | ||
| 74 | } | ||
| 75 | 25 | ||
| 76 | pub fn deinit(self: *const Properties, allocator: Allocator) void { | 26 | const Properties = @This(); |
| 77 | allocator.free(self.core_s1); | ||
| 78 | allocator.free(self.core_s2); | ||
| 79 | allocator.free(self.props_s1); | ||
| 80 | allocator.free(self.props_s2); | ||
| 81 | allocator.free(self.num_s1); | ||
| 82 | allocator.free(self.num_s2); | ||
| 83 | } | ||
| 84 | 27 | ||
| 85 | /// True if `cp` is a mathematical symbol. | 28 | /// True if `cp` is a mathematical symbol. |
| 86 | pub fn isMath(self: Properties, cp: u21) bool { | 29 | pub fn isMath(cp: u21) bool { |
| 87 | return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 1 == 1; | 30 | return properties.core_s2[properties.core_s1[cp >> 8] + (cp & 0xff)] & 1 == 1; |
| 88 | } | 31 | } |
| 89 | 32 | ||
| 90 | /// True if `cp` is an alphabetic character. | 33 | /// True if `cp` is an alphabetic character. |
| 91 | pub fn isAlphabetic(self: Properties, cp: u21) bool { | 34 | pub fn isAlphabetic(cp: u21) bool { |
| 92 | return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 2 == 2; | 35 | return properties.core_s2[properties.core_s1[cp >> 8] + (cp & 0xff)] & 2 == 2; |
| 93 | } | 36 | } |
| 94 | 37 | ||
| 95 | /// True if `cp` is a valid identifier start character. | 38 | /// True if `cp` is a valid identifier start character. |
| 96 | pub fn isIdStart(self: Properties, cp: u21) bool { | 39 | pub fn isIdStart(cp: u21) bool { |
| 97 | return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 4 == 4; | 40 | return properties.core_s2[properties.core_s1[cp >> 8] + (cp & 0xff)] & 4 == 4; |
| 98 | } | 41 | } |
| 99 | 42 | ||
| 100 | /// True if `cp` is a valid identifier continuation character. | 43 | /// True if `cp` is a valid identifier continuation character. |
| 101 | pub fn isIdContinue(self: Properties, cp: u21) bool { | 44 | pub fn isIdContinue(cp: u21) bool { |
| 102 | return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 8 == 8; | 45 | return properties.core_s2[properties.core_s1[cp >> 8] + (cp & 0xff)] & 8 == 8; |
| 103 | } | 46 | } |
| 104 | 47 | ||
| 105 | /// True if `cp` is a valid extended identifier start character. | 48 | /// True if `cp` is a valid extended identifier start character. |
| 106 | pub fn isXidStart(self: Properties, cp: u21) bool { | 49 | pub fn isXidStart(cp: u21) bool { |
| 107 | return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 16 == 16; | 50 | return properties.core_s2[properties.core_s1[cp >> 8] + (cp & 0xff)] & 16 == 16; |
| 108 | } | 51 | } |
| 109 | 52 | ||
| 110 | /// True if `cp` is a valid extended identifier continuation character. | 53 | /// True if `cp` is a valid extended identifier continuation character. |
| 111 | pub fn isXidContinue(self: Properties, cp: u21) bool { | 54 | pub fn isXidContinue(cp: u21) bool { |
| 112 | return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 32 == 32; | 55 | return properties.core_s2[properties.core_s1[cp >> 8] + (cp & 0xff)] & 32 == 32; |
| 113 | } | 56 | } |
| 114 | 57 | ||
| 115 | /// True if `cp` is a whitespace character. | 58 | /// True if `cp` is a whitespace character. |
| 116 | pub fn isWhitespace(self: Properties, cp: u21) bool { | 59 | pub fn isWhitespace(cp: u21) bool { |
| 117 | return self.props_s2[self.props_s1[cp >> 8] + (cp & 0xff)] & 1 == 1; | 60 | return properties.props_s2[properties.props_s1[cp >> 8] + (cp & 0xff)] & 1 == 1; |
| 118 | } | 61 | } |
| 119 | 62 | ||
| 120 | /// True if `cp` is a hexadecimal digit. | 63 | /// True if `cp` is a hexadecimal digit. |
| 121 | pub fn isHexDigit(self: Properties, cp: u21) bool { | 64 | pub fn isHexDigit(cp: u21) bool { |
| 122 | return self.props_s2[self.props_s1[cp >> 8] + (cp & 0xff)] & 2 == 2; | 65 | return properties.props_s2[properties.props_s1[cp >> 8] + (cp & 0xff)] & 2 == 2; |
| 123 | } | 66 | } |
| 124 | 67 | ||
| 125 | /// True if `cp` is a diacritic mark. | 68 | /// True if `cp` is a diacritic mark. |
| 126 | pub fn isDiacritic(self: Properties, cp: u21) bool { | 69 | pub fn isDiacritic(cp: u21) bool { |
| 127 | return self.props_s2[self.props_s1[cp >> 8] + (cp & 0xff)] & 4 == 4; | 70 | return properties.props_s2[properties.props_s1[cp >> 8] + (cp & 0xff)] & 4 == 4; |
| 128 | } | 71 | } |
| 129 | 72 | ||
| 130 | /// True if `cp` is numeric. | 73 | /// True if `cp` is numeric. |
| 131 | pub fn isNumeric(self: Properties, cp: u21) bool { | 74 | pub fn isNumeric(cp: u21) bool { |
| 132 | return self.num_s2[self.num_s1[cp >> 8] + (cp & 0xff)] & 1 == 1; | 75 | return properties.num_s2[properties.num_s1[cp >> 8] + (cp & 0xff)] & 1 == 1; |
| 133 | } | 76 | } |
| 134 | 77 | ||
| 135 | /// True if `cp` is a digit. | 78 | /// True if `cp` is a digit. |
| 136 | pub fn isDigit(self: Properties, cp: u21) bool { | 79 | pub fn isDigit(cp: u21) bool { |
| 137 | return self.num_s2[self.num_s1[cp >> 8] + (cp & 0xff)] & 2 == 2; | 80 | return properties.num_s2[properties.num_s1[cp >> 8] + (cp & 0xff)] & 2 == 2; |
| 138 | } | 81 | } |
| 139 | 82 | ||
| 140 | /// True if `cp` is decimal. | 83 | /// True if `cp` is decimal. |
| 141 | pub fn isDecimal(self: Properties, cp: u21) bool { | 84 | pub fn isDecimal(cp: u21) bool { |
| 142 | return self.num_s2[self.num_s1[cp >> 8] + (cp & 0xff)] & 4 == 4; | 85 | return properties.num_s2[properties.num_s1[cp >> 8] + (cp & 0xff)] & 4 == 4; |
| 143 | } | 86 | } |
| 144 | 87 | ||
| 145 | test "Props" { | 88 | test "Props" { |
| 146 | const self = try init(testing.allocator); | 89 | try testing.expect(Properties.isHexDigit('F')); |
| 147 | defer self.deinit(testing.allocator); | 90 | try testing.expect(Properties.isHexDigit('a')); |
| 148 | 91 | try testing.expect(Properties.isHexDigit('8')); | |
| 149 | try testing.expect(self.isHexDigit('F')); | 92 | try testing.expect(!Properties.isHexDigit('z')); |
| 150 | try testing.expect(self.isHexDigit('a')); | 93 | |
| 151 | try testing.expect(self.isHexDigit('8')); | 94 | try testing.expect(Properties.isDiacritic('\u{301}')); |
| 152 | try testing.expect(!self.isHexDigit('z')); | 95 | try testing.expect(Properties.isAlphabetic('A')); |
| 153 | 96 | try testing.expect(!Properties.isAlphabetic('3')); | |
| 154 | try testing.expect(self.isDiacritic('\u{301}')); | 97 | try testing.expect(Properties.isMath('+')); |
| 155 | try testing.expect(self.isAlphabetic('A')); | 98 | |
| 156 | try testing.expect(!self.isAlphabetic('3')); | 99 | try testing.expect(Properties.isNumeric('\u{277f}')); |
| 157 | try testing.expect(self.isMath('+')); | 100 | try testing.expect(Properties.isDigit('\u{2070}')); |
| 158 | 101 | try testing.expect(Properties.isDecimal('3')); | |
| 159 | try testing.expect(self.isNumeric('\u{277f}')); | 102 | |
| 160 | try testing.expect(self.isDigit('\u{2070}')); | 103 | try testing.expect(!Properties.isNumeric('1')); |
| 161 | try testing.expect(self.isDecimal('3')); | 104 | try testing.expect(!Properties.isDigit('2')); |
| 162 | 105 | try testing.expect(!Properties.isDecimal('g')); | |
| 163 | try testing.expect(!self.isNumeric('1')); | ||
| 164 | try testing.expect(!self.isDigit('2')); | ||
| 165 | try testing.expect(!self.isDecimal('g')); | ||
| 166 | } | ||
| 167 | |||
| 168 | fn testAllocator(allocator: Allocator) !void { | ||
| 169 | var prop = try Properties.init(allocator); | ||
| 170 | prop.deinit(allocator); | ||
| 171 | } | ||
| 172 | |||
| 173 | test "Allocation failure" { | ||
| 174 | try testing.checkAllAllocationFailures(testing.allocator, testAllocator, .{}); | ||
| 175 | } | 106 | } |
| 176 | 107 | ||
| 177 | const std = @import("std"); | 108 | const std = @import("std"); |