From e476250ea9326b2550847b301c265115ff375a31 Mon Sep 17 00:00:00 2001 From: Sam Atman Date: Wed, 4 Feb 2026 18:36:18 -0500 Subject: Rest of the 'easy' stuff This gets us up to feature parity with Jacob's work. I want to eliminate that last allocation using the comptime hash map, and then see about eliminating allocations from case comparisons as well. That should just about do it. --- src/Properties.zig | 195 +++++++++++++++++------------------------------------ 1 file changed, 63 insertions(+), 132 deletions(-) (limited to 'src/Properties.zig') diff --git a/src/Properties.zig b/src/Properties.zig index 432d176..f8c7cfc 100644 --- a/src/Properties.zig +++ b/src/Properties.zig @@ -1,177 +1,108 @@ //! Properties module -core_s1: []u16 = undefined, -core_s2: []u8 = undefined, -props_s1: []u16 = undefined, -props_s2: []u8 = undefined, -num_s1: []u16 = undefined, -num_s2: []u8 = undefined, - -const Properties = @This(); - -pub fn init(allocator: Allocator) Allocator.Error!Properties { - var props = Properties{}; - try props.setup(allocator); - return props; -} - -pub fn setup(props: *Properties, allocator: Allocator) Allocator.Error!void { - props.setupInner(allocator) catch |err| { - switch (err) { - error.OutOfMemory => |e| return e, - else => unreachable, - } +const Data = struct { + core_s1: []const u16 = undefined, + core_s2: []const u8 = undefined, + props_s1: []const u16 = undefined, + props_s2: []const u8 = undefined, + num_s1: []const u16 = undefined, + num_s2: []const u8 = undefined, +}; + +const properties = properties: { + const core_props = @import("core_props"); + const props_data = @import("props"); + const numeric = @import("numeric"); + break :properties Data{ + .core_s1 = &core_props.s1, + .core_s2 = &core_props.s2, + .props_s1 = &props_data.s1, + .props_s2 = &props_data.s2, + .num_s1 = &numeric.s1, + .num_s2 = &numeric.s2, }; -} - -inline fn setupInner(props: *Properties, allocator: Allocator) !void { - const endian = builtin.cpu.arch.endian(); - - // Process DerivedCoreProperties.txt - const core_bytes = @embedFile("core_props"); - var core_fbs = std.io.fixedBufferStream(core_bytes); - var core_reader = core_fbs.reader(); - - const core_stage_1_len: u16 = try core_reader.readInt(u16, endian); - props.core_s1 = try allocator.alloc(u16, core_stage_1_len); - errdefer allocator.free(props.core_s1); - for (0..core_stage_1_len) |i| props.core_s1[i] = try core_reader.readInt(u16, endian); - - const core_stage_2_len: u16 = try core_reader.readInt(u16, endian); - props.core_s2 = try allocator.alloc(u8, core_stage_2_len); - errdefer allocator.free(props.core_s2); - _ = try core_reader.readAll(props.core_s2); - - // Process PropList.txt - const props_bytes = @embedFile("props"); - var props_fbs = std.io.fixedBufferStream(props_bytes); - var props_reader = props_fbs.reader(); - - const stage_1_len: u16 = try props_reader.readInt(u16, endian); - props.props_s1 = try allocator.alloc(u16, stage_1_len); - errdefer allocator.free(props.props_s1); - for (0..stage_1_len) |i| props.props_s1[i] = try props_reader.readInt(u16, endian); - - const stage_2_len: u16 = try props_reader.readInt(u16, endian); - props.props_s2 = try allocator.alloc(u8, stage_2_len); - errdefer allocator.free(props.props_s2); - _ = try props_reader.readAll(props.props_s2); - - // Process DerivedNumericType.txt - const num_bytes = @embedFile("numeric"); - var num_fbs = std.io.fixedBufferStream(num_bytes); - var num_reader = num_fbs.reader(); - - const num_stage_1_len: u16 = try num_reader.readInt(u16, endian); - props.num_s1 = try allocator.alloc(u16, num_stage_1_len); - errdefer allocator.free(props.num_s1); - for (0..num_stage_1_len) |i| props.num_s1[i] = try num_reader.readInt(u16, endian); - - const num_stage_2_len: u16 = try num_reader.readInt(u16, endian); - props.num_s2 = try allocator.alloc(u8, num_stage_2_len); - errdefer allocator.free(props.num_s2); - _ = try num_reader.readAll(props.num_s2); -} +}; -pub fn deinit(self: *const Properties, allocator: Allocator) void { - allocator.free(self.core_s1); - allocator.free(self.core_s2); - allocator.free(self.props_s1); - allocator.free(self.props_s2); - allocator.free(self.num_s1); - allocator.free(self.num_s2); -} +const Properties = @This(); /// True if `cp` is a mathematical symbol. -pub fn isMath(self: Properties, cp: u21) bool { - return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 1 == 1; +pub fn isMath(cp: u21) bool { + return properties.core_s2[properties.core_s1[cp >> 8] + (cp & 0xff)] & 1 == 1; } /// True if `cp` is an alphabetic character. -pub fn isAlphabetic(self: Properties, cp: u21) bool { - return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 2 == 2; +pub fn isAlphabetic(cp: u21) bool { + return properties.core_s2[properties.core_s1[cp >> 8] + (cp & 0xff)] & 2 == 2; } /// True if `cp` is a valid identifier start character. -pub fn isIdStart(self: Properties, cp: u21) bool { - return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 4 == 4; +pub fn isIdStart(cp: u21) bool { + return properties.core_s2[properties.core_s1[cp >> 8] + (cp & 0xff)] & 4 == 4; } /// True if `cp` is a valid identifier continuation character. -pub fn isIdContinue(self: Properties, cp: u21) bool { - return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 8 == 8; +pub fn isIdContinue(cp: u21) bool { + return properties.core_s2[properties.core_s1[cp >> 8] + (cp & 0xff)] & 8 == 8; } /// True if `cp` is a valid extended identifier start character. -pub fn isXidStart(self: Properties, cp: u21) bool { - return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 16 == 16; +pub fn isXidStart(cp: u21) bool { + return properties.core_s2[properties.core_s1[cp >> 8] + (cp & 0xff)] & 16 == 16; } /// True if `cp` is a valid extended identifier continuation character. -pub fn isXidContinue(self: Properties, cp: u21) bool { - return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 32 == 32; +pub fn isXidContinue(cp: u21) bool { + return properties.core_s2[properties.core_s1[cp >> 8] + (cp & 0xff)] & 32 == 32; } /// True if `cp` is a whitespace character. -pub fn isWhitespace(self: Properties, cp: u21) bool { - return self.props_s2[self.props_s1[cp >> 8] + (cp & 0xff)] & 1 == 1; +pub fn isWhitespace(cp: u21) bool { + return properties.props_s2[properties.props_s1[cp >> 8] + (cp & 0xff)] & 1 == 1; } /// True if `cp` is a hexadecimal digit. -pub fn isHexDigit(self: Properties, cp: u21) bool { - return self.props_s2[self.props_s1[cp >> 8] + (cp & 0xff)] & 2 == 2; +pub fn isHexDigit(cp: u21) bool { + return properties.props_s2[properties.props_s1[cp >> 8] + (cp & 0xff)] & 2 == 2; } /// True if `cp` is a diacritic mark. -pub fn isDiacritic(self: Properties, cp: u21) bool { - return self.props_s2[self.props_s1[cp >> 8] + (cp & 0xff)] & 4 == 4; +pub fn isDiacritic(cp: u21) bool { + return properties.props_s2[properties.props_s1[cp >> 8] + (cp & 0xff)] & 4 == 4; } /// True if `cp` is numeric. -pub fn isNumeric(self: Properties, cp: u21) bool { - return self.num_s2[self.num_s1[cp >> 8] + (cp & 0xff)] & 1 == 1; +pub fn isNumeric(cp: u21) bool { + return properties.num_s2[properties.num_s1[cp >> 8] + (cp & 0xff)] & 1 == 1; } /// True if `cp` is a digit. -pub fn isDigit(self: Properties, cp: u21) bool { - return self.num_s2[self.num_s1[cp >> 8] + (cp & 0xff)] & 2 == 2; +pub fn isDigit(cp: u21) bool { + return properties.num_s2[properties.num_s1[cp >> 8] + (cp & 0xff)] & 2 == 2; } /// True if `cp` is decimal. -pub fn isDecimal(self: Properties, cp: u21) bool { - return self.num_s2[self.num_s1[cp >> 8] + (cp & 0xff)] & 4 == 4; +pub fn isDecimal(cp: u21) bool { + return properties.num_s2[properties.num_s1[cp >> 8] + (cp & 0xff)] & 4 == 4; } test "Props" { - const self = try init(testing.allocator); - defer self.deinit(testing.allocator); - - try testing.expect(self.isHexDigit('F')); - try testing.expect(self.isHexDigit('a')); - try testing.expect(self.isHexDigit('8')); - try testing.expect(!self.isHexDigit('z')); - - try testing.expect(self.isDiacritic('\u{301}')); - try testing.expect(self.isAlphabetic('A')); - try testing.expect(!self.isAlphabetic('3')); - try testing.expect(self.isMath('+')); - - try testing.expect(self.isNumeric('\u{277f}')); - try testing.expect(self.isDigit('\u{2070}')); - try testing.expect(self.isDecimal('3')); - - try testing.expect(!self.isNumeric('1')); - try testing.expect(!self.isDigit('2')); - try testing.expect(!self.isDecimal('g')); -} - -fn testAllocator(allocator: Allocator) !void { - var prop = try Properties.init(allocator); - prop.deinit(allocator); -} - -test "Allocation failure" { - try testing.checkAllAllocationFailures(testing.allocator, testAllocator, .{}); + try testing.expect(Properties.isHexDigit('F')); + try testing.expect(Properties.isHexDigit('a')); + try testing.expect(Properties.isHexDigit('8')); + try testing.expect(!Properties.isHexDigit('z')); + + try testing.expect(Properties.isDiacritic('\u{301}')); + try testing.expect(Properties.isAlphabetic('A')); + try testing.expect(!Properties.isAlphabetic('3')); + try testing.expect(Properties.isMath('+')); + + try testing.expect(Properties.isNumeric('\u{277f}')); + try testing.expect(Properties.isDigit('\u{2070}')); + try testing.expect(Properties.isDecimal('3')); + + try testing.expect(!Properties.isNumeric('1')); + try testing.expect(!Properties.isDigit('2')); + try testing.expect(!Properties.isDecimal('g')); } const std = @import("std"); -- cgit v1.2.3