From e476250ea9326b2550847b301c265115ff375a31 Mon Sep 17 00:00:00 2001 From: Sam Atman Date: Wed, 4 Feb 2026 18:36:18 -0500 Subject: Rest of the 'easy' stuff This gets us up to feature parity with Jacob's work. I want to eliminate that last allocation using the comptime hash map, and then see about eliminating allocations from case comparisons as well. That should just about do it. --- src/GeneralCategories.zig | 102 +++++++----------------- src/LetterCasing.zig | 179 ++++++++++++------------------------------ src/Properties.zig | 195 +++++++++++++++------------------------------- src/Scripts.zig | 82 +++++-------------- 4 files changed, 162 insertions(+), 396 deletions(-) (limited to 'src') diff --git a/src/GeneralCategories.zig b/src/GeneralCategories.zig index eee7e56..9a383bf 100644 --- a/src/GeneralCategories.zig +++ b/src/GeneralCategories.zig @@ -1,8 +1,19 @@ //! General Categories -s1: []u16 = undefined, -s2: []u5 = undefined, -s3: []u5 = undefined, +const Data = struct { + s1: []const u16 = undefined, + s2: []const u5 = undefined, + s3: []const u5 = undefined, +}; + +const general_categories = general_categories: { + const data = @import("gencat"); + break :general_categories Data{ + .s1 = &data.s1, + .s2 = &data.s2, + .s3 = &data.s3, + }; +}; /// General Category pub const Gc = enum { @@ -38,51 +49,14 @@ pub const Gc = enum { Zs, // Separator, Space }; -const GeneralCategories = @This(); - -pub fn init(allocator: Allocator) Allocator.Error!GeneralCategories { - var gencat = GeneralCategories{}; - try gencat.setup(allocator); - return gencat; -} - -pub fn setup(gencat: *GeneralCategories, allocator: Allocator) Allocator.Error!void { - const in_bytes = @embedFile("gencat"); - var in_fbs = std.io.fixedBufferStream(in_bytes); - var reader = in_fbs.reader(); - - const endian = builtin.cpu.arch.endian(); - - const s1_len: u16 = reader.readInt(u16, endian) catch unreachable; - gencat.s1 = try allocator.alloc(u16, s1_len); - errdefer allocator.free(gencat.s1); - for (0..s1_len) |i| gencat.s1[i] = reader.readInt(u16, endian) catch unreachable; - - const s2_len: u16 = reader.readInt(u16, endian) catch unreachable; - gencat.s2 = try allocator.alloc(u5, s2_len); - errdefer allocator.free(gencat.s2); - for (0..s2_len) |i| gencat.s2[i] = @intCast(reader.readInt(u8, endian) catch unreachable); - - const s3_len: u16 = reader.readInt(u8, endian) catch unreachable; - gencat.s3 = try allocator.alloc(u5, s3_len); - errdefer allocator.free(gencat.s3); - for (0..s3_len) |i| gencat.s3[i] = @intCast(reader.readInt(u8, endian) catch unreachable); -} - -pub fn deinit(gencat: *const GeneralCategories, allocator: mem.Allocator) void { - allocator.free(gencat.s1); - allocator.free(gencat.s2); - allocator.free(gencat.s3); -} - /// Lookup the General Category for `cp`. -pub fn gc(gencat: GeneralCategories, cp: u21) Gc { - return @enumFromInt(gencat.s3[gencat.s2[gencat.s1[cp >> 8] + (cp & 0xff)]]); +pub fn gc(cp: u21) Gc { + return @enumFromInt(general_categories.s3[general_categories.s2[general_categories.s1[cp >> 8] + (cp & 0xff)]]); } /// True if `cp` has an C general category. -pub fn isControl(gencat: GeneralCategories, cp: u21) bool { - return switch (gencat.gc(cp)) { +pub fn isControl(cp: u21) bool { + return switch (gc(cp)) { .Cc, .Cf, .Cn, @@ -94,8 +68,8 @@ pub fn isControl(gencat: GeneralCategories, cp: u21) bool { } /// True if `cp` has an L general category. -pub fn isLetter(gencat: GeneralCategories, cp: u21) bool { - return switch (gencat.gc(cp)) { +pub fn isLetter(cp: u21) bool { + return switch (gc(cp)) { .Ll, .Lm, .Lo, @@ -107,8 +81,8 @@ pub fn isLetter(gencat: GeneralCategories, cp: u21) bool { } /// True if `cp` has an M general category. -pub fn isMark(gencat: GeneralCategories, cp: u21) bool { - return switch (gencat.gc(cp)) { +pub fn isMark(cp: u21) bool { + return switch (gc(cp)) { .Mc, .Me, .Mn, @@ -118,8 +92,8 @@ pub fn isMark(gencat: GeneralCategories, cp: u21) bool { } /// True if `cp` has an N general category. -pub fn isNumber(gencat: GeneralCategories, cp: u21) bool { - return switch (gencat.gc(cp)) { +pub fn isNumber(cp: u21) bool { + return switch (gc(cp)) { .Nd, .Nl, .No, @@ -129,8 +103,8 @@ pub fn isNumber(gencat: GeneralCategories, cp: u21) bool { } /// True if `cp` has an P general category. -pub fn isPunctuation(gencat: GeneralCategories, cp: u21) bool { - return switch (gencat.gc(cp)) { +pub fn isPunctuation(cp: u21) bool { + return switch (gc(cp)) { .Pc, .Pd, .Pe, @@ -144,8 +118,8 @@ pub fn isPunctuation(gencat: GeneralCategories, cp: u21) bool { } /// True if `cp` has an S general category. -pub fn isSymbol(gencat: GeneralCategories, cp: u21) bool { - return switch (gencat.gc(cp)) { +pub fn isSymbol(cp: u21) bool { + return switch (gc(cp)) { .Sc, .Sk, .Sm, @@ -156,8 +130,8 @@ pub fn isSymbol(gencat: GeneralCategories, cp: u21) bool { } /// True if `cp` has an Z general category. -pub fn isSeparator(gencat: GeneralCategories, cp: u21) bool { - return switch (gencat.gc(cp)) { +pub fn isSeparator(cp: u21) bool { + return switch (gc(cp)) { .Zl, .Zp, .Zs, @@ -165,19 +139,3 @@ pub fn isSeparator(gencat: GeneralCategories, cp: u21) bool { else => false, }; } - -fn testAllocator(allocator: Allocator) !void { - var gen_cat = try GeneralCategories.init(allocator); - gen_cat.deinit(allocator); -} - -test "Allocation failure" { - try testing.checkAllAllocationFailures(testing.allocator, testAllocator, .{}); -} - -const std = @import("std"); -const builtin = @import("builtin"); -const compress = std.compress; -const mem = std.mem; -const testing = std.testing; -const Allocator = mem.Allocator; diff --git a/src/LetterCasing.zig b/src/LetterCasing.zig index 33096fc..24b67a0 100644 --- a/src/LetterCasing.zig +++ b/src/LetterCasing.zig @@ -1,120 +1,58 @@ const CodePointIterator = @import("code_point").Iterator; - -case_map: [][2]u21 = undefined, -prop_s1: []u16 = undefined, -prop_s2: []u8 = undefined, - -const LetterCasing = @This(); - -pub fn init(allocator: Allocator) Allocator.Error!LetterCasing { - var case = LetterCasing{}; - try case.setup(allocator); - return case; -} - -pub fn setup(case: *LetterCasing, allocator: Allocator) Allocator.Error!void { - case.setupInner(allocator) catch |err| { - switch (err) { - error.OutOfMemory => |e| return e, - else => unreachable, - } +const GeneralCategories = @import("GeneralCategories"); + +const Data = struct { + s1: []const u16 = undefined, + s2: []const u44 = undefined, +}; + +const letter_casing = letter_casing: { + const data = @import("case"); + break :letter_casing Data{ + .s1 = &data.s1, + .s2 = &data.s2, }; -} - -inline fn setupInner(self: *LetterCasing, allocator: mem.Allocator) !void { - const endian = builtin.cpu.arch.endian(); - - self.case_map = try allocator.alloc([2]u21, 0x110000); - errdefer allocator.free(self.case_map); - - for (0..0x110000) |i| { - const cp: u21 = @intCast(i); - self.case_map[cp] = .{ cp, cp }; - } - - // Uppercase - const upper_bytes = @embedFile("upper"); - var upper_fbs = std.io.fixedBufferStream(upper_bytes); - var upper_reader = upper_fbs.reader(); - - while (true) { - const cp = try upper_reader.readInt(i24, endian); - if (cp == 0) break; - const diff = try upper_reader.readInt(i24, endian); - self.case_map[@intCast(cp)][0] = @intCast(cp + diff); - } - - // Lowercase - const lower_bytes = @embedFile("lower"); - var lower_fbs = std.io.fixedBufferStream(lower_bytes); - var lower_reader = lower_fbs.reader(); - - while (true) { - const cp = try lower_reader.readInt(i24, endian); - if (cp == 0) break; - const diff = try lower_reader.readInt(i24, endian); - self.case_map[@intCast(cp)][1] = @intCast(cp + diff); - } - - // Case properties - const cp_bytes = @embedFile("case_prop"); - var cp_fbs = std.io.fixedBufferStream(cp_bytes); - var cp_reader = cp_fbs.reader(); - - const stage_1_len: u16 = try cp_reader.readInt(u16, endian); - self.prop_s1 = try allocator.alloc(u16, stage_1_len); - errdefer allocator.free(self.prop_s1); - for (0..stage_1_len) |i| self.prop_s1[i] = try cp_reader.readInt(u16, endian); - - const stage_2_len: u16 = try cp_reader.readInt(u16, endian); - self.prop_s2 = try allocator.alloc(u8, stage_2_len); - errdefer allocator.free(self.prop_s2); - _ = try cp_reader.readAll(self.prop_s2); -} - -pub fn deinit(self: *const LetterCasing, allocator: mem.Allocator) void { - allocator.free(self.case_map); - allocator.free(self.prop_s1); - allocator.free(self.prop_s2); -} +}; // Returns true if `cp` is either upper, lower, or title case. -pub fn isCased(self: LetterCasing, cp: u21) bool { - return self.prop_s2[self.prop_s1[cp >> 8] + (cp & 0xff)] & 4 == 4; +pub fn isCased(cp: u21) bool { + return isUpper(cp) or isLower(cp) or GeneralCategories.gc(cp) == .Lt; } // Returns true if `cp` is uppercase. -pub fn isUpper(self: LetterCasing, cp: u21) bool { - return self.prop_s2[self.prop_s1[cp >> 8] + (cp & 0xff)] & 2 == 2; +pub fn isUpper(cp: u21) bool { + // isUpper is true if we have a mapping to a lower character (bit 1) + return letter_casing.s2[letter_casing.s1[cp >> 8] + (cp & 0xff)] & 1 == 1; } -/// Returns true if `str` is all uppercase. -pub fn isUpperStr(self: LetterCasing, str: []const u8) bool { +/// Returns true if `str` is all non-lowercase. +pub fn isUpperStr(str: []const u8) bool { var iter = CodePointIterator{ .bytes = str }; return while (iter.next()) |cp| { - if (self.isCased(cp.code) and !self.isUpper(cp.code)) break false; + if (isLower(cp.code)) break false; } else true; } test "isUpperStr" { - const cd = try init(testing.allocator); - defer cd.deinit(testing.allocator); - - try testing.expect(cd.isUpperStr("HELLO, WORLD 2112!")); - try testing.expect(!cd.isUpperStr("hello, world 2112!")); - try testing.expect(!cd.isUpperStr("Hello, World 2112!")); + try testing.expect(isUpperStr("HELLO, WORLD 2112!")); + try testing.expect(!isUpperStr("hello, world 2112!")); + try testing.expect(!isUpperStr("Hello, World 2112!")); } /// Returns uppercase mapping for `cp`. -pub fn toUpper(self: LetterCasing, cp: u21) u21 { - return self.case_map[cp][0]; +pub fn toUpper(cp: u21) u21 { + const case_prop = letter_casing.s2[letter_casing.s1[cp >> 8] + (cp & 0xff)]; + if (case_prop & 2 == 2) { + return @intCast(case_prop >> (21 + 2)); + } else { + return cp; + } } /// Returns a new string with all letters in uppercase. /// Caller must free returned bytes with `allocator`. pub fn toUpperStr( - self: LetterCasing, allocator: mem.Allocator, str: []const u8, ) ![]u8 { @@ -125,7 +63,7 @@ pub fn toUpperStr( var buf: [4]u8 = undefined; while (iter.next()) |cp| { - const len = try unicode.utf8Encode(self.toUpper(cp.code), &buf); + const len = try unicode.utf8Encode(toUpper(cp.code), &buf); try bytes.appendSlice(buf[0..len]); } @@ -133,46 +71,45 @@ pub fn toUpperStr( } test "toUpperStr" { - const cd = try init(testing.allocator); - defer cd.deinit(testing.allocator); - - const uppered = try cd.toUpperStr(testing.allocator, "Hello, World 2112!"); + const uppered = try toUpperStr(testing.allocator, "Hello, World 2112!"); defer testing.allocator.free(uppered); try testing.expectEqualStrings("HELLO, WORLD 2112!", uppered); } // Returns true if `cp` is lowercase. -pub fn isLower(self: LetterCasing, cp: u21) bool { - return self.prop_s2[self.prop_s1[cp >> 8] + (cp & 0xff)] & 1 == 1; +pub fn isLower(cp: u21) bool { + // isLower is true if we have a mapping to an upper character (bit 2) + return letter_casing.s2[letter_casing.s1[cp >> 8] + (cp & 0xff)] & 2 == 2; } -/// Returns true if `str` is all lowercase. -pub fn isLowerStr(self: LetterCasing, str: []const u8) bool { +/// Returns true if `str` is all non-uppercase. +pub fn isLowerStr(str: []const u8) bool { var iter = CodePointIterator{ .bytes = str }; return while (iter.next()) |cp| { - if (self.isCased(cp.code) and !self.isLower(cp.code)) break false; + if (isUpper(cp.code)) break false; } else true; } test "isLowerStr" { - const cd = try init(testing.allocator); - defer cd.deinit(testing.allocator); - - try testing.expect(cd.isLowerStr("hello, world 2112!")); - try testing.expect(!cd.isLowerStr("HELLO, WORLD 2112!")); - try testing.expect(!cd.isLowerStr("Hello, World 2112!")); + try testing.expect(isLowerStr("hello, world 2112!")); + try testing.expect(!isLowerStr("HELLO, WORLD 2112!")); + try testing.expect(!isLowerStr("Hello, World 2112!")); } /// Returns lowercase mapping for `cp`. -pub fn toLower(self: LetterCasing, cp: u21) u21 { - return self.case_map[cp][1]; +pub fn toLower(cp: u21) u21 { + const case_prop = letter_casing.s2[letter_casing.s1[cp >> 8] + (cp & 0xff)]; + if (case_prop & 1 == 1) { + return @intCast((case_prop >> 2) & 0x1FFFFF); + } else { + return cp; + } } /// Returns a new string with all letters in lowercase. /// Caller must free returned bytes with `allocator`. pub fn toLowerStr( - self: LetterCasing, allocator: mem.Allocator, str: []const u8, ) ![]u8 { @@ -183,7 +120,7 @@ pub fn toLowerStr( var buf: [4]u8 = undefined; while (iter.next()) |cp| { - const len = try unicode.utf8Encode(self.toLower(cp.code), &buf); + const len = try unicode.utf8Encode(toLower(cp.code), &buf); try bytes.appendSlice(buf[0..len]); } @@ -191,27 +128,13 @@ pub fn toLowerStr( } test "toLowerStr" { - const cd = try init(testing.allocator); - defer cd.deinit(testing.allocator); - - const lowered = try cd.toLowerStr(testing.allocator, "Hello, World 2112!"); + const lowered = try toLowerStr(testing.allocator, "Hello, World 2112!"); defer testing.allocator.free(lowered); try testing.expectEqualStrings("hello, world 2112!", lowered); } -fn testAllocator(allocator: Allocator) !void { - var prop = try LetterCasing.init(allocator); - prop.deinit(allocator); -} - -test "Allocation failure" { - try testing.checkAllAllocationFailures(testing.allocator, testAllocator, .{}); -} - const std = @import("std"); const builtin = @import("builtin"); -const compress = std.compress; const mem = std.mem; -const Allocator = std.mem.Allocator; const testing = std.testing; const unicode = std.unicode; diff --git a/src/Properties.zig b/src/Properties.zig index 432d176..f8c7cfc 100644 --- a/src/Properties.zig +++ b/src/Properties.zig @@ -1,177 +1,108 @@ //! Properties module -core_s1: []u16 = undefined, -core_s2: []u8 = undefined, -props_s1: []u16 = undefined, -props_s2: []u8 = undefined, -num_s1: []u16 = undefined, -num_s2: []u8 = undefined, - -const Properties = @This(); - -pub fn init(allocator: Allocator) Allocator.Error!Properties { - var props = Properties{}; - try props.setup(allocator); - return props; -} - -pub fn setup(props: *Properties, allocator: Allocator) Allocator.Error!void { - props.setupInner(allocator) catch |err| { - switch (err) { - error.OutOfMemory => |e| return e, - else => unreachable, - } +const Data = struct { + core_s1: []const u16 = undefined, + core_s2: []const u8 = undefined, + props_s1: []const u16 = undefined, + props_s2: []const u8 = undefined, + num_s1: []const u16 = undefined, + num_s2: []const u8 = undefined, +}; + +const properties = properties: { + const core_props = @import("core_props"); + const props_data = @import("props"); + const numeric = @import("numeric"); + break :properties Data{ + .core_s1 = &core_props.s1, + .core_s2 = &core_props.s2, + .props_s1 = &props_data.s1, + .props_s2 = &props_data.s2, + .num_s1 = &numeric.s1, + .num_s2 = &numeric.s2, }; -} - -inline fn setupInner(props: *Properties, allocator: Allocator) !void { - const endian = builtin.cpu.arch.endian(); - - // Process DerivedCoreProperties.txt - const core_bytes = @embedFile("core_props"); - var core_fbs = std.io.fixedBufferStream(core_bytes); - var core_reader = core_fbs.reader(); - - const core_stage_1_len: u16 = try core_reader.readInt(u16, endian); - props.core_s1 = try allocator.alloc(u16, core_stage_1_len); - errdefer allocator.free(props.core_s1); - for (0..core_stage_1_len) |i| props.core_s1[i] = try core_reader.readInt(u16, endian); - - const core_stage_2_len: u16 = try core_reader.readInt(u16, endian); - props.core_s2 = try allocator.alloc(u8, core_stage_2_len); - errdefer allocator.free(props.core_s2); - _ = try core_reader.readAll(props.core_s2); - - // Process PropList.txt - const props_bytes = @embedFile("props"); - var props_fbs = std.io.fixedBufferStream(props_bytes); - var props_reader = props_fbs.reader(); - - const stage_1_len: u16 = try props_reader.readInt(u16, endian); - props.props_s1 = try allocator.alloc(u16, stage_1_len); - errdefer allocator.free(props.props_s1); - for (0..stage_1_len) |i| props.props_s1[i] = try props_reader.readInt(u16, endian); - - const stage_2_len: u16 = try props_reader.readInt(u16, endian); - props.props_s2 = try allocator.alloc(u8, stage_2_len); - errdefer allocator.free(props.props_s2); - _ = try props_reader.readAll(props.props_s2); - - // Process DerivedNumericType.txt - const num_bytes = @embedFile("numeric"); - var num_fbs = std.io.fixedBufferStream(num_bytes); - var num_reader = num_fbs.reader(); - - const num_stage_1_len: u16 = try num_reader.readInt(u16, endian); - props.num_s1 = try allocator.alloc(u16, num_stage_1_len); - errdefer allocator.free(props.num_s1); - for (0..num_stage_1_len) |i| props.num_s1[i] = try num_reader.readInt(u16, endian); - - const num_stage_2_len: u16 = try num_reader.readInt(u16, endian); - props.num_s2 = try allocator.alloc(u8, num_stage_2_len); - errdefer allocator.free(props.num_s2); - _ = try num_reader.readAll(props.num_s2); -} +}; -pub fn deinit(self: *const Properties, allocator: Allocator) void { - allocator.free(self.core_s1); - allocator.free(self.core_s2); - allocator.free(self.props_s1); - allocator.free(self.props_s2); - allocator.free(self.num_s1); - allocator.free(self.num_s2); -} +const Properties = @This(); /// True if `cp` is a mathematical symbol. -pub fn isMath(self: Properties, cp: u21) bool { - return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 1 == 1; +pub fn isMath(cp: u21) bool { + return properties.core_s2[properties.core_s1[cp >> 8] + (cp & 0xff)] & 1 == 1; } /// True if `cp` is an alphabetic character. -pub fn isAlphabetic(self: Properties, cp: u21) bool { - return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 2 == 2; +pub fn isAlphabetic(cp: u21) bool { + return properties.core_s2[properties.core_s1[cp >> 8] + (cp & 0xff)] & 2 == 2; } /// True if `cp` is a valid identifier start character. -pub fn isIdStart(self: Properties, cp: u21) bool { - return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 4 == 4; +pub fn isIdStart(cp: u21) bool { + return properties.core_s2[properties.core_s1[cp >> 8] + (cp & 0xff)] & 4 == 4; } /// True if `cp` is a valid identifier continuation character. -pub fn isIdContinue(self: Properties, cp: u21) bool { - return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 8 == 8; +pub fn isIdContinue(cp: u21) bool { + return properties.core_s2[properties.core_s1[cp >> 8] + (cp & 0xff)] & 8 == 8; } /// True if `cp` is a valid extended identifier start character. -pub fn isXidStart(self: Properties, cp: u21) bool { - return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 16 == 16; +pub fn isXidStart(cp: u21) bool { + return properties.core_s2[properties.core_s1[cp >> 8] + (cp & 0xff)] & 16 == 16; } /// True if `cp` is a valid extended identifier continuation character. -pub fn isXidContinue(self: Properties, cp: u21) bool { - return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 32 == 32; +pub fn isXidContinue(cp: u21) bool { + return properties.core_s2[properties.core_s1[cp >> 8] + (cp & 0xff)] & 32 == 32; } /// True if `cp` is a whitespace character. -pub fn isWhitespace(self: Properties, cp: u21) bool { - return self.props_s2[self.props_s1[cp >> 8] + (cp & 0xff)] & 1 == 1; +pub fn isWhitespace(cp: u21) bool { + return properties.props_s2[properties.props_s1[cp >> 8] + (cp & 0xff)] & 1 == 1; } /// True if `cp` is a hexadecimal digit. -pub fn isHexDigit(self: Properties, cp: u21) bool { - return self.props_s2[self.props_s1[cp >> 8] + (cp & 0xff)] & 2 == 2; +pub fn isHexDigit(cp: u21) bool { + return properties.props_s2[properties.props_s1[cp >> 8] + (cp & 0xff)] & 2 == 2; } /// True if `cp` is a diacritic mark. -pub fn isDiacritic(self: Properties, cp: u21) bool { - return self.props_s2[self.props_s1[cp >> 8] + (cp & 0xff)] & 4 == 4; +pub fn isDiacritic(cp: u21) bool { + return properties.props_s2[properties.props_s1[cp >> 8] + (cp & 0xff)] & 4 == 4; } /// True if `cp` is numeric. -pub fn isNumeric(self: Properties, cp: u21) bool { - return self.num_s2[self.num_s1[cp >> 8] + (cp & 0xff)] & 1 == 1; +pub fn isNumeric(cp: u21) bool { + return properties.num_s2[properties.num_s1[cp >> 8] + (cp & 0xff)] & 1 == 1; } /// True if `cp` is a digit. -pub fn isDigit(self: Properties, cp: u21) bool { - return self.num_s2[self.num_s1[cp >> 8] + (cp & 0xff)] & 2 == 2; +pub fn isDigit(cp: u21) bool { + return properties.num_s2[properties.num_s1[cp >> 8] + (cp & 0xff)] & 2 == 2; } /// True if `cp` is decimal. -pub fn isDecimal(self: Properties, cp: u21) bool { - return self.num_s2[self.num_s1[cp >> 8] + (cp & 0xff)] & 4 == 4; +pub fn isDecimal(cp: u21) bool { + return properties.num_s2[properties.num_s1[cp >> 8] + (cp & 0xff)] & 4 == 4; } test "Props" { - const self = try init(testing.allocator); - defer self.deinit(testing.allocator); - - try testing.expect(self.isHexDigit('F')); - try testing.expect(self.isHexDigit('a')); - try testing.expect(self.isHexDigit('8')); - try testing.expect(!self.isHexDigit('z')); - - try testing.expect(self.isDiacritic('\u{301}')); - try testing.expect(self.isAlphabetic('A')); - try testing.expect(!self.isAlphabetic('3')); - try testing.expect(self.isMath('+')); - - try testing.expect(self.isNumeric('\u{277f}')); - try testing.expect(self.isDigit('\u{2070}')); - try testing.expect(self.isDecimal('3')); - - try testing.expect(!self.isNumeric('1')); - try testing.expect(!self.isDigit('2')); - try testing.expect(!self.isDecimal('g')); -} - -fn testAllocator(allocator: Allocator) !void { - var prop = try Properties.init(allocator); - prop.deinit(allocator); -} - -test "Allocation failure" { - try testing.checkAllAllocationFailures(testing.allocator, testAllocator, .{}); + try testing.expect(Properties.isHexDigit('F')); + try testing.expect(Properties.isHexDigit('a')); + try testing.expect(Properties.isHexDigit('8')); + try testing.expect(!Properties.isHexDigit('z')); + + try testing.expect(Properties.isDiacritic('\u{301}')); + try testing.expect(Properties.isAlphabetic('A')); + try testing.expect(!Properties.isAlphabetic('3')); + try testing.expect(Properties.isMath('+')); + + try testing.expect(Properties.isNumeric('\u{277f}')); + try testing.expect(Properties.isDigit('\u{2070}')); + try testing.expect(Properties.isDecimal('3')); + + try testing.expect(!Properties.isNumeric('1')); + try testing.expect(!Properties.isDigit('2')); + try testing.expect(!Properties.isDecimal('g')); } const std = @import("std"); diff --git a/src/Scripts.zig b/src/Scripts.zig index 719b01f..4938318 100644 --- a/src/Scripts.zig +++ b/src/Scripts.zig @@ -1,8 +1,18 @@ //! Scripts Module +const Data = struct { + s1: []const u16 = undefined, + s2: []const u8 = undefined, + s3: []const u8 = undefined, +}; -s1: []u16 = undefined, -s2: []u8 = undefined, -s3: []u8 = undefined, +const scripts = scripts: { + const data = @import("script"); + break :scripts Data{ + .s1 = &data.s1, + .s2 = &data.s2, + .s3 = &data.s3, + }; +}; /// Scripts enum pub const Script = enum { @@ -178,76 +188,20 @@ pub const Script = enum { Yi, Zanabazar_Square, }; -const Scripts = @This(); - -pub fn init(allocator: Allocator) Allocator.Error!Scripts { - var scripts = Scripts{}; - try scripts.setup(allocator); - return scripts; -} - -pub fn setup(scripts: *Scripts, allocator: Allocator) Allocator.Error!void { - scripts.setupInner(allocator) catch |err| { - switch (err) { - error.OutOfMemory => |e| return e, - else => unreachable, - } - }; -} - -inline fn setupInner(scripts: *Scripts, allocator: mem.Allocator) !void { - const in_bytes = @embedFile("scripts"); - var in_fbs = std.io.fixedBufferStream(in_bytes); - var reader = in_fbs.reader(); - - const endian = builtin.cpu.arch.endian(); - - const s1_len: u16 = try reader.readInt(u16, endian); - scripts.s1 = try allocator.alloc(u16, s1_len); - errdefer allocator.free(scripts.s1); - for (0..s1_len) |i| scripts.s1[i] = try reader.readInt(u16, endian); - - const s2_len: u16 = try reader.readInt(u16, endian); - scripts.s2 = try allocator.alloc(u8, s2_len); - errdefer allocator.free(scripts.s2); - _ = try reader.readAll(scripts.s2); - - const s3_len: u16 = try reader.readInt(u8, endian); - scripts.s3 = try allocator.alloc(u8, s3_len); - errdefer allocator.free(scripts.s3); - _ = try reader.readAll(scripts.s3); -} - -pub fn deinit(self: *const Scripts, allocator: mem.Allocator) void { - allocator.free(self.s1); - allocator.free(self.s2); - allocator.free(self.s3); -} /// Lookup the Script type for `cp`. -pub fn script(self: Scripts, cp: u21) ?Script { - const byte = self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]]; +pub fn script(cp: u21) ?Script { + const byte = scripts.s3[scripts.s2[scripts.s1[cp >> 8] + (cp & 0xff)]]; if (byte == 0) return null; return @enumFromInt(byte); } test "script" { - const self = try init(std.testing.allocator); - defer self.deinit(std.testing.allocator); - try testing.expectEqual(Script.Latin, self.script('A').?); -} - -fn testAllocator(allocator: Allocator) !void { - var prop = try Scripts.init(allocator); - prop.deinit(allocator); -} - -test "Allocation failure" { - try testing.checkAllAllocationFailures(testing.allocator, testAllocator, .{}); + try testing.expectEqual(Script.Latin, script('A').?); + // try testing.expectEqual(Script.Deseret, script('𐐌').?); } const std = @import("std"); const builtin = @import("builtin"); -const mem = std.mem; -const Allocator = mem.Allocator; +const unicode = std.unicode; const testing = std.testing; -- cgit v1.2.3