From d2d42bf3ef5490f6fdec73508c2493a666ecee41 Mon Sep 17 00:00:00 2001 From: Sam Atman Date: Wed, 30 Apr 2025 16:48:07 -0400 Subject: Setup variants for all allocating modules This harmonizes the allocating modules in a couple of ways. All can now be constructed by pointer, and all treat various miscellaneous read failures as `unreachable`, which indeed they should be. The README has been updated to inform users of this option. --- src/CaseFolding.zig | 18 +++++++-- src/GeneralCategories.zig | 59 +++++++++++++++------------- src/Graphemes.zig | 32 ++++++++-------- src/LetterCasing.zig | 62 ++++++++++++++++++------------ src/Normalize.zig | 39 +++++++++++++++---- src/Properties.zig | 98 +++++++++++++++++++++++++++-------------------- src/Scripts.zig | 66 ++++++++++++++++++------------- 7 files changed, 228 insertions(+), 146 deletions(-) (limited to 'src') diff --git a/src/CaseFolding.zig b/src/CaseFolding.zig index 162e82f..2e53bfa 100644 --- a/src/CaseFolding.zig +++ b/src/CaseFolding.zig @@ -11,20 +11,21 @@ owns_normalize: bool, const CaseFolding = @This(); -pub fn init(allocator: Allocator) !CaseFolding { +pub fn init(allocator: Allocator) Allocator.Error!CaseFolding { var case_fold: CaseFolding = undefined; try case_fold.setup(allocator); return case_fold; } -pub fn initWithNormalize(allocator: Allocator, norm: Normalize) !CaseFolding { +pub fn initWithNormalize(allocator: Allocator, norm: Normalize) Allocator.Error!CaseFolding { var casefold: CaseFolding = undefined; try casefold.setupWithNormalize(allocator, norm); return casefold; } -pub fn setup(casefold: *CaseFolding, allocator: Allocator) !void { +pub fn setup(casefold: *CaseFolding, allocator: Allocator) Allocator.Error!void { try casefold.setupImpl(allocator); + // Handle normalize memory separately during setup: casefold.owns_normalize = false; errdefer casefold.deinit(allocator); try casefold.normalize.setup(allocator); @@ -37,7 +38,16 @@ pub fn setupWithNormalize(casefold: *CaseFolding, allocator: Allocator, norm: No casefold.owns_normalize = false; } -fn setupImpl(casefold: *CaseFolding, allocator: Allocator) !void { +fn setupImpl(casefold: *CaseFolding, allocator: Allocator) Allocator.Error!void { + casefold.setupImplInner(allocator) catch |err| { + switch (err) { + error.OutOfMemory => |e| return e, + else => unreachable, + } + }; +} + +inline fn setupImplInner(casefold: *CaseFolding, allocator: Allocator) !void { const decompressor = compress.flate.inflate.decompressor; const in_bytes = @embedFile("fold"); var in_fbs = std.io.fixedBufferStream(in_bytes); diff --git a/src/GeneralCategories.zig b/src/GeneralCategories.zig index a69f7a2..b7c82c0 100644 --- a/src/GeneralCategories.zig +++ b/src/GeneralCategories.zig @@ -1,7 +1,8 @@ -const std = @import("std"); -const builtin = @import("builtin"); -const compress = std.compress; -const mem = std.mem; +//! General Categories + +s1: []u16 = undefined, +s2: []u5 = undefined, +s3: []u5 = undefined, /// General Category pub const Gc = enum { @@ -37,13 +38,15 @@ pub const Gc = enum { Zs, // Separator, Space }; -s1: []u16 = undefined, -s2: []u5 = undefined, -s3: []u5 = undefined, +const GeneralCategories = @This(); -const Self = @This(); +pub fn init(allocator: Allocator) Allocator.Error!GeneralCategories { + var gencat = GeneralCategories{}; + try gencat.setup(allocator); + return gencat; +} -pub fn init(allocator: mem.Allocator) !Self { +pub fn setup(self: *GeneralCategories, allocator: Allocator) Allocator.Error!void { const decompressor = compress.flate.inflate.decompressor; const in_bytes = @embedFile("gencat"); var in_fbs = std.io.fixedBufferStream(in_bytes); @@ -52,39 +55,35 @@ pub fn init(allocator: mem.Allocator) !Self { const endian = builtin.cpu.arch.endian(); - var self = Self{}; - - const s1_len: u16 = try reader.readInt(u16, endian); + const s1_len: u16 = reader.readInt(u16, endian) catch unreachable; self.s1 = try allocator.alloc(u16, s1_len); errdefer allocator.free(self.s1); for (0..s1_len) |i| self.s1[i] = try reader.readInt(u16, endian); - const s2_len: u16 = try reader.readInt(u16, endian); + const s2_len: u16 = reader.readInt(u16, endian) catch unreachable; self.s2 = try allocator.alloc(u5, s2_len); errdefer allocator.free(self.s2); - for (0..s2_len) |i| self.s2[i] = @intCast(try reader.readInt(u8, endian)); + for (0..s2_len) |i| self.s2[i] = @intCast(reader.readInt(u8, endian) catch unreachable); - const s3_len: u16 = try reader.readInt(u8, endian); + const s3_len: u16 = reader.readInt(u8, endian) catch unreachable; self.s3 = try allocator.alloc(u5, s3_len); errdefer allocator.free(self.s3); - for (0..s3_len) |i| self.s3[i] = @intCast(try reader.readInt(u8, endian)); - - return self; + for (0..s3_len) |i| self.s3[i] = @intCast(reader.readInt(u8, endian) catch unreachable); } -pub fn deinit(self: *const Self, allocator: mem.Allocator) void { +pub fn deinit(self: *const GeneralCategories, allocator: mem.Allocator) void { allocator.free(self.s1); allocator.free(self.s2); allocator.free(self.s3); } /// Lookup the General Category for `cp`. -pub fn gc(self: Self, cp: u21) Gc { +pub fn gc(self: GeneralCategories, cp: u21) Gc { return @enumFromInt(self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]]); } /// True if `cp` has an C general category. -pub fn isControl(self: Self, cp: u21) bool { +pub fn isControl(self: GeneralCategories, cp: u21) bool { return switch (self.gc(cp)) { .Cc, .Cf, @@ -97,7 +96,7 @@ pub fn isControl(self: Self, cp: u21) bool { } /// True if `cp` has an L general category. -pub fn isLetter(self: Self, cp: u21) bool { +pub fn isLetter(self: GeneralCategories, cp: u21) bool { return switch (self.gc(cp)) { .Ll, .Lm, @@ -110,7 +109,7 @@ pub fn isLetter(self: Self, cp: u21) bool { } /// True if `cp` has an M general category. -pub fn isMark(self: Self, cp: u21) bool { +pub fn isMark(self: GeneralCategories, cp: u21) bool { return switch (self.gc(cp)) { .Mc, .Me, @@ -121,7 +120,7 @@ pub fn isMark(self: Self, cp: u21) bool { } /// True if `cp` has an N general category. -pub fn isNumber(self: Self, cp: u21) bool { +pub fn isNumber(self: GeneralCategories, cp: u21) bool { return switch (self.gc(cp)) { .Nd, .Nl, @@ -132,7 +131,7 @@ pub fn isNumber(self: Self, cp: u21) bool { } /// True if `cp` has an P general category. -pub fn isPunctuation(self: Self, cp: u21) bool { +pub fn isPunctuation(self: GeneralCategories, cp: u21) bool { return switch (self.gc(cp)) { .Pc, .Pd, @@ -147,7 +146,7 @@ pub fn isPunctuation(self: Self, cp: u21) bool { } /// True if `cp` has an S general category. -pub fn isSymbol(self: Self, cp: u21) bool { +pub fn isSymbol(self: GeneralCategories, cp: u21) bool { return switch (self.gc(cp)) { .Sc, .Sk, @@ -159,7 +158,7 @@ pub fn isSymbol(self: Self, cp: u21) bool { } /// True if `cp` has an Z general category. -pub fn isSeparator(self: Self, cp: u21) bool { +pub fn isSeparator(self: GeneralCategories, cp: u21) bool { return switch (self.gc(cp)) { .Zl, .Zp, @@ -168,3 +167,9 @@ pub fn isSeparator(self: Self, cp: u21) bool { else => false, }; } + +const std = @import("std"); +const builtin = @import("builtin"); +const compress = std.compress; +const mem = std.mem; +const Allocator = mem.Allocator; diff --git a/src/Graphemes.zig b/src/Graphemes.zig index 79cd2c6..7bf328a 100644 --- a/src/Graphemes.zig +++ b/src/Graphemes.zig @@ -14,7 +14,13 @@ s3: []u8 = undefined, const Graphemes = @This(); -pub inline fn init(allocator: mem.Allocator) mem.Allocator.Error!Graphemes { +pub fn init(allocator: Allocator) Allocator.Error!Graphemes { + var graphemes = Graphemes{}; + try graphemes.setup(allocator); + return graphemes; +} + +pub fn setup(graphemes: *Graphemes, allocator: Allocator) Allocator.Error!void { const decompressor = compress.flate.inflate.decompressor; const in_bytes = @embedFile("gbp"); var in_fbs = std.io.fixedBufferStream(in_bytes); @@ -23,27 +29,23 @@ pub inline fn init(allocator: mem.Allocator) mem.Allocator.Error!Graphemes { const endian = builtin.cpu.arch.endian(); - var self = Graphemes{}; - const s1_len: u16 = reader.readInt(u16, endian) catch unreachable; - self.s1 = try allocator.alloc(u16, s1_len); - errdefer allocator.free(self.s1); - for (0..s1_len) |i| self.s1[i] = reader.readInt(u16, endian) catch unreachable; + graphemes.s1 = try allocator.alloc(u16, s1_len); + errdefer allocator.free(graphemes.s1); + for (0..s1_len) |i| graphemes.s1[i] = reader.readInt(u16, endian) catch unreachable; const s2_len: u16 = reader.readInt(u16, endian) catch unreachable; - self.s2 = try allocator.alloc(u16, s2_len); - errdefer allocator.free(self.s2); - for (0..s2_len) |i| self.s2[i] = reader.readInt(u16, endian) catch unreachable; + graphemes.s2 = try allocator.alloc(u16, s2_len); + errdefer allocator.free(graphemes.s2); + for (0..s2_len) |i| graphemes.s2[i] = reader.readInt(u16, endian) catch unreachable; const s3_len: u16 = reader.readInt(u16, endian) catch unreachable; - self.s3 = try allocator.alloc(u8, s3_len); - errdefer allocator.free(self.s3); - _ = reader.readAll(self.s3) catch unreachable; - - return self; + graphemes.s3 = try allocator.alloc(u8, s3_len); + errdefer allocator.free(graphemes.s3); + _ = reader.readAll(graphemes.s3) catch unreachable; } -pub fn deinit(graphemes: *const Graphemes, allocator: mem.Allocator) void { +pub fn deinit(graphemes: *const Graphemes, allocator: Allocator) void { allocator.free(graphemes.s1); allocator.free(graphemes.s2); allocator.free(graphemes.s3); diff --git a/src/LetterCasing.zig b/src/LetterCasing.zig index 0a0acb1..a7260b8 100644 --- a/src/LetterCasing.zig +++ b/src/LetterCasing.zig @@ -1,25 +1,31 @@ -const std = @import("std"); -const builtin = @import("builtin"); -const compress = std.compress; -const mem = std.mem; -const testing = std.testing; -const unicode = std.unicode; - const CodePointIterator = @import("code_point").Iterator; -case_map: [][2]u21, +case_map: [][2]u21 = undefined, prop_s1: []u16 = undefined, prop_s2: []u8 = undefined, -const Self = @This(); +const LetterCasing = @This(); + +pub fn init(allocator: Allocator) Allocator.Error!LetterCasing { + var case = LetterCasing{}; + try case.setup(allocator); + return case; +} -pub fn init(allocator: mem.Allocator) !Self { +pub fn setup(case: *LetterCasing, allocator: Allocator) Allocator.Error!void { + case.setupInner(allocator) catch |err| { + switch (err) { + error.OutOfMemory => |e| return e, + else => unreachable, + } + }; +} + +inline fn setupInner(self: *LetterCasing, allocator: mem.Allocator) !void { const decompressor = compress.flate.inflate.decompressor; const endian = builtin.cpu.arch.endian(); - var self = Self{ - .case_map = try allocator.alloc([2]u21, 0x110000), - }; + self.case_map = try allocator.alloc([2]u21, 0x110000); errdefer allocator.free(self.case_map); for (0..0x110000) |i| { @@ -68,28 +74,26 @@ pub fn init(allocator: mem.Allocator) !Self { self.prop_s2 = try allocator.alloc(u8, stage_2_len); errdefer allocator.free(self.prop_s2); _ = try cp_reader.readAll(self.prop_s2); - - return self; } -pub fn deinit(self: *const Self, allocator: mem.Allocator) void { +pub fn deinit(self: *const LetterCasing, allocator: mem.Allocator) void { allocator.free(self.case_map); allocator.free(self.prop_s1); allocator.free(self.prop_s2); } // Returns true if `cp` is either upper, lower, or title case. -pub fn isCased(self: Self, cp: u21) bool { +pub fn isCased(self: LetterCasing, cp: u21) bool { return self.prop_s2[self.prop_s1[cp >> 8] + (cp & 0xff)] & 4 == 4; } // Returns true if `cp` is uppercase. -pub fn isUpper(self: Self, cp: u21) bool { +pub fn isUpper(self: LetterCasing, cp: u21) bool { return self.prop_s2[self.prop_s1[cp >> 8] + (cp & 0xff)] & 2 == 2; } /// Returns true if `str` is all uppercase. -pub fn isUpperStr(self: Self, str: []const u8) bool { +pub fn isUpperStr(self: LetterCasing, str: []const u8) bool { var iter = CodePointIterator{ .bytes = str }; return while (iter.next()) |cp| { @@ -107,14 +111,14 @@ test "isUpperStr" { } /// Returns uppercase mapping for `cp`. -pub fn toUpper(self: Self, cp: u21) u21 { +pub fn toUpper(self: LetterCasing, cp: u21) u21 { return self.case_map[cp][0]; } /// Returns a new string with all letters in uppercase. /// Caller must free returned bytes with `allocator`. pub fn toUpperStr( - self: Self, + self: LetterCasing, allocator: mem.Allocator, str: []const u8, ) ![]u8 { @@ -142,12 +146,12 @@ test "toUpperStr" { } // Returns true if `cp` is lowercase. -pub fn isLower(self: Self, cp: u21) bool { +pub fn isLower(self: LetterCasing, cp: u21) bool { return self.prop_s2[self.prop_s1[cp >> 8] + (cp & 0xff)] & 1 == 1; } /// Returns true if `str` is all lowercase. -pub fn isLowerStr(self: Self, str: []const u8) bool { +pub fn isLowerStr(self: LetterCasing, str: []const u8) bool { var iter = CodePointIterator{ .bytes = str }; return while (iter.next()) |cp| { @@ -165,14 +169,14 @@ test "isLowerStr" { } /// Returns lowercase mapping for `cp`. -pub fn toLower(self: Self, cp: u21) u21 { +pub fn toLower(self: LetterCasing, cp: u21) u21 { return self.case_map[cp][1]; } /// Returns a new string with all letters in lowercase. /// Caller must free returned bytes with `allocator`. pub fn toLowerStr( - self: Self, + self: LetterCasing, allocator: mem.Allocator, str: []const u8, ) ![]u8 { @@ -198,3 +202,11 @@ test "toLowerStr" { defer testing.allocator.free(lowered); try testing.expectEqualStrings("hello, world 2112!", lowered); } + +const std = @import("std"); +const builtin = @import("builtin"); +const compress = std.compress; +const mem = std.mem; +const Allocator = std.mem.Allocator; +const testing = std.testing; +const unicode = std.unicode; diff --git a/src/Normalize.zig b/src/Normalize.zig index d8c867d..1500b4c 100644 --- a/src/Normalize.zig +++ b/src/Normalize.zig @@ -10,22 +10,47 @@ normp_data: NormPropsData = undefined, const Normalize = @This(); -pub fn init(allocator: Allocator) !Normalize { +pub fn init(allocator: Allocator) Allocator.Error!Normalize { var norm: Normalize = undefined; try norm.setup(allocator); return norm; } -pub fn setup(self: *Normalize, allocator: Allocator) !void { - self.canon_data = try CanonData.init(allocator); +pub fn setup(self: *Normalize, allocator: Allocator) Allocator.Error!void { + self.canon_data = CanonData.init(allocator) catch |err| { + switch (err) { + error.OutOfMemory => |e| return e, + else => unreachable, + } + }; errdefer self.canon_data.deinit(allocator); - self.ccc_data = try CccData.init(allocator); + self.ccc_data = CccData.init(allocator) catch |err| { + switch (err) { + error.OutOfMemory => |e| return e, + else => unreachable, + } + }; errdefer self.ccc_data.deinit(allocator); - self.compat_data = try CompatData.init(allocator); + self.compat_data = CompatData.init(allocator) catch |err| { + switch (err) { + error.OutOfMemory => |e| return e, + else => unreachable, + } + }; errdefer self.compat_data.deinit(allocator); - self.hangul_data = try HangulData.init(allocator); + self.hangul_data = HangulData.init(allocator) catch |err| { + switch (err) { + error.OutOfMemory => |e| return e, + else => unreachable, + } + }; errdefer self.hangul_data.deinit(allocator); - self.normp_data = try NormPropsData.init(allocator); + self.normp_data = NormPropsData.init(allocator) catch |err| { + switch (err) { + error.OutOfMemory => |e| return e, + else => unreachable, + } + }; } pub fn deinit(norm: *const Normalize, allocator: Allocator) void { diff --git a/src/Properties.zig b/src/Properties.zig index 46920be..f7e57ec 100644 --- a/src/Properties.zig +++ b/src/Properties.zig @@ -1,8 +1,4 @@ -const std = @import("std"); -const builtin = @import("builtin"); -const compress = std.compress; -const mem = std.mem; -const testing = std.testing; +//! Properties module core_s1: []u16 = undefined, core_s2: []u8 = undefined, @@ -11,9 +7,24 @@ props_s2: []u8 = undefined, num_s1: []u16 = undefined, num_s2: []u8 = undefined, -const Self = @This(); +const Properties = @This(); + +pub fn init(allocator: Allocator) Allocator.Error!Properties { + var props = Properties{}; + try props.setup(allocator); + return props; +} + +pub fn setup(props: *Properties, allocator: Allocator) Allocator.Error!void { + props.setupInner(allocator) catch |err| { + switch (err) { + error.OutOfMemory => |e| return e, + else => unreachable, + } + }; +} -pub fn init(allocator: mem.Allocator) !Self { +inline fn setupInner(props: *Properties, allocator: Allocator) !void { const decompressor = compress.flate.inflate.decompressor; const endian = builtin.cpu.arch.endian(); @@ -23,17 +34,15 @@ pub fn init(allocator: mem.Allocator) !Self { var core_decomp = decompressor(.raw, core_fbs.reader()); var core_reader = core_decomp.reader(); - var self = Self{}; - const core_stage_1_len: u16 = try core_reader.readInt(u16, endian); - self.core_s1 = try allocator.alloc(u16, core_stage_1_len); - errdefer allocator.free(self.core_s1); - for (0..core_stage_1_len) |i| self.core_s1[i] = try core_reader.readInt(u16, endian); + props.core_s1 = try allocator.alloc(u16, core_stage_1_len); + errdefer allocator.free(props.core_s1); + for (0..core_stage_1_len) |i| props.core_s1[i] = try core_reader.readInt(u16, endian); const core_stage_2_len: u16 = try core_reader.readInt(u16, endian); - self.core_s2 = try allocator.alloc(u8, core_stage_2_len); - errdefer allocator.free(self.core_s2); - _ = try core_reader.readAll(self.core_s2); + props.core_s2 = try allocator.alloc(u8, core_stage_2_len); + errdefer allocator.free(props.core_s2); + _ = try core_reader.readAll(props.core_s2); // Process PropList.txt const props_bytes = @embedFile("props"); @@ -42,14 +51,14 @@ pub fn init(allocator: mem.Allocator) !Self { var props_reader = props_decomp.reader(); const stage_1_len: u16 = try props_reader.readInt(u16, endian); - self.props_s1 = try allocator.alloc(u16, stage_1_len); - errdefer allocator.free(self.props_s1); - for (0..stage_1_len) |i| self.props_s1[i] = try props_reader.readInt(u16, endian); + props.props_s1 = try allocator.alloc(u16, stage_1_len); + errdefer allocator.free(props.props_s1); + for (0..stage_1_len) |i| props.props_s1[i] = try props_reader.readInt(u16, endian); const stage_2_len: u16 = try props_reader.readInt(u16, endian); - self.props_s2 = try allocator.alloc(u8, stage_2_len); - errdefer allocator.free(self.props_s2); - _ = try props_reader.readAll(self.props_s2); + props.props_s2 = try allocator.alloc(u8, stage_2_len); + errdefer allocator.free(props.props_s2); + _ = try props_reader.readAll(props.props_s2); // Process DerivedNumericType.txt const num_bytes = @embedFile("numeric"); @@ -58,19 +67,17 @@ pub fn init(allocator: mem.Allocator) !Self { var num_reader = num_decomp.reader(); const num_stage_1_len: u16 = try num_reader.readInt(u16, endian); - self.num_s1 = try allocator.alloc(u16, num_stage_1_len); - errdefer allocator.free(self.num_s1); - for (0..num_stage_1_len) |i| self.num_s1[i] = try num_reader.readInt(u16, endian); + props.num_s1 = try allocator.alloc(u16, num_stage_1_len); + errdefer allocator.free(props.num_s1); + for (0..num_stage_1_len) |i| props.num_s1[i] = try num_reader.readInt(u16, endian); const num_stage_2_len: u16 = try num_reader.readInt(u16, endian); - self.num_s2 = try allocator.alloc(u8, num_stage_2_len); - errdefer allocator.free(self.num_s2); - _ = try num_reader.readAll(self.num_s2); - - return self; + props.num_s2 = try allocator.alloc(u8, num_stage_2_len); + errdefer allocator.free(props.num_s2); + _ = try num_reader.readAll(props.num_s2); } -pub fn deinit(self: *const Self, allocator: mem.Allocator) void { +pub fn deinit(self: *const Properties, allocator: Allocator) void { allocator.free(self.core_s1); allocator.free(self.core_s2); allocator.free(self.props_s1); @@ -80,62 +87,62 @@ pub fn deinit(self: *const Self, allocator: mem.Allocator) void { } /// True if `cp` is a mathematical symbol. -pub fn isMath(self: Self, cp: u21) bool { +pub fn isMath(self: Properties, cp: u21) bool { return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 1 == 1; } /// True if `cp` is an alphabetic character. -pub fn isAlphabetic(self: Self, cp: u21) bool { +pub fn isAlphabetic(self: Properties, cp: u21) bool { return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 2 == 2; } /// True if `cp` is a valid identifier start character. -pub fn isIdStart(self: Self, cp: u21) bool { +pub fn isIdStart(self: Properties, cp: u21) bool { return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 4 == 4; } /// True if `cp` is a valid identifier continuation character. -pub fn isIdContinue(self: Self, cp: u21) bool { +pub fn isIdContinue(self: Properties, cp: u21) bool { return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 8 == 8; } /// True if `cp` is a valid extended identifier start character. -pub fn isXidStart(self: Self, cp: u21) bool { +pub fn isXidStart(self: Properties, cp: u21) bool { return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 16 == 16; } /// True if `cp` is a valid extended identifier continuation character. -pub fn isXidContinue(self: Self, cp: u21) bool { +pub fn isXidContinue(self: Properties, cp: u21) bool { return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 32 == 32; } /// True if `cp` is a whitespace character. -pub fn isWhitespace(self: Self, cp: u21) bool { +pub fn isWhitespace(self: Properties, cp: u21) bool { return self.props_s2[self.props_s1[cp >> 8] + (cp & 0xff)] & 1 == 1; } /// True if `cp` is a hexadecimal digit. -pub fn isHexDigit(self: Self, cp: u21) bool { +pub fn isHexDigit(self: Properties, cp: u21) bool { return self.props_s2[self.props_s1[cp >> 8] + (cp & 0xff)] & 2 == 2; } /// True if `cp` is a diacritic mark. -pub fn isDiacritic(self: Self, cp: u21) bool { +pub fn isDiacritic(self: Properties, cp: u21) bool { return self.props_s2[self.props_s1[cp >> 8] + (cp & 0xff)] & 4 == 4; } /// True if `cp` is numeric. -pub fn isNumeric(self: Self, cp: u21) bool { +pub fn isNumeric(self: Properties, cp: u21) bool { return self.num_s2[self.num_s1[cp >> 8] + (cp & 0xff)] & 1 == 1; } /// True if `cp` is a digit. -pub fn isDigit(self: Self, cp: u21) bool { +pub fn isDigit(self: Properties, cp: u21) bool { return self.num_s2[self.num_s1[cp >> 8] + (cp & 0xff)] & 2 == 2; } /// True if `cp` is decimal. -pub fn isDecimal(self: Self, cp: u21) bool { +pub fn isDecimal(self: Properties, cp: u21) bool { return self.num_s2[self.num_s1[cp >> 8] + (cp & 0xff)] & 4 == 4; } @@ -161,3 +168,10 @@ test "Props" { try testing.expect(!self.isDigit('2')); try testing.expect(!self.isDecimal('g')); } + +const std = @import("std"); +const builtin = @import("builtin"); +const compress = std.compress; +const mem = std.mem; +const Allocator = mem.Allocator; +const testing = std.testing; diff --git a/src/Scripts.zig b/src/Scripts.zig index 4ad8549..f71a2b5 100644 --- a/src/Scripts.zig +++ b/src/Scripts.zig @@ -1,10 +1,10 @@ -const std = @import("std"); -const builtin = @import("builtin"); -const compress = std.compress; -const mem = std.mem; -const testing = std.testing; +//! Scripts Module + +s1: []u16 = undefined, +s2: []u8 = undefined, +s3: []u8 = undefined, -/// Scripts +/// Scripts enum pub const Script = enum { none, Adlam, @@ -172,13 +172,24 @@ pub const Script = enum { Zanabazar_Square, }; -s1: []u16 = undefined, -s2: []u8 = undefined, -s3: []u8 = undefined, +const Scripts = @This(); -const Self = @This(); +pub fn init(allocator: Allocator) Allocator.Error!Scripts { + var scripts = Scripts{}; + try scripts.setup(allocator); + return scripts; +} + +pub fn setup(scripts: *Scripts, allocator: Allocator) Allocator.Error!void { + scripts.setupInner(allocator) catch |err| { + switch (err) { + error.OutOfMemory => |e| return e, + else => unreachable, + } + }; +} -pub fn init(allocator: mem.Allocator) !Self { +inline fn setupInner(scripts: *Scripts, allocator: mem.Allocator) !void { const decompressor = compress.flate.inflate.decompressor; const in_bytes = @embedFile("scripts"); var in_fbs = std.io.fixedBufferStream(in_bytes); @@ -187,34 +198,30 @@ pub fn init(allocator: mem.Allocator) !Self { const endian = builtin.cpu.arch.endian(); - var self = Self{}; - const s1_len: u16 = try reader.readInt(u16, endian); - self.s1 = try allocator.alloc(u16, s1_len); - errdefer allocator.free(self.s1); - for (0..s1_len) |i| self.s1[i] = try reader.readInt(u16, endian); + scripts.s1 = try allocator.alloc(u16, s1_len); + errdefer allocator.free(scripts.s1); + for (0..s1_len) |i| scripts.s1[i] = try reader.readInt(u16, endian); const s2_len: u16 = try reader.readInt(u16, endian); - self.s2 = try allocator.alloc(u8, s2_len); - errdefer allocator.free(self.s2); - _ = try reader.readAll(self.s2); + scripts.s2 = try allocator.alloc(u8, s2_len); + errdefer allocator.free(scripts.s2); + _ = try reader.readAll(scripts.s2); const s3_len: u16 = try reader.readInt(u8, endian); - self.s3 = try allocator.alloc(u8, s3_len); - errdefer allocator.free(self.s3); - _ = try reader.readAll(self.s3); - - return self; + scripts.s3 = try allocator.alloc(u8, s3_len); + errdefer allocator.free(scripts.s3); + _ = try reader.readAll(scripts.s3); } -pub fn deinit(self: *const Self, allocator: mem.Allocator) void { +pub fn deinit(self: *const Scripts, allocator: mem.Allocator) void { allocator.free(self.s1); allocator.free(self.s2); allocator.free(self.s3); } /// Lookup the Script type for `cp`. -pub fn script(self: Self, cp: u21) ?Script { +pub fn script(self: Scripts, cp: u21) ?Script { const byte = self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]]; if (byte == 0) return null; return @enumFromInt(byte); @@ -225,3 +232,10 @@ test "script" { defer self.deinit(std.testing.allocator); try testing.expectEqual(Script.Latin, self.script('A').?); } + +const std = @import("std"); +const builtin = @import("builtin"); +const compress = std.compress; +const mem = std.mem; +const Allocator = mem.Allocator; +const testing = std.testing; -- cgit v1.2.3