From d2d42bf3ef5490f6fdec73508c2493a666ecee41 Mon Sep 17 00:00:00 2001 From: Sam Atman Date: Wed, 30 Apr 2025 16:48:07 -0400 Subject: Setup variants for all allocating modules This harmonizes the allocating modules in a couple of ways. All can now be constructed by pointer, and all treat various miscellaneous read failures as `unreachable`, which indeed they should be. The README has been updated to inform users of this option. --- README.md | 20 ++++++++++ src/CaseFolding.zig | 18 +++++++-- src/GeneralCategories.zig | 59 +++++++++++++++------------- src/Graphemes.zig | 32 ++++++++-------- src/LetterCasing.zig | 62 ++++++++++++++++++------------ src/Normalize.zig | 39 +++++++++++++++---- src/Properties.zig | 98 +++++++++++++++++++++++++++-------------------- src/Scripts.zig | 66 ++++++++++++++++++------------- 8 files changed, 248 insertions(+), 146 deletions(-) diff --git a/README.md b/README.md index 80e544f..538a38d 100644 --- a/README.md +++ b/README.md @@ -31,6 +31,24 @@ zg is a modular library. This approach minimizes binary file size and memory requirements by only including the Unicode data required for the specified module. The following sections describe the various modules and their specific use case. +### Init and Setup + +The code examples will show the use of `Module.init(allocator)` to create the +various modules. All of the allocating modules have a `setup` variant, which +takes a pointer and allocates in-place. + +Example use: + +```zig +test "Setup form" { + const graphemes = try allocator.create(Graphemes); + defer allocator.destroy(graphemes); + try graphemes.setup(allocator); + defer graphemes.deinit(allocator); +} +``` + + ## Code Points In the `code_point` module, you'll find a data structure representing a single code @@ -386,6 +404,8 @@ test "Initialize With a Normalize" { defer case_fold.deinit(allocator); } ``` +This has a `setupWithNormalize` variant as well, but note that this also takes +a `Normalize` struct, and not a pointer to it. 
## Display Width of Characters and Strings diff --git a/src/CaseFolding.zig b/src/CaseFolding.zig index 162e82f..2e53bfa 100644 --- a/src/CaseFolding.zig +++ b/src/CaseFolding.zig @@ -11,20 +11,21 @@ owns_normalize: bool, const CaseFolding = @This(); -pub fn init(allocator: Allocator) !CaseFolding { +pub fn init(allocator: Allocator) Allocator.Error!CaseFolding { var case_fold: CaseFolding = undefined; try case_fold.setup(allocator); return case_fold; } -pub fn initWithNormalize(allocator: Allocator, norm: Normalize) !CaseFolding { +pub fn initWithNormalize(allocator: Allocator, norm: Normalize) Allocator.Error!CaseFolding { var casefold: CaseFolding = undefined; try casefold.setupWithNormalize(allocator, norm); return casefold; } -pub fn setup(casefold: *CaseFolding, allocator: Allocator) !void { +pub fn setup(casefold: *CaseFolding, allocator: Allocator) Allocator.Error!void { try casefold.setupImpl(allocator); + // Handle normalize memory separately during setup: casefold.owns_normalize = false; errdefer casefold.deinit(allocator); try casefold.normalize.setup(allocator); @@ -37,7 +38,16 @@ pub fn setupWithNormalize(casefold: *CaseFolding, allocator: Allocator, norm: No casefold.owns_normalize = false; } -fn setupImpl(casefold: *CaseFolding, allocator: Allocator) !void { +fn setupImpl(casefold: *CaseFolding, allocator: Allocator) Allocator.Error!void { + casefold.setupImplInner(allocator) catch |err| { + switch (err) { + error.OutOfMemory => |e| return e, + else => unreachable, + } + }; +} + +inline fn setupImplInner(casefold: *CaseFolding, allocator: Allocator) !void { const decompressor = compress.flate.inflate.decompressor; const in_bytes = @embedFile("fold"); var in_fbs = std.io.fixedBufferStream(in_bytes); diff --git a/src/GeneralCategories.zig b/src/GeneralCategories.zig index a69f7a2..b7c82c0 100644 --- a/src/GeneralCategories.zig +++ b/src/GeneralCategories.zig @@ -1,7 +1,8 @@ -const std = @import("std"); -const builtin = @import("builtin"); -const 
compress = std.compress; -const mem = std.mem; +//! General Categories + +s1: []u16 = undefined, +s2: []u5 = undefined, +s3: []u5 = undefined, /// General Category pub const Gc = enum { @@ -37,13 +38,15 @@ pub const Gc = enum { Zs, // Separator, Space }; -s1: []u16 = undefined, -s2: []u5 = undefined, -s3: []u5 = undefined, +const GeneralCategories = @This(); -const Self = @This(); +pub fn init(allocator: Allocator) Allocator.Error!GeneralCategories { + var gencat = GeneralCategories{}; + try gencat.setup(allocator); + return gencat; +} -pub fn init(allocator: mem.Allocator) !Self { +pub fn setup(self: *GeneralCategories, allocator: Allocator) Allocator.Error!void { const decompressor = compress.flate.inflate.decompressor; const in_bytes = @embedFile("gencat"); var in_fbs = std.io.fixedBufferStream(in_bytes); @@ -52,39 +55,35 @@ pub fn init(allocator: mem.Allocator) !Self { const endian = builtin.cpu.arch.endian(); - var self = Self{}; - - const s1_len: u16 = try reader.readInt(u16, endian); + const s1_len: u16 = reader.readInt(u16, endian) catch unreachable; self.s1 = try allocator.alloc(u16, s1_len); errdefer allocator.free(self.s1); for (0..s1_len) |i| self.s1[i] = try reader.readInt(u16, endian); - const s2_len: u16 = try reader.readInt(u16, endian); + const s2_len: u16 = reader.readInt(u16, endian) catch unreachable; self.s2 = try allocator.alloc(u5, s2_len); errdefer allocator.free(self.s2); - for (0..s2_len) |i| self.s2[i] = @intCast(try reader.readInt(u8, endian)); + for (0..s2_len) |i| self.s2[i] = @intCast(reader.readInt(u8, endian) catch unreachable); - const s3_len: u16 = try reader.readInt(u8, endian); + const s3_len: u16 = reader.readInt(u8, endian) catch unreachable; self.s3 = try allocator.alloc(u5, s3_len); errdefer allocator.free(self.s3); - for (0..s3_len) |i| self.s3[i] = @intCast(try reader.readInt(u8, endian)); - - return self; + for (0..s3_len) |i| self.s3[i] = @intCast(reader.readInt(u8, endian) catch unreachable); } -pub fn deinit(self: 
*const Self, allocator: mem.Allocator) void { +pub fn deinit(self: *const GeneralCategories, allocator: mem.Allocator) void { allocator.free(self.s1); allocator.free(self.s2); allocator.free(self.s3); } /// Lookup the General Category for `cp`. -pub fn gc(self: Self, cp: u21) Gc { +pub fn gc(self: GeneralCategories, cp: u21) Gc { return @enumFromInt(self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]]); } /// True if `cp` has an C general category. -pub fn isControl(self: Self, cp: u21) bool { +pub fn isControl(self: GeneralCategories, cp: u21) bool { return switch (self.gc(cp)) { .Cc, .Cf, @@ -97,7 +96,7 @@ pub fn isControl(self: Self, cp: u21) bool { } /// True if `cp` has an L general category. -pub fn isLetter(self: Self, cp: u21) bool { +pub fn isLetter(self: GeneralCategories, cp: u21) bool { return switch (self.gc(cp)) { .Ll, .Lm, @@ -110,7 +109,7 @@ pub fn isLetter(self: Self, cp: u21) bool { } /// True if `cp` has an M general category. -pub fn isMark(self: Self, cp: u21) bool { +pub fn isMark(self: GeneralCategories, cp: u21) bool { return switch (self.gc(cp)) { .Mc, .Me, @@ -121,7 +120,7 @@ pub fn isMark(self: Self, cp: u21) bool { } /// True if `cp` has an N general category. -pub fn isNumber(self: Self, cp: u21) bool { +pub fn isNumber(self: GeneralCategories, cp: u21) bool { return switch (self.gc(cp)) { .Nd, .Nl, @@ -132,7 +131,7 @@ pub fn isNumber(self: Self, cp: u21) bool { } /// True if `cp` has an P general category. -pub fn isPunctuation(self: Self, cp: u21) bool { +pub fn isPunctuation(self: GeneralCategories, cp: u21) bool { return switch (self.gc(cp)) { .Pc, .Pd, @@ -147,7 +146,7 @@ pub fn isPunctuation(self: Self, cp: u21) bool { } /// True if `cp` has an S general category. -pub fn isSymbol(self: Self, cp: u21) bool { +pub fn isSymbol(self: GeneralCategories, cp: u21) bool { return switch (self.gc(cp)) { .Sc, .Sk, @@ -159,7 +158,7 @@ pub fn isSymbol(self: Self, cp: u21) bool { } /// True if `cp` has an Z general category. 
-pub fn isSeparator(self: Self, cp: u21) bool { +pub fn isSeparator(self: GeneralCategories, cp: u21) bool { return switch (self.gc(cp)) { .Zl, .Zp, @@ -168,3 +167,9 @@ pub fn isSeparator(self: Self, cp: u21) bool { else => false, }; } + +const std = @import("std"); +const builtin = @import("builtin"); +const compress = std.compress; +const mem = std.mem; +const Allocator = mem.Allocator; diff --git a/src/Graphemes.zig b/src/Graphemes.zig index 79cd2c6..7bf328a 100644 --- a/src/Graphemes.zig +++ b/src/Graphemes.zig @@ -14,7 +14,13 @@ s3: []u8 = undefined, const Graphemes = @This(); -pub inline fn init(allocator: mem.Allocator) mem.Allocator.Error!Graphemes { +pub fn init(allocator: Allocator) Allocator.Error!Graphemes { + var graphemes = Graphemes{}; + try graphemes.setup(allocator); + return graphemes; +} + +pub fn setup(graphemes: *Graphemes, allocator: Allocator) Allocator.Error!void { const decompressor = compress.flate.inflate.decompressor; const in_bytes = @embedFile("gbp"); var in_fbs = std.io.fixedBufferStream(in_bytes); @@ -23,27 +29,23 @@ pub inline fn init(allocator: mem.Allocator) mem.Allocator.Error!Graphemes { const endian = builtin.cpu.arch.endian(); - var self = Graphemes{}; - const s1_len: u16 = reader.readInt(u16, endian) catch unreachable; - self.s1 = try allocator.alloc(u16, s1_len); - errdefer allocator.free(self.s1); - for (0..s1_len) |i| self.s1[i] = reader.readInt(u16, endian) catch unreachable; + graphemes.s1 = try allocator.alloc(u16, s1_len); + errdefer allocator.free(graphemes.s1); + for (0..s1_len) |i| graphemes.s1[i] = reader.readInt(u16, endian) catch unreachable; const s2_len: u16 = reader.readInt(u16, endian) catch unreachable; - self.s2 = try allocator.alloc(u16, s2_len); - errdefer allocator.free(self.s2); - for (0..s2_len) |i| self.s2[i] = reader.readInt(u16, endian) catch unreachable; + graphemes.s2 = try allocator.alloc(u16, s2_len); + errdefer allocator.free(graphemes.s2); + for (0..s2_len) |i| graphemes.s2[i] = 
reader.readInt(u16, endian) catch unreachable; const s3_len: u16 = reader.readInt(u16, endian) catch unreachable; - self.s3 = try allocator.alloc(u8, s3_len); - errdefer allocator.free(self.s3); - _ = reader.readAll(self.s3) catch unreachable; - - return self; + graphemes.s3 = try allocator.alloc(u8, s3_len); + errdefer allocator.free(graphemes.s3); + _ = reader.readAll(graphemes.s3) catch unreachable; } -pub fn deinit(graphemes: *const Graphemes, allocator: mem.Allocator) void { +pub fn deinit(graphemes: *const Graphemes, allocator: Allocator) void { allocator.free(graphemes.s1); allocator.free(graphemes.s2); allocator.free(graphemes.s3); diff --git a/src/LetterCasing.zig b/src/LetterCasing.zig index 0a0acb1..a7260b8 100644 --- a/src/LetterCasing.zig +++ b/src/LetterCasing.zig @@ -1,25 +1,31 @@ -const std = @import("std"); -const builtin = @import("builtin"); -const compress = std.compress; -const mem = std.mem; -const testing = std.testing; -const unicode = std.unicode; - const CodePointIterator = @import("code_point").Iterator; -case_map: [][2]u21, +case_map: [][2]u21 = undefined, prop_s1: []u16 = undefined, prop_s2: []u8 = undefined, -const Self = @This(); +const LetterCasing = @This(); + +pub fn init(allocator: Allocator) Allocator.Error!LetterCasing { + var case = LetterCasing{}; + try case.setup(allocator); + return case; +} -pub fn init(allocator: mem.Allocator) !Self { +pub fn setup(case: *LetterCasing, allocator: Allocator) Allocator.Error!void { + case.setupInner(allocator) catch |err| { + switch (err) { + error.OutOfMemory => |e| return e, + else => unreachable, + } + }; +} + +inline fn setupInner(self: *LetterCasing, allocator: mem.Allocator) !void { const decompressor = compress.flate.inflate.decompressor; const endian = builtin.cpu.arch.endian(); - var self = Self{ - .case_map = try allocator.alloc([2]u21, 0x110000), - }; + self.case_map = try allocator.alloc([2]u21, 0x110000); errdefer allocator.free(self.case_map); for (0..0x110000) |i| { @@ -68,28 
+74,26 @@ pub fn init(allocator: mem.Allocator) !Self { self.prop_s2 = try allocator.alloc(u8, stage_2_len); errdefer allocator.free(self.prop_s2); _ = try cp_reader.readAll(self.prop_s2); - - return self; } -pub fn deinit(self: *const Self, allocator: mem.Allocator) void { +pub fn deinit(self: *const LetterCasing, allocator: mem.Allocator) void { allocator.free(self.case_map); allocator.free(self.prop_s1); allocator.free(self.prop_s2); } // Returns true if `cp` is either upper, lower, or title case. -pub fn isCased(self: Self, cp: u21) bool { +pub fn isCased(self: LetterCasing, cp: u21) bool { return self.prop_s2[self.prop_s1[cp >> 8] + (cp & 0xff)] & 4 == 4; } // Returns true if `cp` is uppercase. -pub fn isUpper(self: Self, cp: u21) bool { +pub fn isUpper(self: LetterCasing, cp: u21) bool { return self.prop_s2[self.prop_s1[cp >> 8] + (cp & 0xff)] & 2 == 2; } /// Returns true if `str` is all uppercase. -pub fn isUpperStr(self: Self, str: []const u8) bool { +pub fn isUpperStr(self: LetterCasing, str: []const u8) bool { var iter = CodePointIterator{ .bytes = str }; return while (iter.next()) |cp| { @@ -107,14 +111,14 @@ test "isUpperStr" { } /// Returns uppercase mapping for `cp`. -pub fn toUpper(self: Self, cp: u21) u21 { +pub fn toUpper(self: LetterCasing, cp: u21) u21 { return self.case_map[cp][0]; } /// Returns a new string with all letters in uppercase. /// Caller must free returned bytes with `allocator`. pub fn toUpperStr( - self: Self, + self: LetterCasing, allocator: mem.Allocator, str: []const u8, ) ![]u8 { @@ -142,12 +146,12 @@ test "toUpperStr" { } // Returns true if `cp` is lowercase. -pub fn isLower(self: Self, cp: u21) bool { +pub fn isLower(self: LetterCasing, cp: u21) bool { return self.prop_s2[self.prop_s1[cp >> 8] + (cp & 0xff)] & 1 == 1; } /// Returns true if `str` is all lowercase. 
-pub fn isLowerStr(self: Self, str: []const u8) bool { +pub fn isLowerStr(self: LetterCasing, str: []const u8) bool { var iter = CodePointIterator{ .bytes = str }; return while (iter.next()) |cp| { @@ -165,14 +169,14 @@ test "isLowerStr" { } /// Returns lowercase mapping for `cp`. -pub fn toLower(self: Self, cp: u21) u21 { +pub fn toLower(self: LetterCasing, cp: u21) u21 { return self.case_map[cp][1]; } /// Returns a new string with all letters in lowercase. /// Caller must free returned bytes with `allocator`. pub fn toLowerStr( - self: Self, + self: LetterCasing, allocator: mem.Allocator, str: []const u8, ) ![]u8 { @@ -198,3 +202,11 @@ test "toLowerStr" { defer testing.allocator.free(lowered); try testing.expectEqualStrings("hello, world 2112!", lowered); } + +const std = @import("std"); +const builtin = @import("builtin"); +const compress = std.compress; +const mem = std.mem; +const Allocator = std.mem.Allocator; +const testing = std.testing; +const unicode = std.unicode; diff --git a/src/Normalize.zig b/src/Normalize.zig index d8c867d..1500b4c 100644 --- a/src/Normalize.zig +++ b/src/Normalize.zig @@ -10,22 +10,47 @@ normp_data: NormPropsData = undefined, const Normalize = @This(); -pub fn init(allocator: Allocator) !Normalize { +pub fn init(allocator: Allocator) Allocator.Error!Normalize { var norm: Normalize = undefined; try norm.setup(allocator); return norm; } -pub fn setup(self: *Normalize, allocator: Allocator) !void { - self.canon_data = try CanonData.init(allocator); +pub fn setup(self: *Normalize, allocator: Allocator) Allocator.Error!void { + self.canon_data = CanonData.init(allocator) catch |err| { + switch (err) { + error.OutOfMemory => |e| return e, + else => unreachable, + } + }; errdefer self.canon_data.deinit(allocator); - self.ccc_data = try CccData.init(allocator); + self.ccc_data = CccData.init(allocator) catch |err| { + switch (err) { + error.OutOfMemory => |e| return e, + else => unreachable, + } + }; errdefer 
self.ccc_data.deinit(allocator); - self.compat_data = try CompatData.init(allocator); + self.compat_data = CompatData.init(allocator) catch |err| { + switch (err) { + error.OutOfMemory => |e| return e, + else => unreachable, + } + }; errdefer self.compat_data.deinit(allocator); - self.hangul_data = try HangulData.init(allocator); + self.hangul_data = HangulData.init(allocator) catch |err| { + switch (err) { + error.OutOfMemory => |e| return e, + else => unreachable, + } + }; errdefer self.hangul_data.deinit(allocator); - self.normp_data = try NormPropsData.init(allocator); + self.normp_data = NormPropsData.init(allocator) catch |err| { + switch (err) { + error.OutOfMemory => |e| return e, + else => unreachable, + } + }; } pub fn deinit(norm: *const Normalize, allocator: Allocator) void { diff --git a/src/Properties.zig b/src/Properties.zig index 46920be..f7e57ec 100644 --- a/src/Properties.zig +++ b/src/Properties.zig @@ -1,8 +1,4 @@ -const std = @import("std"); -const builtin = @import("builtin"); -const compress = std.compress; -const mem = std.mem; -const testing = std.testing; +//! 
Properties module core_s1: []u16 = undefined, core_s2: []u8 = undefined, @@ -11,9 +7,24 @@ props_s2: []u8 = undefined, num_s1: []u16 = undefined, num_s2: []u8 = undefined, -const Self = @This(); +const Properties = @This(); + +pub fn init(allocator: Allocator) Allocator.Error!Properties { + var props = Properties{}; + try props.setup(allocator); + return props; +} + +pub fn setup(props: *Properties, allocator: Allocator) Allocator.Error!void { + props.setupInner(allocator) catch |err| { + switch (err) { + error.OutOfMemory => |e| return e, + else => unreachable, + } + }; +} -pub fn init(allocator: mem.Allocator) !Self { +inline fn setupInner(props: *Properties, allocator: Allocator) !void { const decompressor = compress.flate.inflate.decompressor; const endian = builtin.cpu.arch.endian(); @@ -23,17 +34,15 @@ pub fn init(allocator: mem.Allocator) !Self { var core_decomp = decompressor(.raw, core_fbs.reader()); var core_reader = core_decomp.reader(); - var self = Self{}; - const core_stage_1_len: u16 = try core_reader.readInt(u16, endian); - self.core_s1 = try allocator.alloc(u16, core_stage_1_len); - errdefer allocator.free(self.core_s1); - for (0..core_stage_1_len) |i| self.core_s1[i] = try core_reader.readInt(u16, endian); + props.core_s1 = try allocator.alloc(u16, core_stage_1_len); + errdefer allocator.free(props.core_s1); + for (0..core_stage_1_len) |i| props.core_s1[i] = try core_reader.readInt(u16, endian); const core_stage_2_len: u16 = try core_reader.readInt(u16, endian); - self.core_s2 = try allocator.alloc(u8, core_stage_2_len); - errdefer allocator.free(self.core_s2); - _ = try core_reader.readAll(self.core_s2); + props.core_s2 = try allocator.alloc(u8, core_stage_2_len); + errdefer allocator.free(props.core_s2); + _ = try core_reader.readAll(props.core_s2); // Process PropList.txt const props_bytes = @embedFile("props"); @@ -42,14 +51,14 @@ pub fn init(allocator: mem.Allocator) !Self { var props_reader = props_decomp.reader(); const stage_1_len: u16 = 
try props_reader.readInt(u16, endian); - self.props_s1 = try allocator.alloc(u16, stage_1_len); - errdefer allocator.free(self.props_s1); - for (0..stage_1_len) |i| self.props_s1[i] = try props_reader.readInt(u16, endian); + props.props_s1 = try allocator.alloc(u16, stage_1_len); + errdefer allocator.free(props.props_s1); + for (0..stage_1_len) |i| props.props_s1[i] = try props_reader.readInt(u16, endian); const stage_2_len: u16 = try props_reader.readInt(u16, endian); - self.props_s2 = try allocator.alloc(u8, stage_2_len); - errdefer allocator.free(self.props_s2); - _ = try props_reader.readAll(self.props_s2); + props.props_s2 = try allocator.alloc(u8, stage_2_len); + errdefer allocator.free(props.props_s2); + _ = try props_reader.readAll(props.props_s2); // Process DerivedNumericType.txt const num_bytes = @embedFile("numeric"); @@ -58,19 +67,17 @@ pub fn init(allocator: mem.Allocator) !Self { var num_reader = num_decomp.reader(); const num_stage_1_len: u16 = try num_reader.readInt(u16, endian); - self.num_s1 = try allocator.alloc(u16, num_stage_1_len); - errdefer allocator.free(self.num_s1); - for (0..num_stage_1_len) |i| self.num_s1[i] = try num_reader.readInt(u16, endian); + props.num_s1 = try allocator.alloc(u16, num_stage_1_len); + errdefer allocator.free(props.num_s1); + for (0..num_stage_1_len) |i| props.num_s1[i] = try num_reader.readInt(u16, endian); const num_stage_2_len: u16 = try num_reader.readInt(u16, endian); - self.num_s2 = try allocator.alloc(u8, num_stage_2_len); - errdefer allocator.free(self.num_s2); - _ = try num_reader.readAll(self.num_s2); - - return self; + props.num_s2 = try allocator.alloc(u8, num_stage_2_len); + errdefer allocator.free(props.num_s2); + _ = try num_reader.readAll(props.num_s2); } -pub fn deinit(self: *const Self, allocator: mem.Allocator) void { +pub fn deinit(self: *const Properties, allocator: Allocator) void { allocator.free(self.core_s1); allocator.free(self.core_s2); allocator.free(self.props_s1); @@ -80,62 +87,62 @@ 
pub fn deinit(self: *const Self, allocator: mem.Allocator) void { } /// True if `cp` is a mathematical symbol. -pub fn isMath(self: Self, cp: u21) bool { +pub fn isMath(self: Properties, cp: u21) bool { return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 1 == 1; } /// True if `cp` is an alphabetic character. -pub fn isAlphabetic(self: Self, cp: u21) bool { +pub fn isAlphabetic(self: Properties, cp: u21) bool { return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 2 == 2; } /// True if `cp` is a valid identifier start character. -pub fn isIdStart(self: Self, cp: u21) bool { +pub fn isIdStart(self: Properties, cp: u21) bool { return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 4 == 4; } /// True if `cp` is a valid identifier continuation character. -pub fn isIdContinue(self: Self, cp: u21) bool { +pub fn isIdContinue(self: Properties, cp: u21) bool { return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 8 == 8; } /// True if `cp` is a valid extended identifier start character. -pub fn isXidStart(self: Self, cp: u21) bool { +pub fn isXidStart(self: Properties, cp: u21) bool { return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 16 == 16; } /// True if `cp` is a valid extended identifier continuation character. -pub fn isXidContinue(self: Self, cp: u21) bool { +pub fn isXidContinue(self: Properties, cp: u21) bool { return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 32 == 32; } /// True if `cp` is a whitespace character. -pub fn isWhitespace(self: Self, cp: u21) bool { +pub fn isWhitespace(self: Properties, cp: u21) bool { return self.props_s2[self.props_s1[cp >> 8] + (cp & 0xff)] & 1 == 1; } /// True if `cp` is a hexadecimal digit. -pub fn isHexDigit(self: Self, cp: u21) bool { +pub fn isHexDigit(self: Properties, cp: u21) bool { return self.props_s2[self.props_s1[cp >> 8] + (cp & 0xff)] & 2 == 2; } /// True if `cp` is a diacritic mark. 
-pub fn isDiacritic(self: Self, cp: u21) bool { +pub fn isDiacritic(self: Properties, cp: u21) bool { return self.props_s2[self.props_s1[cp >> 8] + (cp & 0xff)] & 4 == 4; } /// True if `cp` is numeric. -pub fn isNumeric(self: Self, cp: u21) bool { +pub fn isNumeric(self: Properties, cp: u21) bool { return self.num_s2[self.num_s1[cp >> 8] + (cp & 0xff)] & 1 == 1; } /// True if `cp` is a digit. -pub fn isDigit(self: Self, cp: u21) bool { +pub fn isDigit(self: Properties, cp: u21) bool { return self.num_s2[self.num_s1[cp >> 8] + (cp & 0xff)] & 2 == 2; } /// True if `cp` is decimal. -pub fn isDecimal(self: Self, cp: u21) bool { +pub fn isDecimal(self: Properties, cp: u21) bool { return self.num_s2[self.num_s1[cp >> 8] + (cp & 0xff)] & 4 == 4; } @@ -161,3 +168,10 @@ test "Props" { try testing.expect(!self.isDigit('2')); try testing.expect(!self.isDecimal('g')); } + +const std = @import("std"); +const builtin = @import("builtin"); +const compress = std.compress; +const mem = std.mem; +const Allocator = mem.Allocator; +const testing = std.testing; diff --git a/src/Scripts.zig b/src/Scripts.zig index 4ad8549..f71a2b5 100644 --- a/src/Scripts.zig +++ b/src/Scripts.zig @@ -1,10 +1,10 @@ -const std = @import("std"); -const builtin = @import("builtin"); -const compress = std.compress; -const mem = std.mem; -const testing = std.testing; +//! 
Scripts Module + +s1: []u16 = undefined, +s2: []u8 = undefined, +s3: []u8 = undefined, -/// Scripts +/// Scripts enum pub const Script = enum { none, Adlam, @@ -172,13 +172,24 @@ pub const Script = enum { Zanabazar_Square, }; -s1: []u16 = undefined, -s2: []u8 = undefined, -s3: []u8 = undefined, +const Scripts = @This(); -const Self = @This(); +pub fn init(allocator: Allocator) Allocator.Error!Scripts { + var scripts = Scripts{}; + try scripts.setup(allocator); + return scripts; +} + +pub fn setup(scripts: *Scripts, allocator: Allocator) Allocator.Error!void { + scripts.setupInner(allocator) catch |err| { + switch (err) { + error.OutOfMemory => |e| return e, + else => unreachable, + } + }; +} -pub fn init(allocator: mem.Allocator) !Self { +inline fn setupInner(scripts: *Scripts, allocator: mem.Allocator) !void { const decompressor = compress.flate.inflate.decompressor; const in_bytes = @embedFile("scripts"); var in_fbs = std.io.fixedBufferStream(in_bytes); @@ -187,34 +198,30 @@ pub fn init(allocator: mem.Allocator) !Self { const endian = builtin.cpu.arch.endian(); - var self = Self{}; - const s1_len: u16 = try reader.readInt(u16, endian); - self.s1 = try allocator.alloc(u16, s1_len); - errdefer allocator.free(self.s1); - for (0..s1_len) |i| self.s1[i] = try reader.readInt(u16, endian); + scripts.s1 = try allocator.alloc(u16, s1_len); + errdefer allocator.free(scripts.s1); + for (0..s1_len) |i| scripts.s1[i] = try reader.readInt(u16, endian); const s2_len: u16 = try reader.readInt(u16, endian); - self.s2 = try allocator.alloc(u8, s2_len); - errdefer allocator.free(self.s2); - _ = try reader.readAll(self.s2); + scripts.s2 = try allocator.alloc(u8, s2_len); + errdefer allocator.free(scripts.s2); + _ = try reader.readAll(scripts.s2); const s3_len: u16 = try reader.readInt(u8, endian); - self.s3 = try allocator.alloc(u8, s3_len); - errdefer allocator.free(self.s3); - _ = try reader.readAll(self.s3); - - return self; + scripts.s3 = try allocator.alloc(u8, s3_len); + 
errdefer allocator.free(scripts.s3); + _ = try reader.readAll(scripts.s3); } -pub fn deinit(self: *const Self, allocator: mem.Allocator) void { +pub fn deinit(self: *const Scripts, allocator: mem.Allocator) void { allocator.free(self.s1); allocator.free(self.s2); allocator.free(self.s3); } /// Lookup the Script type for `cp`. -pub fn script(self: Self, cp: u21) ?Script { +pub fn script(self: Scripts, cp: u21) ?Script { const byte = self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]]; if (byte == 0) return null; return @enumFromInt(byte); @@ -225,3 +232,10 @@ test "script" { defer self.deinit(std.testing.allocator); try testing.expectEqual(Script.Latin, self.script('A').?); } + +const std = @import("std"); +const builtin = @import("builtin"); +const compress = std.compress; +const mem = std.mem; +const Allocator = mem.Allocator; +const testing = std.testing; -- cgit v1.2.3