From 74be85ac145cc6de5d03348e07be8d982c2211cb Mon Sep 17 00:00:00 2001 From: Jose Colon Rodriguez Date: Thu, 28 Mar 2024 10:06:00 -0400 Subject: PropsData and errdefers for init fns --- src/CanonData.zig | 10 +++- src/CombiningData.zig | 2 + src/CompatData.zig | 1 + src/FoldData.zig | 8 ++++ src/GenCatData.zig | 3 ++ src/GraphemeData.zig | 3 ++ src/HangulData.zig | 2 + src/NormData.zig | 31 +++++++------ src/NormPropsData.zig | 2 + src/NumericData.zig | 10 +--- src/PropsData.zig | 123 ++++++++++++++++++++++++++++++++++++++++++++++++++ src/ScriptsData.zig | 3 ++ src/WidthData.zig | 3 ++ 13 files changed, 179 insertions(+), 22 deletions(-) create mode 100644 src/PropsData.zig (limited to 'src') diff --git a/src/CanonData.zig b/src/CanonData.zig index 9f1deb8..64d5555 100644 --- a/src/CanonData.zig +++ b/src/CanonData.zig @@ -24,6 +24,13 @@ pub fn init(allocator: mem.Allocator) !Self { .nfd = try allocator.alloc([]u21, 0x110000), }; + var slices: usize = 0; + errdefer { + self.nfc.deinit(); + for (self.nfd[0..slices]) |slice| self.allocator.free(slice); + self.allocator.free(self.nfd); + } + @memset(self.nfd, &.{}); while (true) { @@ -31,6 +38,7 @@ pub fn init(allocator: mem.Allocator) !Self { if (len == 0) break; const cp = try reader.readInt(u24, endian); self.nfd[cp] = try allocator.alloc(u21, len - 1); + slices += 1; for (0..len - 1) |i| { self.nfd[cp][i] = @intCast(try reader.readInt(u24, endian)); } @@ -42,7 +50,7 @@ pub fn init(allocator: mem.Allocator) !Self { return self; } -pub fn deinit(self: *const Self) void { +pub fn deinit(self: *Self) void { self.nfc.deinit(); for (self.nfd) |slice| self.allocator.free(slice); self.allocator.free(self.nfd); diff --git a/src/CombiningData.zig b/src/CombiningData.zig index c67638c..a40cbde 100644 --- a/src/CombiningData.zig +++ b/src/CombiningData.zig @@ -23,10 +23,12 @@ pub fn init(allocator: mem.Allocator) !Self { const stage_1_len: u16 = try reader.readInt(u16, endian); self.s1 = try allocator.alloc(u16, stage_1_len); + errdefer allocator.free(self.s1); for (0..stage_1_len) |i| self.s1[i] = try reader.readInt(u16, endian); const stage_2_len: u16 = try reader.readInt(u16, endian); self.s2 = try allocator.alloc(u8, stage_2_len); + errdefer allocator.free(self.s2); _ = try reader.readAll(self.s2); return self; diff --git a/src/CompatData.zig b/src/CompatData.zig index 67c43e6..a931cb3 100644 --- a/src/CompatData.zig +++ b/src/CompatData.zig @@ -21,6 +21,7 @@ pub fn init(allocator: mem.Allocator) !Self { .allocator = allocator, .nfkd = try allocator.alloc([]u21, 0x110000), }; + errdefer self.deinit(); @memset(self.nfkd, &.{}); diff --git a/src/FoldData.zig b/src/FoldData.zig index e387447..a06eefe 100644 --- a/src/FoldData.zig +++ b/src/FoldData.zig @@ -24,6 +24,13 @@ pub fn init(allocator: mem.Allocator) !Self { .cwcf = try allocator.alloc(bool, 0x110000), }; + var slices: usize = 0; + errdefer { + for (self.fold[0..slices]) |slice| self.allocator.free(slice); + self.allocator.free(self.fold); + self.allocator.free(self.cwcf); + } + @memset(self.fold, &.{}); @memset(self.cwcf, false); @@ -32,6 +39,7 @@ pub fn init(allocator: mem.Allocator) !Self { if (len == 0) break; const cp = try reader.readInt(u24, endian); self.fold[cp >> 1] = try allocator.alloc(u21, len - 1); + slices += 1; for (0..len - 1) |i| { self.fold[cp >> 1][i] = @intCast(try reader.readInt(u24, endian)); } diff --git a/src/GenCatData.zig b/src/GenCatData.zig index 37ae037..12501bf 100644 --- a/src/GenCatData.zig +++ b/src/GenCatData.zig @@ -58,14 +58,17 @@ pub fn init(allocator: mem.Allocator) !Self { const s1_len: u16 = try reader.readInt(u16, endian); self.s1 = try allocator.alloc(u16, s1_len); + errdefer allocator.free(self.s1); for (0..s1_len) |i| self.s1[i] = try reader.readInt(u16, endian); const s2_len: u16 = try reader.readInt(u16, endian); self.s2 = try allocator.alloc(u5, s2_len); + errdefer allocator.free(self.s2); for (0..s2_len) |i| self.s2[i] = @intCast(try reader.readInt(u8, endian)); const s3_len: u16 = try reader.readInt(u8, endian); self.s3 = try allocator.alloc(u5, s3_len); + errdefer allocator.free(self.s3); for (0..s3_len) |i| self.s3[i] = @intCast(try reader.readInt(u8, endian)); return self; diff --git a/src/GraphemeData.zig b/src/GraphemeData.zig index 971929a..500ffea 100644 --- a/src/GraphemeData.zig +++ b/src/GraphemeData.zig @@ -51,14 +51,17 @@ pub fn init(allocator: mem.Allocator) !Self { const s1_len: u16 = try reader.readInt(u16, endian); self.s1 = try allocator.alloc(u16, s1_len); + errdefer allocator.free(self.s1); for (0..s1_len) |i| self.s1[i] = try reader.readInt(u16, endian); const s2_len: u16 = try reader.readInt(u16, endian); self.s2 = try allocator.alloc(u16, s2_len); + errdefer allocator.free(self.s2); for (0..s2_len) |i| self.s2[i] = try reader.readInt(u16, endian); const s3_len: u16 = try reader.readInt(u16, endian); self.s3 = try allocator.alloc(u8, s3_len); + errdefer allocator.free(self.s3); _ = try reader.readAll(self.s3); return self; diff --git a/src/HangulData.zig b/src/HangulData.zig index ec360e9..99d91c1 100644 --- a/src/HangulData.zig +++ b/src/HangulData.zig @@ -32,10 +32,12 @@ pub fn init(allocator: mem.Allocator) !Self { const stage_1_len: u16 = try reader.readInt(u16, endian); self.s1 = try allocator.alloc(u16, stage_1_len); + errdefer allocator.free(self.s1); for (0..stage_1_len) |i| self.s1[i] = try reader.readInt(u16, endian); const stage_2_len: u16 = try reader.readInt(u16, endian); self.s2 = try allocator.alloc(u3, stage_2_len); + errdefer allocator.free(self.s2); for (0..stage_2_len) |i| self.s2[i] = @intCast(try reader.readInt(u8, endian)); return self; diff --git a/src/NormData.zig b/src/NormData.zig index 413619a..7ffe679 100644 --- a/src/NormData.zig +++ b/src/NormData.zig @@ -8,25 +8,30 @@ const FoldData = @import("FoldData"); const HangulData = @import("HangulData"); const NormPropsData = @import("NormPropsData"); -canon_data: CanonData, -ccc_data: CccData, -compat_data: CompatData, -hangul_data: HangulData, -normp_data: NormPropsData, +canon_data: CanonData = undefined, +ccc_data: CccData = undefined, +compat_data: CompatData = undefined, +hangul_data: HangulData = undefined, +normp_data: NormPropsData = undefined, const Self = @This(); pub fn init(allocator: std.mem.Allocator) !Self { - return Self{ - .canon_data = try CanonData.init(allocator), - .ccc_data = try CccData.init(allocator), - .compat_data = try CompatData.init(allocator), - .hangul_data = try HangulData.init(allocator), - .normp_data = try NormPropsData.init(allocator), - }; + var self = Self{}; + self.canon_data = try CanonData.init(allocator); + errdefer self.canon_data.deinit(); + self.ccc_data = try CccData.init(allocator); + errdefer self.ccc_data.deinit(); + self.compat_data = try CompatData.init(allocator); + errdefer self.compat_data.deinit(); + self.hangul_data = try HangulData.init(allocator); + errdefer self.hangul_data.deinit(); + self.normp_data = try NormPropsData.init(allocator); + + return self; } -pub fn deinit(self: *const Self) void { +pub fn deinit(self: *Self) void { self.canon_data.deinit(); self.ccc_data.deinit(); self.compat_data.deinit(); diff --git a/src/NormPropsData.zig b/src/NormPropsData.zig index 893a8d0..86d497b 100644 --- a/src/NormPropsData.zig +++ b/src/NormPropsData.zig @@ -23,10 +23,12 @@ pub fn init(allocator: mem.Allocator) !Self { const stage_1_len: u16 = try reader.readInt(u16, endian); self.s1 = try allocator.alloc(u16, stage_1_len); + errdefer allocator.free(self.s1); for (0..stage_1_len) |i| self.s1[i] = try reader.readInt(u16, endian); const stage_2_len: u16 = try reader.readInt(u16, endian); self.s2 = try allocator.alloc(u4, stage_2_len); + errdefer allocator.free(self.s2); for (0..stage_2_len) |i| self.s2[i] = @intCast(try reader.readInt(u8, endian)); return self; diff --git a/src/NumericData.zig b/src/NumericData.zig index 210d623..28e8206 100644 --- a/src/NumericData.zig +++ b/src/NumericData.zig @@ -24,10 +24,12 @@ pub fn init(allocator: mem.Allocator) !Self { const stage_1_len: u16 = try reader.readInt(u16, endian); self.s1 = try allocator.alloc(u16, stage_1_len); + errdefer allocator.free(self.s1); for (0..stage_1_len) |i| self.s1[i] = try reader.readInt(u16, endian); const stage_2_len: u16 = try reader.readInt(u16, endian); self.s2 = try allocator.alloc(u8, stage_2_len); + errdefer allocator.free(self.s2); _ = try reader.readAll(self.s2); return self; @@ -38,11 +40,6 @@ pub fn deinit(self: *const Self) void { self.allocator.free(self.s2); } -/// True if `cp` is any numeric type. -pub fn isNumber(self: Self, cp: u21) bool { - return self.isNumeric(cp) or self.isDigit(cp) or self.isDecimal(cp); -} - /// True if `cp` is numeric. pub inline fn isNumeric(self: Self, cp: u21) bool { return self.s2[self.s1[cp >> 8] + (cp & 0xff)] & 1 == 1; @@ -62,13 +59,10 @@ test "isDecimal" { const self = try init(testing.allocator); defer self.deinit(); - try testing.expect(self.isNumber('\u{277f}')); - try testing.expect(self.isNumber('3')); try testing.expect(self.isNumeric('\u{277f}')); try testing.expect(self.isDigit('\u{2070}')); try testing.expect(self.isDecimal('3')); - try testing.expect(!self.isNumber('z')); try testing.expect(!self.isNumeric('1')); try testing.expect(!self.isDigit('2')); try testing.expect(!self.isDecimal('g')); diff --git a/src/PropsData.zig b/src/PropsData.zig new file mode 100644 index 0000000..252462e --- /dev/null +++ b/src/PropsData.zig @@ -0,0 +1,123 @@ +const std = @import("std"); +const builtin = @import("builtin"); +const compress = std.compress; +const mem = std.mem; +const testing = std.testing; + +allocator: mem.Allocator, +core_s1: []u16 = undefined, +core_s2: []u8 = undefined, +props_s1: []u16 = undefined, +props_s2: []u8 = undefined, + +const Self = @This(); + +pub fn init(allocator: mem.Allocator) !Self { + const decompressor = compress.deflate.decompressor; + const endian = builtin.cpu.arch.endian(); + + // Process DerivedCoreProperties.txt + const core_bytes = @embedFile("core_props"); + var core_fbs = std.io.fixedBufferStream(core_bytes); + var core_decomp = try decompressor(allocator, core_fbs.reader(), null); + defer core_decomp.deinit(); + var core_reader = core_decomp.reader(); + + var self = Self{ .allocator = allocator }; + + const core_stage_1_len: u16 = try core_reader.readInt(u16, endian); + self.core_s1 = try allocator.alloc(u16, core_stage_1_len); + errdefer allocator.free(self.core_s1); + for (0..core_stage_1_len) |i| self.core_s1[i] = try core_reader.readInt(u16, endian); + + const core_stage_2_len: u16 = try core_reader.readInt(u16, endian); + self.core_s2 = try allocator.alloc(u8, core_stage_2_len); + errdefer allocator.free(self.core_s2); + _ = try core_reader.readAll(self.core_s2); + + // Process PropList.txt + const props_bytes = @embedFile("props"); + var props_fbs = std.io.fixedBufferStream(props_bytes); + var props_decomp = try decompressor(allocator, props_fbs.reader(), null); + defer props_decomp.deinit(); + var props_reader = props_decomp.reader(); + + const stage_1_len: u16 = try props_reader.readInt(u16, endian); + self.props_s1 = try allocator.alloc(u16, stage_1_len); + errdefer allocator.free(self.props_s1); + for (0..stage_1_len) |i| self.props_s1[i] = try props_reader.readInt(u16, endian); + + const stage_2_len: u16 = try props_reader.readInt(u16, endian); + self.props_s2 = try allocator.alloc(u8, stage_2_len); + errdefer allocator.free(self.props_s2); + _ = try props_reader.readAll(self.props_s2); + + return self; +} + +pub fn deinit(self: *const Self) void { + self.allocator.free(self.core_s1); + self.allocator.free(self.core_s2); + self.allocator.free(self.props_s1); + self.allocator.free(self.props_s2); +} + +/// True if `cp` is a mathematical symbol. +pub inline fn isMath(self: Self, cp: u21) bool { + return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 1 == 1; +} + +/// True if `cp` is an alphabetic character. +pub inline fn isAlphabetic(self: Self, cp: u21) bool { + return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 2 == 2; +} + +/// True if `cp` is a valid identifier start character. +pub inline fn isIdStart(self: Self, cp: u21) bool { + return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 4 == 4; +} + +/// True if `cp` is a valid identifier continuation character. +pub inline fn isIdContinue(self: Self, cp: u21) bool { + return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 8 == 8; +} + +/// True if `cp` is a valid extended identifier start character. +pub inline fn isXidStart(self: Self, cp: u21) bool { + return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 16 == 16; +} + +/// True if `cp` is a valid extended identifier continuation character. +pub inline fn isXidContinue(self: Self, cp: u21) bool { + return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 32 == 32; +} + +/// True if `cp` is a whitespace character. +pub inline fn isWhitespace(self: Self, cp: u21) bool { + return self.props_s2[self.props_s1[cp >> 8] + (cp & 0xff)] & 1 == 1; +} + +/// True if `cp` is a hexadecimal digit. +pub inline fn isHexDigit(self: Self, cp: u21) bool { + return self.props_s2[self.props_s1[cp >> 8] + (cp & 0xff)] & 2 == 2; +} + +/// True if `cp` is a diacritic mark. +pub inline fn isDiacritic(self: Self, cp: u21) bool { + return self.props_s2[self.props_s1[cp >> 8] + (cp & 0xff)] & 4 == 4; +} + +test "Props" { + const self = try init(testing.allocator); + defer self.deinit(); + + try testing.expect(self.isHexDigit('F')); + try testing.expect(self.isHexDigit('a')); + try testing.expect(self.isHexDigit('8')); + try testing.expect(!self.isHexDigit('z')); + + try testing.expect(self.isDiacritic('\u{301}')); + try testing.expect(self.isAlphabetic('A')); + try testing.expect(!self.isAlphabetic('3')); + try testing.expect(self.isMath('+')); +} diff --git a/src/ScriptsData.zig b/src/ScriptsData.zig index ac1c46a..4e371bf 100644 --- a/src/ScriptsData.zig +++ b/src/ScriptsData.zig @@ -193,14 +193,17 @@ pub fn init(allocator: mem.Allocator) !Self { const s1_len: u16 = try reader.readInt(u16, endian); self.s1 = try allocator.alloc(u16, s1_len); + errdefer allocator.free(self.s1); for (0..s1_len) |i| self.s1[i] = try reader.readInt(u16, endian); const s2_len: u16 = try reader.readInt(u16, endian); self.s2 = try allocator.alloc(u8, s2_len); + errdefer allocator.free(self.s2); _ = try reader.readAll(self.s2); const s3_len: u16 = try reader.readInt(u8, endian); self.s3 = try allocator.alloc(u8, s3_len); + errdefer allocator.free(self.s3); _ = try reader.readAll(self.s3); return self; diff --git a/src/WidthData.zig b/src/WidthData.zig index d17f0cd..b9ef84e 100644 --- a/src/WidthData.zig +++ b/src/WidthData.zig @@ -27,13 +27,16 @@ pub fn init(allocator: mem.Allocator) !Self { .allocator = allocator, .g_data = try GraphemeData.init(allocator), }; + errdefer self.g_data.deinit(); const stage_1_len: u16 = try reader.readInt(u16, endian); self.s1 = try allocator.alloc(u16, stage_1_len); + errdefer allocator.free(self.s1); for (0..stage_1_len) |i| self.s1[i] = try reader.readInt(u16, endian); const stage_2_len: u16 = try reader.readInt(u16, endian); self.s2 = try allocator.alloc(i3, stage_2_len); + errdefer allocator.free(self.s2); for (0..stage_2_len) |i| self.s2[i] = @intCast(try reader.readInt(i8, endian)); return self; -- cgit v1.2.3