diff options
| author | 2025-04-30 16:48:07 -0400 | |
|---|---|---|
| committer | 2025-04-30 16:48:07 -0400 | |
| commit | d2d42bf3ef5490f6fdec73508c2493a666ecee41 (patch) | |
| tree | 377794be59ece4118ca2449b705b8e7cc646abc0 | |
| parent | Update README.md to new API (diff) | |
| download | zg-d2d42bf3ef5490f6fdec73508c2493a666ecee41.tar.gz zg-d2d42bf3ef5490f6fdec73508c2493a666ecee41.tar.xz zg-d2d42bf3ef5490f6fdec73508c2493a666ecee41.zip | |
Setup variants for all allocating modules
This harmonizes the allocating modules in a couple of ways. All can
now be constructed by pointer, and all treat various miscellaneous
read failures as `unreachable`, which indeed they should be.
The README has been updated to inform users of this option.
| -rw-r--r-- | README.md | 20 | ||||
| -rw-r--r-- | src/CaseFolding.zig | 18 | ||||
| -rw-r--r-- | src/GeneralCategories.zig | 59 | ||||
| -rw-r--r-- | src/Graphemes.zig | 32 | ||||
| -rw-r--r-- | src/LetterCasing.zig | 62 | ||||
| -rw-r--r-- | src/Normalize.zig | 39 | ||||
| -rw-r--r-- | src/Properties.zig | 98 | ||||
| -rw-r--r-- | src/Scripts.zig | 66 |
8 files changed, 248 insertions, 146 deletions
| @@ -31,6 +31,24 @@ zg is a modular library. This approach minimizes binary file size and memory | |||
| 31 | requirements by only including the Unicode data required for the specified module. | 31 | requirements by only including the Unicode data required for the specified module. |
| 32 | The following sections describe the various modules and their specific use case. | 32 | The following sections describe the various modules and their specific use case. |
| 33 | 33 | ||
| 34 | ### Init and Setup | ||
| 35 | |||
| 36 | The code examples use `Module.init(allocator)` to create the various | ||
| 37 | modules. Every allocating module also has a `setup` variant, which takes a | ||
| 38 | pointer to the module and initializes it in place using the allocator. | ||
| 39 | |||
| 40 | Example use: | ||
| 41 | |||
| 42 | ```zig | ||
| 43 | test "Setup form" { | ||
| 44 | const graphemes = try allocator.create(Graphemes); | ||
| 45 | defer allocator.destroy(graphemes); | ||
| 46 | try graphemes.setup(allocator); | ||
| 47 | defer graphemes.deinit(allocator); | ||
| 48 | } | ||
| 49 | ``` | ||
| 50 | |||
| 51 | |||
| 34 | ## Code Points | 52 | ## Code Points |
| 35 | 53 | ||
| 36 | In the `code_point` module, you'll find a data structure representing a single code | 54 | In the `code_point` module, you'll find a data structure representing a single code |
| @@ -386,6 +404,8 @@ test "Initialize With a Normalize" { | |||
| 386 | defer case_fold.deinit(allocator); | 404 | defer case_fold.deinit(allocator); |
| 387 | } | 405 | } |
| 388 | ``` | 406 | ``` |
| 407 | There is a `setupWithNormalize` variant as well; note that, like `initWithNormalize`, | ||
| 408 | it takes a `Normalize` struct by value, not a pointer to it. | ||
| 389 | 409 | ||
| 390 | 410 | ||
| 391 | ## Display Width of Characters and Strings | 411 | ## Display Width of Characters and Strings |
diff --git a/src/CaseFolding.zig b/src/CaseFolding.zig index 162e82f..2e53bfa 100644 --- a/src/CaseFolding.zig +++ b/src/CaseFolding.zig | |||
| @@ -11,20 +11,21 @@ owns_normalize: bool, | |||
| 11 | 11 | ||
| 12 | const CaseFolding = @This(); | 12 | const CaseFolding = @This(); |
| 13 | 13 | ||
| 14 | pub fn init(allocator: Allocator) !CaseFolding { | 14 | pub fn init(allocator: Allocator) Allocator.Error!CaseFolding { |
| 15 | var case_fold: CaseFolding = undefined; | 15 | var case_fold: CaseFolding = undefined; |
| 16 | try case_fold.setup(allocator); | 16 | try case_fold.setup(allocator); |
| 17 | return case_fold; | 17 | return case_fold; |
| 18 | } | 18 | } |
| 19 | 19 | ||
| 20 | pub fn initWithNormalize(allocator: Allocator, norm: Normalize) !CaseFolding { | 20 | pub fn initWithNormalize(allocator: Allocator, norm: Normalize) Allocator.Error!CaseFolding { |
| 21 | var casefold: CaseFolding = undefined; | 21 | var casefold: CaseFolding = undefined; |
| 22 | try casefold.setupWithNormalize(allocator, norm); | 22 | try casefold.setupWithNormalize(allocator, norm); |
| 23 | return casefold; | 23 | return casefold; |
| 24 | } | 24 | } |
| 25 | 25 | ||
| 26 | pub fn setup(casefold: *CaseFolding, allocator: Allocator) !void { | 26 | pub fn setup(casefold: *CaseFolding, allocator: Allocator) Allocator.Error!void { |
| 27 | try casefold.setupImpl(allocator); | 27 | try casefold.setupImpl(allocator); |
| 28 | // Handle normalize memory separately during setup: | ||
| 28 | casefold.owns_normalize = false; | 29 | casefold.owns_normalize = false; |
| 29 | errdefer casefold.deinit(allocator); | 30 | errdefer casefold.deinit(allocator); |
| 30 | try casefold.normalize.setup(allocator); | 31 | try casefold.normalize.setup(allocator); |
| @@ -37,7 +38,16 @@ pub fn setupWithNormalize(casefold: *CaseFolding, allocator: Allocator, norm: No | |||
| 37 | casefold.owns_normalize = false; | 38 | casefold.owns_normalize = false; |
| 38 | } | 39 | } |
| 39 | 40 | ||
| 40 | fn setupImpl(casefold: *CaseFolding, allocator: Allocator) !void { | 41 | fn setupImpl(casefold: *CaseFolding, allocator: Allocator) Allocator.Error!void { |
| 42 | casefold.setupImplInner(allocator) catch |err| { | ||
| 43 | switch (err) { | ||
| 44 | error.OutOfMemory => |e| return e, | ||
| 45 | else => unreachable, | ||
| 46 | } | ||
| 47 | }; | ||
| 48 | } | ||
| 49 | |||
| 50 | inline fn setupImplInner(casefold: *CaseFolding, allocator: Allocator) !void { | ||
| 41 | const decompressor = compress.flate.inflate.decompressor; | 51 | const decompressor = compress.flate.inflate.decompressor; |
| 42 | const in_bytes = @embedFile("fold"); | 52 | const in_bytes = @embedFile("fold"); |
| 43 | var in_fbs = std.io.fixedBufferStream(in_bytes); | 53 | var in_fbs = std.io.fixedBufferStream(in_bytes); |
diff --git a/src/GeneralCategories.zig b/src/GeneralCategories.zig index a69f7a2..b7c82c0 100644 --- a/src/GeneralCategories.zig +++ b/src/GeneralCategories.zig | |||
| @@ -1,7 +1,8 @@ | |||
| 1 | const std = @import("std"); | 1 | //! General Categories |
| 2 | const builtin = @import("builtin"); | 2 | |
| 3 | const compress = std.compress; | 3 | s1: []u16 = undefined, |
| 4 | const mem = std.mem; | 4 | s2: []u5 = undefined, |
| 5 | s3: []u5 = undefined, | ||
| 5 | 6 | ||
| 6 | /// General Category | 7 | /// General Category |
| 7 | pub const Gc = enum { | 8 | pub const Gc = enum { |
| @@ -37,13 +38,15 @@ pub const Gc = enum { | |||
| 37 | Zs, // Separator, Space | 38 | Zs, // Separator, Space |
| 38 | }; | 39 | }; |
| 39 | 40 | ||
| 40 | s1: []u16 = undefined, | 41 | const GeneralCategories = @This(); |
| 41 | s2: []u5 = undefined, | ||
| 42 | s3: []u5 = undefined, | ||
| 43 | 42 | ||
| 44 | const Self = @This(); | 43 | pub fn init(allocator: Allocator) Allocator.Error!GeneralCategories { |
| 44 | var gencat = GeneralCategories{}; | ||
| 45 | try gencat.setup(allocator); | ||
| 46 | return gencat; | ||
| 47 | } | ||
| 45 | 48 | ||
| 46 | pub fn init(allocator: mem.Allocator) !Self { | 49 | pub fn setup(self: *GeneralCategories, allocator: Allocator) Allocator.Error!void { |
| 47 | const decompressor = compress.flate.inflate.decompressor; | 50 | const decompressor = compress.flate.inflate.decompressor; |
| 48 | const in_bytes = @embedFile("gencat"); | 51 | const in_bytes = @embedFile("gencat"); |
| 49 | var in_fbs = std.io.fixedBufferStream(in_bytes); | 52 | var in_fbs = std.io.fixedBufferStream(in_bytes); |
| @@ -52,39 +55,35 @@ pub fn init(allocator: mem.Allocator) !Self { | |||
| 52 | 55 | ||
| 53 | const endian = builtin.cpu.arch.endian(); | 56 | const endian = builtin.cpu.arch.endian(); |
| 54 | 57 | ||
| 55 | var self = Self{}; | 58 | const s1_len: u16 = reader.readInt(u16, endian) catch unreachable; |
| 56 | |||
| 57 | const s1_len: u16 = try reader.readInt(u16, endian); | ||
| 58 | self.s1 = try allocator.alloc(u16, s1_len); | 59 | self.s1 = try allocator.alloc(u16, s1_len); |
| 59 | errdefer allocator.free(self.s1); | 60 | errdefer allocator.free(self.s1); |
| 60 | for (0..s1_len) |i| self.s1[i] = try reader.readInt(u16, endian); | 61 | for (0..s1_len) |i| self.s1[i] = try reader.readInt(u16, endian); |
| 61 | 62 | ||
| 62 | const s2_len: u16 = try reader.readInt(u16, endian); | 63 | const s2_len: u16 = reader.readInt(u16, endian) catch unreachable; |
| 63 | self.s2 = try allocator.alloc(u5, s2_len); | 64 | self.s2 = try allocator.alloc(u5, s2_len); |
| 64 | errdefer allocator.free(self.s2); | 65 | errdefer allocator.free(self.s2); |
| 65 | for (0..s2_len) |i| self.s2[i] = @intCast(try reader.readInt(u8, endian)); | 66 | for (0..s2_len) |i| self.s2[i] = @intCast(reader.readInt(u8, endian) catch unreachable); |
| 66 | 67 | ||
| 67 | const s3_len: u16 = try reader.readInt(u8, endian); | 68 | const s3_len: u16 = reader.readInt(u8, endian) catch unreachable; |
| 68 | self.s3 = try allocator.alloc(u5, s3_len); | 69 | self.s3 = try allocator.alloc(u5, s3_len); |
| 69 | errdefer allocator.free(self.s3); | 70 | errdefer allocator.free(self.s3); |
| 70 | for (0..s3_len) |i| self.s3[i] = @intCast(try reader.readInt(u8, endian)); | 71 | for (0..s3_len) |i| self.s3[i] = @intCast(reader.readInt(u8, endian) catch unreachable); |
| 71 | |||
| 72 | return self; | ||
| 73 | } | 72 | } |
| 74 | 73 | ||
| 75 | pub fn deinit(self: *const Self, allocator: mem.Allocator) void { | 74 | pub fn deinit(self: *const GeneralCategories, allocator: mem.Allocator) void { |
| 76 | allocator.free(self.s1); | 75 | allocator.free(self.s1); |
| 77 | allocator.free(self.s2); | 76 | allocator.free(self.s2); |
| 78 | allocator.free(self.s3); | 77 | allocator.free(self.s3); |
| 79 | } | 78 | } |
| 80 | 79 | ||
| 81 | /// Lookup the General Category for `cp`. | 80 | /// Lookup the General Category for `cp`. |
| 82 | pub fn gc(self: Self, cp: u21) Gc { | 81 | pub fn gc(self: GeneralCategories, cp: u21) Gc { |
| 83 | return @enumFromInt(self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]]); | 82 | return @enumFromInt(self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]]); |
| 84 | } | 83 | } |
| 85 | 84 | ||
| 86 | /// True if `cp` has a C general category. | 85 | /// True if `cp` has a C general category. |
| 87 | pub fn isControl(self: Self, cp: u21) bool { | 86 | pub fn isControl(self: GeneralCategories, cp: u21) bool { |
| 88 | return switch (self.gc(cp)) { | 87 | return switch (self.gc(cp)) { |
| 89 | .Cc, | 88 | .Cc, |
| 90 | .Cf, | 89 | .Cf, |
| @@ -97,7 +96,7 @@ pub fn isControl(self: Self, cp: u21) bool { | |||
| 97 | } | 96 | } |
| 98 | 97 | ||
| 99 | /// True if `cp` has an L general category. | 98 | /// True if `cp` has an L general category. |
| 100 | pub fn isLetter(self: Self, cp: u21) bool { | 99 | pub fn isLetter(self: GeneralCategories, cp: u21) bool { |
| 101 | return switch (self.gc(cp)) { | 100 | return switch (self.gc(cp)) { |
| 102 | .Ll, | 101 | .Ll, |
| 103 | .Lm, | 102 | .Lm, |
| @@ -110,7 +109,7 @@ pub fn isLetter(self: Self, cp: u21) bool { | |||
| 110 | } | 109 | } |
| 111 | 110 | ||
| 112 | /// True if `cp` has an M general category. | 111 | /// True if `cp` has an M general category. |
| 113 | pub fn isMark(self: Self, cp: u21) bool { | 112 | pub fn isMark(self: GeneralCategories, cp: u21) bool { |
| 114 | return switch (self.gc(cp)) { | 113 | return switch (self.gc(cp)) { |
| 115 | .Mc, | 114 | .Mc, |
| 116 | .Me, | 115 | .Me, |
| @@ -121,7 +120,7 @@ pub fn isMark(self: Self, cp: u21) bool { | |||
| 121 | } | 120 | } |
| 122 | 121 | ||
| 123 | /// True if `cp` has an N general category. | 122 | /// True if `cp` has an N general category. |
| 124 | pub fn isNumber(self: Self, cp: u21) bool { | 123 | pub fn isNumber(self: GeneralCategories, cp: u21) bool { |
| 125 | return switch (self.gc(cp)) { | 124 | return switch (self.gc(cp)) { |
| 126 | .Nd, | 125 | .Nd, |
| 127 | .Nl, | 126 | .Nl, |
| @@ -132,7 +131,7 @@ pub fn isNumber(self: Self, cp: u21) bool { | |||
| 132 | } | 131 | } |
| 133 | 132 | ||
| 134 | /// True if `cp` has a P general category. | 133 | /// True if `cp` has a P general category. |
| 135 | pub fn isPunctuation(self: Self, cp: u21) bool { | 134 | pub fn isPunctuation(self: GeneralCategories, cp: u21) bool { |
| 136 | return switch (self.gc(cp)) { | 135 | return switch (self.gc(cp)) { |
| 137 | .Pc, | 136 | .Pc, |
| 138 | .Pd, | 137 | .Pd, |
| @@ -147,7 +146,7 @@ pub fn isPunctuation(self: Self, cp: u21) bool { | |||
| 147 | } | 146 | } |
| 148 | 147 | ||
| 149 | /// True if `cp` has an S general category. | 148 | /// True if `cp` has an S general category. |
| 150 | pub fn isSymbol(self: Self, cp: u21) bool { | 149 | pub fn isSymbol(self: GeneralCategories, cp: u21) bool { |
| 151 | return switch (self.gc(cp)) { | 150 | return switch (self.gc(cp)) { |
| 152 | .Sc, | 151 | .Sc, |
| 153 | .Sk, | 152 | .Sk, |
| @@ -159,7 +158,7 @@ pub fn isSymbol(self: Self, cp: u21) bool { | |||
| 159 | } | 158 | } |
| 160 | 159 | ||
| 161 | /// True if `cp` has a Z general category. | 160 | /// True if `cp` has a Z general category. |
| 162 | pub fn isSeparator(self: Self, cp: u21) bool { | 161 | pub fn isSeparator(self: GeneralCategories, cp: u21) bool { |
| 163 | return switch (self.gc(cp)) { | 162 | return switch (self.gc(cp)) { |
| 164 | .Zl, | 163 | .Zl, |
| 165 | .Zp, | 164 | .Zp, |
| @@ -168,3 +167,9 @@ pub fn isSeparator(self: Self, cp: u21) bool { | |||
| 168 | else => false, | 167 | else => false, |
| 169 | }; | 168 | }; |
| 170 | } | 169 | } |
| 170 | |||
| 171 | const std = @import("std"); | ||
| 172 | const builtin = @import("builtin"); | ||
| 173 | const compress = std.compress; | ||
| 174 | const mem = std.mem; | ||
| 175 | const Allocator = mem.Allocator; | ||
diff --git a/src/Graphemes.zig b/src/Graphemes.zig index 79cd2c6..7bf328a 100644 --- a/src/Graphemes.zig +++ b/src/Graphemes.zig | |||
| @@ -14,7 +14,13 @@ s3: []u8 = undefined, | |||
| 14 | 14 | ||
| 15 | const Graphemes = @This(); | 15 | const Graphemes = @This(); |
| 16 | 16 | ||
| 17 | pub inline fn init(allocator: mem.Allocator) mem.Allocator.Error!Graphemes { | 17 | pub fn init(allocator: Allocator) Allocator.Error!Graphemes { |
| 18 | var graphemes = Graphemes{}; | ||
| 19 | try graphemes.setup(allocator); | ||
| 20 | return graphemes; | ||
| 21 | } | ||
| 22 | |||
| 23 | pub fn setup(graphemes: *Graphemes, allocator: Allocator) Allocator.Error!void { | ||
| 18 | const decompressor = compress.flate.inflate.decompressor; | 24 | const decompressor = compress.flate.inflate.decompressor; |
| 19 | const in_bytes = @embedFile("gbp"); | 25 | const in_bytes = @embedFile("gbp"); |
| 20 | var in_fbs = std.io.fixedBufferStream(in_bytes); | 26 | var in_fbs = std.io.fixedBufferStream(in_bytes); |
| @@ -23,27 +29,23 @@ pub inline fn init(allocator: mem.Allocator) mem.Allocator.Error!Graphemes { | |||
| 23 | 29 | ||
| 24 | const endian = builtin.cpu.arch.endian(); | 30 | const endian = builtin.cpu.arch.endian(); |
| 25 | 31 | ||
| 26 | var self = Graphemes{}; | ||
| 27 | |||
| 28 | const s1_len: u16 = reader.readInt(u16, endian) catch unreachable; | 32 | const s1_len: u16 = reader.readInt(u16, endian) catch unreachable; |
| 29 | self.s1 = try allocator.alloc(u16, s1_len); | 33 | graphemes.s1 = try allocator.alloc(u16, s1_len); |
| 30 | errdefer allocator.free(self.s1); | 34 | errdefer allocator.free(graphemes.s1); |
| 31 | for (0..s1_len) |i| self.s1[i] = reader.readInt(u16, endian) catch unreachable; | 35 | for (0..s1_len) |i| graphemes.s1[i] = reader.readInt(u16, endian) catch unreachable; |
| 32 | 36 | ||
| 33 | const s2_len: u16 = reader.readInt(u16, endian) catch unreachable; | 37 | const s2_len: u16 = reader.readInt(u16, endian) catch unreachable; |
| 34 | self.s2 = try allocator.alloc(u16, s2_len); | 38 | graphemes.s2 = try allocator.alloc(u16, s2_len); |
| 35 | errdefer allocator.free(self.s2); | 39 | errdefer allocator.free(graphemes.s2); |
| 36 | for (0..s2_len) |i| self.s2[i] = reader.readInt(u16, endian) catch unreachable; | 40 | for (0..s2_len) |i| graphemes.s2[i] = reader.readInt(u16, endian) catch unreachable; |
| 37 | 41 | ||
| 38 | const s3_len: u16 = reader.readInt(u16, endian) catch unreachable; | 42 | const s3_len: u16 = reader.readInt(u16, endian) catch unreachable; |
| 39 | self.s3 = try allocator.alloc(u8, s3_len); | 43 | graphemes.s3 = try allocator.alloc(u8, s3_len); |
| 40 | errdefer allocator.free(self.s3); | 44 | errdefer allocator.free(graphemes.s3); |
| 41 | _ = reader.readAll(self.s3) catch unreachable; | 45 | _ = reader.readAll(graphemes.s3) catch unreachable; |
| 42 | |||
| 43 | return self; | ||
| 44 | } | 46 | } |
| 45 | 47 | ||
| 46 | pub fn deinit(graphemes: *const Graphemes, allocator: mem.Allocator) void { | 48 | pub fn deinit(graphemes: *const Graphemes, allocator: Allocator) void { |
| 47 | allocator.free(graphemes.s1); | 49 | allocator.free(graphemes.s1); |
| 48 | allocator.free(graphemes.s2); | 50 | allocator.free(graphemes.s2); |
| 49 | allocator.free(graphemes.s3); | 51 | allocator.free(graphemes.s3); |
diff --git a/src/LetterCasing.zig b/src/LetterCasing.zig index 0a0acb1..a7260b8 100644 --- a/src/LetterCasing.zig +++ b/src/LetterCasing.zig | |||
| @@ -1,25 +1,31 @@ | |||
| 1 | const std = @import("std"); | ||
| 2 | const builtin = @import("builtin"); | ||
| 3 | const compress = std.compress; | ||
| 4 | const mem = std.mem; | ||
| 5 | const testing = std.testing; | ||
| 6 | const unicode = std.unicode; | ||
| 7 | |||
| 8 | const CodePointIterator = @import("code_point").Iterator; | 1 | const CodePointIterator = @import("code_point").Iterator; |
| 9 | 2 | ||
| 10 | case_map: [][2]u21, | 3 | case_map: [][2]u21 = undefined, |
| 11 | prop_s1: []u16 = undefined, | 4 | prop_s1: []u16 = undefined, |
| 12 | prop_s2: []u8 = undefined, | 5 | prop_s2: []u8 = undefined, |
| 13 | 6 | ||
| 14 | const Self = @This(); | 7 | const LetterCasing = @This(); |
| 8 | |||
| 9 | pub fn init(allocator: Allocator) Allocator.Error!LetterCasing { | ||
| 10 | var case = LetterCasing{}; | ||
| 11 | try case.setup(allocator); | ||
| 12 | return case; | ||
| 13 | } | ||
| 15 | 14 | ||
| 16 | pub fn init(allocator: mem.Allocator) !Self { | 15 | pub fn setup(case: *LetterCasing, allocator: Allocator) Allocator.Error!void { |
| 16 | case.setupInner(allocator) catch |err| { | ||
| 17 | switch (err) { | ||
| 18 | error.OutOfMemory => |e| return e, | ||
| 19 | else => unreachable, | ||
| 20 | } | ||
| 21 | }; | ||
| 22 | } | ||
| 23 | |||
| 24 | inline fn setupInner(self: *LetterCasing, allocator: mem.Allocator) !void { | ||
| 17 | const decompressor = compress.flate.inflate.decompressor; | 25 | const decompressor = compress.flate.inflate.decompressor; |
| 18 | const endian = builtin.cpu.arch.endian(); | 26 | const endian = builtin.cpu.arch.endian(); |
| 19 | 27 | ||
| 20 | var self = Self{ | 28 | self.case_map = try allocator.alloc([2]u21, 0x110000); |
| 21 | .case_map = try allocator.alloc([2]u21, 0x110000), | ||
| 22 | }; | ||
| 23 | errdefer allocator.free(self.case_map); | 29 | errdefer allocator.free(self.case_map); |
| 24 | 30 | ||
| 25 | for (0..0x110000) |i| { | 31 | for (0..0x110000) |i| { |
| @@ -68,28 +74,26 @@ pub fn init(allocator: mem.Allocator) !Self { | |||
| 68 | self.prop_s2 = try allocator.alloc(u8, stage_2_len); | 74 | self.prop_s2 = try allocator.alloc(u8, stage_2_len); |
| 69 | errdefer allocator.free(self.prop_s2); | 75 | errdefer allocator.free(self.prop_s2); |
| 70 | _ = try cp_reader.readAll(self.prop_s2); | 76 | _ = try cp_reader.readAll(self.prop_s2); |
| 71 | |||
| 72 | return self; | ||
| 73 | } | 77 | } |
| 74 | 78 | ||
| 75 | pub fn deinit(self: *const Self, allocator: mem.Allocator) void { | 79 | pub fn deinit(self: *const LetterCasing, allocator: mem.Allocator) void { |
| 76 | allocator.free(self.case_map); | 80 | allocator.free(self.case_map); |
| 77 | allocator.free(self.prop_s1); | 81 | allocator.free(self.prop_s1); |
| 78 | allocator.free(self.prop_s2); | 82 | allocator.free(self.prop_s2); |
| 79 | } | 83 | } |
| 80 | 84 | ||
| 81 | // Returns true if `cp` is either upper, lower, or title case. | 85 | // Returns true if `cp` is either upper, lower, or title case. |
| 82 | pub fn isCased(self: Self, cp: u21) bool { | 86 | pub fn isCased(self: LetterCasing, cp: u21) bool { |
| 83 | return self.prop_s2[self.prop_s1[cp >> 8] + (cp & 0xff)] & 4 == 4; | 87 | return self.prop_s2[self.prop_s1[cp >> 8] + (cp & 0xff)] & 4 == 4; |
| 84 | } | 88 | } |
| 85 | 89 | ||
| 86 | // Returns true if `cp` is uppercase. | 90 | // Returns true if `cp` is uppercase. |
| 87 | pub fn isUpper(self: Self, cp: u21) bool { | 91 | pub fn isUpper(self: LetterCasing, cp: u21) bool { |
| 88 | return self.prop_s2[self.prop_s1[cp >> 8] + (cp & 0xff)] & 2 == 2; | 92 | return self.prop_s2[self.prop_s1[cp >> 8] + (cp & 0xff)] & 2 == 2; |
| 89 | } | 93 | } |
| 90 | 94 | ||
| 91 | /// Returns true if `str` is all uppercase. | 95 | /// Returns true if `str` is all uppercase. |
| 92 | pub fn isUpperStr(self: Self, str: []const u8) bool { | 96 | pub fn isUpperStr(self: LetterCasing, str: []const u8) bool { |
| 93 | var iter = CodePointIterator{ .bytes = str }; | 97 | var iter = CodePointIterator{ .bytes = str }; |
| 94 | 98 | ||
| 95 | return while (iter.next()) |cp| { | 99 | return while (iter.next()) |cp| { |
| @@ -107,14 +111,14 @@ test "isUpperStr" { | |||
| 107 | } | 111 | } |
| 108 | 112 | ||
| 109 | /// Returns uppercase mapping for `cp`. | 113 | /// Returns uppercase mapping for `cp`. |
| 110 | pub fn toUpper(self: Self, cp: u21) u21 { | 114 | pub fn toUpper(self: LetterCasing, cp: u21) u21 { |
| 111 | return self.case_map[cp][0]; | 115 | return self.case_map[cp][0]; |
| 112 | } | 116 | } |
| 113 | 117 | ||
| 114 | /// Returns a new string with all letters in uppercase. | 118 | /// Returns a new string with all letters in uppercase. |
| 115 | /// Caller must free returned bytes with `allocator`. | 119 | /// Caller must free returned bytes with `allocator`. |
| 116 | pub fn toUpperStr( | 120 | pub fn toUpperStr( |
| 117 | self: Self, | 121 | self: LetterCasing, |
| 118 | allocator: mem.Allocator, | 122 | allocator: mem.Allocator, |
| 119 | str: []const u8, | 123 | str: []const u8, |
| 120 | ) ![]u8 { | 124 | ) ![]u8 { |
| @@ -142,12 +146,12 @@ test "toUpperStr" { | |||
| 142 | } | 146 | } |
| 143 | 147 | ||
| 144 | // Returns true if `cp` is lowercase. | 148 | // Returns true if `cp` is lowercase. |
| 145 | pub fn isLower(self: Self, cp: u21) bool { | 149 | pub fn isLower(self: LetterCasing, cp: u21) bool { |
| 146 | return self.prop_s2[self.prop_s1[cp >> 8] + (cp & 0xff)] & 1 == 1; | 150 | return self.prop_s2[self.prop_s1[cp >> 8] + (cp & 0xff)] & 1 == 1; |
| 147 | } | 151 | } |
| 148 | 152 | ||
| 149 | /// Returns true if `str` is all lowercase. | 153 | /// Returns true if `str` is all lowercase. |
| 150 | pub fn isLowerStr(self: Self, str: []const u8) bool { | 154 | pub fn isLowerStr(self: LetterCasing, str: []const u8) bool { |
| 151 | var iter = CodePointIterator{ .bytes = str }; | 155 | var iter = CodePointIterator{ .bytes = str }; |
| 152 | 156 | ||
| 153 | return while (iter.next()) |cp| { | 157 | return while (iter.next()) |cp| { |
| @@ -165,14 +169,14 @@ test "isLowerStr" { | |||
| 165 | } | 169 | } |
| 166 | 170 | ||
| 167 | /// Returns lowercase mapping for `cp`. | 171 | /// Returns lowercase mapping for `cp`. |
| 168 | pub fn toLower(self: Self, cp: u21) u21 { | 172 | pub fn toLower(self: LetterCasing, cp: u21) u21 { |
| 169 | return self.case_map[cp][1]; | 173 | return self.case_map[cp][1]; |
| 170 | } | 174 | } |
| 171 | 175 | ||
| 172 | /// Returns a new string with all letters in lowercase. | 176 | /// Returns a new string with all letters in lowercase. |
| 173 | /// Caller must free returned bytes with `allocator`. | 177 | /// Caller must free returned bytes with `allocator`. |
| 174 | pub fn toLowerStr( | 178 | pub fn toLowerStr( |
| 175 | self: Self, | 179 | self: LetterCasing, |
| 176 | allocator: mem.Allocator, | 180 | allocator: mem.Allocator, |
| 177 | str: []const u8, | 181 | str: []const u8, |
| 178 | ) ![]u8 { | 182 | ) ![]u8 { |
| @@ -198,3 +202,11 @@ test "toLowerStr" { | |||
| 198 | defer testing.allocator.free(lowered); | 202 | defer testing.allocator.free(lowered); |
| 199 | try testing.expectEqualStrings("hello, world 2112!", lowered); | 203 | try testing.expectEqualStrings("hello, world 2112!", lowered); |
| 200 | } | 204 | } |
| 205 | |||
| 206 | const std = @import("std"); | ||
| 207 | const builtin = @import("builtin"); | ||
| 208 | const compress = std.compress; | ||
| 209 | const mem = std.mem; | ||
| 210 | const Allocator = std.mem.Allocator; | ||
| 211 | const testing = std.testing; | ||
| 212 | const unicode = std.unicode; | ||
diff --git a/src/Normalize.zig b/src/Normalize.zig index d8c867d..1500b4c 100644 --- a/src/Normalize.zig +++ b/src/Normalize.zig | |||
| @@ -10,22 +10,47 @@ normp_data: NormPropsData = undefined, | |||
| 10 | 10 | ||
| 11 | const Normalize = @This(); | 11 | const Normalize = @This(); |
| 12 | 12 | ||
| 13 | pub fn init(allocator: Allocator) !Normalize { | 13 | pub fn init(allocator: Allocator) Allocator.Error!Normalize { |
| 14 | var norm: Normalize = undefined; | 14 | var norm: Normalize = undefined; |
| 15 | try norm.setup(allocator); | 15 | try norm.setup(allocator); |
| 16 | return norm; | 16 | return norm; |
| 17 | } | 17 | } |
| 18 | 18 | ||
| 19 | pub fn setup(self: *Normalize, allocator: Allocator) !void { | 19 | pub fn setup(self: *Normalize, allocator: Allocator) Allocator.Error!void { |
| 20 | self.canon_data = try CanonData.init(allocator); | 20 | self.canon_data = CanonData.init(allocator) catch |err| { |
| 21 | switch (err) { | ||
| 22 | error.OutOfMemory => |e| return e, | ||
| 23 | else => unreachable, | ||
| 24 | } | ||
| 25 | }; | ||
| 21 | errdefer self.canon_data.deinit(allocator); | 26 | errdefer self.canon_data.deinit(allocator); |
| 22 | self.ccc_data = try CccData.init(allocator); | 27 | self.ccc_data = CccData.init(allocator) catch |err| { |
| 28 | switch (err) { | ||
| 29 | error.OutOfMemory => |e| return e, | ||
| 30 | else => unreachable, | ||
| 31 | } | ||
| 32 | }; | ||
| 23 | errdefer self.ccc_data.deinit(allocator); | 33 | errdefer self.ccc_data.deinit(allocator); |
| 24 | self.compat_data = try CompatData.init(allocator); | 34 | self.compat_data = CompatData.init(allocator) catch |err| { |
| 35 | switch (err) { | ||
| 36 | error.OutOfMemory => |e| return e, | ||
| 37 | else => unreachable, | ||
| 38 | } | ||
| 39 | }; | ||
| 25 | errdefer self.compat_data.deinit(allocator); | 40 | errdefer self.compat_data.deinit(allocator); |
| 26 | self.hangul_data = try HangulData.init(allocator); | 41 | self.hangul_data = HangulData.init(allocator) catch |err| { |
| 42 | switch (err) { | ||
| 43 | error.OutOfMemory => |e| return e, | ||
| 44 | else => unreachable, | ||
| 45 | } | ||
| 46 | }; | ||
| 27 | errdefer self.hangul_data.deinit(allocator); | 47 | errdefer self.hangul_data.deinit(allocator); |
| 28 | self.normp_data = try NormPropsData.init(allocator); | 48 | self.normp_data = NormPropsData.init(allocator) catch |err| { |
| 49 | switch (err) { | ||
| 50 | error.OutOfMemory => |e| return e, | ||
| 51 | else => unreachable, | ||
| 52 | } | ||
| 53 | }; | ||
| 29 | } | 54 | } |
| 30 | 55 | ||
| 31 | pub fn deinit(norm: *const Normalize, allocator: Allocator) void { | 56 | pub fn deinit(norm: *const Normalize, allocator: Allocator) void { |
diff --git a/src/Properties.zig b/src/Properties.zig index 46920be..f7e57ec 100644 --- a/src/Properties.zig +++ b/src/Properties.zig | |||
| @@ -1,8 +1,4 @@ | |||
| 1 | const std = @import("std"); | 1 | //! Properties module |
| 2 | const builtin = @import("builtin"); | ||
| 3 | const compress = std.compress; | ||
| 4 | const mem = std.mem; | ||
| 5 | const testing = std.testing; | ||
| 6 | 2 | ||
| 7 | core_s1: []u16 = undefined, | 3 | core_s1: []u16 = undefined, |
| 8 | core_s2: []u8 = undefined, | 4 | core_s2: []u8 = undefined, |
| @@ -11,9 +7,24 @@ props_s2: []u8 = undefined, | |||
| 11 | num_s1: []u16 = undefined, | 7 | num_s1: []u16 = undefined, |
| 12 | num_s2: []u8 = undefined, | 8 | num_s2: []u8 = undefined, |
| 13 | 9 | ||
| 14 | const Self = @This(); | 10 | const Properties = @This(); |
| 11 | |||
| 12 | pub fn init(allocator: Allocator) Allocator.Error!Properties { | ||
| 13 | var props = Properties{}; | ||
| 14 | try props.setup(allocator); | ||
| 15 | return props; | ||
| 16 | } | ||
| 17 | |||
| 18 | pub fn setup(props: *Properties, allocator: Allocator) Allocator.Error!void { | ||
| 19 | props.setupInner(allocator) catch |err| { | ||
| 20 | switch (err) { | ||
| 21 | error.OutOfMemory => |e| return e, | ||
| 22 | else => unreachable, | ||
| 23 | } | ||
| 24 | }; | ||
| 25 | } | ||
| 15 | 26 | ||
| 16 | pub fn init(allocator: mem.Allocator) !Self { | 27 | inline fn setupInner(props: *Properties, allocator: Allocator) !void { |
| 17 | const decompressor = compress.flate.inflate.decompressor; | 28 | const decompressor = compress.flate.inflate.decompressor; |
| 18 | const endian = builtin.cpu.arch.endian(); | 29 | const endian = builtin.cpu.arch.endian(); |
| 19 | 30 | ||
| @@ -23,17 +34,15 @@ pub fn init(allocator: mem.Allocator) !Self { | |||
| 23 | var core_decomp = decompressor(.raw, core_fbs.reader()); | 34 | var core_decomp = decompressor(.raw, core_fbs.reader()); |
| 24 | var core_reader = core_decomp.reader(); | 35 | var core_reader = core_decomp.reader(); |
| 25 | 36 | ||
| 26 | var self = Self{}; | ||
| 27 | |||
| 28 | const core_stage_1_len: u16 = try core_reader.readInt(u16, endian); | 37 | const core_stage_1_len: u16 = try core_reader.readInt(u16, endian); |
| 29 | self.core_s1 = try allocator.alloc(u16, core_stage_1_len); | 38 | props.core_s1 = try allocator.alloc(u16, core_stage_1_len); |
| 30 | errdefer allocator.free(self.core_s1); | 39 | errdefer allocator.free(props.core_s1); |
| 31 | for (0..core_stage_1_len) |i| self.core_s1[i] = try core_reader.readInt(u16, endian); | 40 | for (0..core_stage_1_len) |i| props.core_s1[i] = try core_reader.readInt(u16, endian); |
| 32 | 41 | ||
| 33 | const core_stage_2_len: u16 = try core_reader.readInt(u16, endian); | 42 | const core_stage_2_len: u16 = try core_reader.readInt(u16, endian); |
| 34 | self.core_s2 = try allocator.alloc(u8, core_stage_2_len); | 43 | props.core_s2 = try allocator.alloc(u8, core_stage_2_len); |
| 35 | errdefer allocator.free(self.core_s2); | 44 | errdefer allocator.free(props.core_s2); |
| 36 | _ = try core_reader.readAll(self.core_s2); | 45 | _ = try core_reader.readAll(props.core_s2); |
| 37 | 46 | ||
| 38 | // Process PropList.txt | 47 | // Process PropList.txt |
| 39 | const props_bytes = @embedFile("props"); | 48 | const props_bytes = @embedFile("props"); |
| @@ -42,14 +51,14 @@ pub fn init(allocator: mem.Allocator) !Self { | |||
| 42 | var props_reader = props_decomp.reader(); | 51 | var props_reader = props_decomp.reader(); |
| 43 | 52 | ||
| 44 | const stage_1_len: u16 = try props_reader.readInt(u16, endian); | 53 | const stage_1_len: u16 = try props_reader.readInt(u16, endian); |
| 45 | self.props_s1 = try allocator.alloc(u16, stage_1_len); | 54 | props.props_s1 = try allocator.alloc(u16, stage_1_len); |
| 46 | errdefer allocator.free(self.props_s1); | 55 | errdefer allocator.free(props.props_s1); |
| 47 | for (0..stage_1_len) |i| self.props_s1[i] = try props_reader.readInt(u16, endian); | 56 | for (0..stage_1_len) |i| props.props_s1[i] = try props_reader.readInt(u16, endian); |
| 48 | 57 | ||
| 49 | const stage_2_len: u16 = try props_reader.readInt(u16, endian); | 58 | const stage_2_len: u16 = try props_reader.readInt(u16, endian); |
| 50 | self.props_s2 = try allocator.alloc(u8, stage_2_len); | 59 | props.props_s2 = try allocator.alloc(u8, stage_2_len); |
| 51 | errdefer allocator.free(self.props_s2); | 60 | errdefer allocator.free(props.props_s2); |
| 52 | _ = try props_reader.readAll(self.props_s2); | 61 | _ = try props_reader.readAll(props.props_s2); |
| 53 | 62 | ||
| 54 | // Process DerivedNumericType.txt | 63 | // Process DerivedNumericType.txt |
| 55 | const num_bytes = @embedFile("numeric"); | 64 | const num_bytes = @embedFile("numeric"); |
| @@ -58,19 +67,17 @@ pub fn init(allocator: mem.Allocator) !Self { | |||
| 58 | var num_reader = num_decomp.reader(); | 67 | var num_reader = num_decomp.reader(); |
| 59 | 68 | ||
| 60 | const num_stage_1_len: u16 = try num_reader.readInt(u16, endian); | 69 | const num_stage_1_len: u16 = try num_reader.readInt(u16, endian); |
| 61 | self.num_s1 = try allocator.alloc(u16, num_stage_1_len); | 70 | props.num_s1 = try allocator.alloc(u16, num_stage_1_len); |
| 62 | errdefer allocator.free(self.num_s1); | 71 | errdefer allocator.free(props.num_s1); |
| 63 | for (0..num_stage_1_len) |i| self.num_s1[i] = try num_reader.readInt(u16, endian); | 72 | for (0..num_stage_1_len) |i| props.num_s1[i] = try num_reader.readInt(u16, endian); |
| 64 | 73 | ||
| 65 | const num_stage_2_len: u16 = try num_reader.readInt(u16, endian); | 74 | const num_stage_2_len: u16 = try num_reader.readInt(u16, endian); |
| 66 | self.num_s2 = try allocator.alloc(u8, num_stage_2_len); | 75 | props.num_s2 = try allocator.alloc(u8, num_stage_2_len); |
| 67 | errdefer allocator.free(self.num_s2); | 76 | errdefer allocator.free(props.num_s2); |
| 68 | _ = try num_reader.readAll(self.num_s2); | 77 | _ = try num_reader.readAll(props.num_s2); |
| 69 | |||
| 70 | return self; | ||
| 71 | } | 78 | } |
| 72 | 79 | ||
| 73 | pub fn deinit(self: *const Self, allocator: mem.Allocator) void { | 80 | pub fn deinit(self: *const Properties, allocator: Allocator) void { |
| 74 | allocator.free(self.core_s1); | 81 | allocator.free(self.core_s1); |
| 75 | allocator.free(self.core_s2); | 82 | allocator.free(self.core_s2); |
| 76 | allocator.free(self.props_s1); | 83 | allocator.free(self.props_s1); |
| @@ -80,62 +87,62 @@ pub fn deinit(self: *const Self, allocator: mem.Allocator) void { | |||
| 80 | } | 87 | } |
| 81 | 88 | ||
| 82 | /// True if `cp` is a mathematical symbol. | 89 | /// True if `cp` is a mathematical symbol. |
| 83 | pub fn isMath(self: Self, cp: u21) bool { | 90 | pub fn isMath(self: Properties, cp: u21) bool { |
| 84 | return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 1 == 1; | 91 | return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 1 == 1; |
| 85 | } | 92 | } |
| 86 | 93 | ||
| 87 | /// True if `cp` is an alphabetic character. | 94 | /// True if `cp` is an alphabetic character. |
| 88 | pub fn isAlphabetic(self: Self, cp: u21) bool { | 95 | pub fn isAlphabetic(self: Properties, cp: u21) bool { |
| 89 | return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 2 == 2; | 96 | return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 2 == 2; |
| 90 | } | 97 | } |
| 91 | 98 | ||
| 92 | /// True if `cp` is a valid identifier start character. | 99 | /// True if `cp` is a valid identifier start character. |
| 93 | pub fn isIdStart(self: Self, cp: u21) bool { | 100 | pub fn isIdStart(self: Properties, cp: u21) bool { |
| 94 | return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 4 == 4; | 101 | return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 4 == 4; |
| 95 | } | 102 | } |
| 96 | 103 | ||
| 97 | /// True if `cp` is a valid identifier continuation character. | 104 | /// True if `cp` is a valid identifier continuation character. |
| 98 | pub fn isIdContinue(self: Self, cp: u21) bool { | 105 | pub fn isIdContinue(self: Properties, cp: u21) bool { |
| 99 | return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 8 == 8; | 106 | return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 8 == 8; |
| 100 | } | 107 | } |
| 101 | 108 | ||
| 102 | /// True if `cp` is a valid extended identifier start character. | 109 | /// True if `cp` is a valid extended identifier start character. |
| 103 | pub fn isXidStart(self: Self, cp: u21) bool { | 110 | pub fn isXidStart(self: Properties, cp: u21) bool { |
| 104 | return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 16 == 16; | 111 | return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 16 == 16; |
| 105 | } | 112 | } |
| 106 | 113 | ||
| 107 | /// True if `cp` is a valid extended identifier continuation character. | 114 | /// True if `cp` is a valid extended identifier continuation character. |
| 108 | pub fn isXidContinue(self: Self, cp: u21) bool { | 115 | pub fn isXidContinue(self: Properties, cp: u21) bool { |
| 109 | return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 32 == 32; | 116 | return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 32 == 32; |
| 110 | } | 117 | } |
| 111 | 118 | ||
| 112 | /// True if `cp` is a whitespace character. | 119 | /// True if `cp` is a whitespace character. |
| 113 | pub fn isWhitespace(self: Self, cp: u21) bool { | 120 | pub fn isWhitespace(self: Properties, cp: u21) bool { |
| 114 | return self.props_s2[self.props_s1[cp >> 8] + (cp & 0xff)] & 1 == 1; | 121 | return self.props_s2[self.props_s1[cp >> 8] + (cp & 0xff)] & 1 == 1; |
| 115 | } | 122 | } |
| 116 | 123 | ||
| 117 | /// True if `cp` is a hexadecimal digit. | 124 | /// True if `cp` is a hexadecimal digit. |
| 118 | pub fn isHexDigit(self: Self, cp: u21) bool { | 125 | pub fn isHexDigit(self: Properties, cp: u21) bool { |
| 119 | return self.props_s2[self.props_s1[cp >> 8] + (cp & 0xff)] & 2 == 2; | 126 | return self.props_s2[self.props_s1[cp >> 8] + (cp & 0xff)] & 2 == 2; |
| 120 | } | 127 | } |
| 121 | 128 | ||
| 122 | /// True if `cp` is a diacritic mark. | 129 | /// True if `cp` is a diacritic mark. |
| 123 | pub fn isDiacritic(self: Self, cp: u21) bool { | 130 | pub fn isDiacritic(self: Properties, cp: u21) bool { |
| 124 | return self.props_s2[self.props_s1[cp >> 8] + (cp & 0xff)] & 4 == 4; | 131 | return self.props_s2[self.props_s1[cp >> 8] + (cp & 0xff)] & 4 == 4; |
| 125 | } | 132 | } |
| 126 | 133 | ||
| 127 | /// True if `cp` is numeric. | 134 | /// True if `cp` is numeric. |
| 128 | pub fn isNumeric(self: Self, cp: u21) bool { | 135 | pub fn isNumeric(self: Properties, cp: u21) bool { |
| 129 | return self.num_s2[self.num_s1[cp >> 8] + (cp & 0xff)] & 1 == 1; | 136 | return self.num_s2[self.num_s1[cp >> 8] + (cp & 0xff)] & 1 == 1; |
| 130 | } | 137 | } |
| 131 | 138 | ||
| 132 | /// True if `cp` is a digit. | 139 | /// True if `cp` is a digit. |
| 133 | pub fn isDigit(self: Self, cp: u21) bool { | 140 | pub fn isDigit(self: Properties, cp: u21) bool { |
| 134 | return self.num_s2[self.num_s1[cp >> 8] + (cp & 0xff)] & 2 == 2; | 141 | return self.num_s2[self.num_s1[cp >> 8] + (cp & 0xff)] & 2 == 2; |
| 135 | } | 142 | } |
| 136 | 143 | ||
| 137 | /// True if `cp` is decimal. | 144 | /// True if `cp` is decimal. |
| 138 | pub fn isDecimal(self: Self, cp: u21) bool { | 145 | pub fn isDecimal(self: Properties, cp: u21) bool { |
| 139 | return self.num_s2[self.num_s1[cp >> 8] + (cp & 0xff)] & 4 == 4; | 146 | return self.num_s2[self.num_s1[cp >> 8] + (cp & 0xff)] & 4 == 4; |
| 140 | } | 147 | } |
| 141 | 148 | ||
| @@ -161,3 +168,10 @@ test "Props" { | |||
| 161 | try testing.expect(!self.isDigit('2')); | 168 | try testing.expect(!self.isDigit('2')); |
| 162 | try testing.expect(!self.isDecimal('g')); | 169 | try testing.expect(!self.isDecimal('g')); |
| 163 | } | 170 | } |
| 171 | |||
| 172 | const std = @import("std"); | ||
| 173 | const builtin = @import("builtin"); | ||
| 174 | const compress = std.compress; | ||
| 175 | const mem = std.mem; | ||
| 176 | const Allocator = mem.Allocator; | ||
| 177 | const testing = std.testing; | ||
diff --git a/src/Scripts.zig b/src/Scripts.zig index 4ad8549..f71a2b5 100644 --- a/src/Scripts.zig +++ b/src/Scripts.zig | |||
| @@ -1,10 +1,10 @@ | |||
| 1 | const std = @import("std"); | 1 | //! Scripts Module |
| 2 | const builtin = @import("builtin"); | 2 | |
| 3 | const compress = std.compress; | 3 | s1: []u16 = undefined, |
| 4 | const mem = std.mem; | 4 | s2: []u8 = undefined, |
| 5 | const testing = std.testing; | 5 | s3: []u8 = undefined, |
| 6 | 6 | ||
| 7 | /// Scripts | 7 | /// Scripts enum |
| 8 | pub const Script = enum { | 8 | pub const Script = enum { |
| 9 | none, | 9 | none, |
| 10 | Adlam, | 10 | Adlam, |
| @@ -172,13 +172,24 @@ pub const Script = enum { | |||
| 172 | Zanabazar_Square, | 172 | Zanabazar_Square, |
| 173 | }; | 173 | }; |
| 174 | 174 | ||
| 175 | s1: []u16 = undefined, | 175 | const Scripts = @This(); |
| 176 | s2: []u8 = undefined, | ||
| 177 | s3: []u8 = undefined, | ||
| 178 | 176 | ||
| 179 | const Self = @This(); | 177 | pub fn init(allocator: Allocator) Allocator.Error!Scripts { |
| 178 | var scripts = Scripts{}; | ||
| 179 | try scripts.setup(allocator); | ||
| 180 | return scripts; | ||
| 181 | } | ||
| 182 | |||
| 183 | pub fn setup(scripts: *Scripts, allocator: Allocator) Allocator.Error!void { | ||
| 184 | scripts.setupInner(allocator) catch |err| { | ||
| 185 | switch (err) { | ||
| 186 | error.OutOfMemory => |e| return e, | ||
| 187 | else => unreachable, | ||
| 188 | } | ||
| 189 | }; | ||
| 190 | } | ||
| 180 | 191 | ||
| 181 | pub fn init(allocator: mem.Allocator) !Self { | 192 | inline fn setupInner(scripts: *Scripts, allocator: mem.Allocator) !void { |
| 182 | const decompressor = compress.flate.inflate.decompressor; | 193 | const decompressor = compress.flate.inflate.decompressor; |
| 183 | const in_bytes = @embedFile("scripts"); | 194 | const in_bytes = @embedFile("scripts"); |
| 184 | var in_fbs = std.io.fixedBufferStream(in_bytes); | 195 | var in_fbs = std.io.fixedBufferStream(in_bytes); |
| @@ -187,34 +198,30 @@ pub fn init(allocator: mem.Allocator) !Self { | |||
| 187 | 198 | ||
| 188 | const endian = builtin.cpu.arch.endian(); | 199 | const endian = builtin.cpu.arch.endian(); |
| 189 | 200 | ||
| 190 | var self = Self{}; | ||
| 191 | |||
| 192 | const s1_len: u16 = try reader.readInt(u16, endian); | 201 | const s1_len: u16 = try reader.readInt(u16, endian); |
| 193 | self.s1 = try allocator.alloc(u16, s1_len); | 202 | scripts.s1 = try allocator.alloc(u16, s1_len); |
| 194 | errdefer allocator.free(self.s1); | 203 | errdefer allocator.free(scripts.s1); |
| 195 | for (0..s1_len) |i| self.s1[i] = try reader.readInt(u16, endian); | 204 | for (0..s1_len) |i| scripts.s1[i] = try reader.readInt(u16, endian); |
| 196 | 205 | ||
| 197 | const s2_len: u16 = try reader.readInt(u16, endian); | 206 | const s2_len: u16 = try reader.readInt(u16, endian); |
| 198 | self.s2 = try allocator.alloc(u8, s2_len); | 207 | scripts.s2 = try allocator.alloc(u8, s2_len); |
| 199 | errdefer allocator.free(self.s2); | 208 | errdefer allocator.free(scripts.s2); |
| 200 | _ = try reader.readAll(self.s2); | 209 | _ = try reader.readAll(scripts.s2); |
| 201 | 210 | ||
| 202 | const s3_len: u16 = try reader.readInt(u8, endian); | 211 | const s3_len: u16 = try reader.readInt(u8, endian); |
| 203 | self.s3 = try allocator.alloc(u8, s3_len); | 212 | scripts.s3 = try allocator.alloc(u8, s3_len); |
| 204 | errdefer allocator.free(self.s3); | 213 | errdefer allocator.free(scripts.s3); |
| 205 | _ = try reader.readAll(self.s3); | 214 | _ = try reader.readAll(scripts.s3); |
| 206 | |||
| 207 | return self; | ||
| 208 | } | 215 | } |
| 209 | 216 | ||
| 210 | pub fn deinit(self: *const Self, allocator: mem.Allocator) void { | 217 | pub fn deinit(self: *const Scripts, allocator: mem.Allocator) void { |
| 211 | allocator.free(self.s1); | 218 | allocator.free(self.s1); |
| 212 | allocator.free(self.s2); | 219 | allocator.free(self.s2); |
| 213 | allocator.free(self.s3); | 220 | allocator.free(self.s3); |
| 214 | } | 221 | } |
| 215 | 222 | ||
| 216 | /// Lookup the Script type for `cp`. | 223 | /// Lookup the Script type for `cp`. |
| 217 | pub fn script(self: Self, cp: u21) ?Script { | 224 | pub fn script(self: Scripts, cp: u21) ?Script { |
| 218 | const byte = self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]]; | 225 | const byte = self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]]; |
| 219 | if (byte == 0) return null; | 226 | if (byte == 0) return null; |
| 220 | return @enumFromInt(byte); | 227 | return @enumFromInt(byte); |
| @@ -225,3 +232,10 @@ test "script" { | |||
| 225 | defer self.deinit(std.testing.allocator); | 232 | defer self.deinit(std.testing.allocator); |
| 226 | try testing.expectEqual(Script.Latin, self.script('A').?); | 233 | try testing.expectEqual(Script.Latin, self.script('A').?); |
| 227 | } | 234 | } |
| 235 | |||
| 236 | const std = @import("std"); | ||
| 237 | const builtin = @import("builtin"); | ||
| 238 | const compress = std.compress; | ||
| 239 | const mem = std.mem; | ||
| 240 | const Allocator = mem.Allocator; | ||
| 241 | const testing = std.testing; | ||