diff options
Diffstat (limited to 'src/GeneralCategories.zig')
| -rw-r--r-- | src/GeneralCategories.zig | 102 |
1 files changed, 30 insertions, 72 deletions
diff --git a/src/GeneralCategories.zig b/src/GeneralCategories.zig
index eee7e56..9a383bf 100644
--- a/src/GeneralCategories.zig
+++ b/src/GeneralCategories.zig
| @@ -1,8 +1,19 @@ | |||
| 1 | //! General Categories | 1 | //! General Categories |
| 2 | 2 | ||
| 3 | s1: []u16 = undefined, | 3 | const Data = struct { |
| 4 | s2: []u5 = undefined, | 4 | s1: []const u16 = undefined, |
| 5 | s3: []u5 = undefined, | 5 | s2: []const u5 = undefined, |
| 6 | s3: []const u5 = undefined, | ||
| 7 | }; | ||
| 8 | |||
| 9 | const general_categories = general_categories: { | ||
| 10 | const data = @import("gencat"); | ||
| 11 | break :general_categories Data{ | ||
| 12 | .s1 = &data.s1, | ||
| 13 | .s2 = &data.s2, | ||
| 14 | .s3 = &data.s3, | ||
| 15 | }; | ||
| 16 | }; | ||
| 6 | 17 | ||
| 7 | /// General Category | 18 | /// General Category |
| 8 | pub const Gc = enum { | 19 | pub const Gc = enum { |
| @@ -38,51 +49,14 @@ pub const Gc = enum { | |||
| 38 | Zs, // Separator, Space | 49 | Zs, // Separator, Space |
| 39 | }; | 50 | }; |
| 40 | 51 | ||
| 41 | const GeneralCategories = @This(); | ||
| 42 | |||
| 43 | pub fn init(allocator: Allocator) Allocator.Error!GeneralCategories { | ||
| 44 | var gencat = GeneralCategories{}; | ||
| 45 | try gencat.setup(allocator); | ||
| 46 | return gencat; | ||
| 47 | } | ||
| 48 | |||
| 49 | pub fn setup(gencat: *GeneralCategories, allocator: Allocator) Allocator.Error!void { | ||
| 50 | const in_bytes = @embedFile("gencat"); | ||
| 51 | var in_fbs = std.io.fixedBufferStream(in_bytes); | ||
| 52 | var reader = in_fbs.reader(); | ||
| 53 | |||
| 54 | const endian = builtin.cpu.arch.endian(); | ||
| 55 | |||
| 56 | const s1_len: u16 = reader.readInt(u16, endian) catch unreachable; | ||
| 57 | gencat.s1 = try allocator.alloc(u16, s1_len); | ||
| 58 | errdefer allocator.free(gencat.s1); | ||
| 59 | for (0..s1_len) |i| gencat.s1[i] = reader.readInt(u16, endian) catch unreachable; | ||
| 60 | |||
| 61 | const s2_len: u16 = reader.readInt(u16, endian) catch unreachable; | ||
| 62 | gencat.s2 = try allocator.alloc(u5, s2_len); | ||
| 63 | errdefer allocator.free(gencat.s2); | ||
| 64 | for (0..s2_len) |i| gencat.s2[i] = @intCast(reader.readInt(u8, endian) catch unreachable); | ||
| 65 | |||
| 66 | const s3_len: u16 = reader.readInt(u8, endian) catch unreachable; | ||
| 67 | gencat.s3 = try allocator.alloc(u5, s3_len); | ||
| 68 | errdefer allocator.free(gencat.s3); | ||
| 69 | for (0..s3_len) |i| gencat.s3[i] = @intCast(reader.readInt(u8, endian) catch unreachable); | ||
| 70 | } | ||
| 71 | |||
| 72 | pub fn deinit(gencat: *const GeneralCategories, allocator: mem.Allocator) void { | ||
| 73 | allocator.free(gencat.s1); | ||
| 74 | allocator.free(gencat.s2); | ||
| 75 | allocator.free(gencat.s3); | ||
| 76 | } | ||
| 77 | |||
| 78 | /// Lookup the General Category for `cp`. | 52 | /// Lookup the General Category for `cp`. |
| 79 | pub fn gc(gencat: GeneralCategories, cp: u21) Gc { | 53 | pub fn gc(cp: u21) Gc { |
| 80 | return @enumFromInt(gencat.s3[gencat.s2[gencat.s1[cp >> 8] + (cp & 0xff)]]); | 54 | return @enumFromInt(general_categories.s3[general_categories.s2[general_categories.s1[cp >> 8] + (cp & 0xff)]]); |
| 81 | } | 55 | } |
| 82 | 56 | ||
| 83 | /// True if `cp` has a C general category. | 57 | /// True if `cp` has a C general category. |
| 84 | pub fn isControl(gencat: GeneralCategories, cp: u21) bool { | 58 | pub fn isControl(cp: u21) bool { |
| 85 | return switch (gencat.gc(cp)) { | 59 | return switch (gc(cp)) { |
| 86 | .Cc, | 60 | .Cc, |
| 87 | .Cf, | 61 | .Cf, |
| 88 | .Cn, | 62 | .Cn, |
| @@ -94,8 +68,8 @@ pub fn isControl(gencat: GeneralCategories, cp: u21) bool { | |||
| 94 | } | 68 | } |
| 95 | 69 | ||
| 96 | /// True if `cp` has an L general category. | 70 | /// True if `cp` has an L general category. |
| 97 | pub fn isLetter(gencat: GeneralCategories, cp: u21) bool { | 71 | pub fn isLetter(cp: u21) bool { |
| 98 | return switch (gencat.gc(cp)) { | 72 | return switch (gc(cp)) { |
| 99 | .Ll, | 73 | .Ll, |
| 100 | .Lm, | 74 | .Lm, |
| 101 | .Lo, | 75 | .Lo, |
| @@ -107,8 +81,8 @@ pub fn isLetter(gencat: GeneralCategories, cp: u21) bool { | |||
| 107 | } | 81 | } |
| 108 | 82 | ||
| 109 | /// True if `cp` has an M general category. | 83 | /// True if `cp` has an M general category. |
| 110 | pub fn isMark(gencat: GeneralCategories, cp: u21) bool { | 84 | pub fn isMark(cp: u21) bool { |
| 111 | return switch (gencat.gc(cp)) { | 85 | return switch (gc(cp)) { |
| 112 | .Mc, | 86 | .Mc, |
| 113 | .Me, | 87 | .Me, |
| 114 | .Mn, | 88 | .Mn, |
| @@ -118,8 +92,8 @@ pub fn isMark(gencat: GeneralCategories, cp: u21) bool { | |||
| 118 | } | 92 | } |
| 119 | 93 | ||
| 120 | /// True if `cp` has an N general category. | 94 | /// True if `cp` has an N general category. |
| 121 | pub fn isNumber(gencat: GeneralCategories, cp: u21) bool { | 95 | pub fn isNumber(cp: u21) bool { |
| 122 | return switch (gencat.gc(cp)) { | 96 | return switch (gc(cp)) { |
| 123 | .Nd, | 97 | .Nd, |
| 124 | .Nl, | 98 | .Nl, |
| 125 | .No, | 99 | .No, |
| @@ -129,8 +103,8 @@ pub fn isNumber(gencat: GeneralCategories, cp: u21) bool { | |||
| 129 | } | 103 | } |
| 130 | 104 | ||
| 131 | /// True if `cp` has a P general category. | 105 | /// True if `cp` has a P general category. |
| 132 | pub fn isPunctuation(gencat: GeneralCategories, cp: u21) bool { | 106 | pub fn isPunctuation(cp: u21) bool { |
| 133 | return switch (gencat.gc(cp)) { | 107 | return switch (gc(cp)) { |
| 134 | .Pc, | 108 | .Pc, |
| 135 | .Pd, | 109 | .Pd, |
| 136 | .Pe, | 110 | .Pe, |
| @@ -144,8 +118,8 @@ pub fn isPunctuation(gencat: GeneralCategories, cp: u21) bool { | |||
| 144 | } | 118 | } |
| 145 | 119 | ||
| 146 | /// True if `cp` has an S general category. | 120 | /// True if `cp` has an S general category. |
| 147 | pub fn isSymbol(gencat: GeneralCategories, cp: u21) bool { | 121 | pub fn isSymbol(cp: u21) bool { |
| 148 | return switch (gencat.gc(cp)) { | 122 | return switch (gc(cp)) { |
| 149 | .Sc, | 123 | .Sc, |
| 150 | .Sk, | 124 | .Sk, |
| 151 | .Sm, | 125 | .Sm, |
| @@ -156,8 +130,8 @@ pub fn isSymbol(gencat: GeneralCategories, cp: u21) bool { | |||
| 156 | } | 130 | } |
| 157 | 131 | ||
| 158 | /// True if `cp` has a Z general category. | 132 | /// True if `cp` has a Z general category. |
| 159 | pub fn isSeparator(gencat: GeneralCategories, cp: u21) bool { | 133 | pub fn isSeparator(cp: u21) bool { |
| 160 | return switch (gencat.gc(cp)) { | 134 | return switch (gc(cp)) { |
| 161 | .Zl, | 135 | .Zl, |
| 162 | .Zp, | 136 | .Zp, |
| 163 | .Zs, | 137 | .Zs, |
| @@ -165,19 +139,3 @@ pub fn isSeparator(gencat: GeneralCategories, cp: u21) bool { | |||
| 165 | else => false, | 139 | else => false, |
| 166 | }; | 140 | }; |
| 167 | } | 141 | } |
| 168 | |||
| 169 | fn testAllocator(allocator: Allocator) !void { | ||
| 170 | var gen_cat = try GeneralCategories.init(allocator); | ||
| 171 | gen_cat.deinit(allocator); | ||
| 172 | } | ||
| 173 | |||
| 174 | test "Allocation failure" { | ||
| 175 | try testing.checkAllAllocationFailures(testing.allocator, testAllocator, .{}); | ||
| 176 | } | ||
| 177 | |||
| 178 | const std = @import("std"); | ||
| 179 | const builtin = @import("builtin"); | ||
| 180 | const compress = std.compress; | ||
| 181 | const mem = std.mem; | ||
| 182 | const testing = std.testing; | ||
| 183 | const Allocator = mem.Allocator; | ||