summary refs log tree commit diff
path: root/src/GeneralCategories.zig
diff options
context:
space:
mode:
Diffstat (limited to 'src/GeneralCategories.zig')
-rw-r--r-- src/GeneralCategories.zig 59
1 files changed, 32 insertions, 27 deletions
diff --git a/src/GeneralCategories.zig b/src/GeneralCategories.zig
index a69f7a2..b7c82c0 100644
--- a/src/GeneralCategories.zig
+++ b/src/GeneralCategories.zig
@@ -1,7 +1,8 @@
1const std = @import("std"); 1//! General Categories
2const builtin = @import("builtin"); 2
3const compress = std.compress; 3s1: []u16 = undefined,
4const mem = std.mem; 4s2: []u5 = undefined,
5s3: []u5 = undefined,
5 6
6/// General Category 7/// General Category
7pub const Gc = enum { 8pub const Gc = enum {
@@ -37,13 +38,15 @@ pub const Gc = enum {
37 Zs, // Separator, Space 38 Zs, // Separator, Space
38}; 39};
39 40
40s1: []u16 = undefined, 41const GeneralCategories = @This();
41s2: []u5 = undefined,
42s3: []u5 = undefined,
43 42
44const Self = @This(); 43pub fn init(allocator: Allocator) Allocator.Error!GeneralCategories {
44 var gencat = GeneralCategories{};
45 try gencat.setup(allocator);
46 return gencat;
47}
45 48
46pub fn init(allocator: mem.Allocator) !Self { 49pub fn setup(self: *GeneralCategories, allocator: Allocator) Allocator.Error!void {
47 const decompressor = compress.flate.inflate.decompressor; 50 const decompressor = compress.flate.inflate.decompressor;
48 const in_bytes = @embedFile("gencat"); 51 const in_bytes = @embedFile("gencat");
49 var in_fbs = std.io.fixedBufferStream(in_bytes); 52 var in_fbs = std.io.fixedBufferStream(in_bytes);
@@ -52,39 +55,35 @@ pub fn init(allocator: mem.Allocator) !Self {
52 55
53 const endian = builtin.cpu.arch.endian(); 56 const endian = builtin.cpu.arch.endian();
54 57
55 var self = Self{}; 58 const s1_len: u16 = reader.readInt(u16, endian) catch unreachable;
56
57 const s1_len: u16 = try reader.readInt(u16, endian);
58 self.s1 = try allocator.alloc(u16, s1_len); 59 self.s1 = try allocator.alloc(u16, s1_len);
59 errdefer allocator.free(self.s1); 60 errdefer allocator.free(self.s1);
60 for (0..s1_len) |i| self.s1[i] = try reader.readInt(u16, endian); 61 for (0..s1_len) |i| self.s1[i] = try reader.readInt(u16, endian);
61 62
62 const s2_len: u16 = try reader.readInt(u16, endian); 63 const s2_len: u16 = reader.readInt(u16, endian) catch unreachable;
63 self.s2 = try allocator.alloc(u5, s2_len); 64 self.s2 = try allocator.alloc(u5, s2_len);
64 errdefer allocator.free(self.s2); 65 errdefer allocator.free(self.s2);
65 for (0..s2_len) |i| self.s2[i] = @intCast(try reader.readInt(u8, endian)); 66 for (0..s2_len) |i| self.s2[i] = @intCast(reader.readInt(u8, endian) catch unreachable);
66 67
67 const s3_len: u16 = try reader.readInt(u8, endian); 68 const s3_len: u16 = reader.readInt(u8, endian) catch unreachable;
68 self.s3 = try allocator.alloc(u5, s3_len); 69 self.s3 = try allocator.alloc(u5, s3_len);
69 errdefer allocator.free(self.s3); 70 errdefer allocator.free(self.s3);
70 for (0..s3_len) |i| self.s3[i] = @intCast(try reader.readInt(u8, endian)); 71 for (0..s3_len) |i| self.s3[i] = @intCast(reader.readInt(u8, endian) catch unreachable);
71
72 return self;
73} 72}
74 73
75pub fn deinit(self: *const Self, allocator: mem.Allocator) void { 74pub fn deinit(self: *const GeneralCategories, allocator: mem.Allocator) void {
76 allocator.free(self.s1); 75 allocator.free(self.s1);
77 allocator.free(self.s2); 76 allocator.free(self.s2);
78 allocator.free(self.s3); 77 allocator.free(self.s3);
79} 78}
80 79
81/// Lookup the General Category for `cp`. 80/// Lookup the General Category for `cp`.
82pub fn gc(self: Self, cp: u21) Gc { 81pub fn gc(self: GeneralCategories, cp: u21) Gc {
83 return @enumFromInt(self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]]); 82 return @enumFromInt(self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]]);
84} 83}
85 84
86/// True if `cp` has an C general category. 85/// True if `cp` has an C general category.
87pub fn isControl(self: Self, cp: u21) bool { 86pub fn isControl(self: GeneralCategories, cp: u21) bool {
88 return switch (self.gc(cp)) { 87 return switch (self.gc(cp)) {
89 .Cc, 88 .Cc,
90 .Cf, 89 .Cf,
@@ -97,7 +96,7 @@ pub fn isControl(self: Self, cp: u21) bool {
97} 96}
98 97
99/// True if `cp` has an L general category. 98/// True if `cp` has an L general category.
100pub fn isLetter(self: Self, cp: u21) bool { 99pub fn isLetter(self: GeneralCategories, cp: u21) bool {
101 return switch (self.gc(cp)) { 100 return switch (self.gc(cp)) {
102 .Ll, 101 .Ll,
103 .Lm, 102 .Lm,
@@ -110,7 +109,7 @@ pub fn isLetter(self: Self, cp: u21) bool {
110} 109}
111 110
112/// True if `cp` has an M general category. 111/// True if `cp` has an M general category.
113pub fn isMark(self: Self, cp: u21) bool { 112pub fn isMark(self: GeneralCategories, cp: u21) bool {
114 return switch (self.gc(cp)) { 113 return switch (self.gc(cp)) {
115 .Mc, 114 .Mc,
116 .Me, 115 .Me,
@@ -121,7 +120,7 @@ pub fn isMark(self: Self, cp: u21) bool {
121} 120}
122 121
123/// True if `cp` has an N general category. 122/// True if `cp` has an N general category.
124pub fn isNumber(self: Self, cp: u21) bool { 123pub fn isNumber(self: GeneralCategories, cp: u21) bool {
125 return switch (self.gc(cp)) { 124 return switch (self.gc(cp)) {
126 .Nd, 125 .Nd,
127 .Nl, 126 .Nl,
@@ -132,7 +131,7 @@ pub fn isNumber(self: Self, cp: u21) bool {
132} 131}
133 132
134/// True if `cp` has an P general category. 133/// True if `cp` has an P general category.
135pub fn isPunctuation(self: Self, cp: u21) bool { 134pub fn isPunctuation(self: GeneralCategories, cp: u21) bool {
136 return switch (self.gc(cp)) { 135 return switch (self.gc(cp)) {
137 .Pc, 136 .Pc,
138 .Pd, 137 .Pd,
@@ -147,7 +146,7 @@ pub fn isPunctuation(self: Self, cp: u21) bool {
147} 146}
148 147
149/// True if `cp` has an S general category. 148/// True if `cp` has an S general category.
150pub fn isSymbol(self: Self, cp: u21) bool { 149pub fn isSymbol(self: GeneralCategories, cp: u21) bool {
151 return switch (self.gc(cp)) { 150 return switch (self.gc(cp)) {
152 .Sc, 151 .Sc,
153 .Sk, 152 .Sk,
@@ -159,7 +158,7 @@ pub fn isSymbol(self: Self, cp: u21) bool {
159} 158}
160 159
161/// True if `cp` has an Z general category. 160/// True if `cp` has an Z general category.
162pub fn isSeparator(self: Self, cp: u21) bool { 161pub fn isSeparator(self: GeneralCategories, cp: u21) bool {
163 return switch (self.gc(cp)) { 162 return switch (self.gc(cp)) {
164 .Zl, 163 .Zl,
165 .Zp, 164 .Zp,
@@ -168,3 +167,9 @@ pub fn isSeparator(self: Self, cp: u21) bool {
168 else => false, 167 else => false,
169 }; 168 };
170} 169}
170
171const std = @import("std");
172const builtin = @import("builtin");
173const compress = std.compress;
174const mem = std.mem;
175const Allocator = mem.Allocator;