summary refs log tree commit diff
path: root/src/GeneralCategories.zig
diff options
context:
space:
mode:
Diffstat (limited to 'src/GeneralCategories.zig')
-rw-r--r-- src/GeneralCategories.zig 102
1 files changed, 30 insertions, 72 deletions
diff --git a/src/GeneralCategories.zig b/src/GeneralCategories.zig
index eee7e56..9a383bf 100644
--- a/src/GeneralCategories.zig
+++ b/src/GeneralCategories.zig
@@ -1,8 +1,19 @@
1//! General Categories 1//! General Categories
2 2
3s1: []u16 = undefined, 3const Data = struct {
4s2: []u5 = undefined, 4 s1: []const u16 = undefined,
5s3: []u5 = undefined, 5 s2: []const u5 = undefined,
6 s3: []const u5 = undefined,
7};
8
9const general_categories = general_categories: {
10 const data = @import("gencat");
11 break :general_categories Data{
12 .s1 = &data.s1,
13 .s2 = &data.s2,
14 .s3 = &data.s3,
15 };
16};
6 17
7/// General Category 18/// General Category
8pub const Gc = enum { 19pub const Gc = enum {
@@ -38,51 +49,14 @@ pub const Gc = enum {
38 Zs, // Separator, Space 49 Zs, // Separator, Space
39}; 50};
40 51
41const GeneralCategories = @This();
42
43pub fn init(allocator: Allocator) Allocator.Error!GeneralCategories {
44 var gencat = GeneralCategories{};
45 try gencat.setup(allocator);
46 return gencat;
47}
48
49pub fn setup(gencat: *GeneralCategories, allocator: Allocator) Allocator.Error!void {
50 const in_bytes = @embedFile("gencat");
51 var in_fbs = std.io.fixedBufferStream(in_bytes);
52 var reader = in_fbs.reader();
53
54 const endian = builtin.cpu.arch.endian();
55
56 const s1_len: u16 = reader.readInt(u16, endian) catch unreachable;
57 gencat.s1 = try allocator.alloc(u16, s1_len);
58 errdefer allocator.free(gencat.s1);
59 for (0..s1_len) |i| gencat.s1[i] = reader.readInt(u16, endian) catch unreachable;
60
61 const s2_len: u16 = reader.readInt(u16, endian) catch unreachable;
62 gencat.s2 = try allocator.alloc(u5, s2_len);
63 errdefer allocator.free(gencat.s2);
64 for (0..s2_len) |i| gencat.s2[i] = @intCast(reader.readInt(u8, endian) catch unreachable);
65
66 const s3_len: u16 = reader.readInt(u8, endian) catch unreachable;
67 gencat.s3 = try allocator.alloc(u5, s3_len);
68 errdefer allocator.free(gencat.s3);
69 for (0..s3_len) |i| gencat.s3[i] = @intCast(reader.readInt(u8, endian) catch unreachable);
70}
71
72pub fn deinit(gencat: *const GeneralCategories, allocator: mem.Allocator) void {
73 allocator.free(gencat.s1);
74 allocator.free(gencat.s2);
75 allocator.free(gencat.s3);
76}
77
78/// Lookup the General Category for `cp`. 52/// Lookup the General Category for `cp`.
79pub fn gc(gencat: GeneralCategories, cp: u21) Gc { 53pub fn gc(cp: u21) Gc {
80 return @enumFromInt(gencat.s3[gencat.s2[gencat.s1[cp >> 8] + (cp & 0xff)]]); 54 return @enumFromInt(general_categories.s3[general_categories.s2[general_categories.s1[cp >> 8] + (cp & 0xff)]]);
81} 55}
82 56
83/// True if `cp` has a C general category. 57/// True if `cp` has a C general category.
84pub fn isControl(gencat: GeneralCategories, cp: u21) bool { 58pub fn isControl(cp: u21) bool {
85 return switch (gencat.gc(cp)) { 59 return switch (gc(cp)) {
86 .Cc, 60 .Cc,
87 .Cf, 61 .Cf,
88 .Cn, 62 .Cn,
@@ -94,8 +68,8 @@ pub fn isControl(gencat: GeneralCategories, cp: u21) bool {
94} 68}
95 69
96/// True if `cp` has an L general category. 70/// True if `cp` has an L general category.
97pub fn isLetter(gencat: GeneralCategories, cp: u21) bool { 71pub fn isLetter(cp: u21) bool {
98 return switch (gencat.gc(cp)) { 72 return switch (gc(cp)) {
99 .Ll, 73 .Ll,
100 .Lm, 74 .Lm,
101 .Lo, 75 .Lo,
@@ -107,8 +81,8 @@ pub fn isLetter(gencat: GeneralCategories, cp: u21) bool {
107} 81}
108 82
109/// True if `cp` has an M general category. 83/// True if `cp` has an M general category.
110pub fn isMark(gencat: GeneralCategories, cp: u21) bool { 84pub fn isMark(cp: u21) bool {
111 return switch (gencat.gc(cp)) { 85 return switch (gc(cp)) {
112 .Mc, 86 .Mc,
113 .Me, 87 .Me,
114 .Mn, 88 .Mn,
@@ -118,8 +92,8 @@ pub fn isMark(gencat: GeneralCategories, cp: u21) bool {
118} 92}
119 93
120/// True if `cp` has an N general category. 94/// True if `cp` has an N general category.
121pub fn isNumber(gencat: GeneralCategories, cp: u21) bool { 95pub fn isNumber(cp: u21) bool {
122 return switch (gencat.gc(cp)) { 96 return switch (gc(cp)) {
123 .Nd, 97 .Nd,
124 .Nl, 98 .Nl,
125 .No, 99 .No,
@@ -129,8 +103,8 @@ pub fn isNumber(gencat: GeneralCategories, cp: u21) bool {
129} 103}
130 104
131/// True if `cp` has a P general category. 105/// True if `cp` has a P general category.
132pub fn isPunctuation(gencat: GeneralCategories, cp: u21) bool { 106pub fn isPunctuation(cp: u21) bool {
133 return switch (gencat.gc(cp)) { 107 return switch (gc(cp)) {
134 .Pc, 108 .Pc,
135 .Pd, 109 .Pd,
136 .Pe, 110 .Pe,
@@ -144,8 +118,8 @@ pub fn isPunctuation(gencat: GeneralCategories, cp: u21) bool {
144} 118}
145 119
146/// True if `cp` has an S general category. 120/// True if `cp` has an S general category.
147pub fn isSymbol(gencat: GeneralCategories, cp: u21) bool { 121pub fn isSymbol(cp: u21) bool {
148 return switch (gencat.gc(cp)) { 122 return switch (gc(cp)) {
149 .Sc, 123 .Sc,
150 .Sk, 124 .Sk,
151 .Sm, 125 .Sm,
@@ -156,8 +130,8 @@ pub fn isSymbol(gencat: GeneralCategories, cp: u21) bool {
156} 130}
157 131
158/// True if `cp` has a Z general category. 132/// True if `cp` has a Z general category.
159pub fn isSeparator(gencat: GeneralCategories, cp: u21) bool { 133pub fn isSeparator(cp: u21) bool {
160 return switch (gencat.gc(cp)) { 134 return switch (gc(cp)) {
161 .Zl, 135 .Zl,
162 .Zp, 136 .Zp,
163 .Zs, 137 .Zs,
@@ -165,19 +139,3 @@ pub fn isSeparator(gencat: GeneralCategories, cp: u21) bool {
165 else => false, 139 else => false,
166 }; 140 };
167} 141}
168
169fn testAllocator(allocator: Allocator) !void {
170 var gen_cat = try GeneralCategories.init(allocator);
171 gen_cat.deinit(allocator);
172}
173
174test "Allocation failure" {
175 try testing.checkAllAllocationFailures(testing.allocator, testAllocator, .{});
176}
177
178const std = @import("std");
179const builtin = @import("builtin");
180const compress = std.compress;
181const mem = std.mem;
182const testing = std.testing;
183const Allocator = mem.Allocator;