summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--codegen/gencat.zig60
-rw-r--r--src/GenCatData.zig146
2 files changed, 146 insertions, 60 deletions
diff --git a/codegen/gencat.zig b/codegen/gencat.zig
index a25716b..a7713e6 100644
--- a/codegen/gencat.zig
+++ b/codegen/gencat.zig
@@ -2,36 +2,36 @@ const std = @import("std");
2const builtin = @import("builtin"); 2const builtin = @import("builtin");
3 3
4const Gc = enum { 4const Gc = enum {
5 Cc, 5 Cc, // Other, Control
6 Cf, 6 Cf, // Other, Format
7 Cn, 7 Cn, // Other, Unassigned
8 Co, 8 Co, // Other, Private Use
9 Cs, 9 Cs, // Other, Surrogate
10 Ll, 10 Ll, // Letter, Lowercase
11 Lm, 11 Lm, // Letter, Modifier
12 Lo, 12 Lo, // Letter, Other
13 Lt, 13 Lu, // Letter, Uppercase
14 Lu, 14 Lt, // Letter, Titlecase
15 Mc, 15 Mc, // Mark, Spacing Combining
16 Me, 16 Me, // Mark, Enclosing
17 Mn, 17 Mn, // Mark, Non-Spacing
18 Nd, 18 Nd, // Number, Decimal Digit
19 Nl, 19 Nl, // Number, Letter
20 No, 20 No, // Number, Other
21 Pc, 21 Pc, // Punctuation, Connector
22 Pd, 22 Pd, // Punctuation, Dash
23 Pe, 23 Pe, // Punctuation, Close
24 Pf, 24 Pf, // Punctuation, Final quote (may behave like Ps or Pe depending on usage)
25 Pi, 25 Pi, // Punctuation, Initial quote (may behave like Ps or Pe depending on usage)
26 Po, 26 Po, // Punctuation, Other
27 Ps, 27 Ps, // Punctuation, Open
28 Sc, 28 Sc, // Symbol, Currency
29 Sk, 29 Sk, // Symbol, Modifier
30 Sm, 30 Sm, // Symbol, Math
31 So, 31 So, // Symbol, Other
32 Zl, 32 Zl, // Separator, Line
33 Zp, 33 Zp, // Separator, Paragraph
34 Zs, 34 Zs, // Separator, Space
35}; 35};
36 36
37const block_size = 256; 37const block_size = 256;
diff --git a/src/GenCatData.zig b/src/GenCatData.zig
index 5496e4e..b45135b 100644
--- a/src/GenCatData.zig
+++ b/src/GenCatData.zig
@@ -5,36 +5,36 @@ const mem = std.mem;
5 5
6/// General Category 6/// General Category
7pub const Gc = enum { 7pub const Gc = enum {
8 Cc, 8 Cc, // Other, Control
9 Cf, 9 Cf, // Other, Format
10 Cn, 10 Cn, // Other, Unassigned
11 Co, 11 Co, // Other, Private Use
12 Cs, 12 Cs, // Other, Surrogate
13 Ll, 13 Ll, // Letter, Lowercase
14 Lm, 14 Lm, // Letter, Modifier
15 Lo, 15 Lo, // Letter, Other
16 Lt, 16 Lu, // Letter, Uppercase
17 Lu, 17 Lt, // Letter, Titlecase
18 Mc, 18 Mc, // Mark, Spacing Combining
19 Me, 19 Me, // Mark, Enclosing
20 Mn, 20 Mn, // Mark, Non-Spacing
21 Nd, 21 Nd, // Number, Decimal Digit
22 Nl, 22 Nl, // Number, Letter
23 No, 23 No, // Number, Other
24 Pc, 24 Pc, // Punctuation, Connector
25 Pd, 25 Pd, // Punctuation, Dash
26 Pe, 26 Pe, // Punctuation, Close
27 Pf, 27 Pf, // Punctuation, Final quote (may behave like Ps or Pe depending on usage)
28 Pi, 28 Pi, // Punctuation, Initial quote (may behave like Ps or Pe depending on usage)
29 Po, 29 Po, // Punctuation, Other
30 Ps, 30 Ps, // Punctuation, Open
31 Sc, 31 Sc, // Symbol, Currency
32 Sk, 32 Sk, // Symbol, Modifier
33 Sm, 33 Sm, // Symbol, Math
34 So, 34 So, // Symbol, Other
35 Zl, 35 Zl, // Separator, Line
36 Zp, 36 Zp, // Separator, Paragraph
37 Zs, 37 Zs, // Separator, Space
38}; 38};
39 39
40allocator: mem.Allocator, 40allocator: mem.Allocator,
@@ -81,3 +81,89 @@ pub fn deinit(self: *Self) void {
81pub inline fn gc(self: Self, cp: u21) Gc { 81pub inline fn gc(self: Self, cp: u21) Gc {
82 return @enumFromInt(self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]]); 82 return @enumFromInt(self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]]);
83} 83}
84
/// Reports whether `cp` falls in the C ("Other") major general category,
/// i.e. its category is one of Cc, Cf, Cn, Co or Cs.
///
/// Note that, despite the name, this matches every C subcategory — format
/// (Cf), unassigned (Cn), private use (Co) and surrogate (Cs) code points
/// as well as control characters proper (Cc).
pub fn isControl(self: Self, cp: u21) bool {
    // Resolve the category once, then test membership in the C group.
    const category = self.gc(cp);
    return switch (category) {
        .Cc, .Cf, .Cn, .Co, .Cs => true,
        else => false,
    };
}
97
/// Reports whether `cp` falls in the L (Letter) major general category,
/// i.e. its category is one of Ll, Lm, Lo, Lu or Lt.
pub fn isLetter(self: Self, cp: u21) bool {
    // Resolve the category once, then test membership in the L group.
    const category = self.gc(cp);
    return switch (category) {
        .Ll, .Lm, .Lo, .Lu, .Lt => true,
        else => false,
    };
}
110
/// Reports whether `cp` falls in the M (Mark) major general category,
/// i.e. its category is one of Mc, Me or Mn.
pub fn isMark(self: Self, cp: u21) bool {
    // Resolve the category once, then test membership in the M group.
    const category = self.gc(cp);
    return switch (category) {
        .Mc, .Me, .Mn => true,
        else => false,
    };
}
121
/// Reports whether `cp` falls in the N (Number) major general category,
/// i.e. its category is one of Nd, Nl or No.
pub fn isNumber(self: Self, cp: u21) bool {
    // Resolve the category once, then test membership in the N group.
    const category = self.gc(cp);
    return switch (category) {
        .Nd, .Nl, .No => true,
        else => false,
    };
}
132
/// Reports whether `cp` falls in the P (Punctuation) major general category,
/// i.e. its category is one of Pc, Pd, Pe, Pf, Pi, Po or Ps.
pub fn isPunctuation(self: Self, cp: u21) bool {
    // Resolve the category once, then test membership in the P group.
    const category = self.gc(cp);
    return switch (category) {
        .Pc, .Pd, .Pe, .Pf, .Pi, .Po, .Ps => true,
        else => false,
    };
}
147
/// Reports whether `cp` falls in the S (Symbol) major general category,
/// i.e. its category is one of Sc, Sk, Sm or So.
pub fn isSymbol(self: Self, cp: u21) bool {
    // Resolve the category once, then test membership in the S group.
    const category = self.gc(cp);
    return switch (category) {
        .Sc, .Sk, .Sm, .So => true,
        else => false,
    };
}
159
/// Reports whether `cp` falls in the Z (Separator) major general category,
/// i.e. its category is one of Zl, Zp or Zs.
pub fn isSeparator(self: Self, cp: u21) bool {
    // Resolve the category once, then test membership in the Z group.
    const category = self.gc(cp);
    return switch (category) {
        .Zl, .Zp, .Zs => true,
        else => false,
    };
}