diff options
| -rw-r--r-- | codegen/gencat.zig | 60 | ||||
| -rw-r--r-- | src/GenCatData.zig | 146 |
2 files changed, 146 insertions, 60 deletions
diff --git a/codegen/gencat.zig b/codegen/gencat.zig index a25716b..a7713e6 100644 --- a/codegen/gencat.zig +++ b/codegen/gencat.zig | |||
| @@ -2,36 +2,36 @@ const std = @import("std"); | |||
| 2 | const builtin = @import("builtin"); | 2 | const builtin = @import("builtin"); |
| 3 | 3 | ||
| 4 | const Gc = enum { | 4 | const Gc = enum { |
| 5 | Cc, | 5 | Cc, // Other, Control |
| 6 | Cf, | 6 | Cf, // Other, Format |
| 7 | Cn, | 7 | Cn, // Other, Unassigned |
| 8 | Co, | 8 | Co, // Other, Private Use |
| 9 | Cs, | 9 | Cs, // Other, Surrogate |
| 10 | Ll, | 10 | Ll, // Letter, Lowercase |
| 11 | Lm, | 11 | Lm, // Letter, Modifier |
| 12 | Lo, | 12 | Lo, // Letter, Other |
| 13 | Lt, | 13 | Lu, // Letter, Uppercase |
| 14 | Lu, | 14 | Lt, // Letter, Titlecase |
| 15 | Mc, | 15 | Mc, // Mark, Spacing Combining |
| 16 | Me, | 16 | Me, // Mark, Enclosing |
| 17 | Mn, | 17 | Mn, // Mark, Non-Spacing |
| 18 | Nd, | 18 | Nd, // Number, Decimal Digit |
| 19 | Nl, | 19 | Nl, // Number, Letter |
| 20 | No, | 20 | No, // Number, Other |
| 21 | Pc, | 21 | Pc, // Punctuation, Connector |
| 22 | Pd, | 22 | Pd, // Punctuation, Dash |
| 23 | Pe, | 23 | Pe, // Punctuation, Close |
| 24 | Pf, | 24 | Pf, // Punctuation, Final quote (may behave like Ps or Pe depending on usage) |
| 25 | Pi, | 25 | Pi, // Punctuation, Initial quote (may behave like Ps or Pe depending on usage) |
| 26 | Po, | 26 | Po, // Punctuation, Other |
| 27 | Ps, | 27 | Ps, // Punctuation, Open |
| 28 | Sc, | 28 | Sc, // Symbol, Currency |
| 29 | Sk, | 29 | Sk, // Symbol, Modifier |
| 30 | Sm, | 30 | Sm, // Symbol, Math |
| 31 | So, | 31 | So, // Symbol, Other |
| 32 | Zl, | 32 | Zl, // Separator, Line |
| 33 | Zp, | 33 | Zp, // Separator, Paragraph |
| 34 | Zs, | 34 | Zs, // Separator, Space |
| 35 | }; | 35 | }; |
| 36 | 36 | ||
| 37 | const block_size = 256; | 37 | const block_size = 256; |
diff --git a/src/GenCatData.zig b/src/GenCatData.zig index 5496e4e..b45135b 100644 --- a/src/GenCatData.zig +++ b/src/GenCatData.zig | |||
| @@ -5,36 +5,36 @@ const mem = std.mem; | |||
| 5 | 5 | ||
| 6 | /// General Category | 6 | /// General Category |
| 7 | pub const Gc = enum { | 7 | pub const Gc = enum { |
| 8 | Cc, | 8 | Cc, // Other, Control |
| 9 | Cf, | 9 | Cf, // Other, Format |
| 10 | Cn, | 10 | Cn, // Other, Unassigned |
| 11 | Co, | 11 | Co, // Other, Private Use |
| 12 | Cs, | 12 | Cs, // Other, Surrogate |
| 13 | Ll, | 13 | Ll, // Letter, Lowercase |
| 14 | Lm, | 14 | Lm, // Letter, Modifier |
| 15 | Lo, | 15 | Lo, // Letter, Other |
| 16 | Lt, | 16 | Lu, // Letter, Uppercase |
| 17 | Lu, | 17 | Lt, // Letter, Titlecase |
| 18 | Mc, | 18 | Mc, // Mark, Spacing Combining |
| 19 | Me, | 19 | Me, // Mark, Enclosing |
| 20 | Mn, | 20 | Mn, // Mark, Non-Spacing |
| 21 | Nd, | 21 | Nd, // Number, Decimal Digit |
| 22 | Nl, | 22 | Nl, // Number, Letter |
| 23 | No, | 23 | No, // Number, Other |
| 24 | Pc, | 24 | Pc, // Punctuation, Connector |
| 25 | Pd, | 25 | Pd, // Punctuation, Dash |
| 26 | Pe, | 26 | Pe, // Punctuation, Close |
| 27 | Pf, | 27 | Pf, // Punctuation, Final quote (may behave like Ps or Pe depending on usage) |
| 28 | Pi, | 28 | Pi, // Punctuation, Initial quote (may behave like Ps or Pe depending on usage) |
| 29 | Po, | 29 | Po, // Punctuation, Other |
| 30 | Ps, | 30 | Ps, // Punctuation, Open |
| 31 | Sc, | 31 | Sc, // Symbol, Currency |
| 32 | Sk, | 32 | Sk, // Symbol, Modifier |
| 33 | Sm, | 33 | Sm, // Symbol, Math |
| 34 | So, | 34 | So, // Symbol, Other |
| 35 | Zl, | 35 | Zl, // Separator, Line |
| 36 | Zp, | 36 | Zp, // Separator, Paragraph |
| 37 | Zs, | 37 | Zs, // Separator, Space |
| 38 | }; | 38 | }; |
| 39 | 39 | ||
| 40 | allocator: mem.Allocator, | 40 | allocator: mem.Allocator, |
| @@ -81,3 +81,89 @@ pub fn deinit(self: *Self) void { | |||
| 81 | pub inline fn gc(self: Self, cp: u21) Gc { | 81 | pub inline fn gc(self: Self, cp: u21) Gc { |
| 82 | return @enumFromInt(self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]]); | 82 | return @enumFromInt(self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]]); |
| 83 | } | 83 | } |
| 84 | |||
| 85 | /// True if `cp` has a C general category. | |
| 86 | pub fn isControl(self: Self, cp: u21) bool { | ||
| 87 | return switch (self.gc(cp)) { | ||
| 88 | .Cc, | ||
| 89 | .Cf, | ||
| 90 | .Cn, | ||
| 91 | .Co, | ||
| 92 | .Cs, | ||
| 93 | => true, | ||
| 94 | else => false, | ||
| 95 | }; | ||
| 96 | } | ||
| 97 | |||
| 98 | /// True if `cp` has an L general category. | ||
| 99 | pub fn isLetter(self: Self, cp: u21) bool { | ||
| 100 | return switch (self.gc(cp)) { | ||
| 101 | .Ll, | ||
| 102 | .Lm, | ||
| 103 | .Lo, | ||
| 104 | .Lu, | ||
| 105 | .Lt, | ||
| 106 | => true, | ||
| 107 | else => false, | ||
| 108 | }; | ||
| 109 | } | ||
| 110 | |||
| 111 | /// True if `cp` has an M general category. | ||
| 112 | pub fn isMark(self: Self, cp: u21) bool { | ||
| 113 | return switch (self.gc(cp)) { | ||
| 114 | .Mc, | ||
| 115 | .Me, | ||
| 116 | .Mn, | ||
| 117 | => true, | ||
| 118 | else => false, | ||
| 119 | }; | ||
| 120 | } | ||
| 121 | |||
| 122 | /// True if `cp` has an N general category. | ||
| 123 | pub fn isNumber(self: Self, cp: u21) bool { | ||
| 124 | return switch (self.gc(cp)) { | ||
| 125 | .Nd, | ||
| 126 | .Nl, | ||
| 127 | .No, | ||
| 128 | => true, | ||
| 129 | else => false, | ||
| 130 | }; | ||
| 131 | } | ||
| 132 | |||
| 133 | /// True if `cp` has a P general category. | |
| 134 | pub fn isPunctuation(self: Self, cp: u21) bool { | ||
| 135 | return switch (self.gc(cp)) { | ||
| 136 | .Pc, | ||
| 137 | .Pd, | ||
| 138 | .Pe, | ||
| 139 | .Pf, | ||
| 140 | .Pi, | ||
| 141 | .Po, | ||
| 142 | .Ps, | ||
| 143 | => true, | ||
| 144 | else => false, | ||
| 145 | }; | ||
| 146 | } | ||
| 147 | |||
| 148 | /// True if `cp` has an S general category. | ||
| 149 | pub fn isSymbol(self: Self, cp: u21) bool { | ||
| 150 | return switch (self.gc(cp)) { | ||
| 151 | .Sc, | ||
| 152 | .Sk, | ||
| 153 | .Sm, | ||
| 154 | .So, | ||
| 155 | => true, | ||
| 156 | else => false, | ||
| 157 | }; | ||
| 158 | } | ||
| 159 | |||
| 160 | /// True if `cp` has a Z general category. | |
| 161 | pub fn isSeparator(self: Self, cp: u21) bool { | ||
| 162 | return switch (self.gc(cp)) { | ||
| 163 | .Zl, | ||
| 164 | .Zp, | ||
| 165 | .Zs, | ||
| 166 | => true, | ||
| 167 | else => false, | ||
| 168 | }; | ||
| 169 | } | ||