summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorGravatar Jose Colon Rodriguez2024-03-25 20:05:36 -0400
committerGravatar Jose Colon Rodriguez2024-03-25 20:05:36 -0400
commit961d2ec2f9075f30e9264ed2db6c394cfa5967f9 (patch)
tree4c6988e2de888c17507566c9d90d1e755ec4346f /src
parentNumericData (diff)
downloadzg-961d2ec2f9075f30e9264ed2db6c394cfa5967f9.tar.gz
zg-961d2ec2f9075f30e9264ed2db6c394cfa5967f9.tar.xz
zg-961d2ec2f9075f30e9264ed2db6c394cfa5967f9.zip
CaseData
Diffstat (limited to 'src')
-rw-r--r--src/CaseData.zig223
1 files changed, 223 insertions, 0 deletions
diff --git a/src/CaseData.zig b/src/CaseData.zig
new file mode 100644
index 0000000..38830e3
--- /dev/null
+++ b/src/CaseData.zig
@@ -0,0 +1,223 @@
1const std = @import("std");
2const builtin = @import("builtin");
3const compress = std.compress;
4const mem = std.mem;
5const testing = std.testing;
6const unicode = std.unicode;
7
8const CodePointIterator = @import("code_point").Iterator;
9
/// Allocator that `init` uses to create the tables below and that `deinit`
/// uses to free them.
10allocator: mem.Allocator,
/// Per-code-point case mappings, indexed by code point. Each entry holds the
/// { uppercase, lowercase, titlecase } mapping, in that order.
11case_map: [][3]u21,
/// First stage of the case-property lookup, indexed by `cp >> 8`; the value
/// is the offset of that code point's block inside `prop_s2`.
12prop_s1: []u16 = undefined,
/// Second stage of the case-property lookup, indexed by
/// `prop_s1[cp >> 8] + (cp & 0xff)`. Bit 4 is tested by `isCased`, bit 2 by
/// `isUpper`, and bit 1 by `isLower`.
13prop_s2: []u8 = undefined,
14
15const Self = @This();
16
/// Builds the case-mapping and case-property tables from the compressed data
/// embedded at build time ("upper", "lower", "title", and "case_prop").
///
/// `allocator` is stored in the returned value and owns every table; release
/// them with `deinit`. Returns an error if an allocation fails, or if an
/// embedded stream cannot be decompressed or ends before all expected data
/// has been read.
pub fn init(allocator: mem.Allocator) !Self {
    const endian = builtin.cpu.arch.endian();

    var self = Self{
        .allocator = allocator,
        .case_map = try allocator.alloc([3]u21, 0x110000),
    };
    errdefer allocator.free(self.case_map);

    // Start with every code point mapping to itself; the embedded tables
    // then overwrite the entries they list.
    for (self.case_map, 0..) |*entry, i| {
        const cp: u21 = @intCast(i);
        entry.* = .{ cp, cp, cp };
    }

    // Columns of `case_map`: 0 = uppercase, 1 = lowercase, 2 = titlecase.
    try self.loadMappings("upper", 0);
    try self.loadMappings("lower", 1);
    try self.loadMappings("title", 2);

    // Case properties: stage 1 length, stage 1 entries (u16 each), stage 2
    // length, then the raw stage 2 bytes.
    var cp_fbs = std.io.fixedBufferStream(@embedFile("case_prop"));
    var cp_decomp = try compress.deflate.decompressor(allocator, cp_fbs.reader(), null);
    defer cp_decomp.deinit();
    var cp_reader = cp_decomp.reader();

    const stage_1_len = try cp_reader.readInt(u16, endian);
    self.prop_s1 = try allocator.alloc(u16, stage_1_len);
    errdefer allocator.free(self.prop_s1);
    for (self.prop_s1) |*slot| slot.* = try cp_reader.readInt(u16, endian);

    const stage_2_len = try cp_reader.readInt(u16, endian);
    self.prop_s2 = try allocator.alloc(u8, stage_2_len);
    errdefer allocator.free(self.prop_s2);
    // `readAll` signals a short read only through its return value, which
    // used to be discarded. `readNoEof` fails with `error.EndOfStream`
    // instead of leaving the tail of `prop_s2` uninitialized.
    try cp_reader.readNoEof(self.prop_s2);

    return self;
}

/// Decompresses the embedded table `name` and stores each mapping it contains
/// in column `column` of `case_map`. The stream is a sequence of
/// (code point, mapping) pairs of `u24` values, terminated by a code point
/// of 0.
fn loadMappings(self: *Self, comptime name: []const u8, column: usize) !void {
    const endian = builtin.cpu.arch.endian();

    var fbs = std.io.fixedBufferStream(@embedFile(name));
    var decomp = try compress.deflate.decompressor(self.allocator, fbs.reader(), null);
    defer decomp.deinit();
    var reader = decomp.reader();

    while (true) {
        const cp = try reader.readInt(u24, endian);
        if (cp == 0) break;
        self.case_map[cp][column] = @intCast(try reader.readInt(u24, endian));
    }
}
90
/// Frees the three tables (`case_map`, `prop_s1`, `prop_s2`) with the
/// allocator stored in `self`.
pub fn deinit(self: *Self) void {
    const gpa = self.allocator;
    gpa.free(self.case_map);
    gpa.free(self.prop_s1);
    gpa.free(self.prop_s2);
}
96
/// Returns true if `cp` is either upper, lower, or title case, i.e. if bit 4
/// is set in its entry of the two-stage property table.
pub inline fn isCased(self: Self, cp: u21) bool {
    const cased_bit = 4;
    const block_start = self.prop_s1[cp >> 8];
    const props = self.prop_s2[block_start + (cp & 0xff)];
    return props & cased_bit != 0;
}
101
/// Returns true if `cp` is uppercase. A code point for which `isCased` is
/// false also yields true; otherwise the result is bit 2 of the code point's
/// property entry.
pub fn isUpper(self: Self, cp: u21) bool {
    if (self.isCased(cp)) {
        const upper_bit = 2;
        const props = self.prop_s2[self.prop_s1[cp >> 8] + (cp & 0xff)];
        return props & upper_bit != 0;
    }
    return true;
}
107
/// Returns true if every code point of `str` satisfies `isUpper`.
/// An empty string yields true.
pub fn isUpperStr(self: Self, str: []const u8) bool {
    var code_points = CodePointIterator{ .bytes = str };

    while (code_points.next()) |code_point| {
        if (!self.isUpper(code_point.code)) return false;
    }

    return true;
}
116
test "isUpperStr" {
    var case_data = try init(testing.allocator);
    defer case_data.deinit();

    // Only the fully uppercase input is accepted; digits, spaces and
    // punctuation do not affect the result.
    const cases = [_]struct { str: []const u8, expected: bool }{
        .{ .str = "HELLO, WORLD 2112!", .expected = true },
        .{ .str = "hello, world 2112!", .expected = false },
        .{ .str = "Hello, World 2112!", .expected = false },
    };

    for (cases) |case| {
        try testing.expectEqual(case.expected, case_data.isUpperStr(case.str));
    }
}
125
/// Returns a copy of `str` in which every code point has been replaced by its
/// `toUpper` mapping, re-encoded as UTF-8.
/// Caller must free returned bytes with `allocator`.
pub fn toUpperStr(
    self: Self,
    allocator: mem.Allocator,
    str: []const u8,
) ![]u8 {
    var out = std.ArrayList(u8).init(allocator);
    defer out.deinit();

    // Scratch space for one encoded code point (UTF-8 needs at most 4 bytes).
    var encoded: [4]u8 = undefined;
    var code_points = CodePointIterator{ .bytes = str };

    while (code_points.next()) |code_point| {
        const upper = self.toUpper(code_point.code);
        const n = try unicode.utf8Encode(upper, &encoded);
        try out.appendSlice(encoded[0..n]);
    }

    return out.toOwnedSlice();
}
146
test "toUpperStr" {
    var case_data = try init(testing.allocator);
    defer case_data.deinit();

    const input = "Hello, World 2112!";
    const actual = try case_data.toUpperStr(testing.allocator, input);
    defer testing.allocator.free(actual);

    try testing.expectEqualStrings("HELLO, WORLD 2112!", actual);
}
155
/// Returns uppercase mapping for `cp` (column 0 of its `case_map` entry).
pub inline fn toUpper(self: Self, cp: u21) u21 {
    const upper_column = 0;
    return self.case_map[cp][upper_column];
}
160
/// Returns true if `cp` is lowercase. A code point for which `isCased` is
/// false also yields true; otherwise the result is bit 1 of the code point's
/// property entry.
pub fn isLower(self: Self, cp: u21) bool {
    if (self.isCased(cp)) {
        const lower_bit = 1;
        const props = self.prop_s2[self.prop_s1[cp >> 8] + (cp & 0xff)];
        return props & lower_bit != 0;
    }
    return true;
}
166
/// Returns lowercase mapping for `cp` (column 1 of its `case_map` entry).
pub inline fn toLower(self: Self, cp: u21) u21 {
    const lower_column = 1;
    return self.case_map[cp][lower_column];
}
171
/// Returns true if every code point of `str` satisfies `isLower`.
/// An empty string yields true.
pub fn isLowerStr(self: Self, str: []const u8) bool {
    var code_points = CodePointIterator{ .bytes = str };

    while (code_points.next()) |code_point| {
        if (!self.isLower(code_point.code)) return false;
    }

    return true;
}
180
test "isLowerStr" {
    var case_data = try init(testing.allocator);
    defer case_data.deinit();

    // Only the fully lowercase input is accepted; digits, spaces and
    // punctuation do not affect the result.
    const cases = [_]struct { str: []const u8, expected: bool }{
        .{ .str = "hello, world 2112!", .expected = true },
        .{ .str = "HELLO, WORLD 2112!", .expected = false },
        .{ .str = "Hello, World 2112!", .expected = false },
    };

    for (cases) |case| {
        try testing.expectEqual(case.expected, case_data.isLowerStr(case.str));
    }
}
189
/// Returns a copy of `str` in which every code point has been replaced by its
/// `toLower` mapping, re-encoded as UTF-8.
/// Caller must free returned bytes with `allocator`.
pub fn toLowerStr(
    self: Self,
    allocator: mem.Allocator,
    str: []const u8,
) ![]u8 {
    var out = std.ArrayList(u8).init(allocator);
    defer out.deinit();

    // Scratch space for one encoded code point (UTF-8 needs at most 4 bytes).
    var encoded: [4]u8 = undefined;
    var code_points = CodePointIterator{ .bytes = str };

    while (code_points.next()) |code_point| {
        const lower = self.toLower(code_point.code);
        const n = try unicode.utf8Encode(lower, &encoded);
        try out.appendSlice(encoded[0..n]);
    }

    return out.toOwnedSlice();
}
210
test "toLowerStr" {
    var case_data = try init(testing.allocator);
    defer case_data.deinit();

    const input = "Hello, World 2112!";
    const actual = try case_data.toLowerStr(testing.allocator, input);
    defer testing.allocator.free(actual);

    try testing.expectEqualStrings("hello, world 2112!", actual);
}
219
/// Returns titlecase mapping for `cp` (column 2 of its `case_map` entry).
pub inline fn toTitle(self: Self, cp: u21) u21 {
    const title_column = 2;
    return self.case_map[cp][title_column];
}