summaryrefslogtreecommitdiff
path: root/src/Canonical.zig
diff options
context:
space:
mode:
authorGravatar Jose Colon Rodriguez2024-02-26 18:29:59 -0400
committerGravatar Jose Colon Rodriguez2024-02-26 18:29:59 -0400
commit5e9a06c217fbd09aa8cf95da139852560f3da7d0 (patch)
tree7e332579ca97656fc33521f4f00da2993c133e15 /src/Canonical.zig
parentUsing separate data struct model. (diff)
downloadzg-5e9a06c217fbd09aa8cf95da139852560f3da7d0.tar.gz
zg-5e9a06c217fbd09aa8cf95da139852560f3da7d0.tar.xz
zg-5e9a06c217fbd09aa8cf95da139852560f3da7d0.zip
Using NormData and CanonData in Normalizer
Diffstat (limited to 'src/Canonical.zig')
-rw-r--r--src/Canonical.zig45
1 files changed, 45 insertions, 0 deletions
diff --git a/src/Canonical.zig b/src/Canonical.zig
new file mode 100644
index 0000000..d54e828
--- /dev/null
+++ b/src/Canonical.zig
@@ -0,0 +1,45 @@
1const std = @import("std");
2const builtin = @import("builtin");
3const compress = std.compress;
4const mem = std.mem;
5
/// Allocator passed to `init`; `deinit` frees `nfd` through it.
6allocator: mem.Allocator,
/// Table of `[2]u21` pairs indexed by code point. Defaults to `undefined`;
/// `init` allocates it with 0x110000 entries and fills every entry.
7nfd: [][2]u21 = undefined,
8
9const Self = @This();
10
/// Builds the decomposition table from the embedded, deflate-compressed
/// "canon" data.
///
/// Allocates 0x110000 `[2]u21` entries with `allocator`. On success the
/// returned value owns that memory and must be released with `deinit`.
/// Every entry starts as `.{ i, 0 }` and is then overwritten for each code
/// point listed in the embedded data.
///
/// Returns an error if allocation fails or if decompressing/reading the
/// embedded data fails; the table is freed before the error is returned.
pub fn init(allocator: mem.Allocator) !Self {
    const decompressor = compress.deflate.decompressor;
    const in_bytes = @embedFile("canon");
    var in_fbs = std.io.fixedBufferStream(in_bytes);
    var in_decomp = try decompressor(allocator, in_fbs.reader(), null);
    defer in_decomp.deinit();
    var reader = in_decomp.reader();

    // NOTE(review): integers are read in the host's byte order, so the
    // embedded data presumably has to be produced with the same endianness
    // -- confirm against the generator.
    const endian = builtin.cpu.arch.endian();
    var self = Self{
        .allocator = allocator,
        .nfd = try allocator.alloc([2]u21, 0x110000),
    };
    // Every `try` below can fail; without this the table would leak.
    errdefer allocator.free(self.nfd);

    // Default entry: the index itself followed by 0.
    for (0..0x110000) |i| self.nfd[i] = .{ @intCast(i), 0 };

    // Each record is a `u8` tag followed by `u24` values; a tag of 0 ends
    // the stream.
    while (true) {
        const len: u8 = try reader.readInt(u8, endian);
        if (len == 0) break;
        const cp = try reader.readInt(u24, endian);
        self.nfd[cp][0] = @intCast(try reader.readInt(u24, endian));
        // A tag of 3 means a second value follows for this code point.
        if (len == 3) self.nfd[cp][1] = @intCast(try reader.readInt(u24, endian));
    }

    return self;
}
37
/// Releases the table held in `nfd` using the allocator stored in `self`.
pub fn deinit(self: *Self) void {
    const table = self.nfd;
    self.allocator.free(table);
}
41
/// Looks up the canonical decomposition stored for `cp`.
///
/// Returns the `[2]u21` entry at index `cp` of the table. Entries that the
/// embedded data did not overwrite hold `.{ cp, 0 }`. `cp` must be below
/// 0x110000, the length of the table allocated in `init`.
pub inline fn toNfd(self: Self, cp: u21) [2]u21 {
    const entry = self.nfd[cp];
    return entry;
}