summaryrefslogtreecommitdiff
path: root/src/CanonData.zig
diff options
context:
space:
mode:
authorGravatar Jose Colon Rodriguez2024-02-26 20:22:41 -0400
committerGravatar Jose Colon Rodriguez2024-02-26 20:22:41 -0400
commitad306724ae574b1a22abbcb6de37e65a69db82e4 (patch)
tree4b78d89006a2524b63c0ca7530e4c56a111d6eca /src/CanonData.zig
parentUsing NormData nfc and nfd (diff)
downloadzg-ad306724ae574b1a22abbcb6de37e65a69db82e4.tar.gz
zg-ad306724ae574b1a22abbcb6de37e65a69db82e4.tar.xz
zg-ad306724ae574b1a22abbcb6de37e65a69db82e4.zip
Using NormData nfkd
Diffstat (limited to 'src/CanonData.zig')
-rw-r--r--src/CanonData.zig56
1 files changed, 56 insertions, 0 deletions
diff --git a/src/CanonData.zig b/src/CanonData.zig
new file mode 100644
index 0000000..81d3eec
--- /dev/null
+++ b/src/CanonData.zig
@@ -0,0 +1,56 @@
1const std = @import("std");
2const builtin = @import("builtin");
3const compress = std.compress;
4const mem = std.mem;
5
/// Allocator that owns `nfd` and backs the `nfc` map; used again in `deinit`.
6allocator: mem.Allocator,
/// Composition lookup: maps a two-code-point pair to the code point whose
/// decomposition is that pair (populated in `init` for 2-element records).
7nfc: std.AutoHashMap([2]u21, u21),
/// Decomposition table indexed by code point; allocated in `init` with
/// 0x110000 entries, each defaulting to `.{ cp, 0 }`.
8nfd: [][2]u21 = undefined,
9
10const Self = @This();
11
/// Builds the decomposition (`nfd`) and composition (`nfc`) tables from the
/// embedded, deflate-compressed `canon` data.
///
/// The decompressed stream is a sequence of records, each starting with a
/// `u8` length. A length of 0 terminates the stream. Every other record holds
/// a `u24` code point followed by one `u24` decomposition code point; when the
/// length is 3 a second `u24` follows and the resulting pair is also
/// registered in `nfc` so it can be mapped back to the code point.
///
/// Integers are read using the target CPU's native endianness.
/// NOTE(review): this presumes the embedded data was produced with the same
/// endianness as the target — confirm against the generator.
///
/// On success the caller owns the returned value and must call `deinit`.
/// On failure (allocation or read error) everything allocated here is
/// released before the error is returned.
pub fn init(allocator: mem.Allocator) !Self {
    // One slot per value in 0..0x110000 so a code point indexes the table directly.
    const code_point_count = 0x110000;

    const decompressor = compress.deflate.decompressor;
    const in_bytes = @embedFile("canon");
    var in_fbs = std.io.fixedBufferStream(in_bytes);
    var in_decomp = try decompressor(allocator, in_fbs.reader(), null);
    defer in_decomp.deinit();
    var reader = in_decomp.reader();

    const endian = builtin.cpu.arch.endian();
    var self = Self{
        .allocator = allocator,
        .nfc = std.AutoHashMap([2]u21, u21).init(allocator),
        .nfd = try allocator.alloc([2]u21, code_point_count),
    };
    // Every `try` below can fail; without this the table and the map would leak.
    errdefer self.deinit();

    // Default: each code point decomposes to itself, with 0 in the second slot.
    for (self.nfd, 0..) |*entry, i| entry.* = .{ @intCast(i), 0 };

    while (true) {
        const len: u8 = try reader.readInt(u8, endian);
        if (len == 0) break; // zero-length record marks end of data

        const cp = try reader.readInt(u24, endian);
        self.nfd[cp][0] = @intCast(try reader.readInt(u24, endian));

        if (len == 3) {
            // Two-element decomposition: store the second element and record
            // the reverse (pair -> code point) mapping for composition.
            self.nfd[cp][1] = @intCast(try reader.readInt(u24, endian));
            try self.nfc.put(self.nfd[cp], @intCast(cp));
        }
    }

    return self;
}
42
/// Releases the decomposition table and the composition map using the
/// allocator stored at `init` time.
pub fn deinit(self: *Self) void {
    const gpa = self.allocator;
    gpa.free(self.nfd);
    self.nfc.deinit();
}
47
/// Looks up the decomposition table entry for `cp`.
/// Entries not overwritten by `init` hold `.{ cp, 0 }`.
pub inline fn toNfd(self: Self, cp: u21) [2]u21 {
    const decomposition = self.nfd[cp];
    return decomposition;
}
52
/// Returns the primary composite for the code point pair `cps`, or `null`
/// when no composition is registered for that pair.
pub inline fn toNfc(self: Self, cps: [2]u21) ?u21 {
    const composite = self.nfc.get(cps);
    return composite;
}