summaryrefslogtreecommitdiff
path: root/src/CanonData.zig
diff options
context:
space:
mode:
Diffstat (limited to 'src/CanonData.zig')
-rw-r--r--src/CanonData.zig57
1 files changed, 17 insertions, 40 deletions
diff --git a/src/CanonData.zig b/src/CanonData.zig
index c972534..5c1ffa6 100644
--- a/src/CanonData.zig
+++ b/src/CanonData.zig
@@ -1,13 +1,23 @@
1//! Canonicalization Data 1//! Canonicalization Data
2 2
3s1: []const u16 = undefined, 3const Data = struct {
4s2: []const @import("canon").Canonicalization = undefined, 4 s1: []const u16 = undefined,
5nfc: std.AutoHashMapUnmanaged([2]u21, u21), 5 s2: []const @import("canon").Canonicalization = undefined,
6};
7
8const canon_data = canon_data: {
9 const canon_ = @import("canon");
10 break :canon_data Data{
11 .s1 = &canon_.s1,
12 .s2 = &canon_.s2,
13 };
14};
6 15
7const CanonData = @This(); 16const CanonData = @This();
8 17
9// There's a bug here, which is down to how static u21 vs. runtime are handled, 18// There's a bug here, which is down to how static u21 vs. runtime are handled,
10// the "unique representation" claim is not working out. So we do this: 19// the "unique representation" claim is not working out. AutoHash casts to bytes,
20// and that won't fly. So we do this:
11 21
12const Context = struct { 22const Context = struct {
13 pub fn hash(_: Context, cps: [2]u21) u64 { 23 pub fn hash(_: Context, cps: [2]u21) u64 {
@@ -22,47 +32,14 @@ const Context = struct {
22 32
23const c_map = comptime_map.ComptimeHashMap([2]u21, u21, Context, @import("canon").c_map); 33const c_map = comptime_map.ComptimeHashMap([2]u21, u21, Context, @import("canon").c_map);
24 34
25pub fn init(allocator: mem.Allocator) !CanonData {
26 var cdata = CanonData{
27 .nfc = .empty,
28 };
29 errdefer cdata.deinit(allocator);
30
31 const data = @import("canon");
32 cdata.s1 = &data.s1;
33 cdata.s2 = &data.s2;
34 var count: usize = 0;
35 for (data.composite) |cp| {
36 count += 1;
37 const cps = cdata.toNfd(cp);
38 std.debug.assert(cps.len == 2);
39 try cdata.nfc.put(allocator, cps[0..2].*, cp);
40 }
41
42 // var keys = cdata.nfc.keyIterator();
43 // while (keys.next()) |key| {
44 // const c32: [2]u32 = .{ key[0], key[1] };
45 // if (c_map.get(c32)) |_| {
46 // std.debug.print("got", .{});
47 // }
48 // }
49
50 return cdata;
51}
52
53pub fn deinit(cdata: *CanonData, allocator: mem.Allocator) void {
54 cdata.nfc.deinit(allocator);
55}
56
57/// Returns canonical decomposition for `cp`. 35/// Returns canonical decomposition for `cp`.
58pub fn toNfd(cdata: *const CanonData, cp: u21) []const u21 { 36pub fn toNfd(cp: u21) []const u21 {
59 const canon = &cdata.s2[cdata.s1[cp >> 8] + (cp & 0xff)]; 37 const canon = &canon_data.s2[canon_data.s1[cp >> 8] + (cp & 0xff)];
60 return canon.cps[0..canon.len]; 38 return canon.cps[0..canon.len];
61} 39}
62 40
63// Returns the primary composite for the codepoints in `cp`. 41// Returns the primary composite for the codepoints in `cp`.
64pub fn toNfc(cdata: *const CanonData, cps: [2]u21) ?u21 { 42pub fn toNfc(cps: [2]u21) ?u21 {
65 _ = cdata;
66 if (c_map.get(cps)) |cpp| { 43 if (c_map.get(cps)) |cpp| {
67 return cpp.*; 44 return cpp.*;
68 } else { 45 } else {