1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
|
//! Canonicalization Data
/// First-stage lookup table. `toNfd` indexes it with `cp >> 8` and adds
/// `cp & 0xff` to the result to obtain an index into `s2`. Defaults to
/// `undefined`; `init` points it at the `canon` module's `s1`.
s1: []const u16 = undefined,
/// Second-stage table of decomposition records. `toNfd` reads each record's
/// `cps` and `len` members. Defaults to `undefined`; `init` points it at the
/// `canon` module's `s2`.
s2: []const @import("canon").Canonicalization = undefined,
/// Maps a pair of code points to a single code point. `init` fills it with
/// one entry per item of the `canon` module's `composite` list, keyed by that
/// item's two-code-point decomposition. Released by `deinit`.
nfc: std.AutoHashMapUnmanaged([2]u21, u21),
/// This file is itself the struct; `CanonData` names it for use in signatures.
const CanonData = @This();
/// Hand-written hash-map context for `[2]u21` keys.
///
/// The automatic context does not behave correctly for these keys: statically
/// known and runtime `u21` values are not handled the same way, so the
/// "unique representation" assumption does not hold. Hashing and equality are
/// therefore spelled out explicitly here.
const Context = struct {
    /// Hashes a code point pair by packing both values into one `u64`
    /// (first code point shifted left by 22 bits, second in the low bits)
    /// and running the result through `std.hash.int`.
    pub fn hash(_: Context, cps: [2]u21) u64 {
        const high: u64 = cps[0];
        const low: u64 = cps[1];
        // The low 22 bits of the shifted value are zero and `low` fits in
        // 21 bits, so OR-ing the halves together is the same as adding them.
        return std.hash.int((high << 22) | low);
    }

    /// Two pairs are equal when both code points match position by position.
    pub fn eql(_: Context, cps1: [2]u21, cps2: [2]u21) bool {
        if (cps1[0] != cps2[0]) return false;
        return cps1[1] == cps2[1];
    }
};
const c_map = comptime_map.ComptimeHashMap([2]u21, u21, Context, @import("canon").c_map);
/// Builds a `CanonData` backed by the tables of the `canon` module.
///
/// `s1` and `s2` are pointed at the module's tables of the same name. Every
/// entry of the module's `composite` list is then decomposed with `toNfd`,
/// and the resulting pair of code points is stored in `nfc` as the key for
/// that composite.
///
/// `allocator` is used for the `nfc` map. Any error from inserting into the
/// map is returned, in which case the partially filled map is released first.
/// On success the caller owns the result and must call `deinit` with the
/// same allocator.
pub fn init(allocator: mem.Allocator) !CanonData {
    const data = @import("canon");

    var cdata = CanonData{
        .s1 = &data.s1,
        .s2 = &data.s2,
        .nfc = .empty,
    };
    errdefer cdata.deinit(allocator);

    for (data.composite) |cp| {
        const cps = cdata.toNfd(cp);
        // Every listed composite is expected to decompose into exactly two
        // code points; the map key below relies on it.
        std.debug.assert(cps.len == 2);
        try cdata.nfc.put(allocator, cps[0..2].*, cp);
    }

    return cdata;
}
/// Releases the memory held by the `nfc` map.
///
/// `allocator` is handed to the map's own `deinit`; pass the allocator that
/// was given to `init`. The `s1` and `s2` slices are not freed here.
pub fn deinit(cdata: *CanonData, allocator: mem.Allocator) void {
    const composites = &cdata.nfc;
    composites.deinit(allocator);
}
/// Returns the canonical decomposition stored for `cp`.
///
/// The lookup is two-staged: `s1` is indexed by the upper bits of `cp`
/// (`cp >> 8`), and that entry plus the low byte of `cp` selects a record in
/// `s2`. The returned slice covers the first `len` items of that record's
/// `cps` and points into the table storage, so it is not owned by the caller.
pub fn toNfd(cdata: *const CanonData, cp: u21) []const u21 {
    const page = cp >> 8;
    const offset = cp & 0xff;
    // Take the record by pointer so the returned slice refers to the table
    // itself rather than to a local copy.
    const entry = &cdata.s2[cdata.s1[page] + offset];
    return entry.cps[0..entry.len];
}
/// Returns the primary composite for the pair of code points in `cps`, or
/// `null` when `c_map` has no entry for that pair.
pub fn toNfc(cdata: *const CanonData, cps: [2]u21) ?u21 {
    // The lookup is served entirely by the file-level `c_map`; the instance
    // is accepted for the method-call interface but not consulted.
    _ = cdata;
    return if (c_map.get(cps)) |composite| composite.* else null;
}
const std = @import("std");
const builtin = @import("builtin");
const mem = std.mem;
const comptime_map = @import("comptime_map.zig");
|