1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
|
//! Canonicalization Data
/// Bundle of the two lookup tables consulted by `toNfd`.
const Data = struct {
    /// Local alias for the module that declares the table element type.
    const canon_module = @import("canon");

    /// First-level table; `toNfd` indexes it with `cp >> 8`.
    s1: []const u16 = undefined,
    /// Second-level table of `Canonicalization` records; `toNfd` indexes it
    /// with the `s1` entry plus the low byte of the code point.
    s2: []const canon_module.Canonicalization = undefined,
};
// For reference, `Canonicalization` (declared in the `canon` module) looks like this:
//     const Canonicalization = struct {
//         len: u3 = 0,
//         cps: [2]u21 = [_]u21{0} ** 2,
//     };
/// The lookup tables used by `toNfd`, pointing at the `s1` and `s2`
/// declarations of the `canon` module.
const canon_data: Data = .{
    .s1 = &@import("canon").s1,
    .s2 = &@import("canon").s2,
};
// Name for the type of this source file itself (`@This()` at file scope).
const CanonData = @This();
/// Hash-map context for keys that are pairs of code points (`[2]u21`).
///
/// Automatic hashing is deliberately not used here. According to the original
/// author's note, AutoHash casts the key to bytes, and the "unique
/// representation" assumption that relies on does not hold for `u21` values
/// handled statically versus at runtime. This small custom context hashes and
/// compares the integer values directly, which works in both settings.
const Context = struct {
    /// Packs the pair into one `u64` (first code point shifted left by 22 bits,
    /// second code point added on) and hashes that with `std.hash.int`.
    pub fn hash(_: Context, cps: [2]u21) u64 {
        const high: u64 = @as(u64, cps[0]) << 22;
        const low: u64 = cps[1];
        return std.hash.int(high + low);
    }

    /// Reports whether both pairs hold the same code points in the same order.
    pub fn eql(_: Context, cps1: [2]u21, cps2: [2]u21) bool {
        if (cps1[0] != cps2[0]) return false;
        return cps1[1] == cps2[1];
    }
};
// Map type from a pair of code points (`[2]u21`) to a single `u21`, produced by
// `comptime_map.ComptimeHashMap` with `Context` supplying hashing and equality.
// The last argument is the `canon` module's `c_map` declaration — presumably the
// key/value entries to populate the map; confirm against comptime_map.zig.
const c_map = comptime_map.ComptimeHashMap([2]u21, u21, Context, @import("canon").c_map);
/// Looks up the canonical decomposition stored for `cp`.
///
/// The result is a view of the first `len` entries of the matching record's
/// `cps` array inside `canon_data.s2`; nothing is copied.
pub fn toNfd(cp: u21) []const u21 {
    // Stage one: the high bits of the code point select a base offset.
    const base = canon_data.s1[cp >> 8];
    // Stage two: the low byte selects the record relative to that offset.
    const record = &canon_data.s2[base + (cp & 0xff)];
    return record.cps[0..record.len];
}
/// Returns the primary composite for the pair of code points in `cps`, or
/// `null` when `c_map` holds no entry for that pair.
pub fn toNfc(cps: [2]u21) ?u21 {
    // `c_map.get` yields an optional pointer to the stored value; copy it out.
    return if (c_map.get(cps)) |composite| composite.* else null;
}
const std = @import("std");
const builtin = @import("builtin");
const mem = std.mem;
const comptime_map = @import("comptime_map.zig");
// Reference `comptime_map` from a test block so the import is used when this
// file's tests are built — presumably so that file's own tests are included too.
test {
_ = comptime_map;
}
|