diff options
Diffstat (limited to 'src/FoldData.zig')
| -rw-r--r-- | src/FoldData.zig | 99 |
1 files changed, 0 insertions, 99 deletions
diff --git a/src/FoldData.zig b/src/FoldData.zig deleted file mode 100644 index b7fdceb..0000000 --- a/src/FoldData.zig +++ /dev/null | |||
| @@ -1,99 +0,0 @@ | |||
| 1 | const std = @import("std"); | ||
| 2 | const builtin = @import("builtin"); | ||
| 3 | const compress = std.compress; | ||
| 4 | const mem = std.mem; | ||
| 5 | |||
// Three-stage lookup tables consulted by `caseFold`. The slices are allocated
// in `init` and released in `deinit`.

/// Indexed by `cp >> 8`. A value of 0 means no code point in that 256-wide
/// block has a case fold.
stage1: []u8 = undefined,
/// Indexed by `stage1 value * 256 + (cp & 0xFF)`; yields a `stage3` selector.
/// When bit 0x80 of the selector is set, the remaining bits pick a
/// three-element group located after `multiple_start`; otherwise the selector
/// is used directly as an index into `stage3`.
stage2: []u8 = undefined,
/// For single mappings: a signed offset added to the code point (0 means no
/// mapping). For multiple mappings: groups of three entries, terminated early
/// by a 0 entry when the mapping is shorter than three code points.
stage3: []i24 = undefined,
/// Code points greater than or equal to this value have no case fold.
cutoff: u21 = undefined,
/// Index in `stage3` at which the three-element multiple-mapping groups begin.
multiple_start: u21 = undefined,

// Data consulted by `changesWhenCaseFolded`.

/// Code points that have a case fold but are still reported as not changing
/// when case folded.
cwcf_exceptions: []u21 = undefined,
/// Inclusive lower bound checked before searching `cwcf_exceptions`.
cwcf_exceptions_min: u21 = undefined,
/// Inclusive upper bound checked before searching `cwcf_exceptions`.
cwcf_exceptions_max: u21 = undefined,

const FoldData = @This();
| 16 | |||
/// Builds a `FoldData` by inflating the embedded "fold" blob (raw deflate) and
/// reading its tables in order: cutoff, multiple-mapping start, stage 1,
/// stage 2, stage 3, then the changes-when-case-folded exception range and list.
///
/// Every table is preceded by a `u16` element count. All integers are read
/// using the target CPU's native byte order.
/// NOTE(review): presumably the blob is generated with a matching byte order;
/// confirm this holds when cross-compiling.
///
/// On failure, tables that were already allocated are freed before the error
/// is returned. On success, release the tables with `deinit` and the same
/// `allocator`.
pub fn init(allocator: mem.Allocator) !FoldData {
    const compressed = @embedFile("fold");
    var stream = std.io.fixedBufferStream(compressed);
    var inflater = compress.flate.inflate.decompressor(.raw, stream.reader());
    var reader = inflater.reader();

    const endian = builtin.cpu.arch.endian();

    // Scalar header.
    const cutoff: u21 = @intCast(try reader.readInt(u24, endian));
    const multiple_start: u21 = @intCast(try reader.readInt(u24, endian));

    // Stage 1 table.
    const stage1_len = try reader.readInt(u16, endian);
    const stage1 = try allocator.alloc(u8, stage1_len);
    errdefer allocator.free(stage1);
    for (stage1) |*slot| slot.* = try reader.readInt(u8, endian);

    // Stage 2 table.
    const stage2_len = try reader.readInt(u16, endian);
    const stage2 = try allocator.alloc(u8, stage2_len);
    errdefer allocator.free(stage2);
    for (stage2) |*slot| slot.* = try reader.readInt(u8, endian);

    // Stage 3 table.
    const stage3_len = try reader.readInt(u16, endian);
    const stage3 = try allocator.alloc(i24, stage3_len);
    errdefer allocator.free(stage3);
    for (stage3) |*slot| slot.* = try reader.readInt(i24, endian);

    // Changes-when-case-folded exceptions: range bounds, then the list itself.
    const exceptions_min: u21 = @intCast(try reader.readInt(u24, endian));
    const exceptions_max: u21 = @intCast(try reader.readInt(u24, endian));
    const exceptions_len = try reader.readInt(u16, endian);
    const exceptions = try allocator.alloc(u21, exceptions_len);
    errdefer allocator.free(exceptions);
    for (exceptions) |*slot| slot.* = @intCast(try reader.readInt(u24, endian));

    return FoldData{
        .cutoff = cutoff,
        .cwcf_exceptions_min = exceptions_min,
        .cwcf_exceptions_max = exceptions_max,
        .cwcf_exceptions = exceptions,
        .multiple_start = multiple_start,
        .stage1 = stage1,
        .stage2 = stage2,
        .stage3 = stage3,
    };
}
| 54 | |||
/// Frees the four tables held by `fdata` (`stage1`, `stage2`, `stage3` and
/// `cwcf_exceptions`, in that order) using `allocator`, which should be the
/// allocator that was passed to `init`.
pub fn deinit(fdata: *const FoldData, allocator: mem.Allocator) void {
    // Unrolled at compile time: one `free` per named slice field.
    inline for (.{ "stage1", "stage2", "stage3", "cwcf_exceptions" }) |field_name| {
        allocator.free(@field(fdata.*, field_name));
    }
}
| 61 | |||
/// Returns the case fold for `cp`.
///
/// The result is empty when no mapping is recorded for `cp`. Otherwise it is a
/// prefix of `buf` holding one code point (single mapping) or up to three code
/// points (multiple mapping), so `buf` must have room for at least three
/// elements.
pub fn caseFold(fdata: *const FoldData, cp: u21, buf: []u21) []const u21 {
    // Everything at or beyond the cutoff has no mapping; this also keeps the
    // stage 1 index below in range.
    if (cp >= fdata.cutoff) return &.{};

    // Stage 1 selects a 256-entry block of stage 2; block 0 means "no mappings".
    const stage1_val = fdata.stage1[cp >> 8];
    if (stage1_val == 0) return &.{};

    const stage2_index = @as(usize, stage1_val) * 256 + (cp & 0xFF);
    const stage3_index = fdata.stage2[stage2_index];

    if (stage3_index & 0x80 != 0) {
        // High bit set: the low seven bits select a three-element group of
        // code points stored after `multiple_start`.
        //
        // Widen to `usize` before multiplying. Done in `u8`, the product
        // overflows for any group index above 85 (86 * 3 > 255).
        const group: usize = stage3_index ^ 0x80;
        const real_index = @as(usize, fdata.multiple_start) + group * 3;
        // Groups shorter than three code points are terminated by a 0 entry.
        const mapping = mem.sliceTo(fdata.stage3[real_index..][0..3], 0);
        for (mapping, 0..) |c, i| buf[i] = @intCast(c);

        return buf[0..mapping.len];
    }

    // Single mapping: stage 3 holds a signed offset relative to `cp`.
    const offset = fdata.stage3[stage3_index];
    if (offset == 0) return &.{};

    buf[0] = @intCast(@as(i32, cp) + offset);

    return buf[0..1];
}
| 87 | |||
/// Returns true when caseFold(NFD(`cp`)) != NFD(`cp`).
///
/// Implemented as: `cp` has a non-empty case fold and is not listed among the
/// changes-when-case-folded exceptions.
pub fn changesWhenCaseFolded(fdata: *const FoldData, cp: u21) bool {
    // Only the length of the mapping matters; the folded code points are discarded.
    var scratch: [3]u21 = undefined;
    if (fdata.caseFold(cp, &scratch).len == 0) return false;

    return !fdata.isCwcfException(cp);
}
| 94 | |||
/// Reports whether `cp` appears in `cwcf_exceptions`.
///
/// Code points outside the inclusive range
/// [`cwcf_exceptions_min`, `cwcf_exceptions_max`] are rejected without
/// searching the list.
fn isCwcfException(fdata: *const FoldData, cp: u21) bool {
    const out_of_range = cp < fdata.cwcf_exceptions_min or cp > fdata.cwcf_exceptions_max;
    if (out_of_range) return false;

    return std.mem.indexOfScalar(u21, fdata.cwcf_exceptions, cp) != null;
}