diff options
| author | 2024-06-26 12:08:08 -0400 | |
|---|---|---|
| committer | 2024-06-26 12:08:08 -0400 | |
| commit | 8ada7b4176d2c8afb7ecd01c4ac1aaa0f3b53cc0 (patch) | |
| tree | 4c73852d462aea2a800964ab2345b9d2f3d95607 | |
| parent | Merge pull request 'Normalize: Mark utf8Encode errors as unreachable, use exp... (diff) | |
| download | zg-8ada7b4176d2c8afb7ecd01c4ac1aaa0f3b53cc0.tar.gz zg-8ada7b4176d2c8afb7ecd01c4ac1aaa0f3b53cc0.tar.xz zg-8ada7b4176d2c8afb7ecd01c4ac1aaa0f3b53cc0.zip | |
Implemented sqeek502s case fold
| -rw-r--r-- | codegen/fold.zig | 289 | ||||
| -rw-r--r-- | src/CaseFold.zig | 3 | ||||
| -rw-r--r-- | src/FoldData.zig | 86 |
3 files changed, 245 insertions, 133 deletions
diff --git a/codegen/fold.zig b/codegen/fold.zig index 6dc51ac..ec024c5 100644 --- a/codegen/fold.zig +++ b/codegen/fold.zig | |||
| @@ -1,123 +1,218 @@ | |||
| 1 | const std = @import("std"); | 1 | const std = @import("std"); |
| 2 | const builtin = @import("builtin"); | 2 | const builtin = @import("builtin"); |
| 3 | const fmt = std.fmt; | 3 | |
| 4 | const mem = std.mem; | 4 | // From https://www.unicode.org/Public/UCD/latest/ucd/CaseFolding.txt |
| 5 | // const case_folding_txt = @embedFile("CaseFolding.txt"); | ||
| 5 | 6 | ||
| 6 | pub fn main() !void { | 7 | pub fn main() !void { |
| 7 | var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator); | 8 | var gpa = std.heap.GeneralPurposeAllocator(.{}){}; |
| 8 | defer arena.deinit(); | 9 | defer std.debug.assert(gpa.deinit() == .ok); |
| 9 | const allocator = arena.allocator(); | 10 | const allocator = gpa.allocator(); |
| 11 | |||
| 12 | // const unbuf_stdout = std.io.getStdOut().writer(); | ||
| 13 | // var buf_stdout = std.io.bufferedWriter(unbuf_stdout); | ||
| 14 | // const writer = buf_stdout.writer(); | ||
| 10 | 15 | ||
| 11 | // Process DerivedCoreProperties.txt | 16 | var codepoint_mapping = std.AutoArrayHashMap(u21, [3]u21).init(allocator); |
| 12 | var cp_file = try std.fs.cwd().openFile("data/unicode/DerivedCoreProperties.txt", .{}); | 17 | defer codepoint_mapping.deinit(); |
| 18 | |||
| 19 | // Process | ||
| 20 | var cp_file = try std.fs.cwd().openFile("data/unicode/CaseFolding.txt", .{}); | ||
| 13 | defer cp_file.close(); | 21 | defer cp_file.close(); |
| 14 | var cp_buf = std.io.bufferedReader(cp_file.reader()); | 22 | var cp_buf = std.io.bufferedReader(cp_file.reader()); |
| 15 | const cp_reader = cp_buf.reader(); | 23 | const cp_reader = cp_buf.reader(); |
| 16 | 24 | ||
| 17 | var cp_map = std.AutoHashMap(u21, void).init(allocator); | 25 | // var line_it = std.mem.tokenizeAny(u8, case_folding_txt, "\r\n"); |
| 18 | defer cp_map.deinit(); | ||
| 19 | |||
| 20 | var line_buf: [4096]u8 = undefined; | 26 | var line_buf: [4096]u8 = undefined; |
| 21 | 27 | ||
| 22 | cp_lines: while (try cp_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| { | 28 | while (try cp_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| { |
| 29 | // while (line_it.next()) |line| { | ||
| 23 | if (line.len == 0 or line[0] == '#') continue; | 30 | if (line.len == 0 or line[0] == '#') continue; |
| 24 | 31 | ||
| 25 | const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line; | 32 | var field_it = std.mem.splitScalar(u8, line, ';'); |
| 26 | 33 | const codepoint_str = field_it.first(); | |
| 27 | var field_iter = std.mem.tokenizeAny(u8, no_comment, "; "); | 34 | const codepoint = try std.fmt.parseUnsigned(u21, codepoint_str, 16); |
| 28 | var current_code: [2]u21 = undefined; | 35 | |
| 29 | 36 | const status = std.mem.trim(u8, field_it.next() orelse continue, " "); | |
| 30 | var i: usize = 0; | 37 | // Only interested in 'common' and 'full' |
| 31 | while (field_iter.next()) |field| : (i += 1) { | 38 | if (status[0] != 'C' and status[0] != 'F') continue; |
| 32 | switch (i) { | 39 | |
| 33 | 0 => { | 40 | const mapping = std.mem.trim(u8, field_it.next() orelse continue, " "); |
| 34 | // Code point(s) | 41 | var mapping_it = std.mem.splitScalar(u8, mapping, ' '); |
| 35 | if (std.mem.indexOf(u8, field, "..")) |dots| { | 42 | var mapping_buf = [_]u21{0} ** 3; |
| 36 | current_code = .{ | 43 | var mapping_i: u8 = 0; |
| 37 | try std.fmt.parseInt(u21, field[0..dots], 16), | 44 | while (mapping_it.next()) |mapping_c| { |
| 38 | try std.fmt.parseInt(u21, field[dots + 2 ..], 16), | 45 | mapping_buf[mapping_i] = try std.fmt.parseInt(u21, mapping_c, 16); |
| 39 | }; | 46 | mapping_i += 1; |
| 40 | } else { | ||
| 41 | const code = try std.fmt.parseInt(u21, field, 16); | ||
| 42 | current_code = .{ code, code }; | ||
| 43 | } | ||
| 44 | }, | ||
| 45 | 1 => { | ||
| 46 | // Core property | ||
| 47 | if (!mem.eql(u8, field, "Changes_When_Casefolded")) continue :cp_lines; | ||
| 48 | for (current_code[0]..current_code[1] + 1) |cp| try cp_map.put(@intCast(cp), {}); | ||
| 49 | }, | ||
| 50 | else => {}, | ||
| 51 | } | ||
| 52 | } | 47 | } |
| 53 | } | ||
| 54 | 48 | ||
| 55 | // Process CaseFolding.txt | 49 | try codepoint_mapping.putNoClobber(codepoint, mapping_buf); |
| 56 | var in_file = try std.fs.cwd().openFile("data/unicode/CaseFolding.txt", .{}); | 50 | } |
| 57 | defer in_file.close(); | ||
| 58 | var in_buf = std.io.bufferedReader(in_file.reader()); | ||
| 59 | const in_reader = in_buf.reader(); | ||
| 60 | 51 | ||
| 61 | var args_iter = try std.process.argsWithAllocator(allocator); | 52 | var offset_to_index = std.AutoHashMap(i32, u8).init(allocator); |
| 62 | defer args_iter.deinit(); | 53 | defer offset_to_index.deinit(); |
| 63 | _ = args_iter.skip(); | 54 | var unique_offsets = std.AutoArrayHashMap(i32, u32).init(allocator); |
| 64 | const output_path = args_iter.next() orelse @panic("No output file arg!"); | 55 | defer unique_offsets.deinit(); |
| 56 | |||
| 57 | // First pass | ||
| 58 | { | ||
| 59 | var it = codepoint_mapping.iterator(); | ||
| 60 | while (it.next()) |entry| { | ||
| 61 | const codepoint = entry.key_ptr.*; | ||
| 62 | const mappings = std.mem.sliceTo(entry.value_ptr, 0); | ||
| 63 | if (mappings.len == 1) { | ||
| 64 | const offset: i32 = @as(i32, mappings[0]) - @as(i32, codepoint); | ||
| 65 | const result = try unique_offsets.getOrPut(offset); | ||
| 66 | if (!result.found_existing) result.value_ptr.* = 0; | ||
| 67 | result.value_ptr.* += 1; | ||
| 68 | } | ||
| 69 | } | ||
| 65 | 70 | ||
| 66 | const compressor = std.compress.flate.deflate.compressor; | 71 | // A codepoint mapping to itself (offset=0) is the most common case |
| 67 | var out_file = try std.fs.cwd().createFile(output_path, .{}); | 72 | try unique_offsets.put(0, 0x10FFFF); |
| 68 | defer out_file.close(); | 73 | const C = struct { |
| 69 | var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best }); | 74 | vals: []u32, |
| 70 | const writer = out_comp.writer(); | ||
| 71 | 75 | ||
| 72 | const endian = builtin.cpu.arch.endian(); | 76 | pub fn lessThan(ctx: @This(), a_index: usize, b_index: usize) bool { |
| 77 | return ctx.vals[a_index] > ctx.vals[b_index]; | ||
| 78 | } | ||
| 79 | }; | ||
| 80 | unique_offsets.sort(C{ .vals = unique_offsets.values() }); | ||
| 81 | |||
| 82 | var offset_it = unique_offsets.iterator(); | ||
| 83 | var offset_index: u7 = 0; | ||
| 84 | while (offset_it.next()) |entry| { | ||
| 85 | try offset_to_index.put(entry.key_ptr.*, offset_index); | ||
| 86 | offset_index += 1; | ||
| 87 | } | ||
| 88 | } | ||
| 73 | 89 | ||
| 74 | lines: while (try in_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| { | 90 | var mappings_to_index = std.AutoArrayHashMap([3]u21, u8).init(allocator); |
| 75 | if (line.len == 0 or line[0] == '#') continue; | 91 | defer mappings_to_index.deinit(); |
| 92 | var codepoint_to_index = std.AutoHashMap(u21, u8).init(allocator); | ||
| 93 | defer codepoint_to_index.deinit(); | ||
| 94 | |||
| 95 | // Second pass | ||
| 96 | { | ||
| 97 | var count_multiple_codepoints: u8 = 0; | ||
| 98 | |||
| 99 | var it = codepoint_mapping.iterator(); | ||
| 100 | while (it.next()) |entry| { | ||
| 101 | const codepoint = entry.key_ptr.*; | ||
| 102 | const mappings = std.mem.sliceTo(entry.value_ptr, 0); | ||
| 103 | if (mappings.len > 1) { | ||
| 104 | const result = try mappings_to_index.getOrPut(entry.value_ptr.*); | ||
| 105 | if (!result.found_existing) { | ||
| 106 | result.value_ptr.* = 0x80 | count_multiple_codepoints; | ||
| 107 | count_multiple_codepoints += 1; | ||
| 108 | } | ||
| 109 | const index = result.value_ptr.*; | ||
| 110 | try codepoint_to_index.put(codepoint, index); | ||
| 111 | } else { | ||
| 112 | const offset: i32 = @as(i32, mappings[0]) - @as(i32, codepoint); | ||
| 113 | const index = offset_to_index.get(offset).?; | ||
| 114 | try codepoint_to_index.put(codepoint, index); | ||
| 115 | } | ||
| 116 | } | ||
| 117 | } | ||
| 76 | 118 | ||
| 77 | const no_comment = if (mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line; | 119 | // Build the stage1/stage2/stage3 arrays and output them |
| 78 | 120 | { | |
| 79 | var field_iter = mem.tokenizeSequence(u8, no_comment, "; "); | 121 | const Block = [256]u8; |
| 80 | var cps: [4]u24 = undefined; | 122 | var stage2_blocks = std.AutoArrayHashMap(Block, void).init(allocator); |
| 81 | var len: usize = 2; | 123 | defer stage2_blocks.deinit(); |
| 82 | 124 | ||
| 83 | var i: usize = 0; | 125 | const empty_block: Block = [_]u8{0} ** 256; |
| 84 | while (field_iter.next()) |field| : (i += 1) { | 126 | try stage2_blocks.put(empty_block, {}); |
| 85 | switch (i) { | 127 | const stage1_len = (0x10FFFF / 256) + 1; |
| 86 | 0 => { | 128 | var stage1: [stage1_len]u8 = undefined; |
| 87 | var cp = try fmt.parseInt(u21, field, 16); | 129 | |
| 88 | cp <<= 1; | 130 | var codepoint: u21 = 0; |
| 89 | if (cp_map.contains(cp)) cp |= 1; | 131 | var block: Block = undefined; |
| 90 | cps[0] = cp; | 132 | while (codepoint <= 0x10FFFF) { |
| 91 | }, | 133 | const data_index = codepoint_to_index.get(codepoint) orelse 0; |
| 92 | 134 | block[codepoint % 256] = data_index; | |
| 93 | 1 => { | 135 | |
| 94 | if (!mem.eql(u8, field, "C") and !mem.eql(u8, field, "F")) continue :lines; | 136 | codepoint += 1; |
| 95 | if (mem.eql(u8, field, "F")) len = 3; | 137 | if (codepoint % 256 == 0) { |
| 96 | }, | 138 | const result = try stage2_blocks.getOrPut(block); |
| 97 | 139 | const index = result.index; | |
| 98 | 2 => { | 140 | stage1[(codepoint >> 8) - 1] = @intCast(index); |
| 99 | if (len == 3) { | ||
| 100 | // Full case fold | ||
| 101 | // std.debug.print("-->{s} {s}\n", .{ line, field }); | ||
| 102 | var cp_iter = mem.tokenizeScalar(u8, field, ' '); | ||
| 103 | len = 1; | ||
| 104 | while (cp_iter.next()) |cp_str| : (len += 1) { | ||
| 105 | cps[len] = try fmt.parseInt(u24, cp_str, 16); | ||
| 106 | } | ||
| 107 | } else { | ||
| 108 | // Common case fold | ||
| 109 | cps[1] = try fmt.parseInt(u24, field, 16); | ||
| 110 | } | ||
| 111 | }, | ||
| 112 | |||
| 113 | else => {}, | ||
| 114 | } | 141 | } |
| 115 | } | 142 | } |
| 116 | 143 | ||
| 117 | try writer.writeInt(u8, @intCast(len), endian); | 144 | const last_meaningful_block = std.mem.lastIndexOfNone(u8, &stage1, "\x00").?; |
| 118 | for (cps[0..len]) |cp| try writer.writeInt(u24, cp, endian); | 145 | const meaningful_stage1 = stage1[0 .. last_meaningful_block + 1]; |
| 146 | const codepoint_cutoff = (last_meaningful_block + 1) << 8; | ||
| 147 | const multiple_codepoint_start: usize = unique_offsets.count(); | ||
| 148 | |||
| 149 | var index: usize = 0; | ||
| 150 | const stage3_elems = unique_offsets.count() + mappings_to_index.count() * 3; | ||
| 151 | var stage3 = try allocator.alloc(i24, stage3_elems); | ||
| 152 | defer allocator.free(stage3); | ||
| 153 | for (unique_offsets.keys()) |key| { | ||
| 154 | stage3[index] = @intCast(key); | ||
| 155 | index += 1; | ||
| 156 | } | ||
| 157 | for (mappings_to_index.keys()) |key| { | ||
| 158 | stage3[index] = @intCast(key[0]); | ||
| 159 | stage3[index + 1] = @intCast(key[1]); | ||
| 160 | stage3[index + 2] = @intCast(key[2]); | ||
| 161 | index += 3; | ||
| 162 | } | ||
| 163 | |||
| 164 | const stage2_elems = stage2_blocks.count() * 256; | ||
| 165 | var stage2 = try allocator.alloc(u8, stage2_elems); | ||
| 166 | defer allocator.free(stage2); | ||
| 167 | for (stage2_blocks.keys(), 0..) |key, i| { | ||
| 168 | @memcpy(stage2[i * 256 ..][0..256], &key); | ||
| 169 | } | ||
| 170 | |||
| 171 | // try writer.print("const cutoff = 0x{X};\n", .{codepoint_cutoff}); | ||
| 172 | // try writeArray(writer, u8, "stage1", meaningful_stage1); | ||
| 173 | // try writeArray(writer, u8, "stage2", stage2); | ||
| 174 | // try writer.print("const multiple_start = {};\n", .{multiple_codepoint_start}); | ||
| 175 | // try writeArray(writer, i24, "stage3", stage3); | ||
| 176 | |||
| 177 | var args_iter = try std.process.argsWithAllocator(allocator); | ||
| 178 | defer args_iter.deinit(); | ||
| 179 | _ = args_iter.skip(); | ||
| 180 | const output_path = args_iter.next() orelse @panic("No output file arg!"); | ||
| 181 | |||
| 182 | const compressor = std.compress.flate.deflate.compressor; | ||
| 183 | var out_file = try std.fs.cwd().createFile(output_path, .{}); | ||
| 184 | defer out_file.close(); | ||
| 185 | var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best }); | ||
| 186 | const writer = out_comp.writer(); | ||
| 187 | |||
| 188 | const endian = builtin.cpu.arch.endian(); | ||
| 189 | |||
| 190 | try writer.writeInt(u24, @intCast(codepoint_cutoff), endian); | ||
| 191 | try writer.writeInt(u24, @intCast(multiple_codepoint_start), endian); | ||
| 192 | |||
| 193 | try writer.writeInt(u16, @intCast(meaningful_stage1.len), endian); | ||
| 194 | try writer.writeAll(meaningful_stage1); | ||
| 195 | |||
| 196 | try writer.writeInt(u16, @intCast(stage2.len), endian); | ||
| 197 | try writer.writeAll(stage2); | ||
| 198 | |||
| 199 | try writer.writeInt(u16, @intCast(stage3.len), endian); | ||
| 200 | for (stage3) |offset| try writer.writeInt(i24, offset, endian); | ||
| 201 | |||
| 202 | try out_comp.flush(); | ||
| 119 | } | 203 | } |
| 120 | 204 | ||
| 121 | try writer.writeInt(u16, 0, endian); | 205 | // try buf_stdout.flush(); |
| 122 | try out_comp.flush(); | ||
| 123 | } | 206 | } |
| 207 | |||
| 208 | // fn writeArray(writer: anytype, comptime T: type, name: []const u8, data: []const T) !void { | ||
| 209 | // try writer.print("const {s} = [{}]{s}{{", .{ name, data.len, @typeName(T) }); | ||
| 210 | // | ||
| 211 | // for (data, 0..) |v, i| { | ||
| 212 | // if (i % 32 == 0) try writer.writeAll("\n "); | ||
| 213 | // try writer.print("{},", .{v}); | ||
| 214 | // if (i != data.len - 1) try writer.writeByte(' '); | ||
| 215 | // } | ||
| 216 | // | ||
| 217 | // try writer.writeAll("\n};\n"); | ||
| 218 | // } | ||
diff --git a/src/CaseFold.zig b/src/CaseFold.zig index 3e7535e..19c9da8 100644 --- a/src/CaseFold.zig +++ b/src/CaseFold.zig | |||
| @@ -19,9 +19,10 @@ pub fn caseFold( | |||
| 19 | ) ![]const u21 { | 19 | ) ![]const u21 { |
| 20 | var cfcps = std.ArrayList(u21).init(allocator); | 20 | var cfcps = std.ArrayList(u21).init(allocator); |
| 21 | defer cfcps.deinit(); | 21 | defer cfcps.deinit(); |
| 22 | var buf: [3]u21 = undefined; | ||
| 22 | 23 | ||
| 23 | for (cps) |cp| { | 24 | for (cps) |cp| { |
| 24 | const cf = self.fold_data.caseFold(cp); | 25 | const cf = self.fold_data.caseFold(cp, &buf); |
| 25 | 26 | ||
| 26 | if (cf.len == 0) { | 27 | if (cf.len == 0) { |
| 27 | try cfcps.append(cp); | 28 | try cfcps.append(cp); |
diff --git a/src/FoldData.zig b/src/FoldData.zig index d4312b0..93613fe 100644 --- a/src/FoldData.zig +++ b/src/FoldData.zig | |||
| @@ -4,8 +4,11 @@ const compress = std.compress; | |||
| 4 | const mem = std.mem; | 4 | const mem = std.mem; |
| 5 | 5 | ||
| 6 | allocator: mem.Allocator, | 6 | allocator: mem.Allocator, |
| 7 | fold: [][]u21 = undefined, | 7 | cutoff: u21 = undefined, |
| 8 | cwcf: []bool = undefined, | 8 | multiple_start: u21 = undefined, |
| 9 | stage1: []u8 = undefined, | ||
| 10 | stage2: []u8 = undefined, | ||
| 11 | stage3: []i24 = undefined, | ||
| 9 | 12 | ||
| 10 | const Self = @This(); | 13 | const Self = @This(); |
| 11 | 14 | ||
| @@ -17,49 +20,62 @@ pub fn init(allocator: mem.Allocator) !Self { | |||
| 17 | var reader = in_decomp.reader(); | 20 | var reader = in_decomp.reader(); |
| 18 | 21 | ||
| 19 | const endian = builtin.cpu.arch.endian(); | 22 | const endian = builtin.cpu.arch.endian(); |
| 20 | var self = Self{ | ||
| 21 | .allocator = allocator, | ||
| 22 | .fold = try allocator.alloc([]u21, 0x110000), | ||
| 23 | .cwcf = try allocator.alloc(bool, 0x110000), | ||
| 24 | }; | ||
| 25 | |||
| 26 | var slices: usize = 0; | ||
| 27 | errdefer { | ||
| 28 | for (self.fold[0..slices]) |slice| self.allocator.free(slice); | ||
| 29 | self.allocator.free(self.fold); | ||
| 30 | self.allocator.free(self.cwcf); | ||
| 31 | } | ||
| 32 | 23 | ||
| 33 | @memset(self.fold, &.{}); | 24 | var self = Self{ .allocator = allocator }; |
| 34 | @memset(self.cwcf, false); | 25 | self.cutoff = @intCast(try reader.readInt(u24, endian)); |
| 35 | 26 | self.multiple_start = @intCast(try reader.readInt(u24, endian)); | |
| 36 | while (true) { | 27 | |
| 37 | const len: u8 = try reader.readInt(u8, endian); | 28 | var len = try reader.readInt(u16, endian); |
| 38 | if (len == 0) break; | 29 | self.stage1 = try allocator.alloc(u8, len); |
| 39 | const cp = try reader.readInt(u24, endian); | 30 | errdefer allocator.free(self.stage1); |
| 40 | self.fold[cp >> 1] = try allocator.alloc(u21, len - 1); | 31 | for (0..len) |i| self.stage1[i] = try reader.readInt(u8, endian); |
| 41 | slices += 1; | 32 | |
| 42 | for (0..len - 1) |i| { | 33 | len = try reader.readInt(u16, endian); |
| 43 | self.fold[cp >> 1][i] = @intCast(try reader.readInt(u24, endian)); | 34 | self.stage2 = try allocator.alloc(u8, len); |
| 44 | } | 35 | errdefer allocator.free(self.stage2); |
| 45 | self.cwcf[cp >> 1] = cp & 1 == 1; | 36 | for (0..len) |i| self.stage2[i] = try reader.readInt(u8, endian); |
| 46 | } | 37 | |
| 38 | len = try reader.readInt(u16, endian); | ||
| 39 | self.stage3 = try allocator.alloc(i24, len); | ||
| 40 | errdefer allocator.free(self.stage3); | ||
| 41 | for (0..len) |i| self.stage3[i] = try reader.readInt(i24, endian); | ||
| 47 | 42 | ||
| 48 | return self; | 43 | return self; |
| 49 | } | 44 | } |
| 50 | 45 | ||
| 51 | pub fn deinit(self: *const Self) void { | 46 | pub fn deinit(self: *const Self) void { |
| 52 | for (self.fold) |slice| self.allocator.free(slice); | 47 | self.allocator.free(self.stage1); |
| 53 | self.allocator.free(self.fold); | 48 | self.allocator.free(self.stage2); |
| 54 | self.allocator.free(self.cwcf); | 49 | self.allocator.free(self.stage3); |
| 55 | } | 50 | } |
| 56 | 51 | ||
| 57 | /// Returns the case fold for `cp`. | 52 | /// Returns the case fold for `cp`. |
| 58 | pub inline fn caseFold(self: Self, cp: u21) []const u21 { | 53 | pub inline fn caseFold(self: Self, cp: u21, buf: []u21) []const u21 { |
| 59 | return self.fold[cp]; | 54 | if (cp >= self.cutoff) return &.{}; |
| 55 | |||
| 56 | const stage1_val = self.stage1[cp >> 8]; | ||
| 57 | if (stage1_val == 0) return &.{}; | ||
| 58 | |||
| 59 | const stage2_index = @as(usize, stage1_val) * 256 + (cp & 0xFF); | ||
| 60 | const stage3_index = self.stage2[stage2_index]; | ||
| 61 | |||
| 62 | if (stage3_index & 0x80 != 0) { | ||
| 63 | const real_index = @as(usize, self.multiple_start) + (stage3_index ^ 0x80) * 3; | ||
| 64 | const mapping = mem.sliceTo(self.stage3[real_index..][0..3], 0); | ||
| 65 | for (mapping, 0..) |c, i| buf[i] = @intCast(c); | ||
| 66 | |||
| 67 | return buf[0..mapping.len]; | ||
| 68 | } | ||
| 69 | |||
| 70 | const offset = self.stage3[stage3_index]; | ||
| 71 | if (offset == 0) return &.{}; | ||
| 72 | |||
| 73 | buf[0] = @intCast(@as(i32, cp) + offset); | ||
| 74 | |||
| 75 | return buf[0..1]; | ||
| 60 | } | 76 | } |
| 61 | 77 | ||
| 62 | /// Returns true when caseFold(NFD(`cp`)) != NFD(`cp`). | 78 | /// Returns true when caseFold(NFD(`cp`)) != NFD(`cp`). |
| 63 | pub inline fn changesWhenCaseFolded(self: Self, cp: u21) bool { | 79 | pub inline fn changesWhenCaseFolded(_: Self, _: u21) bool { |
| 64 | return self.cwcf[cp]; | 80 | return true; |
| 65 | } | 81 | } |