diff options
| author | 2024-06-27 10:45:35 +0000 | |
|---|---|---|
| committer | 2024-06-27 10:45:35 +0000 | |
| commit | cb09bc96b788182f5796b92a0227d595b874af86 (patch) | |
| tree | 9c6a3a96ebd31d960fd4e5312f11ab0fb5470a74 /src | |
| parent | Implements new case fold data encoding by @sqeek502 #8 (diff) | |
| parent | FoldData: Minimize Changes_When_Casefolded data (diff) | |
| download | zg-cb09bc96b788182f5796b92a0227d595b874af86.tar.gz zg-cb09bc96b788182f5796b92a0227d595b874af86.tar.xz zg-cb09bc96b788182f5796b92a0227d595b874af86.zip | |
Merge pull request 'FoldData: Minimize Changes_When_Casefolded data' (#10) from squeek502/zg:folddata-min into master
Reviewed-on: https://codeberg.org/dude_the_builder/zg/pulls/10
Diffstat (limited to 'src')
| -rw-r--r-- | src/FoldData.zig | 21 |
1 files changed, 16 insertions, 5 deletions
diff --git a/src/FoldData.zig b/src/FoldData.zig index b7bbbd1..d425178 100644 --- a/src/FoldData.zig +++ b/src/FoldData.zig | |||
| @@ -3,11 +3,11 @@ const builtin = @import("builtin"); | |||
| 3 | const compress = std.compress; | 3 | const compress = std.compress; |
| 4 | const mem = std.mem; | 4 | const mem = std.mem; |
| 5 | 5 | ||
| 6 | const cwcf_max = 0x1e950; | ||
| 7 | |||
| 8 | allocator: mem.Allocator, | 6 | allocator: mem.Allocator, |
| 9 | cutoff: u21 = undefined, | 7 | cutoff: u21 = undefined, |
| 10 | cwcf: [cwcf_max]bool = [_]bool{false} ** cwcf_max, | 8 | cwcf_exceptions_min: u21 = undefined, |
| 9 | cwcf_exceptions_max: u21 = undefined, | ||
| 10 | cwcf_exceptions: []u21 = undefined, | ||
| 11 | multiple_start: u21 = undefined, | 11 | multiple_start: u21 = undefined, |
| 12 | stage1: []u8 = undefined, | 12 | stage1: []u8 = undefined, |
| 13 | stage2: []u8 = undefined, | 13 | stage2: []u8 = undefined, |
| @@ -43,8 +43,11 @@ pub fn init(allocator: mem.Allocator) !Self { | |||
| 43 | errdefer allocator.free(self.stage3); | 43 | errdefer allocator.free(self.stage3); |
| 44 | for (0..len) |i| self.stage3[i] = try reader.readInt(i24, endian); | 44 | for (0..len) |i| self.stage3[i] = try reader.readInt(i24, endian); |
| 45 | 45 | ||
| 46 | self.cwcf_exceptions_min = @intCast(try reader.readInt(u24, endian)); | ||
| 47 | self.cwcf_exceptions_max = @intCast(try reader.readInt(u24, endian)); | ||
| 46 | len = try reader.readInt(u16, endian); | 48 | len = try reader.readInt(u16, endian); |
| 47 | for (0..len) |_| self.cwcf[try reader.readInt(u24, endian)] = true; | 49 | self.cwcf_exceptions = try allocator.alloc(u21, len); |
| 50 | for (0..len) |i| self.cwcf_exceptions[i] = @intCast(try reader.readInt(u24, endian)); | ||
| 48 | 51 | ||
| 49 | return self; | 52 | return self; |
| 50 | } | 53 | } |
| @@ -83,5 +86,13 @@ pub fn caseFold(self: Self, cp: u21, buf: []u21) []const u21 { | |||
| 83 | 86 | ||
| 84 | /// Returns true when caseFold(NFD(`cp`)) != NFD(`cp`). | 87 | /// Returns true when caseFold(NFD(`cp`)) != NFD(`cp`). |
| 85 | pub fn changesWhenCaseFolded(self: Self, cp: u21) bool { | 88 | pub fn changesWhenCaseFolded(self: Self, cp: u21) bool { |
| 86 | return cp < cwcf_max and self.cwcf[cp]; | 89 | var buf: [3]u21 = undefined; |
| 90 | const has_mapping = self.caseFold(cp, &buf).len != 0; | ||
| 91 | return has_mapping and !self.isCwcfException(cp); | ||
| 92 | } | ||
| 93 | |||
| 94 | fn isCwcfException(self: Self, cp: u21) bool { | ||
| 95 | return cp >= self.cwcf_exceptions_min and | ||
| 96 | cp <= self.cwcf_exceptions_max and | ||
| 97 | std.mem.indexOfScalar(u21, self.cwcf_exceptions, cp) != null; | ||
| 87 | } | 98 | } |