diff options
| author | 2025-04-30 12:58:26 -0400 | |
|---|---|---|
| committer | 2025-04-30 13:01:37 -0400 | |
| commit | 3c2c30bfbe861c6c48acd8d7507886787197a788 (patch) | |
| tree | 875ba35c1954b201207452b18a189ebd70c0b596 /src/CanonData.zig | |
| parent | grapheme now Graphemes, Data files gone (diff) | |
| download | zg-3c2c30bfbe861c6c48acd8d7507886787197a788.tar.gz zg-3c2c30bfbe861c6c48acd8d7507886787197a788.tar.xz zg-3c2c30bfbe861c6c48acd8d7507886787197a788.zip | |
Merge NormData with Normalize
Diffstat (limited to 'src/CanonData.zig')
| -rw-r--r-- | src/CanonData.zig | 50 |
1 file changed, 26 insertions, 24 deletions
diff --git a/src/CanonData.zig b/src/CanonData.zig index 794748c..c67d1d6 100644 --- a/src/CanonData.zig +++ b/src/CanonData.zig | |||
| @@ -1,14 +1,11 @@ | |||
| 1 | const std = @import("std"); | 1 | //! Canonicalization Data |
| 2 | const builtin = @import("builtin"); | ||
| 3 | const compress = std.compress; | ||
| 4 | const mem = std.mem; | ||
| 5 | 2 | ||
| 6 | nfc: std.AutoHashMapUnmanaged([2]u21, u21), | 3 | nfc: std.AutoHashMapUnmanaged([2]u21, u21), |
| 7 | nfd: [][]u21 = undefined, | 4 | nfd: [][]u21 = undefined, |
| 8 | 5 | ||
| 9 | const Self = @This(); | 6 | const CanonData = @This(); |
| 10 | 7 | ||
| 11 | pub fn init(allocator: mem.Allocator) !Self { | 8 | pub fn init(allocator: mem.Allocator) !CanonData { |
| 12 | const decompressor = compress.flate.inflate.decompressor; | 9 | const decompressor = compress.flate.inflate.decompressor; |
| 13 | const in_bytes = @embedFile("canon"); | 10 | const in_bytes = @embedFile("canon"); |
| 14 | var in_fbs = std.io.fixedBufferStream(in_bytes); | 11 | var in_fbs = std.io.fixedBufferStream(in_bytes); |
| @@ -16,49 +13,54 @@ pub fn init(allocator: mem.Allocator) !Self { | |||
| 16 | var reader = in_decomp.reader(); | 13 | var reader = in_decomp.reader(); |
| 17 | 14 | ||
| 18 | const endian = builtin.cpu.arch.endian(); | 15 | const endian = builtin.cpu.arch.endian(); |
| 19 | var self = Self{ | 16 | var cdata = CanonData{ |
| 20 | .nfc = .{}, | 17 | .nfc = .empty, |
| 21 | .nfd = try allocator.alloc([]u21, 0x110000), | 18 | .nfd = try allocator.alloc([]u21, 0x110000), |
| 22 | }; | 19 | }; |
| 23 | 20 | ||
| 24 | var slices: usize = 0; | 21 | var slices: usize = 0; |
| 25 | errdefer { | 22 | errdefer { |
| 26 | self.nfc.deinit(allocator); | 23 | cdata.nfc.deinit(allocator); |
| 27 | for (self.nfd[0..slices]) |slice| allocator.free(slice); | 24 | for (cdata.nfd[0..slices]) |slice| allocator.free(slice); |
| 28 | allocator.free(self.nfd); | 25 | allocator.free(cdata.nfd); |
| 29 | } | 26 | } |
| 30 | 27 | ||
| 31 | @memset(self.nfd, &.{}); | 28 | @memset(cdata.nfd, &.{}); |
| 32 | 29 | ||
| 33 | while (true) { | 30 | while (true) { |
| 34 | const len: u8 = try reader.readInt(u8, endian); | 31 | const len: u8 = try reader.readInt(u8, endian); |
| 35 | if (len == 0) break; | 32 | if (len == 0) break; |
| 36 | const cp = try reader.readInt(u24, endian); | 33 | const cp = try reader.readInt(u24, endian); |
| 37 | self.nfd[cp] = try allocator.alloc(u21, len - 1); | 34 | cdata.nfd[cp] = try allocator.alloc(u21, len - 1); |
| 38 | slices += 1; | 35 | slices += 1; |
| 39 | for (0..len - 1) |i| { | 36 | for (0..len - 1) |i| { |
| 40 | self.nfd[cp][i] = @intCast(try reader.readInt(u24, endian)); | 37 | cdata.nfd[cp][i] = @intCast(try reader.readInt(u24, endian)); |
| 41 | } | 38 | } |
| 42 | if (len == 3) { | 39 | if (len == 3) { |
| 43 | try self.nfc.put(allocator, self.nfd[cp][0..2].*, @intCast(cp)); | 40 | try cdata.nfc.put(allocator, cdata.nfd[cp][0..2].*, @intCast(cp)); |
| 44 | } | 41 | } |
| 45 | } | 42 | } |
| 46 | 43 | ||
| 47 | return self; | 44 | return cdata; |
| 48 | } | 45 | } |
| 49 | 46 | ||
| 50 | pub fn deinit(self: *Self, allocator: mem.Allocator) void { | 47 | pub fn deinit(cdata: *CanonData, allocator: mem.Allocator) void { |
| 51 | self.nfc.deinit(allocator); | 48 | cdata.nfc.deinit(allocator); |
| 52 | for (self.nfd) |slice| allocator.free(slice); | 49 | for (cdata.nfd) |slice| allocator.free(slice); |
| 53 | allocator.free(self.nfd); | 50 | allocator.free(cdata.nfd); |
| 54 | } | 51 | } |
| 55 | 52 | ||
| 56 | /// Returns canonical decomposition for `cp`. | 53 | /// Returns canonical decomposition for `cp`. |
| 57 | pub fn toNfd(self: Self, cp: u21) []const u21 { | 54 | pub fn toNfd(cdata: *const CanonData, cp: u21) []const u21 { |
| 58 | return self.nfd[cp]; | 55 | return cdata.nfd[cp]; |
| 59 | } | 56 | } |
| 60 | 57 | ||
| 61 | // Returns the primary composite for the codepoints in `cp`. | 58 | // Returns the primary composite for the codepoints in `cp`. |
| 62 | pub fn toNfc(self: Self, cps: [2]u21) ?u21 { | 59 | pub fn toNfc(cdata: *const CanonData, cps: [2]u21) ?u21 { |
| 63 | return self.nfc.get(cps); | 60 | return cdata.nfc.get(cps); |
| 64 | } | 61 | } |
| 62 | |||
| 63 | const std = @import("std"); | ||
| 64 | const builtin = @import("builtin"); | ||
| 65 | const compress = std.compress; | ||
| 66 | const mem = std.mem; | ||