diff options
| author | 2024-02-26 18:54:11 -0400 | |
|---|---|---|
| committer | 2024-02-26 18:54:11 -0400 | |
| commit | 7d8f330db2bfb625a054eb7e21d397ff696c0b3f (patch) | |
| tree | ce62411836c8b5b8bd5addff4c0a66422a9922cc /src/Normalizer.zig | |
| parent | Using NormData and CanonData in Normalizer (diff) | |
| download | zg-7d8f330db2bfb625a054eb7e21d397ff696c0b3f.tar.gz zg-7d8f330db2bfb625a054eb7e21d397ff696c0b3f.tar.xz zg-7d8f330db2bfb625a054eb7e21d397ff696c0b3f.zip | |
Using NormData nfc and nfd
Diffstat (limited to 'src/Normalizer.zig')
| -rw-r--r-- | src/Normalizer.zig | 28 |
1 file changed, 3 insertions, 25 deletions
diff --git a/src/Normalizer.zig b/src/Normalizer.zig index 848cf20..2e2e6e4 100644 --- a/src/Normalizer.zig +++ b/src/Normalizer.zig | |||
| @@ -12,7 +12,6 @@ const norm_props = @import("ziglyph").normalization_props; | |||
| 12 | 12 | ||
| 13 | pub const NormData = @import("NormData"); | 13 | pub const NormData = @import("NormData"); |
| 14 | 14 | ||
| 15 | nfc_map: std.AutoHashMap([2]u21, u21), | ||
| 16 | nfkd_map: std.AutoHashMap(u21, [18]u21), | 15 | nfkd_map: std.AutoHashMap(u21, [18]u21), |
| 17 | norm_data: *NormData, | 16 | norm_data: *NormData, |
| 18 | 17 | ||
| @@ -20,40 +19,20 @@ const Self = @This(); | |||
| 20 | 19 | ||
| 21 | pub fn init(allocator: std.mem.Allocator, norm_data: *NormData) !Self { | 20 | pub fn init(allocator: std.mem.Allocator, norm_data: *NormData) !Self { |
| 22 | var self = Self{ | 21 | var self = Self{ |
| 23 | .nfc_map = std.AutoHashMap([2]u21, u21).init(allocator), | ||
| 24 | .nfkd_map = std.AutoHashMap(u21, [18]u21).init(allocator), | 22 | .nfkd_map = std.AutoHashMap(u21, [18]u21).init(allocator), |
| 25 | .norm_data = norm_data, | 23 | .norm_data = norm_data, |
| 26 | }; | 24 | }; |
| 27 | errdefer self.deinit(); | 25 | errdefer self.deinit(); |
| 28 | 26 | ||
| 29 | // Canonical compositions | ||
| 30 | const decompressor = std.compress.deflate.decompressor; | ||
| 31 | const comp_file = @embedFile("autogen/canonical_compositions.txt.deflate"); | ||
| 32 | var comp_stream = std.io.fixedBufferStream(comp_file); | ||
| 33 | var comp_decomp = try decompressor(allocator, comp_stream.reader(), null); | ||
| 34 | defer comp_decomp.deinit(); | ||
| 35 | |||
| 36 | var comp_buf = std.io.bufferedReader(comp_decomp.reader()); | ||
| 37 | const comp_reader = comp_buf.reader(); | ||
| 38 | var buf: [4096]u8 = undefined; | ||
| 39 | |||
| 40 | while (try comp_reader.readUntilDelimiterOrEof(&buf, '\n')) |line| { | ||
| 41 | if (line.len == 0) continue; | ||
| 42 | var fields = std.mem.split(u8, line, ";"); | ||
| 43 | const cp_a = try std.fmt.parseInt(u21, fields.next().?, 16); | ||
| 44 | const cp_b = try std.fmt.parseInt(u21, fields.next().?, 16); | ||
| 45 | const cp_c = try std.fmt.parseInt(u21, fields.next().?, 16); | ||
| 46 | try self.nfc_map.put(.{ cp_a, cp_b }, cp_c); | ||
| 47 | } | ||
| 48 | |||
| 49 | // Compatibility decompositions | 27 | // Compatibility decompositions |
| 50 | const dekomp_file = @embedFile("autogen/compatibility_decompositions.txt.deflate"); | 28 | const dekomp_file = @embedFile("autogen/compatibility_decompositions.txt.deflate"); |
| 51 | var dekomp_stream = std.io.fixedBufferStream(dekomp_file); | 29 | var dekomp_stream = std.io.fixedBufferStream(dekomp_file); |
| 52 | var dekomp_decomp = try decompressor(allocator, dekomp_stream.reader(), null); | 30 | var dekomp_decomp = try std.compress.deflate.decompressor(allocator, dekomp_stream.reader(), null); |
| 53 | defer dekomp_decomp.deinit(); | 31 | defer dekomp_decomp.deinit(); |
| 54 | 32 | ||
| 55 | var dekomp_buf = std.io.bufferedReader(dekomp_decomp.reader()); | 33 | var dekomp_buf = std.io.bufferedReader(dekomp_decomp.reader()); |
| 56 | const dekomp_reader = dekomp_buf.reader(); | 34 | const dekomp_reader = dekomp_buf.reader(); |
| 35 | var buf: [4096]u8 = undefined; | ||
| 57 | 36 | ||
| 58 | while (try dekomp_reader.readUntilDelimiterOrEof(&buf, '\n')) |line| { | 37 | while (try dekomp_reader.readUntilDelimiterOrEof(&buf, '\n')) |line| { |
| 59 | if (line.len == 0) continue; | 38 | if (line.len == 0) continue; |
| @@ -73,7 +52,6 @@ pub fn init(allocator: std.mem.Allocator, norm_data: *NormData) !Self { | |||
| 73 | } | 52 | } |
| 74 | 53 | ||
| 75 | pub fn deinit(self: *Self) void { | 54 | pub fn deinit(self: *Self) void { |
| 76 | self.nfc_map.deinit(); | ||
| 77 | self.nfkd_map.deinit(); | 55 | self.nfkd_map.deinit(); |
| 78 | } | 56 | } |
| 79 | 57 | ||
| @@ -510,7 +488,7 @@ fn nfxc(self: Self, allocator: std.mem.Allocator, str: []const u8, form: Form) ! | |||
| 510 | 488 | ||
| 511 | if (!processed_hangul) { | 489 | if (!processed_hangul) { |
| 512 | // L -> C not Hangul. | 490 | // L -> C not Hangul. |
| 513 | if (self.nfc_map.get(.{ L, C })) |P| { | 491 | if (self.norm_data.canon_data.toNfc(.{ L, C })) |P| { |
| 514 | if (!norm_props.isFcx(P)) { | 492 | if (!norm_props.isFcx(P)) { |
| 515 | d_list.items[sidx] = P; | 493 | d_list.items[sidx] = P; |
| 516 | d_list.items[i] = tombstone; // Mark for deletion. | 494 | d_list.items[i] = tombstone; // Mark for deletion. |