summaryrefslogtreecommitdiff
path: root/src/Normalizer.zig
diff options
context:
space:
mode:
authorGravatar Jose Colon Rodriguez2024-02-27 09:26:40 -0400
committerGravatar Jose Colon Rodriguez2024-02-27 09:26:40 -0400
commit32c68059a05dde8a57a330db6d14a32506081516 (patch)
treec2b3b9bbbf48330db3570135d371cb92b552f1cb /src/Normalizer.zig
parentUsing NormData nfkd (diff)
downloadzg-32c68059a05dde8a57a330db6d14a32506081516.tar.gz
zg-32c68059a05dde8a57a330db6d14a32506081516.tar.xz
zg-32c68059a05dde8a57a330db6d14a32506081516.zip
Using HangulData in NormData
Diffstat (limited to 'src/Normalizer.zig')
-rw-r--r--src/Normalizer.zig23
1 files changed, 11 insertions, 12 deletions
diff --git a/src/Normalizer.zig b/src/Normalizer.zig
index 1434043..0670cae 100644
--- a/src/Normalizer.zig
+++ b/src/Normalizer.zig
@@ -7,7 +7,6 @@ const testing = std.testing;
7 7
8const CodePointIterator = @import("code_point").Iterator; 8const CodePointIterator = @import("code_point").Iterator;
9const case_fold_map = @import("ziglyph").case_folding; 9const case_fold_map = @import("ziglyph").case_folding;
10const hangul_map = @import("ziglyph").hangul;
11const norm_props = @import("ziglyph").normalization_props; 10const norm_props = @import("ziglyph").normalization_props;
12 11
13pub const NormData = @import("NormData"); 12pub const NormData = @import("NormData");
@@ -17,9 +16,9 @@ norm_data: *NormData,
17const Self = @This(); 16const Self = @This();
18 17
19// Hangul processing utilities. 18// Hangul processing utilities.
20fn isHangulPrecomposed(cp: u21) bool { 19fn isHangulPrecomposed(self: Self, cp: u21) bool {
21 if (hangul_map.syllableType(cp)) |kind| return kind == .LV or kind == .LVT; 20 const kind = self.norm_data.hangul_data.syllable(cp);
22 return false; 21 return kind == .LV or kind == .LVT;
23} 22}
24 23
25const SBase: u21 = 0xAC00; 24const SBase: u21 = 0xAC00;
@@ -117,7 +116,7 @@ pub fn decompose(self: Self, cp: u21, form: Form) Decomp {
117 } 116 }
118 117
119 // Hangul precomposed syllable full decomposition. 118 // Hangul precomposed syllable full decomposition.
120 if (isHangulPrecomposed(cp)) { 119 if (self.isHangulPrecomposed(cp)) {
121 const cps = decomposeHangul(cp); 120 const cps = decomposeHangul(cp);
122 @memcpy(dc.cps[0..cps.len], &cps); 121 @memcpy(dc.cps[0..cps.len], &cps);
123 return dc; 122 return dc;
@@ -335,12 +334,12 @@ test "nfkd !ASCII / alloc" {
335 334
336// Composition utilities. 335// Composition utilities.
337 336
338fn isHangul(cp: u21) bool { 337fn isHangul(self: Self, cp: u21) bool {
339 return cp >= 0x1100 and hangul_map.syllableType(cp) != null; 338 return cp >= 0x1100 and self.norm_data.hangul_data.syllable(cp) != .none;
340} 339}
341 340
342fn isNonHangulStarter(self: Self, cp: u21) bool { 341fn isNonHangulStarter(self: Self, cp: u21) bool {
343 return !isHangul(cp) and self.norm_data.ccc_data.isStarter(cp); 342 return !self.isHangul(cp) and self.norm_data.ccc_data.isStarter(cp);
344} 343}
345 344
346/// Normalizes `str` to NFC. 345/// Normalizes `str` to NFC.
@@ -395,7 +394,7 @@ fn nfxc(self: Self, allocator: std.mem.Allocator, str: []const u8, form: Form) !
395 for (d_list.items[(j + 1)..i]) |B| { 394 for (d_list.items[(j + 1)..i]) |B| {
396 const cc_B = self.norm_data.ccc_data.ccc(B); 395 const cc_B = self.norm_data.ccc_data.ccc(B);
397 // Check for blocking conditions. 396 // Check for blocking conditions.
398 if (isHangul(C)) { 397 if (self.isHangul(C)) {
399 if (cc_B != 0 or self.isNonHangulStarter(B)) continue :block_check; 398 if (cc_B != 0 or self.isNonHangulStarter(B)) continue :block_check;
400 } 399 }
401 if (cc_B >= cc_C) continue :block_check; 400 if (cc_B >= cc_C) continue :block_check;
@@ -414,9 +413,9 @@ fn nfxc(self: Self, allocator: std.mem.Allocator, str: []const u8, form: Form) !
414 const L = d_list.items[sidx]; 413 const L = d_list.items[sidx];
415 var processed_hangul = false; 414 var processed_hangul = false;
416 415
417 if (isHangul(L) and isHangul(C)) { 416 if (self.isHangul(L) and self.isHangul(C)) {
418 const l_stype = hangul_map.syllableType(L).?; 417 const l_stype = self.norm_data.hangul_data.syllable(L);
419 const c_stype = hangul_map.syllableType(C).?; 418 const c_stype = self.norm_data.hangul_data.syllable(C);
420 419
421 if (l_stype == .LV and c_stype == .T) { 420 if (l_stype == .LV and c_stype == .T) {
422 // LV, T 421 // LV, T