diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/Canonical.zig | 45 | ||||
| -rw-r--r-- | src/NormData.zig | 22 | ||||
| -rw-r--r-- | src/Normalizer.zig | 155 | ||||
| -rw-r--r-- | src/main.zig | 11 |
4 files changed, 143 insertions, 90 deletions
diff --git a/src/Canonical.zig b/src/Canonical.zig new file mode 100644 index 0000000..d54e828 --- /dev/null +++ b/src/Canonical.zig | |||
| @@ -0,0 +1,45 @@ | |||
| 1 | const std = @import("std"); | ||
| 2 | const builtin = @import("builtin"); | ||
| 3 | const compress = std.compress; | ||
| 4 | const mem = std.mem; | ||
| 5 | |||
| 6 | allocator: mem.Allocator, | ||
| 7 | nfd: [][2]u21 = undefined, | ||
| 8 | |||
| 9 | const Self = @This(); | ||
| 10 | |||
/// Builds the canonical decomposition table from the embedded,
/// deflate-compressed `canon` data.
///
/// The table holds one `[2]u21` entry for each of the 0x110000 indices.
/// Every entry starts out as `.{ cp, 0 }` and is then overwritten by the
/// records found in the stream.
///
/// Record layout, as consumed here:
///   * `u8`  length; a value of 0 terminates the stream
///   * `u24` code point used as the table index
///   * `u24` first mapped code point
///   * `u24` second mapped code point, present only when length is 3
/// All integers are read using the endianness of the CPU being compiled for.
///
/// Returns an error if allocation, decompression, or reading fails; in that
/// case the partially built table is freed before returning. On success the
/// caller owns the result and must release it with `deinit`.
pub fn init(allocator: mem.Allocator) !Self {
    const decompressor = compress.deflate.decompressor;
    const in_bytes = @embedFile("canon");
    var in_fbs = std.io.fixedBufferStream(in_bytes);
    var in_decomp = try decompressor(allocator, in_fbs.reader(), null);
    defer in_decomp.deinit();
    const reader = in_decomp.reader();

    const endian = builtin.cpu.arch.endian();
    var self = Self{
        .allocator = allocator,
        .nfd = try allocator.alloc([2]u21, 0x110000),
    };
    // Every read below can fail; free the table on the error path so it
    // does not leak.
    errdefer allocator.free(self.nfd);

    // Default every entry to "maps to itself", with an empty second slot.
    for (self.nfd, 0..) |*entry, i| entry.* = .{ @intCast(i), 0 };

    while (true) {
        const len: u8 = try reader.readInt(u8, endian);
        if (len == 0) break;
        const cp = try reader.readInt(u24, endian);
        self.nfd[cp][0] = @intCast(try reader.readInt(u24, endian));
        if (len == 3) self.nfd[cp][1] = @intCast(try reader.readInt(u24, endian));
    }

    return self;
}
| 37 | |||
/// Frees the decomposition table using the allocator stored in `self`.
pub fn deinit(self: *Self) void {
    const table = self.nfd;
    self.allocator.free(table);
}
| 41 | |||
/// Looks up the canonical decomposition recorded for `cp`.
///
/// The result is the raw two-slot table entry stored at index `cp`; no
/// checking is done here beyond the slice indexing itself.
pub inline fn toNfd(self: Self, cp: u21) [2]u21 {
    const entry: [2]u21 = self.nfd[cp];
    return entry;
}
diff --git a/src/NormData.zig b/src/NormData.zig new file mode 100644 index 0000000..c6fa8e8 --- /dev/null +++ b/src/NormData.zig | |||
| @@ -0,0 +1,22 @@ | |||
| 1 | const std = @import("std"); | ||
| 2 | const mem = std.mem; | ||
| 3 | |||
| 4 | const CanonData = @import("CanonicalData"); | ||
| 5 | const CccData = @import("CombiningClassData"); | ||
| 6 | |||
| 7 | canon_data: CanonData, | ||
| 8 | ccc_data: CccData, | ||
| 9 | |||
| 10 | const Self = @This(); | ||
| 11 | |||
/// Initialises both data sets with `allocator`.
///
/// Returns whatever error either underlying `init` returns. If the combining
/// class data fails to initialise, the already-initialised canonical data is
/// released before the error propagates. On success the caller must call
/// `deinit`.
pub fn init(allocator: std.mem.Allocator) !Self {
    var canon_data = try CanonData.init(allocator);
    // Release the first data set if the second one fails to initialise.
    errdefer canon_data.deinit();

    const ccc_data = try CccData.init(allocator);

    return Self{
        .canon_data = canon_data,
        .ccc_data = ccc_data,
    };
}
| 18 | |||
/// Releases the canonical data first, then the combining class data.
pub fn deinit(self: *Self) void {
    const canon = &self.canon_data;
    const ccc = &self.ccc_data;
    canon.deinit();
    ccc.deinit();
}
diff --git a/src/Normalizer.zig b/src/Normalizer.zig index 6a19f47..848cf20 100644 --- a/src/Normalizer.zig +++ b/src/Normalizer.zig | |||
| @@ -3,26 +3,26 @@ | |||
| 3 | //! string equality under different parameters related to normalization (see `eql`, `eqlCaseless`, `eqlIdentifiers`). | 3 | //! string equality under different parameters related to normalization (see `eql`, `eqlCaseless`, `eqlIdentifiers`). |
| 4 | 4 | ||
| 5 | const std = @import("std"); | 5 | const std = @import("std"); |
| 6 | const testing = std.testing; | ||
| 6 | 7 | ||
| 7 | const CodePointIterator = @import("code_point").Iterator; | 8 | const CodePointIterator = @import("code_point").Iterator; |
| 8 | const case_fold_map = @import("ziglyph").case_folding; | 9 | const case_fold_map = @import("ziglyph").case_folding; |
| 9 | const hangul_map = @import("ziglyph").hangul; | 10 | const hangul_map = @import("ziglyph").hangul; |
| 10 | const norm_props = @import("ziglyph").normalization_props; | 11 | const norm_props = @import("ziglyph").normalization_props; |
| 11 | pub const Data = @import("CombiningClassData"); | ||
| 12 | 12 | ||
| 13 | ccc_data: *Data, | 13 | pub const NormData = @import("NormData"); |
| 14 | |||
| 14 | nfc_map: std.AutoHashMap([2]u21, u21), | 15 | nfc_map: std.AutoHashMap([2]u21, u21), |
| 15 | nfd_map: std.AutoHashMap(u21, [2]u21), | ||
| 16 | nfkd_map: std.AutoHashMap(u21, [18]u21), | 16 | nfkd_map: std.AutoHashMap(u21, [18]u21), |
| 17 | norm_data: *NormData, | ||
| 17 | 18 | ||
| 18 | const Self = @This(); | 19 | const Self = @This(); |
| 19 | 20 | ||
| 20 | pub fn init(allocator: std.mem.Allocator, data: *Data) !Self { | 21 | pub fn init(allocator: std.mem.Allocator, norm_data: *NormData) !Self { |
| 21 | var self = Self{ | 22 | var self = Self{ |
| 22 | .ccc_data = data, | ||
| 23 | .nfc_map = std.AutoHashMap([2]u21, u21).init(allocator), | 23 | .nfc_map = std.AutoHashMap([2]u21, u21).init(allocator), |
| 24 | .nfd_map = std.AutoHashMap(u21, [2]u21).init(allocator), | ||
| 25 | .nfkd_map = std.AutoHashMap(u21, [18]u21).init(allocator), | 24 | .nfkd_map = std.AutoHashMap(u21, [18]u21).init(allocator), |
| 25 | .norm_data = norm_data, | ||
| 26 | }; | 26 | }; |
| 27 | errdefer self.deinit(); | 27 | errdefer self.deinit(); |
| 28 | 28 | ||
| @@ -46,24 +46,6 @@ pub fn init(allocator: std.mem.Allocator, data: *Data) !Self { | |||
| 46 | try self.nfc_map.put(.{ cp_a, cp_b }, cp_c); | 46 | try self.nfc_map.put(.{ cp_a, cp_b }, cp_c); |
| 47 | } | 47 | } |
| 48 | 48 | ||
| 49 | // Canonical decompositions | ||
| 50 | const decomp_file = @embedFile("autogen/canonical_decompositions.txt.deflate"); | ||
| 51 | var decomp_stream = std.io.fixedBufferStream(decomp_file); | ||
| 52 | var decomp_decomp = try decompressor(allocator, decomp_stream.reader(), null); | ||
| 53 | defer decomp_decomp.deinit(); | ||
| 54 | |||
| 55 | var decomp_buf = std.io.bufferedReader(decomp_decomp.reader()); | ||
| 56 | const decomp_reader = decomp_buf.reader(); | ||
| 57 | |||
| 58 | while (try decomp_reader.readUntilDelimiterOrEof(&buf, '\n')) |line| { | ||
| 59 | if (line.len == 0) continue; | ||
| 60 | var fields = std.mem.split(u8, line, ";"); | ||
| 61 | const cp_a = try std.fmt.parseInt(u21, fields.next().?, 16); | ||
| 62 | const cp_b = try std.fmt.parseInt(u21, fields.next().?, 16); | ||
| 63 | const cp_c = try std.fmt.parseInt(u21, fields.next().?, 16); | ||
| 64 | try self.nfd_map.put(cp_a, .{ cp_b, cp_c }); | ||
| 65 | } | ||
| 66 | |||
| 67 | // Compatibility decompositions | 49 | // Compatibility decompositions |
| 68 | const dekomp_file = @embedFile("autogen/compatibility_decompositions.txt.deflate"); | 50 | const dekomp_file = @embedFile("autogen/compatibility_decompositions.txt.deflate"); |
| 69 | var dekomp_stream = std.io.fixedBufferStream(dekomp_file); | 51 | var dekomp_stream = std.io.fixedBufferStream(dekomp_file); |
| @@ -92,14 +74,14 @@ pub fn init(allocator: std.mem.Allocator, data: *Data) !Self { | |||
| 92 | 74 | ||
| 93 | pub fn deinit(self: *Self) void { | 75 | pub fn deinit(self: *Self) void { |
| 94 | self.nfc_map.deinit(); | 76 | self.nfc_map.deinit(); |
| 95 | self.nfd_map.deinit(); | ||
| 96 | self.nfkd_map.deinit(); | 77 | self.nfkd_map.deinit(); |
| 97 | } | 78 | } |
| 98 | 79 | ||
| 99 | test "init / deinit" { | 80 | test "init / deinit" { |
| 100 | var data = try Data.init(std.testing.allocator); | 81 | const allocator = testing.allocator; |
| 101 | defer data.deinit(); | 82 | var norm_data = try NormData.init(allocator); |
| 102 | var n = try init(std.testing.allocator, &data); | 83 | defer norm_data.deinit(); |
| 84 | var n = try init(allocator, &norm_data); | ||
| 103 | defer n.deinit(); | 85 | defer n.deinit(); |
| 104 | } | 86 | } |
| 105 | 87 | ||
| @@ -169,17 +151,22 @@ const Decomp = struct { | |||
| 169 | pub fn mapping(self: Self, cp: u21, form: Form) Decomp { | 151 | pub fn mapping(self: Self, cp: u21, form: Form) Decomp { |
| 170 | std.debug.assert(form == .nfd or form == .nfkd); | 152 | std.debug.assert(form == .nfd or form == .nfkd); |
| 171 | 153 | ||
| 172 | var dc = Decomp{ .form = .same }; | 154 | var dc = Decomp{ .form = .nfd }; |
| 173 | dc.cps[0] = cp; | 155 | const canon_dc = self.norm_data.canon_data.toNfd(cp); |
| 156 | const len: usize = if (canon_dc[1] == 0) 1 else 2; | ||
| 157 | |||
| 158 | if (len == 1 and canon_dc[0] == cp) { | ||
| 159 | dc.form = .same; | ||
| 160 | dc.cps[0] = cp; | ||
| 161 | } else { | ||
| 162 | @memcpy(dc.cps[0..len], canon_dc[0..len]); | ||
| 163 | } | ||
| 174 | 164 | ||
| 175 | if (self.nfkd_map.get(cp)) |array| { | 165 | if (self.nfkd_map.get(cp)) |array| { |
| 176 | if (form != .nfd) { | 166 | if (form != .nfd) { |
| 177 | dc.form = .nfkd; | 167 | dc.form = .nfkd; |
| 178 | @memcpy(dc.cps[0..array.len], &array); | 168 | @memcpy(dc.cps[0..array.len], &array); |
| 179 | } | 169 | } |
| 180 | } else if (self.nfd_map.get(cp)) |array| { | ||
| 181 | dc.form = .nfd; | ||
| 182 | @memcpy(dc.cps[0..array.len], &array); | ||
| 183 | } | 170 | } |
| 184 | 171 | ||
| 185 | return dc; | 172 | return dc; |
| @@ -244,10 +231,10 @@ pub fn decompose(self: Self, cp: u21, form: Form) Decomp { | |||
| 244 | } | 231 | } |
| 245 | 232 | ||
| 246 | test "decompose" { | 233 | test "decompose" { |
| 247 | const allocator = std.testing.allocator; | 234 | const allocator = testing.allocator; |
| 248 | var data = try Data.init(allocator); | 235 | var norm_data = try NormData.init(allocator); |
| 249 | defer data.deinit(); | 236 | defer norm_data.deinit(); |
| 250 | var n = try init(allocator, &data); | 237 | var n = try init(allocator, &norm_data); |
| 251 | defer n.deinit(); | 238 | defer n.deinit(); |
| 252 | 239 | ||
| 253 | var dc = n.decompose('é', .nfd); | 240 | var dc = n.decompose('é', .nfd); |
| @@ -314,7 +301,7 @@ pub const Result = struct { | |||
| 314 | 301 | ||
| 315 | // Compares code points by Canonical Combining Class order. | 302 | // Compares code points by Canonical Combining Class order. |
| 316 | fn cccLess(self: Self, lhs: u21, rhs: u21) bool { | 303 | fn cccLess(self: Self, lhs: u21, rhs: u21) bool { |
| 317 | return self.ccc_data.ccc(lhs) < self.ccc_data.ccc(rhs); | 304 | return self.norm_data.ccc_data.ccc(lhs) < self.norm_data.ccc_data.ccc(rhs); |
| 318 | } | 305 | } |
| 319 | 306 | ||
| 320 | // Applies the Canonical Sorting Algorithm. | 307 | // Applies the Canonical Sorting Algorithm. |
| @@ -322,7 +309,7 @@ fn canonicalSort(self: Self, cps: []u21) void { | |||
| 322 | var i: usize = 0; | 309 | var i: usize = 0; |
| 323 | while (i < cps.len) : (i += 1) { | 310 | while (i < cps.len) : (i += 1) { |
| 324 | const start: usize = i; | 311 | const start: usize = i; |
| 325 | while (i < cps.len and self.ccc_data.ccc(cps[i]) != 0) : (i += 1) {} | 312 | while (i < cps.len and self.norm_data.ccc_data.ccc(cps[i]) != 0) : (i += 1) {} |
| 326 | std.mem.sort(u21, cps[start..i], self, cccLess); | 313 | std.mem.sort(u21, cps[start..i], self, cccLess); |
| 327 | } | 314 | } |
| 328 | } | 315 | } |
| @@ -368,10 +355,10 @@ fn nfxd(self: Self, allocator: std.mem.Allocator, str: []const u8, form: Form) ! | |||
| 368 | } | 355 | } |
| 369 | 356 | ||
| 370 | test "nfd ASCII / no-alloc" { | 357 | test "nfd ASCII / no-alloc" { |
| 371 | const allocator = std.testing.allocator; | 358 | const allocator = testing.allocator; |
| 372 | var data = try Data.init(allocator); | 359 | var norm_data = try NormData.init(allocator); |
| 373 | defer data.deinit(); | 360 | defer norm_data.deinit(); |
| 374 | var n = try init(allocator, &data); | 361 | var n = try init(allocator, &norm_data); |
| 375 | defer n.deinit(); | 362 | defer n.deinit(); |
| 376 | 363 | ||
| 377 | var result = try n.nfd(allocator, "Hello World!"); | 364 | var result = try n.nfd(allocator, "Hello World!"); |
| @@ -381,10 +368,10 @@ test "nfd ASCII / no-alloc" { | |||
| 381 | } | 368 | } |
| 382 | 369 | ||
| 383 | test "nfd !ASCII / alloc" { | 370 | test "nfd !ASCII / alloc" { |
| 384 | const allocator = std.testing.allocator; | 371 | const allocator = testing.allocator; |
| 385 | var data = try Data.init(allocator); | 372 | var norm_data = try NormData.init(allocator); |
| 386 | defer data.deinit(); | 373 | defer norm_data.deinit(); |
| 387 | var n = try init(allocator, &data); | 374 | var n = try init(allocator, &norm_data); |
| 388 | defer n.deinit(); | 375 | defer n.deinit(); |
| 389 | 376 | ||
| 390 | var result = try n.nfd(allocator, "Héllo World! \u{3d3}"); | 377 | var result = try n.nfd(allocator, "Héllo World! \u{3d3}"); |
| @@ -394,10 +381,10 @@ test "nfd !ASCII / alloc" { | |||
| 394 | } | 381 | } |
| 395 | 382 | ||
| 396 | test "nfkd ASCII / no-alloc" { | 383 | test "nfkd ASCII / no-alloc" { |
| 397 | const allocator = std.testing.allocator; | 384 | const allocator = testing.allocator; |
| 398 | var data = try Data.init(allocator); | 385 | var norm_data = try NormData.init(allocator); |
| 399 | defer data.deinit(); | 386 | defer norm_data.deinit(); |
| 400 | var n = try init(allocator, &data); | 387 | var n = try init(allocator, &norm_data); |
| 401 | defer n.deinit(); | 388 | defer n.deinit(); |
| 402 | 389 | ||
| 403 | var result = try n.nfkd(allocator, "Hello World!"); | 390 | var result = try n.nfkd(allocator, "Hello World!"); |
| @@ -407,10 +394,10 @@ test "nfkd ASCII / no-alloc" { | |||
| 407 | } | 394 | } |
| 408 | 395 | ||
| 409 | test "nfkd !ASCII / alloc" { | 396 | test "nfkd !ASCII / alloc" { |
| 410 | const allocator = std.testing.allocator; | 397 | const allocator = testing.allocator; |
| 411 | var data = try Data.init(allocator); | 398 | var norm_data = try NormData.init(allocator); |
| 412 | defer data.deinit(); | 399 | defer norm_data.deinit(); |
| 413 | var n = try init(allocator, &data); | 400 | var n = try init(allocator, &norm_data); |
| 414 | defer n.deinit(); | 401 | defer n.deinit(); |
| 415 | 402 | ||
| 416 | var result = try n.nfkd(allocator, "Héllo World! \u{3d3}"); | 403 | var result = try n.nfkd(allocator, "Héllo World! \u{3d3}"); |
| @@ -426,7 +413,7 @@ fn isHangul(cp: u21) bool { | |||
| 426 | } | 413 | } |
| 427 | 414 | ||
| 428 | fn isNonHangulStarter(self: Self, cp: u21) bool { | 415 | fn isNonHangulStarter(self: Self, cp: u21) bool { |
| 429 | return !isHangul(cp) and self.ccc_data.isStarter(cp); | 416 | return !isHangul(cp) and self.norm_data.ccc_data.isStarter(cp); |
| 430 | } | 417 | } |
| 431 | 418 | ||
| 432 | /// Normalizes `str` to NFC. | 419 | /// Normalizes `str` to NFC. |
| @@ -468,7 +455,7 @@ fn nfxc(self: Self, allocator: std.mem.Allocator, str: []const u8, form: Form) ! | |||
| 468 | 455 | ||
| 469 | block_check: while (i < d_list.items.len) : (i += 1) { | 456 | block_check: while (i < d_list.items.len) : (i += 1) { |
| 470 | const C = d_list.items[i]; | 457 | const C = d_list.items[i]; |
| 471 | const cc_C = self.ccc_data.ccc(C); | 458 | const cc_C = self.norm_data.ccc_data.ccc(C); |
| 472 | var starter_index: ?usize = null; | 459 | var starter_index: ?usize = null; |
| 473 | var j: usize = i; | 460 | var j: usize = i; |
| 474 | 461 | ||
| @@ -476,10 +463,10 @@ fn nfxc(self: Self, allocator: std.mem.Allocator, str: []const u8, form: Form) ! | |||
| 476 | j -= 1; | 463 | j -= 1; |
| 477 | 464 | ||
| 478 | // Check for starter. | 465 | // Check for starter. |
| 479 | if (self.ccc_data.isStarter(d_list.items[j])) { | 466 | if (self.norm_data.ccc_data.isStarter(d_list.items[j])) { |
| 480 | if (i - j > 1) { // If there's distance between the starting point and the current position. | 467 | if (i - j > 1) { // If there's distance between the starting point and the current position. |
| 481 | for (d_list.items[(j + 1)..i]) |B| { | 468 | for (d_list.items[(j + 1)..i]) |B| { |
| 482 | const cc_B = self.ccc_data.ccc(B); | 469 | const cc_B = self.norm_data.ccc_data.ccc(B); |
| 483 | // Check for blocking conditions. | 470 | // Check for blocking conditions. |
| 484 | if (isHangul(C)) { | 471 | if (isHangul(C)) { |
| 485 | if (cc_B != 0 or self.isNonHangulStarter(B)) continue :block_check; | 472 | if (cc_B != 0 or self.isNonHangulStarter(B)) continue :block_check; |
| @@ -563,10 +550,10 @@ fn nfxc(self: Self, allocator: std.mem.Allocator, str: []const u8, form: Form) ! | |||
| 563 | } | 550 | } |
| 564 | 551 | ||
| 565 | test "nfc" { | 552 | test "nfc" { |
| 566 | const allocator = std.testing.allocator; | 553 | const allocator = testing.allocator; |
| 567 | var data = try Data.init(allocator); | 554 | var norm_data = try NormData.init(allocator); |
| 568 | defer data.deinit(); | 555 | defer norm_data.deinit(); |
| 569 | var n = try init(allocator, &data); | 556 | var n = try init(allocator, &norm_data); |
| 570 | defer n.deinit(); | 557 | defer n.deinit(); |
| 571 | 558 | ||
| 572 | var result = try n.nfc(allocator, "Complex char: \u{3D2}\u{301}"); | 559 | var result = try n.nfc(allocator, "Complex char: \u{3D2}\u{301}"); |
| @@ -576,10 +563,10 @@ test "nfc" { | |||
| 576 | } | 563 | } |
| 577 | 564 | ||
| 578 | test "nfkc" { | 565 | test "nfkc" { |
| 579 | const allocator = std.testing.allocator; | 566 | const allocator = testing.allocator; |
| 580 | var data = try Data.init(allocator); | 567 | var norm_data = try NormData.init(allocator); |
| 581 | defer data.deinit(); | 568 | defer norm_data.deinit(); |
| 582 | var n = try init(allocator, &data); | 569 | var n = try init(allocator, &norm_data); |
| 583 | defer n.deinit(); | 570 | defer n.deinit(); |
| 584 | 571 | ||
| 585 | var result = try n.nfkc(allocator, "Complex char: \u{03A5}\u{0301}"); | 572 | var result = try n.nfkc(allocator, "Complex char: \u{03A5}\u{0301}"); |
| @@ -637,10 +624,10 @@ pub fn eql(self: Self, allocator: std.mem.Allocator, a: []const u8, b: []const u | |||
| 637 | } | 624 | } |
| 638 | 625 | ||
| 639 | test "eql" { | 626 | test "eql" { |
| 640 | const allocator = std.testing.allocator; | 627 | const allocator = testing.allocator; |
| 641 | var data = try Data.init(allocator); | 628 | var norm_data = try NormData.init(allocator); |
| 642 | defer data.deinit(); | 629 | defer norm_data.deinit(); |
| 643 | var n = try init(allocator, &data); | 630 | var n = try init(allocator, &norm_data); |
| 644 | defer n.deinit(); | 631 | defer n.deinit(); |
| 645 | 632 | ||
| 646 | try std.testing.expect(try n.eql(allocator, "foé", "foe\u{0301}")); | 633 | try std.testing.expect(try n.eql(allocator, "foé", "foe\u{0301}")); |
| @@ -706,10 +693,10 @@ pub fn eqlCaseless(self: Self, allocator: std.mem.Allocator, a: []const u8, b: [ | |||
| 706 | } | 693 | } |
| 707 | 694 | ||
| 708 | test "eqlCaseless" { | 695 | test "eqlCaseless" { |
| 709 | const allocator = std.testing.allocator; | 696 | const allocator = testing.allocator; |
| 710 | var data = try Data.init(allocator); | 697 | var norm_data = try NormData.init(allocator); |
| 711 | defer data.deinit(); | 698 | defer norm_data.deinit(); |
| 712 | var n = try init(allocator, &data); | 699 | var n = try init(allocator, &norm_data); |
| 713 | defer n.deinit(); | 700 | defer n.deinit(); |
| 714 | 701 | ||
| 715 | try std.testing.expect(try n.eqlCaseless(allocator, "Foϓ", "fo\u{03D2}\u{0301}")); | 702 | try std.testing.expect(try n.eqlCaseless(allocator, "Foϓ", "fo\u{03D2}\u{0301}")); |
| @@ -719,7 +706,7 @@ test "eqlCaseless" { | |||
| 719 | // FCD | 706 | // FCD |
| 720 | fn getLeadCcc(self: Self, cp: u21) u8 { | 707 | fn getLeadCcc(self: Self, cp: u21) u8 { |
| 721 | const dc = self.mapping(cp, .nfd); | 708 | const dc = self.mapping(cp, .nfd); |
| 722 | return self.ccc_data.ccc(dc.cps[0]); | 709 | return self.norm_data.ccc_data.ccc(dc.cps[0]); |
| 723 | } | 710 | } |
| 724 | 711 | ||
| 725 | fn getTrailCcc(self: Self, cp: u21) u8 { | 712 | fn getTrailCcc(self: Self, cp: u21) u8 { |
| @@ -727,7 +714,7 @@ fn getTrailCcc(self: Self, cp: u21) u8 { | |||
| 727 | const len = for (dc.cps, 0..) |dcp, i| { | 714 | const len = for (dc.cps, 0..) |dcp, i| { |
| 728 | if (dcp == 0) break i; | 715 | if (dcp == 0) break i; |
| 729 | } else dc.cps.len; | 716 | } else dc.cps.len; |
| 730 | return self.ccc_data.ccc(dc.cps[len - 1]); | 717 | return self.norm_data.ccc_data.ccc(dc.cps[len - 1]); |
| 731 | } | 718 | } |
| 732 | 719 | ||
| 733 | /// Fast check to detect if a string is already in NFC or NFD form. | 720 | /// Fast check to detect if a string is already in NFC or NFD form. |
| @@ -743,10 +730,10 @@ pub fn isFcd(self: Self, str: []const u8) bool { | |||
| 743 | } | 730 | } |
| 744 | 731 | ||
| 745 | test "isFcd" { | 732 | test "isFcd" { |
| 746 | const allocator = std.testing.allocator; | 733 | const allocator = testing.allocator; |
| 747 | var data = try Data.init(allocator); | 734 | var norm_data = try NormData.init(allocator); |
| 748 | defer data.deinit(); | 735 | defer norm_data.deinit(); |
| 749 | var n = try init(allocator, &data); | 736 | var n = try init(allocator, &norm_data); |
| 750 | defer n.deinit(); | 737 | defer n.deinit(); |
| 751 | 738 | ||
| 752 | const is_nfc = "José \u{3D3}"; | 739 | const is_nfc = "José \u{3D3}"; |
| @@ -764,9 +751,9 @@ test "Unicode normalization tests" { | |||
| 764 | defer arena.deinit(); | 751 | defer arena.deinit(); |
| 765 | var allocator = arena.allocator(); | 752 | var allocator = arena.allocator(); |
| 766 | 753 | ||
| 767 | var data = try Data.init(allocator); | 754 | var norm_data = try NormData.init(allocator); |
| 768 | defer data.deinit(); | 755 | defer norm_data.deinit(); |
| 769 | var n = try init(allocator, &data); | 756 | var n = try init(allocator, &norm_data); |
| 770 | defer n.deinit(); | 757 | defer n.deinit(); |
| 771 | 758 | ||
| 772 | var file = try std.fs.cwd().openFile("data/unicode/NormalizationTest.txt", .{}); | 759 | var file = try std.fs.cwd().openFile("data/unicode/NormalizationTest.txt", .{}); |
diff --git a/src/main.zig b/src/main.zig index 57db05b..d1a0bb3 100644 --- a/src/main.zig +++ b/src/main.zig | |||
| @@ -17,7 +17,7 @@ const std = @import("std"); | |||
| 17 | // const ascii = std.ascii; | 17 | // const ascii = std.ascii; |
| 18 | 18 | ||
| 19 | // const norm = @import("ziglyph").Normalizer; | 19 | // const norm = @import("ziglyph").Normalizer; |
| 20 | const Data = @import("Normalizer").Data; | 20 | const NormData = @import("Normalizer").NormData; |
| 21 | const norm = @import("Normalizer"); | 21 | const norm = @import("Normalizer"); |
| 22 | 22 | ||
| 23 | pub fn main() !void { | 23 | pub fn main() !void { |
| @@ -32,10 +32,9 @@ pub fn main() !void { | |||
| 32 | const input = try std.fs.cwd().readFileAlloc(allocator, in_path, std.math.maxInt(u32)); | 32 | const input = try std.fs.cwd().readFileAlloc(allocator, in_path, std.math.maxInt(u32)); |
| 33 | defer allocator.free(input); | 33 | defer allocator.free(input); |
| 34 | 34 | ||
| 35 | var data = try Data.init(allocator); | 35 | var norm_data = try NormData.init(allocator); |
| 36 | defer data.deinit(); | 36 | defer norm_data.deinit(); |
| 37 | 37 | var n = try norm.init(allocator, &norm_data); | |
| 38 | var n = try norm.init(allocator, &data); | ||
| 39 | defer n.deinit(); | 38 | defer n.deinit(); |
| 40 | // var n = try norm.init(allocator); | 39 | // var n = try norm.init(allocator); |
| 41 | // defer n.deinit(); | 40 | // defer n.deinit(); |
| @@ -53,7 +52,7 @@ pub fn main() !void { | |||
| 53 | // while (iter.next()) |_| result += 1; | 52 | // while (iter.next()) |_| result += 1; |
| 54 | // while (iter.next()) |line| result += strWidth(line, &data); | 53 | // while (iter.next()) |line| result += strWidth(line, &data); |
| 55 | while (iter.next()) |line| { | 54 | while (iter.next()) |line| { |
| 56 | var nfc = try n.nfc(allocator, line); | 55 | var nfc = try n.nfd(allocator, line); |
| 57 | result += nfc.slice.len; | 56 | result += nfc.slice.len; |
| 58 | nfc.deinit(); | 57 | nfc.deinit(); |
| 59 | } | 58 | } |