summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Jose Colon Rodriguez2024-02-27 10:22:45 -0400
committerGravatar Jose Colon Rodriguez2024-02-27 10:22:45 -0400
commit0f1a3614cc543312ccedbbd0ee2e3e5cb5b406da (patch)
tree9fc30751b7760427f088de23339f76b32149e75c
parentUsing HangulData in NormData (diff)
downloadzg-0f1a3614cc543312ccedbbd0ee2e3e5cb5b406da.tar.gz
zg-0f1a3614cc543312ccedbbd0ee2e3e5cb5b406da.tar.xz
zg-0f1a3614cc543312ccedbbd0ee2e3e5cb5b406da.zip
Removed equality functions from Normalizer
-rw-r--r--src/Normalizer.zig113
1 files changed, 3 insertions, 110 deletions
diff --git a/src/Normalizer.zig b/src/Normalizer.zig
index 0670cae..d1d7cee 100644
--- a/src/Normalizer.zig
+++ b/src/Normalizer.zig
@@ -1,12 +1,11 @@
1//! Normalizer contains functions and methods that implement Unicode Normalization algorithms. You can normalize strings 1//! Normalizer contains functions and methods that implement
2//! into NFC, NFKC, NFD, and NFKD normalization forms (see `nfc`, `nfkc`, `nfd`, and `nfkd`). You can also test for 2//! Unicode Normalization. You can normalize strings into NFC,
3//! string equality under different parameters related to normalization (see `eql`, `eqlCaseless`, `eqlIdentifiers`). 3//! NFKC, NFD, and NFKD normalization forms.
4 4
5const std = @import("std"); 5const std = @import("std");
6const testing = std.testing; 6const testing = std.testing;
7 7
8const CodePointIterator = @import("code_point").Iterator; 8const CodePointIterator = @import("code_point").Iterator;
9const case_fold_map = @import("ziglyph").case_folding;
10const norm_props = @import("ziglyph").normalization_props; 9const norm_props = @import("ziglyph").normalization_props;
11 10
12pub const NormData = @import("NormData"); 11pub const NormData = @import("NormData");
@@ -499,44 +498,6 @@ test "nfkc" {
499 try std.testing.expectEqualStrings("Complex char: \u{038E}", result.slice); 498 try std.testing.expectEqualStrings("Complex char: \u{038E}", result.slice);
500} 499}
501 500
/// Tests for equality as per Unicode rules for Identifiers.
///
/// Every code point of `a` and `b` is passed through
/// `norm_props.toNfkcCaseFold`; code points without a mapping are kept as-is.
/// The two resulting code point sequences are then compared for exact equality.
///
/// `allocator` backs two temporary buffers that are released before returning.
/// Returns an error if an allocation fails.
pub fn eqlIdentifiers(allocator: std.mem.Allocator, a: []const u8, b: []const u8) !bool {
    // The byte length is only an initial capacity hint, not an upper bound on
    // the number of code points produced (see `appendNfkcCaseFold`).
    var folded_a = try std.ArrayList(u21).initCapacity(allocator, a.len);
    defer folded_a.deinit();
    var folded_b = try std.ArrayList(u21).initCapacity(allocator, b.len);
    defer folded_b.deinit();

    try appendNfkcCaseFold(&folded_a, a);
    try appendNfkcCaseFold(&folded_b, b);

    return std.mem.eql(u21, folded_a.items, folded_b.items);
}

/// Appends the `norm_props.toNfkcCaseFold` mapping of each code point in `str`
/// to `list`, growing the list as needed.
fn appendNfkcCaseFold(list: *std.ArrayList(u21), str: []const u8) !void {
    var cp_iter = CodePointIterator{ .bytes = str };
    while (cp_iter.next()) |cp| {
        if (norm_props.toNfkcCaseFold(cp.code)) |nfkcf| {
            for (nfkcf) |c| {
                if (c == 0) break; // a zero entry ends the mapping
                // One source code point can map to more code points than it
                // has UTF-8 bytes (e.g. U+FDFA: 3 bytes, 18 code points), so
                // the list must be allowed to grow here.
                try list.append(c);
            }
        } else {
            try list.append(cp.code); // maps to itself
        }
    }
}
535
test "eqlIdentifiers" {
    // Strings differing only in the case of the first letter must compare equal.
    const matches = try eqlIdentifiers(std.testing.allocator, "Foé", "foé");
    try std.testing.expect(matches);
}
539
540/// Tests for equality of `a` and `b` after normalizing to NFD. 501/// Tests for equality of `a` and `b` after normalizing to NFD.
541pub fn eql(self: Self, allocator: std.mem.Allocator, a: []const u8, b: []const u8) !bool { 502pub fn eql(self: Self, allocator: std.mem.Allocator, a: []const u8, b: []const u8) !bool {
542 var norm_result_a = try self.nfd(allocator, a); 503 var norm_result_a = try self.nfd(allocator, a);
@@ -557,74 +518,6 @@ test "eql" {
557 try std.testing.expect(try n.eql(allocator, "foϓ", "fo\u{03D2}\u{0301}")); 518 try std.testing.expect(try n.eql(allocator, "foϓ", "fo\u{03D2}\u{0301}"));
558} 519}
559 520
/// Returns true when `cp` is U+0345 or one of the listed code points in the
/// U+1F80..U+1FFC area; returns false for every other code point.
fn requiresNfdBeforeCaseFold(cp: u21) bool {
    switch (cp) {
        0x0345,
        0x1F80...0x1FAF,
        0x1FB2...0x1FB4,
        0x1FB7,
        0x1FBC,
        0x1FC2...0x1FC4,
        0x1FC7,
        0x1FCC,
        0x1FF2...0x1FF4,
        0x1FF7,
        0x1FFC,
        => return true,
        else => return false,
    }
}
576
/// Returns true if at least one code point of `str` satisfies
/// `requiresNfdBeforeCaseFold`; returns false otherwise, including for an
/// empty string.
fn requiresPreNfd(str: []const u8) bool {
    var code_points = CodePointIterator{ .bytes = str };

    while (code_points.next()) |cp| {
        if (requiresNfdBeforeCaseFold(cp.code)) return true;
    }

    return false;
}
584
/// `eqlCaseless` tests for equality of `a` and `b` after normalizing to NFD and ignoring letter case.
///
/// Each input goes through NFD(CaseFold(NFD(str))) when `requiresPreNfd`
/// reports true and the string is not already FCD, and through
/// NFD(CaseFold(str)) otherwise. The resulting byte slices are then compared.
///
/// `allocator` is used for all intermediate buffers, which are released
/// before returning. Any error from `nfd` or `caseFoldStr` is propagated.
pub fn eqlCaseless(self: Self, allocator: std.mem.Allocator, a: []const u8, b: []const u8) !bool {
    // Every intermediate value gets its own variable and exactly one `defer`,
    // so no value is released twice when a later `try` fails.
    // NOTE(review): this assumes `Result.deinit` must not be called twice on
    // the same value — confirm against its definition.

    // Optional NFD pass before case folding.
    var pre_a: Result = Result{ .slice = a };
    if (requiresPreNfd(a) and !self.isFcd(a)) {
        pre_a = try self.nfd(allocator, a);
    }
    defer pre_a.deinit();

    const cf_a = try case_fold_map.caseFoldStr(allocator, pre_a.slice);
    // `cf_a` is kept alive until the function returns because the comparison
    // below reads the result of `nfd(cf_a)`.
    defer allocator.free(cf_a);

    var norm_result_a = try self.nfd(allocator, cf_a);
    defer norm_result_a.deinit();

    // Same pipeline for `b`.
    var pre_b: Result = Result{ .slice = b };
    if (requiresPreNfd(b) and !self.isFcd(b)) {
        pre_b = try self.nfd(allocator, b);
    }
    defer pre_b.deinit();

    const cf_b = try case_fold_map.caseFoldStr(allocator, pre_b.slice);
    defer allocator.free(cf_b);

    var norm_result_b = try self.nfd(allocator, cf_b);
    defer norm_result_b.deinit();

    return std.mem.eql(u8, norm_result_a.slice, norm_result_b.slice);
}
617
test "eqlCaseless" {
    // Build a normalizer backed by freshly loaded normalization data.
    var norm_data = try NormData.init(testing.allocator);
    defer norm_data.deinit();
    const normalizer = Self{ .norm_data = &norm_data };

    // Precomposed vs. decomposed forms, differing in case.
    const upsilon_match = try normalizer.eqlCaseless(testing.allocator, "Foϓ", "fo\u{03D2}\u{0301}");
    try std.testing.expect(upsilon_match);

    // foÉ == foé
    const e_acute_match = try normalizer.eqlCaseless(testing.allocator, "FOÉ", "foe\u{0301}");
    try std.testing.expect(e_acute_match);
}
627
628// FCD 521// FCD
629fn getLeadCcc(self: Self, cp: u21) u8 { 522fn getLeadCcc(self: Self, cp: u21) u8 {
630 const dc = self.mapping(cp, .nfd); 523 const dc = self.mapping(cp, .nfd);