Removed equality functions from Normalizer

author: Jose Colon Rodriguez 2024-02-27 10:22:45 -0400
committer: Jose Colon Rodriguez 2024-02-27 10:22:45 -0400
commit: 0f1a3614cc543312ccedbbd0ee2e3e5cb5b406da (patch)
tree: 9fc30751b7760427f088de23339f76b32149e75c
parent: Using HangulData in NormData (diff)
download: zg-0f1a3614cc543312ccedbbd0ee2e3e5cb5b406da.tar.gz
zg-0f1a3614cc543312ccedbbd0ee2e3e5cb5b406da.tar.xz
zg-0f1a3614cc543312ccedbbd0ee2e3e5cb5b406da.zip
1 files changed, 3 insertions, 110 deletions
diff --git a/src/Normalizer.zig b/src/Normalizer.zig
index 0670cae..d1d7cee 100644
--- a/src/Normalizer.zig
+++ b/src/Normalizer.zig
@@ -1,12 +1,11 @@
-//! Normalizer contains functions and methods that implement Unicode Normalization algorithms. You can normalize strings
+//! Normalizer contains functions and methods that implement
-//! into NFC, NFKC, NFD, and NFKD normalization forms (see `nfc`, `nfkc`, `nfd`, and `nfkd`). You can also test for
+//! Unicode Normalization. You can normalize strings into NFC,
-//! string equality under different parameters related to normalization (see `eql`, `eqlCaseless`, `eqlIdentifiers`).
+//! NFKC, NFD, and NFKD normalization forms.
 const std = @import("std");
 const testing = std.testing;
 const CodePointIterator = @import("code_point").Iterator;
-const case_fold_map = @import("ziglyph").case_folding;
 const norm_props = @import("ziglyph").normalization_props;
 pub const NormData = @import("NormData");
@@ -499,44 +498,6 @@ test "nfkc" {
    try std.testing.expectEqualStrings("Complex char: \u{038E}", result.slice);
 }
-/// Tests for equality as per Unicode rules for Identifiers.
-pub fn eqlIdentifiers(allocator: std.mem.Allocator, a: []const u8, b: []const u8) !bool {
-    var list_a = try std.ArrayList(u21).initCapacity(allocator, a.len);
-    defer list_a.deinit();
-    var list_b = try std.ArrayList(u21).initCapacity(allocator, b.len);
-    defer list_b.deinit();
-    const Item = struct {
-        str: []const u8,
-        list: *std.ArrayList(u21),
-    };
-    const items = [_]Item{
-        .{ .str = a, .list = &list_a },
-        .{ .str = b, .list = &list_b },
-    };
-    for (items) |item| {
-        var cp_iter = CodePointIterator{ .bytes = item.str };
-        while (cp_iter.next()) |cp| {
-            if (norm_props.toNfkcCaseFold(cp.code)) |nfkcf| {
-                for (nfkcf) |c| {
-                    if (c == 0) break;
-                    item.list.appendAssumeCapacity(c);
-                }
-            } else {
-                item.list.appendAssumeCapacity(cp.code); // maps to itself
-            }
-        }
-    }
-    return std.mem.eql(u21, list_a.items, list_b.items);
-}
-test "eqlIdentifiers" {
-    try std.testing.expect(try eqlIdentifiers(std.testing.allocator, "Foé", "foé"));
-}
 /// Tests for equality of `a` and `b` after normalizing to NFD.
 pub fn eql(self: Self, allocator: std.mem.Allocator, a: []const u8, b: []const u8) !bool {
    var norm_result_a = try self.nfd(allocator, a);
@@ -557,74 +518,6 @@ test "eql" {
    try std.testing.expect(try n.eql(allocator, "foϓ", "fo\u{03D2}\u{0301}"));
 }
-fn requiresNfdBeforeCaseFold(cp: u21) bool {
-    return switch (cp) {
-        0x0345 => true,
-        0x1F80...0x1FAF => true,
-        0x1FB2...0x1FB4 => true,
-        0x1FB7 => true,
-        0x1FBC => true,
-        0x1FC2...0x1FC4 => true,
-        0x1FC7 => true,
-        0x1FCC => true,
-        0x1FF2...0x1FF4 => true,
-        0x1FF7 => true,
-        0x1FFC => true,
-        else => false,
-    };
-}
-fn requiresPreNfd(str: []const u8) bool {
-    var cp_iter = CodePointIterator{ .bytes = str };
-    return while (cp_iter.next()) |cp| {
-        if (requiresNfdBeforeCaseFold(cp.code)) break true;
-    } else false;
-}
-/// `eqlCaseless` tests for equality of `a` and `b` after normalizing to NFD and ignoring letter case.
-pub fn eqlCaseless(self: Self, allocator: std.mem.Allocator, a: []const u8, b: []const u8) !bool {
-    // The long winding road of normalized caseless matching...
-    // NFD(CaseFold(NFD(str))) or NFD(CaseFold(str))
-    var norm_result_a: Result = Result{ .slice = a };
-    if (requiresPreNfd(a)) {
-        if (!self.isFcd(a)) {
-            norm_result_a = try self.nfd(allocator, a);
-        }
-    }
-    defer norm_result_a.deinit();
-    const cf_a = try case_fold_map.caseFoldStr(allocator, norm_result_a.slice);
-    defer allocator.free(cf_a);
-    norm_result_a.deinit();
-    norm_result_a = try self.nfd(allocator, cf_a);
-    var norm_result_b: Result = Result{ .slice = b };
-    if (requiresPreNfd(b)) {
-        if (!self.isFcd(b)) {
-            norm_result_b = try self.nfd(allocator, b);
-        }
-    }
-    defer norm_result_b.deinit();
-    const cf_b = try case_fold_map.caseFoldStr(allocator, norm_result_b.slice);
-    defer allocator.free(cf_b);
-    norm_result_b.deinit();
-    norm_result_b = try self.nfd(allocator, cf_b);
-    return std.mem.eql(u8, norm_result_a.slice, norm_result_b.slice);
-}
-test "eqlCaseless" {
-    const allocator = testing.allocator;
-    var data = try NormData.init(allocator);
-    defer data.deinit();
-    var n = Self{ .norm_data = &data };
-    try std.testing.expect(try n.eqlCaseless(allocator, "Foϓ", "fo\u{03D2}\u{0301}"));
-    try std.testing.expect(try n.eqlCaseless(allocator, "FOÉ", "foe\u{0301}")); // foÉ == foé
-}
 // FCD
 fn getLeadCcc(self: Self, cp: u21) u8 {
    const dc = self.mapping(cp, .nfd);
author	Jose Colon Rodriguez	2024-02-27 10:22:45 -0400
committer	Jose Colon Rodriguez	2024-02-27 10:22:45 -0400
commit	0f1a3614cc543312ccedbbd0ee2e3e5cb5b406da (patch)
tree	9fc30751b7760427f088de23339f76b32149e75c
parent	Using HangulData in NormData (diff)
download	zg-0f1a3614cc543312ccedbbd0ee2e3e5cb5b406da.tar.gz zg-0f1a3614cc543312ccedbbd0ee2e3e5cb5b406da.tar.xz zg-0f1a3614cc543312ccedbbd0ee2e3e5cb5b406da.zip

diff --git a/src/Normalizer.zig b/src/Normalizer.zig index 0670cae..d1d7cee 100644 --- a/src/Normalizer.zig +++ b/src/Normalizer.zig
@@ -1,12 +1,11 @@
1	//! Normalizer contains functions and methods that implement Unicode Normalization algorithms. You can normalize strings	1	//! Normalizer contains functions and methods that implement
2	//! into NFC, NFKC, NFD, and NFKD normalization forms (see `nfc`, `nfkc`, `nfd`, and `nfkd`). You can also test for	2	//! Unicode Normalization. You can normalize strings into NFC,
3	//! string equality under different parameters related to normalization (see `eql`, `eqlCaseless`, `eqlIdentifiers`).	3	//! NFKC, NFD, and NFKD normalization forms.
4		4
5	const std = @import("std");	5	const std = @import("std");
6	const testing = std.testing;	6	const testing = std.testing;
7		7
8	const CodePointIterator = @import("code_point").Iterator;	8	const CodePointIterator = @import("code_point").Iterator;
9	const case_fold_map = @import("ziglyph").case_folding;
10	const norm_props = @import("ziglyph").normalization_props;	9	const norm_props = @import("ziglyph").normalization_props;
11		10
12	pub const NormData = @import("NormData");	11	pub const NormData = @import("NormData");
@@ -499,44 +498,6 @@ test "nfkc" {
499	try std.testing.expectEqualStrings("Complex char: \u{038E}", result.slice);	498	try std.testing.expectEqualStrings("Complex char: \u{038E}", result.slice);
500	}	499	}
501		500
502	/// Tests for equality as per Unicode rules for Identifiers.
503	pub fn eqlIdentifiers(allocator: std.mem.Allocator, a: []const u8, b: []const u8) !bool {
504	var list_a = try std.ArrayList(u21).initCapacity(allocator, a.len);
505	defer list_a.deinit();
506	var list_b = try std.ArrayList(u21).initCapacity(allocator, b.len);
507	defer list_b.deinit();
508
509	const Item = struct {
510	str: []const u8,
511	list: *std.ArrayList(u21),
512	};
513
514	const items = [_]Item{
515	.{ .str = a, .list = &list_a },
516	.{ .str = b, .list = &list_b },
517	};
518
519	for (items) |item| {
520	var cp_iter = CodePointIterator{ .bytes = item.str };
521	while (cp_iter.next()) |cp| {
522	if (norm_props.toNfkcCaseFold(cp.code)) |nfkcf| {
523	for (nfkcf) |c| {
524	if (c == 0) break;
525	item.list.appendAssumeCapacity(c);
526	}
527	} else {
528	item.list.appendAssumeCapacity(cp.code); // maps to itself
529	}
530	}
531	}
532
533	return std.mem.eql(u21, list_a.items, list_b.items);
534	}
535
536	test "eqlIdentifiers" {
537	try std.testing.expect(try eqlIdentifiers(std.testing.allocator, "Foé", "foé"));
538	}
539
540	/// Tests for equality of `a` and `b` after normalizing to NFD.	501	/// Tests for equality of `a` and `b` after normalizing to NFD.
541	pub fn eql(self: Self, allocator: std.mem.Allocator, a: []const u8, b: []const u8) !bool {	502	pub fn eql(self: Self, allocator: std.mem.Allocator, a: []const u8, b: []const u8) !bool {
542	var norm_result_a = try self.nfd(allocator, a);	503	var norm_result_a = try self.nfd(allocator, a);
@@ -557,74 +518,6 @@ test "eql" {
557	try std.testing.expect(try n.eql(allocator, "foϓ", "fo\u{03D2}\u{0301}"));	518	try std.testing.expect(try n.eql(allocator, "foϓ", "fo\u{03D2}\u{0301}"));
558	}	519	}
559		520
560	fn requiresNfdBeforeCaseFold(cp: u21) bool {
561	return switch (cp) {
562	0x0345 => true,
563	0x1F80...0x1FAF => true,
564	0x1FB2...0x1FB4 => true,
565	0x1FB7 => true,
566	0x1FBC => true,
567	0x1FC2...0x1FC4 => true,
568	0x1FC7 => true,
569	0x1FCC => true,
570	0x1FF2...0x1FF4 => true,
571	0x1FF7 => true,
572	0x1FFC => true,
573	else => false,
574	};
575	}
576
577	fn requiresPreNfd(str: []const u8) bool {
578	var cp_iter = CodePointIterator{ .bytes = str };
579
580	return while (cp_iter.next()) |cp| {
581	if (requiresNfdBeforeCaseFold(cp.code)) break true;
582	} else false;
583	}
584
585	/// `eqlCaseless` tests for equality of `a` and `b` after normalizing to NFD and ignoring letter case.
586	pub fn eqlCaseless(self: Self, allocator: std.mem.Allocator, a: []const u8, b: []const u8) !bool {
587	// The long winding road of normalized caseless matching...
588	// NFD(CaseFold(NFD(str))) or NFD(CaseFold(str))
589	var norm_result_a: Result = Result{ .slice = a };
590	if (requiresPreNfd(a)) {
591	if (!self.isFcd(a)) {
592	norm_result_a = try self.nfd(allocator, a);
593	}
594	}
595	defer norm_result_a.deinit();
596
597	const cf_a = try case_fold_map.caseFoldStr(allocator, norm_result_a.slice);
598	defer allocator.free(cf_a);
599	norm_result_a.deinit();
600	norm_result_a = try self.nfd(allocator, cf_a);
601
602	var norm_result_b: Result = Result{ .slice = b };
603	if (requiresPreNfd(b)) {
604	if (!self.isFcd(b)) {
605	norm_result_b = try self.nfd(allocator, b);
606	}
607	}
608	defer norm_result_b.deinit();
609
610	const cf_b = try case_fold_map.caseFoldStr(allocator, norm_result_b.slice);
611	defer allocator.free(cf_b);
612	norm_result_b.deinit();
613	norm_result_b = try self.nfd(allocator, cf_b);
614
615	return std.mem.eql(u8, norm_result_a.slice, norm_result_b.slice);
616	}
617
618	test "eqlCaseless" {
619	const allocator = testing.allocator;
620	var data = try NormData.init(allocator);
621	defer data.deinit();
622	var n = Self{ .norm_data = &data };
623
624	try std.testing.expect(try n.eqlCaseless(allocator, "Foϓ", "fo\u{03D2}\u{0301}"));
625	try std.testing.expect(try n.eqlCaseless(allocator, "FOÉ", "foe\u{0301}")); // foÉ == foé
626	}
627
628	// FCD	521	// FCD
629	fn getLeadCcc(self: Self, cp: u21) u8 {	522	fn getLeadCcc(self: Self, cp: u21) u8 {
630	const dc = self.mapping(cp, .nfd);	523	const dc = self.mapping(cp, .nfd);