summaryrefslogtreecommitdiff
path: root/src/Caser.zig
blob: d02370a6e2ade7f92797d7b1213218b52bc9ae86 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
const std = @import("std");
const mem = std.mem;
const testing = std.testing;

const ascii = @import("ascii");
pub const FoldData = @import("FoldData");
const Normalizer = @import("Normalizer");

/// Case folding data consulted by every lookup in this file. It is held by
/// pointer and never freed here, so the pointee must outlive this value.
fold_data: *const FoldData,

/// This file is itself a struct type; `Self` names that type.
const Self = @This();

/// Builds a new slice in which every code point of `cps` is replaced by the
/// result of `fold_data.caseFold`. A single code point may expand to several.
///
/// Caller owns the returned slice and must free it with `allocator`.
/// Fails only if an allocation fails.
fn caseFold(
    self: Self,
    allocator: mem.Allocator,
    cps: []const u21,
) ![]const u21 {
    var folded = std.ArrayList(u21).init(allocator);
    // Frees the partial buffer on error; after `toOwnedSlice` the list is
    // empty, so this does not touch the returned memory.
    defer folded.deinit();

    for (cps) |cp| {
        const mapping = self.fold_data.caseFold(cp);

        if (mapping.len != 0) {
            try folded.appendSlice(mapping);
            continue;
        }

        // An empty result is treated as "no mapping": keep the code point.
        try folded.append(cp);
    }

    return try folded.toOwnedSlice();
}

/// Reports whether `fold_data.changesWhenCaseFolded` is true for at least one
/// code point in `cps`. Returns `false` for an empty slice.
fn changesWhenCaseFolded(self: Self, cps: []const u21) bool {
    for (cps) |cp| {
        if (self.fold_data.changesWhenCaseFolded(cp)) return true;
    }
    return false;
}

/// Computes the comparison key used by `compatCaselessMatch` for one string:
/// NFKD(caseFold(NFKD(caseFold(NFD(str))))).
///
/// Caller owns the returned slice and must free it with `allocator`. Every
/// intermediate buffer is released before returning, on success and on error.
fn compatCaselessKey(
    self: Self,
    allocator: mem.Allocator,
    normalizer: *const Normalizer,
    str: []const u8,
) ![]const u21 {
    const nfd = try normalizer.nfxdCodePoints(allocator, str, .nfd);
    defer allocator.free(nfd);

    // Skip the first fold (and its allocation) when no code point would
    // change; in that case `cf_nfd` aliases `nfd` and must not be freed twice.
    const needs_fold = self.changesWhenCaseFolded(nfd);
    const cf_nfd: []const u21 = if (needs_fold) try self.caseFold(allocator, nfd) else nfd;
    defer if (needs_fold) allocator.free(cf_nfd);

    const nfkd_cf_nfd = try normalizer.nfkdCodePoints(allocator, cf_nfd);
    defer allocator.free(nfkd_cf_nfd);

    const cf_nfkd_cf_nfd = try self.caseFold(allocator, nfkd_cf_nfd);
    defer allocator.free(cf_nfkd_cf_nfd);

    return try normalizer.nfkdCodePoints(allocator, cf_nfkd_cf_nfd);
}

/// Returns `true` when `a` and `b` are equal after each has been transformed
/// by NFKD(caseFold(NFKD(caseFold(NFD(X))))), i.e. they match ignoring case
/// and compatibility differences.
///
/// When both inputs are ASCII-only, the comparison is done directly with
/// `std.ascii.eqlIgnoreCase` and nothing is allocated. Otherwise `allocator`
/// is used for temporary buffers, all of which are freed before returning.
///
/// Errors: whatever `normalizer` or the allocator report (e.g. out of memory).
pub fn compatCaselessMatch(
    self: Self,
    allocator: mem.Allocator,
    normalizer: *const Normalizer,
    a: []const u8,
    b: []const u8,
) !bool {
    if (ascii.isAsciiOnly(a) and ascii.isAsciiOnly(b)) return std.ascii.eqlIgnoreCase(a, b);

    const key_a = try self.compatCaselessKey(allocator, normalizer, a);
    defer allocator.free(key_a);

    const key_b = try self.compatCaselessKey(allocator, normalizer, b);
    defer allocator.free(key_b);

    return mem.eql(u21, key_a, key_b);
}

test "compatCaselessMatch" {
    const allocator = testing.allocator;

    var norm_data = try Normalizer.NormData.init(allocator);
    defer norm_data.deinit();
    const n = Normalizer{ .norm_data = &norm_data };

    var fold_data = try FoldData.init(allocator);
    defer fold_data.deinit();
    const caser = Self{ .fold_data = &fold_data };

    // ASCII fast path: match and mismatch.
    try testing.expect(try caser.compatCaselessMatch(allocator, &n, "ascii only!", "ASCII Only!"));
    try testing.expect(!(try caser.compatCaselessMatch(allocator, &n, "ascii only!", "ASCII Only?")));

    // Precomposed vs. decomposed accents, plus U+03D3 vs. U+03A5 U+0301.
    const a = "Héllo World! \u{3d3}";
    const b = "He\u{301}llo World! \u{3a5}\u{301}";
    try testing.expect(try caser.compatCaselessMatch(allocator, &n, a, b));

    // Same as above, with U+03D2 U+0301 on the right-hand side.
    const c = "He\u{301}llo World! \u{3d2}\u{301}";
    try testing.expect(try caser.compatCaselessMatch(allocator, &n, a, c));

    // Full (non-ASCII) path must also report mismatches: a different base
    // letter is not erased by normalization or case folding.
    const d = "Hallo World! \u{3d3}";
    try testing.expect(!(try caser.compatCaselessMatch(allocator, &n, a, d)));
}