summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Jose Colon Rodriguez, 2024-03-01 19:28:41 -0400
committer: Jose Colon Rodriguez, 2024-03-01 19:28:41 -0400
commit: 9b435e69cb9f1572728b38457fabc9636bc47143 (patch)
tree: fabc23d4f122906457ab8e389ef44c23205d506e /src
parent: Normalizer.eqlIgnoreCase compatibility caseless matching (diff)
download: zg-9b435e69cb9f1572728b38457fabc9636bc47143.tar.gz
          zg-9b435e69cb9f1572728b38457fabc9636bc47143.tar.xz
          zg-9b435e69cb9f1572728b38457fabc9636bc47143.zip
Changes when case folded check; 20ms faster
Diffstat (limited to 'src')
-rw-r--r-- src/FoldData.zig 14
-rw-r--r-- src/Normalizer.zig 30
2 files changed, 38 insertions, 6 deletions
diff --git a/src/FoldData.zig b/src/FoldData.zig
index 139c677..2a9a1f5 100644
--- a/src/FoldData.zig
+++ b/src/FoldData.zig
@@ -5,6 +5,7 @@ const mem = std.mem;
5 5
6allocator: mem.Allocator, 6allocator: mem.Allocator,
7fold: [][]u21 = undefined, 7fold: [][]u21 = undefined,
8cwcf: []bool = undefined,
8 9
9const Self = @This(); 10const Self = @This();
10 11
@@ -20,18 +21,21 @@ pub fn init(allocator: mem.Allocator) !Self {
20 var self = Self{ 21 var self = Self{
21 .allocator = allocator, 22 .allocator = allocator,
22 .fold = try allocator.alloc([]u21, 0x110000), 23 .fold = try allocator.alloc([]u21, 0x110000),
24 .cwcf = try allocator.alloc(bool, 0x110000),
23 }; 25 };
24 26
25 @memset(self.fold, &.{}); 27 @memset(self.fold, &.{});
28 @memset(self.cwcf, false);
26 29
27 while (true) { 30 while (true) {
28 const len: u8 = try reader.readInt(u8, endian); 31 const len: u8 = try reader.readInt(u8, endian);
29 if (len == 0) break; 32 if (len == 0) break;
30 const cp = try reader.readInt(u24, endian); 33 const cp = try reader.readInt(u24, endian);
31 self.fold[cp] = try allocator.alloc(u21, len - 1); 34 self.fold[cp >> 1] = try allocator.alloc(u21, len - 1);
32 for (0..len - 1) |i| { 35 for (0..len - 1) |i| {
33 self.fold[cp][i] = @intCast(try reader.readInt(u24, endian)); 36 self.fold[cp >> 1][i] = @intCast(try reader.readInt(u24, endian));
34 } 37 }
38 self.cwcf[cp >> 1] = cp & 1 == 1;
35 } 39 }
36 40
37 return self; 41 return self;
@@ -40,9 +44,15 @@ pub fn init(allocator: mem.Allocator) !Self {
40pub fn deinit(self: *Self) void { 44pub fn deinit(self: *Self) void {
41 for (self.fold) |slice| self.allocator.free(slice); 45 for (self.fold) |slice| self.allocator.free(slice);
42 self.allocator.free(self.fold); 46 self.allocator.free(self.fold);
47 self.allocator.free(self.cwcf);
43} 48}
44 49
45/// Returns the case fold for `cp`. 50/// Returns the case fold for `cp`.
46pub inline fn caseFold(self: Self, cp: u21) []const u21 { 51pub inline fn caseFold(self: Self, cp: u21) []const u21 {
47 return self.fold[cp]; 52 return self.fold[cp];
48} 53}
54
55/// Returns true when caseFold(NFD(`cp`)) != NFD(`cp`).
56pub inline fn changesWhenCaseFolded(self: Self, cp: u21) bool {
57 return self.cwcf[cp];
58}
diff --git a/src/Normalizer.zig b/src/Normalizer.zig
index c68b2ec..5a26dfa 100644
--- a/src/Normalizer.zig
+++ b/src/Normalizer.zig
@@ -389,6 +389,12 @@ fn nfkdCodePoints(
389 return try dcp_list.toOwnedSlice(); 389 return try dcp_list.toOwnedSlice();
390} 390}
391 391
392fn changesWhenCaseFolded(self: Self, cps: []const u21) bool {
393 return for (cps) |cp| {
394 if (self.norm_data.fold_data.changesWhenCaseFolded(cp)) break true;
395 } else false;
396}
397
392pub fn eqlIgnoreCase( 398pub fn eqlIgnoreCase(
393 self: Self, 399 self: Self,
394 allocator: mem.Allocator, 400 allocator: mem.Allocator,
@@ -397,10 +403,18 @@ pub fn eqlIgnoreCase(
397) !bool { 403) !bool {
398 if (ascii.isAsciiOnly(a) and ascii.isAsciiOnly(b)) return std.ascii.eqlIgnoreCase(a, b); 404 if (ascii.isAsciiOnly(a) and ascii.isAsciiOnly(b)) return std.ascii.eqlIgnoreCase(a, b);
399 405
406 // Process a
400 const nfd_a = try self.nfxdCodePoints(allocator, a, .nfd); 407 const nfd_a = try self.nfxdCodePoints(allocator, a, .nfd);
401 defer allocator.free(nfd_a); 408 defer allocator.free(nfd_a);
402 const cf_nfd_a = try self.caseFold(allocator, nfd_a); 409
403 defer allocator.free(cf_nfd_a); 410 var need_frr_cf_nfd_a = false;
411 var cf_nfd_a: []const u21 = nfd_a;
412 if (self.changesWhenCaseFolded(nfd_a)) {
413 cf_nfd_a = try self.caseFold(allocator, nfd_a);
414 need_frr_cf_nfd_a = true;
415 }
416 defer if (need_frr_cf_nfd_a) allocator.free(cf_nfd_a);
417
404 const nfkd_cf_nfd_a = try self.nfkdCodePoints(allocator, cf_nfd_a); 418 const nfkd_cf_nfd_a = try self.nfkdCodePoints(allocator, cf_nfd_a);
405 defer allocator.free(nfkd_cf_nfd_a); 419 defer allocator.free(nfkd_cf_nfd_a);
406 const cf_nfkd_cf_nfd_a = try self.caseFold(allocator, nfkd_cf_nfd_a); 420 const cf_nfkd_cf_nfd_a = try self.caseFold(allocator, nfkd_cf_nfd_a);
@@ -408,10 +422,18 @@ pub fn eqlIgnoreCase(
408 const nfkd_cf_nfkd_cf_nfd_a = try self.nfkdCodePoints(allocator, cf_nfkd_cf_nfd_a); 422 const nfkd_cf_nfkd_cf_nfd_a = try self.nfkdCodePoints(allocator, cf_nfkd_cf_nfd_a);
409 defer allocator.free(nfkd_cf_nfkd_cf_nfd_a); 423 defer allocator.free(nfkd_cf_nfkd_cf_nfd_a);
410 424
425 // Process b
411 const nfd_b = try self.nfxdCodePoints(allocator, b, .nfd); 426 const nfd_b = try self.nfxdCodePoints(allocator, b, .nfd);
412 defer allocator.free(nfd_b); 427 defer allocator.free(nfd_b);
413 const cf_nfd_b = try self.caseFold(allocator, nfd_b); 428
414 defer allocator.free(cf_nfd_b); 429 var need_frr_cf_nfd_b = false;
430 var cf_nfd_b: []const u21 = nfd_b;
431 if (self.changesWhenCaseFolded(nfd_b)) {
432 cf_nfd_b = try self.caseFold(allocator, nfd_b);
433 need_frr_cf_nfd_b = true;
434 }
435 defer if (need_frr_cf_nfd_b) allocator.free(cf_nfd_b);
436
415 const nfkd_cf_nfd_b = try self.nfkdCodePoints(allocator, cf_nfd_b); 437 const nfkd_cf_nfd_b = try self.nfkdCodePoints(allocator, cf_nfd_b);
416 defer allocator.free(nfkd_cf_nfd_b); 438 defer allocator.free(nfkd_cf_nfd_b);
417 const cf_nfkd_cf_nfd_b = try self.caseFold(allocator, nfkd_cf_nfd_b); 439 const cf_nfkd_cf_nfd_b = try self.caseFold(allocator, nfkd_cf_nfd_b);