diff options
| author | 2024-03-01 19:28:41 -0400 | |
|---|---|---|
| committer | 2024-03-01 19:28:41 -0400 | |
| commit | 9b435e69cb9f1572728b38457fabc9636bc47143 (patch) | |
| tree | fabc23d4f122906457ab8e389ef44c23205d506e /src | |
| parent | Normalizer.eqlIgnoreCase compatibility caseless matching (diff) | |
| download | zg-9b435e69cb9f1572728b38457fabc9636bc47143.tar.gz zg-9b435e69cb9f1572728b38457fabc9636bc47143.tar.xz zg-9b435e69cb9f1572728b38457fabc9636bc47143.zip | |
Changes when case folded check; 20ms faster
Diffstat (limited to 'src')
| -rw-r--r-- | src/FoldData.zig | 14 | ||||
| -rw-r--r-- | src/Normalizer.zig | 30 |
2 files changed, 38 insertions, 6 deletions
diff --git a/src/FoldData.zig b/src/FoldData.zig index 139c677..2a9a1f5 100644 --- a/src/FoldData.zig +++ b/src/FoldData.zig | |||
| @@ -5,6 +5,7 @@ const mem = std.mem; | |||
| 5 | 5 | ||
| 6 | allocator: mem.Allocator, | 6 | allocator: mem.Allocator, |
| 7 | fold: [][]u21 = undefined, | 7 | fold: [][]u21 = undefined, |
| 8 | cwcf: []bool = undefined, | ||
| 8 | 9 | ||
| 9 | const Self = @This(); | 10 | const Self = @This(); |
| 10 | 11 | ||
| @@ -20,18 +21,21 @@ pub fn init(allocator: mem.Allocator) !Self { | |||
| 20 | var self = Self{ | 21 | var self = Self{ |
| 21 | .allocator = allocator, | 22 | .allocator = allocator, |
| 22 | .fold = try allocator.alloc([]u21, 0x110000), | 23 | .fold = try allocator.alloc([]u21, 0x110000), |
| 24 | .cwcf = try allocator.alloc(bool, 0x110000), | ||
| 23 | }; | 25 | }; |
| 24 | 26 | ||
| 25 | @memset(self.fold, &.{}); | 27 | @memset(self.fold, &.{}); |
| 28 | @memset(self.cwcf, false); | ||
| 26 | 29 | ||
| 27 | while (true) { | 30 | while (true) { |
| 28 | const len: u8 = try reader.readInt(u8, endian); | 31 | const len: u8 = try reader.readInt(u8, endian); |
| 29 | if (len == 0) break; | 32 | if (len == 0) break; |
| 30 | const cp = try reader.readInt(u24, endian); | 33 | const cp = try reader.readInt(u24, endian); |
| 31 | self.fold[cp] = try allocator.alloc(u21, len - 1); | 34 | self.fold[cp >> 1] = try allocator.alloc(u21, len - 1); |
| 32 | for (0..len - 1) |i| { | 35 | for (0..len - 1) |i| { |
| 33 | self.fold[cp][i] = @intCast(try reader.readInt(u24, endian)); | 36 | self.fold[cp >> 1][i] = @intCast(try reader.readInt(u24, endian)); |
| 34 | } | 37 | } |
| 38 | self.cwcf[cp >> 1] = cp & 1 == 1; | ||
| 35 | } | 39 | } |
| 36 | 40 | ||
| 37 | return self; | 41 | return self; |
| @@ -40,9 +44,15 @@ pub fn init(allocator: mem.Allocator) !Self { | |||
| 40 | pub fn deinit(self: *Self) void { | 44 | pub fn deinit(self: *Self) void { |
| 41 | for (self.fold) |slice| self.allocator.free(slice); | 45 | for (self.fold) |slice| self.allocator.free(slice); |
| 42 | self.allocator.free(self.fold); | 46 | self.allocator.free(self.fold); |
| 47 | self.allocator.free(self.cwcf); | ||
| 43 | } | 48 | } |
| 44 | 49 | ||
| 45 | /// Returns the case fold for `cp`. | 50 | /// Returns the case fold for `cp`. |
| 46 | pub inline fn caseFold(self: Self, cp: u21) []const u21 { | 51 | pub inline fn caseFold(self: Self, cp: u21) []const u21 { |
| 47 | return self.fold[cp]; | 52 | return self.fold[cp]; |
| 48 | } | 53 | } |
| 54 | |||
| 55 | /// Returns true when caseFold(NFD(`cp`)) != NFD(`cp`). | ||
| 56 | pub inline fn changesWhenCaseFolded(self: Self, cp: u21) bool { | ||
| 57 | return self.cwcf[cp]; | ||
| 58 | } | ||
diff --git a/src/Normalizer.zig b/src/Normalizer.zig index c68b2ec..5a26dfa 100644 --- a/src/Normalizer.zig +++ b/src/Normalizer.zig | |||
| @@ -389,6 +389,12 @@ fn nfkdCodePoints( | |||
| 389 | return try dcp_list.toOwnedSlice(); | 389 | return try dcp_list.toOwnedSlice(); |
| 390 | } | 390 | } |
| 391 | 391 | ||
| 392 | fn changesWhenCaseFolded(self: Self, cps: []const u21) bool { | ||
| 393 | return for (cps) |cp| { | ||
| 394 | if (self.norm_data.fold_data.changesWhenCaseFolded(cp)) break true; | ||
| 395 | } else false; | ||
| 396 | } | ||
| 397 | |||
| 392 | pub fn eqlIgnoreCase( | 398 | pub fn eqlIgnoreCase( |
| 393 | self: Self, | 399 | self: Self, |
| 394 | allocator: mem.Allocator, | 400 | allocator: mem.Allocator, |
| @@ -397,10 +403,18 @@ pub fn eqlIgnoreCase( | |||
| 397 | ) !bool { | 403 | ) !bool { |
| 398 | if (ascii.isAsciiOnly(a) and ascii.isAsciiOnly(b)) return std.ascii.eqlIgnoreCase(a, b); | 404 | if (ascii.isAsciiOnly(a) and ascii.isAsciiOnly(b)) return std.ascii.eqlIgnoreCase(a, b); |
| 399 | 405 | ||
| 406 | // Process a | ||
| 400 | const nfd_a = try self.nfxdCodePoints(allocator, a, .nfd); | 407 | const nfd_a = try self.nfxdCodePoints(allocator, a, .nfd); |
| 401 | defer allocator.free(nfd_a); | 408 | defer allocator.free(nfd_a); |
| 402 | const cf_nfd_a = try self.caseFold(allocator, nfd_a); | 409 | |
| 403 | defer allocator.free(cf_nfd_a); | 410 | var need_frr_cf_nfd_a = false; |
| 411 | var cf_nfd_a: []const u21 = nfd_a; | ||
| 412 | if (self.changesWhenCaseFolded(nfd_a)) { | ||
| 413 | cf_nfd_a = try self.caseFold(allocator, nfd_a); | ||
| 414 | need_frr_cf_nfd_a = true; | ||
| 415 | } | ||
| 416 | defer if (need_frr_cf_nfd_a) allocator.free(cf_nfd_a); | ||
| 417 | |||
| 404 | const nfkd_cf_nfd_a = try self.nfkdCodePoints(allocator, cf_nfd_a); | 418 | const nfkd_cf_nfd_a = try self.nfkdCodePoints(allocator, cf_nfd_a); |
| 405 | defer allocator.free(nfkd_cf_nfd_a); | 419 | defer allocator.free(nfkd_cf_nfd_a); |
| 406 | const cf_nfkd_cf_nfd_a = try self.caseFold(allocator, nfkd_cf_nfd_a); | 420 | const cf_nfkd_cf_nfd_a = try self.caseFold(allocator, nfkd_cf_nfd_a); |
| @@ -408,10 +422,18 @@ pub fn eqlIgnoreCase( | |||
| 408 | const nfkd_cf_nfkd_cf_nfd_a = try self.nfkdCodePoints(allocator, cf_nfkd_cf_nfd_a); | 422 | const nfkd_cf_nfkd_cf_nfd_a = try self.nfkdCodePoints(allocator, cf_nfkd_cf_nfd_a); |
| 409 | defer allocator.free(nfkd_cf_nfkd_cf_nfd_a); | 423 | defer allocator.free(nfkd_cf_nfkd_cf_nfd_a); |
| 410 | 424 | ||
| 425 | // Process b | ||
| 411 | const nfd_b = try self.nfxdCodePoints(allocator, b, .nfd); | 426 | const nfd_b = try self.nfxdCodePoints(allocator, b, .nfd); |
| 412 | defer allocator.free(nfd_b); | 427 | defer allocator.free(nfd_b); |
| 413 | const cf_nfd_b = try self.caseFold(allocator, nfd_b); | 428 | |
| 414 | defer allocator.free(cf_nfd_b); | 429 | var need_frr_cf_nfd_b = false; |
| 430 | var cf_nfd_b: []const u21 = nfd_b; | ||
| 431 | if (self.changesWhenCaseFolded(nfd_b)) { | ||
| 432 | cf_nfd_b = try self.caseFold(allocator, nfd_b); | ||
| 433 | need_frr_cf_nfd_b = true; | ||
| 434 | } | ||
| 435 | defer if (need_frr_cf_nfd_b) allocator.free(cf_nfd_b); | ||
| 436 | |||
| 415 | const nfkd_cf_nfd_b = try self.nfkdCodePoints(allocator, cf_nfd_b); | 437 | const nfkd_cf_nfd_b = try self.nfkdCodePoints(allocator, cf_nfd_b); |
| 416 | defer allocator.free(nfkd_cf_nfd_b); | 438 | defer allocator.free(nfkd_cf_nfd_b); |
| 417 | const cf_nfkd_cf_nfd_b = try self.caseFold(allocator, nfkd_cf_nfd_b); | 439 | const cf_nfkd_cf_nfd_b = try self.caseFold(allocator, nfkd_cf_nfd_b); |