diff options
Diffstat (limited to 'src/Normalizer.zig')
| -rw-r--r-- | src/Normalizer.zig | 98 |
1 files changed, 3 insertions, 95 deletions
diff --git a/src/Normalizer.zig b/src/Normalizer.zig index 5a26dfa..3ff157c 100644 --- a/src/Normalizer.zig +++ b/src/Normalizer.zig | |||
| @@ -18,7 +18,7 @@ const ascii = @import("ascii"); | |||
| 18 | const CodePointIterator = @import("code_point").Iterator; | 18 | const CodePointIterator = @import("code_point").Iterator; |
| 19 | pub const NormData = @import("NormData"); | 19 | pub const NormData = @import("NormData"); |
| 20 | 20 | ||
| 21 | norm_data: *NormData, | 21 | norm_data: *const NormData, |
| 22 | 22 | ||
| 23 | const Self = @This(); | 23 | const Self = @This(); |
| 24 | 24 | ||
| @@ -255,7 +255,7 @@ pub fn nfkd(self: Self, allocator: mem.Allocator, str: []const u8) !Result { | |||
| 255 | return self.nfxd(allocator, str, .nfkd); | 255 | return self.nfxd(allocator, str, .nfkd); |
| 256 | } | 256 | } |
| 257 | 257 | ||
| 258 | fn nfxdCodePoints(self: Self, allocator: mem.Allocator, str: []const u8, form: Form) ![]u21 { | 258 | pub fn nfxdCodePoints(self: Self, allocator: mem.Allocator, str: []const u8, form: Form) ![]u21 { |
| 259 | var dcp_list = std.ArrayList(u21).init(allocator); | 259 | var dcp_list = std.ArrayList(u21).init(allocator); |
| 260 | defer dcp_list.deinit(); | 260 | defer dcp_list.deinit(); |
| 261 | 261 | ||
| @@ -343,28 +343,7 @@ test "nfkd !ASCII / alloc" { | |||
| 343 | try testing.expectEqualStrings("He\u{301}llo World! \u{3a5}\u{301}", result.slice); | 343 | try testing.expectEqualStrings("He\u{301}llo World! \u{3a5}\u{301}", result.slice); |
| 344 | } | 344 | } |
| 345 | 345 | ||
| 346 | fn caseFold( | 346 | pub fn nfkdCodePoints( |
| 347 | self: Self, | ||
| 348 | allocator: mem.Allocator, | ||
| 349 | cps: []const u21, | ||
| 350 | ) ![]const u21 { | ||
| 351 | var cfcps = std.ArrayList(u21).init(allocator); | ||
| 352 | defer cfcps.deinit(); | ||
| 353 | |||
| 354 | for (cps) |cp| { | ||
| 355 | const cf = self.norm_data.fold_data.caseFold(cp); | ||
| 356 | |||
| 357 | if (cf.len == 0) { | ||
| 358 | try cfcps.append(cp); | ||
| 359 | } else { | ||
| 360 | try cfcps.appendSlice(cf); | ||
| 361 | } | ||
| 362 | } | ||
| 363 | |||
| 364 | return try cfcps.toOwnedSlice(); | ||
| 365 | } | ||
| 366 | |||
| 367 | fn nfkdCodePoints( | ||
| 368 | self: Self, | 347 | self: Self, |
| 369 | allocator: mem.Allocator, | 348 | allocator: mem.Allocator, |
| 370 | cps: []const u21, | 349 | cps: []const u21, |
| @@ -389,77 +368,6 @@ fn nfkdCodePoints( | |||
| 389 | return try dcp_list.toOwnedSlice(); | 368 | return try dcp_list.toOwnedSlice(); |
| 390 | } | 369 | } |
| 391 | 370 | ||
| 392 | fn changesWhenCaseFolded(self: Self, cps: []const u21) bool { | ||
| 393 | return for (cps) |cp| { | ||
| 394 | if (self.norm_data.fold_data.changesWhenCaseFolded(cp)) break true; | ||
| 395 | } else false; | ||
| 396 | } | ||
| 397 | |||
| 398 | pub fn eqlIgnoreCase( | ||
| 399 | self: Self, | ||
| 400 | allocator: mem.Allocator, | ||
| 401 | a: []const u8, | ||
| 402 | b: []const u8, | ||
| 403 | ) !bool { | ||
| 404 | if (ascii.isAsciiOnly(a) and ascii.isAsciiOnly(b)) return std.ascii.eqlIgnoreCase(a, b); | ||
| 405 | |||
| 406 | // Process a | ||
| 407 | const nfd_a = try self.nfxdCodePoints(allocator, a, .nfd); | ||
| 408 | defer allocator.free(nfd_a); | ||
| 409 | |||
| 410 | var need_frr_cf_nfd_a = false; | ||
| 411 | var cf_nfd_a: []const u21 = nfd_a; | ||
| 412 | if (self.changesWhenCaseFolded(nfd_a)) { | ||
| 413 | cf_nfd_a = try self.caseFold(allocator, nfd_a); | ||
| 414 | need_frr_cf_nfd_a = true; | ||
| 415 | } | ||
| 416 | defer if (need_frr_cf_nfd_a) allocator.free(cf_nfd_a); | ||
| 417 | |||
| 418 | const nfkd_cf_nfd_a = try self.nfkdCodePoints(allocator, cf_nfd_a); | ||
| 419 | defer allocator.free(nfkd_cf_nfd_a); | ||
| 420 | const cf_nfkd_cf_nfd_a = try self.caseFold(allocator, nfkd_cf_nfd_a); | ||
| 421 | defer allocator.free(cf_nfkd_cf_nfd_a); | ||
| 422 | const nfkd_cf_nfkd_cf_nfd_a = try self.nfkdCodePoints(allocator, cf_nfkd_cf_nfd_a); | ||
| 423 | defer allocator.free(nfkd_cf_nfkd_cf_nfd_a); | ||
| 424 | |||
| 425 | // Process b | ||
| 426 | const nfd_b = try self.nfxdCodePoints(allocator, b, .nfd); | ||
| 427 | defer allocator.free(nfd_b); | ||
| 428 | |||
| 429 | var need_frr_cf_nfd_b = false; | ||
| 430 | var cf_nfd_b: []const u21 = nfd_b; | ||
| 431 | if (self.changesWhenCaseFolded(nfd_b)) { | ||
| 432 | cf_nfd_b = try self.caseFold(allocator, nfd_b); | ||
| 433 | need_frr_cf_nfd_b = true; | ||
| 434 | } | ||
| 435 | defer if (need_frr_cf_nfd_b) allocator.free(cf_nfd_b); | ||
| 436 | |||
| 437 | const nfkd_cf_nfd_b = try self.nfkdCodePoints(allocator, cf_nfd_b); | ||
| 438 | defer allocator.free(nfkd_cf_nfd_b); | ||
| 439 | const cf_nfkd_cf_nfd_b = try self.caseFold(allocator, nfkd_cf_nfd_b); | ||
| 440 | defer allocator.free(cf_nfkd_cf_nfd_b); | ||
| 441 | const nfkd_cf_nfkd_cf_nfd_b = try self.nfkdCodePoints(allocator, cf_nfkd_cf_nfd_b); | ||
| 442 | defer allocator.free(nfkd_cf_nfkd_cf_nfd_b); | ||
| 443 | |||
| 444 | return mem.eql(u21, nfkd_cf_nfkd_cf_nfd_a, nfkd_cf_nfkd_cf_nfd_b); | ||
| 445 | } | ||
| 446 | |||
| 447 | test "eqlIgnoreCase" { | ||
| 448 | const allocator = testing.allocator; | ||
| 449 | var data = try NormData.init(allocator); | ||
| 450 | defer data.deinit(); | ||
| 451 | var n = Self{ .norm_data = &data }; | ||
| 452 | |||
| 453 | try testing.expect(try n.eqlIgnoreCase(allocator, "ascii only!", "ASCII Only!")); | ||
| 454 | |||
| 455 | const a = "Héllo World! \u{3d3}"; | ||
| 456 | const b = "He\u{301}llo World! \u{3a5}\u{301}"; | ||
| 457 | try testing.expect(try n.eqlIgnoreCase(allocator, a, b)); | ||
| 458 | |||
| 459 | const c = "He\u{301}llo World! \u{3d2}\u{301}"; | ||
| 460 | try testing.expect(try n.eqlIgnoreCase(allocator, a, c)); | ||
| 461 | } | ||
| 462 | |||
| 463 | // Composition (NFC, NFKC) | 371 | // Composition (NFC, NFKC) |
| 464 | 372 | ||
| 465 | fn isHangul(self: Self, cp: u21) bool { | 373 | fn isHangul(self: Self, cp: u21) bool { |