summaryrefslogtreecommitdiff
path: root/src/Normalizer.zig
diff options
context:
space:
mode:
Diffstat (limited to 'src/Normalizer.zig')
-rw-r--r--src/Normalizer.zig98
1 files changed, 3 insertions, 95 deletions
diff --git a/src/Normalizer.zig b/src/Normalizer.zig
index 5a26dfa..3ff157c 100644
--- a/src/Normalizer.zig
+++ b/src/Normalizer.zig
@@ -18,7 +18,7 @@ const ascii = @import("ascii");
18const CodePointIterator = @import("code_point").Iterator; 18const CodePointIterator = @import("code_point").Iterator;
19pub const NormData = @import("NormData"); 19pub const NormData = @import("NormData");
20 20
21norm_data: *NormData, 21norm_data: *const NormData,
22 22
23const Self = @This(); 23const Self = @This();
24 24
@@ -255,7 +255,7 @@ pub fn nfkd(self: Self, allocator: mem.Allocator, str: []const u8) !Result {
255 return self.nfxd(allocator, str, .nfkd); 255 return self.nfxd(allocator, str, .nfkd);
256} 256}
257 257
258fn nfxdCodePoints(self: Self, allocator: mem.Allocator, str: []const u8, form: Form) ![]u21 { 258pub fn nfxdCodePoints(self: Self, allocator: mem.Allocator, str: []const u8, form: Form) ![]u21 {
259 var dcp_list = std.ArrayList(u21).init(allocator); 259 var dcp_list = std.ArrayList(u21).init(allocator);
260 defer dcp_list.deinit(); 260 defer dcp_list.deinit();
261 261
@@ -343,28 +343,7 @@ test "nfkd !ASCII / alloc" {
343 try testing.expectEqualStrings("He\u{301}llo World! \u{3a5}\u{301}", result.slice); 343 try testing.expectEqualStrings("He\u{301}llo World! \u{3a5}\u{301}", result.slice);
344} 344}
345 345
346fn caseFold( 346pub fn nfkdCodePoints(
347 self: Self,
348 allocator: mem.Allocator,
349 cps: []const u21,
350) ![]const u21 {
351 var cfcps = std.ArrayList(u21).init(allocator);
352 defer cfcps.deinit();
353
354 for (cps) |cp| {
355 const cf = self.norm_data.fold_data.caseFold(cp);
356
357 if (cf.len == 0) {
358 try cfcps.append(cp);
359 } else {
360 try cfcps.appendSlice(cf);
361 }
362 }
363
364 return try cfcps.toOwnedSlice();
365}
366
367fn nfkdCodePoints(
368 self: Self, 347 self: Self,
369 allocator: mem.Allocator, 348 allocator: mem.Allocator,
370 cps: []const u21, 349 cps: []const u21,
@@ -389,77 +368,6 @@ fn nfkdCodePoints(
389 return try dcp_list.toOwnedSlice(); 368 return try dcp_list.toOwnedSlice();
390} 369}
391 370
392fn changesWhenCaseFolded(self: Self, cps: []const u21) bool {
393 return for (cps) |cp| {
394 if (self.norm_data.fold_data.changesWhenCaseFolded(cp)) break true;
395 } else false;
396}
397
398pub fn eqlIgnoreCase(
399 self: Self,
400 allocator: mem.Allocator,
401 a: []const u8,
402 b: []const u8,
403) !bool {
404 if (ascii.isAsciiOnly(a) and ascii.isAsciiOnly(b)) return std.ascii.eqlIgnoreCase(a, b);
405
406 // Process a
407 const nfd_a = try self.nfxdCodePoints(allocator, a, .nfd);
408 defer allocator.free(nfd_a);
409
410 var need_frr_cf_nfd_a = false;
411 var cf_nfd_a: []const u21 = nfd_a;
412 if (self.changesWhenCaseFolded(nfd_a)) {
413 cf_nfd_a = try self.caseFold(allocator, nfd_a);
414 need_frr_cf_nfd_a = true;
415 }
416 defer if (need_frr_cf_nfd_a) allocator.free(cf_nfd_a);
417
418 const nfkd_cf_nfd_a = try self.nfkdCodePoints(allocator, cf_nfd_a);
419 defer allocator.free(nfkd_cf_nfd_a);
420 const cf_nfkd_cf_nfd_a = try self.caseFold(allocator, nfkd_cf_nfd_a);
421 defer allocator.free(cf_nfkd_cf_nfd_a);
422 const nfkd_cf_nfkd_cf_nfd_a = try self.nfkdCodePoints(allocator, cf_nfkd_cf_nfd_a);
423 defer allocator.free(nfkd_cf_nfkd_cf_nfd_a);
424
425 // Process b
426 const nfd_b = try self.nfxdCodePoints(allocator, b, .nfd);
427 defer allocator.free(nfd_b);
428
429 var need_frr_cf_nfd_b = false;
430 var cf_nfd_b: []const u21 = nfd_b;
431 if (self.changesWhenCaseFolded(nfd_b)) {
432 cf_nfd_b = try self.caseFold(allocator, nfd_b);
433 need_frr_cf_nfd_b = true;
434 }
435 defer if (need_frr_cf_nfd_b) allocator.free(cf_nfd_b);
436
437 const nfkd_cf_nfd_b = try self.nfkdCodePoints(allocator, cf_nfd_b);
438 defer allocator.free(nfkd_cf_nfd_b);
439 const cf_nfkd_cf_nfd_b = try self.caseFold(allocator, nfkd_cf_nfd_b);
440 defer allocator.free(cf_nfkd_cf_nfd_b);
441 const nfkd_cf_nfkd_cf_nfd_b = try self.nfkdCodePoints(allocator, cf_nfkd_cf_nfd_b);
442 defer allocator.free(nfkd_cf_nfkd_cf_nfd_b);
443
444 return mem.eql(u21, nfkd_cf_nfkd_cf_nfd_a, nfkd_cf_nfkd_cf_nfd_b);
445}
446
447test "eqlIgnoreCase" {
448 const allocator = testing.allocator;
449 var data = try NormData.init(allocator);
450 defer data.deinit();
451 var n = Self{ .norm_data = &data };
452
453 try testing.expect(try n.eqlIgnoreCase(allocator, "ascii only!", "ASCII Only!"));
454
455 const a = "Héllo World! \u{3d3}";
456 const b = "He\u{301}llo World! \u{3a5}\u{301}";
457 try testing.expect(try n.eqlIgnoreCase(allocator, a, b));
458
459 const c = "He\u{301}llo World! \u{3d2}\u{301}";
460 try testing.expect(try n.eqlIgnoreCase(allocator, a, c));
461}
462
463// Composition (NFC, NFKC) 371// Composition (NFC, NFKC)
464 372
465fn isHangul(self: Self, cp: u21) bool { 373fn isHangul(self: Self, cp: u21) bool {