path: root/src/Normalize.zig
author     Sam Atman  2025-04-30 12:58:26 -0400
committer  Sam Atman  2025-04-30 13:01:37 -0400
commit     3c2c30bfbe861c6c48acd8d7507886787197a788 (patch)
tree       875ba35c1954b201207452b18a189ebd70c0b596 /src/Normalize.zig
parent     grapheme now Graphemes, Data files gone (diff)
Merge NormData with Normalize
Diffstat (limited to 'src/Normalize.zig')
-rw-r--r--  src/Normalize.zig  193
1 file changed, 108 insertions(+), 85 deletions(-)
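
For orientation, this change merges the old NormData setup into Normalize itself: the Unicode data tables become fields of the struct, and it gains its own init/setup/deinit. A minimal usage sketch based only on calls visible in this diff (the allocator and input string are illustrative placeholders, not part of the change):

    const n = try Normalize.init(allocator);
    defer n.deinit(allocator);

    const result = try n.nfc(allocator, "fo\u{03D2}\u{0301}");
    defer result.deinit(allocator);
    // result.slice holds the NFC form of the input.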
diff --git a/src/Normalize.zig b/src/Normalize.zig
index b738b27..4f014cf 100644
--- a/src/Normalize.zig
+++ b/src/Normalize.zig
@@ -2,23 +2,41 @@
 //! Unicode Normalization. You can normalize strings into NFC,
 //! NFKC, NFD, and NFKD normalization forms.
 
-const std = @import("std");
-const debug = std.debug;
-const assert = debug.assert;
-const fmt = std.fmt;
-const heap = std.heap;
-const mem = std.mem;
-const simd = std.simd;
-const testing = std.testing;
-const unicode = std.unicode;
-
-const ascii = @import("ascii");
-const CodePointIterator = @import("code_point").Iterator;
-pub const NormData = @import("NormData");
-
-norm_data: *const NormData,
+canon_data: CanonData = undefined,
+ccc_data: CccData = undefined,
+compat_data: CompatData = undefined,
+hangul_data: HangulData = undefined,
+normp_data: NormPropsData = undefined,
+
+const Normalize = @This();
+
+pub fn init(allocator: Allocator) !Normalize {
+    var norm: Normalize = undefined;
+    try norm.setup(allocator);
+    return norm;
+}
+
+pub fn setup(self: *Normalize, allocator: Allocator) !void {
+    self.canon_data = try CanonData.init(allocator);
+    errdefer self.canon_data.deinit(allocator);
+    self.ccc_data = try CccData.init(allocator);
+    errdefer self.ccc_data.deinit(allocator);
+    self.compat_data = try CompatData.init(allocator);
+    errdefer self.compat_data.deinit(allocator);
+    self.hangul_data = try HangulData.init(allocator);
+    errdefer self.hangul_data.deinit(allocator);
+    self.normp_data = try NormPropsData.init(allocator);
+}
 
-const Self = @This();
+pub fn deinit(norm: *const Normalize, allocator: Allocator) void {
+    // Reasonably safe (?)
+    var mut_norm = @constCast(norm);
+    mut_norm.canon_data.deinit(allocator);
+    mut_norm.ccc_data.deinit(allocator);
+    mut_norm.compat_data.deinit(allocator);
+    mut_norm.hangul_data.deinit(allocator);
+    mut_norm.normp_data.deinit(allocator);
+}
 
 const SBase: u21 = 0xAC00;
 const LBase: u21 = 0x1100;
@@ -30,8 +48,8 @@ const TCount: u21 = 28;
 const NCount: u21 = 588; // VCount * TCount
 const SCount: u21 = 11172; // LCount * NCount
 
-fn decomposeHangul(self: Self, cp: u21, buf: []u21) ?Decomp {
-    const kind = self.norm_data.hangul_data.syllable(cp);
+fn decomposeHangul(self: Normalize, cp: u21, buf: []u21) ?Decomp {
+    const kind = self.hangul_data.syllable(cp);
     if (kind != .LV and kind != .LVT) return null;
 
     const SIndex: u21 = cp - SBase;
@@ -90,21 +108,21 @@ const Decomp = struct {
 };
 
 // `mapping` retrieves the decomposition mapping for a code point as per the UCD.
-fn mapping(self: Self, cp: u21, form: Form) Decomp {
+fn mapping(self: Normalize, cp: u21, form: Form) Decomp {
     var dc = Decomp{};
 
     switch (form) {
         .nfd => {
-            dc.cps = self.norm_data.canon_data.toNfd(cp);
+            dc.cps = self.canon_data.toNfd(cp);
             if (dc.cps.len != 0) dc.form = .nfd;
         },
 
         .nfkd => {
-            dc.cps = self.norm_data.compat_data.toNfkd(cp);
+            dc.cps = self.compat_data.toNfkd(cp);
             if (dc.cps.len != 0) {
                 dc.form = .nfkd;
             } else {
-                dc.cps = self.norm_data.canon_data.toNfd(cp);
+                dc.cps = self.canon_data.toNfd(cp);
                 if (dc.cps.len != 0) dc.form = .nfkd;
             }
         },
@@ -117,7 +135,7 @@ fn mapping(self: Self, cp: u21, form: Form) Decomp {
 
 // `decompose` a code point to the specified normalization form, which should be either `.nfd` or `.nfkd`.
 fn decompose(
-    self: Self,
+    self: Normalize,
     cp: u21,
     form: Form,
     buf: []u21,
@@ -127,8 +145,8 @@ fn decompose(
 
     // NFD / NFKD quick checks.
     switch (form) {
-        .nfd => if (self.norm_data.normp_data.isNfd(cp)) return .{},
-        .nfkd => if (self.norm_data.normp_data.isNfkd(cp)) return .{},
+        .nfd => if (self.normp_data.isNfd(cp)) return .{},
+        .nfkd => if (self.normp_data.isNfkd(cp)) return .{},
         else => @panic("Normalizer.decompose only accepts form .nfd or .nfkd."),
     }
 
@@ -175,10 +193,8 @@ fn decompose(
 
 test "decompose" {
     const allocator = testing.allocator;
-    var data: NormData = undefined;
-    try NormData.init(&data, allocator);
-    defer data.deinit(allocator);
-    var n = Self{ .norm_data = &data };
+    const n = try Normalize.init(allocator);
+    defer n.deinit(allocator);
 
     var buf: [18]u21 = undefined;
 
@@ -228,42 +244,42 @@ pub const Result = struct {
     slice: []const u8,
 
     /// Ensures that the slice result is a copy of the input, by making a copy if it was not.
-    pub fn toOwned(result: Result, allocator: mem.Allocator) error{OutOfMemory}!Result {
+    pub fn toOwned(result: Result, allocator: Allocator) error{OutOfMemory}!Result {
         if (result.allocated) return result;
         return .{ .allocated = true, .slice = try allocator.dupe(u8, result.slice) };
     }
 
-    pub fn deinit(self: *const Result, allocator: mem.Allocator) void {
+    pub fn deinit(self: *const Result, allocator: Allocator) void {
         if (self.allocated) allocator.free(self.slice);
     }
 };
 
 // Compares code points by Canonical Combining Class order.
-fn cccLess(self: Self, lhs: u21, rhs: u21) bool {
-    return self.norm_data.ccc_data.ccc(lhs) < self.norm_data.ccc_data.ccc(rhs);
+fn cccLess(self: Normalize, lhs: u21, rhs: u21) bool {
+    return self.ccc_data.ccc(lhs) < self.ccc_data.ccc(rhs);
 }
 
 // Applies the Canonical Sorting Algorithm.
-fn canonicalSort(self: Self, cps: []u21) void {
+fn canonicalSort(self: Normalize, cps: []u21) void {
     var i: usize = 0;
     while (i < cps.len) : (i += 1) {
         const start: usize = i;
-        while (i < cps.len and self.norm_data.ccc_data.ccc(cps[i]) != 0) : (i += 1) {}
+        while (i < cps.len and self.ccc_data.ccc(cps[i]) != 0) : (i += 1) {}
         mem.sort(u21, cps[start..i], self, cccLess);
     }
 }
 
 /// Normalize `str` to NFD.
-pub fn nfd(self: Self, allocator: mem.Allocator, str: []const u8) mem.Allocator.Error!Result {
+pub fn nfd(self: Normalize, allocator: Allocator, str: []const u8) Allocator.Error!Result {
     return self.nfxd(allocator, str, .nfd);
 }
 
 /// Normalize `str` to NFKD.
-pub fn nfkd(self: Self, allocator: mem.Allocator, str: []const u8) mem.Allocator.Error!Result {
+pub fn nfkd(self: Normalize, allocator: Allocator, str: []const u8) Allocator.Error!Result {
     return self.nfxd(allocator, str, .nfkd);
 }
 
-pub fn nfxdCodePoints(self: Self, allocator: mem.Allocator, str: []const u8, form: Form) mem.Allocator.Error![]u21 {
+pub fn nfxdCodePoints(self: Normalize, allocator: Allocator, str: []const u8, form: Form) Allocator.Error![]u21 {
     var dcp_list = std.ArrayList(u21).init(allocator);
     defer dcp_list.deinit();
 
@@ -284,7 +300,7 @@ pub fn nfxdCodePoints(self: Self, allocator: mem.Allocator, str: []const u8, for
     return try dcp_list.toOwnedSlice();
 }
 
-fn nfxd(self: Self, allocator: mem.Allocator, str: []const u8, form: Form) mem.Allocator.Error!Result {
+fn nfxd(self: Normalize, allocator: Allocator, str: []const u8, form: Form) Allocator.Error!Result {
     // Quick checks.
     if (ascii.isAsciiOnly(str)) return Result{ .slice = str };
 
@@ -305,10 +321,8 @@ fn nfxd(self: Self, allocator: mem.Allocator, str: []const u8, form: Form) mem.A
 
 test "nfd ASCII / no-alloc" {
     const allocator = testing.allocator;
-    var data: NormData = undefined;
-    try NormData.init(&data, allocator);
-    defer data.deinit(allocator);
-    const n = Self{ .norm_data = &data };
+    const n = try Normalize.init(allocator);
+    defer n.deinit(allocator);
 
     const result = try n.nfd(allocator, "Hello World!");
     defer result.deinit(allocator);
@@ -318,10 +332,8 @@ test "nfd ASCII / no-alloc" {
 
 test "nfd !ASCII / alloc" {
     const allocator = testing.allocator;
-    var data: NormData = undefined;
-    try NormData.init(&data, allocator);
-    defer data.deinit(allocator);
-    const n = Self{ .norm_data = &data };
+    const n = try Normalize.init(allocator);
+    defer n.deinit(allocator);
 
     const result = try n.nfd(allocator, "Héllo World! \u{3d3}");
     defer result.deinit(allocator);
@@ -331,10 +343,8 @@ test "nfd !ASCII / alloc" {
 
 test "nfkd ASCII / no-alloc" {
     const allocator = testing.allocator;
-    var data: NormData = undefined;
-    try NormData.init(&data, allocator);
-    defer data.deinit(allocator);
-    const n = Self{ .norm_data = &data };
+    const n = try Normalize.init(allocator);
+    defer n.deinit(allocator);
 
     const result = try n.nfkd(allocator, "Hello World!");
     defer result.deinit(allocator);
@@ -344,10 +354,8 @@ test "nfkd ASCII / no-alloc" {
 
 test "nfkd !ASCII / alloc" {
     const allocator = testing.allocator;
-    var data: NormData = undefined;
-    try NormData.init(&data, allocator);
-    defer data.deinit(allocator);
-    const n = Self{ .norm_data = &data };
+    const n = try Normalize.init(allocator);
+    defer n.deinit(allocator);
 
     const result = try n.nfkd(allocator, "Héllo World! \u{3d3}");
     defer result.deinit(allocator);
@@ -356,10 +364,10 @@ test "nfkd !ASCII / alloc" {
 }
 
 pub fn nfdCodePoints(
-    self: Self,
-    allocator: mem.Allocator,
+    self: Normalize,
+    allocator: Allocator,
     cps: []const u21,
-) mem.Allocator.Error![]u21 {
+) Allocator.Error![]u21 {
     var dcp_list = std.ArrayList(u21).init(allocator);
     defer dcp_list.deinit();
 
@@ -381,10 +389,10 @@ pub fn nfdCodePoints(
 }
 
 pub fn nfkdCodePoints(
-    self: Self,
-    allocator: mem.Allocator,
+    self: Normalize,
+    allocator: Allocator,
     cps: []const u21,
-) mem.Allocator.Error![]u21 {
+) Allocator.Error![]u21 {
     var dcp_list = std.ArrayList(u21).init(allocator);
     defer dcp_list.deinit();
 
@@ -407,21 +415,21 @@ pub fn nfkdCodePoints(
 
 // Composition (NFC, NFKC)
 
-fn isHangul(self: Self, cp: u21) bool {
-    return cp >= 0x1100 and self.norm_data.hangul_data.syllable(cp) != .none;
+fn isHangul(self: Normalize, cp: u21) bool {
+    return cp >= 0x1100 and self.hangul_data.syllable(cp) != .none;
 }
 
 /// Normalizes `str` to NFC.
-pub fn nfc(self: Self, allocator: mem.Allocator, str: []const u8) mem.Allocator.Error!Result {
+pub fn nfc(self: Normalize, allocator: Allocator, str: []const u8) Allocator.Error!Result {
     return self.nfxc(allocator, str, .nfc);
 }
 
 /// Normalizes `str` to NFKC.
-pub fn nfkc(self: Self, allocator: mem.Allocator, str: []const u8) mem.Allocator.Error!Result {
+pub fn nfkc(self: Normalize, allocator: Allocator, str: []const u8) Allocator.Error!Result {
     return self.nfxc(allocator, str, .nfkc);
 }
 
-fn nfxc(self: Self, allocator: mem.Allocator, str: []const u8, form: Form) mem.Allocator.Error!Result {
+fn nfxc(self: Normalize, allocator: Allocator, str: []const u8, form: Form) Allocator.Error!Result {
     // Quick checks.
     if (ascii.isAsciiOnly(str)) return Result{ .slice = str };
     if (form == .nfc and isLatin1Only(str)) return Result{ .slice = str };
@@ -446,7 +454,7 @@ fn nfxc(self: Self, allocator: mem.Allocator, str: []const u8, form: Form) mem.A
     block_check: while (i < dcps.len) : (i += 1) {
         const C = dcps[i];
         if (C == tombstone) continue :block_check;
-        const cc_C = self.norm_data.ccc_data.ccc(C);
+        const cc_C = self.ccc_data.ccc(C);
         var starter_index: ?usize = null;
         var j: usize = i;
 
@@ -456,11 +464,11 @@ fn nfxc(self: Self, allocator: mem.Allocator, str: []const u8, form: Form) mem.A
             if (dcps[j] == tombstone) continue;
 
             // Check for starter.
-            if (self.norm_data.ccc_data.isStarter(dcps[j])) {
+            if (self.ccc_data.isStarter(dcps[j])) {
                 // Check for blocking conditions.
                 for (dcps[(j + 1)..i]) |B| {
                     if (B == tombstone) continue;
-                    const cc_B = self.norm_data.ccc_data.ccc(B);
+                    const cc_B = self.ccc_data.ccc(B);
                     if (cc_B != 0 and self.isHangul(C)) continue :block_check;
                     if (cc_B >= cc_C) continue :block_check;
                 }
@@ -484,8 +492,8 @@ fn nfxc(self: Self, allocator: mem.Allocator, str: []const u8, form: Form) mem.A
         // them algorithmically if possible.
         if (self.isHangul(L) and self.isHangul(C)) {
             // Get Hangul syllable types.
-            const l_stype = self.norm_data.hangul_data.syllable(L);
-            const c_stype = self.norm_data.hangul_data.syllable(C);
+            const l_stype = self.hangul_data.syllable(L);
+            const c_stype = self.hangul_data.syllable(C);
 
             if (l_stype == .LV and c_stype == .T) {
                 // LV, T canonical composition.
@@ -508,13 +516,13 @@ fn nfxc(self: Self, allocator: mem.Allocator, str: []const u8, form: Form) mem.A
         if (!processed_hangul) {
             // L, C are not Hangul, so check for primary composite
             // in the Unicode Character Database.
-            if (self.norm_data.canon_data.toNfc(.{ L, C })) |P| {
+            if (self.canon_data.toNfc(.{ L, C })) |P| {
                 // We have a primary composite P for L, C.
                 // We must check if P is not in the Full
                 // Composition Exclusions (FCX) list,
                 // preventing it from appearing in any
                 // composed form (NFC, NFKC).
-                if (!self.norm_data.normp_data.isFcx(P)) {
+                if (!self.normp_data.isFcx(P)) {
                     dcps[sidx] = P;
                     dcps[i] = tombstone; // Mark for deletion.
                     deleted += 1;
@@ -544,10 +552,8 @@ fn nfxc(self: Self, allocator: mem.Allocator, str: []const u8, form: Form) mem.A
 
 test "nfc" {
     const allocator = testing.allocator;
-    var data: NormData = undefined;
-    try NormData.init(&data, allocator);
-    defer data.deinit(allocator);
-    const n = Self{ .norm_data = &data };
+    const n = try Normalize.init(allocator);
+    defer n.deinit(allocator);
 
     const result = try n.nfc(allocator, "Complex char: \u{3D2}\u{301}");
     defer result.deinit(allocator);
@@ -557,10 +563,8 @@ test "nfc" {
 
 test "nfkc" {
     const allocator = testing.allocator;
-    var data: NormData = undefined;
-    try NormData.init(&data, allocator);
-    defer data.deinit(allocator);
-    const n = Self{ .norm_data = &data };
+    const n = try Normalize.init(allocator);
+    defer n.deinit(allocator);
 
     const result = try n.nfkc(allocator, "Complex char: \u{03A5}\u{0301}");
     defer result.deinit(allocator);
@@ -569,7 +573,7 @@ test "nfkc" {
 }
 
 /// Tests for equality of `a` and `b` after normalizing to NFC.
-pub fn eql(self: Self, allocator: mem.Allocator, a: []const u8, b: []const u8) !bool {
+pub fn eql(self: Normalize, allocator: Allocator, a: []const u8, b: []const u8) !bool {
     const norm_result_a = try self.nfc(allocator, a);
     defer norm_result_a.deinit(allocator);
     const norm_result_b = try self.nfc(allocator, b);
@@ -580,10 +584,8 @@ pub fn eql(self: Self, allocator: mem.Allocator, a: []const u8, b: []const u8) !
 
 test "eql" {
     const allocator = testing.allocator;
-    var data: NormData = undefined;
-    try NormData.init(&data, allocator);
-    defer data.deinit(allocator);
-    const n = Self{ .norm_data = &data };
+    const n = try Normalize.init(allocator);
+    defer n.deinit(allocator);
 
     try testing.expect(try n.eql(allocator, "foé", "foe\u{0301}"));
     try testing.expect(try n.eql(allocator, "foϓ", "fo\u{03D2}\u{0301}"));
@@ -629,3 +631,24 @@ test "isLatin1Only" {
     const not_latin1_only = "Héllo, World! \u{3d3}";
     try testing.expect(!isLatin1Only(not_latin1_only));
 }
+
+const std = @import("std");
+const debug = std.debug;
+const assert = debug.assert;
+const fmt = std.fmt;
+const heap = std.heap;
+const mem = std.mem;
+const simd = std.simd;
+const testing = std.testing;
+const unicode = std.unicode;
+const Allocator = std.mem.Allocator;
+
+const ascii = @import("ascii");
+const CodePointIterator = @import("code_point").Iterator;
+
+const CanonData = @import("CanonData");
+const CccData = @import("CombiningData");
+const CompatData = @import("CompatData");
+const FoldData = @import("FoldData");
+const HangulData = @import("HangulData");
+const NormPropsData = @import("NormPropsData");
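
For downstream callers, the test updates above suggest the migration path: the separate NormData value and the .norm_data pointer field disappear in favor of Normalize.init/deinit. A before/after sketch (Normalize stands in for the file-internal Self alias; allocator is a placeholder):

    // Before this commit:
    var data: NormData = undefined;
    try NormData.init(&data, allocator);
    defer data.deinit(allocator);
    const n = Normalize{ .norm_data = &data };

    // After this commit:
    const n = try Normalize.init(allocator);
    defer n.deinit(allocator);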