summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--build.zig38
-rw-r--r--codegen/canon.zig68
-rw-r--r--src/Canonical.zig45
-rw-r--r--src/NormData.zig22
-rw-r--r--src/Normalizer.zig155
-rw-r--r--src/main.zig11
6 files changed, 242 insertions, 97 deletions
diff --git a/build.zig b/build.zig
index 7cfb979..2c3ccdc 100644
--- a/build.zig
+++ b/build.zig
@@ -34,6 +34,15 @@ pub fn build(b: *std.Build) void {
34 const dwp_gen_out = run_dwp_gen_exe.addOutputFileArg("dwp.bin.z"); 34 const dwp_gen_out = run_dwp_gen_exe.addOutputFileArg("dwp.bin.z");
35 35
36 // Normalization properties 36 // Normalization properties
37 const canon_gen_exe = b.addExecutable(.{
38 .name = "canon",
39 .root_source_file = .{ .path = "codegen/canon.zig" },
40 .target = b.host,
41 .optimize = .Debug,
42 });
43 const run_canon_gen_exe = b.addRunArtifact(canon_gen_exe);
44 const canon_gen_out = run_canon_gen_exe.addOutputFileArg("canon.bin.z");
45
37 const ccc_gen_exe = b.addExecutable(.{ 46 const ccc_gen_exe = b.addExecutable(.{
38 .name = "ccc", 47 .name = "ccc",
39 .root_source_file = .{ .path = "codegen/ccc.zig" }, 48 .root_source_file = .{ .path = "codegen/ccc.zig" },
@@ -101,6 +110,21 @@ pub fn build(b: *std.Build) void {
101 }); 110 });
102 ccc_data.addAnonymousImport("ccc", .{ .root_source_file = ccc_gen_out }); 111 ccc_data.addAnonymousImport("ccc", .{ .root_source_file = ccc_gen_out });
103 112
113 const canon_data = b.createModule(.{
114 .root_source_file = .{ .path = "src/Canonical.zig" },
115 .target = target,
116 .optimize = optimize,
117 });
118 canon_data.addAnonymousImport("canon", .{ .root_source_file = canon_gen_out });
119
120 const norm_data = b.createModule(.{
121 .root_source_file = .{ .path = "src/NormData.zig" },
122 .target = target,
123 .optimize = optimize,
124 });
125 norm_data.addImport("CanonicalData", canon_data);
126 norm_data.addImport("CombiningClassData", ccc_data);
127
104 const norm = b.addModule("Normalizer", .{ 128 const norm = b.addModule("Normalizer", .{
105 .root_source_file = .{ .path = "src/Normalizer.zig" }, 129 .root_source_file = .{ .path = "src/Normalizer.zig" },
106 .target = target, 130 .target = target,
@@ -108,7 +132,7 @@ pub fn build(b: *std.Build) void {
108 }); 132 });
109 norm.addImport("code_point", code_point); 133 norm.addImport("code_point", code_point);
110 norm.addImport("ziglyph", ziglyph.module("ziglyph")); 134 norm.addImport("ziglyph", ziglyph.module("ziglyph"));
111 norm.addImport("CombiningClassData", ccc_data); 135 norm.addImport("NormData", norm_data);
112 136
113 // Benchmark rig 137 // Benchmark rig
114 const exe = b.addExecutable(.{ 138 const exe = b.addExecutable(.{
@@ -134,18 +158,18 @@ pub fn build(b: *std.Build) void {
134 158
135 // Tests 159 // Tests
136 const exe_unit_tests = b.addTest(.{ 160 const exe_unit_tests = b.addTest(.{
137 .root_source_file = .{ .path = "src/DisplayWidth.zig" }, 161 .root_source_file = .{ .path = "src/Normalizer.zig" },
138 .target = target, 162 .target = target,
139 .optimize = optimize, 163 .optimize = optimize,
140 }); 164 });
141 exe_unit_tests.root_module.addImport("ascii", ascii); 165 // exe_unit_tests.root_module.addImport("ascii", ascii);
142 exe_unit_tests.root_module.addImport("code_point", code_point); 166 exe_unit_tests.root_module.addImport("code_point", code_point);
143 // exe_unit_tests.root_module.addImport("GraphemeData", grapheme_data); 167 // exe_unit_tests.root_module.addImport("GraphemeData", grapheme_data);
144 exe_unit_tests.root_module.addImport("grapheme", grapheme); 168 // exe_unit_tests.root_module.addImport("grapheme", grapheme);
145 // exe_unit_tests.root_module.addImport("ziglyph", ziglyph.module("ziglyph")); 169 exe_unit_tests.root_module.addImport("ziglyph", ziglyph.module("ziglyph"));
146 // exe_unit_tests.root_module.addAnonymousImport("normp", .{ .root_source_file = normp_gen_out }); 170 // exe_unit_tests.root_module.addAnonymousImport("normp", .{ .root_source_file = normp_gen_out });
147 exe_unit_tests.root_module.addImport("DisplayWidthData", dw_data); 171 // exe_unit_tests.root_module.addImport("DisplayWidthData", dw_data);
148 // exe_unit_tests.root_module.addImport("CombiningClassData", ccc_data); 172 exe_unit_tests.root_module.addImport("NormData", norm_data);
149 173
150 const run_exe_unit_tests = b.addRunArtifact(exe_unit_tests); 174 const run_exe_unit_tests = b.addRunArtifact(exe_unit_tests);
151 175
diff --git a/codegen/canon.zig b/codegen/canon.zig
new file mode 100644
index 0000000..9d72edd
--- /dev/null
+++ b/codegen/canon.zig
@@ -0,0 +1,68 @@
const std = @import("std");
const builtin = @import("builtin");

/// Extracts canonical decomposition mappings from UnicodeData.txt and writes
/// them as a deflate-compressed binary table to the output path given as the
/// first CLI argument.
///
/// Record layout (native endian): len (u8, counts the code point itself:
/// 2 = singleton, 3 = pair), then `len` u24 code points. A zero length byte
/// terminates the stream.
pub fn main() !void {
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const allocator = arena.allocator();

    // Process UnicodeData.txt; field 5 holds the decomposition mapping.
    var in_file = try std.fs.cwd().openFile("data/unicode/UnicodeData.txt", .{});
    defer in_file.close();
    var in_buf = std.io.bufferedReader(in_file.reader());
    const in_reader = in_buf.reader();

    var args_iter = try std.process.argsWithAllocator(allocator);
    defer args_iter.deinit();
    _ = args_iter.skip();
    const output_path = args_iter.next() orelse @panic("No output file arg!");

    const compressor = std.compress.deflate.compressor;
    var out_file = try std.fs.cwd().createFile(output_path, .{});
    defer out_file.close();
    var out_comp = try compressor(allocator, out_file.writer(), .{ .level = .best_compression });
    defer out_comp.deinit();
    const writer = out_comp.writer();

    const endian = builtin.cpu.arch.endian();
    var line_buf: [4096]u8 = undefined;

    lines: while (try in_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
        if (line.len == 0) continue;

        var field_iter = std.mem.splitScalar(u8, line, ';');
        // cps[0] = the code point; cps[1..] = its canonical decomposition.
        var cps: [3]u24 = undefined;
        var len: u8 = 2;

        var i: usize = 0;
        while (field_iter.next()) |field| : (i += 1) {
            switch (i) {
                0 => cps[0] = try std.fmt.parseInt(u24, field, 16),

                // Skip range markers like "<CJK Ideograph, First>" (name field).
                // Was `2 => if (line[0] == '<')` — dead code, since a
                // UnicodeData line always starts with a hex digit.
                1 => if (field.len != 0 and field[0] == '<') continue :lines,

                5 => {
                    // Empty, or a "<...>"-tagged compatibility mapping: not canonical.
                    if (field.len == 0 or field[0] == '<') continue :lines;

                    if (std.mem.indexOfScalar(u8, field, ' ')) |space| {
                        // Canonical pair decomposition.
                        len = 3;
                        cps[1] = try std.fmt.parseInt(u24, field[0..space], 16);
                        cps[2] = try std.fmt.parseInt(u24, field[space + 1 ..], 16);
                    } else {
                        // Singleton decomposition.
                        cps[1] = try std.fmt.parseInt(u24, field, 16);
                    }
                },

                else => {},
            }
        }

        try writer.writeInt(u8, len, endian);
        for (cps[0..len]) |cp| try writer.writeInt(u24, cp, endian);
    }

    // Zero length byte terminates the stream. Was written as u16; the consumer
    // reads u8 lengths, so a u8 matches the record format exactly.
    try writer.writeInt(u8, 0, endian);
    try out_comp.flush();
}
diff --git a/src/Canonical.zig b/src/Canonical.zig
new file mode 100644
index 0000000..d54e828
--- /dev/null
+++ b/src/Canonical.zig
@@ -0,0 +1,45 @@
const std = @import("std");
const builtin = @import("builtin");
const compress = std.compress;
const mem = std.mem;

allocator: mem.Allocator,
// Full-range lookup table: nfd[cp] is the canonical decomposition of cp.
// .{ cp, 0 } means identity (no decomposition); a nonzero second element
// means a pair decomposition.
nfd: [][2]u21 = undefined,

const Self = @This();

/// Decompresses the embedded canonical-decomposition table ("canon") into a
/// lookup array covering all code points (0..0x110000). Caller must `deinit`.
pub fn init(allocator: mem.Allocator) !Self {
    const decompressor = compress.deflate.decompressor;
    const in_bytes = @embedFile("canon");
    var in_fbs = std.io.fixedBufferStream(in_bytes);
    var in_decomp = try decompressor(allocator, in_fbs.reader(), null);
    defer in_decomp.deinit();
    var reader = in_decomp.reader();

    const endian = builtin.cpu.arch.endian();

    var self = Self{
        .allocator = allocator,
        .nfd = try allocator.alloc([2]u21, 0x110000),
    };
    // Don't leak the table if a readInt below fails part-way through decoding.
    errdefer allocator.free(self.nfd);

    // Default: every code point decomposes to itself.
    for (0..0x110000) |i| self.nfd[i] = .{ @intCast(i), 0 };

    while (true) {
        // Record: len (u8, counts the code point itself), cp, len - 1 cps.
        const len: u8 = try reader.readInt(u8, endian);
        if (len == 0) break; // Zero length terminates the stream.
        const cp = try reader.readInt(u24, endian);
        self.nfd[cp][0] = @intCast(try reader.readInt(u24, endian));
        if (len == 3) self.nfd[cp][1] = @intCast(try reader.readInt(u24, endian));
    }

    return self;
}

pub fn deinit(self: *Self) void {
    self.allocator.free(self.nfd);
}

/// Returns canonical decomposition for `cp`; `.{ cp, 0 }` if it has none.
pub inline fn toNfd(self: Self, cp: u21) [2]u21 {
    return self.nfd[cp];
}
diff --git a/src/NormData.zig b/src/NormData.zig
new file mode 100644
index 0000000..c6fa8e8
--- /dev/null
+++ b/src/NormData.zig
@@ -0,0 +1,22 @@
const std = @import("std");
const mem = std.mem;

const CanonData = @import("CanonicalData");
const CccData = @import("CombiningClassData");

canon_data: CanonData,
ccc_data: CccData,

const Self = @This();

/// Initializes all normalization data tables. Caller must call `deinit`.
pub fn init(allocator: mem.Allocator) !Self {
    var canon_data = try CanonData.init(allocator);
    // Don't leak the canonical table if the combining-class init fails.
    errdefer canon_data.deinit();

    return Self{
        .canon_data = canon_data,
        .ccc_data = try CccData.init(allocator),
    };
}

pub fn deinit(self: *Self) void {
    self.canon_data.deinit();
    self.ccc_data.deinit();
}
diff --git a/src/Normalizer.zig b/src/Normalizer.zig
index 6a19f47..848cf20 100644
--- a/src/Normalizer.zig
+++ b/src/Normalizer.zig
@@ -3,26 +3,26 @@
3//! string equality under different parameters related to normalization (see `eql`, `eqlCaseless`, `eqlIdentifiers`). 3//! string equality under different parameters related to normalization (see `eql`, `eqlCaseless`, `eqlIdentifiers`).
4 4
5const std = @import("std"); 5const std = @import("std");
6const testing = std.testing;
6 7
7const CodePointIterator = @import("code_point").Iterator; 8const CodePointIterator = @import("code_point").Iterator;
8const case_fold_map = @import("ziglyph").case_folding; 9const case_fold_map = @import("ziglyph").case_folding;
9const hangul_map = @import("ziglyph").hangul; 10const hangul_map = @import("ziglyph").hangul;
10const norm_props = @import("ziglyph").normalization_props; 11const norm_props = @import("ziglyph").normalization_props;
11pub const Data = @import("CombiningClassData");
12 12
13ccc_data: *Data, 13pub const NormData = @import("NormData");
14
14nfc_map: std.AutoHashMap([2]u21, u21), 15nfc_map: std.AutoHashMap([2]u21, u21),
15nfd_map: std.AutoHashMap(u21, [2]u21),
16nfkd_map: std.AutoHashMap(u21, [18]u21), 16nfkd_map: std.AutoHashMap(u21, [18]u21),
17norm_data: *NormData,
17 18
18const Self = @This(); 19const Self = @This();
19 20
20pub fn init(allocator: std.mem.Allocator, data: *Data) !Self { 21pub fn init(allocator: std.mem.Allocator, norm_data: *NormData) !Self {
21 var self = Self{ 22 var self = Self{
22 .ccc_data = data,
23 .nfc_map = std.AutoHashMap([2]u21, u21).init(allocator), 23 .nfc_map = std.AutoHashMap([2]u21, u21).init(allocator),
24 .nfd_map = std.AutoHashMap(u21, [2]u21).init(allocator),
25 .nfkd_map = std.AutoHashMap(u21, [18]u21).init(allocator), 24 .nfkd_map = std.AutoHashMap(u21, [18]u21).init(allocator),
25 .norm_data = norm_data,
26 }; 26 };
27 errdefer self.deinit(); 27 errdefer self.deinit();
28 28
@@ -46,24 +46,6 @@ pub fn init(allocator: std.mem.Allocator, data: *Data) !Self {
46 try self.nfc_map.put(.{ cp_a, cp_b }, cp_c); 46 try self.nfc_map.put(.{ cp_a, cp_b }, cp_c);
47 } 47 }
48 48
49 // Canonical decompositions
50 const decomp_file = @embedFile("autogen/canonical_decompositions.txt.deflate");
51 var decomp_stream = std.io.fixedBufferStream(decomp_file);
52 var decomp_decomp = try decompressor(allocator, decomp_stream.reader(), null);
53 defer decomp_decomp.deinit();
54
55 var decomp_buf = std.io.bufferedReader(decomp_decomp.reader());
56 const decomp_reader = decomp_buf.reader();
57
58 while (try decomp_reader.readUntilDelimiterOrEof(&buf, '\n')) |line| {
59 if (line.len == 0) continue;
60 var fields = std.mem.split(u8, line, ";");
61 const cp_a = try std.fmt.parseInt(u21, fields.next().?, 16);
62 const cp_b = try std.fmt.parseInt(u21, fields.next().?, 16);
63 const cp_c = try std.fmt.parseInt(u21, fields.next().?, 16);
64 try self.nfd_map.put(cp_a, .{ cp_b, cp_c });
65 }
66
67 // Compatibility decompositions 49 // Compatibility decompositions
68 const dekomp_file = @embedFile("autogen/compatibility_decompositions.txt.deflate"); 50 const dekomp_file = @embedFile("autogen/compatibility_decompositions.txt.deflate");
69 var dekomp_stream = std.io.fixedBufferStream(dekomp_file); 51 var dekomp_stream = std.io.fixedBufferStream(dekomp_file);
@@ -92,14 +74,14 @@ pub fn init(allocator: std.mem.Allocator, data: *Data) !Self {
92 74
93pub fn deinit(self: *Self) void { 75pub fn deinit(self: *Self) void {
94 self.nfc_map.deinit(); 76 self.nfc_map.deinit();
95 self.nfd_map.deinit();
96 self.nfkd_map.deinit(); 77 self.nfkd_map.deinit();
97} 78}
98 79
99test "init / deinit" { 80test "init / deinit" {
100 var data = try Data.init(std.testing.allocator); 81 const allocator = testing.allocator;
101 defer data.deinit(); 82 var norm_data = try NormData.init(allocator);
102 var n = try init(std.testing.allocator, &data); 83 defer norm_data.deinit();
84 var n = try init(allocator, &norm_data);
103 defer n.deinit(); 85 defer n.deinit();
104} 86}
105 87
@@ -169,17 +151,22 @@ const Decomp = struct {
169pub fn mapping(self: Self, cp: u21, form: Form) Decomp { 151pub fn mapping(self: Self, cp: u21, form: Form) Decomp {
170 std.debug.assert(form == .nfd or form == .nfkd); 152 std.debug.assert(form == .nfd or form == .nfkd);
171 153
172 var dc = Decomp{ .form = .same }; 154 var dc = Decomp{ .form = .nfd };
173 dc.cps[0] = cp; 155 const canon_dc = self.norm_data.canon_data.toNfd(cp);
156 const len: usize = if (canon_dc[1] == 0) 1 else 2;
157
158 if (len == 1 and canon_dc[0] == cp) {
159 dc.form = .same;
160 dc.cps[0] = cp;
161 } else {
162 @memcpy(dc.cps[0..len], canon_dc[0..len]);
163 }
174 164
175 if (self.nfkd_map.get(cp)) |array| { 165 if (self.nfkd_map.get(cp)) |array| {
176 if (form != .nfd) { 166 if (form != .nfd) {
177 dc.form = .nfkd; 167 dc.form = .nfkd;
178 @memcpy(dc.cps[0..array.len], &array); 168 @memcpy(dc.cps[0..array.len], &array);
179 } 169 }
180 } else if (self.nfd_map.get(cp)) |array| {
181 dc.form = .nfd;
182 @memcpy(dc.cps[0..array.len], &array);
183 } 170 }
184 171
185 return dc; 172 return dc;
@@ -244,10 +231,10 @@ pub fn decompose(self: Self, cp: u21, form: Form) Decomp {
244} 231}
245 232
246test "decompose" { 233test "decompose" {
247 const allocator = std.testing.allocator; 234 const allocator = testing.allocator;
248 var data = try Data.init(allocator); 235 var norm_data = try NormData.init(allocator);
249 defer data.deinit(); 236 defer norm_data.deinit();
250 var n = try init(allocator, &data); 237 var n = try init(allocator, &norm_data);
251 defer n.deinit(); 238 defer n.deinit();
252 239
253 var dc = n.decompose('é', .nfd); 240 var dc = n.decompose('é', .nfd);
@@ -314,7 +301,7 @@ pub const Result = struct {
314 301
315// Compares code points by Canonical Combining Class order. 302// Compares code points by Canonical Combining Class order.
316fn cccLess(self: Self, lhs: u21, rhs: u21) bool { 303fn cccLess(self: Self, lhs: u21, rhs: u21) bool {
317 return self.ccc_data.ccc(lhs) < self.ccc_data.ccc(rhs); 304 return self.norm_data.ccc_data.ccc(lhs) < self.norm_data.ccc_data.ccc(rhs);
318} 305}
319 306
320// Applies the Canonical Sorting Algorithm. 307// Applies the Canonical Sorting Algorithm.
@@ -322,7 +309,7 @@ fn canonicalSort(self: Self, cps: []u21) void {
322 var i: usize = 0; 309 var i: usize = 0;
323 while (i < cps.len) : (i += 1) { 310 while (i < cps.len) : (i += 1) {
324 const start: usize = i; 311 const start: usize = i;
325 while (i < cps.len and self.ccc_data.ccc(cps[i]) != 0) : (i += 1) {} 312 while (i < cps.len and self.norm_data.ccc_data.ccc(cps[i]) != 0) : (i += 1) {}
326 std.mem.sort(u21, cps[start..i], self, cccLess); 313 std.mem.sort(u21, cps[start..i], self, cccLess);
327 } 314 }
328} 315}
@@ -368,10 +355,10 @@ fn nfxd(self: Self, allocator: std.mem.Allocator, str: []const u8, form: Form) !
368} 355}
369 356
370test "nfd ASCII / no-alloc" { 357test "nfd ASCII / no-alloc" {
371 const allocator = std.testing.allocator; 358 const allocator = testing.allocator;
372 var data = try Data.init(allocator); 359 var norm_data = try NormData.init(allocator);
373 defer data.deinit(); 360 defer norm_data.deinit();
374 var n = try init(allocator, &data); 361 var n = try init(allocator, &norm_data);
375 defer n.deinit(); 362 defer n.deinit();
376 363
377 var result = try n.nfd(allocator, "Hello World!"); 364 var result = try n.nfd(allocator, "Hello World!");
@@ -381,10 +368,10 @@ test "nfd ASCII / no-alloc" {
381} 368}
382 369
383test "nfd !ASCII / alloc" { 370test "nfd !ASCII / alloc" {
384 const allocator = std.testing.allocator; 371 const allocator = testing.allocator;
385 var data = try Data.init(allocator); 372 var norm_data = try NormData.init(allocator);
386 defer data.deinit(); 373 defer norm_data.deinit();
387 var n = try init(allocator, &data); 374 var n = try init(allocator, &norm_data);
388 defer n.deinit(); 375 defer n.deinit();
389 376
390 var result = try n.nfd(allocator, "Héllo World! \u{3d3}"); 377 var result = try n.nfd(allocator, "Héllo World! \u{3d3}");
@@ -394,10 +381,10 @@ test "nfd !ASCII / alloc" {
394} 381}
395 382
396test "nfkd ASCII / no-alloc" { 383test "nfkd ASCII / no-alloc" {
397 const allocator = std.testing.allocator; 384 const allocator = testing.allocator;
398 var data = try Data.init(allocator); 385 var norm_data = try NormData.init(allocator);
399 defer data.deinit(); 386 defer norm_data.deinit();
400 var n = try init(allocator, &data); 387 var n = try init(allocator, &norm_data);
401 defer n.deinit(); 388 defer n.deinit();
402 389
403 var result = try n.nfkd(allocator, "Hello World!"); 390 var result = try n.nfkd(allocator, "Hello World!");
@@ -407,10 +394,10 @@ test "nfkd ASCII / no-alloc" {
407} 394}
408 395
409test "nfkd !ASCII / alloc" { 396test "nfkd !ASCII / alloc" {
410 const allocator = std.testing.allocator; 397 const allocator = testing.allocator;
411 var data = try Data.init(allocator); 398 var norm_data = try NormData.init(allocator);
412 defer data.deinit(); 399 defer norm_data.deinit();
413 var n = try init(allocator, &data); 400 var n = try init(allocator, &norm_data);
414 defer n.deinit(); 401 defer n.deinit();
415 402
416 var result = try n.nfkd(allocator, "Héllo World! \u{3d3}"); 403 var result = try n.nfkd(allocator, "Héllo World! \u{3d3}");
@@ -426,7 +413,7 @@ fn isHangul(cp: u21) bool {
426} 413}
427 414
428fn isNonHangulStarter(self: Self, cp: u21) bool { 415fn isNonHangulStarter(self: Self, cp: u21) bool {
429 return !isHangul(cp) and self.ccc_data.isStarter(cp); 416 return !isHangul(cp) and self.norm_data.ccc_data.isStarter(cp);
430} 417}
431 418
432/// Normalizes `str` to NFC. 419/// Normalizes `str` to NFC.
@@ -468,7 +455,7 @@ fn nfxc(self: Self, allocator: std.mem.Allocator, str: []const u8, form: Form) !
468 455
469 block_check: while (i < d_list.items.len) : (i += 1) { 456 block_check: while (i < d_list.items.len) : (i += 1) {
470 const C = d_list.items[i]; 457 const C = d_list.items[i];
471 const cc_C = self.ccc_data.ccc(C); 458 const cc_C = self.norm_data.ccc_data.ccc(C);
472 var starter_index: ?usize = null; 459 var starter_index: ?usize = null;
473 var j: usize = i; 460 var j: usize = i;
474 461
@@ -476,10 +463,10 @@ fn nfxc(self: Self, allocator: std.mem.Allocator, str: []const u8, form: Form) !
476 j -= 1; 463 j -= 1;
477 464
478 // Check for starter. 465 // Check for starter.
479 if (self.ccc_data.isStarter(d_list.items[j])) { 466 if (self.norm_data.ccc_data.isStarter(d_list.items[j])) {
480 if (i - j > 1) { // If there's distance between the starting point and the current position. 467 if (i - j > 1) { // If there's distance between the starting point and the current position.
481 for (d_list.items[(j + 1)..i]) |B| { 468 for (d_list.items[(j + 1)..i]) |B| {
482 const cc_B = self.ccc_data.ccc(B); 469 const cc_B = self.norm_data.ccc_data.ccc(B);
483 // Check for blocking conditions. 470 // Check for blocking conditions.
484 if (isHangul(C)) { 471 if (isHangul(C)) {
485 if (cc_B != 0 or self.isNonHangulStarter(B)) continue :block_check; 472 if (cc_B != 0 or self.isNonHangulStarter(B)) continue :block_check;
@@ -563,10 +550,10 @@ fn nfxc(self: Self, allocator: std.mem.Allocator, str: []const u8, form: Form) !
563} 550}
564 551
565test "nfc" { 552test "nfc" {
566 const allocator = std.testing.allocator; 553 const allocator = testing.allocator;
567 var data = try Data.init(allocator); 554 var norm_data = try NormData.init(allocator);
568 defer data.deinit(); 555 defer norm_data.deinit();
569 var n = try init(allocator, &data); 556 var n = try init(allocator, &norm_data);
570 defer n.deinit(); 557 defer n.deinit();
571 558
572 var result = try n.nfc(allocator, "Complex char: \u{3D2}\u{301}"); 559 var result = try n.nfc(allocator, "Complex char: \u{3D2}\u{301}");
@@ -576,10 +563,10 @@ test "nfc" {
576} 563}
577 564
578test "nfkc" { 565test "nfkc" {
579 const allocator = std.testing.allocator; 566 const allocator = testing.allocator;
580 var data = try Data.init(allocator); 567 var norm_data = try NormData.init(allocator);
581 defer data.deinit(); 568 defer norm_data.deinit();
582 var n = try init(allocator, &data); 569 var n = try init(allocator, &norm_data);
583 defer n.deinit(); 570 defer n.deinit();
584 571
585 var result = try n.nfkc(allocator, "Complex char: \u{03A5}\u{0301}"); 572 var result = try n.nfkc(allocator, "Complex char: \u{03A5}\u{0301}");
@@ -637,10 +624,10 @@ pub fn eql(self: Self, allocator: std.mem.Allocator, a: []const u8, b: []const u
637} 624}
638 625
639test "eql" { 626test "eql" {
640 const allocator = std.testing.allocator; 627 const allocator = testing.allocator;
641 var data = try Data.init(allocator); 628 var norm_data = try NormData.init(allocator);
642 defer data.deinit(); 629 defer norm_data.deinit();
643 var n = try init(allocator, &data); 630 var n = try init(allocator, &norm_data);
644 defer n.deinit(); 631 defer n.deinit();
645 632
646 try std.testing.expect(try n.eql(allocator, "foé", "foe\u{0301}")); 633 try std.testing.expect(try n.eql(allocator, "foé", "foe\u{0301}"));
@@ -706,10 +693,10 @@ pub fn eqlCaseless(self: Self, allocator: std.mem.Allocator, a: []const u8, b: [
706} 693}
707 694
708test "eqlCaseless" { 695test "eqlCaseless" {
709 const allocator = std.testing.allocator; 696 const allocator = testing.allocator;
710 var data = try Data.init(allocator); 697 var norm_data = try NormData.init(allocator);
711 defer data.deinit(); 698 defer norm_data.deinit();
712 var n = try init(allocator, &data); 699 var n = try init(allocator, &norm_data);
713 defer n.deinit(); 700 defer n.deinit();
714 701
715 try std.testing.expect(try n.eqlCaseless(allocator, "Foϓ", "fo\u{03D2}\u{0301}")); 702 try std.testing.expect(try n.eqlCaseless(allocator, "Foϓ", "fo\u{03D2}\u{0301}"));
@@ -719,7 +706,7 @@ test "eqlCaseless" {
719// FCD 706// FCD
720fn getLeadCcc(self: Self, cp: u21) u8 { 707fn getLeadCcc(self: Self, cp: u21) u8 {
721 const dc = self.mapping(cp, .nfd); 708 const dc = self.mapping(cp, .nfd);
722 return self.ccc_data.ccc(dc.cps[0]); 709 return self.norm_data.ccc_data.ccc(dc.cps[0]);
723} 710}
724 711
725fn getTrailCcc(self: Self, cp: u21) u8 { 712fn getTrailCcc(self: Self, cp: u21) u8 {
@@ -727,7 +714,7 @@ fn getTrailCcc(self: Self, cp: u21) u8 {
727 const len = for (dc.cps, 0..) |dcp, i| { 714 const len = for (dc.cps, 0..) |dcp, i| {
728 if (dcp == 0) break i; 715 if (dcp == 0) break i;
729 } else dc.cps.len; 716 } else dc.cps.len;
730 return self.ccc_data.ccc(dc.cps[len - 1]); 717 return self.norm_data.ccc_data.ccc(dc.cps[len - 1]);
731} 718}
732 719
733/// Fast check to detect if a string is already in NFC or NFD form. 720/// Fast check to detect if a string is already in NFC or NFD form.
@@ -743,10 +730,10 @@ pub fn isFcd(self: Self, str: []const u8) bool {
743} 730}
744 731
745test "isFcd" { 732test "isFcd" {
746 const allocator = std.testing.allocator; 733 const allocator = testing.allocator;
747 var data = try Data.init(allocator); 734 var norm_data = try NormData.init(allocator);
748 defer data.deinit(); 735 defer norm_data.deinit();
749 var n = try init(allocator, &data); 736 var n = try init(allocator, &norm_data);
750 defer n.deinit(); 737 defer n.deinit();
751 738
752 const is_nfc = "José \u{3D3}"; 739 const is_nfc = "José \u{3D3}";
@@ -764,9 +751,9 @@ test "Unicode normalization tests" {
764 defer arena.deinit(); 751 defer arena.deinit();
765 var allocator = arena.allocator(); 752 var allocator = arena.allocator();
766 753
767 var data = try Data.init(allocator); 754 var norm_data = try NormData.init(allocator);
768 defer data.deinit(); 755 defer norm_data.deinit();
769 var n = try init(allocator, &data); 756 var n = try init(allocator, &norm_data);
770 defer n.deinit(); 757 defer n.deinit();
771 758
772 var file = try std.fs.cwd().openFile("data/unicode/NormalizationTest.txt", .{}); 759 var file = try std.fs.cwd().openFile("data/unicode/NormalizationTest.txt", .{});
diff --git a/src/main.zig b/src/main.zig
index 57db05b..d1a0bb3 100644
--- a/src/main.zig
+++ b/src/main.zig
@@ -17,7 +17,7 @@ const std = @import("std");
17// const ascii = std.ascii; 17// const ascii = std.ascii;
18 18
19// const norm = @import("ziglyph").Normalizer; 19// const norm = @import("ziglyph").Normalizer;
20const Data = @import("Normalizer").Data; 20const NormData = @import("Normalizer").NormData;
21const norm = @import("Normalizer"); 21const norm = @import("Normalizer");
22 22
23pub fn main() !void { 23pub fn main() !void {
@@ -32,10 +32,9 @@ pub fn main() !void {
32 const input = try std.fs.cwd().readFileAlloc(allocator, in_path, std.math.maxInt(u32)); 32 const input = try std.fs.cwd().readFileAlloc(allocator, in_path, std.math.maxInt(u32));
33 defer allocator.free(input); 33 defer allocator.free(input);
34 34
35 var data = try Data.init(allocator); 35 var norm_data = try NormData.init(allocator);
36 defer data.deinit(); 36 defer norm_data.deinit();
37 37 var n = try norm.init(allocator, &norm_data);
38 var n = try norm.init(allocator, &data);
39 defer n.deinit(); 38 defer n.deinit();
40 // var n = try norm.init(allocator); 39 // var n = try norm.init(allocator);
41 // defer n.deinit(); 40 // defer n.deinit();
@@ -53,7 +52,7 @@ pub fn main() !void {
53 // while (iter.next()) |_| result += 1; 52 // while (iter.next()) |_| result += 1;
54 // while (iter.next()) |line| result += strWidth(line, &data); 53 // while (iter.next()) |line| result += strWidth(line, &data);
55 while (iter.next()) |line| { 54 while (iter.next()) |line| {
56 var nfc = try n.nfc(allocator, line); 55 var nfc = try n.nfd(allocator, line);
57 result += nfc.slice.len; 56 result += nfc.slice.len;
58 nfc.deinit(); 57 nfc.deinit();
59 } 58 }