diff options
| -rw-r--r-- | build.zig | 27 | ||||
| -rw-r--r-- | codegen/compat.zig | 65 | ||||
| -rw-r--r-- | src/CanonData.zig (renamed from src/Canonical.zig) | 0 | ||||
| -rw-r--r-- | src/CombiningData.zig (renamed from src/CombiningClassData.zig) | 0 | ||||
| -rw-r--r-- | src/CompatData.zig | 50 | ||||
| -rw-r--r-- | src/NormData.zig | 8 | ||||
| -rw-r--r-- | src/Normalizer.zig | 129 | ||||
| -rw-r--r-- | src/WidthData.zig (renamed from src/DisplayWidthData.zig) | 0 | ||||
| -rw-r--r-- | src/autogen/compatibility_decompositions.txt.deflate | bin | 15332 -> 0 bytes | |||
| -rw-r--r-- | src/main.zig | 11 |
10 files changed, 184 insertions, 106 deletions
| @@ -43,6 +43,15 @@ pub fn build(b: *std.Build) void { | |||
| 43 | const run_canon_gen_exe = b.addRunArtifact(canon_gen_exe); | 43 | const run_canon_gen_exe = b.addRunArtifact(canon_gen_exe); |
| 44 | const canon_gen_out = run_canon_gen_exe.addOutputFileArg("canon.bin.z"); | 44 | const canon_gen_out = run_canon_gen_exe.addOutputFileArg("canon.bin.z"); |
| 45 | 45 | ||
| 46 | const compat_gen_exe = b.addExecutable(.{ | ||
| 47 | .name = "compat", | ||
| 48 | .root_source_file = .{ .path = "codegen/compat.zig" }, | ||
| 49 | .target = b.host, | ||
| 50 | .optimize = .Debug, | ||
| 51 | }); | ||
| 52 | const run_compat_gen_exe = b.addRunArtifact(compat_gen_exe); | ||
| 53 | const compat_gen_out = run_compat_gen_exe.addOutputFileArg("compat.bin.z"); | ||
| 54 | |||
| 46 | const ccc_gen_exe = b.addExecutable(.{ | 55 | const ccc_gen_exe = b.addExecutable(.{ |
| 47 | .name = "ccc", | 56 | .name = "ccc", |
| 48 | .root_source_file = .{ .path = "codegen/ccc.zig" }, | 57 | .root_source_file = .{ .path = "codegen/ccc.zig" }, |
| @@ -85,7 +94,7 @@ pub fn build(b: *std.Build) void { | |||
| 85 | 94 | ||
| 86 | // Fixed pitch font display width | 95 | // Fixed pitch font display width |
| 87 | const dw_data = b.createModule(.{ | 96 | const dw_data = b.createModule(.{ |
| 88 | .root_source_file = .{ .path = "src/DisplayWidthData.zig" }, | 97 | .root_source_file = .{ .path = "src/WidthData.zig" }, |
| 89 | .target = target, | 98 | .target = target, |
| 90 | .optimize = optimize, | 99 | .optimize = optimize, |
| 91 | }); | 100 | }); |
| @@ -104,26 +113,34 @@ pub fn build(b: *std.Build) void { | |||
| 104 | 113 | ||
| 105 | // Normalization | 114 | // Normalization |
| 106 | const ccc_data = b.createModule(.{ | 115 | const ccc_data = b.createModule(.{ |
| 107 | .root_source_file = .{ .path = "src/CombiningClassData.zig" }, | 116 | .root_source_file = .{ .path = "src/CombiningData.zig" }, |
| 108 | .target = target, | 117 | .target = target, |
| 109 | .optimize = optimize, | 118 | .optimize = optimize, |
| 110 | }); | 119 | }); |
| 111 | ccc_data.addAnonymousImport("ccc", .{ .root_source_file = ccc_gen_out }); | 120 | ccc_data.addAnonymousImport("ccc", .{ .root_source_file = ccc_gen_out }); |
| 112 | 121 | ||
| 113 | const canon_data = b.createModule(.{ | 122 | const canon_data = b.createModule(.{ |
| 114 | .root_source_file = .{ .path = "src/Canonical.zig" }, | 123 | .root_source_file = .{ .path = "src/CanonData.zig" }, |
| 115 | .target = target, | 124 | .target = target, |
| 116 | .optimize = optimize, | 125 | .optimize = optimize, |
| 117 | }); | 126 | }); |
| 118 | canon_data.addAnonymousImport("canon", .{ .root_source_file = canon_gen_out }); | 127 | canon_data.addAnonymousImport("canon", .{ .root_source_file = canon_gen_out }); |
| 119 | 128 | ||
| 129 | const compat_data = b.createModule(.{ | ||
| 130 | .root_source_file = .{ .path = "src/CompatData.zig" }, | ||
| 131 | .target = target, | ||
| 132 | .optimize = optimize, | ||
| 133 | }); | ||
| 134 | compat_data.addAnonymousImport("compat", .{ .root_source_file = compat_gen_out }); | ||
| 135 | |||
| 120 | const norm_data = b.createModule(.{ | 136 | const norm_data = b.createModule(.{ |
| 121 | .root_source_file = .{ .path = "src/NormData.zig" }, | 137 | .root_source_file = .{ .path = "src/NormData.zig" }, |
| 122 | .target = target, | 138 | .target = target, |
| 123 | .optimize = optimize, | 139 | .optimize = optimize, |
| 124 | }); | 140 | }); |
| 125 | norm_data.addImport("CanonicalData", canon_data); | 141 | norm_data.addImport("CanonData", canon_data); |
| 126 | norm_data.addImport("CombiningClassData", ccc_data); | 142 | norm_data.addImport("CompatData", compat_data); |
| 143 | norm_data.addImport("CombiningData", ccc_data); | ||
| 127 | 144 | ||
| 128 | const norm = b.addModule("Normalizer", .{ | 145 | const norm = b.addModule("Normalizer", .{ |
| 129 | .root_source_file = .{ .path = "src/Normalizer.zig" }, | 146 | .root_source_file = .{ .path = "src/Normalizer.zig" }, |
diff --git a/codegen/compat.zig b/codegen/compat.zig new file mode 100644 index 0000000..916d4d0 --- /dev/null +++ b/codegen/compat.zig | |||
| @@ -0,0 +1,65 @@ | |||
| 1 | const std = @import("std"); | ||
| 2 | const builtin = @import("builtin"); | ||
| 3 | |||
/// Generates the compressed compatibility-decomposition table.
///
/// Reads `data/unicode/UnicodeData.txt` relative to the current working
/// directory and writes a deflate-compressed binary stream to the path given
/// as the first command-line argument. Panics if that argument is missing.
///
/// Each record is a `u8` count followed by that many `u24` values: the source
/// code point first, then the code points of its compatibility mapping. The
/// stream ends with a zero `u16`; its leading zero byte reads back as a record
/// count of zero.
///
/// NOTE(review): integers are written in the byte order of the CPU this
/// generator is compiled for; confirm every consumer reads with the same order.
pub fn main() !void {
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const allocator = arena.allocator();

    // Process UnicodeData.txt
    const in_file = try std.fs.cwd().openFile("data/unicode/UnicodeData.txt", .{});
    defer in_file.close();
    var in_buf = std.io.bufferedReader(in_file.reader());
    const in_reader = in_buf.reader();

    var args_iter = try std.process.argsWithAllocator(allocator);
    defer args_iter.deinit();
    _ = args_iter.skip();
    const output_path = args_iter.next() orelse @panic("No output file arg!");

    const compressor = std.compress.deflate.compressor;
    const out_file = try std.fs.cwd().createFile(output_path, .{});
    defer out_file.close();
    var out_comp = try compressor(allocator, out_file.writer(), .{ .level = .best_compression });
    defer out_comp.deinit();
    const writer = out_comp.writer();

    const endian = builtin.cpu.arch.endian();
    var line_buf: [4096]u8 = undefined;

    lines: while (try in_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
        if (line.len == 0) continue;

        var field_iter = std.mem.splitScalar(u8, line, ';');
        // Slot 0 holds the source code point; the mapping follows it.
        var cps: [19]u24 = undefined;
        var len: u8 = 1;

        var i: usize = 0;
        while (field_iter.next()) |field| : (i += 1) {
            switch (i) {
                0 => cps[0] = try std.fmt.parseInt(u24, field, 16),

                // Bracketed names (`<control>`, `<..., First>`, `<..., Last>`)
                // are skipped up front.
                1 => if (field.len != 0 and field[0] == '<') continue :lines,

                5 => {
                    // Not compatibility.
                    if (field.len == 0 or field[0] != '<') continue :lines;
                    var cp_iter = std.mem.tokenizeScalar(u8, field, ' ');
                    _ = cp_iter.next(); // <compat type>

                    while (cp_iter.next()) |cp_str| : (len += 1) {
                        // Fail loudly instead of indexing past the buffer.
                        if (len == cps.len) return error.DecompositionTooLong;
                        cps[len] = try std.fmt.parseInt(u24, cp_str, 16);
                    }
                },

                else => {},
            }
        }

        try writer.writeInt(u8, len, endian);
        for (cps[0..len]) |cp| try writer.writeInt(u24, cp, endian);
    }

    // End-of-data marker.
    try writer.writeInt(u16, 0, endian);
    try out_comp.flush();
}
diff --git a/src/Canonical.zig b/src/CanonData.zig index 81d3eec..81d3eec 100644 --- a/src/Canonical.zig +++ b/src/CanonData.zig | |||
diff --git a/src/CombiningClassData.zig b/src/CombiningData.zig index 95c947d..95c947d 100644 --- a/src/CombiningClassData.zig +++ b/src/CombiningData.zig | |||
diff --git a/src/CompatData.zig b/src/CompatData.zig new file mode 100644 index 0000000..a1f5de6 --- /dev/null +++ b/src/CompatData.zig | |||
| @@ -0,0 +1,50 @@ | |||
| 1 | const std = @import("std"); | ||
| 2 | const builtin = @import("builtin"); | ||
| 3 | const compress = std.compress; | ||
| 4 | const mem = std.mem; | ||
| 5 | |||
| 6 | allocator: mem.Allocator, | ||
| 7 | nfkd: [][]u21 = undefined, | ||
| 8 | |||
| 9 | const Self = @This(); | ||
| 10 | |||
/// Builds the compatibility-decomposition lookup table from the embedded,
/// deflate-compressed `compat` data.
///
/// The table has one slot per code point in `0..0x110000`; code points with no
/// record in the data keep an empty slice. All memory comes from `allocator`
/// and is released by `deinit`. If decompression, a read, or an allocation
/// fails, everything allocated so far is freed before the error is returned.
pub fn init(allocator: mem.Allocator) !Self {
    const decompressor = compress.deflate.decompressor;
    const in_bytes = @embedFile("compat");
    var in_fbs = std.io.fixedBufferStream(in_bytes);
    var in_decomp = try decompressor(allocator, in_fbs.reader(), null);
    defer in_decomp.deinit();
    var reader = in_decomp.reader();

    const endian = builtin.cpu.arch.endian();
    var self = Self{
        .allocator = allocator,
        .nfkd = try allocator.alloc([]u21, 0x110000),
    };

    // Every slot starts empty so cleanup can tell which ones were filled.
    for (self.nfkd) |*entry| entry.* = &.{};
    errdefer self.deinit();

    while (true) {
        // Record layout: count, source code point, then `count - 1` mapped
        // code points. A zero count ends the stream.
        const len: u8 = try reader.readInt(u8, endian);
        if (len == 0) break;
        const cp = try reader.readInt(u24, endian);

        const decomp = try allocator.alloc(u21, len - 1);
        // Store in the table before filling so the errdefer above frees it
        // should one of the following reads fail.
        self.nfkd[cp] = decomp;
        for (decomp) |*slot| slot.* = @intCast(try reader.readInt(u24, endian));
    }

    return self;
}
| 39 | |||
/// Frees every non-empty decomposition slice, then the lookup table itself.
pub fn deinit(self: *Self) void {
    const gpa = self.allocator;
    for (self.nfkd) |entry| {
        // Empty slots are skipped.
        if (entry.len == 0) continue;
        gpa.free(entry);
    }
    gpa.free(self.nfkd);
}
| 46 | |||
/// Looks up the compatibility decomposition stored for `cp`.
/// The slice is empty when the table holds no decomposition for `cp`.
/// The result points into the table and is not a copy.
pub inline fn toNfkd(self: Self, cp: u21) []u21 {
    const table = self.nfkd;
    return table[cp];
}
diff --git a/src/NormData.zig b/src/NormData.zig index c6fa8e8..83110f0 100644 --- a/src/NormData.zig +++ b/src/NormData.zig | |||
| @@ -1,11 +1,13 @@ | |||
| 1 | const std = @import("std"); | 1 | const std = @import("std"); |
| 2 | const mem = std.mem; | 2 | const mem = std.mem; |
| 3 | 3 | ||
| 4 | const CanonData = @import("CanonicalData"); | 4 | const CanonData = @import("CanonData"); |
| 5 | const CccData = @import("CombiningClassData"); | 5 | const CccData = @import("CombiningData"); |
| 6 | const CompatData = @import("CompatData"); | ||
| 6 | 7 | ||
| 7 | canon_data: CanonData, | 8 | canon_data: CanonData, |
| 8 | ccc_data: CccData, | 9 | ccc_data: CccData, |
| 10 | compat_data: CompatData, | ||
| 9 | 11 | ||
| 10 | const Self = @This(); | 12 | const Self = @This(); |
| 11 | 13 | ||
| @@ -13,10 +15,12 @@ pub fn init(allocator: std.mem.Allocator) !Self { | |||
| 13 | return Self{ | 15 | return Self{ |
| 14 | .canon_data = try CanonData.init(allocator), | 16 | .canon_data = try CanonData.init(allocator), |
| 15 | .ccc_data = try CccData.init(allocator), | 17 | .ccc_data = try CccData.init(allocator), |
| 18 | .compat_data = try CompatData.init(allocator), | ||
| 16 | }; | 19 | }; |
| 17 | } | 20 | } |
| 18 | 21 | ||
| 19 | pub fn deinit(self: *Self) void { | 22 | pub fn deinit(self: *Self) void { |
| 20 | self.canon_data.deinit(); | 23 | self.canon_data.deinit(); |
| 21 | self.ccc_data.deinit(); | 24 | self.ccc_data.deinit(); |
| 25 | self.compat_data.deinit(); | ||
| 22 | } | 26 | } |
diff --git a/src/Normalizer.zig b/src/Normalizer.zig index 2e2e6e4..1434043 100644 --- a/src/Normalizer.zig +++ b/src/Normalizer.zig | |||
| @@ -12,57 +12,10 @@ const norm_props = @import("ziglyph").normalization_props; | |||
| 12 | 12 | ||
| 13 | pub const NormData = @import("NormData"); | 13 | pub const NormData = @import("NormData"); |
| 14 | 14 | ||
| 15 | nfkd_map: std.AutoHashMap(u21, [18]u21), | ||
| 16 | norm_data: *NormData, | 15 | norm_data: *NormData, |
| 17 | 16 | ||
| 18 | const Self = @This(); | 17 | const Self = @This(); |
| 19 | 18 | ||
| 20 | pub fn init(allocator: std.mem.Allocator, norm_data: *NormData) !Self { | ||
| 21 | var self = Self{ | ||
| 22 | .nfkd_map = std.AutoHashMap(u21, [18]u21).init(allocator), | ||
| 23 | .norm_data = norm_data, | ||
| 24 | }; | ||
| 25 | errdefer self.deinit(); | ||
| 26 | |||
| 27 | // Compatibility decompositions | ||
| 28 | const dekomp_file = @embedFile("autogen/compatibility_decompositions.txt.deflate"); | ||
| 29 | var dekomp_stream = std.io.fixedBufferStream(dekomp_file); | ||
| 30 | var dekomp_decomp = try std.compress.deflate.decompressor(allocator, dekomp_stream.reader(), null); | ||
| 31 | defer dekomp_decomp.deinit(); | ||
| 32 | |||
| 33 | var dekomp_buf = std.io.bufferedReader(dekomp_decomp.reader()); | ||
| 34 | const dekomp_reader = dekomp_buf.reader(); | ||
| 35 | var buf: [4096]u8 = undefined; | ||
| 36 | |||
| 37 | while (try dekomp_reader.readUntilDelimiterOrEof(&buf, '\n')) |line| { | ||
| 38 | if (line.len == 0) continue; | ||
| 39 | var fields = std.mem.split(u8, line, ";"); | ||
| 40 | const cp_a = try std.fmt.parseInt(u21, fields.next().?, 16); | ||
| 41 | var cps = [_]u21{0} ** 18; | ||
| 42 | var i: usize = 0; | ||
| 43 | |||
| 44 | while (fields.next()) |cp| : (i += 1) { | ||
| 45 | cps[i] = try std.fmt.parseInt(u21, cp, 16); | ||
| 46 | } | ||
| 47 | |||
| 48 | try self.nfkd_map.put(cp_a, cps); | ||
| 49 | } | ||
| 50 | |||
| 51 | return self; | ||
| 52 | } | ||
| 53 | |||
| 54 | pub fn deinit(self: *Self) void { | ||
| 55 | self.nfkd_map.deinit(); | ||
| 56 | } | ||
| 57 | |||
| 58 | test "init / deinit" { | ||
| 59 | const allocator = testing.allocator; | ||
| 60 | var norm_data = try NormData.init(allocator); | ||
| 61 | defer norm_data.deinit(); | ||
| 62 | var n = try init(allocator, &norm_data); | ||
| 63 | defer n.deinit(); | ||
| 64 | } | ||
| 65 | |||
| 66 | // Hangul processing utilities. | 19 | // Hangul processing utilities. |
| 67 | fn isHangulPrecomposed(cp: u21) bool { | 20 | fn isHangulPrecomposed(cp: u21) bool { |
| 68 | if (hangul_map.syllableType(cp)) |kind| return kind == .LV or kind == .LVT; | 21 | if (hangul_map.syllableType(cp)) |kind| return kind == .LV or kind == .LVT; |
| @@ -140,10 +93,11 @@ pub fn mapping(self: Self, cp: u21, form: Form) Decomp { | |||
| 140 | @memcpy(dc.cps[0..len], canon_dc[0..len]); | 93 | @memcpy(dc.cps[0..len], canon_dc[0..len]); |
| 141 | } | 94 | } |
| 142 | 95 | ||
| 143 | if (self.nfkd_map.get(cp)) |array| { | 96 | const compat_dc = self.norm_data.compat_data.toNfkd(cp); |
| 97 | if (compat_dc.len != 0) { | ||
| 144 | if (form != .nfd) { | 98 | if (form != .nfd) { |
| 145 | dc.form = .nfkd; | 99 | dc.form = .nfkd; |
| 146 | @memcpy(dc.cps[0..array.len], &array); | 100 | @memcpy(dc.cps[0..compat_dc.len], compat_dc); |
| 147 | } | 101 | } |
| 148 | } | 102 | } |
| 149 | 103 | ||
| @@ -210,10 +164,9 @@ pub fn decompose(self: Self, cp: u21, form: Form) Decomp { | |||
| 210 | 164 | ||
| 211 | test "decompose" { | 165 | test "decompose" { |
| 212 | const allocator = testing.allocator; | 166 | const allocator = testing.allocator; |
| 213 | var norm_data = try NormData.init(allocator); | 167 | var data = try NormData.init(allocator); |
| 214 | defer norm_data.deinit(); | 168 | defer data.deinit(); |
| 215 | var n = try init(allocator, &norm_data); | 169 | var n = Self{ .norm_data = &data }; |
| 216 | defer n.deinit(); | ||
| 217 | 170 | ||
| 218 | var dc = n.decompose('é', .nfd); | 171 | var dc = n.decompose('é', .nfd); |
| 219 | try std.testing.expect(dc.form == .nfd); | 172 | try std.testing.expect(dc.form == .nfd); |
| @@ -334,10 +287,9 @@ fn nfxd(self: Self, allocator: std.mem.Allocator, str: []const u8, form: Form) ! | |||
| 334 | 287 | ||
| 335 | test "nfd ASCII / no-alloc" { | 288 | test "nfd ASCII / no-alloc" { |
| 336 | const allocator = testing.allocator; | 289 | const allocator = testing.allocator; |
| 337 | var norm_data = try NormData.init(allocator); | 290 | var data = try NormData.init(allocator); |
| 338 | defer norm_data.deinit(); | 291 | defer data.deinit(); |
| 339 | var n = try init(allocator, &norm_data); | 292 | var n = Self{ .norm_data = &data }; |
| 340 | defer n.deinit(); | ||
| 341 | 293 | ||
| 342 | var result = try n.nfd(allocator, "Hello World!"); | 294 | var result = try n.nfd(allocator, "Hello World!"); |
| 343 | defer result.deinit(); | 295 | defer result.deinit(); |
| @@ -347,10 +299,9 @@ test "nfd ASCII / no-alloc" { | |||
| 347 | 299 | ||
| 348 | test "nfd !ASCII / alloc" { | 300 | test "nfd !ASCII / alloc" { |
| 349 | const allocator = testing.allocator; | 301 | const allocator = testing.allocator; |
| 350 | var norm_data = try NormData.init(allocator); | 302 | var data = try NormData.init(allocator); |
| 351 | defer norm_data.deinit(); | 303 | defer data.deinit(); |
| 352 | var n = try init(allocator, &norm_data); | 304 | var n = Self{ .norm_data = &data }; |
| 353 | defer n.deinit(); | ||
| 354 | 305 | ||
| 355 | var result = try n.nfd(allocator, "Héllo World! \u{3d3}"); | 306 | var result = try n.nfd(allocator, "Héllo World! \u{3d3}"); |
| 356 | defer result.deinit(); | 307 | defer result.deinit(); |
| @@ -360,10 +311,9 @@ test "nfd !ASCII / alloc" { | |||
| 360 | 311 | ||
| 361 | test "nfkd ASCII / no-alloc" { | 312 | test "nfkd ASCII / no-alloc" { |
| 362 | const allocator = testing.allocator; | 313 | const allocator = testing.allocator; |
| 363 | var norm_data = try NormData.init(allocator); | 314 | var data = try NormData.init(allocator); |
| 364 | defer norm_data.deinit(); | 315 | defer data.deinit(); |
| 365 | var n = try init(allocator, &norm_data); | 316 | var n = Self{ .norm_data = &data }; |
| 366 | defer n.deinit(); | ||
| 367 | 317 | ||
| 368 | var result = try n.nfkd(allocator, "Hello World!"); | 318 | var result = try n.nfkd(allocator, "Hello World!"); |
| 369 | defer result.deinit(); | 319 | defer result.deinit(); |
| @@ -373,10 +323,9 @@ test "nfkd ASCII / no-alloc" { | |||
| 373 | 323 | ||
| 374 | test "nfkd !ASCII / alloc" { | 324 | test "nfkd !ASCII / alloc" { |
| 375 | const allocator = testing.allocator; | 325 | const allocator = testing.allocator; |
| 376 | var norm_data = try NormData.init(allocator); | 326 | var data = try NormData.init(allocator); |
| 377 | defer norm_data.deinit(); | 327 | defer data.deinit(); |
| 378 | var n = try init(allocator, &norm_data); | 328 | var n = Self{ .norm_data = &data }; |
| 379 | defer n.deinit(); | ||
| 380 | 329 | ||
| 381 | var result = try n.nfkd(allocator, "Héllo World! \u{3d3}"); | 330 | var result = try n.nfkd(allocator, "Héllo World! \u{3d3}"); |
| 382 | defer result.deinit(); | 331 | defer result.deinit(); |
| @@ -529,10 +478,9 @@ fn nfxc(self: Self, allocator: std.mem.Allocator, str: []const u8, form: Form) ! | |||
| 529 | 478 | ||
| 530 | test "nfc" { | 479 | test "nfc" { |
| 531 | const allocator = testing.allocator; | 480 | const allocator = testing.allocator; |
| 532 | var norm_data = try NormData.init(allocator); | 481 | var data = try NormData.init(allocator); |
| 533 | defer norm_data.deinit(); | 482 | defer data.deinit(); |
| 534 | var n = try init(allocator, &norm_data); | 483 | var n = Self{ .norm_data = &data }; |
| 535 | defer n.deinit(); | ||
| 536 | 484 | ||
| 537 | var result = try n.nfc(allocator, "Complex char: \u{3D2}\u{301}"); | 485 | var result = try n.nfc(allocator, "Complex char: \u{3D2}\u{301}"); |
| 538 | defer result.deinit(); | 486 | defer result.deinit(); |
| @@ -542,10 +490,9 @@ test "nfc" { | |||
| 542 | 490 | ||
| 543 | test "nfkc" { | 491 | test "nfkc" { |
| 544 | const allocator = testing.allocator; | 492 | const allocator = testing.allocator; |
| 545 | var norm_data = try NormData.init(allocator); | 493 | var data = try NormData.init(allocator); |
| 546 | defer norm_data.deinit(); | 494 | defer data.deinit(); |
| 547 | var n = try init(allocator, &norm_data); | 495 | var n = Self{ .norm_data = &data }; |
| 548 | defer n.deinit(); | ||
| 549 | 496 | ||
| 550 | var result = try n.nfkc(allocator, "Complex char: \u{03A5}\u{0301}"); | 497 | var result = try n.nfkc(allocator, "Complex char: \u{03A5}\u{0301}"); |
| 551 | defer result.deinit(); | 498 | defer result.deinit(); |
| @@ -603,10 +550,9 @@ pub fn eql(self: Self, allocator: std.mem.Allocator, a: []const u8, b: []const u | |||
| 603 | 550 | ||
| 604 | test "eql" { | 551 | test "eql" { |
| 605 | const allocator = testing.allocator; | 552 | const allocator = testing.allocator; |
| 606 | var norm_data = try NormData.init(allocator); | 553 | var data = try NormData.init(allocator); |
| 607 | defer norm_data.deinit(); | 554 | defer data.deinit(); |
| 608 | var n = try init(allocator, &norm_data); | 555 | var n = Self{ .norm_data = &data }; |
| 609 | defer n.deinit(); | ||
| 610 | 556 | ||
| 611 | try std.testing.expect(try n.eql(allocator, "foé", "foe\u{0301}")); | 557 | try std.testing.expect(try n.eql(allocator, "foé", "foe\u{0301}")); |
| 612 | try std.testing.expect(try n.eql(allocator, "foϓ", "fo\u{03D2}\u{0301}")); | 558 | try std.testing.expect(try n.eql(allocator, "foϓ", "fo\u{03D2}\u{0301}")); |
| @@ -672,10 +618,9 @@ pub fn eqlCaseless(self: Self, allocator: std.mem.Allocator, a: []const u8, b: [ | |||
| 672 | 618 | ||
| 673 | test "eqlCaseless" { | 619 | test "eqlCaseless" { |
| 674 | const allocator = testing.allocator; | 620 | const allocator = testing.allocator; |
| 675 | var norm_data = try NormData.init(allocator); | 621 | var data = try NormData.init(allocator); |
| 676 | defer norm_data.deinit(); | 622 | defer data.deinit(); |
| 677 | var n = try init(allocator, &norm_data); | 623 | var n = Self{ .norm_data = &data }; |
| 678 | defer n.deinit(); | ||
| 679 | 624 | ||
| 680 | try std.testing.expect(try n.eqlCaseless(allocator, "Foϓ", "fo\u{03D2}\u{0301}")); | 625 | try std.testing.expect(try n.eqlCaseless(allocator, "Foϓ", "fo\u{03D2}\u{0301}")); |
| 681 | try std.testing.expect(try n.eqlCaseless(allocator, "FOÉ", "foe\u{0301}")); // foÉ == foé | 626 | try std.testing.expect(try n.eqlCaseless(allocator, "FOÉ", "foe\u{0301}")); // foÉ == foé |
| @@ -709,10 +654,9 @@ pub fn isFcd(self: Self, str: []const u8) bool { | |||
| 709 | 654 | ||
| 710 | test "isFcd" { | 655 | test "isFcd" { |
| 711 | const allocator = testing.allocator; | 656 | const allocator = testing.allocator; |
| 712 | var norm_data = try NormData.init(allocator); | 657 | var data = try NormData.init(allocator); |
| 713 | defer norm_data.deinit(); | 658 | defer data.deinit(); |
| 714 | var n = try init(allocator, &norm_data); | 659 | var n = Self{ .norm_data = &data }; |
| 715 | defer n.deinit(); | ||
| 716 | 660 | ||
| 717 | const is_nfc = "José \u{3D3}"; | 661 | const is_nfc = "José \u{3D3}"; |
| 718 | try std.testing.expect(n.isFcd(is_nfc)); | 662 | try std.testing.expect(n.isFcd(is_nfc)); |
| @@ -729,10 +673,9 @@ test "Unicode normalization tests" { | |||
| 729 | defer arena.deinit(); | 673 | defer arena.deinit(); |
| 730 | var allocator = arena.allocator(); | 674 | var allocator = arena.allocator(); |
| 731 | 675 | ||
| 732 | var norm_data = try NormData.init(allocator); | 676 | var data = try NormData.init(allocator); |
| 733 | defer norm_data.deinit(); | 677 | defer data.deinit(); |
| 734 | var n = try init(allocator, &norm_data); | 678 | var n = Self{ .norm_data = &data }; |
| 735 | defer n.deinit(); | ||
| 736 | 679 | ||
| 737 | var file = try std.fs.cwd().openFile("data/unicode/NormalizationTest.txt", .{}); | 680 | var file = try std.fs.cwd().openFile("data/unicode/NormalizationTest.txt", .{}); |
| 738 | defer file.close(); | 681 | defer file.close(); |
diff --git a/src/DisplayWidthData.zig b/src/WidthData.zig index 32f8658..32f8658 100644 --- a/src/DisplayWidthData.zig +++ b/src/WidthData.zig | |||
diff --git a/src/autogen/compatibility_decompositions.txt.deflate b/src/autogen/compatibility_decompositions.txt.deflate deleted file mode 100644 index 0370b4c..0000000 --- a/src/autogen/compatibility_decompositions.txt.deflate +++ /dev/null | |||
| Binary files differ | |||
diff --git a/src/main.zig b/src/main.zig index 05c2ea4..2c2cf8c 100644 --- a/src/main.zig +++ b/src/main.zig | |||
| @@ -16,9 +16,9 @@ const std = @import("std"); | |||
| 16 | // const ascii = @import("ascii"); | 16 | // const ascii = @import("ascii"); |
| 17 | // const ascii = std.ascii; | 17 | // const ascii = std.ascii; |
| 18 | 18 | ||
| 19 | // const norm = @import("ziglyph").Normalizer; | 19 | // const Normalizer = @import("ziglyph").Normalizer; |
| 20 | const NormData = @import("Normalizer").NormData; | 20 | const NormData = @import("Normalizer").NormData; |
| 21 | const norm = @import("Normalizer"); | 21 | const Normalizer = @import("Normalizer"); |
| 22 | 22 | ||
| 23 | pub fn main() !void { | 23 | pub fn main() !void { |
| 24 | var args_iter = std.process.args(); | 24 | var args_iter = std.process.args(); |
| @@ -32,10 +32,9 @@ pub fn main() !void { | |||
| 32 | const input = try std.fs.cwd().readFileAlloc(allocator, in_path, std.math.maxInt(u32)); | 32 | const input = try std.fs.cwd().readFileAlloc(allocator, in_path, std.math.maxInt(u32)); |
| 33 | defer allocator.free(input); | 33 | defer allocator.free(input); |
| 34 | 34 | ||
| 35 | var norm_data = try NormData.init(allocator); | 35 | var data = try NormData.init(allocator); |
| 36 | defer norm_data.deinit(); | 36 | defer data.deinit(); |
| 37 | var n = try norm.init(allocator, &norm_data); | 37 | var n = Normalizer{ .norm_data = &data }; |
| 38 | defer n.deinit(); | ||
| 39 | // var n = try norm.init(allocator); | 38 | // var n = try norm.init(allocator); |
| 40 | // defer n.deinit(); | 39 | // defer n.deinit(); |
| 41 | 40 | ||