summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/CanonData.zig (renamed from src/Canonical.zig) 0
-rw-r--r-- src/CombiningData.zig (renamed from src/CombiningClassData.zig) 0
-rw-r--r-- src/CompatData.zig 50
-rw-r--r-- src/NormData.zig 8
-rw-r--r-- src/Normalizer.zig 129
-rw-r--r-- src/WidthData.zig (renamed from src/DisplayWidthData.zig) 0
-rw-r--r-- src/autogen/compatibility_decompositions.txt.deflate bin 15332 -> 0 bytes
-rw-r--r-- src/main.zig 11
8 files changed, 97 insertions, 101 deletions
diff --git a/src/Canonical.zig b/src/CanonData.zig
index 81d3eec..81d3eec 100644
--- a/src/Canonical.zig
+++ b/src/CanonData.zig
diff --git a/src/CombiningClassData.zig b/src/CombiningData.zig
index 95c947d..95c947d 100644
--- a/src/CombiningClassData.zig
+++ b/src/CombiningData.zig
diff --git a/src/CompatData.zig b/src/CompatData.zig
new file mode 100644
index 0000000..a1f5de6
--- /dev/null
+++ b/src/CompatData.zig
@@ -0,0 +1,50 @@
1const std = @import("std");
2const builtin = @import("builtin");
3const compress = std.compress;
4const mem = std.mem;
5
6allocator: mem.Allocator,
7nfkd: [][]u21 = undefined,
8
9const Self = @This();
10
/// Builds the compatibility decomposition table from the deflate-compressed
/// data embedded under the name "compat".
///
/// The table has one slot per index in `0..0x110000`; every slot starts out
/// as an empty slice and is replaced only for code points that have a record
/// in the embedded stream.
///
/// Stream layout, as consumed below: a `u8` length byte (0 terminates the
/// stream), then a `u24` code point, then `len - 1` `u24` values that form the
/// decomposition of that code point.
///
/// Memory is obtained from `allocator`; on success the caller must release it
/// with `deinit`. On failure everything allocated so far is freed before the
/// error is returned.
pub fn init(allocator: mem.Allocator) !Self {
    const decompressor = compress.deflate.decompressor;
    const in_bytes = @embedFile("compat");
    var in_fbs = std.io.fixedBufferStream(in_bytes);
    var in_decomp = try decompressor(allocator, in_fbs.reader(), null);
    defer in_decomp.deinit();
    var reader = in_decomp.reader();

    // Multi-byte integers are read in the target CPU's native byte order.
    // NOTE(review): this presumes the embedded data was generated with the
    // same byte order as the target — confirm against the generator.
    const endian = builtin.cpu.arch.endian();
    var self = Self{
        .allocator = allocator,
        .nfkd = try allocator.alloc([]u21, 0x110000),
    };
    // Registered first so it runs last: the outer table is released only
    // after the per-code-point slices below have been freed.
    errdefer allocator.free(self.nfkd);

    for (0..0x110000) |i| self.nfkd[i] = &.{};

    // Registered after every slot holds a valid (empty) slice, so the cleanup
    // never inspects uninitialized memory. Empty slots were never allocated
    // and are skipped, mirroring `deinit`.
    errdefer {
        for (self.nfkd) |slice| {
            if (slice.len != 0) allocator.free(slice);
        }
    }

    while (true) {
        const len: u8 = try reader.readInt(u8, endian);
        if (len == 0) break;
        const cp = try reader.readInt(u24, endian);
        // Store the slice in the table before filling it so that a failed
        // read part-way through is still covered by the errdefer above.
        self.nfkd[cp] = try allocator.alloc(u21, len - 1);
        for (0..len - 1) |i| {
            self.nfkd[cp][i] = @intCast(try reader.readInt(u24, endian));
        }
    }

    return self;
}
39
/// Frees every non-empty decomposition slice held in the table, then the
/// table itself, using the allocator stored in `self.allocator`.
pub fn deinit(self: *Self) void {
    const gpa = self.allocator;

    var index: usize = 0;
    while (index < self.nfkd.len) : (index += 1) {
        const decomposition = self.nfkd[index];
        // Zero-length entries are skipped rather than handed to the allocator.
        if (decomposition.len == 0) continue;
        gpa.free(decomposition);
    }

    gpa.free(self.nfkd);
}
46
/// Looks up the compatibility decomposition stored for `cp`.
/// The result is the table entry at index `cp`; it is a zero-length slice
/// when no decomposition was recorded for that code point.
pub inline fn toNfkd(self: Self, cp: u21) []u21 {
    const table = self.nfkd;
    return table[cp];
}
diff --git a/src/NormData.zig b/src/NormData.zig
index c6fa8e8..83110f0 100644
--- a/src/NormData.zig
+++ b/src/NormData.zig
@@ -1,11 +1,13 @@
1const std = @import("std"); 1const std = @import("std");
2const mem = std.mem; 2const mem = std.mem;
3 3
4const CanonData = @import("CanonicalData"); 4const CanonData = @import("CanonData");
5const CccData = @import("CombiningClassData"); 5const CccData = @import("CombiningData");
6const CompatData = @import("CompatData");
6 7
7canon_data: CanonData, 8canon_data: CanonData,
8ccc_data: CccData, 9ccc_data: CccData,
10compat_data: CompatData,
9 11
10const Self = @This(); 12const Self = @This();
11 13
@@ -13,10 +15,12 @@ pub fn init(allocator: std.mem.Allocator) !Self {
13 return Self{ 15 return Self{
14 .canon_data = try CanonData.init(allocator), 16 .canon_data = try CanonData.init(allocator),
15 .ccc_data = try CccData.init(allocator), 17 .ccc_data = try CccData.init(allocator),
18 .compat_data = try CompatData.init(allocator),
16 }; 19 };
17} 20}
18 21
19pub fn deinit(self: *Self) void { 22pub fn deinit(self: *Self) void {
20 self.canon_data.deinit(); 23 self.canon_data.deinit();
21 self.ccc_data.deinit(); 24 self.ccc_data.deinit();
25 self.compat_data.deinit();
22} 26}
diff --git a/src/Normalizer.zig b/src/Normalizer.zig
index 2e2e6e4..1434043 100644
--- a/src/Normalizer.zig
+++ b/src/Normalizer.zig
@@ -12,57 +12,10 @@ const norm_props = @import("ziglyph").normalization_props;
12 12
13pub const NormData = @import("NormData"); 13pub const NormData = @import("NormData");
14 14
15nfkd_map: std.AutoHashMap(u21, [18]u21),
16norm_data: *NormData, 15norm_data: *NormData,
17 16
18const Self = @This(); 17const Self = @This();
19 18
20pub fn init(allocator: std.mem.Allocator, norm_data: *NormData) !Self {
21 var self = Self{
22 .nfkd_map = std.AutoHashMap(u21, [18]u21).init(allocator),
23 .norm_data = norm_data,
24 };
25 errdefer self.deinit();
26
27 // Compatibility decompositions
28 const dekomp_file = @embedFile("autogen/compatibility_decompositions.txt.deflate");
29 var dekomp_stream = std.io.fixedBufferStream(dekomp_file);
30 var dekomp_decomp = try std.compress.deflate.decompressor(allocator, dekomp_stream.reader(), null);
31 defer dekomp_decomp.deinit();
32
33 var dekomp_buf = std.io.bufferedReader(dekomp_decomp.reader());
34 const dekomp_reader = dekomp_buf.reader();
35 var buf: [4096]u8 = undefined;
36
37 while (try dekomp_reader.readUntilDelimiterOrEof(&buf, '\n')) |line| {
38 if (line.len == 0) continue;
39 var fields = std.mem.split(u8, line, ";");
40 const cp_a = try std.fmt.parseInt(u21, fields.next().?, 16);
41 var cps = [_]u21{0} ** 18;
42 var i: usize = 0;
43
44 while (fields.next()) |cp| : (i += 1) {
45 cps[i] = try std.fmt.parseInt(u21, cp, 16);
46 }
47
48 try self.nfkd_map.put(cp_a, cps);
49 }
50
51 return self;
52}
53
54pub fn deinit(self: *Self) void {
55 self.nfkd_map.deinit();
56}
57
58test "init / deinit" {
59 const allocator = testing.allocator;
60 var norm_data = try NormData.init(allocator);
61 defer norm_data.deinit();
62 var n = try init(allocator, &norm_data);
63 defer n.deinit();
64}
65
66// Hangul processing utilities. 19// Hangul processing utilities.
67fn isHangulPrecomposed(cp: u21) bool { 20fn isHangulPrecomposed(cp: u21) bool {
68 if (hangul_map.syllableType(cp)) |kind| return kind == .LV or kind == .LVT; 21 if (hangul_map.syllableType(cp)) |kind| return kind == .LV or kind == .LVT;
@@ -140,10 +93,11 @@ pub fn mapping(self: Self, cp: u21, form: Form) Decomp {
140 @memcpy(dc.cps[0..len], canon_dc[0..len]); 93 @memcpy(dc.cps[0..len], canon_dc[0..len]);
141 } 94 }
142 95
143 if (self.nfkd_map.get(cp)) |array| { 96 const compat_dc = self.norm_data.compat_data.toNfkd(cp);
97 if (compat_dc.len != 0) {
144 if (form != .nfd) { 98 if (form != .nfd) {
145 dc.form = .nfkd; 99 dc.form = .nfkd;
146 @memcpy(dc.cps[0..array.len], &array); 100 @memcpy(dc.cps[0..compat_dc.len], compat_dc);
147 } 101 }
148 } 102 }
149 103
@@ -210,10 +164,9 @@ pub fn decompose(self: Self, cp: u21, form: Form) Decomp {
210 164
211test "decompose" { 165test "decompose" {
212 const allocator = testing.allocator; 166 const allocator = testing.allocator;
213 var norm_data = try NormData.init(allocator); 167 var data = try NormData.init(allocator);
214 defer norm_data.deinit(); 168 defer data.deinit();
215 var n = try init(allocator, &norm_data); 169 var n = Self{ .norm_data = &data };
216 defer n.deinit();
217 170
218 var dc = n.decompose('é', .nfd); 171 var dc = n.decompose('é', .nfd);
219 try std.testing.expect(dc.form == .nfd); 172 try std.testing.expect(dc.form == .nfd);
@@ -334,10 +287,9 @@ fn nfxd(self: Self, allocator: std.mem.Allocator, str: []const u8, form: Form) !
334 287
335test "nfd ASCII / no-alloc" { 288test "nfd ASCII / no-alloc" {
336 const allocator = testing.allocator; 289 const allocator = testing.allocator;
337 var norm_data = try NormData.init(allocator); 290 var data = try NormData.init(allocator);
338 defer norm_data.deinit(); 291 defer data.deinit();
339 var n = try init(allocator, &norm_data); 292 var n = Self{ .norm_data = &data };
340 defer n.deinit();
341 293
342 var result = try n.nfd(allocator, "Hello World!"); 294 var result = try n.nfd(allocator, "Hello World!");
343 defer result.deinit(); 295 defer result.deinit();
@@ -347,10 +299,9 @@ test "nfd ASCII / no-alloc" {
347 299
348test "nfd !ASCII / alloc" { 300test "nfd !ASCII / alloc" {
349 const allocator = testing.allocator; 301 const allocator = testing.allocator;
350 var norm_data = try NormData.init(allocator); 302 var data = try NormData.init(allocator);
351 defer norm_data.deinit(); 303 defer data.deinit();
352 var n = try init(allocator, &norm_data); 304 var n = Self{ .norm_data = &data };
353 defer n.deinit();
354 305
355 var result = try n.nfd(allocator, "Héllo World! \u{3d3}"); 306 var result = try n.nfd(allocator, "Héllo World! \u{3d3}");
356 defer result.deinit(); 307 defer result.deinit();
@@ -360,10 +311,9 @@ test "nfd !ASCII / alloc" {
360 311
361test "nfkd ASCII / no-alloc" { 312test "nfkd ASCII / no-alloc" {
362 const allocator = testing.allocator; 313 const allocator = testing.allocator;
363 var norm_data = try NormData.init(allocator); 314 var data = try NormData.init(allocator);
364 defer norm_data.deinit(); 315 defer data.deinit();
365 var n = try init(allocator, &norm_data); 316 var n = Self{ .norm_data = &data };
366 defer n.deinit();
367 317
368 var result = try n.nfkd(allocator, "Hello World!"); 318 var result = try n.nfkd(allocator, "Hello World!");
369 defer result.deinit(); 319 defer result.deinit();
@@ -373,10 +323,9 @@ test "nfkd ASCII / no-alloc" {
373 323
374test "nfkd !ASCII / alloc" { 324test "nfkd !ASCII / alloc" {
375 const allocator = testing.allocator; 325 const allocator = testing.allocator;
376 var norm_data = try NormData.init(allocator); 326 var data = try NormData.init(allocator);
377 defer norm_data.deinit(); 327 defer data.deinit();
378 var n = try init(allocator, &norm_data); 328 var n = Self{ .norm_data = &data };
379 defer n.deinit();
380 329
381 var result = try n.nfkd(allocator, "Héllo World! \u{3d3}"); 330 var result = try n.nfkd(allocator, "Héllo World! \u{3d3}");
382 defer result.deinit(); 331 defer result.deinit();
@@ -529,10 +478,9 @@ fn nfxc(self: Self, allocator: std.mem.Allocator, str: []const u8, form: Form) !
529 478
530test "nfc" { 479test "nfc" {
531 const allocator = testing.allocator; 480 const allocator = testing.allocator;
532 var norm_data = try NormData.init(allocator); 481 var data = try NormData.init(allocator);
533 defer norm_data.deinit(); 482 defer data.deinit();
534 var n = try init(allocator, &norm_data); 483 var n = Self{ .norm_data = &data };
535 defer n.deinit();
536 484
537 var result = try n.nfc(allocator, "Complex char: \u{3D2}\u{301}"); 485 var result = try n.nfc(allocator, "Complex char: \u{3D2}\u{301}");
538 defer result.deinit(); 486 defer result.deinit();
@@ -542,10 +490,9 @@ test "nfc" {
542 490
543test "nfkc" { 491test "nfkc" {
544 const allocator = testing.allocator; 492 const allocator = testing.allocator;
545 var norm_data = try NormData.init(allocator); 493 var data = try NormData.init(allocator);
546 defer norm_data.deinit(); 494 defer data.deinit();
547 var n = try init(allocator, &norm_data); 495 var n = Self{ .norm_data = &data };
548 defer n.deinit();
549 496
550 var result = try n.nfkc(allocator, "Complex char: \u{03A5}\u{0301}"); 497 var result = try n.nfkc(allocator, "Complex char: \u{03A5}\u{0301}");
551 defer result.deinit(); 498 defer result.deinit();
@@ -603,10 +550,9 @@ pub fn eql(self: Self, allocator: std.mem.Allocator, a: []const u8, b: []const u
603 550
604test "eql" { 551test "eql" {
605 const allocator = testing.allocator; 552 const allocator = testing.allocator;
606 var norm_data = try NormData.init(allocator); 553 var data = try NormData.init(allocator);
607 defer norm_data.deinit(); 554 defer data.deinit();
608 var n = try init(allocator, &norm_data); 555 var n = Self{ .norm_data = &data };
609 defer n.deinit();
610 556
611 try std.testing.expect(try n.eql(allocator, "foé", "foe\u{0301}")); 557 try std.testing.expect(try n.eql(allocator, "foé", "foe\u{0301}"));
612 try std.testing.expect(try n.eql(allocator, "foϓ", "fo\u{03D2}\u{0301}")); 558 try std.testing.expect(try n.eql(allocator, "foϓ", "fo\u{03D2}\u{0301}"));
@@ -672,10 +618,9 @@ pub fn eqlCaseless(self: Self, allocator: std.mem.Allocator, a: []const u8, b: [
672 618
673test "eqlCaseless" { 619test "eqlCaseless" {
674 const allocator = testing.allocator; 620 const allocator = testing.allocator;
675 var norm_data = try NormData.init(allocator); 621 var data = try NormData.init(allocator);
676 defer norm_data.deinit(); 622 defer data.deinit();
677 var n = try init(allocator, &norm_data); 623 var n = Self{ .norm_data = &data };
678 defer n.deinit();
679 624
680 try std.testing.expect(try n.eqlCaseless(allocator, "Foϓ", "fo\u{03D2}\u{0301}")); 625 try std.testing.expect(try n.eqlCaseless(allocator, "Foϓ", "fo\u{03D2}\u{0301}"));
681 try std.testing.expect(try n.eqlCaseless(allocator, "FOÉ", "foe\u{0301}")); // foÉ == foé 626 try std.testing.expect(try n.eqlCaseless(allocator, "FOÉ", "foe\u{0301}")); // foÉ == foé
@@ -709,10 +654,9 @@ pub fn isFcd(self: Self, str: []const u8) bool {
709 654
710test "isFcd" { 655test "isFcd" {
711 const allocator = testing.allocator; 656 const allocator = testing.allocator;
712 var norm_data = try NormData.init(allocator); 657 var data = try NormData.init(allocator);
713 defer norm_data.deinit(); 658 defer data.deinit();
714 var n = try init(allocator, &norm_data); 659 var n = Self{ .norm_data = &data };
715 defer n.deinit();
716 660
717 const is_nfc = "José \u{3D3}"; 661 const is_nfc = "José \u{3D3}";
718 try std.testing.expect(n.isFcd(is_nfc)); 662 try std.testing.expect(n.isFcd(is_nfc));
@@ -729,10 +673,9 @@ test "Unicode normalization tests" {
729 defer arena.deinit(); 673 defer arena.deinit();
730 var allocator = arena.allocator(); 674 var allocator = arena.allocator();
731 675
732 var norm_data = try NormData.init(allocator); 676 var data = try NormData.init(allocator);
733 defer norm_data.deinit(); 677 defer data.deinit();
734 var n = try init(allocator, &norm_data); 678 var n = Self{ .norm_data = &data };
735 defer n.deinit();
736 679
737 var file = try std.fs.cwd().openFile("data/unicode/NormalizationTest.txt", .{}); 680 var file = try std.fs.cwd().openFile("data/unicode/NormalizationTest.txt", .{});
738 defer file.close(); 681 defer file.close();
diff --git a/src/DisplayWidthData.zig b/src/WidthData.zig
index 32f8658..32f8658 100644
--- a/src/DisplayWidthData.zig
+++ b/src/WidthData.zig
diff --git a/src/autogen/compatibility_decompositions.txt.deflate b/src/autogen/compatibility_decompositions.txt.deflate
deleted file mode 100644
index 0370b4c..0000000
--- a/src/autogen/compatibility_decompositions.txt.deflate
+++ /dev/null
Binary files differ
diff --git a/src/main.zig b/src/main.zig
index 05c2ea4..2c2cf8c 100644
--- a/src/main.zig
+++ b/src/main.zig
@@ -16,9 +16,9 @@ const std = @import("std");
16// const ascii = @import("ascii"); 16// const ascii = @import("ascii");
17// const ascii = std.ascii; 17// const ascii = std.ascii;
18 18
19// const norm = @import("ziglyph").Normalizer; 19// const Normalizer = @import("ziglyph").Normalizer;
20const NormData = @import("Normalizer").NormData; 20const NormData = @import("Normalizer").NormData;
21const norm = @import("Normalizer"); 21const Normalizer = @import("Normalizer");
22 22
23pub fn main() !void { 23pub fn main() !void {
24 var args_iter = std.process.args(); 24 var args_iter = std.process.args();
@@ -32,10 +32,9 @@ pub fn main() !void {
32 const input = try std.fs.cwd().readFileAlloc(allocator, in_path, std.math.maxInt(u32)); 32 const input = try std.fs.cwd().readFileAlloc(allocator, in_path, std.math.maxInt(u32));
33 defer allocator.free(input); 33 defer allocator.free(input);
34 34
35 var norm_data = try NormData.init(allocator); 35 var data = try NormData.init(allocator);
36 defer norm_data.deinit(); 36 defer data.deinit();
37 var n = try norm.init(allocator, &norm_data); 37 var n = Normalizer{ .norm_data = &data };
38 defer n.deinit();
39 // var n = try norm.init(allocator); 38 // var n = try norm.init(allocator);
40 // defer n.deinit(); 39 // defer n.deinit();
41 40