summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/NumericData.zig 75
-rw-r--r-- src/main.zig 32
2 files changed, 95 insertions, 12 deletions
diff --git a/src/NumericData.zig b/src/NumericData.zig
new file mode 100644
index 0000000..baf8f11
--- /dev/null
+++ b/src/NumericData.zig
@@ -0,0 +1,75 @@
1const std = @import("std");
2const builtin = @import("builtin");
3const compress = std.compress;
4const mem = std.mem;
5const testing = std.testing;
6
7allocator: mem.Allocator,
8s1: []u16 = undefined,
9s2: []u8 = undefined,
10
11const Self = @This();
12
/// Builds the two lookup tables by inflating the embedded `numeric` blob.
///
/// Stream layout, as consumed here: a `u16` entry count followed by that many
/// `u16` stage-1 entries, then a `u16` byte count followed by that many
/// stage-2 bytes. Integers are read in the target CPU's byte order.
///
/// The returned value owns both tables; release them with `deinit`.
/// Returns any allocator or decompressor error, and `error.EndOfStream` when
/// the stream ends before all announced data has been read.
pub fn init(allocator: mem.Allocator) !Self {
    const decompressor = compress.deflate.decompressor;
    const in_bytes = @embedFile("numeric");
    var in_fbs = std.io.fixedBufferStream(in_bytes);
    var in_decomp = try decompressor(allocator, in_fbs.reader(), null);
    defer in_decomp.deinit();
    var reader = in_decomp.reader();

    const endian = builtin.cpu.arch.endian();

    const stage_1_len: u16 = try reader.readInt(u16, endian);
    const s1 = try allocator.alloc(u16, stage_1_len);
    // Any failure past this point must not leak the stage-1 table.
    errdefer allocator.free(s1);
    for (s1) |*entry| entry.* = try reader.readInt(u16, endian);

    const stage_2_len: u16 = try reader.readInt(u16, endian);
    const s2 = try allocator.alloc(u8, stage_2_len);
    errdefer allocator.free(s2);
    // `readNoEof` fails on a short read instead of silently leaving the tail
    // of the stage-2 table uninitialized.
    try reader.readNoEof(s2);

    return Self{
        .allocator = allocator,
        .s1 = s1,
        .s2 = s2,
    };
}
35
/// Frees both lookup tables using the allocator stored in this value.
pub fn deinit(self: *Self) void {
    const gpa = self.allocator;
    gpa.free(self.s2);
    gpa.free(self.s1);
}
40
/// Reports whether `cp` has at least one of the numeric, digit, or decimal
/// flags set in its table entry.
pub fn isNumber(self: Self, cp: u21) bool {
    const flags = self.s2[self.s1[cp >> 8] + (cp & 0xff)];
    // 0b001 = numeric, 0b010 = digit, 0b100 = decimal.
    return flags & 0b111 != 0;
}
45
/// Reports whether the numeric flag (bit 0) is set for `cp`.
pub inline fn isNumeric(self: Self, cp: u21) bool {
    // Stage 1 is indexed by the bits of `cp` above the low byte; the low byte
    // is then added to that entry to form the stage-2 index.
    const block_start = self.s1[cp >> 8];
    const flags = self.s2[block_start + (cp & 0xff)];
    return flags & 1 != 0;
}
50
/// Reports whether the digit flag (bit 1) is set for `cp`.
pub inline fn isDigit(self: Self, cp: u21) bool {
    // Stage 1 is indexed by the bits of `cp` above the low byte; the low byte
    // is then added to that entry to form the stage-2 index.
    const block_start = self.s1[cp >> 8];
    const flags = self.s2[block_start + (cp & 0xff)];
    return flags & 2 != 0;
}
55
/// Reports whether the decimal flag (bit 2) is set for `cp`.
pub inline fn isDecimal(self: Self, cp: u21) bool {
    // Stage 1 is indexed by the bits of `cp` above the low byte; the low byte
    // is then added to that entry to form the stage-2 index.
    const block_start = self.s1[cp >> 8];
    const flags = self.s2[block_start + (cp & 0xff)];
    return flags & 4 != 0;
}
60
test "isDecimal" {
    var num_data = try init(testing.allocator);
    defer num_data.deinit();

    // isNumber: true for anything carrying one of the three flags.
    try testing.expect(num_data.isNumber('\u{277f}'));
    try testing.expect(num_data.isNumber('3'));
    try testing.expect(!num_data.isNumber('z'));

    // isNumeric: U+277F is expected to be flagged, ASCII '1' is not.
    try testing.expect(num_data.isNumeric('\u{277f}'));
    try testing.expect(!num_data.isNumeric('1'));

    // isDigit: U+2070 is expected to be flagged, ASCII '2' is not.
    try testing.expect(num_data.isDigit('\u{2070}'));
    try testing.expect(!num_data.isDigit('2'));

    // isDecimal: ASCII '3' is expected to be flagged, 'g' is not.
    try testing.expect(num_data.isDecimal('3'));
    try testing.expect(!num_data.isDecimal('g'));
}
diff --git a/src/main.zig b/src/main.zig
index 0b0d550..52d823c 100644
--- a/src/main.zig
+++ b/src/main.zig
@@ -11,18 +11,20 @@ const std = @import("std");
11// const strWidth = @import("display_width").strWidth; 11// const strWidth = @import("display_width").strWidth;
12 12
13// const CodePointIterator = @import("ziglyph").CodePointIterator; 13// const CodePointIterator = @import("ziglyph").CodePointIterator;
14// const CodePointIterator = @import("code_point").Iterator; 14const CodePointIterator = @import("code_point").Iterator;
15 15
16// const ascii = @import("ascii"); 16// const ascii = @import("ascii");
17// const ascii = std.ascii; 17// const ascii = std.ascii;
18 18
19// const Normalize = @import("ziglyph").Normalizer; 19// const Normalize = @import("ziglyph").Normalizer;
20const Normalize = @import("Normalize"); 20// const Normalize = @import("Normalize");
21 21
22// const CaseFold = @import("CaseFold"); 22// const CaseFold = @import("CaseFold");
23 23
24// const GenCatData = @import("GenCatData"); 24// const GenCatData = @import("GenCatData");
25 25
26const NumericData = @import("NumericData");
27
26pub fn main() !void { 28pub fn main() !void {
27 var args_iter = std.process.args(); 29 var args_iter = std.process.args();
28 _ = args_iter.skip(); 30 _ = args_iter.skip();
@@ -39,9 +41,9 @@ pub fn main() !void {
39 ); 41 );
40 defer allocator.free(input); 42 defer allocator.free(input);
41 43
42 var norm_data = try Normalize.NormData.init(allocator); 44 // var norm_data = try Normalize.NormData.init(allocator);
43 defer norm_data.deinit(); 45 // defer norm_data.deinit();
44 var norm = Normalize{ .norm_data = &norm_data }; 46 // var norm = Normalize{ .norm_data = &norm_data };
45 // var norm = try Normalize.init(allocator); 47 // var norm = try Normalize.init(allocator);
46 // defer norm.deinit(); 48 // defer norm.deinit();
47 49
@@ -52,10 +54,13 @@ pub fn main() !void {
52 // defer fold_data.deinit(); 54 // defer fold_data.deinit();
53 // var caser = CaseFold{ .fold_data = &fold_data }; 55 // var caser = CaseFold{ .fold_data = &fold_data };
54 56
57 var num_data = try NumericData.init(allocator);
58 defer num_data.deinit();
59
55 // var iter = GraphemeIterator.init(input, &data); 60 // var iter = GraphemeIterator.init(input, &data);
56 // defer iter.deinit(); 61 // defer iter.deinit();
57 // var iter = CodePointIterator{ .bytes = input }; 62 var iter = CodePointIterator{ .bytes = input };
58 var iter = std.mem.splitScalar(u8, input, '\n'); 63 // var iter = std.mem.splitScalar(u8, input, '\n');
59 64
60 var result: usize = 0; 65 var result: usize = 0;
61 // var prev_line: []const u8 = ""; 66 // var prev_line: []const u8 = "";
@@ -65,11 +70,11 @@ pub fn main() !void {
65 // while (iter.next()) |cp| result += codePointWidth(@intCast(cp.code)); 70 // while (iter.next()) |cp| result += codePointWidth(@intCast(cp.code));
66 // while (iter.next()) |_| result += 1; 71 // while (iter.next()) |_| result += 1;
67 // while (iter.next()) |line| result += strWidth(line, &data); 72 // while (iter.next()) |line| result += strWidth(line, &data);
68 while (iter.next()) |line| { 73 // while (iter.next()) |line| {
69 const nfc = try norm.nfc(allocator, line); 74 // const nfc = try norm.nfc(allocator, line);
70 result += nfc.slice.len; 75 // result += nfc.slice.len;
71 // nfc.deinit(); 76 // // nfc.deinit();
72 } 77 // }
73 // while (iter.next()) |cp| { 78 // while (iter.next()) |cp| {
74 // if (cp.code == 'É') std.debug.print("`{u}` Gc: {s}\n", .{ cp.code, @tagName(gencat_data.gc(cp.code)) }); 79 // if (cp.code == 'É') std.debug.print("`{u}` Gc: {s}\n", .{ cp.code, @tagName(gencat_data.gc(cp.code)) });
75 // result += 1; 80 // result += 1;
@@ -80,6 +85,9 @@ pub fn main() !void {
80 // } 85 // }
81 // prev_line = line; 86 // prev_line = line;
82 // } 87 // }
88 while (iter.next()) |cp| {
89 if (num_data.isNumberic(cp)) result += 1;
90 }
83 91
84 std.debug.print("result: {}, took: {}\n", .{ result, timer.lap() / std.time.ns_per_ms }); 92 std.debug.print("result: {}, took: {}\n", .{ result, timer.lap() / std.time.ns_per_ms });
85} 93}