summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorGravatar Jose Colon Rodriguez2024-02-12 10:51:34 -0400
committerGravatar Jose Colon Rodriguez2024-02-12 10:51:34 -0400
commitbfb31cbc33716220b42bb398471840a4fbed0d89 (patch)
tree3f3614621f34f066bd192ce93d3f82810205e5ca /src
parentCreated Trie (diff)
downloadzg-bfb31cbc33716220b42bb398471840a4fbed0d89.tar.gz
zg-bfb31cbc33716220b42bb398471840a4fbed0d89.tar.xz
zg-bfb31cbc33716220b42bb398471840a4fbed0d89.zip
Using Trie super slow
Diffstat (limited to 'src')
-rw-r--r--src/Grapheme.zig7
-rw-r--r--src/gbp.zig67
-rw-r--r--src/main.zig6
-rw-r--r--src/trie.zig49
4 files changed, 117 insertions, 12 deletions
diff --git a/src/Grapheme.zig b/src/Grapheme.zig
index a8a7638..73f6d57 100644
--- a/src/Grapheme.zig
+++ b/src/Grapheme.zig
@@ -1,13 +1,15 @@
1//! `Grapheme` represents a Unicode grapheme cluster by its length and offset in the source bytes. 1//! `Grapheme` represents a Unicode grapheme cluster by its length and offset in the source bytes.
2 2
3const std = @import("std"); 3const std = @import("std");
4const mem = std.mem;
4const unicode = std.unicode; 5const unicode = std.unicode;
5 6
6const CodePoint = @import("ziglyph").CodePoint; 7const CodePoint = @import("ziglyph").CodePoint;
7const CodePointIterator = CodePoint.CodePointIterator; 8const CodePointIterator = CodePoint.CodePointIterator;
8const emoji = @import("ziglyph").emoji; 9const emoji = @import("ziglyph").emoji;
9 10
10const gbp = @import("gbp"); 11// const gbp = @import("gbp");
12const gbp = @import("gbp.zig");
11 13
12pub const Grapheme = @This(); 14pub const Grapheme = @This();
13 15
@@ -32,7 +34,8 @@ pub const GraphemeIterator = struct {
32 const Self = @This(); 34 const Self = @This();
33 35
34 /// Assumes `src` is valid UTF-8. 36 /// Assumes `src` is valid UTF-8.
35 pub fn init(str: []const u8) Self { 37 pub fn init(allocator: mem.Allocator, str: []const u8) !Self {
38 try gbp.init(allocator);
36 var self = Self{ .cp_iter = CodePointIterator{ .bytes = str } }; 39 var self = Self{ .cp_iter = CodePointIterator{ .bytes = str } };
37 self.buf[1] = self.cp_iter.next(); 40 self.buf[1] = self.cp_iter.next();
38 41
diff --git a/src/gbp.zig b/src/gbp.zig
new file mode 100644
index 0000000..fa4ad54
--- /dev/null
+++ b/src/gbp.zig
@@ -0,0 +1,67 @@
1const std = @import("std");
2const mem = std.mem;
3
4const gbp = @import("ziglyph").grapheme_break;
5const Trie = @import("trie.zig").Trie;
6const Prop = @import("trie.zig").Prop;
7
8var trie: Trie = undefined;
9
10pub fn init(allocator: mem.Allocator) !void {
11 trie = .{ .allocator = allocator, .root = .{} };
12
13 for ('\u{0}'..'\u{10ffff}') |i| {
14 const cp: u21 = @intCast(i);
15 const prop = Prop.forCodePoint(cp);
16 if (prop == .none) continue;
17 try trie.put(cp, prop);
18 }
19
20 const prop = Prop.forCodePoint('\u{10ffff}');
21 if (prop == .none) return;
22 try trie.put('\u{10ffff}', prop);
23}
24
25inline fn getProp(cp: u21) Prop {
26 return if (trie.get(cp)) |prop| prop else .none;
27}
28
29pub inline fn isControl(cp: u21) bool {
30 return getProp(cp) == .control;
31}
32
33pub inline fn isExtend(cp: u21) bool {
34 return getProp(cp) == .extend;
35}
36
37pub inline fn isL(cp: u21) bool {
38 return getProp(cp) == .hangul_l;
39}
40pub inline fn isLv(cp: u21) bool {
41 return getProp(cp) == .hangul_lv;
42}
43pub inline fn isLvt(cp: u21) bool {
44 return getProp(cp) == .hangul_lvt;
45}
46pub inline fn isV(cp: u21) bool {
47 return getProp(cp) == .hangul_v;
48}
49pub inline fn isT(cp: u21) bool {
50 return getProp(cp) == .hangul_t;
51}
52
53pub inline fn isPrepend(cp: u21) bool {
54 return getProp(cp) == .prepend;
55}
56
57pub inline fn isRegionalIndicator(cp: u21) bool {
58 return getProp(cp) == .regional;
59}
60
61pub inline fn isSpacingmark(cp: u21) bool {
62 return getProp(cp) == .spacing;
63}
64
65pub inline fn isZwj(cp: u21) bool {
66 return getProp(cp) == .zwj;
67}
diff --git a/src/main.zig b/src/main.zig
index b517641..6cc9fe4 100644
--- a/src/main.zig
+++ b/src/main.zig
@@ -5,8 +5,12 @@ const GraphemeIterator = @import("Grapheme.zig").GraphemeIterator;
5const input = @embedFile("lang_mix.txt"); 5const input = @embedFile("lang_mix.txt");
6 6
7pub fn main() !void { 7pub fn main() !void {
8 var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
9 defer arena.deinit();
10 const allocator = arena.allocator();
11
8 var result: usize = 0; 12 var result: usize = 0;
9 var iter = GraphemeIterator.init(input); 13 var iter = try GraphemeIterator.init(allocator, input);
10 14
11 var timer = try std.time.Timer.start(); 15 var timer = try std.time.Timer.start();
12 16
diff --git a/src/trie.zig b/src/trie.zig
index ee77954..8d2f258 100644
--- a/src/trie.zig
+++ b/src/trie.zig
@@ -1,11 +1,42 @@
1const std = @import("std"); 1const std = @import("std");
2const mem = std.mem; 2const mem = std.mem;
3 3
4pub const Color = enum { red, blue }; 4const gbp = @import("ziglyph").grapheme_break;
5
6pub const Prop = enum {
7 none,
8 control,
9 extend,
10 hangul_l,
11 hangul_lv,
12 hangul_lvt,
13 hangul_v,
14 hangul_t,
15 prepend,
16 regional,
17 spacing,
18 zwj,
19
20 pub fn forCodePoint(cp: u21) Prop {
21 if (gbp.isControl(cp)) return .control;
22 if (gbp.isExtend(cp)) return .extend;
23 if (gbp.isL(cp)) return .hangul_l;
24 if (gbp.isLv(cp)) return .hangul_lv;
25 if (gbp.isLvt(cp)) return .hangul_lvt;
26 if (gbp.isT(cp)) return .hangul_t;
27 if (gbp.isV(cp)) return .hangul_v;
28 if (gbp.isPrepend(cp)) return .prepend;
29 if (gbp.isRegionalIndicator(cp)) return .regional;
30 if (gbp.isSpacingmark(cp)) return .spacing;
31 if (gbp.isZwj(cp)) return .zwj;
32
33 return .none;
34 }
35};
5 36
6pub const Node = struct { 37pub const Node = struct {
7 children: [256]?*Node = [_]?*Node{null} ** 256, 38 children: [256]?*Node = [_]?*Node{null} ** 256,
8 value: ?Color = null, 39 value: ?Prop = null,
9}; 40};
10 41
11pub const Trie = struct { 42pub const Trie = struct {
@@ -26,7 +57,7 @@ pub const Trie = struct {
26 bytes[0..]; 57 bytes[0..];
27 } 58 }
28 59
29 pub fn put(self: *Trie, cp: u24, v: Color) !void { 60 pub fn put(self: *Trie, cp: u24, v: Prop) !void {
30 const s = asBytes(cp); 61 const s = asBytes(cp);
31 var current: *Node = &self.root; 62 var current: *Node = &self.root;
32 63
@@ -49,7 +80,7 @@ pub const Trie = struct {
49 } 80 }
50 } 81 }
51 82
52 pub fn get(self: Trie, cp: u24) ?Color { 83 pub fn get(self: Trie, cp: u24) ?Prop {
53 const s = asBytes(cp); 84 const s = asBytes(cp);
54 var current = &self.root; 85 var current = &self.root;
55 86
@@ -73,9 +104,9 @@ test "Trie works" {
73 const cp_2: u21 = '\u{10ff}'; 104 const cp_2: u21 = '\u{10ff}';
74 const cp_3: u21 = '\u{10}'; 105 const cp_3: u21 = '\u{10}';
75 106
76 try trie.put(cp_1, .red); 107 try trie.put(cp_1, .control);
77 try trie.put(cp_3, .blue); 108 try trie.put(cp_3, .zwj);
78 try std.testing.expectEqual(@as(?Color, .red), trie.get(cp_1)); 109 try std.testing.expectEqual(@as(?Prop, .control), trie.get(cp_1));
79 try std.testing.expectEqual(@as(?Color, null), trie.get(cp_2)); 110 try std.testing.expectEqual(@as(?Prop, null), trie.get(cp_2));
80 try std.testing.expectEqual(@as(?Color, .blue), trie.get(cp_3)); 111 try std.testing.expectEqual(@as(?Prop, .zwj), trie.get(cp_3));
81} 112}