summaryrefslogtreecommitdiff
path: root/src/trie.zig
diff options
context:
space:
mode:
authorGravatar Jose Colon Rodriguez2024-02-12 19:04:50 -0400
committerGravatar Jose Colon Rodriguez2024-02-12 19:04:50 -0400
commitd2a38e9c2952ec4b003a5ba58f70fc21fcf088c5 (patch)
tree7b48d02e3505af142128c484fe76b7aee9091cf9 /src/trie.zig
parentUsing Trie super slow (diff)
downloadzg-d2a38e9c2952ec4b003a5ba58f70fc21fcf088c5.tar.gz
zg-d2a38e9c2952ec4b003a5ba58f70fc21fcf088c5.tar.xz
zg-d2a38e9c2952ec4b003a5ba58f70fc21fcf088c5.zip
Flat array
Diffstat (limited to 'src/trie.zig')
-rw-r--r--src/trie.zig112
1 files changed, 0 insertions, 112 deletions
diff --git a/src/trie.zig b/src/trie.zig
deleted file mode 100644
index 8d2f258..0000000
--- a/src/trie.zig
+++ /dev/null
@@ -1,112 +0,0 @@
1const std = @import("std");
2const mem = std.mem;
3
4const gbp = @import("ziglyph").grapheme_break;
5
6pub const Prop = enum {
7 none,
8 control,
9 extend,
10 hangul_l,
11 hangul_lv,
12 hangul_lvt,
13 hangul_v,
14 hangul_t,
15 prepend,
16 regional,
17 spacing,
18 zwj,
19
20 pub fn forCodePoint(cp: u21) Prop {
21 if (gbp.isControl(cp)) return .control;
22 if (gbp.isExtend(cp)) return .extend;
23 if (gbp.isL(cp)) return .hangul_l;
24 if (gbp.isLv(cp)) return .hangul_lv;
25 if (gbp.isLvt(cp)) return .hangul_lvt;
26 if (gbp.isT(cp)) return .hangul_t;
27 if (gbp.isV(cp)) return .hangul_v;
28 if (gbp.isPrepend(cp)) return .prepend;
29 if (gbp.isRegionalIndicator(cp)) return .regional;
30 if (gbp.isSpacingmark(cp)) return .spacing;
31 if (gbp.isZwj(cp)) return .zwj;
32
33 return .none;
34 }
35};
36
37pub const Node = struct {
38 children: [256]?*Node = [_]?*Node{null} ** 256,
39 value: ?Prop = null,
40};
41
42pub const Trie = struct {
43 allocator: mem.Allocator,
44 root: Node,
45 node_count: usize = 0,
46
47 fn asBytes(cp: u24) []const u8 {
48 const bytes: [3]u8 = @bitCast(cp);
49
50 return if (bytes[0] < 128)
51 bytes[0..1]
52 else if (bytes[1] == 0)
53 bytes[0..1]
54 else if (bytes[2] == 0)
55 bytes[0..2]
56 else
57 bytes[0..];
58 }
59
60 pub fn put(self: *Trie, cp: u24, v: Prop) !void {
61 const s = asBytes(cp);
62 var current: *Node = &self.root;
63
64 for (s, 0..) |b, i| {
65 if (current.children[b]) |n| {
66 if (i == s.len - 1) {
67 n.value = v;
68 return;
69 }
70
71 current = n;
72 continue;
73 }
74
75 self.node_count += 1;
76 const new_node = try self.allocator.create(Node);
77 new_node.* = .{ .value = if (i == s.len - 1) v else null };
78 current.children[b] = new_node;
79 current = new_node;
80 }
81 }
82
83 pub fn get(self: Trie, cp: u24) ?Prop {
84 const s = asBytes(cp);
85 var current = &self.root;
86
87 return for (s, 0..) |b, i| {
88 if (current.children[b]) |n| {
89 if (i == s.len - 1) break if (n.value) |v| v else null;
90 current = n;
91 }
92 } else null;
93 }
94};
95
96test "Trie works" {
97 var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
98 defer arena.deinit();
99 const allocator = arena.allocator();
100
101 var trie = Trie{ .allocator = allocator, .root = .{} };
102
103 const cp_1: u21 = '\u{10ffff}';
104 const cp_2: u21 = '\u{10ff}';
105 const cp_3: u21 = '\u{10}';
106
107 try trie.put(cp_1, .control);
108 try trie.put(cp_3, .zwj);
109 try std.testing.expectEqual(@as(?Prop, .control), trie.get(cp_1));
110 try std.testing.expectEqual(@as(?Prop, null), trie.get(cp_2));
111 try std.testing.expectEqual(@as(?Prop, .zwj), trie.get(cp_3));
112}