diff options
| author | 2024-02-12 19:04:50 -0400 | |
|---|---|---|
| committer | 2024-02-12 19:04:50 -0400 | |
| commit | d2a38e9c2952ec4b003a5ba58f70fc21fcf088c5 (patch) | |
| tree | 7b48d02e3505af142128c484fe76b7aee9091cf9 /src/trie.zig | |
| parent | Using Trie super slow (diff) | |
| download | zg-d2a38e9c2952ec4b003a5ba58f70fc21fcf088c5.tar.gz zg-d2a38e9c2952ec4b003a5ba58f70fc21fcf088c5.tar.xz zg-d2a38e9c2952ec4b003a5ba58f70fc21fcf088c5.zip | |
Flat array
Diffstat (limited to 'src/trie.zig')
| -rw-r--r-- | src/trie.zig | 112 |
1 files changed, 0 insertions, 112 deletions
diff --git a/src/trie.zig b/src/trie.zig deleted file mode 100644 index 8d2f258..0000000 --- a/src/trie.zig +++ /dev/null | |||
| @@ -1,112 +0,0 @@ | |||
| 1 | const std = @import("std"); | ||
| 2 | const mem = std.mem; | ||
| 3 | |||
| 4 | const gbp = @import("ziglyph").grapheme_break; | ||
| 5 | |||
| 6 | pub const Prop = enum { | ||
| 7 | none, | ||
| 8 | control, | ||
| 9 | extend, | ||
| 10 | hangul_l, | ||
| 11 | hangul_lv, | ||
| 12 | hangul_lvt, | ||
| 13 | hangul_v, | ||
| 14 | hangul_t, | ||
| 15 | prepend, | ||
| 16 | regional, | ||
| 17 | spacing, | ||
| 18 | zwj, | ||
| 19 | |||
| 20 | pub fn forCodePoint(cp: u21) Prop { | ||
| 21 | if (gbp.isControl(cp)) return .control; | ||
| 22 | if (gbp.isExtend(cp)) return .extend; | ||
| 23 | if (gbp.isL(cp)) return .hangul_l; | ||
| 24 | if (gbp.isLv(cp)) return .hangul_lv; | ||
| 25 | if (gbp.isLvt(cp)) return .hangul_lvt; | ||
| 26 | if (gbp.isT(cp)) return .hangul_t; | ||
| 27 | if (gbp.isV(cp)) return .hangul_v; | ||
| 28 | if (gbp.isPrepend(cp)) return .prepend; | ||
| 29 | if (gbp.isRegionalIndicator(cp)) return .regional; | ||
| 30 | if (gbp.isSpacingmark(cp)) return .spacing; | ||
| 31 | if (gbp.isZwj(cp)) return .zwj; | ||
| 32 | |||
| 33 | return .none; | ||
| 34 | } | ||
| 35 | }; | ||
| 36 | |||
| 37 | pub const Node = struct { | ||
| 38 | children: [256]?*Node = [_]?*Node{null} ** 256, | ||
| 39 | value: ?Prop = null, | ||
| 40 | }; | ||
| 41 | |||
| 42 | pub const Trie = struct { | ||
| 43 | allocator: mem.Allocator, | ||
| 44 | root: Node, | ||
| 45 | node_count: usize = 0, | ||
| 46 | |||
| 47 | fn asBytes(cp: u24) []const u8 { | ||
| 48 | const bytes: [3]u8 = @bitCast(cp); | ||
| 49 | |||
| 50 | return if (bytes[0] < 128) | ||
| 51 | bytes[0..1] | ||
| 52 | else if (bytes[1] == 0) | ||
| 53 | bytes[0..1] | ||
| 54 | else if (bytes[2] == 0) | ||
| 55 | bytes[0..2] | ||
| 56 | else | ||
| 57 | bytes[0..]; | ||
| 58 | } | ||
| 59 | |||
| 60 | pub fn put(self: *Trie, cp: u24, v: Prop) !void { | ||
| 61 | const s = asBytes(cp); | ||
| 62 | var current: *Node = &self.root; | ||
| 63 | |||
| 64 | for (s, 0..) |b, i| { | ||
| 65 | if (current.children[b]) |n| { | ||
| 66 | if (i == s.len - 1) { | ||
| 67 | n.value = v; | ||
| 68 | return; | ||
| 69 | } | ||
| 70 | |||
| 71 | current = n; | ||
| 72 | continue; | ||
| 73 | } | ||
| 74 | |||
| 75 | self.node_count += 1; | ||
| 76 | const new_node = try self.allocator.create(Node); | ||
| 77 | new_node.* = .{ .value = if (i == s.len - 1) v else null }; | ||
| 78 | current.children[b] = new_node; | ||
| 79 | current = new_node; | ||
| 80 | } | ||
| 81 | } | ||
| 82 | |||
| 83 | pub fn get(self: Trie, cp: u24) ?Prop { | ||
| 84 | const s = asBytes(cp); | ||
| 85 | var current = &self.root; | ||
| 86 | |||
| 87 | return for (s, 0..) |b, i| { | ||
| 88 | if (current.children[b]) |n| { | ||
| 89 | if (i == s.len - 1) break if (n.value) |v| v else null; | ||
| 90 | current = n; | ||
| 91 | } | ||
| 92 | } else null; | ||
| 93 | } | ||
| 94 | }; | ||
| 95 | |||
| 96 | test "Trie works" { | ||
| 97 | var arena = std.heap.ArenaAllocator.init(std.testing.allocator); | ||
| 98 | defer arena.deinit(); | ||
| 99 | const allocator = arena.allocator(); | ||
| 100 | |||
| 101 | var trie = Trie{ .allocator = allocator, .root = .{} }; | ||
| 102 | |||
| 103 | const cp_1: u21 = '\u{10ffff}'; | ||
| 104 | const cp_2: u21 = '\u{10ff}'; | ||
| 105 | const cp_3: u21 = '\u{10}'; | ||
| 106 | |||
| 107 | try trie.put(cp_1, .control); | ||
| 108 | try trie.put(cp_3, .zwj); | ||
| 109 | try std.testing.expectEqual(@as(?Prop, .control), trie.get(cp_1)); | ||
| 110 | try std.testing.expectEqual(@as(?Prop, null), trie.get(cp_2)); | ||
| 111 | try std.testing.expectEqual(@as(?Prop, .zwj), trie.get(cp_3)); | ||
| 112 | } | ||