From ba5d9081b479e95ffa7f3baf751beedd370cec14 Mon Sep 17 00:00:00 2001 From: Sam Atman Date: Wed, 4 Feb 2026 18:01:36 -0500 Subject: Normalization and case folding Both of which deserve some further attention. --- src/comptime_map.zig | 176 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 176 insertions(+) create mode 100644 src/comptime_map.zig (limited to 'src/comptime_map.zig') diff --git a/src/comptime_map.zig b/src/comptime_map.zig new file mode 100644 index 0000000..b2c4d11 --- /dev/null +++ b/src/comptime_map.zig @@ -0,0 +1,176 @@ +//! A copypasta of Vexu's comptime hashmap: +//! +//! https://github.com/Vexu/comptime_hash_map +//! +//! (License at base of file, thanks Vexu!) +//! + +/// A comptime hashmap constructed with automatically selected hash and eql functions. +pub fn AutoComptimeHashMap(comptime K: type, comptime V: type, comptime values: anytype) type { + return ComptimeHashMap(K, V, hash_map.AutoContext(K), values); +} + +/// Builtin hashmap for strings as keys. +pub fn ComptimeStringHashMap(comptime V: type, comptime values: anytype) type { + return ComptimeHashMap([]const u8, V, hash_map.StringContext, values); +} + +/// A hashmap which is constructed at compile time from constant values. +/// Intended to be used as a faster lookup table. +pub fn ComptimeHashMap(comptime K: type, comptime V: type, comptime ctx: type, comptime values: anytype) type { + std.debug.assert(values.len != 0); + @setEvalBranchQuota(1000 * values.len); + + const Entry = struct { + key: K = undefined, + val: V = undefined, + used: bool = false, + }; + + // ensure that the hash map will be at most 60% full + const size = if (@import("builtin").is_test) values.len else math.ceilPowerOfTwo(usize, values.len * 5 / 3) catch unreachable; + comptime var slots = [1]Entry{.{}} ** size; + comptime var distance: [size]usize = .{0} ** size; + + comptime var max_distance_from_start_index = 0; + + slot_loop: for (values) |kv| { + var key: K = kv.@"0"; + var value: V = kv.@"1"; + + const start_index = @as(usize, ctx.hash(undefined, key)) & (size - 1); + + var roll_over = 0; + var distance_from_start_index = 0; + while (roll_over < size) : ({ + roll_over += 1; + distance_from_start_index += 1; + }) { + const index = (start_index + roll_over) & (size - 1); + const entry = &slots[index]; + + if (entry.used and !ctx.eql(undefined, entry.key, key)) { + if (distance[index] < distance_from_start_index) { + // robin hood to the rescue + const tmp = slots[index]; + max_distance_from_start_index = @max(max_distance_from_start_index, distance_from_start_index); + entry.* = .{ + .used = true, + .key = key, + .val = value, + }; + const tmp_distance = distance[index]; + distance[index] = distance_from_start_index; + key = tmp.key; + value = tmp.val; + distance_from_start_index = tmp_distance; + } + continue; + } + + max_distance_from_start_index = @max(distance_from_start_index, max_distance_from_start_index); + entry.* = .{ + .used = true, + .key = key, + .val = value, + }; + distance[index] = distance_from_start_index; + continue :slot_loop; + } + unreachable; // put into a full map + } + + return struct { + const entries = slots; + + pub fn has(key: K) bool { + return get(key) != null; + } + + pub fn get(key: K) ?*const V { + const start_index = @as(usize, ctx.hash(undefined, key)) & (size - 1); + { + var roll_over: usize = 0; + while (roll_over <= max_distance_from_start_index) : (roll_over += 1) { + const index = (start_index + roll_over) & (size - 1); + const entry = &entries[index]; + + if (!entry.used) return null; + if (ctx.eql(undefined, entry.key, key)) return &entry.val; + } + } + return null; + } + }; +} + +test "basic usage" { + const map = ComptimeStringHashMap(usize, .{ + .{ "foo", 1 }, + .{ "bar", 2 }, + .{ "baz", 3 }, + .{ "quux", 4 }, + .{ "Foo", 1 }, + .{ "Bar", 2 }, + .{ "Baz", 3 }, + .{ "Quux", 4 }, + }); + + try testing.expect(map.has("foo")); + try testing.expect(map.has("bar")); + try testing.expect(map.has("Foo")); + try testing.expect(map.has("Bar")); + try testing.expect(!map.has("zig")); + try testing.expect(!map.has("ziguana")); + + try testing.expect(map.get("baz").?.* == 3); + try testing.expect(map.get("quux").?.* == 4); + try testing.expect(map.get("nah") == null); + try testing.expect(map.get("...") == null); +} + +test "auto comptime hash map" { + const map = AutoComptimeHashMap(usize, []const u8, .{ + .{ 1, "foo" }, + .{ 2, "bar" }, + .{ 3, "baz" }, + .{ 45, "quux" }, + }); + + try testing.expect(map.has(1)); + try testing.expect(map.has(2)); + try testing.expect(!map.has(4)); + try testing.expect(!map.has(1_000_000)); + + try testing.expectEqualStrings("foo", map.get(1).?.*); + try testing.expectEqualStrings("bar", map.get(2).?.*); + try testing.expect(map.get(4) == null); + try testing.expect(map.get(4_000_000) == null); +} + +const std = @import("std"); +const hash_map = std.hash_map; +const testing = std.testing; +const math = std.math; + +// MIT License +// +// Copyright (c) 2020 Veikka Tuominen +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. -- cgit v1.2.3