summary | refs | log | tree | commit | diff
path: root/codegen/compat.zig
blob: a9d1f927759394cb7d07cbaef6c0edce07abb626 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
const std = @import("std");
const builtin = @import("builtin");

/// Number of consecutive code points grouped into one block.
const block_size = 256;
/// One block: a (possibly empty) slice of code points for each of
/// `block_size` consecutive code points.
const Block = [block_size][]const u21;

/// Hashing and equality for `Block` keys, based on the contents of the
/// slices a block holds rather than on where those slices live in memory.
const BlockMapContext = struct {
    /// Hashes every code point reachable from `key`.
    pub fn hash(_: @This(), key: Block) u64 {
        var state = std.hash.Wyhash.init(0);
        std.hash.autoHashStrat(&state, key, .DeepRecursive);
        return state.final();
    }

    /// Two blocks are equal when each pair of corresponding slices has the
    /// same length and the same code points.
    pub fn eql(_: @This(), lhs: Block, rhs: Block) bool {
        for (lhs, rhs) |lhs_cps, rhs_cps| {
            if (!std.mem.eql(u21, lhs_cps, rhs_cps)) return false;
        }
        return true;
    }
};

/// Map from a whole `Block` (compared by content) to a `u16` value.
const BlockMap = std.HashMap(
    Block,
    u16,
    BlockMapContext,
    std.hash_map.default_max_load_percentage,
);

/// Generates the compatibility-decomposition lookup tables.
///
/// Reads the embedded `UnicodeData.txt`, collects every decomposition whose
/// field 5 starts with `<` (a tagged, i.e. compatibility, mapping), groups
/// the code point range 0..0x10FFFF into blocks of `block_size` entries,
/// deduplicates identical blocks, and writes the two resulting tables
/// (`s1`, `s2`) as Zig source to the path given as the first command-line
/// argument.
///
/// Panics if no output path argument is supplied. Returns an error on
/// malformed input (`error.MissingCodePoint`, `error.DecompositionTooLong`,
/// integer parse errors), when the stage-2 table no longer fits a `u16`
/// offset (`error.TooManyBlocks`), and on allocation or file I/O failure.
pub fn main() anyerror!void {
    // Upper bound on the number of code points in a single decomposition.
    // NOTE(review): presumably sized to the longest mapping in the data file;
    // confirm when updating UnicodeData.txt.
    const max_decomposition_len = 18;

    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const allocator = arena.allocator();

    // Process UnicodeData.txt
    var in_reader = std.io.Reader.fixed(@embedFile("UnicodeData.txt"));
    var args_iter = try std.process.argsWithAllocator(allocator);
    defer args_iter.deinit();
    _ = args_iter.skip(); // program name
    const output_path = args_iter.next() orelse @panic("No output file arg!");

    var compat_map = std.AutoHashMap(u21, []u21).init(allocator);
    defer compat_map.deinit();

    while (in_reader.takeDelimiterInclusive('\n')) |line| {
        if (line.len == 0) continue;

        var field_iter = std.mem.splitScalar(u8, line, ';');
        // Stays `null` until field 0 has been parsed, so a record with an
        // empty code point field can never be stored under an uninitialized
        // key.
        var cp: ?u21 = null;

        var i: usize = 0;
        while (field_iter.next()) |field| : (i += 1) {
            if (field.len == 0) continue;

            switch (i) {
                0 => {
                    cp = try std.fmt.parseInt(u21, field, 16);
                },

                5 => {
                    // Not compatibility.
                    if (field[0] != '<') continue;

                    const key = cp orelse return error.MissingCodePoint;

                    var cp_iter = std.mem.tokenizeScalar(u8, field, ' ');
                    _ = cp_iter.next(); // <compat type>

                    var cps: [max_decomposition_len]u21 = undefined;
                    var len: usize = 0;

                    while (cp_iter.next()) |cp_str| : (len += 1) {
                        // Refuse to write past the fixed buffer instead of
                        // relying on a safety-checked panic.
                        if (len == cps.len) return error.DecompositionTooLong;
                        cps[len] = try std.fmt.parseInt(u21, cp_str, 16);
                    }

                    // The stack buffer is reused per record; keep a copy that
                    // lives as long as the arena.
                    const slice = try allocator.dupe(u21, cps[0..len]);
                    try compat_map.put(key, slice);
                },

                else => {},
            }
        }
    } else |err| switch (err) {
        error.EndOfStream => {},
        else => {
            return err;
        },
    }

    // Build multi-tiered lookup tables for compatibility decompositions
    var blocks_map = BlockMap.init(allocator);
    defer blocks_map.deinit();

    // stage1[block index] -> offset of that block's first entry in stage2.
    var stage1 = std.array_list.Managed(u16).init(allocator);
    defer stage1.deinit();

    // stage2 holds the entries of every distinct block, back to back.
    var stage2 = std.array_list.Managed([]const u21).init(allocator);
    defer stage2.deinit();

    var block: Block = [_][]const u21{&[_]u21{}} ** block_size;
    var block_len: u16 = 0;

    for (0..0x110000) |i| {
        const cp: u21 = @intCast(i);
        const compat: []const u21 = compat_map.get(cp) orelse &[_]u21{};

        block[block_len] = compat;
        block_len += 1;

        // Keep filling until the block is complete (or the range ends).
        if (block_len < block_size and cp != 0x10ffff) continue;

        const gop = try blocks_map.getOrPut(block);
        if (!gop.found_existing) {
            // Stage-1 entries are u16, so the offset must fit; fail loudly
            // rather than emit a truncated table.
            gop.value_ptr.* = std.math.cast(u16, stage2.items.len) orelse
                return error.TooManyBlocks;
            try stage2.appendSlice(&block);
        }

        try stage1.append(gop.value_ptr.*);
        block_len = 0;
    }

    // Write out
    var write_buf: [4096]u8 = undefined;
    var out_file = try std.fs.cwd().createFile(output_path, .{});
    defer out_file.close();
    var writer = out_file.writer(&write_buf);

    try writer.interface.print(
        \\//! This file is auto-generated. Do not edit.
        \\
        \\pub const s1: [{}]u16 = .{{
    , .{stage1.items.len});
    for (stage1.items) |entry| try writer.interface.print("{}, ", .{entry});

    try writer.interface.print(
        \\
        \\}};
        \\
        \\pub const s2: [{}][]const u21 = .{{
    , .{stage2.items.len});
    for (stage2.items) |entry| {
        try writer.interface.print("&.{any}, ", .{entry});
    }

    try writer.interface.writeAll(
        \\};
    );

    // The writer is buffered; nothing is guaranteed to reach the file until
    // this flush succeeds.
    try writer.interface.flush();
}