summaryrefslogtreecommitdiff
path: root/src/PropsData.zig
blob: 252462e59b6e338e18181677af007490f8dcd2cf (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
const std = @import("std");
const builtin = @import("builtin");
const compress = std.compress;
const mem = std.mem;
const testing = std.testing;

/// Allocator used by `init` to allocate the four tables below; `deinit`
/// releases them through this same allocator.
allocator: mem.Allocator,
/// Stage-1 table for the core properties. Indexed by `cp >> 8`; each entry is
/// the base offset into `core_s2` to which `cp & 0xff` is added.
core_s1: []u16 = undefined,
/// Stage-2 table for the core properties. Each byte is a set of bit flags:
/// 1 = math, 2 = alphabetic, 4 = id start, 8 = id continue,
/// 16 = xid start, 32 = xid continue.
core_s2: []u8 = undefined,
/// Stage-1 table for the general properties. Indexed by `cp >> 8`; each entry
/// is the base offset into `props_s2` to which `cp & 0xff` is added.
props_s1: []u16 = undefined,
/// Stage-2 table for the general properties. Each byte is a set of bit flags:
/// 1 = whitespace, 2 = hex digit, 4 = diacritic.
props_s2: []u8 = undefined,

/// This file is itself the struct type; `Self` is the alias used for it.
const Self = @This();

/// Builds the property lookup tables from the two deflate-compressed blobs
/// embedded at build time (`core_props` and `props`).
///
/// All four table slices are allocated with `allocator`; the caller owns the
/// returned value and must release it with `deinit`. On any failure
/// (allocation, decompression, or truncated table data) everything allocated
/// so far is freed and the error is returned.
pub fn init(allocator: mem.Allocator) !Self {
    // Process DerivedCoreProperties.txt
    const core = try loadStages(allocator, @embedFile("core_props"));
    // Only one fallible step remains after this point; if it fails the core
    // tables must not leak.
    errdefer {
        allocator.free(core.s1);
        allocator.free(core.s2);
    }

    // Process PropList.txt
    const props = try loadStages(allocator, @embedFile("props"));

    return Self{
        .allocator = allocator,
        .core_s1 = core.s1,
        .core_s2 = core.s2,
        .props_s1 = props.s1,
        .props_s2 = props.s2,
    };
}

/// A decoded pair of stage tables, owned by whoever called `loadStages`.
const Stages = struct {
    s1: []u16,
    s2: []u8,
};

/// Inflates `compressed` and decodes one two-stage table from it.
///
/// Stream layout, as read below: a `u16` stage-1 length, that many `u16`
/// stage-1 entries, a `u16` stage-2 length, then that many stage-2 bytes.
/// Integers are decoded using the target CPU's native byte order.
/// Returns `error.EndOfStream` if the stream ends before either table is
/// completely filled.
fn loadStages(allocator: mem.Allocator, compressed: []const u8) !Stages {
    const endian = builtin.cpu.arch.endian();

    var fbs = std.io.fixedBufferStream(compressed);
    var decomp = try compress.deflate.decompressor(allocator, fbs.reader(), null);
    defer decomp.deinit();
    const reader = decomp.reader();

    const s1_len: u16 = try reader.readInt(u16, endian);
    const s1 = try allocator.alloc(u16, s1_len);
    errdefer allocator.free(s1);
    for (s1) |*entry| entry.* = try reader.readInt(u16, endian);

    const s2_len: u16 = try reader.readInt(u16, endian);
    const s2 = try allocator.alloc(u8, s2_len);
    errdefer allocator.free(s2);
    // `readNoEof` fails on a short read instead of silently leaving the tail
    // of the table uninitialized, which a discarded `readAll` count would do.
    try reader.readNoEof(s2);

    return Stages{ .s1 = s1, .s2 = s2 };
}

/// Frees the four lookup tables through the allocator stored at `init` time.
/// The value must not be used for lookups afterwards.
pub fn deinit(self: *const Self) void {
    const gpa = self.allocator;

    // Core property tables.
    gpa.free(self.core_s1);
    gpa.free(self.core_s2);

    // General property tables.
    gpa.free(self.props_s1);
    gpa.free(self.props_s2);
}

/// Reports whether `cp` is a mathematical symbol.
/// Fetches the flags byte for `cp` from the core two-stage table and tests
/// the bit with value 1.
pub inline fn isMath(self: Self, cp: u21) bool {
    const block_offset = self.core_s1[cp >> 8];
    const flags = self.core_s2[block_offset + (cp & 0xff)];
    return (flags & 1) != 0;
}

/// Reports whether `cp` is an alphabetic character.
/// Fetches the flags byte for `cp` from the core two-stage table and tests
/// the bit with value 2.
pub inline fn isAlphabetic(self: Self, cp: u21) bool {
    const block_offset = self.core_s1[cp >> 8];
    const flags = self.core_s2[block_offset + (cp & 0xff)];
    return (flags & 2) != 0;
}

/// Reports whether `cp` is a valid identifier start character.
/// Fetches the flags byte for `cp` from the core two-stage table and tests
/// the bit with value 4.
pub inline fn isIdStart(self: Self, cp: u21) bool {
    const block_offset = self.core_s1[cp >> 8];
    const flags = self.core_s2[block_offset + (cp & 0xff)];
    return (flags & 4) != 0;
}

/// Reports whether `cp` is a valid identifier continuation character.
/// Fetches the flags byte for `cp` from the core two-stage table and tests
/// the bit with value 8.
pub inline fn isIdContinue(self: Self, cp: u21) bool {
    const block_offset = self.core_s1[cp >> 8];
    const flags = self.core_s2[block_offset + (cp & 0xff)];
    return (flags & 8) != 0;
}

/// Reports whether `cp` is a valid extended identifier start character.
/// Fetches the flags byte for `cp` from the core two-stage table and tests
/// the bit with value 16.
pub inline fn isXidStart(self: Self, cp: u21) bool {
    const block_offset = self.core_s1[cp >> 8];
    const flags = self.core_s2[block_offset + (cp & 0xff)];
    return (flags & 16) != 0;
}

/// Reports whether `cp` is a valid extended identifier continuation character.
/// Fetches the flags byte for `cp` from the core two-stage table and tests
/// the bit with value 32.
pub inline fn isXidContinue(self: Self, cp: u21) bool {
    const block_offset = self.core_s1[cp >> 8];
    const flags = self.core_s2[block_offset + (cp & 0xff)];
    return (flags & 32) != 0;
}

/// Reports whether `cp` is a whitespace character.
/// Fetches the flags byte for `cp` from the general-properties two-stage
/// table and tests the bit with value 1.
pub inline fn isWhitespace(self: Self, cp: u21) bool {
    const block_offset = self.props_s1[cp >> 8];
    const flags = self.props_s2[block_offset + (cp & 0xff)];
    return (flags & 1) != 0;
}

/// Reports whether `cp` is a hexadecimal digit.
/// Fetches the flags byte for `cp` from the general-properties two-stage
/// table and tests the bit with value 2.
pub inline fn isHexDigit(self: Self, cp: u21) bool {
    const block_offset = self.props_s1[cp >> 8];
    const flags = self.props_s2[block_offset + (cp & 0xff)];
    return (flags & 2) != 0;
}

/// Reports whether `cp` is a diacritic mark.
/// Fetches the flags byte for `cp` from the general-properties two-stage
/// table and tests the bit with value 4.
pub inline fn isDiacritic(self: Self, cp: u21) bool {
    const block_offset = self.props_s1[cp >> 8];
    const flags = self.props_s2[block_offset + (cp & 0xff)];
    return (flags & 4) != 0;
}

test "Props" {
    const props_data = try init(testing.allocator);
    defer props_data.deinit();

    // Hex digits: an upper-case letter, a lower-case letter and a decimal digit.
    for ([_]u21{ 'F', 'a', '8' }) |cp| {
        try testing.expect(props_data.isHexDigit(cp));
    }
    try testing.expect(!props_data.isHexDigit('z'));

    // U+0301 is expected to carry the diacritic flag.
    try testing.expect(props_data.isDiacritic('\u{301}'));

    // Alphabetic: a letter qualifies, a digit does not.
    try testing.expect(props_data.isAlphabetic('A'));
    try testing.expect(!props_data.isAlphabetic('3'));

    // '+' is expected to carry the math flag.
    try testing.expect(props_data.isMath('+'));
}