summaryrefslogtreecommitdiff
path: root/src/LetterCasing.zig
blob: 24b67a0f448e6f5094d239f183bbf9c0f51f239c (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
const CodePointIterator = @import("code_point").Iterator;
const GeneralCategories = @import("GeneralCategories");

/// Two-stage lookup tables holding the case properties of code points.
///
/// Both fields are required: they deliberately carry no default so that a
/// partially initialized `Data` is a compile error instead of a value holding
/// `undefined` slices.
const Data = struct {
    /// Stage 1: indexed by `cp >> 8`; each entry is the offset of that
    /// 256-entry block within `s2`.
    s1: []const u16,
    /// Stage 2: indexed by `s1[cp >> 8] + (cp & 0xff)`. Entry layout as used
    /// by the functions in this file:
    ///   bit 0       set when a lowercase mapping is present (code point is uppercase)
    ///   bit 1       set when an uppercase mapping is present (code point is lowercase)
    ///   bits 2..22  lowercase mapping (21 bits)
    ///   bits 23..43 uppercase mapping (21 bits)
    s2: []const u44,
};

// Case lookup tables, built from the `s1` and `s2` declarations of the
// imported `case` module and exposed as slices.
const letter_casing: Data = .{
    .s1 = &@import("case").s1,
    .s2 = &@import("case").s2,
};

/// Returns true if `cp` is cased: uppercase, lowercase, or titlecase
/// (general category `Lt`).
pub fn isCased(cp: u21) bool {
    if (isUpper(cp)) return true;
    if (isLower(cp)) return true;
    return GeneralCategories.gc(cp) == .Lt;
}

/// Returns true if `cp` is uppercase.
pub fn isUpper(cp: u21) bool {
    // Stage 1 maps the high bits of `cp` to a block offset; the low 8 bits
    // pick the entry within that block of stage 2.
    const block_offset = letter_casing.s1[cp >> 8];
    const case_prop = letter_casing.s2[block_offset + (cp & 0xff)];
    // The lowest bit (mask 1) marks an entry that has a lowercase mapping,
    // which is what makes `cp` uppercase.
    return case_prop & 1 != 0;
}

/// Returns true if no code point in `str` is lowercase.
/// An empty `str` yields true.
pub fn isUpperStr(str: []const u8) bool {
    var code_points: CodePointIterator = .{ .bytes = str };
    while (code_points.next()) |entry| {
        // A single lowercase code point settles the answer.
        if (isLower(entry.code)) return false;
    }
    return true;
}

test "isUpperStr" {
    // Each row pairs an input with the result `isUpperStr` must report.
    const rows = [_]struct { input: []const u8, expected: bool }{
        .{ .input = "HELLO, WORLD 2112!", .expected = true },
        .{ .input = "hello, world 2112!", .expected = false },
        .{ .input = "Hello, World 2112!", .expected = false },
    };
    for (rows) |row| {
        try testing.expect(isUpperStr(row.input) == row.expected);
    }
}

/// Returns the uppercase mapping for `cp`, or `cp` unchanged when its table
/// entry carries no uppercase mapping.
pub fn toUpper(cp: u21) u21 {
    const block_offset = letter_casing.s1[cp >> 8];
    const case_prop = letter_casing.s2[block_offset + (cp & 0xff)];

    // Mask 2 flags the presence of an uppercase mapping.
    if (case_prop & 2 == 0) return cp;

    // The uppercase code point occupies the bits above the 2 flag bits and
    // the 21-bit lowercase field.
    return @intCast(case_prop >> (2 + 21));
}

/// Builds a copy of `str` in which every code point is replaced by its
/// `toUpper` mapping and re-encoded as UTF-8.
/// The returned bytes are owned by the caller and must be freed with
/// `allocator`. Allocation and UTF-8 encoding errors are propagated.
pub fn toUpperStr(
    allocator: mem.Allocator,
    str: []const u8,
) ![]u8 {
    var out = std.array_list.Managed(u8).init(allocator);
    defer out.deinit();

    // Scratch space for one encoded code point (UTF-8 needs at most 4 bytes).
    var encoded: [4]u8 = undefined;
    var code_points: CodePointIterator = .{ .bytes = str };

    while (code_points.next()) |entry| {
        const upper = toUpper(entry.code);
        const n = try unicode.utf8Encode(upper, &encoded);
        try out.appendSlice(encoded[0..n]);
    }

    return try out.toOwnedSlice();
}

test "toUpperStr" {
    const allocator = testing.allocator;
    const input = "Hello, World 2112!";
    const expected = "HELLO, WORLD 2112!";

    const actual = try toUpperStr(allocator, input);
    defer allocator.free(actual);

    try testing.expectEqualStrings(expected, actual);
}

/// Returns true if `cp` is lowercase.
pub fn isLower(cp: u21) bool {
    // Stage 1 maps the high bits of `cp` to a block offset; the low 8 bits
    // pick the entry within that block of stage 2.
    const block_offset = letter_casing.s1[cp >> 8];
    const case_prop = letter_casing.s2[block_offset + (cp & 0xff)];
    // Mask 2 marks an entry that has an uppercase mapping, which is what
    // makes `cp` lowercase.
    return case_prop & 2 != 0;
}

/// Returns true if no code point in `str` is uppercase.
/// An empty `str` yields true.
pub fn isLowerStr(str: []const u8) bool {
    var code_points: CodePointIterator = .{ .bytes = str };
    while (code_points.next()) |entry| {
        // A single uppercase code point settles the answer.
        if (isUpper(entry.code)) return false;
    }
    return true;
}

test "isLowerStr" {
    // Each row pairs an input with the result `isLowerStr` must report.
    const rows = [_]struct { input: []const u8, expected: bool }{
        .{ .input = "hello, world 2112!", .expected = true },
        .{ .input = "HELLO, WORLD 2112!", .expected = false },
        .{ .input = "Hello, World 2112!", .expected = false },
    };
    for (rows) |row| {
        try testing.expect(isLowerStr(row.input) == row.expected);
    }
}

/// Returns the lowercase mapping for `cp`, or `cp` unchanged when its table
/// entry carries no lowercase mapping.
pub fn toLower(cp: u21) u21 {
    const block_offset = letter_casing.s1[cp >> 8];
    const case_prop = letter_casing.s2[block_offset + (cp & 0xff)];

    // Mask 1 flags the presence of a lowercase mapping.
    if (case_prop & 1 == 0) return cp;

    // Skip the 2 flag bits, then keep only the 21-bit lowercase field.
    const lower_field = (case_prop >> 2) & 0x1FFFFF;
    return @intCast(lower_field);
}

/// Builds a copy of `str` in which every code point is replaced by its
/// `toLower` mapping and re-encoded as UTF-8.
/// The returned bytes are owned by the caller and must be freed with
/// `allocator`. Allocation and UTF-8 encoding errors are propagated.
pub fn toLowerStr(
    allocator: mem.Allocator,
    str: []const u8,
) ![]u8 {
    var out = std.array_list.Managed(u8).init(allocator);
    defer out.deinit();

    // Scratch space for one encoded code point (UTF-8 needs at most 4 bytes).
    var encoded: [4]u8 = undefined;
    var code_points: CodePointIterator = .{ .bytes = str };

    while (code_points.next()) |entry| {
        const lower = toLower(entry.code);
        const n = try unicode.utf8Encode(lower, &encoded);
        try out.appendSlice(encoded[0..n]);
    }

    return try out.toOwnedSlice();
}

test "toLowerStr" {
    const allocator = testing.allocator;
    const input = "Hello, World 2112!";
    const expected = "hello, world 2112!";

    const actual = try toLowerStr(allocator, input);
    defer allocator.free(actual);

    try testing.expectEqualStrings(expected, actual);
}

const std = @import("std");
const builtin = @import("builtin");
const mem = std.mem;
const testing = std.testing;
const unicode = std.unicode;