diff options
| author | 2024-08-29 20:41:04 +0800 | |
|---|---|---|
| committer | 2024-08-29 20:41:04 +0800 | |
| commit | 34b7dc964b3516457f04d00d3ec910d3b6fd585b (patch) | |
| tree | 7a0a44ce334cc1411952cdbde056b7b57720f06e /src/utils.zig | |
| parent | Do some extra user input validation (diff) | |
| download | ukkobot-34b7dc964b3516457f04d00d3ec910d3b6fd585b.tar.gz ukkobot-34b7dc964b3516457f04d00d3ec910d3b6fd585b.tar.xz ukkobot-34b7dc964b3516457f04d00d3ec910d3b6fd585b.zip | |
thank you Q&A, now theres proper unicode support n shit
Diffstat (limited to 'src/utils.zig')
| -rw-r--r-- | src/utils.zig | 55 |
1 file changed, 55 insertions, 0 deletions
diff --git a/src/utils.zig b/src/utils.zig index c6e8508..631e464 100644 --- a/src/utils.zig +++ b/src/utils.zig | |||
| @@ -2,6 +2,9 @@ const std = @import("std"); | |||
| 2 | 2 | ||
| 3 | const Allocator = std.mem.Allocator; | 3 | const Allocator = std.mem.Allocator; |
| 4 | const ArrayList = std.ArrayList; | 4 | const ArrayList = std.ArrayList; |
| 5 | const CaseData = @import("CaseData"); | ||
| 6 | const GenCatData = @import("GenCatData"); | ||
| 7 | const Utf8View = std.unicode.Utf8View; | ||
| 5 | 8 | ||
| 6 | pub fn escapeXml(writer: anytype, text: []const u8) !void { | 9 | pub fn escapeXml(writer: anytype, text: []const u8) !void { |
| 7 | for (text) |ch| { | 10 | for (text) |ch| { |
| @@ -15,6 +18,26 @@ pub fn escapeXml(writer: anytype, text: []const u8) !void { | |||
| 15 | } | 18 | } |
| 16 | } | 19 | } |
| 17 | 20 | ||
| 21 | var gcd_global: ?GenCatData = null; | ||
| 22 | |||
| 23 | pub fn getGCD() !GenCatData { | ||
| 24 | if (gcd_global) |gcd| { | ||
| 25 | return gcd; | ||
| 26 | } | ||
| 27 | gcd_global = try GenCatData.init(std.heap.page_allocator); | ||
| 28 | return gcd_global.?; | ||
| 29 | } | ||
| 30 | |||
| 31 | var cd_global: ?CaseData = null; | ||
| 32 | |||
| 33 | pub fn getCD() !CaseData { | ||
| 34 | if (cd_global) |cd| { | ||
| 35 | return cd; | ||
| 36 | } | ||
| 37 | cd_global = try CaseData.init(std.heap.page_allocator); | ||
| 38 | return cd_global.?; | ||
| 39 | } | ||
| 40 | |||
| 18 | pub inline fn isNull(value: anytype) bool { | 41 | pub inline fn isNull(value: anytype) bool { |
| 19 | return switch (@typeInfo(@TypeOf(value))) { | 42 | return switch (@typeInfo(@TypeOf(value))) { |
| 20 | .Null => true, | 43 | .Null => true, |
| @@ -22,3 +45,35 @@ pub inline fn isNull(value: anytype) bool { | |||
| 22 | else => false, | 45 | else => false, |
| 23 | }; | 46 | }; |
| 24 | } | 47 | } |
| 48 | |||
| 49 | pub fn trim(str: []const u8) ![]const u8 { | ||
| 50 | const view = try Utf8View.init(str); | ||
| 51 | const gcd = try getGCD(); | ||
| 52 | |||
| 53 | var it = view.iterator(); | ||
| 54 | var idx: usize = 0; | ||
| 55 | const first = while (it.nextCodepoint()) |cp| { | ||
| 56 | if (!isTrimmable(gcd, cp)) { | ||
| 57 | break idx; | ||
| 58 | } | ||
| 59 | idx = it.i; | ||
| 60 | } else { | ||
| 61 | return ""; | ||
| 62 | }; | ||
| 63 | |||
| 64 | idx = it.i; | ||
| 65 | |||
| 66 | var last = first; | ||
| 67 | while (it.nextCodepoint()) |cp| { | ||
| 68 | if (!isTrimmable(gcd, cp)) { | ||
| 69 | last = idx + (std.unicode.utf8CodepointSequenceLength(cp) catch unreachable) - 1; | ||
| 70 | } | ||
| 71 | idx = it.i; | ||
| 72 | } | ||
| 73 | |||
| 74 | return str[first .. last + 1]; | ||
| 75 | } | ||
| 76 | |||
| 77 | inline fn isTrimmable(gcd: GenCatData, cp: u21) bool { | ||
| 78 | return gcd.isSeparator(cp) or gcd.isControl(cp); | ||
| 79 | } | ||