thank you Q&A, now there's proper Unicode support n shit

author: Uko Kokņevičs 2024-08-29 20:41:04 +0800
committer: Uko Kokņevičs 2024-08-29 20:41:04 +0800
commit: 34b7dc964b3516457f04d00d3ec910d3b6fd585b (patch)
tree: 7a0a44ce334cc1411952cdbde056b7b57720f06e /src
parent: Do some extra user input validation (diff)
download: ukkobot-34b7dc964b3516457f04d00d3ec910d3b6fd585b.tar.gz
ukkobot-34b7dc964b3516457f04d00d3ec910d3b6fd585b.tar.xz
ukkobot-34b7dc964b3516457f04d00d3ec910d3b6fd585b.zip
2 files changed, 85 insertions, 20 deletions
diff --git a/src/main.zig b/src/main.zig
index 544b105..2e33cfc 100644
--- a/src/main.zig
+++ b/src/main.zig
@@ -199,22 +199,29 @@ fn onTextMessage(bot: *Bot, msg: types.Message, text: []const u8) !void {
            },
        });
    } else if (std.ascii.startsWithIgnoreCase(text, "big ")) {
-        var output = ArrayList(u8).init(bot.allocator);
+        const trimmed = try utils.trim(text[4..]);
-        defer output.deinit();
+        const cd = try utils.getCD();
+        if (trimmed.len > 0) {
+            const uppercased = try cd.toUpperStr(bot.allocator, trimmed);
+            defer bot.allocator.free(uppercased);
-        try output.appendSlice("<b>");
+            var output = ArrayList(u8).init(bot.allocator);
-        try utils.escapeXml(output.writer(), text[4..]);
+            defer output.deinit();
-        try output.appendSlice("</b>");
-        try bot.sendMessage_(.{
+            try output.appendSlice("<b>");
-            .chat_id = msg.chat.id,
+            try utils.escapeXml(output.writer(), uppercased);
-            .text = output.items,
+            try output.appendSlice("</b>");
-            .parse_mode = .html,
-            .reply_parameters = .{
+            try bot.sendMessage_(.{
-                .message_id = msg.message_id,
                .chat_id = msg.chat.id,
-            },
+                .text = output.items,
-        });
+                .parse_mode = .html,
+                .reply_parameters = .{
+                    .message_id = msg.message_id,
+                    .chat_id = msg.chat.id,
+                },
+            });
+        }
    } else if (std.ascii.eqlIgnoreCase(text, "forgor")) {
        try bot.sendMessage_(.{
            .chat_id = msg.chat.id,
@@ -244,14 +251,17 @@ fn onTextMessage(bot: *Bot, msg: types.Message, text: []const u8) !void {
            },
        });
    } else if (std.ascii.startsWithIgnoreCase(text, "say ")) {
-        try bot.sendMessage_(.{
+        const trimmed = try utils.trim(text[4..]);
-            .chat_id = msg.chat.id,
+        if (trimmed.len > 0) {
-            .text = text[4..],
+            try bot.sendMessage_(.{
-            .reply_parameters = .{
-                .message_id = msg.message_id,
                .chat_id = msg.chat.id,
-            },
+                .text = trimmed,
-        });
+                .reply_parameters = .{
+                    .message_id = msg.message_id,
+                    .chat_id = msg.chat.id,
+                },
+            });
+        }
    } else if (std.ascii.eqlIgnoreCase(text, "uwu")) {
        try bot.sendMessage_(.{
            .chat_id = msg.chat.id,
diff --git a/src/utils.zig b/src/utils.zig
index c6e8508..631e464 100644
--- a/src/utils.zig
+++ b/src/utils.zig
@@ -2,6 +2,9 @@ const std = @import("std");
 const Allocator = std.mem.Allocator;
 const ArrayList = std.ArrayList;
+const CaseData = @import("CaseData");
+const GenCatData = @import("GenCatData");
+const Utf8View = std.unicode.Utf8View;
 pub fn escapeXml(writer: anytype, text: []const u8) !void {
    for (text) |ch| {
@@ -15,6 +18,26 @@ pub fn escapeXml(writer: anytype, text: []const u8) !void {
    }
 }
+var gcd_global: ?GenCatData = null;
+pub fn getGCD() !GenCatData {
+    if (gcd_global) |gcd| {
+        return gcd;
+    }
+    gcd_global = try GenCatData.init(std.heap.page_allocator);
+    return gcd_global.?;
+}
+var cd_global: ?CaseData = null;
+pub fn getCD() !CaseData {
+    if (cd_global) |cd| {
+        return cd;
+    }
+    cd_global = try CaseData.init(std.heap.page_allocator);
+    return cd_global.?;
+}
 pub inline fn isNull(value: anytype) bool {
    return switch (@typeInfo(@TypeOf(value))) {
        .Null => true,
@@ -22,3 +45,35 @@ pub inline fn isNull(value: anytype) bool {
        else => false,
    };
 }
+pub fn trim(str: []const u8) ![]const u8 {
+    const view = try Utf8View.init(str);
+    const gcd = try getGCD();
+    var it = view.iterator();
+    var idx: usize = 0;
+    const first = while (it.nextCodepoint()) |cp| {
+        if (!isTrimmable(gcd, cp)) {
+            break idx;
+        }
+        idx = it.i;
+    } else {
+        return "";
+    };
+    idx = it.i;
+    var last = first;
+    while (it.nextCodepoint()) |cp| {
+        if (!isTrimmable(gcd, cp)) {
+            last = idx + (std.unicode.utf8CodepointSequenceLength(cp) catch unreachable) - 1;
+        }
+        idx = it.i;
+    }
+    return str[first .. last + 1];
+}
+inline fn isTrimmable(gcd: GenCatData, cp: u21) bool {
+    return gcd.isSeparator(cp) or gcd.isControl(cp);
+}
author	Uko Kokņevičs	2024-08-29 20:41:04 +0800
committer	Uko Kokņevičs	2024-08-29 20:41:04 +0800
commit	34b7dc964b3516457f04d00d3ec910d3b6fd585b (patch)
tree	7a0a44ce334cc1411952cdbde056b7b57720f06e /src
parent	Do some extra user input validation (diff)
download	ukkobot-34b7dc964b3516457f04d00d3ec910d3b6fd585b.tar.gz ukkobot-34b7dc964b3516457f04d00d3ec910d3b6fd585b.tar.xz ukkobot-34b7dc964b3516457f04d00d3ec910d3b6fd585b.zip

diff --git a/src/main.zig b/src/main.zig index 544b105..2e33cfc 100644 --- a/src/main.zig +++ b/src/main.zig
@@ -199,22 +199,29 @@ fn onTextMessage(bot: *Bot, msg: types.Message, text: []const u8) !void {
199	},	199	},
200	});	200	});
201	} else if (std.ascii.startsWithIgnoreCase(text, "big ")) {	201	} else if (std.ascii.startsWithIgnoreCase(text, "big ")) {
202	var output = ArrayList(u8).init(bot.allocator);	202	const trimmed = try utils.trim(text[4..]);
203	defer output.deinit();	203	const cd = try utils.getCD();
		204	if (trimmed.len > 0) {
		205	const uppercased = try cd.toUpperStr(bot.allocator, trimmed);
		206	defer bot.allocator.free(uppercased);
204		207
205	try output.appendSlice("<b>");	208	var output = ArrayList(u8).init(bot.allocator);
206	try utils.escapeXml(output.writer(), text[4..]);	209	defer output.deinit();
207	try output.appendSlice("</b>");
208		210
209	try bot.sendMessage_(.{	211	try output.appendSlice("<b>");
210	.chat_id = msg.chat.id,	212	try utils.escapeXml(output.writer(), uppercased);
211	.text = output.items,	213	try output.appendSlice("</b>");
212	.parse_mode = .html,	214
213	.reply_parameters = .{	215	try bot.sendMessage_(.{
214	.message_id = msg.message_id,
215	.chat_id = msg.chat.id,	216	.chat_id = msg.chat.id,
216	},	217	.text = output.items,
217	});	218	.parse_mode = .html,
		219	.reply_parameters = .{
		220	.message_id = msg.message_id,
		221	.chat_id = msg.chat.id,
		222	},
		223	});
		224	}
218	} else if (std.ascii.eqlIgnoreCase(text, "forgor")) {	225	} else if (std.ascii.eqlIgnoreCase(text, "forgor")) {
219	try bot.sendMessage_(.{	226	try bot.sendMessage_(.{
220	.chat_id = msg.chat.id,	227	.chat_id = msg.chat.id,
@@ -244,14 +251,17 @@ fn onTextMessage(bot: *Bot, msg: types.Message, text: []const u8) !void {
244	},	251	},
245	});	252	});
246	} else if (std.ascii.startsWithIgnoreCase(text, "say ")) {	253	} else if (std.ascii.startsWithIgnoreCase(text, "say ")) {
247	try bot.sendMessage_(.{	254	const trimmed = try utils.trim(text[4..]);
248	.chat_id = msg.chat.id,	255	if (trimmed.len > 0) {
249	.text = text[4..],	256	try bot.sendMessage_(.{
250	.reply_parameters = .{
251	.message_id = msg.message_id,
252	.chat_id = msg.chat.id,	257	.chat_id = msg.chat.id,
253	},	258	.text = trimmed,
254	});	259	.reply_parameters = .{
		260	.message_id = msg.message_id,
		261	.chat_id = msg.chat.id,
		262	},
		263	});
		264	}
255	} else if (std.ascii.eqlIgnoreCase(text, "uwu")) {	265	} else if (std.ascii.eqlIgnoreCase(text, "uwu")) {
256	try bot.sendMessage_(.{	266	try bot.sendMessage_(.{
257	.chat_id = msg.chat.id,	267	.chat_id = msg.chat.id,


diff --git a/src/utils.zig b/src/utils.zig index c6e8508..631e464 100644 --- a/src/utils.zig +++ b/src/utils.zig
@@ -2,6 +2,9 @@ const std = @import("std");
2		2
3	const Allocator = std.mem.Allocator;	3	const Allocator = std.mem.Allocator;
4	const ArrayList = std.ArrayList;	4	const ArrayList = std.ArrayList;
		5	const CaseData = @import("CaseData");
		6	const GenCatData = @import("GenCatData");
		7	const Utf8View = std.unicode.Utf8View;
5		8
6	pub fn escapeXml(writer: anytype, text: []const u8) !void {	9	pub fn escapeXml(writer: anytype, text: []const u8) !void {
7	for (text) \|ch\| {	10	for (text) \|ch\| {
@@ -15,6 +18,26 @@ pub fn escapeXml(writer: anytype, text: []const u8) !void {
15	}	18	}
16	}	19	}
17		20
		21	var gcd_global: ?GenCatData = null;
		22
		23	pub fn getGCD() !GenCatData {
		24	if (gcd_global) \|gcd\| {
		25	return gcd;
		26	}
		27	gcd_global = try GenCatData.init(std.heap.page_allocator);
		28	return gcd_global.?;
		29	}
		30
		31	var cd_global: ?CaseData = null;
		32
		33	pub fn getCD() !CaseData {
		34	if (cd_global) \|cd\| {
		35	return cd;
		36	}
		37	cd_global = try CaseData.init(std.heap.page_allocator);
		38	return cd_global.?;
		39	}
		40
18	pub inline fn isNull(value: anytype) bool {	41	pub inline fn isNull(value: anytype) bool {
19	return switch (@typeInfo(@TypeOf(value))) {	42	return switch (@typeInfo(@TypeOf(value))) {
20	.Null => true,	43	.Null => true,
@@ -22,3 +45,35 @@ pub inline fn isNull(value: anytype) bool {
22	else => false,	45	else => false,
23	};	46	};
24	}	47	}
		48
		49	pub fn trim(str: []const u8) ![]const u8 {
		50	const view = try Utf8View.init(str);
		51	const gcd = try getGCD();
		52
		53	var it = view.iterator();
		54	var idx: usize = 0;
		55	const first = while (it.nextCodepoint()) \|cp\| {
		56	if (!isTrimmable(gcd, cp)) {
		57	break idx;
		58	}
		59	idx = it.i;
		60	} else {
		61	return "";
		62	};
		63
		64	idx = it.i;
		65
		66	var last = first;
		67	while (it.nextCodepoint()) \|cp\| {
		68	if (!isTrimmable(gcd, cp)) {
		69	last = idx + (std.unicode.utf8CodepointSequenceLength(cp) catch unreachable) - 1;
		70	}
		71	idx = it.i;
		72	}
		73
		74	return str[first .. last + 1];
		75	}
		76
		77	inline fn isTrimmable(gcd: GenCatData, cp: u21) bool {
		78	return gcd.isSeparator(cp) or gcd.isControl(cp);
		79	}