From f1cfab7059e922d45ebbe19c58acef8fa80dc85e Mon Sep 17 00:00:00 2001
From: Jose Colon Rodriguez
Date: Sun, 31 Mar 2024 11:26:14 -0400
Subject: Added benchmarks; Unicode version; Removed Ziglyph dep

---
 bench/src/tests.zig              | 237 +++++++++++++++++++++++++++++++++++++++
 bench/src/zg_case.zig            |  45 ++++++++
 bench/src/zg_caseless.zig        |  62 ++++++++++
 bench/src/zg_codepoint.zig       |  29 +++++
 bench/src/zg_grapheme.zig        |  31 +++++
 bench/src/zg_normalize.zig       |  78 +++++++++++++
 bench/src/zg_width.zig           |  35 ++++++
 bench/src/ziglyph_case.zig       |  43 +++++++
 bench/src/ziglyph_caseless.zig   |  37 ++++++
 bench/src/ziglyph_codepoint.zig  |  29 +++++
 bench/src/ziglyph_grapheme.zig   |  29 +++++
 bench/src/ziglyph_normalizer.zig |  77 +++++++++++++
 bench/src/ziglyph_width.zig      |  32 ++++++
 13 files changed, 764 insertions(+)
 create mode 100644 bench/src/tests.zig
 create mode 100644 bench/src/zg_case.zig
 create mode 100644 bench/src/zg_caseless.zig
 create mode 100644 bench/src/zg_codepoint.zig
 create mode 100644 bench/src/zg_grapheme.zig
 create mode 100644 bench/src/zg_normalize.zig
 create mode 100644 bench/src/zg_width.zig
 create mode 100644 bench/src/ziglyph_case.zig
 create mode 100644 bench/src/ziglyph_caseless.zig
 create mode 100644 bench/src/ziglyph_codepoint.zig
 create mode 100644 bench/src/ziglyph_grapheme.zig
 create mode 100644 bench/src/ziglyph_normalizer.zig
 create mode 100644 bench/src/ziglyph_width.zig

(limited to 'bench/src')

diff --git a/bench/src/tests.zig b/bench/src/tests.zig
new file mode 100644
index 0000000..a8a2a98
--- /dev/null
+++ b/bench/src/tests.zig
@@ -0,0 +1,237 @@
+//! Example-style tests exercising each imported Unicode data module through its public API.
+
+const std = @import("std");
+const testing = std.testing;
+const expect = testing.expect;
+const expectEqual = testing.expectEqual;
+const expectEqualStrings = testing.expectEqualStrings;
+
+const allocator = testing.allocator;
+
+const GenCatData = @import("GenCatData");
+
+test "General Category" {
+    const gcd = try GenCatData.init(allocator);
+    defer gcd.deinit();
+
+    try expect(gcd.gc('A') == .Lu); // Lu: uppercase letter
+    try expect(gcd.gc('3') == .Nd); // Nd: Decimal number
+    try expect(gcd.isControl(0));
+    try expect(gcd.isLetter('z'));
+    try expect(gcd.isMark('\u{301}'));
+    try expect(gcd.isNumber('3'));
+    try expect(gcd.isPunctuation('['));
+    try expect(gcd.isSeparator(' '));
+    try expect(gcd.isSymbol('©'));
+}
+
+const PropsData = @import("PropsData");
+
+test "Properties" {
+    const pd = try PropsData.init(allocator);
+    defer pd.deinit();
+
+    try expect(pd.isMath('+'));
+    try expect(pd.isAlphabetic('Z'));
+    try expect(pd.isWhitespace(' '));
+    try expect(pd.isHexDigit('f'));
+    try expect(!pd.isHexDigit('z'));
+
+    try expect(pd.isDiacritic('\u{301}'));
+    try expect(pd.isIdStart('Z')); // Identifier start character
+    try expect(!pd.isIdStart('1'));
+    try expect(pd.isIdContinue('1'));
+    try expect(pd.isXidStart('\u{b33}')); // Extended identifier start character
+    try expect(pd.isXidContinue('\u{e33}'));
+    try expect(!pd.isXidStart('1'));
+
+    // Note surprising Unicode numeric types!
+    try expect(pd.isNumeric('\u{277f}'));
+    try expect(!pd.isNumeric('3'));
+    try expect(pd.isDigit('\u{2070}'));
+    try expect(!pd.isDigit('3'));
+    try expect(pd.isDecimal('3'));
+}
+
+const CaseData = @import("CaseData");
+
+test "Case" {
+    const cd = try CaseData.init(allocator);
+    defer cd.deinit();
+
+    try expect(cd.isUpper('A'));
+    try expect('A' == cd.toUpper('a'));
+    try expect(cd.isLower('a'));
+    try expect('a' == cd.toLower('A'));
+
+    try expect(cd.isCased('É'));
+    try expect(!cd.isCased('3'));
+
+    try expect(cd.isUpperStr("HELLO 123!"));
+    const ucased = try cd.toUpperStr(allocator, "hello 123");
+    defer allocator.free(ucased);
+    try expectEqualStrings("HELLO 123", ucased);
+
+    try expect(cd.isLowerStr("hello 123!"));
+    const lcased = try cd.toLowerStr(allocator, "HELLO 123");
+    defer allocator.free(lcased);
+    try expectEqualStrings("hello 123", lcased);
+}
+
+const Normalize = @import("Normalize");
+
+test "Normalization" {
+    var norm_data = try Normalize.NormData.init(allocator);
+    defer norm_data.deinit();
+    const n = Normalize{ .norm_data = &norm_data };
+
+    // NFD: Canonical decomposition
+    const nfd_result = try n.nfd(allocator, "Héllo World! \u{3d3}");
+    defer nfd_result.deinit();
+    try expectEqualStrings("He\u{301}llo World! \u{3d2}\u{301}", nfd_result.slice);
+
+    // NFKD: Compatibility decomposition
+    const nfkd_result = try n.nfkd(allocator, "Héllo World! \u{3d3}");
+    defer nfkd_result.deinit();
+    try expectEqualStrings("He\u{301}llo World! \u{3a5}\u{301}", nfkd_result.slice);
+
+    // NFC: Canonical composition
+    const nfc_result = try n.nfc(allocator, "Complex char: \u{3D2}\u{301}");
+    defer nfc_result.deinit();
+    try expectEqualStrings("Complex char: \u{3D3}", nfc_result.slice);
+
+    // NFKC: Compatibility composition
+    const nfkc_result = try n.nfkc(allocator, "Complex char: \u{03A5}\u{0301}");
+    defer nfkc_result.deinit();
+    try expectEqualStrings("Complex char: \u{038E}", nfkc_result.slice);
+
+    // Test for equality of two strings after normalizing to NFC.
+    try expect(try n.eql(allocator, "foé", "foe\u{0301}"));
+    try expect(try n.eql(allocator, "foϓ", "fo\u{03D2}\u{0301}"));
+}
+
+const CaseFold = @import("CaseFold");
+
+test "Caseless matching" {
+    var norm_data = try Normalize.NormData.init(allocator);
+    defer norm_data.deinit();
+    const n = Normalize{ .norm_data = &norm_data };
+
+    const cfd = try CaseFold.FoldData.init(allocator);
+    defer cfd.deinit();
+    const cf = CaseFold{ .fold_data = &cfd };
+
+    // compatCaselessMatch provides the deepest level of caseless
+    // matching because it decomposes and composes fully to NFKC.
+    const a = "Héllo World! \u{3d3}";
+    const b = "He\u{301}llo World! \u{3a5}\u{301}";
+    try expect(try cf.compatCaselessMatch(allocator, &n, a, b));
+
+    const c = "He\u{301}llo World! \u{3d2}\u{301}";
+    try expect(try cf.compatCaselessMatch(allocator, &n, a, c));
+
+    // canonCaselessMatch isn't as comprehensive as compatCaselessMatch
+    // because it only decomposes and composes to NFC. But it's faster.
+    try expect(!try cf.canonCaselessMatch(allocator, &n, a, b));
+    try expect(try cf.canonCaselessMatch(allocator, &n, a, c));
+}
+
+const DisplayWidth = @import("DisplayWidth");
+
+test "Display width" {
+    const dwd = try DisplayWidth.DisplayWidthData.init(allocator);
+    defer dwd.deinit();
+    const dw = DisplayWidth{ .data = &dwd };
+
+    // String display width
+    try expectEqual(@as(usize, 5), dw.strWidth("Hello\r\n"));
+    try expectEqual(@as(usize, 8), dw.strWidth("Hello 😊"));
+    try expectEqual(@as(usize, 8), dw.strWidth("Héllo 😊"));
+    try expectEqual(@as(usize, 9), dw.strWidth("Ẓ̌á̲l͔̝̞̄̑͌g̖̘̘̔̔͢͞͝o̪̔T̢̙̫̈̍͞e̬͈͕͌̏͑x̺̍ṭ̓̓ͅ"));
+    try expectEqual(@as(usize, 17), dw.strWidth("슬라바 우크라이나"));
+
+    // Centering text
+    const centered = try dw.center(allocator, "w😊w", 10, "-");
+    defer allocator.free(centered);
+    try expectEqualStrings("---w😊w---", centered);
+
+    // Pad left
+    const right_aligned = try dw.padLeft(allocator, "abc", 9, "*");
+    defer allocator.free(right_aligned);
+    try expectEqualStrings("******abc", right_aligned);
+
+    // Pad right
+    const left_aligned = try dw.padRight(allocator, "abc", 9, "*");
+    defer allocator.free(left_aligned);
+    try expectEqualStrings("abc******", left_aligned);
+
+    // Wrap text
+    const input = "The quick brown fox\r\njumped over the lazy dog!";
+    const wrapped = try dw.wrap(allocator, input, 10, 3);
+    defer allocator.free(wrapped);
+    const want =
+        \\The quick
+        \\brown fox
+        \\jumped
+        \\over the
+        \\lazy dog!
+    ;
+    try expectEqualStrings(want, wrapped);
+}
+
+const code_point = @import("code_point");
+
+test "Code point iterator" {
+    const str = "Hi 😊";
+    var iter = code_point.Iterator{ .bytes = str };
+    var i: usize = 0;
+
+    while (iter.next()) |cp| : (i += 1) {
+        if (i == 0) try expect(cp.code == 'H');
+        if (i == 1) try expect(cp.code == 'i');
+        if (i == 2) try expect(cp.code == ' ');
+
+        if (i == 3) {
+            try expect(cp.code == '😊');
+            try expect(cp.offset == 3);
+            try expect(cp.len == 4);
+        }
+    }
+
+    // The loop must not pass vacuously: "Hi 😊" holds exactly four code points.
+    try expectEqual(@as(usize, 4), i);
+}
+
+const grapheme = @import("grapheme");
+
+test "Grapheme cluster iterator" {
+    const gd = try grapheme.GraphemeData.init(allocator);
+    defer gd.deinit();
+    const str = "He\u{301}"; // Hé
+    var iter = grapheme.Iterator.init(str, &gd);
+    var i: usize = 0;
+
+    while (iter.next()) |gc| : (i += 1) {
+        if (i == 0) try expect(gc.len == 1);
+
+        if (i == 1) {
+            try expect(gc.len == 3);
+            try expect(gc.offset == 1);
+            try expectEqualStrings("e\u{301}", gc.bytes(str));
+        }
+    }
+
+    // The loop must not pass vacuously: "He\u{301}" is two grapheme clusters.
+    try expectEqual(@as(usize, 2), i);
+}
+
+const ScriptsData = @import("ScriptsData");
+
+test "Scripts" {
+    const sd = try ScriptsData.init(allocator);
+    defer sd.deinit();
+
+    try expect(sd.script('A') == .Latin);
+    try expect(sd.script('Ω') == .Greek);
+    try expect(sd.script('צ') == .Hebrew);
+}
diff --git a/bench/src/zg_case.zig b/bench/src/zg_case.zig
new file mode 100644
index 0000000..e602c00
--- /dev/null
+++ b/bench/src/zg_case.zig
@@ -0,0 +1,45 @@
+const std = @import("std");
+
+const CaseData = @import("CaseData");
+
+/// Benchmarks zg `CaseData` string case mapping and case checks on every line of the file
+/// given as the first CLI argument, printing a checksum and elapsed milliseconds per pass.
+pub fn main() !void {
+    var args_iter = std.process.args();
+    _ = args_iter.skip();
+    const in_path = args_iter.next() orelse return error.MissingArg;
+
+    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
+    defer arena.deinit();
+    const allocator = arena.allocator();
+
+    const input = try std.fs.cwd().readFileAlloc(
+        allocator,
+        in_path,
+        std.math.maxInt(u32),
+    );
+    defer allocator.free(input);
+
+    const case_data = try CaseData.init(allocator);
+
+    var iter = std.mem.splitScalar(u8, input, '\n');
+    var result: usize = 0;
+    var timer = try std.time.Timer.start();
+
+    while (iter.next()) |line| {
+        const upper = try case_data.toUpperStr(allocator, line);
+        const lower = try case_data.toLowerStr(allocator, line);
+        result += upper.len + lower.len;
+    }
+    std.debug.print("zg toUpperStr/toLowerStr: result: {}, took: {}\n", .{ result, timer.lap() / std.time.ns_per_ms });
+
+    result = 0;
+    iter.reset();
+    timer.reset();
+
+    while (iter.next()) |line| {
+        if (case_data.isUpperStr(line)) result += 1;
+        if (case_data.isLowerStr(line)) result += 2;
+    }
+    std.debug.print("zg isUpperStr/isLowerStr: result: {}, took: {}\n", .{ result, timer.lap() / std.time.ns_per_ms });
+}
diff --git a/bench/src/zg_caseless.zig b/bench/src/zg_caseless.zig
new file mode 100644
index 0000000..410a62b
--- /dev/null
+++ b/bench/src/zg_caseless.zig
@@ -0,0 +1,62 @@
+const std = @import("std");
+
+const CaseFold = @import("CaseFold");
+const Normalize = @import("Normalize");
+
+/// Benchmarks zg `CaseFold` compat and canon caseless matching of each line against the
+/// previous line of the file given as the first CLI argument; prints checksum and elapsed ms.
+pub fn main() !void {
+    var args_iter = std.process.args();
+    _ = args_iter.skip();
+    const in_path = args_iter.next() orelse return error.MissingArg;
+
+    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
+    defer arena.deinit();
+    const allocator = arena.allocator();
+
+    const input = try std.fs.cwd().readFileAlloc(
+        allocator,
+        in_path,
+        std.math.maxInt(u32),
+    );
+    defer allocator.free(input);
+
+    const fold_data = try CaseFold.FoldData.init(allocator);
+    const case_fold = CaseFold{ .fold_data = &fold_data };
+    const norm_data = try Normalize.NormData.init(allocator);
+    const normalize = Normalize{ .norm_data = &norm_data };
+
+    var iter = std.mem.splitScalar(u8, input, '\n');
+    var result: usize = 0;
+    // Lines are slices of `input`, which outlives both loops, so borrow them directly.
+    var prev_line: []const u8 = "z";
+    var timer = try std.time.Timer.start();
+
+    while (iter.next()) |line| {
+        if (try case_fold.compatCaselessMatch(
+            allocator,
+            &normalize,
+            prev_line,
+            line,
+        )) result += 1;
+        // No copy into a fixed 256-byte buffer: that overran on lines longer than 256 bytes.
+        prev_line = line;
+    }
+    std.debug.print("zg CaseFold.compatCaselessMatch: result: {}, took: {}\n", .{ result, timer.lap() / std.time.ns_per_ms });
+
+    result = 0;
+    iter.reset();
+    timer.reset();
+
+    while (iter.next()) |line| {
+        if (try case_fold.canonCaselessMatch(
+            allocator,
+            &normalize,
+            prev_line,
+            line,
+        )) result += 1;
+        // Borrow the slice of `input` instead of copying it (see the first loop).
+        prev_line = line;
+    }
+    std.debug.print("zg CaseFold.canonCaselessMatch: result: {}, took: {}\n", .{ result, timer.lap() / std.time.ns_per_ms });
+}
diff --git a/bench/src/zg_codepoint.zig b/bench/src/zg_codepoint.zig
new file mode 100644
index 0000000..0564fa1
--- /dev/null
+++ b/bench/src/zg_codepoint.zig
@@ -0,0 +1,29 @@
+const std = @import("std");
+
+const CodePointIterator = @import("code_point").Iterator;
+
+/// Benchmarks the zg code point iterator over the whole file given as the first CLI argument,
+/// printing the number of items yielded and the elapsed milliseconds.
+pub fn main() !void {
+    var args_iter = std.process.args();
+    _ = args_iter.skip();
+    const in_path = args_iter.next() orelse return error.MissingArg;
+
+    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
+    defer arena.deinit();
+    const allocator = arena.allocator();
+
+    const input = try std.fs.cwd().readFileAlloc(
+        allocator,
+        in_path,
+        std.math.maxInt(u32),
+    );
+    defer allocator.free(input);
+
+    var iter = CodePointIterator{ .bytes = input };
+    var result: usize = 0;
+    var timer = try std.time.Timer.start();
+
+    while (iter.next()) |_| result += 1;
+    std.debug.print("zg CodePointIterator: result: {}, took: {}\n", .{ result, timer.lap() / std.time.ns_per_ms });
+}
diff --git a/bench/src/zg_grapheme.zig b/bench/src/zg_grapheme.zig
new file mode 100644
index 0000000..106b970
--- /dev/null
+++ b/bench/src/zg_grapheme.zig
@@ -0,0 +1,31 @@
+const std = @import("std");
+
+const GraphemeData = @import("grapheme").GraphemeData;
+const GraphemeIterator = @import("grapheme").Iterator;
+
+/// Benchmarks the zg grapheme iterator over the whole file given as the first CLI argument,
+/// printing the number of items yielded and the elapsed milliseconds.
+pub fn main() !void {
+    var args_iter = std.process.args();
+    _ = args_iter.skip();
+    const in_path = args_iter.next() orelse return error.MissingArg;
+
+    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
+    defer arena.deinit();
+    const allocator = arena.allocator();
+
+    const input = try std.fs.cwd().readFileAlloc(
+        allocator,
+        in_path,
+        std.math.maxInt(u32),
+    );
+    defer allocator.free(input);
+
+    const grapheme_data = try GraphemeData.init(allocator);
+    var iter = GraphemeIterator.init(input, &grapheme_data);
+    var result: usize = 0;
+    var timer = try std.time.Timer.start();
+
+    while (iter.next()) |_| result += 1;
+    std.debug.print("zg GraphemeIterator: result: {}, took: {}\n", .{ result, timer.lap() / std.time.ns_per_ms });
+}
diff --git a/bench/src/zg_normalize.zig b/bench/src/zg_normalize.zig
new file mode 100644
index 0000000..956106f
--- /dev/null
+++ b/bench/src/zg_normalize.zig
@@ -0,0 +1,78 @@
+const std = @import("std");
+
+const Normalize = @import("Normalize");
+
+/// Benchmarks zg `Normalize` (nfkc, nfc, nfkd, nfd, eql) on every line of the file given as
+/// the first CLI argument, printing a checksum and elapsed milliseconds for each pass.
+pub fn main() !void {
+    var args_iter = std.process.args();
+    _ = args_iter.skip();
+    const in_path = args_iter.next() orelse return error.MissingArg;
+
+    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
+    defer arena.deinit();
+    const allocator = arena.allocator();
+
+    const input = try std.fs.cwd().readFileAlloc(
+        allocator,
+        in_path,
+        std.math.maxInt(u32),
+    );
+    defer allocator.free(input);
+
+    const norm_data = try Normalize.NormData.init(allocator);
+    const normalize = Normalize{ .norm_data = &norm_data };
+
+    var iter = std.mem.splitScalar(u8, input, '\n');
+    var result: usize = 0;
+    var timer = try std.time.Timer.start();
+
+    while (iter.next()) |line| {
+        const nfkc = try normalize.nfkc(allocator, line);
+        result += nfkc.slice.len;
+    }
+    std.debug.print("zg Normalize.nfkc: result: {}, took: {}\n", .{ result, timer.lap() / std.time.ns_per_ms });
+
+    result = 0;
+    iter.reset();
+    timer.reset();
+
+    while (iter.next()) |line| {
+        const nfc = try normalize.nfc(allocator, line);
+        result += nfc.slice.len;
+    }
+    std.debug.print("zg Normalize.nfc: result: {}, took: {}\n", .{ result, timer.lap() / std.time.ns_per_ms });
+
+    result = 0;
+    iter.reset();
+    timer.reset();
+
+    while (iter.next()) |line| {
+        const nfkd = try normalize.nfkd(allocator, line);
+        result += nfkd.slice.len;
+    }
+    std.debug.print("zg Normalize.nfkd: result: {}, took: {}\n", .{ result, timer.lap() / std.time.ns_per_ms });
+
+    result = 0;
+    iter.reset();
+    timer.reset();
+
+    while (iter.next()) |line| {
+        const nfd = try normalize.nfd(allocator, line);
+        result += nfd.slice.len;
+    }
+    std.debug.print("zg Normalize.nfd: result: {}, took: {}\n", .{ result, timer.lap() / std.time.ns_per_ms });
+
+    result = 0;
+    iter.reset();
+    // Lines are slices of `input`, which outlives this loop, so borrow them directly.
+    var prev_line: []const u8 = "z";
+    timer.reset();
+
+    while (iter.next()) |line| {
+        if (try normalize.eql(allocator, prev_line, line)) result += 1;
+        // No copy into a fixed 256-byte buffer: that overran on lines longer than 256 bytes.
+        prev_line = line;
+    }
+    std.debug.print("Zg Normalize.eql: result: {}, took: {}\n", .{ result, timer.lap() / std.time.ns_per_ms });
+}
diff --git a/bench/src/zg_width.zig b/bench/src/zg_width.zig
new file mode 100644
index 0000000..c0ce57c
--- /dev/null
+++ b/bench/src/zg_width.zig
@@ -0,0 +1,35 @@
+const std = @import("std");
+
+const DisplayWidth = @import("DisplayWidth");
+
+/// Benchmarks zg `DisplayWidth.strWidth` on every line of the file given as the first CLI
+/// argument, printing the summed widths and the elapsed milliseconds.
+pub fn main() !void {
+    var args_iter = std.process.args();
+    _ = args_iter.skip();
+    const in_path = args_iter.next() orelse return error.MissingArg;
+
+    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
+    defer arena.deinit();
+    const allocator = arena.allocator();
+
+    const input = try std.fs.cwd().readFileAlloc(
+        allocator,
+        in_path,
+        std.math.maxInt(u32),
+    );
+    defer allocator.free(input);
+
+    const width_data = try DisplayWidth.DisplayWidthData.init(allocator);
+    const display_width = DisplayWidth{ .data = &width_data };
+
+    var iter = std.mem.splitScalar(u8, input, '\n');
+    var result: usize = 0;
+    var timer = try std.time.Timer.start();
+
+    while (iter.next()) |line| {
+        const width = display_width.strWidth(line);
+        result += width;
+    }
+    std.debug.print("zg DisplayWidth.strWidth: result: {}, took: {}\n", .{ result, timer.lap() / std.time.ns_per_ms });
+}
diff --git a/bench/src/ziglyph_case.zig b/bench/src/ziglyph_case.zig
new file mode 100644
index 0000000..c6ef6ec
--- /dev/null
+++ b/bench/src/ziglyph_case.zig
@@ -0,0 +1,43 @@
+const std = @import("std");
+
+const ziglyph = @import("ziglyph");
+
+/// Benchmarks Ziglyph string case mapping and case checks on every line of the file given as
+/// the first CLI argument, printing a checksum and elapsed milliseconds per pass.
+pub fn main() !void {
+    var args_iter = std.process.args();
+    _ = args_iter.skip();
+    const in_path = args_iter.next() orelse return error.MissingArg;
+
+    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
+    defer arena.deinit();
+    const allocator = arena.allocator();
+
+    const input = try std.fs.cwd().readFileAlloc(
+        allocator,
+        in_path,
+        std.math.maxInt(u32),
+    );
+    defer allocator.free(input);
+
+    var iter = std.mem.splitScalar(u8, input, '\n');
+    var result: usize = 0;
+    var timer = try std.time.Timer.start();
+
+    while (iter.next()) |line| {
+        const upper = try ziglyph.toUpperStr(allocator, line);
+        const lower = try ziglyph.toLowerStr(allocator, line);
+        result += upper.len + lower.len;
+    }
+    std.debug.print("Ziglyph toUpperStr/toLowerStr: result: {}, took: {}\n", .{ result, timer.lap() / std.time.ns_per_ms });
+
+    result = 0;
+    iter.reset();
+    timer.reset();
+
+    while (iter.next()) |line| {
+        if (ziglyph.isUpperStr(line)) result += 1;
+        if (ziglyph.isLowerStr(line)) result += 2;
+    }
+    std.debug.print("Ziglyph isUpperStr/isLowerStr: result: {}, took: {}\n", .{ result, timer.lap() / std.time.ns_per_ms });
+}
diff --git a/bench/src/ziglyph_caseless.zig b/bench/src/ziglyph_caseless.zig
new file mode 100644
index 0000000..f80668e
--- /dev/null
+++ b/bench/src/ziglyph_caseless.zig
@@ -0,0 +1,37 @@
+const std = @import("std");
+
+const Normalizer = @import("ziglyph").Normalizer;
+
+/// Benchmarks Ziglyph `Normalizer.eqlCaseless` of each line against the previous line of the
+/// file given as the first CLI argument; prints the match count and elapsed milliseconds.
+pub fn main() !void {
+    var args_iter = std.process.args();
+    _ = args_iter.skip();
+    const in_path = args_iter.next() orelse return error.MissingArg;
+
+    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
+    defer arena.deinit();
+    const allocator = arena.allocator();
+
+    const input = try std.fs.cwd().readFileAlloc(
+        allocator,
+        in_path,
+        std.math.maxInt(u32),
+    );
+    defer allocator.free(input);
+
+    var normalizer = try Normalizer.init(allocator);
+
+    var iter = std.mem.splitScalar(u8, input, '\n');
+    var result: usize = 0;
+    // Lines are slices of `input`, which outlives this loop, so borrow them directly.
+    var prev_line: []const u8 = "z";
+    var timer = try std.time.Timer.start();
+
+    while (iter.next()) |line| {
+        if (try normalizer.eqlCaseless(allocator, prev_line, line)) result += 1;
+        // No copy into a fixed 256-byte buffer: that overran on lines longer than 256 bytes.
+        prev_line = line;
+    }
+    std.debug.print("Ziglyph Normalizer.eqlCaseless: result: {}, took: {}\n", .{ result, timer.lap() / std.time.ns_per_ms });
+}
diff --git a/bench/src/ziglyph_codepoint.zig b/bench/src/ziglyph_codepoint.zig
new file mode 100644
index 0000000..4c8fc18
--- /dev/null
+++ b/bench/src/ziglyph_codepoint.zig
@@ -0,0 +1,29 @@
+const std = @import("std");
+
+const CodePointIterator = @import("ziglyph").CodePointIterator;
+
+/// Benchmarks the Ziglyph code point iterator over the whole file given as the first CLI
+/// argument, printing the number of items yielded and the elapsed milliseconds.
+pub fn main() !void {
+    var args_iter = std.process.args();
+    _ = args_iter.skip();
+    const in_path = args_iter.next() orelse return error.MissingArg;
+
+    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
+    defer arena.deinit();
+    const allocator = arena.allocator();
+
+    const input = try std.fs.cwd().readFileAlloc(
+        allocator,
+        in_path,
+        std.math.maxInt(u32),
+    );
+    defer allocator.free(input);
+
+    var iter = CodePointIterator{ .bytes = input };
+    var result: usize = 0;
+    var timer = try std.time.Timer.start();
+
+    while (iter.next()) |_| result += 1;
+    std.debug.print("Ziglyph CodePointIterator: result: {}, took: {}\n", .{ result, timer.lap() / std.time.ns_per_ms });
+}
diff --git a/bench/src/ziglyph_grapheme.zig b/bench/src/ziglyph_grapheme.zig
new file mode 100644
index 0000000..b47eea3
--- /dev/null
+++ b/bench/src/ziglyph_grapheme.zig
@@ -0,0 +1,29 @@
+const std = @import("std");
+
+const GraphemeIterator = @import("ziglyph").GraphemeIterator;
+
+/// Benchmarks the Ziglyph grapheme iterator over the whole file given as the first CLI
+/// argument, printing the number of items yielded and the elapsed milliseconds.
+pub fn main() !void {
+    var args_iter = std.process.args();
+    _ = args_iter.skip();
+    const in_path = args_iter.next() orelse return error.MissingArg;
+
+    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
+    defer arena.deinit();
+    const allocator = arena.allocator();
+
+    const input = try std.fs.cwd().readFileAlloc(
+        allocator,
+        in_path,
+        std.math.maxInt(u32),
+    );
+    defer allocator.free(input);
+
+    var iter = GraphemeIterator.init(input);
+    var result: usize = 0;
+    var timer = try std.time.Timer.start();
+
+    while (iter.next()) |_| result += 1;
+    std.debug.print("Ziglyph GraphemeIterator: result: {}, took: {}\n", .{ result, timer.lap() / std.time.ns_per_ms });
+}
diff --git a/bench/src/ziglyph_normalizer.zig b/bench/src/ziglyph_normalizer.zig
new file mode 100644
index 0000000..fea162c
--- /dev/null
+++ b/bench/src/ziglyph_normalizer.zig
@@ -0,0 +1,77 @@
+const std = @import("std");
+
+const Normalizer = @import("ziglyph").Normalizer;
+
+/// Benchmarks Ziglyph `Normalizer` (nfkc, nfc, nfkd, nfd, eql) on every line of the file given
+/// as the first CLI argument, printing a checksum and elapsed milliseconds for each pass.
+pub fn main() !void {
+    var args_iter = std.process.args();
+    _ = args_iter.skip();
+    const in_path = args_iter.next() orelse return error.MissingArg;
+
+    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
+    defer arena.deinit();
+    const allocator = arena.allocator();
+
+    const input = try std.fs.cwd().readFileAlloc(
+        allocator,
+        in_path,
+        std.math.maxInt(u32),
+    );
+    defer allocator.free(input);
+
+    var normalizer = try Normalizer.init(allocator);
+
+    var iter = std.mem.splitScalar(u8, input, '\n');
+    var result: usize = 0;
+    var timer = try std.time.Timer.start();
+
+    while (iter.next()) |line| {
+        const nfkc = try normalizer.nfkc(allocator, line);
+        result += nfkc.slice.len;
+    }
+    std.debug.print("Ziglyph Normalizer.nfkc: result: {}, took: {}\n", .{ result, timer.lap() / std.time.ns_per_ms });
+
+    result = 0;
+    iter.reset();
+    timer.reset();
+
+    while (iter.next()) |line| {
+        const nfc = try normalizer.nfc(allocator, line);
+        result += nfc.slice.len;
+    }
+    std.debug.print("Ziglyph Normalizer.nfc: result: {}, took: {}\n", .{ result, timer.lap() / std.time.ns_per_ms });
+
+    result = 0;
+    iter.reset();
+    timer.reset();
+
+    while (iter.next()) |line| {
+        const nfkd = try normalizer.nfkd(allocator, line);
+        result += nfkd.slice.len;
+    }
+    std.debug.print("Ziglyph Normalizer.nfkd: result: {}, took: {}\n", .{ result, timer.lap() / std.time.ns_per_ms });
+
+    result = 0;
+    iter.reset();
+    timer.reset();
+
+    while (iter.next()) |line| {
+        const nfd = try normalizer.nfd(allocator, line);
+        result += nfd.slice.len;
+    }
+    std.debug.print("Ziglyph Normalizer.nfd: result: {}, took: {}\n", .{ result, timer.lap() / std.time.ns_per_ms });
+
+    result = 0;
+    iter.reset();
+    // Lines are slices of `input`, which outlives this loop, so borrow them directly.
+    var prev_line: []const u8 = "z";
+    timer.reset();
+
+    while (iter.next()) |line| {
+        if (try normalizer.eql(allocator, prev_line, line)) result += 1;
+        // No copy into a fixed 256-byte buffer: that overran on lines longer than 256 bytes.
+        prev_line = line;
+    }
+    std.debug.print("Ziglyph Normalizer.eql: result: {}, took: {}\n", .{ result, timer.lap() / std.time.ns_per_ms });
+}
diff --git a/bench/src/ziglyph_width.zig b/bench/src/ziglyph_width.zig
new file mode 100644
index 0000000..01dd9ee
--- /dev/null
+++ b/bench/src/ziglyph_width.zig
@@ -0,0 +1,32 @@
+const std = @import("std");
+
+const display_width = @import("ziglyph").display_width;
+
+/// Benchmarks Ziglyph `display_width.strWidth` on every line of the file given as the first
+/// CLI argument, printing the summed widths and the elapsed milliseconds.
+pub fn main() !void {
+    var args_iter = std.process.args();
+    _ = args_iter.skip();
+    const in_path = args_iter.next() orelse return error.MissingArg;
+
+    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
+    defer arena.deinit();
+    const allocator = arena.allocator();
+
+    const input = try std.fs.cwd().readFileAlloc(
+        allocator,
+        in_path,
+        std.math.maxInt(u32),
+    );
+    defer allocator.free(input);
+
+    var iter = std.mem.splitScalar(u8, input, '\n');
+    var result: usize = 0;
+    var timer = try std.time.Timer.start();
+
+    while (iter.next()) |line| {
+        const width = try display_width.strWidth(line, .half);
+        result += width;
+    }
+    std.debug.print("Ziglyph display_width.strWidth: result: {}, took: {}\n", .{ result, timer.lap() / std.time.ns_per_ms });
+}
-- 
cgit v1.2.3