//! Display Width module //! //! Answers questions about the printable width in monospaced fonts of the //! string of interest. graphemes: Graphemes = undefined, s1: []u16 = undefined, s2: []i4 = undefined, owns_graphemes: bool = true, const DisplayWidth = @This(); pub fn init(allocator: Allocator) Allocator.Error!DisplayWidth { var dw = DisplayWidth{}; try dw.setup(allocator); errdefer { allocator.free(dw.s1); allocator.free(dw.s2); } dw.owns_graphemes = true; dw.graphemes = try Graphemes.init(allocator); errdefer dw.graphemes.deinit(allocator); return dw; } pub fn initWithGraphemes(allocator: Allocator, graphemes: Graphemes) Allocator.Error!DisplayWidth { var dw = DisplayWidth{}; try dw.setup(allocator); dw.graphemes = graphemes; dw.owns_graphemes = false; return dw; } pub fn setupWithGraphemes(dw: *DisplayWidth, allocator: Allocator, graphemes: Graphemes) Allocator.Error!void { try dw.setup(allocator); dw.graphemes = graphemes; dw.owns_graphemes = false; } // Sets up the DisplayWidthData, leaving the GraphemeData undefined. pub fn setup(dw: *DisplayWidth, allocator: Allocator) Allocator.Error!void { const in_bytes = @embedFile("dwp"); var in_fbs = std.io.fixedBufferStream(in_bytes); var reader = in_fbs.reader(); const endian = builtin.cpu.arch.endian(); const stage_1_len: u16 = reader.readInt(u16, endian) catch unreachable; dw.s1 = try allocator.alloc(u16, stage_1_len); errdefer allocator.free(dw.s1); for (0..stage_1_len) |i| dw.s1[i] = reader.readInt(u16, endian) catch unreachable; const stage_2_len: u16 = reader.readInt(u16, endian) catch unreachable; dw.s2 = try allocator.alloc(i4, stage_2_len); errdefer allocator.free(dw.s2); for (0..stage_2_len) |i| dw.s2[i] = @intCast(reader.readInt(i8, endian) catch unreachable); } pub fn deinit(dw: *const DisplayWidth, allocator: Allocator) void { allocator.free(dw.s1); allocator.free(dw.s2); if (dw.owns_graphemes) dw.graphemes.deinit(allocator); } /// codePointWidth returns the number of cells `cp` requires when rendered /// in a fixed-pitch font (i.e. a terminal screen). This can range from -1 to /// 3, where BACKSPACE and DELETE return -1 and 3-em-dash returns 3. C0/C1 /// control codes return 0. If `cjk` is true, ambiguous code points return 2, /// otherwise they return 1. pub fn codePointWidth(dw: DisplayWidth, cp: u21) i4 { return dw.s2[dw.s1[cp >> 8] + (cp & 0xff)]; } test "codePointWidth" { const dw = try DisplayWidth.init(std.testing.allocator); defer dw.deinit(std.testing.allocator); try testing.expectEqual(@as(i4, 0), dw.codePointWidth(0x0000)); // null try testing.expectEqual(@as(i4, -1), dw.codePointWidth(0x8)); // \b try testing.expectEqual(@as(i4, -1), dw.codePointWidth(0x7f)); // DEL try testing.expectEqual(@as(i4, 0), dw.codePointWidth(0x0005)); // Cf try testing.expectEqual(@as(i4, 0), dw.codePointWidth(0x0007)); // \a BEL try testing.expectEqual(@as(i4, 0), dw.codePointWidth(0x000A)); // \n LF try testing.expectEqual(@as(i4, 0), dw.codePointWidth(0x000B)); // \v VT try testing.expectEqual(@as(i4, 0), dw.codePointWidth(0x000C)); // \f FF try testing.expectEqual(@as(i4, 0), dw.codePointWidth(0x000D)); // \r CR try testing.expectEqual(@as(i4, 0), dw.codePointWidth(0x000E)); // SQ try testing.expectEqual(@as(i4, 0), dw.codePointWidth(0x000F)); // SI try testing.expectEqual(@as(i4, 0), dw.codePointWidth(0x070F)); // Cf try testing.expectEqual(@as(i4, 1), dw.codePointWidth(0x0603)); // Cf Arabic try testing.expectEqual(@as(i4, 1), dw.codePointWidth(0x00AD)); // soft-hyphen try testing.expectEqual(@as(i4, 2), dw.codePointWidth(0x2E3A)); // two-em dash try testing.expectEqual(@as(i4, 3), dw.codePointWidth(0x2E3B)); // three-em dash try testing.expectEqual(@as(i4, 1), dw.codePointWidth(0x00BD)); // ambiguous halfwidth try testing.expectEqual(@as(i4, 1), dw.codePointWidth('Γ©')); try testing.expectEqual(@as(i4, 2), dw.codePointWidth('😊')); try testing.expectEqual(@as(i4, 2), dw.codePointWidth('统')); } /// strWidth returns the total display width of `str` as the number of cells /// required in a fixed-pitch font (i.e. a terminal screen). pub fn strWidth(dw: DisplayWidth, str: []const u8) usize { var total: isize = 0; // ASCII fast path if (ascii.isAsciiOnly(str)) { for (str) |b| total += dw.codePointWidth(b); return @intCast(@max(0, total)); } var giter = dw.graphemes.iterator(str); while (giter.next()) |gc| { var cp_iter = CodePointIterator{ .bytes = gc.bytes(str) }; var gc_total: isize = 0; while (cp_iter.next()) |cp| { var w = dw.codePointWidth(cp.code); if (w != 0) { // Handle text emoji sequence. if (cp_iter.next()) |ncp| { // emoji text sequence. if (ncp.code == 0xFE0E) w = 1; if (ncp.code == 0xFE0F) w = 2; // Skin tones if (0x1F3FB <= ncp.code and ncp.code <= 0x1F3FF) w = 2; } // Only adding width of first non-zero-width code point. if (gc_total == 0) { gc_total = w; break; } } } total += gc_total; } return @intCast(@max(0, total)); } test "strWidth" { const dw = try DisplayWidth.init(testing.allocator); defer dw.deinit(testing.allocator); const c0 = options.c0_width orelse 0; try testing.expectEqual(@as(usize, 5), dw.strWidth("Hello\r\n")); try testing.expectEqual(@as(usize, 1), dw.strWidth("\u{0065}\u{0301}")); try testing.expectEqual(@as(usize, 2), dw.strWidth("\u{1F476}\u{1F3FF}\u{0308}\u{200D}\u{1F476}\u{1F3FF}")); try testing.expectEqual(@as(usize, 8), dw.strWidth("Hello 😊")); try testing.expectEqual(@as(usize, 8), dw.strWidth("HΓ©llo 😊")); try testing.expectEqual(@as(usize, 8), dw.strWidth("HΓ©llo :)")); try testing.expectEqual(@as(usize, 8), dw.strWidth("HΓ©llo πŸ‡ͺπŸ‡Έ")); try testing.expectEqual(@as(usize, 2), dw.strWidth("\u{26A1}")); // Lone emoji try testing.expectEqual(@as(usize, 1), dw.strWidth("\u{26A1}\u{FE0E}")); // Text sequence try testing.expectEqual(@as(usize, 2), dw.strWidth("\u{26A1}\u{FE0F}")); // Presentation sequence try testing.expectEqual(@as(usize, 1), dw.strWidth("\u{2764}")); // Default text presentation try testing.expectEqual(@as(usize, 1), dw.strWidth("\u{2764}\u{FE0E}")); // Default text presentation with VS15 selector try testing.expectEqual(@as(usize, 2), dw.strWidth("\u{2764}\u{FE0F}")); // Default text presentation with VS16 selector const expect_bs: usize = if (c0 == 0) 0 else 1 + c0; try testing.expectEqual(expect_bs, dw.strWidth("A\x08")); // Backspace try testing.expectEqual(expect_bs, dw.strWidth("\x7FA")); // DEL const expect_long_del: usize = if (c0 == 0) 0 else 1 + (c0 * 3); try testing.expectEqual(expect_long_del, dw.strWidth("\x7FA\x08\x08")); // never less than 0 // wcwidth Python lib tests. See: https://github.com/jquast/wcwidth/blob/master/tests/test_core.py const empty = ""; try testing.expectEqual(@as(usize, 0), dw.strWidth(empty)); const with_null = "hello\x00world"; try testing.expectEqual(@as(usize, 10 + c0), dw.strWidth(with_null)); const hello_jp = "コンニチハ, γ‚»γ‚«γ‚€!"; try testing.expectEqual(@as(usize, 19), dw.strWidth(hello_jp)); const control = "\x1b[0m"; try testing.expectEqual(@as(usize, 3 + c0), dw.strWidth(control)); const balinese = "\u{1B13}\u{1B28}\u{1B2E}\u{1B44}"; try testing.expectEqual(@as(usize, 3), dw.strWidth(balinese)); // These commented out tests require a new specification for complex scripts. // See: https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf // const jamo = "\u{1100}\u{1160}"; // try testing.expectEqual(@as(usize, 3), strWidth(jamo)); // const devengari = "\u{0915}\u{094D}\u{0937}\u{093F}"; // try testing.expectEqual(@as(usize, 3), strWidth(devengari)); // const tamal = "\u{0b95}\u{0bcd}\u{0bb7}\u{0bcc}"; // try testing.expectEqual(@as(usize, 5), strWidth(tamal)); // const kannada_1 = "\u{0cb0}\u{0ccd}\u{0c9d}\u{0cc8}"; // try testing.expectEqual(@as(usize, 3), strWidth(kannada_1)); // The following passes but as a mere coincidence. const kannada_2 = "\u{0cb0}\u{0cbc}\u{0ccd}\u{0c9a}"; try testing.expectEqual(@as(usize, 2), dw.strWidth(kannada_2)); // From Rust https://github.com/jameslanska/unicode-display-width try testing.expectEqual(@as(usize, 15), dw.strWidth("πŸ”₯πŸ—‘πŸ©πŸ‘©πŸ»β€πŸš€β°πŸ’ƒπŸΌπŸ”¦πŸ‘πŸ»")); try testing.expectEqual(@as(usize, 2), dw.strWidth("πŸ¦€")); try testing.expectEqual(@as(usize, 2), dw.strWidth("πŸ‘¨β€πŸ‘©β€πŸ‘§β€πŸ‘§")); try testing.expectEqual(@as(usize, 2), dw.strWidth("πŸ‘©β€πŸ”¬")); try testing.expectEqual(@as(usize, 9), dw.strWidth("sane text")); try testing.expectEqual(@as(usize, 9), dw.strWidth("αΊ’ΜŒΓ‘Μ²lΝ”ΜΜžΜ„Μ‘ΝŒgΜ–Μ˜Μ˜Μ”Μ”Ν’ΝžΝoΜͺΜ”TΜ’Μ™Μ«ΜˆΜΝžeΜ¬ΝˆΝ•ΝŒΜΝ‘x̺̍ṭ̓̓ͅ")); try testing.expectEqual(@as(usize, 17), dw.strWidth("μŠ¬λΌλ°” μš°ν¬λΌμ΄λ‚˜")); try testing.expectEqual(@as(usize, 1), dw.strWidth("\u{378}")); // https://codeberg.org/atman/zg/issues/82 try testing.expectEqual(@as(usize, 12), dw.strWidth("✍️✍🏻✍🏼✍🏽✍🏾✍🏿")); } /// centers `str` in a new string of width `total_width` (in display cells) using `pad` as padding. /// If the length of `str` and `total_width` have different parity, the right side of `str` will /// receive one additional pad. This makes sure the returned string fills the requested width. /// Caller must free returned bytes with `allocator`. pub fn center( dw: DisplayWidth, allocator: mem.Allocator, str: []const u8, total_width: usize, pad: []const u8, ) ![]u8 { const str_width = dw.strWidth(str); if (str_width > total_width) return error.StrTooLong; if (str_width == total_width) return try allocator.dupe(u8, str); const pad_width = dw.strWidth(pad); if (pad_width > total_width or str_width + pad_width > total_width) return error.PadTooLong; const margin_width = @divFloor((total_width - str_width), 2); if (pad_width > margin_width) return error.PadTooLong; const extra_pad: usize = if (total_width % 2 != str_width % 2) 1 else 0; const pads = @divFloor(margin_width, pad_width) * 2 + extra_pad; var result = try allocator.alloc(u8, pads * pad.len + str.len); var bytes_index: usize = 0; var pads_index: usize = 0; while (pads_index < pads / 2) : (pads_index += 1) { @memcpy(result[bytes_index..][0..pad.len], pad); bytes_index += pad.len; } @memcpy(result[bytes_index..][0..str.len], str); bytes_index += str.len; pads_index = 0; while (pads_index < pads / 2 + extra_pad) : (pads_index += 1) { @memcpy(result[bytes_index..][0..pad.len], pad); bytes_index += pad.len; } return result; } test "center" { const allocator = testing.allocator; const dw = try DisplayWidth.init(allocator); defer dw.deinit(allocator); // Input and width both have odd length var centered = try dw.center(allocator, "abc", 9, "*"); try testing.expectEqualSlices(u8, "***abc***", centered); // Input and width both have even length testing.allocator.free(centered); centered = try dw.center(allocator, "w😊w", 10, "-"); try testing.expectEqualSlices(u8, "---w😊w---", centered); // Input has even length, width has odd length testing.allocator.free(centered); centered = try dw.center(allocator, "1234", 9, "-"); try testing.expectEqualSlices(u8, "--1234---", centered); // Input has odd length, width has even length testing.allocator.free(centered); centered = try dw.center(allocator, "123", 8, "-"); try testing.expectEqualSlices(u8, "--123---", centered); // Input is the same length as the width testing.allocator.free(centered); centered = try dw.center(allocator, "123", 3, "-"); try testing.expectEqualSlices(u8, "123", centered); // Input is empty testing.allocator.free(centered); centered = try dw.center(allocator, "", 3, "-"); try testing.expectEqualSlices(u8, "---", centered); // Input is empty and width is zero testing.allocator.free(centered); centered = try dw.center(allocator, "", 0, "-"); try testing.expectEqualSlices(u8, "", centered); // Input is longer than the width, which is an error testing.allocator.free(centered); try testing.expectError(error.StrTooLong, dw.center(allocator, "123", 2, "-")); } /// padLeft returns a new string of width `total_width` (in display cells) using `pad` as padding /// on the left side. Caller must free returned bytes with `allocator`. pub fn padLeft( dw: DisplayWidth, allocator: mem.Allocator, str: []const u8, total_width: usize, pad: []const u8, ) ![]u8 { const str_width = dw.strWidth(str); if (str_width > total_width) return error.StrTooLong; const pad_width = dw.strWidth(pad); if (pad_width > total_width or str_width + pad_width > total_width) return error.PadTooLong; const margin_width = total_width - str_width; if (pad_width > margin_width) return error.PadTooLong; const pads = @divFloor(margin_width, pad_width); var result = try allocator.alloc(u8, pads * pad.len + str.len); var bytes_index: usize = 0; var pads_index: usize = 0; while (pads_index < pads) : (pads_index += 1) { @memcpy(result[bytes_index..][0..pad.len], pad); bytes_index += pad.len; } @memcpy(result[bytes_index..][0..str.len], str); return result; } test "padLeft" { const allocator = testing.allocator; const dw = try DisplayWidth.init(allocator); defer dw.deinit(allocator); var right_aligned = try dw.padLeft(allocator, "abc", 9, "*"); defer testing.allocator.free(right_aligned); try testing.expectEqualSlices(u8, "******abc", right_aligned); testing.allocator.free(right_aligned); right_aligned = try dw.padLeft(allocator, "w😊w", 10, "-"); try testing.expectEqualSlices(u8, "------w😊w", right_aligned); } /// padRight returns a new string of width `total_width` (in display cells) using `pad` as padding /// on the right side. Caller must free returned bytes with `allocator`. pub fn padRight( dw: DisplayWidth, allocator: mem.Allocator, str: []const u8, total_width: usize, pad: []const u8, ) ![]u8 { const str_width = dw.strWidth(str); if (str_width > total_width) return error.StrTooLong; const pad_width = dw.strWidth(pad); if (pad_width > total_width or str_width + pad_width > total_width) return error.PadTooLong; const margin_width = total_width - str_width; if (pad_width > margin_width) return error.PadTooLong; const pads = @divFloor(margin_width, pad_width); var result = try allocator.alloc(u8, pads * pad.len + str.len); var bytes_index: usize = 0; var pads_index: usize = 0; @memcpy(result[bytes_index..][0..str.len], str); bytes_index += str.len; while (pads_index < pads) : (pads_index += 1) { @memcpy(result[bytes_index..][0..pad.len], pad); bytes_index += pad.len; } return result; } test "padRight" { const allocator = testing.allocator; const dw = try DisplayWidth.init(allocator); defer dw.deinit(allocator); var left_aligned = try dw.padRight(allocator, "abc", 9, "*"); defer testing.allocator.free(left_aligned); try testing.expectEqualSlices(u8, "abc******", left_aligned); testing.allocator.free(left_aligned); left_aligned = try dw.padRight(allocator, "w😊w", 10, "-"); try testing.expectEqualSlices(u8, "w😊w------", left_aligned); } /// Wraps a string approximately at the given number of colums per line. /// `threshold` defines how far the last column of the last word can be /// from the edge. Caller must free returned bytes with `allocator`. pub fn wrap( dw: DisplayWidth, allocator: mem.Allocator, str: []const u8, columns: usize, threshold: usize, ) ![]u8 { var result = std.array_list.Managed(u8).init(allocator); defer result.deinit(); var line_iter = mem.tokenizeAny(u8, str, "\r\n"); var line_width: usize = 0; while (line_iter.next()) |line| { var word_iter = mem.tokenizeScalar(u8, line, ' '); while (word_iter.next()) |word| { try result.appendSlice(word); try result.append(' '); line_width += dw.strWidth(word) + 1; if (line_width > columns or columns - line_width <= threshold) { try result.append('\n'); line_width = 0; } } } // Remove trailing space and newline. if (result.items[result.items.len - 1] == '\n') _ = result.pop(); if (result.items[result.items.len - 1] == ' ') _ = result.pop(); return try result.toOwnedSlice(); } test "wrap" { const allocator = testing.allocator; const dw = try DisplayWidth.init(allocator); defer dw.deinit(allocator); const input = "The quick brown fox\r\njumped over the lazy dog!"; const got = try dw.wrap(allocator, input, 10, 3); defer testing.allocator.free(got); const want = "The quick \nbrown fox \njumped \nover the \nlazy dog!"; try testing.expectEqualStrings(want, got); } test "zg/74" { var debug_alloc = std.heap.DebugAllocator(.{}).init; const allocator = debug_alloc.allocator(); defer _ = debug_alloc.deinit(); const dw = try DisplayWidth.init(allocator); defer dw.deinit(allocator); const wrapped = try dw.wrap(allocator, "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nullam pellentesque pulvinar felis, sit amet commodo ligula feugiat sed. Sed quis malesuada elit, nec eleifend lectus. Sed tincidunt finibus aliquet. Praesent consectetur nibh libero, tempus imperdiet lorem congue eget.", 16, 1); defer allocator.free(wrapped); const expected_wrap = "Lorem ipsum dolor \nsit amet, consectetur \nadipiscing elit. \nNullam pellentesque \npulvinar felis, \nsit amet commodo \nligula feugiat \nsed. Sed quis malesuada \nelit, nec eleifend \nlectus. Sed tincidunt \nfinibus aliquet. \nPraesent consectetur \nnibh libero, tempus \nimperdiet lorem \ncongue eget."; try std.testing.expectEqualStrings(expected_wrap, wrapped); } fn testAllocation(allocator: Allocator) !void { { var dw = try DisplayWidth.init(allocator); dw.deinit(allocator); } { var graph = try Graphemes.init(allocator); defer graph.deinit(allocator); var dw = try DisplayWidth.initWithGraphemes(allocator, graph); dw.deinit(allocator); } } test "allocation test" { try testing.checkAllAllocationFailures(testing.allocator, testAllocation, .{}); } const std = @import("std"); const builtin = @import("builtin"); const options = @import("options"); const mem = std.mem; const Allocator = mem.Allocator; const simd = std.simd; const testing = std.testing; const ascii = @import("ascii"); const CodePointIterator = @import("code_point").Iterator; const Graphemes = @import("Graphemes");