diff options
Diffstat (limited to 'src/display_width.zig')
| -rw-r--r-- | src/display_width.zig | 111 |
1 files changed, 111 insertions, 0 deletions
diff --git a/src/display_width.zig b/src/display_width.zig new file mode 100644 index 0000000..e06aa8f --- /dev/null +++ b/src/display_width.zig | |||
| @@ -0,0 +1,111 @@ | |||
| 1 | const std = @import("std"); | ||
| 2 | const testing = std.testing; | ||
| 3 | |||
| 4 | const CodePointIterator = @import("CodePoint").CodePointIterator; | ||
| 5 | const GraphemeIterator = @import("Grapheme").GraphemeIterator; | ||
| 6 | const dwp = @import("dwp"); | ||
| 7 | |||
/// Returns the number of cells the code point `cp` occupies when rendered in
/// a fixed-pitch font (i.e. on a terminal screen). The result ranges from -1
/// to 3: BACKSPACE and DELETE yield -1, the three-em dash yields 3, and C0/C1
/// control codes yield 0. This function takes no CJK switch; the tests in
/// this file expect the East Asian ambiguous code point U+00BD to yield 1.
pub fn codePointWidth(cp: u21) i3 {
    // Two-stage table lookup: the bits of `cp` above the low byte select an
    // entry of `stage_1`, and the low byte is added to that entry to form
    // the index into `stage_2`.
    const block_start = dwp.stage_1[cp >> 8];
    const low_byte = cp & 0xff;
    return dwp.stage_2[block_start + low_byte];
}
| 16 | |||
/// Returns the total display width of `str` in terminal cells.
///
/// The string is walked one grapheme cluster at a time. Each cluster
/// contributes the width of its first code point whose `codePointWidth` is
/// non-zero; clusters made only of zero-width code points contribute nothing.
/// If that first non-zero-width code point is immediately followed by a
/// variation selector, the selector overrides the width: U+FE0E (text
/// presentation) forces 1 and U+FE0F (emoji presentation) forces 2.
///
/// Negative widths (BACKSPACE, DELETE) reduce the running total, but the
/// returned value is never less than 0.
fn strWidth(str: []const u8) usize {
    var total: isize = 0;
    var giter = GraphemeIterator.init(str);

    while (giter.next()) |gc| {
        var cp_iter = CodePointIterator{ .bytes = str[gc.offset..][0..gc.len] };

        while (cp_iter.next()) |cp| {
            var w: isize = codePointWidth(cp.code);
            if (w == 0) continue;

            // Look at the code point right after the first one that has a
            // width. This consumes it from the iterator, which is fine
            // because the cluster is finished as soon as a width is known.
            if (cp_iter.next()) |ncp| {
                switch (ncp.code) {
                    0xFE0E => w = 1, // emoji text sequence: narrow
                    0xFE0F => w = 2, // emoji presentation sequence: wide
                    else => {},
                }
            }

            // Only the first non-zero-width code point counts for the cluster.
            total += w;
            break;
        }
    }

    // Backspace/delete can push the sum below zero; clamp to zero.
    return if (total > 0) @intCast(total) else 0;
}
| 45 | |||
test "display_width Width" {
    // Expected widths of individual code points, checked in order.
    const CodePointCase = struct { cp: u21, width: i3 };
    const code_point_cases = [_]CodePointCase{
        .{ .cp = 0x0000, .width = 0 }, // NUL
        .{ .cp = 0x8, .width = -1 }, // \b BS
        .{ .cp = 0x7f, .width = -1 }, // DEL
        .{ .cp = 0x0005, .width = 0 }, // ENQ
        .{ .cp = 0x0007, .width = 0 }, // \a BEL
        .{ .cp = 0x000A, .width = 0 }, // \n LF
        .{ .cp = 0x000B, .width = 0 }, // \v VT
        .{ .cp = 0x000C, .width = 0 }, // \f FF
        .{ .cp = 0x000D, .width = 0 }, // \r CR
        .{ .cp = 0x000E, .width = 0 }, // SO
        .{ .cp = 0x000F, .width = 0 }, // SI

        .{ .cp = 0x070F, .width = 0 }, // Cf
        .{ .cp = 0x0603, .width = 1 }, // Cf Arabic

        .{ .cp = 0x00AD, .width = 1 }, // soft hyphen
        .{ .cp = 0x2E3A, .width = 2 }, // two-em dash
        .{ .cp = 0x2E3B, .width = 3 }, // three-em dash

        .{ .cp = 0x00BD, .width = 1 }, // ambiguous halfwidth

        .{ .cp = 'é', .width = 1 },
        .{ .cp = '😊', .width = 2 },
        .{ .cp = '统', .width = 2 },
    };
    for (code_point_cases) |entry| {
        try testing.expectEqual(entry.width, codePointWidth(entry.cp));
    }

    // Expected widths of whole strings, checked in order.
    const StringCase = struct { text: []const u8, width: usize };
    const string_cases = [_]StringCase{
        .{ .text = "Hello\r\n", .width = 5 },
        .{ .text = "\u{0065}\u{0301}", .width = 1 },
        .{ .text = "\u{1F476}\u{1F3FF}\u{0308}\u{200D}\u{1F476}\u{1F3FF}", .width = 2 },
        .{ .text = "Hello 😊", .width = 8 },
        .{ .text = "Héllo 😊", .width = 8 },
        .{ .text = "Héllo :)", .width = 8 },
        .{ .text = "Héllo 🇪🇸", .width = 8 },
        .{ .text = "\u{26A1}", .width = 2 }, // Lone emoji
        .{ .text = "\u{26A1}\u{FE0E}", .width = 1 }, // Text sequence
        .{ .text = "\u{26A1}\u{FE0F}", .width = 2 }, // Presentation sequence
        .{ .text = "A\x08", .width = 0 }, // Backspace
        .{ .text = "\x7FA", .width = 0 }, // DEL
        .{ .text = "\x7FA\x08\x08", .width = 0 }, // never less than 0

        // wcwidth Python lib tests. See: https://github.com/jquast/wcwidth/blob/master/tests/test_core.py
        .{ .text = "", .width = 0 }, // empty
        .{ .text = "hello\x00world", .width = 10 }, // embedded NUL
        .{ .text = "コンニチハ, セカイ!", .width = 19 }, // Japanese hello
        .{ .text = "\x1b[0m", .width = 3 }, // control sequence
        .{ .text = "\u{1B13}\u{1B28}\u{1B2E}\u{1B44}", .width = 3 }, // Balinese

        // The commented-out cases below require a new specification for complex scripts.
        // See: https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf
        // .{ .text = "\u{1100}\u{1160}", .width = 3 }, // Jamo
        // .{ .text = "\u{0915}\u{094D}\u{0937}\u{093F}", .width = 3 }, // Devanagari
        // .{ .text = "\u{0b95}\u{0bcd}\u{0bb7}\u{0bcc}", .width = 5 }, // Tamil
        // .{ .text = "\u{0cb0}\u{0ccd}\u{0c9d}\u{0cc8}", .width = 3 }, // Kannada 1
        // The following passes, but only by coincidence.
        .{ .text = "\u{0cb0}\u{0cbc}\u{0ccd}\u{0c9a}", .width = 2 }, // Kannada 2
    };
    for (string_cases) |entry| {
        try testing.expectEqual(entry.width, strWidth(entry.text));
    }
}