diff options
Diffstat (limited to 'src/DisplayWidth.zig')
| -rw-r--r-- | src/DisplayWidth.zig | 351 |
1 files changed, 351 insertions, 0 deletions
diff --git a/src/DisplayWidth.zig b/src/DisplayWidth.zig new file mode 100644 index 0000000..85d04a0 --- /dev/null +++ b/src/DisplayWidth.zig | |||
| @@ -0,0 +1,351 @@ | |||
| 1 | const std = @import("std"); | ||
| 2 | const builtin = @import("builtin"); | ||
| 3 | const ArrayList = std.ArrayList; | ||
| 4 | const mem = std.mem; | ||
| 5 | const simd = std.simd; | ||
| 6 | const testing = std.testing; | ||
| 7 | |||
| 8 | const ascii = @import("ascii"); | ||
| 9 | const CodePointIterator = @import("code_point").Iterator; | ||
| 10 | const GraphemeIterator = @import("grapheme").Iterator; | ||
| 11 | pub const Data = @import("DisplayWidthData"); | ||
| 12 | |||
/// Width lookup data this instance reads from: `strWidth` calls
/// `codePointWidth` on it and passes its `g_data` field to the grapheme iterator.
data: *Data,

/// This file is itself the struct type; `Self` is the receiver type of its methods.
const Self = @This();
| 16 | |||
/// Computes how many cells `str` occupies in a fixed-pitch font (i.e. on a
/// terminal screen).
///
/// Input that is entirely ASCII is summed one byte at a time. Any other input
/// is walked one grapheme cluster at a time; a cluster contributes the width of
/// its first code point whose width is non-zero, or a single cell when that
/// code point is immediately followed by U+FE0E. The running total is signed
/// and is clamped at zero before being returned.
pub fn strWidth(self: Self, str: []const u8) usize {
    var cells: isize = 0;

    if (ascii.isAsciiOnly(str)) {
        // Each byte is a complete code point here, so no segmentation is needed.
        for (str) |byte| cells += self.data.codePointWidth(byte);
    } else {
        var clusters = GraphemeIterator.init(str, &self.data.g_data);

        while (clusters.next()) |cluster| {
            var code_points = CodePointIterator{ .bytes = cluster.bytes(str) };

            while (code_points.next()) |cp| {
                const own_width = self.data.codePointWidth(cp.code);
                if (own_width == 0) continue;

                // The first code point with a width decides the whole cluster.
                // If U+FE0E comes right after it (emoji text sequence), the
                // cluster is counted as one cell instead.
                const text_style = if (code_points.next()) |follower|
                    follower.code == 0xFE0E
                else
                    false;

                cells += if (text_style) 1 else own_width;
                break;
            }
        }
    }

    return @intCast(@max(0, cells));
}
| 57 | |||
test "strWidth" {
    var data = try Data.init(testing.allocator);
    defer data.deinit();
    const self = Self{ .data = &data };

    try testing.expectEqual(@as(usize, 5), self.strWidth("Hello\r\n"));
    try testing.expectEqual(@as(usize, 1), self.strWidth("\u{0065}\u{0301}"));
    try testing.expectEqual(@as(usize, 2), self.strWidth("\u{1F476}\u{1F3FF}\u{0308}\u{200D}\u{1F476}\u{1F3FF}"));
    try testing.expectEqual(@as(usize, 8), self.strWidth("Hello 😊"));
    try testing.expectEqual(@as(usize, 8), self.strWidth("Héllo 😊"));
    try testing.expectEqual(@as(usize, 8), self.strWidth("Héllo :)"));
    try testing.expectEqual(@as(usize, 8), self.strWidth("Héllo 🇪🇸"));
    try testing.expectEqual(@as(usize, 2), self.strWidth("\u{26A1}")); // Lone emoji
    try testing.expectEqual(@as(usize, 1), self.strWidth("\u{26A1}\u{FE0E}")); // Text sequence
    try testing.expectEqual(@as(usize, 2), self.strWidth("\u{26A1}\u{FE0F}")); // Presentation sequence
    try testing.expectEqual(@as(usize, 0), self.strWidth("A\x08")); // Backspace
    try testing.expectEqual(@as(usize, 0), self.strWidth("\x7FA")); // DEL
    try testing.expectEqual(@as(usize, 0), self.strWidth("\x7FA\x08\x08")); // never less than 0

    // wcwidth Python lib tests. See: https://github.com/jquast/wcwidth/blob/master/tests/test_core.py
    const empty = "";
    try testing.expectEqual(@as(usize, 0), self.strWidth(empty));
    const with_null = "hello\x00world";
    try testing.expectEqual(@as(usize, 10), self.strWidth(with_null));
    const hello_jp = "コンニチハ, セカイ!";
    try testing.expectEqual(@as(usize, 19), self.strWidth(hello_jp));
    const control = "\x1b[0m";
    try testing.expectEqual(@as(usize, 3), self.strWidth(control));
    const balinese = "\u{1B13}\u{1B28}\u{1B2E}\u{1B44}";
    try testing.expectEqual(@as(usize, 3), self.strWidth(balinese));

    // The commented-out tests below require a new specification for complex scripts.
    // See: https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf
    // const jamo = "\u{1100}\u{1160}";
    // try testing.expectEqual(@as(usize, 3), self.strWidth(jamo));
    // const devanagari = "\u{0915}\u{094D}\u{0937}\u{093F}";
    // try testing.expectEqual(@as(usize, 3), self.strWidth(devanagari));
    // const tamil = "\u{0b95}\u{0bcd}\u{0bb7}\u{0bcc}";
    // try testing.expectEqual(@as(usize, 5), self.strWidth(tamil));
    // const kannada_1 = "\u{0cb0}\u{0ccd}\u{0c9d}\u{0cc8}";
    // try testing.expectEqual(@as(usize, 3), self.strWidth(kannada_1));
    // The following passes, but only by coincidence.
    const kannada_2 = "\u{0cb0}\u{0cbc}\u{0ccd}\u{0c9a}";
    try testing.expectEqual(@as(usize, 2), self.strWidth(kannada_2));

    // From Rust https://github.com/jameslanska/unicode-display-width
    //
    // Sequences that rely on ZERO WIDTH JOINER (U+200D) are spelled with
    // escapes: the joiner is invisible in a literal and is easily lost when the
    // file is copied or rendered, which silently changes the expected width.

    // fire, dagger, doughnut, woman + light skin tone ZWJ rocket, alarm clock,
    // dancer + medium-light skin tone, flashlight, thumbs up + light skin tone
    try testing.expectEqual(@as(usize, 15), self.strWidth("\u{1F525}\u{1F5E1}\u{1F369}\u{1F469}\u{1F3FB}\u{200D}\u{1F680}\u{23F0}\u{1F483}\u{1F3FC}\u{1F526}\u{1F44D}\u{1F3FB}"));
    try testing.expectEqual(@as(usize, 2), self.strWidth("🦀"));
    // family: man ZWJ woman ZWJ girl ZWJ girl
    try testing.expectEqual(@as(usize, 2), self.strWidth("\u{1F468}\u{200D}\u{1F469}\u{200D}\u{1F467}\u{200D}\u{1F467}"));
    // woman scientist: woman ZWJ microscope
    try testing.expectEqual(@as(usize, 2), self.strWidth("\u{1F469}\u{200D}\u{1F52C}"));
    try testing.expectEqual(@as(usize, 9), self.strWidth("sane text"));
    try testing.expectEqual(@as(usize, 9), self.strWidth("Ẓ̌á̲l͔̝̞̄̑͌g̖̘̘̔̔͢͞͝o̪̔T̢̙̫̈̍͞e̬͈͕͌̏͑x̺̍ṭ̓̓ͅ"));
    try testing.expectEqual(@as(usize, 17), self.strWidth("슬라바 우크라이나"));
    try testing.expectEqual(@as(usize, 1), self.strWidth("\u{378}"));
}
| 113 | |||
/// Centers `str` in a new string of width `total_width` (in display cells),
/// filling both margins with repetitions of `pad`.
///
/// When the free space is odd, the right margin is the one that gets the extra
/// cell. Each margin holds as many whole pads as fit into it, so with a pad
/// wider than one cell the result may be narrower than `total_width`, but it
/// is never wider.
///
/// Errors:
/// - `error.StrTooLong` if `str` is wider than `total_width`.
/// - `error.PadZeroWidth` if padding is needed but `pad` occupies no cells.
/// - `error.PadTooLong` if `pad` does not fit into the left margin.
/// - any allocation error from `allocator`.
///
/// Caller must free returned bytes with `allocator`.
pub fn center(
    self: Self,
    allocator: mem.Allocator,
    str: []const u8,
    total_width: usize,
    pad: []const u8,
) ![]u8 {
    const str_width = self.strWidth(str);
    if (str_width > total_width) return error.StrTooLong;
    if (str_width == total_width) return try allocator.dupe(u8, str);

    const pad_width = self.strWidth(pad);
    // A pad that occupies no cells can never fill a margin, and it would be the
    // divisor in the pad-count computations below.
    if (pad_width == 0) return error.PadZeroWidth;

    const free_width = total_width - str_width;
    const left_width = free_width / 2;
    // The left margin is the smaller one, so this also covers a pad that is
    // wider than the whole free space.
    if (pad_width > left_width) return error.PadTooLong;
    const right_width = free_width - left_width;

    // Count pads per side from that side's own width, so that a pad wider than
    // one cell can never push the result past `total_width`.
    const left_pads = left_width / pad_width;
    const right_pads = right_width / pad_width;

    const result = try allocator.alloc(u8, (left_pads + right_pads) * pad.len + str.len);
    var offset: usize = 0;

    for (0..left_pads) |_| {
        @memcpy(result[offset..][0..pad.len], pad);
        offset += pad.len;
    }

    @memcpy(result[offset..][0..str.len], str);
    offset += str.len;

    for (0..right_pads) |_| {
        @memcpy(result[offset..][0..pad.len], pad);
        offset += pad.len;
    }

    return result;
}
| 157 | |||
test "center" {
    const allocator = testing.allocator;
    var data = try Data.init(allocator);
    defer data.deinit();
    const self = Self{ .data = &data };

    const Case = struct {
        str: []const u8,
        total_width: usize,
        pad: []const u8,
        want: []const u8,
    };

    const cases = [_]Case{
        // Input and width both have odd length
        .{ .str = "abc", .total_width = 9, .pad = "*", .want = "***abc***" },
        // Input and width both have even length
        .{ .str = "w😊w", .total_width = 10, .pad = "-", .want = "---w😊w---" },
        // Input has even length, width has odd length
        .{ .str = "1234", .total_width = 9, .pad = "-", .want = "--1234---" },
        // Input has odd length, width has even length
        .{ .str = "123", .total_width = 8, .pad = "-", .want = "--123---" },
        // Input is the same length as the width
        .{ .str = "123", .total_width = 3, .pad = "-", .want = "123" },
        // Input is empty
        .{ .str = "", .total_width = 3, .pad = "-", .want = "---" },
        // Input is empty and width is zero
        .{ .str = "", .total_width = 0, .pad = "-", .want = "" },
    };

    for (cases) |case| {
        const centered = try self.center(allocator, case.str, case.total_width, case.pad);
        // Scoped to this iteration, so the buffer is released even when the
        // assertion below fails.
        defer allocator.free(centered);
        try testing.expectEqualSlices(u8, case.want, centered);
    }

    // Input is longer than the width, which is an error
    try testing.expectError(error.StrTooLong, self.center(allocator, "123", 2, "-"));
}
| 202 | |||
/// padLeft returns a new string of width `total_width` (in display cells) with
/// repetitions of `pad` placed on the left side of `str`.
///
/// As many whole pads as fit into the free space are used, so with a pad wider
/// than one cell the result may be narrower than `total_width`. If `str`
/// already has the requested width, a copy of it is returned.
///
/// Errors:
/// - `error.StrTooLong` if `str` is wider than `total_width`.
/// - `error.PadZeroWidth` if padding is needed but `pad` occupies no cells.
/// - `error.PadTooLong` if `pad` is wider than the free space.
/// - any allocation error from `allocator`.
///
/// Caller must free returned bytes with `allocator`.
pub fn padLeft(
    self: Self,
    allocator: mem.Allocator,
    str: []const u8,
    total_width: usize,
    pad: []const u8,
) ![]u8 {
    const str_width = self.strWidth(str);
    if (str_width > total_width) return error.StrTooLong;
    // Nothing to pad; same behavior as `center` for an exact fit.
    if (str_width == total_width) return try allocator.dupe(u8, str);

    const pad_width = self.strWidth(pad);
    // A pad that occupies no cells can never fill the margin, and it would be
    // the divisor in the pad-count computation below.
    if (pad_width == 0) return error.PadZeroWidth;

    const margin_width = total_width - str_width;
    if (pad_width > margin_width) return error.PadTooLong;

    const pads = margin_width / pad_width;
    const fill_len = pads * pad.len;

    const result = try allocator.alloc(u8, fill_len + str.len);

    // `pad.len` is non-zero here because `pad` has a non-zero display width.
    var offset: usize = 0;
    while (offset < fill_len) : (offset += pad.len) {
        @memcpy(result[offset..][0..pad.len], pad);
    }

    @memcpy(result[fill_len..][0..str.len], str);

    return result;
}
| 236 | |||
test "padLeft" {
    const allocator = testing.allocator;
    var data = try Data.init(allocator);
    defer data.deinit();
    const self = Self{ .data = &data };

    // Each case owns its buffer in its own scope. Sharing one variable between
    // a `defer` and a manual free would release the first buffer twice if the
    // second call returned an error.
    {
        const right_aligned = try self.padLeft(allocator, "abc", 9, "*");
        defer allocator.free(right_aligned);
        try testing.expectEqualSlices(u8, "******abc", right_aligned);
    }

    {
        const right_aligned = try self.padLeft(allocator, "w😊w", 10, "-");
        defer allocator.free(right_aligned);
        try testing.expectEqualSlices(u8, "------w😊w", right_aligned);
    }
}
| 251 | |||
/// padRight returns a new string of width `total_width` (in display cells) with
/// repetitions of `pad` placed on the right side of `str`.
///
/// As many whole pads as fit into the free space are used, so with a pad wider
/// than one cell the result may be narrower than `total_width`. If `str`
/// already has the requested width, a copy of it is returned.
///
/// Errors:
/// - `error.StrTooLong` if `str` is wider than `total_width`.
/// - `error.PadZeroWidth` if padding is needed but `pad` occupies no cells.
/// - `error.PadTooLong` if `pad` is wider than the free space.
/// - any allocation error from `allocator`.
///
/// Caller must free returned bytes with `allocator`.
pub fn padRight(
    self: Self,
    allocator: mem.Allocator,
    str: []const u8,
    total_width: usize,
    pad: []const u8,
) ![]u8 {
    const str_width = self.strWidth(str);
    if (str_width > total_width) return error.StrTooLong;
    // Nothing to pad; same behavior as `center` for an exact fit.
    if (str_width == total_width) return try allocator.dupe(u8, str);

    const pad_width = self.strWidth(pad);
    // A pad that occupies no cells can never fill the margin, and it would be
    // the divisor in the pad-count computation below.
    if (pad_width == 0) return error.PadZeroWidth;

    const margin_width = total_width - str_width;
    if (pad_width > margin_width) return error.PadTooLong;

    const pads = margin_width / pad_width;

    const result = try allocator.alloc(u8, str.len + pads * pad.len);

    @memcpy(result[0..str.len], str);

    // `pad.len` is non-zero here because `pad` has a non-zero display width.
    var offset: usize = str.len;
    while (offset < result.len) : (offset += pad.len) {
        @memcpy(result[offset..][0..pad.len], pad);
    }

    return result;
}
| 286 | |||
test "padRight" {
    const allocator = testing.allocator;
    var data = try Data.init(allocator);
    defer data.deinit();
    const self = Self{ .data = &data };

    // Each case owns its buffer in its own scope. Sharing one variable between
    // a `defer` and a manual free would release the first buffer twice if the
    // second call returned an error.
    {
        const left_aligned = try self.padRight(allocator, "abc", 9, "*");
        defer allocator.free(left_aligned);
        try testing.expectEqualSlices(u8, "abc******", left_aligned);
    }

    {
        const left_aligned = try self.padRight(allocator, "w😊w", 10, "-");
        defer allocator.free(left_aligned);
        try testing.expectEqualSlices(u8, "w😊w------", left_aligned);
    }
}
| 301 | |||
/// Wraps a string approximately at the given number of columns per line.
/// `threshold` defines how far the last column of the last word can be
/// from the edge.
///
/// The input is split into words on spaces, `\r` and `\n`; existing line
/// breaks are not preserved, the words are simply re-flowed. Every word is
/// followed by a single space, and a `\n` is added after that space whenever
/// the current line has passed `columns` or is within `threshold` cells of it.
/// The separators after the final word are removed, so empty or
/// whitespace-only input yields an empty string.
///
/// Returns any allocation error from `allocator`.
/// Caller must free returned bytes with `allocator`.
pub fn wrap(
    self: Self,
    allocator: mem.Allocator,
    str: []const u8,
    columns: usize,
    threshold: usize,
) ![]u8 {
    var result = ArrayList(u8).init(allocator);
    defer result.deinit();

    var line_iter = mem.tokenizeAny(u8, str, "\r\n");
    var line_width: usize = 0;

    while (line_iter.next()) |line| {
        var word_iter = mem.tokenizeScalar(u8, line, ' ');

        while (word_iter.next()) |word| {
            try result.appendSlice(word);
            try result.append(' ');
            line_width += self.strWidth(word) + 1;

            // The first test guards the subtraction in the second one.
            if (line_width > columns or columns - line_width <= threshold) {
                try result.append('\n');
                line_width = 0;
            }
        }
    }

    // Remove only the separators that were really appended after the final
    // word: an optional newline, then the space. Words never contain either
    // byte, so this cannot cut into the text, and it is a no-op when nothing
    // was written at all.
    var end = result.items.len;
    if (end > 0 and result.items[end - 1] == '\n') end -= 1;
    if (end > 0 and result.items[end - 1] == ' ') end -= 1;
    result.shrinkRetainingCapacity(end);

    return try result.toOwnedSlice();
}
| 339 | |||
test "wrap" {
    var data = try Data.init(testing.allocator);
    defer data.deinit();
    const display_width = Self{ .data = &data };

    // Two input lines, re-flowed at 10 columns with a threshold of 3.
    const expected = "The quick \nbrown fox \njumped \nover the \nlazy dog!";
    const wrapped = try display_width.wrap(
        testing.allocator,
        "The quick brown fox\r\njumped over the lazy dog!",
        10,
        3,
    );
    defer testing.allocator.free(wrapped);

    try testing.expectEqualStrings(expected, wrapped);
}