summaryrefslogtreecommitdiff
path: root/src/code_point.zig
diff options
context:
space:
mode:
Diffstat (limited to 'src/code_point.zig')
-rw-r--r--src/code_point.zig60
1 files changed, 58 insertions, 2 deletions
diff --git a/src/code_point.zig b/src/code_point.zig
index 8bd3d5b..16648af 100644
--- a/src/code_point.zig
+++ b/src/code_point.zig
@@ -39,9 +39,17 @@ pub fn decode(bytes: []const u8, offset: uoffset) ?CodePoint {
39 return null; 39 return null;
40} 40}
41 41
/// Return the codepoint at `index`, even if `index` is in the middle
/// of that codepoint.
///
/// Returns `null` when `index` is not a valid position in `bytes`
/// (`index >= bytes.len`); otherwise returns whatever `decodeAtIndex`
/// yields for the chosen start offset.
///
/// When `index` lands on a continuation byte (`0x80...0xbf`), the search
/// for the lead byte goes back at most three bytes, which is the largest
/// number of continuation bytes a UTF-8 sequence can carry. If the code
/// point decoded at that candidate start does not actually span `index`
/// (for example, stray continuation bytes in malformed input), the result
/// is the decode at `index` itself rather than an unrelated earlier
/// code point.
pub fn codepointAtIndex(bytes: []const u8, index: uoffset) ?CodePoint {
    // Reading `bytes[index]` past the end would trip a safety check (or be
    // undefined behavior in unchecked builds); the optional return type
    // already has a way to say "nothing here".
    if (index >= bytes.len) return null;

    var start = index;
    var steps_back: u8 = 0;
    while (start > 0 and steps_back < 3 and (bytes[start] & 0xc0) == 0x80) : (steps_back += 1) {
        start -= 1;
    }

    if (start != index) {
        if (decodeAtIndex(bytes, start)) |cp| {
            // Only accept the candidate if `index` falls inside it.
            if (cp.offset + cp.len > index) return cp;
        }
    }
    return decodeAtIndex(bytes, index);
}
49
/// Decode the CodePoint, if any, at `bytes[index]`.
///
/// This is a convenience wrapper around `decodeAtCursor`: `index` is copied
/// into a local cursor, so the caller's value is never modified and the
/// cursor position after decoding is discarded.
pub fn decodeAtIndex(bytes: []const u8, index: uoffset) ?CodePoint {
    var cursor = index;
    return decodeAtCursor(bytes, &cursor);
}
47 55
@@ -329,6 +337,54 @@ test Iterator {
329 try expectEqual(@as(?CodePoint, null), iter.next()); 337 try expectEqual(@as(?CodePoint, null), iter.next());
330} 338}
331 339
340const code_point = @This();
341
342// Keep this in sync with the README
// Walks `str` forward with `code_point.Iterator`, checking the `code` of
// each yielded code point, and for the fourth one (the emoji) also checks
// `offset`, `len`, and the three decode entry points. The body of the loop
// then exercises `ReverseIterator` and conversion back to a forward iterator.
// NOTE(review): nothing asserts the final value of `i`, so if `iter` yielded
// fewer than four code points the `i == 3` checks would be skipped silently.
343test "Code point iterator" {
344 const str = "Hi 😊";
345 var iter: code_point.Iterator = .init(str);
346 var i: usize = 0;
347
348 while (iter.next()) |cp| : (i += 1) {
349 // The `code` field is the actual code point scalar as a `u21`.
350 if (i == 0) try expect(cp.code == 'H');
351 if (i == 1) try expect(cp.code == 'i');
352 if (i == 2) try expect(cp.code == ' ');
353
354 if (i == 3) {
355 try expect(cp.code == '😊');
356 // The `offset` field is the byte offset in the
357 // source string.
358 try expect(cp.offset == 3);
359 try expectEqual(cp, code_point.decodeAtIndex(str, cp.offset).?);
360 // The `len` field is the length in bytes of the
361 // code point in the source string.
362 try expect(cp.len == 4);
363 // There is also a 'cursor' decode, like so:
364 {
365 var cursor = cp.offset;
366 try expectEqual(cp, code_point.decodeAtCursor(str, &cursor).?);
367 // Which advances the cursor variable to the next possible
368 // offset, in this case, `str.len`. Don't forget to account
369 // for this possibility!
370 try expectEqual(cp.offset + cp.len, cursor);
371 }
372 // There's also this, for when you aren't sure if you have the
373 // correct start for a code point. Here `cp.offset + 1` points
// inside the four-byte emoji (offset 3, len 4, as asserted above):
374 try expectEqual(cp, code_point.codepointAtIndex(str, cp.offset + 1).?);
375 }
376 // Reverse iteration is also an option:
// NOTE(review): this section is inside the `while` body, so a fresh
// `ReverseIterator` is built and checked on every loop iteration.
377 var r_iter: code_point.ReverseIterator = .init(str);
378 // Both iterators can be peeked:
379 try expectEqual('😊', r_iter.peek().?.code);
380 try expectEqual('😊', r_iter.prev().?.code);
381 // Both kinds of iterators can be reversed:
382 var fwd_iter = r_iter.forwardIterator(); // or iter.reverseIterator();
383 // This will always return the last codepoint from
384 // the prior iterator, _if_ it yielded one:
385 try expectEqual('😊', fwd_iter.next().?.code);
386 }
387}
332test "overlongs" { 388test "overlongs" {
333 // None of these should equal `/`, all should be byte-for-byte 389 // None of these should equal `/`, all should be byte-for-byte
334 // handled as replacement characters. 390 // handled as replacement characters.