summaryrefslogtreecommitdiff
path: root/src/Grapheme.zig
diff options
context:
space:
mode:
authorGravatar Jose Colon Rodriguez2024-02-14 12:02:02 -0400
committerGravatar Jose Colon Rodriguez2024-02-14 12:02:02 -0400
commit703a824c1bb7fc41535c9515c5a2209d04899d19 (patch)
tree0572d99ce590a644bf54444a41e20a9688fecb19 /src/Grapheme.zig
parentRemoved unreachables from CodePointIterator (diff)
downloadzg-703a824c1bb7fc41535c9515c5a2209d04899d19.tar.gz
zg-703a824c1bb7fc41535c9515c5a2209d04899d19.tar.xz
zg-703a824c1bb7fc41535c9515c5a2209d04899d19.zip
Code reorg; Added UCD
Diffstat (limited to 'src/Grapheme.zig')
-rw-r--r--src/Grapheme.zig40
1 files changed, 29 insertions, 11 deletions
diff --git a/src/Grapheme.zig b/src/Grapheme.zig
index d739159..9f09255 100644
--- a/src/Grapheme.zig
+++ b/src/Grapheme.zig
@@ -308,29 +308,36 @@ test "Segmentation ZWJ and ZWSP emoji sequences" {
308} 308}
309 309
310// Grapheme break state. 310// Grapheme break state.
311fn hasXpic(state: *const u3) bool { 311// Extended Pictographic (emoji)
312inline fn hasXpic(state: *const u3) bool {
312 return state.* & 1 == 1; 313 return state.* & 1 == 1;
313} 314}
314 315inline fn setXpic(state: *u3) void {
315fn setXpic(state: *u3) void {
316 state.* |= 1; 316 state.* |= 1;
317} 317}
318 318inline fn unsetXpic(state: *u3) void {
319fn unsetXpic(state: *u3) void {
320 state.* ^= 1; 319 state.* ^= 1;
321} 320}
322 321// Regional Indicator (flags)
323fn hasRegional(state: *const u3) bool { 322inline fn hasRegional(state: *const u3) bool {
324 return state.* & 2 == 2; 323 return state.* & 2 == 2;
325} 324}
326 325inline fn setRegional(state: *u3) void {
327fn setRegional(state: *u3) void {
328 state.* |= 2; 326 state.* |= 2;
329} 327}
330 328inline fn unsetRegional(state: *u3) void {
331fn unsetRegional(state: *u3) void {
332 state.* ^= 2; 329 state.* ^= 2;
333} 330}
331// Indic Conjunct
332inline fn hasIndic(state: *const u3) bool {
333 return state.* & 4 == 4;
334}
335inline fn setIndic(state: *u3) void {
336 state.* |= 4;
337}
338inline fn unsetIndic(state: *u3) void {
339 state.* ^= 4;
340}
334 341
335/// `graphemeBreak` returns true only if a grapheme break point is required 342/// `graphemeBreak` returns true only if a grapheme break point is required
336/// between `cp1` and `cp2`. `state` should start out as 0. If calling 343/// between `cp1` and `cp2`. `state` should start out as 0. If calling
@@ -345,6 +352,8 @@ pub fn graphemeBreak(
345) bool { 352) bool {
346 // GB11: Emoji Extend* ZWJ x Emoji 353 // GB11: Emoji Extend* ZWJ x Emoji
347 if (!hasXpic(state) and emoji.isExtendedPictographic(cp1)) setXpic(state); 354 if (!hasXpic(state) and emoji.isExtendedPictographic(cp1)) setXpic(state);
355 // GB9c: Indic Conjunct Break
356 // if (!hasIndic(state) and indic.isConsonant(cp1)) setIndic(state);
348 357
349 // GB3: CR x LF 358 // GB3: CR x LF
350 if (cp1 == '\r' and cp2 == '\n') return false; 359 if (cp1 == '\r' and cp2 == '\n') return false;
@@ -400,6 +409,15 @@ pub fn graphemeBreak(
400 return false; 409 return false;
401 } 410 }
402 411
412 // GB9c: Indic Conjunct Break
413 // if (hasIndic(state) and
414 // indic.isLinker(cp1) and
415 // indic.isConsonant(cp2))
416 // {
417 // unsetIndic(state);
418 // return false;
419 // }
420
403 return true; 421 return true;
404} 422}
405 423