From 703a824c1bb7fc41535c9515c5a2209d04899d19 Mon Sep 17 00:00:00 2001
From: Jose Colon Rodriguez
Date: Wed, 14 Feb 2024 12:02:02 -0400
Subject: Code reorg; Added UCD

---
 src/Grapheme.zig | 40 +++++++++++++++++++++++++++++-----------
 1 file changed, 29 insertions(+), 11 deletions(-)

(limited to 'src/Grapheme.zig')

diff --git a/src/Grapheme.zig b/src/Grapheme.zig
index d739159..9f09255 100644
--- a/src/Grapheme.zig
+++ b/src/Grapheme.zig
@@ -308,29 +308,36 @@ test "Segmentation ZWJ and ZWSP emoji sequences" {
 }
 
 // Grapheme break state.
-fn hasXpic(state: *const u3) bool {
+// Extended Pictographic (emoji): bit 0 of the state (mask 1)
+inline fn hasXpic(state: *const u3) bool {
     return state.* & 1 == 1;
 }
-
-fn setXpic(state: *u3) void {
+inline fn setXpic(state: *u3) void {
     state.* |= 1;
 }
-
-fn unsetXpic(state: *u3) void {
+inline fn unsetXpic(state: *u3) void {
     state.* ^= 1;
 }
-
-fn hasRegional(state: *const u3) bool {
+// Regional Indicator (flags): bit 1 of the state (mask 2)
+inline fn hasRegional(state: *const u3) bool {
     return state.* & 2 == 2;
 }
-
-fn setRegional(state: *u3) void {
+inline fn setRegional(state: *u3) void {
     state.* |= 2;
 }
-
-fn unsetRegional(state: *u3) void {
+inline fn unsetRegional(state: *u3) void {
     state.* ^= 2;
 }
+// Indic Conjunct Break: bit 2 of the state (mask 4)
+inline fn hasIndic(state: *const u3) bool {
+    return state.* & 4 == 4;
+}
+inline fn setIndic(state: *u3) void {
+    state.* |= 4;
+}
+inline fn unsetIndic(state: *u3) void {
+    state.* ^= 4;
+}
 
 /// `graphemeBreak` returns true only if a grapheme break point is required
 /// between `cp1` and `cp2`. `state` should start out as 0. If calling
@@ -345,6 +352,8 @@ pub fn graphemeBreak(
 ) bool {
     // GB11: Emoji Extend* ZWJ x Emoji
     if (!hasXpic(state) and emoji.isExtendedPictographic(cp1)) setXpic(state);
+    // GB9c: Indic Conjunct Break
+    // if (!hasIndic(state) and indic.isConsonant(cp1)) setIndic(state);
 
     // GB3: CR x LF
     if (cp1 == '\r' and cp2 == '\n') return false;
@@ -400,6 +409,15 @@ pub fn graphemeBreak(
         return false;
     }
 
+    // GB9c: Indic Conjunct Break
+    // if (hasIndic(state) and
+    //     indic.isLinker(cp1) and
+    //     indic.isConsonant(cp2))
+    // {
+    //     unsetIndic(state);
+    //     return false;
+    // }
+
     return true;
 }
 
-- 
cgit v1.2.3