diff options
| author | 2024-02-14 12:02:02 -0400 | |
|---|---|---|
| committer | 2024-02-14 12:02:02 -0400 | |
| commit | 703a824c1bb7fc41535c9515c5a2209d04899d19 (patch) | |
| tree | 0572d99ce590a644bf54444a41e20a9688fecb19 /src/Grapheme.zig | |
| parent | Removed unreachables from CodePointIterator (diff) | |
| download | zg-703a824c1bb7fc41535c9515c5a2209d04899d19.tar.gz zg-703a824c1bb7fc41535c9515c5a2209d04899d19.tar.xz zg-703a824c1bb7fc41535c9515c5a2209d04899d19.zip | |
Code reorg; Added UCD
Diffstat (limited to 'src/Grapheme.zig')
| -rw-r--r-- | src/Grapheme.zig | 40 |
1 files changed, 29 insertions, 11 deletions
diff --git a/src/Grapheme.zig b/src/Grapheme.zig index d739159..9f09255 100644 --- a/src/Grapheme.zig +++ b/src/Grapheme.zig | |||
| @@ -308,29 +308,36 @@ test "Segmentation ZWJ and ZWSP emoji sequences" { | |||
| 308 | } | 308 | } |
| 309 | 309 | ||
| 310 | // Grapheme break state. | 310 | // Grapheme break state. |
| 311 | fn hasXpic(state: *const u3) bool { | 311 | // Extended Pictographic (emoji) |
| 312 | inline fn hasXpic(state: *const u3) bool { | ||
| 312 | return state.* & 1 == 1; | 313 | return state.* & 1 == 1; |
| 313 | } | 314 | } |
| 314 | 315 | inline fn setXpic(state: *u3) void { | |
| 315 | fn setXpic(state: *u3) void { | ||
| 316 | state.* |= 1; | 316 | state.* |= 1; |
| 317 | } | 317 | } |
| 318 | 318 | inline fn unsetXpic(state: *u3) void { | |
| 319 | fn unsetXpic(state: *u3) void { | ||
| 320 | state.* ^= 1; | 319 | state.* ^= 1; |
| 321 | } | 320 | } |
| 322 | 321 | // Regional Indicator (flags) | |
| 323 | fn hasRegional(state: *const u3) bool { | 322 | inline fn hasRegional(state: *const u3) bool { |
| 324 | return state.* & 2 == 2; | 323 | return state.* & 2 == 2; |
| 325 | } | 324 | } |
| 326 | 325 | inline fn setRegional(state: *u3) void { | |
| 327 | fn setRegional(state: *u3) void { | ||
| 328 | state.* |= 2; | 326 | state.* |= 2; |
| 329 | } | 327 | } |
| 330 | 328 | inline fn unsetRegional(state: *u3) void { | |
| 331 | fn unsetRegional(state: *u3) void { | ||
| 332 | state.* ^= 2; | 329 | state.* ^= 2; |
| 333 | } | 330 | } |
| 331 | // Indic Conjunct | ||
| 332 | inline fn hasIndic(state: *const u3) bool { | ||
| 333 | return state.* & 2 == 4; | ||
| 334 | } | ||
| 335 | inline fn setIndic(state: *u3) void { | ||
| 336 | state.* |= 4; | ||
| 337 | } | ||
| 338 | inline fn unsetIndic(state: *u3) void { | ||
| 339 | state.* ^= 4; | ||
| 340 | } | ||
| 334 | 341 | ||
| 335 | /// `graphemeBreak` returns true only if a grapheme break point is required | 342 | /// `graphemeBreak` returns true only if a grapheme break point is required |
| 336 | /// between `cp1` and `cp2`. `state` should start out as 0. If calling | 343 | /// between `cp1` and `cp2`. `state` should start out as 0. If calling |
| @@ -345,6 +352,8 @@ pub fn graphemeBreak( | |||
| 345 | ) bool { | 352 | ) bool { |
| 346 | // GB11: Emoji Extend* ZWJ x Emoji | 353 | // GB11: Emoji Extend* ZWJ x Emoji |
| 347 | if (!hasXpic(state) and emoji.isExtendedPictographic(cp1)) setXpic(state); | 354 | if (!hasXpic(state) and emoji.isExtendedPictographic(cp1)) setXpic(state); |
| 355 | // GB9c: Indic Conjunct Break | ||
| 356 | // if (!hasIndic(state) and indic.isConsonant(cp1)) setIndic(state); | ||
| 348 | 357 | ||
| 349 | // GB3: CR x LF | 358 | // GB3: CR x LF |
| 350 | if (cp1 == '\r' and cp2 == '\n') return false; | 359 | if (cp1 == '\r' and cp2 == '\n') return false; |
| @@ -400,6 +409,15 @@ pub fn graphemeBreak( | |||
| 400 | return false; | 409 | return false; |
| 401 | } | 410 | } |
| 402 | 411 | ||
| 412 | // GB9c: Indic Conjunct Break | ||
| 413 | // if (hasIndic(state) and | ||
| 414 | // indic.isLinker(cp1) and | ||
| 415 | // indic.isConsonant(cp2)) | ||
| 416 | // { | ||
| 417 | // unsetIndic(state); | ||
| 418 | // return false; | ||
| 419 | // } | ||
| 420 | |||
| 403 | return true; | 421 | return true; |
| 404 | } | 422 | } |
| 405 | 423 | ||