diff options
Diffstat (limited to '')
| -rw-r--r-- | codegen/emoji.zig (renamed from src/emoji_gen.zig) | 0 | ||||
| -rw-r--r-- | codegen/grapheme_break.zig (renamed from src/gbp_gen.zig) | 0 | ||||
| -rw-r--r-- | src/Grapheme.zig | 40 | ||||
| -rw-r--r-- | src/main.zig | 9 |
4 files changed, 34 insertions, 15 deletions
diff --git a/src/emoji_gen.zig b/codegen/emoji.zig index acad0ca..acad0ca 100644 --- a/src/emoji_gen.zig +++ b/codegen/emoji.zig | |||
diff --git a/src/gbp_gen.zig b/codegen/grapheme_break.zig index ace875c..ace875c 100644 --- a/src/gbp_gen.zig +++ b/codegen/grapheme_break.zig | |||
diff --git a/src/Grapheme.zig b/src/Grapheme.zig index d739159..9f09255 100644 --- a/src/Grapheme.zig +++ b/src/Grapheme.zig | |||
| @@ -308,29 +308,36 @@ test "Segmentation ZWJ and ZWSP emoji sequences" { | |||
| 308 | } | 308 | } |
| 309 | 309 | ||
| 310 | // Grapheme break state. | 310 | // Grapheme break state. |
| 311 | fn hasXpic(state: *const u3) bool { | 311 | // Extended Pictographic (emoji) |
| 312 | inline fn hasXpic(state: *const u3) bool { | ||
| 312 | return state.* & 1 == 1; | 313 | return state.* & 1 == 1; |
| 313 | } | 314 | } |
| 314 | 315 | inline fn setXpic(state: *u3) void { | |
| 315 | fn setXpic(state: *u3) void { | ||
| 316 | state.* |= 1; | 316 | state.* |= 1; |
| 317 | } | 317 | } |
| 318 | 318 | inline fn unsetXpic(state: *u3) void { | |
| 319 | fn unsetXpic(state: *u3) void { | ||
| 320 | state.* ^= 1; | 319 | state.* ^= 1; |
| 321 | } | 320 | } |
| 322 | 321 | // Regional Indicator (flags) | |
| 323 | fn hasRegional(state: *const u3) bool { | 322 | inline fn hasRegional(state: *const u3) bool { |
| 324 | return state.* & 2 == 2; | 323 | return state.* & 2 == 2; |
| 325 | } | 324 | } |
| 326 | 325 | inline fn setRegional(state: *u3) void { | |
| 327 | fn setRegional(state: *u3) void { | ||
| 328 | state.* |= 2; | 326 | state.* |= 2; |
| 329 | } | 327 | } |
| 330 | 328 | inline fn unsetRegional(state: *u3) void { | |
| 331 | fn unsetRegional(state: *u3) void { | ||
| 332 | state.* ^= 2; | 329 | state.* ^= 2; |
| 333 | } | 330 | } |
| 331 | // Indic Conjunct | ||
| 332 | inline fn hasIndic(state: *const u3) bool { | ||
| 333 | return state.* & 4 == 4; // test bit 2 (mask 4), the bit setIndic sets; the previous mask of 2 could never equal 4 | ||
| 334 | } | ||
| 335 | inline fn setIndic(state: *u3) void { | ||
| 336 | state.* |= 4; // turn on bit 2 (mask 4); the other two state bits are left untouched | ||
| 337 | } | ||
| 338 | inline fn unsetIndic(state: *u3) void { | ||
| 339 | state.* ^= 4; // XOR toggles bit 2 (mask 4): this clears the flag only when it is currently set | ||
| 340 | } | ||
| 334 | 341 | ||
| 335 | /// `graphemeBreak` returns true only if a grapheme break point is required | 342 | /// `graphemeBreak` returns true only if a grapheme break point is required |
| 336 | /// between `cp1` and `cp2`. `state` should start out as 0. If calling | 343 | /// between `cp1` and `cp2`. `state` should start out as 0. If calling |
| @@ -345,6 +352,8 @@ pub fn graphemeBreak( | |||
| 345 | ) bool { | 352 | ) bool { |
| 346 | // GB11: Emoji Extend* ZWJ x Emoji | 353 | // GB11: Emoji Extend* ZWJ x Emoji |
| 347 | if (!hasXpic(state) and emoji.isExtendedPictographic(cp1)) setXpic(state); | 354 | if (!hasXpic(state) and emoji.isExtendedPictographic(cp1)) setXpic(state); |
| 355 | // GB9c: Indic Conjunct Break | ||
| 356 | // if (!hasIndic(state) and indic.isConsonant(cp1)) setIndic(state); | ||
| 348 | 357 | ||
| 349 | // GB3: CR x LF | 358 | // GB3: CR x LF |
| 350 | if (cp1 == '\r' and cp2 == '\n') return false; | 359 | if (cp1 == '\r' and cp2 == '\n') return false; |
| @@ -400,6 +409,15 @@ pub fn graphemeBreak( | |||
| 400 | return false; | 409 | return false; |
| 401 | } | 410 | } |
| 402 | 411 | ||
| 412 | // GB9c: Indic Conjunct Break | ||
| 413 | // if (hasIndic(state) and | ||
| 414 | // indic.isLinker(cp1) and | ||
| 415 | // indic.isConsonant(cp2)) | ||
| 416 | // { | ||
| 417 | // unsetIndic(state); | ||
| 418 | // return false; | ||
| 419 | // } | ||
| 420 | |||
| 403 | return true; | 421 | return true; |
| 404 | } | 422 | } |
| 405 | 423 | ||
diff --git a/src/main.zig b/src/main.zig index 8335530..a78c1dc 100644 --- a/src/main.zig +++ b/src/main.zig | |||
| @@ -16,14 +16,15 @@ pub fn main() !void { | |||
| 16 | 16 | ||
| 17 | var timer = try std.time.Timer.start(); | 17 | var timer = try std.time.Timer.start(); |
| 18 | 18 | ||
| 19 | for (0..50) |_| { | 19 | // for (0..50) |_| { |
| 20 | while (iter.next()) |_| result += 1; | 20 | while (iter.next()) |_| result += 1; |
| 21 | iter.cp_iter.i = 0; | 21 | iter.cp_iter.i = 0; |
| 22 | } | 22 | // } |
| 23 | 23 | ||
| 24 | std.debug.print("result: {}, took: {}\n", .{ result, timer.lap() / std.time.ns_per_ms }); | 24 | std.debug.print("result: {}, took: {}\n", .{ result, timer.lap() / std.time.ns_per_ms }); |
| 25 | } | 25 | } |
| 26 | 26 | ||
| 27 | test { | 27 | test { |
| 28 | _ = @import("CodePoint.zig"); | ||
| 28 | _ = @import("Grapheme.zig"); | 29 | _ = @import("Grapheme.zig"); |
| 29 | } | 30 | } |