Testing Ghostty's Utf8Decoder. A bit slower

author: Jose Colon Rodriguez 2024-02-18 11:14:43 -0400
committer: Jose Colon Rodriguez 2024-02-18 11:14:43 -0400
commit: 6c7da0b526959840240177c0defb680e76fecad6 (patch)
tree: 78c426747ebd23f1e0034798f29c37d1a5893826 /src
parent: Rename to zg (diff)
download: zg-6c7da0b526959840240177c0defb680e76fecad6.tar.gz
zg-6c7da0b526959840240177c0defb680e76fecad6.tar.xz
zg-6c7da0b526959840240177c0defb680e76fecad6.zip
4 files changed, 216 insertions, 18 deletions
diff --git a/src/Grapheme.zig b/src/Grapheme.zig
index 6981753..f013aba 100644
--- a/src/Grapheme.zig
+++ b/src/Grapheme.zig
@@ -1,6 +1,7 @@
 const std = @import("std");
 const unicode = std.unicode;
+const CodePoint = @import("code_point").CodePoint;
 const CodePointIterator = @import("code_point").Iterator;
 const gbp = @import("gbp");
@@ -16,13 +17,6 @@ pub const Grapheme = struct {
    }
 };
-// We need the code as a u21.
-const CodePoint = struct {
-    code: u21,
-    len: u3,
-    offset: u32,
-};
 /// `Iterator` iterates a sting of UTF-8 encoded bytes one grapheme cluster at-a-time.
 pub const Iterator = struct {
    buf: [2]?CodePoint = .{ null, null },
@@ -39,13 +33,7 @@ pub const Iterator = struct {
    fn advance(self: *Self) void {
        self.buf[0] = self.buf[1];
+        self.buf[1] = self.cp_iter.next();
-        const maybe_cp = self.cp_iter.next();
-        self.buf[1] = if (maybe_cp) |cp| .{
-            .code = cp.code(self.cp_iter.bytes),
-            .len = cp.len,
-            .offset = cp.offset,
-        } else null;
    }
    pub fn next(self: *Self) ?Grapheme {
diff --git a/src/Utf8Decoder.zig b/src/Utf8Decoder.zig
new file mode 100644
index 0000000..6bb0d98
--- /dev/null
+++ b/src/Utf8Decoder.zig
@@ -0,0 +1,142 @@
+//! DFA-based non-allocating error-replacing UTF-8 decoder.
+//!
+//! This implementation is based largely on the excellent work of
+//! Bjoern Hoehrmann, with slight modifications to support error-
+//! replacement.
+//!
+//! For details on Bjoern's DFA-based UTF-8 decoder, see
+//! http://bjoern.hoehrmann.de/utf-8/decoder/dfa (MIT licensed)
+const UTF8Decoder = @This();
+const std = @import("std");
+const testing = std.testing;
+const log = std.log.scoped(.utf8decoder);
+// zig fmt: off
+const char_classes = [_]u4{
+   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+   1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,  9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
+   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+   8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+  10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,
+};
+const transitions = [_]u8 {
+   0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12,
+  12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12,
+  12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12,
+  12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12,
+  12,36,12,12,12,12,12,12,12,12,12,12,
+};
+// zig fmt: on
+// DFA states
+const ACCEPT_STATE = 0;
+const REJECT_STATE = 12;
+// This is where we accumulate our current codepoint.
+accumulator: u21 = 0,
+// The internal state of the DFA.
+state: u8 = ACCEPT_STATE,
+/// Takes the next byte in the utf-8 sequence and emits a tuple of
+/// - The codepoint that was generated, if there is one.
+/// - A boolean that indicates whether the provided byte was consumed.
+///
+/// The only case where the byte is not consumed is if an ill-formed
+/// sequence is reached, in which case a replacement character will be
+/// emitted and the byte will not be consumed.
+///
+/// If the byte is not consumed, the caller is responsible for calling
+/// again with the same byte before continuing.
+pub inline fn next(self: *UTF8Decoder, byte: u8) struct { ?u21, bool } {
+    const char_class = char_classes[byte];
+    const initial_state = self.state;
+    if (self.state != ACCEPT_STATE) {
+        self.accumulator <<= 6;
+        self.accumulator |= (byte & 0x3F);
+    } else {
+        self.accumulator = (@as(u21, 0xFF) >> char_class) & (byte);
+    }
+    self.state = transitions[self.state + char_class];
+    if (self.state == ACCEPT_STATE) {
+        defer self.accumulator = 0;
+        // Emit the fully decoded codepoint.
+        return .{ self.accumulator, true };
+    } else if (self.state == REJECT_STATE) {
+        self.accumulator = 0;
+        self.state = ACCEPT_STATE;
+        // Emit a replacement character. If we rejected the first byte
+        // in a sequence, then it was consumed, otherwise it was not.
+        return .{ 0xFFFD, initial_state == ACCEPT_STATE };
+    } else {
+        // Emit nothing, we're in the middle of a sequence.
+        return .{ null, true };
+    }
+}
+test "ASCII" {
+    var d: UTF8Decoder = .{};
+    var out: [13]u8 = undefined;
+    for ("Hello, World!", 0..) |byte, i| {
+        const res = d.next(byte);
+        try testing.expect(res[1]);
+        if (res[0]) |codepoint| {
+            out[i] = @intCast(codepoint);
+        }
+    }
+    try testing.expect(std.mem.eql(u8, &out, "Hello, World!"));
+}
+test "Well formed utf-8" {
+    var d: UTF8Decoder = .{};
+    var out: [4]u21 = undefined;
+    var i: usize = 0;
+    // 4 bytes, 3 bytes, 2 bytes, 1 byte
+    for ("😄✤ÁA") |byte| {
+        var consumed = false;
+        while (!consumed) {
+            const res = d.next(byte);
+            consumed = res[1];
+            // There are no errors in this sequence, so
+            // every byte should be consumed first try.
+            try testing.expect(consumed == true);
+            if (res[0]) |codepoint| {
+                out[i] = codepoint;
+                i += 1;
+            }
+        }
+    }
+    try testing.expect(std.mem.eql(u21, &out, &[_]u21{ 0x1F604, 0x2724, 0xC1, 0x41 }));
+}
+test "Partially invalid utf-8" {
+    var d: UTF8Decoder = .{};
+    var out: [5]u21 = undefined;
+    var i: usize = 0;
+    // Illegally terminated sequence, valid sequence, illegal surrogate pair.
+    for ("\xF0\x9F😄\xED\xA0\x80") |byte| {
+        var consumed = false;
+        while (!consumed) {
+            const res = d.next(byte);
+            consumed = res[1];
+            if (res[0]) |codepoint| {
+                out[i] = codepoint;
+                i += 1;
+            }
+        }
+    }
+    try testing.expect(std.mem.eql(u21, &out, &[_]u21{ 0xFFFD, 0x1F604, 0xFFFD, 0xFFFD, 0xFFFD }));
+}
diff --git a/src/cp2.zig b/src/cp2.zig
new file mode 100644
index 0000000..ae0f9da
--- /dev/null
+++ b/src/cp2.zig
@@ -0,0 +1,69 @@
+const std = @import("std");
+const Utf8Decoder = @import("Utf8Decoder.zig");
+/// `CodePoint` represents a Unicode code point by its code,
+/// length, and offset in the source bytes.
+pub const CodePoint = struct {
+    code: u21,
+    len: u3,
+    offset: u32,
+};
+/// `Iterator` iterates a string one `CodePoint` at-a-time.
+pub const Iterator = struct {
+    bytes: []const u8,
+    decoder: Utf8Decoder = .{},
+    i: u32 = 0,
+    pub fn next(self: *Iterator) ?CodePoint {
+        if (self.i >= self.bytes.len) return null;
+        if (self.bytes[self.i] < 128) {
+            // ASCII fast path
+            defer self.i += 1;
+            return .{
+                .code = self.bytes[self.i],
+                .len = 1,
+                .offset = self.i,
+            };
+        }
+        for (self.bytes[self.i..], 1..) |b, len| {
+            var consumed = false;
+            while (!consumed) {
+                const res = self.decoder.next(b);
+                consumed = res[1];
+                if (res[0]) |code| {
+                    defer self.i += @intCast(len);
+                    return .{
+                        .code = code,
+                        .len = @intCast(len),
+                        .offset = self.i,
+                    };
+                }
+            }
+        }
+        unreachable;
+    }
+    pub fn peek(self: *Iterator) ?CodePoint {
+        const saved_i = self.i;
+        defer self.i = saved_i;
+        return self.next();
+    }
+};
+test "peek" {
+    const src = "Hi";
+    var iter = Iterator{ .bytes = src };
+    try std.testing.expectEqual(@as(u21, 'H'), iter.next().?.code);
+    try std.testing.expectEqual(@as(u21, 'i'), iter.peek().?.code);
+    try std.testing.expectEqual(@as(u21, 'i'), iter.next().?.code);
+    try std.testing.expectEqual(@as(?CodePoint, null), iter.peek());
+    try std.testing.expectEqual(@as(?CodePoint, null), iter.next());
+}
diff --git a/src/display_width.zig b/src/display_width.zig
index 7f39566..71483ca 100644
--- a/src/display_width.zig
+++ b/src/display_width.zig
@@ -52,18 +52,17 @@ pub fn strWidth(str: []const u8) usize {
    var giter = GraphemeIterator.init(str);
    while (giter.next()) |gc| {
-        const gc_bytes = gc.bytes(str);
+        var cp_iter = CodePointIterator{ .bytes = gc.bytes(str) };
-        var cp_iter = CodePointIterator{ .bytes = gc_bytes };
        var gc_total: isize = 0;
        while (cp_iter.next()) |cp| {
-            var w = codePointWidth(cp.code(gc_bytes));
+            var w = codePointWidth(cp.code);
            if (w != 0) {
                // Handle text emoji sequence.
                if (cp_iter.next()) |ncp| {
                    // emoji text sequence.
-                    if (ncp.code(gc_bytes) == 0xFE0E) w = 1;
+                    if (ncp.code == 0xFE0E) w = 1;
                }
                // Only adding width of first non-zero-width code point.
author	Jose Colon Rodriguez	2024-02-18 11:14:43 -0400
committer	Jose Colon Rodriguez	2024-02-18 11:14:43 -0400
commit	6c7da0b526959840240177c0defb680e76fecad6 (patch)
tree	78c426747ebd23f1e0034798f29c37d1a5893826 /src
parent	Rename to zg (diff)
download	zg-6c7da0b526959840240177c0defb680e76fecad6.tar.gz zg-6c7da0b526959840240177c0defb680e76fecad6.tar.xz zg-6c7da0b526959840240177c0defb680e76fecad6.zip

diff --git a/src/Grapheme.zig b/src/Grapheme.zig index 6981753..f013aba 100644 --- a/src/Grapheme.zig +++ b/src/Grapheme.zig
@@ -1,6 +1,7 @@
1	const std = @import("std");	1	const std = @import("std");
2	const unicode = std.unicode;	2	const unicode = std.unicode;
3		3
		4	const CodePoint = @import("code_point").CodePoint;
4	const CodePointIterator = @import("code_point").Iterator;	5	const CodePointIterator = @import("code_point").Iterator;
5	const gbp = @import("gbp");	6	const gbp = @import("gbp");
6		7
@@ -16,13 +17,6 @@ pub const Grapheme = struct {
16	}	17	}
17	};	18	};
18		19
19	// We need the code as a u21.
20	const CodePoint = struct {
21	code: u21,
22	len: u3,
23	offset: u32,
24	};
25
26	/// `Iterator` iterates a sting of UTF-8 encoded bytes one grapheme cluster at-a-time.	20	/// `Iterator` iterates a sting of UTF-8 encoded bytes one grapheme cluster at-a-time.
27	pub const Iterator = struct {	21	pub const Iterator = struct {
28	buf: [2]?CodePoint = .{ null, null },	22	buf: [2]?CodePoint = .{ null, null },
@@ -39,13 +33,7 @@ pub const Iterator = struct {
39		33
40	fn advance(self: *Self) void {	34	fn advance(self: *Self) void {
41	self.buf[0] = self.buf[1];	35	self.buf[0] = self.buf[1];
42		36	self.buf[1] = self.cp_iter.next();
43	const maybe_cp = self.cp_iter.next();
44	self.buf[1] = if (maybe_cp) \|cp\| .{
45	.code = cp.code(self.cp_iter.bytes),
46	.len = cp.len,
47	.offset = cp.offset,
48	} else null;
49	}	37	}
50		38
51	pub fn next(self: *Self) ?Grapheme {	39	pub fn next(self: *Self) ?Grapheme {


diff --git a/src/Utf8Decoder.zig b/src/Utf8Decoder.zig new file mode 100644 index 0000000..6bb0d98 --- /dev/null +++ b/src/Utf8Decoder.zig
@@ -0,0 +1,142 @@
		1	//! DFA-based non-allocating error-replacing UTF-8 decoder.
		2	//!
		3	//! This implementation is based largely on the excellent work of
		4	//! Bjoern Hoehrmann, with slight modifications to support error-
		5	//! replacement.
		6	//!
		7	//! For details on Bjoern's DFA-based UTF-8 decoder, see
		8	//! http://bjoern.hoehrmann.de/utf-8/decoder/dfa (MIT licensed)
		9	const UTF8Decoder = @This();
		10
		11	const std = @import("std");
		12	const testing = std.testing;
		13
		14	const log = std.log.scoped(.utf8decoder);
		15
		16	// zig fmt: off
		17	const char_classes = [_]u4{
		18	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
		19	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
		20	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
		21	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
		22	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
		23	7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
		24	8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
		25	10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,
		26	};
		27
		28	const transitions = [_]u8 {
		29	0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12,
		30	12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12,
		31	12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12,
		32	12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12,
		33	12,36,12,12,12,12,12,12,12,12,12,12,
		34	};
		35	// zig fmt: on
		36
		37	// DFA states
		38	const ACCEPT_STATE = 0;
		39	const REJECT_STATE = 12;
		40
		41	// This is where we accumulate our current codepoint.
		42	accumulator: u21 = 0,
		43	// The internal state of the DFA.
		44	state: u8 = ACCEPT_STATE,
		45
		46	/// Takes the next byte in the utf-8 sequence and emits a tuple of
		47	/// - The codepoint that was generated, if there is one.
		48	/// - A boolean that indicates whether the provided byte was consumed.
		49	///
		50	/// The only case where the byte is not consumed is if an ill-formed
		51	/// sequence is reached, in which case a replacement character will be
		52	/// emitted and the byte will not be consumed.
		53	///
		54	/// If the byte is not consumed, the caller is responsible for calling
		55	/// again with the same byte before continuing.
		56	pub inline fn next(self: *UTF8Decoder, byte: u8) struct { ?u21, bool } {
		57	const char_class = char_classes[byte];
		58
		59	const initial_state = self.state;
		60
		61	if (self.state != ACCEPT_STATE) {
		62	self.accumulator <<= 6;
		63	self.accumulator \|= (byte & 0x3F);
		64	} else {
		65	self.accumulator = (@as(u21, 0xFF) >> char_class) & (byte);
		66	}
		67
		68	self.state = transitions[self.state + char_class];
		69
		70	if (self.state == ACCEPT_STATE) {
		71	defer self.accumulator = 0;
		72
		73	// Emit the fully decoded codepoint.
		74	return .{ self.accumulator, true };
		75	} else if (self.state == REJECT_STATE) {
		76	self.accumulator = 0;
		77	self.state = ACCEPT_STATE;
		78	// Emit a replacement character. If we rejected the first byte
		79	// in a sequence, then it was consumed, otherwise it was not.
		80	return .{ 0xFFFD, initial_state == ACCEPT_STATE };
		81	} else {
		82	// Emit nothing, we're in the middle of a sequence.
		83	return .{ null, true };
		84	}
		85	}
		86
		87	test "ASCII" {
		88	var d: UTF8Decoder = .{};
		89	var out: [13]u8 = undefined;
		90	for ("Hello, World!", 0..) \|byte, i\| {
		91	const res = d.next(byte);
		92	try testing.expect(res[1]);
		93	if (res[0]) \|codepoint\| {
		94	out[i] = @intCast(codepoint);
		95	}
		96	}
		97
		98	try testing.expect(std.mem.eql(u8, &out, "Hello, World!"));
		99	}
		100
		101	test "Well formed utf-8" {
		102	var d: UTF8Decoder = .{};
		103	var out: [4]u21 = undefined;
		104	var i: usize = 0;
		105	// 4 bytes, 3 bytes, 2 bytes, 1 byte
		106	for ("😄✤ÁA") \|byte\| {
		107	var consumed = false;
		108	while (!consumed) {
		109	const res = d.next(byte);
		110	consumed = res[1];
		111	// There are no errors in this sequence, so
		112	// every byte should be consumed first try.
		113	try testing.expect(consumed == true);
		114	if (res[0]) \|codepoint\| {
		115	out[i] = codepoint;
		116	i += 1;
		117	}
		118	}
		119	}
		120
		121	try testing.expect(std.mem.eql(u21, &out, &[_]u21{ 0x1F604, 0x2724, 0xC1, 0x41 }));
		122	}
		123
		124	test "Partially invalid utf-8" {
		125	var d: UTF8Decoder = .{};
		126	var out: [5]u21 = undefined;
		127	var i: usize = 0;
		128	// Illegally terminated sequence, valid sequence, illegal surrogate pair.
		129	for ("\xF0\x9F😄\xED\xA0\x80") \|byte\| {
		130	var consumed = false;
		131	while (!consumed) {
		132	const res = d.next(byte);
		133	consumed = res[1];
		134	if (res[0]) \|codepoint\| {
		135	out[i] = codepoint;
		136	i += 1;
		137	}
		138	}
		139	}
		140
		141	try testing.expect(std.mem.eql(u21, &out, &[_]u21{ 0xFFFD, 0x1F604, 0xFFFD, 0xFFFD, 0xFFFD }));
		142	}


diff --git a/src/cp2.zig b/src/cp2.zig new file mode 100644 index 0000000..ae0f9da --- /dev/null +++ b/src/cp2.zig
@@ -0,0 +1,69 @@
		1	const std = @import("std");
		2
		3	const Utf8Decoder = @import("Utf8Decoder.zig");
		4
		5	/// `CodePoint` represents a Unicode code point by its code,
		6	/// length, and offset in the source bytes.
		7	pub const CodePoint = struct {
		8	code: u21,
		9	len: u3,
		10	offset: u32,
		11	};
		12
		13	/// `Iterator` iterates a string one `CodePoint` at-a-time.
		14	pub const Iterator = struct {
		15	bytes: []const u8,
		16	decoder: Utf8Decoder = .{},
		17	i: u32 = 0,
		18
		19	pub fn next(self: *Iterator) ?CodePoint {
		20	if (self.i >= self.bytes.len) return null;
		21
		22	if (self.bytes[self.i] < 128) {
		23	// ASCII fast path
		24	defer self.i += 1;
		25	return .{
		26	.code = self.bytes[self.i],
		27	.len = 1,
		28	.offset = self.i,
		29	};
		30	}
		31
		32	for (self.bytes[self.i..], 1..) \|b, len\| {
		33	var consumed = false;
		34	while (!consumed) {
		35	const res = self.decoder.next(b);
		36	consumed = res[1];
		37
		38	if (res[0]) \|code\| {
		39	defer self.i += @intCast(len);
		40
		41	return .{
		42	.code = code,
		43	.len = @intCast(len),
		44	.offset = self.i,
		45	};
		46	}
		47	}
		48	}
		49
		50	unreachable;
		51	}
		52
		53	pub fn peek(self: *Iterator) ?CodePoint {
		54	const saved_i = self.i;
		55	defer self.i = saved_i;
		56	return self.next();
		57	}
		58	};
		59
		60	test "peek" {
		61	const src = "Hi";
		62	var iter = Iterator{ .bytes = src };
		63
		64	try std.testing.expectEqual(@as(u21, 'H'), iter.next().?.code);
		65	try std.testing.expectEqual(@as(u21, 'i'), iter.peek().?.code);
		66	try std.testing.expectEqual(@as(u21, 'i'), iter.next().?.code);
		67	try std.testing.expectEqual(@as(?CodePoint, null), iter.peek());
		68	try std.testing.expectEqual(@as(?CodePoint, null), iter.next());
		69	}


diff --git a/src/display_width.zig b/src/display_width.zig index 7f39566..71483ca 100644 --- a/src/display_width.zig +++ b/src/display_width.zig
@@ -52,18 +52,17 @@ pub fn strWidth(str: []const u8) usize {
52	var giter = GraphemeIterator.init(str);	52	var giter = GraphemeIterator.init(str);
53		53
54	while (giter.next()) \|gc\| {	54	while (giter.next()) \|gc\| {
55	const gc_bytes = gc.bytes(str);	55	var cp_iter = CodePointIterator{ .bytes = gc.bytes(str) };
56	var cp_iter = CodePointIterator{ .bytes = gc_bytes };
57	var gc_total: isize = 0;	56	var gc_total: isize = 0;
58		57
59	while (cp_iter.next()) \|cp\| {	58	while (cp_iter.next()) \|cp\| {
60	var w = codePointWidth(cp.code(gc_bytes));	59	var w = codePointWidth(cp.code);
61		60
62	if (w != 0) {	61	if (w != 0) {
63	// Handle text emoji sequence.	62	// Handle text emoji sequence.
64	if (cp_iter.next()) \|ncp\| {	63	if (cp_iter.next()) \|ncp\| {
65	// emoji text sequence.	64	// emoji text sequence.
66	if (ncp.code(gc_bytes) == 0xFE0E) w = 1;	65	if (ncp.code == 0xFE0E) w = 1;
67	}	66	}
68		67
69	// Only adding width of first non-zero-width code point.	68	// Only adding width of first non-zero-width code point.