From a7164d9e7b3c3ec6813e06a42d82180d766e15ca Mon Sep 17 00:00:00 2001 From: Sam Atman Date: Wed, 30 Apr 2025 20:32:23 -0400 Subject: Unicode 16.0 Went smoothly, needed to add some scripts and adjust the magic numbers, but other than that, all set. --- data/unicode/DoNotEmit.txt | 472 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 472 insertions(+) create mode 100644 data/unicode/DoNotEmit.txt (limited to 'data/unicode/DoNotEmit.txt') diff --git a/data/unicode/DoNotEmit.txt b/data/unicode/DoNotEmit.txt new file mode 100644 index 0000000..757a313 --- /dev/null +++ b/data/unicode/DoNotEmit.txt @@ -0,0 +1,472 @@ +# DoNotEmit-16.0.0.txt +# Date: 2024-07-30, 19:30:00 GMT +# © 2024 Unicode®, Inc. +# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. +# For terms of use and license, see https://www.unicode.org/terms_of_use.html +# +# For documentation, see UAX #44: Unicode Character Database, +# at https://www.unicode.org/reports/tr44/ +# +# Do_Not_Emit +# +# This file is part of the Unicode Character Database. It does not define +# any properties, but rather provides additional information about +# characters or character sequences that should not be emitted or generated +# in newly authored text. Applications such as input methods could use this +# information to replace "Do Not Emit" sequences input by users with an +# acceptable alternative. Other applications may use the information in +# this file to consider certain sequences similar to each other for display, +# collation, or searching purposes. (This is an addition to canonical +# equivalence, which is defined elsewhere in the standard.) +# +# Note that the discouraged sequences listed in this file should not be +# considered invalid input to text display implementations. When received +# by an implementation, the sequences are not required to be displayed with +# a visual indication of an error (such as dotted circles). Implementations
Implementation +# should try their best to display them as normal text, perhaps in the same +# way or very similar to the way their alternative sequence is displayed. +# +# Only characters and character sequences for which a suitable alternative +# sequence exists are provided. For example, deprecated characters for +# which no suitable alternative exists are not listed. (For a list of +# deprecated characters see the "Deprecated" property defined in the Unicode +# Character Database file "PropList.txt".) +# +# Also, canonically equivalent sequences are not listed, even if one +# sequence is specified to be discouraged or deprecated in the Unicode +# Standard. For example, U+2126 OHM SIGN, which is canonically equivalent +# to U+03A9 GREEK CAPITAL LETTER OMEGA is not explicitly listed, since it is +# expected that conforming Unicode processes would discover the relation +# between the two characters. +# +# Note that some sequences could be considered recursive, in the way that +# the preferred sequence to use may be a subsequence of the "Do Not Emit" +# sequence. This may have implications for some implementations who may want +# to treat the original sequence and its alternative as similar. +# +# This file should not be considered to be comprehensive. It is expected +# that new sequences and categories may be added to or removed from the file +# as the Unicode Standard goes through new releases. +# +# Format: +# Field 0 A sequence of Unicode code point values +# Field 1 A replacement sequence of Unicode code point values +# Field 2 DoNotEmit type of the original character sequence +# +# Field 2 is followed by an optional human-readable comment field. +# +# These are the values used for Field 2: +# Indic_Atomic_Consonant: +# Sequences that look like an Indic consonant but should be avoided +# in representing that consonant. For now, these are limited to +# Devanagari. 
+# Indic_Consonant_Conjunct: +# Sequences that look like an Indic conjunct but should be avoided +# in representing that conjunct. For now, these are limited to +# Devanagari. +# Indic_Vowel_Letter: +# Sequences that look like an Indic vowel letter but should be avoided +# in representing that vowel letter. +# Bengali_Khanda_Ta: +# Legacy representation of Bengali khanda ta prior to +# Unicode Version 4.1. +# Malayalam_Chillu: +# Legacy representation of Malayalam chillus prior to +# Unicode Version 5.1. Note that the sequence in Field 0 may appear +# in legitimate Malayalam sequences not related to chillus. +# Tamil_Shrii: +# Legacy representation of Tamil ligature shri prior to +# Unicode Version 4.1. +# Dotless_Form: +# Dotless forms of lowercase Latin i and j followed by a +# combining dot above. +# Hamza_Form: +# Sequences containing Arabic hamza above, which should be avoided. +# Precomposed_Form: +# Sequences for which a precomposed form exists, but without canonical +# equivalence. +# Deprecated: +# Characters that are identified in the Unicode Standard as +# deprecated for which a replacement sequence exists. +# Discouraged: +# Miscellaneous characters and sequences discouraged in the +# Unicode Standard. +# Preferred_Spelling: +# Miscellaneous characters and sequences for which the Unicode Standard +# specifies a preferred spelling. 
+ +# ================================================ +# "Do Not Use" tables from the Core Specification +# ================================================ + +# Devanagari, from Table 12-1 +0905 0946; 0904; Indic_Vowel_Letter # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN SHORT E; DEVANAGARI LETTER SHORT A +0905 093E; 0906; Indic_Vowel_Letter # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER AA +0930 094D 0907; 0908; Indic_Vowel_Letter # DEVANAGARI LETTER RA, DEVANAGARI SIGN VIRAMA, DEVANAGARI LETTER I; DEVANAGARI LETTER II +0909 0941; 090A; Indic_Vowel_Letter # DEVANAGARI LETTER U, DEVANAGARI VOWEL SIGN U; DEVANAGARI LETTER UU +090F 0945; 090D; Indic_Vowel_Letter # DEVANAGARI LETTER E, DEVANAGARI VOWEL SIGN CANDRA E; DEVANAGARI LETTER CANDRA E +090F 0946; 090E; Indic_Vowel_Letter # DEVANAGARI LETTER E, DEVANAGARI VOWEL SIGN SHORT E; DEVANAGARI LETTER SHORT E +090F 0947; 0910; Indic_Vowel_Letter # DEVANAGARI LETTER E, DEVANAGARI VOWEL SIGN E; DEVANAGARI LETTER AI +0905 0949; 0911; Indic_Vowel_Letter # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN CANDRA O; DEVANAGARI LETTER CANDRA O +0906 0945; 0911; Indic_Vowel_Letter # DEVANAGARI LETTER AA, DEVANAGARI VOWEL SIGN CANDRA E; DEVANAGARI LETTER CANDRA O +0905 094A; 0912; Indic_Vowel_Letter # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN SHORT O; DEVANAGARI LETTER SHORT O +0906 0946; 0912; Indic_Vowel_Letter # DEVANAGARI LETTER AA, DEVANAGARI VOWEL SIGN SHORT E; DEVANAGARI LETTER SHORT O +0905 094B; 0913; Indic_Vowel_Letter # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN O; DEVANAGARI LETTER O +0906 0947; 0913; Indic_Vowel_Letter # DEVANAGARI LETTER AA, DEVANAGARI VOWEL SIGN E; DEVANAGARI LETTER O +0905 094C; 0914; Indic_Vowel_Letter # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN AU; DEVANAGARI LETTER AU +0906 0948; 0914; Indic_Vowel_Letter # DEVANAGARI LETTER AA, DEVANAGARI VOWEL SIGN AI; DEVANAGARI LETTER AU +0905 0945; 0972; Indic_Vowel_Letter # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN CANDRA E; DEVANAGARI 
LETTER CANDRA A +0905 093A; 0973; Indic_Vowel_Letter # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN OE; DEVANAGARI LETTER OE +0905 093B; 0974; Indic_Vowel_Letter # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN OOE; DEVANAGARI LETTER OOE +0906 093A; 0974; Indic_Vowel_Letter # DEVANAGARI LETTER AA, DEVANAGARI VOWEL SIGN OE; DEVANAGARI LETTER OOE +0905 094F; 0975; Indic_Vowel_Letter # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN AW; DEVANAGARI LETTER AW +0905 0956; 0976; Indic_Vowel_Letter # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN UE; DEVANAGARI LETTER UE +0905 0957; 0977; Indic_Vowel_Letter # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN UUE; DEVANAGARI LETTER UUE + +# Devanagari, from Table 12-2 +# Review Note: Some experts have recommended removing these, while +# others prefer keeping them. They may also be procedurally generated. +0916 094D 093E; 0916; Indic_Atomic_Consonant # DEVANAGARI LETTER KHA, DEVANAGARI SIGN VIRAMA, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER KHA +0916 094D 200D 093E; 0916; Indic_Atomic_Consonant # DEVANAGARI LETTER KHA, DEVANAGARI SIGN VIRAMA, ZERO WIDTH JOINER, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER KHA +0917 094D 093E; 0917; Indic_Atomic_Consonant # DEVANAGARI LETTER GA, DEVANAGARI SIGN VIRAMA, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER GA +0917 094D 200D 093E; 0917; Indic_Atomic_Consonant # DEVANAGARI LETTER GA, DEVANAGARI SIGN VIRAMA, ZERO WIDTH JOINER, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER GA +0918 094D 093E; 0918; Indic_Atomic_Consonant # DEVANAGARI LETTER GHA, DEVANAGARI SIGN VIRAMA, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER GHA +0918 094D 200D 093E; 0918; Indic_Atomic_Consonant # DEVANAGARI LETTER GHA, DEVANAGARI SIGN VIRAMA, ZERO WIDTH JOINER, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER GHA +091A 094D 093E; 091A; Indic_Atomic_Consonant # DEVANAGARI LETTER CA, DEVANAGARI SIGN VIRAMA, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER CA +091A 094D 200D 093E; 091A; Indic_Atomic_Consonant # DEVANAGARI LETTER CA, DEVANAGARI SIGN 
VIRAMA, ZERO WIDTH JOINER, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER CA +091C 094D 093E; 091C; Indic_Atomic_Consonant # DEVANAGARI LETTER JA, DEVANAGARI SIGN VIRAMA, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER JA +091C 094D 200D 093E; 091C; Indic_Atomic_Consonant # DEVANAGARI LETTER JA, DEVANAGARI SIGN VIRAMA, ZERO WIDTH JOINER, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER JA +091D 094D 093E; 091D; Indic_Atomic_Consonant # DEVANAGARI LETTER JHA, DEVANAGARI SIGN VIRAMA, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER JHA +091D 094D 200D 093E; 091D; Indic_Atomic_Consonant # DEVANAGARI LETTER JHA, DEVANAGARI SIGN VIRAMA, ZERO WIDTH JOINER, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER JHA +091E 094D 093E; 091E; Indic_Atomic_Consonant # DEVANAGARI LETTER NYA, DEVANAGARI SIGN VIRAMA, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER NYA +091E 094D 200D 093E; 091E; Indic_Atomic_Consonant # DEVANAGARI LETTER NYA, DEVANAGARI SIGN VIRAMA, ZERO WIDTH JOINER, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER NYA +0923 094D 093E; 0923; Indic_Atomic_Consonant # DEVANAGARI LETTER NNA, DEVANAGARI SIGN VIRAMA, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER NNA +0923 094D 200D 093E; 0923; Indic_Atomic_Consonant # DEVANAGARI LETTER NNA, DEVANAGARI SIGN VIRAMA, ZERO WIDTH JOINER, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER NNA +0924 094D 093E; 0924; Indic_Atomic_Consonant # DEVANAGARI LETTER TA, DEVANAGARI SIGN VIRAMA, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER TA +0924 094D 200D 093E; 0924; Indic_Atomic_Consonant # DEVANAGARI LETTER TA, DEVANAGARI SIGN VIRAMA, ZERO WIDTH JOINER, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER TA +0925 094D 093E; 0925; Indic_Atomic_Consonant # DEVANAGARI LETTER THA, DEVANAGARI SIGN VIRAMA, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER THA +0925 094D 200D 093E; 0925; Indic_Atomic_Consonant # DEVANAGARI LETTER THA, DEVANAGARI SIGN VIRAMA, ZERO WIDTH JOINER, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER THA +0927 094D 093E; 0927; Indic_Atomic_Consonant # DEVANAGARI LETTER DHA, 
DEVANAGARI SIGN VIRAMA, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER DHA +0927 094D 200D 093E; 0927; Indic_Atomic_Consonant # DEVANAGARI LETTER DHA, DEVANAGARI SIGN VIRAMA, ZERO WIDTH JOINER, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER DHA +0928 094D 093E; 0928; Indic_Atomic_Consonant # DEVANAGARI LETTER NA, DEVANAGARI SIGN VIRAMA, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER NA +0928 094D 200D 093E; 0928; Indic_Atomic_Consonant # DEVANAGARI LETTER NA, DEVANAGARI SIGN VIRAMA, ZERO WIDTH JOINER, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER NA +0929 094D 093E; 0929; Indic_Atomic_Consonant # DEVANAGARI LETTER NNNA, DEVANAGARI SIGN VIRAMA, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER NNNA +0929 094D 200D 093E; 0929; Indic_Atomic_Consonant # DEVANAGARI LETTER NNNA, DEVANAGARI SIGN VIRAMA, ZERO WIDTH JOINER, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER NNNA +0928 093C 094D 093E; 0929; Indic_Atomic_Consonant # DEVANAGARI LETTER NA, DEVANAGARI SIGN NUKTA, DEVANAGARI SIGN VIRAMA, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER NNNA +0928 093C 094D 200D 093E; 0929; Indic_Atomic_Consonant # DEVANAGARI LETTER NA, DEVANAGARI SIGN NUKTA, DEVANAGARI SIGN VIRAMA, ZERO WIDTH JOINER, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER NNNA +092A 094D 093E; 092A; Indic_Atomic_Consonant # DEVANAGARI LETTER PA, DEVANAGARI SIGN VIRAMA, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER PA +092A 094D 200D 093E; 092A; Indic_Atomic_Consonant # DEVANAGARI LETTER PA, DEVANAGARI SIGN VIRAMA, ZERO WIDTH JOINER, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER PA +092C 094D 093E; 092C; Indic_Atomic_Consonant # DEVANAGARI LETTER BA, DEVANAGARI SIGN VIRAMA, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER BA +092C 094D 200D 093E; 092C; Indic_Atomic_Consonant # DEVANAGARI LETTER BA, DEVANAGARI SIGN VIRAMA, ZERO WIDTH JOINER, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER BA +092D 094D 093E; 092D; Indic_Atomic_Consonant # DEVANAGARI LETTER BHA, DEVANAGARI SIGN VIRAMA, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER BHA +092D 094D 200D 
093E; 092D; Indic_Atomic_Consonant # DEVANAGARI LETTER BHA, DEVANAGARI SIGN VIRAMA, ZERO WIDTH JOINER, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER BHA +092E 094D 093E; 092E; Indic_Atomic_Consonant # DEVANAGARI LETTER MA, DEVANAGARI SIGN VIRAMA, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER MA +092E 094D 200D 093E; 092E; Indic_Atomic_Consonant # DEVANAGARI LETTER MA, DEVANAGARI SIGN VIRAMA, ZERO WIDTH JOINER, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER MA +092F 094D 093E; 092F; Indic_Atomic_Consonant # DEVANAGARI LETTER YA, DEVANAGARI SIGN VIRAMA, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER YA +092F 094D 200D 093E; 092F; Indic_Atomic_Consonant # DEVANAGARI LETTER YA, DEVANAGARI SIGN VIRAMA, ZERO WIDTH JOINER, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER YA +0932 094D 093E; 0932; Indic_Atomic_Consonant # DEVANAGARI LETTER LA, DEVANAGARI SIGN VIRAMA, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER LA +0932 094D 200D 093E; 0932; Indic_Atomic_Consonant # DEVANAGARI LETTER LA, DEVANAGARI SIGN VIRAMA, ZERO WIDTH JOINER, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER LA +0935 094D 093E; 0935; Indic_Atomic_Consonant # DEVANAGARI LETTER VA, DEVANAGARI SIGN VIRAMA, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER VA +0935 094D 200D 093E; 0935; Indic_Atomic_Consonant # DEVANAGARI LETTER VA, DEVANAGARI SIGN VIRAMA, ZERO WIDTH JOINER, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER VA +0936 094D 093E; 0936; Indic_Atomic_Consonant # DEVANAGARI LETTER SHA, DEVANAGARI SIGN VIRAMA, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER SHA +0936 094D 200D 093E; 0936; Indic_Atomic_Consonant # DEVANAGARI LETTER SHA, DEVANAGARI SIGN VIRAMA, ZERO WIDTH JOINER, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER SHA +0937 094D 093E; 0937; Indic_Atomic_Consonant # DEVANAGARI LETTER SSA, DEVANAGARI SIGN VIRAMA, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER SSA +0937 094D 200D 093E; 0937; Indic_Atomic_Consonant # DEVANAGARI LETTER SSA, DEVANAGARI SIGN VIRAMA, ZERO WIDTH JOINER, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER SSA 
+0938 094D 093E; 0938; Indic_Atomic_Consonant # DEVANAGARI LETTER SA, DEVANAGARI SIGN VIRAMA, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER SA +0938 094D 200D 093E; 0938; Indic_Atomic_Consonant # DEVANAGARI LETTER SA, DEVANAGARI SIGN VIRAMA, ZERO WIDTH JOINER, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER SA +0959 094D 093E; 0959; Indic_Atomic_Consonant # DEVANAGARI LETTER KHHA, DEVANAGARI SIGN VIRAMA, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER KHHA +0959 094D 200D 093E; 0959; Indic_Atomic_Consonant # DEVANAGARI LETTER KHHA, DEVANAGARI SIGN VIRAMA, ZERO WIDTH JOINER, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER KHHA +0916 093C 094D 093E; 0959; Indic_Atomic_Consonant # DEVANAGARI LETTER KHA, DEVANAGARI SIGN NUKTA, DEVANAGARI SIGN VIRAMA, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER KHHA +0916 093C 094D 200D 093E; 0959; Indic_Atomic_Consonant # DEVANAGARI LETTER KHA, DEVANAGARI SIGN NUKTA, DEVANAGARI SIGN VIRAMA, ZERO WIDTH JOINER, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER KHHA +095A 094D 093E; 095A; Indic_Atomic_Consonant # DEVANAGARI LETTER GHHA, DEVANAGARI SIGN VIRAMA, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER GHHA +095A 094D 200D 093E; 095A; Indic_Atomic_Consonant # DEVANAGARI LETTER GHHA, DEVANAGARI SIGN VIRAMA, ZERO WIDTH JOINER, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER GHHA +0917 093C 094D 093E; 095A; Indic_Atomic_Consonant # DEVANAGARI LETTER GA, DEVANAGARI SIGN NUKTA, DEVANAGARI SIGN VIRAMA, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER GHHA +0917 093C 094D 200D 093E; 095A; Indic_Atomic_Consonant # DEVANAGARI LETTER GA, DEVANAGARI SIGN NUKTA, DEVANAGARI SIGN VIRAMA, ZERO WIDTH JOINER, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER GHHA +095B 094D 093E; 095B; Indic_Atomic_Consonant # DEVANAGARI LETTER ZA, DEVANAGARI SIGN VIRAMA, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER ZA +095B 094D 200D 093E; 095B; Indic_Atomic_Consonant # DEVANAGARI LETTER ZA, DEVANAGARI SIGN VIRAMA, ZERO WIDTH JOINER, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER ZA +091C 093C 094D 093E; 
095B; Indic_Atomic_Consonant # DEVANAGARI LETTER JA, DEVANAGARI SIGN NUKTA, DEVANAGARI SIGN VIRAMA, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER ZA +091C 093C 094D 200D 093E; 095B; Indic_Atomic_Consonant # DEVANAGARI LETTER JA, DEVANAGARI SIGN NUKTA, DEVANAGARI SIGN VIRAMA, ZERO WIDTH JOINER, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER ZA +095F 094D 093E; 095F; Indic_Atomic_Consonant # DEVANAGARI LETTER YYA, DEVANAGARI SIGN VIRAMA, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER YYA +095F 094D 200D 093E; 095F; Indic_Atomic_Consonant # DEVANAGARI LETTER YYA, DEVANAGARI SIGN VIRAMA, ZERO WIDTH JOINER, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER YYA +092F 093C 094D 093E; 095F; Indic_Atomic_Consonant # DEVANAGARI LETTER YA, DEVANAGARI SIGN NUKTA, DEVANAGARI SIGN VIRAMA, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER YYA +092F 093C 094D 200D 093E; 095F; Indic_Atomic_Consonant # DEVANAGARI LETTER YA, DEVANAGARI SIGN NUKTA, DEVANAGARI SIGN VIRAMA, ZERO WIDTH JOINER, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER YYA +0979 094D 093E; 0979; Indic_Atomic_Consonant # DEVANAGARI LETTER ZHA, DEVANAGARI SIGN VIRAMA, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER ZHA +0979 094D 200D 093E; 0979; Indic_Atomic_Consonant # DEVANAGARI LETTER ZHA, DEVANAGARI SIGN VIRAMA, ZERO WIDTH JOINER, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER ZHA +097A 094D 093E; 097A; Indic_Atomic_Consonant # DEVANAGARI LETTER HEAVY YA, DEVANAGARI SIGN VIRAMA, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER HEAVY YA +097A 094D 200D 093E; 097A; Indic_Atomic_Consonant # DEVANAGARI LETTER HEAVY YA, DEVANAGARI SIGN VIRAMA, ZERO WIDTH JOINER, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER HEAVY YA +097B 094D 093E; 097B; Indic_Atomic_Consonant # DEVANAGARI LETTER GGA, DEVANAGARI SIGN VIRAMA, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER GGA +097B 094D 200D 093E; 097B; Indic_Atomic_Consonant # DEVANAGARI LETTER GGA, DEVANAGARI SIGN VIRAMA, ZERO WIDTH JOINER, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER GGA +097C 094D 093E; 097C; 
Indic_Atomic_Consonant # DEVANAGARI LETTER JJA, DEVANAGARI SIGN VIRAMA, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER JJA +097C 094D 200D 093E; 097C; Indic_Atomic_Consonant # DEVANAGARI LETTER JJA, DEVANAGARI SIGN VIRAMA, ZERO WIDTH JOINER, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER JJA +097E 094D 093E; 097E; Indic_Atomic_Consonant # DEVANAGARI LETTER DDDA, DEVANAGARI SIGN VIRAMA, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER DDDA +097E 094D 200D 093E; 097E; Indic_Atomic_Consonant # DEVANAGARI LETTER DDDA, DEVANAGARI SIGN VIRAMA, ZERO WIDTH JOINER, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER DDDA +097F 094D 093E; 097F; Indic_Atomic_Consonant # DEVANAGARI LETTER BBA, DEVANAGARI SIGN VIRAMA, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER BBA +097F 094D 200D 093E; 097F; Indic_Atomic_Consonant # DEVANAGARI LETTER BBA, DEVANAGARI SIGN VIRAMA, ZERO WIDTH JOINER, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER BBA + +# Devanagari, from Table 12-3 +# Review Note: Some experts have recommended removing these, while +# others prefer keeping them. They may also be procedurally generated. +# Note: This list may be incomplete. 
+0915 094D 091A 094D 093E; 0915 094D 091A; Indic_Consonant_Conjunct # DEVANAGARI LETTER KA, DEVANAGARI SIGN VIRAMA, DEVANAGARI LETTER CA, DEVANAGARI SIGN VIRAMA, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER KA, DEVANAGARI SIGN VIRAMA, DEVANAGARI LETTER CA +0915 094D 091A 094D 200D 093E; 0915 094D 091A; Indic_Consonant_Conjunct # DEVANAGARI LETTER KA, DEVANAGARI SIGN VIRAMA, DEVANAGARI LETTER CA, DEVANAGARI SIGN VIRAMA, ZERO WIDTH JOINER, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER KA, DEVANAGARI SIGN VIRAMA, DEVANAGARI LETTER CA +0915 094D 0937 094D 093E; 0915 094D 0937; Indic_Consonant_Conjunct # DEVANAGARI LETTER KA, DEVANAGARI SIGN VIRAMA, DEVANAGARI LETTER SSA, DEVANAGARI SIGN VIRAMA, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER KA, DEVANAGARI SIGN VIRAMA, DEVANAGARI LETTER SSA +0915 094D 0937 094D 200D 093E; 0915 094D 0937; Indic_Consonant_Conjunct # DEVANAGARI LETTER KA, DEVANAGARI SIGN VIRAMA, DEVANAGARI LETTER SSA, DEVANAGARI SIGN VIRAMA, ZERO WIDTH JOINER, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER KA, DEVANAGARI SIGN VIRAMA, DEVANAGARI LETTER SSA +0924 094D 0924 094D 093E; 0924 094D 0924; Indic_Consonant_Conjunct # DEVANAGARI LETTER TA, DEVANAGARI SIGN VIRAMA, DEVANAGARI LETTER TA, DEVANAGARI SIGN VIRAMA, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER TA, DEVANAGARI SIGN VIRAMA, DEVANAGARI LETTER TA +0924 094D 0924 094D 200D 093E; 0924 094D 0924; Indic_Consonant_Conjunct # DEVANAGARI LETTER TA, DEVANAGARI SIGN VIRAMA, DEVANAGARI LETTER TA, DEVANAGARI SIGN VIRAMA, ZERO WIDTH JOINER, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER TA, DEVANAGARI SIGN VIRAMA, DEVANAGARI LETTER TA +0928 094D 0924 094D 093E; 0928 094D 0924; Indic_Consonant_Conjunct # DEVANAGARI LETTER NA, DEVANAGARI SIGN VIRAMA, DEVANAGARI LETTER TA, DEVANAGARI SIGN VIRAMA, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER NA, DEVANAGARI SIGN VIRAMA, DEVANAGARI LETTER TA +0928 094D 0924 094D 200D 093E; 0928 094D 0924; Indic_Consonant_Conjunct # DEVANAGARI LETTER NA, DEVANAGARI SIGN VIRAMA, DEVANAGARI LETTER 
TA, DEVANAGARI SIGN VIRAMA, ZERO WIDTH JOINER, DEVANAGARI VOWEL SIGN AA; DEVANAGARI LETTER NA, DEVANAGARI SIGN VIRAMA, DEVANAGARI LETTER TA + +# Bengali, from Table 12-11 +0985 09BE; 0986; Indic_Vowel_Letter # BENGALI LETTER A, BENGALI VOWEL SIGN AA; BENGALI LETTER AA +098B 09C3; 09E0; Indic_Vowel_Letter # BENGALI LETTER VOCALIC R, BENGALI VOWEL SIGN VOCALIC R; BENGALI LETTER VOCALIC RR +098C 09E2; 09E1; Indic_Vowel_Letter # BENGALI LETTER VOCALIC L, BENGALI VOWEL SIGN VOCALIC L; BENGALI LETTER VOCALIC LL + +# Gurmukhi, from Table 12-16 +0A05 0A3E; 0A06; Indic_Vowel_Letter # GURMUKHI LETTER A, GURMUKHI VOWEL SIGN AA; GURMUKHI LETTER AA +0A72 0A3F; 0A07; Indic_Vowel_Letter # GURMUKHI IRI, GURMUKHI VOWEL SIGN I; GURMUKHI LETTER I +0A72 0A40; 0A08; Indic_Vowel_Letter # GURMUKHI IRI, GURMUKHI VOWEL SIGN II; GURMUKHI LETTER II +0A73 0A41; 0A09; Indic_Vowel_Letter # GURMUKHI URA, GURMUKHI VOWEL SIGN U; GURMUKHI LETTER U +0A73 0A42; 0A0A; Indic_Vowel_Letter # GURMUKHI URA, GURMUKHI VOWEL SIGN UU; GURMUKHI LETTER UU +0A72 0A47; 0A0F; Indic_Vowel_Letter # GURMUKHI IRI, GURMUKHI VOWEL SIGN EE; GURMUKHI LETTER EE +0A05 0A48; 0A10; Indic_Vowel_Letter # GURMUKHI LETTER A, GURMUKHI VOWEL SIGN AI; GURMUKHI LETTER AI +0A73 0A4B; 0A13; Indic_Vowel_Letter # GURMUKHI URA, GURMUKHI VOWEL SIGN OO; GURMUKHI LETTER OO +0A05 0A4C; 0A14; Indic_Vowel_Letter # GURMUKHI LETTER A, GURMUKHI VOWEL SIGN AU; GURMUKHI LETTER AU + +# Gujarati, from Table 12-20 +0A85 0ABE; 0A86; Indic_Vowel_Letter # GUJARATI LETTER A, GUJARATI VOWEL SIGN AA; GUJARATI LETTER AA +0A85 0AC5; 0A8D; Indic_Vowel_Letter # GUJARATI LETTER A, GUJARATI VOWEL SIGN CANDRA E; GUJARATI VOWEL CANDRA E +0A85 0AC7; 0A8F; Indic_Vowel_Letter # GUJARATI LETTER A, GUJARATI VOWEL SIGN E; GUJARATI LETTER E +0A85 0AC8; 0A90; Indic_Vowel_Letter # GUJARATI LETTER A, GUJARATI VOWEL SIGN AI; GUJARATI LETTER AI +0A85 0AC9; 0A91; Indic_Vowel_Letter # GUJARATI LETTER A, GUJARATI VOWEL SIGN CANDRA O; GUJARATI VOWEL CANDRA O +0A85 0ACB; 0A93; 
Indic_Vowel_Letter # GUJARATI LETTER A, GUJARATI VOWEL SIGN O; GUJARATI LETTER O +0A85 0ABE 0AC5; 0A93; Indic_Vowel_Letter # GUJARATI LETTER A, GUJARATI VOWEL SIGN AA, GUJARATI VOWEL SIGN CANDRA E; GUJARATI LETTER O +0A85 0ACC; 0A94; Indic_Vowel_Letter # GUJARATI LETTER A, GUJARATI VOWEL SIGN AU; GUJARATI LETTER AU +0A85 0ABE 0AC8; 0A94; Indic_Vowel_Letter # GUJARATI LETTER A, GUJARATI VOWEL SIGN AA, GUJARATI VOWEL SIGN AI; GUJARATI LETTER AU +0AC5 0ABE; 0AC9; Indic_Vowel_Letter # GUJARATI VOWEL SIGN CANDRA E, GUJARATI VOWEL SIGN AA; GUJARATI VOWEL SIGN CANDRA O + +# Oriya, from Table 12-22 +0B05 0B3E; 0B06; Indic_Vowel_Letter # ORIYA LETTER A, ORIYA VOWEL SIGN AA; ORIYA LETTER AA +0B0F 0B57; 0B10; Indic_Vowel_Letter # ORIYA LETTER E, ORIYA AU LENGTH MARK; ORIYA LETTER AI +0B13 0B57; 0B14; Indic_Vowel_Letter # ORIYA LETTER O, ORIYA AU LENGTH MARK; ORIYA LETTER AU + +# Tamil, from Table 12-26 +0B85 0BC2; 0B86; Indic_Vowel_Letter # TAMIL LETTER A, TAMIL VOWEL SIGN UU; TAMIL LETTER AA + +# Telugu, from Table 12-30 +0C12 0C55; 0C13; Indic_Vowel_Letter # TELUGU LETTER O, TELUGU LENGTH MARK; TELUGU LETTER OO +0C12 0C4C; 0C14; Indic_Vowel_Letter # TELUGU LETTER O, TELUGU VOWEL SIGN AU; TELUGU LETTER AU +0C3F 0C55; 0C40; Indic_Vowel_Letter # TELUGU VOWEL SIGN I, TELUGU LENGTH MARK; TELUGU VOWEL SIGN II +0C46 0C55; 0C47; Indic_Vowel_Letter # TELUGU VOWEL SIGN E, TELUGU LENGTH MARK; TELUGU VOWEL SIGN EE +0C4A 0C55; 0C4B; Indic_Vowel_Letter # TELUGU VOWEL SIGN O, TELUGU LENGTH MARK; TELUGU VOWEL SIGN OO + +# Kannada, from Table 12-31 +0C89 0CBE; 0C8A; Indic_Vowel_Letter # KANNADA LETTER U, KANNADA VOWEL SIGN AA; KANNADA LETTER UU +0C92 0CCC; 0C94; Indic_Vowel_Letter # KANNADA LETTER O, KANNADA VOWEL SIGN AU; KANNADA LETTER AU +0C8B 0CBE; 0CE0; Indic_Vowel_Letter # KANNADA LETTER VOCALIC R, KANNADA VOWEL SIGN AA; KANNADA LETTER VOCALIC RR + +# Malayalam, from Table 12-32 +0D07 0D57; 0D08; Indic_Vowel_Letter # MALAYALAM LETTER I, MALAYALAM AU LENGTH MARK; MALAYALAM LETTER II 
+0D09 0D57; 0D0A; Indic_Vowel_Letter # MALAYALAM LETTER U, MALAYALAM AU LENGTH MARK; MALAYALAM LETTER UU +0D0E 0D46; 0D10; Indic_Vowel_Letter # MALAYALAM LETTER E, MALAYALAM VOWEL SIGN E; MALAYALAM LETTER AI +0D12 0D3E; 0D13; Indic_Vowel_Letter # MALAYALAM LETTER O, MALAYALAM VOWEL SIGN AA; MALAYALAM LETTER OO +0D12 0D57; 0D14; Indic_Vowel_Letter # MALAYALAM LETTER O, MALAYALAM AU LENGTH MARK; MALAYALAM LETTER AU + +# Sinhala, from Table 13-2 +0D85 0DCF; 0D86; Indic_Vowel_Letter # SINHALA LETTER AYANNA, SINHALA VOWEL SIGN AELA-PILLA; SINHALA LETTER AAYANNA +0D85 0DD0; 0D87; Indic_Vowel_Letter # SINHALA LETTER AYANNA, SINHALA VOWEL SIGN KETTI AEDA-PILLA; SINHALA LETTER AEYANNA +0D85 0DD1; 0D88; Indic_Vowel_Letter # SINHALA LETTER AYANNA, SINHALA VOWEL SIGN DIGA AEDA-PILLA; SINHALA LETTER AEEYANNA +0D8B 0DDF; 0D8C; Indic_Vowel_Letter # SINHALA LETTER UYANNA, SINHALA VOWEL SIGN GAYANUKITTA; SINHALA LETTER UUYANNA +0D8D 0DD8; 0D8E; Indic_Vowel_Letter # SINHALA LETTER IRUYANNA, SINHALA VOWEL SIGN GAETTA-PILLA; SINHALA LETTER IRUUYANNA +0D8F 0DDF; 0D90; Indic_Vowel_Letter # SINHALA LETTER ILUYANNA, SINHALA VOWEL SIGN GAYANUKITTA; SINHALA LETTER ILUUYANNA +0D91 0DCA; 0D92; Indic_Vowel_Letter # SINHALA LETTER EYANNA, SINHALA SIGN AL-LAKUNA; SINHALA LETTER EEYANNA +0D91 0DD9; 0D93; Indic_Vowel_Letter # SINHALA LETTER EYANNA, SINHALA VOWEL SIGN KOMBUVA; SINHALA LETTER AIYANNA +0D94 0DDF; 0D96; Indic_Vowel_Letter # SINHALA LETTER OYANNA, SINHALA VOWEL SIGN GAYANUKITTA; SINHALA LETTER AUYANNA + +# Brahmi, from Table 14-1 +11005 11038; 11006; Indic_Vowel_Letter # BRAHMI LETTER A, BRAHMI VOWEL SIGN AA; BRAHMI LETTER AA +1100B 1103E; 1100C; Indic_Vowel_Letter # BRAHMI LETTER VOCALIC R, BRAHMI VOWEL SIGN VOCALIC R; BRAHMI LETTER VOCALIC RR +1100F 11042; 11010; Indic_Vowel_Letter # BRAHMI LETTER E, BRAHMI VOWEL SIGN E; BRAHMI LETTER AI + +# Takri, from Table 15-1 +11680 116AD; 11681; Indic_Vowel_Letter # TAKRI LETTER A, TAKRI VOWEL SIGN AA; TAKRI LETTER AA +11686 116B2; 11687; 
Indic_Vowel_Letter # TAKRI LETTER E, TAKRI VOWEL SIGN E; TAKRI LETTER AI +11680 116B4; 11688; Indic_Vowel_Letter # TAKRI LETTER A, TAKRI VOWEL SIGN O; TAKRI LETTER O +11680 116B5; 11689; Indic_Vowel_Letter # TAKRI LETTER A, TAKRI VOWEL SIGN AU; TAKRI LETTER AU + +# Khojki, from Table 15-3 +11200 1122C; 11201; Indic_Vowel_Letter # KHOJKI LETTER A, KHOJKI VOWEL SIGN AA; KHOJKI LETTER AA +11240 1122E; 11202; Indic_Vowel_Letter # KHOJKI LETTER SHORT I, KHOJKI VOWEL SIGN II; KHOJKI LETTER I +11206 1122C; 11203; Indic_Vowel_Letter # KHOJKI LETTER O, KHOJKI VOWEL SIGN AA; KHOJKI LETTER U +11200 11231; 11205; Indic_Vowel_Letter # KHOJKI LETTER A, KHOJKI VOWEL SIGN AI; KHOJKI LETTER AI +11200 11233; 11207; Indic_Vowel_Letter # KHOJKI LETTER A, KHOJKI VOWEL SIGN AU; KHOJKI LETTER AU +11200 1122C 11231; 11207; Indic_Vowel_Letter # KHOJKI LETTER A, KHOJKI VOWEL SIGN AA, KHOJKI VOWEL SIGN AI; KHOJKI LETTER AU +1122C 11230; 11232; Indic_Vowel_Letter # KHOJKI VOWEL SIGN AA, KHOJKI VOWEL SIGN E; KHOJKI VOWEL SIGN O +1122C 11231; 11233; Indic_Vowel_Letter # KHOJKI VOWEL SIGN AA, KHOJKI VOWEL SIGN AI; KHOJKI VOWEL SIGN AU + +# Khudawadi, from Table 15-4 +112B0 112E0; 112B1; Indic_Vowel_Letter # KHUDAWADI LETTER A, KHUDAWADI VOWEL SIGN AA; KHUDAWADI LETTER AA +112B0 112E5; 112B6; Indic_Vowel_Letter # KHUDAWADI LETTER A, KHUDAWADI VOWEL SIGN E; KHUDAWADI LETTER E +112B0 112E6; 112B7; Indic_Vowel_Letter # KHUDAWADI LETTER A, KHUDAWADI VOWEL SIGN AI; KHUDAWADI LETTER AI +112B0 112E7; 112B8; Indic_Vowel_Letter # KHUDAWADI LETTER A, KHUDAWADI VOWEL SIGN O; KHUDAWADI LETTER O +112B0 112E8; 112B9; Indic_Vowel_Letter # KHUDAWADI LETTER A, KHUDAWADI VOWEL SIGN AU; KHUDAWADI LETTER AU + +# Tirhuta, from Table 15-6 +11481 114B0; 11482; Indic_Vowel_Letter # TIRHUTA LETTER A, TIRHUTA VOWEL SIGN AA; TIRHUTA LETTER AA +114AA 114B5; 11489; Indic_Vowel_Letter # TIRHUTA LETTER LA, TIRHUTA VOWEL SIGN VOCALIC R; TIRHUTA LETTER VOCALIC L +114AA 114B6; 1148A; Indic_Vowel_Letter # TIRHUTA LETTER LA, 
TIRHUTA VOWEL SIGN VOCALIC RR; TIRHUTA LETTER VOCALIC LL +1148B 114BA; 1148C; Indic_Vowel_Letter # TIRHUTA LETTER E, TIRHUTA VOWEL SIGN SHORT E; TIRHUTA LETTER AI +1148D 114BA; 1148E; Indic_Vowel_Letter # TIRHUTA LETTER O, TIRHUTA VOWEL SIGN SHORT E; TIRHUTA LETTER AU + +# Modi, from Table 15-7 +11600 11639; 1160A; Indic_Vowel_Letter # MODI LETTER A, MODI VOWEL SIGN E; MODI LETTER E +11600 1163A; 1160B; Indic_Vowel_Letter # MODI LETTER A, MODI VOWEL SIGN AI; MODI LETTER AI +11601 11639; 1160C; Indic_Vowel_Letter # MODI LETTER AA, MODI VOWEL SIGN E; MODI LETTER O +11601 1163A; 1160D; Indic_Vowel_Letter # MODI LETTER AA, MODI VOWEL SIGN AI; MODI LETTER AU + +# ================================================ +# Deprecated characters and other discouraged characters and sequences +# ================================================ + +# Latin, from text of Section 7.1, the NamesList, and the uppercase mapping +0140; 006C 00B7; Preferred_Spelling # LATIN SMALL LETTER L WITH MIDDLE DOT; LATIN SMALL LETTER L, MIDDLE DOT +0149; 2019 006E; Deprecated # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE; RIGHT SINGLE QUOTATION MARK, LATIN SMALL LETTER N +0131 0307; 0069 0307; Dotless_Form # LATIN SMALL LETTER DOTLESS I, COMBINING DOT ABOVE; LATIN SMALL LETTER I, COMBINING DOT ABOVE +0237 0307; 006A 0307; Dotless_Form # LATIN SMALL LETTER DOTLESS J, COMBINING DOT ABOVE; LATIN SMALL LETTER J, COMBINING DOT ABOVE +# Characters with overstruck tilde for which a precomposed form exists, +# but the sequences are not canonically equivalent +004C 0334; 2C62; Precomposed_Form # LATIN CAPITAL LETTER L, COMBINING TILDE OVERLAY; LATIN CAPITAL LETTER L WITH MIDDLE TILDE +0062 0334; 1D6C; Precomposed_Form # LATIN SMALL LETTER B, COMBINING TILDE OVERLAY; LATIN SMALL LETTER B WITH MIDDLE TILDE +0064 0334; 1D6D; Precomposed_Form # LATIN SMALL LETTER D, COMBINING TILDE OVERLAY; LATIN SMALL LETTER D WITH MIDDLE TILDE +0066 0334; 1D6E; Precomposed_Form # LATIN SMALL LETTER F, COMBINING TILDE OVERLAY; 
LATIN SMALL LETTER F WITH MIDDLE TILDE +006C 0334; 026B; Precomposed_Form # LATIN SMALL LETTER L, COMBINING TILDE OVERLAY; LATIN SMALL LETTER L WITH MIDDLE TILDE +006D 0334; 1D6F; Precomposed_Form # LATIN SMALL LETTER M, COMBINING TILDE OVERLAY; LATIN SMALL LETTER M WITH MIDDLE TILDE +006E 0334; 1D70; Precomposed_Form # LATIN SMALL LETTER N, COMBINING TILDE OVERLAY; LATIN SMALL LETTER N WITH MIDDLE TILDE +0070 0334; 1D71; Precomposed_Form # LATIN SMALL LETTER P, COMBINING TILDE OVERLAY; LATIN SMALL LETTER P WITH MIDDLE TILDE +0072 0334; 1D72; Precomposed_Form # LATIN SMALL LETTER R, COMBINING TILDE OVERLAY; LATIN SMALL LETTER R WITH MIDDLE TILDE +0073 0334; 1D74; Precomposed_Form # LATIN SMALL LETTER S, COMBINING TILDE OVERLAY; LATIN SMALL LETTER S WITH MIDDLE TILDE +0074 0334; 1D75; Precomposed_Form # LATIN SMALL LETTER T, COMBINING TILDE OVERLAY; LATIN SMALL LETTER T WITH MIDDLE TILDE +007A 0334; 1D76; Precomposed_Form # LATIN SMALL LETTER Z, COMBINING TILDE OVERLAY; LATIN SMALL LETTER Z WITH MIDDLE TILDE +0279 0334; AB68; Precomposed_Form # LATIN SMALL LETTER TURNED R, COMBINING TILDE OVERLAY; LATIN SMALL LETTER TURNED R WITH MIDDLE TILDE +027E 0334; 1D73; Precomposed_Form # LATIN SMALL LETTER R WITH FISHHOOK, COMBINING TILDE OVERLAY; LATIN SMALL LETTER R WITH FISHHOOK AND MIDDLE TILDE +02E1 0334; AB5E; Precomposed_Form # MODIFIER LETTER SMALL L, COMBINING TILDE OVERLAY; MODIFIER LETTER SMALL L WITH MIDDLE TILDE +# Characters with palatalized hook for which a precomposed form exists, +# but the sequences are not canonically equivalent +0043 0321; A7C4; Precomposed_Form # LATIN CAPITAL LETTER C, COMBINING PALATALIZED HOOK BELOW; LATIN CAPITAL LETTER C WITH PALATAL HOOK +005A 0321; A7C6; Precomposed_Form # LATIN CAPITAL LETTER Z, COMBINING PALATALIZED HOOK BELOW; LATIN CAPITAL LETTER Z WITH PALATAL HOOK +0062 0321; 1D80; Precomposed_Form # LATIN SMALL LETTER B, COMBINING PALATALIZED HOOK BELOW; LATIN SMALL LETTER B WITH PALATAL HOOK +0063 0321; A794; 
Precomposed_Form # LATIN SMALL LETTER C, COMBINING PALATALIZED HOOK BELOW; LATIN SMALL LETTER C WITH PALATAL HOOK +0064 0321; 1D81; Precomposed_Form # LATIN SMALL LETTER D, COMBINING PALATALIZED HOOK BELOW; LATIN SMALL LETTER D WITH PALATAL HOOK +0066 0321; 1D82; Precomposed_Form # LATIN SMALL LETTER F, COMBINING PALATALIZED HOOK BELOW; LATIN SMALL LETTER F WITH PALATAL HOOK +0068 0321; A795; Precomposed_Form # LATIN SMALL LETTER H, COMBINING PALATALIZED HOOK BELOW; LATIN SMALL LETTER H WITH PALATAL HOOK +006B 0321; 1D84; Precomposed_Form # LATIN SMALL LETTER K, COMBINING PALATALIZED HOOK BELOW; LATIN SMALL LETTER K WITH PALATAL HOOK +006C 0321; 1D85; Precomposed_Form # LATIN SMALL LETTER L, COMBINING PALATALIZED HOOK BELOW; LATIN SMALL LETTER L WITH PALATAL HOOK +006D 0321; 1D86; Precomposed_Form # LATIN SMALL LETTER M, COMBINING PALATALIZED HOOK BELOW; LATIN SMALL LETTER M WITH PALATAL HOOK +006E 0321; 1D87; Precomposed_Form # LATIN SMALL LETTER N, COMBINING PALATALIZED HOOK BELOW; LATIN SMALL LETTER N WITH PALATAL HOOK +0070 0321; 1D88; Precomposed_Form # LATIN SMALL LETTER P, COMBINING PALATALIZED HOOK BELOW; LATIN SMALL LETTER P WITH PALATAL HOOK +0072 0321; 1D89; Precomposed_Form # LATIN SMALL LETTER R, COMBINING PALATALIZED HOOK BELOW; LATIN SMALL LETTER R WITH PALATAL HOOK +0073 0321; 1D8A; Precomposed_Form # LATIN SMALL LETTER S, COMBINING PALATALIZED HOOK BELOW; LATIN SMALL LETTER S WITH PALATAL HOOK +0074 0321; 01AB; Precomposed_Form # LATIN SMALL LETTER T, COMBINING PALATALIZED HOOK BELOW; LATIN SMALL LETTER T WITH PALATAL HOOK +0076 0321; 1D8C; Precomposed_Form # LATIN SMALL LETTER V, COMBINING PALATALIZED HOOK BELOW; LATIN SMALL LETTER V WITH PALATAL HOOK +0078 0321; 1D8D; Precomposed_Form # LATIN SMALL LETTER X, COMBINING PALATALIZED HOOK BELOW; LATIN SMALL LETTER X WITH PALATAL HOOK +007A 0321; 1D8E; Precomposed_Form # LATIN SMALL LETTER Z, COMBINING PALATALIZED HOOK BELOW; LATIN SMALL LETTER Z WITH PALATAL HOOK +014B 0321; 1DF14; Precomposed_Form # 
LATIN SMALL LETTER ENG, COMBINING PALATALIZED HOOK BELOW; LATIN SMALL LETTER ENG WITH PALATAL HOOK +0261 0321; 1D83; Precomposed_Form # LATIN SMALL LETTER SCRIPT G, COMBINING PALATALIZED HOOK BELOW; LATIN SMALL LETTER G WITH PALATAL HOOK +026C 0321; 1DF13; Precomposed_Form # LATIN SMALL LETTER L WITH BELT, COMBINING PALATALIZED HOOK BELOW; LATIN SMALL LETTER L WITH BELT AND PALATAL HOOK +0279 0321; 1DF15; Precomposed_Form # LATIN SMALL LETTER TURNED R, COMBINING PALATALIZED HOOK BELOW; LATIN SMALL LETTER TURNED R WITH PALATAL HOOK +027E 0321; 1DF16; Precomposed_Form # LATIN SMALL LETTER R WITH FISHHOOK, COMBINING PALATALIZED HOOK BELOW; LATIN SMALL LETTER R WITH FISHHOOK AND PALATAL HOOK +0283 0321; 1D8B; Precomposed_Form # LATIN SMALL LETTER ESH, COMBINING PALATALIZED HOOK BELOW; LATIN SMALL LETTER ESH WITH PALATAL HOOK +0292 0321; 1DF18; Precomposed_Form # LATIN SMALL LETTER EZH, COMBINING PALATALIZED HOOK BELOW; LATIN SMALL LETTER EZH WITH PALATAL HOOK +02A4 0321; 1DF12; Precomposed_Form # LATIN SMALL LETTER DEZH DIGRAPH, COMBINING PALATALIZED HOOK BELOW; LATIN SMALL LETTER DEZH DIGRAPH WITH PALATAL HOOK +02A7 0321; 1DF17; Precomposed_Form # LATIN SMALL LETTER TESH DIGRAPH, COMBINING PALATALIZED HOOK BELOW; LATIN SMALL LETTER TESH DIGRAPH WITH PALATAL HOOK +02E1 0321; 1DAA; Precomposed_Form # MODIFIER LETTER SMALL L, COMBINING PALATALIZED HOOK BELOW; MODIFIER LETTER SMALL L WITH PALATAL HOOK +1D57 0321; 1DB5; Precomposed_Form # MODIFIER LETTER SMALL T, COMBINING PALATALIZED HOOK BELOW; MODIFIER LETTER SMALL T WITH PALATAL HOOK +# Characters with retroflex hook for which a precomposed form exists, +# but the sequences are not canonically equivalent +0052 0322; 2C64; Precomposed_Form # LATIN CAPITAL LETTER R, COMBINING RETROFLEX HOOK BELOW; LATIN CAPITAL LETTER R WITH TAIL +0054 0322; 01AE; Precomposed_Form # LATIN CAPITAL LETTER T, COMBINING RETROFLEX HOOK BELOW; LATIN CAPITAL LETTER T WITH RETROFLEX HOOK +0061 0322; 1D8F; Precomposed_Form # LATIN SMALL LETTER A, 
COMBINING RETROFLEX HOOK BELOW; LATIN SMALL LETTER A WITH RETROFLEX HOOK +0063 0322; 1DF1D; Precomposed_Form # LATIN SMALL LETTER C, COMBINING RETROFLEX HOOK BELOW; LATIN SMALL LETTER C WITH RETROFLEX HOOK +0064 0322; 0256; Precomposed_Form # LATIN SMALL LETTER D, COMBINING RETROFLEX HOOK BELOW; LATIN SMALL LETTER D WITH TAIL +0065 0322; 1D92; Precomposed_Form # LATIN SMALL LETTER E, COMBINING RETROFLEX HOOK BELOW; LATIN SMALL LETTER E WITH RETROFLEX HOOK +0069 0322; 1D96; Precomposed_Form # LATIN SMALL LETTER I, COMBINING RETROFLEX HOOK BELOW; LATIN SMALL LETTER I WITH RETROFLEX HOOK +006C 0322; 026D; Precomposed_Form # LATIN SMALL LETTER L, COMBINING RETROFLEX HOOK BELOW; LATIN SMALL LETTER L WITH RETROFLEX HOOK +006E 0322; 0273; Precomposed_Form # LATIN SMALL LETTER N, COMBINING RETROFLEX HOOK BELOW; LATIN SMALL LETTER N WITH RETROFLEX HOOK +006F 0322; 1DF1B; Precomposed_Form # LATIN SMALL LETTER O, COMBINING RETROFLEX HOOK BELOW; LATIN SMALL LETTER O WITH RETROFLEX HOOK +0072 0322; 027D; Precomposed_Form # LATIN SMALL LETTER R, COMBINING RETROFLEX HOOK BELOW; LATIN SMALL LETTER R WITH TAIL +0074 0322; 0288; Precomposed_Form # LATIN SMALL LETTER T, COMBINING RETROFLEX HOOK BELOW; LATIN SMALL LETTER T WITH RETROFLEX HOOK +0075 0322; 1D99; Precomposed_Form # LATIN SMALL LETTER U, COMBINING RETROFLEX HOOK BELOW; LATIN SMALL LETTER U WITH RETROFLEX HOOK +007A 0322; 0290; Precomposed_Form # LATIN SMALL LETTER Z, COMBINING RETROFLEX HOOK BELOW; LATIN SMALL LETTER Z WITH RETROFLEX HOOK +01AD 0322; 1DF09; Precomposed_Form # LATIN SMALL LETTER T WITH HOOK, COMBINING RETROFLEX HOOK BELOW; LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK +01C3 0322; 1DF0A; Precomposed_Form # LATIN LETTER RETROFLEX CLICK, COMBINING RETROFLEX HOOK BELOW; LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK +0251 0322; 1D90; Precomposed_Form # LATIN SMALL LETTER ALPHA, COMBINING RETROFLEX HOOK BELOW; LATIN SMALL LETTER ALPHA WITH RETROFLEX HOOK +0254 0322; 1D97; Precomposed_Form # LATIN SMALL 
LETTER OPEN O, COMBINING RETROFLEX HOOK BELOW; LATIN SMALL LETTER OPEN O WITH RETROFLEX HOOK +0257 0322; 1D91; Precomposed_Form # LATIN SMALL LETTER D WITH HOOK, COMBINING RETROFLEX HOOK BELOW; LATIN SMALL LETTER D WITH HOOK AND TAIL +0259 0322; 1D95; Precomposed_Form # LATIN SMALL LETTER SCHWA, COMBINING RETROFLEX HOOK BELOW; LATIN SMALL LETTER SCHWA WITH RETROFLEX HOOK +025B 0322; 1D93; Precomposed_Form # LATIN SMALL LETTER OPEN E, COMBINING RETROFLEX HOOK BELOW; LATIN SMALL LETTER OPEN E WITH RETROFLEX HOOK +025C 0322; 1D94; Precomposed_Form # LATIN SMALL LETTER REVERSED OPEN E, COMBINING RETROFLEX HOOK BELOW; LATIN SMALL LETTER REVERSED OPEN E WITH RETROFLEX HOOK +0268 0322; 1DF1A; Precomposed_Form # LATIN SMALL LETTER I WITH STROKE, COMBINING RETROFLEX HOOK BELOW; LATIN SMALL LETTER I WITH STROKE AND RETROFLEX HOOK +026C 0322; A78E; Precomposed_Form # LATIN SMALL LETTER L WITH BELT, COMBINING RETROFLEX HOOK BELOW; LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT +026E 0322; 1DF05; Precomposed_Form # LATIN SMALL LETTER LEZH, COMBINING RETROFLEX HOOK BELOW; LATIN SMALL LETTER LEZH WITH RETROFLEX HOOK +027A 0322; 1DF08; Precomposed_Form # LATIN SMALL LETTER TURNED R WITH LONG LEG, COMBINING RETROFLEX HOOK BELOW; LATIN SMALL LETTER TURNED R WITH LONG LEG AND RETROFLEX HOOK +0283 0322; 1D98; Precomposed_Form # LATIN SMALL LETTER ESH, COMBINING RETROFLEX HOOK BELOW; LATIN SMALL LETTER ESH WITH RETROFLEX HOOK +0292 0322; 1D9A; Precomposed_Form # LATIN SMALL LETTER EZH, COMBINING RETROFLEX HOOK BELOW; LATIN SMALL LETTER EZH WITH RETROFLEX HOOK +02A3 0322; AB66; Precomposed_Form # LATIN SMALL LETTER DZ DIGRAPH, COMBINING RETROFLEX HOOK BELOW; LATIN SMALL LETTER DZ DIGRAPH WITH RETROFLEX HOOK +02A4 0322; 1DF19; Precomposed_Form # LATIN SMALL LETTER DEZH DIGRAPH, COMBINING RETROFLEX HOOK BELOW; LATIN SMALL LETTER DEZH DIGRAPH WITH RETROFLEX HOOK +02A6 0322; AB67; Precomposed_Form # LATIN SMALL LETTER TS DIGRAPH, COMBINING RETROFLEX HOOK BELOW; LATIN SMALL LETTER TS 
DIGRAPH WITH RETROFLEX HOOK +02A7 0322; 1DF1C; Precomposed_Form # LATIN SMALL LETTER TESH DIGRAPH, COMBINING RETROFLEX HOOK BELOW; LATIN SMALL LETTER TESH DIGRAPH WITH RETROFLEX HOOK +02B3 0322; 107A8; Precomposed_Form # MODIFIER LETTER SMALL R, COMBINING RETROFLEX HOOK BELOW; MODIFIER LETTER SMALL R WITH TAIL +02E1 0322; 1DA9; Precomposed_Form # MODIFIER LETTER SMALL L, COMBINING RETROFLEX HOOK BELOW; MODIFIER LETTER SMALL L WITH RETROFLEX HOOK +1D48 0322; 1078B; Precomposed_Form # MODIFIER LETTER SMALL D, COMBINING RETROFLEX HOOK BELOW; MODIFIER LETTER SMALL D WITH TAIL +1D57 0322; 107AF; Precomposed_Form # MODIFIER LETTER SMALL T, COMBINING RETROFLEX HOOK BELOW; MODIFIER LETTER SMALL T WITH RETROFLEX HOOK +1DBB 0322; 1DBC; Precomposed_Form # MODIFIER LETTER SMALL Z, COMBINING RETROFLEX HOOK BELOW; MODIFIER LETTER SMALL Z WITH RETROFLEX HOOK +207F 0322; 1DAF; Precomposed_Form # SUPERSCRIPT LATIN SMALL LETTER N, COMBINING RETROFLEX HOOK BELOW; MODIFIER LETTER SMALL N WITH RETROFLEX HOOK +10787 0322; 10788; Precomposed_Form # MODIFIER LETTER SMALL DZ DIGRAPH, COMBINING RETROFLEX HOOK BELOW; MODIFIER LETTER SMALL DZ DIGRAPH WITH RETROFLEX HOOK +1078C 0322; 1078D; Precomposed_Form # MODIFIER LETTER SMALL D WITH HOOK, COMBINING RETROFLEX HOOK BELOW; MODIFIER LETTER SMALL D WITH HOOK AND TAIL +1079B 0322; 1079D; Precomposed_Form # MODIFIER LETTER SMALL L WITH BELT, COMBINING RETROFLEX HOOK BELOW; MODIFIER LETTER SMALL L WITH RETROFLEX HOOK AND BELT +1079E 0322; 1079F; Precomposed_Form # MODIFIER LETTER SMALL LEZH, COMBINING RETROFLEX HOOK BELOW; MODIFIER LETTER SMALL LEZH WITH RETROFLEX HOOK +107A6 0322; 107A7; Precomposed_Form # MODIFIER LETTER SMALL TURNED R WITH LONG LEG, COMBINING RETROFLEX HOOK BELOW; MODIFIER LETTER SMALL TURNED R WITH LONG LEG AND RETROFLEX HOOK +107AC 0322; 107AD; Precomposed_Form # MODIFIER LETTER SMALL TS DIGRAPH, COMBINING RETROFLEX HOOK BELOW; MODIFIER LETTER SMALL TS DIGRAPH WITH RETROFLEX HOOK + +# Arabic, from text of Section 9.2 and the 
NamesList +0649 0654; 0626; Hamza_Form # ARABIC LETTER ALEF MAKSURA, ARABIC HAMZA ABOVE; ARABIC LETTER YEH WITH HAMZA ABOVE +0673; 0627 065F; Deprecated # ARABIC LETTER ALEF WITH WAVY HAMZA BELOW; ARABIC LETTER ALEF, ARABIC WAVY HAMZA BELOW +0675; 0674 0627; Preferred_Spelling # ARABIC LETTER HIGH HAMZA ALEF; ARABIC LETTER HIGH HAMZA, ARABIC LETTER ALEF +0676; 0674 0648; Preferred_Spelling # ARABIC LETTER HIGH HAMZA WAW; ARABIC LETTER HIGH HAMZA, ARABIC LETTER WAW +0677; 0674 06C7; Preferred_Spelling # ARABIC LETTER U WITH HAMZA ABOVE; ARABIC LETTER HIGH HAMZA, ARABIC LETTER U +0678; 0674 0649; Preferred_Spelling # ARABIC LETTER HIGH HAMZA YEH; ARABIC LETTER HIGH HAMZA, ARABIC LETTER ALEF MAKSURA + +# Devanagari, from Section 12.1 and the NamesList +0953; 0300; Discouraged # DEVANAGARI GRAVE ACCENT; COMBINING GRAVE ACCENT +0954; 0301; Discouraged # DEVANAGARI ACUTE ACCENT; COMBINING ACUTE ACCENT + +# Bengali, from Section 12.2 +09A4 09CD 200D; 09CE; Bengali_Khanda_Ta # BENGALI LETTER TA, BENGALI SIGN VIRAMA, ZERO WIDTH JOINER; BENGALI LETTER KHANDA TA + +# Gujarati, from the NamesList +0AF1; 0AB0 0AC2 0AF0; Preferred_Spelling # GUJARATI RUPEE SIGN; GUJARATI LETTER RA, GUJARATI VOWEL SIGN UU, GUJARATI ABBREVIATION SIGN + +# Tamil ligature shri +0BB8 0BCD 0BB0 0BC0; 0BB6 0BCD 0BB0 0BC0; Tamil_Shrii # TAMIL LETTER SA, TAMIL SIGN VIRAMA, TAMIL LETTER RA, TAMIL VOWEL SIGN II; TAMIL LETTER SHA, TAMIL SIGN VIRAMA, TAMIL LETTER RA, TAMIL VOWEL SIGN II + +# Malayalam Chillus, from Table 12-40 +0D23 0D4D 200D; 0D7A; Malayalam_Chillu # MALAYALAM LETTER NNA, MALAYALAM SIGN VIRAMA, ZERO WIDTH JOINER; MALAYALAM LETTER CHILLU NN +0D28 0D4D 200D; 0D7B; Malayalam_Chillu # MALAYALAM LETTER NA, MALAYALAM SIGN VIRAMA, ZERO WIDTH JOINER; MALAYALAM LETTER CHILLU N +0D30 0D4D 200D; 0D7C; Malayalam_Chillu # MALAYALAM LETTER RA, MALAYALAM SIGN VIRAMA, ZERO WIDTH JOINER; MALAYALAM LETTER CHILLU RR +0D32 0D4D 200D; 0D7D; Malayalam_Chillu # MALAYALAM LETTER LA, MALAYALAM SIGN VIRAMA, ZERO 
WIDTH JOINER; MALAYALAM LETTER CHILLU L +0D33 0D4D 200D; 0D7E; Malayalam_Chillu # MALAYALAM LETTER LLA, MALAYALAM SIGN VIRAMA, ZERO WIDTH JOINER; MALAYALAM LETTER CHILLU LL + +# Tibetan, from text of Section 13.4, the NamesList, and the decompositions +0F77; 0FB2 0F71 0F80; Deprecated # TIBETAN VOWEL SIGN VOCALIC RR; TIBETAN SUBJOINED LETTER RA, TIBETAN VOWEL SIGN AA, TIBETAN VOWEL SIGN REVERSED I +0F79; 0FB3 0F71 0F80; Deprecated # TIBETAN VOWEL SIGN VOCALIC LL; TIBETAN SUBJOINED LETTER LA, TIBETAN VOWEL SIGN AA, TIBETAN VOWEL SIGN REVERSED I + +# Khmer, from text of Section 16.4 and the NamesList +17A3; 17A2; Deprecated # KHMER INDEPENDENT VOWEL QAQ; KHMER LETTER QA +17A4; 17A2 17B6; Deprecated # KHMER INDEPENDENT VOWEL QAA; KHMER LETTER QA, KHMER VOWEL SIGN AA +17D8; 17D4 179B 17D4; Discouraged # KHMER SIGN BEYYAL; KHMER SIGN KHAN, KHMER LETTER LO, KHMER SIGN KHAN +17E8 17D3; 19E0; Discouraged # KHMER DIGIT EIGHT, KHMER SIGN BATHAMASAT; KHMER SYMBOL PATHAMASAT + +# Sharada, from the NamesList, and glyph shape of U+1118E +1118D 111BC; 1118E; Indic_Vowel_Letter # SHARADA LETTER E, SHARADA VOWEL SIGN E; SHARADA LETTER AI +111C4; 1118F 11180; Discouraged # SHARADA OM; SHARADA LETTER O, SHARADA SIGN CANDRABINDU + +# EOF -- cgit v1.2.3