diff options
Diffstat (limited to 'data/unicode/ArabicShaping.txt')
| -rw-r--r-- | data/unicode/ArabicShaping.txt | 60 |
1 file changed, 41 insertions, 19 deletions
diff --git a/data/unicode/ArabicShaping.txt b/data/unicode/ArabicShaping.txt index 0cbdc8a..3c9e0ca 100644 --- a/data/unicode/ArabicShaping.txt +++ b/data/unicode/ArabicShaping.txt | |||
| @@ -1,8 +1,8 @@ | |||
| 1 | # ArabicShaping-15.1.0.txt | 1 | # ArabicShaping-16.0.0.txt |
| 2 | # Date: 2023-01-05 | 2 | # Date: 2024-07-30 |
| 3 | # © 2023 Unicode®, Inc. | 3 | # © 2024 Unicode®, Inc. |
| 4 | # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. | 4 | # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. |
| 5 | # For terms of use, see https://www.unicode.org/terms_of_use.html | 5 | # For terms of use and license, see https://www.unicode.org/terms_of_use.html |
| 6 | # | 6 | # |
| 7 | # This file is a normative contributory data file in the | 7 | # This file is a normative contributory data file in the |
| 8 | # Unicode Character Database. | 8 | # Unicode Character Database. |
| @@ -10,21 +10,33 @@ | |||
| 10 | # This file defines the Joining_Type and Joining_Group property | 10 | # This file defines the Joining_Type and Joining_Group property |
| 11 | # values for Arabic, Syriac, N'Ko, Mandaic, and Manichaean positional | 11 | # values for Arabic, Syriac, N'Ko, Mandaic, and Manichaean positional |
| 12 | # shaping, repeating in machine readable form the information | 12 | # shaping, repeating in machine readable form the information |
| 13 | # exemplified in Tables 9-3, 9-8, 9-9, 9-10, 9-14, 9-15, 9-16, 9-19, | 13 | # exemplified in various tables of The Unicode Standard core specification. |
| 14 | # 9-20, 10-4, 10-5, 10-6, 10-7, and 19-5 of The Unicode Standard core | 14 | # |
| 15 | # specification. This file also defines Joining_Type values for | 15 | # This file also defines Joining_Type values for |
| 16 | # Mongolian, Phags-pa, Psalter Pahlavi, Sogdian, Old Uyghur, Chorasmian, | 16 | # Mongolian, Phags-pa, Psalter Pahlavi, Sogdian, Old Uyghur, Chorasmian, |
| 17 | # and Adlam positional shaping, | 17 | # and Adlam positional shaping, |
| 18 | # and Joining_Type and Joining_Group values for Hanifi Rohingya positional shaping, | 18 | # and Joining_Type and Joining_Group values for Hanifi Rohingya positional shaping, |
| 19 | # which are not listed in tables in the standard. | 19 | # which are not listed in tables in the core specification. |
| 20 | # | ||
| 21 | # Script Section Table(s) | ||
| 20 | # | 22 | # |
| 21 | # See Sections 9.2, 9.3, 9.5, 10.5, 10.6, 13.5, 14.4, 14.10, 14.11, 16.14, 19.4, and 19.9 | 23 | # Arabic 9.2 9-3, 9-4, 9-5, 9-7, 9-8, 9-9, 9-10, 9-11 |
| 22 | # of The Unicode Standard core specification for more information. | 24 | # Syriac 9.3 9-15, 9-16, 9-17, 9-18, 9-19 |
| 25 | # Mandaic 9.5 9-21, 9-22 | ||
| 26 | # Manichaean 10.5 10-4, 10-5, 10-6, 10-7 | ||
| 27 | # Psalter Pahlavi 10.6 -- | ||
| 28 | # Chorasmian 10.8 -- | ||
| 29 | # Mongolian 13.5 -- | ||
| 30 | # Phags-pa 14.4 -- | ||
| 31 | # Sogdian 14.10 -- | ||
| 32 | # Old Uyghur 14.11 -- | ||
| 33 | # Hanifi Rohingya 16.14 -- | ||
| 34 | # N'Ko 19.4 19-5 | ||
| 35 | # Adlam 19.9 -- | ||
| 23 | # | 36 | # |
| 24 | # Each line contains four fields, separated by a semicolon. | 37 | # Each line contains four fields, separated by a semicolon. |
| 25 | # | 38 | # |
| 26 | # Field 0: the code point, in 4-digit hexadecimal | 39 | # Field 0: the code point of a character, in hexadecimal form. |
| 27 | # form, of a character. | ||
| 28 | # | 40 | # |
| 29 | # Field 1: gives a short schematic name for that character. | 41 | # Field 1: gives a short schematic name for that character. |
| 30 | # The schematic name is descriptive of the shape, based as | 42 | # The schematic name is descriptive of the shape, based as |
| @@ -89,14 +101,18 @@ | |||
| 89 | # assigns jg=No_Joining_Group to all the singletons. | 101 | # assigns jg=No_Joining_Group to all the singletons. |
| 90 | # | 102 | # |
| 91 | # Note: Code points that are not explicitly listed in this file are | 103 | # Note: Code points that are not explicitly listed in this file are |
| 92 | # either of joining type T or U: | 104 | # either of Joining_Type T or U: |
| 93 | # | 105 | # |
| 94 | # - Those that are not explicitly listed and that are of General Category Mn, Me, or Cf | 106 | # - Those that are not explicitly listed and that are of General_Category Mn, Me, or Cf |
| 95 | # have joining type T. | 107 | # are Joining_Type=T. |
| 96 | # - All others not explicitly listed have joining type U. | 108 | # - All others not explicitly listed are Joining_Type=U. |
| 97 | # | 109 | # |
| 98 | # For an explicit listing of all characters of joining type T, see | 110 | # For an explicit listing of all characters of Joining_Type=T, see |
| 99 | # the derived property file DerivedJoiningType.txt. | 111 | # the derived property file DerivedJoiningType.txt. |
| 112 | # For an implementation that needs to parse for the values of | ||
| 113 | # Joining_Type, it is recommended to use DerivedJoiningType.txt | ||
| 114 | # instead of ArabicShaping.txt, to avoid the separate required step of | ||
| 115 | # calculating the set for Joining_Type=T based on General_Category values. | ||
| 100 | # | 116 | # |
| 101 | # ############################################################# | 117 | # ############################################################# |
| 102 | 118 | ||
| @@ -112,7 +128,7 @@ | |||
| 112 | 0605; ARABIC NUMBER MARK ABOVE; U; No_Joining_Group | 128 | 0605; ARABIC NUMBER MARK ABOVE; U; No_Joining_Group |
| 113 | 0608; ARABIC RAY; U; No_Joining_Group | 129 | 0608; ARABIC RAY; U; No_Joining_Group |
| 114 | 060B; AFGHANI SIGN; U; No_Joining_Group | 130 | 060B; AFGHANI SIGN; U; No_Joining_Group |
| 115 | 0620; DOTLESS YEH WITH SEPARATE RING BELOW; D; YEH | 131 | 0620; KASHMIRI YEH; D; KASHMIRI YEH |
| 116 | 0621; HAMZA; U; No_Joining_Group | 132 | 0621; HAMZA; U; No_Joining_Group |
| 117 | 0622; ALEF WITH MADDA ABOVE; R; ALEF | 133 | 0622; ALEF WITH MADDA ABOVE; R; ALEF |
| 118 | 0623; ALEF WITH HAMZA ABOVE; R; ALEF | 134 | 0623; ALEF WITH HAMZA ABOVE; R; ALEF |
| @@ -608,7 +624,7 @@ | |||
| 608 | 1875; MONGOLIAN MANCHU RA; D; No_Joining_Group | 624 | 1875; MONGOLIAN MANCHU RA; D; No_Joining_Group |
| 609 | 1876; MONGOLIAN MANCHU FA; D; No_Joining_Group | 625 | 1876; MONGOLIAN MANCHU FA; D; No_Joining_Group |
| 610 | 1877; MONGOLIAN MANCHU ZHA; D; No_Joining_Group | 626 | 1877; MONGOLIAN MANCHU ZHA; D; No_Joining_Group |
| 611 | 1878; MONGOLIAN MANCHU CHA WITH 2 DOTS; D; No_Joining_Group | 627 | 1878; MONGOLIAN CHA WITH 2 DOTS; D; No_Joining_Group |
| 612 | 1880; MONGOLIAN ALI GALI ANUSVARA ONE; U; No_Joining_Group | 628 | 1880; MONGOLIAN ALI GALI ANUSVARA ONE; U; No_Joining_Group |
| 613 | 1881; MONGOLIAN ALI GALI VISARGA ONE; U; No_Joining_Group | 629 | 1881; MONGOLIAN ALI GALI VISARGA ONE; U; No_Joining_Group |
| 614 | 1882; MONGOLIAN ALI GALI DAMARU; U; No_Joining_Group | 630 | 1882; MONGOLIAN ALI GALI DAMARU; U; No_Joining_Group |
| @@ -829,6 +845,12 @@ A873; PHAGS-PA CANDRABINDU; U; No_Joining_Group | |||
| 829 | 10D22; HANIFI ROHINGYA SAKIN; R; No_Joining_Group | 845 | 10D22; HANIFI ROHINGYA SAKIN; R; No_Joining_Group |
| 830 | 10D23; HANIFI ROHINGYA DOTLESS KINNA YA WITH DOT ABOVE; D; HANIFI ROHINGYA KINNA YA | 846 | 10D23; HANIFI ROHINGYA DOTLESS KINNA YA WITH DOT ABOVE; D; HANIFI ROHINGYA KINNA YA |
| 831 | 847 | ||
| 848 | # Arabic Extended-C Characters | ||
| 849 | |||
| 850 | 10EC2; DAL WITH VERTICAL 2 DOTS BELOW; R; DAL | ||
| 851 | 10EC3; TAH WITH VERTICAL 2 DOTS BELOW; D; TAH | ||
| 852 | 10EC4; KAF WITH VERTICAL 2 DOTS BELOW; D; KAF | ||
| 853 | |||
| 832 | # Sogdian Characters | 854 | # Sogdian Characters |
| 833 | 855 | ||
| 834 | 10F30; SOGDIAN ALEPH; D; No_Joining_Group | 856 | 10F30; SOGDIAN ALEPH; D; No_Joining_Group |