diff options
| author | 2024-02-19 09:11:56 -0400 | |
|---|---|---|
| committer | 2024-02-19 09:11:56 -0400 | |
| commit | 6013b2ded106521ee9cae6bd77dacbd5254ff763 (patch) | |
| tree | 990f13cfbe4bfc20a08d2f097c4646984bffb565 /data/unicode/Blocks.txt | |
| parent | Tried SIMD lower/upper string. Slower than linear. (diff) | |
| download | zg-6013b2ded106521ee9cae6bd77dacbd5254ff763.tar.gz zg-6013b2ded106521ee9cae6bd77dacbd5254ff763.tar.xz zg-6013b2ded106521ee9cae6bd77dacbd5254ff763.zip | |
Cleaned up directory structure
Diffstat (limited to 'data/unicode/Blocks.txt')
| -rw-r--r-- | data/unicode/Blocks.txt | 364 |
1 files changed, 364 insertions, 0 deletions
diff --git a/data/unicode/Blocks.txt b/data/unicode/Blocks.txt new file mode 100644 index 0000000..8fa3eaa --- /dev/null +++ b/data/unicode/Blocks.txt | |||
| @@ -0,0 +1,364 @@ | |||
| 1 | # Blocks-15.1.0.txt | ||
| 2 | # Date: 2023-07-28, 15:47:20 GMT | ||
| 3 | # © 2023 Unicode®, Inc. | ||
| 4 | # For terms of use, see https://www.unicode.org/terms_of_use.html | ||
| 5 | # | ||
| 6 | # Unicode Character Database | ||
| 7 | # For documentation, see https://www.unicode.org/reports/tr44/ | ||
| 8 | # | ||
| 9 | # Format: | ||
| 10 | # Start Code..End Code; Block Name | ||
| 11 | |||
| 12 | # ================================================ | ||
| 13 | |||
| 14 | # Note: When comparing block names, casing, whitespace, hyphens, | ||
| 15 | # and underbars are ignored. | ||
| 16 | # For example, "Latin Extended-A" and "latin extended a" are equivalent. | ||
| 17 | # For more information on the comparison of property values, | ||
| 18 | # see UAX #44: https://www.unicode.org/reports/tr44/ | ||
| 19 | # | ||
| 20 | # All block ranges start with a value where (cp MOD 16) = 0, | ||
| 21 | # and end with a value where (cp MOD 16) = 15. In other words, | ||
| 22 | # the last hexadecimal digit of the start of range is ...0 | ||
| 23 | # and the last hexadecimal digit of the end of range is ...F. | ||
| 24 | # This constraint on block ranges guarantees that allocations | ||
| 25 | # are done in terms of whole columns, and that code chart display | ||
| 26 | # never involves splitting columns in the charts. | ||
| 27 | # | ||
| 28 | # All code points not explicitly listed for Block | ||
| 29 | # have the value No_Block. | ||
| 30 | |||
| 31 | # Property: Block | ||
| 32 | # | ||
| 33 | # @missing: 0000..10FFFF; No_Block | ||
| 34 | |||
| 35 | 0000..007F; Basic Latin | ||
| 36 | 0080..00FF; Latin-1 Supplement | ||
| 37 | 0100..017F; Latin Extended-A | ||
| 38 | 0180..024F; Latin Extended-B | ||
| 39 | 0250..02AF; IPA Extensions | ||
| 40 | 02B0..02FF; Spacing Modifier Letters | ||
| 41 | 0300..036F; Combining Diacritical Marks | ||
| 42 | 0370..03FF; Greek and Coptic | ||
| 43 | 0400..04FF; Cyrillic | ||
| 44 | 0500..052F; Cyrillic Supplement | ||
| 45 | 0530..058F; Armenian | ||
| 46 | 0590..05FF; Hebrew | ||
| 47 | 0600..06FF; Arabic | ||
| 48 | 0700..074F; Syriac | ||
| 49 | 0750..077F; Arabic Supplement | ||
| 50 | 0780..07BF; Thaana | ||
| 51 | 07C0..07FF; NKo | ||
| 52 | 0800..083F; Samaritan | ||
| 53 | 0840..085F; Mandaic | ||
| 54 | 0860..086F; Syriac Supplement | ||
| 55 | 0870..089F; Arabic Extended-B | ||
| 56 | 08A0..08FF; Arabic Extended-A | ||
| 57 | 0900..097F; Devanagari | ||
| 58 | 0980..09FF; Bengali | ||
| 59 | 0A00..0A7F; Gurmukhi | ||
| 60 | 0A80..0AFF; Gujarati | ||
| 61 | 0B00..0B7F; Oriya | ||
| 62 | 0B80..0BFF; Tamil | ||
| 63 | 0C00..0C7F; Telugu | ||
| 64 | 0C80..0CFF; Kannada | ||
| 65 | 0D00..0D7F; Malayalam | ||
| 66 | 0D80..0DFF; Sinhala | ||
| 67 | 0E00..0E7F; Thai | ||
| 68 | 0E80..0EFF; Lao | ||
| 69 | 0F00..0FFF; Tibetan | ||
| 70 | 1000..109F; Myanmar | ||
| 71 | 10A0..10FF; Georgian | ||
| 72 | 1100..11FF; Hangul Jamo | ||
| 73 | 1200..137F; Ethiopic | ||
| 74 | 1380..139F; Ethiopic Supplement | ||
| 75 | 13A0..13FF; Cherokee | ||
| 76 | 1400..167F; Unified Canadian Aboriginal Syllabics | ||
| 77 | 1680..169F; Ogham | ||
| 78 | 16A0..16FF; Runic | ||
| 79 | 1700..171F; Tagalog | ||
| 80 | 1720..173F; Hanunoo | ||
| 81 | 1740..175F; Buhid | ||
| 82 | 1760..177F; Tagbanwa | ||
| 83 | 1780..17FF; Khmer | ||
| 84 | 1800..18AF; Mongolian | ||
| 85 | 18B0..18FF; Unified Canadian Aboriginal Syllabics Extended | ||
| 86 | 1900..194F; Limbu | ||
| 87 | 1950..197F; Tai Le | ||
| 88 | 1980..19DF; New Tai Lue | ||
| 89 | 19E0..19FF; Khmer Symbols | ||
| 90 | 1A00..1A1F; Buginese | ||
| 91 | 1A20..1AAF; Tai Tham | ||
| 92 | 1AB0..1AFF; Combining Diacritical Marks Extended | ||
| 93 | 1B00..1B7F; Balinese | ||
| 94 | 1B80..1BBF; Sundanese | ||
| 95 | 1BC0..1BFF; Batak | ||
| 96 | 1C00..1C4F; Lepcha | ||
| 97 | 1C50..1C7F; Ol Chiki | ||
| 98 | 1C80..1C8F; Cyrillic Extended-C | ||
| 99 | 1C90..1CBF; Georgian Extended | ||
| 100 | 1CC0..1CCF; Sundanese Supplement | ||
| 101 | 1CD0..1CFF; Vedic Extensions | ||
| 102 | 1D00..1D7F; Phonetic Extensions | ||
| 103 | 1D80..1DBF; Phonetic Extensions Supplement | ||
| 104 | 1DC0..1DFF; Combining Diacritical Marks Supplement | ||
| 105 | 1E00..1EFF; Latin Extended Additional | ||
| 106 | 1F00..1FFF; Greek Extended | ||
| 107 | 2000..206F; General Punctuation | ||
| 108 | 2070..209F; Superscripts and Subscripts | ||
| 109 | 20A0..20CF; Currency Symbols | ||
| 110 | 20D0..20FF; Combining Diacritical Marks for Symbols | ||
| 111 | 2100..214F; Letterlike Symbols | ||
| 112 | 2150..218F; Number Forms | ||
| 113 | 2190..21FF; Arrows | ||
| 114 | 2200..22FF; Mathematical Operators | ||
| 115 | 2300..23FF; Miscellaneous Technical | ||
| 116 | 2400..243F; Control Pictures | ||
| 117 | 2440..245F; Optical Character Recognition | ||
| 118 | 2460..24FF; Enclosed Alphanumerics | ||
| 119 | 2500..257F; Box Drawing | ||
| 120 | 2580..259F; Block Elements | ||
| 121 | 25A0..25FF; Geometric Shapes | ||
| 122 | 2600..26FF; Miscellaneous Symbols | ||
| 123 | 2700..27BF; Dingbats | ||
| 124 | 27C0..27EF; Miscellaneous Mathematical Symbols-A | ||
| 125 | 27F0..27FF; Supplemental Arrows-A | ||
| 126 | 2800..28FF; Braille Patterns | ||
| 127 | 2900..297F; Supplemental Arrows-B | ||
| 128 | 2980..29FF; Miscellaneous Mathematical Symbols-B | ||
| 129 | 2A00..2AFF; Supplemental Mathematical Operators | ||
| 130 | 2B00..2BFF; Miscellaneous Symbols and Arrows | ||
| 131 | 2C00..2C5F; Glagolitic | ||
| 132 | 2C60..2C7F; Latin Extended-C | ||
| 133 | 2C80..2CFF; Coptic | ||
| 134 | 2D00..2D2F; Georgian Supplement | ||
| 135 | 2D30..2D7F; Tifinagh | ||
| 136 | 2D80..2DDF; Ethiopic Extended | ||
| 137 | 2DE0..2DFF; Cyrillic Extended-A | ||
| 138 | 2E00..2E7F; Supplemental Punctuation | ||
| 139 | 2E80..2EFF; CJK Radicals Supplement | ||
| 140 | 2F00..2FDF; Kangxi Radicals | ||
| 141 | 2FF0..2FFF; Ideographic Description Characters | ||
| 142 | 3000..303F; CJK Symbols and Punctuation | ||
| 143 | 3040..309F; Hiragana | ||
| 144 | 30A0..30FF; Katakana | ||
| 145 | 3100..312F; Bopomofo | ||
| 146 | 3130..318F; Hangul Compatibility Jamo | ||
| 147 | 3190..319F; Kanbun | ||
| 148 | 31A0..31BF; Bopomofo Extended | ||
| 149 | 31C0..31EF; CJK Strokes | ||
| 150 | 31F0..31FF; Katakana Phonetic Extensions | ||
| 151 | 3200..32FF; Enclosed CJK Letters and Months | ||
| 152 | 3300..33FF; CJK Compatibility | ||
| 153 | 3400..4DBF; CJK Unified Ideographs Extension A | ||
| 154 | 4DC0..4DFF; Yijing Hexagram Symbols | ||
| 155 | 4E00..9FFF; CJK Unified Ideographs | ||
| 156 | A000..A48F; Yi Syllables | ||
| 157 | A490..A4CF; Yi Radicals | ||
| 158 | A4D0..A4FF; Lisu | ||
| 159 | A500..A63F; Vai | ||
| 160 | A640..A69F; Cyrillic Extended-B | ||
| 161 | A6A0..A6FF; Bamum | ||
| 162 | A700..A71F; Modifier Tone Letters | ||
| 163 | A720..A7FF; Latin Extended-D | ||
| 164 | A800..A82F; Syloti Nagri | ||
| 165 | A830..A83F; Common Indic Number Forms | ||
| 166 | A840..A87F; Phags-pa | ||
| 167 | A880..A8DF; Saurashtra | ||
| 168 | A8E0..A8FF; Devanagari Extended | ||
| 169 | A900..A92F; Kayah Li | ||
| 170 | A930..A95F; Rejang | ||
| 171 | A960..A97F; Hangul Jamo Extended-A | ||
| 172 | A980..A9DF; Javanese | ||
| 173 | A9E0..A9FF; Myanmar Extended-B | ||
| 174 | AA00..AA5F; Cham | ||
| 175 | AA60..AA7F; Myanmar Extended-A | ||
| 176 | AA80..AADF; Tai Viet | ||
| 177 | AAE0..AAFF; Meetei Mayek Extensions | ||
| 178 | AB00..AB2F; Ethiopic Extended-A | ||
| 179 | AB30..AB6F; Latin Extended-E | ||
| 180 | AB70..ABBF; Cherokee Supplement | ||
| 181 | ABC0..ABFF; Meetei Mayek | ||
| 182 | AC00..D7AF; Hangul Syllables | ||
| 183 | D7B0..D7FF; Hangul Jamo Extended-B | ||
| 184 | D800..DB7F; High Surrogates | ||
| 185 | DB80..DBFF; High Private Use Surrogates | ||
| 186 | DC00..DFFF; Low Surrogates | ||
| 187 | E000..F8FF; Private Use Area | ||
| 188 | F900..FAFF; CJK Compatibility Ideographs | ||
| 189 | FB00..FB4F; Alphabetic Presentation Forms | ||
| 190 | FB50..FDFF; Arabic Presentation Forms-A | ||
| 191 | FE00..FE0F; Variation Selectors | ||
| 192 | FE10..FE1F; Vertical Forms | ||
| 193 | FE20..FE2F; Combining Half Marks | ||
| 194 | FE30..FE4F; CJK Compatibility Forms | ||
| 195 | FE50..FE6F; Small Form Variants | ||
| 196 | FE70..FEFF; Arabic Presentation Forms-B | ||
| 197 | FF00..FFEF; Halfwidth and Fullwidth Forms | ||
| 198 | FFF0..FFFF; Specials | ||
| 199 | 10000..1007F; Linear B Syllabary | ||
| 200 | 10080..100FF; Linear B Ideograms | ||
| 201 | 10100..1013F; Aegean Numbers | ||
| 202 | 10140..1018F; Ancient Greek Numbers | ||
| 203 | 10190..101CF; Ancient Symbols | ||
| 204 | 101D0..101FF; Phaistos Disc | ||
| 205 | 10280..1029F; Lycian | ||
| 206 | 102A0..102DF; Carian | ||
| 207 | 102E0..102FF; Coptic Epact Numbers | ||
| 208 | 10300..1032F; Old Italic | ||
| 209 | 10330..1034F; Gothic | ||
| 210 | 10350..1037F; Old Permic | ||
| 211 | 10380..1039F; Ugaritic | ||
| 212 | 103A0..103DF; Old Persian | ||
| 213 | 10400..1044F; Deseret | ||
| 214 | 10450..1047F; Shavian | ||
| 215 | 10480..104AF; Osmanya | ||
| 216 | 104B0..104FF; Osage | ||
| 217 | 10500..1052F; Elbasan | ||
| 218 | 10530..1056F; Caucasian Albanian | ||
| 219 | 10570..105BF; Vithkuqi | ||
| 220 | 10600..1077F; Linear A | ||
| 221 | 10780..107BF; Latin Extended-F | ||
| 222 | 10800..1083F; Cypriot Syllabary | ||
| 223 | 10840..1085F; Imperial Aramaic | ||
| 224 | 10860..1087F; Palmyrene | ||
| 225 | 10880..108AF; Nabataean | ||
| 226 | 108E0..108FF; Hatran | ||
| 227 | 10900..1091F; Phoenician | ||
| 228 | 10920..1093F; Lydian | ||
| 229 | 10980..1099F; Meroitic Hieroglyphs | ||
| 230 | 109A0..109FF; Meroitic Cursive | ||
| 231 | 10A00..10A5F; Kharoshthi | ||
| 232 | 10A60..10A7F; Old South Arabian | ||
| 233 | 10A80..10A9F; Old North Arabian | ||
| 234 | 10AC0..10AFF; Manichaean | ||
| 235 | 10B00..10B3F; Avestan | ||
| 236 | 10B40..10B5F; Inscriptional Parthian | ||
| 237 | 10B60..10B7F; Inscriptional Pahlavi | ||
| 238 | 10B80..10BAF; Psalter Pahlavi | ||
| 239 | 10C00..10C4F; Old Turkic | ||
| 240 | 10C80..10CFF; Old Hungarian | ||
| 241 | 10D00..10D3F; Hanifi Rohingya | ||
| 242 | 10E60..10E7F; Rumi Numeral Symbols | ||
| 243 | 10E80..10EBF; Yezidi | ||
| 244 | 10EC0..10EFF; Arabic Extended-C | ||
| 245 | 10F00..10F2F; Old Sogdian | ||
| 246 | 10F30..10F6F; Sogdian | ||
| 247 | 10F70..10FAF; Old Uyghur | ||
| 248 | 10FB0..10FDF; Chorasmian | ||
| 249 | 10FE0..10FFF; Elymaic | ||
| 250 | 11000..1107F; Brahmi | ||
| 251 | 11080..110CF; Kaithi | ||
| 252 | 110D0..110FF; Sora Sompeng | ||
| 253 | 11100..1114F; Chakma | ||
| 254 | 11150..1117F; Mahajani | ||
| 255 | 11180..111DF; Sharada | ||
| 256 | 111E0..111FF; Sinhala Archaic Numbers | ||
| 257 | 11200..1124F; Khojki | ||
| 258 | 11280..112AF; Multani | ||
| 259 | 112B0..112FF; Khudawadi | ||
| 260 | 11300..1137F; Grantha | ||
| 261 | 11400..1147F; Newa | ||
| 262 | 11480..114DF; Tirhuta | ||
| 263 | 11580..115FF; Siddham | ||
| 264 | 11600..1165F; Modi | ||
| 265 | 11660..1167F; Mongolian Supplement | ||
| 266 | 11680..116CF; Takri | ||
| 267 | 11700..1174F; Ahom | ||
| 268 | 11800..1184F; Dogra | ||
| 269 | 118A0..118FF; Warang Citi | ||
| 270 | 11900..1195F; Dives Akuru | ||
| 271 | 119A0..119FF; Nandinagari | ||
| 272 | 11A00..11A4F; Zanabazar Square | ||
| 273 | 11A50..11AAF; Soyombo | ||
| 274 | 11AB0..11ABF; Unified Canadian Aboriginal Syllabics Extended-A | ||
| 275 | 11AC0..11AFF; Pau Cin Hau | ||
| 276 | 11B00..11B5F; Devanagari Extended-A | ||
| 277 | 11C00..11C6F; Bhaiksuki | ||
| 278 | 11C70..11CBF; Marchen | ||
| 279 | 11D00..11D5F; Masaram Gondi | ||
| 280 | 11D60..11DAF; Gunjala Gondi | ||
| 281 | 11EE0..11EFF; Makasar | ||
| 282 | 11F00..11F5F; Kawi | ||
| 283 | 11FB0..11FBF; Lisu Supplement | ||
| 284 | 11FC0..11FFF; Tamil Supplement | ||
| 285 | 12000..123FF; Cuneiform | ||
| 286 | 12400..1247F; Cuneiform Numbers and Punctuation | ||
| 287 | 12480..1254F; Early Dynastic Cuneiform | ||
| 288 | 12F90..12FFF; Cypro-Minoan | ||
| 289 | 13000..1342F; Egyptian Hieroglyphs | ||
| 290 | 13430..1345F; Egyptian Hieroglyph Format Controls | ||
| 291 | 14400..1467F; Anatolian Hieroglyphs | ||
| 292 | 16800..16A3F; Bamum Supplement | ||
| 293 | 16A40..16A6F; Mro | ||
| 294 | 16A70..16ACF; Tangsa | ||
| 295 | 16AD0..16AFF; Bassa Vah | ||
| 296 | 16B00..16B8F; Pahawh Hmong | ||
| 297 | 16E40..16E9F; Medefaidrin | ||
| 298 | 16F00..16F9F; Miao | ||
| 299 | 16FE0..16FFF; Ideographic Symbols and Punctuation | ||
| 300 | 17000..187FF; Tangut | ||
| 301 | 18800..18AFF; Tangut Components | ||
| 302 | 18B00..18CFF; Khitan Small Script | ||
| 303 | 18D00..18D7F; Tangut Supplement | ||
| 304 | 1AFF0..1AFFF; Kana Extended-B | ||
| 305 | 1B000..1B0FF; Kana Supplement | ||
| 306 | 1B100..1B12F; Kana Extended-A | ||
| 307 | 1B130..1B16F; Small Kana Extension | ||
| 308 | 1B170..1B2FF; Nushu | ||
| 309 | 1BC00..1BC9F; Duployan | ||
| 310 | 1BCA0..1BCAF; Shorthand Format Controls | ||
| 311 | 1CF00..1CFCF; Znamenny Musical Notation | ||
| 312 | 1D000..1D0FF; Byzantine Musical Symbols | ||
| 313 | 1D100..1D1FF; Musical Symbols | ||
| 314 | 1D200..1D24F; Ancient Greek Musical Notation | ||
| 315 | 1D2C0..1D2DF; Kaktovik Numerals | ||
| 316 | 1D2E0..1D2FF; Mayan Numerals | ||
| 317 | 1D300..1D35F; Tai Xuan Jing Symbols | ||
| 318 | 1D360..1D37F; Counting Rod Numerals | ||
| 319 | 1D400..1D7FF; Mathematical Alphanumeric Symbols | ||
| 320 | 1D800..1DAAF; Sutton SignWriting | ||
| 321 | 1DF00..1DFFF; Latin Extended-G | ||
| 322 | 1E000..1E02F; Glagolitic Supplement | ||
| 323 | 1E030..1E08F; Cyrillic Extended-D | ||
| 324 | 1E100..1E14F; Nyiakeng Puachue Hmong | ||
| 325 | 1E290..1E2BF; Toto | ||
| 326 | 1E2C0..1E2FF; Wancho | ||
| 327 | 1E4D0..1E4FF; Nag Mundari | ||
| 328 | 1E7E0..1E7FF; Ethiopic Extended-B | ||
| 329 | 1E800..1E8DF; Mende Kikakui | ||
| 330 | 1E900..1E95F; Adlam | ||
| 331 | 1EC70..1ECBF; Indic Siyaq Numbers | ||
| 332 | 1ED00..1ED4F; Ottoman Siyaq Numbers | ||
| 333 | 1EE00..1EEFF; Arabic Mathematical Alphabetic Symbols | ||
| 334 | 1F000..1F02F; Mahjong Tiles | ||
| 335 | 1F030..1F09F; Domino Tiles | ||
| 336 | 1F0A0..1F0FF; Playing Cards | ||
| 337 | 1F100..1F1FF; Enclosed Alphanumeric Supplement | ||
| 338 | 1F200..1F2FF; Enclosed Ideographic Supplement | ||
| 339 | 1F300..1F5FF; Miscellaneous Symbols and Pictographs | ||
| 340 | 1F600..1F64F; Emoticons | ||
| 341 | 1F650..1F67F; Ornamental Dingbats | ||
| 342 | 1F680..1F6FF; Transport and Map Symbols | ||
| 343 | 1F700..1F77F; Alchemical Symbols | ||
| 344 | 1F780..1F7FF; Geometric Shapes Extended | ||
| 345 | 1F800..1F8FF; Supplemental Arrows-C | ||
| 346 | 1F900..1F9FF; Supplemental Symbols and Pictographs | ||
| 347 | 1FA00..1FA6F; Chess Symbols | ||
| 348 | 1FA70..1FAFF; Symbols and Pictographs Extended-A | ||
| 349 | 1FB00..1FBFF; Symbols for Legacy Computing | ||
| 350 | 20000..2A6DF; CJK Unified Ideographs Extension B | ||
| 351 | 2A700..2B73F; CJK Unified Ideographs Extension C | ||
| 352 | 2B740..2B81F; CJK Unified Ideographs Extension D | ||
| 353 | 2B820..2CEAF; CJK Unified Ideographs Extension E | ||
| 354 | 2CEB0..2EBEF; CJK Unified Ideographs Extension F | ||
| 355 | 2EBF0..2EE5F; CJK Unified Ideographs Extension I | ||
| 356 | 2F800..2FA1F; CJK Compatibility Ideographs Supplement | ||
| 357 | 30000..3134F; CJK Unified Ideographs Extension G | ||
| 358 | 31350..323AF; CJK Unified Ideographs Extension H | ||
| 359 | E0000..E007F; Tags | ||
| 360 | E0100..E01EF; Variation Selectors Supplement | ||
| 361 | F0000..FFFFF; Supplementary Private Use Area-A | ||
| 362 | 100000..10FFFF; Supplementary Private Use Area-B | ||
| 363 | |||
| 364 | # EOF | ||