diff options
| author | 2024-02-19 09:11:56 -0400 | |
|---|---|---|
| committer | 2024-02-19 09:11:56 -0400 | |
| commit | 6013b2ded106521ee9cae6bd77dacbd5254ff763 (patch) | |
| tree | 990f13cfbe4bfc20a08d2f097c4646984bffb565 /data/unicode/PropertyAliases.txt | |
| parent | Tried SIMD lower/upper string. Slower than linear. (diff) | |
| download | zg-6013b2ded106521ee9cae6bd77dacbd5254ff763.tar.gz zg-6013b2ded106521ee9cae6bd77dacbd5254ff763.tar.xz zg-6013b2ded106521ee9cae6bd77dacbd5254ff763.zip | |
Cleaned up directory structure
Diffstat (limited to 'data/unicode/PropertyAliases.txt')
| -rw-r--r-- | data/unicode/PropertyAliases.txt | 217 |
1 file changed, 217 insertions, 0 deletions
diff --git a/data/unicode/PropertyAliases.txt b/data/unicode/PropertyAliases.txt
new file mode 100644
index 0000000..686b25a
--- /dev/null
+++ b/data/unicode/PropertyAliases.txt
| @@ -0,0 +1,217 @@ | |||
| 1 | # PropertyAliases-15.1.0.txt | ||
| 2 | # Date: 2023-08-07, 15:21:34 GMT | ||
| 3 | # © 2023 Unicode®, Inc. | ||
| 4 | # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. | ||
| 5 | # For terms of use, see https://www.unicode.org/terms_of_use.html | ||
| 6 | # | ||
| 7 | # Unicode Character Database | ||
| 8 | # For documentation, see https://www.unicode.org/reports/tr44/ | ||
| 9 | # | ||
| 10 | # This file contains aliases for properties used in the UCD. | ||
| 11 | # These names can be used for XML formats of UCD data, for regular-expression | ||
| 12 | # property tests, and other programmatic textual descriptions of Unicode data. | ||
| 13 | # | ||
| 14 | # The names may be translated in appropriate environments, and additional | ||
| 15 | # aliases may be useful. | ||
| 16 | # | ||
| 17 | # FORMAT | ||
| 18 | # | ||
| 19 | # Each line has two or more fields, separated by semicolons. | ||
| 20 | # | ||
| 21 | # First Field: The first field is the short name for the property. | ||
| 22 | # It is typically an abbreviation, but in a number of cases it is simply | ||
| 23 | # a duplicate of the "long name" in the second field. | ||
| 24 | # For Unihan database tags, the short name is actually a longer string than | ||
| 25 | # the tag specified in the second field. | ||
| 26 | # | ||
| 27 | # Second Field: The second field is the long name for the property, | ||
| 28 | # typically the formal name used in documentation about the property. | ||
| 29 | # | ||
| 30 | # The above are the preferred aliases. Other aliases may be listed in additional fields. | ||
| 31 | # | ||
| 32 | # Loose matching should be applied to all property names and property values, with | ||
| 33 | # the exception of String Property values. With loose matching of property names and | ||
| 34 | # values, the case distinctions, whitespace, and '_' are ignored. For Numeric Property | ||
| 35 | # values, numeric equivalencies are applied: thus "01.00" is equivalent to "1". | ||
| 36 | # | ||
| 37 | # NOTE: Property value names are NOT unique across properties. For example: | ||
| 38 | # | ||
| 39 | # AL means Arabic Letter for the Bidi_Class property, and | ||
| 40 | # AL means Above_Left for the Combining_Class property, and | ||
| 41 | # AL means Alphabetic for the Line_Break property. | ||
| 42 | # | ||
| 43 | # In addition, some property names may be the same as some property value names. | ||
| 44 | # For example: | ||
| 45 | # | ||
| 46 | # sc means the Script property, and | ||
| 47 | # Sc means the General_Category property value Currency_Symbol (Sc) | ||
| 48 | # | ||
| 49 | # The combination of property value and property name is, however, unique. | ||
| 50 | # | ||
| 51 | # For more information, see UAX #44, Unicode Character Database, and | ||
| 52 | # UTS #18, Unicode Regular Expressions. | ||
| 53 | # ================================================ | ||
| 54 | |||
| 55 | |||
| 56 | # ================================================ | ||
| 57 | # Numeric Properties | ||
| 58 | # ================================================ | ||
| 59 | cjkAccountingNumeric ; kAccountingNumeric | ||
| 60 | cjkOtherNumeric ; kOtherNumeric | ||
| 61 | cjkPrimaryNumeric ; kPrimaryNumeric | ||
| 62 | nv ; Numeric_Value | ||
| 63 | |||
| 64 | # ================================================ | ||
| 65 | # String Properties | ||
| 66 | # ================================================ | ||
| 67 | bmg ; Bidi_Mirroring_Glyph | ||
| 68 | bpb ; Bidi_Paired_Bracket | ||
| 69 | cf ; Case_Folding | ||
| 70 | cjkCompatibilityVariant ; kCompatibilityVariant | ||
| 71 | dm ; Decomposition_Mapping | ||
| 72 | EqUIdeo ; Equivalent_Unified_Ideograph | ||
| 73 | FC_NFKC ; FC_NFKC_Closure | ||
| 74 | lc ; Lowercase_Mapping | ||
| 75 | NFKC_CF ; NFKC_Casefold | ||
| 76 | NFKC_SCF ; NFKC_Simple_Casefold | ||
| 77 | scf ; Simple_Case_Folding ; sfc | ||
| 78 | slc ; Simple_Lowercase_Mapping | ||
| 79 | stc ; Simple_Titlecase_Mapping | ||
| 80 | suc ; Simple_Uppercase_Mapping | ||
| 81 | tc ; Titlecase_Mapping | ||
| 82 | uc ; Uppercase_Mapping | ||
| 83 | |||
| 84 | # ================================================ | ||
| 85 | # Miscellaneous Properties | ||
| 86 | # ================================================ | ||
| 87 | cjkIICore ; kIICore | ||
| 88 | cjkIRG_GSource ; kIRG_GSource | ||
| 89 | cjkIRG_HSource ; kIRG_HSource | ||
| 90 | cjkIRG_JSource ; kIRG_JSource | ||
| 91 | cjkIRG_KPSource ; kIRG_KPSource | ||
| 92 | cjkIRG_KSource ; kIRG_KSource | ||
| 93 | cjkIRG_MSource ; kIRG_MSource | ||
| 94 | cjkIRG_SSource ; kIRG_SSource | ||
| 95 | cjkIRG_TSource ; kIRG_TSource | ||
| 96 | cjkIRG_UKSource ; kIRG_UKSource | ||
| 97 | cjkIRG_USource ; kIRG_USource | ||
| 98 | cjkIRG_VSource ; kIRG_VSource | ||
| 99 | cjkRSUnicode ; kRSUnicode ; Unicode_Radical_Stroke; URS | ||
| 100 | isc ; ISO_Comment | ||
| 101 | JSN ; Jamo_Short_Name | ||
| 102 | na ; Name | ||
| 103 | na1 ; Unicode_1_Name | ||
| 104 | Name_Alias ; Name_Alias | ||
| 105 | scx ; Script_Extensions | ||
| 106 | |||
| 107 | # ================================================ | ||
| 108 | # Catalog Properties | ||
| 109 | # ================================================ | ||
| 110 | age ; Age | ||
| 111 | blk ; Block | ||
| 112 | sc ; Script | ||
| 113 | |||
| 114 | # ================================================ | ||
| 115 | # Enumerated Properties | ||
| 116 | # ================================================ | ||
| 117 | bc ; Bidi_Class | ||
| 118 | bpt ; Bidi_Paired_Bracket_Type | ||
| 119 | ccc ; Canonical_Combining_Class | ||
| 120 | dt ; Decomposition_Type | ||
| 121 | ea ; East_Asian_Width | ||
| 122 | gc ; General_Category | ||
| 123 | GCB ; Grapheme_Cluster_Break | ||
| 124 | hst ; Hangul_Syllable_Type | ||
| 125 | InCB ; Indic_Conjunct_Break | ||
| 126 | InPC ; Indic_Positional_Category | ||
| 127 | InSC ; Indic_Syllabic_Category | ||
| 128 | jg ; Joining_Group | ||
| 129 | jt ; Joining_Type | ||
| 130 | lb ; Line_Break | ||
| 131 | NFC_QC ; NFC_Quick_Check | ||
| 132 | NFD_QC ; NFD_Quick_Check | ||
| 133 | NFKC_QC ; NFKC_Quick_Check | ||
| 134 | NFKD_QC ; NFKD_Quick_Check | ||
| 135 | nt ; Numeric_Type | ||
| 136 | SB ; Sentence_Break | ||
| 137 | vo ; Vertical_Orientation | ||
| 138 | WB ; Word_Break | ||
| 139 | |||
| 140 | # ================================================ | ||
| 141 | # Binary Properties | ||
| 142 | # ================================================ | ||
| 143 | AHex ; ASCII_Hex_Digit | ||
| 144 | Alpha ; Alphabetic | ||
| 145 | Bidi_C ; Bidi_Control | ||
| 146 | Bidi_M ; Bidi_Mirrored | ||
| 147 | Cased ; Cased | ||
| 148 | CE ; Composition_Exclusion | ||
| 149 | CI ; Case_Ignorable | ||
| 150 | Comp_Ex ; Full_Composition_Exclusion | ||
| 151 | CWCF ; Changes_When_Casefolded | ||
| 152 | CWCM ; Changes_When_Casemapped | ||
| 153 | CWKCF ; Changes_When_NFKC_Casefolded | ||
| 154 | CWL ; Changes_When_Lowercased | ||
| 155 | CWT ; Changes_When_Titlecased | ||
| 156 | CWU ; Changes_When_Uppercased | ||
| 157 | Dash ; Dash | ||
| 158 | Dep ; Deprecated | ||
| 159 | DI ; Default_Ignorable_Code_Point | ||
| 160 | Dia ; Diacritic | ||
| 161 | EBase ; Emoji_Modifier_Base | ||
| 162 | EComp ; Emoji_Component | ||
| 163 | EMod ; Emoji_Modifier | ||
| 164 | Emoji ; Emoji | ||
| 165 | EPres ; Emoji_Presentation | ||
| 166 | Ext ; Extender | ||
| 167 | ExtPict ; Extended_Pictographic | ||
| 168 | Gr_Base ; Grapheme_Base | ||
| 169 | Gr_Ext ; Grapheme_Extend | ||
| 170 | Gr_Link ; Grapheme_Link | ||
| 171 | Hex ; Hex_Digit | ||
| 172 | Hyphen ; Hyphen | ||
| 173 | ID_Compat_Math_Continue ; ID_Compat_Math_Continue | ||
| 174 | ID_Compat_Math_Start ; ID_Compat_Math_Start | ||
| 175 | IDC ; ID_Continue | ||
| 176 | Ideo ; Ideographic | ||
| 177 | IDS ; ID_Start | ||
| 178 | IDSB ; IDS_Binary_Operator | ||
| 179 | IDST ; IDS_Trinary_Operator | ||
| 180 | IDSU ; IDS_Unary_Operator | ||
| 181 | Join_C ; Join_Control | ||
| 182 | LOE ; Logical_Order_Exception | ||
| 183 | Lower ; Lowercase | ||
| 184 | Math ; Math | ||
| 185 | NChar ; Noncharacter_Code_Point | ||
| 186 | OAlpha ; Other_Alphabetic | ||
| 187 | ODI ; Other_Default_Ignorable_Code_Point | ||
| 188 | OGr_Ext ; Other_Grapheme_Extend | ||
| 189 | OIDC ; Other_ID_Continue | ||
| 190 | OIDS ; Other_ID_Start | ||
| 191 | OLower ; Other_Lowercase | ||
| 192 | OMath ; Other_Math | ||
| 193 | OUpper ; Other_Uppercase | ||
| 194 | Pat_Syn ; Pattern_Syntax | ||
| 195 | Pat_WS ; Pattern_White_Space | ||
| 196 | PCM ; Prepended_Concatenation_Mark | ||
| 197 | QMark ; Quotation_Mark | ||
| 198 | Radical ; Radical | ||
| 199 | RI ; Regional_Indicator | ||
| 200 | SD ; Soft_Dotted | ||
| 201 | STerm ; Sentence_Terminal | ||
| 202 | Term ; Terminal_Punctuation | ||
| 203 | UIdeo ; Unified_Ideograph | ||
| 204 | Upper ; Uppercase | ||
| 205 | VS ; Variation_Selector | ||
| 206 | WSpace ; White_Space ; space | ||
| 207 | XIDC ; XID_Continue | ||
| 208 | XIDS ; XID_Start | ||
| 209 | XO_NFC ; Expands_On_NFC | ||
| 210 | XO_NFD ; Expands_On_NFD | ||
| 211 | XO_NFKC ; Expands_On_NFKC | ||
| 212 | XO_NFKD ; Expands_On_NFKD | ||
| 213 | |||
| 214 | # ================================================ | ||
| 215 | # Total: 134 | ||
| 216 | |||
| 217 | # EOF | ||