summaryrefslogtreecommitdiff
path: root/data/unicode/PropertyAliases.txt
diff options
context:
space:
mode:
authorGravatar Jose Colon Rodriguez2024-02-19 09:11:56 -0400
committerGravatar Jose Colon Rodriguez2024-02-19 09:11:56 -0400
commit6013b2ded106521ee9cae6bd77dacbd5254ff763 (patch)
tree990f13cfbe4bfc20a08d2f097c4646984bffb565 /data/unicode/PropertyAliases.txt
parentTried SIMD lower/upper string. Slower than linear. (diff)
downloadzg-6013b2ded106521ee9cae6bd77dacbd5254ff763.tar.gz
zg-6013b2ded106521ee9cae6bd77dacbd5254ff763.tar.xz
zg-6013b2ded106521ee9cae6bd77dacbd5254ff763.zip
Cleaned up directory structure
Diffstat (limited to 'data/unicode/PropertyAliases.txt')
-rw-r--r--data/unicode/PropertyAliases.txt217
1 files changed, 217 insertions, 0 deletions
diff --git a/data/unicode/PropertyAliases.txt b/data/unicode/PropertyAliases.txt
new file mode 100644
index 0000000..686b25a
--- /dev/null
+++ b/data/unicode/PropertyAliases.txt
@@ -0,0 +1,217 @@
1# PropertyAliases-15.1.0.txt
2# Date: 2023-08-07, 15:21:34 GMT
3# © 2023 Unicode®, Inc.
4# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
5# For terms of use, see https://www.unicode.org/terms_of_use.html
6#
7# Unicode Character Database
8# For documentation, see https://www.unicode.org/reports/tr44/
9#
10# This file contains aliases for properties used in the UCD.
11# These names can be used for XML formats of UCD data, for regular-expression
12# property tests, and other programmatic textual descriptions of Unicode data.
13#
14# The names may be translated in appropriate environments, and additional
15# aliases may be useful.
16#
17# FORMAT
18#
19# Each line has two or more fields, separated by semicolons.
20#
21# First Field: The first field is the short name for the property.
22# It is typically an abbreviation, but in a number of cases it is simply
23# a duplicate of the "long name" in the second field.
24# For Unihan database tags, the short name is actually a longer string than
25# the tag specified in the second field.
26#
27# Second Field: The second field is the long name for the property,
28# typically the formal name used in documentation about the property.
29#
30# The above are the preferred aliases. Other aliases may be listed in additional fields.
31#
32# Loose matching should be applied to all property names and property values, with
33# the exception of String Property values. With loose matching of property names and
34# values, the case distinctions, whitespace, and '_' are ignored. For Numeric Property
35# values, numeric equivalencies are applied: thus "01.00" is equivalent to "1".
36#
37# NOTE: Property value names are NOT unique across properties. For example:
38#
39# AL means Arabic Letter for the Bidi_Class property, and
40# AL means Above_Left for the Combining_Class property, and
41# AL means Alphabetic for the Line_Break property.
42#
43# In addition, some property names may be the same as some property value names.
44# For example:
45#
46# sc means the Script property, and
47# Sc means the General_Category property value Currency_Symbol (Sc)
48#
49# The combination of property value and property name is, however, unique.
50#
51# For more information, see UAX #44, Unicode Character Database, and
52# UTS #18, Unicode Regular Expressions.
53# ================================================
54
55
56# ================================================
57# Numeric Properties
58# ================================================
59cjkAccountingNumeric ; kAccountingNumeric
60cjkOtherNumeric ; kOtherNumeric
61cjkPrimaryNumeric ; kPrimaryNumeric
62nv ; Numeric_Value
63
64# ================================================
65# String Properties
66# ================================================
67bmg ; Bidi_Mirroring_Glyph
68bpb ; Bidi_Paired_Bracket
69cf ; Case_Folding
70cjkCompatibilityVariant ; kCompatibilityVariant
71dm ; Decomposition_Mapping
72EqUIdeo ; Equivalent_Unified_Ideograph
73FC_NFKC ; FC_NFKC_Closure
74lc ; Lowercase_Mapping
75NFKC_CF ; NFKC_Casefold
76NFKC_SCF ; NFKC_Simple_Casefold
77scf ; Simple_Case_Folding ; sfc
78slc ; Simple_Lowercase_Mapping
79stc ; Simple_Titlecase_Mapping
80suc ; Simple_Uppercase_Mapping
81tc ; Titlecase_Mapping
82uc ; Uppercase_Mapping
83
84# ================================================
85# Miscellaneous Properties
86# ================================================
87cjkIICore ; kIICore
88cjkIRG_GSource ; kIRG_GSource
89cjkIRG_HSource ; kIRG_HSource
90cjkIRG_JSource ; kIRG_JSource
91cjkIRG_KPSource ; kIRG_KPSource
92cjkIRG_KSource ; kIRG_KSource
93cjkIRG_MSource ; kIRG_MSource
94cjkIRG_SSource ; kIRG_SSource
95cjkIRG_TSource ; kIRG_TSource
96cjkIRG_UKSource ; kIRG_UKSource
97cjkIRG_USource ; kIRG_USource
98cjkIRG_VSource ; kIRG_VSource
99cjkRSUnicode ; kRSUnicode ; Unicode_Radical_Stroke; URS
100isc ; ISO_Comment
101JSN ; Jamo_Short_Name
102na ; Name
103na1 ; Unicode_1_Name
104Name_Alias ; Name_Alias
105scx ; Script_Extensions
106
107# ================================================
108# Catalog Properties
109# ================================================
110age ; Age
111blk ; Block
112sc ; Script
113
114# ================================================
115# Enumerated Properties
116# ================================================
117bc ; Bidi_Class
118bpt ; Bidi_Paired_Bracket_Type
119ccc ; Canonical_Combining_Class
120dt ; Decomposition_Type
121ea ; East_Asian_Width
122gc ; General_Category
123GCB ; Grapheme_Cluster_Break
124hst ; Hangul_Syllable_Type
125InCB ; Indic_Conjunct_Break
126InPC ; Indic_Positional_Category
127InSC ; Indic_Syllabic_Category
128jg ; Joining_Group
129jt ; Joining_Type
130lb ; Line_Break
131NFC_QC ; NFC_Quick_Check
132NFD_QC ; NFD_Quick_Check
133NFKC_QC ; NFKC_Quick_Check
134NFKD_QC ; NFKD_Quick_Check
135nt ; Numeric_Type
136SB ; Sentence_Break
137vo ; Vertical_Orientation
138WB ; Word_Break
139
140# ================================================
141# Binary Properties
142# ================================================
143AHex ; ASCII_Hex_Digit
144Alpha ; Alphabetic
145Bidi_C ; Bidi_Control
146Bidi_M ; Bidi_Mirrored
147Cased ; Cased
148CE ; Composition_Exclusion
149CI ; Case_Ignorable
150Comp_Ex ; Full_Composition_Exclusion
151CWCF ; Changes_When_Casefolded
152CWCM ; Changes_When_Casemapped
153CWKCF ; Changes_When_NFKC_Casefolded
154CWL ; Changes_When_Lowercased
155CWT ; Changes_When_Titlecased
156CWU ; Changes_When_Uppercased
157Dash ; Dash
158Dep ; Deprecated
159DI ; Default_Ignorable_Code_Point
160Dia ; Diacritic
161EBase ; Emoji_Modifier_Base
162EComp ; Emoji_Component
163EMod ; Emoji_Modifier
164Emoji ; Emoji
165EPres ; Emoji_Presentation
166Ext ; Extender
167ExtPict ; Extended_Pictographic
168Gr_Base ; Grapheme_Base
169Gr_Ext ; Grapheme_Extend
170Gr_Link ; Grapheme_Link
171Hex ; Hex_Digit
172Hyphen ; Hyphen
173ID_Compat_Math_Continue ; ID_Compat_Math_Continue
174ID_Compat_Math_Start ; ID_Compat_Math_Start
175IDC ; ID_Continue
176Ideo ; Ideographic
177IDS ; ID_Start
178IDSB ; IDS_Binary_Operator
179IDST ; IDS_Trinary_Operator
180IDSU ; IDS_Unary_Operator
181Join_C ; Join_Control
182LOE ; Logical_Order_Exception
183Lower ; Lowercase
184Math ; Math
185NChar ; Noncharacter_Code_Point
186OAlpha ; Other_Alphabetic
187ODI ; Other_Default_Ignorable_Code_Point
188OGr_Ext ; Other_Grapheme_Extend
189OIDC ; Other_ID_Continue
190OIDS ; Other_ID_Start
191OLower ; Other_Lowercase
192OMath ; Other_Math
193OUpper ; Other_Uppercase
194Pat_Syn ; Pattern_Syntax
195Pat_WS ; Pattern_White_Space
196PCM ; Prepended_Concatenation_Mark
197QMark ; Quotation_Mark
198Radical ; Radical
199RI ; Regional_Indicator
200SD ; Soft_Dotted
201STerm ; Sentence_Terminal
202Term ; Terminal_Punctuation
203UIdeo ; Unified_Ideograph
204Upper ; Uppercase
205VS ; Variation_Selector
206WSpace ; White_Space ; space
207XIDC ; XID_Continue
208XIDS ; XID_Start
209XO_NFC ; Expands_On_NFC
210XO_NFD ; Expands_On_NFD
211XO_NFKC ; Expands_On_NFKC
212XO_NFKD ; Expands_On_NFKD
213
214# ================================================
215# Total: 134
216
217# EOF