From 6013b2ded106521ee9cae6bd77dacbd5254ff763 Mon Sep 17 00:00:00 2001 From: Jose Colon Rodriguez Date: Mon, 19 Feb 2024 09:11:56 -0400 Subject: Cleaned up directory structure --- data/unicode/PropertyAliases.txt | 217 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 217 insertions(+) create mode 100644 data/unicode/PropertyAliases.txt (limited to 'data/unicode/PropertyAliases.txt') diff --git a/data/unicode/PropertyAliases.txt b/data/unicode/PropertyAliases.txt new file mode 100644 index 0000000..686b25a --- /dev/null +++ b/data/unicode/PropertyAliases.txt @@ -0,0 +1,217 @@ +# PropertyAliases-15.1.0.txt +# Date: 2023-08-07, 15:21:34 GMT +# © 2023 Unicode®, Inc. +# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. +# For terms of use, see https://www.unicode.org/terms_of_use.html +# +# Unicode Character Database +# For documentation, see https://www.unicode.org/reports/tr44/ +# +# This file contains aliases for properties used in the UCD. +# These names can be used for XML formats of UCD data, for regular-expression +# property tests, and other programmatic textual descriptions of Unicode data. +# +# The names may be translated in appropriate environments, and additional +# aliases may be useful. +# +# FORMAT +# +# Each line has two or more fields, separated by semicolons. +# +# First Field: The first field is the short name for the property. +# It is typically an abbreviation, but in a number of cases it is simply +# a duplicate of the "long name" in the second field. +# For Unihan database tags, the short name is actually a longer string than +# the tag specified in the second field. +# +# Second Field: The second field is the long name for the property, +# typically the formal name used in documentation about the property. +# +# The above are the preferred aliases. Other aliases may be listed in additional fields. +# +# Loose matching should be applied to all property names and property values, with +# the exception of String Property values. With loose matching of property names and +# values, the case distinctions, whitespace, and '_' are ignored. For Numeric Property +# values, numeric equivalencies are applied: thus "01.00" is equivalent to "1". +# +# NOTE: Property value names are NOT unique across properties. For example: +# +# AL means Arabic Letter for the Bidi_Class property, and +# AL means Above_Left for the Combining_Class property, and +# AL means Alphabetic for the Line_Break property. +# +# In addition, some property names may be the same as some property value names. +# For example: +# +# sc means the Script property, and +# Sc means the General_Category property value Currency_Symbol (Sc) +# +# The combination of property value and property name is, however, unique. +# +# For more information, see UAX #44, Unicode Character Database, and +# UTS #18, Unicode Regular Expressions. +# ================================================ + + +# ================================================ +# Numeric Properties +# ================================================ +cjkAccountingNumeric ; kAccountingNumeric +cjkOtherNumeric ; kOtherNumeric +cjkPrimaryNumeric ; kPrimaryNumeric +nv ; Numeric_Value + +# ================================================ +# String Properties +# ================================================ +bmg ; Bidi_Mirroring_Glyph +bpb ; Bidi_Paired_Bracket +cf ; Case_Folding +cjkCompatibilityVariant ; kCompatibilityVariant +dm ; Decomposition_Mapping +EqUIdeo ; Equivalent_Unified_Ideograph +FC_NFKC ; FC_NFKC_Closure +lc ; Lowercase_Mapping +NFKC_CF ; NFKC_Casefold +NFKC_SCF ; NFKC_Simple_Casefold +scf ; Simple_Case_Folding ; sfc +slc ; Simple_Lowercase_Mapping +stc ; Simple_Titlecase_Mapping +suc ; Simple_Uppercase_Mapping +tc ; Titlecase_Mapping +uc ; Uppercase_Mapping + +# ================================================ +# Miscellaneous Properties +# ================================================ +cjkIICore ; kIICore +cjkIRG_GSource ; kIRG_GSource +cjkIRG_HSource ; kIRG_HSource +cjkIRG_JSource ; kIRG_JSource +cjkIRG_KPSource ; kIRG_KPSource +cjkIRG_KSource ; kIRG_KSource +cjkIRG_MSource ; kIRG_MSource +cjkIRG_SSource ; kIRG_SSource +cjkIRG_TSource ; kIRG_TSource +cjkIRG_UKSource ; kIRG_UKSource +cjkIRG_USource ; kIRG_USource +cjkIRG_VSource ; kIRG_VSource +cjkRSUnicode ; kRSUnicode ; Unicode_Radical_Stroke; URS +isc ; ISO_Comment +JSN ; Jamo_Short_Name +na ; Name +na1 ; Unicode_1_Name +Name_Alias ; Name_Alias +scx ; Script_Extensions + +# ================================================ +# Catalog Properties +# ================================================ +age ; Age +blk ; Block +sc ; Script + +# ================================================ +# Enumerated Properties +# ================================================ +bc ; Bidi_Class +bpt ; Bidi_Paired_Bracket_Type +ccc ; Canonical_Combining_Class +dt ; Decomposition_Type +ea ; East_Asian_Width +gc ; General_Category +GCB ; Grapheme_Cluster_Break +hst ; Hangul_Syllable_Type +InCB ; Indic_Conjunct_Break +InPC ; Indic_Positional_Category +InSC ; Indic_Syllabic_Category +jg ; Joining_Group +jt ; Joining_Type +lb ; Line_Break +NFC_QC ; NFC_Quick_Check +NFD_QC ; NFD_Quick_Check +NFKC_QC ; NFKC_Quick_Check +NFKD_QC ; NFKD_Quick_Check +nt ; Numeric_Type +SB ; Sentence_Break +vo ; Vertical_Orientation +WB ; Word_Break + +# ================================================ +# Binary Properties +# ================================================ +AHex ; ASCII_Hex_Digit +Alpha ; Alphabetic +Bidi_C ; Bidi_Control +Bidi_M ; Bidi_Mirrored +Cased ; Cased +CE ; Composition_Exclusion +CI ; Case_Ignorable +Comp_Ex ; Full_Composition_Exclusion +CWCF ; Changes_When_Casefolded +CWCM ; Changes_When_Casemapped +CWKCF ; Changes_When_NFKC_Casefolded +CWL ; Changes_When_Lowercased +CWT ; Changes_When_Titlecased +CWU ; Changes_When_Uppercased +Dash ; Dash +Dep ; Deprecated +DI ; Default_Ignorable_Code_Point +Dia ; Diacritic +EBase ; Emoji_Modifier_Base +EComp ; Emoji_Component +EMod ; Emoji_Modifier +Emoji ; Emoji +EPres ; Emoji_Presentation +Ext ; Extender +ExtPict ; Extended_Pictographic +Gr_Base ; Grapheme_Base +Gr_Ext ; Grapheme_Extend +Gr_Link ; Grapheme_Link +Hex ; Hex_Digit +Hyphen ; Hyphen +ID_Compat_Math_Continue ; ID_Compat_Math_Continue +ID_Compat_Math_Start ; ID_Compat_Math_Start +IDC ; ID_Continue +Ideo ; Ideographic +IDS ; ID_Start +IDSB ; IDS_Binary_Operator +IDST ; IDS_Trinary_Operator +IDSU ; IDS_Unary_Operator +Join_C ; Join_Control +LOE ; Logical_Order_Exception +Lower ; Lowercase +Math ; Math +NChar ; Noncharacter_Code_Point +OAlpha ; Other_Alphabetic +ODI ; Other_Default_Ignorable_Code_Point +OGr_Ext ; Other_Grapheme_Extend +OIDC ; Other_ID_Continue +OIDS ; Other_ID_Start +OLower ; Other_Lowercase +OMath ; Other_Math +OUpper ; Other_Uppercase +Pat_Syn ; Pattern_Syntax +Pat_WS ; Pattern_White_Space +PCM ; Prepended_Concatenation_Mark +QMark ; Quotation_Mark +Radical ; Radical +RI ; Regional_Indicator +SD ; Soft_Dotted +STerm ; Sentence_Terminal +Term ; Terminal_Punctuation +UIdeo ; Unified_Ideograph +Upper ; Uppercase +VS ; Variation_Selector +WSpace ; White_Space ; space +XIDC ; XID_Continue +XIDS ; XID_Start +XO_NFC ; Expands_On_NFC +XO_NFD ; Expands_On_NFD +XO_NFKC ; Expands_On_NFKC +XO_NFKD ; Expands_On_NFKD + +# ================================================ +# Total: 134 + +# EOF -- cgit v1.2.3