1 files changed, 63 insertions, 0 deletions
diff --git a/data/unicode/NamedSequencesProv.txt b/data/unicode/NamedSequencesProv.txt
new file mode 100644
index 0000000..7bea43c
--- /dev/null
+++ b/data/unicode/NamedSequencesProv.txt
@@ -0,0 +1,63 @@
+# NamedSequencesProv-15.1.0.txt
+# Date: 2023-01-05
+# © 2023 Unicode®, Inc.
+# For terms of use, see https://www.unicode.org/terms_of_use.html
+#
+# Unicode Character Database
+# For documentation, see https://www.unicode.org/reports/tr44/
+#
+# Provisional Unicode Named Character Sequences
+#
+# Note: This data file contains those named character
+#   sequences which have been designated to be provisional,
+#   rather than fully approved.
+#
+# Format:
+# Name of Sequence; Code Point Sequence for USI
+#
+# Code point sequences in the Unicode Character Database
+# use spaces as delimiters. The corresponding format for a
+# UCS Sequence Identifier (USI) in ISO/IEC 10646 uses
+# comma delimitation and angle brackets. Thus, a Unicode
+# named character sequence of the form:
+#
+# EXAMPLE NAME;1000 1001 1002
+#
+# in this data file, would correspond to an ISO/IEC 10646 USI
+# as follows:
+#
+# <1000, 1001, 1002>
+#
+# For more information, see UAX #34: Unicode Named Character
+# Sequences, at https://www.unicode.org/reports/tr34/
+#
+# Note: The order of entries in this file is not significant.
+# However, entries are generally in script order corresponding
+# to block order in the Unicode Standard, to make it easier
+# to find entries currently in the list.
+# ================================================
+# Provisional entries for NamedSequences.txt.
+# No provisional entries are currently defined.
+# ================================================
+# Entries from Unicode 4.1.0 version of NamedSequences.txt,
+# subsequently disapproved because of potential errors in
+# representation.
+# GURMUKHI HALF YA;0A2F 0A4D
+# GURMUKHI PARI YA;0A4D 0A2F
+# Entry removed 2006-05-18:
+#
+# LATIN SMALL LETTER A WITH ACUTE AND OGONEK;00E1 0328
+#
+# This entry was removed because the sequence was not in NFC,
+# as required. It was replaced with the NFC version of
+# the sequence, based on the Lithuanian additions accepted
+# for Unicode 5.0.
+# EOF

diff --git a/data/unicode/NamedSequencesProv.txt b/data/unicode/NamedSequencesProv.txt new file mode 100644 index 0000000..7bea43c --- /dev/null +++ b/data/unicode/NamedSequencesProv.txt
@@ -0,0 +1,63 @@
	1	# NamedSequencesProv-15.1.0.txt
	2	# Date: 2023-01-05
	3	# © 2023 Unicode®, Inc.
	4	# For terms of use, see https://www.unicode.org/terms_of_use.html
	5	#
	6	# Unicode Character Database
	7	# For documentation, see https://www.unicode.org/reports/tr44/
	8	#
	9	# Provisional Unicode Named Character Sequences
	10	#
	11	# Note: This data file contains those named character
	12	# sequences which have been designated to be provisional,
	13	# rather than fully approved.
	14	#
	15	# Format:
	16	# Name of Sequence; Code Point Sequence for USI
	17	#
	18	# Code point sequences in the Unicode Character Database
	19	# use spaces as delimiters. The corresponding format for a
	20	# UCS Sequence Identifier (USI) in ISO/IEC 10646 uses
	21	# comma delimitation and angle brackets. Thus, a Unicode
	22	# named character sequence of the form:
	23	#
	24	# EXAMPLE NAME;1000 1001 1002
	25	#
	26	# in this data file, would correspond to an ISO/IEC 10646 USI
	27	# as follows:
	28	#
	29	# <1000, 1001, 1002>
	30	#
	31	# For more information, see UAX #34: Unicode Named Character
	32	# Sequences, at https://www.unicode.org/reports/tr34/
	33	#
	34	# Note: The order of entries in this file is not significant.
	35	# However, entries are generally in script order corresponding
	36	# to block order in the Unicode Standard, to make it easier
	37	# to find entries currently in the list.
	38
	39	# ================================================
	40
	41	# Provisional entries for NamedSequences.txt.
	42
	43	# No provisional entries are currently defined.
	44
	45	# ================================================
	46
	47	# Entries from Unicode 4.1.0 version of NamedSequences.txt,
	48	# subsequently disapproved because of potential errors in
	49	# representation.
	50
	51	# GURMUKHI HALF YA;0A2F 0A4D
	52	# GURMUKHI PARI YA;0A4D 0A2F
	53
	54	# Entry removed 2006-05-18:
	55	#
	56	# LATIN SMALL LETTER A WITH ACUTE AND OGONEK;00E1 0328
	57	#
	58	# This entry was removed because the sequence was not in NFC,
	59	# as required. It was replaced with the NFC version of
	60	# the sequence, based on the Lithuanian additions accepted
	61	# for Unicode 5.0.
	62
	63	# EOF