diff options
| author | 2024-02-20 09:13:36 -0400 | |
|---|---|---|
| committer | 2024-02-20 09:13:36 -0400 | |
| commit | 134a7df8206aa66ca7b0abbc7f31f08410b502d2 (patch) | |
| tree | e3c520dce4f529de5290bc145847e78fb5583bee /norm_notes.txt | |
| parent | Cleaned up directory structure (diff) | |
| download | zg-134a7df8206aa66ca7b0abbc7f31f08410b502d2.tar.gz zg-134a7df8206aa66ca7b0abbc7f31f08410b502d2.tar.xz zg-134a7df8206aa66ca7b0abbc7f31f08410b502d2.zip | |
Replaced ccc_map with table. 20ms faster
Diffstat (limited to 'norm_notes.txt')
| -rw-r--r-- | norm_notes.txt | 42 |
1 files changed, 42 insertions, 0 deletions
diff --git a/norm_notes.txt b/norm_notes.txt new file mode 100644 index 0000000..ae78ea6 --- /dev/null +++ b/norm_notes.txt | |||
| @@ -0,0 +1,42 @@ | |||
| 1 | * ASCII (0x0..0x7f) needs no normalization. | ||
| 2 | * Latin-1 (0x0..0xff) needs no NFC normalization. | ||
| 3 | * Composition exclusions cannot appear in any | ||
| 4 | normalized string of any normalization form. | ||
| 5 | * Singleton decompositions are excluded from the | ||
| 6 | composition algorithm. | ||
| 7 | * Non-starter decompositions are excluded from the | ||
| 8 | composition algorithm. | ||
| 9 | * There are no Quick Check MAYBE values for NFD and NFKD. | ||
| 10 | * Combining Class Code 255 is available as a flag. | ||
| 11 | * Sample Java Quick Check code: | ||
| 12 | |||
| 13 | public int quickCheck(String source) { | ||
| 14 | short lastCanonicalClass = 0; | ||
| 15 | int result = YES; | ||
| 16 | for (int i = 0; i < source.length(); ++i) { | ||
| 17 | int ch = source.codePointAt(i); | ||
| 18 | if (Character.isSupplementaryCodePoint(ch)) ++i; | ||
| 19 | short canonicalClass = getCanonicalClass(ch); | ||
| 20 | if (lastCanonicalClass > canonicalClass && canonicalClass != 0) { | ||
| 21 | return NO; } | ||
| 22 | int check = isAllowed(ch); | ||
| 23 | if (check == NO) return NO; | ||
| 24 | if (check == MAYBE) result = MAYBE; | ||
| 25 | lastCanonicalClass = canonicalClass; | ||
| 26 | } | ||
| 27 | return result; | ||
| 28 | } | ||
| 29 | |||
| 30 | * No string when decomposed with NFC expands to more than 3× | ||
| 31 | in length (measured in code units). | ||
| 32 | * When concatenating normalized strings, re-normalize from the | ||
| 33 | last code point in string A with Quick_Check=YES and | ||
| 34 | Canonical_Combining_Class=0 to the first code point in string B | ||
| 35 | with Quick_Check=YES and Canonical_Combining_Class=0. | ||
| 36 | * If requiring Stream-Safe Text Format strings, a 128-byte buffer is all | ||
| 37 | that's needed to normalize. | ||
| 38 | |||
| 39 | * Flags: | ||
| 40 | - Combining Class | ||
| 41 | - Hangul Syllable Type | ||
| 42 | - Full Composition Exclusion | ||