32 files changed, 606 insertions, 136 deletions
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..d4fc8f6
--- /dev/null
+++ b/README.md
@@ -0,0 +1,537 @@
+# zg
+zg provides Unicode text processing for Zig projects.
+## Unicode Version
+The Unicode version supported by zg is 15.1.0.
+## Zig Version
+The minimum Zig version required is 0.12.0-dev.3496+a2df84d0.
+## Integrating zg into your Zig Project
+You first need to add zg as a dependency in your `build.zig.zon` file:
+```zig
+.zg = .{
+    .url = "https://codeberg.org/dude_the_builder/zg/archive/v0.1.0.tar.gz",
+}
+```
+Then instantiate the dependency in your `build.zig`:
+```zig
+const zg = b.dependency("zg", .{});
+```
+## A Modular Approach
+zg is a modular library. This approach minimizes binary file size and memory
+requirements by only including the Unicode data required for the specified module.
+The following sections describe the various modules and their specific use cases.
+## Code Points
+In the `code_point` module, you'll find a data structure representing a single code
+point, `CodePoint`, and an `Iterator` to iterate over the code points in a string.
+In your `build.zig`:
+```zig
+exe.root_module.addImport("code_point", zg.module("code_point"));
+```
+In your code:
+```zig
+const code_point = @import("code_point");
+test "Code point iterator" {
+    const str = "Hi 😊";
+    var iter = code_point.Iterator{ .bytes = str };
+    var i: usize = 0;
+    while (iter.next()) |cp| : (i += 1) {
+        // The `code` field is the actual code point scalar as a `u21`.
+        if (i == 0) try expect(cp.code == 'H');
+        if (i == 1) try expect(cp.code == 'i');
+        if (i == 2) try expect(cp.code == ' ');
+        if (i == 3) {
+            try expect(cp.code == '😊');
+            // The `offset` field is the byte offset in the
+            // source string.
+            try expect(cp.offset == 3);
+            // The `len` field is the length in bytes of the
+            // code point in the source string.
+            try expect(cp.len == 4);
+        }
+    }
+}
+```
+## Grapheme Clusters
+Many characters are composed from more than one code point. These are known as
+Grapheme Clusters and the `grapheme` module has a data structure to represent
+them, `Grapheme`, and an `Iterator` to iterate over them in a string.
+In your `build.zig`:
+```zig
+exe.root_module.addImport("grapheme", zg.module("grapheme"));
+```
+In your code:
+```zig
+const grapheme = @import("grapheme");
+test "Grapheme cluster iterator" {
+    // We need some Unicode data to process Grapheme Clusters.
+    const gd = try grapheme.GraphemeData.init(allocator);
+    defer gd.deinit();
+    const str = "He\u{301}"; // Hé
+    var iter = grapheme.Iterator.init(str, &gd);
+    var i: usize = 0;
+    while (iter.next()) |gc| : (i += 1) {
+        // The `len` field is the length in bytes of the
+        // grapheme cluster in the source string.
+        if (i == 0) try expect(gc.len == 1);
+        if (i == 1) {
+            try expect(gc.len == 3);
+            // The `offset` in bytes of the grapheme cluster
+            // in the source string.
+            try expect(gc.offset == 1);
+            // The `bytes` method returns the slice of bytes
+            // that comprise this grapheme cluster in the
+            // source string `str`.
+            try expectEqualStrings("e\u{301}", gc.bytes(str));
+        }
+    }
+}
+```
+## Unicode General Categories
+To detect the general category for a code point, use the `GenCatData` module.
+In your `build.zig`:
+```zig
+exe.root_module.addImport("GenCatData", zg.module("GenCatData"));
+```
+In your code:
+```zig
+const GenCatData = @import("GenCatData");
+test "General Category" {
+    const gcd = try GenCatData.init(allocator);
+    defer gcd.deinit();
+    // The `gc` method returns the abbreviated General Category.
+    // These abbreviations and descriptive comments can be found
+    // in the source file `src/GenCatData.zig` as an enum.
+    try expect(gcd.gc('A') == .Lu); // Lu: uppercase letter
+    try expect(gcd.gc('3') == .Nd); // Nd: decimal number
+    // The following are convenience methods for groups of General
+    // Categories. For example, all letter categories start with `L`:
+    // Lu, Ll, Lt, Lm, Lo.
+    try expect(gcd.isControl(0));
+    try expect(gcd.isLetter('z'));
+    try expect(gcd.isMark('\u{301}'));
+    try expect(gcd.isNumber('3'));
+    try expect(gcd.isPunctuation('['));
+    try expect(gcd.isSeparator(' '));
+    try expect(gcd.isSymbol('©'));
+}
+```
+## Unicode Properties
+You can detect common properties of a code point with the `PropsData` module.
+In your `build.zig`:
+```zig
+exe.root_module.addImport("PropsData", zg.module("PropsData"));
+```
+In your code:
+```zig
+const PropsData = @import("PropsData");
+test "Properties" {
+    const pd = try PropsData.init(allocator);
+    defer pd.deinit();
+    // Mathematical symbols and letters.
+    try expect(pd.isMath('+'));
+    // Alphabetic only code points.
+    try expect(pd.isAlphabetic('Z'));
+    // Space, tab, and other separators.
+    try expect(pd.isWhitespace(' '));
+    // Hexadecimal digits and variations thereof.
+    try expect(pd.isHexDigit('f'));
+    try expect(!pd.isHexDigit('z'));
+    // Accents, dieresis, and other combining marks.
+    try expect(pd.isDiacritic('\u{301}'));
+    // Unicode has a specification for valid identifiers like 
+    // the ones used in programming and regular expressions.
+    try expect(pd.isIdStart('Z')); // Identifier start character
+    try expect(!pd.isIdStart('1'));
+    try expect(pd.isIdContinue('1'));
+    // The `X` versions add some code points that can appear after
+    // normalizing a string.
+    try expect(pd.isXidStart('\u{b33}')); // Extended identifier start character
+    try expect(pd.isXidContinue('\u{e33}'));
+    try expect(!pd.isXidStart('1'));
+    // Note surprising Unicode numeric type properties!
+    try expect(pd.isNumeric('\u{277f}'));
+    try expect(!pd.isNumeric('3')); // 3 is not numeric!
+    try expect(pd.isDigit('\u{2070}'));
+    try expect(!pd.isDigit('3')); // 3 is not a digit!
+    try expect(pd.isDecimal('3')); // 3 is a decimal digit
+}
+```
+## Letter Case Detection and Conversion
+To detect and convert to and from different letter cases, use the `CaseData`
+module.
+In your `build.zig`:
+```zig
+exe.root_module.addImport("CaseData", zg.module("CaseData"));
+```
+In your code:
+```zig
+const CaseData = @import("CaseData");
+test "Case" {
+    const cd = try CaseData.init(allocator);
+    defer cd.deinit();
+    // Upper and lower case.
+    try expect(cd.isUpper('A'));
+    try expect('A' == cd.toUpper('a'));
+    try expect(cd.isLower('a'));
+    try expect('a' == cd.toLower('A'));
+    // Code points that have case.
+    try expect(cd.isCased('É'));
+    try expect(!cd.isCased('3'));
+    // Case detection and conversion for strings.
+    try expect(cd.isUpperStr("HELLO 123!"));
+    const ucased = try cd.toUpperStr(allocator, "hello 123");
+    defer allocator.free(ucased);
+    try expectEqualStrings("HELLO 123", ucased);
+    try expect(cd.isLowerStr("hello 123!"));
+    const lcased = try cd.toLowerStr(allocator, "HELLO 123");
+    defer allocator.free(lcased);
+    try expectEqualStrings("hello 123", lcased);
+}
+```
+## Normalization
+Unicode normalization is the process of converting a string into a uniform 
+representation that can guarantee a known structure by following a strict set
+of rules. There are four normalization forms:
+Canonical Composition (NFC)
+: The most compact representation obtained by first
+decomposing to Canonical Decomposition and then composing to NFC.
+Compatibility Composition (NFKC)
+: The most comprehensive composition obtained
+by first decomposing to Compatibility Decomposition and then composing to NFKC.
+Canonical Decomposition (NFD)
+: Only code points with canonical decompositions
+are decomposed. This is a more compact and faster decomposition but will not 
+provide the most comprehensive normalization possible.
+Compatibility Decomposition (NFKD)
+: The most comprehensive decomposition method
+where both canonical and compatibility decompositions are performed recursively.
+zg has methods to produce all four normalization forms in the `Normalize` module. 
+In your `build.zig`:
+```zig
+exe.root_module.addImport("Normalize", zg.module("Normalize"));
+```
+In your code:
+```zig
+const Normalize = @import("Normalize");
+test "Normalization" {
+    // We need lots of Unicode data for normalization.
+    var norm_data = try Normalize.NormData.init(allocator);
+    defer norm_data.deinit();
+    // The `Normalize` structure takes a pointer to the data.
+    const n = Normalize{ .norm_data = &norm_data };
+    // NFC: Canonical composition
+    const nfc_result = try n.nfc(allocator, "Complex char: \u{3D2}\u{301}");
+    defer nfc_result.deinit();
+    try expectEqualStrings("Complex char: \u{3D3}", nfc_result.slice);
+    // NFKC: Compatibility composition
+    const nfkc_result = try n.nfkc(allocator, "Complex char: \u{03A5}\u{0301}");
+    defer nfkc_result.deinit();
+    try expectEqualStrings("Complex char: \u{038E}", nfkc_result.slice);
+    // NFD: Canonical decomposition
+    const nfd_result = try n.nfd(allocator, "Héllo World! \u{3d3}");
+    defer nfd_result.deinit();
+    try expectEqualStrings("He\u{301}llo World! \u{3d2}\u{301}", nfd_result.slice);
+    // NFKD: Compatibility decomposition
+    const nfkd_result = try n.nfkd(allocator, "Héllo World! \u{3d3}");
+    defer nfkd_result.deinit();
+    try expectEqualStrings("He\u{301}llo World! \u{3a5}\u{301}", nfkd_result.slice);
+    // Test for equality of two strings after normalizing to NFC.
+    try expect(try n.eql(allocator, "foé", "foe\u{0301}"));
+    try expect(try n.eql(allocator, "foϓ", "fo\u{03D2}\u{0301}"));
+}
+```
+## Caseless Matching via Case Folding
+Unicode provides a more efficient way of comparing strings while ignoring letter
+case differences: case folding. When you case fold a string, it's converted into a
+normalized case form suitable for efficient matching. Use the `CaseFold` module
+for this.
+In your `build.zig`:
+```zig
+exe.root_module.addImport("Normalize", zg.module("Normalize"));
+exe.root_module.addImport("CaseFold", zg.module("CaseFold"));
+```
+In your code:
+```zig
+const Normalize = @import("Normalize");
+const CaseFold = @import("CaseFold");
+test "Caseless matching" {
+    // We need to normalize during the matching process.
+    var norm_data = try Normalize.NormData.init(allocator);
+    defer norm_data.deinit();
+    const n = Normalize{ .norm_data = &norm_data };
+    // We need Unicode case fold data.
+    const cfd = try CaseFold.FoldData.init(allocator);
+    defer cfd.deinit();
+    // The `CaseFold` structure takes a pointer to the data.
+    const cf = CaseFold{ .fold_data = &cfd };
+    // `compatCaselessMatch` provides the deepest level of caseless
+    // matching because it decomposes fully to NFKD.
+    const a = "Héllo World! \u{3d3}";
+    const b = "He\u{301}llo World! \u{3a5}\u{301}";
+    try expect(try cf.compatCaselessMatch(allocator, &n, a, b));
+    const c = "He\u{301}llo World! \u{3d2}\u{301}";
+    try expect(try cf.compatCaselessMatch(allocator, &n, a, c));
+    // `canonCaselessMatch` isn't as comprehensive as `compatCaselessMatch`
+    // because it only decomposes to NFD. Naturally, it's faster because of this.
+    try expect(!try cf.canonCaselessMatch(allocator, &n, a, b));
+    try expect(try cf.canonCaselessMatch(allocator, &n, a, c));
+}
+```
+## Display Width of Characters and Strings
+When displaying text with a fixed-width font on a terminal screen, it's very
+important to know exactly how many columns or cells each character should take.
+Most characters will use one column, but there are many, like emoji and
+East Asian ideographs, that need more space. The `DisplayWidth` module provides
+methods for this purpose. It also has methods that use the display width calculation
+to `center`, `padLeft`, `padRight`, and `wrap` text.
+In your `build.zig`:
+```zig
+exe.root_module.addImport("DisplayWidth", zg.module("DisplayWidth"));
+```
+In your code:
+```zig
+const DisplayWidth = @import("DisplayWidth");
+test "Display width" {
+    // We need Unicode data for display width calculation.
+    const dwd = try DisplayWidth.DisplayWidthData.init(allocator);
+    defer dwd.deinit();
+    // The `DisplayWidth` structure takes a pointer to the data.
+    const dw = DisplayWidth{ .data = &dwd };
+    // String display width
+    try expectEqual(@as(usize, 5), dw.strWidth("Hello\r\n"));
+    try expectEqual(@as(usize, 8), dw.strWidth("Hello 😊"));
+    try expectEqual(@as(usize, 8), dw.strWidth("Héllo 😊"));
+    try expectEqual(@as(usize, 9), dw.strWidth("Ẓ̌á̲l͔̝̞̄̑͌g̖̘̘̔̔͢͞͝o̪̔T̢̙̫̈̍͞e̬͈͕͌̏͑x̺̍ṭ̓̓ͅ"));
+    try expectEqual(@as(usize, 17), dw.strWidth("슬라바 우크라이나"));
+    // Centering text
+    const centered = try dw.center(allocator, "w😊w", 10, "-");
+    defer allocator.free(centered);
+    try expectEqualStrings("---w😊w---", centered);
+    // Pad left
+    const right_aligned = try dw.padLeft(allocator, "abc", 9, "*");
+    defer allocator.free(right_aligned);
+    try expectEqualStrings("******abc", right_aligned);
+    // Pad right
+    const left_aligned = try dw.padRight(allocator, "abc", 9, "*");
+    defer allocator.free(left_aligned);
+    try expectEqualStrings("abc******", left_aligned);
+    // Wrap text
+    const input = "The quick brown fox\r\njumped over the lazy dog!";
+    const wrapped = try dw.wrap(allocator, input, 10, 3);
+    defer allocator.free(wrapped);
+    const want =
+        \\The quick 
+        \\brown fox 
+        \\jumped 
+        \\over the 
+        \\lazy dog!
+    ;
+    try expectEqualStrings(want, wrapped);
+}
+```
+## Scripts
+Unicode categorizes code points by the Script to which they belong. A Script
+collects letters and other symbols that belong to a particular writing system.
+You can detect the Script for a code point with the `ScriptsData` module.
+In your `build.zig`:
+```zig
+exe.root_module.addImport("ScriptsData", zg.module("ScriptsData"));
+```
+In your code:
+```zig
+const ScriptsData = @import("ScriptsData");
+test "Scripts" {
+    const sd = try ScriptsData.init(allocator);
+    defer sd.deinit();
+    // To see the full list of Scripts, look at the
+    // `src/ScriptsData.zig` file. They are listed in an enum.
+    try expect(sd.script('A') == .Latin);
+    try expect(sd.script('Ω') == .Greek);
+    try expect(sd.script('צ') == .Hebrew);
+}
+```
+## Relation to Ziglyph
+zg is a total re-write of some of the components of Ziglyph. The idea was to
+reduce binary size and improve performance. These goals were achieved by using
+trie-like data structures instead of generated functions. Where Ziglyph uses a 
+function call, zg uses an array lookup, which is considerably faster. In addition, all
+these data structures in zg are loaded at runtime from compressed versions in the
+binary. This allows for smaller binary sizes at the expense of increased memory 
+footprint at runtime.
+Benchmarks demonstrate the above stated goals have been met:
+```plain
+Binary sizes =======
+149K ziglyph_case
+87K zg_case
+275K ziglyph_caseless
+168K zg_caseless
+68K ziglyph_codepoint
+68K zg_codepoint
+101K ziglyph_grapheme
+86K zg_grapheme
+185K ziglyph_normalizer
+152K zg_normalize
+101K ziglyph_width
+86K zg_width
+Benchmarks ==========
+Ziglyph toUpperStr/toLowerStr: result: 7911596, took: 80
+Ziglyph isUpperStr/isLowerStr: result: 110959, took: 17
+zg toUpperStr/toLowerStr: result: 7911596, took: 62
+zg isUpperStr/isLowerStr: result: 110959, took: 7
+Ziglyph Normalizer.eqlCaseless: result: 625, took: 500
+zg CaseFold.canonCaselessMatch: result: 625, took: 385
+zg CaseFold.compatCaselessMatch: result: 625, took: 593
+Ziglyph CodePointIterator: result: 3769314, took: 2
+zg CodePointIterator: result: 3769314, took: 3
+Ziglyph GraphemeIterator: result: 3691806, took: 48
+zg GraphemeIterator: result: 3691806, took: 16
+Ziglyph Normalizer.nfkc: result: 3934162, took: 416
+zg Normalize.nfkc: result: 3934162, took: 182
+Ziglyph Normalizer.nfc: result: 3955798, took: 57
+zg Normalize.nfc: result: 3955798, took: 28
+Ziglyph Normalizer.nfkd: result: 4006398, took: 172
+zg Normalize.nfkd: result: 4006398, took: 104
+Ziglyph Normalizer.nfd: result: 4028034, took: 169
+zg Normalize.nfd: result: 4028034, took: 104
+Ziglyph Normalizer.eql: result: 625, took: 337
+Zg Normalize.eql: result: 625, took: 53
+Ziglyph display_width.strWidth: result: 3700914, took: 71
+zg DisplayWidth.strWidth: result: 3700914, took: 24
+```
+These results were obtained on an M1 Mac with 16 GiB of RAM.
+In contrast to Ziglyph, zg does not have:
+- Word segmentation
+- Sentence segmentation
+- Collation
+It's possible that any missing functionality will be added in future versions,
+but only if enough demand is present in the community.
diff --git a/build.zig b/build.zig
index c05b4a1..9f7f518 100644
--- a/build.zig
+++ b/build.zig
@@ -315,7 +315,7 @@ pub fn build(b: *std.Build) void {
    scripts_data.addAnonymousImport("scripts", .{ .root_source_file = scripts_gen_out });
    // Properties
-    const props_data = b.addModule("ScriptsData", .{
+    const props_data = b.addModule("PropsData", .{
        .root_source_file = .{ .path = "src/PropsData.zig" },
        .target = target,
        .optimize = optimize,
diff --git a/codegen/canon.zig b/codegen/canon.zig
index 9c84bfc..28b7f28 100644
--- a/codegen/canon.zig
+++ b/codegen/canon.zig
@@ -17,11 +17,10 @@ pub fn main() !void {
    _ = args_iter.skip();
    const output_path = args_iter.next() orelse @panic("No output file arg!");
-    const compressor = std.compress.deflate.compressor;
+    const compressor = std.compress.flate.deflate.compressor;
    var out_file = try std.fs.cwd().createFile(output_path, .{});
    defer out_file.close();
-    var out_comp = try compressor(allocator, out_file.writer(), .{ .level = .best_compression });
+    var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best });
-    defer out_comp.deinit();
    const writer = out_comp.writer();
    const endian = builtin.cpu.arch.endian();
diff --git a/codegen/case_prop.zig b/codegen/case_prop.zig
index ce7ee0d..6c912a8 100644
--- a/codegen/case_prop.zig
+++ b/codegen/case_prop.zig
@@ -118,11 +118,10 @@ pub fn main() !void {
    _ = args_iter.skip();
    const output_path = args_iter.next() orelse @panic("No output file arg!");
-    const compressor = std.compress.deflate.compressor;
+    const compressor = std.compress.flate.deflate.compressor;
    var out_file = try std.fs.cwd().createFile(output_path, .{});
    defer out_file.close();
-    var out_comp = try compressor(allocator, out_file.writer(), .{ .level = .best_compression });
+    var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best });
-    defer out_comp.deinit();
    const writer = out_comp.writer();
    const endian = builtin.cpu.arch.endian();
diff --git a/codegen/ccc.zig b/codegen/ccc.zig
index fd278ea..a01c8d2 100644
--- a/codegen/ccc.zig
+++ b/codegen/ccc.zig
@@ -107,11 +107,10 @@ pub fn main() !void {
    _ = args_iter.skip();
    const output_path = args_iter.next() orelse @panic("No output file arg!");
-    const compressor = std.compress.deflate.compressor;
+    const compressor = std.compress.flate.deflate.compressor;
    var out_file = try std.fs.cwd().createFile(output_path, .{});
    defer out_file.close();
-    var out_comp = try compressor(allocator, out_file.writer(), .{ .level = .best_compression });
+    var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best });
-    defer out_comp.deinit();
    const writer = out_comp.writer();
    const endian = builtin.cpu.arch.endian();
diff --git a/codegen/compat.zig b/codegen/compat.zig
index d0a108a..07616fc 100644
--- a/codegen/compat.zig
+++ b/codegen/compat.zig
@@ -17,11 +17,10 @@ pub fn main() !void {
    _ = args_iter.skip();
    const output_path = args_iter.next() orelse @panic("No output file arg!");
-    const compressor = std.compress.deflate.compressor;
+    const compressor = std.compress.flate.deflate.compressor;
    var out_file = try std.fs.cwd().createFile(output_path, .{});
    defer out_file.close();
-    var out_comp = try compressor(allocator, out_file.writer(), .{ .level = .best_compression });
+    var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best });
-    defer out_comp.deinit();
    const writer = out_comp.writer();
    const endian = builtin.cpu.arch.endian();
diff --git a/codegen/core_props.zig b/codegen/core_props.zig
index 1f46f9e..f60c7a9 100644
--- a/codegen/core_props.zig
+++ b/codegen/core_props.zig
@@ -121,11 +121,10 @@ pub fn main() !void {
    _ = args_iter.skip();
    const output_path = args_iter.next() orelse @panic("No output file arg!");
-    const compressor = std.compress.deflate.compressor;
+    const compressor = std.compress.flate.deflate.compressor;
    var out_file = try std.fs.cwd().createFile(output_path, .{});
    defer out_file.close();
-    var out_comp = try compressor(allocator, out_file.writer(), .{ .level = .best_compression });
+    var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best });
-    defer out_comp.deinit();
    const writer = out_comp.writer();
    const endian = builtin.cpu.arch.endian();
diff --git a/codegen/dwp.zig b/codegen/dwp.zig
index 76a14d3..b36b2c9 100644
--- a/codegen/dwp.zig
+++ b/codegen/dwp.zig
@@ -230,11 +230,10 @@ pub fn main() !void {
    _ = args_iter.skip();
    const output_path = args_iter.next() orelse @panic("No output file arg!");
-    const compressor = std.compress.deflate.compressor;
+    const compressor = std.compress.flate.deflate.compressor;
    var out_file = try std.fs.cwd().createFile(output_path, .{});
    defer out_file.close();
-    var out_comp = try compressor(allocator, out_file.writer(), .{ .level = .best_compression });
+    var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best });
-    defer out_comp.deinit();
    const writer = out_comp.writer();
    const endian = builtin.cpu.arch.endian();
diff --git a/codegen/fold.zig b/codegen/fold.zig
index b3192e7..6dc51ac 100644
--- a/codegen/fold.zig
+++ b/codegen/fold.zig
@@ -63,11 +63,10 @@ pub fn main() !void {
    _ = args_iter.skip();
    const output_path = args_iter.next() orelse @panic("No output file arg!");
-    const compressor = std.compress.deflate.compressor;
+    const compressor = std.compress.flate.deflate.compressor;
    var out_file = try std.fs.cwd().createFile(output_path, .{});
    defer out_file.close();
-    var out_comp = try compressor(allocator, out_file.writer(), .{ .level = .best_compression });
+    var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best });
-    defer out_comp.deinit();
    const writer = out_comp.writer();
    const endian = builtin.cpu.arch.endian();
diff --git a/codegen/gbp.zig b/codegen/gbp.zig
index 39e0da3..3fc4461 100644
--- a/codegen/gbp.zig
+++ b/codegen/gbp.zig
@@ -227,11 +227,10 @@ pub fn main() !void {
    _ = args_iter.skip();
    const output_path = args_iter.next() orelse @panic("No output file arg!");
-    const compressor = std.compress.deflate.compressor;
+    const compressor = std.compress.flate.deflate.compressor;
    var out_file = try std.fs.cwd().createFile(output_path, .{});
    defer out_file.close();
-    var out_comp = try compressor(allocator, out_file.writer(), .{ .level = .best_compression });
+    var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best });
-    defer out_comp.deinit();
    const writer = out_comp.writer();
    const endian = builtin.cpu.arch.endian();
diff --git a/codegen/gencat.zig b/codegen/gencat.zig
index a7713e6..fe06bd7 100644
--- a/codegen/gencat.zig
+++ b/codegen/gencat.zig
@@ -151,11 +151,10 @@ pub fn main() !void {
    _ = args_iter.skip();
    const output_path = args_iter.next() orelse @panic("No output file arg!");
-    const compressor = std.compress.deflate.compressor;
+    const compressor = std.compress.flate.deflate.compressor;
    var out_file = try std.fs.cwd().createFile(output_path, .{});
    defer out_file.close();
-    var out_comp = try compressor(allocator, out_file.writer(), .{ .level = .best_compression });
+    var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best });
-    defer out_comp.deinit();
    const writer = out_comp.writer();
    const endian = builtin.cpu.arch.endian();
diff --git a/codegen/hangul.zig b/codegen/hangul.zig
index 73680c6..2c42bb7 100644
--- a/codegen/hangul.zig
+++ b/codegen/hangul.zig
@@ -116,11 +116,10 @@ pub fn main() !void {
    _ = args_iter.skip();
    const output_path = args_iter.next() orelse @panic("No output file arg!");
-    const compressor = std.compress.deflate.compressor;
+    const compressor = std.compress.flate.deflate.compressor;
    var out_file = try std.fs.cwd().createFile(output_path, .{});
    defer out_file.close();
-    var out_comp = try compressor(allocator, out_file.writer(), .{ .level = .best_compression });
+    var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best });
-    defer out_comp.deinit();
    const writer = out_comp.writer();
    const endian = builtin.cpu.arch.endian();
diff --git a/codegen/lower.zig b/codegen/lower.zig
index 644ec13..a053fe3 100644
--- a/codegen/lower.zig
+++ b/codegen/lower.zig
@@ -17,11 +17,10 @@ pub fn main() !void {
    _ = args_iter.skip();
    const output_path = args_iter.next() orelse @panic("No output file arg!");
-    const compressor = std.compress.deflate.compressor;
+    const compressor = std.compress.flate.deflate.compressor;
    var out_file = try std.fs.cwd().createFile(output_path, .{});
    defer out_file.close();
-    var out_comp = try compressor(allocator, out_file.writer(), .{ .level = .best_compression });
+    var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best });
-    defer out_comp.deinit();
    const writer = out_comp.writer();
    const endian = builtin.cpu.arch.endian();
diff --git a/codegen/normp.zig b/codegen/normp.zig
index 8ceda36..60dabdc 100644
--- a/codegen/normp.zig
+++ b/codegen/normp.zig
@@ -117,11 +117,10 @@ pub fn main() !void {
    _ = args_iter.skip();
    const output_path = args_iter.next() orelse @panic("No output file arg!");
-    const compressor = std.compress.deflate.compressor;
+    const compressor = std.compress.flate.deflate.compressor;
    var out_file = try std.fs.cwd().createFile(output_path, .{});
    defer out_file.close();
-    var out_comp = try compressor(allocator, out_file.writer(), .{ .level = .best_compression });
+    var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best });
-    defer out_comp.deinit();
    const writer = out_comp.writer();
    const endian = builtin.cpu.arch.endian();
diff --git a/codegen/numeric.zig b/codegen/numeric.zig
index ad8490c..038ac0a 100644
--- a/codegen/numeric.zig
+++ b/codegen/numeric.zig
@@ -118,11 +118,10 @@ pub fn main() !void {
    _ = args_iter.skip();
    const output_path = args_iter.next() orelse @panic("No output file arg!");
-    const compressor = std.compress.deflate.compressor;
+    const compressor = std.compress.flate.deflate.compressor;
    var out_file = try std.fs.cwd().createFile(output_path, .{});
    defer out_file.close();
-    var out_comp = try compressor(allocator, out_file.writer(), .{ .level = .best_compression });
+    var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best });
-    defer out_comp.deinit();
    const writer = out_comp.writer();
    const endian = builtin.cpu.arch.endian();
diff --git a/codegen/props.zig b/codegen/props.zig
index 57a205e..24b22e0 100644
--- a/codegen/props.zig
+++ b/codegen/props.zig
@@ -118,11 +118,10 @@ pub fn main() !void {
    _ = args_iter.skip();
    const output_path = args_iter.next() orelse @panic("No output file arg!");
-    const compressor = std.compress.deflate.compressor;
+    const compressor = std.compress.flate.deflate.compressor;
    var out_file = try std.fs.cwd().createFile(output_path, .{});
    defer out_file.close();
-    var out_comp = try compressor(allocator, out_file.writer(), .{ .level = .best_compression });
+    var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best });
-    defer out_comp.deinit();
    const writer = out_comp.writer();
    const endian = builtin.cpu.arch.endian();
diff --git a/codegen/scripts.zig b/codegen/scripts.zig
index e985c1e..660699d 100644
--- a/codegen/scripts.zig
+++ b/codegen/scripts.zig
@@ -288,11 +288,10 @@ pub fn main() !void {
    _ = args_iter.skip();
    const output_path = args_iter.next() orelse @panic("No output file arg!");
-    const compressor = std.compress.deflate.compressor;
+    const compressor = std.compress.flate.deflate.compressor;
    var out_file = try std.fs.cwd().createFile(output_path, .{});
    defer out_file.close();
-    var out_comp = try compressor(allocator, out_file.writer(), .{ .level = .best_compression });
+    var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best });
-    defer out_comp.deinit();
    const writer = out_comp.writer();
    const endian = builtin.cpu.arch.endian();
diff --git a/codegen/upper.zig b/codegen/upper.zig
index 455fe2c..5848911 100644
--- a/codegen/upper.zig
+++ b/codegen/upper.zig
@@ -17,11 +17,10 @@ pub fn main() !void {
    _ = args_iter.skip();
    const output_path = args_iter.next() orelse @panic("No output file arg!");
-    const compressor = std.compress.deflate.compressor;
+    const compressor = std.compress.flate.deflate.compressor;
    var out_file = try std.fs.cwd().createFile(output_path, .{});
    defer out_file.close();
-    var out_comp = try compressor(allocator, out_file.writer(), .{ .level = .best_compression });
+    var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best });
-    defer out_comp.deinit();
    const writer = out_comp.writer();
    const endian = builtin.cpu.arch.endian();
diff --git a/src/CanonData.zig b/src/CanonData.zig
index 64d5555..be2b381 100644
--- a/src/CanonData.zig
+++ b/src/CanonData.zig
@@ -10,11 +10,10 @@ nfd: [][]u21 = undefined,
 const Self = @This();
 pub fn init(allocator: mem.Allocator) !Self {
-    const decompressor = compress.deflate.decompressor;
+    const decompressor = compress.flate.inflate.decompressor;
    const in_bytes = @embedFile("canon");
    var in_fbs = std.io.fixedBufferStream(in_bytes);
-    var in_decomp = try decompressor(allocator, in_fbs.reader(), null);
+    var in_decomp = decompressor(.raw, in_fbs.reader());
-    defer in_decomp.deinit();
    var reader = in_decomp.reader();
    const endian = builtin.cpu.arch.endian();
diff --git a/src/CaseData.zig b/src/CaseData.zig
index c9ccc1e..260637a 100644
--- a/src/CaseData.zig
+++ b/src/CaseData.zig
@@ -15,7 +15,7 @@ prop_s2: []u8 = undefined,
 const Self = @This();
 pub fn init(allocator: mem.Allocator) !Self {
-    const decompressor = compress.deflate.decompressor;
+    const decompressor = compress.flate.inflate.decompressor;
    const endian = builtin.cpu.arch.endian();
    var self = Self{
@@ -32,8 +32,7 @@ pub fn init(allocator: mem.Allocator) !Self {
    // Uppercase
    const upper_bytes = @embedFile("upper");
    var upper_fbs = std.io.fixedBufferStream(upper_bytes);
-    var upper_decomp = try decompressor(allocator, upper_fbs.reader(), null);
+    var upper_decomp = decompressor(.raw, upper_fbs.reader());
-    defer upper_decomp.deinit();
    var upper_reader = upper_decomp.reader();
    while (true) {
@@ -46,8 +45,7 @@ pub fn init(allocator: mem.Allocator) !Self {
    // Lowercase
    const lower_bytes = @embedFile("lower");
    var lower_fbs = std.io.fixedBufferStream(lower_bytes);
-    var lower_decomp = try decompressor(allocator, lower_fbs.reader(), null);
+    var lower_decomp = decompressor(.raw, lower_fbs.reader());
-    defer lower_decomp.deinit();
    var lower_reader = lower_decomp.reader();
    while (true) {
@@ -60,8 +58,7 @@ pub fn init(allocator: mem.Allocator) !Self {
    // Case properties
    const cp_bytes = @embedFile("case_prop");
    var cp_fbs = std.io.fixedBufferStream(cp_bytes);
-    var cp_decomp = try decompressor(allocator, cp_fbs.reader(), null);
+    var cp_decomp = decompressor(.raw, cp_fbs.reader());
-    defer cp_decomp.deinit();
    var cp_reader = cp_decomp.reader();
    const stage_1_len: u16 = try cp_reader.readInt(u16, endian);
diff --git a/src/CaseFold.zig b/src/CaseFold.zig
index 9b10e16..3e7535e 100644
--- a/src/CaseFold.zig
+++ b/src/CaseFold.zig
@@ -10,7 +10,9 @@ fold_data: *const FoldData,
 const Self = @This();
-fn caseFold(
+/// Produces the case folded code points for `cps`. Caller must free returned
+/// slice with `allocator`.
+pub fn caseFold(
    self: Self,
    allocator: mem.Allocator,
    cps: []const u21,
@@ -37,6 +39,8 @@ fn changesWhenCaseFolded(self: Self, cps: []const u21) bool {
    } else false;
 }
+/// Performs compatibility caseless matching of `a` and `b` by decomposing to
+/// NFKD. This is more comprehensive than `canonCaselessMatch`, but slower.
 pub fn compatCaselessMatch(
    self: Self,
    allocator: mem.Allocator,
@@ -108,6 +112,8 @@ test "compatCaselessMatch" {
    try testing.expect(try caser.compatCaselessMatch(allocator, &n, a, c));
 }
+/// Performs canonical caseless string matching by decomposing to NFD. This is
+/// faster than `compatCaselessMatch`, but less comprehensive.
 pub fn canonCaselessMatch(
    self: Self,
    allocator: mem.Allocator,
diff --git a/src/CombiningData.zig b/src/CombiningData.zig
index a40cbde..16b923f 100644
--- a/src/CombiningData.zig
+++ b/src/CombiningData.zig
@@ -10,11 +10,10 @@ s2: []u8 = undefined,
 const Self = @This();
 pub fn init(allocator: mem.Allocator) !Self {
-    const decompressor = compress.deflate.decompressor;
+    const decompressor = compress.flate.inflate.decompressor;
    const in_bytes = @embedFile("ccc");
    var in_fbs = std.io.fixedBufferStream(in_bytes);
-    var in_decomp = try decompressor(allocator, in_fbs.reader(), null);
+    var in_decomp = decompressor(.raw, in_fbs.reader());
-    defer in_decomp.deinit();
    var reader = in_decomp.reader();
    const endian = builtin.cpu.arch.endian();
diff --git a/src/CompatData.zig b/src/CompatData.zig
index a931cb3..3346a06 100644
--- a/src/CompatData.zig
+++ b/src/CompatData.zig
@@ -9,11 +9,10 @@ nfkd: [][]u21 = undefined,
 const Self = @This();
 pub fn init(allocator: mem.Allocator) !Self {
-    const decompressor = compress.deflate.decompressor;
+    const decompressor = compress.flate.inflate.decompressor;
    const in_bytes = @embedFile("compat");
    var in_fbs = std.io.fixedBufferStream(in_bytes);
-    var in_decomp = try decompressor(allocator, in_fbs.reader(), null);
+    var in_decomp = decompressor(.raw, in_fbs.reader());
-    defer in_decomp.deinit();
    var reader = in_decomp.reader();
    const endian = builtin.cpu.arch.endian();
diff --git a/src/FoldData.zig b/src/FoldData.zig
index a06eefe..d4312b0 100644
--- a/src/FoldData.zig
+++ b/src/FoldData.zig
@@ -10,11 +10,10 @@ cwcf: []bool = undefined,
 const Self = @This();
 pub fn init(allocator: mem.Allocator) !Self {
-    const decompressor = compress.deflate.decompressor;
+    const decompressor = compress.flate.inflate.decompressor;
    const in_bytes = @embedFile("fold");
    var in_fbs = std.io.fixedBufferStream(in_bytes);
-    var in_decomp = try decompressor(allocator, in_fbs.reader(), null);
+    var in_decomp = decompressor(.raw, in_fbs.reader());
-    defer in_decomp.deinit();
    var reader = in_decomp.reader();
    const endian = builtin.cpu.arch.endian();
diff --git a/src/GenCatData.zig b/src/GenCatData.zig
index 12501bf..454c45a 100644
--- a/src/GenCatData.zig
+++ b/src/GenCatData.zig
@@ -45,11 +45,10 @@ s3: []u5 = undefined,
 const Self = @This();
 pub fn init(allocator: mem.Allocator) !Self {
-    const decompressor = compress.deflate.decompressor;
+    const decompressor = compress.flate.inflate.decompressor;
    const in_bytes = @embedFile("gencat");
    var in_fbs = std.io.fixedBufferStream(in_bytes);
-    var in_decomp = try decompressor(allocator, in_fbs.reader(), null);
+    var in_decomp = decompressor(.raw, in_fbs.reader());
-    defer in_decomp.deinit();
    var reader = in_decomp.reader();
    const endian = builtin.cpu.arch.endian();
diff --git a/src/GraphemeData.zig b/src/GraphemeData.zig
index 500ffea..1710870 100644
--- a/src/GraphemeData.zig
+++ b/src/GraphemeData.zig
@@ -38,11 +38,10 @@ s3: []u8 = undefined,
 const Self = @This();
 pub fn init(allocator: mem.Allocator) !Self {
-    const decompressor = compress.deflate.decompressor;
+    const decompressor = compress.flate.inflate.decompressor;
    const in_bytes = @embedFile("gbp");
    var in_fbs = std.io.fixedBufferStream(in_bytes);
-    var in_decomp = try decompressor(allocator, in_fbs.reader(), null);
+    var in_decomp = decompressor(.raw, in_fbs.reader());
-    defer in_decomp.deinit();
    var reader = in_decomp.reader();
    const endian = builtin.cpu.arch.endian();
diff --git a/src/HangulData.zig b/src/HangulData.zig
index 99d91c1..5eee427 100644
--- a/src/HangulData.zig
+++ b/src/HangulData.zig
@@ -20,11 +20,10 @@ s2: []u3 = undefined,
 const Self = @This();
 pub fn init(allocator: mem.Allocator) !Self {
-    const decompressor = compress.deflate.decompressor;
+    const decompressor = compress.flate.inflate.decompressor;
    const in_bytes = @embedFile("hangul");
    var in_fbs = std.io.fixedBufferStream(in_bytes);
-    var in_decomp = try decompressor(allocator, in_fbs.reader(), null);
+    var in_decomp = decompressor(.raw, in_fbs.reader());
-    defer in_decomp.deinit();
    var reader = in_decomp.reader();
    const endian = builtin.cpu.arch.endian();
diff --git a/src/NormPropsData.zig b/src/NormPropsData.zig
index 86d497b..899bb8f 100644
--- a/src/NormPropsData.zig
+++ b/src/NormPropsData.zig
@@ -11,11 +11,10 @@ s2: []u4 = undefined,
 const Self = @This();
 pub fn init(allocator: mem.Allocator) !Self {
-    const decompressor = compress.deflate.decompressor;
+    const decompressor = compress.flate.inflate.decompressor;
    const in_bytes = @embedFile("normp");
    var in_fbs = std.io.fixedBufferStream(in_bytes);
-    var in_decomp = try decompressor(allocator, in_fbs.reader(), null);
+    var in_decomp = decompressor(.raw, in_fbs.reader());
-    defer in_decomp.deinit();
    var reader = in_decomp.reader();
    const endian = builtin.cpu.arch.endian();
diff --git a/src/Normalize.zig b/src/Normalize.zig
index f437f4f..85e3aa3 100644
--- a/src/Normalize.zig
+++ b/src/Normalize.zig
@@ -572,47 +572,6 @@ test "eql" {
    try testing.expect(try n.eql(allocator, "foϓ", "fo\u{03D2}\u{0301}"));
 }
-// FCD
-fn getLeadCcc(self: Self, cp: u21) u8 {
-    const dc = self.mapping(cp, .nfd);
-    const dcp = if (dc.form == .same) cp else dc.cps[0];
-    return self.norm_data.ccc_data.ccc(dcp);
-}
-fn getTrailCcc(self: Self, cp: u21) u8 {
-    const dc = self.mapping(cp, .nfd);
-    const dcp = if (dc.form == .same) cp else dc.cps[dc.cps.len - 1];
-    return self.norm_data.ccc_data.ccc(dcp);
-}
-// Fast check to detect if a string is already in NFC or NFD form.
-fn isFcd(self: Self, str: []const u8) bool {
-    var prev_ccc: u8 = 0;
-    var cp_iter = CodePointIterator{ .bytes = str };
-    return while (cp_iter.next()) |cp| {
-        const ccc = self.getLeadCcc(cp.code);
-        if (ccc != 0 and ccc < prev_ccc) break false;
-        prev_ccc = self.getTrailCcc(cp.code);
-    } else true;
-}
-test "isFcd" {
-    const allocator = testing.allocator;
-    const data = try NormData.init(allocator);
-    defer data.deinit();
-    const n = Self{ .norm_data = &data };
-    const is_nfc = "José \u{3D3}";
-    try testing.expect(n.isFcd(is_nfc));
-    const is_nfd = "Jose\u{301} \u{3d2}\u{301}";
-    try testing.expect(n.isFcd(is_nfd));
-    const not_fcd = "Jose\u{301} \u{3d2}\u{315}\u{301}";
-    try testing.expect(!n.isFcd(not_fcd));
-}
 /// Returns true if `str` only contains Latin-1 Supplement
 /// code points. Uses SIMD if possible.
 pub fn isLatin1Only(str: []const u8) bool {
diff --git a/src/PropsData.zig b/src/PropsData.zig
index 9d24e68..f6c8370 100644
--- a/src/PropsData.zig
+++ b/src/PropsData.zig
@@ -15,14 +15,13 @@ num_s2: []u8 = undefined,
 const Self = @This();
 pub fn init(allocator: mem.Allocator) !Self {
-    const decompressor = compress.deflate.decompressor;
+    const decompressor = compress.flate.inflate.decompressor;
    const endian = builtin.cpu.arch.endian();
    // Process DerivedCoreProperties.txt
    const core_bytes = @embedFile("core_props");
    var core_fbs = std.io.fixedBufferStream(core_bytes);
-    var core_decomp = try decompressor(allocator, core_fbs.reader(), null);
+    var core_decomp = decompressor(.raw, core_fbs.reader());
-    defer core_decomp.deinit();
    var core_reader = core_decomp.reader();
    var self = Self{ .allocator = allocator };
@@ -40,8 +39,7 @@ pub fn init(allocator: mem.Allocator) !Self {
    // Process PropList.txt
    const props_bytes = @embedFile("props");
    var props_fbs = std.io.fixedBufferStream(props_bytes);
-    var props_decomp = try decompressor(allocator, props_fbs.reader(), null);
+    var props_decomp = decompressor(.raw, props_fbs.reader());
-    defer props_decomp.deinit();
    var props_reader = props_decomp.reader();
    const stage_1_len: u16 = try props_reader.readInt(u16, endian);
@@ -57,8 +55,7 @@ pub fn init(allocator: mem.Allocator) !Self {
    // Process DerivedNumericType.txt
    const num_bytes = @embedFile("numeric");
    var num_fbs = std.io.fixedBufferStream(num_bytes);
-    var num_decomp = try decompressor(allocator, num_fbs.reader(), null);
+    var num_decomp = decompressor(.raw, num_fbs.reader());
-    defer num_decomp.deinit();
    var num_reader = num_decomp.reader();
    const num_stage_1_len: u16 = try num_reader.readInt(u16, endian);
diff --git a/src/ScriptsData.zig b/src/ScriptsData.zig
index 4e371bf..415ce2d 100644
--- a/src/ScriptsData.zig
+++ b/src/ScriptsData.zig
@@ -4,7 +4,7 @@ const compress = std.compress;
 const mem = std.mem;
 const testing = std.testing;
-/// Script
+/// Scripts
 pub const Script = enum {
    none,
    Adlam,
@@ -180,11 +180,10 @@ s3: []u8 = undefined,
 const Self = @This();
 pub fn init(allocator: mem.Allocator) !Self {
-    const decompressor = compress.deflate.decompressor;
+    const decompressor = compress.flate.inflate.decompressor;
    const in_bytes = @embedFile("scripts");
    var in_fbs = std.io.fixedBufferStream(in_bytes);
-    var in_decomp = try decompressor(allocator, in_fbs.reader(), null);
+    var in_decomp = decompressor(.raw, in_fbs.reader());
-    defer in_decomp.deinit();
    var reader = in_decomp.reader();
    const endian = builtin.cpu.arch.endian();
diff --git a/src/WidthData.zig b/src/WidthData.zig
index b9ef84e..cf31b7f 100644
--- a/src/WidthData.zig
+++ b/src/WidthData.zig
@@ -14,11 +14,10 @@ s2: []i3 = undefined,
 const Self = @This();
 pub fn init(allocator: mem.Allocator) !Self {
-    const decompressor = compress.deflate.decompressor;
+    const decompressor = compress.flate.inflate.decompressor;
    const in_bytes = @embedFile("dwp");
    var in_fbs = std.io.fixedBufferStream(in_bytes);
-    var in_decomp = try decompressor(allocator, in_fbs.reader(), null);
+    var in_decomp = decompressor(.raw, in_fbs.reader());
-    defer in_decomp.deinit();
    var reader = in_decomp.reader();
    const endian = builtin.cpu.arch.endian();