Update README.md to new API

author: Sam Atman 2025-04-30 15:59:56 -0400
committer: Sam Atman 2025-04-30 15:59:56 -0400
commit: 3a6cfa885697b5fecf0473b602dc38a0af0d3f7d (patch)
tree: 64eacca86ec8302bf966e4731e125243067d8ba9
parent: Rest of the Renamings (diff)
download: zg-3a6cfa885697b5fecf0473b602dc38a0af0d3f7d.tar.gz
zg-3a6cfa885697b5fecf0473b602dc38a0af0d3f7d.tar.xz
zg-3a6cfa885697b5fecf0473b602dc38a0af0d3f7d.zip
2 files changed, 121 insertions, 115 deletions
diff --git a/README.md b/README.md
index 1069eef..80e544f 100644
--- a/README.md
+++ b/README.md
@@ -82,21 +82,20 @@ them, `Grapheme`, and an `Iterator` to iterate over them in a string.
 In your `build.zig`:
 ```zig
-exe.root_module.addImport("grapheme", zg.module("grapheme"));
+exe.root_module.addImport("Graphemes", zg.module("Graphemes"));
 ```
 In your code:
 ```zig
-const grapheme = @import("grapheme");
+const Graphemes = @import("Graphemes");
 test "Grapheme cluster iterator" {
-    // we need some Unicode data to process Grapheme Clusters.
+    const graph = try Graphemes.init(allocator);
-    const gd = try grapheme.GraphemeData.init(allocator);
+    defer graph.deinit(allocator);
-    defer gd.deinit(allocator);
    const str = "He\u{301}"; // Hé
-    var iter = grapheme.Iterator.init(str, &gd);
+    var iter = graph.iterator(str);
    var i: usize = 0;
@@ -123,133 +122,133 @@ test "Grapheme cluster iterator" {
 ## Unicode General Categories
-To detect the general category for a code point, use the `GenCatData` module.
+To detect the general category for a code point, use the `GeneralCategories` module.
 In your `build.zig`:
 ```zig
-exe.root_module.addImport("GenCatData", zg.module("GenCatData"));
+exe.root_module.addImport("GeneralCategories", zg.module("GeneralCategories"));
 ```
 In your code:
 ```zig
-const GenCatData = @import("GenCatData");
+const GeneralCategories = @import("GeneralCategories");
 test "General Category" {
-    const gcd = try GenCatData.init(allocator);
+    const gen_cat = try GeneralCategories.init(allocator);
-    defer gcd.deinit(allocator);
+    defer gen_cat.deinit(allocator);
    // The `gc` method returns the abbreviated General Category.
    // These abbreviations and descriptive comments can be found
    // in the source file `src/GenCatData.zig` as an enum.
-    try expect(gcd.gc('A') == .Lu); // Lu: uppercase letter
+    try expect(gen_cat.gc('A') == .Lu); // Lu: uppercase letter
-    try expect(gcd.gc('3') == .Nd); // Nd: decimal number
+    try expect(gen_cat.gc('3') == .Nd); // Nd: decimal number
    // The following are convenience methods for groups of General
    // Categories. For example, all letter categories start with `L`:
    // Lu, Ll, Lt, Lo.
-    try expect(gcd.isControl(0));
+    try expect(gen_cat.isControl(0));
-    try expect(gcd.isLetter('z'));
+    try expect(gen_cat.isLetter('z'));
-    try expect(gcd.isMark('\u{301}'));
+    try expect(gen_cat.isMark('\u{301}'));
-    try expect(gcd.isNumber('3'));
+    try expect(gen_cat.isNumber('3'));
-    try expect(gcd.isPunctuation('['));
+    try expect(gen_cat.isPunctuation('['));
-    try expect(gcd.isSeparator(' '));
+    try expect(gen_cat.isSeparator(' '));
-    try expect(gcd.isSymbol('©'));
+    try expect(gen_cat.isSymbol('©'));
 }
 ```
 ## Unicode Properties
-You can detect common properties of a code point with the `PropsData` module.
+You can detect common properties of a code point with the `Properties` module.
 In your `build.zig`:
 ```zig
-exe.root_module.addImport("PropsData", zg.module("PropsData"));
+exe.root_module.addImport("Properties", zg.module("Properties"));
 ```
 In your code:
 ```zig
-const PropsData = @import("PropsData");
+const Properties = @import("Properties");
 test "Properties" {
-    const pd = try PropsData.init(allocator);
+    const props = try Properties.init(allocator);
-    defer pd.deinit(allocator);
+    defer props.deinit(allocator);
    // Mathematical symbols and letters.
-    try expect(pd.isMath('+'));
+    try expect(props.isMath('+'));
    // Alphabetic only code points.
-    try expect(pd.isAlphabetic('Z'));
+    try expect(props.isAlphabetic('Z'));
    // Space, tab, and other separators.
-    try expect(pd.isWhitespace(' '));
+    try expect(props.isWhitespace(' '));
    // Hexadecimal digits and variations thereof.
-    try expect(pd.isHexDigit('f'));
+    try expect(props.isHexDigit('f'));
-    try expect(!pd.isHexDigit('z'));
+    try expect(!props.isHexDigit('z'));
    // Accents, dieresis, and other combining marks.
-    try expect(pd.isDiacritic('\u{301}'));
+    try expect(props.isDiacritic('\u{301}'));
    // Unicode has a specification for valid identifiers like
    // the ones used in programming and regular expressions.
-    try expect(pd.isIdStart('Z')); // Identifier start character
+    try expect(props.isIdStart('Z')); // Identifier start character
-    try expect(!pd.isIdStart('1'));
+    try expect(!props.isIdStart('1'));
-    try expect(pd.isIdContinue('1'));
+    try expect(props.isIdContinue('1'));
    // The `X` versions add some code points that can appear after
    // normalizing a string.
-    try expect(pd.isXidStart('\u{b33}')); // Extended identifier start character
+    try expect(props.isXidStart('\u{b33}')); // Extended identifier start character
-    try expect(pd.isXidContinue('\u{e33}'));
+    try expect(props.isXidContinue('\u{e33}'));
-    try expect(!pd.isXidStart('1'));
+    try expect(!props.isXidStart('1'));
    // Note surprising Unicode numeric type properties!
-    try expect(pd.isNumeric('\u{277f}'));
+    try expect(props.isNumeric('\u{277f}'));
-    try expect(!pd.isNumeric('3')); // 3 is not numeric!
+    try expect(!props.isNumeric('3')); // 3 is not numeric!
-    try expect(pd.isDigit('\u{2070}'));
+    try expect(props.isDigit('\u{2070}'));
-    try expect(!pd.isDigit('3')); // 3 is not a digit!
+    try expect(!props.isDigit('3')); // 3 is not a digit!
-    try expect(pd.isDecimal('3')); // 3 is a decimal digit
+    try expect(props.isDecimal('3')); // 3 is a decimal digit
 }
 ```
 ## Letter Case Detection and Conversion
-To detect and convert to and from different letter cases, use the `CaseData`
+To detect and convert to and from different letter cases, use the `LetterCasing`
 module.
 In your `build.zig`:
 ```zig
-exe.root_module.addImport("CaseData", zg.module("CaseData"));
+exe.root_module.addImport("LetterCasing", zg.module("LetterCasing"));
 ```
 In your code:
 ```zig
-const CaseData = @import("CaseData");
+const LetterCasing = @import("LetterCasing");
 test "Case" {
-    const cd = try CaseData.init(allocator);
+    const case = try LetterCasing.init(allocator);
-    defer cd.deinit(allocator);
+    defer case.deinit(allocator);
    // Upper and lower case.
-    try expect(cd.isUpper('A'));
+    try expect(case.isUpper('A'));
-    try expect('A' == cd.toUpper('a'));
+    try expect('A' == case.toUpper('a'));
-    try expect(cd.isLower('a'));
+    try expect(case.isLower('a'));
-    try expect('a' == cd.toLower('A'));
+    try expect('a' == case.toLower('A'));
    // Code points that have case.
-    try expect(cd.isCased('É'));
+    try expect(case.isCased('É'));
-    try expect(!cd.isCased('3'));
+    try expect(!case.isCased('3'));
    // Case detection and conversion for strings.
-    try expect(cd.isUpperStr("HELLO 123!"));
+    try expect(case.isUpperStr("HELLO 123!"));
-    const ucased = try cd.toUpperStr(allocator, "hello 123");
+    const ucased = try case.toUpperStr(allocator, "hello 123");
    defer allocator.free(ucased);
    try expectEqualStrings("HELLO 123", ucased);
-    try expect(cd.isLowerStr("hello 123!"));
+    try expect(case.isLowerStr("hello 123!"));
-    const lcased = try cd.toLowerStr(allocator, "HELLO 123");
+    const lcased = try case.toLowerStr(allocator, "HELLO 123");
    defer allocator.free(lcased);
    try expectEqualStrings("hello 123", lcased);
 }
@@ -292,37 +291,32 @@ In your code:
 const Normalize = @import("Normalize");
 test "Normalization" {
-    // We need lots of Unicode dta for normalization.
+    const normalize = try Normalize.init(allocator);
-    var norm_data: Normalize.NormData = undefined;
+    defer normalize.deinit(allocator);
-    try Normalize.NormData.init(&norm_data, allocator);
-    defer norm_data.deinit(allocator);
-    // The `Normalize` structure takes a pointer to the data.
-    const n = Normalize{ .norm_data = &norm_data };
    // NFC: Canonical composition
-    const nfc_result = try n.nfc(allocator, "Complex char: \u{3D2}\u{301}");
+    const nfc_result = try normalize.nfc(allocator, "Complex char: \u{3D2}\u{301}");
    defer nfc_result.deinit(allocator);
    try expectEqualStrings("Complex char: \u{3D3}", nfc_result.slice);
    // NFKC: Compatibility composition
-    const nfkc_result = try n.nfkc(allocator, "Complex char: \u{03A5}\u{0301}");
+    const nfkc_result = try normalize.nfkc(allocator, "Complex char: \u{03A5}\u{0301}");
    defer nfkc_result.deinit(allocator);
    try expectEqualStrings("Complex char: \u{038E}", nfkc_result.slice);
    // NFD: Canonical decomposition
-    const nfd_result = try n.nfd(allocator, "Héllo World! \u{3d3}");
+    const nfd_result = try normalize.nfd(allocator, "Héllo World! \u{3d3}");
    defer nfd_result.deinit(allocator);
    try expectEqualStrings("He\u{301}llo World! \u{3d2}\u{301}", nfd_result.slice);
    // NFKD: Compatibility decomposition
-    const nfkd_result = try n.nfkd(allocator, "Héllo World! \u{3d3}");
+    const nfkd_result = try normalize.nfkd(allocator, "Héllo World! \u{3d3}");
    defer nfkd_result.deinit(allocator);
    try expectEqualStrings("He\u{301}llo World! \u{3a5}\u{301}", nfkd_result.slice);
    // Test for equality of two strings after normalizing to NFC.
-    try expect(try n.eql(allocator, "foé", "foe\u{0301}"));
+    try expect(try normalize.eql(allocator, "foé", "foe\u{0301}"));
-    try expect(try n.eql(allocator, "foϓ", "fo\u{03D2}\u{0301}"));
+    try expect(try normalize.eql(allocator, "foϓ", "fo\u{03D2}\u{0301}"));
 }
 ```
 The `Result` returned by normalization functions may or may not be copied from the
@@ -347,46 +341,53 @@ for this.
 In your `build.zig`:
 ```zig
-exe.root_module.addImport("Normalize", zg.module("Normalize"));
+exe.root_module.addImport("CaseFolding", zg.module("CaseFolding"));
-exe.root_module.addImport("CaseFold", zg.module("CaseFold"));
 ```
 In your code:
 ```zig
-const Normalize = @import("Normalize");
+const CaseFolding = @import("CaseFolding");
-const CaseFold = @import("CaseFold");
 test "Caseless matching" {
-    // We need to normalize during the matching process.
-    var norm_data: Normalize.NormData = undefined;
-    try Normalize.NormData.init(&norm_data, allocator);
-    defer norm_data.deinit(allocator);
-    const n = Normalize{ .norm_data = &norm_data };
    // We need Unicode case fold data.
-    const cfd = try CaseFold.FoldData.init(allocator);
+    const case_fold = try CaseFolding.init(allocator);
-    defer cfd.deinit(allocator);
+    defer case_fold.deinit(allocator);
-    // The `CaseFold` structure takes a pointer to the data.
-    const cf = CaseFold{ .fold_data = &cfd };
    // `compatCaselessMatch` provides the deepest level of caseless
    // matching because it decomposes fully to NFKD.
    const a = "Héllo World! \u{3d3}";
    const b = "He\u{301}llo World! \u{3a5}\u{301}";
-    try expect(try cf.compatCaselessMatch(allocator, &n, a, b));
+    try expect(try case_fold.compatCaselessMatch(allocator, &n, a, b));
    const c = "He\u{301}llo World! \u{3d2}\u{301}";
-    try expect(try cf.compatCaselessMatch(allocator, &n, a, c));
+    try expect(try case_fold.compatCaselessMatch(allocator, &n, a, c));
    // `canonCaselessMatch` isn't as comprehensive as `compatCaselessMatch`
    // because it only decomposes to NFD. Naturally, it's faster because of this.
-    try expect(!try cf.canonCaselessMatch(allocator, &n, a, b));
+    try expect(!try case_fold.canonCaselessMatch(allocator, &n, a, b));
-    try expect(try cf.canonCaselessMatch(allocator, &n, a, c));
+    try expect(try case_fold.canonCaselessMatch(allocator, &n, a, c));
+}
+```
+Case folding needs to use the `Normalize` module in order to produce the compatibility
+forms for comparison.  If you are already using a `Normalize` for other purposes,
+`CaseFolding` can borrow it:
+```zig
+const CaseFolding = @import("CaseFolding");
+const Normalize = @import("Normalize");
+test "Initialize With a Normalize" {
+    const normalize = try Normalize.init(allocator);
+    // You're responsible for freeing this:
+    defer normalize.deinit(allocator);
+    const case_fold = try CaseFolding.initWithNormalize(allocator, normalize);
+    // This deinit runs first (defers run in reverse order) and will not free your normalize.
+    defer case_fold.deinit(allocator);
 }
 ```
 ## Display Width of Characters and Strings
 When displaying text with a fixed-width font on a terminal screen, it's very
@@ -408,12 +409,8 @@ In your code:
 const DisplayWidth = @import("DisplayWidth");
 test "Display width" {
-    // We need Unicode data for display width calculation.
+    const dw = try DisplayWidth.init(allocator);
-    const dwd = try DisplayWidth.DisplayWidthData.init(allocator);
+    defer dw.deinit(allocator);
-    defer dwd.deinit(allocator);
-    // The `DisplayWidth` structure takes a pointer to the data.
-    const dw = DisplayWidth{ .data = &dwd };
    // String display width
    try expectEqual(@as(usize, 5), dw.strWidth("Hello\r\n"));
@@ -462,34 +459,43 @@ const zg = b.dependency("zg", .{
 });
 ```
-The other options are `c0_width` and `c1_width`.  The standard behavior is to treat C0 and C1 control codes as zero-width, except for delete and backspace, which are -1 (the logic ensures that a `strWidth` is always at least 0).  If printing control codes with replacement characters, it's necessary to assign these a width, hence the options.  When provided these values must fit in an `i4`, this allows for C1s to be printed as `\u{80}` if desired.
+The other options are `c0_width` and `c1_width`.  The standard behavior is to treat
+C0 and C1 control codes as zero-width, except for delete and backspace, which are
+-1 (the logic ensures that a `strWidth` is always at least 0).  If printing
+control codes with replacement characters, it's necessary to assign these a width,
+hence the options.  When provided, these values must fit in an `i4`; this allows
+for C1s to be printed as `\u{80}` if desired.
+`DisplayWidth` uses the `Graphemes` module internally.  If you already have one,
+it can be borrowed using `DisplayWidth.initWithGraphemes(allocator, graphemes)`
+in the same fashion as shown for `CaseFolding` and `Normalize`.
 ## Scripts
 Unicode categorizes code points by the Script in which they belong. A Script
 collects letters and other symbols that belong to a particular writing system.
-You can detect the Script for a code point with the `ScriptsData` module.
+You can detect the Script for a code point with the `Scripts` module.
 In your `build.zig`:
 ```zig
-exe.root_module.addImport("ScriptsData", zg.module("ScriptsData"));
+exe.root_module.addImport("Scripts", zg.module("Scripts"));
 ```
 In your code:
 ```zig
-const ScriptsData = @import("ScriptsData");
+const Scripts= @import("Scripts");
 test "Scripts" {
-    const sd = try ScriptsData.init(allocator);
+    const scripts = try Scripts.init(allocator);
-    defer sd.deinit(allocator);
+    defer scripts.deinit(allocator);
    // To see the full list of Scripts, look at the
-    // `src/ScriptsData.zig` file. They are list in an enum.
+    // `src/Scripts.zig` file. They are listed in an enum.
-    try expect(sd.script('A') == .Latin);
+    try expect(scripts.script('A') == .Latin);
-    try expect(sd.script('Ω') == .Greek);
+    try expect(scripts.script('Ω') == .Greek);
-    try expect(sd.script('צ') == .Hebrew);
+    try expect(scripts.script('צ') == .Hebrew);
 }
 ```
diff --git a/src/DisplayWidth.zig b/src/DisplayWidth.zig
index 11ec59e..c0d6d96 100644
--- a/src/DisplayWidth.zig
+++ b/src/DisplayWidth.zig
@@ -13,10 +13,10 @@ pub const DisplayWidthData = @import("DisplayWidthData");
 const Graphemes = @import("Graphemes");
-g_data: Graphemes,
+graphemes: Graphemes,
 s1: []u16 = undefined,
 s2: []i4 = undefined,
-owns_gdata: bool,
+owns_graphemes: bool,
 const DisplayWidth = @This();
@@ -26,16 +26,16 @@ pub fn init(allocator: mem.Allocator) mem.Allocator.Error!DisplayWidth {
        allocator.free(dw.s1);
        allocator.free(dw.s2);
    }
-    dw.owns_gdata = true;
+    dw.owns_graphemes = true;
-    dw.g_data = try Graphemes.init(allocator);
+    dw.graphemes = try Graphemes.init(allocator);
-    errdefer dw.g_data.deinit(allocator);
+    errdefer dw.graphemes.deinit(allocator);
    return dw;
 }
-pub fn initWithGraphemeData(allocator: mem.Allocator, g_data: Graphemes) mem.Allocator.Error!DisplayWidth {
+pub fn initWithGraphemes(allocator: mem.Allocator, graphemes: Graphemes) mem.Allocator.Error!DisplayWidth {
    var dw = try DisplayWidth.setup(allocator);
-    dw.g_data = g_data;
+    dw.graphemes = graphemes;
-    dw.owns_gdata = false;
+    dw.owns_graphemes = false;
    return dw;
 }
@@ -67,7 +67,7 @@ fn setup(allocator: mem.Allocator) mem.Allocator.Error!DisplayWidth {
 pub fn deinit(dw: *const DisplayWidth, allocator: mem.Allocator) void {
    allocator.free(dw.s1);
    allocator.free(dw.s2);
-    if (dw.owns_gdata) dw.g_data.deinit(allocator);
+    if (dw.owns_graphemes) dw.graphemes.deinit(allocator);
 }
 /// codePointWidth returns the number of cells `cp` requires when rendered
@@ -119,7 +119,7 @@ pub fn strWidth(dw: DisplayWidth, str: []const u8) usize {
        return @intCast(@max(0, total));
    }
-    var giter = dw.g_data.iterator(str);
+    var giter = dw.graphemes.iterator(str);
    while (giter.next()) |gc| {
        var cp_iter = CodePointIterator{ .bytes = gc.bytes(str) };
author	Sam Atman	2025-04-30 15:59:56 -0400
committer	Sam Atman	2025-04-30 15:59:56 -0400
commit	3a6cfa885697b5fecf0473b602dc38a0af0d3f7d (patch)
tree	64eacca86ec8302bf966e4731e125243067d8ba9
parent	Rest of the Renamings (diff)
download	zg-3a6cfa885697b5fecf0473b602dc38a0af0d3f7d.tar.gz zg-3a6cfa885697b5fecf0473b602dc38a0af0d3f7d.tar.xz zg-3a6cfa885697b5fecf0473b602dc38a0af0d3f7d.zip