diff options
| -rw-r--r-- | README.md | 537 | ||||
| -rw-r--r-- | build.zig | 2 | ||||
| -rw-r--r-- | codegen/canon.zig | 5 | ||||
| -rw-r--r-- | codegen/case_prop.zig | 5 | ||||
| -rw-r--r-- | codegen/ccc.zig | 5 | ||||
| -rw-r--r-- | codegen/compat.zig | 5 | ||||
| -rw-r--r-- | codegen/core_props.zig | 5 | ||||
| -rw-r--r-- | codegen/dwp.zig | 5 | ||||
| -rw-r--r-- | codegen/fold.zig | 5 | ||||
| -rw-r--r-- | codegen/gbp.zig | 5 | ||||
| -rw-r--r-- | codegen/gencat.zig | 5 | ||||
| -rw-r--r-- | codegen/hangul.zig | 5 | ||||
| -rw-r--r-- | codegen/lower.zig | 5 | ||||
| -rw-r--r-- | codegen/normp.zig | 5 | ||||
| -rw-r--r-- | codegen/numeric.zig | 5 | ||||
| -rw-r--r-- | codegen/props.zig | 5 | ||||
| -rw-r--r-- | codegen/scripts.zig | 5 | ||||
| -rw-r--r-- | codegen/upper.zig | 5 | ||||
| -rw-r--r-- | src/CanonData.zig | 5 | ||||
| -rw-r--r-- | src/CaseData.zig | 11 | ||||
| -rw-r--r-- | src/CaseFold.zig | 8 | ||||
| -rw-r--r-- | src/CombiningData.zig | 5 | ||||
| -rw-r--r-- | src/CompatData.zig | 5 | ||||
| -rw-r--r-- | src/FoldData.zig | 5 | ||||
| -rw-r--r-- | src/GenCatData.zig | 5 | ||||
| -rw-r--r-- | src/GraphemeData.zig | 5 | ||||
| -rw-r--r-- | src/HangulData.zig | 5 | ||||
| -rw-r--r-- | src/NormPropsData.zig | 5 | ||||
| -rw-r--r-- | src/Normalize.zig | 41 | ||||
| -rw-r--r-- | src/PropsData.zig | 11 | ||||
| -rw-r--r-- | src/ScriptsData.zig | 7 | ||||
| -rw-r--r-- | src/WidthData.zig | 5 |
32 files changed, 606 insertions, 136 deletions
diff --git a/README.md b/README.md new file mode 100644 index 0000000..d4fc8f6 --- /dev/null +++ b/README.md | |||
| @@ -0,0 +1,537 @@ | |||
| 1 | # zg | ||
| 2 | zg provides Unicode text processing for Zig projects. | ||
| 3 | |||
| 4 | ## Unicode Version | ||
| 5 | The Unicode version supported by zg is 15.1.0. | ||
| 6 | |||
| 7 | ## Zig Version | ||
| 8 | The minimum Zig version required is 0.12.0-dev.3496+a2df84d0. | ||
| 9 | |||
| 10 | ## Integrating zg into your Zig Project | ||
| 11 | You first need to add zg as a dependency in your `build.zig.zon` file: | ||
| 12 | |||
| 13 | ```zig | ||
| 14 | .zg = .{ | ||
| 15 | .url = "https://codeberg.org/dude_the_builder/zg/archive/v0.1.0.tar.gz", | ||
| 16 | } | ||
| 17 | ``` | ||
| 18 | |||
| 19 | Then instantiate the dependency in your `build.zig`: | ||
| 20 | |||
| 21 | |||
| 22 | ```zig | ||
| 23 | const zg = b.dependency("zg", .{}); | ||
| 24 | ``` | ||
| 25 | |||
| 26 | ## A Modular Approach | ||
| 27 | zg is a modular library. This approach minimizes binary file size and memory | ||
| 28 | requirements by only including the Unicode data required for the specified module. | ||
| 29 | The following sections describe the various modules and their specific use case. | ||
| 30 | |||
| 31 | ## Code Points | ||
| 32 | In the `code_point` module, you'll find a data structure representing a single code | ||
| 33 | point, `CodePoint`, and an `Iterator` to iterate over the code points in a string. | ||
| 34 | |||
| 35 | In your `build.zig`: | ||
| 36 | |||
| 37 | ```zig | ||
| 38 | exe.root_module.addImport("code_point", zg.module("code_point")); | ||
| 39 | ``` | ||
| 40 | |||
| 41 | In your code: | ||
| 42 | |||
| 43 | ```zig | ||
| 44 | const code_point = @import("code_point"); | ||
| 45 | |||
| 46 | test "Code point iterator" { | ||
| 47 | const str = "Hi 😊"; | ||
| 48 | var iter = code_point.Iterator{ .bytes = str }; | ||
| 49 | var i: usize = 0; | ||
| 50 | |||
| 51 | while (iter.next()) |cp| : (i += 1) { | ||
| 52 | // The `code` field is the actual code point scalar as a `u21`. | ||
| 53 | if (i == 0) try expect(cp.code == 'H'); | ||
| 54 | if (i == 1) try expect(cp.code == 'i'); | ||
| 55 | if (i == 2) try expect(cp.code == ' '); | ||
| 56 | |||
| 57 | if (i == 3) { | ||
| 58 | try expect(cp.code == '😊'); | ||
| 59 | |||
| 60 | // The `offset` field is the byte offset in the | ||
| 61 | // source string. | ||
| 62 | try expect(cp.offset == 3); | ||
| 63 | |||
| 64 | // The `len` field is the length in bytes of the | ||
| 65 | // code point in the source string. | ||
| 66 | try expect(cp.len == 4); | ||
| 67 | } | ||
| 68 | } | ||
| 69 | } | ||
| 70 | ``` | ||
| 71 | |||
| 72 | ## Grapheme Clusters | ||
| 73 | Many characters are composed from more than one code point. These are known as | ||
| 74 | Grapheme Clusters and the `grapheme` module has a data structure to represent | ||
| 75 | them, `Grapheme`, and an `Iterator` to iterate over them in a string. | ||
| 76 | |||
| 77 | In your `build.zig`: | ||
| 78 | |||
| 79 | ```zig | ||
| 80 | exe.root_module.addImport("grapheme", zg.module("grapheme")); | ||
| 81 | ``` | ||
| 82 | |||
| 83 | In your code: | ||
| 84 | |||
| 85 | ```zig | ||
| 86 | const grapheme = @import("grapheme"); | ||
| 87 | |||
| 88 | test "Grapheme cluster iterator" { | ||
| 89 | // We need some Unicode data to process Grapheme Clusters. | ||
| 90 | const gd = try grapheme.GraphemeData.init(allocator); | ||
| 91 | defer gd.deinit(); | ||
| 92 | |||
| 93 | const str = "He\u{301}"; // Hé | ||
| 94 | var iter = grapheme.Iterator.init(str, &gd); | ||
| 95 | |||
| 96 | var i: usize = 0; | ||
| 97 | |||
| 98 | while (iter.next()) |gc| : (i += 1) { | ||
| 99 | // The `len` field is the length in bytes of the | ||
| 100 | // grapheme cluster in the source string. | ||
| 101 | if (i == 0) try expect(gc.len == 1); | ||
| 102 | |||
| 103 | if (i == 1) { | ||
| 104 | try expect(gc.len == 3); | ||
| 105 | |||
| 106 | // The `offset` in bytes of the grapheme cluster | ||
| 107 | // in the source string. | ||
| 108 | try expect(gc.offset == 1); | ||
| 109 | |||
| 110 | // The `bytes` method returns the slice of bytes | ||
| 111 | // that comprise this grapheme cluster in the | ||
| 112 | // source string `str`. | ||
| 113 | try expectEqualStrings("e\u{301}", gc.bytes(str)); | ||
| 114 | } | ||
| 115 | } | ||
| 116 | } | ||
| 117 | ``` | ||
| 118 | |||
| 119 | ## Unicode General Categories | ||
| 120 | To detect the general category for a code point, use the `GenCatData` module. | ||
| 121 | |||
| 122 | In your `build.zig`: | ||
| 123 | |||
| 124 | ```zig | ||
| 125 | exe.root_module.addImport("GenCatData", zg.module("GenCatData")); | ||
| 126 | ``` | ||
| 127 | |||
| 128 | In your code: | ||
| 129 | |||
| 130 | ```zig | ||
| 131 | const GenCatData = @import("GenCatData"); | ||
| 132 | |||
| 133 | test "General Category" { | ||
| 134 | const gcd = try GenCatData.init(allocator); | ||
| 135 | defer gcd.deinit(); | ||
| 136 | |||
| 137 | // The `gc` method returns the abbreviated General Category. | ||
| 138 | // These abbreviations and descriptive comments can be found | ||
| 139 | // in the source file `src/GenCatData.zig` as an enum. | ||
| 140 | try expect(gcd.gc('A') == .Lu); // Lu: uppercase letter | ||
| 141 | try expect(gcd.gc('3') == .Nd); // Nd: decimal number | ||
| 142 | |||
| 143 | // The following are convenience methods for groups of General | ||
| 144 | // Categories. For example, all letter categories start with `L`: | ||
| 145 | // Lu, Ll, Lt, Lm, Lo. | ||
| 146 | try expect(gcd.isControl(0)); | ||
| 147 | try expect(gcd.isLetter('z')); | ||
| 148 | try expect(gcd.isMark('\u{301}')); | ||
| 149 | try expect(gcd.isNumber('3')); | ||
| 150 | try expect(gcd.isPunctuation('[')); | ||
| 151 | try expect(gcd.isSeparator(' ')); | ||
| 152 | try expect(gcd.isSymbol('©')); | ||
| 153 | } | ||
| 154 | ``` | ||
| 155 | |||
| 156 | ## Unicode Properties | ||
| 157 | You can detect common properties of a code point with the `PropsData` module. | ||
| 158 | |||
| 159 | In your `build.zig`: | ||
| 160 | |||
| 161 | ```zig | ||
| 162 | exe.root_module.addImport("PropsData", zg.module("PropsData")); | ||
| 163 | ``` | ||
| 164 | |||
| 165 | In your code: | ||
| 166 | |||
| 167 | ```zig | ||
| 168 | const PropsData = @import("PropsData"); | ||
| 169 | |||
| 170 | test "Properties" { | ||
| 171 | const pd = try PropsData.init(allocator); | ||
| 172 | defer pd.deinit(); | ||
| 173 | |||
| 174 | // Mathematical symbols and letters. | ||
| 175 | try expect(pd.isMath('+')); | ||
| 176 | // Alphabetic only code points. | ||
| 177 | try expect(pd.isAlphabetic('Z')); | ||
| 178 | // Space, tab, and other separators. | ||
| 179 | try expect(pd.isWhitespace(' ')); | ||
| 180 | // Hexadecimal digits and variations thereof. | ||
| 181 | try expect(pd.isHexDigit('f')); | ||
| 182 | try expect(!pd.isHexDigit('z')); | ||
| 183 | |||
| 184 | // Accents, dieresis, and other combining marks. | ||
| 185 | try expect(pd.isDiacritic('\u{301}')); | ||
| 186 | |||
| 187 | // Unicode has a specification for valid identifiers like | ||
| 188 | // the ones used in programming and regular expressions. | ||
| 189 | try expect(pd.isIdStart('Z')); // Identifier start character | ||
| 190 | try expect(!pd.isIdStart('1')); | ||
| 191 | try expect(pd.isIdContinue('1')); | ||
| 192 | |||
| 193 | // The `X` versions add some code points that can appear after | ||
| 194 | // normalizing a string. | ||
| 195 | try expect(pd.isXidStart('\u{b33}')); // Extended identifier start character | ||
| 196 | try expect(pd.isXidContinue('\u{e33}')); | ||
| 197 | try expect(!pd.isXidStart('1')); | ||
| 198 | |||
| 199 | // Note surprising Unicode numeric type properties! | ||
| 200 | try expect(pd.isNumeric('\u{277f}')); | ||
| 201 | try expect(!pd.isNumeric('3')); // 3 is not numeric! | ||
| 202 | try expect(pd.isDigit('\u{2070}')); | ||
| 203 | try expect(!pd.isDigit('3')); // 3 is not a digit! | ||
| 204 | try expect(pd.isDecimal('3')); // 3 is a decimal digit | ||
| 205 | } | ||
| 206 | ``` | ||
| 207 | |||
| 208 | ## Letter Case Detection and Conversion | ||
| 209 | To detect and convert to and from different letter cases, use the `CaseData` | ||
| 210 | module. | ||
| 211 | |||
| 212 | In your `build.zig`: | ||
| 213 | |||
| 214 | ```zig | ||
| 215 | exe.root_module.addImport("CaseData", zg.module("CaseData")); | ||
| 216 | ``` | ||
| 217 | |||
| 218 | In your code: | ||
| 219 | |||
| 220 | ```zig | ||
| 221 | const CaseData = @import("CaseData"); | ||
| 222 | |||
| 223 | test "Case" { | ||
| 224 | const cd = try CaseData.init(allocator); | ||
| 225 | defer cd.deinit(); | ||
| 226 | |||
| 227 | // Upper and lower case. | ||
| 228 | try expect(cd.isUpper('A')); | ||
| 229 | try expect('A' == cd.toUpper('a')); | ||
| 230 | try expect(cd.isLower('a')); | ||
| 231 | try expect('a' == cd.toLower('A')); | ||
| 232 | |||
| 233 | // Code points that have case. | ||
| 234 | try expect(cd.isCased('É')); | ||
| 235 | try expect(!cd.isCased('3')); | ||
| 236 | |||
| 237 | // Case detection and conversion for strings. | ||
| 238 | try expect(cd.isUpperStr("HELLO 123!")); | ||
| 239 | const ucased = try cd.toUpperStr(allocator, "hello 123"); | ||
| 240 | defer allocator.free(ucased); | ||
| 241 | try expectEqualStrings("HELLO 123", ucased); | ||
| 242 | |||
| 243 | try expect(cd.isLowerStr("hello 123!")); | ||
| 244 | const lcased = try cd.toLowerStr(allocator, "HELLO 123"); | ||
| 245 | defer allocator.free(lcased); | ||
| 246 | try expectEqualStrings("hello 123", lcased); | ||
| 247 | } | ||
| 248 | ``` | ||
| 249 | |||
| 250 | ## Normalization | ||
| 251 | Unicode normalization is the process of converting a string into a uniform | ||
| 252 | representation that can guarantee a known structure by following a strict set | ||
| 253 | of rules. There are four normalization forms: | ||
| 254 | |||
| 255 | Canonical Composition (NFC) | ||
| 256 | : The most compact representation obtained by first | ||
| 257 | decomposing to Canonical Decomposition and then composing to NFC. | ||
| 258 | |||
| 259 | Compatibility Composition (NFKC) | ||
| 260 | : The most comprehensive composition obtained | ||
| 261 | by first decomposing to Compatibility Decomposition and then composing to NFKC. | ||
| 262 | |||
| 263 | Canonical Decomposition (NFD) | ||
| 264 | : Only code points with canonical decompositions | ||
| 265 | are decomposed. This is a more compact and faster decomposition but will not | ||
| 266 | provide the most comprehensive normalization possible. | ||
| 267 | |||
| 268 | Compatibility Decomposition (NFKD) | ||
| 269 | : The most comprehensive decomposition method | ||
| 270 | where both canonical and compatibility decompositions are performed recursively. | ||
| 271 | |||
| 272 | zg has methods to produce all four normalization forms in the `Normalize` module. | ||
| 273 | |||
| 274 | In your `build.zig`: | ||
| 275 | |||
| 276 | ```zig | ||
| 277 | exe.root_module.addImport("Normalize", zg.module("Normalize")); | ||
| 278 | ``` | ||
| 279 | |||
| 280 | In your code: | ||
| 281 | |||
| 282 | ```zig | ||
| 283 | const Normalize = @import("Normalize"); | ||
| 284 | |||
| 285 | test "Normalization" { | ||
| 286 | // We need lots of Unicode data for normalization. | ||
| 287 | var norm_data = try Normalize.NormData.init(allocator); | ||
| 288 | defer norm_data.deinit(); | ||
| 289 | |||
| 290 | // The `Normalize` structure takes a pointer to the data. | ||
| 291 | const n = Normalize{ .norm_data = &norm_data }; | ||
| 292 | |||
| 293 | // NFC: Canonical composition | ||
| 294 | const nfc_result = try n.nfc(allocator, "Complex char: \u{3D2}\u{301}"); | ||
| 295 | defer nfc_result.deinit(); | ||
| 296 | try expectEqualStrings("Complex char: \u{3D3}", nfc_result.slice); | ||
| 297 | |||
| 298 | // NFKC: Compatibility composition | ||
| 299 | const nfkc_result = try n.nfkc(allocator, "Complex char: \u{03A5}\u{0301}"); | ||
| 300 | defer nfkc_result.deinit(); | ||
| 301 | try expectEqualStrings("Complex char: \u{038E}", nfkc_result.slice); | ||
| 302 | |||
| 303 | // NFD: Canonical decomposition | ||
| 304 | const nfd_result = try n.nfd(allocator, "Héllo World! \u{3d3}"); | ||
| 305 | defer nfd_result.deinit(); | ||
| 306 | try expectEqualStrings("He\u{301}llo World! \u{3d2}\u{301}", nfd_result.slice); | ||
| 307 | |||
| 308 | // NFKD: Compatibility decomposition | ||
| 309 | const nfkd_result = try n.nfkd(allocator, "Héllo World! \u{3d3}"); | ||
| 310 | defer nfkd_result.deinit(); | ||
| 311 | try expectEqualStrings("He\u{301}llo World! \u{3a5}\u{301}", nfkd_result.slice); | ||
| 312 | |||
| 313 | // Test for equality of two strings after normalizing to NFC. | ||
| 314 | try expect(try n.eql(allocator, "foé", "foe\u{0301}")); | ||
| 315 | try expect(try n.eql(allocator, "foϓ", "fo\u{03D2}\u{0301}")); | ||
| 316 | } | ||
| 317 | ``` | ||
| 318 | |||
| 319 | ## Caseless Matching via Case Folding | ||
| 320 | Unicode provides a more efficient way of comparing strings while ignoring letter | ||
| 321 | case differences: case folding. When you case fold a string, it's converted into a | ||
| 322 | normalized case form suitable for efficient matching. Use the `CaseFold` module | ||
| 323 | for this. | ||
| 324 | |||
| 325 | In your `build.zig`: | ||
| 326 | |||
| 327 | ```zig | ||
| 328 | exe.root_module.addImport("Normalize", zg.module("Normalize")); | ||
| 329 | exe.root_module.addImport("CaseFold", zg.module("CaseFold")); | ||
| 330 | ``` | ||
| 331 | |||
| 332 | In your code: | ||
| 333 | |||
| 334 | ```zig | ||
| 335 | const Normalize = @import("Normalize"); | ||
| 336 | const CaseFold = @import("CaseFold"); | ||
| 337 | |||
| 338 | test "Caseless matching" { | ||
| 339 | // We need to normalize during the matching process. | ||
| 340 | var norm_data = try Normalize.NormData.init(allocator); | ||
| 341 | defer norm_data.deinit(); | ||
| 342 | const n = Normalize{ .norm_data = &norm_data }; | ||
| 343 | |||
| 344 | // We need Unicode case fold data. | ||
| 345 | const cfd = try CaseFold.FoldData.init(allocator); | ||
| 346 | defer cfd.deinit(); | ||
| 347 | |||
| 348 | // The `CaseFold` structure takes a pointer to the data. | ||
| 349 | const cf = CaseFold{ .fold_data = &cfd }; | ||
| 350 | |||
| 351 | // `compatCaselessMatch` provides the deepest level of caseless | ||
| 352 | // matching because it decomposes fully to NFKD. | ||
| 353 | const a = "Héllo World! \u{3d3}"; | ||
| 354 | const b = "He\u{301}llo World! \u{3a5}\u{301}"; | ||
| 355 | try expect(try cf.compatCaselessMatch(allocator, &n, a, b)); | ||
| 356 | |||
| 357 | const c = "He\u{301}llo World! \u{3d2}\u{301}"; | ||
| 358 | try expect(try cf.compatCaselessMatch(allocator, &n, a, c)); | ||
| 359 | |||
| 360 | // `canonCaselessMatch` isn't as comprehensive as `compatCaselessMatch` | ||
| 361 | // because it only decomposes to NFD. Naturally, it's faster because of this. | ||
| 362 | try expect(!try cf.canonCaselessMatch(allocator, &n, a, b)); | ||
| 363 | try expect(try cf.canonCaselessMatch(allocator, &n, a, c)); | ||
| 364 | } | ||
| 365 | ``` | ||
| 366 | |||
| 367 | ## Display Width of Characters and Strings | ||
| 368 | When displaying text with a fixed-width font on a terminal screen, it's very | ||
| 369 | important to know exactly how many columns or cells each character should take. | ||
| 370 | Most characters will use one column, but there are many, like emoji and East | ||
| 371 | Asian ideographs, that need more space. The `DisplayWidth` module provides | ||
| 372 | methods for this purpose. It also has methods that use the display width calculation | ||
| 373 | to `center`, `padLeft`, `padRight`, and `wrap` text. | ||
| 374 | |||
| 375 | In your `build.zig`: | ||
| 376 | |||
| 377 | ```zig | ||
| 378 | exe.root_module.addImport("DisplayWidth", zg.module("DisplayWidth")); | ||
| 379 | ``` | ||
| 380 | |||
| 381 | In your code: | ||
| 382 | |||
| 383 | ```zig | ||
| 384 | const DisplayWidth = @import("DisplayWidth"); | ||
| 385 | |||
| 386 | test "Display width" { | ||
| 387 | // We need Unicode data for display width calculation. | ||
| 388 | const dwd = try DisplayWidth.DisplayWidthData.init(allocator); | ||
| 389 | defer dwd.deinit(); | ||
| 390 | |||
| 391 | // The `DisplayWidth` structure takes a pointer to the data. | ||
| 392 | const dw = DisplayWidth{ .data = &dwd }; | ||
| 393 | |||
| 394 | // String display width | ||
| 395 | try expectEqual(@as(usize, 5), dw.strWidth("Hello\r\n")); | ||
| 396 | try expectEqual(@as(usize, 8), dw.strWidth("Hello 😊")); | ||
| 397 | try expectEqual(@as(usize, 8), dw.strWidth("Héllo 😊")); | ||
| 398 | try expectEqual(@as(usize, 9), dw.strWidth("Ẓ̌á̲l͔̝̞̄̑͌g̖̘̘̔̔͢͞͝o̪̔T̢̙̫̈̍͞e̬͈͕͌̏͑x̺̍ṭ̓̓ͅ")); | ||
| 399 | try expectEqual(@as(usize, 17), dw.strWidth("슬라바 우크라이나")); | ||
| 400 | |||
| 401 | // Centering text | ||
| 402 | const centered = try dw.center(allocator, "w😊w", 10, "-"); | ||
| 403 | defer allocator.free(centered); | ||
| 404 | try expectEqualStrings("---w😊w---", centered); | ||
| 405 | |||
| 406 | // Pad left | ||
| 407 | const right_aligned = try dw.padLeft(allocator, "abc", 9, "*"); | ||
| 408 | defer allocator.free(right_aligned); | ||
| 409 | try expectEqualStrings("******abc", right_aligned); | ||
| 410 | |||
| 411 | // Pad right | ||
| 412 | const left_aligned = try dw.padRight(allocator, "abc", 9, "*"); | ||
| 413 | defer allocator.free(left_aligned); | ||
| 414 | try expectEqualStrings("abc******", left_aligned); | ||
| 415 | |||
| 416 | // Wrap text | ||
| 417 | const input = "The quick brown fox\r\njumped over the lazy dog!"; | ||
| 418 | const wrapped = try dw.wrap(allocator, input, 10, 3); | ||
| 419 | defer allocator.free(wrapped); | ||
| 420 | const want = | ||
| 421 | \\The quick | ||
| 422 | \\brown fox | ||
| 423 | \\jumped | ||
| 424 | \\over the | ||
| 425 | \\lazy dog! | ||
| 426 | ; | ||
| 427 | try expectEqualStrings(want, wrapped); | ||
| 428 | } | ||
| 429 | ``` | ||
| 430 | |||
| 431 | ## Scripts | ||
| 432 | Unicode categorizes code points by the Script to which they belong. A Script | ||
| 433 | collects letters and other symbols that belong to a particular writing system. | ||
| 434 | You can detect the Script for a code point with the `ScriptsData` module. | ||
| 435 | |||
| 436 | In your `build.zig`: | ||
| 437 | |||
| 438 | ```zig | ||
| 439 | exe.root_module.addImport("ScriptsData", zg.module("ScriptsData")); | ||
| 440 | ``` | ||
| 441 | |||
| 442 | In your code: | ||
| 443 | |||
| 444 | ```zig | ||
| 445 | const ScriptsData = @import("ScriptsData"); | ||
| 446 | |||
| 447 | test "Scripts" { | ||
| 448 | const sd = try ScriptsData.init(allocator); | ||
| 449 | defer sd.deinit(); | ||
| 450 | |||
| 451 | // To see the full list of Scripts, look at the | ||
| 452 | // `src/ScriptsData.zig` file. They are listed in an enum. | ||
| 453 | try expect(sd.script('A') == .Latin); | ||
| 454 | try expect(sd.script('Ω') == .Greek); | ||
| 455 | try expect(sd.script('צ') == .Hebrew); | ||
| 456 | } | ||
| 457 | ``` | ||
| 458 | |||
| 459 | ## Relation to Ziglyph | ||
| 460 | zg is a total re-write of some of the components of Ziglyph. The idea was to | ||
| 461 | reduce binary size and improve performance. These goals were achieved by using | ||
| 462 | trie-like data structures instead of generated functions. Where Ziglyph uses a | ||
| 463 | function call, zg uses an array lookup, which is considerably faster. In addition, all | ||
| 464 | these data structures in zg are loaded at runtime from compressed versions in the | ||
| 465 | binary. This allows for smaller binary sizes at the expense of increased memory | ||
| 466 | footprint at runtime. | ||
| 467 | |||
| 468 | Benchmarks demonstrate the above stated goals have been met: | ||
| 469 | |||
| 470 | ```plain | ||
| 471 | Binary sizes ======= | ||
| 472 | |||
| 473 | 149K ziglyph_case | ||
| 474 | 87K zg_case | ||
| 475 | |||
| 476 | 275K ziglyph_caseless | ||
| 477 | 168K zg_caseless | ||
| 478 | |||
| 479 | 68K ziglyph_codepoint | ||
| 480 | 68K zg_codepoint | ||
| 481 | |||
| 482 | 101K ziglyph_grapheme | ||
| 483 | 86K zg_grapheme | ||
| 484 | |||
| 485 | 185K ziglyph_normalizer | ||
| 486 | 152K zg_normalize | ||
| 487 | |||
| 488 | 101K ziglyph_width | ||
| 489 | 86K zg_width | ||
| 490 | |||
| 491 | Benchmarks ========== | ||
| 492 | |||
| 493 | Ziglyph toUpperStr/toLowerStr: result: 7911596, took: 80 | ||
| 494 | Ziglyph isUpperStr/isLowerStr: result: 110959, took: 17 | ||
| 495 | zg toUpperStr/toLowerStr: result: 7911596, took: 62 | ||
| 496 | zg isUpperStr/isLowerStr: result: 110959, took: 7 | ||
| 497 | |||
| 498 | Ziglyph Normalizer.eqlCaseless: result: 625, took: 500 | ||
| 499 | zg CaseFold.canonCaselessMatch: result: 625, took: 385 | ||
| 500 | zg CaseFold.compatCaselessMatch: result: 625, took: 593 | ||
| 501 | |||
| 502 | Ziglyph CodePointIterator: result: 3769314, took: 2 | ||
| 503 | zg CodePointIterator: result: 3769314, took: 3 | ||
| 504 | |||
| 505 | Ziglyph GraphemeIterator: result: 3691806, took: 48 | ||
| 506 | zg GraphemeIterator: result: 3691806, took: 16 | ||
| 507 | |||
| 508 | Ziglyph Normalizer.nfkc: result: 3934162, took: 416 | ||
| 509 | zg Normalize.nfkc: result: 3934162, took: 182 | ||
| 510 | |||
| 511 | Ziglyph Normalizer.nfc: result: 3955798, took: 57 | ||
| 512 | zg Normalize.nfc: result: 3955798, took: 28 | ||
| 513 | |||
| 514 | Ziglyph Normalizer.nfkd: result: 4006398, took: 172 | ||
| 515 | zg Normalize.nfkd: result: 4006398, took: 104 | ||
| 516 | |||
| 517 | Ziglyph Normalizer.nfd: result: 4028034, took: 169 | ||
| 518 | zg Normalize.nfd: result: 4028034, took: 104 | ||
| 519 | |||
| 520 | Ziglyph Normalizer.eql: result: 625, took: 337 | ||
| 521 | Zg Normalize.eql: result: 625, took: 53 | ||
| 522 | |||
| 523 | Ziglyph display_width.strWidth: result: 3700914, took: 71 | ||
| 524 | zg DisplayWidth.strWidth: result: 3700914, took: 24 | ||
| 525 | ``` | ||
| 526 | |||
| 527 | These results were obtained on an M1 Mac with 16 GiB of RAM. | ||
| 528 | |||
| 529 | In contrast to Ziglyph, zg does not have: | ||
| 530 | |||
| 531 | - Word segmentation | ||
| 532 | - Sentence segmentation | ||
| 533 | - Collation | ||
| 534 | |||
| 535 | It's possible that any missing functionality will be added in future versions, | ||
| 536 | but only if enough demand is present in the community. | ||
| 537 | |||
| @@ -315,7 +315,7 @@ pub fn build(b: *std.Build) void { | |||
| 315 | scripts_data.addAnonymousImport("scripts", .{ .root_source_file = scripts_gen_out }); | 315 | scripts_data.addAnonymousImport("scripts", .{ .root_source_file = scripts_gen_out }); |
| 316 | 316 | ||
| 317 | // Properties | 317 | // Properties |
| 318 | const props_data = b.addModule("ScriptsData", .{ | 318 | const props_data = b.addModule("PropsData", .{ |
| 319 | .root_source_file = .{ .path = "src/PropsData.zig" }, | 319 | .root_source_file = .{ .path = "src/PropsData.zig" }, |
| 320 | .target = target, | 320 | .target = target, |
| 321 | .optimize = optimize, | 321 | .optimize = optimize, |
diff --git a/codegen/canon.zig b/codegen/canon.zig index 9c84bfc..28b7f28 100644 --- a/codegen/canon.zig +++ b/codegen/canon.zig | |||
| @@ -17,11 +17,10 @@ pub fn main() !void { | |||
| 17 | _ = args_iter.skip(); | 17 | _ = args_iter.skip(); |
| 18 | const output_path = args_iter.next() orelse @panic("No output file arg!"); | 18 | const output_path = args_iter.next() orelse @panic("No output file arg!"); |
| 19 | 19 | ||
| 20 | const compressor = std.compress.deflate.compressor; | 20 | const compressor = std.compress.flate.deflate.compressor; |
| 21 | var out_file = try std.fs.cwd().createFile(output_path, .{}); | 21 | var out_file = try std.fs.cwd().createFile(output_path, .{}); |
| 22 | defer out_file.close(); | 22 | defer out_file.close(); |
| 23 | var out_comp = try compressor(allocator, out_file.writer(), .{ .level = .best_compression }); | 23 | var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best }); |
| 24 | defer out_comp.deinit(); | ||
| 25 | const writer = out_comp.writer(); | 24 | const writer = out_comp.writer(); |
| 26 | 25 | ||
| 27 | const endian = builtin.cpu.arch.endian(); | 26 | const endian = builtin.cpu.arch.endian(); |
diff --git a/codegen/case_prop.zig b/codegen/case_prop.zig index ce7ee0d..6c912a8 100644 --- a/codegen/case_prop.zig +++ b/codegen/case_prop.zig | |||
| @@ -118,11 +118,10 @@ pub fn main() !void { | |||
| 118 | _ = args_iter.skip(); | 118 | _ = args_iter.skip(); |
| 119 | const output_path = args_iter.next() orelse @panic("No output file arg!"); | 119 | const output_path = args_iter.next() orelse @panic("No output file arg!"); |
| 120 | 120 | ||
| 121 | const compressor = std.compress.deflate.compressor; | 121 | const compressor = std.compress.flate.deflate.compressor; |
| 122 | var out_file = try std.fs.cwd().createFile(output_path, .{}); | 122 | var out_file = try std.fs.cwd().createFile(output_path, .{}); |
| 123 | defer out_file.close(); | 123 | defer out_file.close(); |
| 124 | var out_comp = try compressor(allocator, out_file.writer(), .{ .level = .best_compression }); | 124 | var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best }); |
| 125 | defer out_comp.deinit(); | ||
| 126 | const writer = out_comp.writer(); | 125 | const writer = out_comp.writer(); |
| 127 | 126 | ||
| 128 | const endian = builtin.cpu.arch.endian(); | 127 | const endian = builtin.cpu.arch.endian(); |
diff --git a/codegen/ccc.zig b/codegen/ccc.zig index fd278ea..a01c8d2 100644 --- a/codegen/ccc.zig +++ b/codegen/ccc.zig | |||
| @@ -107,11 +107,10 @@ pub fn main() !void { | |||
| 107 | _ = args_iter.skip(); | 107 | _ = args_iter.skip(); |
| 108 | const output_path = args_iter.next() orelse @panic("No output file arg!"); | 108 | const output_path = args_iter.next() orelse @panic("No output file arg!"); |
| 109 | 109 | ||
| 110 | const compressor = std.compress.deflate.compressor; | 110 | const compressor = std.compress.flate.deflate.compressor; |
| 111 | var out_file = try std.fs.cwd().createFile(output_path, .{}); | 111 | var out_file = try std.fs.cwd().createFile(output_path, .{}); |
| 112 | defer out_file.close(); | 112 | defer out_file.close(); |
| 113 | var out_comp = try compressor(allocator, out_file.writer(), .{ .level = .best_compression }); | 113 | var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best }); |
| 114 | defer out_comp.deinit(); | ||
| 115 | const writer = out_comp.writer(); | 114 | const writer = out_comp.writer(); |
| 116 | 115 | ||
| 117 | const endian = builtin.cpu.arch.endian(); | 116 | const endian = builtin.cpu.arch.endian(); |
diff --git a/codegen/compat.zig b/codegen/compat.zig index d0a108a..07616fc 100644 --- a/codegen/compat.zig +++ b/codegen/compat.zig | |||
| @@ -17,11 +17,10 @@ pub fn main() !void { | |||
| 17 | _ = args_iter.skip(); | 17 | _ = args_iter.skip(); |
| 18 | const output_path = args_iter.next() orelse @panic("No output file arg!"); | 18 | const output_path = args_iter.next() orelse @panic("No output file arg!"); |
| 19 | 19 | ||
| 20 | const compressor = std.compress.deflate.compressor; | 20 | const compressor = std.compress.flate.deflate.compressor; |
| 21 | var out_file = try std.fs.cwd().createFile(output_path, .{}); | 21 | var out_file = try std.fs.cwd().createFile(output_path, .{}); |
| 22 | defer out_file.close(); | 22 | defer out_file.close(); |
| 23 | var out_comp = try compressor(allocator, out_file.writer(), .{ .level = .best_compression }); | 23 | var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best }); |
| 24 | defer out_comp.deinit(); | ||
| 25 | const writer = out_comp.writer(); | 24 | const writer = out_comp.writer(); |
| 26 | 25 | ||
| 27 | const endian = builtin.cpu.arch.endian(); | 26 | const endian = builtin.cpu.arch.endian(); |
diff --git a/codegen/core_props.zig b/codegen/core_props.zig index 1f46f9e..f60c7a9 100644 --- a/codegen/core_props.zig +++ b/codegen/core_props.zig | |||
| @@ -121,11 +121,10 @@ pub fn main() !void { | |||
| 121 | _ = args_iter.skip(); | 121 | _ = args_iter.skip(); |
| 122 | const output_path = args_iter.next() orelse @panic("No output file arg!"); | 122 | const output_path = args_iter.next() orelse @panic("No output file arg!"); |
| 123 | 123 | ||
| 124 | const compressor = std.compress.deflate.compressor; | 124 | const compressor = std.compress.flate.deflate.compressor; |
| 125 | var out_file = try std.fs.cwd().createFile(output_path, .{}); | 125 | var out_file = try std.fs.cwd().createFile(output_path, .{}); |
| 126 | defer out_file.close(); | 126 | defer out_file.close(); |
| 127 | var out_comp = try compressor(allocator, out_file.writer(), .{ .level = .best_compression }); | 127 | var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best }); |
| 128 | defer out_comp.deinit(); | ||
| 129 | const writer = out_comp.writer(); | 128 | const writer = out_comp.writer(); |
| 130 | 129 | ||
| 131 | const endian = builtin.cpu.arch.endian(); | 130 | const endian = builtin.cpu.arch.endian(); |
diff --git a/codegen/dwp.zig b/codegen/dwp.zig index 76a14d3..b36b2c9 100644 --- a/codegen/dwp.zig +++ b/codegen/dwp.zig | |||
| @@ -230,11 +230,10 @@ pub fn main() !void { | |||
| 230 | _ = args_iter.skip(); | 230 | _ = args_iter.skip(); |
| 231 | const output_path = args_iter.next() orelse @panic("No output file arg!"); | 231 | const output_path = args_iter.next() orelse @panic("No output file arg!"); |
| 232 | 232 | ||
| 233 | const compressor = std.compress.deflate.compressor; | 233 | const compressor = std.compress.flate.deflate.compressor; |
| 234 | var out_file = try std.fs.cwd().createFile(output_path, .{}); | 234 | var out_file = try std.fs.cwd().createFile(output_path, .{}); |
| 235 | defer out_file.close(); | 235 | defer out_file.close(); |
| 236 | var out_comp = try compressor(allocator, out_file.writer(), .{ .level = .best_compression }); | 236 | var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best }); |
| 237 | defer out_comp.deinit(); | ||
| 238 | const writer = out_comp.writer(); | 237 | const writer = out_comp.writer(); |
| 239 | 238 | ||
| 240 | const endian = builtin.cpu.arch.endian(); | 239 | const endian = builtin.cpu.arch.endian(); |
diff --git a/codegen/fold.zig b/codegen/fold.zig index b3192e7..6dc51ac 100644 --- a/codegen/fold.zig +++ b/codegen/fold.zig | |||
| @@ -63,11 +63,10 @@ pub fn main() !void { | |||
| 63 | _ = args_iter.skip(); | 63 | _ = args_iter.skip(); |
| 64 | const output_path = args_iter.next() orelse @panic("No output file arg!"); | 64 | const output_path = args_iter.next() orelse @panic("No output file arg!"); |
| 65 | 65 | ||
| 66 | const compressor = std.compress.deflate.compressor; | 66 | const compressor = std.compress.flate.deflate.compressor; |
| 67 | var out_file = try std.fs.cwd().createFile(output_path, .{}); | 67 | var out_file = try std.fs.cwd().createFile(output_path, .{}); |
| 68 | defer out_file.close(); | 68 | defer out_file.close(); |
| 69 | var out_comp = try compressor(allocator, out_file.writer(), .{ .level = .best_compression }); | 69 | var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best }); |
| 70 | defer out_comp.deinit(); | ||
| 71 | const writer = out_comp.writer(); | 70 | const writer = out_comp.writer(); |
| 72 | 71 | ||
| 73 | const endian = builtin.cpu.arch.endian(); | 72 | const endian = builtin.cpu.arch.endian(); |
diff --git a/codegen/gbp.zig b/codegen/gbp.zig index 39e0da3..3fc4461 100644 --- a/codegen/gbp.zig +++ b/codegen/gbp.zig | |||
| @@ -227,11 +227,10 @@ pub fn main() !void { | |||
| 227 | _ = args_iter.skip(); | 227 | _ = args_iter.skip(); |
| 228 | const output_path = args_iter.next() orelse @panic("No output file arg!"); | 228 | const output_path = args_iter.next() orelse @panic("No output file arg!"); |
| 229 | 229 | ||
| 230 | const compressor = std.compress.deflate.compressor; | 230 | const compressor = std.compress.flate.deflate.compressor; |
| 231 | var out_file = try std.fs.cwd().createFile(output_path, .{}); | 231 | var out_file = try std.fs.cwd().createFile(output_path, .{}); |
| 232 | defer out_file.close(); | 232 | defer out_file.close(); |
| 233 | var out_comp = try compressor(allocator, out_file.writer(), .{ .level = .best_compression }); | 233 | var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best }); |
| 234 | defer out_comp.deinit(); | ||
| 235 | const writer = out_comp.writer(); | 234 | const writer = out_comp.writer(); |
| 236 | 235 | ||
| 237 | const endian = builtin.cpu.arch.endian(); | 236 | const endian = builtin.cpu.arch.endian(); |
diff --git a/codegen/gencat.zig b/codegen/gencat.zig index a7713e6..fe06bd7 100644 --- a/codegen/gencat.zig +++ b/codegen/gencat.zig | |||
| @@ -151,11 +151,10 @@ pub fn main() !void { | |||
| 151 | _ = args_iter.skip(); | 151 | _ = args_iter.skip(); |
| 152 | const output_path = args_iter.next() orelse @panic("No output file arg!"); | 152 | const output_path = args_iter.next() orelse @panic("No output file arg!"); |
| 153 | 153 | ||
| 154 | const compressor = std.compress.deflate.compressor; | 154 | const compressor = std.compress.flate.deflate.compressor; |
| 155 | var out_file = try std.fs.cwd().createFile(output_path, .{}); | 155 | var out_file = try std.fs.cwd().createFile(output_path, .{}); |
| 156 | defer out_file.close(); | 156 | defer out_file.close(); |
| 157 | var out_comp = try compressor(allocator, out_file.writer(), .{ .level = .best_compression }); | 157 | var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best }); |
| 158 | defer out_comp.deinit(); | ||
| 159 | const writer = out_comp.writer(); | 158 | const writer = out_comp.writer(); |
| 160 | 159 | ||
| 161 | const endian = builtin.cpu.arch.endian(); | 160 | const endian = builtin.cpu.arch.endian(); |
diff --git a/codegen/hangul.zig b/codegen/hangul.zig index 73680c6..2c42bb7 100644 --- a/codegen/hangul.zig +++ b/codegen/hangul.zig | |||
| @@ -116,11 +116,10 @@ pub fn main() !void { | |||
| 116 | _ = args_iter.skip(); | 116 | _ = args_iter.skip(); |
| 117 | const output_path = args_iter.next() orelse @panic("No output file arg!"); | 117 | const output_path = args_iter.next() orelse @panic("No output file arg!"); |
| 118 | 118 | ||
| 119 | const compressor = std.compress.deflate.compressor; | 119 | const compressor = std.compress.flate.deflate.compressor; |
| 120 | var out_file = try std.fs.cwd().createFile(output_path, .{}); | 120 | var out_file = try std.fs.cwd().createFile(output_path, .{}); |
| 121 | defer out_file.close(); | 121 | defer out_file.close(); |
| 122 | var out_comp = try compressor(allocator, out_file.writer(), .{ .level = .best_compression }); | 122 | var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best }); |
| 123 | defer out_comp.deinit(); | ||
| 124 | const writer = out_comp.writer(); | 123 | const writer = out_comp.writer(); |
| 125 | 124 | ||
| 126 | const endian = builtin.cpu.arch.endian(); | 125 | const endian = builtin.cpu.arch.endian(); |
diff --git a/codegen/lower.zig b/codegen/lower.zig index 644ec13..a053fe3 100644 --- a/codegen/lower.zig +++ b/codegen/lower.zig | |||
| @@ -17,11 +17,10 @@ pub fn main() !void { | |||
| 17 | _ = args_iter.skip(); | 17 | _ = args_iter.skip(); |
| 18 | const output_path = args_iter.next() orelse @panic("No output file arg!"); | 18 | const output_path = args_iter.next() orelse @panic("No output file arg!"); |
| 19 | 19 | ||
| 20 | const compressor = std.compress.deflate.compressor; | 20 | const compressor = std.compress.flate.deflate.compressor; |
| 21 | var out_file = try std.fs.cwd().createFile(output_path, .{}); | 21 | var out_file = try std.fs.cwd().createFile(output_path, .{}); |
| 22 | defer out_file.close(); | 22 | defer out_file.close(); |
| 23 | var out_comp = try compressor(allocator, out_file.writer(), .{ .level = .best_compression }); | 23 | var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best }); |
| 24 | defer out_comp.deinit(); | ||
| 25 | const writer = out_comp.writer(); | 24 | const writer = out_comp.writer(); |
| 26 | 25 | ||
| 27 | const endian = builtin.cpu.arch.endian(); | 26 | const endian = builtin.cpu.arch.endian(); |
diff --git a/codegen/normp.zig b/codegen/normp.zig index 8ceda36..60dabdc 100644 --- a/codegen/normp.zig +++ b/codegen/normp.zig | |||
| @@ -117,11 +117,10 @@ pub fn main() !void { | |||
| 117 | _ = args_iter.skip(); | 117 | _ = args_iter.skip(); |
| 118 | const output_path = args_iter.next() orelse @panic("No output file arg!"); | 118 | const output_path = args_iter.next() orelse @panic("No output file arg!"); |
| 119 | 119 | ||
| 120 | const compressor = std.compress.deflate.compressor; | 120 | const compressor = std.compress.flate.deflate.compressor; |
| 121 | var out_file = try std.fs.cwd().createFile(output_path, .{}); | 121 | var out_file = try std.fs.cwd().createFile(output_path, .{}); |
| 122 | defer out_file.close(); | 122 | defer out_file.close(); |
| 123 | var out_comp = try compressor(allocator, out_file.writer(), .{ .level = .best_compression }); | 123 | var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best }); |
| 124 | defer out_comp.deinit(); | ||
| 125 | const writer = out_comp.writer(); | 124 | const writer = out_comp.writer(); |
| 126 | 125 | ||
| 127 | const endian = builtin.cpu.arch.endian(); | 126 | const endian = builtin.cpu.arch.endian(); |
diff --git a/codegen/numeric.zig b/codegen/numeric.zig index ad8490c..038ac0a 100644 --- a/codegen/numeric.zig +++ b/codegen/numeric.zig | |||
| @@ -118,11 +118,10 @@ pub fn main() !void { | |||
| 118 | _ = args_iter.skip(); | 118 | _ = args_iter.skip(); |
| 119 | const output_path = args_iter.next() orelse @panic("No output file arg!"); | 119 | const output_path = args_iter.next() orelse @panic("No output file arg!"); |
| 120 | 120 | ||
| 121 | const compressor = std.compress.deflate.compressor; | 121 | const compressor = std.compress.flate.deflate.compressor; |
| 122 | var out_file = try std.fs.cwd().createFile(output_path, .{}); | 122 | var out_file = try std.fs.cwd().createFile(output_path, .{}); |
| 123 | defer out_file.close(); | 123 | defer out_file.close(); |
| 124 | var out_comp = try compressor(allocator, out_file.writer(), .{ .level = .best_compression }); | 124 | var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best }); |
| 125 | defer out_comp.deinit(); | ||
| 126 | const writer = out_comp.writer(); | 125 | const writer = out_comp.writer(); |
| 127 | 126 | ||
| 128 | const endian = builtin.cpu.arch.endian(); | 127 | const endian = builtin.cpu.arch.endian(); |
diff --git a/codegen/props.zig b/codegen/props.zig index 57a205e..24b22e0 100644 --- a/codegen/props.zig +++ b/codegen/props.zig | |||
| @@ -118,11 +118,10 @@ pub fn main() !void { | |||
| 118 | _ = args_iter.skip(); | 118 | _ = args_iter.skip(); |
| 119 | const output_path = args_iter.next() orelse @panic("No output file arg!"); | 119 | const output_path = args_iter.next() orelse @panic("No output file arg!"); |
| 120 | 120 | ||
| 121 | const compressor = std.compress.deflate.compressor; | 121 | const compressor = std.compress.flate.deflate.compressor; |
| 122 | var out_file = try std.fs.cwd().createFile(output_path, .{}); | 122 | var out_file = try std.fs.cwd().createFile(output_path, .{}); |
| 123 | defer out_file.close(); | 123 | defer out_file.close(); |
| 124 | var out_comp = try compressor(allocator, out_file.writer(), .{ .level = .best_compression }); | 124 | var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best }); |
| 125 | defer out_comp.deinit(); | ||
| 126 | const writer = out_comp.writer(); | 125 | const writer = out_comp.writer(); |
| 127 | 126 | ||
| 128 | const endian = builtin.cpu.arch.endian(); | 127 | const endian = builtin.cpu.arch.endian(); |
diff --git a/codegen/scripts.zig b/codegen/scripts.zig index e985c1e..660699d 100644 --- a/codegen/scripts.zig +++ b/codegen/scripts.zig | |||
| @@ -288,11 +288,10 @@ pub fn main() !void { | |||
| 288 | _ = args_iter.skip(); | 288 | _ = args_iter.skip(); |
| 289 | const output_path = args_iter.next() orelse @panic("No output file arg!"); | 289 | const output_path = args_iter.next() orelse @panic("No output file arg!"); |
| 290 | 290 | ||
| 291 | const compressor = std.compress.deflate.compressor; | 291 | const compressor = std.compress.flate.deflate.compressor; |
| 292 | var out_file = try std.fs.cwd().createFile(output_path, .{}); | 292 | var out_file = try std.fs.cwd().createFile(output_path, .{}); |
| 293 | defer out_file.close(); | 293 | defer out_file.close(); |
| 294 | var out_comp = try compressor(allocator, out_file.writer(), .{ .level = .best_compression }); | 294 | var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best }); |
| 295 | defer out_comp.deinit(); | ||
| 296 | const writer = out_comp.writer(); | 295 | const writer = out_comp.writer(); |
| 297 | 296 | ||
| 298 | const endian = builtin.cpu.arch.endian(); | 297 | const endian = builtin.cpu.arch.endian(); |
diff --git a/codegen/upper.zig b/codegen/upper.zig index 455fe2c..5848911 100644 --- a/codegen/upper.zig +++ b/codegen/upper.zig | |||
| @@ -17,11 +17,10 @@ pub fn main() !void { | |||
| 17 | _ = args_iter.skip(); | 17 | _ = args_iter.skip(); |
| 18 | const output_path = args_iter.next() orelse @panic("No output file arg!"); | 18 | const output_path = args_iter.next() orelse @panic("No output file arg!"); |
| 19 | 19 | ||
| 20 | const compressor = std.compress.deflate.compressor; | 20 | const compressor = std.compress.flate.deflate.compressor; |
| 21 | var out_file = try std.fs.cwd().createFile(output_path, .{}); | 21 | var out_file = try std.fs.cwd().createFile(output_path, .{}); |
| 22 | defer out_file.close(); | 22 | defer out_file.close(); |
| 23 | var out_comp = try compressor(allocator, out_file.writer(), .{ .level = .best_compression }); | 23 | var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best }); |
| 24 | defer out_comp.deinit(); | ||
| 25 | const writer = out_comp.writer(); | 24 | const writer = out_comp.writer(); |
| 26 | 25 | ||
| 27 | const endian = builtin.cpu.arch.endian(); | 26 | const endian = builtin.cpu.arch.endian(); |
diff --git a/src/CanonData.zig b/src/CanonData.zig index 64d5555..be2b381 100644 --- a/src/CanonData.zig +++ b/src/CanonData.zig | |||
| @@ -10,11 +10,10 @@ nfd: [][]u21 = undefined, | |||
| 10 | const Self = @This(); | 10 | const Self = @This(); |
| 11 | 11 | ||
| 12 | pub fn init(allocator: mem.Allocator) !Self { | 12 | pub fn init(allocator: mem.Allocator) !Self { |
| 13 | const decompressor = compress.deflate.decompressor; | 13 | const decompressor = compress.flate.inflate.decompressor; |
| 14 | const in_bytes = @embedFile("canon"); | 14 | const in_bytes = @embedFile("canon"); |
| 15 | var in_fbs = std.io.fixedBufferStream(in_bytes); | 15 | var in_fbs = std.io.fixedBufferStream(in_bytes); |
| 16 | var in_decomp = try decompressor(allocator, in_fbs.reader(), null); | 16 | var in_decomp = decompressor(.raw, in_fbs.reader()); |
| 17 | defer in_decomp.deinit(); | ||
| 18 | var reader = in_decomp.reader(); | 17 | var reader = in_decomp.reader(); |
| 19 | 18 | ||
| 20 | const endian = builtin.cpu.arch.endian(); | 19 | const endian = builtin.cpu.arch.endian(); |
diff --git a/src/CaseData.zig b/src/CaseData.zig index c9ccc1e..260637a 100644 --- a/src/CaseData.zig +++ b/src/CaseData.zig | |||
| @@ -15,7 +15,7 @@ prop_s2: []u8 = undefined, | |||
| 15 | const Self = @This(); | 15 | const Self = @This(); |
| 16 | 16 | ||
| 17 | pub fn init(allocator: mem.Allocator) !Self { | 17 | pub fn init(allocator: mem.Allocator) !Self { |
| 18 | const decompressor = compress.deflate.decompressor; | 18 | const decompressor = compress.flate.inflate.decompressor; |
| 19 | const endian = builtin.cpu.arch.endian(); | 19 | const endian = builtin.cpu.arch.endian(); |
| 20 | 20 | ||
| 21 | var self = Self{ | 21 | var self = Self{ |
| @@ -32,8 +32,7 @@ pub fn init(allocator: mem.Allocator) !Self { | |||
| 32 | // Uppercase | 32 | // Uppercase |
| 33 | const upper_bytes = @embedFile("upper"); | 33 | const upper_bytes = @embedFile("upper"); |
| 34 | var upper_fbs = std.io.fixedBufferStream(upper_bytes); | 34 | var upper_fbs = std.io.fixedBufferStream(upper_bytes); |
| 35 | var upper_decomp = try decompressor(allocator, upper_fbs.reader(), null); | 35 | var upper_decomp = decompressor(.raw, upper_fbs.reader()); |
| 36 | defer upper_decomp.deinit(); | ||
| 37 | var upper_reader = upper_decomp.reader(); | 36 | var upper_reader = upper_decomp.reader(); |
| 38 | 37 | ||
| 39 | while (true) { | 38 | while (true) { |
| @@ -46,8 +45,7 @@ pub fn init(allocator: mem.Allocator) !Self { | |||
| 46 | // Lowercase | 45 | // Lowercase |
| 47 | const lower_bytes = @embedFile("lower"); | 46 | const lower_bytes = @embedFile("lower"); |
| 48 | var lower_fbs = std.io.fixedBufferStream(lower_bytes); | 47 | var lower_fbs = std.io.fixedBufferStream(lower_bytes); |
| 49 | var lower_decomp = try decompressor(allocator, lower_fbs.reader(), null); | 48 | var lower_decomp = decompressor(.raw, lower_fbs.reader()); |
| 50 | defer lower_decomp.deinit(); | ||
| 51 | var lower_reader = lower_decomp.reader(); | 49 | var lower_reader = lower_decomp.reader(); |
| 52 | 50 | ||
| 53 | while (true) { | 51 | while (true) { |
| @@ -60,8 +58,7 @@ pub fn init(allocator: mem.Allocator) !Self { | |||
| 60 | // Case properties | 58 | // Case properties |
| 61 | const cp_bytes = @embedFile("case_prop"); | 59 | const cp_bytes = @embedFile("case_prop"); |
| 62 | var cp_fbs = std.io.fixedBufferStream(cp_bytes); | 60 | var cp_fbs = std.io.fixedBufferStream(cp_bytes); |
| 63 | var cp_decomp = try decompressor(allocator, cp_fbs.reader(), null); | 61 | var cp_decomp = decompressor(.raw, cp_fbs.reader()); |
| 64 | defer cp_decomp.deinit(); | ||
| 65 | var cp_reader = cp_decomp.reader(); | 62 | var cp_reader = cp_decomp.reader(); |
| 66 | 63 | ||
| 67 | const stage_1_len: u16 = try cp_reader.readInt(u16, endian); | 64 | const stage_1_len: u16 = try cp_reader.readInt(u16, endian); |
diff --git a/src/CaseFold.zig b/src/CaseFold.zig index 9b10e16..3e7535e 100644 --- a/src/CaseFold.zig +++ b/src/CaseFold.zig | |||
| @@ -10,7 +10,9 @@ fold_data: *const FoldData, | |||
| 10 | 10 | ||
| 11 | const Self = @This(); | 11 | const Self = @This(); |
| 12 | 12 | ||
| 13 | fn caseFold( | 13 | /// Produces the case folded code points for `cps`. Caller must free returned |
| 14 | /// slice with `allocator`. | ||
| 15 | pub fn caseFold( | ||
| 14 | self: Self, | 16 | self: Self, |
| 15 | allocator: mem.Allocator, | 17 | allocator: mem.Allocator, |
| 16 | cps: []const u21, | 18 | cps: []const u21, |
| @@ -37,6 +39,8 @@ fn changesWhenCaseFolded(self: Self, cps: []const u21) bool { | |||
| 37 | } else false; | 39 | } else false; |
| 38 | } | 40 | } |
| 39 | 41 | ||
| 42 | /// Caseless compare `a` and `b` by decomposing to NFKD. This is the most | ||
| 43 | /// comprehensive comparison possible, but slower than `canonCaselessMatch`. | ||
| 40 | pub fn compatCaselessMatch( | 44 | pub fn compatCaselessMatch( |
| 41 | self: Self, | 45 | self: Self, |
| 42 | allocator: mem.Allocator, | 46 | allocator: mem.Allocator, |
| @@ -108,6 +112,8 @@ test "compatCaselessMatch" { | |||
| 108 | try testing.expect(try caser.compatCaselessMatch(allocator, &n, a, c)); | 112 | try testing.expect(try caser.compatCaselessMatch(allocator, &n, a, c)); |
| 109 | } | 113 | } |
| 110 | 114 | ||
| 115 | /// Performs canonical caseless string matching by decomposing to NFD. This is | ||
| 116 | /// faster than `compatCaselessMatch`, but less comprehensive. | ||
| 111 | pub fn canonCaselessMatch( | 117 | pub fn canonCaselessMatch( |
| 112 | self: Self, | 118 | self: Self, |
| 113 | allocator: mem.Allocator, | 119 | allocator: mem.Allocator, |
diff --git a/src/CombiningData.zig b/src/CombiningData.zig index a40cbde..16b923f 100644 --- a/src/CombiningData.zig +++ b/src/CombiningData.zig | |||
| @@ -10,11 +10,10 @@ s2: []u8 = undefined, | |||
| 10 | const Self = @This(); | 10 | const Self = @This(); |
| 11 | 11 | ||
| 12 | pub fn init(allocator: mem.Allocator) !Self { | 12 | pub fn init(allocator: mem.Allocator) !Self { |
| 13 | const decompressor = compress.deflate.decompressor; | 13 | const decompressor = compress.flate.inflate.decompressor; |
| 14 | const in_bytes = @embedFile("ccc"); | 14 | const in_bytes = @embedFile("ccc"); |
| 15 | var in_fbs = std.io.fixedBufferStream(in_bytes); | 15 | var in_fbs = std.io.fixedBufferStream(in_bytes); |
| 16 | var in_decomp = try decompressor(allocator, in_fbs.reader(), null); | 16 | var in_decomp = decompressor(.raw, in_fbs.reader()); |
| 17 | defer in_decomp.deinit(); | ||
| 18 | var reader = in_decomp.reader(); | 17 | var reader = in_decomp.reader(); |
| 19 | 18 | ||
| 20 | const endian = builtin.cpu.arch.endian(); | 19 | const endian = builtin.cpu.arch.endian(); |
diff --git a/src/CompatData.zig b/src/CompatData.zig index a931cb3..3346a06 100644 --- a/src/CompatData.zig +++ b/src/CompatData.zig | |||
| @@ -9,11 +9,10 @@ nfkd: [][]u21 = undefined, | |||
| 9 | const Self = @This(); | 9 | const Self = @This(); |
| 10 | 10 | ||
| 11 | pub fn init(allocator: mem.Allocator) !Self { | 11 | pub fn init(allocator: mem.Allocator) !Self { |
| 12 | const decompressor = compress.deflate.decompressor; | 12 | const decompressor = compress.flate.inflate.decompressor; |
| 13 | const in_bytes = @embedFile("compat"); | 13 | const in_bytes = @embedFile("compat"); |
| 14 | var in_fbs = std.io.fixedBufferStream(in_bytes); | 14 | var in_fbs = std.io.fixedBufferStream(in_bytes); |
| 15 | var in_decomp = try decompressor(allocator, in_fbs.reader(), null); | 15 | var in_decomp = decompressor(.raw, in_fbs.reader()); |
| 16 | defer in_decomp.deinit(); | ||
| 17 | var reader = in_decomp.reader(); | 16 | var reader = in_decomp.reader(); |
| 18 | 17 | ||
| 19 | const endian = builtin.cpu.arch.endian(); | 18 | const endian = builtin.cpu.arch.endian(); |
diff --git a/src/FoldData.zig b/src/FoldData.zig index a06eefe..d4312b0 100644 --- a/src/FoldData.zig +++ b/src/FoldData.zig | |||
| @@ -10,11 +10,10 @@ cwcf: []bool = undefined, | |||
| 10 | const Self = @This(); | 10 | const Self = @This(); |
| 11 | 11 | ||
| 12 | pub fn init(allocator: mem.Allocator) !Self { | 12 | pub fn init(allocator: mem.Allocator) !Self { |
| 13 | const decompressor = compress.deflate.decompressor; | 13 | const decompressor = compress.flate.inflate.decompressor; |
| 14 | const in_bytes = @embedFile("fold"); | 14 | const in_bytes = @embedFile("fold"); |
| 15 | var in_fbs = std.io.fixedBufferStream(in_bytes); | 15 | var in_fbs = std.io.fixedBufferStream(in_bytes); |
| 16 | var in_decomp = try decompressor(allocator, in_fbs.reader(), null); | 16 | var in_decomp = decompressor(.raw, in_fbs.reader()); |
| 17 | defer in_decomp.deinit(); | ||
| 18 | var reader = in_decomp.reader(); | 17 | var reader = in_decomp.reader(); |
| 19 | 18 | ||
| 20 | const endian = builtin.cpu.arch.endian(); | 19 | const endian = builtin.cpu.arch.endian(); |
diff --git a/src/GenCatData.zig b/src/GenCatData.zig index 12501bf..454c45a 100644 --- a/src/GenCatData.zig +++ b/src/GenCatData.zig | |||
| @@ -45,11 +45,10 @@ s3: []u5 = undefined, | |||
| 45 | const Self = @This(); | 45 | const Self = @This(); |
| 46 | 46 | ||
| 47 | pub fn init(allocator: mem.Allocator) !Self { | 47 | pub fn init(allocator: mem.Allocator) !Self { |
| 48 | const decompressor = compress.deflate.decompressor; | 48 | const decompressor = compress.flate.inflate.decompressor; |
| 49 | const in_bytes = @embedFile("gencat"); | 49 | const in_bytes = @embedFile("gencat"); |
| 50 | var in_fbs = std.io.fixedBufferStream(in_bytes); | 50 | var in_fbs = std.io.fixedBufferStream(in_bytes); |
| 51 | var in_decomp = try decompressor(allocator, in_fbs.reader(), null); | 51 | var in_decomp = decompressor(.raw, in_fbs.reader()); |
| 52 | defer in_decomp.deinit(); | ||
| 53 | var reader = in_decomp.reader(); | 52 | var reader = in_decomp.reader(); |
| 54 | 53 | ||
| 55 | const endian = builtin.cpu.arch.endian(); | 54 | const endian = builtin.cpu.arch.endian(); |
diff --git a/src/GraphemeData.zig b/src/GraphemeData.zig index 500ffea..1710870 100644 --- a/src/GraphemeData.zig +++ b/src/GraphemeData.zig | |||
| @@ -38,11 +38,10 @@ s3: []u8 = undefined, | |||
| 38 | const Self = @This(); | 38 | const Self = @This(); |
| 39 | 39 | ||
| 40 | pub fn init(allocator: mem.Allocator) !Self { | 40 | pub fn init(allocator: mem.Allocator) !Self { |
| 41 | const decompressor = compress.deflate.decompressor; | 41 | const decompressor = compress.flate.inflate.decompressor; |
| 42 | const in_bytes = @embedFile("gbp"); | 42 | const in_bytes = @embedFile("gbp"); |
| 43 | var in_fbs = std.io.fixedBufferStream(in_bytes); | 43 | var in_fbs = std.io.fixedBufferStream(in_bytes); |
| 44 | var in_decomp = try decompressor(allocator, in_fbs.reader(), null); | 44 | var in_decomp = decompressor(.raw, in_fbs.reader()); |
| 45 | defer in_decomp.deinit(); | ||
| 46 | var reader = in_decomp.reader(); | 45 | var reader = in_decomp.reader(); |
| 47 | 46 | ||
| 48 | const endian = builtin.cpu.arch.endian(); | 47 | const endian = builtin.cpu.arch.endian(); |
diff --git a/src/HangulData.zig b/src/HangulData.zig index 99d91c1..5eee427 100644 --- a/src/HangulData.zig +++ b/src/HangulData.zig | |||
| @@ -20,11 +20,10 @@ s2: []u3 = undefined, | |||
| 20 | const Self = @This(); | 20 | const Self = @This(); |
| 21 | 21 | ||
| 22 | pub fn init(allocator: mem.Allocator) !Self { | 22 | pub fn init(allocator: mem.Allocator) !Self { |
| 23 | const decompressor = compress.deflate.decompressor; | 23 | const decompressor = compress.flate.inflate.decompressor; |
| 24 | const in_bytes = @embedFile("hangul"); | 24 | const in_bytes = @embedFile("hangul"); |
| 25 | var in_fbs = std.io.fixedBufferStream(in_bytes); | 25 | var in_fbs = std.io.fixedBufferStream(in_bytes); |
| 26 | var in_decomp = try decompressor(allocator, in_fbs.reader(), null); | 26 | var in_decomp = decompressor(.raw, in_fbs.reader()); |
| 27 | defer in_decomp.deinit(); | ||
| 28 | var reader = in_decomp.reader(); | 27 | var reader = in_decomp.reader(); |
| 29 | 28 | ||
| 30 | const endian = builtin.cpu.arch.endian(); | 29 | const endian = builtin.cpu.arch.endian(); |
diff --git a/src/NormPropsData.zig b/src/NormPropsData.zig index 86d497b..899bb8f 100644 --- a/src/NormPropsData.zig +++ b/src/NormPropsData.zig | |||
| @@ -11,11 +11,10 @@ s2: []u4 = undefined, | |||
| 11 | const Self = @This(); | 11 | const Self = @This(); |
| 12 | 12 | ||
| 13 | pub fn init(allocator: mem.Allocator) !Self { | 13 | pub fn init(allocator: mem.Allocator) !Self { |
| 14 | const decompressor = compress.deflate.decompressor; | 14 | const decompressor = compress.flate.inflate.decompressor; |
| 15 | const in_bytes = @embedFile("normp"); | 15 | const in_bytes = @embedFile("normp"); |
| 16 | var in_fbs = std.io.fixedBufferStream(in_bytes); | 16 | var in_fbs = std.io.fixedBufferStream(in_bytes); |
| 17 | var in_decomp = try decompressor(allocator, in_fbs.reader(), null); | 17 | var in_decomp = decompressor(.raw, in_fbs.reader()); |
| 18 | defer in_decomp.deinit(); | ||
| 19 | var reader = in_decomp.reader(); | 18 | var reader = in_decomp.reader(); |
| 20 | 19 | ||
| 21 | const endian = builtin.cpu.arch.endian(); | 20 | const endian = builtin.cpu.arch.endian(); |
diff --git a/src/Normalize.zig b/src/Normalize.zig index f437f4f..85e3aa3 100644 --- a/src/Normalize.zig +++ b/src/Normalize.zig | |||
| @@ -572,47 +572,6 @@ test "eql" { | |||
| 572 | try testing.expect(try n.eql(allocator, "foϓ", "fo\u{03D2}\u{0301}")); | 572 | try testing.expect(try n.eql(allocator, "foϓ", "fo\u{03D2}\u{0301}")); |
| 573 | } | 573 | } |
| 574 | 574 | ||
| 575 | // FCD | ||
| 576 | fn getLeadCcc(self: Self, cp: u21) u8 { | ||
| 577 | const dc = self.mapping(cp, .nfd); | ||
| 578 | const dcp = if (dc.form == .same) cp else dc.cps[0]; | ||
| 579 | return self.norm_data.ccc_data.ccc(dcp); | ||
| 580 | } | ||
| 581 | |||
| 582 | fn getTrailCcc(self: Self, cp: u21) u8 { | ||
| 583 | const dc = self.mapping(cp, .nfd); | ||
| 584 | const dcp = if (dc.form == .same) cp else dc.cps[dc.cps.len - 1]; | ||
| 585 | return self.norm_data.ccc_data.ccc(dcp); | ||
| 586 | } | ||
| 587 | |||
| 588 | // Fast check to detect if a string is already in NFC or NFD form. | ||
| 589 | fn isFcd(self: Self, str: []const u8) bool { | ||
| 590 | var prev_ccc: u8 = 0; | ||
| 591 | var cp_iter = CodePointIterator{ .bytes = str }; | ||
| 592 | |||
| 593 | return while (cp_iter.next()) |cp| { | ||
| 594 | const ccc = self.getLeadCcc(cp.code); | ||
| 595 | if (ccc != 0 and ccc < prev_ccc) break false; | ||
| 596 | prev_ccc = self.getTrailCcc(cp.code); | ||
| 597 | } else true; | ||
| 598 | } | ||
| 599 | |||
| 600 | test "isFcd" { | ||
| 601 | const allocator = testing.allocator; | ||
| 602 | const data = try NormData.init(allocator); | ||
| 603 | defer data.deinit(); | ||
| 604 | const n = Self{ .norm_data = &data }; | ||
| 605 | |||
| 606 | const is_nfc = "José \u{3D3}"; | ||
| 607 | try testing.expect(n.isFcd(is_nfc)); | ||
| 608 | |||
| 609 | const is_nfd = "Jose\u{301} \u{3d2}\u{301}"; | ||
| 610 | try testing.expect(n.isFcd(is_nfd)); | ||
| 611 | |||
| 612 | const not_fcd = "Jose\u{301} \u{3d2}\u{315}\u{301}"; | ||
| 613 | try testing.expect(!n.isFcd(not_fcd)); | ||
| 614 | } | ||
| 615 | |||
| 616 | /// Returns true if `str` only contains Latin-1 Supplement | 575 | /// Returns true if `str` only contains Latin-1 Supplement |
| 617 | /// code points. Uses SIMD if possible. | 576 | /// code points. Uses SIMD if possible. |
| 618 | pub fn isLatin1Only(str: []const u8) bool { | 577 | pub fn isLatin1Only(str: []const u8) bool { |
diff --git a/src/PropsData.zig b/src/PropsData.zig index 9d24e68..f6c8370 100644 --- a/src/PropsData.zig +++ b/src/PropsData.zig | |||
| @@ -15,14 +15,13 @@ num_s2: []u8 = undefined, | |||
| 15 | const Self = @This(); | 15 | const Self = @This(); |
| 16 | 16 | ||
| 17 | pub fn init(allocator: mem.Allocator) !Self { | 17 | pub fn init(allocator: mem.Allocator) !Self { |
| 18 | const decompressor = compress.deflate.decompressor; | 18 | const decompressor = compress.flate.inflate.decompressor; |
| 19 | const endian = builtin.cpu.arch.endian(); | 19 | const endian = builtin.cpu.arch.endian(); |
| 20 | 20 | ||
| 21 | // Process DerivedCoreProperties.txt | 21 | // Process DerivedCoreProperties.txt |
| 22 | const core_bytes = @embedFile("core_props"); | 22 | const core_bytes = @embedFile("core_props"); |
| 23 | var core_fbs = std.io.fixedBufferStream(core_bytes); | 23 | var core_fbs = std.io.fixedBufferStream(core_bytes); |
| 24 | var core_decomp = try decompressor(allocator, core_fbs.reader(), null); | 24 | var core_decomp = decompressor(.raw, core_fbs.reader()); |
| 25 | defer core_decomp.deinit(); | ||
| 26 | var core_reader = core_decomp.reader(); | 25 | var core_reader = core_decomp.reader(); |
| 27 | 26 | ||
| 28 | var self = Self{ .allocator = allocator }; | 27 | var self = Self{ .allocator = allocator }; |
| @@ -40,8 +39,7 @@ pub fn init(allocator: mem.Allocator) !Self { | |||
| 40 | // Process PropList.txt | 39 | // Process PropList.txt |
| 41 | const props_bytes = @embedFile("props"); | 40 | const props_bytes = @embedFile("props"); |
| 42 | var props_fbs = std.io.fixedBufferStream(props_bytes); | 41 | var props_fbs = std.io.fixedBufferStream(props_bytes); |
| 43 | var props_decomp = try decompressor(allocator, props_fbs.reader(), null); | 42 | var props_decomp = decompressor(.raw, props_fbs.reader()); |
| 44 | defer props_decomp.deinit(); | ||
| 45 | var props_reader = props_decomp.reader(); | 43 | var props_reader = props_decomp.reader(); |
| 46 | 44 | ||
| 47 | const stage_1_len: u16 = try props_reader.readInt(u16, endian); | 45 | const stage_1_len: u16 = try props_reader.readInt(u16, endian); |
| @@ -57,8 +55,7 @@ pub fn init(allocator: mem.Allocator) !Self { | |||
| 57 | // Process DerivedNumericType.txt | 55 | // Process DerivedNumericType.txt |
| 58 | const num_bytes = @embedFile("numeric"); | 56 | const num_bytes = @embedFile("numeric"); |
| 59 | var num_fbs = std.io.fixedBufferStream(num_bytes); | 57 | var num_fbs = std.io.fixedBufferStream(num_bytes); |
| 60 | var num_decomp = try decompressor(allocator, num_fbs.reader(), null); | 58 | var num_decomp = decompressor(.raw, num_fbs.reader()); |
| 61 | defer num_decomp.deinit(); | ||
| 62 | var num_reader = num_decomp.reader(); | 59 | var num_reader = num_decomp.reader(); |
| 63 | 60 | ||
| 64 | const num_stage_1_len: u16 = try num_reader.readInt(u16, endian); | 61 | const num_stage_1_len: u16 = try num_reader.readInt(u16, endian); |
diff --git a/src/ScriptsData.zig b/src/ScriptsData.zig index 4e371bf..415ce2d 100644 --- a/src/ScriptsData.zig +++ b/src/ScriptsData.zig | |||
| @@ -4,7 +4,7 @@ const compress = std.compress; | |||
| 4 | const mem = std.mem; | 4 | const mem = std.mem; |
| 5 | const testing = std.testing; | 5 | const testing = std.testing; |
| 6 | 6 | ||
| 7 | /// Script | 7 | /// Scripts |
| 8 | pub const Script = enum { | 8 | pub const Script = enum { |
| 9 | none, | 9 | none, |
| 10 | Adlam, | 10 | Adlam, |
| @@ -180,11 +180,10 @@ s3: []u8 = undefined, | |||
| 180 | const Self = @This(); | 180 | const Self = @This(); |
| 181 | 181 | ||
| 182 | pub fn init(allocator: mem.Allocator) !Self { | 182 | pub fn init(allocator: mem.Allocator) !Self { |
| 183 | const decompressor = compress.deflate.decompressor; | 183 | const decompressor = compress.flate.inflate.decompressor; |
| 184 | const in_bytes = @embedFile("scripts"); | 184 | const in_bytes = @embedFile("scripts"); |
| 185 | var in_fbs = std.io.fixedBufferStream(in_bytes); | 185 | var in_fbs = std.io.fixedBufferStream(in_bytes); |
| 186 | var in_decomp = try decompressor(allocator, in_fbs.reader(), null); | 186 | var in_decomp = decompressor(.raw, in_fbs.reader()); |
| 187 | defer in_decomp.deinit(); | ||
| 188 | var reader = in_decomp.reader(); | 187 | var reader = in_decomp.reader(); |
| 189 | 188 | ||
| 190 | const endian = builtin.cpu.arch.endian(); | 189 | const endian = builtin.cpu.arch.endian(); |
diff --git a/src/WidthData.zig b/src/WidthData.zig index b9ef84e..cf31b7f 100644 --- a/src/WidthData.zig +++ b/src/WidthData.zig | |||
| @@ -14,11 +14,10 @@ s2: []i3 = undefined, | |||
| 14 | const Self = @This(); | 14 | const Self = @This(); |
| 15 | 15 | ||
| 16 | pub fn init(allocator: mem.Allocator) !Self { | 16 | pub fn init(allocator: mem.Allocator) !Self { |
| 17 | const decompressor = compress.deflate.decompressor; | 17 | const decompressor = compress.flate.inflate.decompressor; |
| 18 | const in_bytes = @embedFile("dwp"); | 18 | const in_bytes = @embedFile("dwp"); |
| 19 | var in_fbs = std.io.fixedBufferStream(in_bytes); | 19 | var in_fbs = std.io.fixedBufferStream(in_bytes); |
| 20 | var in_decomp = try decompressor(allocator, in_fbs.reader(), null); | 20 | var in_decomp = decompressor(.raw, in_fbs.reader()); |
| 21 | defer in_decomp.deinit(); | ||
| 22 | var reader = in_decomp.reader(); | 21 | var reader = in_decomp.reader(); |
| 23 | 22 | ||
| 24 | const endian = builtin.cpu.arch.endian(); | 23 | const endian = builtin.cpu.arch.endian(); |