summary | refs | log | tree | commit | diff
path: root/codegen
diff options
context:
space:
mode:
author: Jose Colon Rodriguez, 2024-06-26 13:12:58 -0400
committer: Jose Colon Rodriguez, 2024-06-26 13:12:58 -0400
commit: 7a1423fdd616ab4337166a20686da8ee0973eef9 (patch)
tree: 4a931e8009acfbcdf57bbd0939ec3d0df1e1cb93 /codegen
parent: Implemented sqeek502s case fold (diff)
download: zg-7a1423fdd616ab4337166a20686da8ee0973eef9.tar.gz
zg-7a1423fdd616ab4337166a20686da8ee0973eef9.tar.xz
zg-7a1423fdd616ab4337166a20686da8ee0973eef9.zip
Added changes when casefolded back
Diffstat (limited to 'codegen')
-rw-r--r-- codegen/fold.zig 80
1 files changed, 49 insertions, 31 deletions
diff --git a/codegen/fold.zig b/codegen/fold.zig
index ec024c5..24ecae6 100644
--- a/codegen/fold.zig
+++ b/codegen/fold.zig
@@ -1,32 +1,66 @@
1const std = @import("std"); 1const std = @import("std");
2const builtin = @import("builtin"); 2const builtin = @import("builtin");
3 3const mem = std.mem;
4// From https://www.unicode.org/Public/UCD/latest/ucd/CaseFolding.txt
5// const case_folding_txt = @embedFile("CaseFolding.txt");
6 4
7pub fn main() !void { 5pub fn main() !void {
8 var gpa = std.heap.GeneralPurposeAllocator(.{}){}; 6 var gpa = std.heap.GeneralPurposeAllocator(.{}){};
9 defer std.debug.assert(gpa.deinit() == .ok); 7 defer std.debug.assert(gpa.deinit() == .ok);
10 const allocator = gpa.allocator(); 8 const allocator = gpa.allocator();
11 9
12 // const unbuf_stdout = std.io.getStdOut().writer(); 10 // Process DerivedCoreProperties.txt
13 // var buf_stdout = std.io.bufferedWriter(unbuf_stdout); 11 var props_file = try std.fs.cwd().openFile("data/unicode/DerivedCoreProperties.txt", .{});
14 // const writer = buf_stdout.writer(); 12 defer props_file.close();
13 var props_buf = std.io.bufferedReader(props_file.reader());
14 const props_reader = props_buf.reader();
15
16 var props_map = std.AutoHashMap(u21, void).init(allocator);
17 defer props_map.deinit();
18
19 var line_buf: [4096]u8 = undefined;
20
21 props_lines: while (try props_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
22 if (line.len == 0 or line[0] == '#') continue;
23
24 const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line;
25
26 var field_iter = std.mem.tokenizeAny(u8, no_comment, "; ");
27 var current_code: [2]u21 = undefined;
28
29 var i: usize = 0;
30 while (field_iter.next()) |field| : (i += 1) {
31 switch (i) {
32 0 => {
33 // Code point(s)
34 if (std.mem.indexOf(u8, field, "..")) |dots| {
35 current_code = .{
36 try std.fmt.parseInt(u21, field[0..dots], 16),
37 try std.fmt.parseInt(u21, field[dots + 2 ..], 16),
38 };
39 } else {
40 const code = try std.fmt.parseInt(u21, field, 16);
41 current_code = .{ code, code };
42 }
43 },
44 1 => {
45 // Core property
46 if (!mem.eql(u8, field, "Changes_When_Casefolded")) continue :props_lines;
47 for (current_code[0]..current_code[1] + 1) |cp| try props_map.put(@intCast(cp), {});
48 },
49 else => {},
50 }
51 }
52 }
15 53
16 var codepoint_mapping = std.AutoArrayHashMap(u21, [3]u21).init(allocator); 54 var codepoint_mapping = std.AutoArrayHashMap(u21, [3]u21).init(allocator);
17 defer codepoint_mapping.deinit(); 55 defer codepoint_mapping.deinit();
18 56
19 // Process 57 // Process CaseFolding.txt
20 var cp_file = try std.fs.cwd().openFile("data/unicode/CaseFolding.txt", .{}); 58 var cp_file = try std.fs.cwd().openFile("data/unicode/CaseFolding.txt", .{});
21 defer cp_file.close(); 59 defer cp_file.close();
22 var cp_buf = std.io.bufferedReader(cp_file.reader()); 60 var cp_buf = std.io.bufferedReader(cp_file.reader());
23 const cp_reader = cp_buf.reader(); 61 const cp_reader = cp_buf.reader();
24 62
25 // var line_it = std.mem.tokenizeAny(u8, case_folding_txt, "\r\n");
26 var line_buf: [4096]u8 = undefined;
27
28 while (try cp_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| { 63 while (try cp_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
29 // while (line_it.next()) |line| {
30 if (line.len == 0 or line[0] == '#') continue; 64 if (line.len == 0 or line[0] == '#') continue;
31 65
32 var field_it = std.mem.splitScalar(u8, line, ';'); 66 var field_it = std.mem.splitScalar(u8, line, ';');
@@ -168,12 +202,6 @@ pub fn main() !void {
168 @memcpy(stage2[i * 256 ..][0..256], &key); 202 @memcpy(stage2[i * 256 ..][0..256], &key);
169 } 203 }
170 204
171 // try writer.print("const cutoff = 0x{X};\n", .{codepoint_cutoff});
172 // try writeArray(writer, u8, "stage1", meaningful_stage1);
173 // try writeArray(writer, u8, "stage2", stage2);
174 // try writer.print("const multiple_start = {};\n", .{multiple_codepoint_start});
175 // try writeArray(writer, i24, "stage3", stage3);
176
177 var args_iter = try std.process.argsWithAllocator(allocator); 205 var args_iter = try std.process.argsWithAllocator(allocator);
178 defer args_iter.deinit(); 206 defer args_iter.deinit();
179 _ = args_iter.skip(); 207 _ = args_iter.skip();
@@ -199,20 +227,10 @@ pub fn main() !void {
199 try writer.writeInt(u16, @intCast(stage3.len), endian); 227 try writer.writeInt(u16, @intCast(stage3.len), endian);
200 for (stage3) |offset| try writer.writeInt(i24, offset, endian); 228 for (stage3) |offset| try writer.writeInt(i24, offset, endian);
201 229
230 try writer.writeInt(u16, @intCast(props_map.count()), endian);
231 var iter = props_map.keyIterator();
232 while (iter.next()) |key_ptr| try writer.writeInt(u24, key_ptr.*, endian);
233
202 try out_comp.flush(); 234 try out_comp.flush();
203 } 235 }
204
205 // try buf_stdout.flush();
206} 236}
207
208// fn writeArray(writer: anytype, comptime T: type, name: []const u8, data: []const T) !void {
209// try writer.print("const {s} = [{}]{s}{{", .{ name, data.len, @typeName(T) });
210//
211// for (data, 0..) |v, i| {
212// if (i % 32 == 0) try writer.writeAll("\n ");
213// try writer.print("{},", .{v});
214// if (i != data.len - 1) try writer.writeByte(' ');
215// }
216//
217// try writer.writeAll("\n};\n");
218// }