From 7188a9fc85f6aa0f71a4cb7966f8b0a044f29e02 Mon Sep 17 00:00:00 2001 From: Jimmi Holst Christensen Date: Mon, 31 Jan 2022 17:11:15 +0100 Subject: Refactor the ArgIterator interface They now follow the interface provided by the standard library. This means that we no longer need `args.OsIterator`, as the one from `std` can now be used directly. Also remove `args.ShellIterator`, as a similar iterator exists in `std` called `ArgIteratorGeneral`. --- README.md | 10 +- clap.zig | 14 +- clap/args.zig | 316 +-------------------------------------------- clap/streaming.zig | 10 +- example/simple-ex.zig | 8 +- example/streaming-clap.zig | 10 +- 6 files changed, 37 insertions(+), 331 deletions(-) diff --git a/README.md b/README.md index d872965..80d0e80 100644 --- a/README.md +++ b/README.md @@ -108,6 +108,7 @@ const std = @import("std"); const debug = std.debug; const io = std.io; +const process = std.process; pub fn main() !void { const allocator = std.heap.page_allocator; @@ -126,16 +127,17 @@ pub fn main() !void { .{ .id = 'f', .takes_value = .one }, }; - // We then initialize an argument iterator. We will use the OsIterator as it nicely - // wraps iterating over arguments the most efficient way on each os. - var iter = try clap.args.OsIterator.init(allocator); + var iter = try process.ArgIterator.initWithAllocator(allocator); defer iter.deinit(); + // Skip exe argument + _ = iter.next(); + // Initalize our diagnostics, which can be used for reporting useful errors. // This is optional. You can also leave the `diagnostic` field unset if you // don't care about the extra information `Diagnostic` provides. 
var diag = clap.Diagnostic{}; - var parser = clap.StreamingClap(u8, clap.args.OsIterator){ + var parser = clap.StreamingClap(u8, process.ArgIterator){ .params = &params, .iter = &iter, .diagnostic = &diag, diff --git a/clap.zig b/clap.zig index 7234283..39bbef2 100644 --- a/clap.zig +++ b/clap.zig @@ -4,6 +4,7 @@ const debug = std.debug; const heap = std.heap; const io = std.io; const mem = std.mem; +const process = std.process; const testing = std.testing; pub const args = @import("clap/args.zig"); @@ -347,16 +348,21 @@ pub fn parse( comptime params: []const Param(Id), opt: ParseOptions, ) !Args(Id, params) { - var iter = try args.OsIterator.init(opt.allocator); + var arena = heap.ArenaAllocator.init(opt.allocator); + errdefer arena.deinit(); + + var iter = try process.ArgIterator.initWithAllocator(arena.allocator()); + const exe_arg = iter.next(); + const clap = try parseEx(Id, params, &iter, .{ // Let's reuse the arena from the `OSIterator` since we already have it. - .allocator = iter.arena.allocator(), + .allocator = arena.allocator(), .diagnostic = opt.diagnostic, }); return Args(Id, params){ - .exe_arg = iter.exe_arg, - .arena = iter.arena, + .exe_arg = exe_arg, + .arena = arena, .clap = clap, }; } diff --git a/clap/args.zig b/clap/args.zig index 90c50fa..d0aaee3 100644 --- a/clap/args.zig +++ b/clap/args.zig @@ -9,9 +9,7 @@ const testing = std.testing; /// An example of what methods should be implemented on an arg iterator. pub const ExampleArgIterator = struct { - const Error = error{}; - - pub fn next(iter: *ExampleArgIterator) Error!?[]const u8 { + pub fn next(iter: *ExampleArgIterator) ?[]const u8 { _ = iter; return "2"; } @@ -20,12 +18,10 @@ pub const ExampleArgIterator = struct { /// An argument iterator which iterates over a slice of arguments. /// This implementation does not allocate. 
pub const SliceIterator = struct { - const Error = error{}; - args: []const []const u8, index: usize = 0, - pub fn next(iter: *SliceIterator) Error!?[]const u8 { + pub fn next(iter: *SliceIterator) ?[]const u8 { if (iter.args.len <= iter.index) return null; @@ -38,310 +34,8 @@ test "SliceIterator" { const args = [_][]const u8{ "A", "BB", "CCC" }; var iter = SliceIterator{ .args = &args }; - for (args) |a| { - const b = try iter.next(); - debug.assert(mem.eql(u8, a, b.?)); - } -} - -/// An argument iterator which wraps the ArgIterator in ::std. -/// On windows, this iterator allocates. -pub const OsIterator = struct { - const Error = process.ArgIterator.NextError; - - arena: heap.ArenaAllocator, - args: process.ArgIterator, - - /// The executable path (this is the first argument passed to the program) - /// TODO: Is it the right choice for this to be null? Maybe `init` should - /// return an error when we have no exe. - exe_arg: ?[:0]const u8, - - pub fn init(allocator: mem.Allocator) Error!OsIterator { - var res = OsIterator{ - .arena = heap.ArenaAllocator.init(allocator), - .args = process.args(), - .exe_arg = undefined, - }; - res.exe_arg = try res.next(); - return res; - } - - pub fn deinit(iter: *OsIterator) void { - iter.arena.deinit(); - } - - pub fn next(iter: *OsIterator) Error!?[:0]const u8 { - if (builtin.os.tag == .windows) { - return (try iter.args.next(iter.arena.allocator())) orelse return null; - } else { - return iter.args.nextPosix(); - } - } -}; - -/// An argument iterator that takes a string and parses it into arguments, simulating -/// how shells split arguments. 
-pub const ShellIterator = struct { - const Error = error{ - DanglingEscape, - QuoteNotClosed, - } || mem.Allocator.Error; - - arena: heap.ArenaAllocator, - str: []const u8, - - pub fn init(allocator: mem.Allocator, str: []const u8) ShellIterator { - return .{ - .arena = heap.ArenaAllocator.init(allocator), - .str = str, - }; - } - - pub fn deinit(iter: *ShellIterator) void { - iter.arena.deinit(); - } - - pub fn next(iter: *ShellIterator) Error!?[]const u8 { - // Whenever possible, this iterator will return slices into `str` instead of - // allocating. Sometimes this is not possible, for example, escaped characters - // have be be unescape, so we need to allocate in this case. - var list = std.ArrayList(u8).init(iter.arena.allocator()); - var start: usize = 0; - var state: enum { - skip_whitespace, - no_quote, - no_quote_escape, - single_quote, - double_quote, - double_quote_escape, - after_quote, - } = .skip_whitespace; - - for (iter.str) |c, i| { - switch (state) { - // The state that skips the initial whitespace. - .skip_whitespace => switch (c) { - ' ', '\t', '\n' => {}, - '\'' => { - start = i + 1; - state = .single_quote; - }, - '"' => { - start = i + 1; - state = .double_quote; - }, - '\\' => { - start = i + 1; - state = .no_quote_escape; - }, - else => { - start = i; - state = .no_quote; - }, - }, - - // The state that parses the none quoted part of a argument. - .no_quote => switch (c) { - // We're done parsing a none quoted argument when we hit a - // whitespace. - ' ', '\t', '\n' => { - defer iter.str = iter.str[i..]; - return iter.result(start, i, &list); - }, - - // Slicing is not possible if a quote starts while parsing none - // quoted args. - // Example: - // ab'cd' -> abcd - '\'' => { - try list.appendSlice(iter.str[start..i]); - start = i + 1; - state = .single_quote; - }, - '"' => { - try list.appendSlice(iter.str[start..i]); - start = i + 1; - state = .double_quote; - }, - - // Slicing is not possible if we need to escape a character. 
- // Example: - // ab\"d -> ab"d - '\\' => { - try list.appendSlice(iter.str[start..i]); - start = i + 1; - state = .no_quote_escape; - }, - else => {}, - }, - - // We're in this state after having parsed the quoted part of an - // argument. This state works mostly the same as .no_quote, but - // is aware, that the last character seen was a quote, which should - // not be part of the argument. This is why you will see `i - 1` here - // instead of just `i` when `iter.str` is sliced. - .after_quote => switch (c) { - ' ', '\t', '\n' => { - defer iter.str = iter.str[i..]; - return iter.result(start, i - 1, &list); - }, - '\'' => { - try list.appendSlice(iter.str[start .. i - 1]); - start = i + 1; - state = .single_quote; - }, - '"' => { - try list.appendSlice(iter.str[start .. i - 1]); - start = i + 1; - state = .double_quote; - }, - '\\' => { - try list.appendSlice(iter.str[start .. i - 1]); - start = i + 1; - state = .no_quote_escape; - }, - else => { - try list.appendSlice(iter.str[start .. i - 1]); - start = i; - state = .no_quote; - }, - }, - - // The states that parse the quoted part of arguments. The only differnece - // between single and double quoted arguments is that single quoted - // arguments ignore escape sequences, while double quoted arguments - // does escaping. - .single_quote => switch (c) { - '\'' => state = .after_quote, - else => {}, - }, - .double_quote => switch (c) { - '"' => state = .after_quote, - '\\' => { - try list.appendSlice(iter.str[start..i]); - start = i + 1; - state = .double_quote_escape; - }, - else => {}, - }, - - // The state we end up when after the escape character (`\`). All these - // states do is transition back into the previous state. - // TODO: Are there any escape sequences that does transform the second - // character into something else? For example, in Zig, `\n` is - // transformed into the line feed ascii character. 
- .no_quote_escape => switch (c) { - else => state = .no_quote, - }, - .double_quote_escape => switch (c) { - else => state = .double_quote, - }, - } - } - - defer iter.str = iter.str[iter.str.len..]; - switch (state) { - .skip_whitespace => return null, - .no_quote => return iter.result(start, iter.str.len, &list), - .after_quote => return iter.result(start, iter.str.len - 1, &list), - .no_quote_escape => return Error.DanglingEscape, - .single_quote, - .double_quote, - .double_quote_escape, - => return Error.QuoteNotClosed, - } - } - - fn result( - iter: *ShellIterator, - start: usize, - end: usize, - list: *std.ArrayList(u8), - ) Error!?[]const u8 { - const res = iter.str[start..end]; - - // If we already have something in `list` that means that we could not - // parse the argument without allocation. We therefor need to just append - // the rest we have to the list and return that. - if (list.items.len != 0) { - try list.appendSlice(res); - return list.toOwnedSlice(); - } - return res; - } -}; - -fn testShellIteratorOk(str: []const u8, allocations: usize, expect: []const []const u8) !void { - var allocator = testing.FailingAllocator.init(testing.allocator, allocations); - var it = ShellIterator.init(allocator.allocator(), str); - defer it.deinit(); - - for (expect) |e| { - if (it.next()) |actual| { - try testing.expect(actual != null); - try testing.expectEqualStrings(e, actual.?); - } else |err| try testing.expectEqual(@as(anyerror![]const u8, e), err); - } - - if (it.next()) |actual| { - try testing.expectEqual(@as(?[]const u8, null), actual); - try testing.expectEqual(allocations, allocator.allocations); - } else |err| try testing.expectEqual(@as(anyerror!void, {}), err); -} - -fn testShellIteratorErr(str: []const u8, expect: anyerror) !void { - var it = ShellIterator.init(testing.allocator, str); - defer it.deinit(); - - while (it.next() catch |err| { - try testing.expectError(expect, @as(anyerror!void, err)); - return; - }) |_| {} - - try 
testing.expectError(expect, @as(anyerror!void, {})); -} - -test "ShellIterator" { - try testShellIteratorOk("a", 0, &.{"a"}); - try testShellIteratorOk("'a'", 0, &.{"a"}); - try testShellIteratorOk("\"a\"", 0, &.{"a"}); - try testShellIteratorOk("a b", 0, &.{ "a", "b" }); - try testShellIteratorOk("'a' b", 0, &.{ "a", "b" }); - try testShellIteratorOk("\"a\" b", 0, &.{ "a", "b" }); - try testShellIteratorOk("a 'b'", 0, &.{ "a", "b" }); - try testShellIteratorOk("a \"b\"", 0, &.{ "a", "b" }); - try testShellIteratorOk("'a b'", 0, &.{"a b"}); - try testShellIteratorOk("\"a b\"", 0, &.{"a b"}); - try testShellIteratorOk("\"a\"\"b\"", 1, &.{"ab"}); - try testShellIteratorOk("'a''b'", 1, &.{"ab"}); - try testShellIteratorOk("'a'b", 1, &.{"ab"}); - try testShellIteratorOk("a'b'", 1, &.{"ab"}); - try testShellIteratorOk("a\\ b", 1, &.{"a b"}); - try testShellIteratorOk("\"a\\ b\"", 1, &.{"a b"}); - try testShellIteratorOk("'a\\ b'", 0, &.{"a\\ b"}); - try testShellIteratorOk(" a b ", 0, &.{ "a", "b" }); - try testShellIteratorOk("\\ \\ ", 0, &.{ " ", " " }); - - try testShellIteratorOk( - \\printf 'run\nuninstall\n' - , 0, &.{ "printf", "run\\nuninstall\\n" }); - try testShellIteratorOk( - \\setsid -f steam "steam://$action/$id" - , 0, &.{ "setsid", "-f", "steam", "steam://$action/$id" }); - try testShellIteratorOk( - \\xargs -I% rg --no-heading --no-line-number --only-matching - \\ --case-sensitive --multiline --text --byte-offset '(?-u)%' $@ - \\ - , 0, &.{ - "xargs", "-I%", "rg", "--no-heading", - "--no-line-number", "--only-matching", "--case-sensitive", "--multiline", - "--text", "--byte-offset", "(?-u)%", "$@", - }); + for (args) |a| + try testing.expectEqualStrings(a, iter.next().?); - try testShellIteratorErr("'a", error.QuoteNotClosed); - try testShellIteratorErr("'a\\", error.QuoteNotClosed); - try testShellIteratorErr("\"a", error.QuoteNotClosed); - try testShellIteratorErr("\"a\\", error.QuoteNotClosed); - try testShellIteratorErr("a\\", error.DanglingEscape); 
+ try testing.expectEqual(@as(?[]const u8, null), iter.next()); } diff --git a/clap/streaming.zig b/clap/streaming.zig index 3f24aaa..8eca51a 100644 --- a/clap/streaming.zig +++ b/clap/streaming.zig @@ -49,7 +49,7 @@ pub fn StreamingClap(comptime Id: type, comptime ArgIterator: type) type { .chaining => |state| return try parser.chaining(state), .rest_are_positional => { const param = parser.positionalParam() orelse unreachable; - const value = (try parser.iter.next()) orelse return null; + const value = parser.iter.next() orelse return null; return Arg(Id){ .param = param, .value = value }; }, } @@ -80,7 +80,7 @@ pub fn StreamingClap(comptime Id: type, comptime ArgIterator: type) type { if (maybe_value) |v| break :blk v; - break :blk (try parser.iter.next()) orelse + break :blk parser.iter.next() orelse return parser.err(arg, .{ .long = name }, error.MissingValue); }; @@ -99,7 +99,7 @@ pub fn StreamingClap(comptime Id: type, comptime ArgIterator: type) type { // arguments. if (mem.eql(u8, arg, "--")) { parser.state = .rest_are_positional; - const value = (try parser.iter.next()) orelse return null; + const value = parser.iter.next() orelse return null; return Arg(Id){ .param = param, .value = value }; } @@ -142,7 +142,7 @@ pub fn StreamingClap(comptime Id: type, comptime ArgIterator: type) type { } if (arg.len <= next_index) { - const value = (try parser.iter.next()) orelse + const value = parser.iter.next() orelse return parser.err(arg, .{ .short = short }, error.MissingValue); return Arg(Id){ .param = param, .value = value }; @@ -184,7 +184,7 @@ pub fn StreamingClap(comptime Id: type, comptime ArgIterator: type) type { }; fn parseNextArg(parser: *@This()) !?ArgInfo { - const full_arg = (try parser.iter.next()) orelse return null; + const full_arg = parser.iter.next() orelse return null; if (mem.eql(u8, full_arg, "--") or mem.eql(u8, full_arg, "-")) return ArgInfo{ .arg = full_arg, .kind = .positional }; if (mem.startsWith(u8, full_arg, "--")) diff --git 
a/example/simple-ex.zig b/example/simple-ex.zig index 5653fd1..d2dc77e 100644 --- a/example/simple-ex.zig +++ b/example/simple-ex.zig @@ -3,6 +3,7 @@ const std = @import("std"); const debug = std.debug; const io = std.io; +const process = std.process; pub fn main() !void { const allocator = std.heap.page_allocator; @@ -16,11 +17,12 @@ pub fn main() !void { clap.parseParam("...") catch unreachable, }; - // We then initialize an argument iterator. We will use the OsIterator as it nicely - // wraps iterating over arguments the most efficient way on each os. - var iter = try clap.args.OsIterator.init(allocator); + var iter = try process.ArgIterator.initWithAllocator(allocator); defer iter.deinit(); + // Skip exe argument + _ = iter.next(); + // Initalize our diagnostics, which can be used for reporting useful errors. // This is optional. You can also pass `.{}` to `clap.parse` if you don't // care about the extra information `Diagnostics` provides. diff --git a/example/streaming-clap.zig b/example/streaming-clap.zig index 9ed38dd..a7ab7d8 100644 --- a/example/streaming-clap.zig +++ b/example/streaming-clap.zig @@ -3,6 +3,7 @@ const std = @import("std"); const debug = std.debug; const io = std.io; +const process = std.process; pub fn main() !void { const allocator = std.heap.page_allocator; @@ -21,16 +22,17 @@ pub fn main() !void { .{ .id = 'f', .takes_value = .one }, }; - // We then initialize an argument iterator. We will use the OsIterator as it nicely - // wraps iterating over arguments the most efficient way on each os. - var iter = try clap.args.OsIterator.init(allocator); + var iter = try process.ArgIterator.initWithAllocator(allocator); defer iter.deinit(); + // Skip exe argument + _ = iter.next(); + // Initalize our diagnostics, which can be used for reporting useful errors. // This is optional. You can also leave the `diagnostic` field unset if you // don't care about the extra information `Diagnostic` provides. 
var diag = clap.Diagnostic{}; - var parser = clap.StreamingClap(u8, clap.args.OsIterator){ + var parser = clap.StreamingClap(u8, process.ArgIterator){ .params = &params, .iter = &iter, .diagnostic = &diag, -- cgit v1.2.3