From 7188a9fc85f6aa0f71a4cb7966f8b0a044f29e02 Mon Sep 17 00:00:00 2001 From: Jimmi Holst Christensen Date: Mon, 31 Jan 2022 17:11:15 +0100 Subject: Refactor the ArgIterator interface The arg iterators now follow the interface provided by the standard library. This means that we no longer need `args.OsIterator`, as the one from `std` can now be used directly. Also remove `args.ShellIterator` as a similar iterator exists in `std` called `ArgIteratorGeneral`. --- clap/args.zig | 316 +---------------------------------------------------- clap/streaming.zig | 10 +- 2 files changed, 10 insertions(+), 316 deletions(-) (limited to 'clap') diff --git a/clap/args.zig b/clap/args.zig index 90c50fa..d0aaee3 100644 --- a/clap/args.zig +++ b/clap/args.zig @@ -9,9 +9,7 @@ const testing = std.testing; /// An example of what methods should be implemented on an arg iterator. pub const ExampleArgIterator = struct { - const Error = error{}; - - pub fn next(iter: *ExampleArgIterator) Error!?[]const u8 { + pub fn next(iter: *ExampleArgIterator) ?[]const u8 { _ = iter; return "2"; } @@ -20,12 +18,10 @@ pub const ExampleArgIterator = struct { /// An argument iterator which iterates over a slice of arguments. /// This implementation does not allocate. pub const SliceIterator = struct { - const Error = error{}; - args: []const []const u8, index: usize = 0, - pub fn next(iter: *SliceIterator) Error!?[]const u8 { + pub fn next(iter: *SliceIterator) ?[]const u8 { if (iter.args.len <= iter.index) return null; @@ -38,310 +34,8 @@ test "SliceIterator" { const args = [_][]const u8{ "A", "BB", "CCC" }; var iter = SliceIterator{ .args = &args }; - for (args) |a| { - const b = try iter.next(); - debug.assert(mem.eql(u8, a, b.?)); - } -} - -/// An argument iterator which wraps the ArgIterator in ::std. -/// On windows, this iterator allocates. 
-pub const OsIterator = struct { - const Error = process.ArgIterator.NextError; - - arena: heap.ArenaAllocator, - args: process.ArgIterator, - - /// The executable path (this is the first argument passed to the program) - /// TODO: Is it the right choice for this to be null? Maybe `init` should - /// return an error when we have no exe. - exe_arg: ?[:0]const u8, - - pub fn init(allocator: mem.Allocator) Error!OsIterator { - var res = OsIterator{ - .arena = heap.ArenaAllocator.init(allocator), - .args = process.args(), - .exe_arg = undefined, - }; - res.exe_arg = try res.next(); - return res; - } - - pub fn deinit(iter: *OsIterator) void { - iter.arena.deinit(); - } - - pub fn next(iter: *OsIterator) Error!?[:0]const u8 { - if (builtin.os.tag == .windows) { - return (try iter.args.next(iter.arena.allocator())) orelse return null; - } else { - return iter.args.nextPosix(); - } - } -}; - -/// An argument iterator that takes a string and parses it into arguments, simulating -/// how shells split arguments. -pub const ShellIterator = struct { - const Error = error{ - DanglingEscape, - QuoteNotClosed, - } || mem.Allocator.Error; - - arena: heap.ArenaAllocator, - str: []const u8, - - pub fn init(allocator: mem.Allocator, str: []const u8) ShellIterator { - return .{ - .arena = heap.ArenaAllocator.init(allocator), - .str = str, - }; - } - - pub fn deinit(iter: *ShellIterator) void { - iter.arena.deinit(); - } - - pub fn next(iter: *ShellIterator) Error!?[]const u8 { - // Whenever possible, this iterator will return slices into `str` instead of - // allocating. Sometimes this is not possible, for example, escaped characters - // have be be unescape, so we need to allocate in this case. 
- var list = std.ArrayList(u8).init(iter.arena.allocator()); - var start: usize = 0; - var state: enum { - skip_whitespace, - no_quote, - no_quote_escape, - single_quote, - double_quote, - double_quote_escape, - after_quote, - } = .skip_whitespace; - - for (iter.str) |c, i| { - switch (state) { - // The state that skips the initial whitespace. - .skip_whitespace => switch (c) { - ' ', '\t', '\n' => {}, - '\'' => { - start = i + 1; - state = .single_quote; - }, - '"' => { - start = i + 1; - state = .double_quote; - }, - '\\' => { - start = i + 1; - state = .no_quote_escape; - }, - else => { - start = i; - state = .no_quote; - }, - }, - - // The state that parses the none quoted part of a argument. - .no_quote => switch (c) { - // We're done parsing a none quoted argument when we hit a - // whitespace. - ' ', '\t', '\n' => { - defer iter.str = iter.str[i..]; - return iter.result(start, i, &list); - }, - - // Slicing is not possible if a quote starts while parsing none - // quoted args. - // Example: - // ab'cd' -> abcd - '\'' => { - try list.appendSlice(iter.str[start..i]); - start = i + 1; - state = .single_quote; - }, - '"' => { - try list.appendSlice(iter.str[start..i]); - start = i + 1; - state = .double_quote; - }, - - // Slicing is not possible if we need to escape a character. - // Example: - // ab\"d -> ab"d - '\\' => { - try list.appendSlice(iter.str[start..i]); - start = i + 1; - state = .no_quote_escape; - }, - else => {}, - }, - - // We're in this state after having parsed the quoted part of an - // argument. This state works mostly the same as .no_quote, but - // is aware, that the last character seen was a quote, which should - // not be part of the argument. This is why you will see `i - 1` here - // instead of just `i` when `iter.str` is sliced. - .after_quote => switch (c) { - ' ', '\t', '\n' => { - defer iter.str = iter.str[i..]; - return iter.result(start, i - 1, &list); - }, - '\'' => { - try list.appendSlice(iter.str[start .. 
i - 1]); - start = i + 1; - state = .single_quote; - }, - '"' => { - try list.appendSlice(iter.str[start .. i - 1]); - start = i + 1; - state = .double_quote; - }, - '\\' => { - try list.appendSlice(iter.str[start .. i - 1]); - start = i + 1; - state = .no_quote_escape; - }, - else => { - try list.appendSlice(iter.str[start .. i - 1]); - start = i; - state = .no_quote; - }, - }, - - // The states that parse the quoted part of arguments. The only differnece - // between single and double quoted arguments is that single quoted - // arguments ignore escape sequences, while double quoted arguments - // does escaping. - .single_quote => switch (c) { - '\'' => state = .after_quote, - else => {}, - }, - .double_quote => switch (c) { - '"' => state = .after_quote, - '\\' => { - try list.appendSlice(iter.str[start..i]); - start = i + 1; - state = .double_quote_escape; - }, - else => {}, - }, - - // The state we end up when after the escape character (`\`). All these - // states do is transition back into the previous state. - // TODO: Are there any escape sequences that does transform the second - // character into something else? For example, in Zig, `\n` is - // transformed into the line feed ascii character. 
- .no_quote_escape => switch (c) { - else => state = .no_quote, - }, - .double_quote_escape => switch (c) { - else => state = .double_quote, - }, - } - } - - defer iter.str = iter.str[iter.str.len..]; - switch (state) { - .skip_whitespace => return null, - .no_quote => return iter.result(start, iter.str.len, &list), - .after_quote => return iter.result(start, iter.str.len - 1, &list), - .no_quote_escape => return Error.DanglingEscape, - .single_quote, - .double_quote, - .double_quote_escape, - => return Error.QuoteNotClosed, - } - } - - fn result( - iter: *ShellIterator, - start: usize, - end: usize, - list: *std.ArrayList(u8), - ) Error!?[]const u8 { - const res = iter.str[start..end]; - - // If we already have something in `list` that means that we could not - // parse the argument without allocation. We therefor need to just append - // the rest we have to the list and return that. - if (list.items.len != 0) { - try list.appendSlice(res); - return list.toOwnedSlice(); - } - return res; - } -}; - -fn testShellIteratorOk(str: []const u8, allocations: usize, expect: []const []const u8) !void { - var allocator = testing.FailingAllocator.init(testing.allocator, allocations); - var it = ShellIterator.init(allocator.allocator(), str); - defer it.deinit(); - - for (expect) |e| { - if (it.next()) |actual| { - try testing.expect(actual != null); - try testing.expectEqualStrings(e, actual.?); - } else |err| try testing.expectEqual(@as(anyerror![]const u8, e), err); - } - - if (it.next()) |actual| { - try testing.expectEqual(@as(?[]const u8, null), actual); - try testing.expectEqual(allocations, allocator.allocations); - } else |err| try testing.expectEqual(@as(anyerror!void, {}), err); -} - -fn testShellIteratorErr(str: []const u8, expect: anyerror) !void { - var it = ShellIterator.init(testing.allocator, str); - defer it.deinit(); - - while (it.next() catch |err| { - try testing.expectError(expect, @as(anyerror!void, err)); - return; - }) |_| {} - - try 
testing.expectError(expect, @as(anyerror!void, {})); -} - -test "ShellIterator" { - try testShellIteratorOk("a", 0, &.{"a"}); - try testShellIteratorOk("'a'", 0, &.{"a"}); - try testShellIteratorOk("\"a\"", 0, &.{"a"}); - try testShellIteratorOk("a b", 0, &.{ "a", "b" }); - try testShellIteratorOk("'a' b", 0, &.{ "a", "b" }); - try testShellIteratorOk("\"a\" b", 0, &.{ "a", "b" }); - try testShellIteratorOk("a 'b'", 0, &.{ "a", "b" }); - try testShellIteratorOk("a \"b\"", 0, &.{ "a", "b" }); - try testShellIteratorOk("'a b'", 0, &.{"a b"}); - try testShellIteratorOk("\"a b\"", 0, &.{"a b"}); - try testShellIteratorOk("\"a\"\"b\"", 1, &.{"ab"}); - try testShellIteratorOk("'a''b'", 1, &.{"ab"}); - try testShellIteratorOk("'a'b", 1, &.{"ab"}); - try testShellIteratorOk("a'b'", 1, &.{"ab"}); - try testShellIteratorOk("a\\ b", 1, &.{"a b"}); - try testShellIteratorOk("\"a\\ b\"", 1, &.{"a b"}); - try testShellIteratorOk("'a\\ b'", 0, &.{"a\\ b"}); - try testShellIteratorOk(" a b ", 0, &.{ "a", "b" }); - try testShellIteratorOk("\\ \\ ", 0, &.{ " ", " " }); - - try testShellIteratorOk( - \\printf 'run\nuninstall\n' - , 0, &.{ "printf", "run\\nuninstall\\n" }); - try testShellIteratorOk( - \\setsid -f steam "steam://$action/$id" - , 0, &.{ "setsid", "-f", "steam", "steam://$action/$id" }); - try testShellIteratorOk( - \\xargs -I% rg --no-heading --no-line-number --only-matching - \\ --case-sensitive --multiline --text --byte-offset '(?-u)%' $@ - \\ - , 0, &.{ - "xargs", "-I%", "rg", "--no-heading", - "--no-line-number", "--only-matching", "--case-sensitive", "--multiline", - "--text", "--byte-offset", "(?-u)%", "$@", - }); + for (args) |a| + try testing.expectEqualStrings(a, iter.next().?); - try testShellIteratorErr("'a", error.QuoteNotClosed); - try testShellIteratorErr("'a\\", error.QuoteNotClosed); - try testShellIteratorErr("\"a", error.QuoteNotClosed); - try testShellIteratorErr("\"a\\", error.QuoteNotClosed); - try testShellIteratorErr("a\\", error.DanglingEscape); 
+ try testing.expectEqual(@as(?[]const u8, null), iter.next()); } diff --git a/clap/streaming.zig b/clap/streaming.zig index 3f24aaa..8eca51a 100644 --- a/clap/streaming.zig +++ b/clap/streaming.zig @@ -49,7 +49,7 @@ pub fn StreamingClap(comptime Id: type, comptime ArgIterator: type) type { .chaining => |state| return try parser.chaining(state), .rest_are_positional => { const param = parser.positionalParam() orelse unreachable; - const value = (try parser.iter.next()) orelse return null; + const value = parser.iter.next() orelse return null; return Arg(Id){ .param = param, .value = value }; }, } @@ -80,7 +80,7 @@ pub fn StreamingClap(comptime Id: type, comptime ArgIterator: type) type { if (maybe_value) |v| break :blk v; - break :blk (try parser.iter.next()) orelse + break :blk parser.iter.next() orelse return parser.err(arg, .{ .long = name }, error.MissingValue); }; @@ -99,7 +99,7 @@ pub fn StreamingClap(comptime Id: type, comptime ArgIterator: type) type { // arguments. if (mem.eql(u8, arg, "--")) { parser.state = .rest_are_positional; - const value = (try parser.iter.next()) orelse return null; + const value = parser.iter.next() orelse return null; return Arg(Id){ .param = param, .value = value }; } @@ -142,7 +142,7 @@ pub fn StreamingClap(comptime Id: type, comptime ArgIterator: type) type { } if (arg.len <= next_index) { - const value = (try parser.iter.next()) orelse + const value = parser.iter.next() orelse return parser.err(arg, .{ .short = short }, error.MissingValue); return Arg(Id){ .param = param, .value = value }; @@ -184,7 +184,7 @@ pub fn StreamingClap(comptime Id: type, comptime ArgIterator: type) type { }; fn parseNextArg(parser: *@This()) !?ArgInfo { - const full_arg = (try parser.iter.next()) orelse return null; + const full_arg = parser.iter.next() orelse return null; if (mem.eql(u8, full_arg, "--") or mem.eql(u8, full_arg, "-")) return ArgInfo{ .arg = full_arg, .kind = .positional }; if (mem.startsWith(u8, full_arg, "--")) -- cgit v1.2.3