diff options
| -rw-r--r-- | clap/args.zig | 262 |
1 files changed, 262 insertions, 0 deletions
diff --git a/clap/args.zig b/clap/args.zig index 52626fc..2868050 100644 --- a/clap/args.zig +++ b/clap/args.zig | |||
| @@ -5,6 +5,7 @@ const debug = std.debug; | |||
| 5 | const heap = std.heap; | 5 | const heap = std.heap; |
| 6 | const mem = std.mem; | 6 | const mem = std.mem; |
| 7 | const process = std.process; | 7 | const process = std.process; |
| 8 | const testing = std.testing; | ||
| 8 | 9 | ||
| 9 | /// An example of what methods should be implemented on an arg iterator. | 10 | /// An example of what methods should be implemented on an arg iterator. |
| 10 | pub const ExampleArgIterator = struct { | 11 | pub const ExampleArgIterator = struct { |
| @@ -77,3 +78,264 @@ pub const OsIterator = struct { | |||
| 77 | } | 78 | } |
| 78 | } | 79 | } |
| 79 | }; | 80 | }; |
| 81 | |||
/// An argument iterator that takes a string and parses it into arguments, simulating
/// how shells split arguments.
///
/// Arguments are separated by spaces, tabs and line feeds. Single quotes group text
/// verbatim, double quotes group text but honor `\` escapes, and outside of quotes a
/// `\` escapes the character that follows it.
pub const ShellIterator = struct {
    /// The errors `next` can return.
    ///  * `DanglingEscape`: the string ends right after an unquoted `\`.
    ///  * `QuoteNotClosed`: the string ends inside a single or double quoted section.
    ///  * `mem.Allocator.Error`: an argument had to be copied and allocation failed.
    pub const Error = error{
        DanglingEscape,
        QuoteNotClosed,
    } || mem.Allocator.Error;

    /// Owns every argument that could not be returned as a slice into `str`.
    arena: heap.ArenaAllocator,

    /// The part of the input that has not been consumed yet.
    str: []const u8,

    /// Creates an iterator over the arguments in `str`. `str` is not copied, so it has
    /// to stay alive for as long as the iterator and the arguments it returned are used.
    /// `allocator` backs the arena used for arguments that need unescaping or joining.
    pub fn init(allocator: *mem.Allocator, str: []const u8) ShellIterator {
        return .{
            .arena = heap.ArenaAllocator.init(allocator),
            .str = str,
        };
    }

    /// Frees the arena, and with it every argument `next` had to allocate.
    pub fn deinit(iter: *ShellIterator) void {
        iter.arena.deinit();
    }

    /// Parses and returns the next argument, or `null` when only whitespace (or
    /// nothing) is left. The returned slice either points into the string passed to
    /// `init` or into memory owned by the iterator's arena.
    pub fn next(iter: *ShellIterator) Error!?[]const u8 {
        // Whenever possible, this iterator will return slices into `str` instead of
        // allocating. Sometimes this is not possible, for example, escaped characters
        // have to be unescaped, so we need to allocate in this case.
        var list = std.ArrayList(u8).init(&iter.arena.allocator);
        var start: usize = 0;
        var state: enum {
            skip_whitespace,
            no_quote,
            no_quote_escape,
            single_quote,
            double_quote,
            double_quote_escape,
            after_quote,
        } = .skip_whitespace;

        for (iter.str) |c, i| {
            switch (state) {
                // The state that skips the initial whitespace.
                .skip_whitespace => switch (c) {
                    ' ', '\t', '\n' => {},
                    '\'' => {
                        start = i + 1;
                        state = .single_quote;
                    },
                    '"' => {
                        start = i + 1;
                        state = .double_quote;
                    },
                    '\\' => {
                        start = i + 1;
                        state = .no_quote_escape;
                    },
                    else => {
                        start = i;
                        state = .no_quote;
                    },
                },

                // The state that parses the non-quoted part of an argument.
                .no_quote => switch (c) {
                    // We are done parsing a non-quoted argument when we hit a
                    // whitespace.
                    ' ', '\t', '\n' => {
                        defer iter.str = iter.str[i..];
                        return iter.result(start, i, &list);
                    },

                    // Slicing is not possible if a quote starts while parsing
                    // non-quoted args.
                    // Example:
                    // ab'cd' -> abcd
                    '\'' => {
                        try list.appendSlice(iter.str[start..i]);
                        start = i + 1;
                        state = .single_quote;
                    },
                    '"' => {
                        try list.appendSlice(iter.str[start..i]);
                        start = i + 1;
                        state = .double_quote;
                    },

                    // Slicing is not possible if we need to escape a character.
                    // Example:
                    // ab\"d -> ab"d
                    '\\' => {
                        try list.appendSlice(iter.str[start..i]);
                        start = i + 1;
                        state = .no_quote_escape;
                    },
                    else => {},
                },

                // We're in this state after having parsed the quoted part of an
                // argument. This state works mostly the same as .no_quote, but
                // is aware that the last character seen was a quote, which should
                // not be part of the argument. This is why you will see `i - 1` here
                // instead of just `i` when `iter.str` is sliced.
                .after_quote => switch (c) {
                    ' ', '\t', '\n' => {
                        defer iter.str = iter.str[i..];
                        return iter.result(start, i - 1, &list);
                    },
                    '\'' => {
                        try list.appendSlice(iter.str[start .. i - 1]);
                        start = i + 1;
                        state = .single_quote;
                    },
                    '"' => {
                        try list.appendSlice(iter.str[start .. i - 1]);
                        start = i + 1;
                        state = .double_quote;
                    },
                    '\\' => {
                        try list.appendSlice(iter.str[start .. i - 1]);
                        start = i + 1;
                        state = .no_quote_escape;
                    },
                    else => {
                        try list.appendSlice(iter.str[start .. i - 1]);
                        start = i;
                        state = .no_quote;
                    },
                },

                // The states that parse the quoted part of arguments. The only difference
                // between single and double quoted arguments is that single quoted
                // arguments ignore escape sequences, while double quoted arguments
                // do escaping.
                .single_quote => switch (c) {
                    '\'' => state = .after_quote,
                    else => {},
                },
                .double_quote => switch (c) {
                    '"' => state = .after_quote,
                    '\\' => {
                        try list.appendSlice(iter.str[start..i]);
                        start = i + 1;
                        state = .double_quote_escape;
                    },
                    else => {},
                },

                // The states we end up in after the escape character (`\`). All these
                // states do is transition back into the previous state.
                // TODO: Are there any escape sequences that transform the second
                //       character into something else? For example, in Zig, `\n` is
                //       transformed into the line feed ascii character.
                .no_quote_escape => switch (c) {
                    else => state = .no_quote,
                },
                .double_quote_escape => switch (c) {
                    else => state = .double_quote,
                },
            }
        }

        // The whole string has been scanned, so whatever happens below, nothing is
        // left for a following call to parse.
        defer iter.str = iter.str[iter.str.len..];
        switch (state) {
            .skip_whitespace => return null,
            .no_quote => return iter.result(start, iter.str.len, &list),
            // The last character is the closing quote, which is not part of the argument.
            .after_quote => return iter.result(start, iter.str.len - 1, &list),
            .no_quote_escape => return Error.DanglingEscape,
            .single_quote,
            .double_quote,
            .double_quote_escape,
            => return Error.QuoteNotClosed,
        }
    }

    /// Produces the finished argument from the pending slice `iter.str[start..end]`
    /// and whatever has been collected in `list` so far.
    fn result(iter: *ShellIterator, start: usize, end: usize, list: *std.ArrayList(u8)) Error!?[]const u8 {
        const res = iter.str[start..end];

        // If we already have something in `list` that means that we could not
        // parse the argument without allocation. We therefore need to just append
        // the rest we have to the list and return that.
        if (list.items.len != 0) {
            try list.appendSlice(res);
            return list.toOwnedSlice();
        }
        return res;
    }
};
| 268 | |||
/// Test helper: splits `str` with a `ShellIterator` and checks that it yields exactly
/// the arguments in `expect`, in order, followed by `null`. The iterator runs on a
/// `FailingAllocator` configured with `allocations`, and that same number is compared
/// against the allocator's allocation counter once iteration is done.
fn testShellIteratorOk(str: []const u8, allocations: usize, expect: []const []const u8) void {
    var failing = testing.FailingAllocator.init(testing.allocator, allocations);
    var iter = ShellIterator.init(&failing.allocator, str);
    defer iter.deinit();

    var index: usize = 0;
    while (index < expect.len) : (index += 1) {
        const wanted = expect[index];
        const got = iter.next() catch |err| {
            // An error where an argument was expected: compare the expected argument
            // against the error so the mismatch gets reported.
            testing.expectEqual(@as(anyerror![]const u8, wanted), err);
            continue;
        };
        testing.expect(got != null);
        testing.expectEqualStrings(wanted, got.?);
    }

    // After the expected arguments the iterator has to be exhausted.
    const last = iter.next() catch |err| {
        testing.expectEqual(@as(anyerror!void, {}), err);
        return;
    };
    testing.expectEqual(@as(?[]const u8, null), last);
    testing.expectEqual(allocations, failing.allocations);
}
| 286 | |||
/// Test helper: drains a `ShellIterator` over `str` and checks that iteration stops
/// with the error `expect`. Running out of arguments without any error is reported
/// as a failure.
fn testShellIteratorErr(str: []const u8, expect: anyerror) void {
    var iter = ShellIterator.init(testing.allocator, str);
    defer iter.deinit();

    while (true) {
        if (iter.next()) |maybe_arg| {
            // `null` means the iterator finished without ever failing.
            if (maybe_arg == null) break;
        } else |err| {
            testing.expectError(expect, @as(anyerror!void, err));
            return;
        }
    }

    // No error was produced: hand `expectError` a success value so it reports that.
    testing.expectError(expect, @as(anyerror!void, {}));
}
| 298 | |||
test "ShellIterator" {
    // An input, the allocation count handed to `testShellIteratorOk`, and the
    // arguments the input is expected to split into.
    const OkCase = struct {
        input: []const u8,
        allocations: usize,
        expect: []const []const u8,
    };
    const ok_cases = [_]OkCase{
        .{ .input = "a", .allocations = 0, .expect = &[_][]const u8{"a"} },
        .{ .input = "'a'", .allocations = 0, .expect = &[_][]const u8{"a"} },
        .{ .input = "\"a\"", .allocations = 0, .expect = &[_][]const u8{"a"} },
        .{ .input = "a b", .allocations = 0, .expect = &[_][]const u8{ "a", "b" } },
        .{ .input = "'a' b", .allocations = 0, .expect = &[_][]const u8{ "a", "b" } },
        .{ .input = "\"a\" b", .allocations = 0, .expect = &[_][]const u8{ "a", "b" } },
        .{ .input = "a 'b'", .allocations = 0, .expect = &[_][]const u8{ "a", "b" } },
        .{ .input = "a \"b\"", .allocations = 0, .expect = &[_][]const u8{ "a", "b" } },
        .{ .input = "'a b'", .allocations = 0, .expect = &[_][]const u8{"a b"} },
        .{ .input = "\"a b\"", .allocations = 0, .expect = &[_][]const u8{"a b"} },
        .{ .input = "\"a\"\"b\"", .allocations = 1, .expect = &[_][]const u8{"ab"} },
        .{ .input = "'a''b'", .allocations = 1, .expect = &[_][]const u8{"ab"} },
        .{ .input = "'a'b", .allocations = 1, .expect = &[_][]const u8{"ab"} },
        .{ .input = "a'b'", .allocations = 1, .expect = &[_][]const u8{"ab"} },
        .{ .input = "a\\ b", .allocations = 1, .expect = &[_][]const u8{"a b"} },
        .{ .input = "\"a\\ b\"", .allocations = 1, .expect = &[_][]const u8{"a b"} },
        .{ .input = "'a\\ b'", .allocations = 0, .expect = &[_][]const u8{"a\\ b"} },
        .{ .input = " a b ", .allocations = 0, .expect = &[_][]const u8{ "a", "b" } },
        .{ .input = "\\ \\ ", .allocations = 0, .expect = &[_][]const u8{ " ", " " } },
        .{
            .input =
            \\printf 'run\nuninstall\n'
            ,
            .allocations = 0,
            .expect = &[_][]const u8{ "printf", "run\\nuninstall\\n" },
        },
        .{
            .input =
            \\setsid -f steam "steam://$action/$id"
            ,
            .allocations = 0,
            .expect = &[_][]const u8{ "setsid", "-f", "steam", "steam://$action/$id" },
        },
        .{
            .input =
            \\xargs -I% rg --no-heading --no-line-number --only-matching
            \\ --case-sensitive --multiline --text --byte-offset '(?-u)%' $@
            \\
            ,
            .allocations = 0,
            .expect = &[_][]const u8{
                "xargs",            "-I%",             "rg",               "--no-heading",
                "--no-line-number", "--only-matching", "--case-sensitive", "--multiline",
                "--text",           "--byte-offset",   "(?-u)%",           "$@",
            },
        },
    };
    for (ok_cases) |case| {
        testShellIteratorOk(case.input, case.allocations, case.expect);
    }

    // Inputs that have to fail, paired with the error they must fail with.
    const ErrCase = struct {
        input: []const u8,
        expect: anyerror,
    };
    const err_cases = [_]ErrCase{
        .{ .input = "'a", .expect = error.QuoteNotClosed },
        .{ .input = "'a\\", .expect = error.QuoteNotClosed },
        .{ .input = "\"a", .expect = error.QuoteNotClosed },
        .{ .input = "\"a\\", .expect = error.QuoteNotClosed },
        .{ .input = "a\\", .expect = error.DanglingEscape },
    };
    for (err_cases) |case| {
        testShellIteratorErr(case.input, case.expect);
    }
}