diff --git a/example/cat/main.zig b/example/cat/main.zig index 432ed70bbba9..ce72000ffc3d 100644 --- a/example/cat/main.zig +++ b/example/cat/main.zig @@ -4,11 +4,12 @@ const mem = std.mem; const os = std.os; pub fn main() -> %void { - const exe = os.args.at(0); + const allocator = &std.debug.global_allocator; + var args_it = os.args(); + const exe = %return unwrapArg(??args_it.next(allocator)); var catted_anything = false; - var arg_i: usize = 1; - while (arg_i < os.args.count()) : (arg_i += 1) { - const arg = os.args.at(arg_i); + while (args_it.next(allocator)) |arg_or_err| { + const arg = %return unwrapArg(arg_or_err); if (mem.eql(u8, arg, "-")) { catted_anything = true; %return cat_stream(&io.stdin); @@ -55,3 +56,10 @@ fn cat_stream(is: &io.InStream) -> %void { }; } } + +fn unwrapArg(arg: %[]u8) -> %[]u8 { + return arg %% |err| { + %%io.stderr.printf("Unable to parse command line: {}\n", err); + return err; + }; +} diff --git a/std/array_list.zig b/std/array_list.zig index e01451792e3d..e844095c675e 100644 --- a/std/array_list.zig +++ b/std/array_list.zig @@ -14,6 +14,7 @@ pub fn ArrayList(comptime T: type) -> type{ len: usize, allocator: &Allocator, + /// Deinitialize with `deinit` or use `toOwnedSlice`. pub fn init(allocator: &Allocator) -> Self { Self { .items = []T{}, @@ -34,6 +35,25 @@ pub fn ArrayList(comptime T: type) -> type{ return l.items[0..l.len]; } + /// ArrayList takes ownership of the passed in slice. The slice must have been + /// allocated with `allocator`. + /// Deinitialize with `deinit` or use `toOwnedSlice`. + pub fn fromOwnedSlice(allocator: &Allocator, slice: []T) -> Self { + return Self { + .items = slice, + .len = slice.len, + .allocator = allocator, + }; + } + + /// The caller owns the returned memory. ArrayList becomes empty. + pub fn toOwnedSlice(self: &Self) -> []T { + const allocator = self.allocator; + const result = allocator.shrink(T, self.items, self.len); + *self = init(allocator); + return result; + } + pub fn append(l: &Self, item: &const T) -> %void { const new_item_ptr = %return l.addOne(); *new_item_ptr = *item; diff --git a/std/buffer.zig b/std/buffer.zig index 0ad530d7cfd2..b7b81e6442bc 100644 --- a/std/buffer.zig +++ b/std/buffer.zig @@ -38,6 +38,27 @@ pub const Buffer = struct { return Buffer.init(buffer.list.allocator, buffer.toSliceConst()); } + /// Buffer takes ownership of the passed in slice. The slice must have been + /// allocated with `allocator`. + /// Must deinitialize with deinit. + pub fn fromOwnedSlice(allocator: &Allocator, slice: []u8) -> Buffer { + var self = Buffer { + .list = ArrayList(u8).fromOwnedSlice(allocator, slice), + }; + self.list.append(0); + return self; + } + + /// The caller owns the returned memory. The Buffer becomes null and + /// is safe to `deinit`. + pub fn toOwnedSlice(self: &Buffer) -> []u8 { + const allocator = self.list.allocator; + const result = allocator.shrink(u8, self.list.items, self.len()); + *self = initNull(allocator); + return result; + } + + pub fn deinit(self: &Buffer) { self.list.deinit(); } diff --git a/std/os/index.zig b/std/os/index.zig index e31abaf62914..73bcfb2dd686 100644 --- a/std/os/index.zig +++ b/std/os/index.zig @@ -1,5 +1,7 @@ const builtin = @import("builtin"); const Os = builtin.Os; +const is_windows = builtin.os == Os.windows; + pub const windows = @import("windows/index.zig"); pub const darwin = @import("darwin.zig"); pub const linux = @import("linux.zig"); @@ -37,6 +39,7 @@ const cstr = @import("../cstr.zig"); const io = @import("../io.zig"); const base64 = @import("../base64.zig"); const ArrayList = @import("../array_list.zig").ArrayList; +const Buffer = @import("../buffer.zig").Buffer; error Unexpected; error SystemResources; @@ -513,18 +516,6 @@ pub fn getEnv(key: []const u8) -> ?[]const u8 { return null; } -pub const args = struct { - pub var raw: []&u8 = undefined; - - pub fn count() -> usize { - return raw.len; - } - pub fn at(i: usize) -> []const u8 { - const s = raw[i]; - return cstr.toSlice(s); - } -}; - /// Caller must free the returned memory. pub fn getCwd(allocator: &Allocator) -> %[]u8 { switch (builtin.os) { @@ -1144,6 +1135,233 @@ pub fn posix_setregid(rgid: u32, egid: u32) -> %void { }; } +pub const ArgIteratorPosix = struct { + index: usize, + count: usize, + + pub fn init() -> ArgIteratorPosix { + return ArgIteratorPosix { + .index = 0, + .count = raw.len, + }; + } + + pub fn next(self: &ArgIteratorPosix) -> ?[]const u8 { + if (self.index == self.count) + return null; + + const s = raw[self.index]; + self.index += 1; + return cstr.toSlice(s); + } + + pub fn skip(self: &ArgIteratorPosix) -> bool { + if (self.index == self.count) + return false; + + self.index += 1; + return true; + } + + /// This is marked as public but actually it's only meant to be used + /// internally by zig's startup code. + pub var raw: []&u8 = undefined; +}; + +pub const ArgIteratorWindows = struct { + index: usize, + cmd_line: &const u8, + backslash_count: usize, + in_quote: bool, + quote_count: usize, + seen_quote_count: usize, + + pub fn init() -> ArgIteratorWindows { + return initWithCmdLine(windows.GetCommandLineA()); + } + + pub fn initWithCmdLine(cmd_line: &const u8) -> ArgIteratorWindows { + return ArgIteratorWindows { + .index = 0, + .cmd_line = cmd_line, + .backslash_count = 0, + .in_quote = false, + .quote_count = countQuotes(cmd_line), + .seen_quote_count = 0, + }; + } + + /// You must free the returned memory when done. + pub fn next(self: &ArgIteratorWindows, allocator: &Allocator) -> ?%[]u8 { + // march forward over whitespace + while (true) : (self.index += 1) { + const byte = self.cmd_line[self.index]; + switch (byte) { + 0 => return null, + ' ', '\t' => continue, + else => break, + } + } + + return self.internalNext(allocator); + } + + pub fn skip(self: &ArgIteratorWindows) -> bool { + // march forward over whitespace + while (true) : (self.index += 1) { + const byte = self.cmd_line[self.index]; + switch (byte) { + 0 => return false, + ' ', '\t' => continue, + else => break, + } + } + + while (true) : (self.index += 1) { + const byte = self.cmd_line[self.index]; + switch (byte) { + 0 => return true, + '"' => { + const quote_is_real = self.backslash_count % 2 == 0; + if (quote_is_real) { + self.seen_quote_count += 1; + } + }, + '\\' => { + self.backslash_count += 1; + }, + ' ', '\t' => { + if (self.seen_quote_count % 2 == 0 or self.seen_quote_count == self.quote_count) { + return true; + } + }, + else => continue, + } + } + } + + fn internalNext(self: &ArgIteratorWindows, allocator: &Allocator) -> %[]u8 { + var buf = %return Buffer.initSize(allocator, 0); + defer buf.deinit(); + + while (true) : (self.index += 1) { + const byte = self.cmd_line[self.index]; + switch (byte) { + 0 => return buf.toOwnedSlice(), + '"' => { + const quote_is_real = self.backslash_count % 2 == 0; + %return self.emitBackslashes(&buf, self.backslash_count / 2); + + if (quote_is_real) { + self.seen_quote_count += 1; + if (self.seen_quote_count == self.quote_count and self.seen_quote_count % 2 == 1) { + %return buf.appendByte('"'); + } + } else { + %return buf.appendByte('"'); + } + }, + '\\' => { + self.backslash_count += 1; + }, + ' ', '\t' => { + %return self.emitBackslashes(&buf, self.backslash_count); + if (self.seen_quote_count % 2 == 1 and self.seen_quote_count != self.quote_count) { + %return buf.appendByte(byte); + } else { + return buf.toOwnedSlice(); + } + }, + else => { + %return self.emitBackslashes(&buf, self.backslash_count); + %return buf.appendByte(byte); + }, + } + } + } + + fn emitBackslashes(self: &ArgIteratorWindows, buf: &Buffer, emit_count: usize) -> %void { + self.backslash_count = 0; + var i: usize = 0; + while (i < emit_count) : (i += 1) { + %return buf.appendByte('\\'); + } + } + + fn countQuotes(cmd_line: &const u8) -> usize { + var result: usize = 0; + var backslash_count: usize = 0; + var index: usize = 0; + while (true) : (index += 1) { + const byte = cmd_line[index]; + switch (byte) { + 0 => return result, + '\\' => backslash_count += 1, + '"' => { + result += 1 - (backslash_count % 2); + backslash_count = 0; + }, + else => { + backslash_count = 0; + }, + } + } + } + +}; + +pub const ArgIterator = struct { + inner: if (builtin.os == Os.windows) ArgIteratorWindows else ArgIteratorPosix, + + pub fn init() -> ArgIterator { + return ArgIterator { + .inner = if (builtin.os == Os.windows) ArgIteratorWindows.init() else ArgIteratorPosix.init(), + }; + } + + /// You must free the returned memory when done. + pub fn next(self: &ArgIterator, allocator: &Allocator) -> ?%[]u8 { + if (builtin.os == Os.windows) { + return self.inner.next(allocator); + } else { + return mem.dupe(allocator, u8, self.inner.next() ?? return null); + } + } + + /// If you only are targeting posix you can call this and not need an allocator. + pub fn nextPosix(self: &ArgIterator) -> ?[]const u8 { + return self.inner.next(); + } + + /// Parse past 1 argument without capturing it. + /// Returns `true` if skipped an arg, `false` if we are at the end. + pub fn skip(self: &ArgIterator) -> bool { + return self.inner.skip(); + } +}; + +pub fn args() -> ArgIterator { + return ArgIterator.init(); +} + +test "windows arg parsing" { + testWindowsCmdLine(c"a b\tc d", [][]const u8{"a", "b", "c", "d"}); + testWindowsCmdLine(c"\"abc\" d e", [][]const u8{"abc", "d", "e"}); + testWindowsCmdLine(c"a\\\\\\b d\"e f\"g h", [][]const u8{"a\\\\\\b", "de fg", "h"}); + testWindowsCmdLine(c"a\\\\\\\"b c d", [][]const u8{"a\\\"b", "c", "d"}); + testWindowsCmdLine(c"a\\\\\\\\\"b c\" d e", [][]const u8{"a\\\\b c", "d", "e"}); + testWindowsCmdLine(c"a b\tc \"d f", [][]const u8{"a", "b", "c", "\"d", "f"}); +} + +fn testWindowsCmdLine(input_cmd_line: &const u8, expected_args: []const []const u8) { + var it = ArgIteratorWindows.initWithCmdLine(input_cmd_line); + for (expected_args) |expected_arg| { + const arg = %%??it.next(&debug.global_allocator); + assert(mem.eql(u8, arg, expected_arg)); + } + assert(it.next(&debug.global_allocator) == null); +} + test "std.os" { _ = @import("child_process.zig"); _ = @import("darwin_errno.zig"); diff --git a/std/os/windows/index.zig b/std/os/windows/index.zig index a7ba338bf7ae..915743843b35 100644 --- a/std/os/windows/index.zig +++ b/std/os/windows/index.zig @@ -17,7 +17,7 @@ pub extern "kernel32" stdcallcc fn DeleteFileA(lpFileName: LPCSTR) -> bool; pub extern "kernel32" stdcallcc fn ExitProcess(exit_code: UINT) -> noreturn; -pub extern "kernel32" stdcallcc fn GetCommandLine() -> LPTSTR; +pub extern "kernel32" stdcallcc fn GetCommandLineA() -> LPSTR; pub extern "kernel32" stdcallcc fn GetConsoleMode(in_hConsoleHandle: HANDLE, out_lpMode: &DWORD) -> bool; diff --git a/std/special/bootstrap.zig b/std/special/bootstrap.zig index fea062712b20..c202d07f0c99 100644 --- a/std/special/bootstrap.zig +++ b/std/special/bootstrap.zig @@ -52,7 +52,7 @@ fn posixCallMainAndExit() -> noreturn { } fn callMain(argc: usize, argv: &&u8, envp: &?&u8) -> %void { - std.os.args.raw = argv[0..argc]; + std.os.ArgIteratorPosix.raw = argv[0..argc]; var env_count: usize = 0; while (envp[env_count] != null) : (env_count += 1) {} diff --git a/std/special/build_runner.zig b/std/special/build_runner.zig index 4dfb6301fc47..b1fbfc6c2bf3 100644 --- a/std/special/build_runner.zig +++ b/std/special/build_runner.zig @@ -10,37 +10,7 @@ const ArrayList = std.ArrayList; error InvalidArgs; pub fn main() -> %void { - var arg_i: usize = 1; - - const zig_exe = { - if (arg_i >= os.args.count()) { - %%io.stderr.printf("Expected first argument to be path to zig compiler\n"); - return error.InvalidArgs; - } - const result = os.args.at(arg_i); - arg_i += 1; - result - }; - - const build_root = { - if (arg_i >= os.args.count()) { - %%io.stderr.printf("Expected second argument to be build root directory path\n"); - return error.InvalidArgs; - } - const result = os.args.at(arg_i); - arg_i += 1; - result - }; - - const cache_root = { - if (arg_i >= os.args.count()) { - %%io.stderr.printf("Expected third argument to be cache root directory path\n"); - return error.InvalidArgs; - } - const result = os.args.at(arg_i); - arg_i += 1; - result - }; + var arg_it = os.args(); // TODO use a more general purpose allocator here var inc_allocator = %%mem.IncrementingAllocator.init(20 * 1024 * 1024); @@ -48,6 +18,23 @@ pub fn main() -> %void { const allocator = &inc_allocator.allocator; + + // skip my own exe name + _ = arg_it.skip(); + + const zig_exe = %return unwrapArg(arg_it.next(allocator) ?? { + %%io.stderr.printf("Expected first argument to be path to zig compiler\n"); + return error.InvalidArgs; + }); + const build_root = %return unwrapArg(arg_it.next(allocator) ?? { + %%io.stderr.printf("Expected second argument to be build root directory path\n"); + return error.InvalidArgs; + }); + const cache_root = %return unwrapArg(arg_it.next(allocator) ?? { + %%io.stderr.printf("Expected third argument to be cache root directory path\n"); + return error.InvalidArgs; + }); + var builder = Builder.init(allocator, zig_exe, build_root, cache_root); defer builder.deinit(); @@ -55,8 +42,8 @@ pub fn main() -> %void { var prefix: ?[]const u8 = null; - while (arg_i < os.args.count()) : (arg_i += 1) { - const arg = os.args.at(arg_i); + while (arg_it.next(allocator)) |err_or_arg| { + const arg = %return unwrapArg(err_or_arg); if (mem.startsWith(u8, arg, "-D")) { const option_contents = arg[2..]; if (option_contents.len == 0) { @@ -76,10 +63,12 @@ pub fn main() -> %void { if (mem.eql(u8, arg, "--verbose")) { builder.verbose = true; } else if (mem.eql(u8, arg, "--help")) { - return usage(&builder, false, &io.stdout); - } else if (mem.eql(u8, arg, "--prefix") and arg_i + 1 < os.args.count()) { - arg_i += 1; - prefix = os.args.at(arg_i); + return usage(&builder, false, &io.stdout); + } else if (mem.eql(u8, arg, "--prefix")) { + prefix = %return unwrapArg(arg_it.next(allocator) ?? { + %%io.stderr.printf("Expected argument after --prefix\n\n"); + return usage(&builder, false, &io.stderr); + }); } else { %%io.stderr.printf("Unrecognized argument: {}\n\n", arg); return usage(&builder, false, &io.stderr); @@ -151,3 +140,10 @@ fn usage(builder: &Builder, already_ran_build: bool, out_stream: &io.OutStream) if (out_stream == &io.stderr) return error.InvalidArgs; } + +fn unwrapArg(arg: %[]u8) -> %[]u8 { + return arg %% |err| { + %%io.stderr.printf("Unable to parse command line: {}\n", err); + return err; + }; +} diff --git a/test/compare_output.zig b/test/compare_output.zig index 5b84d7ddbd10..a601598e7f32 100644 --- a/test/compare_output.zig +++ b/test/compare_output.zig @@ -346,7 +346,8 @@ pub fn addCases(cases: &tests.CompareOutputContext) { \\ %%io.stdout.printf("before\n"); \\ defer %%io.stdout.printf("defer1\n"); \\ defer %%io.stdout.printf("defer2\n"); - \\ if (os.args.count() == 1) return; + \\ var args_it = @import("std").os.args(); + \\ if (args_it.skip() and !args_it.skip()) return; \\ defer %%io.stdout.printf("defer3\n"); \\ %%io.stdout.printf("after\n"); \\}