Skip to content

Commit

Permalink
Merge pull request #18309 from castholm/windows-argv
Browse files Browse the repository at this point in the history
More accurate argv parsing/serialization on Windows
  • Loading branch information
Vexu authored Dec 25, 2023
2 parents bb0f7d5 + 4d9c4ab commit cd30277
Show file tree
Hide file tree
Showing 2 changed files with 510 additions and 35 deletions.
179 changes: 150 additions & 29 deletions lib/std/child_process.zig
Original file line number Diff line number Diff line change
Expand Up @@ -744,9 +744,6 @@ pub const ChildProcess = struct {
windowsDestroyPipe(g_hChildStd_ERR_Rd, g_hChildStd_ERR_Wr);
};

const cmd_line = try windowsCreateCommandLine(self.allocator, self.argv);
defer self.allocator.free(cmd_line);

var siStartInfo = windows.STARTUPINFOW{
.cb = @sizeOf(windows.STARTUPINFOW),
.hStdError = g_hChildStd_ERR_Wr,
Expand Down Expand Up @@ -818,7 +815,11 @@ pub const ChildProcess = struct {
const app_name_w = try unicode.utf8ToUtf16LeWithNull(self.allocator, app_basename_utf8);
defer self.allocator.free(app_name_w);

const cmd_line_w = try unicode.utf8ToUtf16LeWithNull(self.allocator, cmd_line);
const cmd_line_w = argvToCommandLineWindows(self.allocator, self.argv) catch |err| switch (err) {
// argv[0] contains unsupported characters that will never resolve to a valid exe.
error.InvalidArg0 => return error.FileNotFound,
else => |e| return e,
};
defer self.allocator.free(cmd_line_w);

run: {
Expand Down Expand Up @@ -1236,39 +1237,159 @@ test "windowsCreateProcessSupportsExtension" {
try std.testing.expect(windowsCreateProcessSupportsExtension(&[_]u16{ '.', 'e', 'X', 'e', 'c' }) == null);
}

/// Caller must dealloc.
fn windowsCreateCommandLine(allocator: mem.Allocator, argv: []const []const u8) ![:0]u8 {
pub const ArgvToCommandLineError = error{ OutOfMemory, InvalidUtf8, InvalidArg0 };

/// Serializes `argv` to a Windows command-line string suitable for passing to a child process and
/// parsing by the `CommandLineToArgvW` algorithm. The caller owns the returned slice.
pub fn argvToCommandLineWindows(
allocator: mem.Allocator,
argv: []const []const u8,
) ArgvToCommandLineError![:0]u16 {
var buf = std.ArrayList(u8).init(allocator);
defer buf.deinit();

for (argv, 0..) |arg, arg_i| {
if (arg_i != 0) try buf.append(' ');
if (mem.indexOfAny(u8, arg, " \t\n\"") == null) {
try buf.appendSlice(arg);
continue;
if (argv.len != 0) {
const arg0 = argv[0];

// The first argument must be quoted if it contains spaces or ASCII control characters
// (excluding DEL). It also follows special quoting rules where backslashes have no special
// interpretation, which makes it impossible to pass certain first arguments containing
// double quotes to a child process without characters from the first argument leaking into
// subsequent ones (which could have security implications).
//
// Empty arguments technically don't need quotes, but we quote them anyway for maximum
// compatibility with different implementations of the 'CommandLineToArgvW' algorithm.
//
// Double quotes are illegal in paths on Windows, so for the sake of simplicity we reject
// all first arguments containing double quotes, even ones that we could theoretically
// serialize in unquoted form.
var needs_quotes = arg0.len == 0;
for (arg0) |c| {
if (c <= ' ') {
needs_quotes = true;
} else if (c == '"') {
return error.InvalidArg0;
}
}
try buf.append('"');
var backslash_count: usize = 0;
for (arg) |byte| {
switch (byte) {
'\\' => backslash_count += 1,
'"' => {
try buf.appendNTimes('\\', backslash_count * 2 + 1);
try buf.append('"');
backslash_count = 0;
},
else => {
try buf.appendNTimes('\\', backslash_count);
try buf.append(byte);
backslash_count = 0;
},
if (needs_quotes) {
try buf.append('"');
try buf.appendSlice(arg0);
try buf.append('"');
} else {
try buf.appendSlice(arg0);
}

for (argv[1..]) |arg| {
try buf.append(' ');

// Subsequent arguments must be quoted if they contain spaces, tabs or double quotes,
// or if they are empty. For simplicity and for maximum compatibility with different
// implementations of the 'CommandLineToArgvW' algorithm, we also quote all ASCII
// control characters (again, excluding DEL).
needs_quotes = for (arg) |c| {
if (c <= ' ' or c == '"') {
break true;
}
} else arg.len == 0;
if (!needs_quotes) {
try buf.appendSlice(arg);
continue;
}

try buf.append('"');
var backslash_count: usize = 0;
for (arg) |byte| {
switch (byte) {
'\\' => {
backslash_count += 1;
},
'"' => {
try buf.appendNTimes('\\', backslash_count * 2 + 1);
try buf.append('"');
backslash_count = 0;
},
else => {
try buf.appendNTimes('\\', backslash_count);
try buf.append(byte);
backslash_count = 0;
},
}
}
try buf.appendNTimes('\\', backslash_count * 2);
try buf.append('"');
}
try buf.appendNTimes('\\', backslash_count * 2);
try buf.append('"');
}

return buf.toOwnedSliceSentinel(0);
return try unicode.utf8ToUtf16LeWithNull(allocator, buf.items);
}

test "argvToCommandLineWindows" {
const t = testArgvToCommandLineWindows;

try t(&.{
\\C:\Program Files\zig\zig.exe
,
\\run
,
\\.\src\main.zig
,
\\-target
,
\\x86_64-windows-gnu
,
\\-O
,
\\ReleaseSafe
,
\\--
,
\\--emoji=🗿
,
\\--eval=new Regex("Dwayne \"The Rock\" Johnson")
,
},
\\"C:\Program Files\zig\zig.exe" run .\src\main.zig -target x86_64-windows-gnu -O ReleaseSafe -- --emoji=🗿 "--eval=new Regex(\"Dwayne \\\"The Rock\\\" Johnson\")"
);

try t(&.{}, "");
try t(&.{""}, "\"\"");
try t(&.{" "}, "\" \"");
try t(&.{"\t"}, "\"\t\"");
try t(&.{"\x07"}, "\"\x07\"");
try t(&.{"🦎"}, "🦎");

try t(
&.{ "zig", "aa aa", "bb\tbb", "cc\ncc", "dd\r\ndd", "ee\x7Fee" },
"zig \"aa aa\" \"bb\tbb\" \"cc\ncc\" \"dd\r\ndd\" ee\x7Fee",
);

try t(
&.{ "\\\\foo bar\\foo bar\\", "\\\\zig zag\\zig zag\\" },
"\"\\\\foo bar\\foo bar\\\" \"\\\\zig zag\\zig zag\\\\\"",
);

try std.testing.expectError(
error.InvalidArg0,
argvToCommandLineWindows(std.testing.allocator, &.{"\"quotes\"quotes\""}),
);
try std.testing.expectError(
error.InvalidArg0,
argvToCommandLineWindows(std.testing.allocator, &.{"quotes\"quotes"}),
);
try std.testing.expectError(
error.InvalidArg0,
argvToCommandLineWindows(std.testing.allocator, &.{"q u o t e s \" q u o t e s"}),
);
}

fn testArgvToCommandLineWindows(argv: []const []const u8, expected_cmd_line: []const u8) !void {
const cmd_line_w = try argvToCommandLineWindows(std.testing.allocator, argv);
defer std.testing.allocator.free(cmd_line_w);

const cmd_line = try unicode.utf16leToUtf8Alloc(std.testing.allocator, cmd_line_w);
defer std.testing.allocator.free(cmd_line);

try std.testing.expectEqualStrings(expected_cmd_line, cmd_line);
}

fn windowsDestroyPipe(rd: ?windows.HANDLE, wr: ?windows.HANDLE) void {
Expand Down
Loading

0 comments on commit cd30277

Please sign in to comment.