Skip to content

Commit

Permalink
Switch our C++ subprocess library from fork to posix_spawn for sa…
Browse files Browse the repository at this point in the history
…fety/portability

This seems to reduce overhead more than I would have expected; we also end up seeing a bit less than a 10x speedup when running our C++-driven fuzzer, almost certainly due to `posix_spawn`'s use of `vfork` rather than `fork`. This restores the C++-driven fuzzer to within ~10% of the speeds we were previously seeing with the Python-driven fuzzer on the same machines, while also moving us to a much safer interface that's still POSIX-portable.

Since we can't rely on our environment supporting the chdir file action, we use a dedicated helper executable to continue supporting executing a subprocess with a different working directory.

PiperOrigin-RevId: 574537445
  • Loading branch information
ericastor authored and copybara-github committed Oct 18, 2023
1 parent 29858f4 commit 4bc0970
Show file tree
Hide file tree
Showing 7 changed files with 184 additions and 48 deletions.
7 changes: 7 additions & 0 deletions xls/common/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -200,10 +200,16 @@ cc_test(
],
)

cc_binary(
name = "subprocess_helper",
srcs = ["subprocess_helper.cc"],
)

cc_library(
name = "subprocess",
srcs = ["subprocess.cc"],
hdrs = ["subprocess.h"],
data = [":subprocess_helper"],
deps = [
":strerror",
":thread",
Expand All @@ -216,6 +222,7 @@ cc_library(
"@com_google_absl//absl/time",
"@com_google_absl//absl/types:span",
"//xls/common/file:file_descriptor",
"//xls/common/file:get_runfile_path",
"//xls/common/logging",
"//xls/common/logging:log_lines",
"//xls/common/status:status_macros",
Expand Down
117 changes: 87 additions & 30 deletions xls/common/subprocess.cc
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

#include <fcntl.h>
#include <poll.h>
#include <spawn.h>
#include <sys/wait.h>
#include <unistd.h>

Expand All @@ -29,6 +30,7 @@
#include <optional>
#include <ostream>
#include <string>
#include <string_view>
#include <utility>
#include <vector>

Expand All @@ -42,6 +44,7 @@
#include "absl/time/time.h"
#include "absl/types/span.h"
#include "xls/common/file/file_descriptor.h"
#include "xls/common/file/get_runfile_path.h"
#include "xls/common/logging/log_lines.h"
#include "xls/common/logging/logging.h"
#include "xls/common/status/status_macros.h"
Expand All @@ -51,6 +54,10 @@
namespace xls {
namespace {

// Note: this path is runfiles-relative.
constexpr std::string_view kSubprocessHelperPath =
"xls/common/subprocess_helper";

struct Pipe {
Pipe(FileDescriptor exit, FileDescriptor&& entrance)
: exit(std::move(exit)), entrance(std::move(entrance)) {}
Expand All @@ -71,45 +78,95 @@ struct Pipe {
FileDescriptor entrance;
};

absl::StatusOr<pid_t> ExecInChildProcess(
const std::vector<const char*>& argv_pointers,
const std::optional<std::filesystem::path>& cwd, Pipe& stdout_pipe,
Pipe& stderr_pipe) {
const char* cwd_c_str = nullptr;
if (cwd.has_value()) {
cwd_c_str = cwd->c_str();
absl::Status ReplaceFdWithPipe(posix_spawn_file_actions_t& actions, int fd,
Pipe& pipe, std::string_view pipe_name) {
if (int err = posix_spawn_file_actions_addclose(&actions, pipe.exit.get());
err != 0) {
return absl::InternalError(absl::StrFormat(
"Cannot add close() action for %s exit: %s", pipe_name, Strerror(err)));
}

pid_t pid = fork();
if (pid == -1) {
if (int err =
posix_spawn_file_actions_adddup2(&actions, pipe.entrance.get(), fd);
err != 0) {
return absl::InternalError(
absl::StrCat("Failed to fork: ", Strerror(errno)));
absl::StrFormat("Cannot add dup2() action for %s entrance: %s",
pipe_name, Strerror(err)));
}
if (pid != 0) {
// This is the parent process.
stdout_pipe.entrance.Close();
stderr_pipe.entrance.Close();
return pid;
if (int err =
posix_spawn_file_actions_addclose(&actions, pipe.entrance.get());
err != 0) {
return absl::InternalError(absl::StrFormat(
"Cannot clean up for %s entrance: %s", pipe_name, Strerror(err)));
}
return absl::OkStatus();
}

// This is the child process.
//
// NOTE: It is not safe to allocate from here to the `exec` invocation, so
// logging is also unsafe.
if (cwd_c_str != nullptr) {
if (chdir(cwd_c_str) != 0) {
_exit(127);
}
absl::StatusOr<posix_spawn_file_actions_t> CreateChildFileActions(
Pipe& stdout_pipe, Pipe& stderr_pipe) {
posix_spawn_file_actions_t actions;

if (int err = posix_spawn_file_actions_init(&actions); err != 0) {
return absl::InternalError(
absl::StrCat("Cannot initialize file actions: ", Strerror(err)));
}
if (int err = posix_spawn_file_actions_addclose(&actions, STDIN_FILENO);
err != 0) {
return absl::InternalError(
absl::StrCat("Cannot add close() action (stdin): ", Strerror(err)));
}
while ((dup2(stdout_pipe.entrance.get(), STDOUT_FILENO) == -1) &&
(errno == EINTR)) {

XLS_RETURN_IF_ERROR(
ReplaceFdWithPipe(actions, STDOUT_FILENO, stdout_pipe, "stdout"));
XLS_RETURN_IF_ERROR(
ReplaceFdWithPipe(actions, STDERR_FILENO, stderr_pipe, "stderr"));

return actions;
}

absl::StatusOr<pid_t> ExecInChildProcess(
const std::vector<const char*>& argv_pointers,
const std::optional<std::filesystem::path>& cwd, Pipe& stdout_pipe,
Pipe& stderr_pipe) {
// We previously used fork() & exec() here, but that's prone to many subtle
// problems (e.g., allocating between fork() and exec() can cause arbitrary
// problems)... and it's also slow. vfork() might have made the performance
// better, but it's not fully clear what's safe between vfork() and exec()
// either, so we just use posix_spawn for safety and convenience.

// Since we may need the child to have a different working directory (per
// `cwd`), and posix_spawn does not (yet) have support for a chdir action, we
// use a helper binary that chdir's to its first argument, then invokes
// "execvp" with the remaining arguments to replace itself with the command we
// actually wanted to run.
XLS_ASSIGN_OR_RETURN(std::filesystem::path subprocess_helper,
GetXlsRunfilePath(kSubprocessHelperPath));
std::vector<const char*> helper_argv_pointers;
helper_argv_pointers.reserve(argv_pointers.size() + 2);
helper_argv_pointers.push_back(subprocess_helper.c_str());
helper_argv_pointers.push_back(cwd.has_value() ? cwd->c_str() : "");
helper_argv_pointers.insert(helper_argv_pointers.end(), argv_pointers.begin(),
argv_pointers.end());

XLS_ASSIGN_OR_RETURN(posix_spawn_file_actions_t file_actions,
CreateChildFileActions(stdout_pipe, stderr_pipe));

pid_t pid;
if (int err = posix_spawnp(
&pid, subprocess_helper.c_str(), &file_actions, nullptr,
const_cast<char* const*>(helper_argv_pointers.data()), environ);
err != 0) {
return absl::InternalError(
absl::StrCat("Cannot spawn child process: ", Strerror(err)));
}
while ((dup2(stderr_pipe.entrance.get(), STDERR_FILENO) == -1) &&
(errno == EINTR)) {

if (int err = posix_spawn_file_actions_destroy(&file_actions); err != 0) {
return absl::InternalError(
absl::StrCat("Cannot destroy file actions: ", Strerror(err)));
}
execv(argv_pointers[0], const_cast<char* const*>(argv_pointers.data()));
XLS_LOG(ERROR) << "Execv syscall failed: " << Strerror(errno);
_exit(127);
stdout_pipe.entrance.Close();
stderr_pipe.entrance.Close();
return pid;
}

// Takes a list of file descriptor data streams and reads them into a list of
Expand Down
29 changes: 29 additions & 0 deletions xls/common/subprocess_helper.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
// Copyright 2023 The XLS Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Usage: subprocess_helper <desired working directory> <executable> [args...]
//
// Exec's the given executable in the specified working directory. If the first
// argument is an empty string, stays in the current working directory.

#include <unistd.h>

#include <string_view>

int main(int argc, char** argv) {
if (!std::string_view(argv[1]).empty()) {
chdir(argv[1]);
}
execvp(argv[2], &argv[2]);
}
3 changes: 3 additions & 0 deletions xls/fuzzer/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -449,6 +449,7 @@ cc_library(
"@com_google_absl//absl/strings",
"@com_google_absl//absl/types:span",
"@com_google_protobuf//:protobuf",
"@com_googlesource_code_re2//:re2",
],
)

Expand Down Expand Up @@ -872,7 +873,9 @@ cc_library(
"//xls/common/file:filesystem",
"//xls/common/file:get_runfile_path",
"//xls/common/logging:log_lines",
"//xls/common/status:status_macros",
"@com_google_absl//absl/status:statusor",
"@com_google_absl//absl/strings",
],
)

Expand Down
22 changes: 15 additions & 7 deletions xls/fuzzer/cpp_run_fuzz.cc
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,18 @@

#include "xls/fuzzer/cpp_run_fuzz.h"

#include <filesystem> // NOLINT
#include <optional>
#include <string>
#include <string_view>
#include <vector>

#include "absl/strings/str_cat.h"
#include "absl/strings/str_join.h"
#include "xls/common/file/filesystem.h"
#include "xls/common/file/get_runfile_path.h"
#include "xls/common/logging/log_lines.h"
#include "xls/common/status/status_macros.h"
#include "xls/common/subprocess.h"

namespace xls {
Expand Down Expand Up @@ -66,8 +70,9 @@ absl::StatusOr<std::optional<std::filesystem::path>> MinimizeIr(

SampleOptions ir_minimize_options = smp.options();
ir_minimize_options.set_input_is_dslx(false);
XLS_RETURN_IF_ERROR(WriteToFile(run_dir, "ir_minimizer.options.pbtxt",
ir_minimize_options.proto().DebugString()));
XLS_RETURN_IF_ERROR(
SetFileContents(run_dir / "ir_minimizer.options.pbtxt",
ir_minimize_options.proto().DebugString()));

XLS_ASSIGN_OR_RETURN(std::filesystem::path sample_runner_main_path,
GetSampleRunnerMainPath());
Expand All @@ -79,10 +84,12 @@ absl::StatusOr<std::optional<std::filesystem::path>> MinimizeIr(
std::string{sample_runner_main_path}, "--logtostderr",
"--options_file=ir_minimizer.options.pbtxt", "--args_file=args.txt",
"--input_file=$1"};
XLS_RETURN_IF_ERROR(
WriteToFile(run_dir, "ir_minimizer_test.sh",
absl::StrCat("#!/bin/sh\n! ", absl::StrJoin(args, " ")),
/*executable=*/true));
const std::filesystem::path test_script = run_dir / "ir_minimizer_test.sh";
XLS_RETURN_IF_ERROR(SetFileContents(
test_script, absl::StrCat("#!/bin/sh\n! ", absl::StrJoin(args, " "))));
std::filesystem::permissions(test_script,
std::filesystem::perms::owner_exec,
std::filesystem::perm_options::add);

std::string basename = ir_minimizer_main_path.stem();
std::filesystem::path stderr_path =
Expand All @@ -92,7 +99,8 @@ absl::StatusOr<std::optional<std::filesystem::path>> MinimizeIr(
SubprocessResult result,
InvokeSubprocess(
{ir_minimizer_main_path, "--alsologtostderr",
"--test_executable=ir_minimizer_test.sh", "sample.ir"},
absl::StrCat("--test_executable=", test_script.string()),
"sample.ir"},
/*cwd=*/run_dir, timeout));
XLS_RETURN_IF_ERROR(SetFileContents(stderr_path, result.stderr));

Expand Down
42 changes: 37 additions & 5 deletions xls/fuzzer/sample.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#ifndef XLS_FUZZER_SAMPLE_H_
#define XLS_FUZZER_SAMPLE_H_

#include <memory>
#include <optional>
#include <ostream>
#include <string>
Expand All @@ -27,6 +28,7 @@
#include "xls/common/proto_adaptor_utils.h"
#include "xls/dslx/interp_value.h"
#include "xls/fuzzer/sample.pb.h"
#include "re2/re2.h"

namespace xls {

Expand All @@ -40,6 +42,16 @@ std::string IrChannelNamesToText(
std::vector<std::string> ParseIrChannelNames(
std::string_view ir_channel_names_text);

struct KnownFailure {
// We use shared_ptr here because it's extremely difficult to get things to
// compile if we use unique_ptr... these get stored & passed around in a
// std::vector, which has a lot of restrictions on its use if storing a
// non-copyable struct. (RE2 itself is neither copyable nor movable, which
// causes its own set of problems.)
std::shared_ptr<RE2> tool;
std::shared_ptr<RE2> stderr_regex;
};

// Options describing how to run a code sample. See member comments for details.
class SampleOptions {
public:
Expand Down Expand Up @@ -126,24 +138,42 @@ class SampleOptions {
int64_t proc_ticks() const { return proto_.proc_ticks(); }
void set_proc_ticks(int64_t value) { proto_.set_proc_ticks(value); }

std::vector<fuzzer::KnownFailure> known_failures() const {
return std::vector<fuzzer::KnownFailure>(
proto_.known_failure().begin(),
proto_.known_failure().end());
const std::vector<KnownFailure>& known_failures() const {
if (known_failures_.empty() && proto_.known_failure_size() > 0) {
known_failures_.reserve(proto_.known_failure_size());
for (const auto& fail : proto_.known_failure()) {
KnownFailure compiled_fail;
if (fail.has_tool()) {
compiled_fail.tool = std::make_shared<RE2>(fail.tool());
}
if (fail.has_stderr_regex()) {
compiled_fail.stderr_regex =
std::make_shared<RE2>(fail.stderr_regex());
}
known_failures_.push_back(std::move(compiled_fail));
}
}
return known_failures_;
}
void clear_known_failures() {
proto_.clear_known_failure();
known_failures_.clear();
}
void clear_known_failures() { proto_.clear_known_failure(); }
void add_known_failure(std::string_view re) {
*proto_.mutable_known_failure()->Add()->mutable_stderr_regex() = re;
known_failures_.clear();
}
void add_known_failure(std::string_view tool, std::string_view re) {
auto* fail = proto_.add_known_failure();
*fail->mutable_tool() = tool;
*fail->mutable_stderr_regex() = re;
known_failures_.clear();
}
void set_known_failures(absl::Span<const fuzzer::KnownFailure> fails) {
for (const auto& arg : fails) {
*proto_.add_known_failure() = arg;
}
known_failures_.clear();
}

bool operator==(const SampleOptions& other) const;
Expand All @@ -164,6 +194,8 @@ class SampleOptions {

private:
fuzzer::SampleOptionsProto proto_ = DefaultOptionsProto();

mutable std::vector<KnownFailure> known_failures_;
};
bool AbslParseFlag(std::string_view text, SampleOptions* sample_options,
std::string* error);
Expand Down
Loading

0 comments on commit 4bc0970

Please sign in to comment.