Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a benchmark for workspace initialization #10754

Merged
merged 2 commits into from
Jun 18, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions benches/benchsuite/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,20 @@ description = "Benchmarking suite for Cargo."

[dependencies]
cargo = { path = "../.." }
cargo-test-support = { path = "../../crates/cargo-test-support" }
# Consider removing html_reports in 0.4 and switching to `cargo criterion`.
criterion = { version = "0.3.5", features = ["html_reports"] }
flate2 = { version = "1.0.3", default-features = false, features = ["zlib"] }
tar = { version = "0.4.38", default-features = false }
url = "2.2.2"

[lib]
bench = false

[[bench]]
name = "resolve"
harness = false

[[bench]]
name = "workspace_initialization"
harness = false
203 changes: 11 additions & 192 deletions benches/benchsuite/benches/resolve.rs
Original file line number Diff line number Diff line change
@@ -1,145 +1,12 @@
use benchsuite::fixtures;
use cargo::core::compiler::{CompileKind, RustcTargetData};
use cargo::core::resolver::features::{CliFeatures, FeatureOpts, FeatureResolver, ForceAllTargets};
use cargo::core::resolver::{HasDevUnits, ResolveBehavior};
use cargo::core::resolver::features::{FeatureOpts, FeatureResolver};
use cargo::core::resolver::{CliFeatures, ForceAllTargets, HasDevUnits, ResolveBehavior};
use cargo::core::{PackageIdSpec, Workspace};
use cargo::ops::WorkspaceResolve;
use cargo::Config;
use criterion::{criterion_group, criterion_main, Criterion};
use std::fs;
use std::path::{Path, PathBuf};
use std::process::Command;
use url::Url;

// Pinned crates.io index commit. It is an arbitrary commit that existed when
// this benchmark was first written; pinning it helps ensure consistent
// results. It can be updated if needed, but that can make it harder to
// compare results with older versions of cargo.
const CRATES_IO_COMMIT: &str = "85f7bfd61ea4fee08ec68c468762e886b2aebec6";

fn setup() {
create_home();
create_target_dir();
clone_index();
unpack_workspaces();
}

fn root() -> PathBuf {
let mut p = PathBuf::from(env!("CARGO_TARGET_TMPDIR"));
p.push("bench");
p
}

/// Returns the `target` directory located under `root()`.
fn target_dir() -> PathBuf {
    root().join("target")
}

/// Returns the `chome` directory located under `root()`.
fn cargo_home() -> PathBuf {
    root().join("chome")
}

/// Returns the `index` directory located under `root()`.
fn index() -> PathBuf {
    root().join("index")
}

/// Returns the `workspaces` directory located under `root()`.
fn workspaces_path() -> PathBuf {
    root().join("workspaces")
}

/// Converts the path returned by `index()` into a `Url`.
///
/// # Panics
///
/// Panics if `Url::from_file_path` rejects the path.
fn registry_url() -> Url {
    let index_dir = index();
    Url::from_file_path(index_dir).unwrap()
}

/// Sets up the cargo home directory used by the benchmarks.
///
/// Creates the directory when it is missing and (re)writes `config.toml` so
/// that the `crates-io` source is replaced with `local-snapshot`, whose
/// registry is the URL returned by `registry_url()`.
fn create_home() {
    let home = cargo_home();
    if !home.exists() {
        fs::create_dir_all(&home).unwrap();
    }
    let config_path = home.join("config.toml");
    let config_text = format!(
        r#"
[source.crates-io]
replace-with = 'local-snapshot'

[source.local-snapshot]
registry = '{}'
"#,
        registry_url()
    );
    fs::write(config_path, config_text).unwrap();
}

fn create_target_dir() {
// This is necessary to ensure the .rustc_info.json file is written.
// Otherwise it won't be written, and it is very expensive to create.
if !target_dir().exists() {
std::fs::create_dir_all(target_dir()).unwrap();
}
}

/// This clones crates.io at a specific point in time into tmp/index.
///
/// When the index directory already exists and `CRATES_IO_COMMIT` can be
/// verified in it, nothing is fetched. Otherwise the commit is fetched from
/// `origin` and the `master` branch is pointed at it.
fn clone_index() {
    let index = index();
    // Runs `git <command>` inside the index directory and reports whether it
    // exited successfully.
    let maybe_git = |command: &str| {
        Command::new("git")
            .current_dir(&index)
            .args(command.split_whitespace())
            .status()
            .expect("git should be installed")
            .success()
    };
    // Same as `maybe_git`, except that a failing command is fatal.
    let git = |command: &str| {
        assert!(
            maybe_git(command),
            "failed to run git command: {}",
            command
        );
    };
    if !index.exists() {
        fs::create_dir_all(&index).unwrap();
        git("init --bare");
        git("remote add origin https://github.com/rust-lang/crates.io-index");
    } else if maybe_git(&format!(
        "rev-parse -q --verify {}^{{commit}}",
        CRATES_IO_COMMIT
    )) {
        // Already fetched.
        return;
    }
    git(&format!("fetch origin {}", CRATES_IO_COMMIT));
    git("branch -f master FETCH_HEAD");
}

/// This unpacks the compressed workspace skeletons into tmp/workspaces.
///
/// Every `.tgz` file in the `workspaces` directory next to this crate's
/// manifest directory is extracted into `workspaces_path()`. A previously
/// unpacked directory named after the archive is removed first.
fn unpack_workspaces() {
    let skeleton_dir = Path::new(env!("CARGO_MANIFEST_DIR"))
        .parent()
        .unwrap()
        .join("workspaces");
    let unpack_root = workspaces_path();
    for entry in fs::read_dir(skeleton_dir).unwrap() {
        let tgz_path = entry.unwrap().path();
        // Only the compressed skeletons are of interest.
        if tgz_path.extension() != Some(std::ffi::OsStr::new("tgz")) {
            continue;
        }
        let ws_name = tgz_path.file_stem().unwrap();
        let gz_stream = flate2::read::GzDecoder::new(fs::File::open(&tgz_path).unwrap());
        // Drop any stale copy so the unpacked tree matches the archive.
        let stale = unpack_root.join(ws_name);
        if stale.exists() {
            fs::remove_dir_all(&stale).unwrap();
        }
        tar::Archive::new(gz_stream).unpack(&unpack_root).unwrap();
    }
}
use std::path::Path;

struct ResolveInfo<'cfg> {
ws: Workspace<'cfg>,
Expand All @@ -152,36 +19,12 @@ struct ResolveInfo<'cfg> {
ws_resolve: WorkspaceResolve<'cfg>,
}

/// Vec of `(ws_name, ws_root)`.
///
/// The result is sorted by path; each name is the final component of its
/// workspace root.
fn workspaces() -> Vec<(String, PathBuf)> {
    // CARGO_BENCH_WORKSPACES can be used to override, otherwise it just uses
    // the workspaces in the workspaces directory.
    let mut roots: Vec<PathBuf> =
        if let Some(list) = std::env::var_os("CARGO_BENCH_WORKSPACES") {
            std::env::split_paths(&list).collect()
        } else {
            let mut found = Vec::new();
            for entry in fs::read_dir(workspaces_path()).unwrap() {
                let path = entry.unwrap().path();
                // These currently fail in most cases on Windows due to long
                // filenames in the git checkouts.
                let skipped = cfg!(windows)
                    && matches!(
                        path.file_name().unwrap().to_str().unwrap(),
                        "servo" | "tikv"
                    );
                if !skipped {
                    found.push(path);
                }
            }
            found
        };
    // Sort so it is consistent.
    roots.sort();
    let mut named = Vec::with_capacity(roots.len());
    for ws_root in roots {
        let ws_name = ws_root.file_name().unwrap().to_str().unwrap().to_owned();
        named.push((ws_name, ws_root));
    }
    named
}

/// Helper for resolving a workspace. This will run the resolver once to
/// download everything, and returns all the data structures that are used
/// during resolution.
fn do_resolve<'cfg>(config: &'cfg Config, ws_root: &Path) -> ResolveInfo<'cfg> {
let requested_kinds = [CompileKind::Host];
let ws = cargo::core::Workspace::new(&ws_root.join("Cargo.toml"), config).unwrap();
let ws = Workspace::new(&ws_root.join("Cargo.toml"), config).unwrap();
let target_data = RustcTargetData::new(&ws, &requested_kinds).unwrap();
let cli_features = CliFeatures::from_command_line(&[], false, true).unwrap();
let pkgs = cargo::ops::Packages::Default;
Expand Down Expand Up @@ -212,38 +55,14 @@ fn do_resolve<'cfg>(config: &'cfg Config, ws_root: &Path) -> ResolveInfo<'cfg> {
}
}

/// Creates a new Config.
///
/// This is separate from `do_resolve` to deal with the ownership and lifetime.
fn make_config(ws_root: &Path) -> Config {
let shell = cargo::core::Shell::new();
let mut config = cargo::util::Config::new(shell, ws_root.to_path_buf(), cargo_home());
// Configure is needed to set the target_dir which is needed to write
// the .rustc_info.json file which is very expensive.
config
.configure(
0,
false,
None,
false,
false,
false,
&Some(target_dir()),
&[],
&[],
)
.unwrap();
config
}

/// Benchmark of the full `resolve_ws_with_opts` which runs the resolver
/// twice, the feature resolver, and more. This is a major component of a
/// regular cargo build.
fn resolve_ws(c: &mut Criterion) {
setup();
let fixtures = fixtures!();
let mut group = c.benchmark_group("resolve_ws");
for (ws_name, ws_root) in workspaces() {
let config = make_config(&ws_root);
for (ws_name, ws_root) in fixtures.workspaces() {
let config = fixtures.make_config(&ws_root);
// The resolver info is initialized only once in a lazy fashion. This
// allows criterion to skip this workspace if the user passes a filter
// on the command-line (like `cargo bench -- resolve_ws/tikv`).
Expand Down Expand Up @@ -282,10 +101,10 @@ fn resolve_ws(c: &mut Criterion) {

/// Benchmark of the feature resolver.
fn feature_resolver(c: &mut Criterion) {
setup();
let fixtures = fixtures!();
let mut group = c.benchmark_group("feature_resolver");
for (ws_name, ws_root) in workspaces() {
let config = make_config(&ws_root);
for (ws_name, ws_root) in fixtures.workspaces() {
let config = fixtures.make_config(&ws_root);
let mut lazy_info = None;
group.bench_function(&ws_name, |b| {
let ResolveInfo {
Expand Down
27 changes: 27 additions & 0 deletions benches/benchsuite/benches/workspace_initialization.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
use benchsuite::fixtures;
use cargo::core::Workspace;
use criterion::{criterion_group, criterion_main, Criterion};

/// Benchmark of `Workspace::new` for every fixture workspace.
///
/// One benchmark function is registered per workspace, named after the
/// workspace, inside the `workspace_initialization` group.
fn workspace_initialization(c: &mut Criterion) {
    let fixtures = fixtures!();
    let mut group = c.benchmark_group("workspace_initialization");
    for (ws_name, ws_root) in fixtures.workspaces() {
        let config = fixtures.make_config(&ws_root);
        // Built once, outside the measured closure, so that each iteration
        // times only `Workspace::new` and not the path allocation.
        let manifest_path = ws_root.join("Cargo.toml");
        // Each workspace is registered under its own name, so the user can
        // select one with a filter on the command-line
        // (like `cargo bench -- workspace_initialization/tikv`).
        group.bench_function(ws_name, |b| {
            b.iter(|| Workspace::new(&manifest_path, &config).unwrap())
        });
    }
    group.finish();
}

// Criterion complains about the measurement time being too small, but the
// measurement time doesn't seem important to me. What matters more is the
// number of iterations, which defaults to 100 and seems like a reasonable
// default. Otherwise, the measurement time would need to be changed per
// workspace. We wouldn't want to spend 60s on every workspace; that would
// take too long and isn't necessary for the smaller workspaces.
criterion_group!(benches, workspace_initialization);
criterion_main!(benches);
epage marked this conversation as resolved.
Show resolved Hide resolved
Loading