Skip to content

Updates to path source walking. #8095

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Apr 24, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ pretty_env_logger = { version = "0.4", optional = true }
anyhow = "1.0"
filetime = "0.2.9"
flate2 = { version = "1.0.3", default-features = false, features = ["zlib"] }
git2 = "0.13.1"
git2 = "0.13.5"
git2-curl = "0.14.0"
glob = "0.3.0"
hex = "0.4"
Expand All @@ -44,7 +44,7 @@ jobserver = "0.1.21"
lazycell = "1.2.0"
libc = "0.2"
log = "0.4.6"
libgit2-sys = "0.12.1"
libgit2-sys = "0.12.5"
memchr = "2.1.3"
num_cpus = "1.0"
opener = "0.4"
Expand Down
20 changes: 18 additions & 2 deletions src/cargo/core/compiler/fingerprint.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1209,7 +1209,12 @@ fn calculate_normal(cx: &mut Context<'_, '_>, unit: &Unit) -> CargoResult<Finger
let target_root = target_root(cx);
let local = if unit.mode.is_doc() {
// rustdoc does not have dep-info files.
let fingerprint = pkg_fingerprint(cx.bcx, &unit.pkg)?;
let fingerprint = pkg_fingerprint(cx.bcx, &unit.pkg).chain_err(|| {
format!(
"failed to determine package fingerprint for documenting {}",
unit.pkg
)
})?;
vec![LocalFingerprint::Precalculated(fingerprint)]
} else {
let dep_info = dep_info_loc(cx, unit);
Expand Down Expand Up @@ -1270,7 +1275,18 @@ fn calculate_run_custom_build(cx: &mut Context<'_, '_>, unit: &Unit) -> CargoRes
// the whole crate.
let (gen_local, overridden) = build_script_local_fingerprints(cx, unit);
let deps = &cx.build_explicit_deps[unit];
let local = (gen_local)(deps, Some(&|| pkg_fingerprint(cx.bcx, &unit.pkg)))?.unwrap();
let local = (gen_local)(
deps,
Some(&|| {
pkg_fingerprint(cx.bcx, &unit.pkg).chain_err(|| {
format!(
"failed to determine package fingerprint for build script for {}",
unit.pkg
)
})
}),
)?
.unwrap();
let output = deps.build_script_output.clone();

// Include any dependencies of our execution, which is typically just the
Expand Down
147 changes: 84 additions & 63 deletions src/cargo/sources/path.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,15 @@ impl<'cfg> PathSource<'cfg> {
/// are relevant for building this package, but it also contains logic to
/// use other methods like .gitignore to filter the list of files.
pub fn list_files(&self, pkg: &Package) -> CargoResult<Vec<PathBuf>> {
self._list_files(pkg).chain_err(|| {
format!(
"failed to determine list of files in {}",
pkg.root().display()
)
})
}

fn _list_files(&self, pkg: &Package) -> CargoResult<Vec<PathBuf>> {
let root = pkg.root();
let no_include_option = pkg.manifest().include().is_empty();

Expand All @@ -111,17 +120,21 @@ impl<'cfg> PathSource<'cfg> {
}
let ignore_include = include_builder.build()?;

let ignore_should_package = |relative_path: &Path| -> CargoResult<bool> {
let ignore_should_package = |relative_path: &Path, is_dir: bool| -> CargoResult<bool> {
// "Include" and "exclude" options are mutually exclusive.
if no_include_option {
match ignore_exclude
.matched_path_or_any_parents(relative_path, /* is_dir */ false)
{
match ignore_exclude.matched_path_or_any_parents(relative_path, is_dir) {
Match::None => Ok(true),
Match::Ignore(_) => Ok(false),
Match::Whitelist(_) => Ok(true),
}
} else {
if is_dir {
// Generally, include directives don't list every
// directory (nor should they!). Just skip all directory
// checks, and only check files.
return Ok(true);
}
match ignore_include
.matched_path_or_any_parents(relative_path, /* is_dir */ false)
{
Expand All @@ -132,7 +145,7 @@ impl<'cfg> PathSource<'cfg> {
}
};

let mut filter = |path: &Path| -> CargoResult<bool> {
let mut filter = |path: &Path, is_dir: bool| -> CargoResult<bool> {
let relative_path = path.strip_prefix(root)?;

let rel = relative_path.as_os_str();
Expand All @@ -142,13 +155,13 @@ impl<'cfg> PathSource<'cfg> {
return Ok(true);
}

ignore_should_package(relative_path)
ignore_should_package(relative_path, is_dir)
};

// Attempt Git-prepopulate only if no `include` (see rust-lang/cargo#4135).
if no_include_option {
if let Some(result) = self.discover_git_and_list_files(pkg, root, &mut filter) {
return result;
if let Some(result) = self.discover_git_and_list_files(pkg, root, &mut filter)? {
return Ok(result);
}
// no include option and not git repo discovered (see rust-lang/cargo#7183).
return self.list_files_walk_except_dot_files_and_dirs(pkg, &mut filter);
Expand All @@ -162,50 +175,48 @@ impl<'cfg> PathSource<'cfg> {
&self,
pkg: &Package,
root: &Path,
filter: &mut dyn FnMut(&Path) -> CargoResult<bool>,
) -> Option<CargoResult<Vec<PathBuf>>> {
// If this package is in a Git repository, then we really do want to
// query the Git repository as it takes into account items such as
// `.gitignore`. We're not quite sure where the Git repository is,
// however, so we do a bit of a probe.
//
// We walk this package's path upwards and look for a sibling
// `Cargo.toml` and `.git` directory. If we find one then we assume that
// we're part of that repository.
let mut cur = root;
loop {
if cur.join("Cargo.toml").is_file() {
// If we find a Git repository next to this `Cargo.toml`, we still
// check to see if we are indeed part of the index. If not, then
// this is likely an unrelated Git repo, so keep going.
if let Ok(repo) = git2::Repository::open(cur) {
let index = match repo.index() {
Ok(index) => index,
Err(err) => return Some(Err(err.into())),
};
let path = root.strip_prefix(cur).unwrap().join("Cargo.toml");
if index.get_path(&path, 0).is_some() {
return Some(self.list_files_git(pkg, &repo, filter));
}
}
}
// Don't cross submodule boundaries.
if cur.join(".git").is_dir() {
break;
}
match cur.parent() {
Some(parent) => cur = parent,
None => break,
filter: &mut dyn FnMut(&Path, bool) -> CargoResult<bool>,
) -> CargoResult<Option<Vec<PathBuf>>> {
let repo = match git2::Repository::discover(root) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I feel like we historically actually did this but at some point backed away from it. That being said my mind here is quite fuzzy and I don't really remember the details (nor with some searching can I find anything).

The best I can remember is that folks sometimes have their whole $HOME dir as a git repo and they did or didn't want that messing with Cargo. That being said this here is only being used to list the files within a repo for a package we already know, which seems like it's basically just respecting .gitignore, right? If that's the case I can't really think right now how this can go astray...

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🤷 Maybe play it safe and wait till next week to merge this so that it gets in the next release and gets the maximum amount of time on nightly?

Ok(repo) => repo,
Err(e) => {
log::debug!(
"could not discover git repo at or above {}: {}",
root.display(),
e
);
return Ok(None);
}
};
let index = repo
.index()
.chain_err(|| format!("failed to open git index at {}", repo.path().display()))?;
let repo_root = repo.workdir().ok_or_else(|| {
anyhow::format_err!(
"did not expect repo at {} to be bare",
repo.path().display()
)
})?;
let repo_relative_path = root.strip_prefix(repo_root).chain_err(|| {
format!(
"expected git repo {} to be parent of package {}",
repo.path().display(),
root.display()
)
})?;
let manifest_path = repo_relative_path.join("Cargo.toml");
if index.get_path(&manifest_path, 0).is_some() {
return Ok(Some(self.list_files_git(pkg, &repo, filter)?));
}
None
// Package Cargo.toml is not in git, don't use git to guide our selection.
Ok(None)
}

fn list_files_git(
&self,
pkg: &Package,
repo: &git2::Repository,
filter: &mut dyn FnMut(&Path) -> CargoResult<bool>,
filter: &mut dyn FnMut(&Path, bool) -> CargoResult<bool>,
) -> CargoResult<Vec<PathBuf>> {
warn!("list_files_git {}", pkg.package_id());
let index = repo.index()?;
Expand Down Expand Up @@ -289,7 +300,10 @@ impl<'cfg> PathSource<'cfg> {
continue;
}

if is_dir.unwrap_or_else(|| file_path.is_dir()) {
// `is_dir` is None for symlinks. The `unwrap` checks if the
// symlink points to a directory.
let is_dir = is_dir.unwrap_or_else(|| file_path.is_dir());
if is_dir {
warn!(" found submodule {}", file_path.display());
let rel = file_path.strip_prefix(root)?;
let rel = rel.to_str().ok_or_else(|| {
Expand All @@ -307,7 +321,8 @@ impl<'cfg> PathSource<'cfg> {
PathSource::walk(&file_path, &mut ret, false, filter)?;
}
}
} else if (*filter)(&file_path)? {
} else if (*filter)(&file_path, is_dir)? {
assert!(!is_dir);
// We found a file!
warn!(" found {}", file_path.display());
ret.push(file_path);
Expand Down Expand Up @@ -338,29 +353,28 @@ impl<'cfg> PathSource<'cfg> {
fn list_files_walk_except_dot_files_and_dirs(
&self,
pkg: &Package,
filter: &mut dyn FnMut(&Path) -> CargoResult<bool>,
filter: &mut dyn FnMut(&Path, bool) -> CargoResult<bool>,
) -> CargoResult<Vec<PathBuf>> {
let root = pkg.root();
let mut exclude_dot_files_dir_builder = GitignoreBuilder::new(root);
exclude_dot_files_dir_builder.add_line(None, ".*")?;
let ignore_dot_files_and_dirs = exclude_dot_files_dir_builder.build()?;

let mut filter_ignore_dot_files_and_dirs = |path: &Path| -> CargoResult<bool> {
let relative_path = path.strip_prefix(root)?;
match ignore_dot_files_and_dirs
.matched_path_or_any_parents(relative_path, /* is_dir */ false)
{
Match::Ignore(_) => Ok(false),
_ => filter(path),
}
};
let mut filter_ignore_dot_files_and_dirs =
|path: &Path, is_dir: bool| -> CargoResult<bool> {
let relative_path = path.strip_prefix(root)?;
match ignore_dot_files_and_dirs.matched_path_or_any_parents(relative_path, is_dir) {
Match::Ignore(_) => Ok(false),
_ => filter(path, is_dir),
}
};
self.list_files_walk(pkg, &mut filter_ignore_dot_files_and_dirs)
}

fn list_files_walk(
&self,
pkg: &Package,
filter: &mut dyn FnMut(&Path) -> CargoResult<bool>,
filter: &mut dyn FnMut(&Path, bool) -> CargoResult<bool>,
) -> CargoResult<Vec<PathBuf>> {
let mut ret = Vec::new();
PathSource::walk(pkg.root(), &mut ret, true, filter)?;
Expand All @@ -371,12 +385,14 @@ impl<'cfg> PathSource<'cfg> {
path: &Path,
ret: &mut Vec<PathBuf>,
is_root: bool,
filter: &mut dyn FnMut(&Path) -> CargoResult<bool>,
filter: &mut dyn FnMut(&Path, bool) -> CargoResult<bool>,
) -> CargoResult<()> {
if !path.is_dir() {
if (*filter)(path)? {
ret.push(path.to_path_buf());
}
let is_dir = path.is_dir();
if !is_root && !(*filter)(path, is_dir)? {
return Ok(());
}
if !is_dir {
ret.push(path.to_path_buf());
return Ok(());
}
// Don't recurse into any sub-packages that we have.
Expand Down Expand Up @@ -415,7 +431,12 @@ impl<'cfg> PathSource<'cfg> {

let mut max = FileTime::zero();
let mut max_path = PathBuf::new();
for file in self.list_files(pkg)? {
for file in self.list_files(pkg).chain_err(|| {
format!(
"failed to determine the most recently modified file in {}",
pkg.root().display()
)
})? {
// An `fs::stat` error here is either because path is a
// broken symlink, a permissions error, or a race
// condition where this path was `rm`-ed -- either way,
Expand Down
38 changes: 36 additions & 2 deletions tests/testsuite/build_script.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3977,26 +3977,60 @@ fn links_interrupted_can_restart() {
fn build_script_scan_eacces() {
// build.rs causes a scan of the whole project, which can be a problem if
// a directory is not accessible.
use cargo_test_support::git;
use std::os::unix::fs::PermissionsExt;

let p = project()
.file("src/lib.rs", "")
.file("build.rs", "fn main() {}")
.file("secrets/stuff", "")
.build();
let path = p.root().join("secrets");
fs::set_permissions(&path, fs::Permissions::from_mode(0)).unwrap();
// "Caused by" is a string from libc such as the following:
// The last "Caused by" is a string from libc such as the following:
// Permission denied (os error 13)
p.cargo("build")
.with_stderr(
"\
[ERROR] cannot read \"[..]/foo/secrets\"
[ERROR] failed to determine package fingerprint for build script for foo v0.0.1 ([..]/foo)

Caused by:
failed to determine the most recently modified file in [..]/foo

Caused by:
failed to determine list of files in [..]/foo

Caused by:
cannot read \"[..]/foo/secrets\"

Caused by:
[..]
",
)
.with_status(101)
.run();

// Try `package.exclude` to skip a directory.
p.change_file(
"Cargo.toml",
r#"
[package]
name = "foo"
version = "0.0.1"
exclude = ["secrets"]
"#,
);
p.cargo("build").run();

// Try with git. This succeeds because the git status walker ignores
// directories it can't access.
p.change_file("Cargo.toml", &basic_manifest("foo", "0.0.1"));
p.build_dir().rm_rf();
let repo = git::init(&p.root());
git::add(&repo);
git::commit(&repo);
p.cargo("build").run();

// Restore permissions so that the directory can be deleted.
fs::set_permissions(&path, fs::Permissions::from_mode(0o755)).unwrap();
}
Loading