Skip to content

fix(package): check dirtiness of symlinks source files #14981

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Dec 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions crates/cargo-util/src/paths.rs
Original file line number Diff line number Diff line change
Expand Up @@ -710,9 +710,9 @@ pub fn set_file_time_no_err<P: AsRef<Path>>(path: P, time: FileTime) {
/// This canonicalizes both paths before stripping. This is useful if the
/// paths are obtained in different ways, and one or the other may or may not
/// have been normalized in some way.
pub fn strip_prefix_canonical<P: AsRef<Path>>(
path: P,
base: P,
pub fn strip_prefix_canonical(
path: impl AsRef<Path>,
base: impl AsRef<Path>,
) -> Result<PathBuf, std::path::StripPrefixError> {
// Not all filesystems support canonicalize. Just ignore if it doesn't work.
let safe_canonicalize = |path: &Path| match path.canonicalize() {
Expand Down
62 changes: 35 additions & 27 deletions src/cargo/ops/cargo_package/vcs.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
//! Helpers to gather the VCS information for `cargo package`.

use std::path::Path;
use std::collections::HashSet;
use std::path::PathBuf;

use anyhow::Context as _;
Expand Down Expand Up @@ -186,7 +186,7 @@ fn git(
.iter()
.filter(|src_file| dirty_files.iter().any(|path| src_file.starts_with(path)))
.map(|p| p.as_ref())
.chain(dirty_metadata_paths(pkg, repo)?.iter())
.chain(dirty_files_outside_pkg_root(pkg, repo, src_files)?.iter())
.map(|path| {
pathdiff::diff_paths(path, cwd)
.as_ref()
Expand Down Expand Up @@ -219,39 +219,47 @@ fn git(
}
}

/// Checks whether files at paths specified in `package.readme` and
/// `package.license-file` have been modified.
/// Checks whether "included" source files outside package root have been modified.
///
/// This currently looks at
///
/// * `package.readme` and `package.license-file` pointing to paths outside package root
/// * symlinks targets reside outside package root
///
/// This is required because those paths may link to a file outside the
/// current package root, but still under the git workdir, affecting the
/// final packaged `.crate` file.
fn dirty_metadata_paths(pkg: &Package, repo: &git2::Repository) -> CargoResult<Vec<PathBuf>> {
let mut dirty_files = Vec::new();
fn dirty_files_outside_pkg_root(
pkg: &Package,
repo: &git2::Repository,
src_files: &[PathEntry],
) -> CargoResult<HashSet<PathBuf>> {
let pkg_root = pkg.root();
let workdir = repo.workdir().unwrap();
let root = pkg.root();

let meta = pkg.manifest().metadata();
for path in [&meta.license_file, &meta.readme] {
let Some(path) = path.as_deref().map(Path::new) else {
continue;
};
let abs_path = paths::normalize_path(&root.join(path));
if paths::strip_prefix_canonical(abs_path.as_path(), root).is_ok() {
// Inside package root. Don't bother checking git status.
continue;
}
if let Ok(rel_path) = paths::strip_prefix_canonical(abs_path.as_path(), workdir) {
// Outside package root but under git workdir,
if repo.status_file(&rel_path)? != git2::Status::CURRENT {
dirty_files.push(if abs_path.is_symlink() {
// For symlinks, shows paths to symlink sources
workdir.join(rel_path)
} else {
abs_path
});
}
let metadata_paths: Vec<_> = [&meta.license_file, &meta.readme]
.into_iter()
.filter_map(|p| p.as_deref())
.map(|path| paths::normalize_path(&pkg_root.join(path)))
.collect();

let mut dirty_symlinks = HashSet::new();
for rel_path in src_files
.iter()
.filter(|p| p.is_symlink_or_under_symlink())
.map(|p| p.as_ref())
.chain(metadata_paths.iter())
// If inside package root. Don't bother checking git status.
.filter(|p| paths::strip_prefix_canonical(p, pkg_root).is_err())
// Handle files outside package root but under git workdir,
.filter_map(|p| paths::strip_prefix_canonical(p, workdir).ok())
{
if repo.status_file(&rel_path)? != git2::Status::CURRENT {
dirty_symlinks.insert(workdir.join(rel_path));
}
}
Ok(dirty_files)
Ok(dirty_symlinks)
}

/// Helper to collect dirty statuses for a single repo.
Expand Down
40 changes: 40 additions & 0 deletions src/cargo/sources/path.rs
Original file line number Diff line number Diff line change
Expand Up @@ -448,6 +448,8 @@ impl From<gix::dir::entry::Kind> for FileType {
pub struct PathEntry {
path: PathBuf,
ty: FileType,
/// Whether this path was visited when traversing a symlink directory.
under_symlink_dir: bool,
}

impl PathEntry {
Expand All @@ -469,10 +471,21 @@ impl PathEntry {

/// Similar to [`std::path::Path::is_symlink`]
/// but doesn't follow the symbolic link nor make any system call
///
/// If the path is not a symlink but under a symlink parent directory,
/// this will return false.
/// See [`PathEntry::is_symlink_or_under_symlink`] for an alternative.
pub fn is_symlink(&self) -> bool {
matches!(self.ty, FileType::Symlink)
}

/// Whether a path is a symlink or a path under a symlink directory.
///
/// Use [`PathEntry::is_symlink`] to get the exact file type of the path only.
pub fn is_symlink_or_under_symlink(&self) -> bool {
self.is_symlink() || self.under_symlink_dir
}

/// Whether this path might be a plain text symlink.
///
/// Git may check out symlinks as plain text files that contain the link texts,
Expand Down Expand Up @@ -826,6 +839,9 @@ fn list_files_gix(
files.push(PathEntry {
path: file_path,
ty,
// Git index doesn't include files from symlink diretory,
// symlink dirs are handled in `list_files_walk`.
under_symlink_dir: false,
});
}
}
Expand All @@ -847,6 +863,10 @@ fn list_files_walk(
) -> CargoResult<()> {
let walkdir = WalkDir::new(path)
.follow_links(true)
// While this is the default, set it explicitly.
// We need walkdir to visit the directory tree in depth-first order,
// so we can ensure a path visited later be under a certain directory.
.contents_first(false)
.into_iter()
.filter_entry(|entry| {
let path = entry.path();
Expand Down Expand Up @@ -876,10 +896,27 @@ fn list_files_walk(

true
});

let mut current_symlink_dir = None;
for entry in walkdir {
match entry {
Ok(entry) => {
let file_type = entry.file_type();

match current_symlink_dir.as_ref() {
Some(dir) if entry.path().starts_with(dir) => {
// Still walk under the same parent symlink dir, so keep it
}
Some(_) | None => {
// Not under any parent symlink dir, update the current one.
current_symlink_dir = if file_type.is_dir() && entry.path_is_symlink() {
Some(entry.path().to_path_buf())
} else {
None
};
}
}

if file_type.is_file() || file_type.is_symlink() {
// We follow_links(true) here so check if entry was created from a symlink
let ty = if entry.path_is_symlink() {
Expand All @@ -890,6 +927,8 @@ fn list_files_walk(
ret.push(PathEntry {
path: entry.into_path(),
ty,
// This rely on contents_first(false), which walks in depth-first order
under_symlink_dir: current_symlink_dir.is_some(),
});
}
}
Expand All @@ -907,6 +946,7 @@ fn list_files_walk(
Some(path) => ret.push(PathEntry {
path: path.to_path_buf(),
ty: FileType::Other,
under_symlink_dir: false,
}),
None => return Err(err.into()),
},
Expand Down
11 changes: 9 additions & 2 deletions tests/testsuite/package.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1339,10 +1339,12 @@ fn dirty_file_outside_pkg_root_considered_dirty() {
license-file = "../LICENSE"
"#,
)
.file("original-dir/file", "before")
.symlink("lib.rs", "isengard/src/lib.rs")
.symlink("README.md", "isengard/README.md")
.file(&main_outside_pkg_root, "fn main() {}")
.symlink(&main_outside_pkg_root, "isengard/src/main.rs")
.symlink_dir("original-dir", "isengard/symlink-dir")
});
git::commit(&repo);

Expand All @@ -1352,6 +1354,7 @@ fn dirty_file_outside_pkg_root_considered_dirty() {
// * Changes in files outside package root that source files symlink to
p.change_file("README.md", "after");
p.change_file("lib.rs", "pub fn after() {}");
p.change_file("original-dir/file", "after");
// * Changes in files outside pkg root that `license-file`/`readme` point to
p.change_file("LICENSE", "after");
// * When workspace inheritance is involved and changed
Expand All @@ -1375,10 +1378,12 @@ fn dirty_file_outside_pkg_root_considered_dirty() {
p.cargo("package --workspace --no-verify")
.with_status(101)
.with_stderr_data(str![[r#"
[ERROR] 2 files in the working directory contain changes that were not yet committed into git:
[ERROR] 4 files in the working directory contain changes that were not yet committed into git:

LICENSE
README.md
lib.rs
original-dir/file

to proceed despite this and include the uncommitted changes, pass the `--allow-dirty` flag

Expand All @@ -1388,7 +1393,7 @@ to proceed despite this and include the uncommitted changes, pass the `--allow-d
p.cargo("package --workspace --no-verify --allow-dirty")
.with_stderr_data(str![[r#"
[PACKAGING] isengard v0.0.0 ([ROOT]/foo/isengard)
[PACKAGED] 8 files, [FILE_SIZE]B ([FILE_SIZE]B compressed)
[PACKAGED] 9 files, [FILE_SIZE]B ([FILE_SIZE]B compressed)

"#]])
.run();
Expand All @@ -1411,13 +1416,15 @@ edition = "2021"
"Cargo.toml.orig",
"src/lib.rs",
"src/main.rs",
"symlink-dir/file",
"Cargo.lock",
"LICENSE",
"README.md",
],
[
("src/lib.rs", str!["pub fn after() {}"]),
("src/main.rs", str![r#"fn main() { eprintln!("after"); }"#]),
("symlink-dir/file", str!["after"]),
("README.md", str!["after"]),
("LICENSE", str!["after"]),
("Cargo.toml", cargo_toml),
Expand Down
Loading