Skip to content

Commit

Permalink
Enable shallow clones and fetches for registry and git dependencies.
Browse files Browse the repository at this point in the history
The implementation hinges on passing information about the kind of clone
and fetch to the `fetch()` method, which then configures the fetch accordingly.

Note that it doesn't differentiate between initial clones and fetches as
the shallow-ness of the repository is maintained nonetheless.
  • Loading branch information
Byron committed Mar 15, 2023
1 parent 41412a1 commit ee2e9bf
Show file tree
Hide file tree
Showing 7 changed files with 881 additions and 24 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ filetime = "0.2.9"
flate2 = { version = "1.0.3", default-features = false, features = ["zlib"] }
git2 = "0.16.0"
git2-curl = "0.17.0"
gix = { version = "0.41.0", default-features = false, features = ["blocking-http-transport-curl", "progress-tree"] }
gix = { version = "0.42.0", default-features = false, features = ["blocking-http-transport-curl", "progress-tree"] }
gix-features-for-configuration-only = { version = "0.28.0", package = "gix-features", features = [ "parallel" ] }
glob = "0.3.0"
hex = "0.4"
Expand Down
61 changes: 61 additions & 0 deletions src/cargo/sources/git/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,66 @@ mod source;
mod utils;

pub mod fetch {
use crate::core::features::GitoxideFeatures;
use crate::Config;

/// The kind of remote repository to fetch.
///
/// The kind determines whether a shallow fetch is permissible at all, see `RemoteKind::to_history()`.
#[derive(Debug, Copy, Clone)]
pub enum RemoteKind {
/// The repository belongs to a git dependency.
GitDependency,
/// The repository belongs to a git dependency, but because specific revisions are checked out
/// (a locked revision or an explicit `rev` reference) we can't use shallow clones.
GitDependencyForbidShallow,
/// The repository belongs to a Cargo registry.
Registry,
}

/// The history a fetch should establish in the destination repository.
///
/// It converts into `gix::remote::fetch::Shallow` through the `From` implementation below.
#[derive(Debug, Clone)]
pub enum History {
/// Fetch using the given gitoxide shallow setting, passed through unchanged on conversion.
Shallow(gix::remote::fetch::Shallow),
/// Undo the shallowness of the repository; converts to `gix::remote::fetch::Shallow::undo()`.
Unshallow,
}

impl From<History> for gix::remote::fetch::Shallow {
    /// Translate cargo's [`History`] request into the shallow setting understood by gitoxide.
    ///
    /// A wrapped setting is handed over untouched, while [`History::Unshallow`] becomes the
    /// setting returned by `undo()`.
    fn from(value: History) -> Self {
        match value {
            History::Shallow(how) => how,
            History::Unshallow => Self::undo(),
        }
    }
}

impl RemoteKind {
    /// Decide which [`History`] a fetch for this kind of remote should request.
    ///
    /// `repo_is_shallow` tells whether the destination repository is already shallow, and `config`
    /// provides the unstable `gitoxide` feature toggles (`shallow_deps` and `shallow_index`).
    ///
    /// * An already shallow repository is left as it is, except for
    ///   [`RemoteKind::GitDependencyForbidShallow`] which yields [`History::Unshallow`].
    /// * A repository that isn't shallow is fetched with a depth of 1 only if the feature matching
    ///   this kind is enabled; otherwise its shallowness is not changed.
    pub(crate) fn to_history(&self, repo_is_shallow: bool, config: &Config) -> History {
        use gix::remote::fetch::Shallow;

        if repo_is_shallow {
            return match self {
                RemoteKind::GitDependencyForbidShallow => History::Unshallow,
                RemoteKind::GitDependency | RemoteKind::Registry => {
                    History::Shallow(Shallow::NoChange)
                }
            };
        }

        // The feature toggles are consulted only for the kinds that may become shallow.
        let wants_shallow = match self {
            RemoteKind::GitDependency => config
                .cli_unstable()
                .gitoxide
                .map_or(false, |features| features.shallow_deps),
            RemoteKind::Registry => config
                .cli_unstable()
                .gitoxide
                .map_or(false, |features| features.shallow_index),
            RemoteKind::GitDependencyForbidShallow => false,
        };
        let how = if wants_shallow {
            Shallow::DepthAtRemote(1.try_into().expect("non-zero"))
        } else {
            Shallow::NoChange
        };
        History::Shallow(how)
    }
}

/// The error type used for gitoxide-based fetches: gitoxide's collated fetch error, with the refspec
/// parse error as its type parameter.
pub type Error = gix::env::collate::fetch::Error<gix::refspec::parse::Error>;
}
55 changes: 43 additions & 12 deletions src/cargo/sources/git/oxide.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@ pub fn with_retry_and_progress(
) -> CargoResult<()> {
std::thread::scope(|s| {
let mut progress_bar = Progress::new("Fetch", config);
let is_shallow = config
.cli_unstable()
.gitoxide
.map_or(false, |gix| gix.shallow_deps || gix.shallow_index);
network::with_retry(config, || {
let progress_root: Arc<gix::progress::tree::Root> =
gix::progress::tree::root::Options {
Expand All @@ -50,7 +54,7 @@ pub fn with_retry_and_progress(
);
amend_authentication_hints(res, urls.get_mut().take())
});
translate_progress_to_bar(&mut progress_bar, root)?;
translate_progress_to_bar(&mut progress_bar, root, is_shallow)?;
thread.join().expect("no panic in scoped thread")
})
})
Expand All @@ -59,7 +63,9 @@ pub fn with_retry_and_progress(
fn translate_progress_to_bar(
progress_bar: &mut Progress<'_>,
root: Weak<gix::progress::tree::Root>,
is_shallow: bool,
) -> CargoResult<()> {
let remote_progress: gix::progress::Id = gix::remote::fetch::ProgressId::RemoteProgress.into();
let read_pack_bytes: gix::progress::Id =
gix::odb::pack::bundle::write::ProgressId::ReadPackBytes.into();
let delta_index_objects: gix::progress::Id =
Expand Down Expand Up @@ -88,6 +94,7 @@ fn translate_progress_to_bar(
"progress should be smoother by keeping these as multiples of each other"
);

let num_phases = if is_shallow { 3 } else { 2 }; // indexing + delta-resolution, both with same amount of objects to handle
while let Some(root) = root.upgrade() {
std::thread::sleep(sleep_interval);
let needs_update = last_fast_update.elapsed() >= fast_check_interval;
Expand All @@ -102,31 +109,37 @@ fn translate_progress_to_bar(
fn progress_by_id(
id: gix::progress::Id,
task: &gix::progress::Task,
) -> Option<&gix::progress::Value> {
(task.id == id).then(|| task.progress.as_ref()).flatten()
) -> Option<(&str, &gix::progress::Value)> {
(task.id == id)
.then(|| task.progress.as_ref())
.flatten()
.map(|value| (task.name.as_str(), value))
}
fn find_in<K>(
tasks: &[(K, gix::progress::Task)],
cb: impl Fn(&gix::progress::Task) -> Option<&gix::progress::Value>,
) -> Option<&gix::progress::Value> {
cb: impl Fn(&gix::progress::Task) -> Option<(&str, &gix::progress::Value)>,
) -> Option<(&str, &gix::progress::Value)> {
tasks.iter().find_map(|(_, t)| cb(t))
}

const NUM_PHASES: usize = 2; // indexing + delta-resolution, both with same amount of objects to handle
if let Some(objs) = find_in(&tasks, |t| progress_by_id(resolve_objects, t)) {
// Resolving deltas.
if let Some((_, objs)) = find_in(&tasks, |t| progress_by_id(resolve_objects, t)) {
// Phase 3: Resolving deltas.
let objects = objs.step.load(Ordering::Relaxed);
let total_objects = objs.done_at.expect("known amount of objects");
let msg = format!(", ({objects}/{total_objects}) resolving deltas");

progress_bar.tick(total_objects + objects, total_objects * NUM_PHASES, &msg)?;
progress_bar.tick(
(total_objects * (num_phases - 1)) + objects,
total_objects * num_phases,
&msg,
)?;
} else if let Some((objs, read_pack)) =
find_in(&tasks, |t| progress_by_id(read_pack_bytes, t)).and_then(|read| {
find_in(&tasks, |t| progress_by_id(delta_index_objects, t))
.map(|delta| (delta, read))
.map(|delta| (delta.1, read.1))
})
{
// Receiving objects.
// Phase 2: Receiving objects.
let objects = objs.step.load(Ordering::Relaxed);
let total_objects = objs.done_at.expect("known amount of objects");
let received_bytes = read_pack.step.load(Ordering::Relaxed);
Expand All @@ -139,7 +152,25 @@ fn translate_progress_to_bar(
let (rate, unit) = human_readable_bytes(counter.rate() as u64);
let msg = format!(", {rate:.2}{unit}/s");

progress_bar.tick(objects, total_objects * NUM_PHASES, &msg)?;
progress_bar.tick(
(total_objects * (num_phases - 2)) + objects,
total_objects * num_phases,
&msg,
)?;
} else if let Some((action, remote)) =
find_in(&tasks, |t| progress_by_id(remote_progress, t))
{
if !is_shallow {
continue;
}
// phase 1: work on the remote side

// Report the remote's own progress, labelled with the name of the remote task.
let objects = remote.step.load(Ordering::Relaxed);
if let Some(total_objects) = remote.done_at {
let msg = format!(", ({objects}/{total_objects}) {action}");
progress_bar.tick(objects, total_objects * num_phases, &msg)?;
}
}
}
Ok(())
Expand Down
60 changes: 55 additions & 5 deletions src/cargo/sources/git/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
//! authentication/cloning.
use crate::core::{GitReference, Verbosity};
use crate::sources::git::fetch::{History, RemoteKind};
use crate::sources::git::oxide;
use crate::sources::git::oxide::cargo_config_to_gitoxide_overrides;
use crate::util::errors::CargoResult;
Expand Down Expand Up @@ -96,9 +97,21 @@ impl GitRemote {
// if we can. If that can successfully load our revision then we've
// populated the database with the latest version of `reference`, so
// return that database and the rev we resolve to.
let remote_kind = if locked_rev.is_some() || matches!(reference, GitReference::Rev(_)) {
RemoteKind::GitDependencyForbidShallow
} else {
RemoteKind::GitDependency
};
if let Some(mut db) = db {
fetch(&mut db.repo, self.url.as_str(), reference, cargo_config)
.context(format!("failed to fetch into: {}", into.display()))?;
let history = remote_kind.to_history(db.repo.is_shallow(), cargo_config);
fetch(
&mut db.repo,
self.url.as_str(),
reference,
cargo_config,
history,
)
.context(format!("failed to fetch into: {}", into.display()))?;
match locked_rev {
Some(rev) => {
if db.contains(rev) {
Expand All @@ -121,8 +134,15 @@ impl GitRemote {
}
paths::create_dir_all(into)?;
let mut repo = init(into, true)?;
fetch(&mut repo, self.url.as_str(), reference, cargo_config)
.context(format!("failed to clone into: {}", into.display()))?;
let history = remote_kind.to_history(repo.is_shallow(), cargo_config);
fetch(
&mut repo,
self.url.as_str(),
reference,
cargo_config,
history,
)
.context(format!("failed to clone into: {}", into.display()))?;
let rev = match locked_rev {
Some(rev) => rev,
None => reference.resolve(&repo)?,
Expand Down Expand Up @@ -282,6 +302,12 @@ impl<'a> GitCheckout<'a> {
.with_checkout(checkout)
.fetch_options(fopts)
.clone(url.as_str(), into)?;
if database.repo.is_shallow() {
std::fs::copy(
database.repo.path().join("shallow"),
r.path().join("shallow"),
)?;
}
repo = Some(r);
Ok(())
})?;
Expand Down Expand Up @@ -432,7 +458,8 @@ impl<'a> GitCheckout<'a> {
cargo_config
.shell()
.status("Updating", format!("git submodule `{}`", url))?;
fetch(&mut repo, &url, &reference, cargo_config).with_context(|| {
let history = RemoteKind::GitDependency.to_history(repo.is_shallow(), cargo_config);
fetch(&mut repo, &url, &reference, cargo_config, history).with_context(|| {
format!(
"failed to fetch submodule `{}` from {}",
child.name().unwrap_or(""),
Expand Down Expand Up @@ -803,11 +830,15 @@ pub fn with_fetch_options(
})
}

/// Note that `history` is a complex computed value to determine whether it's acceptable to perform shallow clones
/// at all. It's needed to allow the caller to determine the correct position of the destination repository or move it
/// into place should its position change.
pub fn fetch(
repo: &mut git2::Repository,
orig_url: &str,
reference: &GitReference,
config: &Config,
history: History,
) -> CargoResult<()> {
if config.frozen() {
anyhow::bail!(
Expand Down Expand Up @@ -952,6 +983,7 @@ pub fn fetch(
);
let outcome = connection
.prepare_fetch(gix::remote::ref_map::Options::default())?
.with_shallow(history.clone().into())
.receive(should_interrupt)?;
Ok(outcome)
});
Expand All @@ -967,6 +999,7 @@ pub fn fetch(
// folder before writing files into it, or else not even open a directory as git repository (which is
// also handled here).
&& err.is_corrupted()
|| has_shallow_lock_file(&err)
{
repo_reinitialized.store(true, Ordering::Relaxed);
debug!(
Expand Down Expand Up @@ -1005,6 +1038,12 @@ pub fn fetch(
// again. If it looks like any other kind of error, or if we've already
// blown away the repository, then we want to return the error as-is.
let mut repo_reinitialized = false;
// while shallow repos aren't officially supported, don't risk fetching them.
// We are in this situation only when `gitoxide` is cloning but then disabled to use `git2`
// for fetching.
if repo.is_shallow() {
reinitialize(repo)?;
}
loop {
debug!("initiating fetch of {:?} from {}", refspecs, orig_url);
let res = repo
Expand Down Expand Up @@ -1036,6 +1075,17 @@ pub fn fetch(
}
}

/// `gitoxide` uses shallow locks to assure consistency when fetching to and to avoid races, and to write
/// files atomically.
/// Cargo has its own lock files and doesn't need that mechanism for race protection, so a stray lock means
/// a signal interrupted a previous shallow fetch and doesn't mean a race is happening.
fn has_shallow_lock_file(err: &crate::sources::git::fetch::Error) -> bool {
matches!(
err,
gix::env::collate::fetch::Error::Fetch(gix::remote::fetch::Error::LockShallowFile(_))
)
}

fn fetch_with_cli(
repo: &mut git2::Repository,
url: &str,
Expand Down
12 changes: 10 additions & 2 deletions src/cargo/sources/registry/remote.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use crate::core::{GitReference, PackageId, SourceId};
use crate::sources::git;
use crate::sources::git::fetch::RemoteKind;
use crate::sources::registry::download;
use crate::sources::registry::MaybeLock;
use crate::sources::registry::{LoadResponse, RegistryConfig, RegistryData};
Expand Down Expand Up @@ -300,8 +301,15 @@ impl<'cfg> RegistryData for RemoteRegistry<'cfg> {
// checkout.
let url = self.source_id.url();
let repo = self.repo.borrow_mut().unwrap();
git::fetch(repo, url.as_str(), &self.index_git_ref, self.config)
.with_context(|| format!("failed to fetch `{}`", url))?;
let history = RemoteKind::Registry.to_history(repo.is_shallow(), self.config);
git::fetch(
repo,
url.as_str(),
&self.index_git_ref,
self.config,
history,
)
.with_context(|| format!("failed to fetch `{}`", url))?;

// Create a dummy file to record the mtime for when we updated the
// index.
Expand Down
Loading

0 comments on commit ee2e9bf

Please sign in to comment.