Skip to content

Add rename tracking to blame #2022

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 3 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 22 additions & 5 deletions gitoxide-core/src/repository/blame.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,15 +47,23 @@ pub fn blame_file(
options,
)?;
let statistics = outcome.statistics;
write_blame_entries(out, outcome)?;
write_blame_entries(out, outcome, file)?;

if let Some(err) = err {
writeln!(err, "{statistics:#?}")?;
}
Ok(())
}

fn write_blame_entries(mut out: impl std::io::Write, outcome: gix::blame::Outcome) -> Result<(), std::io::Error> {
fn write_blame_entries(
mut out: impl std::io::Write,
outcome: gix::blame::Outcome,
source_file_name: gix::bstr::BString,
) -> Result<(), std::io::Error> {
let show_file_names = outcome
.entries_with_lines()
.any(|(entry, _)| entry.source_file_name.is_some());

for (entry, lines_in_hunk) in outcome.entries_with_lines() {
for ((actual_lno, source_lno), line) in entry
.range_in_blamed_file()
Expand All @@ -64,11 +72,20 @@ fn write_blame_entries(mut out: impl std::io::Write, outcome: gix::blame::Outcom
{
write!(
out,
"{short_id} {line_no} {src_line_no} {line}",
line_no = actual_lno + 1,
src_line_no = source_lno + 1,
"{short_id} {line_no} ",
short_id = entry.commit_id.to_hex_with_len(8),
line_no = actual_lno + 1,
)?;

if show_file_names {
if let Some(ref source_file_name) = entry.source_file_name {
write!(out, "{source_file_name} ")?;
} else {
write!(out, "{source_file_name} ")?;
}
}

write!(out, "{src_line_no} {line}", src_line_no = source_lno + 1)?;
}
}

Expand Down
2 changes: 2 additions & 0 deletions gix-blame/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ pub enum Error {
Traverse(#[source] Box<dyn std::error::Error + Send + Sync>),
#[error(transparent)]
DiffTree(#[from] gix_diff::tree::Error),
#[error(transparent)]
DiffTreeWithRewrites(#[from] gix_diff::tree_with_rewrites::Error),
#[error("Invalid line range was given, line range is expected to be a 1-based inclusive range in the format '<start>,<end>'")]
InvalidLineRange,
#[error("Failure to decode commit during traversal")]
Expand Down
180 changes: 167 additions & 13 deletions gix-blame/src/file/function.rs
Original file line number Diff line number Diff line change
Expand Up @@ -72,19 +72,21 @@ pub fn file(
) -> Result<Outcome, Error> {
let _span = gix_trace::coarse!("gix_blame::file()", ?file_path, ?suspect);

let mut current_file_path: BString = file_path.into();

let mut stats = Statistics::default();
let (mut buf, mut buf2, mut buf3) = (Vec::new(), Vec::new(), Vec::new());
let blamed_file_entry_id = find_path_entry_in_commit(
&odb,
&suspect,
file_path,
current_file_path.as_ref(),
cache.as_ref(),
&mut buf,
&mut buf2,
&mut stats,
)?
.ok_or_else(|| Error::FileMissing {
file_path: file_path.to_owned(),
file_path: current_file_path.to_owned(),
commit_id: suspect,
})?;
let blamed_file_blob = odb.find_blob(&blamed_file_entry_id, &mut buf)?.data.to_vec();
Expand All @@ -102,6 +104,7 @@ pub fn file(
hunks_to_blame.push(UnblamedHunk {
range_in_blamed_file: range.clone(),
suspects: [(suspect, range)].into(),
source_file_name: None,
});
}

Expand Down Expand Up @@ -165,7 +168,7 @@ pub fn file(
entry = find_path_entry_in_commit(
&odb,
&suspect,
file_path,
current_file_path.as_ref(),
cache.as_ref(),
&mut buf,
&mut buf2,
Expand Down Expand Up @@ -216,7 +219,7 @@ pub fn file(
if let Some(parent_entry_id) = find_path_entry_in_commit(
&odb,
parent_id,
file_path,
current_file_path.as_ref(),
cache.as_ref(),
&mut buf,
&mut buf2,
Expand All @@ -239,12 +242,13 @@ pub fn file(
queue.insert(parent_commit_time, parent_id);
let changes_for_file_path = tree_diff_at_file_path(
&odb,
file_path,
current_file_path.as_ref(),
suspect,
parent_id,
cache.as_ref(),
&mut stats,
&mut diff_state,
resource_cache,
&mut buf,
&mut buf2,
&mut buf3,
Expand All @@ -263,7 +267,7 @@ pub fn file(
};

match modification {
gix_diff::tree::recorder::Change::Addition { .. } => {
TreeDiffChange::Addition => {
if more_than_one_parent {
// Do nothing under the assumption that this always (or almost always)
// implies that the file comes from a different parent, compared to which
Expand All @@ -272,20 +276,44 @@ pub fn file(
break 'outer;
}
}
gix_diff::tree::recorder::Change::Deletion { .. } => {
TreeDiffChange::Deletion => {
unreachable!("We already found file_path in suspect^{{tree}}, so it can't be deleted")
}
gix_diff::tree::recorder::Change::Modification { previous_oid, oid, .. } => {
TreeDiffChange::Modification { previous_id, id } => {
let changes = blob_changes(
&odb,
resource_cache,
oid,
previous_oid,
file_path,
id,
previous_id,
current_file_path.as_ref(),
options.diff_algorithm,
&mut stats,
)?;
hunks_to_blame = process_changes(hunks_to_blame, changes, suspect, parent_id);
}
TreeDiffChange::Rewrite {
source_location,
source_id,
id,
} => {
let changes = blob_changes(
&odb,
resource_cache,
id,
source_id,
current_file_path.as_ref(),
options.diff_algorithm,
&mut stats,
)?;
hunks_to_blame = process_changes(hunks_to_blame, changes, suspect, parent_id);

for hunk in hunks_to_blame.iter_mut() {
if hunk.has_suspect(&parent_id) {
hunk.source_file_name = Some(source_location.clone());
}
}

current_file_path = source_location;
}
}
}
Expand Down Expand Up @@ -382,6 +410,7 @@ fn coalesce_blame_entries(lines_blamed: Vec<BlameEntry>) -> Vec<BlameEntry> {
len: NonZeroU32::new((current_source_range.end - previous_source_range.start) as u32)
.expect("BUG: hunks are never zero-sized"),
commit_id: previous_entry.commit_id,
source_file_name: previous_entry.source_file_name.clone(),
};

acc.pop();
Expand All @@ -399,6 +428,57 @@ fn coalesce_blame_entries(lines_blamed: Vec<BlameEntry>) -> Vec<BlameEntry> {
})
}

/// The change found for the blamed path when diffing a commit's tree against a parent's tree.
///
/// Values are produced via `From` from either `gix_diff::tree::recorder::Change` (plain tree
/// diff) or `gix_diff::tree_with_rewrites::Change` (tree diff with rewrite tracking), keeping
/// only the fields the blame algorithm reads.
enum TreeDiffChange {
/// The path was reported as added.
Addition,
/// The path was reported as deleted.
Deletion,
/// The path exists on both sides with different content.
Modification {
/// Id of the object on the parent side of the diff.
previous_id: ObjectId,
/// Id of the object on the commit side of the diff.
id: ObjectId,
},
/// The path was reported as a rewrite of another path; only produced by the diff with
/// rewrite tracking.
Rewrite {
/// Path the content was rewritten from; blame continues at this path afterwards.
source_location: BString,
/// Id of the object at `source_location`.
source_id: ObjectId,
/// Id of the object at the rewritten (current) path.
id: ObjectId,
},
}

impl From<gix_diff::tree::recorder::Change> for TreeDiffChange {
fn from(value: gix_diff::tree::recorder::Change) -> Self {
use gix_diff::tree::recorder::Change;

match value {
Change::Addition { .. } => Self::Addition,
Change::Deletion { .. } => Self::Deletion,
Change::Modification { previous_oid, oid, .. } => Self::Modification {
previous_id: previous_oid,
id: oid,
},
}
}
}

impl From<gix_diff::tree_with_rewrites::Change> for TreeDiffChange {
fn from(value: gix_diff::tree_with_rewrites::Change) -> Self {
use gix_diff::tree_with_rewrites::Change;

match value {
Change::Addition { .. } => Self::Addition,
Change::Deletion { .. } => Self::Deletion,
Change::Modification { previous_id, id, .. } => Self::Modification { previous_id, id },
Change::Rewrite {
source_location,
source_id,
id,
..
} => Self::Rewrite {
source_location,
source_id,
id,
},
}
}
}

#[allow(clippy::too_many_arguments)]
fn tree_diff_at_file_path(
odb: impl gix_object::Find + gix_object::FindHeader,
Expand All @@ -408,10 +488,11 @@ fn tree_diff_at_file_path(
cache: Option<&gix_commitgraph::Graph>,
stats: &mut Statistics,
state: &mut gix_diff::tree::State,
resource_cache: &mut gix_diff::blob::Platform,
commit_buf: &mut Vec<u8>,
lhs_tree_buf: &mut Vec<u8>,
rhs_tree_buf: &mut Vec<u8>,
) -> Result<Option<gix_diff::tree::recorder::Change>, Error> {
) -> Result<Option<TreeDiffChange>, Error> {
let parent_tree_id = find_commit(cache, &odb, &parent_id, commit_buf)?.tree_id()?;

let parent_tree_iter = odb.find_tree_iter(&parent_tree_id, lhs_tree_buf)?;
Expand All @@ -422,6 +503,37 @@ fn tree_diff_at_file_path(
let tree_iter = odb.find_tree_iter(&tree_id, rhs_tree_buf)?;
stats.trees_decoded += 1;

let result = tree_diff_without_rewrites_at_file_path(&odb, file_path, stats, state, parent_tree_iter, tree_iter)?;

// Here, we follow git’s behaviour. We return when we’ve found a `Modification`. We try a
// second time with rename tracking when the change is either an `Addition` or a `Deletion`
// because those can turn out to have been a `Rewrite`.
if matches!(result, Some(TreeDiffChange::Modification { .. })) {
return Ok(result);
}

let result = tree_diff_with_rewrites_at_file_path(
&odb,
file_path,
stats,
state,
resource_cache,
parent_tree_iter,
tree_iter,
)?;

Ok(result)
}

#[allow(clippy::too_many_arguments)]
fn tree_diff_without_rewrites_at_file_path(
odb: impl gix_object::Find + gix_object::FindHeader,
file_path: &BStr,
stats: &mut Statistics,
state: &mut gix_diff::tree::State,
parent_tree_iter: gix_object::TreeRefIter<'_>,
tree_iter: gix_object::TreeRefIter<'_>,
) -> Result<Option<TreeDiffChange>, Error> {
struct FindChangeToPath {
inner: gix_diff::tree::Recorder,
interesting_path: BString,
Expand Down Expand Up @@ -509,11 +621,53 @@ fn tree_diff_at_file_path(
stats.trees_diffed += 1;

match result {
Ok(_) | Err(gix_diff::tree::Error::Cancelled) => Ok(recorder.change),
Ok(_) | Err(gix_diff::tree::Error::Cancelled) => Ok(recorder.change.map(std::convert::Into::into)),
Err(error) => Err(Error::DiffTree(error)),
}
}

#[allow(clippy::too_many_arguments)]
fn tree_diff_with_rewrites_at_file_path(
odb: impl gix_object::Find + gix_object::FindHeader,
file_path: &BStr,
stats: &mut Statistics,
state: &mut gix_diff::tree::State,
resource_cache: &mut gix_diff::blob::Platform,
parent_tree_iter: gix_object::TreeRefIter<'_>,
tree_iter: gix_object::TreeRefIter<'_>,
) -> Result<Option<TreeDiffChange>, Error> {
let mut change: Option<gix_diff::tree_with_rewrites::Change> = None;

let options: gix_diff::tree_with_rewrites::Options = gix_diff::tree_with_rewrites::Options {
location: Some(gix_diff::tree::recorder::Location::Path),
rewrites: Some(gix_diff::Rewrites::default()),
};
let result = gix_diff::tree_with_rewrites(
parent_tree_iter,
tree_iter,
resource_cache,
state,
&odb,
|change_ref| -> Result<_, std::convert::Infallible> {
if change_ref.location() == file_path {
change = Some(change_ref.into_owned());
Ok(gix_diff::tree_with_rewrites::Action::Cancel)
} else {
Ok(gix_diff::tree_with_rewrites::Action::Continue)
}
},
options,
);
stats.trees_diffed_with_rewrites += 1;

match result {
Ok(_) | Err(gix_diff::tree_with_rewrites::Error::Diff(gix_diff::tree::Error::Cancelled)) => {
Ok(change.map(std::convert::Into::into))
}
Err(error) => Err(Error::DiffTreeWithRewrites(error)),
}
}

fn blob_changes(
odb: impl gix_object::Find + gix_object::FindHeader,
resource_cache: &mut gix_diff::blob::Platform,
Expand Down
3 changes: 3 additions & 0 deletions gix-blame/src/file/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -393,11 +393,13 @@ impl UnblamedHunk {
range_in_blamed_file: self.range_in_blamed_file.start
..(self.range_in_blamed_file.start + split_at_from_start),
suspects: new_suspects_before.collect(),
source_file_name: self.source_file_name.clone(),
};
let new_hunk_after = Self {
range_in_blamed_file: (self.range_in_blamed_file.start + split_at_from_start)
..(self.range_in_blamed_file.end),
suspects: new_suspects_after.collect(),
source_file_name: self.source_file_name,
};

Either::Right((new_hunk_before, new_hunk_after))
Expand Down Expand Up @@ -445,6 +447,7 @@ impl BlameEntry {
start_in_source_file: range_in_source_file.start,
len: force_non_zero(range_in_source_file.len() as u32),
commit_id,
source_file_name: unblamed_hunk.source_file_name.clone(),
})
}
}
Expand Down
Loading
Loading