Skip to content

Commit afa5788

Browse files
committed
Optimize diff implementation
* delay conversion to String for filepaths to the last moment. That way, only the paths that are displayed will be converted in an operation that isn't free. * change diff implementation to decode parents only once, instead of three times in the common case. * set up an object cache in the `Repository` for faster traversals and much faster diffs.
1 parent 5b85a3f commit afa5788

File tree

2 files changed

+32
-32
lines changed

2 files changed

+32
-32
lines changed

src/info/mod.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ impl std::fmt::Display for Info {
116116
}
117117

118118
pub fn build_info(cli_options: &CliOptions) -> Result<Info> {
119-
let repo = gix::ThreadSafeRepository::discover_opts(
119+
let mut repo = gix::ThreadSafeRepository::discover_opts(
120120
&cli_options.input,
121121
gix::discover::upwards::Options {
122122
dot_git_only: true,
@@ -125,6 +125,8 @@ pub fn build_info(cli_options: &CliOptions) -> Result<Info> {
125125
Mapping::default(),
126126
)?
127127
.to_thread_local();
128+
// Having an object cache is important for getting much better traversal and diff performance.
129+
repo.object_cache_size_if_unset(4 * 1024 * 1024);
128130
let repo_path = get_work_dir(&repo)?;
129131

130132
let loc_by_language_sorted_handle = std::thread::spawn({

src/info/utils/git.rs

Lines changed: 29 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ use gix::bstr::{BString, Utf8Error};
77
use gix::object::tree::diff::change::Event;
88
use gix::object::tree::diff::{Action, Change};
99
use gix::objs::tree::EntryMode;
10+
use gix::prelude::ObjectIdExt;
1011
use gix::Commit;
1112
use regex::Regex;
1213
use std::collections::HashMap;
@@ -46,7 +47,7 @@ impl CommitMetrics {
4647
let mailmap_config = repo.open_mailmap();
4748
let mut number_of_commits_by_signature: HashMap<Sig, usize> = HashMap::new();
4849
let mut total_number_of_commits = 0;
49-
let mut number_of_commits_by_file_path: HashMap<String, usize> = HashMap::new();
50+
let mut number_of_commits_by_file_path: HashMap<BString, usize> = HashMap::new();
5051

5152
// From newest to oldest
5253
while let Some(commit_id) = commit_iter_peekable.next() {
@@ -113,20 +114,19 @@ impl CommitMetrics {
113114
}
114115

115116
fn compute_file_churns(
116-
number_of_commits_by_file_path: HashMap<String, usize>,
117+
number_of_commits_by_file_path: HashMap<BString, usize>,
117118
number_of_file_churns_to_display: usize,
118119
number_separator: NumberSeparator,
119120
) -> Vec<FileChurn> {
120-
let mut number_of_commits_by_file_path_sorted: Vec<(String, usize)> =
121-
number_of_commits_by_file_path.into_iter().collect();
121+
let mut number_of_commits_by_file_path_sorted = Vec::from_iter(number_of_commits_by_file_path);
122122

123123
number_of_commits_by_file_path_sorted
124124
.sort_by(|(_, a_count), (_, b_count)| b_count.cmp(a_count));
125125

126126
number_of_commits_by_file_path_sorted
127127
.into_iter()
128128
.map(|(file_path, nbr_of_commits)| {
129-
FileChurn::new(file_path, nbr_of_commits, number_separator)
129+
FileChurn::new(file_path.to_string(), nbr_of_commits, number_separator)
130130
})
131131
.take(number_of_file_churns_to_display)
132132
.collect()
@@ -168,40 +168,38 @@ fn compute_authors(
168168
}
169169

170170
fn compute_diff(
171-
change_map: &mut HashMap<String, usize>,
171+
change_map: &mut HashMap<BString, usize>,
172172
commit: &Commit,
173173
repo: &gix::Repository,
174174
) -> Result<()> {
175-
// Handles the very first commit
176-
if commit.parent_ids().count() == 0 {
177-
repo.empty_tree()
175+
let mut parents = commit.parent_ids();
176+
let parents = (
177+
parents
178+
.next()
179+
.map(|parent_id| -> Result<_> { Ok(parent_id.object()?.into_commit().tree_id()?) })
180+
.unwrap_or_else(|| {
181+
Ok(gix::hash::ObjectId::empty_tree(repo.object_hash()).attach(repo))
182+
})?,
183+
parents.next(),
184+
);
185+
// Ignore merge commits
186+
if let (tree_id, None) = parents {
187+
tree_id
188+
.object()?
189+
.into_tree()
178190
.changes()?
179191
.track_path()
180192
.for_each_to_obtain_tree(&commit.tree()?, |change| {
181193
for_each_change(change, change_map)
182194
})?;
183195
}
184-
// Ignore merge commits
185-
else if commit.parent_ids().count() == 1 {
186-
for parent_id in commit.parent_ids() {
187-
parent_id
188-
.object()?
189-
.into_commit()
190-
.tree()?
191-
.changes()?
192-
.track_path()
193-
.for_each_to_obtain_tree(&commit.tree()?, |change| {
194-
for_each_change(change, change_map)
195-
})?;
196-
}
197-
}
198196

199197
Ok(())
200198
}
201199

202200
fn for_each_change(
203201
change: Change,
204-
change_map: &mut HashMap<String, usize>,
202+
change_map: &mut HashMap<BString, usize>,
205203
) -> Result<Action, Utf8Error> {
206204
let is_file_change = match change.event {
207205
Event::Addition { entry_mode, .. } | Event::Modification { entry_mode, .. } => {
@@ -210,11 +208,11 @@ fn for_each_change(
210208
Event::Deletion { .. } | Event::Rewrite { .. } => false,
211209
};
212210
if is_file_change {
213-
let path = change.location.to_os_str()?.to_string_lossy();
214-
*change_map.entry(path.into_owned()).or_insert(0) += 1;
211+
let path = change.location;
212+
*change_map.entry(path.to_owned()).or_insert(0) += 1;
215213
}
216214

217-
Ok::<Action, Utf8Error>(Action::Continue)
215+
Ok(Action::Continue)
218216
}
219217

220218
fn get_no_bots_regex(no_bots: &Option<Option<MyRegex>>) -> Result<Option<MyRegex>> {
@@ -304,10 +302,10 @@ mod tests {
304302
#[test]
305303
fn test_compute_file_churns() {
306304
let mut number_of_commits_by_file_path = HashMap::new();
307-
number_of_commits_by_file_path.insert("path/to/file1.txt".to_string(), 2);
308-
number_of_commits_by_file_path.insert("path/to/file2.txt".to_string(), 5);
309-
number_of_commits_by_file_path.insert("path/to/file3.txt".to_string(), 3);
310-
number_of_commits_by_file_path.insert("path/to/file4.txt".to_string(), 7);
305+
number_of_commits_by_file_path.insert("path/to/file1.txt".into(), 2);
306+
number_of_commits_by_file_path.insert("path/to/file2.txt".into(), 5);
307+
number_of_commits_by_file_path.insert("path/to/file3.txt".into(), 3);
308+
number_of_commits_by_file_path.insert("path/to/file4.txt".into(), 7);
311309

312310
let number_of_file_churns_to_display = 3;
313311
let number_separator = NumberSeparator::Comma;

0 commit comments

Comments
 (0)