Skip to content

Commit e3bf91e

Browse files
mononoke: add generate_placeholder_diff option
Summary: Let's use functionality added in D20389226 so that we can generate diffs even for large files. I've contemplated between two approaches: either "silently" generate placeholder diff for files that are over the limit or add a new option where client can request these placeholders. I choose the latter for a few reasons: 1) To me option #1 might cause surprises e.g. diffing a single large file doesn't fail, but diffing two files whose size is (LIMIT / 2 + 1) will fail. Option #2 let's the client be very explicit about what it needs - and it also won't return a placeholder when the actual content is expected! 2) Option #2 makes the client think about what it wants to be returned, and it seems in line with source control thrift API design in general i.e. commit_file_diffs is not trivial to use by design to because force client to think about edge cases. And it seems that forcing client to think about additional important edge case (i.e. large file) makes sense. 3) The code change is simpler with option #2 I also thought about adding generate_placeholder_diff parameter to CommitFileDiffsParams, but that makes integration with scs_helper.py harder. Reviewed By: markbt Differential Revision: D20417787 fbshipit-source-id: ab9b32fd7a4768043414ed7d8bf39e3c6f4eb53e
1 parent 62a0277 commit e3bf91e

File tree

4 files changed

+111
-10
lines changed

4 files changed

+111
-10
lines changed

eden/mononoke/mononoke_api/src/changeset_path.rs

Lines changed: 34 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -290,35 +290,62 @@ impl ChangesetPathContext {
290290
}
291291
}
292292

293+
#[derive(Clone, Copy, Eq, PartialEq)]
294+
pub enum UnifiedDiffMode {
295+
Inline,
296+
/// Content is not fetched - instead a placeholder diff like
297+
///
298+
/// diff --git a/file.txt b/file.txt
299+
/// Binary file file.txt has changed
300+
///
301+
/// is generated
302+
OmitContent,
303+
}
304+
293305
/// Renders the diff (in the git diff format) against some other path.
294306
/// Provided with copy_info will render the diff as copy or move as requested.
295-
// (does not do the copy-tracking on its own) async fn unified_diff(
307+
/// (does not do the copy-tracking on its own)
308+
/// If `omit_content` is set then unified_diff(...) doesn't fetch content, but just
309+
/// generates a placeholder diff that says that files differ.
296310
pub async fn unified_diff(
297311
// The diff applied to old_path with produce new_path
298312
old_path: &Option<ChangesetPathContext>,
299313
new_path: &Option<ChangesetPathContext>,
300314
copy_info: CopyInfo,
301315
context_lines: usize,
316+
mode: UnifiedDiffMode,
302317
) -> Result<UnifiedDiff, MononokeError> {
303318
// Helper for getting file information.
304319
async fn get_file_data(
305320
path: &Option<ChangesetPathContext>,
321+
mode: UnifiedDiffMode,
306322
) -> Result<Option<xdiff::DiffFile<String, Bytes>>, MononokeError> {
307323
match path {
308324
Some(path) => {
309325
if let Some(file_type) = path.file_type().await? {
310326
let file = path.file().await?.ok_or_else(|| {
311327
MononokeError::from(Error::msg("assertion error: file should exist"))
312328
})?;
313-
let contents = file.content_concat().await?;
314329
let file_type = match file_type {
315330
FileType::Regular => xdiff::FileType::Regular,
316331
FileType::Executable => xdiff::FileType::Executable,
317332
FileType::Symlink => xdiff::FileType::Symlink,
318333
};
334+
let contents = match mode {
335+
UnifiedDiffMode::Inline => {
336+
let contents = file.content_concat().await?;
337+
xdiff::FileContent::Inline(contents)
338+
}
339+
UnifiedDiffMode::OmitContent => {
340+
let content_id = file.metadata().await?.content_id;
341+
xdiff::FileContent::Omitted {
342+
content_hash: format!("{}", content_id),
343+
}
344+
}
345+
};
319346
Ok(Some(xdiff::DiffFile {
320347
path: path.path().to_string(),
321-
contents: xdiff::FileContent::Inline(contents),
348+
contents,
322349
file_type,
323350
}))
324351
} else {
@@ -329,8 +356,10 @@ pub async fn unified_diff(
329356
}
330357
}
331358

332-
let (old_diff_file, new_diff_file) =
333-
try_join!(get_file_data(&old_path), get_file_data(&new_path))?;
359+
let (old_diff_file, new_diff_file) = try_join!(
360+
get_file_data(&old_path, mode),
361+
get_file_data(&new_path, mode)
362+
)?;
334363
let is_binary = xdiff::file_is_binary(&old_diff_file) || xdiff::file_is_binary(&new_diff_file);
335364
let copy_info = match copy_info {
336365
CopyInfo::None => xdiff::CopyInfo::None,

eden/mononoke/mononoke_api/src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ pub use crate::legacy::get_content_by_path;
4949

5050
pub use crate::changeset::ChangesetContext;
5151
pub use crate::changeset_path::{
52-
unified_diff, ChangesetPathContext, CopyInfo, PathEntry, UnifiedDiff,
52+
unified_diff, ChangesetPathContext, CopyInfo, PathEntry, UnifiedDiff, UnifiedDiffMode,
5353
};
5454
pub use crate::changeset_path_diff::ChangesetPathDiffContext;
5555
pub use crate::errors::MononokeError;

eden/mononoke/scs_server/src/methods/commit.rs

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ use std::convert::{TryFrom, TryInto};
99

1010
use context::CoreContext;
1111
use futures_util::{future, stream, try_join, StreamExt, TryStreamExt};
12-
use mononoke_api::{unified_diff, ChangesetSpecifier, CopyInfo, MononokePath};
12+
use mononoke_api::{unified_diff, ChangesetSpecifier, CopyInfo, MononokePath, UnifiedDiffMode};
1313
use source_control as thrift;
1414

1515
use crate::commit_id::{map_commit_identity, CommitIdExt};
@@ -78,6 +78,11 @@ impl SourceControlServiceImpl {
7878
.paths
7979
.into_iter()
8080
.map(|path_pair| {
81+
let mode = if path_pair.generate_placeholder_diff.unwrap_or(false) {
82+
UnifiedDiffMode::OmitContent
83+
} else {
84+
UnifiedDiffMode::Inline
85+
};
8186
Ok((
8287
match path_pair.base_path {
8388
Some(path) => Some(base_commit.path(&path)?),
@@ -88,14 +93,19 @@ impl SourceControlServiceImpl {
8893
None => None,
8994
},
9095
CopyInfo::from_request(&path_pair.copy_info)?,
96+
mode,
9197
))
9298
})
9399
.collect::<Result<Vec<_>, errors::ServiceError>>()?;
94100

95101
// Check the total file size limit
96102
let flat_paths = paths
97103
.iter()
98-
.flat_map(|(base_path, other_path, _)| vec![base_path, other_path])
104+
.filter_map(|(base_path, other_path, _, mode)| match mode {
105+
UnifiedDiffMode::OmitContent => None,
106+
UnifiedDiffMode::Inline => Some((base_path, other_path)),
107+
})
108+
.flat_map(|(base_path, other_path)| vec![base_path, other_path])
99109
.filter_map(|x| x.as_ref());
100110
let total_input_size: u64 = future::try_join_all(flat_paths.map(|path| async move {
101111
let r: Result<_, errors::ServiceError> = if let Some(file) = path.file().await? {
@@ -114,8 +124,9 @@ impl SourceControlServiceImpl {
114124
}
115125

116126
let path_diffs = future::try_join_all(paths.into_iter().map(
117-
|(base_path, other_path, copy_info)| async move {
118-
let diff = unified_diff(&other_path, &base_path, copy_info, context_lines).await?;
127+
|(base_path, other_path, copy_info, mode)| async move {
128+
let diff =
129+
unified_diff(&other_path, &base_path, copy_info, context_lines, mode).await?;
119130
let r: Result<_, errors::ServiceError> =
120131
Ok(thrift::CommitFileDiffsResponseElement {
121132
base_path: base_path.map(|p| p.path().to_string()),
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
# Copyright (c) Facebook, Inc. and its affiliates.
2+
#
3+
# This software may be used and distributed according to the terms of the
4+
# GNU General Public License found in the LICENSE file in the root
5+
# directory of this source tree.
6+
7+
$ . "${TEST_FIXTURES}/library.sh"
8+
9+
Setup config repo:
10+
$ setup_common_config
11+
$ cd "$TESTTMP"
12+
13+
Setup testing repo for mononoke:
14+
$ hg init repo-hg
15+
$ cd repo-hg
16+
$ setup_hg_server
17+
18+
Helper for making commit:
19+
$ function commit() { # the arg is used both for commit message and variable name
20+
> hg commit -Am $1 # create commit
21+
> export COMMIT_$1="$(hg --debug id -i)" # save hash to variable
22+
> }
23+
24+
First two simple commits and bookmark:
25+
$ echo -e "a\nb\nc\nd\ne" > a
26+
$ commit A
27+
adding a
28+
29+
$ echo -e "a\nb\nd\ne\nf" > b
30+
$ commit B
31+
adding b
32+
33+
A commit with a file change and binary file
34+
35+
$ echo -e "b\nc\nd\ne\nf" > b
36+
$ echo -e "\0 10" > binary
37+
$ commit C
38+
adding binary
39+
40+
import testing repo to mononoke
41+
$ cd ..
42+
$ blobimport repo-hg/.hg repo
43+
44+
start SCS server
45+
$ start_and_wait_for_scs_server --scuba-log-file "$TESTTMP/scuba.json"
46+
47+
$ scsc diff --repo repo --hg-commit-id "$COMMIT_A" --hg-commit-id "$COMMIT_B"
48+
diff --git a/b b/b
49+
new file mode 100644
50+
--- /dev/null
51+
+++ b/b
52+
@@ -0,0 +1,5 @@
53+
+a
54+
+b
55+
+d
56+
+e
57+
+f
58+
$ scsc diff --repo repo --hg-commit-id "$COMMIT_A" --hg-commit-id "$COMMIT_B" --placeholders-only
59+
diff --git a/b b/b
60+
new file mode 100644
61+
Binary file b has changed

0 commit comments

Comments
 (0)