Skip to content

Commit bcc059d

Browse files
gustavoavena authored and facebook-github-bot committed
Support heads per export path
Summary: ## Problem If an export path was created by renaming another, the user might want to follow the history from the old path name. An example of this is `eden/mononoke` which was renamed in 2020 from `scm/mononoke` (D19722832). See T164121717 and its attached diffs for more details about this problem. ## Solutions considered **This diff implements solution #2.** ### 1. Try to automatically detect and follow renames I spent some time implementing this, but there are a lot of edge cases and implementation would still take significant time and is likely to have issues with correctness (e.g. export what we don't want). We decided it wasn't worth pursuing this given that this tool will be used by engineers. ### 2. Warn the user and support passing the rename revision manually (this diff) During an export, we can (a) warn users when a changeset is likely creating the export path by renaming another one and (b) provide a way for the user to specify that we should export commits from a path but **only up to a specific changeset** (i.e. a HEAD commit). This is what this diff and the one above implement. ## This diff This diff implements support in the library, i.e. when export paths are given with specific head commits, it builds the proper `GitExportGraphInfo` and commits are copied properly given this `GitExportGraphInfo`. The next diff adds arguments to the CLI so that this functionality can be used. There's still one small improvement I want to make to the unit tests, but I figured I could publish this to speed up review while I update the tests (and possibly think of more test cases). ## TODO: - Update unit tests to check copy_from field Reviewed By: mitrandir77 Differential Revision: D49682305 fbshipit-source-id: 535f314c76214cfaa6aca90c9b48a4c7523b95de
1 parent cba4b42 commit bcc059d

File tree

8 files changed

+732
-120
lines changed

8 files changed

+732
-120
lines changed

eden/mononoke/git/gitexport/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@ blobstore = { version = "0.1.0", path = "../../blobstore" }
2828
bookmarks_types = { version = "0.1.0", path = "../../bookmarks/bookmarks_types" }
2929
borrowed = { version = "0.1.0", git = "https://github.com/facebookexperimental/rust-shed.git", branch = "main" }
3030
clap = { version = "4.3.5", features = ["derive", "env", "string", "unicode", "wrap_help"] }
31-
cloned = { version = "0.1.0", git = "https://github.com/facebookexperimental/rust-shed.git", branch = "main" }
3231
commit_id = { version = "0.1.0", path = "../../cmdlib/commit_id" }
3332
commit_transformation = { version = "0.1.0", path = "../../megarepo_api/commit_transformation" }
3433
fbinit = { version = "0.1.2", git = "https://github.com/facebookexperimental/rust-shed.git", branch = "main" }
@@ -49,5 +48,6 @@ test_repo_factory = { version = "0.1.0", path = "../../repo_factory/test_repo_fa
4948

5049
[dev-dependencies]
5150
fbinit-tokio = { version = "0.1.2", git = "https://github.com/facebookexperimental/rust-shed.git", branch = "main" }
51+
maplit = "1.0"
5252
slog_glog_fmt = { version = "0.1.0", git = "https://github.com/facebookexperimental/rust-shed.git", branch = "main" }
5353
test_utils = { version = "0.1.0", path = "test_utils" }

eden/mononoke/git/gitexport/src/gitexport_tools/lib.rs

Lines changed: 145 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -16,14 +16,14 @@ use anyhow::Result;
1616
use blobstore::Loadable;
1717
use blobstore::PutBehaviour;
1818
use borrowed::borrowed;
19-
use cloned::cloned;
2019
use commit_transformation::rewrite_commit;
2120
use commit_transformation::upload_commits;
2221
use commit_transformation::MultiMover;
2322
use fbinit::FacebookInit;
2423
use fileblob::Fileblob;
2524
use futures::stream::TryStreamExt;
2625
use futures::stream::{self};
26+
use futures::StreamExt;
2727
use indicatif::ProgressBar;
2828
use indicatif::ProgressStyle;
2929
use mononoke_api::BookmarkKey;
@@ -42,12 +42,15 @@ use slog::debug;
4242
use slog::error;
4343
use slog::info;
4444
use slog::trace;
45+
use slog::warn;
4546
use slog::Drain;
47+
use slog::Logger;
4648
use sql::rusqlite::Connection as SqliteConnection;
4749
use test_repo_factory::TestRepoFactory;
4850

4951
pub use crate::partial_commit_graph::build_partial_commit_graph_for_export;
5052
use crate::partial_commit_graph::ChangesetParents;
53+
use crate::partial_commit_graph::ExportPathInfo;
5154
pub use crate::partial_commit_graph::GitExportGraphInfo;
5255

5356
pub const MASTER_BOOKMARK: &str = "master";
@@ -59,7 +62,7 @@ pub async fn rewrite_partial_changesets(
5962
fb: FacebookInit,
6063
source_repo_ctx: RepoContext,
6164
graph_info: GitExportGraphInfo,
62-
export_paths: Vec<NonRootMPath>,
65+
export_paths: Vec<ExportPathInfo>,
6366
) -> Result<RepoContext> {
6467
let ctx: &CoreContext = source_repo_ctx.ctx();
6568
let changesets = graph_info.changesets;
@@ -73,24 +76,6 @@ pub async fn rewrite_partial_changesets(
7376
// Repo that will hold the partial changesets that will be exported to git
7477
let temp_repo_ctx = create_temp_repo(fb, ctx).await?;
7578

76-
let logger_clone = logger.clone();
77-
78-
let multi_mover: MultiMover<'static> = Arc::new(move |source_path: &NonRootMPath| {
79-
let should_export = export_paths.iter().any(|p| p.is_prefix_of(source_path));
80-
81-
if !should_export {
82-
trace!(
83-
logger_clone,
84-
"Path {:#?} will NOT be exported.",
85-
&source_path
86-
);
87-
return Ok(vec![]);
88-
}
89-
90-
trace!(logger_clone, "Path {:#?} will be exported.", &source_path);
91-
Ok(vec![source_path.clone()])
92-
});
93-
9479
let num_changesets = changesets.len().try_into().unwrap();
9580
let cs_results: Vec<Result<ChangesetContext, MononokeError>> =
9681
changesets.into_iter().map(Ok).collect::<Vec<_>>();
@@ -116,14 +101,21 @@ pub async fn rewrite_partial_changesets(
116101
|(mut new_bonsai_changesets, remapped_parents), changeset| {
117102
borrowed!(source_repo_ctx);
118103
borrowed!(mb_progress_bar);
119-
cloned!(multi_mover);
104+
borrowed!(export_paths);
105+
borrowed!(logger);
106+
120107
async move {
108+
let export_paths =
109+
get_export_paths_for_changeset(&changeset, export_paths).await?;
110+
111+
let multi_mover = build_multi_mover_for_changeset(logger, &export_paths)?;
121112
let (new_bcs, remapped_parents) = create_bonsai_for_new_repo(
122113
source_repo_ctx,
123114
multi_mover,
124115
changeset_parents,
125116
remapped_parents,
126117
changeset,
118+
&export_paths,
127119
)
128120
.await?;
129121
new_bonsai_changesets.push(new_bcs);
@@ -171,12 +163,13 @@ pub async fn rewrite_partial_changesets(
171163

172164
/// Given a changeset and a set of paths being exported, build the
173165
/// BonsaiChangeset containing only the changes to those paths.
174-
async fn create_bonsai_for_new_repo(
166+
async fn create_bonsai_for_new_repo<'a>(
175167
source_repo_ctx: &RepoContext,
176168
multi_mover: MultiMover<'_>,
177169
changeset_parents: &ChangesetParents,
178170
mut remapped_parents: HashMap<ChangesetId, ChangesetId>,
179171
changeset_ctx: ChangesetContext,
172+
export_paths: &'a [&'a NonRootMPath],
180173
) -> Result<(BonsaiChangeset, HashMap<ChangesetId, ChangesetId>), MononokeError> {
181174
let logger = changeset_ctx.repo().ctx().logger();
182175
trace!(
@@ -207,30 +200,66 @@ async fn create_bonsai_for_new_repo(
207200
let mut mut_bcs = bcs.into_mut();
208201
mut_bcs.parents = orig_parent_ids.clone();
209202

210-
// If this isn't the first changeset (i.e. that creates the oldest exported
211-
// directory), we need to make sure that file changes that copy from
212-
// previous commits only reference revisions that are also being exported.
213-
if let Some(new_parent_cs_id) = orig_parent_ids.first() {
214-
// TODO(T161204758): iterate over all parents and select one that is the closest
215-
// ancestor of each commit in the `copy_from` field.
216-
mut_bcs.file_changes.iter_mut().for_each(|(_p, fc)| {
217-
if let FileChange::Change(tracked_fc) = fc {
218-
// If any FileChange copies a file from a previous revision (e.g. a parent),
219-
// set the `copy_from` field to point to its new parent.
220-
//
221-
// Since we're building a history using all changesets that
222-
// affect the exported directories, any file being copied
223-
// should always exist in the new parent.
203+
// If this changeset created an export directory by copying/moving files
204+
// from a directory that's not being exported, we only need to print a
205+
// warning once and not for every single file that was copied.
206+
let mut printed_warning_about_copy = false;
207+
208+
// TODO(T161204758): iterate over all parents and select one that is the closest
209+
// ancestor of each commit in the `copy_from` field.
210+
mut_bcs.file_changes.iter_mut().for_each(|(_p, fc)| {
211+
if let FileChange::Change(tracked_fc) = fc {
212+
// If any FileChange copies a file from a previous revision (e.g. a parent),
213+
// set the `copy_from` field to point to its new parent.
214+
//
215+
// Since we're building a history using all changesets that
216+
// affect the exported directories, any file being copied
217+
// should always exist in the new parent.
218+
//
219+
// If this isn't done, it might not be possible to rewrite the
220+
// commit to the new repo, because the changeset referenced in
221+
// its `copy_from` field will not have been remapped.
222+
if let Some((old_path, copy_from_cs_id)) = tracked_fc.copy_from_mut() {
223+
// If this isn't the first changeset (i.e. that creates the oldest exported
224+
// directory), we need to make sure that file changes that copy from
225+
// previous commits only reference revisions that are also being exported.
224226
//
225-
// If this isn't done, it might not be possible to rewrite the
226-
// commit to the new repo, because the changeset referenced in
227-
// its `copy_from` field will not have been remapped.
228-
if let Some((_p, copy_from_cs_id)) = tracked_fc.copy_from_mut() {
229-
*copy_from_cs_id = new_parent_cs_id.clone();
227+
// We should also make sure that we only reference files that are
228+
// in the revision that we'll set as the parent.
229+
// If that's not the case, the `copy_from` information will be
230+
// dropped and a warning will be printed to the user so they're
231+
// aware that files in an export directory might have been
232+
// copied/moved from another one.
233+
let old_path = &*old_path; // We need an immutable reference for prefix checks
234+
let should_export = export_paths.iter().any(|p| p.is_prefix_of(old_path));
235+
let new_parent_cs_id = orig_parent_ids.first();
236+
if new_parent_cs_id.is_some() && should_export {
237+
*copy_from_cs_id = new_parent_cs_id.cloned().unwrap();
238+
} else {
239+
// First commit where the export paths were created.
240+
// If the `copy_from` field is set, it means that some files
241+
// were copied from other directories, which might not be
242+
// included in the provided export paths.
243+
//
244+
// By default, these renames won't be followed, but a warning
245+
// will be printed so that the user can check the commit
246+
// and decide if they want to re-run it passing the old
247+
// name as an export path along with this commit as the
248+
// head changeset.
249+
if !printed_warning_about_copy {
250+
warn!(
251+
logger,
252+
"Changeset {0:?} might have created one of the exported paths by moving/copying files from a previous commit that will not be exported (id {1:?}).",
253+
changeset_ctx.id(),
254+
copy_from_cs_id
255+
);
256+
printed_warning_about_copy = true;
257+
};
258+
*tracked_fc = tracked_fc.with_new_copy_from(None);
230259
};
231260
};
232-
});
233-
};
261+
};
262+
});
234263

235264
let rewritten_bcs_mut = rewrite_commit(
236265
source_repo_ctx.ctx(),
@@ -257,6 +286,79 @@ async fn create_bonsai_for_new_repo(
257286
Ok((rewritten_bcs, remapped_parents))
258287
}
259288

289+
/// Builds a vector of references to the paths that should be exported when
290+
/// rewriting the provided changeset based on each export path's head commit.
291+
async fn get_export_paths_for_changeset<'a>(
292+
processed_cs: &ChangesetContext,
293+
export_path_infos: &'a Vec<ExportPathInfo>,
294+
) -> Result<Vec<&'a NonRootMPath>> {
295+
// Get the export paths for the changeset being processed considering its
296+
// head commit.
297+
let export_paths: Vec<&NonRootMPath> = stream::iter(export_path_infos)
298+
.then(|(exp_path, head_cs)| async move {
299+
// If the processed changeset is older than a path's head commit,
300+
// then this path should be exported when rewriting this changeset.
301+
let is_ancestor_of_head_cs = processed_cs.is_ancestor_of(head_cs.id()).await?;
302+
if is_ancestor_of_head_cs {
303+
return anyhow::Ok::<Option<&NonRootMPath>>(Some(exp_path));
304+
}
305+
// Otherwise the changeset is a descendant of this path's head, so
306+
// the path should NOT be exported.
307+
Ok(None)
308+
})
309+
.try_collect::<Vec<_>>()
310+
.await?
311+
.into_iter()
312+
.flatten()
313+
.collect::<Vec<&NonRootMPath>>();
314+
315+
Ok(export_paths)
316+
}
317+
318+
/// The MultiMover closure is called for every path affected by a commit being
319+
/// copied and it should determine if the changes to the path should be
320+
/// included or not in the new commit.
321+
///
322+
/// This function builds the MultiMover for a given changeset, considering what
323+
/// paths should be exported based on their head.
324+
/// This is needed to handle renames of export directories.
325+
/// For details about head changeset, see the docs of `ExportPathInfo`.
326+
///
327+
/// **Example:**
328+
/// `A -> B -> c -> D -> E -> f (master)`
329+
/// A) CREATE: old/foo
330+
/// B) MODIFY: old/foo
331+
/// c) MODIFY: random/file.txt
332+
/// D) RENAME: old/foo → new/foo
333+
/// E) MODIFY: new/foo.txt
334+
/// f) CREATE: old/foo.
335+
///
336+
/// Expected changesets: [A', B', D', E'], because the `old` directory created
337+
/// in `f` should NOT be exported.
338+
///
339+
/// In this case, `export_path_infos` would be `[("new", "f"), ("old", "D")]`.
340+
///
341+
fn build_multi_mover_for_changeset<'a>(
342+
logger: &'a Logger,
343+
export_paths: &'a [&'a NonRootMPath],
344+
) -> Result<MultiMover<'a>> {
345+
let multi_mover: MultiMover = Arc::new(
346+
move |source_path: &NonRootMPath| -> Result<Vec<NonRootMPath>> {
347+
let should_export = export_paths.iter().any(|p| p.is_prefix_of(source_path));
348+
349+
if !should_export {
350+
trace!(logger, "Path {:#?} will NOT be exported.", &source_path);
351+
return Ok(vec![]);
352+
}
353+
354+
trace!(logger, "Path {:#?} will be exported.", &source_path);
355+
Ok(vec![source_path.clone()])
356+
},
357+
);
358+
359+
Ok(multi_mover)
360+
}
361+
260362
/// Create a temporary repository to store the changesets that affect the export
261363
/// directories.
262364
/// The temporary repo uses file-backed storage and does not perform any writes

eden/mononoke/git/gitexport/src/gitexport_tools/partial_commit_graph.rs

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,18 @@ use slog::Logger;
2525

2626
pub type ChangesetParents = HashMap<ChangesetId, Vec<ChangesetId>>;
2727

28+
/// Represents a path that should be exported until a given changeset, i.e. the
29+
/// HEAD commit for that path.
30+
///
31+
/// When partially copying each relevant changeset to the temporary repo, changes
32+
/// to this path in a given changeset will only be copied if this changeset is
33+
/// an ancestor of the head changeset of that path.
34+
///
35+
/// This head changeset will be used to query the history of the path,
36+
/// i.e. all exported commits that affect this path will be this changeset's
37+
/// ancestor.
38+
pub type ExportPathInfo = (NonRootMPath, ChangesetContext);
39+
2840
#[derive(Debug)]
2941
pub struct GitExportGraphInfo {
3042
pub changesets: Vec<ChangesetContext>,
@@ -38,8 +50,7 @@ pub struct GitExportGraphInfo {
3850
/// and a hashmap of changeset ids to their parents' ids.
3951
pub async fn build_partial_commit_graph_for_export(
4052
logger: &Logger,
41-
paths: Vec<NonRootMPath>,
42-
cs_ctx: ChangesetContext,
53+
paths: Vec<ExportPathInfo>,
4354
// Consider history until the provided timestamp, i.e. all commits in the
4455
// graph will have its creation time greater than or equal to it.
4556
oldest_commit_ts: Option<i64>,
@@ -53,7 +64,7 @@ pub async fn build_partial_commit_graph_for_export(
5364
};
5465

5566
let history_changesets: Vec<Vec<ChangesetContext>> = stream::iter(paths)
56-
.then(|p| async {
67+
.then(|(p, cs_ctx)| async move {
5768
get_relevant_changesets_for_single_path(p, &cs_ctx, &cs_path_history_options).await
5869
})
5970
.try_collect::<Vec<_>>()

eden/mononoke/git/gitexport/src/main.rs

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -143,11 +143,15 @@ async fn async_main(app: MononokeApp) -> Result<(), Error> {
143143
.into_iter()
144144
.map(|p| TryFrom::try_from(p.as_os_str()))
145145
.collect::<Result<Vec<NonRootMPath>>>()?;
146+
// TODO(T164121717): support proper head commits per paths
147+
let export_path_infos: Vec<(NonRootMPath, ChangesetContext)> = export_paths
148+
.into_iter()
149+
.map(|p| (p, cs_ctx.clone()))
150+
.collect();
146151

147152
let graph_info = build_partial_commit_graph_for_export(
148153
logger,
149-
export_paths.clone(),
150-
cs_ctx,
154+
export_path_infos.clone(),
151155
args.oldest_commit_ts,
152156
)
153157
.await?;
@@ -156,7 +160,7 @@ async fn async_main(app: MononokeApp) -> Result<(), Error> {
156160
trace!(logger, "changeset parents: {:?}", &graph_info.parents_map);
157161

158162
let temp_repo_ctx =
159-
rewrite_partial_changesets(app.fb, repo_ctx, graph_info, export_paths).await?;
163+
rewrite_partial_changesets(app.fb, repo_ctx, graph_info, export_path_infos).await?;
160164

161165
let temp_master_csc = temp_repo_ctx
162166
.resolve_bookmark(

0 commit comments

Comments
 (0)