@@ -16,14 +16,14 @@ use anyhow::Result;
1616use blobstore:: Loadable ;
1717use blobstore:: PutBehaviour ;
1818use borrowed:: borrowed;
19- use cloned:: cloned;
2019use commit_transformation:: rewrite_commit;
2120use commit_transformation:: upload_commits;
2221use commit_transformation:: MultiMover ;
2322use fbinit:: FacebookInit ;
2423use fileblob:: Fileblob ;
2524use futures:: stream:: TryStreamExt ;
2625use futures:: stream:: { self } ;
26+ use futures:: StreamExt ;
2727use indicatif:: ProgressBar ;
2828use indicatif:: ProgressStyle ;
2929use mononoke_api:: BookmarkKey ;
@@ -42,12 +42,15 @@ use slog::debug;
4242use slog:: error;
4343use slog:: info;
4444use slog:: trace;
45+ use slog:: warn;
4546use slog:: Drain ;
47+ use slog:: Logger ;
4648use sql:: rusqlite:: Connection as SqliteConnection ;
4749use test_repo_factory:: TestRepoFactory ;
4850
4951pub use crate :: partial_commit_graph:: build_partial_commit_graph_for_export;
5052use crate :: partial_commit_graph:: ChangesetParents ;
53+ use crate :: partial_commit_graph:: ExportPathInfo ;
5154pub use crate :: partial_commit_graph:: GitExportGraphInfo ;
5255
5356pub const MASTER_BOOKMARK : & str = "master" ;
@@ -59,7 +62,7 @@ pub async fn rewrite_partial_changesets(
5962 fb : FacebookInit ,
6063 source_repo_ctx : RepoContext ,
6164 graph_info : GitExportGraphInfo ,
62- export_paths : Vec < NonRootMPath > ,
65+ export_paths : Vec < ExportPathInfo > ,
6366) -> Result < RepoContext > {
6467 let ctx: & CoreContext = source_repo_ctx. ctx ( ) ;
6568 let changesets = graph_info. changesets ;
@@ -73,24 +76,6 @@ pub async fn rewrite_partial_changesets(
7376 // Repo that will hold the partial changesets that will be exported to git
7477 let temp_repo_ctx = create_temp_repo ( fb, ctx) . await ?;
7578
76- let logger_clone = logger. clone ( ) ;
77-
78- let multi_mover: MultiMover < ' static > = Arc :: new ( move |source_path : & NonRootMPath | {
79- let should_export = export_paths. iter ( ) . any ( |p| p. is_prefix_of ( source_path) ) ;
80-
81- if !should_export {
82- trace ! (
83- logger_clone,
84- "Path {:#?} will NOT be exported." ,
85- & source_path
86- ) ;
87- return Ok ( vec ! [ ] ) ;
88- }
89-
90- trace ! ( logger_clone, "Path {:#?} will be exported." , & source_path) ;
91- Ok ( vec ! [ source_path. clone( ) ] )
92- } ) ;
93-
9479 let num_changesets = changesets. len ( ) . try_into ( ) . unwrap ( ) ;
9580 let cs_results: Vec < Result < ChangesetContext , MononokeError > > =
9681 changesets. into_iter ( ) . map ( Ok ) . collect :: < Vec < _ > > ( ) ;
@@ -116,14 +101,21 @@ pub async fn rewrite_partial_changesets(
116101 |( mut new_bonsai_changesets, remapped_parents) , changeset| {
117102 borrowed ! ( source_repo_ctx) ;
118103 borrowed ! ( mb_progress_bar) ;
119- cloned ! ( multi_mover) ;
104+ borrowed ! ( export_paths) ;
105+ borrowed ! ( logger) ;
106+
120107 async move {
108+ let export_paths =
109+ get_export_paths_for_changeset ( & changeset, export_paths) . await ?;
110+
111+ let multi_mover = build_multi_mover_for_changeset ( logger, & export_paths) ?;
121112 let ( new_bcs, remapped_parents) = create_bonsai_for_new_repo (
122113 source_repo_ctx,
123114 multi_mover,
124115 changeset_parents,
125116 remapped_parents,
126117 changeset,
118+ & export_paths,
127119 )
128120 . await ?;
129121 new_bonsai_changesets. push ( new_bcs) ;
@@ -171,12 +163,13 @@ pub async fn rewrite_partial_changesets(
171163
172164/// Given a changeset and a set of paths being exported, build the
173165/// BonsaiChangeset containing only the changes to those paths.
174- async fn create_bonsai_for_new_repo (
166+ async fn create_bonsai_for_new_repo < ' a > (
175167 source_repo_ctx : & RepoContext ,
176168 multi_mover : MultiMover < ' _ > ,
177169 changeset_parents : & ChangesetParents ,
178170 mut remapped_parents : HashMap < ChangesetId , ChangesetId > ,
179171 changeset_ctx : ChangesetContext ,
172+ export_paths : & ' a [ & ' a NonRootMPath ] ,
180173) -> Result < ( BonsaiChangeset , HashMap < ChangesetId , ChangesetId > ) , MononokeError > {
181174 let logger = changeset_ctx. repo ( ) . ctx ( ) . logger ( ) ;
182175 trace ! (
@@ -207,30 +200,66 @@ async fn create_bonsai_for_new_repo(
207200 let mut mut_bcs = bcs. into_mut ( ) ;
208201 mut_bcs. parents = orig_parent_ids. clone ( ) ;
209202
210- // If this isn't the first changeset (i.e. that creates the oldest exported
211- // directory), we need to make sure that file changes that copy from
212- // previous commits only reference revisions that are also being exported.
213- if let Some ( new_parent_cs_id) = orig_parent_ids. first ( ) {
214- // TODO(T161204758): iterate over all parents and select one that is the closest
215- // ancestor of each commit in the `copy_from` field.
216- mut_bcs. file_changes . iter_mut ( ) . for_each ( |( _p, fc) | {
217- if let FileChange :: Change ( tracked_fc) = fc {
218- // If any FileChange copies a file from a previous revision (e.g. a parent),
219- // set the `copy_from` field to point to its new parent.
220- //
221- // Since we're building a history using all changesets that
222- // affect the exported directories, any file being copied
223- // should always exist in the new parent.
203+ // If this changeset created an export directory by copying/moving files
204+ // from a directory that's not being exported, we only need to print a
205+ // warning once and not for every single file that was copied.
206+ let mut printed_warning_about_copy = false ;
207+
208+ // TODO(T161204758): iterate over all parents and select one that is the closest
209+ // ancestor of each commit in the `copy_from` field.
210+ mut_bcs. file_changes . iter_mut ( ) . for_each ( |( _p, fc) | {
211+ if let FileChange :: Change ( tracked_fc) = fc {
212+ // If any FileChange copies a file from a previous revision (e.g. a parent),
213+ // set the `copy_from` field to point to its new parent.
214+ //
215+ // Since we're building a history using all changesets that
216+ // affect the exported directories, any file being copied
217+ // should always exist in the new parent.
218+ //
219+ // If this isn't done, it might not be possible to rewrite the
220+ // commit to the new repo, because the changeset referenced in
221+ // its `copy_from` field will not have been remapped.
222+ if let Some ( ( old_path, copy_from_cs_id) ) = tracked_fc. copy_from_mut ( ) {
223+ // If this isn't the first changeset (i.e. that creates the oldest exported
224+ // directory), we need to make sure that file changes that copy from
225+ // previous commits only reference revisions that are also being exported.
224226 //
225- // If this isn't done, it might not be possible to rewrite the
226- // commit to the new repo, because the changeset referenced in
227- // its `copy_from` field will not have been remapped.
228- if let Some ( ( _p, copy_from_cs_id) ) = tracked_fc. copy_from_mut ( ) {
229- * copy_from_cs_id = new_parent_cs_id. clone ( ) ;
227+ // We should also make sure that we only reference files that are
228+ // are in the revision that we'll set as the parent.
229+ // If that's not the case, the `copy_from` information will be
230+ // dropped and a warning will be printed to the user so they're
231+ // aware that files in an export directory might have been
232+ // copied/moved from another one.
233+ let old_path = & * old_path; // We need an immutable reference for prefix checks
234+ let should_export = export_paths. iter ( ) . any ( |p| p. is_prefix_of ( old_path) ) ;
235+ let new_parent_cs_id = orig_parent_ids. first ( ) ;
236+ if new_parent_cs_id. is_some ( ) && should_export {
237+ * copy_from_cs_id = new_parent_cs_id. cloned ( ) . unwrap ( ) ;
238+ } else {
239+ // First commit where the export paths were created.
240+ // If the `copy_from` field is set, it means that some files
241+ // were copied from other directories, which might not be
242+ // included in the provided export paths.
243+ //
244+ // By default, these renames won't be followed, but a warning
245+ // will be printed so that the user can check the commit
246+ // and decide if they want to re-run it passing the old
247+ // name as an export path along with this commit as the
248+ // head changeset.
249+ if !printed_warning_about_copy {
250+ warn ! (
251+ logger,
252+ "Changeset {0:?} might have created one of the exported paths by moving/copying files from a previous commit that will not be exported (id {1:?})." ,
253+ changeset_ctx. id( ) ,
254+ copy_from_cs_id
255+ ) ;
256+ printed_warning_about_copy = true ;
257+ } ;
258+ * tracked_fc = tracked_fc. with_new_copy_from ( None ) ;
230259 } ;
231260 } ;
232- } ) ;
233- } ;
261+ } ;
262+ } ) ;
234263
235264 let rewritten_bcs_mut = rewrite_commit (
236265 source_repo_ctx. ctx ( ) ,
@@ -257,6 +286,79 @@ async fn create_bonsai_for_new_repo(
257286 Ok ( ( rewritten_bcs, remapped_parents) )
258287}
259288
289+ /// Builds a vector of references to the paths that should be exported when
290+ /// rewriting the provided changeset based on each export path's head commit.
291+ async fn get_export_paths_for_changeset < ' a > (
292+ processed_cs : & ChangesetContext ,
293+ export_path_infos : & ' a Vec < ExportPathInfo > ,
294+ ) -> Result < Vec < & ' a NonRootMPath > > {
295+ // Get the export paths for the changeset being processed considering its
296+ // head commit.
297+ let export_paths: Vec < & NonRootMPath > = stream:: iter ( export_path_infos)
298+ . then ( |( exp_path, head_cs) | async move {
299+ // If the processed changeset is older than a path's head commit,
300+ // then this path should be exported when rewriting this changeset.
301+ let is_ancestor_of_head_cs = processed_cs. is_ancestor_of ( head_cs. id ( ) ) . await ?;
302+ if is_ancestor_of_head_cs {
303+ return anyhow:: Ok :: < Option < & NonRootMPath > > ( Some ( exp_path) ) ;
304+ }
305+ // Otherwise the changeset is a descendant of this path's head, so
306+ // the path should NOT be exported.
307+ Ok ( None )
308+ } )
309+ . try_collect :: < Vec < _ > > ( )
310+ . await ?
311+ . into_iter ( )
312+ . flatten ( )
313+ . collect :: < Vec < & NonRootMPath > > ( ) ;
314+
315+ Ok ( export_paths)
316+ }
317+
318+ /// The MultiMover closure is called for every path affected by a commit being
319+ /// copied and it should determine if the changes to the path should be
320+ /// included or not in the new commit.
321+ ///
322+ /// This function builds the MultiMover for a given changeset, considering what
323+ /// paths should be exported based on their head.
324+ /// This is needed to handle renames of export directories.
325+ /// For details about head changeset, see the docs of `ExportPathInfo`.
326+ ///
327+ /// **Example:**
328+ /// `A -> B -> c -> D -> E -> f (master)`
329+ /// A) CREATE: old/foo
330+ /// B) MODIFY: old/foo
331+ /// c) MODIFY: random/file.txt
332+ /// D) RENAME: old/foo → new/foo
333+ /// E) MODIFY: new/foo.txt
334+ /// f) CREATE: old/foo.
335+ ///
336+ /// Expected changesets: [A', B', D', E'], because the `old` directory created
337+ /// in `f` should NOT be exported.
338+ ///
339+ /// In this case, `export_path_infos` would be `[("new", "f", ("old", "D")]`.
340+ ///
341+ fn build_multi_mover_for_changeset < ' a > (
342+ logger : & ' a Logger ,
343+ export_paths : & ' a [ & ' a NonRootMPath ] ,
344+ ) -> Result < MultiMover < ' a > > {
345+ let multi_mover: MultiMover = Arc :: new (
346+ move |source_path : & NonRootMPath | -> Result < Vec < NonRootMPath > > {
347+ let should_export = export_paths. iter ( ) . any ( |p| p. is_prefix_of ( source_path) ) ;
348+
349+ if !should_export {
350+ trace ! ( logger, "Path {:#?} will NOT be exported." , & source_path) ;
351+ return Ok ( vec ! [ ] ) ;
352+ }
353+
354+ trace ! ( logger, "Path {:#?} will be exported." , & source_path) ;
355+ Ok ( vec ! [ source_path. clone( ) ] )
356+ } ,
357+ ) ;
358+
359+ Ok ( multi_mover)
360+ }
361+
260362/// Create a temporary repository to store the changesets that affect the export
261363/// directories.
262364/// The temporary repo uses file-backed storage and does not perform any writes
0 commit comments