@@ -113,6 +113,7 @@ use rustc_middle::query::Providers;
113113use rustc_middle:: ty:: print:: { characteristic_def_id_of_type, with_no_trimmed_paths} ;
114114use rustc_middle:: ty:: { self , visit:: TypeVisitableExt , InstanceDef , TyCtxt } ;
115115use rustc_session:: config:: { DumpMonoStatsFormat , SwitchWithOptPath } ;
116+ use rustc_session:: CodegenUnits ;
116117use rustc_span:: symbol:: Symbol ;
117118
118119use crate :: collector:: UsageMap ;
@@ -121,7 +122,6 @@ use crate::errors::{CouldntDumpMonoStats, SymbolAlreadyDefined, UnknownCguCollec
121122
/// Context handed to the individual partitioning steps. `partition` builds
/// one from its `tcx` and `usage_map` arguments and passes it by reference to
/// `place_root_mono_items` and `merge_codegen_units`.
122123struct PartitioningCx < ' a , ' tcx > {
123124 tcx : TyCtxt < ' tcx > ,
124- target_cgu_count : usize ,
125125 usage_map : & ' a UsageMap < ' tcx > ,
126126}
127127
@@ -130,26 +130,30 @@ struct PlacedRootMonoItems<'tcx> {
130130 codegen_units : Vec < CodegenUnit < ' tcx > > ,
131131
132132 internalization_candidates : FxHashSet < MonoItem < ' tcx > > ,
133+
134+ /// These must be obtained when the iterator in `partition` runs. They
135+ /// can't be obtained later because some inlined functions might not be
136+ /// reachable.
137+ unique_inlined_stats : ( usize , usize ) ,
133138}
134139
135140// The output CGUs are sorted by name.
136141fn partition < ' tcx , I > (
137142 tcx : TyCtxt < ' tcx > ,
138143 mono_items : I ,
139- max_cgu_count : usize ,
140144 usage_map : & UsageMap < ' tcx > ,
141145) -> Vec < CodegenUnit < ' tcx > >
142146where
143147 I : Iterator < Item = MonoItem < ' tcx > > ,
144148{
145149 let _prof_timer = tcx. prof . generic_activity ( "cgu_partitioning" ) ;
146150
147- let cx = & PartitioningCx { tcx, target_cgu_count : max_cgu_count , usage_map } ;
151+ let cx = & PartitioningCx { tcx, usage_map } ;
148152
149153 // In the first step, we place all regular monomorphizations into their
150154 // respective 'home' codegen unit. Regular monomorphizations are all
151155 // functions and statics defined in the local crate.
152- let PlacedRootMonoItems { mut codegen_units, internalization_candidates } = {
156+ let PlacedRootMonoItems { mut codegen_units, internalization_candidates, unique_inlined_stats } = {
153157 let _prof_timer = tcx. prof . generic_activity ( "cgu_partitioning_place_roots" ) ;
154158 place_root_mono_items ( cx, mono_items)
155159 } ;
@@ -158,15 +162,15 @@ where
158162 cgu. create_size_estimate ( tcx) ;
159163 }
160164
161- debug_dump ( tcx, "INITIAL PARTITIONING " , & codegen_units) ;
165+ debug_dump ( tcx, "ROOTS " , & codegen_units, unique_inlined_stats ) ;
162166
163167 // Merge until we have at most `tcx.sess.codegen_units()` codegen units.
164168 // `merge_codegen_units` is responsible for updating the CGU size
165169 // estimates.
166170 {
167171 let _prof_timer = tcx. prof . generic_activity ( "cgu_partitioning_merge_cgus" ) ;
168172 merge_codegen_units ( cx, & mut codegen_units) ;
169- debug_dump ( tcx, "POST MERGING " , & codegen_units) ;
173+ debug_dump ( tcx, "MERGE " , & codegen_units, unique_inlined_stats ) ;
170174 }
171175
172176 // In the next step, we use the inlining map to determine which additional
@@ -182,7 +186,7 @@ where
182186 cgu. create_size_estimate ( tcx) ;
183187 }
184188
185- debug_dump ( tcx, "POST INLINING " , & codegen_units) ;
189+ debug_dump ( tcx, "INLINE " , & codegen_units, unique_inlined_stats ) ;
186190
187191 // Next we try to make as many symbols "internal" as possible, so LLVM has
188192 // more freedom to optimize.
@@ -226,7 +230,7 @@ where
226230 // Ensure CGUs are sorted by name, so that we get deterministic results.
227231 assert ! ( codegen_units. is_sorted_by( |a, b| Some ( a. name( ) . as_str( ) . cmp( b. name( ) . as_str( ) ) ) ) ) ;
228232
229- debug_dump ( tcx, "FINAL" , & codegen_units) ;
233+ debug_dump ( tcx, "FINAL" , & codegen_units, unique_inlined_stats ) ;
230234
231235 codegen_units
232236}
@@ -252,10 +256,16 @@ where
252256 let cgu_name_builder = & mut CodegenUnitNameBuilder :: new ( cx. tcx ) ;
253257 let cgu_name_cache = & mut FxHashMap :: default ( ) ;
254258
259+ let mut num_unique_inlined_items = 0 ;
260+ let mut unique_inlined_items_size = 0 ;
255261 for mono_item in mono_items {
256262 match mono_item. instantiation_mode ( cx. tcx ) {
257263 InstantiationMode :: GloballyShared { .. } => { }
258- InstantiationMode :: LocalCopy => continue ,
264+ InstantiationMode :: LocalCopy => {
265+ num_unique_inlined_items += 1 ;
266+ unique_inlined_items_size += mono_item. size_estimate ( cx. tcx ) ;
267+ continue ;
268+ }
259269 }
260270
261271 let characteristic_def_id = characteristic_def_id_of_mono_item ( cx. tcx , mono_item) ;
@@ -300,7 +310,11 @@ where
300310 let mut codegen_units: Vec < _ > = codegen_units. into_values ( ) . collect ( ) ;
301311 codegen_units. sort_by ( |a, b| a. name ( ) . as_str ( ) . cmp ( b. name ( ) . as_str ( ) ) ) ;
302312
303- PlacedRootMonoItems { codegen_units, internalization_candidates }
313+ PlacedRootMonoItems {
314+ codegen_units,
315+ internalization_candidates,
316+ unique_inlined_stats : ( num_unique_inlined_items, unique_inlined_items_size) ,
317+ }
304318}
305319
306320// This function requires the CGUs to be sorted by name on input, and ensures
@@ -309,7 +323,7 @@ fn merge_codegen_units<'tcx>(
309323 cx : & PartitioningCx < ' _ , ' tcx > ,
310324 codegen_units : & mut Vec < CodegenUnit < ' tcx > > ,
311325) {
312- assert ! ( cx. target_cgu_count >= 1 ) ;
326+ assert ! ( cx. tcx . sess . codegen_units ( ) . as_usize ( ) >= 1 ) ;
313327
314328 // A sorted order here ensures merging is deterministic.
315329 assert ! ( codegen_units. is_sorted_by( |a, b| Some ( a. name( ) . as_str( ) . cmp( b. name( ) . as_str( ) ) ) ) ) ;
@@ -318,11 +332,32 @@ fn merge_codegen_units<'tcx>(
318332 let mut cgu_contents: FxHashMap < Symbol , Vec < Symbol > > =
319333 codegen_units. iter ( ) . map ( |cgu| ( cgu. name ( ) , vec ! [ cgu. name( ) ] ) ) . collect ( ) ;
320334
321- // Merge the two smallest codegen units until the target size is
322- // reached.
323- while codegen_units. len ( ) > cx. target_cgu_count {
324- // Sort small cgus to the back
335+ // Having multiple CGUs can drastically speed up compilation. But for
336+ // non-incremental builds, tiny CGUs slow down compilation *and* result in
337+ // worse generated code. So we don't allow CGUs smaller than this (unless
338+ // there is just one CGU, of course). Note that CGU sizes of 100,000+ are
339+ // common in larger programs, so this isn't all that large.
340+ const NON_INCR_MIN_CGU_SIZE : usize = 1000 ;
341+
342+ // Repeatedly merge the two smallest codegen units as long as:
343+ // - we have more CGUs than the upper limit, or
344+ // - (Non-incremental builds only) the user didn't specify a CGU count, and
345+ // there are multiple CGUs, and some are below the minimum size.
346+ //
347+ // The "didn't specify a CGU count" condition is because when an explicit
348+ // count is requested we observe it as closely as possible. For example,
349+ // the `compiler_builtins` crate sets `codegen-units = 10000` and it's
350+ // critical they aren't merged. Also, some tests use explicit small values
351+ // and likewise won't work if small CGUs are merged.
352+ while codegen_units. len ( ) > cx. tcx . sess . codegen_units ( ) . as_usize ( )
353+ || ( cx. tcx . sess . opts . incremental . is_none ( )
354+ && matches ! ( cx. tcx. sess. codegen_units( ) , CodegenUnits :: Default ( _) )
355+ && codegen_units. len ( ) > 1
356+ && codegen_units. iter ( ) . any ( |cgu| cgu. size_estimate ( ) < NON_INCR_MIN_CGU_SIZE ) )
357+ {
358+ // Sort small cgus to the back.
325359 codegen_units. sort_by_cached_key ( |cgu| cmp:: Reverse ( cgu. size_estimate ( ) ) ) ;
360+
326361 let mut smallest = codegen_units. pop ( ) . unwrap ( ) ;
327362 let second_smallest = codegen_units. last_mut ( ) . unwrap ( ) ;
328363
@@ -814,47 +849,147 @@ fn default_visibility(tcx: TyCtxt<'_>, id: DefId, is_generic: bool) -> Visibilit
814849 }
815850}
816851
817- fn debug_dump < ' a , ' tcx : ' a > ( tcx : TyCtxt < ' tcx > , label : & str , cgus : & [ CodegenUnit < ' tcx > ] ) {
852+ fn debug_dump < ' a , ' tcx : ' a > (
853+ tcx : TyCtxt < ' tcx > ,
854+ label : & str ,
855+ cgus : & [ CodegenUnit < ' tcx > ] ,
856+ ( unique_inlined_items, unique_inlined_size) : ( usize , usize ) ,
857+ ) {
818858 let dump = move || {
819859 use std:: fmt:: Write ;
820860
821- let num_cgus = cgus. len ( ) ;
822- let num_items: usize = cgus. iter ( ) . map ( |cgu| cgu. items ( ) . len ( ) ) . sum ( ) ;
823- let total_size: usize = cgus. iter ( ) . map ( |cgu| cgu. size_estimate ( ) ) . sum ( ) ;
824- let max_size = cgus. iter ( ) . map ( |cgu| cgu. size_estimate ( ) ) . max ( ) . unwrap ( ) ;
825- let min_size = cgus. iter ( ) . map ( |cgu| cgu. size_estimate ( ) ) . min ( ) . unwrap ( ) ;
826- let max_min_size_ratio = max_size as f64 / min_size as f64 ;
861+ let mut num_cgus = 0 ;
862+ let mut all_cgu_sizes = Vec :: new ( ) ;
863+
864+ // Note: every unique root item is placed exactly once, so the number
865+ // of unique root items always equals the number of placed root items.
866+
867+ let mut root_items = 0 ;
868+ // unique_inlined_items is passed in above.
869+ let mut placed_inlined_items = 0 ;
870+
871+ let mut root_size = 0 ;
872+ // unique_inlined_size is passed in above.
873+ let mut placed_inlined_size = 0 ;
874+
875+ for cgu in cgus. iter ( ) {
876+ num_cgus += 1 ;
877+ all_cgu_sizes. push ( cgu. size_estimate ( ) ) ;
878+
879+ for ( item, _) in cgu. items ( ) {
880+ match item. instantiation_mode ( tcx) {
881+ InstantiationMode :: GloballyShared { .. } => {
882+ root_items += 1 ;
883+ root_size += item. size_estimate ( tcx) ;
884+ }
885+ InstantiationMode :: LocalCopy => {
886+ placed_inlined_items += 1 ;
887+ placed_inlined_size += item. size_estimate ( tcx) ;
888+ }
889+ }
890+ }
891+ }
892+
893+ all_cgu_sizes. sort_unstable_by_key ( |& n| cmp:: Reverse ( n) ) ;
894+
895+ let unique_items = root_items + unique_inlined_items;
896+ let placed_items = root_items + placed_inlined_items;
897+ let items_ratio = placed_items as f64 / unique_items as f64 ;
898+
899+ let unique_size = root_size + unique_inlined_size;
900+ let placed_size = root_size + placed_inlined_size;
901+ let size_ratio = placed_size as f64 / unique_size as f64 ;
902+
903+ let mean_cgu_size = placed_size as f64 / num_cgus as f64 ;
904+
905+ assert_eq ! ( placed_size, all_cgu_sizes. iter( ) . sum:: <usize >( ) ) ;
827906
828907 let s = & mut String :: new ( ) ;
908+ let _ = writeln ! ( s, "{label}" ) ;
829909 let _ = writeln ! (
830910 s,
831- "{label} ({num_items} items, total_size={total_size}; {num_cgus} CGUs, \
832- max_size={max_size}, min_size={min_size}, max_size/min_size={max_min_size_ratio:.1}):"
911+ "- unique items: {unique_items} ({root_items} root + {unique_inlined_items} inlined), \
912+ unique size: {unique_size} ({root_size} root + {unique_inlined_size} inlined)\n \
913+ - placed items: {placed_items} ({root_items} root + {placed_inlined_items} inlined), \
914+ placed size: {placed_size} ({root_size} root + {placed_inlined_size} inlined)\n \
915+ - placed/unique items ratio: {items_ratio:.2}, \
916+ placed/unique size ratio: {size_ratio:.2}\n \
917+ - CGUs: {num_cgus}, mean size: {mean_cgu_size:.1}, sizes: {}",
918+ list( & all_cgu_sizes) ,
833919 ) ;
920+ let _ = writeln ! ( s) ;
921+
834922 for ( i, cgu) in cgus. iter ( ) . enumerate ( ) {
923+ let name = cgu. name ( ) ;
924+ let size = cgu. size_estimate ( ) ;
835925 let num_items = cgu. items ( ) . len ( ) ;
836- let _ = writeln ! (
837- s,
838- "- CGU[{i}] {} ({num_items} items, size={}):" ,
839- cgu. name( ) ,
840- cgu. size_estimate( )
841- ) ;
926+ let mean_size = size as f64 / num_items as f64 ;
927+
928+ let mut placed_item_sizes: Vec < _ > =
929+ cgu. items ( ) . iter ( ) . map ( |( item, _) | item. size_estimate ( tcx) ) . collect ( ) ;
930+ placed_item_sizes. sort_unstable_by_key ( |& n| cmp:: Reverse ( n) ) ;
931+ let sizes = list ( & placed_item_sizes) ;
932+
933+ let _ = writeln ! ( s, "- CGU[{i}]" ) ;
934+ let _ = writeln ! ( s, " - {name}, size: {size}" ) ;
935+ let _ =
936+ writeln ! ( s, " - items: {num_items}, mean size: {mean_size:.1}, sizes: {sizes}" , ) ;
842937
843938 for ( item, linkage) in cgu. items_in_deterministic_order ( tcx) {
844939 let symbol_name = item. symbol_name ( tcx) . name ;
845940 let symbol_hash_start = symbol_name. rfind ( 'h' ) ;
846941 let symbol_hash = symbol_hash_start. map_or ( "<no hash>" , |i| & symbol_name[ i..] ) ;
847942 let size = item. size_estimate ( tcx) ;
943+ let kind = match item. instantiation_mode ( tcx) {
944+ InstantiationMode :: GloballyShared { .. } => "root" ,
945+ InstantiationMode :: LocalCopy => "inlined" ,
946+ } ;
848947 let _ = with_no_trimmed_paths ! ( writeln!(
849948 s,
850- " - {item} [{linkage:?}] [{symbol_hash}] (size= {size})"
949+ " - {item} [{linkage:?}] [{symbol_hash}] ({kind}, size: {size})"
851950 ) ) ;
852951 }
853952
854953 let _ = writeln ! ( s) ;
855954 }
856955
857- std:: mem:: take ( s)
956+ return std:: mem:: take ( s) ;
957+
// Converts a slice to a string, capturing repetitions to save space.
// E.g. `[4, 4, 4, 3, 2, 1, 1, 1, 1, 1]` -> "[4 (x3), 3, 2, 1 (x5)]".
//
// Only *adjacent* equal values are collapsed into a single `value (xN)`
// entry, so a caller that wants one entry per distinct value must pass a
// sorted slice. An empty slice yields "[]".
fn list(ns: &[usize]) -> String {
    use std::fmt::Write;

    let mut out = String::from("[");
    let mut rest = ns;

    while let Some((&value, tail)) = rest.split_first() {
        // Length of the run of `value` at the front of `rest`.
        let run = 1 + tail.iter().take_while(|&&n| n == value).count();

        // Anything beyond the opening bracket means an entry was already
        // written, so a separator is needed.
        if out.len() > 1 {
            out.push_str(", ");
        }

        // Formatting into a `String` cannot fail, so the result is ignored.
        let _ = if run == 1 {
            write!(out, "{value}")
        } else {
            write!(out, "{value} (x{run})")
        };

        rest = &rest[run..];
    }

    out.push(']');
    out
}
858993 } ;
859994
860995 debug ! ( "{}" , dump( ) ) ;
@@ -922,8 +1057,7 @@ fn collect_and_partition_mono_items(tcx: TyCtxt<'_>, (): ()) -> (&DefIdSet, &[Co
9221057 let ( codegen_units, _) = tcx. sess . time ( "partition_and_assert_distinct_symbols" , || {
9231058 sync:: join (
9241059 || {
925- let mut codegen_units =
926- partition ( tcx, items. iter ( ) . copied ( ) , tcx. sess . codegen_units ( ) , & usage_map) ;
1060+ let mut codegen_units = partition ( tcx, items. iter ( ) . copied ( ) , & usage_map) ;
9271061 codegen_units[ 0 ] . make_primary ( ) ;
9281062 & * tcx. arena . alloc_from_iter ( codegen_units)
9291063 } ,
0 commit comments