From 19cbb71f23de3577a719d8b8486e6c5d4fc6b994 Mon Sep 17 00:00:00 2001 From: Mark Rousskov Date: Sat, 30 Mar 2024 15:53:24 -0400 Subject: [PATCH 1/3] Enable -Zshare-generics for inline(never) functions This avoids inlining cross-crate generic items when possible that are already marked inline(never), implying that the author is not intending for the function to be inlined by callers. As such, having a local copy may make it easier for LLVM to optimize but mostly just adds to binary bloat and codegen time. In practice our benchmarks indicate this is indeed a win for larger compilations, where the extra cost in dynamic linking to these symbols is diminished compared to the advantages in fewer copies that need optimizing in each binary. It might also make sense it expand this with other heuristics (e.g., `#[cold]`) in the future, but this seems like a good starting point. --- Cargo.lock | 1 + compiler/rustc_codegen_llvm/src/callee.rs | 4 ++- .../src/back/symbol_export.rs | 12 ++++++- compiler/rustc_middle/src/mir/mono.rs | 7 ++++ compiler/rustc_middle/src/ty/context.rs | 2 -- compiler/rustc_middle/src/ty/instance.rs | 18 ++++++---- compiler/rustc_monomorphize/Cargo.toml | 1 + .../rustc_monomorphize/src/partitioning.rs | 34 ++++++++++++++----- .../auxiliary/cgu_generic_function.rs | 11 ++++++ 9 files changed, 71 insertions(+), 19 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 54b1bf593e0a2..2715c79b017be 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4493,6 +4493,7 @@ dependencies = [ name = "rustc_monomorphize" version = "0.0.0" dependencies = [ + "rustc_attr", "rustc_data_structures", "rustc_errors", "rustc_fluent_macro", diff --git a/compiler/rustc_codegen_llvm/src/callee.rs b/compiler/rustc_codegen_llvm/src/callee.rs index 659c6ae0d8630..beaf2b1ddf1f9 100644 --- a/compiler/rustc_codegen_llvm/src/callee.rs +++ b/compiler/rustc_codegen_llvm/src/callee.rs @@ -113,7 +113,9 @@ pub fn get_fn<'ll, 'tcx>(cx: &CodegenCx<'ll, 'tcx>, instance: Instance<'tcx>) -> // This is a monomorphization. Its expected visibility depends // on whether we are in share-generics mode. - if cx.tcx.sess.opts.share_generics() { + if cx.tcx.sess.opts.share_generics() + || tcx.codegen_fn_attrs(instance_def_id).inline == rustc_attr::InlineAttr::Never + { // We are in share_generics mode. if let Some(instance_def_id) = instance_def_id.as_local() { diff --git a/compiler/rustc_codegen_ssa/src/back/symbol_export.rs b/compiler/rustc_codegen_ssa/src/back/symbol_export.rs index a6df8950b3517..3f5f35f7a2e55 100644 --- a/compiler/rustc_codegen_ssa/src/back/symbol_export.rs +++ b/compiler/rustc_codegen_ssa/src/back/symbol_export.rs @@ -308,7 +308,7 @@ fn exported_symbols_provider_local( )); } - if tcx.sess.opts.share_generics() && tcx.local_crate_exports_generics() { + if tcx.local_crate_exports_generics() { use rustc_middle::mir::mono::{Linkage, MonoItem, Visibility}; use rustc_middle::ty::InstanceDef; @@ -336,6 +336,16 @@ fn exported_symbols_provider_local( continue; } + if !tcx.sess.opts.share_generics() { + if tcx.codegen_fn_attrs(mono_item.def_id()).inline == rustc_attr::InlineAttr::Never + { + // this is OK, we explicitly allow sharing inline(never) across crates even + // without share-generics. + } else { + continue; + } + } + match *mono_item { MonoItem::Fn(Instance { def: InstanceDef::Item(def), args }) => { if args.non_erasable_generics(tcx, def).next().is_some() { diff --git a/compiler/rustc_middle/src/mir/mono.rs b/compiler/rustc_middle/src/mir/mono.rs index 3d79ec0092f82..5523d80ceb486 100644 --- a/compiler/rustc_middle/src/mir/mono.rs +++ b/compiler/rustc_middle/src/mir/mono.rs @@ -125,6 +125,13 @@ impl<'tcx> MonoItem<'tcx> { return InstantiationMode::GloballyShared { may_conflict: false }; } + if let InlineAttr::Never = tcx.codegen_fn_attrs(instance.def_id()).inline + && self.is_generic_fn(tcx) + { + // Upgrade inline(never) to a globally shared instance. + return InstantiationMode::GloballyShared { may_conflict: true }; + } + // At this point we don't have explicit linkage and we're an // inlined function. If we're inlining into all CGUs then we'll // be creating a local copy per CGU. diff --git a/compiler/rustc_middle/src/ty/context.rs b/compiler/rustc_middle/src/ty/context.rs index 65d744239a6ab..7c2882720442e 100644 --- a/compiler/rustc_middle/src/ty/context.rs +++ b/compiler/rustc_middle/src/ty/context.rs @@ -1503,8 +1503,6 @@ impl<'tcx> TyCtxt<'tcx> { #[inline] pub fn local_crate_exports_generics(self) -> bool { - debug_assert!(self.sess.opts.share_generics()); - self.crate_types().iter().any(|crate_type| { match crate_type { CrateType::Executable diff --git a/compiler/rustc_middle/src/ty/instance.rs b/compiler/rustc_middle/src/ty/instance.rs index a887047f62f21..c0c0e5c9dad28 100644 --- a/compiler/rustc_middle/src/ty/instance.rs +++ b/compiler/rustc_middle/src/ty/instance.rs @@ -194,19 +194,23 @@ impl<'tcx> Instance<'tcx> { /// This method already takes into account the global `-Zshare-generics` /// setting, always returning `None` if `share-generics` is off. pub fn upstream_monomorphization(&self, tcx: TyCtxt<'tcx>) -> Option { - // If we are not in share generics mode, we don't link to upstream - // monomorphizations but always instantiate our own internal versions - // instead. - if !tcx.sess.opts.share_generics() { - return None; - } - // If this is an item that is defined in the local crate, no upstream // crate can know about it/provide a monomorphization. if self.def_id().is_local() { return None; } + // If we are not in share generics mode, we don't link to upstream + // monomorphizations but always instantiate our own internal versions + // instead. + if !tcx.sess.opts.share_generics() + // However, if the def_id is marked inline(never), then it's fine to just reuse the + // upstream monomorphization. + && tcx.codegen_fn_attrs(self.def_id()).inline != rustc_attr::InlineAttr::Never + { + return None; + } + // If this a non-generic instance, it cannot be a shared monomorphization. self.args.non_erasable_generics(tcx, self.def_id()).next()?; diff --git a/compiler/rustc_monomorphize/Cargo.toml b/compiler/rustc_monomorphize/Cargo.toml index c7f1b9fa78454..4826e28e80df5 100644 --- a/compiler/rustc_monomorphize/Cargo.toml +++ b/compiler/rustc_monomorphize/Cargo.toml @@ -5,6 +5,7 @@ edition = "2021" [dependencies] # tidy-alphabetical-start +rustc_attr = { path = "../rustc_attr" } rustc_data_structures = { path = "../rustc_data_structures" } rustc_errors = { path = "../rustc_errors" } rustc_fluent_macro = { path = "../rustc_fluent_macro" } diff --git a/compiler/rustc_monomorphize/src/partitioning.rs b/compiler/rustc_monomorphize/src/partitioning.rs index 336341f4e746d..bfde7c4114ace 100644 --- a/compiler/rustc_monomorphize/src/partitioning.rs +++ b/compiler/rustc_monomorphize/src/partitioning.rs @@ -206,8 +206,8 @@ where // available to downstream crates. This depends on whether we are in // share-generics mode and whether the current crate can even have // downstream crates. - let export_generics = - cx.tcx.sess.opts.share_generics() && cx.tcx.local_crate_exports_generics(); + let can_export_generics = cx.tcx.local_crate_exports_generics(); + let always_export_generics = can_export_generics && cx.tcx.sess.opts.share_generics(); let cgu_name_builder = &mut CodegenUnitNameBuilder::new(cx.tcx); let cgu_name_cache = &mut UnordMap::default(); @@ -247,7 +247,8 @@ where cx.tcx, &mono_item, &mut can_be_internalized, - export_generics, + can_export_generics, + always_export_generics, ); if visibility == Visibility::Hidden && can_be_internalized { internalization_candidates.insert(mono_item); @@ -734,12 +735,19 @@ fn mono_item_linkage_and_visibility<'tcx>( tcx: TyCtxt<'tcx>, mono_item: &MonoItem<'tcx>, can_be_internalized: &mut bool, - export_generics: bool, + can_export_generics: bool, + always_export_generics: bool, ) -> (Linkage, Visibility) { if let Some(explicit_linkage) = mono_item.explicit_linkage(tcx) { return (explicit_linkage, Visibility::Default); } - let vis = mono_item_visibility(tcx, mono_item, can_be_internalized, export_generics); + let vis = mono_item_visibility( + tcx, + mono_item, + can_be_internalized, + can_export_generics, + always_export_generics, + ); (Linkage::External, vis) } @@ -762,7 +770,8 @@ fn mono_item_visibility<'tcx>( tcx: TyCtxt<'tcx>, mono_item: &MonoItem<'tcx>, can_be_internalized: &mut bool, - export_generics: bool, + can_export_generics: bool, + always_export_generics: bool, ) -> Visibility { let instance = match mono_item { // This is pretty complicated; see below. @@ -822,7 +831,11 @@ fn mono_item_visibility<'tcx>( // Upstream `DefId` instances get different handling than local ones. let Some(def_id) = def_id.as_local() else { - return if export_generics && is_generic { + return if is_generic + && (always_export_generics + || (can_export_generics + && tcx.codegen_fn_attrs(def_id).inline == rustc_attr::InlineAttr::Never)) + { // If it is an upstream monomorphization and we export generics, we must make // it available to downstream crates. *can_be_internalized = false; @@ -833,7 +846,12 @@ fn mono_item_visibility<'tcx>( }; if is_generic { - if export_generics { + // An inline(never) function won't get inlined in upstream crates anyway, so we might as + // well export it. + if always_export_generics + || (can_export_generics + && tcx.codegen_fn_attrs(def_id).inline == rustc_attr::InlineAttr::Never) + { if tcx.is_unreachable_local_definition(def_id) { // This instance cannot be used from another crate. Visibility::Hidden diff --git a/tests/codegen-units/partitioning/auxiliary/cgu_generic_function.rs b/tests/codegen-units/partitioning/auxiliary/cgu_generic_function.rs index 3926f295742bc..8bb78eb788a22 100644 --- a/tests/codegen-units/partitioning/auxiliary/cgu_generic_function.rs +++ b/tests/codegen-units/partitioning/auxiliary/cgu_generic_function.rs @@ -11,10 +11,21 @@ pub fn foo(x: T) -> (T, u32, i8) { #[inline(never)] fn bar(x: T) -> (T, Struct) { let _ = not_exported_and_not_generic(0); + exported_and_generic::(0); (x, Struct(1)) } +pub static F: fn(u32) -> u32 = exported_and_generic::; + // These should not contribute to the codegen items of other crates. + +// This is generic, but it's only instantiated with a u32 argument and that instantiation is present +// in the local crate (see F above). +#[inline(never)] +pub fn exported_and_generic(x: T) -> T { + x +} + #[inline(never)] pub fn exported_but_not_generic(x: i32) -> i64 { x as i64 From 48ca45a11639d79e8c623d2002aa3497cc522d97 Mon Sep 17 00:00:00 2001 From: Mark Rousskov Date: Tue, 28 May 2024 07:57:36 -0400 Subject: [PATCH 2/3] Fix fallout in tests Keeping in separate commit while we discuss it. --- library/alloc/src/vec/mod.rs | 8 +++++++- library/std/src/panicking.rs | 16 ++++++++++++++++ tests/codegen/avr/avr-func-addrspace.rs | 2 +- tests/codegen/issues/issue-77812.rs | 1 + ...ssue-47429-short-backtraces.legacy.run.stderr | 2 +- tests/ui/panics/runtime-switch.legacy.run.stderr | 2 +- .../short-ice-remove-middle-frames-2.run.stderr | 2 +- .../short-ice-remove-middle-frames.run.stderr | 2 +- 8 files changed, 29 insertions(+), 6 deletions(-) diff --git a/library/alloc/src/vec/mod.rs b/library/alloc/src/vec/mod.rs index aa9b632cbed9e..7e571c336bea2 100644 --- a/library/alloc/src/vec/mod.rs +++ b/library/alloc/src/vec/mod.rs @@ -1036,7 +1036,13 @@ impl Vec { /// ``` #[stable(feature = "try_reserve", since = "1.57.0")] pub fn try_reserve(&mut self, additional: usize) -> Result<(), TryReserveError> { - self.buf.try_reserve(self.len, additional) + let len = self.len; + let ret = self.buf.try_reserve(len, additional); + unsafe { + // Inform the optimier that growing did not change the length. + core::hint::assert_unchecked(self.len == len); + } + ret } /// Tries to reserve the minimum capacity for at least `additional` diff --git a/library/std/src/panicking.rs b/library/std/src/panicking.rs index 5699937cdb49b..8150dd00c8def 100644 --- a/library/std/src/panicking.rs +++ b/library/std/src/panicking.rs @@ -733,6 +733,22 @@ pub const fn begin_panic(msg: M) -> ! { } } +// This forces codegen of the function called by panic!() inside the std crate, rather than in +// downstream crates. Primarily this is useful for rustc's codegen tests, which rely on noticing +// complete removal of panic from generated IR. Since begin_panic is inline(never), it's only +// codegen'd once per crate-graph so this pushes that to std rather than our codegen test crates. +// +// (See https://github.com/rust-lang/rust/pull/123244 for more info on why). +// +// If this is causing problems we can also modify those codegen tests to use a crate type like +// cdylib which doesn't export "Rust" symbols to downstream linkage units. +#[unstable(feature = "libstd_sys_internals", reason = "used by the panic! macro", issue = "none")] +#[doc(hidden)] +#[allow(dead_code)] +#[used] +pub static EMPTY_PANIC: fn(&'static str) -> ! = + begin_panic::<&'static str> as fn(&'static str) -> !; + /// Central point for dispatching panics. /// /// Executes the primary logic for a panic, including checking for recursive diff --git a/tests/codegen/avr/avr-func-addrspace.rs b/tests/codegen/avr/avr-func-addrspace.rs index 2d9efb52c7cfa..d254e0d130487 100644 --- a/tests/codegen/avr/avr-func-addrspace.rs +++ b/tests/codegen/avr/avr-func-addrspace.rs @@ -85,7 +85,7 @@ pub extern "C" fn test() { // A call through the Fn trait must use address space 1. // - // CHECK: call{{.+}}addrspace(1) void @call_through_fn_trait() + // CHECK: call{{.+}}addrspace(1) void @call_through_fn_trait(ptr {{.*}} poison) call_through_fn_trait(&mut update_bar_value); // A call through a global variable must use address space 1. diff --git a/tests/codegen/issues/issue-77812.rs b/tests/codegen/issues/issue-77812.rs index bf84ac21b16d8..d0e0bd93b1a1f 100644 --- a/tests/codegen/issues/issue-77812.rs +++ b/tests/codegen/issues/issue-77812.rs @@ -15,6 +15,7 @@ extern "C" { fn exf2(); } +#[no_mangle] pub static mut GLOBAL: Variant = Variant::Zero; // CHECK-LABEL: @issue_77812 diff --git a/tests/ui/panics/issue-47429-short-backtraces.legacy.run.stderr b/tests/ui/panics/issue-47429-short-backtraces.legacy.run.stderr index dce91ce59e3a1..f458c7acb39fd 100644 --- a/tests/ui/panics/issue-47429-short-backtraces.legacy.run.stderr +++ b/tests/ui/panics/issue-47429-short-backtraces.legacy.run.stderr @@ -1,6 +1,6 @@ thread 'main' panicked at $DIR/issue-47429-short-backtraces.rs:23:5: explicit panic stack backtrace: - 0: std::panicking::begin_panic + 0: std::panicking::begin_panic::<&str> 1: issue_47429_short_backtraces::main note: Some details are omitted, run with `RUST_BACKTRACE=full` for a verbose backtrace. diff --git a/tests/ui/panics/runtime-switch.legacy.run.stderr b/tests/ui/panics/runtime-switch.legacy.run.stderr index bd05b6cc00fb1..2078c356d5cd7 100644 --- a/tests/ui/panics/runtime-switch.legacy.run.stderr +++ b/tests/ui/panics/runtime-switch.legacy.run.stderr @@ -1,6 +1,6 @@ thread 'main' panicked at $DIR/runtime-switch.rs:26:5: explicit panic stack backtrace: - 0: std::panicking::begin_panic + 0: std::panicking::begin_panic::<&str> 1: runtime_switch::main note: Some details are omitted, run with `RUST_BACKTRACE=full` for a verbose backtrace. diff --git a/tests/ui/panics/short-ice-remove-middle-frames-2.run.stderr b/tests/ui/panics/short-ice-remove-middle-frames-2.run.stderr index 2b648a0cad2ea..4bf8907f317e7 100644 --- a/tests/ui/panics/short-ice-remove-middle-frames-2.run.stderr +++ b/tests/ui/panics/short-ice-remove-middle-frames-2.run.stderr @@ -1,7 +1,7 @@ thread 'main' panicked at $DIR/short-ice-remove-middle-frames-2.rs:56:5: debug!!! stack backtrace: - 0: std::panicking::begin_panic + 0: std::panicking::begin_panic::<&str> 1: short_ice_remove_middle_frames_2::eight 2: short_ice_remove_middle_frames_2::seven::{{closure}} [... omitted 3 frames ...] diff --git a/tests/ui/panics/short-ice-remove-middle-frames.run.stderr b/tests/ui/panics/short-ice-remove-middle-frames.run.stderr index 5b37268409679..d05a60aedc607 100644 --- a/tests/ui/panics/short-ice-remove-middle-frames.run.stderr +++ b/tests/ui/panics/short-ice-remove-middle-frames.run.stderr @@ -1,7 +1,7 @@ thread 'main' panicked at $DIR/short-ice-remove-middle-frames.rs:52:5: debug!!! stack backtrace: - 0: std::panicking::begin_panic + 0: std::panicking::begin_panic::<&str> 1: short_ice_remove_middle_frames::seven 2: short_ice_remove_middle_frames::sixth 3: short_ice_remove_middle_frames::fifth::{{closure}} From 74af8bade157f0608861abad3406d7d6592e6850 Mon Sep 17 00:00:00 2001 From: Mark Rousskov Date: Sun, 9 Jun 2024 14:35:57 -0400 Subject: [PATCH 3/3] WIP: un-inline(never) finish_grow This lets it get codegen'd downstream. --- library/alloc/src/raw_vec.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/library/alloc/src/raw_vec.rs b/library/alloc/src/raw_vec.rs index 1134c7f833e2b..4edb566b7056e 100644 --- a/library/alloc/src/raw_vec.rs +++ b/library/alloc/src/raw_vec.rs @@ -548,7 +548,6 @@ impl RawVec { // above `RawVec::grow_amortized` for details. (The `A` parameter isn't // significant, because the number of different `A` types seen in practice is // much smaller than the number of `T` types.) -#[inline(never)] fn finish_grow( new_layout: Result, current_memory: Option<(NonNull, Layout)>,