Skip to content

Speed up target feature computation #137586

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Mar 11, 2025
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 6 additions & 7 deletions compiler/rustc_codegen_cranelift/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -176,13 +176,9 @@ impl CodegenBackend for CraneliftCodegenBackend {
}
}

fn target_features_cfg(
&self,
sess: &Session,
_allow_unstable: bool,
) -> Vec<rustc_span::Symbol> {
fn target_features_cfg(&self, sess: &Session) -> (Vec<Symbol>, Vec<Symbol>) {
// FIXME return the actually used target features. this is necessary for #[cfg(target_feature)]
if sess.target.arch == "x86_64" && sess.target.os != "none" {
let target_features = if sess.target.arch == "x86_64" && sess.target.os != "none" {
// x86_64 mandates SSE2 support and rustc requires the x87 feature to be enabled
vec![sym::fsxr, sym::sse, sym::sse2, Symbol::intern("x87")]
} else if sess.target.arch == "aarch64" {
Expand All @@ -196,7 +192,10 @@ impl CodegenBackend for CraneliftCodegenBackend {
}
} else {
vec![]
}
};
// FIXME do `unstable_target_features` properly
let unstable_target_features = target_features.clone();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not just unstable target features as the name of this variable implies, but all including unstable ones. Maybe rename it?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

all_target_features and stable_target_features would be clearer names, yes. But Session already has fields called unstable_target_features and target_features, and this is the code that is used to set those fields, so I used those names for consistency. If you think it's important, maybe it should be a follow-up.

(target_features, unstable_target_features)
}

fn print_version(&self) {
Expand Down
2 changes: 1 addition & 1 deletion compiler/rustc_codegen_gcc/src/gcc_util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ pub(crate) fn global_gcc_features(sess: &Session, diagnostics: bool) -> Vec<Stri
for feature in sess.opts.cg.target_feature.split(',') {
if let Some(feature) = feature.strip_prefix('+') {
all_rust_features.extend(
UnordSet::from(sess.target.implied_target_features(std::iter::once(feature)))
UnordSet::from(sess.target.implied_target_features(feature))
.to_sorted_stable_ord()
.iter()
.map(|&&s| (true, s)),
Expand Down
68 changes: 37 additions & 31 deletions compiler/rustc_codegen_gcc/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -259,8 +259,8 @@ impl CodegenBackend for GccCodegenBackend {
.join(sess)
}

fn target_features_cfg(&self, sess: &Session, allow_unstable: bool) -> Vec<Symbol> {
target_features_cfg(sess, allow_unstable, &self.target_info)
fn target_features_cfg(&self, sess: &Session) -> (Vec<Symbol>, Vec<Symbol>) {
target_features_cfg(sess, &self.target_info)
}
}

Expand Down Expand Up @@ -486,35 +486,41 @@ fn to_gcc_opt_level(optlevel: Option<OptLevel>) -> OptimizationLevel {
/// Returns the features that should be set in `cfg(target_feature)`.
fn target_features_cfg(
sess: &Session,
allow_unstable: bool,
target_info: &LockedTargetInfo,
) -> Vec<Symbol> {
) -> (Vec<Symbol>, Vec<Symbol>) {
// TODO(antoyo): use global_gcc_features.
sess.target
.rust_target_features()
.iter()
.filter_map(|&(feature, gate, _)| {
if allow_unstable
|| (gate.in_cfg() && (sess.is_nightly_build() || gate.requires_nightly().is_none()))
{
Some(feature)
} else {
None
}
})
.filter(|feature| {
// TODO: we disable Neon for now since we don't support the LLVM intrinsics for it.
if *feature == "neon" {
return false;
}
target_info.cpu_supports(feature)
/*
adx, aes, avx, avx2, avx512bf16, avx512bitalg, avx512bw, avx512cd, avx512dq, avx512er, avx512f, avx512fp16, avx512ifma,
avx512pf, avx512vbmi, avx512vbmi2, avx512vl, avx512vnni, avx512vp2intersect, avx512vpopcntdq,
bmi1, bmi2, cmpxchg16b, ermsb, f16c, fma, fxsr, gfni, lzcnt, movbe, pclmulqdq, popcnt, rdrand, rdseed, rtm,
sha, sse, sse2, sse3, sse4.1, sse4.2, sse4a, ssse3, tbm, vaes, vpclmulqdq, xsave, xsavec, xsaveopt, xsaves
*/
})
.map(Symbol::intern)
.collect()
let f = |allow_unstable| {
sess.target
.rust_target_features()
.iter()
.filter_map(|&(feature, gate, _)| {
if allow_unstable
|| (gate.in_cfg()
&& (sess.is_nightly_build() || gate.requires_nightly().is_none()))
{
Some(feature)
} else {
None
}
})
.filter(|feature| {
// TODO: we disable Neon for now since we don't support the LLVM intrinsics for it.
if *feature == "neon" {
return false;
}
target_info.cpu_supports(feature)
/*
adx, aes, avx, avx2, avx512bf16, avx512bitalg, avx512bw, avx512cd, avx512dq, avx512er, avx512f, avx512fp16, avx512ifma,
avx512pf, avx512vbmi, avx512vbmi2, avx512vl, avx512vnni, avx512vp2intersect, avx512vpopcntdq,
bmi1, bmi2, cmpxchg16b, ermsb, f16c, fma, fxsr, gfni, lzcnt, movbe, pclmulqdq, popcnt, rdrand, rdseed, rtm,
sha, sse, sse2, sse3, sse4.1, sse4.2, sse4a, ssse3, tbm, vaes, vpclmulqdq, xsave, xsavec, xsaveopt, xsaves
*/
})
.map(Symbol::intern)
.collect()
};

let target_features = f(false);
let unstable_target_features = f(true);
(target_features, unstable_target_features)
}
4 changes: 2 additions & 2 deletions compiler/rustc_codegen_llvm/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -338,8 +338,8 @@ impl CodegenBackend for LlvmCodegenBackend {
llvm_util::print_version();
}

fn target_features_cfg(&self, sess: &Session, allow_unstable: bool) -> Vec<Symbol> {
target_features_cfg(sess, allow_unstable)
fn target_features_cfg(&self, sess: &Session) -> (Vec<Symbol>, Vec<Symbol>) {
target_features_cfg(sess)
}

fn codegen_crate<'tcx>(
Expand Down
69 changes: 37 additions & 32 deletions compiler/rustc_codegen_llvm/src/llvm_util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -306,18 +306,18 @@ pub(crate) fn to_llvm_features<'a>(sess: &Session, s: &'a str) -> Option<LLVMFea
/// Must express features in the way Rust understands them.
///
/// We do not have to worry about RUSTC_SPECIFIC_FEATURES here, those are handled outside codegen.
pub(crate) fn target_features_cfg(sess: &Session, allow_unstable: bool) -> Vec<Symbol> {
pub(crate) fn target_features_cfg(sess: &Session) -> (Vec<Symbol>, Vec<Symbol>) {
let mut features: FxHashSet<Symbol> = Default::default();

// Add base features for the target.
// We do *not* add the -Ctarget-features there, and instead duplicate the logic for that below.
// The reason is that if LLVM considers a feature implied but we do not, we don't want that to
// show up in `cfg`. That way, `cfg` is entirely under our control -- except for the handling of
// the target CPU, that is still expanded to target features (with all their implied features) by
// LLVM.
// the target CPU, that is still expanded to target features (with all their implied features)
// by LLVM.
let target_machine = create_informational_target_machine(sess, true);
// Compute which of the known target features are enabled in the 'base' target machine.
// We only consider "supported" features; "forbidden" features are not reflected in `cfg` as of now.
// Compute which of the known target features are enabled in the 'base' target machine. We only
// consider "supported" features; "forbidden" features are not reflected in `cfg` as of now.
features.extend(
sess.target
.rust_target_features()
Expand All @@ -331,6 +331,9 @@ pub(crate) fn target_features_cfg(sess: &Session, allow_unstable: bool) -> Vec<S
if let Some(feat) = to_llvm_features(sess, feature) {
for llvm_feature in feat {
let cstr = SmallCStr::new(llvm_feature);
// `LLVMRustHasFeature` is moderately expensive. On targets with many
// features (e.g. x86) these calls take a non-trivial fraction of runtime
// when compiling very small programs.
if !unsafe { llvm::LLVMRustHasFeature(target_machine.raw(), cstr.as_ptr()) }
{
return false;
Expand All @@ -344,7 +347,7 @@ pub(crate) fn target_features_cfg(sess: &Session, allow_unstable: bool) -> Vec<S
.map(|(feature, _, _)| Symbol::intern(feature)),
);

// Add enabled features
// Add enabled and remove disabled features.
for (enabled, feature) in
sess.opts.cg.target_feature.split(',').filter_map(|s| match s.chars().next() {
Some('+') => Some((true, Symbol::intern(&s[1..]))),
Expand All @@ -360,7 +363,7 @@ pub(crate) fn target_features_cfg(sess: &Session, allow_unstable: bool) -> Vec<S
#[allow(rustc::potential_query_instability)]
features.extend(
sess.target
.implied_target_features(std::iter::once(feature.as_str()))
.implied_target_features(feature.as_str())
.iter()
.map(|s| Symbol::intern(s)),
);
Expand All @@ -371,11 +374,7 @@ pub(crate) fn target_features_cfg(sess: &Session, allow_unstable: bool) -> Vec<S
// `features.contains` below.
#[allow(rustc::potential_query_instability)]
features.retain(|f| {
if sess
.target
.implied_target_features(std::iter::once(f.as_str()))
.contains(&feature.as_str())
{
if sess.target.implied_target_features(f.as_str()).contains(&feature.as_str()) {
// If `f` if implies `feature`, then `!feature` implies `!f`, so we have to
// remove `f`. (This is the standard logical contraposition principle.)
false
Expand All @@ -387,25 +386,31 @@ pub(crate) fn target_features_cfg(sess: &Session, allow_unstable: bool) -> Vec<S
}
}

// Filter enabled features based on feature gates
sess.target
.rust_target_features()
.iter()
.filter_map(|(feature, gate, _)| {
// The `allow_unstable` set is used by rustc internally to determined which target
// features are truly available, so we want to return even perma-unstable "forbidden"
// features.
if allow_unstable
|| (gate.in_cfg() && (sess.is_nightly_build() || gate.requires_nightly().is_none()))
{
Some(*feature)
} else {
None
}
})
.filter(|feature| features.contains(&Symbol::intern(feature)))
.map(|feature| Symbol::intern(feature))
.collect()
// Filter enabled features based on feature gates.
let f = |allow_unstable| {
sess.target
.rust_target_features()
.iter()
.filter_map(|(feature, gate, _)| {
// The `allow_unstable` set is used by rustc internally to determined which target
// features are truly available, so we want to return even perma-unstable
// "forbidden" features.
if allow_unstable
|| (gate.in_cfg()
&& (sess.is_nightly_build() || gate.requires_nightly().is_none()))
{
Some(Symbol::intern(feature))
} else {
None
}
})
.filter(|feature| features.contains(&feature))
.collect()
};
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The code in this closure is duplicated between all codegen backends. Maybe move it to the caller of target_features_cfg? So just return features from target_features_cfg and then do the splitting between stable and unstable features in the caller.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't see it in the cranelift backend.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And if the response is "the cranelift backend should do that filtering", that would also be a good follow-up, because that's a pre-existing issue that is distinct from the one this PR is addressing.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The Cranelift backend currently doesn't consider any unstable feature to ever be enabled at all, so filtering has no effect. Once it does support unstable features, yes it should do filtering.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, makes sense.


let target_features = f(false);
let unstable_target_features = f(true);
(target_features, unstable_target_features)
}

pub(crate) fn print_version() {
Expand Down Expand Up @@ -682,7 +687,7 @@ pub(crate) fn global_llvm_features(
for feature in sess.opts.cg.target_feature.split(',') {
if let Some(feature) = feature.strip_prefix('+') {
all_rust_features.extend(
UnordSet::from(sess.target.implied_target_features(std::iter::once(feature)))
UnordSet::from(sess.target.implied_target_features(feature))
.to_sorted_stable_ord()
.iter()
.map(|&&s| (true, s)),
Expand Down
2 changes: 1 addition & 1 deletion compiler/rustc_codegen_ssa/src/target_features.rs
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ pub(crate) fn provide(providers: &mut Providers) {
},
implied_target_features: |tcx, feature: Symbol| {
let feature = feature.as_str();
UnordSet::from(tcx.sess.target.implied_target_features(std::iter::once(feature)))
UnordSet::from(tcx.sess.target.implied_target_features(feature))
.into_sorted_stable_ord()
.into_iter()
.map(|s| Symbol::intern(s))
Expand Down
9 changes: 6 additions & 3 deletions compiler/rustc_codegen_ssa/src/traits/backend.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,13 @@ pub trait CodegenBackend {

fn print(&self, _req: &PrintRequest, _out: &mut String, _sess: &Session) {}

/// Returns the features that should be set in `cfg(target_features)`.
/// Returns two feature sets:
/// - The first has the features that should be set in `cfg(target_features)`.
/// - The second is like the first, but also includes unstable features.
///
/// RUSTC_SPECIFIC_FEATURES should be skipped here, those are handled outside codegen.
fn target_features_cfg(&self, _sess: &Session, _allow_unstable: bool) -> Vec<Symbol> {
vec![]
fn target_features_cfg(&self, _sess: &Session) -> (Vec<Symbol>, Vec<Symbol>) {
(vec![], vec![])
}

fn print_passes(&self) {}
Expand Down
8 changes: 4 additions & 4 deletions compiler/rustc_interface/src/util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,11 @@ pub(crate) fn add_configuration(
) {
let tf = sym::target_feature;

let unstable_target_features = codegen_backend.target_features_cfg(sess, true);
sess.unstable_target_features.extend(unstable_target_features.iter().cloned());
let (target_features, unstable_target_features) = codegen_backend.target_features_cfg(sess);

let target_features = codegen_backend.target_features_cfg(sess, false);
sess.target_features.extend(target_features.iter().cloned());
sess.unstable_target_features.extend(unstable_target_features.iter().copied());

sess.target_features.extend(target_features.iter().copied());

cfg.extend(target_features.into_iter().map(|feat| (tf, Some(feat))));

Expand Down
12 changes: 5 additions & 7 deletions compiler/rustc_target/src/target_features.rs
Original file line number Diff line number Diff line change
Expand Up @@ -768,17 +768,15 @@ impl Target {
}
}

pub fn implied_target_features<'a>(
&self,
base_features: impl Iterator<Item = &'a str>,
) -> FxHashSet<&'a str> {
// Note: the returned set includes `base_feature`.
pub fn implied_target_features<'a>(&self, base_feature: &'a str) -> FxHashSet<&'a str> {
let implied_features =
self.rust_target_features().iter().map(|(f, _, i)| (f, i)).collect::<FxHashMap<_, _>>();

// implied target features have their own implied target features, so we traverse the
// map until there are no more features to add
// Implied target features have their own implied target features, so we traverse the
// map until there are no more features to add.
let mut features = FxHashSet::default();
let mut new_features = base_features.collect::<Vec<&str>>();
let mut new_features = vec![base_feature];
while let Some(new_feature) = new_features.pop() {
if features.insert(new_feature) {
if let Some(implied_features) = implied_features.get(&new_feature) {
Expand Down