From e448aa37b5b2ee14b2c286f29864960a13a8453e Mon Sep 17 00:00:00 2001 From: Oli Scherer Date: Thu, 30 Sep 2021 19:32:41 +0200 Subject: [PATCH] attributes: help LLVM understand that some spans are never going to do anything (#1600) ## Motivation Adding `#[instrument(level = "debug")]` attributes to functions in rustc caused a performance regression (in release, where `debug!` is fully optimized out) across all crates: https://github.com/rust-lang/rust/pull/89048#issuecomment-929566530 While trying to debug this, I noticed that spans don't have the same advantage that events have wrt to how LLVM sees them. Spans (or more precisely, the enter-guard), will get dropped at the end of the scope, which throws a spanner into the LLVM optimization pipeline. I am not entirely sure where the problem is, but I am moderately certain that the issue is that even entering a dummy span is too much code for LLVM to reliably (or at all) optimize out. ## Solution My hope is that in trusting the Rust compiler to generate cool code when using drop flags, we can essentially generate a drop flag that depends on something we know (due to events working as expected) to be optimizable. So instead of doing ```rust let _x = span!(); let _y = _x.enter(); // lotsa code drop(_y) ``` we do ```rust let _x; let _y; let must_drop = false; if level_enabled!(DEBUG) { must_drop = true; _x = span!(); _y = _x.enter(); } // lotsa code if must_drop { drop(_y) } ``` I believe this will allow LLVM to properly optimize this again. Testing that right now, but I wanted to open this PR immediately for review. --- tracing-attributes/src/lib.rs | 59 ++++++++++++++++++++++++++++++----- 1 file changed, 51 insertions(+), 8 deletions(-) diff --git a/tracing-attributes/src/lib.rs b/tracing-attributes/src/lib.rs index d3630ffbfc..130002417c 100644 --- a/tracing-attributes/src/lib.rs +++ b/tracing-attributes/src/lib.rs @@ -403,6 +403,8 @@ fn gen_block( .map(|name| quote!(#name)) .unwrap_or_else(|| quote!(#instrumented_function_name)); + let level = args.level(); + // generate this inside a closure, so we can return early on errors. let span = (|| { // Pull out the arguments-to-be-skipped first, so we can filter results @@ -448,7 +450,6 @@ fn gen_block( } } - let level = args.level(); let target = args.target(); // filter out skipped fields @@ -515,7 +516,9 @@ fn gen_block( if err { quote_spanned!(block.span()=> let __tracing_attr_span = #span; - tracing::Instrument::instrument(async move { + // See comment on the default case at the end of this function + // for why we do this a bit roundabout. + let fut = async move { match async move { #block }.await { #[allow(clippy::unit_arg)] Ok(x) => Ok(x), @@ -524,22 +527,46 @@ fn gen_block( Err(e) } } - }, __tracing_attr_span).await + }; + if tracing::level_enabled!(#level) { + tracing::Instrument::instrument( + fut, + __tracing_attr_span + ) + .await + } else { + fut.await + } ) } else { quote_spanned!(block.span()=> let __tracing_attr_span = #span; + // See comment on the default case at the end of this function + // for why we do this a bit roundabout. + let fut = async move { #block }; + if tracing::level_enabled!(#level) { tracing::Instrument::instrument( - async move { #block }, + fut, __tracing_attr_span ) .await + } else { + fut.await + } ) } } else if err { quote_spanned!(block.span()=> - let __tracing_attr_span = #span; - let __tracing_attr_guard = __tracing_attr_span.enter(); + // See comment on the default case at the end of this function + // for why we do this a bit roundabout. + let __tracing_attr_span; + let __tracing_attr_guard; + if tracing::level_enabled!(#level) { + __tracing_attr_span = #span; + __tracing_attr_guard = __tracing_attr_span.enter(); + } + // pacify clippy::suspicious_else_formatting + let _ = (); #[allow(clippy::redundant_closure_call)] match (move || #block)() { #[allow(clippy::unit_arg)] @@ -552,8 +579,24 @@ fn gen_block( ) } else { quote_spanned!(block.span()=> - let __tracing_attr_span = #span; - let __tracing_attr_guard = __tracing_attr_span.enter(); + // These variables are left uninitialized and initialized only + // if the tracing level is statically enabled at this point. + // While the tracing level is also checked at span creation + // time, that will still create a dummy span, and a dummy guard + // and drop the dummy guard later. By lazily initializing these + // variables, Rust will generate a drop flag for them and thus + // only drop the guard if it was created. This creates code that + // is very straightforward for LLVM to optimize out if the tracing + // level is statically disabled, while not causing any performance + // regression in case the level is enabled. + let __tracing_attr_span; + let __tracing_attr_guard; + if tracing::level_enabled!(#level) { + __tracing_attr_span = #span; + __tracing_attr_guard = __tracing_attr_span.enter(); + } + // pacify clippy::suspicious_else_formatting + let _ = (); #block ) }