Skip to content

Commit c0bb39c

Browse files
committed
Auto merge of #127516 - nnethercote:simplify-LazyAttrTokenStream, r=<try>
Simplify `LazyAttrTokenStream` `LazyAttrTokenStream` is an unpleasant type: `Lrc<Box<dyn ToAttrTokenStream>>`. Why does it look like that? - There are two `ToAttrTokenStream` impls, one for the lazy case, and one for the case where we already have an `AttrTokenStream`. - The lazy case (`LazyAttrTokenStreamImpl`) is implemented in `rustc_parse`, but `LazyAttrTokenStream` is defined in `rustc_ast`, which does not depend on `rustc_parse`. The use of the trait lets `rustc_ast` implicitly depend on `rustc_parse`. This explains the `dyn`. - `LazyAttrTokenStream` must have a `size_of` as small as possible, because it's used in many AST nodes. This explains the `Lrc<Box<_>>`, which keeps it to one word. (It's required because `Lrc<dyn _>` would be a fat pointer.) This PR moves `LazyAttrTokenStreamImpl` (and a few other token stream things) from `rustc_parse` to `rustc_ast`. This lets us replace the `ToAttrTokenStream` trait with a two-variant enum and also remove the `Box`, changing `LazyAttrTokenStream` to `Lrc<LazyAttrTokenStreamInner>`. Plus it does a few cleanups. r? `@petrochenkov`
2 parents 8672b2b + 54c1d40 commit c0bb39c

File tree

7 files changed

+327
-324
lines changed

7 files changed

+327
-324
lines changed

compiler/rustc_ast/src/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
test(attr(deny(warnings)))
1212
)]
1313
#![doc(rust_logo)]
14+
#![feature(array_windows)]
1415
#![feature(associated_type_defaults)]
1516
#![feature(box_patterns)]
1617
#![feature(if_let_guard)]

compiler/rustc_ast/src/mut_visit.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -744,7 +744,7 @@ fn visit_lazy_tts_opt_mut<T: MutVisitor>(lazy_tts: Option<&mut LazyAttrTokenStre
744744
if let Some(lazy_tts) = lazy_tts {
745745
let mut tts = lazy_tts.to_attr_token_stream();
746746
visit_attr_tts(&mut tts, vis);
747-
*lazy_tts = LazyAttrTokenStream::new(tts);
747+
*lazy_tts = LazyAttrTokenStream::new_direct(tts);
748748
}
749749
}
750750
}

compiler/rustc_ast/src/tokenstream.rs

+307-23
Large diffs are not rendered by default.

compiler/rustc_expand/src/config.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,7 @@ impl<'a> StripUnconfigured<'a> {
160160
if self.config_tokens {
161161
if let Some(Some(tokens)) = node.tokens_mut() {
162162
let attr_stream = tokens.to_attr_token_stream();
163-
*tokens = LazyAttrTokenStream::new(self.configure_tokens(&attr_stream));
163+
*tokens = LazyAttrTokenStream::new_direct(self.configure_tokens(&attr_stream));
164164
}
165165
}
166166
}
@@ -190,7 +190,7 @@ impl<'a> StripUnconfigured<'a> {
190190
target.attrs.flat_map_in_place(|attr| self.process_cfg_attr(&attr));
191191

192192
if self.in_cfg(&target.attrs) {
193-
target.tokens = LazyAttrTokenStream::new(
193+
target.tokens = LazyAttrTokenStream::new_direct(
194194
self.configure_tokens(&target.tokens.to_attr_token_stream()),
195195
);
196196
Some(AttrTokenTree::AttrsTarget(target))
@@ -335,7 +335,7 @@ impl<'a> StripUnconfigured<'a> {
335335
} else {
336336
vec![AttrTokenTree::Token(pound_token, Spacing::JointHidden), bracket_group]
337337
};
338-
let tokens = Some(LazyAttrTokenStream::new(AttrTokenStream::new(trees)));
338+
let tokens = Some(LazyAttrTokenStream::new_direct(AttrTokenStream::new(trees)));
339339
let attr = attr::mk_attr_from_item(
340340
&self.sess.psess.attr_id_generator,
341341
item,

compiler/rustc_parse/src/lib.rs

-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
#![allow(internal_features)]
55
#![allow(rustc::diagnostic_outside_of_impl)]
66
#![allow(rustc::untranslatable_diagnostic)]
7-
#![feature(array_windows)]
87
#![feature(box_patterns)]
98
#![feature(debug_closure_helpers)]
109
#![feature(if_let_guard)]

compiler/rustc_parse/src/parser/attr_wrapper.rs

+12-184
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,13 @@
1-
use super::{Capturing, FlatToken, ForceCollect, Parser, ReplaceRange, TokenCursor, TrailingToken};
2-
use rustc_ast::token::{self, Delimiter, Token, TokenKind};
3-
use rustc_ast::tokenstream::{AttrTokenStream, AttrTokenTree, AttrsTarget, DelimSpacing};
4-
use rustc_ast::tokenstream::{DelimSpan, LazyAttrTokenStream, Spacing, ToAttrTokenStream};
1+
use super::{Capturing, ForceCollect, Parser, TrailingToken};
2+
use rustc_ast::token;
3+
use rustc_ast::tokenstream::{AttrsTarget, LazyAttrTokenStream, ReplaceRange};
54
use rustc_ast::{self as ast};
65
use rustc_ast::{AttrVec, Attribute, HasAttrs, HasTokens};
76
use rustc_errors::PResult;
87
use rustc_session::parse::ParseSess;
9-
use rustc_span::{sym, Span, DUMMY_SP};
8+
use rustc_span::{sym, DUMMY_SP};
109

11-
use std::{iter, mem};
10+
use std::mem;
1211

1312
/// A wrapper type to ensure that the parser handles outer attributes correctly.
1413
/// When we parse outer attributes, we need to ensure that we capture tokens
@@ -76,98 +75,6 @@ fn has_cfg_or_cfg_attr(attrs: &[Attribute]) -> bool {
7675
})
7776
}
7877

79-
// Produces a `TokenStream` on-demand. Using `cursor_snapshot`
80-
// and `num_calls`, we can reconstruct the `TokenStream` seen
81-
// by the callback. This allows us to avoid producing a `TokenStream`
82-
// if it is never needed - for example, a captured `macro_rules!`
83-
// argument that is never passed to a proc macro.
84-
// In practice token stream creation happens rarely compared to
85-
// calls to `collect_tokens` (see some statistics in #78736),
86-
// so we are doing as little up-front work as possible.
87-
//
88-
// This also makes `Parser` very cheap to clone, since
89-
// there is no intermediate collection buffer to clone.
90-
struct LazyAttrTokenStreamImpl {
91-
start_token: (Token, Spacing),
92-
cursor_snapshot: TokenCursor,
93-
num_calls: u32,
94-
break_last_token: bool,
95-
replace_ranges: Box<[ReplaceRange]>,
96-
}
97-
98-
impl ToAttrTokenStream for LazyAttrTokenStreamImpl {
99-
fn to_attr_token_stream(&self) -> AttrTokenStream {
100-
// The token produced by the final call to `{,inlined_}next` was not
101-
// actually consumed by the callback. The combination of chaining the
102-
// initial token and using `take` produces the desired result - we
103-
// produce an empty `TokenStream` if no calls were made, and omit the
104-
// final token otherwise.
105-
let mut cursor_snapshot = self.cursor_snapshot.clone();
106-
let tokens = iter::once((FlatToken::Token(self.start_token.0.clone()), self.start_token.1))
107-
.chain(iter::repeat_with(|| {
108-
let token = cursor_snapshot.next();
109-
(FlatToken::Token(token.0), token.1)
110-
}))
111-
.take(self.num_calls as usize);
112-
113-
if self.replace_ranges.is_empty() {
114-
make_attr_token_stream(tokens, self.break_last_token)
115-
} else {
116-
let mut tokens: Vec<_> = tokens.collect();
117-
let mut replace_ranges = self.replace_ranges.to_vec();
118-
replace_ranges.sort_by_key(|(range, _)| range.start);
119-
120-
#[cfg(debug_assertions)]
121-
{
122-
for [(range, tokens), (next_range, next_tokens)] in replace_ranges.array_windows() {
123-
assert!(
124-
range.end <= next_range.start || range.end >= next_range.end,
125-
"Replace ranges should either be disjoint or nested: ({:?}, {:?}) ({:?}, {:?})",
126-
range,
127-
tokens,
128-
next_range,
129-
next_tokens,
130-
);
131-
}
132-
}
133-
134-
// Process the replace ranges, starting from the highest start
135-
// position and working our way back. If have tokens like:
136-
//
137-
// `#[cfg(FALSE)] struct Foo { #[cfg(FALSE)] field: bool }`
138-
//
139-
// Then we will generate replace ranges for both
140-
// the `#[cfg(FALSE)] field: bool` and the entire
141-
// `#[cfg(FALSE)] struct Foo { #[cfg(FALSE)] field: bool }`
142-
//
143-
// By starting processing from the replace range with the greatest
144-
// start position, we ensure that any replace range which encloses
145-
// another replace range will capture the *replaced* tokens for the inner
146-
// range, not the original tokens.
147-
for (range, target) in replace_ranges.into_iter().rev() {
148-
assert!(!range.is_empty(), "Cannot replace an empty range: {range:?}");
149-
150-
// Replace the tokens in range with zero or one `FlatToken::AttrsTarget`s, plus
151-
// enough `FlatToken::Empty`s to fill up the rest of the range. This keeps the
152-
// total length of `tokens` constant throughout the replacement process, allowing
153-
// us to use all of the `ReplaceRanges` entries without adjusting indices.
154-
let target_len = target.is_some() as usize;
155-
tokens.splice(
156-
(range.start as usize)..(range.end as usize),
157-
target
158-
.into_iter()
159-
.map(|target| (FlatToken::AttrsTarget(target), Spacing::Alone))
160-
.chain(
161-
iter::repeat((FlatToken::Empty, Spacing::Alone))
162-
.take(range.len() - target_len),
163-
),
164-
);
165-
}
166-
make_attr_token_stream(tokens.into_iter(), self.break_last_token)
167-
}
168-
}
169-
}
170-
17178
impl<'a> Parser<'a> {
17279
/// Records all tokens consumed by the provided callback,
17380
/// including the current token. These tokens are collected
@@ -317,20 +224,17 @@ impl<'a> Parser<'a> {
317224
.collect()
318225
};
319226

320-
let tokens = LazyAttrTokenStream::new(LazyAttrTokenStreamImpl {
227+
let tokens = LazyAttrTokenStream::new_pending(
321228
start_token,
322-
num_calls,
323229
cursor_snapshot,
324-
break_last_token: self.break_last_token,
230+
num_calls,
231+
self.break_last_token,
325232
replace_ranges,
326-
});
233+
);
327234

328-
// If we support tokens at all
329-
if let Some(target_tokens) = ret.tokens_mut() {
330-
if target_tokens.is_none() {
331-
// Store our newly captured tokens into the AST node.
332-
*target_tokens = Some(tokens.clone());
333-
}
235+
// If we support tokens and don't already have them, store the newly captured tokens.
236+
if let Some(target_tokens @ None) = ret.tokens_mut() {
237+
*target_tokens = Some(tokens.clone());
334238
}
335239

336240
let final_attrs = ret.attrs();
@@ -366,88 +270,12 @@ impl<'a> Parser<'a> {
366270
}
367271
}
368272

369-
/// Converts a flattened iterator of tokens (including open and close delimiter tokens) into an
370-
/// `AttrTokenStream`, creating an `AttrTokenTree::Delimited` for each matching pair of open and
371-
/// close delims.
372-
fn make_attr_token_stream(
373-
mut iter: impl Iterator<Item = (FlatToken, Spacing)>,
374-
break_last_token: bool,
375-
) -> AttrTokenStream {
376-
#[derive(Debug)]
377-
struct FrameData {
378-
// This is `None` for the first frame, `Some` for all others.
379-
open_delim_sp: Option<(Delimiter, Span, Spacing)>,
380-
inner: Vec<AttrTokenTree>,
381-
}
382-
let mut stack = vec![FrameData { open_delim_sp: None, inner: vec![] }];
383-
let mut token_and_spacing = iter.next();
384-
while let Some((token, spacing)) = token_and_spacing {
385-
match token {
386-
FlatToken::Token(Token { kind: TokenKind::OpenDelim(delim), span }) => {
387-
stack
388-
.push(FrameData { open_delim_sp: Some((delim, span, spacing)), inner: vec![] });
389-
}
390-
FlatToken::Token(Token { kind: TokenKind::CloseDelim(delim), span }) => {
391-
let frame_data = stack
392-
.pop()
393-
.unwrap_or_else(|| panic!("Token stack was empty for token: {token:?}"));
394-
395-
let (open_delim, open_sp, open_spacing) = frame_data.open_delim_sp.unwrap();
396-
assert_eq!(
397-
open_delim, delim,
398-
"Mismatched open/close delims: open={open_delim:?} close={span:?}"
399-
);
400-
let dspan = DelimSpan::from_pair(open_sp, span);
401-
let dspacing = DelimSpacing::new(open_spacing, spacing);
402-
let stream = AttrTokenStream::new(frame_data.inner);
403-
let delimited = AttrTokenTree::Delimited(dspan, dspacing, delim, stream);
404-
stack
405-
.last_mut()
406-
.unwrap_or_else(|| panic!("Bottom token frame is missing for token: {token:?}"))
407-
.inner
408-
.push(delimited);
409-
}
410-
FlatToken::Token(token) => stack
411-
.last_mut()
412-
.expect("Bottom token frame is missing!")
413-
.inner
414-
.push(AttrTokenTree::Token(token, spacing)),
415-
FlatToken::AttrsTarget(target) => stack
416-
.last_mut()
417-
.expect("Bottom token frame is missing!")
418-
.inner
419-
.push(AttrTokenTree::AttrsTarget(target)),
420-
FlatToken::Empty => {}
421-
}
422-
token_and_spacing = iter.next();
423-
}
424-
let mut final_buf = stack.pop().expect("Missing final buf!");
425-
if break_last_token {
426-
let last_token = final_buf.inner.pop().unwrap();
427-
if let AttrTokenTree::Token(last_token, spacing) = last_token {
428-
let unglued_first = last_token.kind.break_two_token_op().unwrap().0;
429-
430-
// An 'unglued' token is always two ASCII characters
431-
let mut first_span = last_token.span.shrink_to_lo();
432-
first_span = first_span.with_hi(first_span.lo() + rustc_span::BytePos(1));
433-
434-
final_buf
435-
.inner
436-
.push(AttrTokenTree::Token(Token::new(unglued_first, first_span), spacing));
437-
} else {
438-
panic!("Unexpected last token {last_token:?}")
439-
}
440-
}
441-
AttrTokenStream::new(final_buf.inner)
442-
}
443-
444273
// Some types are used a lot. Make sure they don't unintentionally get bigger.
445274
#[cfg(target_pointer_width = "64")]
446275
mod size_asserts {
447276
use super::*;
448277
use rustc_data_structures::static_assert_size;
449278
// tidy-alphabetical-start
450279
static_assert_size!(AttrWrapper, 16);
451-
static_assert_size!(LazyAttrTokenStreamImpl, 96);
452280
// tidy-alphabetical-end
453281
}

0 commit comments

Comments
 (0)