Skip to content

Commit c0bb39c

Browse files
committed
Auto merge of #127516 - nnethercote:simplify-LazyAttrTokenStream, r=<try>
Simplify `LazyAttrTokenStream` `LazyAttrTokenStream` is an unpleasant type: `Lrc<Box<dyn ToAttrTokenStream>>`. Why does it look like that? - There are two `ToAttrTokenStream` impls, one for the lazy case, and one for the case where we already have an `AttrTokenStream`. - The lazy case (`LazyAttrTokenStreamImpl`) is implemented in `rustc_parse`, but `LazyAttrTokenStream` is defined in `rustc_ast`, which does not depend on `rustc_parse`. The use of the trait lets `rustc_ast` implicitly depend on `rustc_parse`. This explains the `dyn`. - `LazyAttrTokenStream` must have a `size_of` as small as possible, because it's used in many AST nodes. This explains the `Lrc<Box<_>>`, which keeps it to one word. (It's required because `Lrc<dyn _>` would be a fat pointer.) This PR moves `LazyAttrTokenStreamImpl` (and a few other token stream things) from `rustc_parse` to `rustc_ast`. This lets us replace the `ToAttrTokenStream` trait with a two-variant enum and also remove the `Box`, changing `LazyAttrTokenStream` to `Lrc<LazyAttrTokenStreamInner>`. Plus it does a few cleanups. r? `@petrochenkov`
2 parents 8672b2b + 54c1d40 commit c0bb39c

File tree

7 files changed

+327
-324
lines changed

7 files changed

+327
-324
lines changed

compiler/rustc_ast/src/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
test(attr(deny(warnings)))
1212
)]
1313
#![doc(rust_logo)]
14+
#![feature(array_windows)]
1415
#![feature(associated_type_defaults)]
1516
#![feature(box_patterns)]
1617
#![feature(if_let_guard)]

compiler/rustc_ast/src/mut_visit.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -744,7 +744,7 @@ fn visit_lazy_tts_opt_mut<T: MutVisitor>(lazy_tts: Option<&mut LazyAttrTokenStre
744744
if let Some(lazy_tts) = lazy_tts {
745745
let mut tts = lazy_tts.to_attr_token_stream();
746746
visit_attr_tts(&mut tts, vis);
747-
*lazy_tts = LazyAttrTokenStream::new(tts);
747+
*lazy_tts = LazyAttrTokenStream::new_direct(tts);
748748
}
749749
}
750750
}

compiler/rustc_ast/src/tokenstream.rs

+307-23
Large diffs are not rendered by default.

compiler/rustc_expand/src/config.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,7 @@ impl<'a> StripUnconfigured<'a> {
160160
if self.config_tokens {
161161
if let Some(Some(tokens)) = node.tokens_mut() {
162162
let attr_stream = tokens.to_attr_token_stream();
163-
*tokens = LazyAttrTokenStream::new(self.configure_tokens(&attr_stream));
163+
*tokens = LazyAttrTokenStream::new_direct(self.configure_tokens(&attr_stream));
164164
}
165165
}
166166
}
@@ -190,7 +190,7 @@ impl<'a> StripUnconfigured<'a> {
190190
target.attrs.flat_map_in_place(|attr| self.process_cfg_attr(&attr));
191191

192192
if self.in_cfg(&target.attrs) {
193-
target.tokens = LazyAttrTokenStream::new(
193+
target.tokens = LazyAttrTokenStream::new_direct(
194194
self.configure_tokens(&target.tokens.to_attr_token_stream()),
195195
);
196196
Some(AttrTokenTree::AttrsTarget(target))
@@ -335,7 +335,7 @@ impl<'a> StripUnconfigured<'a> {
335335
} else {
336336
vec![AttrTokenTree::Token(pound_token, Spacing::JointHidden), bracket_group]
337337
};
338-
let tokens = Some(LazyAttrTokenStream::new(AttrTokenStream::new(trees)));
338+
let tokens = Some(LazyAttrTokenStream::new_direct(AttrTokenStream::new(trees)));
339339
let attr = attr::mk_attr_from_item(
340340
&self.sess.psess.attr_id_generator,
341341
item,

compiler/rustc_parse/src/lib.rs

-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
#![allow(internal_features)]
55
#![allow(rustc::diagnostic_outside_of_impl)]
66
#![allow(rustc::untranslatable_diagnostic)]
7-
#![feature(array_windows)]
87
#![feature(box_patterns)]
98
#![feature(debug_closure_helpers)]
109
#![feature(if_let_guard)]

compiler/rustc_parse/src/parser/attr_wrapper.rs

+12-184
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,13 @@
1-
use super::{Capturing, FlatToken, ForceCollect, Parser, ReplaceRange, TokenCursor, TrailingToken};
2-
use rustc_ast::token::{self, Delimiter, Token, TokenKind};
3-
use rustc_ast::tokenstream::{AttrTokenStream, AttrTokenTree, AttrsTarget, DelimSpacing};
4-
use rustc_ast::tokenstream::{DelimSpan, LazyAttrTokenStream, Spacing, ToAttrTokenStream};
1+
use super::{Capturing, ForceCollect, Parser, TrailingToken};
2+
use rustc_ast::token;
3+
use rustc_ast::tokenstream::{AttrsTarget, LazyAttrTokenStream, ReplaceRange};
54
use rustc_ast::{self as ast};
65
use rustc_ast::{AttrVec, Attribute, HasAttrs, HasTokens};
76
use rustc_errors::PResult;
87
use rustc_session::parse::ParseSess;
9-
use rustc_span::{sym, Span, DUMMY_SP};
8+
use rustc_span::{sym, DUMMY_SP};
109

11-
use std::{iter, mem};
10+
use std::mem;
1211

1312
/// A wrapper type to ensure that the parser handles outer attributes correctly.
1413
/// When we parse outer attributes, we need to ensure that we capture tokens
@@ -76,98 +75,6 @@ fn has_cfg_or_cfg_attr(attrs: &[Attribute]) -> bool {
7675
})
7776
}
7877

79-
// Produces a `TokenStream` on-demand. Using `cursor_snapshot`
80-
// and `num_calls`, we can reconstruct the `TokenStream` seen
81-
// by the callback. This allows us to avoid producing a `TokenStream`
82-
// if it is never needed - for example, a captured `macro_rules!`
83-
// argument that is never passed to a proc macro.
84-
// In practice token stream creation happens rarely compared to
85-
// calls to `collect_tokens` (see some statistics in #78736),
86-
// so we are doing as little up-front work as possible.
87-
//
88-
// This also makes `Parser` very cheap to clone, since
89-
// there is no intermediate collection buffer to clone.
90-
struct LazyAttrTokenStreamImpl {
91-
start_token: (Token, Spacing),
92-
cursor_snapshot: TokenCursor,
93-
num_calls: u32,
94-
break_last_token: bool,
95-
replace_ranges: Box<[ReplaceRange]>,
96-
}
97-
98-
impl ToAttrTokenStream for LazyAttrTokenStreamImpl {
99-
fn to_attr_token_stream(&self) -> AttrTokenStream {
100-
// The token produced by the final call to `{,inlined_}next` was not
101-
// actually consumed by the callback. The combination of chaining the
102-
// initial token and using `take` produces the desired result - we
103-
// produce an empty `TokenStream` if no calls were made, and omit the
104-
// final token otherwise.
105-
let mut cursor_snapshot = self.cursor_snapshot.clone();
106-
let tokens = iter::once((FlatToken::Token(self.start_token.0.clone()), self.start_token.1))
107-
.chain(iter::repeat_with(|| {
108-
let token = cursor_snapshot.next();
109-
(FlatToken::Token(token.0), token.1)
110-
}))
111-
.take(self.num_calls as usize);
112-
113-
if self.replace_ranges.is_empty() {
114-
make_attr_token_stream(tokens, self.break_last_token)
115-
} else {
116-
let mut tokens: Vec<_> = tokens.collect();
117-
let mut replace_ranges = self.replace_ranges.to_vec();
118-
replace_ranges.sort_by_key(|(range, _)| range.start);
119-
120-
#[cfg(debug_assertions)]
121-
{
122-
for [(range, tokens), (next_range, next_tokens)] in replace_ranges.array_windows() {
123-
assert!(
124-
range.end <= next_range.start || range.end >= next_range.end,
125-
"Replace ranges should either be disjoint or nested: ({:?}, {:?}) ({:?}, {:?})",
126-
range,
127-
tokens,
128-
next_range,
129-
next_tokens,
130-
);
131-
}
132-
}
133-
134-
// Process the replace ranges, starting from the highest start
135-
// position and working our way back. If have tokens like:
136-
//
137-
// `#[cfg(FALSE)] struct Foo { #[cfg(FALSE)] field: bool }`
138-
//
139-
// Then we will generate replace ranges for both
140-
// the `#[cfg(FALSE)] field: bool` and the entire
141-
// `#[cfg(FALSE)] struct Foo { #[cfg(FALSE)] field: bool }`
142-
//
143-
// By starting processing from the replace range with the greatest
144-
// start position, we ensure that any replace range which encloses
145-
// another replace range will capture the *replaced* tokens for the inner
146-
// range, not the original tokens.
147-
for (range, target) in replace_ranges.into_iter().rev() {
148-
assert!(!range.is_empty(), "Cannot replace an empty range: {range:?}");
149-
150-
// Replace the tokens in range with zero or one `FlatToken::AttrsTarget`s, plus
151-
// enough `FlatToken::Empty`s to fill up the rest of the range. This keeps the
152-
// total length of `tokens` constant throughout the replacement process, allowing
153-
// us to use all of the `ReplaceRanges` entries without adjusting indices.
154-
let target_len = target.is_some() as usize;
155-
tokens.splice(
156-
(range.start as usize)..(range.end as usize),
157-
target
158-
.into_iter()
159-
.map(|target| (FlatToken::AttrsTarget(target), Spacing::Alone))
160-
.chain(
161-
iter::repeat((FlatToken::Empty, Spacing::Alone))
162-
.take(range.len() - target_len),
163-
),
164-
);
165-
}
166-
make_attr_token_stream(tokens.into_iter(), self.break_last_token)
167-
}
168-
}
169-
}
170-
17178
impl<'a> Parser<'a> {
17279
/// Records all tokens consumed by the provided callback,
17380
/// including the current token. These tokens are collected
@@ -317,20 +224,17 @@ impl<'a> Parser<'a> {
317224
.collect()
318225
};
319226

320-
let tokens = LazyAttrTokenStream::new(LazyAttrTokenStreamImpl {
227+
let tokens = LazyAttrTokenStream::new_pending(
321228
start_token,
322-
num_calls,
323229
cursor_snapshot,
324-
break_last_token: self.break_last_token,
230+
num_calls,
231+
self.break_last_token,
325232
replace_ranges,
326-
});
233+
);
327234

328-
// If we support tokens at all
329-
if let Some(target_tokens) = ret.tokens_mut() {
330-
if target_tokens.is_none() {
331-
// Store our newly captured tokens into the AST node.
332-
*target_tokens = Some(tokens.clone());
333-
}
235+
// If we support tokens and don't already have them, store the newly captured tokens.
236+
if let Some(target_tokens @ None) = ret.tokens_mut() {
237+
*target_tokens = Some(tokens.clone());
334238
}
335239

336240
let final_attrs = ret.attrs();
@@ -366,88 +270,12 @@ impl<'a> Parser<'a> {
366270
}
367271
}
368272

369-
/// Converts a flattened iterator of tokens (including open and close delimiter tokens) into an
370-
/// `AttrTokenStream`, creating an `AttrTokenTree::Delimited` for each matching pair of open and
371-
/// close delims.
372-
fn make_attr_token_stream(
373-
mut iter: impl Iterator<Item = (FlatToken, Spacing)>,
374-
break_last_token: bool,
375-
) -> AttrTokenStream {
376-
#[derive(Debug)]
377-
struct FrameData {
378-
// This is `None` for the first frame, `Some` for all others.
379-
open_delim_sp: Option<(Delimiter, Span, Spacing)>,
380-
inner: Vec<AttrTokenTree>,
381-
}
382-
let mut stack = vec![FrameData { open_delim_sp: None, inner: vec![] }];
383-
let mut token_and_spacing = iter.next();
384-
while let Some((token, spacing)) = token_and_spacing {
385-
match token {
386-
FlatToken::Token(Token { kind: TokenKind::OpenDelim(delim), span }) => {
387-
stack
388-
.push(FrameData { open_delim_sp: Some((delim, span, spacing)), inner: vec![] });
389-
}
390-
FlatToken::Token(Token { kind: TokenKind::CloseDelim(delim), span }) => {
391-
let frame_data = stack
392-
.pop()
393-
.unwrap_or_else(|| panic!("Token stack was empty for token: {token:?}"));
394-
395-
let (open_delim, open_sp, open_spacing) = frame_data.open_delim_sp.unwrap();
396-
assert_eq!(
397-
open_delim, delim,
398-
"Mismatched open/close delims: open={open_delim:?} close={span:?}"
399-
);
400-
let dspan = DelimSpan::from_pair(open_sp, span);
401-
let dspacing = DelimSpacing::new(open_spacing, spacing);
402-
let stream = AttrTokenStream::new(frame_data.inner);
403-
let delimited = AttrTokenTree::Delimited(dspan, dspacing, delim, stream);
404-
stack
405-
.last_mut()
406-
.unwrap_or_else(|| panic!("Bottom token frame is missing for token: {token:?}"))
407-
.inner
408-
.push(delimited);
409-
}
410-
FlatToken::Token(token) => stack
411-
.last_mut()
412-
.expect("Bottom token frame is missing!")
413-
.inner
414-
.push(AttrTokenTree::Token(token, spacing)),
415-
FlatToken::AttrsTarget(target) => stack
416-
.last_mut()
417-
.expect("Bottom token frame is missing!")
418-
.inner
419-
.push(AttrTokenTree::AttrsTarget(target)),
420-
FlatToken::Empty => {}
421-
}
422-
token_and_spacing = iter.next();
423-
}
424-
let mut final_buf = stack.pop().expect("Missing final buf!");
425-
if break_last_token {
426-
let last_token = final_buf.inner.pop().unwrap();
427-
if let AttrTokenTree::Token(last_token, spacing) = last_token {
428-
let unglued_first = last_token.kind.break_two_token_op().unwrap().0;
429-
430-
// An 'unglued' token is always two ASCII characters
431-
let mut first_span = last_token.span.shrink_to_lo();
432-
first_span = first_span.with_hi(first_span.lo() + rustc_span::BytePos(1));
433-
434-
final_buf
435-
.inner
436-
.push(AttrTokenTree::Token(Token::new(unglued_first, first_span), spacing));
437-
} else {
438-
panic!("Unexpected last token {last_token:?}")
439-
}
440-
}
441-
AttrTokenStream::new(final_buf.inner)
442-
}
443-
444273
// Some types are used a lot. Make sure they don't unintentionally get bigger.
445274
#[cfg(target_pointer_width = "64")]
446275
mod size_asserts {
447276
use super::*;
448277
use rustc_data_structures::static_assert_size;
449278
// tidy-alphabetical-start
450279
static_assert_size!(AttrWrapper, 16);
451-
static_assert_size!(LazyAttrTokenStreamImpl, 96);
452280
// tidy-alphabetical-end
453281
}

0 commit comments

Comments
 (0)