Skip to content

Speed up Parser::expected_tokens #133793

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Dec 19, 2024
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Make TokenType::from_u32 foolproof.
Currently it relies on having the right integer for every variant, and
if you add a variant you need to adjust the integers for all subsequent
variants, which is a pain.

This commit introduces a match guard formulation that takes advantage of
the enum-to-integer conversion to avoid specifying the integer for each
variant. And it does this via a macro to avoid lots of boilerplate.
  • Loading branch information
nnethercote committed Dec 19, 2024
commit df56c50cee0edad5ecc6e4535d361505407b989a
241 changes: 126 additions & 115 deletions compiler/rustc_parse/src/parser/token_type.rs
Original file line number Diff line number Diff line change
Expand Up @@ -145,124 +145,135 @@ pub enum TokenType {
// tidy-alphabetical-end
}

// Expands to a `match` that turns an integer back into the `TokenType`
// variant whose discriminant equals it, so callers only list variant names.
macro_rules! from_u32_match {
    ($val:ident; $($variant:ident,)+) => {
        // Every arm is a catch-all binding plus a guard that compares against
        // `TokenType::X as u32`, rather than a literal pattern such as
        // `0 => TokenType::Eq`. The cast supplies the number, so no integer
        // has to be written out (or renumbered) for any variant.
        match $val {
            $( raw if raw == TokenType::$variant as u32 => TokenType::$variant, )+
            _ => panic!("unhandled value: {}", $val),
        }
    };
}

impl TokenType {
/// Converts a raw `u32` back into the `TokenType` variant whose
/// enum-to-integer conversion (`variant as u32`) yields that value.
///
/// # Panics
///
/// Panics with an "unhandled value" message if `val` matches no listed
/// variant, and via the trailing `assert_eq!` if the selected variant does
/// not convert back to `val`.
fn from_u32(val: u32) -> TokenType {
let token_type = match val {
0 => TokenType::Eq,
1 => TokenType::Lt,
2 => TokenType::Le,
3 => TokenType::EqEq,
4 => TokenType::Gt,
5 => TokenType::AndAnd,
6 => TokenType::OrOr,
7 => TokenType::Not,
8 => TokenType::Tilde,

9 => TokenType::Plus,
10 => TokenType::Minus,
11 => TokenType::Star,
12 => TokenType::And,
13 => TokenType::Or,

14 => TokenType::At,
15 => TokenType::Dot,
16 => TokenType::DotDot,
17 => TokenType::DotDotDot,
18 => TokenType::DotDotEq,
19 => TokenType::Comma,
20 => TokenType::Semi,
21 => TokenType::Colon,
22 => TokenType::PathSep,
23 => TokenType::RArrow,
24 => TokenType::FatArrow,
25 => TokenType::Pound,
26 => TokenType::Question,
27 => TokenType::OpenParen,
28 => TokenType::CloseParen,
29 => TokenType::OpenBrace,
30 => TokenType::CloseBrace,
31 => TokenType::OpenBracket,
32 => TokenType::CloseBracket,
33 => TokenType::Eof,

34 => TokenType::Operator,
35 => TokenType::Ident,
36 => TokenType::Lifetime,
37 => TokenType::Path,
38 => TokenType::Type,
39 => TokenType::Const,

40 => TokenType::KwAs,
41 => TokenType::KwAsync,
42 => TokenType::KwAuto,
43 => TokenType::KwAwait,
44 => TokenType::KwBecome,
45 => TokenType::KwBox,
46 => TokenType::KwBreak,
47 => TokenType::KwCatch,
48 => TokenType::KwConst,
49 => TokenType::KwContinue,
50 => TokenType::KwCrate,
51 => TokenType::KwDefault,
52 => TokenType::KwDyn,
53 => TokenType::KwElse,
54 => TokenType::KwEnum,
55 => TokenType::KwExtern,
56 => TokenType::KwFn,
57 => TokenType::KwFor,
58 => TokenType::KwGen,
59 => TokenType::KwIf,
60 => TokenType::KwImpl,
61 => TokenType::KwIn,
62 => TokenType::KwLet,
63 => TokenType::KwLoop,
64 => TokenType::KwMacro,
65 => TokenType::KwMacroRules,
66 => TokenType::KwMatch,
67 => TokenType::KwMod,
68 => TokenType::KwMove,
69 => TokenType::KwMut,
70 => TokenType::KwPub,
71 => TokenType::KwRaw,
72 => TokenType::KwRef,
73 => TokenType::KwReturn,
74 => TokenType::KwReuse,
75 => TokenType::KwSafe,
76 => TokenType::KwSelfUpper,
77 => TokenType::KwStatic,
78 => TokenType::KwStruct,
79 => TokenType::KwTrait,
80 => TokenType::KwTry,
81 => TokenType::KwType,
82 => TokenType::KwUnderscore,
83 => TokenType::KwUnsafe,
84 => TokenType::KwUse,
85 => TokenType::KwWhere,
86 => TokenType::KwWhile,
87 => TokenType::KwYield,

88 => TokenType::SymAttSyntax,
89 => TokenType::SymClobberAbi,
90 => TokenType::SymInlateout,
91 => TokenType::SymInout,
92 => TokenType::SymIs,
93 => TokenType::SymLabel,
94 => TokenType::SymLateout,
95 => TokenType::SymMayUnwind,
96 => TokenType::SymNomem,
97 => TokenType::SymNoreturn,
98 => TokenType::SymNostack,
99 => TokenType::SymOptions,
100 => TokenType::SymOut,
101 => TokenType::SymPreservesFlags,
102 => TokenType::SymPure,
103 => TokenType::SymReadonly,
104 => TokenType::SymSym,

_ => panic!("unhandled value: {val}"),
let token_type = from_u32_match! { val;
Eq,
Lt,
Le,
EqEq,
Gt,
AndAnd,
OrOr,
Not,
Tilde,

Plus,
Minus,
Star,
And,
Or,

At,
Dot,
DotDot,
DotDotDot,
DotDotEq,
Comma,
Semi,
Colon,
PathSep,
RArrow,
FatArrow,
Pound,
Question,
OpenParen,
CloseParen,
OpenBrace,
CloseBrace,
OpenBracket,
CloseBracket,
Eof,

Operator,
Ident,
Lifetime,
Path,
Type,
Const,

KwAs,
KwAsync,
KwAuto,
KwAwait,
KwBecome,
KwBox,
KwBreak,
KwCatch,
KwConst,
KwContinue,
KwCrate,
KwDefault,
KwDyn,
KwElse,
KwEnum,
KwExtern,
KwFn,
KwFor,
KwGen,
KwIf,
KwImpl,
KwIn,
KwLet,
KwLoop,
KwMacro,
KwMacroRules,
KwMatch,
KwMod,
KwMove,
KwMut,
KwPub,
KwRaw,
KwRef,
KwReturn,
KwReuse,
KwSafe,
KwSelfUpper,
KwStatic,
KwStruct,
KwTrait,
KwTry,
KwType,
KwUnderscore,
KwUnsafe,
KwUse,
KwWhere,
KwWhile,
KwYield,

SymAttSyntax,
SymClobberAbi,
SymInlateout,
SymInout,
SymIs,
SymLabel,
SymLateout,
SymMayUnwind,
SymNomem,
SymNoreturn,
SymNostack,
SymOptions,
SymOut,
SymPreservesFlags,
SymPure,
SymReadonly,
SymSym,
};
// This assertion will detect if this method and the type definition get out of sync.
assert_eq!(token_type as u32, val);
token_type
}

Expand Down