Skip to content

Commit 102c916

Browse files
committed
refactor(lexer): simplify byte handler macros
1 parent ab685bd commit 102c916

File tree

1 file changed

+38
-74
lines changed

1 file changed

+38
-74
lines changed

crates/oxc_parser/src/lexer/byte_handlers.rs

Lines changed: 38 additions & 74 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,5 @@
1+
use oxc_data_structures::assert_unchecked;
2+
13
use crate::diagnostics;
24

35
use super::{Kind, Lexer};
@@ -41,50 +43,14 @@ static BYTE_HANDLERS: [ByteHandler; 256] = [
4143
UNI, UNI, UNI, UNI, UNI, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, // F
4244
];
4345

44-
/// Macro for defining a byte handler.
45-
///
46-
/// Use `ascii_byte_handler!` macro for ASCII characters, which adds optimizations for ASCII.
47-
///
48-
/// Handlers are defined as functions instead of closures, so they have names in flame graphs.
49-
///
50-
/// ```
51-
/// byte_handler!(UNI(lexer) {
52-
/// lexer.unicode_char_handler()
53-
/// });
54-
/// ```
55-
///
56-
/// expands to:
57-
///
58-
/// ```
59-
/// const UNI: ByteHandler = {
60-
/// #[expect(non_snake_case)]
61-
/// fn UNI(lexer: &mut Lexer) -> Kind {
62-
/// lexer.unicode_char_handler()
63-
/// }
64-
/// UNI
65-
/// };
66-
/// ```
67-
macro_rules! byte_handler {
68-
($id:ident($lex:ident) $body:expr) => {
69-
const $id: ByteHandler = {
70-
#[expect(non_snake_case)]
71-
fn $id($lex: &mut Lexer) -> Kind {
72-
$body
73-
}
74-
$id
75-
};
76-
};
77-
}
78-
7946
/// Macro for defining byte handler for an ASCII character.
8047
///
81-
/// In addition to defining a `const` for the handler, it also asserts that lexer
82-
/// is not at end of file, and that next char is ASCII.
48+
/// Asserts that lexer is not at end of file, and that next char is ASCII.
8349
/// Where the handler is for an ASCII character, these assertions are self-evidently true.
8450
///
8551
/// These assertions produce no runtime code, but hint to the compiler that it can assume that
8652
/// next char is ASCII, and it uses that information to optimize the rest of the handler.
87-
/// e.g. `lexer.consume_char()` becomes just a single assembler instruction.
53+
/// e.g. `lexer.consume_char()` becomes just a single assembly instruction.
8854
/// Without the assertions, the compiler is unable to deduce the next char is ASCII, due to
8955
/// the indirection of the `BYTE_HANDLERS` jump table.
9056
///
@@ -95,42 +61,38 @@ macro_rules! byte_handler {
9561
///
9662
/// ```
9763
/// ascii_byte_handler!(SPS(lexer) {
98-
/// lexer.consume_char();
99-
/// Kind::WhiteSpace
64+
/// lexer.consume_char();
65+
/// Kind::WhiteSpace
10066
/// });
10167
/// ```
10268
///
10369
/// expands to:
10470
///
10571
/// ```
106-
/// const SPS: ByteHandler = {
107-
/// #[expect(non_snake_case)]
108-
/// fn SPS(lexer: &mut Lexer) {
72+
/// #[expect(non_snake_case)]
73+
/// fn SPS(lexer: &mut Lexer) -> Kind {
10974
/// // SAFETY: This macro is only used for ASCII characters
11075
/// unsafe {
111-
/// use oxc_data_structures::assert_unchecked;
112-
/// assert_unchecked!(!lexer.source.is_eof());
113-
/// assert_unchecked!(lexer.source.peek_byte_unchecked() < 128);
76+
/// assert_unchecked!(!lexer.source.is_eof());
77+
/// assert_unchecked!(lexer.source.peek_byte_unchecked() < 128);
11478
/// }
11579
/// {
116-
/// lexer.consume_char();
117-
/// Kind::WhiteSpace
80+
/// lexer.consume_char();
81+
/// Kind::WhiteSpace
11882
/// }
119-
/// }
120-
/// SPS
121-
/// };
83+
/// }
12284
/// ```
12385
macro_rules! ascii_byte_handler {
12486
($id:ident($lex:ident) $body:expr) => {
125-
byte_handler!($id($lex) {
87+
#[expect(non_snake_case)]
88+
fn $id($lex: &mut Lexer) -> Kind {
12689
// SAFETY: This macro is only used for ASCII characters
12790
unsafe {
128-
use oxc_data_structures::assert_unchecked;
12991
assert_unchecked!(!$lex.source.is_eof());
13092
assert_unchecked!($lex.source.peek_byte_unchecked() < 128);
13193
}
13294
$body
133-
});
95+
}
13496
};
13597
}
13698

@@ -148,36 +110,34 @@ macro_rules! ascii_byte_handler {
148110
///
149111
/// ```
150112
/// ascii_identifier_handler!(L_G(id_without_first_char) match id_without_first_char {
151-
/// "et" => Kind::Get,
152-
/// "lobal" => Kind::Global,
153-
/// _ => Kind::Ident,
113+
/// "et" => Kind::Get,
114+
/// "lobal" => Kind::Global,
115+
/// _ => Kind::Ident,
154116
/// });
155117
/// ```
156118
///
157119
/// expands to:
158120
///
159121
/// ```
160-
/// const L_G: ByteHandler = {
161-
/// #[expect(non_snake_case)]
162-
/// fn L_G(lexer: &mut Lexer) -> Kind {
122+
/// #[expect(non_snake_case)]
123+
/// fn L_G(lexer: &mut Lexer) -> Kind {
163124
/// // SAFETY: This macro is only used for ASCII characters
164125
/// let id_without_first_char = unsafe { lexer.identifier_name_handler() };
165126
/// match id_without_first_char {
166-
/// "et" => Kind::Get,
167-
/// "lobal" => Kind::Global,
168-
/// _ => Kind::Ident,
127+
/// "et" => Kind::Get,
128+
/// "lobal" => Kind::Global,
129+
/// _ => Kind::Ident,
169130
/// }
170-
/// }
171-
/// L_G
172-
/// };
131+
/// }
173132
/// ```
174133
macro_rules! ascii_identifier_handler {
175134
($id:ident($str:ident) $body:expr) => {
176-
byte_handler!($id(lexer) {
135+
#[expect(non_snake_case)]
136+
fn $id(lexer: &mut Lexer) -> Kind {
177137
// SAFETY: This macro is only used for ASCII characters
178138
let $str = unsafe { lexer.identifier_name_handler() };
179139
$body
180-
});
140+
}
181141
};
182142
}
183143

@@ -687,17 +647,21 @@ ascii_identifier_handler!(L_Y(id_without_first_char) match id_without_first_char
687647
});
688648

689649
// Non-ASCII characters.
690-
// NB: Must not use `ascii_byte_handler!` macro, as this handler is for non-ASCII chars.
691-
byte_handler!(UNI(lexer) {
650+
//
651+
// Note: Must not use `ascii_byte_handler!` macro, as this handler is for non-ASCII chars.
652+
#[expect(non_snake_case)]
653+
fn UNI(lexer: &mut Lexer) -> Kind {
692654
lexer.unicode_char_handler()
693-
});
655+
}
694656

695657
// UTF-8 continuation bytes (0x80 - 0xBF) (i.e. middle of a multi-byte UTF-8 sequence)
696658
// + and byte values which are not legal in UTF-8 strings (0xC0, 0xC1, 0xF5 - 0xFF).
697659
// `handle_byte()` should only be called with 1st byte of a valid UTF-8 character,
698660
// so something has gone wrong if we get here.
699661
// https://datatracker.ietf.org/doc/html/rfc3629
700-
// NB: Must not use `ascii_byte_handler!` macro, as this handler is for non-ASCII bytes.
701-
byte_handler!(UER(_lexer) {
662+
//
663+
// Note: Must not use `ascii_byte_handler!` macro, as this handler is for non-ASCII bytes.
664+
#[expect(non_snake_case)]
665+
fn UER(_lexer: &mut Lexer) -> Kind {
702666
unreachable!();
703-
});
667+
}

0 commit comments

Comments
 (0)