Skip to content

Commit c181f5f

Browse files
overlookmotel authored and Boshen committed
perf(lexer): only check for hashbang at start of file
1 parent c72f49e commit c181f5f

File tree

5 files changed

+36
-14
lines changed

5 files changed

+36
-14
lines changed

crates/oxc_parser/src/lexer/byte_handlers.rs

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -241,13 +241,7 @@ ascii_byte_handler!(QOS(lexer) {
241241
// #
242242
ascii_byte_handler!(HAS(lexer) {
243243
lexer.consume_char();
244-
// HashbangComment ::
245-
// `#!` SingleLineCommentChars?
246-
if lexer.token.start() == 0 && lexer.next_ascii_byte_eq(b'!') {
247-
lexer.read_hashbang_comment()
248-
} else {
249-
lexer.private_identifier()
250-
}
244+
lexer.private_identifier()
251245
});
252246

253247
// `A..=Z`, `a..=z` (except special cases below), `_`, `$`

crates/oxc_parser/src/lexer/comment.rs

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -180,15 +180,25 @@ impl<'a> Lexer<'a> {
180180
}
181181
}
182182

183-
/// Section 12.5 Hashbang Comments
184-
pub(super) fn read_hashbang_comment(&mut self) -> Kind {
183+
/// Section 12.5 Hashbang Comments.
184+
///
185+
/// # SAFETY
186+
/// Next 2 bytes must be `#!`.
187+
pub(super) unsafe fn read_hashbang_comment(&mut self) -> Kind {
188+
debug_assert!(self.peek_2_bytes() == Some([b'#', b'!']));
189+
190+
// SAFETY: Caller guarantees next 2 bytes are `#!`
191+
unsafe {
192+
self.source.next_byte_unchecked();
193+
self.source.next_byte_unchecked();
194+
}
195+
185196
while let Some(c) = self.peek_char() {
186197
if is_line_terminator(c) {
187198
break;
188199
}
189200
self.consume_char();
190201
}
191-
self.token.set_is_on_new_line(true);
192202
Kind::HashbangComment
193203
}
194204
}

crates/oxc_parser/src/lexer/mod.rs

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,21 @@ impl<'a> Lexer<'a> {
166166
self.context = context;
167167
}
168168

169-
/// Main entry point
169+
/// Read first token in file.
170+
pub fn first_token(&mut self) -> Token {
171+
// HashbangComment ::
172+
// `#!` SingleLineCommentChars?
173+
let kind = if let Some([b'#', b'!']) = self.peek_2_bytes() {
174+
// SAFETY: Next 2 bytes are `#!`
175+
unsafe { self.read_hashbang_comment() }
176+
} else {
177+
self.read_next_token()
178+
};
179+
self.finish_next(kind)
180+
}
181+
182+
/// Read next token in file.
183+
/// Use `first_token` for first token, and this method for all further tokens.
170184
pub fn next_token(&mut self) -> Token {
171185
let kind = self.read_next_token();
172186
self.finish_next(kind)
@@ -273,6 +287,7 @@ impl<'a> Lexer<'a> {
273287

274288
/// Read each char and set the current token
275289
/// Whitespace and line terminators are skipped
290+
#[inline] // Make sure is inlined into `next_token`
276291
fn read_next_token(&mut self) -> Kind {
277292
self.trivia_builder.has_pure_comment = false;
278293
self.trivia_builder.has_no_side_effects_comment = false;

crates/oxc_parser/src/lib.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -513,8 +513,9 @@ impl<'a> ParserImpl<'a> {
513513

514514
#[expect(clippy::cast_possible_truncation)]
515515
fn parse_program(&mut self) -> Program<'a> {
516-
// initialize cur_token and prev_token by moving onto the first token
517-
self.bump_any();
516+
// Initialize by moving onto the first token.
517+
// Checks for hashbang comment.
518+
self.token = self.lexer.first_token();
518519

519520
let hashbang = self.parse_hashbang();
520521
let (directives, statements) =

tasks/benchmark/benches/lexer.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,9 @@ fn bench_lexer(criterion: &mut Criterion) {
5252
let mut allocator = Allocator::default();
5353
b.iter(|| {
5454
let mut lexer = Lexer::new_for_benchmarks(&allocator, source_text, source_type);
55-
while lexer.next_token().kind() != Kind::Eof {}
55+
if lexer.first_token().kind() != Kind::Eof {
56+
while lexer.next_token().kind() != Kind::Eof {}
57+
}
5658
allocator.reset();
5759
});
5860
});

0 commit comments

Comments (0)