Skip to content

Commit c181f5f

Browse files
overlookmotel authored and Boshen committed
perf(lexer): only check for hashbang at start of file
1 parent c72f49e commit c181f5f

File tree

5 files changed

+36
-14
lines changed

5 files changed

+36
-14
lines changed

crates/oxc_parser/src/lexer/byte_handlers.rs

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -241,13 +241,7 @@ ascii_byte_handler!(QOS(lexer) {
241241
// #
242242
ascii_byte_handler!(HAS(lexer) {
243243
lexer.consume_char();
244-
// HashbangComment ::
245-
// `#!` SingleLineCommentChars?
246-
if lexer.token.start() == 0 && lexer.next_ascii_byte_eq(b'!') {
247-
lexer.read_hashbang_comment()
248-
} else {
249-
lexer.private_identifier()
250-
}
244+
lexer.private_identifier()
251245
});
252246

253247
// `A..=Z`, `a..=z` (except special cases below), `_`, `$`

crates/oxc_parser/src/lexer/comment.rs

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -180,15 +180,25 @@ impl<'a> Lexer<'a> {
180180
}
181181
}
182182

183-
/// Section 12.5 Hashbang Comments
184-
pub(super) fn read_hashbang_comment(&mut self) -> Kind {
183+
/// Section 12.5 Hashbang Comments.
184+
///
185+
/// # SAFETY
186+
/// Next 2 bytes must be `#!`.
187+
pub(super) unsafe fn read_hashbang_comment(&mut self) -> Kind {
188+
debug_assert!(self.peek_2_bytes() == Some([b'#', b'!']));
189+
190+
// SAFETY: Caller guarantees next 2 bytes are `#!`
191+
unsafe {
192+
self.source.next_byte_unchecked();
193+
self.source.next_byte_unchecked();
194+
}
195+
185196
while let Some(c) = self.peek_char() {
186197
if is_line_terminator(c) {
187198
break;
188199
}
189200
self.consume_char();
190201
}
191-
self.token.set_is_on_new_line(true);
192202
Kind::HashbangComment
193203
}
194204
}

crates/oxc_parser/src/lexer/mod.rs

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,21 @@ impl<'a> Lexer<'a> {
166166
self.context = context;
167167
}
168168

169-
/// Main entry point
169+
/// Read first token in file.
170+
pub fn first_token(&mut self) -> Token {
171+
// HashbangComment ::
172+
// `#!` SingleLineCommentChars?
173+
let kind = if let Some([b'#', b'!']) = self.peek_2_bytes() {
174+
// SAFETY: Next 2 bytes are `#!`
175+
unsafe { self.read_hashbang_comment() }
176+
} else {
177+
self.read_next_token()
178+
};
179+
self.finish_next(kind)
180+
}
181+
182+
/// Read next token in file.
183+
/// Use `first_token` for first token, and this method for all further tokens.
170184
pub fn next_token(&mut self) -> Token {
171185
let kind = self.read_next_token();
172186
self.finish_next(kind)
@@ -273,6 +287,7 @@ impl<'a> Lexer<'a> {
273287

274288
/// Read each char and set the current token
275289
/// Whitespace and line terminators are skipped
290+
#[inline] // Make sure is inlined into `next_token`
276291
fn read_next_token(&mut self) -> Kind {
277292
self.trivia_builder.has_pure_comment = false;
278293
self.trivia_builder.has_no_side_effects_comment = false;

crates/oxc_parser/src/lib.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -513,8 +513,9 @@ impl<'a> ParserImpl<'a> {
513513

514514
#[expect(clippy::cast_possible_truncation)]
515515
fn parse_program(&mut self) -> Program<'a> {
516-
// initialize cur_token and prev_token by moving onto the first token
517-
self.bump_any();
516+
// Initialize by moving onto the first token.
517+
// Checks for hashbang comment.
518+
self.token = self.lexer.first_token();
518519

519520
let hashbang = self.parse_hashbang();
520521
let (directives, statements) =

tasks/benchmark/benches/lexer.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,9 @@ fn bench_lexer(criterion: &mut Criterion) {
5252
let mut allocator = Allocator::default();
5353
b.iter(|| {
5454
let mut lexer = Lexer::new_for_benchmarks(&allocator, source_text, source_type);
55-
while lexer.next_token().kind() != Kind::Eof {}
55+
if lexer.first_token().kind() != Kind::Eof {
56+
while lexer.next_token().kind() != Kind::Eof {}
57+
}
5658
allocator.reset();
5759
});
5860
});

0 commit comments

Comments (0)