From 64bfdfe2232397467ee40de8f32a8e56a97a9401 Mon Sep 17 00:00:00 2001 From: overlookmotel <557937+overlookmotel@users.noreply.github.com> Date: Tue, 7 Jan 2025 06:58:24 +0000 Subject: [PATCH] refactor(lexer): tighten safety of lexer by always including lifetime on `SourcePosition` (#8293) Always use `SourcePosition<'a>` not `SourcePosition`, to ensure not reading from a `Source` with a `SourcePosition` which is from *another* `Source`. This doesn't definitively guard against UB - that is the job of `UniquePromise` - but it adds another level of defence. --- crates/oxc_parser/src/lexer/comment.rs | 4 ++-- crates/oxc_parser/src/lexer/identifier.rs | 9 ++++++--- crates/oxc_parser/src/lexer/source.rs | 22 ++++++++++++++-------- crates/oxc_parser/src/lexer/template.rs | 2 +- 4 files changed, 23 insertions(+), 14 deletions(-) diff --git a/crates/oxc_parser/src/lexer/comment.rs b/crates/oxc_parser/src/lexer/comment.rs index 7646e9fb38aae..ddd2645a756ef 100644 --- a/crates/oxc_parser/src/lexer/comment.rs +++ b/crates/oxc_parser/src/lexer/comment.rs @@ -22,7 +22,7 @@ static LINE_BREAK_TABLE: SafeByteMatchTable = static MULTILINE_COMMENT_START_TABLE: SafeByteMatchTable = safe_byte_match_table!(|b| matches!(b, b'*' | b'\r' | b'\n' | LS_OR_PS_FIRST)); -impl Lexer<'_> { +impl<'a> Lexer<'a> { /// Section 12.4 Single Line Comment pub(super) fn skip_single_line_comment(&mut self) -> Kind { byte_search! { @@ -151,7 +151,7 @@ impl Lexer<'_> { Kind::Skip } - fn skip_multi_line_comment_after_line_break(&mut self, pos: SourcePosition) -> Kind { + fn skip_multi_line_comment_after_line_break(&mut self, pos: SourcePosition<'a>) -> Kind { // Can use `memchr` here as only searching for 1 pattern. // Cache `Finder` instance on `Lexer` as there's a significant cost to creating it. // `Finder::new` isn't a const function, so can't make it a `static`, and `lazy_static!` diff --git a/crates/oxc_parser/src/lexer/identifier.rs b/crates/oxc_parser/src/lexer/identifier.rs index 7dde35d16efbf..21161230c9698 100644 --- a/crates/oxc_parser/src/lexer/identifier.rs +++ b/crates/oxc_parser/src/lexer/identifier.rs @@ -98,7 +98,7 @@ impl<'a> Lexer<'a> { /// Handle rest of identifier after first byte of a multi-byte Unicode char found. /// Any number of characters can have already been consumed from `self.source` prior to it. /// `self.source` should be positioned at start of Unicode character. - fn identifier_tail_unicode(&mut self, start_pos: SourcePosition) -> &'a str { + fn identifier_tail_unicode(&mut self, start_pos: SourcePosition<'a>) -> &'a str { let c = self.peek_char().unwrap(); if is_identifier_part_unicode(c) { self.consume_char(); @@ -113,7 +113,10 @@ impl<'a> Lexer<'a> { /// /// First char should have been consumed from `self.source` prior to calling this. /// `start_pos` should be position of the start of the identifier (before first char was consumed). - pub(super) fn identifier_tail_after_unicode(&mut self, start_pos: SourcePosition) -> &'a str { + pub(super) fn identifier_tail_after_unicode( + &mut self, + start_pos: SourcePosition<'a>, + ) -> &'a str { // Identifier contains a Unicode chars, so probably contains more. // So just iterate over chars now, instead of bytes. while let Some(c) = self.peek_char() { @@ -146,7 +149,7 @@ impl<'a> Lexer<'a> { /// /// The `\` must not have be consumed from `lexer.source`. /// `start_pos` must be position of start of identifier. - fn identifier_backslash(&mut self, start_pos: SourcePosition, is_start: bool) -> &'a str { + fn identifier_backslash(&mut self, start_pos: SourcePosition<'a>, is_start: bool) -> &'a str { // Create arena string to hold unescaped identifier. // We don't know how long identifier will end up being. Take a guess that total length // will be double what we've seen so far, or `MIN_ESCAPED_STR_LEN` minimum. diff --git a/crates/oxc_parser/src/lexer/source.rs b/crates/oxc_parser/src/lexer/source.rs index 608a781b17a22..ce057077d84b6 100644 --- a/crates/oxc_parser/src/lexer/source.rs +++ b/crates/oxc_parser/src/lexer/source.rs @@ -167,7 +167,7 @@ impl<'a> Source<'a> { /// Move current position. #[inline] - pub(super) fn set_position(&mut self, pos: SourcePosition) { + pub(super) fn set_position(&mut self, pos: SourcePosition<'a>) { // `SourcePosition` always upholds the invariants of `Source`, as long as it's created // from this `Source`. `SourcePosition`s can only be created from a `Source`. // `Source::new` takes a `UniquePromise`, which guarantees that it's the only `Source` @@ -216,7 +216,7 @@ impl<'a> Source<'a> { } /// Get string slice from a `SourcePosition` up to the current position of `Source`. - pub(super) fn str_from_pos_to_current(&self, pos: SourcePosition) -> &'a str { + pub(super) fn str_from_pos_to_current(&self, pos: SourcePosition<'a>) -> &'a str { assert!(pos.ptr <= self.ptr); // SAFETY: The above assertion satisfies `str_from_pos_to_current_unchecked`'s requirements unsafe { self.str_from_pos_to_current_unchecked(pos) } @@ -230,7 +230,10 @@ impl<'a> Source<'a> { /// 1. `Source::set_position` has not been called since `pos` was created. /// 2. `pos` has not been advanced with `SourcePosition::add`. #[inline] - pub(super) unsafe fn str_from_pos_to_current_unchecked(&self, pos: SourcePosition) -> &'a str { + pub(super) unsafe fn str_from_pos_to_current_unchecked( + &self, + pos: SourcePosition<'a>, + ) -> &'a str { // SAFETY: Caller guarantees `pos` is not after current position of `Source`. // `self.ptr` is always a valid `SourcePosition` due to invariants of `Source`. self.str_between_positions_unchecked(pos, SourcePosition::new(self.ptr)) @@ -244,7 +247,10 @@ impl<'a> Source<'a> { /// 1. `Source::set_position` has not been called since `pos` was created. /// 2. `pos` has not been moved backwards with `SourcePosition::sub`. #[inline] - pub(super) unsafe fn str_from_current_to_pos_unchecked(&self, pos: SourcePosition) -> &'a str { + pub(super) unsafe fn str_from_current_to_pos_unchecked( + &self, + pos: SourcePosition<'a>, + ) -> &'a str { // SAFETY: Caller guarantees `pos` is not before current position of `Source`. // `self.ptr` is always a valid `SourcePosition` due to invariants of `Source`. self.str_between_positions_unchecked(SourcePosition::new(self.ptr), pos) @@ -252,7 +258,7 @@ impl<'a> Source<'a> { /// Get string slice from a `SourcePosition` up to the end of `Source`. #[inline] - pub(super) fn str_from_pos_to_end(&self, pos: SourcePosition) -> &'a str { + pub(super) fn str_from_pos_to_end(&self, pos: SourcePosition<'a>) -> &'a str { // SAFETY: Invariants of `SourcePosition` is that it cannot be after end of `Source`, // and always on a UTF-8 character boundary. // `self.end` is always a valid `SourcePosition` due to invariants of `Source`. @@ -266,8 +272,8 @@ impl<'a> Source<'a> { #[inline] pub(super) unsafe fn str_between_positions_unchecked( &self, - start: SourcePosition, - end: SourcePosition, + start: SourcePosition<'a>, + end: SourcePosition<'a>, ) -> &'a str { // Check `start` is not after `end` debug_assert!(start.ptr <= end.ptr); @@ -304,7 +310,7 @@ impl<'a> Source<'a> { /// Get offset of `pos`. #[expect(clippy::cast_possible_truncation)] #[inline] - pub(super) fn offset_of(&self, pos: SourcePosition) -> u32 { + pub(super) fn offset_of(&self, pos: SourcePosition<'a>) -> u32 { // Cannot overflow `u32` because of `MAX_LEN` check in `Source::new` (pos.addr() - self.start as usize) as u32 } diff --git a/crates/oxc_parser/src/lexer/template.rs b/crates/oxc_parser/src/lexer/template.rs index aa6605a204e74..b850233e176d9 100644 --- a/crates/oxc_parser/src/lexer/template.rs +++ b/crates/oxc_parser/src/lexer/template.rs @@ -167,7 +167,7 @@ impl<'a> Lexer<'a> { /// # SAFETY /// `pos` must not be before `self.source.position()` #[expect(clippy::unnecessary_safety_comment)] - unsafe fn template_literal_create_string(&self, pos: SourcePosition) -> String<'a> { + unsafe fn template_literal_create_string(&self, pos: SourcePosition<'a>) -> String<'a> { // Create arena string to hold modified template literal. // We don't know how long template literal will end up being. Take a guess that total length // will be double what we've seen so far, or `MIN_ESCAPED_TEMPLATE_LIT_LEN` minimum.