Skip to content

Commit f5d92ee

Browse files
bvanjoikdy1
authored and committed
perf(es/lexer): Do not scan number if there's no underscore (#10788)
1 parent 28fc643 commit f5d92ee

File tree

2 files changed

+69
-44
lines changed

2 files changed

+69
-44
lines changed

crates/swc_ecma_lexer/src/common/lexer/mod.rs

Lines changed: 62 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,7 @@
11
use std::borrow::Cow;
22

3-
use ascii::AsciiChar;
43
use char::{Char, CharExt};
54
use comments_buffer::{BufferedComment, BufferedCommentKind};
6-
use cow_replace::ReplaceString;
75
use either::Either::{self, Left, Right};
86
use num_bigint::BigInt as BigIntValue;
97
use smartstring::{LazyCompact, SmartString};
@@ -67,6 +65,16 @@ static TEMPLATE_LITERAL_TABLE: SafeByteMatchTable =
6765

6866
pub type LexResult<T> = Result<T, crate::error::Error>;
6967

68+
/// Returns `s` with every `_` removed.
///
/// `has_underscore` is a hint supplied by the caller so that `s` is not
/// scanned again: when it is `false`, `s` is returned borrowed without any
/// allocation; when it is `true`, a new `String` is built with the `_`
/// characters filtered out. The hint is only cross-checked against the
/// contents of `s` through `debug_assert!`.
fn remove_underscore(s: &str, has_underscore: bool) -> Cow<'_, str> {
69+
if has_underscore {
70+
debug_assert!(s.contains('_'));
71+
s.chars().filter(|&c| c != '_').collect::<String>().into()
72+
} else {
73+
debug_assert!(!s.contains('_'));
74+
Cow::Borrowed(s)
75+
}
76+
}
77+
7078
pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
7179
type State: self::state::State;
7280
type Token: token::TokenFactory<'a, TokenAndSpan, Self, Lexer = Self>;
@@ -510,6 +518,7 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
510518
&mut self,
511519
mut op: F,
512520
allow_num_separator: bool,
521+
has_underscore: &mut bool,
513522
) -> LexResult<Ret>
514523
where
515524
F: FnMut(Ret, u8, u32) -> LexResult<(Ret, bool)>,
@@ -529,41 +538,44 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
529538
let mut prev = None;
530539

531540
while let Some(c) = self.cur() {
532-
if allow_num_separator && c == '_' {
533-
let is_allowed = |c: Option<char>| {
534-
let Some(c) = c else {
535-
return false;
541+
if c == '_' {
542+
*has_underscore = true;
543+
if allow_num_separator {
544+
let is_allowed = |c: Option<char>| {
545+
let Some(c) = c else {
546+
return false;
547+
};
548+
c.is_digit(RADIX as _)
536549
};
537-
c.is_digit(RADIX as _)
538-
};
539-
let is_forbidden = |c: Option<char>| {
540-
let Some(c) = c else {
541-
return false;
550+
let is_forbidden = |c: Option<char>| {
551+
let Some(c) = c else {
552+
return false;
553+
};
554+
555+
if RADIX == 16 {
556+
matches!(c, '.' | 'X' | '_' | 'x')
557+
} else {
558+
matches!(c, '.' | 'B' | 'E' | 'O' | '_' | 'b' | 'e' | 'o')
559+
}
542560
};
543561

544-
if RADIX == 16 {
545-
matches!(c, '.' | 'X' | '_' | 'x')
546-
} else {
547-
matches!(c, '.' | 'B' | 'E' | 'O' | '_' | 'b' | 'e' | 'o')
548-
}
549-
};
562+
let next = self.input().peek();
550563

551-
let next = self.input().peek();
564+
if !is_allowed(next) || is_forbidden(prev) || is_forbidden(next) {
565+
self.emit_error(
566+
start,
567+
SyntaxError::NumericSeparatorIsAllowedOnlyBetweenTwoDigits,
568+
);
569+
}
552570

553-
if !is_allowed(next) || is_forbidden(prev) || is_forbidden(next) {
554-
self.emit_error(
555-
start,
556-
SyntaxError::NumericSeparatorIsAllowedOnlyBetweenTwoDigits,
557-
);
558-
}
571+
// Ignore this _ character
572+
unsafe {
573+
// Safety: cur() returns Some(c) where c is a valid char
574+
self.input_mut().bump();
575+
}
559576

560-
// Ignore this _ character
561-
unsafe {
562-
// Safety: cur() returns Some(c) where c is a valid char
563-
self.input_mut().bump();
577+
continue;
564578
}
565-
566-
continue;
567579
}
568580

569581
// e.g. (val for a) = 10 where radix = 16
@@ -602,6 +614,7 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
602614

603615
let mut not_octal = false;
604616
let mut read_any = false;
617+
let mut has_underscore = false;
605618

606619
self.read_digits::<_, (), RADIX>(
607620
|_, _, v| {
@@ -614,6 +627,7 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
614627
Ok(((), true))
615628
},
616629
true,
630+
&mut has_underscore,
617631
)?;
618632

619633
if !read_any {
@@ -624,6 +638,7 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
624638
start,
625639
end: self.cur_pos(),
626640
not_octal,
641+
has_underscore,
627642
})
628643
}
629644

@@ -635,6 +650,7 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
635650
debug_assert!(self.cur().is_some());
636651

637652
let start = self.cur_pos();
653+
let mut has_underscore = false;
638654

639655
let lazy_integer = if starts_with_dot {
640656
// first char is '.'
@@ -646,6 +662,7 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
646662
start,
647663
end: start,
648664
not_octal: true,
665+
has_underscore: false,
649666
}
650667
} else {
651668
let starts_with_zero = self.cur().unwrap() == '0';
@@ -693,7 +710,8 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
693710
self.emit_strict_mode_error(start, SyntaxError::LegacyDecimal);
694711
} else {
695712
// It's Legacy octal, and we should reinterpret value.
696-
let val = parse_integer::<8>(s);
713+
let s = remove_underscore(s, lazy_integer.has_underscore);
714+
let val = parse_integer::<8>(&s);
697715
let end = self.cur_pos();
698716
let raw = unsafe {
699717
// Safety: We got both start and end position from `self.input`
@@ -709,6 +727,7 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
709727
lazy_integer
710728
};
711729

730+
has_underscore |= lazy_integer.has_underscore;
712731
// At this point, number cannot be an octal literal.
713732

714733
let has_dot = self.cur() == Some('.');
@@ -722,7 +741,7 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
722741
debug_assert!(!starts_with_dot || self.cur().is_some_and(|cur| cur.is_ascii_digit()));
723742

724743
// Read numbers after dot
725-
self.read_digits::<_, (), 10>(|_, _, _| Ok(((), true)), true)?;
744+
self.read_digits::<_, (), 10>(|_, _, _| Ok(((), true)), true, &mut has_underscore)?;
726745
}
727746

728747
let has_e = self.cur().is_some_and(|c| c == 'e' || c == 'E');
@@ -747,7 +766,8 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
747766
self.bump(); // remove '+', '-'
748767
}
749768

750-
self.read_number_no_dot_as_str::<10>()?;
769+
let lazy_integer = self.read_number_no_dot_as_str::<10>()?;
770+
has_underscore |= lazy_integer.has_underscore;
751771
}
752772

753773
let val = if has_dot || has_e {
@@ -757,12 +777,12 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
757777
self.input_slice(start, end)
758778
};
759779

760-
raw.remove_all_ascii(AsciiChar::UnderScore)
761-
.parse()
762-
.expect("failed to parse float literal")
780+
let raw = remove_underscore(raw, has_underscore);
781+
raw.parse().expect("failed to parse float literal")
763782
} else {
764783
let s = unsafe { self.input_slice(lazy_integer.start, lazy_integer.end) };
765-
parse_integer::<10>(s)
784+
let s = remove_underscore(s, has_underscore);
785+
parse_integer::<10>(&s)
766786
};
767787

768788
self.ensure_not_ident()?;
@@ -795,6 +815,7 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
795815
Ok((Some(total), count != len))
796816
},
797817
true,
818+
&mut false,
798819
)?;
799820
if len != 0 && count != len {
800821
Ok(None)
@@ -822,6 +843,8 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
822843
self.bump();
823844

824845
let lazy_integer = self.read_number_no_dot_as_str::<RADIX>()?;
846+
let has_underscore = lazy_integer.has_underscore;
847+
825848
let s = unsafe {
826849
// Safety: We got both start and end position from `self.input`
827850
self.input_slice(lazy_integer.start, lazy_integer.end)
@@ -836,7 +859,8 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
836859
let bigint_value = num_bigint::BigInt::parse_bytes(s.as_bytes(), RADIX as _).unwrap();
837860
return Ok(Either::Right((Box::new(bigint_value), self.atom(raw))));
838861
}
839-
let val = parse_integer::<RADIX>(s);
862+
let s = remove_underscore(s, has_underscore);
863+
let val = parse_integer::<RADIX>(&s);
840864

841865
self.ensure_not_ident()?;
842866

crates/swc_ecma_lexer/src/common/lexer/number.rs

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,28 @@
1-
use cow_replace::ReplaceString;
21
use swc_common::BytePos;
32

43
/// Span of an integer literal's digits in the input, plus flags recorded
/// while the digits were read; the text is sliced out of the input later.
pub struct LazyInteger {
54
/// Position in the input where the digits start.
pub(super) start: BytePos,
65
/// Position in the input just past the last character read.
pub(super) end: BytePos,
76
/// `true` if there was `8` or `9`
87
pub(super) not_octal: bool,
8+
/// `true` if a `_` was encountered while reading the digits.
pub(super) has_underscore: bool,
99
}
1010

1111
const MAX_SAFE_INT: u64 = 9007199254740991;
1212

1313
pub(super) fn parse_integer<const RADIX: u8>(s: &str) -> f64 {
1414
debug_assert!(matches!(RADIX, 2 | 8 | 10 | 16));
1515
debug_assert!(!s.is_empty());
16-
let s = s.remove_all_ascii(ascii::AsciiChar::UnderScore);
16+
debug_assert!(!s.contains('_'));
17+
1718
if RADIX == 10 {
18-
parse_integer_from_dec(&s)
19+
parse_integer_from_dec(s)
1920
} else if RADIX == 16 {
20-
parse_integer_from_hex(&s)
21+
parse_integer_from_hex(s)
2122
} else if RADIX == 2 {
22-
parse_integer_from_bin(&s)
23+
parse_integer_from_bin(s)
2324
} else if RADIX == 8 {
24-
parse_integer_from_oct(&s)
25+
parse_integer_from_oct(s)
2526
} else {
2627
unreachable!()
2728
}

0 commit comments

Comments
 (0)