11use std:: borrow:: Cow ;
22
3- use ascii:: AsciiChar ;
43use char:: { Char , CharExt } ;
54use comments_buffer:: { BufferedComment , BufferedCommentKind } ;
6- use cow_replace:: ReplaceString ;
75use either:: Either :: { self , Left , Right } ;
86use num_bigint:: BigInt as BigIntValue ;
97use smartstring:: { LazyCompact , SmartString } ;
@@ -67,6 +65,16 @@ static TEMPLATE_LITERAL_TABLE: SafeByteMatchTable =
6765
6866pub type LexResult < T > = Result < T , crate :: error:: Error > ;
6967
/// Returns `s` with every `_` (numeric separator) removed.
///
/// `has_underscore` is a hint computed by the caller while scanning the
/// literal, so this function does not have to search `s` again:
///
/// * `false` - `s` is returned as-is, borrowed, without allocating.
/// * `true`  - a new `String` is built that contains every character of `s`
///   except `_`.
///
/// The hint must agree with the actual contents of `s`. This is only verified
/// by `debug_assert!`, i.e. in builds with debug assertions enabled.
fn remove_underscore(s: &str, has_underscore: bool) -> Cow<'_, str> {
    if !has_underscore {
        debug_assert!(!s.contains('_'));
        return Cow::Borrowed(s);
    }

    debug_assert!(s.contains('_'));

    // `Filter` reports a size-hint lower bound of 0, so collecting straight
    // into a `String` would start empty and regrow while copying. The result
    // can never be longer than `s`, so reserve that much once up front.
    let mut cleaned = String::with_capacity(s.len());
    cleaned.extend(s.chars().filter(|&c| c != '_'));
    Cow::Owned(cleaned)
}
77+
7078pub trait Lexer < ' a , TokenAndSpan > : Tokens < TokenAndSpan > + Sized {
7179 type State : self :: state:: State ;
7280 type Token : token:: TokenFactory < ' a , TokenAndSpan , Self , Lexer = Self > ;
@@ -510,6 +518,7 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
510518 & mut self ,
511519 mut op : F ,
512520 allow_num_separator : bool ,
521+ has_underscore : & mut bool ,
513522 ) -> LexResult < Ret >
514523 where
515524 F : FnMut ( Ret , u8 , u32 ) -> LexResult < ( Ret , bool ) > ,
@@ -529,41 +538,44 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
529538 let mut prev = None ;
530539
531540 while let Some ( c) = self . cur ( ) {
532- if allow_num_separator && c == '_' {
533- let is_allowed = |c : Option < char > | {
534- let Some ( c) = c else {
535- return false ;
541+ if c == '_' {
542+ * has_underscore = true ;
543+ if allow_num_separator {
544+ let is_allowed = |c : Option < char > | {
545+ let Some ( c) = c else {
546+ return false ;
547+ } ;
548+ c. is_digit ( RADIX as _ )
536549 } ;
537- c. is_digit ( RADIX as _ )
538- } ;
539- let is_forbidden = |c : Option < char > | {
540- let Some ( c) = c else {
541- return false ;
550+ let is_forbidden = |c : Option < char > | {
551+ let Some ( c) = c else {
552+ return false ;
553+ } ;
554+
555+ if RADIX == 16 {
556+ matches ! ( c, '.' | 'X' | '_' | 'x' )
557+ } else {
558+ matches ! ( c, '.' | 'B' | 'E' | 'O' | '_' | 'b' | 'e' | 'o' )
559+ }
542560 } ;
543561
544- if RADIX == 16 {
545- matches ! ( c, '.' | 'X' | '_' | 'x' )
546- } else {
547- matches ! ( c, '.' | 'B' | 'E' | 'O' | '_' | 'b' | 'e' | 'o' )
548- }
549- } ;
562+ let next = self . input ( ) . peek ( ) ;
550563
551- let next = self . input ( ) . peek ( ) ;
564+ if !is_allowed ( next) || is_forbidden ( prev) || is_forbidden ( next) {
565+ self . emit_error (
566+ start,
567+ SyntaxError :: NumericSeparatorIsAllowedOnlyBetweenTwoDigits ,
568+ ) ;
569+ }
552570
553- if !is_allowed ( next) || is_forbidden ( prev) || is_forbidden ( next) {
554- self . emit_error (
555- start,
556- SyntaxError :: NumericSeparatorIsAllowedOnlyBetweenTwoDigits ,
557- ) ;
558- }
571+ // Ignore this _ character
572+ unsafe {
573+ // Safety: cur() returns Some(c) where c is a valid char
574+ self . input_mut ( ) . bump ( ) ;
575+ }
559576
560- // Ignore this _ character
561- unsafe {
562- // Safety: cur() returns Some(c) where c is a valid char
563- self . input_mut ( ) . bump ( ) ;
577+ continue ;
564578 }
565-
566- continue ;
567579 }
568580
569581 // e.g. (val for a) = 10 where radix = 16
@@ -602,6 +614,7 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
602614
603615 let mut not_octal = false ;
604616 let mut read_any = false ;
617+ let mut has_underscore = false ;
605618
606619 self . read_digits :: < _ , ( ) , RADIX > (
607620 |_, _, v| {
@@ -614,6 +627,7 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
614627 Ok ( ( ( ) , true ) )
615628 } ,
616629 true ,
630+ & mut has_underscore,
617631 ) ?;
618632
619633 if !read_any {
@@ -624,6 +638,7 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
624638 start,
625639 end : self . cur_pos ( ) ,
626640 not_octal,
641+ has_underscore,
627642 } )
628643 }
629644
@@ -635,6 +650,7 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
635650 debug_assert ! ( self . cur( ) . is_some( ) ) ;
636651
637652 let start = self . cur_pos ( ) ;
653+ let mut has_underscore = false ;
638654
639655 let lazy_integer = if starts_with_dot {
640656 // first char is '.'
@@ -646,6 +662,7 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
646662 start,
647663 end : start,
648664 not_octal : true ,
665+ has_underscore : false ,
649666 }
650667 } else {
651668 let starts_with_zero = self . cur ( ) . unwrap ( ) == '0' ;
@@ -693,7 +710,8 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
693710 self . emit_strict_mode_error ( start, SyntaxError :: LegacyDecimal ) ;
694711 } else {
695712 // It's Legacy octal, and we should reinterpret value.
696- let val = parse_integer :: < 8 > ( s) ;
713+ let s = remove_underscore ( s, lazy_integer. has_underscore ) ;
714+ let val = parse_integer :: < 8 > ( & s) ;
697715 let end = self . cur_pos ( ) ;
698716 let raw = unsafe {
699717 // Safety: We got both start and end position from `self.input`
@@ -709,6 +727,7 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
709727 lazy_integer
710728 } ;
711729
730+ has_underscore |= lazy_integer. has_underscore ;
712731 // At this point, number cannot be an octal literal.
713732
714733 let has_dot = self . cur ( ) == Some ( '.' ) ;
@@ -722,7 +741,7 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
722741 debug_assert ! ( !starts_with_dot || self . cur( ) . is_some_and( |cur| cur. is_ascii_digit( ) ) ) ;
723742
724743 // Read numbers after dot
725- self . read_digits :: < _ , ( ) , 10 > ( |_, _, _| Ok ( ( ( ) , true ) ) , true ) ?;
744+ self . read_digits :: < _ , ( ) , 10 > ( |_, _, _| Ok ( ( ( ) , true ) ) , true , & mut has_underscore ) ?;
726745 }
727746
728747 let has_e = self . cur ( ) . is_some_and ( |c| c == 'e' || c == 'E' ) ;
@@ -747,7 +766,8 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
747766 self . bump ( ) ; // remove '+', '-'
748767 }
749768
750- self . read_number_no_dot_as_str :: < 10 > ( ) ?;
769+ let lazy_integer = self . read_number_no_dot_as_str :: < 10 > ( ) ?;
770+ has_underscore |= lazy_integer. has_underscore ;
751771 }
752772
753773 let val = if has_dot || has_e {
@@ -757,12 +777,12 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
757777 self . input_slice ( start, end)
758778 } ;
759779
760- raw. remove_all_ascii ( AsciiChar :: UnderScore )
761- . parse ( )
762- . expect ( "failed to parse float literal" )
780+ let raw = remove_underscore ( raw, has_underscore) ;
781+ raw. parse ( ) . expect ( "failed to parse float literal" )
763782 } else {
764783 let s = unsafe { self . input_slice ( lazy_integer. start , lazy_integer. end ) } ;
765- parse_integer :: < 10 > ( s)
784+ let s = remove_underscore ( s, has_underscore) ;
785+ parse_integer :: < 10 > ( & s)
766786 } ;
767787
768788 self . ensure_not_ident ( ) ?;
@@ -795,6 +815,7 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
795815 Ok ( ( Some ( total) , count != len) )
796816 } ,
797817 true ,
818+ & mut false ,
798819 ) ?;
799820 if len != 0 && count != len {
800821 Ok ( None )
@@ -822,6 +843,8 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
822843 self . bump ( ) ;
823844
824845 let lazy_integer = self . read_number_no_dot_as_str :: < RADIX > ( ) ?;
846+ let has_underscore = lazy_integer. has_underscore ;
847+
825848 let s = unsafe {
826849 // Safety: We got both start and end position from `self.input`
827850 self . input_slice ( lazy_integer. start , lazy_integer. end )
@@ -836,7 +859,8 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
836859 let bigint_value = num_bigint:: BigInt :: parse_bytes ( s. as_bytes ( ) , RADIX as _ ) . unwrap ( ) ;
837860 return Ok ( Either :: Right ( ( Box :: new ( bigint_value) , self . atom ( raw) ) ) ) ;
838861 }
839- let val = parse_integer :: < RADIX > ( s) ;
862+ let s = remove_underscore ( s, has_underscore) ;
863+ let val = parse_integer :: < RADIX > ( & s) ;
840864
841865 self . ensure_not_ident ( ) ?;
842866
0 commit comments