@@ -76,6 +76,8 @@ export interface Scanner {
76
76
getTokenFlags ( ) : TokenFlags ;
77
77
reScanGreaterToken ( ) : SyntaxKind ;
78
78
reScanSlashToken ( ) : SyntaxKind ;
79
+ /** @internal */
80
+ reScanSlashToken ( reportErrors ?: boolean ) : SyntaxKind ; // eslint-disable-line @typescript-eslint/unified-signatures
79
81
reScanAsteriskEqualsToken ( ) : SyntaxKind ;
80
82
reScanTemplateToken ( isTaggedTemplate : boolean ) : SyntaxKind ;
81
83
/** @deprecated use {@link reScanTemplateToken}(false) */
@@ -1484,7 +1486,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
1484
1486
// | [0-3] [0-7] [0-7]?
1485
1487
// | [4-7] [0-7]
1486
1488
// NonOctalDecimalEscapeSequence ::= [89]
1487
- function scanEscapeSequence ( shouldEmitInvalidEscapeError : boolean , isRegularExpression : boolean ) : string {
1489
+ function scanEscapeSequence ( shouldEmitInvalidEscapeError : boolean , isRegularExpression : boolean | "annex-b" ) : string {
1488
1490
const start = pos ;
1489
1491
pos ++ ;
1490
1492
if ( pos >= end ) {
@@ -1523,7 +1525,9 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
1523
1525
tokenFlags |= TokenFlags . ContainsInvalidEscape ;
1524
1526
if ( isRegularExpression || shouldEmitInvalidEscapeError ) {
1525
1527
const code = parseInt ( text . substring ( start + 1 , pos ) , 8 ) ;
1526
- error ( Diagnostics . Octal_escape_sequences_are_not_allowed_Use_the_syntax_0 , start , pos - start , "\\x" + code . toString ( 16 ) . padStart ( 2 , "0" ) ) ;
1528
+ if ( isRegularExpression !== "annex-b" ) {
1529
+ error ( Diagnostics . Octal_escape_sequences_are_not_allowed_Use_the_syntax_0 , start , pos - start , "\\x" + code . toString ( 16 ) . padStart ( 2 , "0" ) ) ;
1530
+ }
1527
1531
return String . fromCharCode ( code ) ;
1528
1532
}
1529
1533
return text . substring ( start , pos ) ;
@@ -1559,7 +1563,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
1559
1563
) {
1560
1564
// '\u{DDDDDD}'
1561
1565
pos -= 2 ;
1562
- return scanExtendedUnicodeEscape ( isRegularExpression || shouldEmitInvalidEscapeError ) ;
1566
+ return scanExtendedUnicodeEscape ( ! ! isRegularExpression || shouldEmitInvalidEscapeError ) ;
1563
1567
}
1564
1568
// '\uDDDD'
1565
1569
for ( ; pos < start + 6 ; pos ++ ) {
@@ -1623,7 +1627,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
1623
1627
case CharacterCodes . paragraphSeparator :
1624
1628
return "" ;
1625
1629
default :
1626
- if ( isRegularExpression && ( shouldEmitInvalidEscapeError || isIdentifierPart ( ch , languageVersion ) ) ) {
1630
+ if ( isRegularExpression === true && ( shouldEmitInvalidEscapeError || isIdentifierPart ( ch , languageVersion ) ) ) {
1627
1631
error ( Diagnostics . This_character_cannot_be_escaped_in_a_regular_expression , pos - 2 , 2 ) ;
1628
1632
}
1629
1633
return String . fromCharCode ( ch ) ;
@@ -2386,7 +2390,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
2386
2390
return token = SyntaxKind . EqualsToken ;
2387
2391
}
2388
2392
2389
- function reScanSlashToken ( ) : SyntaxKind {
2393
+ function reScanSlashToken ( reportErrors ?: boolean ) : SyntaxKind {
2390
2394
if ( token === SyntaxKind . SlashToken || token === SyntaxKind . SlashEqualsToken ) {
2391
2395
// Quickly scan to the end of the regex so that we know the flags
2392
2396
let p = tokenStart + 1 ;
@@ -2444,44 +2448,57 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
2444
2448
if ( ! isIdentifierPart ( ch , languageVersion ) ) {
2445
2449
break ;
2446
2450
}
2447
- const flag = characterToRegularExpressionFlag ( String . fromCharCode ( ch ) ) ;
2448
- if ( flag === undefined ) {
2449
- error ( Diagnostics . Unknown_regular_expression_flag , p , 1 ) ;
2450
- }
2451
- else if ( regExpFlags & flag ) {
2452
- error ( Diagnostics . Duplicate_regular_expression_flag , p , 1 ) ;
2453
- }
2454
- else if ( ( ( regExpFlags | flag ) & RegularExpressionFlags . UnicodeMode ) === RegularExpressionFlags . UnicodeMode ) {
2455
- error ( Diagnostics . The_Unicode_u_flag_and_the_Unicode_Sets_v_flag_cannot_be_set_simultaneously , p , 1 ) ;
2456
- }
2457
- else {
2458
- regExpFlags |= flag ;
2459
- const availableFrom = regExpFlagToFirstAvailableLanguageVersion . get ( flag ) ! ;
2460
- if ( languageVersion < availableFrom ) {
2461
- error ( Diagnostics . This_regular_expression_flag_is_only_available_when_targeting_0_or_later , p , 1 , getNameOfScriptTarget ( availableFrom ) ) ;
2451
+ if ( reportErrors ) {
2452
+ const flag = characterToRegularExpressionFlag ( String . fromCharCode ( ch ) ) ;
2453
+ if ( flag === undefined ) {
2454
+ error ( Diagnostics . Unknown_regular_expression_flag , p , 1 ) ;
2455
+ }
2456
+ else if ( regExpFlags & flag ) {
2457
+ error ( Diagnostics . Duplicate_regular_expression_flag , p , 1 ) ;
2458
+ }
2459
+ else if ( ( ( regExpFlags | flag ) & RegularExpressionFlags . UnicodeMode ) === RegularExpressionFlags . UnicodeMode ) {
2460
+ error ( Diagnostics . The_Unicode_u_flag_and_the_Unicode_Sets_v_flag_cannot_be_set_simultaneously , p , 1 ) ;
2461
+ }
2462
+ else {
2463
+ regExpFlags |= flag ;
2464
+ const availableFrom = regExpFlagToFirstAvailableLanguageVersion . get ( flag ) ! ;
2465
+ if ( languageVersion < availableFrom ) {
2466
+ error ( Diagnostics . This_regular_expression_flag_is_only_available_when_targeting_0_or_later , p , 1 , getNameOfScriptTarget ( availableFrom ) ) ;
2467
+ }
2462
2468
}
2463
2469
}
2464
2470
p ++ ;
2465
2471
}
2466
- pos = tokenStart + 1 ;
2467
- const saveTokenPos = tokenStart ;
2468
- const saveTokenFlags = tokenFlags ;
2469
- scanRegularExpressionWorker ( text , endOfBody , regExpFlags , isUnterminated ) ;
2470
- if ( ! isUnterminated ) {
2472
+ if ( reportErrors ) {
2473
+ pos = tokenStart + 1 ;
2474
+ const saveTokenPos = tokenStart ;
2475
+ const saveTokenFlags = tokenFlags ;
2476
+ scanRegularExpressionWorker ( text , endOfBody , regExpFlags , isUnterminated , /*annexB*/ true ) ;
2477
+ if ( ! isUnterminated ) {
2478
+ pos = p ;
2479
+ }
2480
+ tokenStart = saveTokenPos ;
2481
+ tokenFlags = saveTokenFlags ;
2482
+ }
2483
+ else {
2471
2484
pos = p ;
2472
2485
}
2473
- tokenStart = saveTokenPos ;
2474
- tokenFlags = saveTokenFlags ;
2475
2486
tokenValue = text . substring ( tokenStart , pos ) ;
2476
2487
token = SyntaxKind . RegularExpressionLiteral ;
2477
2488
}
2478
2489
return token ;
2479
2490
2480
- function scanRegularExpressionWorker ( text : string , end : number , regExpFlags : RegularExpressionFlags , isUnterminated : boolean ) {
2481
- /** Grammar parameter */
2482
- const unicodeMode = ! ! ( regExpFlags & RegularExpressionFlags . UnicodeMode ) ;
2491
+ function scanRegularExpressionWorker ( text : string , end : number , regExpFlags : RegularExpressionFlags , isUnterminated : boolean , annexB : boolean ) {
2483
2492
/** Grammar parameter */
2484
2493
const unicodeSetsMode = ! ! ( regExpFlags & RegularExpressionFlags . UnicodeSets ) ;
2494
+ /** Grammar parameter */
2495
+ const unicodeMode = ! ! ( regExpFlags & RegularExpressionFlags . UnicodeMode ) ;
2496
+
2497
+ if ( unicodeMode ) {
2498
+ // Annex B extensions never apply in unicode mode (`u` or `v` flag); the strict syntax is used instead.
2499
+ annexB = false ;
2500
+ }
2501
+
2485
2502
/** @see {scanClassSetExpression} */
2486
2503
let mayContainStrings = false ;
2487
2504
@@ -2571,7 +2588,8 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
2571
2588
case CharacterCodes . equals :
2572
2589
case CharacterCodes . exclamation :
2573
2590
pos ++ ;
2574
- isPreviousTermQuantifiable = false ;
2591
+ // In Annex B, `(?=Disjunction)` and `(?!Disjunction)` are quantifiable
2592
+ isPreviousTermQuantifiable = annexB ;
2575
2593
break ;
2576
2594
case CharacterCodes . lessThan :
2577
2595
const groupNameStart = pos ;
@@ -2763,7 +2781,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
2763
2781
default :
2764
2782
// The scanEscapeSequence call in scanCharacterEscape must return non-empty strings
2765
2783
// since there must not be line breaks in a regex literal
2766
- Debug . assert ( scanCharacterClassEscape ( ) || scanDecimalEscape ( ) || scanCharacterEscape ( ) ) ;
2784
+ Debug . assert ( scanCharacterClassEscape ( ) || scanDecimalEscape ( ) || scanCharacterEscape ( /*atomEscape*/ true ) ) ;
2767
2785
break ;
2768
2786
}
2769
2787
}
@@ -2788,7 +2806,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
2788
2806
// IdentityEscape ::=
2789
2807
// | '^' | '$' | '/' | '\' | '.' | '*' | '+' | '?' | '(' | ')' | '[' | ']' | '{' | '}' | '|'
2790
2808
// | [~UnicodeMode] (any other non-identifier characters)
2791
- function scanCharacterEscape ( ) : string {
2809
+ function scanCharacterEscape ( atomEscape : boolean ) : string {
2792
2810
Debug . assertEqual ( text . charCodeAt ( pos - 1 ) , CharacterCodes . backslash ) ;
2793
2811
let ch = text . charCodeAt ( pos ) ;
2794
2812
switch ( ch ) {
@@ -2802,6 +2820,15 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
2802
2820
if ( unicodeMode ) {
2803
2821
error ( Diagnostics . c_must_be_followed_by_an_ASCII_letter , pos - 2 , 2 ) ;
2804
2822
}
2823
+ else if ( atomEscape && annexB ) {
2824
+ // Annex B treats
2825
+ //
2826
+ // ExtendedAtom : `\` [lookahead = `c`]
2827
+ //
2828
+ // as the single character `\` when `c` isn't followed by a valid control character
2829
+ pos -- ;
2830
+ return "\\" ;
2831
+ }
2805
2832
return String . fromCharCode ( ch ) ;
2806
2833
case CharacterCodes . caret :
2807
2834
case CharacterCodes . $ :
@@ -2826,7 +2853,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
2826
2853
return "\\" ;
2827
2854
}
2828
2855
pos -- ;
2829
- return scanEscapeSequence ( /*shouldEmitInvalidEscapeError*/ unicodeMode , /*isRegularExpression*/ true ) ;
2856
+ return scanEscapeSequence ( /*shouldEmitInvalidEscapeError*/ unicodeMode , /*isRegularExpression*/ annexB ? "annex-b" : true ) ;
2830
2857
}
2831
2858
}
2832
2859
@@ -2873,12 +2900,12 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
2873
2900
if ( isClassContentExit ( ch ) ) {
2874
2901
return ;
2875
2902
}
2876
- if ( ! minCharacter ) {
2903
+ if ( ! minCharacter && ! annexB ) {
2877
2904
error ( Diagnostics . A_character_class_range_must_not_be_bounded_by_another_character_class , minStart , pos - 1 - minStart ) ;
2878
2905
}
2879
2906
const maxStart = pos ;
2880
2907
const maxCharacter = scanClassAtom ( ) ;
2881
- if ( ! maxCharacter ) {
2908
+ if ( ! maxCharacter && ! annexB ) {
2882
2909
error ( Diagnostics . A_character_class_range_must_not_be_bounded_by_another_character_class , maxStart , pos - maxStart ) ;
2883
2910
continue ;
2884
2911
}
@@ -3208,7 +3235,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
3208
3235
pos ++ ;
3209
3236
return String . fromCharCode ( ch ) ;
3210
3237
default :
3211
- return scanCharacterEscape ( ) ;
3238
+ return scanCharacterEscape ( /*atomEscape*/ false ) ;
3212
3239
}
3213
3240
}
3214
3241
else if ( ch === text . charCodeAt ( pos + 1 ) ) {
@@ -3275,7 +3302,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
3275
3302
if ( scanCharacterClassEscape ( ) ) {
3276
3303
return "" ;
3277
3304
}
3278
- return scanCharacterEscape ( ) ;
3305
+ return scanCharacterEscape ( /*atomEscape*/ false ) ;
3279
3306
}
3280
3307
}
3281
3308
else {
@@ -3407,7 +3434,9 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
3407
3434
}
3408
3435
} ) ;
3409
3436
forEach ( decimalEscapes , escape => {
3410
- if ( escape . value > numberOfCapturingGroups ) {
3437
+ // In Annex B, if a DecimalEscape is greater than the number of capturing groups then it is treated as
3438
+ // either a LegacyOctalEscapeSequence or IdentityEscape
3439
+ if ( ! annexB && escape . value > numberOfCapturingGroups ) {
3411
3440
if ( numberOfCapturingGroups ) {
3412
3441
error ( Diagnostics . A_decimal_escape_must_refer_to_an_existent_capturing_group_There_are_only_0_capturing_groups_in_this_regular_expression , escape . pos , escape . end - escape . pos , numberOfCapturingGroups ) ;
3413
3442
}
0 commit comments