Skip to content

Commit e28ad99

Browse files
authored
Report RegExp errors in grammar check, use Annex B grammar (#58295)
1 parent e6ba82b commit e28ad99

20 files changed

+184
-262
lines changed

src/compiler/checker.ts

Lines changed: 48 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@ import {
102102
countWhere,
103103
createBinaryExpressionTrampoline,
104104
createCompilerDiagnostic,
105+
createDetachedDiagnostic,
105106
createDiagnosticCollection,
106107
createDiagnosticForFileFromMessageChain,
107108
createDiagnosticForNode,
@@ -123,6 +124,7 @@ import {
123124
createPrinterWithRemoveCommentsNeverAsciiEscape,
124125
createPrinterWithRemoveCommentsOmitTrailingSemicolon,
125126
createPropertyNameNodeForIdentifierOrLiteral,
127+
createScanner,
126128
createSymbolTable,
127129
createSyntacticTypeNodeBuilder,
128130
createTextWriter,
@@ -937,6 +939,7 @@ import {
937939
rangeOfTypeParameters,
938940
ReadonlyKeyword,
939941
reduceLeft,
942+
RegularExpressionLiteral,
940943
RelationComparisonResult,
941944
relativeComplement,
942945
removeExtension,
@@ -953,6 +956,7 @@ import {
953956
ReverseMappedType,
954957
sameMap,
955958
SatisfiesExpression,
959+
Scanner,
956960
scanTokenAtPosition,
957961
ScriptKind,
958962
ScriptTarget,
@@ -1446,6 +1450,7 @@ export function createTypeChecker(host: TypeCheckerHost): TypeChecker {
14461450
var requestedExternalEmitHelperNames = new Set<string>();
14471451
var requestedExternalEmitHelpers: ExternalEmitHelpers;
14481452
var externalHelpersModule: Symbol;
1453+
var scanner: Scanner | undefined;
14491454

14501455
var Symbol = objectAllocator.getSymbolConstructor();
14511456
var Type = objectAllocator.getTypeConstructor();
@@ -31353,6 +31358,48 @@ export function createTypeChecker(host: TypeCheckerHost): TypeChecker {
3135331358
}
3135431359
}
3135531360

31361+
/**
 * Re-scans a regular expression literal with error reporting enabled and
 * records every error the scanner raises in the checker's `diagnostics`.
 *
 * Nothing is scanned when the containing source file already has parse
 * diagnostics.
 *
 * @param node The regular expression literal to validate.
 * @returns `true` when at least one diagnostic was added for the literal,
 * otherwise `false`.
 */
function checkGrammarRegularExpressionLiteral(node: RegularExpressionLiteral) {
    const file = getSourceFileOfNode(node);
    if (hasParseDiagnostics(file)) {
        return false;
    }

    let previousError: DiagnosticWithLocation | undefined;

    // The scanner is created on first use and shared by later calls; only its
    // target, variant, error callback and text are reconfigured here.
    scanner ??= createScanner(ScriptTarget.ESNext, /*skipTrivia*/ true);
    const regExpScanner = scanner;
    regExpScanner.setScriptTarget(file.languageVersion);
    regExpScanner.setLanguageVariant(file.languageVariant);
    regExpScanner.setOnError((message, length, arg0) => {
        // Mirrors `parseErrorAtPosition` from parser.ts.
        const errorStart = regExpScanner.getTokenEnd();
        if (previousError && errorStart === previousError.start) {
            // A second report at the same start is never added as a new
            // diagnostic. It is attached as related information only when it
            // is a `Message` covering exactly the same span.
            if (message.category === DiagnosticCategory.Message && length === previousError.length) {
                const related = createDetachedDiagnostic(file.fileName, file.text, errorStart, length, message, arg0);
                addRelatedInfo(previousError, related);
            }
            return;
        }
        previousError = createFileDiagnostic(file, errorStart, length, message, arg0);
        diagnostics.add(previousError);
    });

    // Restrict the scanner to the text range of this literal.
    regExpScanner.setText(file.text, node.pos, node.end - node.pos);
    try {
        regExpScanner.scan();
        const rescannedKind = regExpScanner.reScanSlashToken(/*reportErrors*/ true);
        Debug.assert(rescannedKind === SyntaxKind.RegularExpressionLiteral, "Expected scanner to rescan RegularExpressionLiteral");
        return !!previousError;
    }
    finally {
        // Always detach the text and the callback so the shared scanner does
        // not keep referring to this file or this closure.
        regExpScanner.setText("");
        regExpScanner.setOnError(/*onError*/ undefined);
    }
}
31393+
31394+
/**
 * Returns the type of a regular expression literal and, the first time the
 * node is seen, queues its grammar check as a lazy diagnostic.
 *
 * @param node The regular expression literal being checked.
 * @returns `globalRegExpType` in every case.
 */
function checkRegularExpressionLiteral(node: RegularExpressionLiteral) {
    const links = getNodeLinks(node);
    const alreadyChecked = (links.flags & NodeCheckFlags.TypeChecked) !== 0;
    if (alreadyChecked) {
        return globalRegExpType;
    }

    // Set the flag before queuing so the grammar check is queued only once per node.
    links.flags |= NodeCheckFlags.TypeChecked;
    addLazyDiagnostic(() => checkGrammarRegularExpressionLiteral(node));
    return globalRegExpType;
}
31402+
3135631403
function checkSpreadExpression(node: SpreadElement, checkMode?: CheckMode): Type {
3135731404
if (languageVersion < LanguageFeatureMinimumTarget.SpreadElements) {
3135831405
checkExternalEmitHelpers(node, compilerOptions.downlevelIteration ? ExternalEmitHelpers.SpreadIncludes : ExternalEmitHelpers.SpreadArray);
@@ -39662,7 +39709,7 @@ export function createTypeChecker(host: TypeCheckerHost): TypeChecker {
3966239709
case SyntaxKind.TemplateExpression:
3966339710
return checkTemplateExpression(node as TemplateExpression);
3966439711
case SyntaxKind.RegularExpressionLiteral:
39665-
return globalRegExpType;
39712+
return checkRegularExpressionLiteral(node as RegularExpressionLiteral);
3966639713
case SyntaxKind.ArrayLiteralExpression:
3966739714
return checkArrayLiteral(node as ArrayLiteralExpression, checkMode, forceTuple);
3966839715
case SyntaxKind.ObjectLiteralExpression:

src/compiler/scanner.ts

Lines changed: 68 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,8 @@ export interface Scanner {
7676
getTokenFlags(): TokenFlags;
7777
reScanGreaterToken(): SyntaxKind;
7878
reScanSlashToken(): SyntaxKind;
79+
/** @internal */
80+
reScanSlashToken(reportErrors?: boolean): SyntaxKind; // eslint-disable-line @typescript-eslint/unified-signatures
7981
reScanAsteriskEqualsToken(): SyntaxKind;
8082
reScanTemplateToken(isTaggedTemplate: boolean): SyntaxKind;
8183
/** @deprecated use {@link reScanTemplateToken}(false) */
@@ -1484,7 +1486,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
14841486
// | [0-3] [0-7] [0-7]?
14851487
// | [4-7] [0-7]
14861488
// NonOctalDecimalEscapeSequence ::= [89]
1487-
function scanEscapeSequence(shouldEmitInvalidEscapeError: boolean, isRegularExpression: boolean): string {
1489+
function scanEscapeSequence(shouldEmitInvalidEscapeError: boolean, isRegularExpression: boolean | "annex-b"): string {
14881490
const start = pos;
14891491
pos++;
14901492
if (pos >= end) {
@@ -1523,7 +1525,9 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
15231525
tokenFlags |= TokenFlags.ContainsInvalidEscape;
15241526
if (isRegularExpression || shouldEmitInvalidEscapeError) {
15251527
const code = parseInt(text.substring(start + 1, pos), 8);
1526-
error(Diagnostics.Octal_escape_sequences_are_not_allowed_Use_the_syntax_0, start, pos - start, "\\x" + code.toString(16).padStart(2, "0"));
1528+
if (isRegularExpression !== "annex-b") {
1529+
error(Diagnostics.Octal_escape_sequences_are_not_allowed_Use_the_syntax_0, start, pos - start, "\\x" + code.toString(16).padStart(2, "0"));
1530+
}
15271531
return String.fromCharCode(code);
15281532
}
15291533
return text.substring(start, pos);
@@ -1559,7 +1563,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
15591563
) {
15601564
// '\u{DDDDDD}'
15611565
pos -= 2;
1562-
return scanExtendedUnicodeEscape(isRegularExpression || shouldEmitInvalidEscapeError);
1566+
return scanExtendedUnicodeEscape(!!isRegularExpression || shouldEmitInvalidEscapeError);
15631567
}
15641568
// '\uDDDD'
15651569
for (; pos < start + 6; pos++) {
@@ -1623,7 +1627,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
16231627
case CharacterCodes.paragraphSeparator:
16241628
return "";
16251629
default:
1626-
if (isRegularExpression && (shouldEmitInvalidEscapeError || isIdentifierPart(ch, languageVersion))) {
1630+
if (isRegularExpression === true && (shouldEmitInvalidEscapeError || isIdentifierPart(ch, languageVersion))) {
16271631
error(Diagnostics.This_character_cannot_be_escaped_in_a_regular_expression, pos - 2, 2);
16281632
}
16291633
return String.fromCharCode(ch);
@@ -2386,7 +2390,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
23862390
return token = SyntaxKind.EqualsToken;
23872391
}
23882392

2389-
function reScanSlashToken(): SyntaxKind {
2393+
function reScanSlashToken(reportErrors?: boolean): SyntaxKind {
23902394
if (token === SyntaxKind.SlashToken || token === SyntaxKind.SlashEqualsToken) {
23912395
// Quickly get to the end of regex such that we know the flags
23922396
let p = tokenStart + 1;
@@ -2444,44 +2448,57 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
24442448
if (!isIdentifierPart(ch, languageVersion)) {
24452449
break;
24462450
}
2447-
const flag = characterToRegularExpressionFlag(String.fromCharCode(ch));
2448-
if (flag === undefined) {
2449-
error(Diagnostics.Unknown_regular_expression_flag, p, 1);
2450-
}
2451-
else if (regExpFlags & flag) {
2452-
error(Diagnostics.Duplicate_regular_expression_flag, p, 1);
2453-
}
2454-
else if (((regExpFlags | flag) & RegularExpressionFlags.UnicodeMode) === RegularExpressionFlags.UnicodeMode) {
2455-
error(Diagnostics.The_Unicode_u_flag_and_the_Unicode_Sets_v_flag_cannot_be_set_simultaneously, p, 1);
2456-
}
2457-
else {
2458-
regExpFlags |= flag;
2459-
const availableFrom = regExpFlagToFirstAvailableLanguageVersion.get(flag)!;
2460-
if (languageVersion < availableFrom) {
2461-
error(Diagnostics.This_regular_expression_flag_is_only_available_when_targeting_0_or_later, p, 1, getNameOfScriptTarget(availableFrom));
2451+
if (reportErrors) {
2452+
const flag = characterToRegularExpressionFlag(String.fromCharCode(ch));
2453+
if (flag === undefined) {
2454+
error(Diagnostics.Unknown_regular_expression_flag, p, 1);
2455+
}
2456+
else if (regExpFlags & flag) {
2457+
error(Diagnostics.Duplicate_regular_expression_flag, p, 1);
2458+
}
2459+
else if (((regExpFlags | flag) & RegularExpressionFlags.UnicodeMode) === RegularExpressionFlags.UnicodeMode) {
2460+
error(Diagnostics.The_Unicode_u_flag_and_the_Unicode_Sets_v_flag_cannot_be_set_simultaneously, p, 1);
2461+
}
2462+
else {
2463+
regExpFlags |= flag;
2464+
const availableFrom = regExpFlagToFirstAvailableLanguageVersion.get(flag)!;
2465+
if (languageVersion < availableFrom) {
2466+
error(Diagnostics.This_regular_expression_flag_is_only_available_when_targeting_0_or_later, p, 1, getNameOfScriptTarget(availableFrom));
2467+
}
24622468
}
24632469
}
24642470
p++;
24652471
}
2466-
pos = tokenStart + 1;
2467-
const saveTokenPos = tokenStart;
2468-
const saveTokenFlags = tokenFlags;
2469-
scanRegularExpressionWorker(text, endOfBody, regExpFlags, isUnterminated);
2470-
if (!isUnterminated) {
2472+
if (reportErrors) {
2473+
pos = tokenStart + 1;
2474+
const saveTokenPos = tokenStart;
2475+
const saveTokenFlags = tokenFlags;
2476+
scanRegularExpressionWorker(text, endOfBody, regExpFlags, isUnterminated, /*annexB*/ true);
2477+
if (!isUnterminated) {
2478+
pos = p;
2479+
}
2480+
tokenStart = saveTokenPos;
2481+
tokenFlags = saveTokenFlags;
2482+
}
2483+
else {
24712484
pos = p;
24722485
}
2473-
tokenStart = saveTokenPos;
2474-
tokenFlags = saveTokenFlags;
24752486
tokenValue = text.substring(tokenStart, pos);
24762487
token = SyntaxKind.RegularExpressionLiteral;
24772488
}
24782489
return token;
24792490

2480-
function scanRegularExpressionWorker(text: string, end: number, regExpFlags: RegularExpressionFlags, isUnterminated: boolean) {
2481-
/** Grammar parameter */
2482-
const unicodeMode = !!(regExpFlags & RegularExpressionFlags.UnicodeMode);
2491+
function scanRegularExpressionWorker(text: string, end: number, regExpFlags: RegularExpressionFlags, isUnterminated: boolean, annexB: boolean) {
24832492
/** Grammar parameter */
24842493
const unicodeSetsMode = !!(regExpFlags & RegularExpressionFlags.UnicodeSets);
2494+
/** Grammar parameter */
2495+
const unicodeMode = !!(regExpFlags & RegularExpressionFlags.UnicodeMode);
2496+
2497+
if (unicodeMode) {
2498+
// Annex B treats any unicode mode as the strict syntax.
2499+
annexB = false;
2500+
}
2501+
24852502
/** @see {scanClassSetExpression} */
24862503
let mayContainStrings = false;
24872504

@@ -2571,7 +2588,8 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
25712588
case CharacterCodes.equals:
25722589
case CharacterCodes.exclamation:
25732590
pos++;
2574-
isPreviousTermQuantifiable = false;
2591+
// In Annex B, `(?=Disjunction)` and `(?!Disjunction)` are quantifiable
2592+
isPreviousTermQuantifiable = annexB;
25752593
break;
25762594
case CharacterCodes.lessThan:
25772595
const groupNameStart = pos;
@@ -2763,7 +2781,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
27632781
default:
27642782
// The scanEscapeSequence call in scanCharacterEscape must return non-empty strings
27652783
// since there must not be line breaks in a regex literal
2766-
Debug.assert(scanCharacterClassEscape() || scanDecimalEscape() || scanCharacterEscape());
2784+
Debug.assert(scanCharacterClassEscape() || scanDecimalEscape() || scanCharacterEscape(/*atomEscape*/ true));
27672785
break;
27682786
}
27692787
}
@@ -2788,7 +2806,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
27882806
// IdentityEscape ::=
27892807
// | '^' | '$' | '/' | '\' | '.' | '*' | '+' | '?' | '(' | ')' | '[' | ']' | '{' | '}' | '|'
27902808
// | [~UnicodeMode] (any other non-identifier characters)
2791-
function scanCharacterEscape(): string {
2809+
function scanCharacterEscape(atomEscape: boolean): string {
27922810
Debug.assertEqual(text.charCodeAt(pos - 1), CharacterCodes.backslash);
27932811
let ch = text.charCodeAt(pos);
27942812
switch (ch) {
@@ -2802,6 +2820,15 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
28022820
if (unicodeMode) {
28032821
error(Diagnostics.c_must_be_followed_by_an_ASCII_letter, pos - 2, 2);
28042822
}
2823+
else if (atomEscape && annexB) {
2824+
// Annex B treats
2825+
//
2826+
// ExtendedAtom : `\` [lookahead = `c`]
2827+
//
2828+
// as the single character `\` when `c` isn't followed by a valid control character
2829+
pos--;
2830+
return "\\";
2831+
}
28052832
return String.fromCharCode(ch);
28062833
case CharacterCodes.caret:
28072834
case CharacterCodes.$:
@@ -2826,7 +2853,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
28262853
return "\\";
28272854
}
28282855
pos--;
2829-
return scanEscapeSequence(/*shouldEmitInvalidEscapeError*/ unicodeMode, /*isRegularExpression*/ true);
2856+
return scanEscapeSequence(/*shouldEmitInvalidEscapeError*/ unicodeMode, /*isRegularExpression*/ annexB ? "annex-b" : true);
28302857
}
28312858
}
28322859

@@ -2873,12 +2900,12 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
28732900
if (isClassContentExit(ch)) {
28742901
return;
28752902
}
2876-
if (!minCharacter) {
2903+
if (!minCharacter && !annexB) {
28772904
error(Diagnostics.A_character_class_range_must_not_be_bounded_by_another_character_class, minStart, pos - 1 - minStart);
28782905
}
28792906
const maxStart = pos;
28802907
const maxCharacter = scanClassAtom();
2881-
if (!maxCharacter) {
2908+
if (!maxCharacter && !annexB) {
28822909
error(Diagnostics.A_character_class_range_must_not_be_bounded_by_another_character_class, maxStart, pos - maxStart);
28832910
continue;
28842911
}
@@ -3208,7 +3235,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
32083235
pos++;
32093236
return String.fromCharCode(ch);
32103237
default:
3211-
return scanCharacterEscape();
3238+
return scanCharacterEscape(/*atomEscape*/ false);
32123239
}
32133240
}
32143241
else if (ch === text.charCodeAt(pos + 1)) {
@@ -3275,7 +3302,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
32753302
if (scanCharacterClassEscape()) {
32763303
return "";
32773304
}
3278-
return scanCharacterEscape();
3305+
return scanCharacterEscape(/*atomEscape*/ false);
32793306
}
32803307
}
32813308
else {
@@ -3407,7 +3434,9 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
34073434
}
34083435
});
34093436
forEach(decimalEscapes, escape => {
3410-
if (escape.value > numberOfCapturingGroups) {
3437+
// in AnnexB, if a DecimalEscape is greater than the number of capturing groups then it is treated as
3438+
// either a LegacyOctalEscapeSequence or IdentityEscape
3439+
if (!annexB && escape.value > numberOfCapturingGroups) {
34113440
if (numberOfCapturingGroups) {
34123441
error(Diagnostics.A_decimal_escape_must_refer_to_an_existent_capturing_group_There_are_only_0_capturing_groups_in_this_regular_expression, escape.pos, escape.end - escape.pos, numberOfCapturingGroups);
34133442
}

src/testRunner/unittests/incrementalParser.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,7 @@ describe("unittests:: Incremental Parser", () => {
160160
const oldText = ts.ScriptSnapshot.fromString(source);
161161
const newTextAndChange = withInsert(oldText, semicolonIndex, "/");
162162

163-
compareTrees(oldText, newTextAndChange.text, newTextAndChange.textChangeRange, 4);
163+
compareTrees(oldText, newTextAndChange.text, newTextAndChange.textChangeRange, 0);
164164
});
165165

166166
it("Regular expression 2", () => {

tests/baselines/reference/parserRegularExpression1.errors.txt

Lines changed: 0 additions & 7 deletions
This file was deleted.

tests/baselines/reference/parserRegularExpression1.js

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
//// [tests/cases/conformance/parser/ecmascript5/RegularExpressions/parserRegularExpression1.ts] ////
22

33
//// [parserRegularExpression1.ts]
4-
return /(#?-?\d*\.\d\w*%?)|(@?#?[\w-?]+%?)/g;
4+
/(#?-?\d*\.\d\w*%?)|(@?#?[\w-?]+%?)/g;
55

66
//// [parserRegularExpression1.js]
7-
return /(#?-?\d*\.\d\w*%?)|(@?#?[\w-?]+%?)/g;
7+
/(#?-?\d*\.\d\w*%?)|(@?#?[\w-?]+%?)/g;

tests/baselines/reference/parserRegularExpression1.symbols

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,4 @@
22

33
=== parserRegularExpression1.ts ===
44

5-
return /(#?-?\d*\.\d\w*%?)|(@?#?[\w-?]+%?)/g;
5+
/(#?-?\d*\.\d\w*%?)|(@?#?[\w-?]+%?)/g;

0 commit comments

Comments
 (0)