Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add support for ES2025 duplicate named capturing groups #195

Merged
merged 10 commits into from
Jun 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion src/ast.ts
Original file line number Diff line number Diff line change
Expand Up @@ -428,10 +428,21 @@ export interface Character extends NodeBase {
* The backreference.
* E.g. `\1`, `\k<name>`
*/
export interface Backreference extends NodeBase {
export type Backreference = AmbiguousBackreference | UnambiguousBackreference
/**
 * Fields common to both backreference variants.
 * `ambiguous` is the discriminant: the two sub-interfaces narrow it to a
 * literal `true` / `false` and narrow `resolved` accordingly.
 */
interface BaseBackreference extends NodeBase {
type: "Backreference"
parent: Alternative | Quantifier
/** What the backreference refers to: a group number (`\1`) or a group name (`\k<name>`). */
ref: number | string
/** Whether the reference did not resolve to exactly one capturing group. */
ambiguous: boolean
/** The referenced group, or the list of candidate groups when ambiguous. */
resolved: CapturingGroup | CapturingGroup[]
}
/**
 * A named backreference (`\k<name>`) that does not resolve to exactly one
 * capturing group, so `resolved` is a list of groups.
 * NOTE(review): presumably this arises when several groups share the name,
 * as ES2025 duplicate named capturing groups allow; confirm against the parser.
 */
export interface AmbiguousBackreference extends BaseBackreference {
// Always a name here, never a group number.
ref: string
ambiguous: true
resolved: CapturingGroup[]
}
/**
 * A backreference that resolves to a single capturing group.
 * `ref` keeps the inherited `number | string` type, so it may be either a
 * group number or a group name.
 */
export interface UnambiguousBackreference extends BaseBackreference {
ambiguous: false
resolved: CapturingGroup
}

Expand Down
3 changes: 2 additions & 1 deletion src/ecma-versions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,5 @@ export type EcmaVersion =
| 2022
| 2023
| 2024
export const latestEcmaVersion = 2024
| 2025
export const latestEcmaVersion = 2025
167 changes: 167 additions & 0 deletions src/group-specifiers.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
/**
* Holds information for all GroupSpecifiers included in the pattern.
*
* Two implementations exist in this file: one for the rules prior to ES2025
* (a name may appear only once per pattern) and one for ES2025 (a name may be
* repeated as long as the occurrences are in separate alternatives).
*/
export interface GroupSpecifiers {
/**
* @returns true if there are no GroupSpecifiers included in the pattern.
*/
isEmpty: () => boolean
/**
* Forgets every GroupSpecifier recorded so far.
*/
clear: () => void
/**
* Called when visiting the Disjunction.
* For ES2025, manage nesting with new Disjunction scopes.
*/
enterDisjunction: () => void
/**
* Called when visiting the Alternative.
* For ES2025, manage nesting with new Alternative scopes.
* @param index zero-based position of the alternative within its disjunction.
*/
enterAlternative: (index: number) => void
/**
* Called when leaving the Disjunction.
* The return value is not specified; the implementations in this file
* return nothing.
*/
leaveDisjunction: () => unknown
/**
* Checks whether the given group name is within the pattern.
*/
hasInPattern: (name: string) => boolean
/**
* Checks whether the given group name is within the current scope.
* Prior to ES2025 the scope is the whole pattern; for ES2025 a name recorded
* in a separate alternative is not considered to be in scope.
*/
hasInScope: (name: string) => boolean
/**
* Adds the given group name to the current scope.
*/
addToScope: (name: string) => void
}

/**
 * GroupSpecifiers bookkeeping for the rules prior to ES2025: group names
 * live in one flat set, so "in scope" and "in pattern" mean the same thing
 * and the disjunction/alternative hooks are no-ops.
 */
export class GroupSpecifiersAsES2018 implements GroupSpecifiers {
    /** Every group name registered since the last `clear()`. */
    private readonly names = new Set<string>()

    /** @returns true while no group name has been registered. */
    public isEmpty(): boolean {
        return this.names.size === 0
    }

    /** Drops all registered group names. */
    public clear(): void {
        this.names.clear()
    }

    /** Registers the given group name. */
    public addToScope(name: string): void {
        this.names.add(name)
    }

    /** @returns true if the name has been registered anywhere in the pattern. */
    public hasInPattern(name: string): boolean {
        return this.names.has(name)
    }

    /** Same answer as `hasInPattern`: there is only one scope. */
    public hasInScope(name: string): boolean {
        return this.hasInPattern(name)
    }

    // eslint-disable-next-line class-methods-use-this
    public enterDisjunction(): void {
        // No disjunction scopes are tracked prior to ES2025.
    }

    // eslint-disable-next-line class-methods-use-this
    public enterAlternative(): void {
        // No alternative scopes are tracked prior to ES2025.
    }

    // eslint-disable-next-line class-methods-use-this
    public leaveDisjunction(): void {
        // No disjunction scopes are tracked prior to ES2025.
    }
}

/**
 * Track disjunction structure to determine whether a duplicate
 * capture group name is allowed because it is in a separate branch.
 *
 * Each instance identifies one alternative. `parent` is the alternative
 * that encloses the disjunction this alternative belongs to, and `base`
 * is an identity shared by all alternatives of that same disjunction.
 */
class BranchID {
    /** Enclosing (parent) disjunction branch, or `null` at the root. */
    public readonly parent: BranchID | null

    /** Identifies this set of sibling branches. */
    private readonly base: BranchID

    /**
     * @param parent The enclosing branch, or `null` for the root.
     * @param base The identity of the sibling set to join, or `null` to
     * start a new sibling set identified by this instance itself.
     */
    public constructor(parent: BranchID | null, base: BranchID | null) {
        this.parent = parent
        this.base = base ?? this
    }

    /**
     * A branch is separate from another branch if they or any of
     * their parents are siblings in a given disjunction.
     *
     * Equivalently: some ancestor-or-self of `this` and some
     * ancestor-or-self of `other` share a `base` but are different
     * branches. Every such pair is compared exactly once, so the cost is
     * proportional to the product of the two nesting depths. Recursing on
     * both parent chains at once would instead revisit the same pairs
     * along every path and take exponential time whenever the answer is
     * `false` (e.g. a duplicate name inside deeply nested groups).
     *
     * @param other The branch to compare against.
     * @returns true if the two branches can never both take part in a match.
     */
    public separatedFrom(other: BranchID): boolean {
        // Compare this branch against `other` and each of its ancestors.
        for (
            let candidate: BranchID | null = other;
            candidate !== null;
            candidate = candidate.parent
        ) {
            if (this.base === candidate.base && this !== candidate) {
                return true
            }
        }
        // Then repeat for each ancestor of this branch.
        return this.parent?.separatedFrom(other) ?? false
    }

    /** Creates the first branch of a disjunction nested inside this branch. */
    public child(): BranchID {
        return new BranchID(this, null)
    }

    /** Creates another branch in the same disjunction as this branch. */
    public sibling(): BranchID {
        return new BranchID(this.parent, this.base)
    }
}

/**
 * GroupSpecifiers bookkeeping for ES2025: every group name remembers the
 * branches it was declared in, and a name only counts as "in scope" when one
 * of those branches is not separated from the branch currently being visited.
 */
export class GroupSpecifiersAsES2025 implements GroupSpecifiers {
    /** The branch (alternative) currently being visited. */
    private currentBranch = new BranchID(null, null)

    /** For each group name, the branches in which it has been declared. */
    private readonly branchesByName = new Map<string, BranchID[]>()

    /** @returns true while no group name has been registered. */
    public isEmpty(): boolean {
        return this.branchesByName.size === 0
    }

    /** Drops all registered names and restarts from a fresh root branch. */
    public clear(): void {
        this.branchesByName.clear()
        this.currentBranch = new BranchID(null, null)
    }

    /** Descends into a new disjunction nested in the current branch. */
    public enterDisjunction(): void {
        this.currentBranch = this.currentBranch.child()
    }

    /**
     * Moves on to the next alternative of the current disjunction.
     * The first alternative (index 0) reuses the branch created by
     * `enterDisjunction`.
     */
    public enterAlternative(index: number): void {
        if (index !== 0) {
            this.currentBranch = this.currentBranch.sibling()
        }
    }

    /** Returns to the branch that encloses the current disjunction. */
    public leaveDisjunction(): void {
        this.currentBranch = this.currentBranch.parent!
    }

    /** @returns true if the name has been registered anywhere in the pattern. */
    public hasInPattern(name: string): boolean {
        return this.branchesByName.has(name)
    }

    /**
     * @returns true if the name was registered in some branch that is not
     * separated from the current one.
     */
    public hasInScope(name: string): boolean {
        const declaredIn = this.branchesByName.get(name) ?? []
        return declaredIn.some(
            (branch) => !branch.separatedFrom(this.currentBranch),
        )
    }

    /** Records that the name is declared in the current branch. */
    public addToScope(name: string): void {
        const declaredIn = this.branchesByName.get(name)
        if (declaredIn === undefined) {
            this.branchesByName.set(name, [this.currentBranch])
        } else {
            declaredIn.push(this.currentBranch)
        }
    }
}
23 changes: 17 additions & 6 deletions src/parser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -148,12 +148,21 @@ class RegExpParserState {

for (const reference of this._backreferences) {
const ref = reference.ref
const group =
const groups =
typeof ref === "number"
? this._capturingGroups[ref - 1]
: this._capturingGroups.find((g) => g.name === ref)!
reference.resolved = group
group.references.push(reference)
? [this._capturingGroups[ref - 1]]
: this._capturingGroups.filter((g) => g.name === ref)
if (groups.length === 1) {
const group = groups[0]
reference.ambiguous = false
reference.resolved = group
} else {
reference.ambiguous = true
reference.resolved = groups
}
for (const group of groups) {
group.references.push(reference)
}
}
}

Expand Down Expand Up @@ -480,6 +489,7 @@ class RegExpParserState {
end,
raw: this.source.slice(start, end),
ref,
ambiguous: false,
resolved: DUMMY_CAPTURING_GROUP,
}
parent.elements.push(node)
Expand Down Expand Up @@ -747,14 +757,15 @@ export namespace RegExpParser {
strict?: boolean

/**
* ECMAScript version. Default is `2024`.
* ECMAScript version. Default is `2025`.
* - `2015` added `u` and `y` flags.
* - `2018` added `s` flag, Named Capturing Group, Lookbehind Assertion,
* and Unicode Property Escape.
* - `2019`, `2020`, and `2021` added more valid Unicode Property Escapes.
* - `2022` added `d` flag.
* - `2023` added more valid Unicode Property Escapes.
* - `2024` added `v` flag.
* - `2025` added duplicate named capturing groups.
*/
ecmaVersion?: EcmaVersion
}
Expand Down
27 changes: 20 additions & 7 deletions src/validator.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
import type { EcmaVersion } from "./ecma-versions"
import { latestEcmaVersion } from "./ecma-versions"
import type { GroupSpecifiers } from "./group-specifiers"
import {
GroupSpecifiersAsES2018,
GroupSpecifiersAsES2025,
} from "./group-specifiers"
import { Reader } from "./reader"
import { newRegExpSyntaxError } from "./regexp-syntax-error"
import {
Expand Down Expand Up @@ -231,14 +236,15 @@ export namespace RegExpValidator {
strict?: boolean

/**
* ECMAScript version. Default is `2024`.
* ECMAScript version. Default is `2025`.
* - `2015` added `u` and `y` flags.
* - `2018` added `s` flag, Named Capturing Group, Lookbehind Assertion,
* and Unicode Property Escape.
* - `2019`, `2020`, and `2021` added more valid Unicode Property Escapes.
* - `2022` added `d` flag.
* - `2023` added more valid Unicode Property Escapes.
* - `2024` added `v` flag.
* - `2025` added duplicate named capturing groups.
*/
ecmaVersion?: EcmaVersion

Expand Down Expand Up @@ -631,7 +637,7 @@ export class RegExpValidator {

private _numCapturingParens = 0

private _groupNames = new Set<string>()
private _groupSpecifiers: GroupSpecifiers

private _backreferenceNames = new Set<string>()

Expand All @@ -643,6 +649,10 @@ export class RegExpValidator {
*/
public constructor(options?: RegExpValidator.Options) {
    this._options = options ?? {}
    // Pick the group-name bookkeeping that matches the configured
    // ECMAScript version.
    if (this.ecmaVersion >= 2025) {
        this._groupSpecifiers = new GroupSpecifiersAsES2025()
    } else {
        this._groupSpecifiers = new GroupSpecifiersAsES2018()
    }
}

/**
Expand Down Expand Up @@ -763,7 +773,7 @@ export class RegExpValidator {
if (
!this._nFlag &&
this.ecmaVersion >= 2018 &&
this._groupNames.size > 0
!this._groupSpecifiers.isEmpty()
) {
this._nFlag = true
this.rewind(start)
Expand Down Expand Up @@ -1301,7 +1311,7 @@ export class RegExpValidator {
private consumePattern(): void {
const start = this.index
this._numCapturingParens = this.countCapturingParens()
this._groupNames.clear()
this._groupSpecifiers.clear()
this._backreferenceNames.clear()

this.onPatternEnter(start)
Expand All @@ -1322,7 +1332,7 @@ export class RegExpValidator {
this.raise(`Unexpected character '${c}'`)
}
for (const name of this._backreferenceNames) {
if (!this._groupNames.has(name)) {
if (!this._groupSpecifiers.hasInPattern(name)) {
this.raise("Invalid named capture referenced")
}
}
Expand Down Expand Up @@ -1378,6 +1388,7 @@ export class RegExpValidator {
const start = this.index
let i = 0

this._groupSpecifiers.enterDisjunction()
this.onDisjunctionEnter(start)
do {
this.consumeAlternative(i++)
Expand All @@ -1390,6 +1401,7 @@ export class RegExpValidator {
this.raise("Lone quantifier brackets")
}
this.onDisjunctionLeave(start, this.index)
this._groupSpecifiers.leaveDisjunction()
}

/**
Expand All @@ -1403,6 +1415,7 @@ export class RegExpValidator {
private consumeAlternative(i: number): void {
const start = this.index

this._groupSpecifiers.enterAlternative(i)
this.onAlternativeEnter(start, i)
while (this.currentCodePoint !== -1 && this.consumeTerm()) {
// do nothing.
Expand Down Expand Up @@ -1846,8 +1859,8 @@ export class RegExpValidator {
private consumeGroupSpecifier(): boolean {
if (this.eat(QUESTION_MARK)) {
if (this.eatGroupName()) {
if (!this._groupNames.has(this._lastStrValue)) {
this._groupNames.add(this._lastStrValue)
if (!this._groupSpecifiers.hasInScope(this._lastStrValue)) {
this._groupSpecifiers.addToScope(this._lastStrValue)
return true
}
this.raise("Duplicate capture group name")
Expand Down
4 changes: 4 additions & 0 deletions test/fixtures/parser/literal/basic-valid-2015-u.json
Original file line number Diff line number Diff line change
Expand Up @@ -1690,6 +1690,7 @@
"end": 6,
"raw": "\\1",
"ref": 1,
"ambiguous": false,
"resolved": "♻️../0"
}
]
Expand Down Expand Up @@ -1741,6 +1742,7 @@
"end": 3,
"raw": "\\1",
"ref": 1,
"ambiguous": false,
"resolved": "♻️../1"
},
{
Expand Down Expand Up @@ -2104,6 +2106,7 @@
"end": 34,
"raw": "\\10",
"ref": 10,
"ambiguous": false,
"resolved": "♻️../9"
}
]
Expand Down Expand Up @@ -2465,6 +2468,7 @@
"end": 37,
"raw": "\\11",
"ref": 11,
"ambiguous": false,
"resolved": "♻️../10"
}
]
Expand Down
Loading
Loading