Skip to content

Commit 7252a7e

Browse files
refactor: simplify encoders (#88)
1 parent ea35d60 commit 7252a7e

28 files changed

+265
-521
lines changed

src/builders.ts

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
import type { RegexFlags, RegexSequence } from './types';
2-
import { encodeSequence } from './encoder/encoder';
3-
import { ensureArray } from './utils/elements';
2+
import { encode } from './encoder';
43

54
/**
65
* Generate RegExp object from elements with optional flags.
@@ -10,7 +9,7 @@ import { ensureArray } from './utils/elements';
109
* @returns RegExp object
1110
*/
1211
export function buildRegExp(sequence: RegexSequence, flags?: RegexFlags): RegExp {
13-
const pattern = encodeSequence(ensureArray(sequence)).pattern;
12+
const pattern = encode(sequence).pattern;
1413
const flagsString = encodeFlags(flags ?? {});
1514
return new RegExp(pattern, flagsString);
1615
}
@@ -21,7 +20,7 @@ export function buildRegExp(sequence: RegexSequence, flags?: RegexFlags): RegExp
2120
* @returns regex pattern string
2221
*/
2322
export function buildPattern(sequence: RegexSequence): string {
24-
return encodeSequence(ensureArray(sequence)).pattern;
23+
return encode(sequence).pattern;
2524
}
2625

2726
function encodeFlags(flags: RegexFlags): string {

src/constructs/__tests__/char-class.test.ts

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,12 @@ test('`charClass` joins character escapes', () => {
3737
expect(charClass(word, nonDigit)).toEqualRegex(/[\w\D]/);
3838
});
3939

40+
test('`charClass` throws on empty text', () => {
41+
expect(() => charClass()).toThrowErrorMatchingInlineSnapshot(
42+
`"\`charClass\` should receive at least one element"`,
43+
);
44+
});
45+
4046
test('`charRange` pattern', () => {
4147
expect(charRange('a', 'z')).toEqualRegex(/[a-z]/);
4248
expect(['x', charRange('0', '9')]).toEqualRegex(/x[0-9]/);
@@ -115,16 +121,7 @@ test('`negated` character class matching', () => {
115121
});
116122

117123
test('`encodeCharacterClass` throws on empty text', () => {
118-
expect(() =>
119-
buildRegExp(
120-
// @ts-expect-error
121-
negated({
122-
type: 'characterClass',
123-
chars: [],
124-
ranges: [],
125-
}),
126-
),
127-
).toThrowErrorMatchingInlineSnapshot(
124+
expect(() => buildRegExp(negated({ chars: [], ranges: [] }))).toThrowErrorMatchingInlineSnapshot(
128125
`"Character class should contain at least one character or character range"`,
129126
);
130127
});

src/encoder/__tests__/encoder.test.tsx renamed to src/constructs/__tests__/encoder.test.tsx

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,11 @@ test('`buildRegExp` throws error on unknown element', () => {
7474
expect(() =>
7575
// @ts-expect-error intentionally passing incorrect object
7676
buildRegExp({ type: 'unknown' }),
77-
).toThrowErrorMatchingInlineSnapshot(`"\`encodeNode\`: unknown element type unknown"`);
77+
).toThrowErrorMatchingInlineSnapshot(`
78+
"\`encodeElement\`: unknown element: {
79+
"type": "unknown"
80+
}"
81+
`);
7882
});
7983

8084
test('`buildPattern` throws on empty text', () => {

src/constructs/anchors.ts

Lines changed: 13 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,43 +1,26 @@
1-
import type { EncodeResult } from '../encoder/types';
2-
import type { RegexConstruct } from '../types';
1+
import type { EncodedRegex } from '../types';
32

4-
export interface Anchor extends RegexConstruct {
5-
type: 'anchor';
6-
symbol: string;
7-
}
8-
9-
export const startOfString: Anchor = {
10-
type: 'anchor',
11-
symbol: '^',
12-
encode: encodeAnchor,
3+
export const startOfString: EncodedRegex = {
4+
precedence: 'atom',
5+
pattern: '^',
136
};
147

15-
export const endOfString: Anchor = {
16-
type: 'anchor',
17-
symbol: '$',
18-
encode: encodeAnchor,
8+
export const endOfString: EncodedRegex = {
9+
precedence: 'atom',
10+
pattern: '$',
1911
};
2012

21-
export const wordBoundary: Anchor = {
22-
type: 'anchor',
23-
symbol: '\\b',
24-
encode: encodeAnchor,
13+
export const wordBoundary: EncodedRegex = {
14+
precedence: 'atom',
15+
pattern: '\\b',
2516
};
2617

27-
export const nonWordBoundary: Anchor = {
28-
type: 'anchor',
29-
symbol: '\\B',
30-
encode: encodeAnchor,
18+
export const nonWordBoundary: EncodedRegex = {
19+
precedence: 'atom',
20+
pattern: '\\B',
3121
};
3222

3323
/**
3424
* @deprecated Renamed to `nonWordBoundary`.
3525
*/
3626
export const notWordBoundary = nonWordBoundary;
37-
38-
function encodeAnchor(this: Anchor): EncodeResult {
39-
return {
40-
precedence: 'sequence',
41-
pattern: this.symbol,
42-
};
43-
}

src/constructs/capture.ts

Lines changed: 16 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,5 @@
1-
import { encodeSequence } from '../encoder/encoder';
2-
import type { EncodeResult } from '../encoder/types';
3-
import { ensureArray } from '../utils/elements';
4-
import type { RegexConstruct, RegexElement, RegexSequence } from '../types';
5-
6-
export interface Capture extends RegexConstruct {
7-
type: 'capture';
8-
children: RegexElement[];
9-
options?: CaptureOptions;
10-
}
1+
import { encode } from '../encoder';
2+
import type { EncodedRegex, RegexSequence } from '../types';
113

124
export type CaptureOptions = {
135
/**
@@ -16,8 +8,7 @@ export type CaptureOptions = {
168
name?: string;
179
};
1810

19-
export interface Reference extends RegexConstruct {
20-
type: 'reference';
11+
export interface Reference extends EncodedRegex {
2112
name: string;
2213
}
2314

@@ -26,12 +17,18 @@ export interface Reference extends RegexConstruct {
2617
* - in the match results (`String.match`, `String.matchAll`, or `RegExp.exec`)
2718
* - in the regex itself, through {@link ref}
2819
*/
29-
export function capture(sequence: RegexSequence, options?: CaptureOptions): Capture {
20+
export function capture(sequence: RegexSequence, options?: CaptureOptions): EncodedRegex {
21+
const name = options?.name;
22+
if (name) {
23+
return {
24+
precedence: 'atom',
25+
pattern: `(?<${name}>${encode(sequence).pattern})`,
26+
};
27+
}
28+
3029
return {
31-
type: 'capture',
32-
children: ensureArray(sequence),
33-
options,
34-
encode: encodeCapture,
30+
precedence: 'atom',
31+
pattern: `(${encode(sequence).pattern})`,
3532
};
3633
}
3734

@@ -45,31 +42,9 @@ export function capture(sequence: RegexSequence, options?: CaptureOptions): Capt
4542
* @param name - Name of the capturing group to reference.
4643
*/
4744
export function ref(name: string): Reference {
48-
return {
49-
type: 'reference',
50-
name,
51-
encode: encodeReference,
52-
};
53-
}
54-
55-
function encodeCapture(this: Capture): EncodeResult {
56-
const name = this.options?.name;
57-
if (name) {
58-
return {
59-
precedence: 'atom',
60-
pattern: `(?<${name}>${encodeSequence(this.children).pattern})`,
61-
};
62-
}
63-
6445
return {
6546
precedence: 'atom',
66-
pattern: `(${encodeSequence(this.children).pattern})`,
67-
};
68-
}
69-
70-
function encodeReference(this: Reference): EncodeResult {
71-
return {
72-
precedence: 'atom',
73-
pattern: `\\k<${this.name}>`,
47+
pattern: `\\k<${name}>`,
48+
name,
7449
};
7550
}

src/constructs/char-class.ts

Lines changed: 8 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,14 @@
1-
import type { EncodeResult } from '../encoder/types';
2-
import type { RegexConstruct } from '../types';
3-
import type { CharacterEscape } from './char-escape';
4-
5-
/**
6-
* Character range from start to end (inclusive).
7-
*/
8-
export interface CharacterRange {
9-
start: string;
10-
end: string;
11-
}
12-
13-
export interface CharacterClass extends RegexConstruct {
14-
type: 'characterClass';
15-
chars: string[];
16-
ranges?: CharacterRange[];
17-
}
1+
import { encodeCharClass } from '../encoder';
2+
import type { CharacterClass, CharacterEscape, EncodedRegex } from '../types';
183

194
export function charClass(...elements: Array<CharacterClass | CharacterEscape>): CharacterClass {
5+
if (!elements.length) {
6+
throw new Error('`charClass` should receive at least one element');
7+
}
8+
209
return {
21-
type: 'characterClass',
2210
chars: elements.map((c) => c.chars).flat(),
2311
ranges: elements.map((c) => c.ranges ?? []).flat(),
24-
encode: encodeCharacterClass,
2512
};
2613
}
2714

@@ -39,10 +26,8 @@ export function charRange(start: string, end: string): CharacterClass {
3926
}
4027

4128
return {
42-
type: 'characterClass',
4329
chars: [],
4430
ranges: [{ start, end }],
45-
encode: encodeCharacterClass,
4631
};
4732
}
4833

@@ -54,47 +39,19 @@ export function anyOf(characters: string): CharacterClass {
5439
}
5540

5641
return {
57-
type: 'characterClass',
5842
chars,
59-
encode: encodeCharacterClass,
6043
};
6144
}
6245

63-
export function negated(element: CharacterClass | CharacterEscape): EncodeResult {
64-
return encodeCharacterClass.call(element, true);
46+
export function negated(element: CharacterClass | CharacterEscape): EncodedRegex {
47+
return encodeCharClass(element, true);
6548
}
6649

6750
/**
6851
* @deprecated Renamed to `negated`.
6952
*/
7053
export const inverted = negated;
7154

72-
export function encodeCharacterClass(
73-
this: CharacterClass | CharacterEscape,
74-
isNegated?: boolean,
75-
): EncodeResult {
76-
if (!this.chars.length && !this.ranges?.length) {
77-
throw new Error('Character class should contain at least one character or character range');
78-
}
79-
80-
// If passed characters includes hyphen (`-`) it need to be moved to
81-
// first (or last) place in order to treat it as hyphen character and not a range.
82-
// See: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_expressions/Character_classes#types
83-
const hyphen = this.chars.includes('-') ? '-' : '';
84-
const caret = this.chars.includes('^') ? '^' : '';
85-
const otherChars = this.chars.filter((c) => c !== '-' && c !== '^').join('');
86-
const ranges = this.ranges?.map(({ start, end }) => `${start}-${end}`).join('') ?? '';
87-
const negation = isNegated ? '^' : '';
88-
89-
let pattern = `[${negation}${ranges}${otherChars}${caret}${hyphen}]`;
90-
if (pattern === '[^-]') pattern = '[\\^-]';
91-
92-
return {
93-
precedence: 'atom',
94-
pattern,
95-
};
96-
}
97-
9855
function escapeForCharacterClass(text: string): string {
9956
return text.replace(/[\]\\]/g, '\\$&'); // $& means the whole matched string
10057
}

src/constructs/char-escape.ts

Lines changed: 2 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,10 @@
1-
import type { EncodeResult } from '../encoder/types';
2-
3-
export interface CharacterEscape extends EncodeResult {
4-
kind: 'escape';
5-
6-
// `CharacterClass` compatibility
7-
chars: string[];
8-
ranges?: never;
9-
}
1+
import type { CharacterEscape, EncodedRegex } from '../types';
102

113
/**
124
* Matches any single character.
135
* Specifically this one is NOT a character escape.
146
*/
15-
export const any: EncodeResult = {
7+
export const any: EncodedRegex = {
168
precedence: 'atom',
179
pattern: '.',
1810
};
@@ -21,42 +13,36 @@ export const digit: CharacterEscape = {
2113
precedence: 'atom',
2214
pattern: '\\d',
2315
chars: ['\\d'],
24-
kind: 'escape',
2516
};
2617

2718
export const nonDigit: CharacterEscape = {
2819
precedence: 'atom',
2920
pattern: '\\D',
3021
chars: ['\\D'],
31-
kind: 'escape',
3222
};
3323

3424
export const word: CharacterEscape = {
3525
precedence: 'atom',
3626
pattern: '\\w',
3727
chars: ['\\w'],
38-
kind: 'escape',
3928
};
4029

4130
export const nonWord: CharacterEscape = {
4231
precedence: 'atom',
4332
pattern: '\\W',
4433
chars: ['\\W'],
45-
kind: 'escape',
4634
};
4735

4836
export const whitespace: CharacterEscape = {
4937
precedence: 'atom',
5038
pattern: '\\s',
5139
chars: ['\\s'],
52-
kind: 'escape',
5340
};
5441

5542
export const nonWhitespace: CharacterEscape = {
5643
precedence: 'atom',
5744
pattern: '\\S',
5845
chars: ['\\S'],
59-
kind: 'escape',
6046
};
6147

6248
/**

0 commit comments

Comments
 (0)