Skip to content

Commit 71ad0b5

Browse files
feat: mix with regexp object (#58)
1 parent d4a8285 commit 71ad0b5

File tree

5 files changed

+63
-5
lines changed

5 files changed

+63
-5
lines changed

README.md

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -67,15 +67,16 @@ TS Regex Builder allows you to build complex regular expressions using domain-sp
6767

6868
Terminology:
6969
- regex construct (`RegexConstruct`) - common name for all regex constructs like character classes, quantifiers, and anchors.
70-
- regex element (`RegexElement`) - a fundamental building block of a regular expression, defined as either a regex construct or a string.
70+
- regex element (`RegexElement`) - a fundamental building block of a regular expression, defined as either a regex construct, a string, or a `RegExp` literal (`/.../`).
7171
- regex sequence (`RegexSequence`) - a sequence of regex elements forming a regular expression. For developer convenience, it also accepts a single element instead of an array.
7272

7373
Most of the regex constructs accept a regex sequence as their argument.
7474

7575
Examples of sequences:
76-
- single element (construct): `capture('abc')`
76+
- single element (construct): `capture('Hello')`
7777
- single element (string): `'Hello'`
78-
- array of elements: `['USD', oneOrMore(digit)]`
78+
- single element (`RegExp` literal): `/Hello/`
79+
- array of elements: `['USD', oneOrMore(digit), /Hello/]`
7980

8081
Regex constructs can be composed into a tree structure:
8182

docs/API.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ The sequence of regex elements forming a regular expression. For developer conve
88

99
### `RegexElement`
1010

11-
Fundamental building blocks of a regular expression, defined as either a regex construct or a string.
11+
Fundamental building blocks of a regular expression, defined as either a regex construct, a string, or a `RegExp` literal (`/.../`).
1212

1313
### `RegexConstruct`
1414

src/encoder/__tests__/encoder.test.tsx

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
import { buildPattern, buildRegExp } from '../../builders';
2+
import { capture } from '../../constructs/capture';
3+
import { choiceOf } from '../../constructs/choice-of';
24
import { oneOrMore, optional, zeroOrMore } from '../../constructs/quantifiers';
35
import { repeat } from '../../constructs/repeat';
46

@@ -43,6 +45,26 @@ test('`buildPattern` escapes special characters', () => {
4345
expect([oneOrMore('.*'), zeroOrMore('[]{}')]).toEqualRegex(/(?:\.\*)+(?:\[\]\{\})*/);
4446
});
4547

48+
// A native RegExp can be passed wherever a regex element is expected.
// The multi-character source `abc` is expected to be wrapped in a non-capturing group when
// quantified or repeated, while `capture` and `choiceOf` are expected to embed the source as-is.
test('`buildRegExp` accepts RegExp object', () => {
49+
expect(buildRegExp(/abc/)).toEqual(/abc/);
50+
expect(buildRegExp(oneOrMore(/abc/))).toEqual(/(?:abc)+/);
51+
expect(buildRegExp(repeat(/abc/, 5))).toEqual(/(?:abc){5}/);
52+
expect(buildRegExp(capture(/abc/))).toEqual(/(abc)/);
53+
expect(buildRegExp(choiceOf(/a/, /b/))).toEqual(/a|b/);
54+
expect(buildRegExp(choiceOf(/a|b/, /c/))).toEqual(/a|b|c/);
55+
});
56+
57+
// Sources recognised as atomic (a single character, a single character class, a single group)
// are expected to be quantified directly, without an extra non-capturing group around them.
test('`buildRegExp` detects common atomic patterns', () => {
58+
expect(buildRegExp(/a/)).toEqual(/a/);
59+
expect(buildRegExp(/[a-z]/)).toEqual(/[a-z]/);
60+
expect(buildRegExp(/(abc)/)).toEqual(/(abc)/);
61+
expect(buildRegExp(oneOrMore(/a/))).toEqual(/a+/);
62+
expect(buildRegExp(oneOrMore(/[a-z]/))).toEqual(/[a-z]+/);
63+
expect(buildRegExp(oneOrMore(/(abc)/))).toEqual(/(abc)+/);
64+
expect(buildRegExp(repeat(/a/, 5))).toEqual(/a{5}/);
65+
expect(buildRegExp(oneOrMore(/(a|b|c)/))).toEqual(/(a|b|c)+/);
66+
});
67+
4668
test('`buildRegExp` throws error on unknown element', () => {
4769
expect(() =>
4870
// @ts-expect-error intentionally passing incorrect object

src/encoder/encoder.ts

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,10 @@ function encodeNode(element: RegexElement): EncodeResult {
1616
return encodeText(element);
1717
}
1818

19+
if (typeof element === 'object' && element instanceof RegExp) {
20+
return encodeRegExp(element);
21+
}
22+
1923
if (typeof element.encode !== 'function') {
2024
throw new Error(`\`encodeNode\`: unknown element type ${element.type}`);
2125
}
@@ -42,6 +46,37 @@ function encodeText(text: string): EncodeResult {
4246
};
4347
}
4448

49+
/**
 * Encodes a native `RegExp` so it can be mixed with other regex elements.
 *
 * Only `regexp.source` is read here; the flags of the passed `RegExp` are not consulted.
 *
 * @param regexp - the regular expression whose source should be embedded.
 * @returns the source together with the precedence it should be combined at.
 * @throws Error when the regexp has an empty pattern.
 */
function encodeRegExp(regexp: RegExp): EncodeResult {
  const pattern = regexp.source;

  // `RegExp.prototype.source` never yields an empty string: an empty pattern
  // (`new RegExp('')`) is reported as `(?:)`, so that form has to be checked as well.
  if (pattern.length === 0 || pattern === '(?:)') {
    throw new Error('`encodeRegExp`: received regexp should not be empty');
  }

  return {
    // Encode at safe precedence: anything not recognised as a single atom is reported as a
    // disjunction, because the source may contain a top-level `|`.
    precedence: isAtomicPattern(pattern) ? 'atom' : 'disjunction',
    pattern,
  };
}
62+
63+
/**
 * Reports whether a regex source can be treated as a single atom, i.e. a quantifier can be
 * appended to it directly without wrapping it in a group first.
 *
 * This is a conservative heuristic, not a parser: it is intended to catch only some popular
 * atomic patterns (a single character, a single character class, a single group). Returning
 * `false` for an atom that is not recognised is always safe.
 *
 * @param pattern - the `source` of a regular expression.
 * @returns `true` only when the pattern is recognised as one quantifiable atom.
 */
function isAtomicPattern(pattern: string): boolean {
  if (pattern.length === 1) {
    // `^` and `$` are assertions, not atoms: `^+` and `$+` are syntax errors.
    return pattern !== '^' && pattern !== '$';
  }

  // Exactly one `[` and one `]`, sitting at both ends: a single character class.
  if (pattern.startsWith('[') && pattern.endsWith(']') && pattern.match(/[[\]]/g)?.length === 2) {
    return true;
  }

  // Exactly one `(` and one `)`, sitting at both ends: a single group.
  if (pattern.startsWith('(') && pattern.endsWith(')') && pattern.match(/[()]/g)?.length === 2) {
    // Lookarounds (`(?=`, `(?!`, `(?<=`, `(?<!`) are assertions: lookbehinds can never be
    // quantified and lookaheads cannot in unicode mode, so they must not be reported as atoms.
    // Named groups (`(?<name>...)`) do not match this check and stay atomic.
    return !/^\(\?<?[=!]/.test(pattern);
  }

  return false;
}
79+
4580
function concatSequence(encoded: EncodeResult[]): EncodeResult {
4681
if (encoded.length === 1) {
4782
return encoded[0]!;

src/types.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ export type RegexSequence = RegexElement[] | RegexElement;
1212
/**
1313
* Fundamental building block of a regular expression, defined as either a regex construct, a string, or a `RegExp` literal (`/.../`).
1414
*/
15-
export type RegexElement = RegexConstruct | string;
15+
export type RegexElement = RegexConstruct | string | RegExp;
1616

1717
/**
1818
* Common interface for all regex constructs like character classes, quantifiers, and anchors.

0 commit comments

Comments
 (0)