Skip to content

Commit e46fc8b

Browse files
committed
feat: support for lookarounds and non-capture groups
1 parent 1ab6f1a commit e46fc8b

12 files changed

+448
-5
lines changed

docs/API.md

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,65 @@ Regex syntax: `(...)`.
5858

5959
Captures, also known as capturing groups, extract and store parts of the matched string for later use.
6060

61+
### `nonCaptureGroup()`
62+
63+
```ts
64+
function nonCaptureGroup(
65+
sequence: RegexSequence
66+
): NonCaptureGroup
67+
```
68+
69+
Regex syntax: `(?:...)`.
70+
71+
Groups multiple tokens together without creating a capture group.
72+
73+
### `positiveLookahead()`
74+
75+
```ts
76+
function positiveLookahead(
77+
sequence: RegexSequence
78+
): PositiveLookahead
79+
```
80+
81+
Regex syntax: `(?=...)`.
82+
83+
Matches a group after the main expression without including it in the result.
84+
85+
### `negativeLookahead()`
86+
87+
```ts
88+
function negativeLookahead(
89+
sequence: RegexSequence
90+
): NegativeLookahead
91+
```
92+
93+
Regex syntax: `(?!...)`.
94+
95+
Specifies a group that cannot match after the main expression (if it matches, the result is discarded).
96+
97+
### `positiveLookbehind()`
98+
99+
```ts
100+
function positiveLookbehind(
101+
sequence: RegexSequence
102+
): PositiveLookbehind
103+
```
104+
105+
Regex syntax: `(?<=...)`.
106+
107+
Matches a group before the main expression without including it in the result.
108+
109+
### `negativeLookbehind()`
110+
111+
```ts
112+
function negativeLookbehind(
113+
sequence: RegexSequence
114+
): NegativeLookbehind
115+
```
116+
117+
Regex syntax: `(?<!...)`.
118+
119+
Specifies a group that cannot match before the main expression (if it matches, the result is discarded).
61120
### `choiceOf()`
62121

63122
```ts
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
import { negativeLookahead } from '../negative-lookahead';
2+
import { oneOrMore, zeroOrMore } from '../quantifiers';
3+
import { anyOf, digit } from '../character-class';
4+
import { capture } from '../capture';
5+
6+
test('`Negative Lookahead` base cases', () => {
7+
expect(negativeLookahead('a')).toEqualRegex(/(?!a)/);
8+
expect(negativeLookahead('abc')).toEqualRegex(/(?!abc)/);
9+
expect(negativeLookahead(oneOrMore('abc'))).toEqualRegex(/(?!(?:abc)+)/);
10+
expect(oneOrMore(negativeLookahead('abc'))).toEqualRegex(/(?!abc)+/);
11+
});
12+
13+
test('`Negative Lookahead` use cases', () => {
14+
expect([negativeLookahead('$'), oneOrMore(digit)]).toMatchString('1 turkey costs 30$');
15+
expect([negativeLookahead('a'), 'b']).toMatchString('abba');
16+
expect(['a', negativeLookahead(capture('bba'))]).not.toMatchGroups('abba', ['a', 'bba']);
17+
expect([negativeLookahead('-'), anyOf('+-'), zeroOrMore(digit)]).not.toMatchString('-123');
18+
expect([negativeLookahead('-'), anyOf('+-'), zeroOrMore(digit)]).toMatchString('+123');
19+
});
20+
21+
test('`Negative Lookahead` with multiple elements', () => {
22+
expect(negativeLookahead(['abc', 'def'])).toEqualRegex(/(?!abcdef)/);
23+
expect(negativeLookahead([oneOrMore('abc'), 'def'])).toEqualRegex(/(?!(?:abc)+def)/);
24+
expect(negativeLookahead(['abc', oneOrMore('def')])).toEqualRegex(/(?!abc(?:def)+)/);
25+
});
26+
27+
test('`Negative Lookahead` with special characters', () => {
28+
expect(negativeLookahead(['$', '+'])).toEqualRegex(/(?!\$\+)/);
29+
expect(negativeLookahead(['[', ']'])).toEqualRegex(/(?!\[\])/);
30+
expect(negativeLookahead(['\\', '\\'])).toEqualRegex(/(?!\\\\)/);
31+
});
32+
33+
test('`Negative Lookahead` with quantifiers', () => {
34+
expect(negativeLookahead(zeroOrMore('abc'))).toEqualRegex(/(?!(?:abc)*)/);
35+
expect(negativeLookahead(oneOrMore('abc'))).toEqualRegex(/(?!(?:abc)+)/);
36+
expect(negativeLookahead(['abc', zeroOrMore('def')])).toEqualRegex(/(?!abc(?:def)*)/);
37+
});
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
import { negativeLookbehind } from '../negative-lookbehind';
2+
import { oneOrMore } from '../quantifiers';
3+
4+
test('`Negative Lookbehind` with single character', () => {
5+
expect(negativeLookbehind('a')).toEqualRegex(/(?<!a)/);
6+
expect(negativeLookbehind('b')).toEqualRegex(/(?<!b)/);
7+
expect(negativeLookbehind('c')).toEqualRegex(/(?<!c)/);
8+
});
9+
10+
test('`Negative Lookbehind` with multiple characters', () => {
11+
expect(negativeLookbehind('abc')).toEqualRegex(/(?<!abc)/);
12+
expect(negativeLookbehind('def')).toEqualRegex(/(?<!def)/);
13+
expect(negativeLookbehind('xyz')).toEqualRegex(/(?<!xyz)/);
14+
});
15+
16+
test('`Negative Lookbehind` with quantifiers', () => {
17+
expect(negativeLookbehind(oneOrMore('abc'))).toEqualRegex(/(?<!(?:abc)+)/);
18+
expect(negativeLookbehind(oneOrMore('def'))).toEqualRegex(/(?<!(?:def)+)/);
19+
expect(negativeLookbehind(oneOrMore('xyz'))).toEqualRegex(/(?<!(?:xyz)+)/);
20+
});
21+
22+
test('`Negative Lookbehind` with special characters', () => {
23+
expect(negativeLookbehind('-')).toEqualRegex(/(?<!-)/);
24+
expect(negativeLookbehind('$')).toEqualRegex(/(?<!\$)/);
25+
expect(negativeLookbehind('@')).toEqualRegex(/(?<!@)/);
26+
});
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
import { nonCaptureGroup } from '../non-capture-group';
2+
import { oneOrMore } from '../quantifiers';
3+
4+
// Non-capture groups use the `(?:...)` syntax. A bare `(:...)` is a *capturing*
// group whose content starts with a literal colon, so it must not appear here.
test('`NonCaptureGroup` base cases', () => {
  expect(nonCaptureGroup('a')).toEqualRegex(/(?:a)/);
  expect(nonCaptureGroup('abc')).toEqualRegex(/(?:abc)/);
  expect(nonCaptureGroup(oneOrMore('abc'))).toEqualRegex(/(?:(?:abc)+)/);
  expect(oneOrMore(nonCaptureGroup('abc'))).toEqualRegex(/(?:abc)+/);
});

// NOTE(review): assumes `toMatchGroups` compares against the full match array
// (index 0 = whole match, followed by captured groups) — confirm against the
// matcher implementation. Under that assumption a non-capture group must
// produce the whole match only, with no additional group entries.
test('`NonCaptureGroup` does not capture group', () => {
  expect(nonCaptureGroup('b')).toMatchGroups('ab', ['b']);
  expect(nonCaptureGroup('b')).not.toMatchGroups('ab', ['b', 'b']);

  expect(['a', nonCaptureGroup('b')]).toMatchGroups('ab', ['ab']);
  expect(['a', nonCaptureGroup('b')]).not.toMatchGroups('ab', ['ab', 'b']);

  expect(['a', nonCaptureGroup('b'), nonCaptureGroup('c')]).toMatchGroups('abc', ['abc']);
  expect(['a', nonCaptureGroup('b'), nonCaptureGroup('c')]).not.toMatchGroups('abc', [
    'abc',
    'b',
    'c',
  ]);
});

test('`NonCaptureGroup` with special characters', () => {
  expect(nonCaptureGroup('$')).toEqualRegex(/(?:\$)/);
  expect(nonCaptureGroup('+')).toEqualRegex(/(?:\+)/);
  expect(nonCaptureGroup('\\')).toEqualRegex(/(?:\\)/);
});

test('`NonCaptureGroup` with quantifiers', () => {
  expect(nonCaptureGroup(oneOrMore('abc'))).toEqualRegex(/(?:(?:abc)+)/);
  expect(nonCaptureGroup(oneOrMore('def'))).toEqualRegex(/(?:(?:def)+)/);
  expect(nonCaptureGroup(oneOrMore('xyz'))).toEqualRegex(/(?:(?:xyz)+)/);
});
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
import { positiveLookahead } from '../positive-lookahead';
2+
import { capture } from '../capture';
3+
import { oneOrMore, zeroOrMore } from '../quantifiers';
4+
import { digit, word } from '../character-class';
5+
6+
test('`Positive Lookahead` base cases', () => {
7+
expect(positiveLookahead('a')).toEqualRegex(/(?=a)/);
8+
expect([digit, positiveLookahead('abc')]).toEqualRegex(/\d(?=abc)/);
9+
expect(positiveLookahead(oneOrMore('abc'))).toEqualRegex(/(?=(?:abc)+)/);
10+
expect([zeroOrMore(word), positiveLookahead('abc')]).toEqualRegex(/\w*(?=abc)/);
11+
});
12+
13+
test('`Positive Lookahead` use cases', () => {
14+
expect([oneOrMore(digit), positiveLookahead('$')]).toMatchString('1 turkey costs 30$');
15+
expect(['q', positiveLookahead('u')]).toMatchString('queen');
16+
expect(['a', positiveLookahead('b'), positiveLookahead('c')]).not.toMatchString('abc');
17+
expect(['a', positiveLookahead(capture('bba'))]).toMatchGroups('abba', ['a', 'bba']);
18+
});
19+
20+
test('`Positive Lookahead` with multiple elements', () => {
21+
expect(positiveLookahead(['a', 'b', 'c'])).toEqualRegex(/(?=abc)/);
22+
});
23+
24+
test('`Positive Lookahead` with nested constructs', () => {
25+
expect(positiveLookahead(oneOrMore(capture('abc')))).toEqualRegex(/(?=(abc)+)/);
26+
expect(positiveLookahead([zeroOrMore(word), capture('abc')])).toEqualRegex(/(?=\w*(abc))/);
27+
});
28+
29+
test('`Positive Lookahead` with special characters', () => {
30+
expect(positiveLookahead(['$', capture('abc')])).toEqualRegex(/(?=\$(abc))/);
31+
expect(positiveLookahead(['q', capture('u')])).toEqualRegex(/(?=q(u))/);
32+
});
33+
34+
test('`Positive Lookahead` with capture group', () => {
35+
expect(positiveLookahead(capture('bba'))).toEqualRegex(/(?=(bba))/);
36+
});
37+
38+
test('`Positive Lookahead` with digit character class', () => {
39+
expect(positiveLookahead([digit, 'abc'])).toEqualRegex(/(?=\dabc)/);
40+
});
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
import { positiveLookbehind } from '../positive-lookbehind';
2+
import { oneOrMore, zeroOrMore } from '../quantifiers';
3+
import { anyOf, digit, whitespace, word } from '../character-class';
4+
5+
test('`Positive Lookbehind` base cases', () => {
6+
expect(positiveLookbehind('a')).toEqualRegex(/(?<=a)/);
7+
expect(positiveLookbehind('abc')).toEqualRegex(/(?<=abc)/);
8+
expect(positiveLookbehind(oneOrMore('abc'))).toEqualRegex(/(?<=(?:abc)+)/);
9+
expect(positiveLookbehind('abc')).toEqualRegex(/(?<=abc)/);
10+
});
11+
12+
// End-to-end matching scenarios for positive lookbehind: each case builds a
// sequence and checks whether it matches (or does not match) a sample string.
test('`Positive Lookbehind` use cases', () => {
  // A word character that is an "s", followed by whitespace ("cats ").
  expect([
    zeroOrMore(whitespace),
    word,
    positiveLookbehind('s'),
    oneOrMore(whitespace),
  ]).toMatchString('too many cats to feed.');

  // Digits are accepted only when "USD" appears immediately before them.
  expect([positiveLookbehind('USD'), zeroOrMore(whitespace), oneOrMore(digit)]).toMatchString(
    'The price is USD 30',
  );

  expect([positiveLookbehind('USD'), zeroOrMore(whitespace), oneOrMore(digit)]).not.toMatchString(
    'The price is CDN 30',
  );

  expect([positiveLookbehind('a'), 'b']).toMatchString('abba');

  // A lone lookbehind matches at any position preceded by ".mjs".
  const mjsImport = [positiveLookbehind('.mjs')];
  expect(mjsImport).toMatchString("import {Person} from './person.mjs';");
  expect(mjsImport).not.toMatchString("import {Person} from './person.js';");
  expect([anyOf('+-'), oneOrMore(digit), positiveLookbehind('-')]).not.toMatchString('+123');
});
35+
36+
test('`Positive Lookbehind` with multiple elements', () => {
37+
expect(positiveLookbehind(['abc', 'def'])).toEqualRegex(/(?<=abcdef)/);
38+
expect(positiveLookbehind([oneOrMore('abc'), 'def'])).toEqualRegex(/(?<=(?:abc)+def)/);
39+
expect(positiveLookbehind(['abc', oneOrMore('def')])).toEqualRegex(/(?<=abc(?:def)+)/);
40+
});
41+
42+
test('`Positive Lookbehind` with special characters', () => {
43+
expect(positiveLookbehind(['$', '+'])).toEqualRegex(/(?<=\$\+)/);
44+
expect(positiveLookbehind(['[', ']'])).toEqualRegex(/(?<=\[\])/);
45+
expect(positiveLookbehind(['\\', '\\'])).toEqualRegex(/(?<=\\\\)/);
46+
});
47+
48+
test('`Positive Lookbehind` with quantifiers', () => {
49+
expect(positiveLookbehind(zeroOrMore('abc'))).toEqualRegex(/(?<=(?:abc)*)/);
50+
expect(positiveLookbehind(oneOrMore('abc'))).toEqualRegex(/(?<=(?:abc)+)/);
51+
expect(positiveLookbehind(['abc', zeroOrMore('def')])).toEqualRegex(/(?<=abc(?:def)*)/);
52+
});
53+
54+
test('`Positive Lookbehind` with character classes', () => {
55+
expect(positiveLookbehind(word)).toEqualRegex(/(?<=\w)/);
56+
expect(positiveLookbehind(whitespace)).toEqualRegex(/(?<=\s)/);
57+
expect(positiveLookbehind(digit)).toEqualRegex(/(?<=\d)/);
58+
expect(positiveLookbehind(anyOf('abc'))).toEqualRegex(/(?<=[abc])/);
59+
});

src/constructs/negative-lookahead.ts

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
import { encodeSequence } from '../encoder/encoder';
2+
import type { EncodeResult } from '../encoder/types';
3+
import { ensureArray } from '../utils/elements';
4+
import type { RegexConstruct, RegexElement, RegexSequence } from '../types';
5+
6+
/**
7+
* Negative lookahead assertion.
8+
*
9+
* A negative lookahead assertion is a zero-width assertion that matches a group of characters only if it is not followed by a specific group of characters.
10+
*
11+
* @example
12+
* ```ts
13+
* negativeLookahead("a");
14+
* // /(?!a)/
15+
*
16+
* negativeLookahead(["a", "b", "c"]);
17+
* // /(?!abc)/
18+
* ```
19+
*/
20+
export interface NegativeLookahead extends RegexConstruct {
21+
type: 'NegativeLookahead';
22+
children: RegexElement[];
23+
}
24+
25+
/**
 * Builds a negative lookahead construct, encoded as `(?!...)`.
 *
 * @param sequence - Element or elements that must not follow the current position.
 * @returns A `NegativeLookahead` construct whose children are `sequence` passed through `ensureArray`.
 */
export function negativeLookahead(sequence: RegexSequence): NegativeLookahead {
  const children = ensureArray(sequence);
  const construct: NegativeLookahead = {
    type: 'NegativeLookahead',
    children,
    encode: encodeNegativeLookahead,
  };
  return construct;
}
32+
33+
/**
 * Encodes the construct's children as one sequence and wraps the resulting
 * pattern in `(?!` ... `)`.
 *
 * @returns An encode result with `atom` precedence and the wrapped pattern.
 */
function encodeNegativeLookahead(this: NegativeLookahead): EncodeResult {
  const { pattern: inner } = encodeSequence(this.children);
  return { precedence: 'atom', pattern: '(?!' + inner + ')' };
}

src/constructs/negative-lookbehind.ts

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
import { encodeSequence } from '../encoder/encoder';
2+
import type { EncodeResult } from '../encoder/types';
3+
import { ensureArray } from '../utils/elements';
4+
import type { RegexConstruct, RegexElement, RegexSequence } from '../types';
5+
6+
/**
7+
* Negative lookbehind assertion.
8+
*
9+
* A negative lookbehind assertion is a zero-width assertion that matches a group of characters only if it is not preceded by a specific group of characters.
10+
*
11+
* @example
12+
* ```ts
13+
* negativeLookbehind("a");
14+
* // /(?<!a)/
15+
*
16+
* negativeLookbehind(["a", "b", "c"]);
17+
* // /(?<!abc)/
18+
* ```
19+
*/
20+
export interface NegativeLookbehind extends RegexConstruct {
21+
type: 'NegativeLookbehind';
22+
children: RegexElement[];
23+
}
24+
25+
/**
 * Builds a negative lookbehind construct, encoded as `(?<!...)`.
 *
 * @param sequence - Element or elements that must not precede the current position.
 * @returns A `NegativeLookbehind` construct whose children are `sequence` passed through `ensureArray`.
 */
export function negativeLookbehind(sequence: RegexSequence): NegativeLookbehind {
  const children = ensureArray(sequence);
  const construct: NegativeLookbehind = {
    type: 'NegativeLookbehind',
    children,
    encode: encodeNegativeLookbehind,
  };
  return construct;
}
32+
33+
/**
 * Encodes the construct's children as one sequence and wraps the resulting
 * pattern in `(?<!` ... `)`.
 *
 * @returns An encode result with `atom` precedence and the wrapped pattern.
 */
function encodeNegativeLookbehind(this: NegativeLookbehind): EncodeResult {
  const { pattern: inner } = encodeSequence(this.children);
  return { precedence: 'atom', pattern: '(?<!' + inner + ')' };
}

src/constructs/non-capture-group.ts

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
import { encodeSequence } from '../encoder/encoder';
2+
import type { EncodeResult } from '../encoder/types';
3+
import { ensureArray } from '../utils/elements';
4+
import type { RegexConstruct, RegexElement, RegexSequence } from '../types';
5+
6+
/**
7+
* Non-capture group.
8+
*
9+
* A non-capture group is a group that does not capture the matched characters, allowing for more efficient matching.
10+
*
11+
* @example
12+
* ```ts
13+
* nonCaptureGroup("a");
14+
* // /(?:a)/
15+
*
16+
* nonCaptureGroup(["a", "b", "c"]);
17+
* // /(?:abc)/
18+
* ```
19+
*/
20+
export interface NonCaptureGroup extends RegexConstruct {
21+
type: 'NonCaptureGroup';
22+
children: RegexElement[];
23+
}
24+
25+
/**
 * Builds a non-capture group construct from the given sequence.
 *
 * @param sequence - Element or elements to group together.
 * @returns A `NonCaptureGroup` construct whose children are `sequence` passed through `ensureArray`.
 */
export function nonCaptureGroup(sequence: RegexSequence): NonCaptureGroup {
  const children = ensureArray(sequence);
  const construct: NonCaptureGroup = {
    type: 'NonCaptureGroup',
    children,
    encode: encodeNonCaptureGroup,
  };
  return construct;
}
32+
33+
/**
 * Encodes a non-capture group as `(?:...)`.
 *
 * The children are encoded as a single sequence and wrapped in a
 * non-capturing group; the result is reported with `atom` precedence.
 *
 * @returns An encode result holding the `(?:...)` pattern.
 */
function encodeNonCaptureGroup(this: NonCaptureGroup): EncodeResult {
  return {
    precedence: 'atom',
    // `?:` is what makes the group non-capturing. A bare `(:` would instead
    // open a capturing group that matches a literal colon.
    pattern: `(?:${encodeSequence(this.children).pattern})`,
  };
}

0 commit comments

Comments
 (0)