Skip to content

Commit 36b4f30

Browse files
feat: non-greedy quantifiers (#59)
1 parent 477c227 commit 36b4f30

File tree

6 files changed

+95
-26
lines changed

6 files changed

+95
-26
lines changed

README.md

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -109,14 +109,19 @@ See [API document](./docs/API.md).
109109

110110
### Quantifiers
111111

112-
| Quantifier | Regex Syntax | Description |
113-
| -------------------------------- | ------------ | ------------------------------------------------- |
114-
| `zeroOrMore(x)` | `x*` | Zero or more occurence of a pattern |
115-
| `oneOrMore(x)` | `x+` | One or more occurence of a pattern |
116-
| `optional(x)` | `x?` | Zero or one occurence of a pattern |
117-
| `repeat(x, n)` | `x{n}` | Pattern repeats exact number of times |
118-
| `repeat(x, { min: n, })` | `x{n,}` | Pattern repeats at least given number of times |
119-
| `repeat(x, { min: n, max: n2 })` | `x{n1,n2}` | Pattern repeats between n1 and n2 number of times |
112+
| Quantifier | Regex Syntax | Description |
113+
| ----------------------------------------------- | ------------ | -------------------------------------------------------------- |
114+
| `zeroOrMore(x)` | `x*` | Zero or more occurrences of a pattern |
115+
| `zeroOrMore(x, { greedy: false })` | `x*?` | Zero or more occurrences of a pattern (non-greedy) |
116+
| `oneOrMore(x)` | `x+` | One or more occurrences of a pattern |
117+
| `oneOrMore(x, { greedy: false })` | `x+?` | One or more occurrences of a pattern (non-greedy) |
118+
| `optional(x)` | `x?` | Zero or one occurrence of a pattern |
119+
| `optional(x, { greedy: false })` | `x??` | Zero or one occurrence of a pattern (non-greedy) |
120+
| `repeat(x, n)` | `x{n}` | Pattern repeats an exact number of times |
121+
| `repeat(x, { min: n })` | `x{n,}` | Pattern repeats at least the given number of times |
122+
| `repeat(x, { min: n, greedy: false })` | `x{n,}?` | Pattern repeats at least the given number of times (non-greedy) |
123+
| `repeat(x, { min: n1, max: n2 })` | `x{n1,n2}` | Pattern repeats between n1 and n2 times |
124+
| `repeat(x, { min: n1, max: n2, greedy: false })` | `x{n1,n2}?` | Pattern repeats between n1 and n2 times (non-greedy) |
120125

121126
### Character classes
122127

docs/API.md

Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -81,10 +81,15 @@ Quantifiers in regex define the number of occurrences to match for a pattern.
8181
```ts
8282
function zeroOrMore(
8383
sequence: RegexSequence,
84+
options?: {
85+
greedy?: boolean, // default=true
86+
}
8487
): ZeroOrMore
8588
```
8689

87-
Regex syntax: `x*`;
90+
Regex syntax:
91+
* `x*` for default greedy behavior (match as many characters as possible)
92+
* `x*?` for non-greedy behavior (match as few characters as possible)
8893

8994
The `zeroOrMore` quantifier matches zero or more occurrences of a given pattern, allowing a flexible number of repetitions of that element.
9095

@@ -93,10 +98,15 @@ The `zeroOrMore` quantifier matches zero or more occurrences of a given pattern,
9398
```ts
9499
function oneOrMore(
95100
sequence: RegexSequence,
101+
options?: {
102+
greedy?: boolean, // default=true
103+
}
96104
): OneOrMore
97105
```
98106

99-
Regex syntax: `x+`;
107+
Regex syntax:
108+
* `x+` for default greedy behavior (match as many characters as possible)
109+
* `x+?` for non-greedy behavior (match as few characters as possible)
100110

101111
The `oneOrMore` quantifier matches one or more occurrences of a given pattern, allowing a flexible number of repetitions of that element.
102112

@@ -105,10 +115,15 @@ The `oneOrMore` quantifier matches one or more occurrences of a given pattern, a
105115
```ts
106116
function optional(
107117
sequence: RegexSequence,
118+
options?: {
119+
greedy?: boolean, // default=true
120+
}
108121
): Optional
109122
```
110123

111-
Regex syntax: `x?`;
124+
Regex syntax:
125+
* `x?` for default greedy behavior (match as many characters as possible)
126+
* `x??` for non-greedy behavior (match as few characters as possible)
112127

113128
The `optional` quantifier matches zero or one occurrence of a given pattern, making it optional.
114129

@@ -117,11 +132,17 @@ The `optional` quantifier matches zero or one occurrence of a given pattern, mak
117132
```ts
118133
function repeat(
119134
sequence: RegexSequence,
120-
count: number | { min: number; max?: number },
135+
options: number | {
136+
min: number;
137+
max?: number;
138+
greedy?: boolean; // default=true
139+
},
121140
): Repeat
122141
```
123142

124-
Regex syntax: `x{n}`, `x{min,}`, `x{min, max}`.
143+
Regex syntax:
144+
* `x{n}`, `x{min,}`, `x{min,max}` for default greedy behavior (match as many characters as possible)
145+
* `x{min,}?`, `x{min,max}?` for non-greedy behavior (match as few characters as possible)
125146

126147
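The `repeat` quantifier matches the pattern either an exact number of times (when `options` is a number) or between `min` and `max` times (when `options` is an object). If only `min` is provided, it matches at least `min` times. The `greedy` flag applies only to the object form.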
127148

src/constructs/__tests__/quantifiers.test.tsx

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,3 +37,25 @@ test('base quantifiers optimize grouping for atoms', () => {
3737
expect(optional('a')).toEqualRegex(/a?/);
3838
expect(zeroOrMore('a')).toEqualRegex(/a*/);
3939
});
40+
41+
test('greedy quantifiers', () => {
42+
expect(oneOrMore('a', { greedy: true })).toEqualRegex(/a+/);
43+
expect(oneOrMore('ab', { greedy: true })).toEqualRegex(/(?:ab)+/);
44+
45+
expect(optional('a', { greedy: true })).toEqualRegex(/a?/);
46+
expect(optional('ab', { greedy: true })).toEqualRegex(/(?:ab)?/);
47+
48+
expect(zeroOrMore('a', { greedy: true })).toEqualRegex(/a*/);
49+
expect(zeroOrMore('ab', { greedy: true })).toEqualRegex(/(?:ab)*/);
50+
});
51+
52+
test('non-greedy quantifiers', () => {
53+
expect(oneOrMore('a', { greedy: false })).toEqualRegex(/a+?/);
54+
expect(oneOrMore('ab', { greedy: false })).toEqualRegex(/(?:ab)+?/);
55+
56+
expect(optional('a', { greedy: false })).toEqualRegex(/a??/);
57+
expect(optional('ab', { greedy: false })).toEqualRegex(/(?:ab)??/);
58+
59+
expect(zeroOrMore('a', { greedy: false })).toEqualRegex(/a*?/);
60+
expect(zeroOrMore('ab', { greedy: false })).toEqualRegex(/(?:ab)*?/);
61+
});

src/constructs/__tests__/repeat.test.tsx

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,3 +22,13 @@ test('`repeat` throws on no children', () => {
2222
`"\`repeat\` should receive at least one element"`,
2323
);
2424
});
25+
26+
test('greedy `repeat` quantifier', () => {
27+
expect(repeat('a', { min: 1, greedy: true })).toEqualRegex(/a{1,}/);
28+
expect(repeat('a', { min: 1, max: 5, greedy: true })).toEqualRegex(/a{1,5}/);
29+
});
30+
31+
test('non-greedy `repeat` quantifier', () => {
32+
expect(repeat('a', { min: 1, greedy: false })).toEqualRegex(/a{1,}?/);
33+
expect(repeat('a', { min: 1, max: 5, greedy: false })).toEqualRegex(/a{1,5}?/);
34+
});

src/constructs/quantifiers.ts

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,62 +3,71 @@ import type { EncodeResult } from '../encoder/types';
33
import { ensureArray } from '../utils/elements';
44
import type { RegexConstruct, RegexElement, RegexSequence } from '../types';
55

6+
export interface QuantifierOptions {
7+
greedy?: boolean;
8+
}
69
export interface ZeroOrMore extends RegexConstruct {
710
type: 'zeroOrMore';
811
children: RegexElement[];
12+
options?: QuantifierOptions;
913
}
1014

1115
export interface OneOrMore extends RegexConstruct {
1216
type: 'oneOrMore';
1317
children: RegexElement[];
18+
options?: QuantifierOptions;
1419
}
1520

1621
export interface Optional extends RegexConstruct {
1722
type: 'optional';
1823
children: RegexElement[];
24+
options?: QuantifierOptions;
1925
}
2026

21-
export function zeroOrMore(sequence: RegexSequence): ZeroOrMore {
27+
export function zeroOrMore(sequence: RegexSequence, options?: QuantifierOptions): ZeroOrMore {
2228
return {
2329
type: 'zeroOrMore',
2430
children: ensureArray(sequence),
31+
options,
2532
encode: encodeZeroOrMore,
2633
};
2734
}
2835

29-
export function oneOrMore(sequence: RegexSequence): OneOrMore {
36+
export function oneOrMore(sequence: RegexSequence, options?: QuantifierOptions): OneOrMore {
3037
return {
3138
type: 'oneOrMore',
3239
children: ensureArray(sequence),
40+
options,
3341
encode: encodeOneOrMore,
3442
};
3543
}
3644

37-
export function optional(sequence: RegexSequence): Optional {
45+
export function optional(sequence: RegexSequence, options?: QuantifierOptions): Optional {
3846
return {
3947
type: 'optional',
4048
children: ensureArray(sequence),
49+
options,
4150
encode: encodeOptional,
4251
};
4352
}
4453

4554
function encodeZeroOrMore(this: ZeroOrMore): EncodeResult {
4655
return {
4756
precedence: 'sequence',
48-
pattern: `${encodeAtom(this.children).pattern}*`,
57+
pattern: `${encodeAtom(this.children).pattern}*${this.options?.greedy === false ? '?' : ''}`,
4958
};
5059
}
5160

5261
function encodeOneOrMore(this: OneOrMore): EncodeResult {
5362
return {
5463
precedence: 'sequence',
55-
pattern: `${encodeAtom(this.children).pattern}+`,
64+
pattern: `${encodeAtom(this.children).pattern}+${this.options?.greedy === false ? '?' : ''}`,
5665
};
5766
}
5867

5968
function encodeOptional(this: Optional): EncodeResult {
6069
return {
6170
precedence: 'sequence',
62-
pattern: `${encodeAtom(this.children).pattern}?`,
71+
pattern: `${encodeAtom(this.children).pattern}?${this.options?.greedy === false ? '?' : ''}`,
6372
};
6473
}

src/constructs/repeat.ts

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,13 @@ import type { RegexConstruct, RegexElement, RegexSequence } from '../types';
55

66
export interface Repeat extends RegexConstruct {
77
type: 'repeat';
8-
count: RepeatCount;
98
children: RegexElement[];
9+
options: RepeatOptions;
1010
}
1111

12-
export type RepeatCount = number | { min: number; max?: number };
12+
export type RepeatOptions = number | { min: number; max?: number; greedy?: boolean };
1313

14-
export function repeat(sequence: RegexSequence, count: RepeatCount): Repeat {
14+
export function repeat(sequence: RegexSequence, options: RepeatOptions): Repeat {
1515
const children = ensureArray(sequence);
1616

1717
if (children.length === 0) {
@@ -21,23 +21,25 @@ export function repeat(sequence: RegexSequence, count: RepeatCount): Repeat {
2121
return {
2222
type: 'repeat',
2323
children,
24-
count: count,
24+
options,
2525
encode: encodeRepeat,
2626
};
2727
}
2828

2929
function encodeRepeat(this: Repeat): EncodeResult {
3030
const atomicNodes = encodeAtom(this.children);
3131

32-
if (typeof this.count === 'number') {
32+
if (typeof this.options === 'number') {
3333
return {
3434
precedence: 'sequence',
35-
pattern: `${atomicNodes.pattern}{${this.count}}`,
35+
pattern: `${atomicNodes.pattern}{${this.options}}`,
3636
};
3737
}
3838

3939
return {
4040
precedence: 'sequence',
41-
pattern: `${atomicNodes.pattern}{${this.count.min},${this.count?.max ?? ''}}`,
41+
pattern: `${atomicNodes.pattern}{${this.options.min},${this.options?.max ?? ''}}${
42+
this.options.greedy === false ? '?' : ''
43+
}`,
4244
};
4345
}

0 commit comments

Comments
 (0)