Skip to content

fix: character class escaping #35

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Dec 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .editorconfig
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ root = true
indent_style = space
indent_size = 2

max_line_length = 100

end_of_line = lf
charset = utf-8
trim_trailing_whitespace = true
Expand Down
10 changes: 9 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,15 @@ const hexDigit = characterClass(
characterRange('0', '9')
);

// prettier-ignore
const hexColor = buildRegex(
startOfString,
optionally('#'),
capture(
choiceOf(repeat({ count: 6 }, hexDigit), repeat({ count: 3 }, hexDigit))
choiceOf(
repeat({ count: 6 }, hexDigit),
repeat({ count: 3 }, hexDigit)
)
),
endOfString
);
Expand All @@ -44,6 +48,10 @@ import { buildRegex, capture, oneOrMore } from 'ts-regex-builder';
const regex = buildRegex(['Hello ', capture(oneOrMore(word))]);
```

## Examples

See [Examples document](./docs/Examples.md).

## Contributing

See the [contributing guide](CONTRIBUTING.md) to learn how to contribute to the repository and the development workflow.
Expand Down
41 changes: 41 additions & 0 deletions docs/Examples.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Regex Examples

## IPv4 address validation

```ts
// Match integers from 0-255
const octet = choiceOf(
[digit],
[characterRange('1', '9'), digit],
['1', repeat({ count: 2 }, digit)],
['2', characterRange('0', '4'), digit],
['25', characterRange('0', '5')]
);

// Match a complete IPv4 address: four captured octets separated by dots
const regex = buildRegex([
startOfString,
capture(octet),
'.',
capture(octet),
'.',
capture(octet),
'.',
capture(octet),
endOfString,
]);
```

This code generates the following regex pattern:

```ts
const regex =
/^(\d|[1-9]\d|1\d{2}|2[0-4]\d|25[0-5])\.(\d|[1-9]\d|1\d{2}|2[0-4]\d|25[0-5])\.(\d|[1-9]\d|1\d{2}|2[0-4]\d|25[0-5])\.(\d|[1-9]\d|1\d{2}|2[0-4]\d|25[0-5])$/;
```

This pattern uses repetition of the `capture(octet)` elements to generate capture groups for each of the IPv4 octets:

```ts
// Matched groups: ['192.168.0.1', '192', '168', '0', '1']
const match = regex.exec('192.168.0.1');
```
4 changes: 1 addition & 3 deletions src/__tests__/builder.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,5 @@ test('`regexBuilder` flags', () => {
expect(buildRegex({ sticky: true }, 'a').flags).toBe('y');
expect(buildRegex({ sticky: false }, 'a').flags).toBe('');

expect(
buildRegex({ global: true, ignoreCase: true, multiline: false }, 'a').flags
).toBe('gi');
expect(buildRegex({ global: true, ignoreCase: true, multiline: false }, 'a').flags).toBe('gi');
});
49 changes: 49 additions & 0 deletions src/__tests__/examples.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
import {
buildRegex,
capture,
characterRange,
choiceOf,
digit,
endOfString,
repeat,
startOfString,
} from '../index';

// Documentation example: builds an IPv4 validator out of the library's building blocks and
// checks the captured octets, a set of rejected inputs, and the exact generated pattern.
test('example: IPv4 address validator', () => {
  // A single octet: an unsigned integer in the range 0-255.
  const octet = choiceOf(
    [digit],
    [characterRange('1', '9'), digit],
    ['1', repeat({ count: 2 }, digit)],
    ['2', characterRange('0', '4'), digit],
    ['25', characterRange('0', '5')]
  );

  // Four captured octets separated by literal dots, anchored at both ends.
  const addressPattern = [
    startOfString,
    capture(octet),
    '.',
    capture(octet),
    '.',
    capture(octet),
    '.',
    capture(octet),
    endOfString,
  ];
  const regex = buildRegex(addressPattern);

  // Each entry: input address followed by the expected match groups (full match first).
  const acceptedAddresses: Array<[string, string[]]> = [
    ['0.0.0.0', ['0.0.0.0', '0', '0', '0', '0']],
    ['192.168.0.1', ['192.168.0.1', '192', '168', '0', '1']],
    ['1.99.100.249', ['1.99.100.249', '1', '99', '100', '249']],
    ['255.255.255.255', ['255.255.255.255', '255', '255', '255', '255']],
    ['123.45.67.89', ['123.45.67.89', '123', '45', '67', '89']],
  ];
  for (const [address, groups] of acceptedAddresses) {
    expect(regex).toMatchGroups(address, groups);
  }

  // Inputs that must not be accepted as IPv4 addresses.
  const rejectedAddresses = [
    '0.0.0.',
    '0.0.0.0.',
    '0.-1.0.0',
    '0.1000.0.0',
    '0.0.300.0',
    '255.255.255.256',
  ];
  for (const address of rejectedAddresses) {
    expect(regex.test(address)).toBe(false);
  }

  // The generated pattern is pinned verbatim.
  const expectedSource =
    '^(\\d|[1-9]\\d|1\\d{2}|2[0-4]\\d|25[0-5])\\.(\\d|[1-9]\\d|1\\d{2}|2[0-4]\\d|25[0-5])\\.(\\d|[1-9]\\d|1\\d{2}|2[0-4]\\d|25[0-5])\\.(\\d|[1-9]\\d|1\\d{2}|2[0-4]\\d|25[0-5])$';
  expect(regex.source).toEqual(expectedSource);
});
10 changes: 2 additions & 8 deletions src/builders.ts
Original file line number Diff line number Diff line change
Expand Up @@ -35,19 +35,13 @@ export function buildRegex(elements: RegexNode | RegexNode[]): RegExp;
* @param flags RegExp flags object
* @returns RegExp object
*/
export function buildRegex(
flags: RegexFlags,
elements: RegexNode | RegexNode[]
): RegExp;
export function buildRegex(flags: RegexFlags, elements: RegexNode | RegexNode[]): RegExp;

export function buildRegex(first: any, second?: any): RegExp {
return _buildRegex(...optionalFirstArg(first, second));
}

export function _buildRegex(
flags: RegexFlags,
elements: RegexNode | RegexNode[]
): RegExp {
export function _buildRegex(flags: RegexFlags, elements: RegexNode | RegexNode[]): RegExp {
const pattern = encodeSequence(asNodeArray(elements)).pattern;
const flagsString = encodeFlags(flags ?? {});
return new RegExp(pattern, flagsString);
Expand Down
6 changes: 1 addition & 5 deletions src/components/__tests__/capture.test.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,5 @@ test('`capture` base cases', () => {
test('`capture` captures group', () => {
expect(capture('b')).toMatchGroups('ab', ['b', 'b']);
expect(['a', capture('b')]).toMatchGroups('ab', ['ab', 'b']);
expect(['a', capture('b'), capture('c')]).toMatchGroups('abc', [
'abc',
'b',
'c',
]);
expect(['a', capture('b'), capture('c')]).toMatchGroups('abc', ['abc', 'b', 'c']);
});
24 changes: 10 additions & 14 deletions src/components/__tests__/character-class.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,21 +37,17 @@ test('`whitespace` character class', () => {

test('`characterClass` base cases', () => {
expect(characterClass(characterRange('a', 'z'))).toHavePattern('[a-z]');
expect(
characterClass(characterRange('a', 'z'), characterRange('A', 'Z'))
).toHavePattern('[a-zA-Z]');
expect(characterClass(characterRange('a', 'z'), anyOf('05'))).toHavePattern(
'[a-z05]'
expect(characterClass(characterRange('a', 'z'), characterRange('A', 'Z'))).toHavePattern(
'[a-zA-Z]'
);
expect(characterClass(characterRange('a', 'z'), anyOf('05'))).toHavePattern('[a-z05]');
expect(characterClass(characterRange('a', 'z'), whitespace, anyOf('05'))).toHavePattern(
'[a-z\\s05]'
);
expect(
characterClass(characterRange('a', 'z'), whitespace, anyOf('05'))
).toHavePattern('[a-z\\s05]');
});

test('`characterClass` throws on inverted arguments', () => {
expect(() =>
characterClass(inverted(whitespace))
).toThrowErrorMatchingInlineSnapshot(
expect(() => characterClass(inverted(whitespace))).toThrowErrorMatchingInlineSnapshot(
`"\`characterClass\` should receive only non-inverted character classes"`
);
});
Expand Down Expand Up @@ -89,11 +85,11 @@ test('`anyOf` with quantifiers', () => {
});

test('`anyOf` escapes special characters', () => {
expect(anyOf('abc-+.')).toHavePattern('[-abc\\+\\.]');
expect(anyOf('abc-+.]\\')).toHavePattern('[abc+.\\]\\\\-]');
});

test('`anyOf` moves hyphen to the first position', () => {
expect(anyOf('a-bc')).toHavePattern('[-abc]');
test('`anyOf` moves hyphen to the last position', () => {
expect(anyOf('a-bc')).toHavePattern('[abc-]');
});

test('`anyOf` throws on empty text', () => {
Expand Down
10 changes: 4 additions & 6 deletions src/components/__tests__/choice-of.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,16 +24,14 @@ test('`choiceOf` used in sequence', () => {
test('`choiceOf` with sequence options', () => {
expect([choiceOf(['a', 'b'])]).toHavePattern('ab');
expect([choiceOf(['a', 'b'], ['c', 'd'])]).toHavePattern('ab|cd');
expect([
choiceOf(['a', zeroOrMore('b')], [oneOrMore('c'), 'd']),
]).toHavePattern('ab*|c+d');
expect([choiceOf(['a', zeroOrMore('b')], [oneOrMore('c'), 'd'])]).toHavePattern('ab*|c+d');
});

test('`choiceOf` using nested regex', () => {
expect(choiceOf(oneOrMore('a'), zeroOrMore('b'))).toHavePattern('a+|b*');
expect(
choiceOf(repeat({ min: 1, max: 3 }, 'a'), repeat({ count: 5 }, 'bx'))
).toHavePattern('a{1,3}|(?:bx){5}');
expect(choiceOf(repeat({ min: 1, max: 3 }, 'a'), repeat({ count: 5 }, 'bx'))).toHavePattern(
'a{1,3}|(?:bx){5}'
);
});

test('`choiceOf` throws on empty options', () => {
Expand Down
8 changes: 2 additions & 6 deletions src/components/__tests__/repeat.test.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,8 @@ test('`repeat` quantifier', () => {
expect(['a', repeat({ min: 1 }, 'b')]).toHavePattern('ab{1,}');
expect(['a', repeat({ count: 1 }, 'b')]).toHavePattern('ab{1}');

expect(['a', repeat({ count: 1 }, ['a', zeroOrMore('b')])]).toHavePattern(
'a(?:ab*){1}'
);
expect(repeat({ count: 5 }, ['text', ' ', oneOrMore('d')])).toHavePattern(
'(?:text d+){5}'
);
expect(['a', repeat({ count: 1 }, ['a', zeroOrMore('b')])]).toHavePattern('a(?:ab*){1}');
expect(repeat({ count: 5 }, ['text', ' ', oneOrMore('d')])).toHavePattern('(?:text d+){5}');
});

test('`repeat` optimizes grouping for atoms', () => {
Expand Down
43 changes: 14 additions & 29 deletions src/components/character-class.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import type { EncodeOutput } from '../encoder/types';
import { escapeText } from '../utils/text';

export interface CharacterClass {
type: 'characterClass';
Expand Down Expand Up @@ -52,9 +51,7 @@ export const whitespace: CharacterClass = {
export function characterClass(...elements: CharacterClass[]): CharacterClass {
elements.forEach((element) => {
if (element.isInverted) {
throw new Error(
'`characterClass` should receive only non-inverted character classes'
);
throw new Error('`characterClass` should receive only non-inverted character classes');
}
});

Expand All @@ -69,37 +66,29 @@ export function characterClass(...elements: CharacterClass[]): CharacterClass {

export function characterRange(start: string, end: string): CharacterClass {
if (start.length !== 1) {
throw new Error(
'`characterRange` should receive only single character `start` string'
);
throw new Error('`characterRange` should receive only single character `start` string');
}

if (end.length !== 1) {
throw new Error(
'`characterRange` should receive only single character `end` string'
);
throw new Error('`characterRange` should receive only single character `end` string');
}

if (start > end) {
throw new Error('`start` should be before or equal to `end`');
}

const range = {
start: escapeText(start),
end: escapeText(end),
};

return {
type: 'characterClass',
characters: [],
ranges: [range],
ranges: [{ start, end }],
isInverted: false,
encode: encodeCharacterClass,
};
}

export function anyOf(characters: string): CharacterClass {
const charactersArray = characters.split('').map(escapeText);
const charactersArray = characters.split('').map((c) => escapeForCharacterClass(c));

if (charactersArray.length === 0) {
throw new Error('`anyOf` should received at least one character');
}
Expand All @@ -125,17 +114,11 @@ export function inverted(element: CharacterClass): CharacterClass {

function encodeCharacterClass(this: CharacterClass): EncodeOutput {
if (this.characters.length === 0 && this.ranges.length === 0) {
throw new Error(
'Character class should contain at least one character or character range'
);
throw new Error('Character class should contain at least one character or character range');
}

// Direct rendering for single-character class
if (
this.characters.length === 1 &&
this.ranges?.length === 0 &&
!this.isInverted
) {
if (this.characters.length === 1 && this.ranges?.length === 0 && !this.isInverted) {
return {
precedence: 'atom',
pattern: this.characters[0]!,
Expand All @@ -147,13 +130,15 @@ function encodeCharacterClass(this: CharacterClass): EncodeOutput {
// See: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_expressions/Character_classes#types
const hyphen = this.characters.includes('-') ? '-' : '';
const otherCharacters = this.characters.filter((c) => c !== '-').join('');
const ranges = this.ranges
.map(({ start, end }) => `${start}-${end}`)
.join('');
const ranges = this.ranges.map(({ start, end }) => `${start}-${end}`).join('');
const isInverted = this.isInverted ? '^' : '';

return {
precedence: 'atom',
pattern: `[${isInverted}${hyphen}${ranges}${otherCharacters}]`,
pattern: `[${isInverted}${ranges}${otherCharacters}${hyphen}]`,
};
}

/**
 * Escapes the characters in `text` that would otherwise change the meaning of an enclosing
 * `[...]` character class.
 *
 * Escaped characters:
 * - `]` would close the class early,
 * - `\` would start an escape sequence,
 * - `^` would negate the class if it ended up as the first character.
 *
 * The hyphen is intentionally left untouched: the character class encoder in this file
 * filters it out and appends it as the last character of the class.
 *
 * @param text Raw text (typically a single character) to be placed inside a character class.
 * @returns `text` with each of `]`, `\` and `^` prefixed by a backslash.
 */
function escapeForCharacterClass(text: string): string {
  return text.replace(/[\]\\^]/g, '\\$&'); // $& means the whole matched string
}
4 changes: 1 addition & 3 deletions src/components/choice-of.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,7 @@ export interface ChoiceOf extends RegexElement {
alternatives: RegexNode[][];
}

export function choiceOf(
...alternatives: Array<RegexNode | RegexNode[]>
): ChoiceOf {
export function choiceOf(...alternatives: Array<RegexNode | RegexNode[]>): ChoiceOf {
if (alternatives.length === 0) {
throw new Error('`choiceOf` should receive at least one alternative');
}
Expand Down
9 changes: 2 additions & 7 deletions src/components/repeat.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,7 @@ export interface Repeat extends RegexElement {

export type RepeatOptions = { count: number } | { min: number; max?: number };

export function repeat(
options: RepeatOptions,
nodes: RegexNode | RegexNode[]
): Repeat {
export function repeat(options: RepeatOptions, nodes: RegexNode | RegexNode[]): Repeat {
const children = asNodeArray(nodes);

if (children.length === 0) {
Expand All @@ -41,8 +38,6 @@ function encodeRepeat(this: Repeat): EncodeOutput {

return {
precedence: 'sequence',
pattern: `${atomicNodes.pattern}{${this.options.min},${
this.options?.max ?? ''
}}`,
pattern: `${atomicNodes.pattern}{${this.options.min},${this.options?.max ?? ''}}`,
};
}
Loading