Skip to content

Commit e48796b

Browse files
committed
feat: characterClass
1 parent 1b4e445 commit e48796b

File tree

3 files changed

+51
-9
lines changed

3 files changed

+51
-9
lines changed

src/components/__tests__/character-class.test.ts

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import { one, oneOrMore } from '../quantifiers';
33
import {
44
any,
55
anyOf,
6+
characterClass,
67
digit,
78
encodeCharacterClass,
89
whitespace,
@@ -33,6 +34,15 @@ test('"word" character class', () => {
3334
expect(buildPattern(one('ab'), word, one('c'))).toEqual(`ab\\wc`);
3435
});
3536

37+
// Base cases for `characterClass`: ranges alone, ranges mixed with single
// characters, and a class placed before/after literal text.
test('"characterClass" base cases', () => {
  const expectations = [
    // Ranges only.
    [buildPattern(characterClass(['a', 'z'])), '[a-z]'],
    [buildPattern(characterClass(['a', 'z'], ['A', 'Z'])), '[a-zA-Z]'],
    // A range followed by single characters.
    [buildPattern(characterClass(['a', 'z'], '0', '5')), '[a-z05]'],
    // Character class adjacent to literal text.
    [buildPattern('x', characterClass(['a', 'z'])), 'x[a-z]'],
    [buildPattern(characterClass(['a', 'z']), 'x'), '[a-z]x'],
  ];

  for (const [actual, expected] of expectations) {
    expect(actual).toBe(expected);
  }
});
45+
3646
test('"any" character class', () => {
3747
expect(buildPattern(any)).toEqual(`.`);
3848

@@ -73,6 +83,6 @@ test('buildPattern throws on empty text', () => {
7383
characters: [],
7484
})
7585
).toThrowErrorMatchingInlineSnapshot(
76-
`"Character class should contain at least one character"`
86+
`"Character class should contain at least one range or character"`
7787
);
7888
});

src/components/character-class.ts

Lines changed: 34 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import { type EncoderNode, EncoderPrecedence } from '../encoder/types';
22
import { escapeText } from '../utils';
3-
import type { CharacterClass } from './types';
3+
import type { CharacterClass, CharacterRange } from './types';
44

55
export const any: CharacterClass = {
66
type: 'characterClass',
@@ -22,6 +22,24 @@ export const word: CharacterClass = {
2222
characters: ['\\w'],
2323
};
2424

25+
/**
 * Creates a character class from any mix of single characters and
 * `[start, end]` ranges, e.g. `characterClass(['a', 'z'], '0', '5')`.
 *
 * Ranges are validated eagerly: an inverted range such as `['z', 'a']` would
 * otherwise produce `[z-a]`, which the regex engine rejects ("Range out of
 * order in character class"), and a multi-character bound such as
 * `['ab', 'z']` would silently be read as `a` plus the range `b-z`.
 *
 * @param charactersOrRanges Characters (strings) and/or inclusive ranges
 *   (`[start, end]` tuples), in any order.
 * @returns A `CharacterClass` node whose characters and range bounds have
 *   been passed through `escapeText`.
 * @throws {Error} If a range bound is not exactly one character, or if a
 *   range's start comes after its end.
 */
export function characterClass(
  ...charactersOrRanges: Array<string | CharacterRange>
): CharacterClass {
  const characters: string[] = [];
  const ranges: CharacterRange[] = [];

  // Single pass; `Array.isArray` narrows each item, so no type casts are needed.
  for (const item of charactersOrRanges) {
    if (!Array.isArray(item)) {
      characters.push(escapeText(item));
      continue;
    }

    const [start, end] = item;
    if (rangeBoundCodePoint(start) > rangeBoundCodePoint(end)) {
      throw new Error('Character range start should not come after its end');
    }

    ranges.push([escapeText(start), escapeText(end)]);
  }

  return {
    type: 'characterClass',
    characters,
    ranges,
  };
}

// Returns the code point of a range bound, throwing unless the bound is
// exactly one character (one code point, which may span two UTF-16 units).
function rangeBoundCodePoint(bound: string): number {
  const codePoint = bound.codePointAt(0);
  if (
    codePoint === undefined ||
    String.fromCodePoint(codePoint).length !== bound.length
  ) {
    throw new Error('Character range bounds should be single characters');
  }

  return codePoint;
}
42+
2543
export function anyOf(characters: string): CharacterClass {
2644
const charactersArray = characters.split('').map(escapeText);
2745
if (charactersArray.length === 0) {
@@ -36,12 +54,15 @@ export function anyOf(characters: string): CharacterClass {
3654

3755
export function encodeCharacterClass({
3856
characters,
57+
ranges = [],
3958
}: CharacterClass): EncoderNode {
40-
if (characters.length === 0) {
41-
throw new Error('Character class should contain at least one character');
59+
if (characters.length === 0 && ranges.length === 0) {
60+
throw new Error(
61+
'Character class should contain at least one range or character'
62+
);
4263
}
4364

44-
if (characters.length === 1) {
65+
if (characters.length === 1 && ranges.length === 0) {
4566
return {
4667
precedence: EncoderPrecedence.Atom,
4768
pattern: characters[0]!,
@@ -50,17 +71,22 @@ export function encodeCharacterClass({
5071

5172
return {
5273
precedence: EncoderPrecedence.Atom,
53-
pattern: `[${reorderHyphen(characters).join('')}]`,
74+
pattern: `[${formatClass(characters, ranges)}]`,
5475
};
5576
}
5677

5778
// `formatClass` builds the text placed between `[` and `]`: every range
// rendered as `start-end`, followed by the remaining single characters.
// If the passed characters include a hyphen (`-`), it needs to be moved to
5879
// the first (or last) place so that it is treated as a literal hyphen and not as a range operator.
5980
// See: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_expressions/Character_classes#types
60-
function reorderHyphen(characters: string[]) {
81+
function formatClass(characters: string[], ranges: CharacterRange[] = []) {
82+
const formattedCharacters = characters.filter((c) => c !== '-').join('');
83+
const formattedRanges = ranges
84+
.map(([start, end]) => `${start}-${end}`)
85+
.join('');
86+
6187
if (characters.includes('-')) {
62-
return ['-', ...characters.filter((c) => c !== '-')];
88+
return `-${formattedRanges}${formattedCharacters}`;
6389
}
6490

65-
return characters;
91+
return `${formattedRanges}${formattedCharacters}`;
6692
}

src/components/types.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,14 @@ export type Quantifier = One | OneOrMore | Optionally | ZeroOrMore | Repeat;
55
/**
 * Character class node: a list of single characters plus an optional list of
 * inclusive character ranges.
 */
export type CharacterClass = {
66
type: 'characterClass';
77
characters: string[];
8+
// Optional: a class may consist of single characters only.
ranges?: CharacterRange[];
89
};
910

11+
/**
 * Character range from start to end (inclusive), expressed as a
 * `[start, end]` tuple, e.g. `['a', 'z']`.
 */
14+
export type CharacterRange = [start: string, end: string];
15+
1016
// Components
1117
export type ChoiceOf = {
1218
type: 'choiceOf';

0 commit comments

Comments
 (0)