Skip to content

Commit

Permalink
fix: proper encoding for any('.') (#76)
Browse files Browse the repository at this point in the history
  • Loading branch information
mdjastrzebski authored Mar 27, 2024
1 parent b72a125 commit 4469146
Show file tree
Hide file tree
Showing 2 changed files with 77 additions and 23 deletions.
42 changes: 38 additions & 4 deletions src/constructs/__tests__/character-class.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,25 @@ test('`charClass` throws on negated arguments', () => {
);
});

// Pins how `charClass` renders the predefined escape constants (`any`, `word`,
// `digit`, `whitespace`, `nonWord`, `nonDigit`, `nonWhitespace`), alone and combined.
test('`charClass` joins character escapes', () => {
// A single escape is expected to render bare, without surrounding brackets.
expect(charClass(any)).toEqualRegex(/./);
expect(charClass(word)).toEqualRegex(/\w/);
expect(charClass(digit)).toEqualRegex(/\d/);
expect(charClass(whitespace)).toEqualRegex(/\s/);
expect(charClass(nonWord)).toEqualRegex(/\W/);
expect(charClass(nonDigit)).toEqualRegex(/\D/);
expect(charClass(nonWhitespace)).toEqualRegex(/\S/);

// Two escapes are expected to be joined inside one bracketed class, in argument order.
// NOTE(review): inside brackets `.` is a literal dot, not the "any character"
// wildcard, so `[.\s]` matches less than `.` alone — confirm this is intended for `any`.
expect(charClass(any, whitespace)).toEqualRegex(/[.\s]/);
expect(charClass(any, nonWhitespace)).toEqualRegex(/[.\S]/);

expect(charClass(word, whitespace)).toEqualRegex(/[\w\s]/);
expect(charClass(any, word, digit)).toEqualRegex(/[.\w\d]/);

// Three and four escapes follow the same joining rule.
expect(charClass(word, digit, whitespace)).toEqualRegex(/[\w\d\s]/);
expect(charClass(any, word, digit, whitespace)).toEqualRegex(/[.\w\d\s]/);
});

test('`charRange` pattern', () => {
expect(charRange('a', 'z')).toEqualRegex(/[a-z]/);
expect(['x', charRange('0', '9')]).toEqualRegex(/x[0-9]/);
Expand All @@ -108,8 +127,8 @@ test('`charRange` throws on incorrect arguments', () => {
});

test('`anyOf` pattern', () => {
expect(anyOf('a')).toEqualRegex(/a/);
expect(['x', anyOf('a'), 'x']).toEqualRegex(/xax/);
expect(anyOf('a')).toEqualRegex(/[a]/);
expect(['x', anyOf('a'), 'x']).toEqualRegex(/x[a]x/);
expect(anyOf('ab')).toEqualRegex(/[ab]/);
expect(['x', anyOf('ab')]).toEqualRegex(/x[ab]/);
expect(['x', anyOf('ab'), 'x']).toEqualRegex(/x[ab]x/);
Expand All @@ -129,10 +148,25 @@ test('`anyOf` pattern moves hyphen to the last position', () => {
expect(anyOf('a-bc')).toEqualRegex(/[abc-]/);
});

test('`anyOf` pattern edge case caret and hyphen', () => {
// Edge cases for `anyOf`: caret/hyphen placement, and single characters that are
// regex metacharacters outside of a character class.
test('`anyOf` pattern edge cases', () => {
// Caret and hyphen only: the expected pattern escapes the caret and keeps the
// hyphen last, regardless of the order in the input text.
expect(anyOf('^-')).toEqualRegex(/[\^-]/);
expect(anyOf('-^')).toEqualRegex(/[\^-]/);
// With another character present, the caret is expected unescaped after that
// character, with the hyphen still last.
expect(anyOf('-^a')).toEqualRegex(/[a^-]/);

// Single special characters are expected to be wrapped in brackets; in the
// expectations below only `]` and `\` carry a backslash escape.
expect(anyOf('.')).toEqualRegex(/[.]/);
expect(anyOf('*')).toEqualRegex(/[*]/);
expect(anyOf('+')).toEqualRegex(/[+]/);
expect(anyOf('?')).toEqualRegex(/[?]/);
// NOTE(review): in JavaScript `/[^]/` is a negated empty class that matches ANY
// character, not a literal caret. This expectation probably pins a bug in the
// encoder — confirm, and consider expecting `/[\^]/` once the encoder escapes a lone caret.
expect(anyOf('^')).toEqualRegex(/[^]/);
expect(anyOf('$')).toEqualRegex(/[$]/);
expect(anyOf('{')).toEqualRegex(/[{]/);
expect(anyOf('}')).toEqualRegex(/[}]/);
expect(anyOf('(')).toEqualRegex(/[(]/);
expect(anyOf(')')).toEqualRegex(/[)]/);
expect(anyOf('|')).toEqualRegex(/[|]/);
expect(anyOf('[')).toEqualRegex(/[[]/);
expect(anyOf(']')).toEqualRegex(/[\]]/);
expect(anyOf('\\')).toEqualRegex(/[\\]/);
});

test('`anyOf` throws on empty text', () => {
Expand All @@ -147,7 +181,7 @@ test('`negated` character class pattern', () => {
});

test('`negated` character class pattern double inversion', () => {
expect(negated(negated(anyOf('a')))).toEqualRegex(/a/);
expect(negated(negated(anyOf('a')))).toEqualRegex(/[a]/);
expect(negated(negated(anyOf('abc')))).toEqualRegex(/[abc]/);
});

Expand Down
58 changes: 39 additions & 19 deletions src/constructs/character-class.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import type { RegexConstruct } from '../types';

export interface CharacterClass extends RegexConstruct {
type: 'characterClass';
escape?: string;
chars: string[];
ranges: CharacterRange[];
isNegated: boolean;
Expand All @@ -19,55 +20,62 @@ export interface CharacterRange {

export const any: CharacterClass = {
type: 'characterClass',
chars: ['.'],
escape: '.',
chars: [],
ranges: [],
isNegated: false,
encode: encodeCharacterClass,
};

export const digit: CharacterClass = {
type: 'characterClass',
chars: ['\\d'],
escape: '\\d',
chars: [],
ranges: [],
isNegated: false,
encode: encodeCharacterClass,
};

export const nonDigit: CharacterClass = {
type: 'characterClass',
chars: ['\\D'],
escape: '\\D',
chars: [],
ranges: [],
isNegated: false,
encode: encodeCharacterClass,
};

export const word: CharacterClass = {
type: 'characterClass',
chars: ['\\w'],
escape: '\\w',
chars: [],
ranges: [],
isNegated: false,
encode: encodeCharacterClass,
};

export const nonWord: CharacterClass = {
type: 'characterClass',
chars: ['\\W'],
escape: '\\W',
chars: [],
ranges: [],
isNegated: false,
encode: encodeCharacterClass,
};

export const whitespace: CharacterClass = {
type: 'characterClass',
chars: ['\\s'],
escape: '\\s',
chars: [],
ranges: [],
isNegated: false,
encode: encodeCharacterClass,
};

export const nonWhitespace: CharacterClass = {
type: 'characterClass',
chars: ['\\S'],
escape: '\\S',
chars: [],
ranges: [],
isNegated: false,
encode: encodeCharacterClass,
Expand All @@ -89,15 +97,17 @@ export const notWord = nonWord;
export const notWhitespace = nonWhitespace;

export function charClass(...elements: CharacterClass[]): CharacterClass {
elements.forEach((element) => {
if (element.isNegated) {
throw new Error('`charClass` should receive only non-negated character classes');
}
});
if (elements.some((e) => e.isNegated)) {
throw new Error('`charClass` should receive only non-negated character classes');
}

if (elements.length === 1) {
return elements[0]!;
}

return {
type: 'characterClass',
chars: elements.map((c) => c.chars).flat(),
chars: elements.map((c) => getAllChars(c)).flat(),
ranges: elements.map((c) => c.ranges).flat(),
isNegated: false,
encode: encodeCharacterClass,
Expand Down Expand Up @@ -158,24 +168,26 @@ export function negated(element: CharacterClass): CharacterClass {
export const inverted = negated;

function encodeCharacterClass(this: CharacterClass): EncodeResult {
if (this.chars.length === 0 && this.ranges.length === 0) {
if (this.escape === undefined && this.chars.length === 0 && this.ranges.length === 0) {
throw new Error('Character class should contain at least one character or character range');
}

// Direct rendering for single-character class
if (this.chars.length === 1 && this.ranges?.length === 0 && !this.isNegated) {
if (this.escape !== undefined && !this.chars.length && !this.ranges.length && !this.isNegated) {
return {
precedence: 'atom',
pattern: this.chars[0]!,
pattern: this.escape,
};
}

const allChars = getAllChars(this);

// If the passed characters include a hyphen (`-`), it needs to be moved to the
// first (or last) position so that it is treated as a literal hyphen and not as a range operator.
// See: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_expressions/Character_classes#types
const hyphen = this.chars.includes('-') ? '-' : '';
const caret = this.chars.includes('^') ? '^' : '';
const otherChars = this.chars.filter((c) => c !== '-' && c !== '^').join('');
const hyphen = allChars.includes('-') ? '-' : '';
const caret = allChars.includes('^') ? '^' : '';
const otherChars = allChars.filter((c) => c !== '-' && c !== '^').join('');
const ranges = this.ranges.map(({ start, end }) => `${start}-${end}`).join('');
const negation = this.isNegated ? '^' : '';

Expand All @@ -191,3 +203,11 @@ function encodeCharacterClass(this: CharacterClass): EncodeResult {
/**
 * Prefixes every closing bracket (`]`) and backslash (`\`) in `text` with a
 * backslash. All other characters are returned unchanged.
 *
 * @param text - Raw characters to escape.
 * @returns `text` with each `]` and `\` preceded by a backslash.
 */
function escapeForCharacterClass(text: string): string {
  const needsEscaping = /[\]\\]/g;
  // `$&` in a replacement string stands for the whole matched substring.
  const backslashPrefixed = '\\$&';

  return text.replace(needsEscaping, backslashPrefixed);
}

/**
 * Collects the entries of a character class that are rendered as characters:
 * its optional `escape` (placed first) followed by its `chars`.
 *
 * When `escape` is `undefined`, the class's own `chars` array is returned as-is
 * (same reference, not a copy); otherwise a new array is built.
 */
function getAllChars(characterClass: CharacterClass) {
  const { escape, chars } = characterClass;

  return escape === undefined ? chars : [escape, ...chars];
}

0 comments on commit 4469146

Please sign in to comment.