Skip to content

Commit 4c9ad8f

Browse files
fix: support double quote string enum (#122)
1 parent fc8da34 commit 4c9ad8f

File tree

2 files changed

+292
-17
lines changed

2 files changed

+292
-17
lines changed

src/__tests__/markdown-helpers.spec.ts

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,76 @@ def fn():
104104
expect(extractStringEnum('wassup')).toBe(null);
105105
});
106106

107+
it('should error helpfully on invalid value separators', () => {
108+
expect(() => extractStringEnum('Can be `x` sometimes `y'))
109+
.toThrowErrorMatchingInlineSnapshot(`
110+
"Unexpected separator token while extracting string enum, expected a comma or "and" or "or" but found "s"
111+
Context: \`x\` sometimes \`y
112+
^"
113+
`);
114+
});
115+
116+
it('should error helpfully on unterminated enum strings', () => {
117+
expect(() => extractStringEnum('Can be `x` or `y')).toThrowErrorMatchingInlineSnapshot(`
118+
"Unexpected early termination of token sequence while extracting string enum, did you forget to close a quote?
119+
Context: \`x\` or \`y"
120+
`);
121+
});
122+
123+
describe('mixed ticks', () => {
124+
it('should extract an enum when mixed quotes are used', () => {
125+
const values = extractStringEnum('Can be `x"` or "`y"')!;
126+
expect(values).not.toBe(null);
127+
expect(values).toHaveLength(2);
128+
expect(values[0].value).toBe('x"');
129+
expect(values[1].value).toBe('`y');
130+
});
131+
});
132+
133+
describe('deprecated wrappers', () => {
134+
it('should handle strikethrough deprecation wrappers', () => {
135+
const values = extractStringEnum('Can be `x` or ~~`y`~~')!;
136+
expect(values).not.toBe(null);
137+
expect(values).toHaveLength(2);
138+
expect(values[0].value).toBe('x');
139+
expect(values[1].value).toBe('y');
140+
});
141+
});
142+
143+
describe('lead-in descriptions', () => {
144+
it('should handle value lists that smoothly lead in to prose with a comma', () => {
145+
const values = extractStringEnum('Can be `x` or `y`, where `x` implies that...')!;
146+
expect(values).not.toBe(null);
147+
expect(values).toHaveLength(2);
148+
expect(values[0].value).toBe('x');
149+
expect(values[1].value).toBe('y');
150+
});
151+
152+
it('should handle value lists that smoothly lead in to prose with a fullstop', () => {
153+
const values = extractStringEnum('Can be `x` or `y`. The `x` value implies that...')!;
154+
expect(values).not.toBe(null);
155+
expect(values).toHaveLength(2);
156+
expect(values[0].value).toBe('x');
157+
expect(values[1].value).toBe('y');
158+
});
159+
160+
it('should handle value lists that smoothly lead in to prose with a semicolon', () => {
161+
const values = extractStringEnum('Can be `x` or `y`; the `x` value implies that...')!;
162+
expect(values).not.toBe(null);
163+
expect(values).toHaveLength(2);
164+
expect(values[0].value).toBe('x');
165+
expect(values[1].value).toBe('y');
166+
});
167+
168+
it('should handle value lists that smoothly lead in to prose with a hyphen', () => {
169+
const values = extractStringEnum('Can be `x` or `y` - the `x` value implies that...')!;
170+
expect(values).not.toBe(null);
171+
expect(values).toHaveLength(2);
172+
expect(values[0].value).toBe('x');
173+
expect(values[1].value).toBe('y');
174+
});
175+
});
176+
107177
describe('with backticks', () => {
108178
it('should extract an enum of the format "can be x"', () => {
109179
const values = extractStringEnum('Can be `x`')!;
@@ -260,6 +330,65 @@ def fn():
260330
});
261331
});
262332

333+
describe('with double quotes', () => {
334+
it('should extract an enum of the format "can be x"', () => {
335+
const values = extractStringEnum(`Can be "x"`)!;
336+
expect(values).not.toBe(null);
337+
expect(values).toHaveLength(1);
338+
expect(values[0].value).toBe('x');
339+
});
340+
341+
it('should extract an enum of the format "can be x or y"', () => {
342+
const values = extractStringEnum(`Can be "x" or "y"`)!;
343+
expect(values).not.toBe(null);
344+
expect(values).toHaveLength(2);
345+
expect(values[0].value).toBe('x');
346+
expect(values[1].value).toBe('y');
347+
});
348+
349+
it('should extract an enum of the format "can be x, y or z"', () => {
350+
const values = extractStringEnum(`Can be "x", "y" or "z"`)!;
351+
expect(values).not.toBe(null);
352+
expect(values).toHaveLength(3);
353+
expect(values[0].value).toBe('x');
354+
expect(values[1].value).toBe('y');
355+
expect(values[2].value).toBe('z');
356+
});
357+
358+
it('should extract an enum of the format "can be x, y, or z"', () => {
359+
const values = extractStringEnum(`Can be "x", "y", or "z"`)!;
360+
expect(values).not.toBe(null);
361+
expect(values).toHaveLength(3);
362+
expect(values[0].value).toBe('x');
363+
expect(values[1].value).toBe('y');
364+
expect(values[2].value).toBe('z');
365+
});
366+
367+
it('should extract an enum of the format "values include a', () => {
368+
const values = extractStringEnum(`Values include "a"`)!;
369+
expect(values).not.toBe(null);
370+
expect(values).toHaveLength(1);
371+
expect(values[0].value).toBe('a');
372+
});
373+
374+
it('should extract an enum of the format "values include a and b', () => {
375+
const values = extractStringEnum(`Values include "a" and "b"`)!;
376+
expect(values).not.toBe(null);
377+
expect(values).toHaveLength(2);
378+
expect(values[0].value).toBe('a');
379+
expect(values[1].value).toBe('b');
380+
});
381+
382+
it('should extract an enum of the format "values include a, b and c', () => {
383+
const values = extractStringEnum(`Values include "a", "b" and "c"`)!;
384+
expect(values).not.toBe(null);
385+
expect(values).toHaveLength(3);
386+
expect(values[0].value).toBe('a');
387+
expect(values[1].value).toBe('b');
388+
expect(values[2].value).toBe('c');
389+
});
390+
});
391+
263392
describe('rawTypeToTypeInformation()', () => {
264393
it('should map a primitive types correctly', () => {
265394
expect(rawTypeToTypeInformation('Boolean', '', null)).toMatchSnapshot();

src/markdown-helpers.ts

Lines changed: 163 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -453,28 +453,174 @@ export enum StripReturnTypeBehavior {
453453
DO_NOT_STRIP,
454454
}
455455

456+
// All possible value separators, sorted by reverse length to ensure
457+
// that we match the longer comma prefix variants first if they are present
458+
const niceSeparators = [',', 'and', 'or', ', and', ', or'].sort((a, b) => b.length - a.length);
459+
// Some string enums can also be objects; the final phrase is then "or an Object" and we
460+
// should gracefully terminate in that case
461+
const niceTerminators = [', or an Object', 'or an Object'].sort((a, b) => b.length - a.length);
462+
const suffixesToIgnore = ['(Deprecated)'];
463+
456464
export const extractStringEnum = (description: string): PossibleStringValue[] | null => {
457-
const possibleValues: PossibleStringValue[] = [];
458-
459-
const inlineValuesPattern = /(?:can be|values? includes?) ((?:(?:[`|'][a-zA-Z0-9-_\.:]+[`|'])(?:(, | )?))*(?:(?:or|and) [`|'][a-zA-Z0-9-_\.:]+[`|'])?)/i;
460-
const inlineMatch = inlineValuesPattern.exec(description);
461-
if (inlineMatch) {
462-
const valueString = inlineMatch[1];
463-
const valuePattern = /[`|']([a-zA-Z0-9-_\.:]+)[`|']/g;
464-
let value = valuePattern.exec(valueString);
465-
466-
while (value) {
467-
possibleValues.push({
468-
value: value[1],
469-
description: '',
470-
});
471-
value = valuePattern.exec(valueString);
465+
const inlineValuesLocatorPattern = /(?:can be|values? includes?) (.+)/i;
466+
const locatorMatch = inlineValuesLocatorPattern.exec(description);
467+
if (!locatorMatch) return null;
468+
469+
const valuesTokens = locatorMatch[1].split('');
470+
471+
const state = {
472+
// Where are we in the valuesTokens array
473+
position: 0,
474+
// What values have we found so far
475+
values: [] as string[],
476+
// The current value we are building, was found wrapped by `currentQuoter`
477+
currentValue: '',
478+
// The quote character that we encountered to start building a value
479+
// We won't stop adding characters to `currentValue` until the same character
480+
// is encountered again
481+
currentQuoter: null as null | string,
482+
// In some cases quoted values are wrapped with other markdown indicators, for
483+
// instance strikethrough ~ characters. This handles those to ensure anything
484+
// we allow as a wrapping character is unwrapped after a value is extracted.
485+
currentQuoterWrappers: [] as string[],
486+
// This is set to true after a value is extracted to allow us to parse out a
487+
// nice separator, for instance a comma. The complete list is in `niceSeparators`
488+
// above.
489+
expectingNiceSeparator: false,
490+
// This is set after the state machine reaches a point that _could_ be the end;
491+
// an invalid token encountered while this is true is not a fatal error but rather
492+
// the graceful termination of the state machine.
493+
couldBeDone: false,
494+
};
495+
const lookAhead = (length: number) => {
496+
return valuesTokens.slice(state.position - 1, state.position + length - 1).join('');
497+
};
498+
stringEnumTokenLoop: while (state.position < valuesTokens.length) {
499+
const char = valuesTokens[state.position];
500+
state.position++;
501+
502+
if (state.currentQuoter) {
503+
// We should never expect a separator inside a quoted value
504+
if (state.expectingNiceSeparator) {
505+
throw new Error('Impossible state encountered while extracting a string enum');
506+
}
507+
if (char === state.currentQuoter) {
508+
state.currentQuoter = null;
509+
state.values.push(state.currentValue);
510+
state.currentValue = '';
511+
state.expectingNiceSeparator = true;
512+
} else {
513+
state.currentValue += char;
514+
}
515+
} else {
516+
// Whitespace can be skipped
517+
if (char === ' ') {
518+
continue stringEnumTokenLoop;
519+
}
520+
521+
// If we're between values we should be expecting one of the above "nice"
522+
// separators.
523+
if (state.expectingNiceSeparator) {
524+
// Before checking for a separator we need to ensure we have unwrapped any wrapping
525+
// chars
526+
if (state.currentQuoterWrappers.length) {
527+
const expectedUnwrap = state.currentQuoterWrappers.pop();
528+
if (char !== expectedUnwrap) {
529+
throw new Error(
530+
`Unexpected token while extracting string enum. Expected an unwrapping token that matched "${expectedUnwrap}". But found token: ${char}\nContext: "${
531+
locatorMatch[1]
532+
}"\n${' '.repeat(8 + state.position)}^`,
533+
);
534+
}
535+
continue stringEnumTokenLoop;
536+
}
537+
538+
if (char === '.' || char === ';' || char === '-') {
539+
break stringEnumTokenLoop;
540+
}
541+
542+
for (const suffix of suffixesToIgnore) {
543+
if (lookAhead(suffix.length) === suffix) {
544+
state.position += suffix.length - 1;
545+
continue stringEnumTokenLoop;
546+
}
547+
}
548+
549+
for (const niceTerminator of niceTerminators) {
550+
if (lookAhead(niceTerminator.length) === niceTerminator) {
551+
state.position += niceTerminator.length - 1;
552+
state.expectingNiceSeparator = false;
553+
state.couldBeDone = true;
554+
continue stringEnumTokenLoop;
555+
}
556+
}
557+
558+
for (const niceSeparator of niceSeparators) {
559+
if (lookAhead(niceSeparator.length) === niceSeparator) {
560+
state.position += niceSeparator.length - 1;
561+
state.expectingNiceSeparator = false;
562+
if (niceSeparator === ',') {
563+
state.couldBeDone = true;
564+
}
565+
continue stringEnumTokenLoop;
566+
}
567+
}
568+
throw new Error(
569+
`Unexpected separator token while extracting string enum, expected a comma or "and" or "or" but found "${char}"\nContext: ${
570+
locatorMatch[1]
571+
}\n${' '.repeat(8 + state.position)}^`,
572+
);
573+
}
574+
575+
if (['"', "'", '`'].includes(char)) {
576+
// Quote chars start a new value
577+
state.currentQuoter = char;
578+
// A new value has started, we no longer could be done on an invalid char
579+
state.couldBeDone = false;
580+
continue stringEnumTokenLoop;
581+
}
582+
if (['~'].includes(char)) {
583+
// Deprecated string enum values are wrapped with strikethrough
584+
state.currentQuoterWrappers.push(char);
585+
continue stringEnumTokenLoop;
586+
}
587+
// If we are at the very start we should just assume our heuristic found something silly
588+
// and bail, 0 valid characters is skip-able
589+
if (state.position === 1) {
590+
return null;
591+
}
592+
// If the last thing we parsed _could_ have been a termination character
593+
// let's assume an invalid character here confirms that.
594+
if (state.couldBeDone) {
595+
break stringEnumTokenLoop;
596+
}
597+
// Anything else is unexpected
598+
throw new Error(
599+
`Unexpected token while extracting string enum. Token: ${char}\nContext: "${
600+
locatorMatch[1]
601+
}"\n${' '.repeat(9 + state.position)}^`,
602+
);
472603
}
604+
}
605+
606+
// Reached the end of the description, we should check
607+
// if we are in a clean state (not inside a quote).
608+
// If so we're good, if not hard error
609+
if (state.currentQuoter || state.currentValue) {
610+
throw new Error(
611+
`Unexpected early termination of token sequence while extracting string enum, did you forget to close a quote?\nContext: ${locatorMatch[1]}`,
612+
);
613+
}
473614

474-
return possibleValues.length === 0 ? null : possibleValues;
615+
// No options we should just bail, can't have a string enum with 0 options
616+
if (!state.values.length) {
617+
return null;
475618
}
476619

477-
return null;
620+
return state.values.map(value => ({
621+
value,
622+
description: '',
623+
}));
478624
};
479625

480626
export const extractReturnType = (

0 commit comments

Comments
 (0)