From b05d838d18dad259e637b07b100ce53527bde032 Mon Sep 17 00:00:00 2001 From: Anthony Fu Date: Fri, 13 Sep 2024 16:15:03 +0200 Subject: [PATCH] feat(engine-js): improve js engine handling for markdown --- docs/references/engine-js-compat.md | 8 +++--- .../engine-javascript/scripts/generate.ts | 4 +-- packages/engine-javascript/scripts/utils.ts | 1 - packages/engine-javascript/src/index.ts | 13 +++++++++ .../engine-javascript/src/replacements.ts | 4 +-- .../engine-javascript/test/scripts.test.ts | 19 ------------- packages/engine-javascript/test/utils.test.ts | 28 +++++++++++++++++++ 7 files changed, 49 insertions(+), 28 deletions(-) delete mode 100644 packages/engine-javascript/test/scripts.test.ts create mode 100644 packages/engine-javascript/test/utils.test.ts diff --git a/docs/references/engine-js-compat.md b/docs/references/engine-js-compat.md index fef12f059..3408f3b6f 100644 --- a/docs/references/engine-js-compat.md +++ b/docs/references/engine-js-compat.md @@ -11,8 +11,8 @@ | | Count | | :-------------- | --------------------------------: | | Total Languages | 213 | -| Fully Supported | [171](#fully-supported-languages) | -| Mismatched | [24](#mismatched-languages) | +| Fully Supported | [172](#fully-supported-languages) | +| Mismatched | [23](#mismatched-languages) | | Unsupported | [18](#unsupported-languages) | ## Fully Supported Languages @@ -115,6 +115,7 @@ Languages that works with the JavaScript RegExp engine, and will produce the sam | make | ✅ OK | 51 | - | | | marko | ✅ OK | 926 | - | | | matlab | ✅ OK | 88 | - | | +| mdc | ✅ OK | 784 | - | | | mojo | ✅ OK | 213 | - | | | move | ✅ OK | 120 | - | | | narrat | ✅ OK | 34 | - | | @@ -209,8 +210,7 @@ Languages that does not throw with the JavaScript RegExp engine, but will produc | glsl | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=glsl) | 186 | - | 306 | | haml | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=haml) | 1612 | - | 48 | | kusto | [🚧 
Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=kusto) | 60 | - | 40 | -| markdown | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=markdown) | 118 | - | 648 | -| mdc | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=mdc) | 784 | - | 407 | +| markdown | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=markdown) | 118 | - | 78 | | mermaid | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=mermaid) | 129 | - | 38 | | nginx | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=nginx) | 378 | - | 4 | | objective-cpp | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=objective-cpp) | 309 | - | 172 | diff --git a/packages/engine-javascript/scripts/generate.ts b/packages/engine-javascript/scripts/generate.ts index de90fddcd..4a98a82b5 100644 --- a/packages/engine-javascript/scripts/generate.ts +++ b/packages/engine-javascript/scripts/generate.ts @@ -20,7 +20,7 @@ type Replacement = ReplacementRecursiveBackReference | ReplacementStatic const replacements: Replacement[] = [ { // Subroutine recursive reference are not supported in JavaScript regex engine. 
- // We expand a few levels of recursion to literals to simulate the behavior (incomplete) + // We expand a few levels of recursion to literals to simulate the behavior (it's incomplete tho) type: 'recursive-back-reference', regex: '(?<square>[^\\[\\]\\\\]|\\\\.|\\[\\g<square>*+\\])', groupName: 'square', @@ -30,7 +30,7 @@ const replacements: Replacement[] = [ type: 'recursive-back-reference', regex: '(?<url>(?>[^\\s()]+)|\\(\\g<url>*\\))', groupName: 'url', - fallback: '[^\\s\\(\\)]', + fallback: '(?>[^\\s()]+)', }, ] diff --git a/packages/engine-javascript/scripts/utils.ts b/packages/engine-javascript/scripts/utils.ts index 5bed87010..a3382aa0b 100644 --- a/packages/engine-javascript/scripts/utils.ts +++ b/packages/engine-javascript/scripts/utils.ts @@ -15,7 +15,6 @@ export function expandRecursiveBackReference( out = out .replace(refMarker, fallback) - .replace(groupMaker, '(?:') return out } diff --git a/packages/engine-javascript/src/index.ts b/packages/engine-javascript/src/index.ts index 4a12ad6db..ff2f7c68e 100644 --- a/packages/engine-javascript/src/index.ts +++ b/packages/engine-javascript/src/index.ts @@ -66,8 +66,21 @@ export class JavaScriptScanner implements PatternScanner { this.contiguousAnchorSimulation = Array.from({ length: patterns.length }, () => false) this.regexps = patterns.map((p, idx) => { + /** + * vscode-textmate replace anchors to \uFFFF, where we still not sure how to handle it correctly + * + * @see https://github.com/shikijs/vscode-textmate/blob/8d2e84a3aad21afd6b08fd53c7acd421c7f5aa44/src/rule.ts#L687-L702 + * + * This is a temporary workaround for markdown grammar + */ + if (simulation) + p = p.replaceAll('(^|\\\uFFFF)', '(^|\\G)') + + // Detect contiguous anchors for simulation if (simulation && (p.startsWith('(^|\\G)') || p.startsWith('(\\G|^)'))) this.contiguousAnchorSimulation[idx] = true + + // Cache const cached = cache?.get(p) if (cached) { if (cached instanceof RegExp) { diff --git a/packages/engine-javascript/src/replacements.ts 
b/packages/engine-javascript/src/replacements.ts index 9d7f20791..5ea83efdd 100644 --- a/packages/engine-javascript/src/replacements.ts +++ b/packages/engine-javascript/src/replacements.ts @@ -3,10 +3,10 @@ export const replacements = [ [ '(?<square>[^\\[\\]\\\\]|\\\\.|\\[\\g<square>*+\\])', - '(?:[^\\[\\]\\\\]|\\\\.|\\[(?:[^\\[\\]\\\\]|\\\\.|\\[(?:[^\\[\\]\\\\]|\\\\.|\\[(?:[^\\[\\]\\\\])*+\\])*+\\])*+\\])', + '(?<square>[^\\[\\]\\\\]|\\\\.|\\[(?:[^\\[\\]\\\\]|\\\\.|\\[(?:[^\\[\\]\\\\]|\\\\.|\\[(?:[^\\[\\]\\\\])*+\\])*+\\])*+\\])', ], [ '(?<url>(?>[^\\s()]+)|\\(\\g<url>*\\))', - '(?:(?>[^\\s()]+)|\\((?:(?>[^\\s()]+)|\\((?:(?>[^\\s()]+)|\\([^\\s\\(\\)]*\\))*\\))*\\))', + '(?<url>(?>[^\\s()]+)|\\((?:(?>[^\\s()]+)|\\((?:(?>[^\\s()]+)|\\((?>[^\\s()]+)*\\))*\\))*\\))', ], ] as [string, string][] diff --git a/packages/engine-javascript/test/scripts.test.ts b/packages/engine-javascript/test/scripts.test.ts deleted file mode 100644 index e183001dc..000000000 --- a/packages/engine-javascript/test/scripts.test.ts +++ /dev/null @@ -1,19 +0,0 @@ -import { describe, expect, it } from 'vitest' -import { expandRecursiveBackReference } from '../scripts/utils' - -describe('expandRecursiveBackReference', () => { - it('case 1', () => { - const name = 'square' - const regex = '(?<square>[^\\[\\]\\\\]|\\\\.|\\[\\g<square>*\\])' - const fallback = '(?:[^\\[\\]\\\\])' - - expect(expandRecursiveBackReference(regex, name, fallback, 0)) - .toMatchInlineSnapshot(`"(?:[^\\[\\]\\\\]|\\\\.|\\[(?:[^\\[\\]\\\\])*\\])"`) - - expect(expandRecursiveBackReference(regex, name, fallback, 1)) - .toMatchInlineSnapshot(`"(?:[^\\[\\]\\\\]|\\\\.|\\[(?:[^\\[\\]\\\\]|\\\\.|\\[(?:[^\\[\\]\\\\])*\\])*\\])"`) - - expect(expandRecursiveBackReference(regex, name, fallback, 2)) - .toMatchInlineSnapshot(`"(?:[^\\[\\]\\\\]|\\\\.|\\[(?:[^\\[\\]\\\\]|\\\\.|\\[(?:[^\\[\\]\\\\]|\\\\.|\\[(?:[^\\[\\]\\\\])*\\])*\\])*\\])"`) - }) -}) diff --git a/packages/engine-javascript/test/utils.test.ts b/packages/engine-javascript/test/utils.test.ts new file mode 100644 index 
000000000..f3cc535dd --- /dev/null +++ b/packages/engine-javascript/test/utils.test.ts @@ -0,0 +1,28 @@ +import { describe, expect, it } from 'vitest' +import { expandRecursiveBackReference } from '../scripts/utils' + +describe('expandRecursiveBackReference', () => { + it('case 1', () => { + const name = 'square' + const regex = '(?<square>[^\\[\\]\\\\]|\\\\.|\\[\\g<square>*\\])' + const fallback = '(?:[^\\[\\]\\\\])' + + expect(expandRecursiveBackReference(regex, name, fallback, 0)) + .toMatchInlineSnapshot(`"(?<square>[^\\[\\]\\\\]|\\\\.|\\[(?:[^\\[\\]\\\\])*\\])"`) + + expect(expandRecursiveBackReference(regex, name, fallback, 1)) + .toMatchInlineSnapshot(`"(?<square>[^\\[\\]\\\\]|\\\\.|\\[(?:[^\\[\\]\\\\]|\\\\.|\\[(?:[^\\[\\]\\\\])*\\])*\\])"`) + + expect(expandRecursiveBackReference(regex, name, fallback, 2)) + .toMatchInlineSnapshot(`"(?<square>[^\\[\\]\\\\]|\\\\.|\\[(?:[^\\[\\]\\\\]|\\\\.|\\[(?:[^\\[\\]\\\\]|\\\\.|\\[(?:[^\\[\\]\\\\])*\\])*\\])*\\])"`) + }) + + it('case 2', () => { + const name = 'url' + const regex = '(?<url>(?>[^\\s()]+)|\\(\\g<url>*\\))' + const fallback = '(?>[^\\s()]+)' + + expect(expandRecursiveBackReference(regex, name, fallback, 0)) + .toMatchInlineSnapshot(`"(?<url>(?>[^\\s()]+)|\\((?>[^\\s()]+)*\\))"`) + }) +})