Skip to content

Commit

Permalink
feat(engine-js): improve js engine handling for markdown
Browse files Browse the repository at this point in the history
  • Loading branch information
antfu committed Sep 13, 2024
1 parent b3d493b commit b05d838
Show file tree
Hide file tree
Showing 7 changed files with 49 additions and 28 deletions.
8 changes: 4 additions & 4 deletions docs/references/engine-js-compat.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@
| | Count |
| :-------------- | --------------------------------: |
| Total Languages | 213 |
| Fully Supported | [171](#fully-supported-languages) |
| Mismatched | [24](#mismatched-languages) |
| Fully Supported | [172](#fully-supported-languages) |
| Mismatched | [23](#mismatched-languages) |
| Unsupported | [18](#unsupported-languages) |

## Fully Supported Languages
Expand Down Expand Up @@ -115,6 +115,7 @@ Languages that works with the JavaScript RegExp engine, and will produce the sam
| make | ✅ OK | 51 | - | |
| marko | ✅ OK | 926 | - | |
| matlab | ✅ OK | 88 | - | |
| mdc | ✅ OK | 784 | - | |
| mojo | ✅ OK | 213 | - | |
| move | ✅ OK | 120 | - | |
| narrat | ✅ OK | 34 | - | |
Expand Down Expand Up @@ -209,8 +210,7 @@ Languages that does not throw with the JavaScript RegExp engine, but will produc
| glsl | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=glsl) | 186 | - | 306 |
| haml | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=haml) | 1612 | - | 48 |
| kusto | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=kusto) | 60 | - | 40 |
| markdown | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=markdown) | 118 | - | 648 |
| mdc | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=mdc) | 784 | - | 407 |
| markdown | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=markdown) | 118 | - | 78 |
| mermaid | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=mermaid) | 129 | - | 38 |
| nginx | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=nginx) | 378 | - | 4 |
| objective-cpp | [🚧 Mismatch](https://textmate-grammars-themes.netlify.app/?grammar=objective-cpp) | 309 | - | 172 |
Expand Down
4 changes: 2 additions & 2 deletions packages/engine-javascript/scripts/generate.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ type Replacement = ReplacementRecursiveBackReference | ReplacementStatic
const replacements: Replacement[] = [
{
// Subroutine recursive references are not supported in the JavaScript regex engine.
// We expand a few levels of recursion to literals to simulate the behavior (incomplete)
// We expand a few levels of recursion into literals to simulate the behavior (this is incomplete, though)
type: 'recursive-back-reference',
regex: '(?<square>[^\\[\\]\\\\]|\\\\.|\\[\\g<square>*+\\])',
groupName: 'square',
Expand All @@ -30,7 +30,7 @@ const replacements: Replacement[] = [
type: 'recursive-back-reference',
regex: '(?<url>(?>[^\\s()]+)|\\(\\g<url>*\\))',
groupName: 'url',
fallback: '[^\\s\\(\\)]',
fallback: '(?>[^\\s()]+)',
},
]

Expand Down
1 change: 0 additions & 1 deletion packages/engine-javascript/scripts/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ export function expandRecursiveBackReference(

out = out
.replace(refMarker, fallback)
.replace(groupMaker, '(?:')

return out
}
13 changes: 13 additions & 0 deletions packages/engine-javascript/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,21 @@ export class JavaScriptScanner implements PatternScanner {

this.contiguousAnchorSimulation = Array.from({ length: patterns.length }, () => false)
this.regexps = patterns.map((p, idx) => {
/**
* vscode-textmate replaces anchors with \uFFFF, and we are still not sure how to handle that correctly
*
* @see https://github.com/shikijs/vscode-textmate/blob/8d2e84a3aad21afd6b08fd53c7acd421c7f5aa44/src/rule.ts#L687-L702
*
* This is a temporary workaround for markdown grammar
*/
if (simulation)
p = p.replaceAll('(^|\\\uFFFF)', '(^|\\G)')

// Detect contiguous anchors for simulation
if (simulation && (p.startsWith('(^|\\G)') || p.startsWith('(\\G|^)')))
this.contiguousAnchorSimulation[idx] = true

// Cache
const cached = cache?.get(p)
if (cached) {
if (cached instanceof RegExp) {
Expand Down
4 changes: 2 additions & 2 deletions packages/engine-javascript/src/replacements.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@
export const replacements = [
[
'(?<square>[^\\[\\]\\\\]|\\\\.|\\[\\g<square>*+\\])',
'(?:[^\\[\\]\\\\]|\\\\.|\\[(?:[^\\[\\]\\\\]|\\\\.|\\[(?:[^\\[\\]\\\\]|\\\\.|\\[(?:[^\\[\\]\\\\])*+\\])*+\\])*+\\])',
'(?<square>[^\\[\\]\\\\]|\\\\.|\\[(?:[^\\[\\]\\\\]|\\\\.|\\[(?:[^\\[\\]\\\\]|\\\\.|\\[(?:[^\\[\\]\\\\])*+\\])*+\\])*+\\])',
],
[
'(?<url>(?>[^\\s()]+)|\\(\\g<url>*\\))',
'(?:(?>[^\\s()]+)|\\((?:(?>[^\\s()]+)|\\((?:(?>[^\\s()]+)|\\([^\\s\\(\\)]*\\))*\\))*\\))',
'(?<url>(?>[^\\s()]+)|\\((?:(?>[^\\s()]+)|\\((?:(?>[^\\s()]+)|\\((?>[^\\s()]+)*\\))*\\))*\\))',
],
] as [string, string][]
19 changes: 0 additions & 19 deletions packages/engine-javascript/test/scripts.test.ts

This file was deleted.

28 changes: 28 additions & 0 deletions packages/engine-javascript/test/utils.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import { describe, expect, it } from 'vitest'
import { expandRecursiveBackReference } from '../scripts/utils'

/**
 * Snapshot tests for `expandRecursiveBackReference`.
 *
 * Each case passes a pattern containing a `\g<name>` reference to its own
 * named group, plus a `fallback` pattern, and pins the expanded output for a
 * given depth (the fourth argument) with an inline snapshot.
 */
describe('expandRecursiveBackReference', () => {
// Nested square brackets: the `square` group refers to itself inside `\[ ... \]`.
it('case 1', () => {
const name = 'square'
const regex = '(?<square>[^\\[\\]\\\\]|\\\\.|\\[\\g<square>*\\])'
const fallback = '(?:[^\\[\\]\\\\])'

// Depth 0: the `\g<square>` reference is replaced directly by the fallback;
// the outer named group `(?<square>` is kept as-is in the output.
expect(expandRecursiveBackReference(regex, name, fallback, 0))
.toMatchInlineSnapshot(`"(?<square>[^\\[\\]\\\\]|\\\\.|\\[(?:[^\\[\\]\\\\])*\\])"`)

// Depth 1: one copy of the group body is inlined as a non-capturing group
// before the innermost reference falls back.
expect(expandRecursiveBackReference(regex, name, fallback, 1))
.toMatchInlineSnapshot(`"(?<square>[^\\[\\]\\\\]|\\\\.|\\[(?:[^\\[\\]\\\\]|\\\\.|\\[(?:[^\\[\\]\\\\])*\\])*\\])"`)

// Depth 2: two nested copies of the group body are inlined, then the fallback.
expect(expandRecursiveBackReference(regex, name, fallback, 2))
.toMatchInlineSnapshot(`"(?<square>[^\\[\\]\\\\]|\\\\.|\\[(?:[^\\[\\]\\\\]|\\\\.|\\[(?:[^\\[\\]\\\\]|\\\\.|\\[(?:[^\\[\\]\\\\])*\\])*\\])*\\])"`)
})

// Nested parentheses in URLs: the fallback here is an atomic group `(?>...)`.
it('case 2', () => {
const name = 'url'
const regex = '(?<url>(?>[^\\s()]+)|\\(\\g<url>*\\))'
const fallback = '(?>[^\\s()]+)'

// Depth 0: `\g<url>` is swapped for the fallback verbatim.
expect(expandRecursiveBackReference(regex, name, fallback, 0))
.toMatchInlineSnapshot(`"(?<url>(?>[^\\s()]+)|\\((?>[^\\s()]+)*\\))"`)
})
})

0 comments on commit b05d838

Please sign in to comment.