Skip to content

Commit c27a393

Browse files
committed
perf(linter/plugins): deserialize AST on demand (#14288)
Avoid deserializing the AST if it's not required. It's possible that all rules return an empty visitor, or that all `createOnce` rules return `false` from their `before` hook. In those cases, the AST does not need to be walked, so AST doesn't need to be deserialized. And the source text doesn't need to be decoded from buffer either. These cases probably aren't so common, but not completely uncommon either. However, source text and AST are both accessible before traversal via `context.sourceCode`. Enable the usage of those APIs in `create` method / `before` hook by decoding source text / deserializing AST on demand when those APIs are used.
1 parent b69028f commit c27a393

File tree

14 files changed

+442
-44
lines changed

14 files changed

+442
-44
lines changed

apps/oxlint/src-js/plugins/lint.ts

Lines changed: 12 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,6 @@
1-
import {
2-
DATA_POINTER_POS_32,
3-
SOURCE_LEN_OFFSET,
4-
// TODO(camc314): we need to generate `.d.ts` file for this module.
5-
// @ts-expect-error
6-
} from '../generated/constants.js';
71
import { diagnostics, setupContextForFile } from './context.js';
82
import { registeredRules } from './load.js';
9-
import { resetSource, setupSourceForFile } from './source_code.js';
3+
import { getAst, resetSource, setupSourceForFile } from './source_code.js';
104
import { assertIs, getErrorMessage } from './utils.js';
115
import { addVisitorToCompiled, compiledVisitor, finalizeCompiledVisitor, initCompiledVisitor } from './visitor.js';
126

@@ -18,18 +12,10 @@ import { TOKEN } from '../../dist/src-js/raw-transfer/lazy-common.js';
1812
import { walkProgram } from '../../dist/generated/lazy/walk.js';
1913
*/
2014

21-
// @ts-expect-error we need to generate `.d.ts` file for this module
22-
import { deserializeProgramOnly } from '../../dist/generated/deserialize/ts.js';
2315
// @ts-expect-error we need to generate `.d.ts` file for this module
2416
import { walkProgram } from '../../dist/generated/visit/walk.js';
2517

26-
import type { AfterHook } from './types.ts';
27-
28-
// Buffer with typed array views of itself stored as properties
29-
interface BufferWithArrays extends Uint8Array {
30-
uint32: Uint32Array;
31-
float64: Float64Array;
32-
}
18+
import type { AfterHook, BufferWithArrays } from './types.ts';
3319

3420
// Buffers cache.
3521
//
@@ -38,9 +24,6 @@ interface BufferWithArrays extends Uint8Array {
3824
// until the process exits.
3925
const buffers: (BufferWithArrays | null)[] = [];
4026

41-
// Text decoder, for decoding source text from buffer
42-
const textDecoder = new TextDecoder('utf-8', { ignoreBOM: true });
43-
4427
// Array of `after` hooks to run after traversal. This array reused for every file.
4528
const afterHooks: AfterHook[] = [];
4629

@@ -105,20 +88,16 @@ function lintFileImpl(filePath: string, bufferId: number, buffer: Uint8Array | n
10588
throw new Error('Expected `ruleIds` to be a non-zero len array');
10689
}
10790

108-
// Decode source text from buffer
109-
const { uint32 } = buffer,
110-
programPos = uint32[DATA_POINTER_POS_32],
111-
sourceByteLen = uint32[(programPos + SOURCE_LEN_OFFSET) >> 2];
112-
113-
const sourceText = textDecoder.decode(buffer.subarray(0, sourceByteLen));
114-
115-
// Deserialize AST from buffer.
116-
// `preserveParens` argument is `false`, to match ESLint.
117-
// ESLint does not include `ParenthesizedExpression` nodes in its AST.
118-
const program = deserializeProgramOnly(buffer, sourceText, sourceByteLen, false);
119-
91+
// Pass buffer to source code module, so it can decode source text and deserialize AST on demand.
92+
//
93+
// We don't want to do this eagerly, because all rules might return empty visitors,
94+
// or `createOnce` rules might return `false` from their `before` hooks.
95+
// In such cases, the AST doesn't need to be walked, so we can skip deserializing it.
96+
//
97+
// But... source text and AST can be accessed in body of `create` method, or `before` hook, via `context.sourceCode`.
98+
// So we pass the buffer to source code module here, so it can decode source text / deserialize AST on demand.
12099
const hasBOM = false; // TODO: Set this correctly
121-
setupSourceForFile(sourceText, hasBOM, program);
100+
setupSourceForFile(buffer, hasBOM);
122101

123102
// Get visitors for this file from all rules
124103
initCompiledVisitor();
@@ -155,6 +134,7 @@ function lintFileImpl(filePath: string, bufferId: number, buffer: Uint8Array | n
155134
// Some rules seen in the wild return an empty visitor object from `create` if some initial check fails
156135
// e.g. file extension is not one the rule acts on.
157136
if (needsVisit) {
137+
const program = getAst();
158138
walkProgram(program, compiledVisitor);
159139

160140
// Lazy implementation

apps/oxlint/src-js/plugins/source_code.ts

Lines changed: 68 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,33 +1,86 @@
1+
import {
2+
DATA_POINTER_POS_32,
3+
SOURCE_LEN_OFFSET,
4+
// TODO(camc314): we need to generate `.d.ts` file for this module.
5+
// @ts-expect-error
6+
} from '../generated/constants.js';
7+
// @ts-expect-error we need to generate `.d.ts` file for this module
8+
import { deserializeProgramOnly } from '../../dist/generated/deserialize/ts.js';
9+
110
import type { Program } from '@oxc-project/types';
211
import type { Scope, ScopeManager, Variable } from './scope.ts';
3-
import type { Comment, LineColumn, Node, NodeOrToken, Token } from './types.ts';
12+
import type { BufferWithArrays, Comment, LineColumn, Node, NodeOrToken, Token } from './types.ts';
413

514
const { max } = Math;
615

7-
// Source text.
8-
// Initially `null`, but set to source text string before linting each file by `setupSourceForFile`.
9-
let sourceText: string | null = null;
10-
// Set before linting each file by `setupSourceForFile`.
16+
// Text decoder, for decoding source text from buffer
17+
const textDecoder = new TextDecoder('utf-8', { ignoreBOM: true });
18+
19+
// Buffer containing AST. Set before linting a file by `setupSourceForFile`.
20+
let buffer: BufferWithArrays | null = null;
21+
22+
// Indicates if the original source text has a BOM. Set before linting a file by `setupSourceForFile`.
1123
let hasBOM = false;
12-
// Set before linting each file by `setupSourceForFile`.
24+
25+
// Lazily populated when `SOURCE_CODE.text` or `SOURCE_CODE.ast` is accessed,
26+
// or `getAst()` is called before the AST is walked.
27+
let sourceText: string | null = null;
28+
let sourceByteLen: number = 0;
1329
let ast: Program | null = null;
1430

1531
/**
1632
* Set up source for the file about to be linted.
17-
* @param sourceTextInput - Source text
33+
* @param bufferInput - Buffer containing AST
1834
* @param hasBOMInput - `true` if file's original source text has Unicode BOM
19-
* @param astInput - The AST program for the file
2035
*/
21-
export function setupSourceForFile(sourceTextInput: string, hasBOMInput: boolean, astInput: Program): void {
22-
sourceText = sourceTextInput;
36+
export function setupSourceForFile(bufferInput: BufferWithArrays, hasBOMInput: boolean): void {
37+
buffer = bufferInput;
2338
hasBOM = hasBOMInput;
24-
ast = astInput;
39+
}
40+
41+
/**
42+
* Decode source text from buffer.
43+
*/
44+
function initSourceText(): void {
45+
const { uint32 } = buffer,
46+
programPos = uint32[DATA_POINTER_POS_32];
47+
sourceByteLen = uint32[(programPos + SOURCE_LEN_OFFSET) >> 2];
48+
sourceText = textDecoder.decode(buffer.subarray(0, sourceByteLen));
49+
}
50+
51+
/**
52+
* Deserialize AST from buffer.
53+
*/
54+
function initAst(): void {
55+
if (sourceText === null) initSourceText();
56+
57+
// `preserveParens` argument is `false`, to match ESLint.
58+
// ESLint does not include `ParenthesizedExpression` nodes in its AST.
59+
ast = deserializeProgramOnly(buffer, sourceText, sourceByteLen, false);
60+
}
61+
62+
/**
63+
* Get AST of the file being linted.
64+
* If AST has not already been deserialized, do it now.
65+
* @returns AST of the file being linted.
66+
*/
67+
export function getAst(): Program {
68+
if (ast === null) initAst();
69+
return ast;
2570
}
2671

2772
/**
2873
* Reset source after file has been linted, to free memory.
74+
*
75+
* Setting `buffer` to `null` also prevents AST being deserialized after linting,
76+
* at which point the buffer may be being reused for another file.
77+
* The buffer might contain a half-constructed AST (if parsing is currently in progress in Rust),
78+
* which would cause deserialization to malfunction.
79+
* With `buffer` set to `null`, accessing `SOURCE_CODE.ast` will still throw, but the error message will be clearer,
80+
* and no danger of an infinite loop due to a circular AST (unlikely but possible).
2981
*/
3082
export function resetSource(): void {
83+
buffer = null;
3184
sourceText = null;
3285
ast = null;
3386
}
@@ -44,6 +97,7 @@ export function resetSource(): void {
4497
export const SOURCE_CODE = Object.freeze({
4598
// Get source text.
4699
get text(): string {
100+
if (sourceText === null) initSourceText();
47101
return sourceText;
48102
},
49103

@@ -54,7 +108,7 @@ export const SOURCE_CODE = Object.freeze({
54108

55109
// Get AST of the file.
56110
get ast(): Program {
57-
return ast;
111+
return getAst();
58112
},
59113

60114
// Get `ScopeManager` for the file.
@@ -89,6 +143,8 @@ export const SOURCE_CODE = Object.freeze({
89143
beforeCount?: number | null | undefined,
90144
afterCount?: number | null | undefined,
91145
): string {
146+
if (sourceText === null) initSourceText();
147+
92148
// ESLint treats all falsy values for `node` as undefined
93149
if (!node) return sourceText;
94150

apps/oxlint/src-js/plugins/types.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,3 +80,9 @@ export interface RuleMeta {
8080
fixable?: 'code' | 'whitespace' | null | undefined;
8181
[key: string]: unknown;
8282
}
83+
84+
// Buffer with typed array views of itself stored as properties.
85+
export interface BufferWithArrays extends Uint8Array {
86+
uint32: Uint32Array;
87+
float64: Float64Array;
88+
}

apps/oxlint/test/e2e.test.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,14 @@ describe('oxlint CLI', () => {
127127
await testFixture('sourceCode');
128128
});
129129

130+
it('should get source text and AST from `context.sourceCode` when accessed late', async () => {
131+
await testFixture('sourceCode_late_access');
132+
});
133+
134+
it('should get source text and AST from `context.sourceCode` when accessed in `after` hook only', async () => {
135+
await testFixture('sourceCode_late_access_after_only');
136+
});
137+
130138
it('should support `createOnce`', async () => {
131139
await testFixture('createOnce');
132140
});
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
{
2+
"jsPlugins": ["./plugin.ts"],
3+
"categories": { "correctness": "off" },
4+
"rules": {
5+
"source-code-plugin/create": "error",
6+
"source-code-plugin/create-once": "error"
7+
}
8+
}
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
let foo, bar;
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
let qux;
Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
# Exit code
2+
1
3+
4+
# stdout
5+
```
6+
x source-code-plugin(create): program:
7+
| text: "let foo, bar;\n"
8+
| getText(): "let foo, bar;\n"
9+
,-[files/1.js:1:1]
10+
1 | let foo, bar;
11+
: ^
12+
`----
13+
14+
x source-code-plugin(create-once): after:
15+
| source: "let foo, bar;\n"
16+
,-[files/1.js:1:1]
17+
1 | let foo, bar;
18+
: ^
19+
`----
20+
21+
x source-code-plugin(create-once): program:
22+
| text: "let foo, bar;\n"
23+
| getText(): "let foo, bar;\n"
24+
,-[files/1.js:1:1]
25+
1 | let foo, bar;
26+
: ^
27+
`----
28+
29+
x source-code-plugin(create): var decl:
30+
| source: "let foo, bar;"
31+
,-[files/1.js:1:1]
32+
1 | let foo, bar;
33+
: ^^^^^^^^^^^^^
34+
`----
35+
36+
x source-code-plugin(create-once): var decl:
37+
| source: "let foo, bar;"
38+
,-[files/1.js:1:1]
39+
1 | let foo, bar;
40+
: ^^^^^^^^^^^^^
41+
`----
42+
43+
x source-code-plugin(create): ident "foo":
44+
| source: "foo"
45+
| source with before: "t foo"
46+
| source with after: "foo,"
47+
| source with both: "t foo,"
48+
,-[files/1.js:1:5]
49+
1 | let foo, bar;
50+
: ^^^
51+
`----
52+
53+
x source-code-plugin(create-once): ident "foo":
54+
| source: "foo"
55+
| source with before: "t foo"
56+
| source with after: "foo,"
57+
| source with both: "t foo,"
58+
,-[files/1.js:1:5]
59+
1 | let foo, bar;
60+
: ^^^
61+
`----
62+
63+
x source-code-plugin(create): ident "bar":
64+
| source: "bar"
65+
| source with before: ", bar"
66+
| source with after: "bar;"
67+
| source with both: ", bar;"
68+
,-[files/1.js:1:10]
69+
1 | let foo, bar;
70+
: ^^^
71+
`----
72+
73+
x source-code-plugin(create-once): ident "bar":
74+
| source: "bar"
75+
| source with before: ", bar"
76+
| source with after: "bar;"
77+
| source with both: ", bar;"
78+
,-[files/1.js:1:10]
79+
1 | let foo, bar;
80+
: ^^^
81+
`----
82+
83+
x source-code-plugin(create): program:
84+
| text: "let qux;\n"
85+
| getText(): "let qux;\n"
86+
,-[files/2.js:1:1]
87+
1 | let qux;
88+
: ^
89+
`----
90+
91+
x source-code-plugin(create-once): after:
92+
| source: "let qux;\n"
93+
,-[files/2.js:1:1]
94+
1 | let qux;
95+
: ^
96+
`----
97+
98+
x source-code-plugin(create-once): program:
99+
| text: "let qux;\n"
100+
| getText(): "let qux;\n"
101+
,-[files/2.js:1:1]
102+
1 | let qux;
103+
: ^
104+
`----
105+
106+
x source-code-plugin(create): var decl:
107+
| source: "let qux;"
108+
,-[files/2.js:1:1]
109+
1 | let qux;
110+
: ^^^^^^^^
111+
`----
112+
113+
x source-code-plugin(create-once): var decl:
114+
| source: "let qux;"
115+
,-[files/2.js:1:1]
116+
1 | let qux;
117+
: ^^^^^^^^
118+
`----
119+
120+
x source-code-plugin(create): ident "qux":
121+
| source: "qux"
122+
| source with before: "t qux"
123+
| source with after: "qux;"
124+
| source with both: "t qux;"
125+
,-[files/2.js:1:5]
126+
1 | let qux;
127+
: ^^^
128+
`----
129+
130+
x source-code-plugin(create-once): ident "qux":
131+
| source: "qux"
132+
| source with before: "t qux"
133+
| source with after: "qux;"
134+
| source with both: "t qux;"
135+
,-[files/2.js:1:5]
136+
1 | let qux;
137+
: ^^^
138+
`----
139+
140+
Found 0 warnings and 16 errors.
141+
Finished in Xms on 2 files using X threads.
142+
```
143+
144+
# stderr
145+
```
146+
WARNING: JS plugins are experimental and not subject to semver.
147+
Breaking changes are possible while JS plugins support is under development.
148+
```

0 commit comments

Comments
 (0)