diff --git a/.gitignore b/.gitignore index ea00b8f0..657bcd13 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,4 @@ out coverage test/**/dist test/**/actual.js +test/unit/cjs-querystring/who?what?idk!.js diff --git a/src/node-file-trace.ts b/src/node-file-trace.ts index df6dbb24..41e0e31c 100644 --- a/src/node-file-trace.ts +++ b/src/node-file-trace.ts @@ -12,6 +12,30 @@ const fsReadFile = fs.promises.readFile; const fsReadlink = fs.promises.readlink; const fsStat = fs.promises.stat; +type ParseSpecifierResult = { + path: string; + queryString: string | null +} + +// Splits an ESM specifier into path and querystring (including the leading `?`). (If the specifier is CJS, +// it is passed through untouched.) +export function parseSpecifier(specifier: string, cjsResolve: boolean = true): ParseSpecifierResult { + let path = specifier; + let queryString = null; + + if (!cjsResolve) { + // Regex which splits a specifier into path and querystring, inspired by that in `enhanced-resolve` + // https://github.com/webpack/enhanced-resolve/blob/157ed9bcc381857d979e56d2f20a5a17c6362bff/lib/util/identifier.js#L8 + const match = /^(#?(?:\0.|[^?#\0])*)(\?(?:\0.|[^\0])*)?$/.exec(specifier); + if (match) { + path = match[1] + queryString = match[2] + } + } + + return {path, queryString}; +} + function inPath (path: string, parent: string) { const pathWithSep = join(parent, sep); return path.startsWith(pathWithSep) && path !== pathWithSep; @@ -215,6 +239,8 @@ export class Job { } private maybeEmitDep = async (dep: string, path: string, cjsResolve: boolean) => { + // Only affects ESM dependencies + const { path: strippedDep, queryString = '' } = parseSpecifier(dep, cjsResolve) let resolved: string | string[] = ''; let error: Error | undefined; try { @@ -222,12 +248,12 @@ export class Job { } catch (e1: any) { error = e1; try { - if (this.ts && dep.endsWith('.js') && e1 instanceof NotFoundError) { + if (this.ts && strippedDep.endsWith('.js') && e1 instanceof NotFoundError) { // TS with ESM relative import paths need full extensions // (we have to write import "./foo.js" instead of import "./foo") // See https://www.typescriptlang.org/docs/handbook/esm-node.html - const depTS = dep.slice(0, -3) + '.ts'; - resolved = await this.resolve(depTS, path, this, cjsResolve); + const strippedDepTS = strippedDep.slice(0, -3) + '.ts'; + resolved = await this.resolve(strippedDepTS + queryString, path, this, cjsResolve); error = undefined; } } catch (e2: any) { @@ -240,16 +266,19 @@ export class Job { return; } - if (Array.isArray(resolved)) { - for (const item of resolved) { - // ignore builtins - if (item.startsWith('node:')) return; - await this.emitDependency(item, path); + // For simplicity, force `resolved` to be an array + resolved = Array.isArray(resolved) ? resolved : [resolved]; + for (let item of resolved) { + // ignore builtins for the purposes of both tracing and querystring handling (neither Node + // nor Webpack can handle querystrings on `node:xxx` imports). + if (item.startsWith('node:')) return; + + // If querystring was stripped during resolution, restore it + if (queryString && !item.endsWith(queryString)) { + item += queryString; } - } else { - // ignore builtins - if (resolved.startsWith('node:')) return; - await this.emitDependency(resolved, path); + + await this.analyzeAndEmitDependency(item, path, cjsResolve); } } @@ -343,13 +372,23 @@ export class Job { } async emitDependency (path: string, parent?: string) { - if (this.processed.has(path)) { + return this.analyzeAndEmitDependency(path, parent) + } + + private async analyzeAndEmitDependency(rawPath: string, parent?: string, cjsResolve?: boolean) { + + // Strip the querystring, if any. (Only affects ESM dependencies.) + const { path } = parseSpecifier(rawPath, cjsResolve) + + // Since different querystrings may lead to different results, include the full path + // when noting whether or not we've already seen this path + if (this.processed.has(rawPath)) { if (parent) { await this.emitFile(path, 'dependency', parent) } return }; - this.processed.add(path); + this.processed.add(rawPath); const emitted = await this.emitFile(path, 'dependency', parent); if (!emitted) return; @@ -368,18 +407,34 @@ export class Job { let analyzeResult: AnalyzeResult; - const cachedAnalysis = this.analysisCache.get(path); + // Since different querystrings may lead to analyses, include the full path when caching + const cachedAnalysis = this.analysisCache.get(rawPath); if (cachedAnalysis) { analyzeResult = cachedAnalysis; } else { - const source = await this.readFile(path); - if (source === null) throw new Error('File ' + path + ' does not exist.'); + // By default, `this.readFile` is the regular `fs.readFile`, but a different implementation + // can be specified via a user option in the main `nodeFileTrace` entrypoint. Depending on + // that implementation, having a querystring on the end of the path may either a) be necessary + // in order to specify the right module from which to read, or b) lead to an `ENOENT: no such + // file or directory` error because it thinks the querystring is part of the filename. We + // therefore try it with the querystring first, but have the non-querystringed version as a + // fallback. (If there's no `?` in the given path, `rawPath` will equal `path`, so order is a + // moot point.) + const source = await this.readFile(rawPath) || await this.readFile(path) ; + + if (source === null) { + const errorMessage = path === rawPath + ? 'File ' + path + ' does not exist.' + : 'Neither ' + path + ' nor ' + rawPath + ' exists.' + throw new Error(errorMessage); + } + // analyze should not have any side-effects e.g. calling `job.emitFile` // directly as this will not be included in the cachedAnalysis and won't // be emit for successive runs that leverage the cache analyzeResult = await analyze(path, source.toString(), this); - this.analysisCache.set(path, analyzeResult); + this.analysisCache.set(rawPath, analyzeResult); } const { deps, imports, assets, isESM } = analyzeResult; @@ -393,12 +448,12 @@ export class Job { const ext = extname(asset); if (ext === '.js' || ext === '.mjs' || ext === '.node' || ext === '' || this.ts && (ext === '.ts' || ext === '.tsx') && asset.startsWith(this.base) && asset.slice(this.base.length).indexOf(sep + 'node_modules' + sep) === -1) - await this.emitDependency(asset, path); + await this.analyzeAndEmitDependency(asset, path, !isESM); else await this.emitFile(asset, 'asset', path); }), - ...[...deps].map(async dep => this.maybeEmitDep(dep, path, !isESM)), - ...[...imports].map(async dep => this.maybeEmitDep(dep, path, false)), + ...[...deps].map(async dep => this.maybeEmitDep(dep, rawPath, !isESM)), + ...[...imports].map(async dep => this.maybeEmitDep(dep, rawPath, false)), ]); } } \ No newline at end of file diff --git a/src/resolve-dependency.ts b/src/resolve-dependency.ts index b50684f4..88ca47b3 100644 --- a/src/resolve-dependency.ts +++ b/src/resolve-dependency.ts @@ -1,11 +1,16 @@ import { isAbsolute, resolve, sep } from 'path'; -import { Job } from './node-file-trace'; +import { Job, parseSpecifier } from './node-file-trace'; // node resolver // custom implementation to emit only needed package.json files for resolver // (package.json files are emitted as they are hit) export default async function resolveDependency (specifier: string, parent: string, job: Job, cjsResolve = true): Promise { let resolved: string | string[]; + + // ESM imports are allowed to have querystrings, but the native Node behavior is to ignore them when doing + // file resolution, so emulate that here by stripping any querystring off before continuing + specifier = parseSpecifier(specifier, cjsResolve).path + if (isAbsolute(specifier) || specifier === '.' || specifier === '..' || specifier.startsWith('./') || specifier.startsWith('../')) { const trailingSlash = specifier.endsWith('/'); resolved = await resolvePath(resolve(parent, '..', specifier) + (trailingSlash ? '/' : ''), parent, job); diff --git a/test/unit.test.js b/test/unit.test.js index ce269b38..e774439c 100644 --- a/test/unit.test.js +++ b/test/unit.test.js @@ -1,23 +1,39 @@ const fs = require('fs'); -const { join, relative } = require('path'); +const { join, relative, sep } = require('path'); const { nodeFileTrace } = require('../out/node-file-trace'); global._unit = true; -const skipOnWindows = ['yarn-workspaces', 'yarn-workspaces-base-root', 'yarn-workspace-esm', 'asset-symlink', 'require-symlink']; +const skipOnWindows = ['yarn-workspaces', 'yarn-workspaces-base-root', 'yarn-workspace-esm', 'asset-symlink', 'require-symlink', 'cjs-querystring']; const unitTestDirs = fs.readdirSync(join(__dirname, 'unit')); const unitTests = [ - ...unitTestDirs.map(testName => ({testName, isRoot: false})), ...unitTestDirs.map(testName => ({testName, isRoot: true})), + ...unitTestDirs.map(testName => ({testName, isRoot: false})), ]; for (const { testName, isRoot } of unitTests) { const testSuffix = `${testName} from ${isRoot ? 'root' : 'cwd'}`; - if (process.platform === 'win32' && (isRoot || skipOnWindows.includes(testName))) { - console.log(`Skipping unit test on Windows: ${testSuffix}`); - continue; - }; const unitPath = join(__dirname, 'unit', testName); + + if (process.platform === 'win32') { + if (isRoot || skipOnWindows.includes(testName)) { + console.log(`Skipping unit test on Windows: ${testSuffix}`); + continue; + } + } else { + if (testName === 'cjs-querystring') { + // Create (a git-ignored copy of) the file we need, since committing it + // breaks CI on Windows. See https://github.com/vercel/nft/pull/322. + const currentFilepath = join(unitPath, 'noPunctuation', 'whowhatidk.js'); + const newFilepath = currentFilepath.replace( + 'noPunctuation' + sep + 'whowhatidk.js', + 'who?what?idk!.js' + ); + if (!fs.existsSync(newFilepath)) { + fs.copyFileSync(currentFilepath, newFilepath); + } + } + } it(`should correctly trace ${testSuffix}`, async () => { @@ -41,6 +57,25 @@ for (const { testName, isRoot } of unitTests) { return null } } + + // mock an in-memory module store (such as webpack's) where the same filename with + // two different querystrings can correspond to two different modules, one importing + // the other + if (testName === 'querystring-self-import') { + if (id.endsWith('input.js') || id.endsWith('base.js') || id.endsWith('dep.js')) { + return fs.readFileSync(id).toString() + } + + if (id.endsWith('base.js?__withQuery')) { + return ` + import * as origBase from './base'; + export const dogs = origBase.dogs.concat('Cory', 'Bodhi'); + export const cats = origBase.cats.concat('Teaberry', 'Sassafras', 'Persephone'); + export const rats = origBase.rats; + `; + } + } + return this.constructor.prototype.readFile.apply(this, arguments); }); @@ -67,8 +102,8 @@ for (const { testName, isRoot } of unitTests) { if (testName === 'multi-input') { inputFileNames.push('input-2.js', 'input-3.js', 'input-4.js'); } - - const { fileList, reasons } = await nodeFileTrace( + + const { fileList, reasons, warnings } = await nodeFileTrace( inputFileNames.map(file => join(unitPath, file)), { base: isRoot ? '/' : `${__dirname}/../`, @@ -193,7 +228,7 @@ for (const { testName, isRoot } of unitTests) { expect(sortedFileList).toEqual(expected); } catch (e) { - console.warn(reasons); + console.warn({reasons, warnings}); fs.writeFileSync(join(unitPath, 'actual.js'), JSON.stringify(sortedFileList, null, 2)); throw e; } diff --git a/test/unit/cjs-querystring/input.js b/test/unit/cjs-querystring/input.js new file mode 100644 index 00000000..5a1baf7e --- /dev/null +++ b/test/unit/cjs-querystring/input.js @@ -0,0 +1,7 @@ +// Test that CJS files treat question marks in filenames as any other character, +// matching Node behavior + +// https://www.youtube.com/watch?v=2ve20PVNZ18 + +const baseball = require('./who?what?idk!'); +console.log(baseball.players); diff --git a/test/unit/cjs-querystring/noPunctuation/whowhatidk.js b/test/unit/cjs-querystring/noPunctuation/whowhatidk.js new file mode 100644 index 00000000..4c4c4b72 --- /dev/null +++ b/test/unit/cjs-querystring/noPunctuation/whowhatidk.js @@ -0,0 +1,12 @@ +module.exports = { + players: { + first: 'Who', + second: 'What', + third: "I Don't Know", + left: 'Why', + center: 'Because', + pitcher: 'Tomorrow', + catcher: 'Today', + shortstop: "I Don't Give a Damn!", + }, +}; diff --git a/test/unit/cjs-querystring/output.js b/test/unit/cjs-querystring/output.js new file mode 100644 index 00000000..296a204b --- /dev/null +++ b/test/unit/cjs-querystring/output.js @@ -0,0 +1,5 @@ +[ + "package.json", + "test/unit/cjs-querystring/input.js", + "test/unit/cjs-querystring/who?what?idk!.js" +] diff --git a/test/unit/esm-querystring-mjs/animalFacts/aardvark.mjs b/test/unit/esm-querystring-mjs/animalFacts/aardvark.mjs new file mode 100644 index 00000000..a2efff8a --- /dev/null +++ b/test/unit/esm-querystring-mjs/animalFacts/aardvark.mjs @@ -0,0 +1,4 @@ +import { numSpecies } from "./bear.mjs?beaver?bison"; +console.log(`There are ${numSpecies} species of bears.`); + +export const food = "termites"; diff --git a/test/unit/esm-querystring-mjs/animalFacts/bear.mjs b/test/unit/esm-querystring-mjs/animalFacts/bear.mjs new file mode 100644 index 00000000..a8ee2dd0 --- /dev/null +++ b/test/unit/esm-querystring-mjs/animalFacts/bear.mjs @@ -0,0 +1,4 @@ +import * as cheetah from "./cheetah.mjs?cow=chipmunk"; +console.log(`Cheetahs can run ${cheetah.topSpeed} mph.`); + +export const numSpecies = 8; diff --git a/test/unit/esm-querystring-mjs/animalFacts/cheetah.mjs b/test/unit/esm-querystring-mjs/animalFacts/cheetah.mjs new file mode 100644 index 00000000..836b4ddf --- /dev/null +++ b/test/unit/esm-querystring-mjs/animalFacts/cheetah.mjs @@ -0,0 +1 @@ +export const topSpeed = 65; diff --git a/test/unit/esm-querystring-mjs/input.js b/test/unit/esm-querystring-mjs/input.js new file mode 100644 index 00000000..a07ace77 --- /dev/null +++ b/test/unit/esm-querystring-mjs/input.js @@ -0,0 +1,6 @@ +// Test that querystrings of various forms get stripped from esm imports when those +// imports contain the `.mjs` file extension + +import * as aardvark from "./animalFacts/aardvark.mjs?anteater"; + +console.log(`Aardvarks eat ${aardvark.food}.`); diff --git a/test/unit/esm-querystring-mjs/output.js b/test/unit/esm-querystring-mjs/output.js new file mode 100644 index 00000000..95137ed7 --- /dev/null +++ b/test/unit/esm-querystring-mjs/output.js @@ -0,0 +1,7 @@ +[ + "test/unit/esm-querystring-mjs/animalFacts/aardvark.mjs", + "test/unit/esm-querystring-mjs/animalFacts/bear.mjs", + "test/unit/esm-querystring-mjs/animalFacts/cheetah.mjs", + "test/unit/esm-querystring-mjs/input.js", + "test/unit/esm-querystring-mjs/package.json" +] \ No newline at end of file diff --git a/test/unit/esm-querystring-mjs/package.json b/test/unit/esm-querystring-mjs/package.json new file mode 100644 index 00000000..e986b24b --- /dev/null +++ b/test/unit/esm-querystring-mjs/package.json @@ -0,0 +1,4 @@ +{ + "private": true, + "type": "module" +} diff --git a/test/unit/esm-querystring/animalFacts/aardvark.js b/test/unit/esm-querystring/animalFacts/aardvark.js new file mode 100644 index 00000000..4c497265 --- /dev/null +++ b/test/unit/esm-querystring/animalFacts/aardvark.js @@ -0,0 +1,4 @@ +import { numSpecies } from './bear?beaver?bison'; +console.log(`There are ${numSpecies} species of bears.`); + +export const food = 'termites'; diff --git a/test/unit/esm-querystring/animalFacts/bear.js b/test/unit/esm-querystring/animalFacts/bear.js new file mode 100644 index 00000000..4578358b --- /dev/null +++ b/test/unit/esm-querystring/animalFacts/bear.js @@ -0,0 +1,4 @@ +import * as cheetah from './cheetah?cow=chipmunk'; +console.log(`Cheetahs can run ${cheetah.topSpeed} mph.`); + +export const numSpecies = 8; diff --git a/test/unit/esm-querystring/animalFacts/cheetah.js b/test/unit/esm-querystring/animalFacts/cheetah.js new file mode 100644 index 00000000..836b4ddf --- /dev/null +++ b/test/unit/esm-querystring/animalFacts/cheetah.js @@ -0,0 +1 @@ +export const topSpeed = 65; diff --git a/test/unit/esm-querystring/input.js b/test/unit/esm-querystring/input.js new file mode 100644 index 00000000..b0d51697 --- /dev/null +++ b/test/unit/esm-querystring/input.js @@ -0,0 +1,5 @@ +// Test that querystrings of various forms get stripped from esm imports + +import * as aardvark from './animalFacts/aardvark?anteater'; + +console.log(`Aardvarks eat ${aardvark.food}.`); diff --git a/test/unit/esm-querystring/output.js b/test/unit/esm-querystring/output.js new file mode 100644 index 00000000..d03f7cb8 --- /dev/null +++ b/test/unit/esm-querystring/output.js @@ -0,0 +1,7 @@ +[ + "test/unit/esm-querystring/animalFacts/aardvark.js", + "test/unit/esm-querystring/animalFacts/bear.js", + "test/unit/esm-querystring/animalFacts/cheetah.js", + "test/unit/esm-querystring/input.js", + "test/unit/esm-querystring/package.json" +] \ No newline at end of file diff --git a/test/unit/esm-querystring/package.json b/test/unit/esm-querystring/package.json new file mode 100644 index 00000000..e986b24b --- /dev/null +++ b/test/unit/esm-querystring/package.json @@ -0,0 +1,4 @@ +{ + "private": true, + "type": "module" +} diff --git a/test/unit/querystring-self-import/base.js b/test/unit/querystring-self-import/base.js new file mode 100644 index 00000000..8f48a38a --- /dev/null +++ b/test/unit/querystring-self-import/base.js @@ -0,0 +1,5 @@ +import * as dep from './dep'; + +export const dogs = ['Charlie', 'Maisey']; +export const cats = ['Piper']; +export const rats = dep.rats; diff --git a/test/unit/querystring-self-import/dep.js b/test/unit/querystring-self-import/dep.js new file mode 100644 index 00000000..aaff193a --- /dev/null +++ b/test/unit/querystring-self-import/dep.js @@ -0,0 +1 @@ +export const rats = ['Debra']; diff --git a/test/unit/querystring-self-import/input.js b/test/unit/querystring-self-import/input.js new file mode 100644 index 00000000..25f53ce9 --- /dev/null +++ b/test/unit/querystring-self-import/input.js @@ -0,0 +1,10 @@ +// Test that if a file and the same file with a querystring correspond to different +// modules in memory, one can successfully import the other. The import chain +// goes `input` (this file) -> `base?__withQuery` -> `base` -> `dep`, which means +// that if `dep` shows up in `output`, we know that both `base?__withQuery` and +// `base` have been loaded successfully. + +import * as baseWithQuery from './base?__withQuery'; +console.log('Dogs:', baseWithQuery.dogs); +console.log('Cats:', baseWithQuery.cats); +console.log('Rats:', baseWithQuery.rats); diff --git a/test/unit/querystring-self-import/output.js b/test/unit/querystring-self-import/output.js new file mode 100644 index 00000000..d6dc6b2c --- /dev/null +++ b/test/unit/querystring-self-import/output.js @@ -0,0 +1,6 @@ +[ + "package.json", + "test/unit/querystring-self-import/base.js", + "test/unit/querystring-self-import/dep.js", + "test/unit/querystring-self-import/input.js" +]