Skip to content

Commit

Permalink
Merge a7c4822 into 4949d8e
Browse files Browse the repository at this point in the history
  • Loading branch information
lobsterkatie authored Dec 6, 2022
2 parents 4949d8e + a7c4822 commit 3eb9da4
Show file tree
Hide file tree
Showing 23 changed files with 225 additions and 32 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ out
coverage
test/**/dist
test/**/actual.js
test/unit/cjs-querystring/who?what?idk!.js
97 changes: 76 additions & 21 deletions src/node-file-trace.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,30 @@ const fsReadFile = fs.promises.readFile;
const fsReadlink = fs.promises.readlink;
const fsStat = fs.promises.stat;

type ParseSpecifierResult = {
path: string;
queryString: string | null
}

// Splits an ESM specifier into path and querystring (including the leading `?`). (If the specifier is CJS,
// it is passed through untouched.)
export function parseSpecifier(specifier: string, cjsResolve: boolean = true): ParseSpecifierResult {
let path = specifier;
let queryString = null;

if (!cjsResolve) {
// Regex which splits a specifier into path and querystring, inspired by that in `enhanced-resolve`
// https://github.com/webpack/enhanced-resolve/blob/157ed9bcc381857d979e56d2f20a5a17c6362bff/lib/util/identifier.js#L8
const match = /^(#?(?:\0.|[^?#\0])*)(\?(?:\0.|[^\0])*)?$/.exec(specifier);
if (match) {
path = match[1]
queryString = match[2]
}
}

return {path, queryString};
}

function inPath (path: string, parent: string) {
const pathWithSep = join(parent, sep);
return path.startsWith(pathWithSep) && path !== pathWithSep;
Expand Down Expand Up @@ -215,19 +239,21 @@ export class Job {
}

private maybeEmitDep = async (dep: string, path: string, cjsResolve: boolean) => {
// Only affects ESM dependencies
const { path: strippedDep, queryString = '' } = parseSpecifier(dep, cjsResolve)
let resolved: string | string[] = '';
let error: Error | undefined;
try {
resolved = await this.resolve(dep, path, this, cjsResolve);
} catch (e1: any) {
error = e1;
try {
if (this.ts && dep.endsWith('.js') && e1 instanceof NotFoundError) {
if (this.ts && strippedDep.endsWith('.js') && e1 instanceof NotFoundError) {
// TS with ESM relative import paths need full extensions
// (we have to write import "./foo.js" instead of import "./foo")
// See https://www.typescriptlang.org/docs/handbook/esm-node.html
const depTS = dep.slice(0, -3) + '.ts';
resolved = await this.resolve(depTS, path, this, cjsResolve);
const strippedDepTS = strippedDep.slice(0, -3) + '.ts';
resolved = await this.resolve(strippedDepTS + queryString, path, this, cjsResolve);
error = undefined;
}
} catch (e2: any) {
Expand All @@ -240,16 +266,19 @@ export class Job {
return;
}

if (Array.isArray(resolved)) {
for (const item of resolved) {
// ignore builtins
if (item.startsWith('node:')) return;
await this.emitDependency(item, path);
// For simplicity, force `resolved` to be an array
resolved = Array.isArray(resolved) ? resolved : [resolved];
for (let item of resolved) {
// ignore builtins for the purposes of both tracing and querystring handling (neither Node
// nor Webpack can handle querystrings on `node:xxx` imports).
if (item.startsWith('node:')) return;

// If querystring was stripped during resolution, restore it
if (queryString && !item.endsWith(queryString)) {
item += queryString;
}
} else {
// ignore builtins
if (resolved.startsWith('node:')) return;
await this.emitDependency(resolved, path);

await this.analyzeAndEmitDependency(item, path, cjsResolve);
}
}

Expand Down Expand Up @@ -343,13 +372,23 @@ export class Job {
}

async emitDependency (path: string, parent?: string) {
if (this.processed.has(path)) {
return this.analyzeAndEmitDependency(path, parent)
}

private async analyzeAndEmitDependency(rawPath: string, parent?: string, cjsResolve?: boolean) {

// Strip the querystring, if any. (Only affects ESM dependencies.)
const { path } = parseSpecifier(rawPath, cjsResolve)

// Since different querystrings may lead to different results, include the full path
// when noting whether or not we've already seen this path
if (this.processed.has(rawPath)) {
if (parent) {
await this.emitFile(path, 'dependency', parent)
}
return
};
this.processed.add(path);
this.processed.add(rawPath);

const emitted = await this.emitFile(path, 'dependency', parent);
if (!emitted) return;
Expand All @@ -368,18 +407,34 @@ export class Job {

let analyzeResult: AnalyzeResult;

const cachedAnalysis = this.analysisCache.get(path);
// Since different querystrings may lead to analyses, include the full path when caching
const cachedAnalysis = this.analysisCache.get(rawPath);
if (cachedAnalysis) {
analyzeResult = cachedAnalysis;
}
else {
const source = await this.readFile(path);
if (source === null) throw new Error('File ' + path + ' does not exist.');
// By default, `this.readFile` is the regular `fs.readFile`, but a different implementation
// can be specified via a user option in the main `nodeFileTrace` entrypoint. Depending on
// that implementation, having a querystring on the end of the path may either a) be necessary
// in order to specify the right module from which to read, or b) lead to an `ENOENT: no such
// file or directory` error because it thinks the querystring is part of the filename. We
// therefore try it with the querystring first, but have the non-querystringed version as a
// fallback. (If there's no `?` in the given path, `rawPath` will equal `path`, so order is a
// moot point.)
const source = await this.readFile(rawPath) || await this.readFile(path) ;

if (source === null) {
const errorMessage = path === rawPath
? 'File ' + path + ' does not exist.'
: 'Neither ' + path + ' nor ' + rawPath + ' exists.'
throw new Error(errorMessage);
}

// analyze should not have any side-effects e.g. calling `job.emitFile`
// directly as this will not be included in the cachedAnalysis and won't
// be emit for successive runs that leverage the cache
analyzeResult = await analyze(path, source.toString(), this);
this.analysisCache.set(path, analyzeResult);
this.analysisCache.set(rawPath, analyzeResult);
}

const { deps, imports, assets, isESM } = analyzeResult;
Expand All @@ -393,12 +448,12 @@ export class Job {
const ext = extname(asset);
if (ext === '.js' || ext === '.mjs' || ext === '.node' || ext === '' ||
this.ts && (ext === '.ts' || ext === '.tsx') && asset.startsWith(this.base) && asset.slice(this.base.length).indexOf(sep + 'node_modules' + sep) === -1)
await this.emitDependency(asset, path);
await this.analyzeAndEmitDependency(asset, path, !isESM);
else
await this.emitFile(asset, 'asset', path);
}),
...[...deps].map(async dep => this.maybeEmitDep(dep, path, !isESM)),
...[...imports].map(async dep => this.maybeEmitDep(dep, path, false)),
...[...deps].map(async dep => this.maybeEmitDep(dep, rawPath, !isESM)),
...[...imports].map(async dep => this.maybeEmitDep(dep, rawPath, false)),
]);
}
}
7 changes: 6 additions & 1 deletion src/resolve-dependency.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,16 @@
import { isAbsolute, resolve, sep } from 'path';
import { Job } from './node-file-trace';
import { Job, parseSpecifier } from './node-file-trace';

// node resolver
// custom implementation to emit only needed package.json files for resolver
// (package.json files are emitted as they are hit)
export default async function resolveDependency (specifier: string, parent: string, job: Job, cjsResolve = true): Promise<string | string[]> {
let resolved: string | string[];

// ESM imports are allowed to have querystrings, but the native Node behavior is to ignore them when doing
// file resolution, so emulate that here by stripping any querystring off before continuing
specifier = parseSpecifier(specifier, cjsResolve).path

if (isAbsolute(specifier) || specifier === '.' || specifier === '..' || specifier.startsWith('./') || specifier.startsWith('../')) {
const trailingSlash = specifier.endsWith('/');
resolved = await resolvePath(resolve(parent, '..', specifier) + (trailingSlash ? '/' : ''), parent, job);
Expand Down
55 changes: 45 additions & 10 deletions test/unit.test.js
Original file line number Diff line number Diff line change
@@ -1,23 +1,39 @@
const fs = require('fs');
const { join, relative } = require('path');
const { join, relative, sep } = require('path');
const { nodeFileTrace } = require('../out/node-file-trace');

global._unit = true;

const skipOnWindows = ['yarn-workspaces', 'yarn-workspaces-base-root', 'yarn-workspace-esm', 'asset-symlink', 'require-symlink'];
const skipOnWindows = ['yarn-workspaces', 'yarn-workspaces-base-root', 'yarn-workspace-esm', 'asset-symlink', 'require-symlink', 'cjs-querystring'];
const unitTestDirs = fs.readdirSync(join(__dirname, 'unit'));
const unitTests = [
...unitTestDirs.map(testName => ({testName, isRoot: false})),
...unitTestDirs.map(testName => ({testName, isRoot: true})),
...unitTestDirs.map(testName => ({testName, isRoot: false})),
];

for (const { testName, isRoot } of unitTests) {
const testSuffix = `${testName} from ${isRoot ? 'root' : 'cwd'}`;
if (process.platform === 'win32' && (isRoot || skipOnWindows.includes(testName))) {
console.log(`Skipping unit test on Windows: ${testSuffix}`);
continue;
};
const unitPath = join(__dirname, 'unit', testName);

if (process.platform === 'win32') {
if (isRoot || skipOnWindows.includes(testName)) {
console.log(`Skipping unit test on Windows: ${testSuffix}`);
continue;
}
} else {
if (testName === 'cjs-querystring') {
// Create (a git-ignored copy of) the file we need, since committing it
// breaks CI on Windows. See https://github.com/vercel/nft/pull/322.
const currentFilepath = join(unitPath, 'noPunctuation', 'whowhatidk.js');
const newFilepath = currentFilepath.replace(
'noPunctuation' + sep + 'whowhatidk.js',
'who?what?idk!.js'
);
if (!fs.existsSync(newFilepath)) {
fs.copyFileSync(currentFilepath, newFilepath);
}
}
}

it(`should correctly trace ${testSuffix}`, async () => {

Expand All @@ -41,6 +57,25 @@ for (const { testName, isRoot } of unitTests) {
return null
}
}

// mock an in-memory module store (such as webpack's) where the same filename with
// two different querystrings can correspond to two different modules, one importing
// the other
if (testName === 'querystring-self-import') {
if (id.endsWith('input.js') || id.endsWith('base.js') || id.endsWith('dep.js')) {
return fs.readFileSync(id).toString()
}

if (id.endsWith('base.js?__withQuery')) {
return `
import * as origBase from './base';
export const dogs = origBase.dogs.concat('Cory', 'Bodhi');
export const cats = origBase.cats.concat('Teaberry', 'Sassafras', 'Persephone');
export const rats = origBase.rats;
`;
}
}

return this.constructor.prototype.readFile.apply(this, arguments);
});

Expand All @@ -67,8 +102,8 @@ for (const { testName, isRoot } of unitTests) {
if (testName === 'multi-input') {
inputFileNames.push('input-2.js', 'input-3.js', 'input-4.js');
}
const { fileList, reasons } = await nodeFileTrace(

const { fileList, reasons, warnings } = await nodeFileTrace(
inputFileNames.map(file => join(unitPath, file)),
{
base: isRoot ? '/' : `${__dirname}/../`,
Expand Down Expand Up @@ -193,7 +228,7 @@ for (const { testName, isRoot } of unitTests) {
expect(sortedFileList).toEqual(expected);
}
catch (e) {
console.warn(reasons);
console.warn({reasons, warnings});
fs.writeFileSync(join(unitPath, 'actual.js'), JSON.stringify(sortedFileList, null, 2));
throw e;
}
Expand Down
7 changes: 7 additions & 0 deletions test/unit/cjs-querystring/input.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
// Test that CJS files treat question marks in filenames as any other character,
// matching Node behavior

// https://www.youtube.com/watch?v=2ve20PVNZ18

const baseball = require('./who?what?idk!');
console.log(baseball.players);
12 changes: 12 additions & 0 deletions test/unit/cjs-querystring/noPunctuation/whowhatidk.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
module.exports = {
players: {
first: 'Who',
second: 'What',
third: "I Don't Know",
left: 'Why',
center: 'Because',
pitcher: 'Tomorrow',
catcher: 'Today',
shortstop: "I Don't Give a Damn!",
},
};
5 changes: 5 additions & 0 deletions test/unit/cjs-querystring/output.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
[
"package.json",
"test/unit/cjs-querystring/input.js",
"test/unit/cjs-querystring/who?what?idk!.js"
]
4 changes: 4 additions & 0 deletions test/unit/esm-querystring-mjs/animalFacts/aardvark.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
import { numSpecies } from "./bear.mjs?beaver?bison";
console.log(`There are ${numSpecies} species of bears.`);

export const food = "termites";
4 changes: 4 additions & 0 deletions test/unit/esm-querystring-mjs/animalFacts/bear.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
import * as cheetah from "./cheetah.mjs?cow=chipmunk";
console.log(`Cheetahs can run ${cheetah.topSpeed} mph.`);

export const numSpecies = 8;
1 change: 1 addition & 0 deletions test/unit/esm-querystring-mjs/animalFacts/cheetah.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
export const topSpeed = 65;
6 changes: 6 additions & 0 deletions test/unit/esm-querystring-mjs/input.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
// Test that querystrings of various forms get stripped from esm imports when those
// imports contain the `.mjs` file extension

import * as aardvark from "./animalFacts/aardvark.mjs?anteater";

console.log(`Aardvarks eat ${aardvark.food}.`);
7 changes: 7 additions & 0 deletions test/unit/esm-querystring-mjs/output.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
[
"test/unit/esm-querystring-mjs/animalFacts/aardvark.mjs",
"test/unit/esm-querystring-mjs/animalFacts/bear.mjs",
"test/unit/esm-querystring-mjs/animalFacts/cheetah.mjs",
"test/unit/esm-querystring-mjs/input.js",
"test/unit/esm-querystring-mjs/package.json"
]
4 changes: 4 additions & 0 deletions test/unit/esm-querystring-mjs/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"private": true,
"type": "module"
}
4 changes: 4 additions & 0 deletions test/unit/esm-querystring/animalFacts/aardvark.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
import { numSpecies } from './bear?beaver?bison';
console.log(`There are ${numSpecies} species of bears.`);

export const food = 'termites';
4 changes: 4 additions & 0 deletions test/unit/esm-querystring/animalFacts/bear.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
import * as cheetah from './cheetah?cow=chipmunk';
console.log(`Cheetahs can run ${cheetah.topSpeed} mph.`);

export const numSpecies = 8;
1 change: 1 addition & 0 deletions test/unit/esm-querystring/animalFacts/cheetah.js
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
export const topSpeed = 65;
5 changes: 5 additions & 0 deletions test/unit/esm-querystring/input.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
// Test that querystrings of various forms get stripped from esm imports

import * as aardvark from './animalFacts/aardvark?anteater';

console.log(`Aardvarks eat ${aardvark.food}.`);
7 changes: 7 additions & 0 deletions test/unit/esm-querystring/output.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
[
"test/unit/esm-querystring/animalFacts/aardvark.js",
"test/unit/esm-querystring/animalFacts/bear.js",
"test/unit/esm-querystring/animalFacts/cheetah.js",
"test/unit/esm-querystring/input.js",
"test/unit/esm-querystring/package.json"
]
4 changes: 4 additions & 0 deletions test/unit/esm-querystring/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"private": true,
"type": "module"
}
5 changes: 5 additions & 0 deletions test/unit/querystring-self-import/base.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
import * as dep from './dep';

export const dogs = ['Charlie', 'Maisey'];
export const cats = ['Piper'];
export const rats = dep.rats;
1 change: 1 addition & 0 deletions test/unit/querystring-self-import/dep.js
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
export const rats = ['Debra'];
10 changes: 10 additions & 0 deletions test/unit/querystring-self-import/input.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
// Test that if a file and the same file with a querystring correspond to different
// modules in memory, one can successfully import the other. The import chain
// goes `input` (this file) -> `base?__withQuery` -> `base` -> `dep`, which means
// that if `dep` shows up in `output`, we know that both `base?__withQuery` and
// `base` have been loaded successfully.

import * as baseWithQuery from './base?__withQuery';
console.log('Dogs:', baseWithQuery.dogs);
console.log('Cats:', baseWithQuery.cats);
console.log('Rats:', baseWithQuery.rats);
Loading

0 comments on commit 3eb9da4

Please sign in to comment.