Skip to content

Commit

Permalink
esm: detect ESM syntax in ambiguous JavaScript
Browse files Browse the repository at this point in the history
PR-URL: nodejs#50096
Reviewed-By: Yagiz Nizipli <yagiz@nizipli.com>
Reviewed-By: Benjamin Gruenbaum <benjamingr@gmail.com>
Reviewed-By: Guy Bedford <guybedford@gmail.com>
  • Loading branch information
GeoffreyBooth authored and alexfernandez committed Nov 1, 2023
1 parent c518cd0 commit 87bf5f8
Show file tree
Hide file tree
Showing 30 changed files with 475 additions and 67 deletions.
37 changes: 37 additions & 0 deletions benchmark/esm/detect-esm-syntax.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
'use strict';

// This benchmarks the cost of running `containsModuleSyntax` on a CommonJS module being imported.
// We use the TypeScript fixture because it's a very large CommonJS file with no ESM syntax: the worst case.
const common = require('../common.js');
const tmpdir = require('../../test/common/tmpdir.js');
const fixtures = require('../../test/common/fixtures.js');
const scriptPath = fixtures.path('snapshot', 'typescript.js');
const fs = require('node:fs');

const bench = common.createBenchmark(main, {
type: ['with-module-syntax-detection', 'without-module-syntax-detection'],
n: [1e4],
}, {
flags: ['--experimental-detect-module'],
});

const benchmarkDirectory = tmpdir.fileURL('benchmark-detect-esm-syntax');
const ambiguousURL = new URL('./typescript.js', benchmarkDirectory);
const explicitURL = new URL('./typescript.cjs', benchmarkDirectory);

async function main({ n, type }) {
tmpdir.refresh();

fs.mkdirSync(benchmarkDirectory, { recursive: true });
fs.cpSync(scriptPath, ambiguousURL);
fs.cpSync(scriptPath, explicitURL);

bench.start();

for (let i = 0; i < n; i++) {
const url = type === 'with-module-syntax-detection' ? ambiguousURL : explicitURL;
await import(url);
}

bench.end(n);
}
27 changes: 27 additions & 0 deletions doc/api/cli.md
Original file line number Diff line number Diff line change
Expand Up @@ -620,6 +620,32 @@ files with no extension will be treated as WebAssembly if they begin with the
WebAssembly magic number (`\0asm`); otherwise they will be treated as ES module
JavaScript.

### `--experimental-detect-module`

<!-- YAML
added:
- REPLACEME
-->

> Stability: 1.0 - Early development
Node.js will inspect the source code of ambiguous input to determine whether it
contains ES module syntax; if such syntax is detected, the input will be treated
as an ES module.

Ambiguous input is defined as:

* Files with a `.js` extension or no extension; and either no controlling
`package.json` file or one that lacks a `type` field; and
`--experimental-default-type` is not specified.
* String input (`--eval` or STDIN) when neither `--input-type` nor
`--experimental-default-type` are specified.

ES module syntax is defined as syntax that would throw when evaluated as
CommonJS. This includes `import` and `export` statements and `import.meta`
references. It does _not_ include `import()` expressions, which are valid in
CommonJS.

### `--experimental-import-meta-resolve`

<!-- YAML
Expand Down Expand Up @@ -2286,6 +2312,7 @@ Node.js options that are allowed are:
* `--enable-source-maps`
* `--experimental-abortcontroller`
* `--experimental-default-type`
* `--experimental-detect-module`
* `--experimental-import-meta-resolve`
* `--experimental-json-modules`
* `--experimental-loader`
Expand Down
59 changes: 42 additions & 17 deletions doc/api/esm.md
Original file line number Diff line number Diff line change
Expand Up @@ -109,11 +109,21 @@ provides interoperability between them and its original module format,

Node.js has two module systems: [CommonJS][] modules and ECMAScript modules.

Authors can tell Node.js to use the ECMAScript modules loader via the `.mjs`
file extension, the `package.json` [`"type"`][] field, the [`--input-type`][]
flag, or the [`--experimental-default-type`][] flag. Outside of those cases,
Node.js will use the CommonJS module loader. See [Determining module system][]
for more details.
Authors can tell Node.js to interpret JavaScript as an ES module via the `.mjs`
file extension, the `package.json` [`"type"`][] field with a value `"module"`,
the [`--input-type`][] flag with a value of `"module"`, or the
[`--experimental-default-type`][] flag with a value of `"module"`. These are
explicit markers of code being intended to run as an ES module.

Inversely, authors can tell Node.js to interpret JavaScript as CommonJS via the
`.cjs` file extension, the `package.json` [`"type"`][] field with a value
`"commonjs"`, the [`--input-type`][] flag with a value of `"commonjs"`, or the
[`--experimental-default-type`][] flag with a value of `"commonjs"`.

When code lacks explicit markers for either module system, Node.js will inspect
the source code of a module to look for ES module syntax. If such syntax is
found, Node.js will run the code as an ES module; otherwise it will run the
module as CommonJS. See [Determining module system][] for more details.

<!-- Anchors to make sure old links find a target -->

Expand Down Expand Up @@ -1019,18 +1029,33 @@ _isImports_, _conditions_)
> 1. Return _"commonjs"_.
> 4. If _url_ ends in _".json"_, then
> 1. Return _"json"_.
> 5. Let _packageURL_ be the result of **LOOKUP\_PACKAGE\_SCOPE**(_url_).
> 6. Let _pjson_ be the result of **READ\_PACKAGE\_JSON**(_packageURL_).
> 7. If _pjson?.type_ exists and is _"module"_, then
> 1. If _url_ ends in _".js"_ or has no file extension, then
> 1. If `--experimental-wasm-modules` is enabled and the file at _url_
> contains the header for a WebAssembly module, then
> 1. Return _"wasm"_.
> 2. Otherwise,
> 1. Return _"module"_.
> 2. Return **undefined**.
> 8. Otherwise,
> 1. Return **undefined**.
> 5. If `--experimental-wasm-modules` is enabled and _url_ ends in
> _".wasm"_, then
> 1. Return _"wasm"_.
> 6. Let _packageURL_ be the result of **LOOKUP\_PACKAGE\_SCOPE**(_url_).
> 7. Let _pjson_ be the result of **READ\_PACKAGE\_JSON**(_packageURL_).
> 8. Let _packageType_ be **null**.
> 9. If _pjson?.type_ is _"module"_ or _"commonjs"_, then
> 1. Set _packageType_ to _pjson.type_.
> 10. If _url_ ends in _".js"_, then
> 1. If _packageType_ is not **null**, then
> 1. Return _packageType_.
> 2. If `--experimental-detect-module` is enabled and the source of
> module contains static import or export syntax, then
> 1. Return _"module"_.
> 3. Return _"commonjs"_.
> 11. If _url_ does not have any extension, then
> 1. If _packageType_ is _"module"_ and `--experimental-wasm-modules` is
> enabled and the file at _url_ contains the header for a WebAssembly
> module, then
> 1. Return _"wasm"_.
> 2. If _packageType_ is not **null**, then
> 1. Return _packageType_.
> 3. If `--experimental-detect-module` is enabled and the source of
> module contains static import or export syntax, then
> 1. Return _"module"_.
> 4. Return _"commonjs"_.
> 12. Return **undefined** (will throw during load phase).
**LOOKUP\_PACKAGE\_SCOPE**(_url_)
Expand Down
6 changes: 4 additions & 2 deletions doc/api/modules.md
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,10 @@ By default, Node.js will treat the following as CommonJS modules:
* Files with a `.js` extension when the nearest parent `package.json` file
contains a top-level field [`"type"`][] with a value of `"commonjs"`.

* Files with a `.js` extension when the nearest parent `package.json` file
doesn't contain a top-level field [`"type"`][]. Package authors should include
* Files with a `.js` extension or without an extension, when the nearest parent
`package.json` file doesn't contain a top-level field [`"type"`][] or there is
no `package.json` in any parent folder; unless the file contains syntax that
errors unless it is evaluated as an ES module. Package authors should include
the [`"type"`][] field, even in packages where all sources are CommonJS. Being
explicit about the `type` of the package will make things easier for build
tools and loaders to determine how the files in the package should be
Expand Down
8 changes: 8 additions & 0 deletions doc/api/packages.md
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,14 @@ expressions:
* Strings passed in as an argument to `--eval`, or piped to `node` via `STDIN`,
with the flag `--input-type=module`.

* Code that contains syntax that only parses successfully as [ES modules][],
such as `import` or `export` statements or `import.meta`, when the code has no
explicit marker of how it should be interpreted. Explicit markers are `.mjs`
or `.cjs` extensions, `package.json` `"type"` fields with either `"module"` or
`"commonjs"` values, or `--input-type` or `--experimental-default-type` flags.
Dynamic `import()` expressions are supported in either CommonJS or ES modules
and would not cause a file to be treated as an ES module.

Node.js will treat the following as [CommonJS][] when passed to `node` as the
initial input, or when referenced by `import` statements or `import()`
expressions:
Expand Down
64 changes: 43 additions & 21 deletions lib/internal/modules/esm/get_format.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,7 @@ const {

const experimentalNetworkImports =
getOptionValue('--experimental-network-imports');
const defaultTypeFlag = getOptionValue('--experimental-default-type');
// The next line is where we flip the default to ES modules someday.
const defaultType = defaultTypeFlag === 'module' ? 'module' : 'commonjs';
const { containsModuleSyntax } = internalBinding('contextify');
const { getPackageType } = require('internal/modules/esm/resolve');
const { fileURLToPath } = require('internal/url');
const { ERR_UNKNOWN_FILE_EXTENSION } = require('internal/errors').codes;
Expand Down Expand Up @@ -85,42 +83,66 @@ function underNodeModules(url) {

/**
* @param {URL} url
* @param {{parentURL: string}} context
* @param {{parentURL: string; source?: Buffer}} context
* @param {boolean} ignoreErrors
* @returns {string}
*/
function getFileProtocolModuleFormat(url, context, ignoreErrors) {
function getFileProtocolModuleFormat(url, context = { __proto__: null }, ignoreErrors) {
const { source } = context;
const ext = extname(url);

if (ext === '.js') {
const packageType = getPackageType(url);
if (packageType !== 'none') {
return packageType;
}

// The controlling `package.json` file has no `type` field.
if (defaultType === 'module') {
// An exception to the type flag making ESM the default everywhere is that package scopes under `node_modules`
// should retain the assumption that a lack of a `type` field means CommonJS.
return underNodeModules(url) ? 'commonjs' : 'module';
switch (getOptionValue('--experimental-default-type')) {
case 'module': { // The user explicitly passed `--experimental-default-type=module`.
// An exception to the type flag making ESM the default everywhere is that package scopes under `node_modules`
// should retain the assumption that a lack of a `type` field means CommonJS.
return underNodeModules(url) ? 'commonjs' : 'module';
}
case 'commonjs': { // The user explicitly passed `--experimental-default-type=commonjs`.
return 'commonjs';
}
default: { // The user did not pass `--experimental-default-type`.
// `source` is undefined when this is called from `defaultResolve`;
// but this gets called again from `defaultLoad`/`defaultLoadSync`.
if (source && getOptionValue('--experimental-detect-module')) {
return containsModuleSyntax(`${source}`, fileURLToPath(url)) ? 'module' : 'commonjs';
}
return 'commonjs';
}
}
return 'commonjs';
}

if (ext === '') {
const packageType = getPackageType(url);
if (defaultType === 'commonjs') { // Legacy behavior
if (packageType === 'none' || packageType === 'commonjs') {
return 'commonjs';
} // Else packageType === 'module'
if (packageType === 'module') {
return getFormatOfExtensionlessFile(url);
} // Else defaultType === 'module'
if (underNodeModules(url)) { // Exception for package scopes under `node_modules`
return packageType === 'module' ? getFormatOfExtensionlessFile(url) : 'commonjs';
}
if (packageType === 'none' || packageType === 'module') {
return getFormatOfExtensionlessFile(url);
} // Else packageType === 'commonjs'
return 'commonjs';
if (packageType !== 'none') {
return packageType; // 'commonjs' or future package types
}

// The controlling `package.json` file has no `type` field.
switch (getOptionValue('--experimental-default-type')) {
case 'module': { // The user explicitly passed `--experimental-default-type=module`.
return underNodeModules(url) ? 'commonjs' : getFormatOfExtensionlessFile(url);
}
case 'commonjs': { // The user explicitly passed `--experimental-default-type=commonjs`.
return 'commonjs';
}
default: { // The user did not pass `--experimental-default-type`.
if (source && getOptionValue('--experimental-detect-module') &&
getFormatOfExtensionlessFile(url) === 'module') {
return containsModuleSyntax(`${source}`, fileURLToPath(url)) ? 'module' : 'commonjs';
}
return 'commonjs';
}
}
}

const format = extensionFormatMap[ext];
Expand Down
36 changes: 21 additions & 15 deletions lib/internal/modules/esm/load.js
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ const DATA_URL_PATTERN = /^[^/]+\/[^,;]+(?:[^,]*?)(;base64)?,([\s\S]*)$/;
/**
* @param {URL} url URL to the module
* @param {ESModuleContext} context used to decorate error messages
* @returns {{ responseURL: string, source: string | BufferView }}
* @returns {Promise<{ responseURL: string, source: string | BufferView }>}
*/
async function getSource(url, context) {
const { protocol, href } = url;
Expand Down Expand Up @@ -105,7 +105,7 @@ function getSourceSync(url, context) {
* @param {LoadContext} context
* @returns {LoadReturn}
*/
async function defaultLoad(url, context = kEmptyObject) {
async function defaultLoad(url, context = { __proto__: null }) {
let responseURL = url;
let {
importAttributes,
Expand All @@ -127,19 +127,24 @@ async function defaultLoad(url, context = kEmptyObject) {

throwIfUnsupportedURLScheme(urlInstance, experimentalNetworkImports);

format ??= await defaultGetFormat(urlInstance, context);

validateAttributes(url, format, importAttributes);

if (
format === 'builtin' ||
format === 'commonjs'
) {
if (urlInstance.protocol === 'node:') {
source = null;
} else if (source == null) {
({ responseURL, source } = await getSource(urlInstance, context));
context.source = source;
}

if (format == null || format === 'commonjs') {
// Now that we have the source for the module, run `defaultGetFormat` again in case we detect ESM syntax.
format = await defaultGetFormat(urlInstance, context);
}

if (format === 'commonjs') {
source = null; // Let the CommonJS loader handle it (for now)
}

validateAttributes(url, format, importAttributes);

return {
__proto__: null,
format,
Expand Down Expand Up @@ -178,16 +183,17 @@ function defaultLoadSync(url, context = kEmptyObject) {

throwIfUnsupportedURLScheme(urlInstance, false);

format ??= defaultGetFormat(urlInstance, context);

validateAttributes(url, format, importAttributes);

if (format === 'builtin') {
if (urlInstance.protocol === 'node:') {
source = null;
} else if (source == null) {
({ responseURL, source } = getSourceSync(urlInstance, context));
context.source = source;
}

format ??= defaultGetFormat(urlInstance, context);

validateAttributes(url, format, importAttributes);

return {
__proto__: null,
format,
Expand Down
15 changes: 14 additions & 1 deletion lib/internal/modules/run_main.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ const {
StringPrototypeEndsWith,
} = primordials;

const { containsModuleSyntax } = internalBinding('contextify');
const { getOptionValue } = require('internal/options');
const path = require('path');

Expand Down Expand Up @@ -70,7 +71,19 @@ function shouldUseESMLoader(mainPath) {
const { readPackageScope } = require('internal/modules/package_json_reader');
const pkg = readPackageScope(mainPath);
// No need to guard `pkg` as it can only be an object or `false`.
return pkg.data?.type === 'module' || getOptionValue('--experimental-default-type') === 'module';
switch (pkg.data?.type) {
case 'module':
return true;
case 'commonjs':
return false;
default: { // No package.json or no `type` field.
if (getOptionValue('--experimental-detect-module')) {
// If the first argument of `containsModuleSyntax` is undefined, it will read `mainPath` from the file system.
return containsModuleSyntax(undefined, mainPath);
}
return false;
}
}
}

/**
Expand Down
Loading

0 comments on commit 87bf5f8

Please sign in to comment.