From c64490e9aa46a19cc83fbfd57702660ae67dc9fb Mon Sep 17 00:00:00 2001 From: Geoffrey Booth Date: Wed, 18 Oct 2023 07:32:34 -0700 Subject: [PATCH] esm: improve check for ESM syntax PR-URL: https://github.com/nodejs/node/pull/50127 Reviewed-By: Guy Bedford Reviewed-By: Yagiz Nizipli Reviewed-By: Jacob Smith Reviewed-By: Antoine du Hamel --- lib/internal/modules/cjs/loader.js | 7 +- lib/internal/modules/esm/translators.js | 10 +- lib/internal/modules/helpers.js | 23 --- src/node_contextify.cc | 208 ++++++++++++++++++++---- src/node_contextify.h | 24 +++ 5 files changed, 207 insertions(+), 65 deletions(-) diff --git a/lib/internal/modules/cjs/loader.js b/lib/internal/modules/cjs/loader.js index b3b438372fe9ed..b077ee386bb40e 100644 --- a/lib/internal/modules/cjs/loader.js +++ b/lib/internal/modules/cjs/loader.js @@ -90,6 +90,7 @@ const { makeContextifyScript, runScriptInThisContext, } = require('internal/vm'); +const { containsModuleSyntax } = internalBinding('contextify'); const assert = require('internal/assert'); const fs = require('fs'); @@ -104,7 +105,6 @@ const { const { getCjsConditions, initializeCjsConditions, - hasEsmSyntax, loadBuiltinModule, makeRequireFunction, normalizeReferrerURL, @@ -1315,7 +1315,7 @@ function wrapSafe(filename, content, cjsModuleInstance, codeCache) { } catch (err) { if (process.mainModule === cjsModuleInstance) { const { enrichCJSError } = require('internal/modules/esm/translators'); - enrichCJSError(err, content); + enrichCJSError(err, content, filename); } throw err; } @@ -1400,10 +1400,11 @@ Module._extensions['.js'] = function(module, filename) { const pkg = packageJsonReader.readPackageScope(filename) || { __proto__: null }; // Function require shouldn't be used in ES modules. if (pkg.data?.type === 'module') { + // This is an error path because `require` of a `.js` file in a `"type": "module"` scope is not allowed. const parent = moduleParentCache.get(module); const parentPath = parent?.filename; const packageJsonPath = path.resolve(pkg.path, 'package.json'); - const usesEsm = hasEsmSyntax(content); + const usesEsm = containsModuleSyntax(content, filename); const err = new ERR_REQUIRE_ESM(filename, usesEsm, parentPath, packageJsonPath); // Attempt to reconstruct the parent require frame. diff --git a/lib/internal/modules/esm/translators.js b/lib/internal/modules/esm/translators.js index c36fe07a9503ae..7a62615cfe4210 100644 --- a/lib/internal/modules/esm/translators.js +++ b/lib/internal/modules/esm/translators.js @@ -30,11 +30,11 @@ function lazyTypes() { return _TYPES = require('internal/util/types'); } +const { containsModuleSyntax } = internalBinding('contextify'); const assert = require('internal/assert'); const { readFileSync } = require('fs'); const { dirname, extname, isAbsolute } = require('path'); const { - hasEsmSyntax, loadBuiltinModule, stripBOM, } = require('internal/modules/helpers'); @@ -166,11 +166,11 @@ translators.set('module', async function moduleStrategy(url, source, isMain) { * Provide a more informative error for CommonJS imports. * @param {Error | any} err * @param {string} [content] Content of the file, if known. - * @param {string} [filename] Useful only if `content` is unknown. + * @param {string} [filename] The filename of the erroring module. */ function enrichCJSError(err, content, filename) { if (err != null && ObjectGetPrototypeOf(err) === SyntaxErrorPrototype && - hasEsmSyntax(content || readFileSync(filename, 'utf-8'))) { + containsModuleSyntax(content, filename)) { // Emit the warning synchronously because we are in the middle of handling // a SyntaxError that will throw and likely terminate the process before an // asynchronous warning would be emitted. @@ -217,7 +217,7 @@ function loadCJSModule(module, source, url, filename) { importModuleDynamically, // importModuleDynamically ).function; } catch (err) { - enrichCJSError(err, source, url); + enrichCJSError(err, source, filename); throw err; } @@ -344,7 +344,7 @@ translators.set('commonjs', async function commonjsStrategy(url, source, assert(module === CJSModule._cache[filename]); CJSModule._load(filename); } catch (err) { - enrichCJSError(err, source, url); + enrichCJSError(err, source, filename); throw err; } } : loadCJSModule; diff --git a/lib/internal/modules/helpers.js b/lib/internal/modules/helpers.js index 7f2959cc469dc1..6b30a1d8c76d4b 100644 --- a/lib/internal/modules/helpers.js +++ b/lib/internal/modules/helpers.js @@ -3,7 +3,6 @@ const { ArrayPrototypeForEach, ArrayPrototypeJoin, - ArrayPrototypeSome, ObjectDefineProperty, ObjectPrototypeHasOwnProperty, SafeMap, @@ -299,32 +298,10 @@ function normalizeReferrerURL(referrer) { return new URL(referrer).href; } -/** - * For error messages only, check if ESM syntax is in use. - * @param {string} code - */ -function hasEsmSyntax(code) { - debug('Checking for ESM syntax'); - const parser = require('internal/deps/acorn/acorn/dist/acorn').Parser; - let root; - try { - root = parser.parse(code, { sourceType: 'module', ecmaVersion: 'latest' }); - } catch { - return false; - } - - return ArrayPrototypeSome(root.body, (stmt) => - stmt.type === 'ExportDefaultDeclaration' || - stmt.type === 'ExportNamedDeclaration' || - stmt.type === 'ImportDeclaration' || - stmt.type === 'ExportAllDeclaration'); -} - module.exports = { addBuiltinLibsToObject, getCjsConditions, initializeCjsConditions, - hasEsmSyntax, loadBuiltinModule, makeRequireFunction, normalizeReferrerURL, diff --git a/src/node_contextify.cc b/src/node_contextify.cc index f669f323b1c6f3..ddf2e2c2fb24d1 100644 --- a/src/node_contextify.cc +++ b/src/node_contextify.cc @@ -318,6 +318,7 @@ void ContextifyContext::CreatePerIsolateProperties( SetMethod(isolate, target, "makeContext", MakeContext); SetMethod(isolate, target, "isContext", IsContext); SetMethod(isolate, target, "compileFunction", CompileFunction); + SetMethod(isolate, target, "containsModuleSyntax", ContainsModuleSyntax); } void ContextifyContext::RegisterExternalReferences( @@ -325,6 +326,7 @@ void ContextifyContext::RegisterExternalReferences( registry->Register(MakeContext); registry->Register(IsContext); registry->Register(CompileFunction); + registry->Register(ContainsModuleSyntax); registry->Register(PropertyGetterCallback); registry->Register(PropertySetterCallback); registry->Register(PropertyDescriptorCallback); @@ -1205,33 +1207,18 @@ void ContextifyContext::CompileFunction( data + cached_data_buf->ByteOffset(), cached_data_buf->ByteLength()); } - // Set host_defined_options Local host_defined_options = - PrimitiveArray::New(isolate, loader::HostDefinedOptions::kLength); - host_defined_options->Set( - isolate, loader::HostDefinedOptions::kID, id_symbol); - - ScriptOrigin origin(isolate, - filename, - line_offset, // line offset - column_offset, // column offset - true, // is cross origin - -1, // script id - Local(), // source map URL - false, // is opaque (?) - false, // is WASM - false, // is ES Module - host_defined_options); - - ScriptCompiler::Source source(code, origin, cached_data); - ScriptCompiler::CompileOptions options; - if (source.GetCachedData() == nullptr) { - options = ScriptCompiler::kNoCompileOptions; - } else { - options = ScriptCompiler::kConsumeCodeCache; - } + GetHostDefinedOptions(isolate, id_symbol); + ScriptCompiler::Source source = + GetCommonJSSourceInstance(isolate, + code, + filename, + line_offset, + column_offset, + host_defined_options, + cached_data); + ScriptCompiler::CompileOptions options = GetCompileOptions(source); - TryCatchScope try_catch(env); Context::Scope scope(parsing_context); // Read context extensions from buffer @@ -1256,9 +1243,83 @@ void ContextifyContext::CompileFunction( } } + TryCatchScope try_catch(env); + Local result = CompileFunctionAndCacheResult(env, + parsing_context, + &source, + params, + context_extensions, + options, + produce_cached_data, + id_symbol, + try_catch); + + if (try_catch.HasCaught() && !try_catch.HasTerminated()) { + try_catch.ReThrow(); + return; + } + + if (result.IsEmpty()) { + return; + } + args.GetReturnValue().Set(result); +} + +Local ContextifyContext::GetHostDefinedOptions( + Isolate* isolate, Local id_symbol) { + Local host_defined_options = + PrimitiveArray::New(isolate, loader::HostDefinedOptions::kLength); + host_defined_options->Set( + isolate, loader::HostDefinedOptions::kID, id_symbol); + return host_defined_options; +} + +ScriptCompiler::Source ContextifyContext::GetCommonJSSourceInstance( + Isolate* isolate, + Local code, + Local filename, + int line_offset, + int column_offset, + Local host_defined_options, + ScriptCompiler::CachedData* cached_data) { + ScriptOrigin origin(isolate, + filename, + line_offset, // line offset + column_offset, // column offset + true, // is cross origin + -1, // script id + Local(), // source map URL + false, // is opaque (?) + false, // is WASM + false, // is ES Module + host_defined_options); + return ScriptCompiler::Source(code, origin, cached_data); +} + +ScriptCompiler::CompileOptions ContextifyContext::GetCompileOptions( + const ScriptCompiler::Source& source) { + ScriptCompiler::CompileOptions options; + if (source.GetCachedData() != nullptr) { + options = ScriptCompiler::kConsumeCodeCache; + } else { + options = ScriptCompiler::kNoCompileOptions; + } + return options; +} + +Local ContextifyContext::CompileFunctionAndCacheResult( + Environment* env, + Local parsing_context, + ScriptCompiler::Source* source, + std::vector> params, + std::vector> context_extensions, + ScriptCompiler::CompileOptions options, + bool produce_cached_data, + Local id_symbol, + const TryCatchScope& try_catch) { MaybeLocal maybe_fn = ScriptCompiler::CompileFunction( parsing_context, - &source, + source, params.size(), params.data(), context_extensions.size(), @@ -1270,24 +1331,26 @@ void ContextifyContext::CompileFunction( if (!maybe_fn.ToLocal(&fn)) { if (try_catch.HasCaught() && !try_catch.HasTerminated()) { errors::DecorateErrorStack(env, try_catch); - try_catch.ReThrow(); + return Object::New(env->isolate()); } - return; } + + Local context = env->context(); if (fn->SetPrivate(context, env->host_defined_option_symbol(), id_symbol) .IsNothing()) { - return; + return Object::New(env->isolate()); } + Isolate* isolate = env->isolate(); Local result = Object::New(isolate); if (result->Set(parsing_context, env->function_string(), fn).IsNothing()) - return; + return Object::New(env->isolate()); if (result ->Set(parsing_context, env->source_map_url_string(), fn->GetScriptOrigin().SourceMapUrl()) .IsNothing()) - return; + return Object::New(env->isolate()); std::unique_ptr new_cached_data; if (produce_cached_data) { @@ -1296,14 +1359,91 @@ void ContextifyContext::CompileFunction( if (StoreCodeCacheResult(env, result, options, - source, + *source, produce_cached_data, std::move(new_cached_data)) .IsNothing()) { - return; + return Object::New(env->isolate()); } - args.GetReturnValue().Set(result); + return result; +} + +// When compiling as CommonJS source code that contains ESM syntax, the +// following error messages are returned: +// - `import` statements: "Cannot use import statement outside a module" +// - `export` statements: "Unexpected token 'export'" +// - `import.meta` references: "Cannot use 'import.meta' outside a module" +// Dynamic `import()` is permitted in CommonJS, so it does not error. +// While top-level `await` is not permitted in CommonJS, it returns the same +// error message as when `await` is used in a sync function, so we don't use it +// as a disambiguation. +constexpr std::array esm_syntax_error_messages = { + "Cannot use import statement outside a module", // `import` statements + "Unexpected token 'export'", // `export` statements + "Cannot use 'import.meta' outside a module"}; // `import.meta` references + +void ContextifyContext::ContainsModuleSyntax( + const FunctionCallbackInfo& args) { + // Argument 1: source code + CHECK(args[0]->IsString()); + Local code = args[0].As(); + + // Argument 2: filename + Local filename = String::Empty(args.GetIsolate()); + if (!args[1]->IsUndefined()) { + CHECK(args[1]->IsString()); + filename = args[1].As(); + } + + Environment* env = Environment::GetCurrent(args); + Isolate* isolate = env->isolate(); + Local context = env->context(); + + // TODO(geoffreybooth): Centralize this rather than matching the logic in + // cjs/loader.js and translators.js + Local script_id = String::Concat( + isolate, String::NewFromUtf8(isolate, "cjs:").ToLocalChecked(), filename); + Local id_symbol = Symbol::New(isolate, script_id); + + Local host_defined_options = + GetHostDefinedOptions(isolate, id_symbol); + ScriptCompiler::Source source = GetCommonJSSourceInstance( + isolate, code, filename, 0, 0, host_defined_options, nullptr); + ScriptCompiler::CompileOptions options = GetCompileOptions(source); + + std::vector> params = { + String::NewFromUtf8(isolate, "exports").ToLocalChecked(), + String::NewFromUtf8(isolate, "require").ToLocalChecked(), + String::NewFromUtf8(isolate, "module").ToLocalChecked(), + String::NewFromUtf8(isolate, "__filename").ToLocalChecked(), + String::NewFromUtf8(isolate, "__dirname").ToLocalChecked()}; + + TryCatchScope try_catch(env); + + ContextifyContext::CompileFunctionAndCacheResult(env, + context, + &source, + params, + std::vector>(), + options, + true, + id_symbol, + try_catch); + + bool found_error_message_caused_by_module_syntax = false; + if (try_catch.HasCaught() && !try_catch.HasTerminated()) { + Utf8Value message_value(env->isolate(), try_catch.Message()->Get()); + auto message = message_value.ToStringView(); + + for (const auto& error_message : esm_syntax_error_messages) { + if (message.find(error_message) != std::string_view::npos) { + found_error_message_caused_by_module_syntax = true; + break; + } + } + } + args.GetReturnValue().Set(found_error_message_caused_by_module_syntax); } static void StartSigintWatchdog(const FunctionCallbackInfo& args) { diff --git a/src/node_contextify.h b/src/node_contextify.h index d1dddbf374d563..721c146ff88c35 100644 --- a/src/node_contextify.h +++ b/src/node_contextify.h @@ -83,6 +83,30 @@ class ContextifyContext : public BaseObject { static void IsContext(const v8::FunctionCallbackInfo& args); static void CompileFunction( const v8::FunctionCallbackInfo& args); + static v8::Local CompileFunctionAndCacheResult( + Environment* env, + v8::Local parsing_context, + v8::ScriptCompiler::Source* source, + std::vector> params, + std::vector> context_extensions, + v8::ScriptCompiler::CompileOptions options, + bool produce_cached_data, + v8::Local id_symbol, + const errors::TryCatchScope& try_catch); + static v8::Local GetHostDefinedOptions( + v8::Isolate* isolate, v8::Local id_symbol); + static v8::ScriptCompiler::Source GetCommonJSSourceInstance( + v8::Isolate* isolate, + v8::Local code, + v8::Local filename, + int line_offset, + int column_offset, + v8::Local host_defined_options, + v8::ScriptCompiler::CachedData* cached_data); + static v8::ScriptCompiler::CompileOptions GetCompileOptions( + const v8::ScriptCompiler::Source& source); + static void ContainsModuleSyntax( + const v8::FunctionCallbackInfo& args); static void WeakCallback( const v8::WeakCallbackInfo& data); static void PropertyGetterCallback(