From eb703f772620a1b9524d6effd9082393a88bc61d Mon Sep 17 00:00:00 2001 From: Joyee Cheung Date: Fri, 24 Feb 2023 18:48:29 +0100 Subject: [PATCH] src: support snapshot in single executable applications This patch adds snapshot support to single executable applications. To build a snapshot from the main script when preparing the blob that will be injected into the single executable application, add `"useSnapshot": true` to the configuration passed to `--experimental-sea-config`. For example: ``` { "main": "snapshot.js", "output": "sea-prep.blob", "useSnapshot": true } ``` The main script used to build the snapshot must invoke `v8.startupSnapshot.setDeserializeMainFunction()` to configure the entry point. The generated startup snapshot would be part of the preparation blob and get injected into the final executable. When the single executable application is launched, instead of running the `main` script from scratch, Node.js would deserialize the snapshot to get to the state initialized during build-time directly. --- doc/api/single-executable-applications.md | 28 ++++++- lib/internal/process/pre_execution.js | 2 +- src/env.h | 1 + src/node.cc | 83 ++++++++++++++----- src/node_main_instance.cc | 10 ++- src/node_sea.cc | 79 ++++++++++++++++-- src/node_sea.h | 9 +- src/node_snapshotable.cc | 11 ++- ...-single-executable-application-snapshot.js | 83 +++++++++++++++++++ 9 files changed, 265 insertions(+), 41 deletions(-) create mode 100644 test/sequential/test-single-executable-application-snapshot.js diff --git a/doc/api/single-executable-applications.md b/doc/api/single-executable-applications.md index 2dedd17527ba87..4ec244bdb536ba 100644 --- a/doc/api/single-executable-applications.md +++ b/doc/api/single-executable-applications.md @@ -6,6 +6,10 @@ added: - v19.7.0 - v18.16.0 +changes: + - version: REPLACEME + pr-url: https://github.com/nodejs/node/pull/46824 + description: Added support for "useSnapshot". 
--> > Stability: 1 - Experimental: This feature is being designed and will change. @@ -177,7 +181,8 @@ The configuration currently reads the following top-level fields: { "main": "/path/to/bundled/script.js", "output": "/path/to/write/the/generated/blob.blob", - "disableExperimentalSEAWarning": true // Default: false + "disableExperimentalSEAWarning": true, // Default: false + "useSnapshot": true // Default: false } ``` @@ -185,6 +190,24 @@ If the paths are not absolute, Node.js will use the path relative to the current working directory. The version of the Node.js binary used to produce the blob must be the same as the one to which the blob will be injected. +### Startup snapshot support + +When `useSnapshot` is set to true in the configuration, during the generation +of the single executable preparation blob, Node.js will run the `main` script +to generate a startup snapshot. The script must invoke +[`v8.startupSnapshot.setDeserializeMainFunction()`][] to set up the entry point. +The generated startup snapshot would be part of the preparation blob and get +injected into the final executable. When the single executable application is +launched, instead of running the `main` script from scratch, Node.js would +deserialize the snapshot to get to the state initialized during +build-time directly. + +The general constraints of the startup snapshot scripts also apply to the main +script when it's used to build a snapshot for the single executable application, +and the main script can use the [`v8.startupSnapshot` API][] to adapt to +these constraints. See the +[documentation about startup snapshot support in Node.js][]. + ## Notes ### `require(id)` in the injected module is not file based @@ -257,6 +280,9 @@ to help us document them. 
[`process.execPath`]: process.md#processexecpath [`require()`]: modules.md#requireid [`require.main`]: modules.md#accessing-the-main-module +[`v8.startupSnapshot.setDeserializeMainFunction()`]: v8.md#v8startupsnapshotsetdeserializemainfunctioncallback-data +[`v8.startupSnapshot` API]: v8.md#startup-snapshot-api +[documentation about startup snapshot support in Node.js]: cli.md#--build-snapshot [fuse]: https://www.electronjs.org/docs/latest/tutorial/fuses [postject]: https://github.com/nodejs/postject [signtool]: https://learn.microsoft.com/en-us/windows/win32/seccrypto/signtool diff --git a/lib/internal/process/pre_execution.js b/lib/internal/process/pre_execution.js index 678bd05164f99a..99ad8f8b9faa19 100644 --- a/lib/internal/process/pre_execution.js +++ b/lib/internal/process/pre_execution.js @@ -143,7 +143,7 @@ function patchProcessObject(expandArgv1) { __proto__: null, enumerable: true, // Only set it to true during snapshot building. - configurable: getOptionValue('--build-snapshot'), + configurable: isBuildingSnapshot(), value: process.argv[0], }); diff --git a/src/env.h b/src/env.h index b31cd12dfe2ec3..b42ea4c5409a3d 100644 --- a/src/env.h +++ b/src/env.h @@ -533,6 +533,7 @@ struct SnapshotData { void ToFile(FILE* out) const; std::vector ToBlob() const; + void ToBlob(std::vector* out) const; // If returns false, the metadata doesn't match the current Node.js binary, // and the caller should not consume the snapshot data. 
bool Check() const; diff --git a/src/node.cc b/src/node.cc index f483e59dd155a8..3f35c3492c3aca 100644 --- a/src/node.cc +++ b/src/node.cc @@ -292,6 +292,17 @@ MaybeLocal StartExecution(Environment* env, StartExecutionCallback cb) { CHECK(!env->isolate_data()->is_building_snapshot()); +#ifndef DISABLE_SINGLE_EXECUTABLE_APPLICATION + if (sea::IsSingleExecutable()) { + sea::SeaResource sea = sea::FindSingleExecutableResource(); + // The SEA preparation blob building process should already enforce this, + // this check is just here to guard against the unlikely case where + // the SEA preparation blob has been manually modified by someone. + CHECK_IMPLIES(sea.use_snapshot(), + !env->snapshot_deserialize_main().IsEmpty()); + } +#endif + // TODO(joyeecheung): move these conditions into JS land and let the // deserialize main function take precedence. For workers, we need to // move the pre-execution part into a different file that can be @@ -1198,49 +1209,66 @@ ExitCode GenerateAndWriteSnapshotData(const SnapshotData** snapshot_data_ptr, return exit_code; } -ExitCode LoadSnapshotDataAndRun(const SnapshotData** snapshot_data_ptr, - const InitializationResultImpl* result) { - ExitCode exit_code = result->exit_code_enum(); +bool LoadSnapshotData(const SnapshotData** snapshot_data_ptr) { // nullptr indicates there's no snapshot data. 
DCHECK_NULL(*snapshot_data_ptr); + + bool is_sea = false; +#ifndef DISABLE_SINGLE_EXECUTABLE_APPLICATION + if (sea::IsSingleExecutable()) { + is_sea = true; + sea::SeaResource sea = sea::FindSingleExecutableResource(); + if (sea.use_snapshot()) { + std::unique_ptr read_data = + std::make_unique(); + std::string_view snapshot = sea.main_code_or_snapshot; + if (SnapshotData::FromBlob(read_data.get(), snapshot)) { + *snapshot_data_ptr = read_data.release(); + return true; + } else { + fprintf(stderr, "Invalid snapshot data in single executable binary\n"); + return false; + } + } + } +#endif + // --snapshot-blob indicates that we are reading a customized snapshot. - if (!per_process::cli_options->snapshot_blob.empty()) { + // Ignore it when we are loading from SEA. + if (!is_sea && !per_process::cli_options->snapshot_blob.empty()) { std::string filename = per_process::cli_options->snapshot_blob; FILE* fp = fopen(filename.c_str(), "rb"); if (fp == nullptr) { fprintf(stderr, "Cannot open %s", filename.c_str()); - exit_code = ExitCode::kStartupSnapshotFailure; - return exit_code; + return false; } std::unique_ptr read_data = std::make_unique(); bool ok = SnapshotData::FromFile(read_data.get(), fp); fclose(fp); if (!ok) { - // If we fail to read the customized snapshot, - // simply exit with kStartupSnapshotFailure. - exit_code = ExitCode::kStartupSnapshotFailure; - return exit_code; + return false; } *snapshot_data_ptr = read_data.release(); - } else if (per_process::cli_options->node_snapshot) { - // If --snapshot-blob is not specified, we are reading the embedded - // snapshot, but we will skip it if --no-node-snapshot is specified. + return true; + } + + if (per_process::cli_options->node_snapshot) { + // If --snapshot-blob is not specified or if the SEA contains no snapshot, + // we are reading the embedded snapshot, but we will skip it if + // --no-node-snapshot is specified. 
const node::SnapshotData* read_data = SnapshotBuilder::GetEmbeddedSnapshotData(); - if (read_data != nullptr && read_data->Check()) { + if (read_data != nullptr) { + if (!read_data->Check()) { + return false; + } // If we fail to read the embedded snapshot, treat it as if Node.js // was built without one. *snapshot_data_ptr = read_data; } } - NodeMainInstance main_instance(*snapshot_data_ptr, - uv_default_loop(), - per_process::v8_platform.Platform(), - result->args(), - result->exec_args()); - exit_code = main_instance.Run(); - return exit_code; + return true; } static ExitCode StartInternal(int argc, char** argv) { @@ -1275,7 +1303,8 @@ static ExitCode StartInternal(int argc, char** argv) { std::string sea_config = per_process::cli_options->experimental_sea_config; if (!sea_config.empty()) { - return sea::BuildSingleExecutableBlob(sea_config); + return sea::BuildSingleExecutableBlob( + sea_config, result->args(), result->exec_args()); } // --build-snapshot indicates that we are in snapshot building mode. @@ -1290,7 +1319,15 @@ static ExitCode StartInternal(int argc, char** argv) { } // Without --build-snapshot, we are in snapshot loading mode. 
- return LoadSnapshotDataAndRun(&snapshot_data, result.get()); + if (!LoadSnapshotData(&snapshot_data)) { + return ExitCode::kStartupSnapshotFailure; + } + NodeMainInstance main_instance(snapshot_data, + uv_default_loop(), + per_process::v8_platform.Platform(), + result->args(), + result->exec_args()); + return main_instance.Run(); } int Start(int argc, char** argv) { diff --git a/src/node_main_instance.cc b/src/node_main_instance.cc index 41e5bee353a579..2ef56f80dfc8f6 100644 --- a/src/node_main_instance.cc +++ b/src/node_main_instance.cc @@ -92,12 +92,16 @@ void NodeMainInstance::Run(ExitCode* exit_code, Environment* env) { bool runs_sea_code = false; #ifndef DISABLE_SINGLE_EXECUTABLE_APPLICATION if (sea::IsSingleExecutable()) { - runs_sea_code = true; sea::SeaResource sea = sea::FindSingleExecutableResource(); - std::string_view code = sea.code; - LoadEnvironment(env, code); + if (!sea.use_snapshot()) { + runs_sea_code = true; + std::string_view code = sea.main_code_or_snapshot; + LoadEnvironment(env, code); + } } #endif + // Either there is already a snapshot main function from SEA, or it's not + // a SEA at all. if (!runs_sea_code) { LoadEnvironment(env, StartExecutionCallback{}); } diff --git a/src/node_sea.cc b/src/node_sea.cc index 88741a5fce9d48..7ec54dfb949225 100644 --- a/src/node_sea.cc +++ b/src/node_sea.cc @@ -6,7 +6,9 @@ #include "json_parser.h" #include "node_external_reference.h" #include "node_internals.h" +#include "node_snapshot_builder.h" #include "node_union_bytes.h" +#include "node_v8_platform-inl.h" // The POSTJECT_SENTINEL_FUSE macro is a string of random characters selected by // the Node.js project that is present only once in the entire binary. 
It is @@ -64,7 +66,7 @@ class SeaSerializer : public BlobSerializer { template <> size_t SeaSerializer::Write(const SeaResource& sea) { - sink.reserve(SeaResource::kHeaderSize + sea.code.size()); + sink.reserve(SeaResource::kHeaderSize + sea.main_code_or_snapshot.size()); Debug("Write SEA magic %x\n", kMagic); size_t written_total = WriteArithmetic(kMagic); @@ -75,9 +77,12 @@ size_t SeaSerializer::Write(const SeaResource& sea) { DCHECK_EQ(written_total, SeaResource::kHeaderSize); Debug("Write SEA resource code %p, size=%zu\n", - sea.code.data(), - sea.code.size()); - written_total += WriteStringView(sea.code, StringLogMode::kAddressAndContent); + sea.main_code_or_snapshot.data(), + sea.main_code_or_snapshot.size()); + written_total += + WriteStringView(sea.main_code_or_snapshot, + sea.use_snapshot() ? StringLogMode::kAddressOnly + : StringLogMode::kAddressAndContent); return written_total; } @@ -103,7 +108,10 @@ SeaResource SeaDeserializer::Read() { Debug("Read SEA flags %x\n", static_cast(flags)); CHECK_EQ(read_total, SeaResource::kHeaderSize); - std::string_view code = ReadStringView(StringLogMode::kAddressAndContent); + std::string_view code = + ReadStringView(static_cast(flags & SeaFlags::kuseSnapshot) + ? 
StringLogMode::kAddressOnly + : StringLogMode::kAddressAndContent); Debug("Read SEA resource code %p, size=%zu\n", code.data(), code.size()); return {flags, code}; } @@ -133,6 +141,10 @@ std::string_view FindSingleExecutableBlob() { } // anonymous namespace +bool SeaResource::use_snapshot() const { + return static_cast(flags & SeaFlags::kuseSnapshot); +} + SeaResource FindSingleExecutableResource() { static const SeaResource sea_resource = []() -> SeaResource { std::string_view blob = FindSingleExecutableBlob(); @@ -235,10 +247,23 @@ std::optional ParseSingleExecutableConfig( result.flags |= SeaFlags::kDisableExperimentalSeaWarning; } + std::optional use_snapshot = parser.GetTopLevelBoolField("useSnapshot"); + if (!use_snapshot.has_value()) { + FPrintF( + stderr, "\"useSnapshot\" field of %s is not a Boolean\n", config_path); + return std::nullopt; + } + if (use_snapshot.value()) { + result.flags |= SeaFlags::kuseSnapshot; + } + return result; } -ExitCode GenerateSingleExecutableBlob(const SeaConfig& config) { +ExitCode GenerateSingleExecutableBlob( + const SeaConfig& config, + const std::vector args, + const std::vector exec_args) { std::string main_script; // TODO(joyeecheung): unify the file utils. 
int r = ReadFileSync(&main_script, config.main_path.c_str()); @@ -248,7 +273,40 @@ ExitCode GenerateSingleExecutableBlob(const SeaConfig& config) { return ExitCode::kGenericUserError; } - SeaResource sea{config.flags, main_script}; + std::vector snapshot_blob; + bool builds_snapshot_from_main = + static_cast(config.flags & SeaFlags::kuseSnapshot); + if (builds_snapshot_from_main) { + SnapshotData snapshot; + std::vector patched_args = {args[0], GetAnonymousMainPath()}; + ExitCode exit_code = SnapshotBuilder::Generate( + &snapshot, patched_args, exec_args, main_script); + if (exit_code != ExitCode::kNoFailure) { + return exit_code; + } + auto& persistents = snapshot.env_info.principal_realm.persistent_values; + auto it = std::find_if( + persistents.begin(), persistents.end(), [](const PropInfo& prop) { + return prop.name == "snapshot_deserialize_main"; + }); + if (it == persistents.end()) { + FPrintF( + stderr, + "%s does not invoke " + "v8.startupSnapshot.setDeserializeMainFunction(), which is required " + "for snapshot scripts used to build single executable applications." + "\n", + config.main_path); + return ExitCode::kGenericUserError; + } + snapshot.ToBlob(&snapshot_blob); + } + + SeaResource sea{ + config.flags, + builds_snapshot_from_main + ? 
std::string_view{snapshot_blob.data(), snapshot_blob.size()} + : std::string_view{main_script.data(), main_script.size()}}; SeaSerializer serializer; serializer.Write(sea); @@ -269,11 +327,14 @@ ExitCode GenerateSingleExecutableBlob(const SeaConfig& config) { } // anonymous namespace -ExitCode BuildSingleExecutableBlob(const std::string& config_path) { +ExitCode BuildSingleExecutableBlob(const std::string& config_path, + const std::vector args, + const std::vector exec_args) { std::optional config_opt = ParseSingleExecutableConfig(config_path); if (config_opt.has_value()) { - ExitCode code = GenerateSingleExecutableBlob(config_opt.value()); + ExitCode code = + GenerateSingleExecutableBlob(config_opt.value(), args, exec_args); return code; } diff --git a/src/node_sea.h b/src/node_sea.h index 8b0877df3eb0d7..f2c89f1d9e11a7 100644 --- a/src/node_sea.h +++ b/src/node_sea.h @@ -21,19 +21,24 @@ const uint32_t kMagic = 0x143da20; enum class SeaFlags : uint32_t { kDefault = 0, kDisableExperimentalSeaWarning = 1 << 0, + kuseSnapshot = 1 << 1, }; struct SeaResource { SeaFlags flags = SeaFlags::kDefault; - std::string_view code; + std::string_view main_code_or_snapshot; + bool use_snapshot() const; static constexpr size_t kHeaderSize = sizeof(kMagic) + sizeof(SeaFlags); }; bool IsSingleExecutable(); SeaResource FindSingleExecutableResource(); std::tuple FixupArgsForSEA(int argc, char** argv); -node::ExitCode BuildSingleExecutableBlob(const std::string& config_path); +node::ExitCode BuildSingleExecutableBlob( + const std::string& config_path, + const std::vector args, + const std::vector exec_args); } // namespace sea } // namespace node diff --git a/src/node_snapshotable.cc b/src/node_snapshotable.cc index 5724142de8e55c..69d1a0da4e4e71 100644 --- a/src/node_snapshotable.cc +++ b/src/node_snapshotable.cc @@ -584,7 +584,7 @@ size_t SnapshotSerializer::Write(const SnapshotMetadata& data) { // [ ... ] env_info // [ ... 
] code_cache -std::vector SnapshotData::ToBlob() const { +void SnapshotData::ToBlob(std::vector* out) const { SnapshotSerializer w; w.Debug("SnapshotData::ToBlob()\n"); @@ -603,7 +603,14 @@ std::vector SnapshotData::ToBlob() const { w.Debug("Write code_cache\n"); written_total += w.WriteVector(code_cache); w.Debug("SnapshotData::ToBlob() Wrote %d bytes\n", written_total); - return w.sink; + + *out = std::move(w.sink); +} + +std::vector SnapshotData::ToBlob() const { + std::vector result; + ToBlob(&result); + return result; } void SnapshotData::ToFile(FILE* out) const { diff --git a/test/sequential/test-single-executable-application-snapshot.js b/test/sequential/test-single-executable-application-snapshot.js new file mode 100644 index 00000000000000..2595be35fcdc7e --- /dev/null +++ b/test/sequential/test-single-executable-application-snapshot.js @@ -0,0 +1,83 @@ +'use strict'; + +require('../common'); + +const { + injectAndCodeSign, + skipIfSingleExecutableIsNotSupported, +} = require('../common/sea'); + +skipIfSingleExecutableIsNotSupported(); + +// This tests the snapshot support in single executable applications. + +const tmpdir = require('../common/tmpdir'); +const { copyFileSync, writeFileSync, existsSync } = require('fs'); +const { execFileSync, spawnSync } = require('child_process'); +const { join } = require('path'); +const assert = require('assert'); + +const configFile = join(tmpdir.path, 'sea-config.json'); +const seaPrepBlob = join(tmpdir.path, 'sea-prep.blob'); +const outputFile = join(tmpdir.path, process.platform === 'win32' ? 
'sea.exe' : 'sea'); + +{ + tmpdir.refresh(); + + writeFileSync(join(tmpdir.path, 'snapshot.js'), '', 'utf-8'); + writeFileSync(configFile, ` + { + "main": "snapshot.js", + "output": "sea-prep.blob", + "useSnapshot": true + } + `); + + const child = spawnSync( + process.execPath, + ['--experimental-sea-config', 'sea-config.json'], + { + cwd: tmpdir.path + }); + + assert.match( + child.stderr.toString(), + /snapshot\.js does not invoke v8\.startupSnapshot\.setDeserializeMainFunction\(\)/); +} + +{ + tmpdir.refresh(); + const code = ` + const { + setDeserializeMainFunction, + } = require('v8').startupSnapshot; + + setDeserializeMainFunction(() => { + console.log('Hello from snapshot'); + }); + `; + + writeFileSync(join(tmpdir.path, 'snapshot.js'), code, 'utf-8'); + writeFileSync(configFile, ` + { + "main": "snapshot.js", + "output": "sea-prep.blob", + "useSnapshot": true + } + `); + + execFileSync( + process.execPath, + ['--experimental-sea-config', 'sea-config.json'], + { + cwd: tmpdir.path + }); + + assert(existsSync(seaPrepBlob)); + + copyFileSync(process.execPath, outputFile); + injectAndCodeSign(outputFile, seaPrepBlob); + + const out = execFileSync(outputFile); + assert.strictEqual(out.toString().trim(), 'Hello from snapshot'); +}