From 168c23a113e5db1303e53c230753dcaa7f16bfd2 Mon Sep 17 00:00:00 2001 From: Joyee Cheung Date: Fri, 30 Jun 2023 17:33:50 +0200 Subject: [PATCH] src: use effective cppgc wrapper id to deduce non-cppgc id Previously we hard-code a wrapper id to be used in BaseObjects to avoid accidentally triggering cppgc on these non-cppgc-managed objects, but hard-coding can be be hacky and result in mismatch when we start to create CppHeap ourselves. This patch makes it more robust by deducing non-cppgc id from the effective cppgc id, if there is one. --- node.gyp | 1 + src/base_object-inl.h | 23 +++--- src/base_object.cc | 13 +--- src/base_object.h | 11 +-- src/env-inl.h | 8 ++ src/env.cc | 47 ++++++++++++ src/env.h | 14 ++++ src/node_messaging.cc | 11 ++- src/node_snapshotable.cc | 6 +- test/cctest/node_test_fixture.cc | 1 + test/cctest/test_cppgc.cc | 127 +++++++++++++++++++++++++++++++ 11 files changed, 233 insertions(+), 29 deletions(-) create mode 100644 test/cctest/test_cppgc.cc diff --git a/node.gyp b/node.gyp index cf016ef468c275..db6d7455bf1dbd 100644 --- a/node.gyp +++ b/node.gyp @@ -1047,6 +1047,7 @@ 'test/cctest/test_aliased_buffer.cc', 'test/cctest/test_base64.cc', 'test/cctest/test_base_object_ptr.cc', + 'test/cctest/test_cppgc.cc', 'test/cctest/test_node_postmortem_metadata.cc', 'test/cctest/test_environment.cc', 'test/cctest/test_linked_binding.cc', diff --git a/src/base_object-inl.h b/src/base_object-inl.h index 99a438a4963717..9ac29633449b7f 100644 --- a/src/base_object-inl.h +++ b/src/base_object-inl.h @@ -70,23 +70,28 @@ Realm* BaseObject::realm() const { return realm_; } -bool BaseObject::IsBaseObject(v8::Local obj) { +bool BaseObject::IsBaseObject(IsolateData* isolate_data, + v8::Local obj) { if (obj->InternalFieldCount() < BaseObject::kInternalFieldCount) { return false; } - void* ptr = - obj->GetAlignedPointerFromInternalField(BaseObject::kEmbedderType); - return ptr == &kNodeEmbedderId; + + uint16_t* ptr = static_cast( + obj->GetAlignedPointerFromInternalField(BaseObject::kEmbedderType)); + return ptr == isolate_data->embedder_id_for_non_cppgc(); } -void BaseObject::TagBaseObject(v8::Local object) { +void BaseObject::TagBaseObject(IsolateData* isolate_data, + v8::Local object) { DCHECK_GE(object->InternalFieldCount(), BaseObject::kInternalFieldCount); - object->SetAlignedPointerInInternalField(BaseObject::kEmbedderType, - &kNodeEmbedderId); + object->SetAlignedPointerInInternalField( + BaseObject::kEmbedderType, isolate_data->embedder_id_for_non_cppgc()); } -void BaseObject::SetInternalFields(v8::Local object, void* slot) { - TagBaseObject(object); +void BaseObject::SetInternalFields(IsolateData* isolate_data, + v8::Local object, + void* slot) { + TagBaseObject(isolate_data, object); object->SetAlignedPointerInInternalField(BaseObject::kSlot, slot); } diff --git a/src/base_object.cc b/src/base_object.cc index ed0fafe74d43a9..70c77eea442e7f 100644 --- a/src/base_object.cc +++ b/src/base_object.cc @@ -17,7 +17,7 @@ BaseObject::BaseObject(Realm* realm, Local object) : persistent_handle_(realm->isolate(), object), realm_(realm) { CHECK_EQ(false, object.IsEmpty()); CHECK_GE(object->InternalFieldCount(), BaseObject::kInternalFieldCount); - SetInternalFields(object, static_cast(this)); + SetInternalFields(realm->isolate_data(), object, static_cast(this)); realm->AddCleanupHook(DeleteMe, static_cast(this)); realm->modify_base_object_count(1); } @@ -66,18 +66,13 @@ void BaseObject::MakeWeak() { WeakCallbackType::kParameter); } -// This just has to be different from the Chromium ones: -// https://source.chromium.org/chromium/chromium/src/+/main:gin/public/gin_embedders.h;l=18-23;drc=5a758a97032f0b656c3c36a3497560762495501a -// Otherwise, when Node is loaded in an isolate which uses cppgc, cppgc will -// misinterpret the data stored in the embedder fields and try to garbage -// collect them. -uint16_t kNodeEmbedderId = 0x90de; - void BaseObject::LazilyInitializedJSTemplateConstructor( const FunctionCallbackInfo& args) { DCHECK(args.IsConstructCall()); CHECK_GE(args.This()->InternalFieldCount(), BaseObject::kInternalFieldCount); - SetInternalFields(args.This(), nullptr); + Environment* env = Environment::GetCurrent(args); + DCHECK_NOT_NULL(env); + SetInternalFields(env->isolate_data(), args.This(), nullptr); } Local BaseObject::MakeLazilyInitializedJSTemplate( diff --git a/src/base_object.h b/src/base_object.h index cfd6af673cec97..5968694e8393d8 100644 --- a/src/base_object.h +++ b/src/base_object.h @@ -41,8 +41,6 @@ namespace worker { class TransferData; } -extern uint16_t kNodeEmbedderId; - class BaseObject : public MemoryRetainer { public: enum InternalFields { kEmbedderType, kSlot, kInternalFieldCount }; @@ -74,10 +72,13 @@ class BaseObject : public MemoryRetainer { // was also passed to the `BaseObject()` constructor initially. // This may return `nullptr` if the C++ object has not been constructed yet, // e.g. when the JS object used `MakeLazilyInitializedJSTemplate`. - static inline void SetInternalFields(v8::Local object, + static inline void SetInternalFields(IsolateData* isolate_data, + v8::Local object, void* slot); - static inline bool IsBaseObject(v8::Local object); - static inline void TagBaseObject(v8::Local object); + static inline bool IsBaseObject(IsolateData* isolate_data, + v8::Local object); + static inline void TagBaseObject(IsolateData* isolate_data, + v8::Local object); static void LazilyInitializedJSTemplateConstructor( const v8::FunctionCallbackInfo& args); static inline BaseObject* FromJSObject(v8::Local object); diff --git a/src/env-inl.h b/src/env-inl.h index 43fc11217133c2..9d2d3bfd81308a 100644 --- a/src/env-inl.h +++ b/src/env-inl.h @@ -61,6 +61,14 @@ inline uv_loop_t* IsolateData::event_loop() const { return event_loop_; } +inline uint16_t* IsolateData::embedder_id_for_cppgc() const { + return &(wrapper_data_->cppgc_id); +} + +inline uint16_t* IsolateData::embedder_id_for_non_cppgc() const { + return &(wrapper_data_->non_cppgc_id); +} + inline NodeArrayBufferAllocator* IsolateData::node_allocator() const { return node_allocator_; } diff --git a/src/env.cc b/src/env.cc index 56f4344d9e1b5d..266ca0040eb4d2 100644 --- a/src/env.cc +++ b/src/env.cc @@ -19,6 +19,7 @@ #include "tracing/agent.h" #include "tracing/traced_value.h" #include "util-inl.h" +#include "v8-cppgc.h" #include "v8-profiler.h" #include @@ -28,6 +29,7 @@ #include #include #include +#include namespace node { @@ -497,6 +499,11 @@ void IsolateData::CreateProperties() { contextify::ContextifyContext::InitializeGlobalTemplates(this); } +constexpr uint16_t kDefaultCppGCEmebdderID = 0x90de; +Mutex IsolateData::isolate_data_mutex_; +std::unordered_map> + IsolateData::wrapper_data_map_; + IsolateData::IsolateData(Isolate* isolate, uv_loop_t* event_loop, MultiIsolatePlatform* platform, @@ -510,6 +517,46 @@ IsolateData::IsolateData(Isolate* isolate, snapshot_data_(snapshot_data) { options_.reset( new PerIsolateOptions(*(per_process::cli_options->per_isolate))); + v8::CppHeap* cpp_heap = isolate->GetCppHeap(); + + uint16_t cppgc_id = kDefaultCppGCEmebdderID; + if (cpp_heap != nullptr) { + // The general convention of the wrappable layout for cppgc in the + // ecosystem is: + // [ 0 ] -> embedder id + // [ 1 ] -> wrappable instance + // If the Isolate includes a CppHeap attached by another embedder, + // And if they also use the field 0 for the ID, we DCHECK that + // the layout matches our layout, and record the embedder ID for cppgc + // to avoid accidentally enabling cppgc on non-cppgc-managed wrappers . + v8::WrapperDescriptor descriptor = cpp_heap->wrapper_descriptor(); + if (descriptor.wrappable_type_index == BaseObject::kEmbedderType) { + cppgc_id = descriptor.embedder_id_for_garbage_collected; + DCHECK_EQ(descriptor.wrappable_instance_index, BaseObject::kSlot); + } + // If the CppHeap uses the slot we use to put non-cppgc-traced BaseObject + // for embedder ID, V8 could accidentally enable cppgc on them. So + // safe guard against this. + DCHECK_NE(descriptor.wrappable_type_index, BaseObject::kSlot); + } + // We do not care about overflow since we just want this to be different + // from the cppgc id. + uint16_t non_cppgc_id = cppgc_id + 1; + + { + // GC could still be run after the IsolateData is destroyed, so we store + // the ids in a static map to ensure pointers to them are still valid + // then. In practice there should be very few variants of the cppgc id + // in one process so the size of this map should be very small. + node::Mutex::ScopedLock lock(isolate_data_mutex_); + auto it = wrapper_data_map_.find(cppgc_id); + if (it == wrapper_data_map_.end()) { + auto pair = wrapper_data_map_.emplace( + cppgc_id, new PerIsolateWrapperData{cppgc_id, non_cppgc_id}); + it = pair.first; + } + wrapper_data_ = it->second.get(); + } if (snapshot_data == nullptr) { CreateProperties(); diff --git a/src/env.h b/src/env.h index 231ca64db38e90..b57440e73d602a 100644 --- a/src/env.h +++ b/src/env.h @@ -124,6 +124,11 @@ struct IsolateDataSerializeInfo { const IsolateDataSerializeInfo& i); }; +struct PerIsolateWrapperData { + uint16_t cppgc_id; + uint16_t non_cppgc_id; +}; + class NODE_EXTERN_PRIVATE IsolateData : public MemoryRetainer { public: IsolateData(v8::Isolate* isolate, @@ -131,6 +136,7 @@ class NODE_EXTERN_PRIVATE IsolateData : public MemoryRetainer { MultiIsolatePlatform* platform = nullptr, ArrayBufferAllocator* node_allocator = nullptr, const SnapshotData* snapshot_data = nullptr); + SET_MEMORY_INFO_NAME(IsolateData) SET_SELF_SIZE(IsolateData) void MemoryInfo(MemoryTracker* tracker) const override; @@ -139,6 +145,9 @@ class NODE_EXTERN_PRIVATE IsolateData : public MemoryRetainer { bool is_building_snapshot() const { return is_building_snapshot_; } void set_is_building_snapshot(bool value) { is_building_snapshot_ = value; } + uint16_t* embedder_id_for_cppgc() const; + uint16_t* embedder_id_for_non_cppgc() const; + inline uv_loop_t* event_loop() const; inline MultiIsolatePlatform* platform() const; inline const SnapshotData* snapshot_data() const; @@ -223,6 +232,11 @@ class NODE_EXTERN_PRIVATE IsolateData : public MemoryRetainer { std::shared_ptr options_; worker::Worker* worker_context_ = nullptr; bool is_building_snapshot_ = false; + PerIsolateWrapperData* wrapper_data_; + + static Mutex isolate_data_mutex_; + static std::unordered_map> + wrapper_data_map_; }; struct ContextInfo { diff --git a/src/node_messaging.cc b/src/node_messaging.cc index 19b393bb308db6..146ef590fb6833 100644 --- a/src/node_messaging.cc +++ b/src/node_messaging.cc @@ -318,7 +318,7 @@ class SerializerDelegate : public ValueSerializer::Delegate { } Maybe WriteHostObject(Isolate* isolate, Local object) override { - if (BaseObject::IsBaseObject(object)) { + if (BaseObject::IsBaseObject(env_->isolate_data(), object)) { return WriteHostObject( BaseObjectPtr { Unwrap(object) }); } @@ -520,7 +520,8 @@ Maybe Message::Serialize(Environment* env, serializer.TransferArrayBuffer(id, ab); continue; } else if (entry->IsObject() && - BaseObject::IsBaseObject(entry.As())) { + BaseObject::IsBaseObject(env->isolate_data(), + entry.As())) { // Check if the source MessagePort is being transferred. if (!source_port.IsEmpty() && entry == source_port) { ThrowDataCloneException( @@ -1287,7 +1288,8 @@ JSTransferable::NestedTransferables() const { Local value; if (!list->Get(context, i).ToLocal(&value)) return Nothing(); - if (value->IsObject() && BaseObject::IsBaseObject(value.As())) + if (value->IsObject() && + BaseObject::IsBaseObject(env()->isolate_data(), value.As())) ret.emplace_back(Unwrap(value)); } return Just(ret); @@ -1342,7 +1344,8 @@ BaseObjectPtr JSTransferable::Data::Deserialize( if (!env->messaging_deserialize_create_object() ->Call(context, Null(env->isolate()), 1, &info) .ToLocal(&ret) || - !ret->IsObject() || !BaseObject::IsBaseObject(ret.As())) { + !ret->IsObject() || + !BaseObject::IsBaseObject(env->isolate_data(), ret.As())) { return {}; } diff --git a/src/node_snapshotable.cc b/src/node_snapshotable.cc index 5724142de8e55c..ff3e8bbc5d7547 100644 --- a/src/node_snapshotable.cc +++ b/src/node_snapshotable.cc @@ -1159,14 +1159,16 @@ void DeserializeNodeInternalFields(Local holder, StartupData SerializeNodeContextInternalFields(Local holder, int index, - void* env) { + void* callback_data) { // We only do one serialization for the kEmbedderType slot, the result // contains everything necessary for deserializing the entire object, // including the fields whose index is bigger than kEmbedderType // (most importantly, BaseObject::kSlot). // For Node.js this design is enough for all the native binding that are // serializable. - if (index != BaseObject::kEmbedderType || !BaseObject::IsBaseObject(holder)) { + Environment* env = static_cast(callback_data); + if (index != BaseObject::kEmbedderType || + !BaseObject::IsBaseObject(env->isolate_data(), holder)) { return StartupData{nullptr, 0}; } diff --git a/test/cctest/node_test_fixture.cc b/test/cctest/node_test_fixture.cc index fe879eb5b46ded..d2662604d14ae9 100644 --- a/test/cctest/node_test_fixture.cc +++ b/test/cctest/node_test_fixture.cc @@ -34,6 +34,7 @@ void NodeTestEnvironment::SetUp() { } void NodeTestEnvironment::TearDown() { + cppgc::ShutdownProcess(); v8::V8::Dispose(); v8::V8::DisposePlatform(); NodeZeroIsolateTestFixture::platform->Shutdown(); diff --git a/test/cctest/test_cppgc.cc b/test/cctest/test_cppgc.cc new file mode 100644 index 00000000000000..49665174615870 --- /dev/null +++ b/test/cctest/test_cppgc.cc @@ -0,0 +1,127 @@ +#include +#include +#include +#include +#include +#include +#include "node_test_fixture.h" + +// This tests that Node.js can work with an existing CppHeap. + +// Mimic the Blink layout. +static int kWrappableTypeIndex = 0; +static int kWrappableInstanceIndex = 1; +static uint16_t kEmbedderID = 0x1; + +// Mimic a class that does not know about Node.js. +class CppGCed : public cppgc::GarbageCollected { + public: + static int kConstructCount; + static int kDestructCount; + static int kTraceCount; + + static void New(const v8::FunctionCallbackInfo& args) { + v8::Isolate* isolate = args.GetIsolate(); + v8::Local js_object = args.This(); + CppGCed* gc_object = cppgc::MakeGarbageCollected( + isolate->GetCppHeap()->GetAllocationHandle()); + js_object->SetAlignedPointerInInternalField(kWrappableTypeIndex, + &kEmbedderID); + js_object->SetAlignedPointerInInternalField(kWrappableInstanceIndex, + gc_object); + kConstructCount++; + args.GetReturnValue().Set(js_object); + } + + static v8::Local GetConstructor( + v8::Local context) { + auto ft = v8::FunctionTemplate::New(context->GetIsolate(), New); + auto ot = ft->InstanceTemplate(); + ot->SetInternalFieldCount(2); + return ft->GetFunction(context).ToLocalChecked(); + } + + CppGCed() = default; + + ~CppGCed() { kDestructCount++; } + + void Trace(cppgc::Visitor* visitor) const { kTraceCount++; } +}; + +int CppGCed::kConstructCount = 0; +int CppGCed::kDestructCount = 0; +int CppGCed::kTraceCount = 0; + +TEST_F(NodeZeroIsolateTestFixture, ExistingCppHeapTest) { + v8::Isolate* isolate = + node::NewIsolate(allocator.get(), ¤t_loop, platform.get()); + + // Create and attach the CppHeap before we set up the IsolateData so that + // it recognizes the existing heap. + std::unique_ptr cpp_heap = v8::CppHeap::Create( + platform.get(), + v8::CppHeapCreateParams( + {}, + v8::WrapperDescriptor( + kWrappableTypeIndex, kWrappableInstanceIndex, kEmbedderID))); + isolate->AttachCppHeap(cpp_heap.get()); + + // Try creating Context + IsolateData + Environment. + { + v8::Isolate::Scope isolate_scope(isolate); + v8::HandleScope handle_scope(isolate); + + std::unique_ptr + isolate_data{ + node::CreateIsolateData(isolate, ¤t_loop, platform.get()), + node::FreeIsolateData}; + CHECK(isolate_data); + + { + auto context = node::NewContext(isolate); + CHECK(!context.IsEmpty()); + v8::Context::Scope context_scope(context); + + // An Environment should already contain a few BaseObjects that are + // supposed to have non-cppgc IDs. + std::unique_ptr + environment{ + node::CreateEnvironment(isolate_data.get(), context, {}, {}), + node::FreeEnvironment}; + CHECK(environment); + + context->Global() + ->Set(context, + v8::String::NewFromUtf8(isolate, "CppGCed").ToLocalChecked(), + CppGCed::GetConstructor(context)) + .FromJust(); + + const char* code = + "globalThis.array = [];" + "for (let i = 0; i < 100; ++i) { array.push(new CppGCed()); }"; + node::LoadEnvironment(environment.get(), code).ToLocalChecked(); + // Request a GC and check if CppGCed::Trace() is invoked. + isolate->LowMemoryNotification(); + int exit_code = SpinEventLoop(environment.get()).FromJust(); + EXPECT_EQ(exit_code, 0); + } + + platform->DrainTasks(isolate); + + // Cleanup. + isolate->DetachCppHeap(); + cpp_heap->Terminate(); + platform->DrainTasks(isolate); + } + + platform->UnregisterIsolate(isolate); + isolate->Dispose(); + + // Check that all the objects are created and destroyed properly. + EXPECT_EQ(CppGCed::kConstructCount, 100); + EXPECT_EQ(CppGCed::kDestructCount, 100); + // GC does not have to feel pressured enough to visit all of them to + // reclaim memory before we are done with the isolate and the entire + // heap can be reclaimed. So just check at least some of them are traced. + EXPECT_GT(CppGCed::kTraceCount, 0); +}