From 98e4a61a09e8d49271940986fe09c8cb158189f0 Mon Sep 17 00:00:00 2001 From: Joyee Cheung Date: Wed, 6 Apr 2022 21:30:53 +0800 Subject: [PATCH] src: fix near heap limit callback - Use the allocated space size to calculate the raised heap limit, as that is what V8 uses to determine whether it should crash - previously we used the used size for the calculation and that was too conservative and did not prevent the crashes effectively enough. - Use RequestInterrupt() to take the snapshot since we need to make sure that the heap limit is raised first before the snapshot can be taken. --- doc/api/cli.md | 35 +++++++++++++------- src/env.cc | 86 +++++++++++++++++++++++++++++++++++--------------- src/env.h | 3 ++ 3 files changed, 87 insertions(+), 37 deletions(-) diff --git a/doc/api/cli.md b/doc/api/cli.md index 92acc87e648a34..28652cedff6274 100644 --- a/doc/api/cli.md +++ b/doc/api/cli.md @@ -436,23 +436,36 @@ Writes a V8 heap snapshot to disk when the V8 heap usage is approaching the heap limit. `count` should be a non-negative integer (in which case Node.js will write no more than `max_count` snapshots to disk). -When generating snapshots, garbage collection may be triggered and bring -the heap usage down. Therefore multiple snapshots may be written to disk -before the Node.js instance finally runs out of memory. These heap snapshots -can be compared to determine what objects are being allocated during the -time consecutive snapshots are taken. It's not guaranteed that Node.js will -write exactly `max_count` snapshots to disk, but it will try -its best to generate at least one and up to `max_count` snapshots before the -Node.js instance runs out of memory when `max_count` is greater than `0`. - -Generating V8 snapshots takes time and memory (both memory managed by the +Generating V8 heap snapshots takes time and memory (both memory managed by the V8 heap and native memory outside the V8 heap). The bigger the heap is, -the more resources it needs. 
Node.js will adjust the V8 heap to accommodate +the more resources it needs. When generating heap snapshots for this +feature, Node.js will temporarily raise the V8 heap limit to accommodate the additional V8 heap memory overhead, and try its best to avoid using up all the memory available to the process. When the process uses more memory than the system deems appropriate, the process may be terminated abruptly by the system, depending on the system configuration. +Heap snapshot generation could trigger garbage collections. If enough memory +can be reclaimed after the garbage collection, the heap usage may go down +and so multiple snapshots may be written to disk before the Node.js instance +finally runs out of memory. On the other hand, since Node.js temporarily +raises the heap limit before the heap snapshot is generated, and the limit +only gets restored when the heap usage falls below it, if the application +allocates reachable memory faster than the garbage collector can keep up +with, the heap usage could also go up and exceed the initial limit quite a bit +until Node.js stops raising the heap limit. + +To control the number of heap snapshots to be written to disk, it is +recommended to specify a value for `--heapsnapshot-near-heap-limit`. +It's not guaranteed that Node.js will write exactly `max_count` snapshots +to disk, but it will try its best to generate at least one and up to +`max_count` snapshots before the Node.js instance runs out of memory when +`max_count` is greater than `0`. + +When multiple heap snapshots are generated, they can be compared to determine +what objects are being allocated in the time between consecutive +snapshots. 
+ ```console $ node --max-old-space-size=100 --heapsnapshot-near-heap-limit=3 index.js Wrote snapshot to Heap.20200430.100036.49580.0.001.heapsnapshot diff --git a/src/env.cc b/src/env.cc index 3c07e9342fd338..f95bcb40a8deac 100644 --- a/src/env.cc +++ b/src/env.cc @@ -1562,14 +1562,31 @@ size_t Environment::NearHeapLimitCallback(void* data, size_t num_heap_spaces = env->isolate()->NumberOfHeapSpaces(); for (size_t i = 0; i < num_heap_spaces; ++i) { env->isolate()->GetHeapSpaceStatistics(&stats, i); + + Debug(env, + DebugCategory::DIAGNOSTICS, + "%s space_size = %" PRIu64 ", " + "space_used_size = %" PRIu64 ", " + "space_available_size = %" PRIu64 ", " + "physical_space_size = %" PRIu64 "\n", + stats.space_name(), + static_cast(stats.space_size()), + static_cast(stats.space_used_size()), + static_cast(stats.space_available_size()), + static_cast(stats.physical_space_size())); + + // space_size() returns the allocated size of a given space, + // we use this to calculate the new limit because V8 also + // uses the allocated size to determine whether it should crash. if (strcmp(stats.space_name(), "new_space") == 0 || strcmp(stats.space_name(), "new_large_object_space") == 0) { - young_gen_size += stats.space_used_size(); + young_gen_size += stats.space_size(); } else { - old_gen_size += stats.space_used_size(); + old_gen_size += stats.space_size(); } } + size_t total_size = young_gen_size + old_gen_size; Debug(env, DebugCategory::DIAGNOSTICS, "max_young_gen_size=%" PRIu64 ", " @@ -1579,21 +1596,15 @@ size_t Environment::NearHeapLimitCallback(void* data, static_cast(max_young_gen_size), static_cast(young_gen_size), static_cast(old_gen_size), - static_cast(young_gen_size + old_gen_size)); + static_cast(total_size)); uint64_t available = GuessMemoryAvailableToTheProcess(); // TODO(joyeecheung): get a better estimate about the native memory // usage into the overhead, e.g. based on the count of objects. 
- uint64_t estimated_overhead = max_young_gen_size; - Debug(env, - DebugCategory::DIAGNOSTICS, - "Estimated available memory=%" PRIu64 ", " - "estimated overhead=%" PRIu64 "\n", - static_cast(available), - static_cast(estimated_overhead)); - - // This might be hit when the snapshot is being taken in another - // NearHeapLimitCallback invocation. + uint64_t estimated_overhead = young_gen_size; + // The new limit must be higher than current_heap_limit or V8 might + // crash. + uint64_t minimun_new_limit = static_cast(current_heap_limit + 1); // When taking the snapshot, objects in the young generation may be // promoted to the old generation, result in increased heap usage, // but it should be no more than the young generation size. @@ -1602,33 +1613,56 @@ size_t Environment::NearHeapLimitCallback(void* data, // new limit, so in a heap with unbounded growth the isolate // may eventually crash with this new limit - effectively raising // the heap limit to the new one. + uint64_t estimated_space_needed = + std::max(estimated_overhead + total_size, minimun_new_limit); + + Debug(env, + DebugCategory::DIAGNOSTICS, + "Estimated available memory=%" PRIu64 ", " + "estimated overhead=%" PRIu64 "\n" + "estimated space needed=%" PRIu64 "\n", + static_cast(available), + static_cast(estimated_overhead), + static_cast(estimated_space_needed)); + + // This might be hit when the snapshot is being taken in another + // NearHeapLimitCallback invocation. + // TODO(joyeecheung): turn this into + // DCHECK(!env->is_processing_heap_limit_callback_) + // when V8 ensures that the callback can't be nested. if (env->is_processing_heap_limit_callback_) { - size_t new_limit = current_heap_limit + max_young_gen_size; Debug(env, DebugCategory::DIAGNOSTICS, "Not generating snapshots in nested callback. 
" "new_limit=%" PRIu64 "\n", - static_cast(new_limit)); - return new_limit; + static_cast(estimated_space_needed)); + return estimated_space_needed; } // Estimate whether the snapshot is going to use up all the memory // available to the process. If so, just give up to prevent the system // from killing the process for a system OOM. - if (estimated_overhead > available) { + if (estimated_space_needed > available) { Debug(env, DebugCategory::DIAGNOSTICS, "Not generating snapshots because it's too risky.\n"); env->isolate()->RemoveNearHeapLimitCallback(NearHeapLimitCallback, initial_heap_limit); - // The new limit must be higher than current_heap_limit or V8 might - // crash. - return current_heap_limit + 1; + + return minimun_new_limit; } - // Take the snapshot synchronously. + env->initial_heap_limit_ = initial_heap_limit; env->is_processing_heap_limit_callback_ = true; + env->isolate()->RequestInterrupt(TakeSnapshotInNearHeapLimitCallback, env); + // The new limit must be higher than current_heap_limit or V8 might + // crash. + return estimated_space_needed; +} +void Environment::TakeSnapshotInNearHeapLimitCallback(v8::Isolate* isolate, + void* data) { + Environment* env = static_cast(data); std::string dir = env->options()->diagnostic_dir; if (dir.empty()) { dir = env->GetCwd(); @@ -1640,8 +1674,12 @@ size_t Environment::NearHeapLimitCallback(void* data, // Remove the callback first in case it's triggered when generating // the snapshot. + // TODO(joyeecheung): when V8 ensures that the callback can't be nested, + // we can simply remove the callback when env->heap_limit_snapshot_taken_ + // reaches env->options_->heap_snapshot_near_heap_limit at the + // end of this interrupt. 
env->isolate()->RemoveNearHeapLimitCallback(NearHeapLimitCallback, - initial_heap_limit); + env->initial_heap_limit_); heap::WriteSnapshot(env, filename.c_str()); env->heap_limit_snapshot_taken_ += 1; @@ -1659,10 +1697,6 @@ size_t Environment::NearHeapLimitCallback(void* data, env->isolate()->AutomaticallyRestoreInitialHeapLimit(0.95); env->is_processing_heap_limit_callback_ = false; - - // The new limit must be higher than current_heap_limit or V8 might - // crash. - return current_heap_limit + 1; } inline size_t Environment::SelfSize() const { diff --git a/src/env.h b/src/env.h index efaeb53fbc7599..11dbc20b5ab53e 100644 --- a/src/env.h +++ b/src/env.h @@ -1392,6 +1392,8 @@ class Environment : public MemoryRetainer { inline void RemoveCleanupHook(CleanupCallback cb, void* arg); void RunCleanup(); + static void TakeSnapshotInNearHeapLimitCallback(v8::Isolate* isolate, + void* data); static size_t NearHeapLimitCallback(void* data, size_t current_heap_limit, size_t initial_heap_limit); @@ -1524,6 +1526,7 @@ class Environment : public MemoryRetainer { bool is_processing_heap_limit_callback_ = false; int64_t heap_limit_snapshot_taken_ = 0; + size_t initial_heap_limit_ = 0; uint32_t module_id_counter_ = 0; uint32_t script_id_counter_ = 0;