Skip to content

Commit

Permalink
[yugabyte#19841] docdb: Only allow one heap profile at a time
Browse files Browse the repository at this point in the history
Summary:
Since commit 9554847, with Google TCMalloc enabled, the /pprof/heap UI endpoint records an allocation profile for the specified number of seconds at a specified sample frequency.

The sample frequency is reset to the value at the start of the profile once the profile is complete. This can be problematic if two profiles are run concurrently. For example,

1. Sampling frequency starts at 1 MB
2. Go to `pprof/heap?seconds=10&sample_frequency_bytes=1000` and start profile 1 (sets sampling freq to 1000).
3. Go to `pprof/heap?seconds=1&sample_frequency_bytes=1000` which starts profile 2 (NB: some URL parameter must be different, otherwise it looks like we have some caching that serializes the calls).
4. Call 1 finishes and resets sampling frequency to 1 MB.
5. Call 2 finishes and resets sampling frequency to 1000.

This diff prevents this by only allowing one heap profile to run at a time.
This diff also removes existing usage of the phrase "allocation profile" in favor of "heap profile", which is used everywhere else in our code / documentation.
Jira: DB-8778

Test Plan: `ybd --cxx-test server_pprof-path-handler_util-test --gtest_filter SamplingProfilerTest.OnlyOneHeapProfile`. Also tested manually by testing the steps listed above and verifying that we get the error message.

Reviewers: mlillibridge

Reviewed By: mlillibridge

Subscribers: ybase, esheng, bogdan

Tags: #jenkins-ready

Differential Revision: https://phorge.dev.yugabyte.com/D29963
  • Loading branch information
SrivastavaAnubhav committed Nov 27, 2023
1 parent b0bc2d9 commit 03e91df
Show file tree
Hide file tree
Showing 4 changed files with 53 additions and 9 deletions.
36 changes: 34 additions & 2 deletions src/yb/server/pprof-path-handler_util-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -12,25 +12,31 @@

#include "yb/server/pprof-path-handler_util-test.h"

#include <chrono>
#include <memory>
#include <string>
#include <thread>

#include <gflags/gflags_declare.h>
#include "yb/util/logging.h"
#include <gtest/gtest.h>

#include "yb/gutil/dynamic_annotations.h"

#include "yb/server/pprof-path-handlers_util.h"

#include "yb/util/flags.h"
#include "yb/util/logging.h"
#include "yb/util/monotime.h"
#include "yb/util/size_literals.h"
#include "yb/util/tcmalloc_util.h"
#include "yb/util/test_macros.h"
#include "yb/util/test_util.h"
#include "yb/util/thread.h"

DECLARE_int32(v);
DECLARE_string(vmodule);

using std::literals::chrono_literals::operator""s;
using std::vector;

namespace yb {
Expand Down Expand Up @@ -143,7 +149,7 @@ TEST_F(SamplingProfilerTest, HeapSnapshot) {
}

// Basic test for pprof/heap.
TEST_F(SamplingProfilerTest, AllocationProfile) {
TEST_F(SamplingProfilerTest, HeapProfile) {
SetProfileSamplingRate(1);
const int64_t kAllocSizeExcluded = 1_MB;
const int64_t kAllocSizeAllocated = 2_MB;
Expand Down Expand Up @@ -179,6 +185,32 @@ TEST_F(SamplingProfilerTest, AllocationProfile) {
ASSERT_EQ(samples[0].second.sampled_allocated_bytes, kAllocSizeAllocated);
}

TEST_F(SamplingProfilerTest, OnlyOneHeapProfile) {
auto StartHeapProfileWithFrequency = [](int64_t sample_freq_bytes, Status* status) {
Result<tcmalloc::Profile> profile = GetHeapProfile(3 /* seconds */, sample_freq_bytes);
*status = profile.ok() ? Status::OK() : profile.status();
};

SetProfileSamplingRate(1);

yb::ThreadPtr thread1, thread2;
Status status1, status2;
ASSERT_OK(yb::Thread::Create(
CURRENT_TEST_NAME(), "heap profile",
std::bind(StartHeapProfileWithFrequency, 2, &status1), &thread1));
SleepFor(1s);
ASSERT_OK(yb::Thread::Create(
CURRENT_TEST_NAME(), "heap profile",
std::bind(StartHeapProfileWithFrequency, 3, &status2), &thread2));
thread1->Join();
ASSERT_OK(status1);
thread2->Join();
ASSERT_NOK(status2);
ASSERT_STR_CONTAINS(status2.message().ToBuffer(), "A heap profile is already running");

ASSERT_EQ(GetTCMallocSamplingFrequency(), 1);
}

// Verify that the estimated bytes and count are close to their actual values.
TEST_F(SamplingProfilerTest, EstimatedBytesAndCount) {
const auto kSampleFreqBytes = 10_KB;
Expand Down
8 changes: 6 additions & 2 deletions src/yb/server/pprof-path-handlers.cc
Original file line number Diff line number Diff line change
Expand Up @@ -141,8 +141,12 @@ static void PprofHeapHandler(const Webserver::WebRequest& req,

const string title = only_growth ? "In use profile" : "Allocation profile";

auto profile = GetAllocationProfile(seconds, sample_freq_bytes);
auto samples = AggregateAndSortProfile(profile, only_growth, order);
Result<tcmalloc::Profile> profile = GetHeapProfile(seconds, sample_freq_bytes);
if (!profile.ok()) {
(*output) << profile.status().message();
return;
}
auto samples = AggregateAndSortProfile(*profile, only_growth, order);
GenerateTable(output, samples, title, 1000 /* max_call_stacks */, order);
#endif // YB_GOOGLE_TCMALLOC

Expand Down
16 changes: 12 additions & 4 deletions src/yb/server/pprof-path-handlers_util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -68,16 +68,24 @@ bool Symbolize(void *pc, char *out, int out_size) {
#endif

#if YB_GOOGLE_TCMALLOC
tcmalloc::Profile GetAllocationProfile(int seconds, int64_t sample_freq_bytes) {
Result<tcmalloc::Profile> GetHeapProfile(int seconds, int64_t sample_freq_bytes) {
static std::atomic<bool> heap_profile_running{false};
bool expected = false;
if (!heap_profile_running.compare_exchange_strong(expected, /* desired= */ true)) {
return STATUS_FORMAT(IllegalState, "A heap profile is already running.");
}

auto prev_sample_rate = tcmalloc::MallocExtension::GetProfileSamplingRate();
tcmalloc::MallocExtension::SetProfileSamplingRate(sample_freq_bytes);
tcmalloc::MallocExtension::AllocationProfilingToken token;
token = tcmalloc::MallocExtension::StartLifetimeProfiling(/* seed_with_live_allocs= */ false);
auto token =
tcmalloc::MallocExtension::StartLifetimeProfiling(/* seed_with_live_allocs= */ false);

LOG(INFO) << Format("Sleeping for $0 seconds while profile is collected.", seconds);
SleepFor(MonoDelta::FromSeconds(seconds));
tcmalloc::MallocExtension::SetProfileSamplingRate(prev_sample_rate);
return std::move(token).Stop();
auto profile = std::move(token).Stop();
heap_profile_running = false;
return profile;
}

tcmalloc::Profile GetHeapSnapshot(HeapSnapshotType snapshot_type) {
Expand Down
2 changes: 1 addition & 1 deletion src/yb/server/pprof-path-handlers_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ YB_DEFINE_ENUM(SampleOrder, (kSampledCount)(kSampledBytes)(kEstimatedBytes));

#if YB_GOOGLE_TCMALLOC

tcmalloc::Profile GetAllocationProfile(int seconds, int64_t sample_freq_bytes);
Result<tcmalloc::Profile> GetHeapProfile(int seconds, int64_t sample_freq_bytes);

YB_DEFINE_ENUM(HeapSnapshotType, (kCurrentHeap)(kPeakHeap));

Expand Down

0 comments on commit 03e91df

Please sign in to comment.