Skip to content

fix(profiling): increase default max frames for stack profiler to 512 #13323

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 7 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,11 @@
#include <cstdint>
#include <string_view>

// Default value for the max frames; this number will always be overridden by whatever the default
// is for ddtrace/settings/profiling.py:ProfilingConfig.max_frames, but should conform
constexpr unsigned int g_default_max_nframes = 64;
// Default value for the max frames. At runtime this is always overridden by the
// larger of ddtrace/settings/profiling.py:ProfilingConfig.max_frames and
// ddtrace/settings/profiling.py:ProfilingConfig.stack.v2_max_frames, but it
// should still match the default value of the larger of those two settings.
constexpr unsigned int g_default_max_nframes = 256;

// Maximum number of frames admissible in the Profiling backend. If a user exceeds this number, then
// their stacks may be silently truncated, which is unfortunate.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ class Sampler
PyObject* _asyncio_eager_tasks);
void link_tasks(PyObject* parent, PyObject* child);
void sampling_thread(const uint64_t seq_num);
void set_max_nframes(int max_nframes);

// The Python side dynamically adjusts the sampling rate based on overhead, so we need to be able to update our
// own intervals accordingly. Rather than a preemptive measure, we assume the rate is ~fairly stable and just
Expand Down
14 changes: 14 additions & 0 deletions ddtrace/internal/datadog/profiling/stack_v2/src/sampler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

#include "thread_span_links.hpp"

#include "echion/config.h"
#include "echion/interp.h"
#include "echion/tasks.h"
#include "echion/threads.h"
Expand Down Expand Up @@ -338,3 +339,16 @@ Sampler::link_tasks(PyObject* parent, PyObject* child)
std::lock_guard<std::mutex> guard(task_link_map_lock);
task_link_map[child] = parent;
}

void
Sampler::set_max_nframes(int new_max_frames)
{
    // max_frames is the global limit on how many frames are unwound for a
    // single thread/task. Only a strictly positive request replaces it; zero
    // or negative values leave the current limit in place.
    if (new_max_frames > 0) {
        max_frames = new_max_frames;
    }

    // Whatever limit is now in effect (freshly set or pre-existing), it must
    // not exceed g_backend_max_nframes. Nothing more to do if it already fits.
    if (max_frames <= g_backend_max_nframes) {
        return;
    }
    max_frames = g_backend_max_nframes;
}
15 changes: 15 additions & 0 deletions ddtrace/internal/datadog/profiling/stack_v2/src/stack_v2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,20 @@ stack_v2_set_adaptive_sampling(PyObject* Py_UNUSED(self), PyObject* args)
Py_RETURN_NONE;
}

// Python entry point for `set_max_nframes`: takes a single integer argument
// and forwards it to the Sampler singleton. Returns None on success, or a null
// pointer when the arguments could not be parsed as one C int.
static PyObject*
stack_v2_set_max_nframes(PyObject* Py_UNUSED(self), PyObject* args)
{
    int requested_nframes{ 0 };

    // "i" means exactly one argument convertible to a C int is expected.
    const bool parsed_ok = PyArg_ParseTuple(args, "i", &requested_nframes) != 0;
    if (!parsed_ok) {
        return nullptr;
    }

    // Sampler::set_max_nframes ignores non-positive values and caps the limit
    // at g_backend_max_nframes, so no extra validation is done here.
    Sampler::get().set_max_nframes(requested_nframes);

    Py_RETURN_NONE;
}

static PyMethodDef _stack_v2_methods[] = {
{ "start", reinterpret_cast<PyCFunction>(stack_v2_start), METH_VARARGS | METH_KEYWORDS, "Start the sampler" },
{ "stop", stack_v2_stop, METH_VARARGS, "Stop the sampler" },
Expand All @@ -201,6 +215,7 @@ static PyMethodDef _stack_v2_methods[] = {
reinterpret_cast<PyCFunction>(stack_v2_link_span),
METH_VARARGS | METH_KEYWORDS,
"Link a span to a thread" },
{ "set_max_nframes", stack_v2_set_max_nframes, METH_VARARGS, "Set the maximum number of frames to unwind" },
// asyncio task support
{ "track_asyncio_loop", stack_v2_track_asyncio_loop, METH_VARARGS, "Map the name of a task with its identifier" },
{ "init_asyncio", stack_v2_init_asyncio, METH_VARARGS, "Initialise asyncio tracking" },
Expand Down
6 changes: 6 additions & 0 deletions ddtrace/profiling/collector/stack.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -528,6 +528,12 @@ class StackCollector(collector.PeriodicCollector):
# TODO take the `threading` import out of here and just handle it in v2 startup
threading.init_stack_v2()
stack_v2.set_adaptive_sampling(config.stack.v2_adaptive_sampling)
stack_v2.set_max_nframes(
max(
config.max_frames,
config.stack.v2_max_frames
)
)
stack_v2.start()

def _start_service(self):
Expand Down
5 changes: 4 additions & 1 deletion ddtrace/profiling/profiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,12 +189,15 @@ def _build_default_exporters(self):
# * If initialization fails, disable the libdd collector and fall back to the legacy exporter
if self._export_libdd_enabled:
try:
max_nframes = profiling_config.max_frames
if self._stack_v2_enabled:
max_nframes = max(max_nframes, profiling_config.stack.v2_max_frames)
ddup.config(
env=self.env,
service=self.service,
version=self.version,
tags=self.tags, # type: ignore
max_nframes=profiling_config.max_frames,
max_nframes=max_nframes,
timeline_enabled=profiling_config.timeline_enabled,
output_filename=profiling_config.output_pprof,
sample_pool_capacity=profiling_config.sample_pool_capacity,
Expand Down
9 changes: 9 additions & 0 deletions ddtrace/settings/profiling.py
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,14 @@ class ProfilingConfigStack(DDConfig):
private=True,
)

v2_max_frames = DDConfig.v(
int,
"v2.max_frames",
default=256,
help_type="Integer",
help="The maximum number of frames to capture in stack execution tracing for the v2 stack profiler",
)


class ProfilingConfigLock(DDConfig):
__item__ = __prefix__ = "lock"
Expand Down Expand Up @@ -492,6 +500,7 @@ def config_str(config):
if config.stack.enabled:
if config.stack.v2_enabled:
configured_features.append("stack_v2")
configured_features.append("MAXF" + str(config.stack.v2_max_frames))
else:
configured_features.append("stack")
if config.lock.enabled:
Expand Down
7 changes: 7 additions & 0 deletions releasenotes/notes/prof-max-frames-916cf15f8006e4ff.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
features:
- |
profiling: introduces ``DD_PROFILING_STACK_V2_MAX_FRAMES`` with a default of 256,
which increases the number of frames that are exported from the stack v2 profiler.
Lock and memory profilers will continue to use ``DD_PROFILING_MAX_FRAMES``, with
a default value of 64.
2 changes: 1 addition & 1 deletion tests/internal/crashtracker/test_crashtracker.py
Original file line number Diff line number Diff line change
Expand Up @@ -526,7 +526,7 @@ def test_crashtracker_set_tag_profiler_config(run_python_code_in_subprocess):

# Now check for the profiler_config tag
assert b"profiler_config" in data
profiler_config = "stack_v2_lock_mem_heap_exp_dd_CAP1.0_MAXF64"
profiler_config = "stack_v2_MAXF256_lock_mem_heap_exp_dd_CAP1.0_MAXF64"
assert profiler_config.encode() in data


Expand Down
1 change: 1 addition & 0 deletions tests/telemetry/test_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -440,6 +440,7 @@ def test_app_started_event_configuration_override(test_agent_session, run_python
{"name": "DD_PROFILING_SAMPLE_POOL_CAPACITY", "origin": "default", "value": 4},
{"name": "DD_PROFILING_STACK_ENABLED", "origin": "env_var", "value": False},
{"name": "DD_PROFILING_STACK_V2_ENABLED", "origin": "default", "value": True},
{"name": "DD_PROFILING_STACK_V2_MAX_FRAMES", "origin": "default", "value": 256},
{"name": "DD_PROFILING_TAGS", "origin": "default", "value": ""},
{"name": "DD_PROFILING_TIMELINE_ENABLED", "origin": "default", "value": False},
{"name": "DD_PROFILING_UPLOAD_INTERVAL", "origin": "env_var", "value": 10.0},
Expand Down
Loading