From 94f14f02f826b46aca6944ae09ffeea0425776e0 Mon Sep 17 00:00:00 2001
From: Ivo Anjo
Date: Mon, 22 Jul 2024 13:31:45 +0100
Subject: [PATCH] [PROF-10201] Disable profiler allocation counting feature by default
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

**What does this PR do?**

This PR introduces a new setting for the profiler, `profiling.advanced.allocation_counting_enabled`, which controls the profiler allocation counting feature.

This setting is off by default, enabling us to slightly reduce allocation profiling overhead.

**Motivation:**

We actually used to have this setting back in the ddtrace 1.x series. We introduced it in #2635 and removed it in #3281 -- by tying it directly to allocation profiling.

I decided to re-introduce it so we can disable this feature by default.

**Additional Notes:**

This PR sits atop #3797 because it makes sense to measure both overhead improvements together, but it is otherwise completely independent.

**How to test the change?**

Here are the results of running the `profiler_allocation` benchmark:

```
ruby 2.7.7p221 (2022-11-24 revision 168ec2b1e5) [x86_64-linux]
Warming up --------------------------------------
Allocations (baseline)                   1.419M i/100ms
Calculating -------------------------------------
Allocations (baseline)                  14.535M (± 2.1%) i/s - 146.197M in 10.062717s
Warming up --------------------------------------
Allocations (alloc_profiling_enabled)    1.122M i/100ms
Calculating -------------------------------------
Allocations (alloc_profiling_enabled)   11.636M (± 2.2%) i/s - 116.679M in 10.032209s
Warming up --------------------------------------
Allocations (alloc_profiling_disabled)   1.124M i/100ms
Calculating -------------------------------------
Allocations (alloc_profiling_disabled)  11.866M (± 2.6%) i/s - 119.175M in 10.050580s

Comparison:
Allocations (baseline):                 14534979.3 i/s
Allocations (alloc_profiling_disabled): 11865926.7 i/s - 1.22x slower
Allocations (alloc_profiling_enabled):  11635919.9 i/s - 1.25x slower
```

The difference is close to the margin of error; nevertheless, this feature was showing up in native profiler output, and since it sits on the hot path for allocation profiling, I think disabling it by default is worth it.
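For anyone wanting to try the feature out, here's a minimal sketch of opting back in and reading the counter. The `Datadog.configure` block, the `profiling.advanced.allocation_counting_enabled` setting, and `Datadog::Profiling.allocation_count` come from this PR and the existing API; the `profiling.allocation_enabled` name is assumed to be the toggle for allocation profiling itself, so double-check it against your version's settings:

```ruby
# Sketch: re-enabling the (now opt-in) allocation counting feature.
# `profiling.allocation_enabled` is assumed to be the allocation profiling toggle;
# `allocation_counting_enabled` is the setting introduced by this PR.
Datadog.configure do |c|
  c.profiling.enabled = true
  c.profiling.allocation_enabled = true                   # allocation profiling (assumed name)
  c.profiling.advanced.allocation_counting_enabled = true # re-enable allocation counting
end

before = Datadog::Profiling.allocation_count
10_000.times { Object.new }
after = Datadog::Profiling.allocation_count

# With counting enabled (and the profiler running) both values are Integers; with the new
# default (counting disabled) `allocation_count` returns nil instead.
if before && after
  puts "Allocations observed by the profiler in this thread: #{after - before}"
else
  puts "Allocation counting is disabled (allocation_count returned nil)"
end
```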
---
 .../collectors_cpu_and_wall_time_worker.c    | 28 +++++++++++++------
 lib/datadog/core/configuration/settings.rb   | 10 +++++++
 lib/datadog/profiling.rb                     |  1 +
 .../collectors/cpu_and_wall_time_worker.rb   |  2 ++
 lib/datadog/profiling/component.rb           |  1 +
 .../collectors/cpu_and_wall_time_worker.rbs  |  2 ++
 .../core/configuration/settings_spec.rb      | 15 ++++++++++
 .../cpu_and_wall_time_worker_spec.rb         | 16 ++++++++++-
 spec/datadog/profiling/component_spec.rb     |  3 ++
 9 files changed, 68 insertions(+), 10 deletions(-)

diff --git a/ext/datadog_profiling_native_extension/collectors_cpu_and_wall_time_worker.c b/ext/datadog_profiling_native_extension/collectors_cpu_and_wall_time_worker.c
index 30a05e4f55..32abc490c3 100644
--- a/ext/datadog_profiling_native_extension/collectors_cpu_and_wall_time_worker.c
+++ b/ext/datadog_profiling_native_extension/collectors_cpu_and_wall_time_worker.c
@@ -96,6 +96,7 @@ struct cpu_and_wall_time_worker_state {
   bool no_signals_workaround_enabled;
   bool dynamic_sampling_rate_enabled;
   bool allocation_profiling_enabled;
+  bool allocation_counting_enabled;
   bool skip_idle_samples_for_testing;
   VALUE self_instance;
   VALUE thread_context_collector_instance;
@@ -181,6 +182,7 @@ static VALUE _native_initialize(
   VALUE dynamic_sampling_rate_enabled,
   VALUE dynamic_sampling_rate_overhead_target_percentage,
   VALUE allocation_profiling_enabled,
+  VALUE allocation_counting_enabled,
   VALUE skip_idle_samples_for_testing
 );
 static void cpu_and_wall_time_worker_typed_data_mark(void *state_ptr);
@@ -278,7 +280,7 @@ void collectors_cpu_and_wall_time_worker_init(VALUE profiling_module) {
   // https://bugs.ruby-lang.org/issues/18007 for a discussion around this.
   rb_define_alloc_func(collectors_cpu_and_wall_time_worker_class, _native_new);
 
-  rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_initialize", _native_initialize, 9);
+  rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_initialize", _native_initialize, 10);
   rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_sampling_loop", _native_sampling_loop, 1);
   rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_stop", _native_stop, 2);
   rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_reset_after_fork", _native_reset_after_fork, 1);
@@ -325,6 +327,7 @@ static VALUE _native_new(VALUE klass) {
   state->no_signals_workaround_enabled = false;
   state->dynamic_sampling_rate_enabled = true;
   state->allocation_profiling_enabled = false;
+  state->allocation_counting_enabled = false;
   state->skip_idle_samples_for_testing = false;
   state->thread_context_collector_instance = Qnil;
   state->idle_sampling_helper_instance = Qnil;
@@ -366,6 +369,7 @@ static VALUE _native_initialize(
   VALUE dynamic_sampling_rate_enabled,
   VALUE dynamic_sampling_rate_overhead_target_percentage,
   VALUE allocation_profiling_enabled,
+  VALUE allocation_counting_enabled,
   VALUE skip_idle_samples_for_testing
 ) {
   ENFORCE_BOOLEAN(gc_profiling_enabled);
@@ -373,6 +377,7 @@ static VALUE _native_initialize(
   ENFORCE_BOOLEAN(dynamic_sampling_rate_enabled);
   ENFORCE_TYPE(dynamic_sampling_rate_overhead_target_percentage, T_FLOAT);
   ENFORCE_BOOLEAN(allocation_profiling_enabled);
+  ENFORCE_BOOLEAN(allocation_counting_enabled);
   ENFORCE_BOOLEAN(skip_idle_samples_for_testing)
 
   struct cpu_and_wall_time_worker_state *state;
@@ -382,6 +387,7 @@ static VALUE _native_initialize(
   state->no_signals_workaround_enabled = (no_signals_workaround_enabled == Qtrue);
   state->dynamic_sampling_rate_enabled = (dynamic_sampling_rate_enabled == Qtrue);
   state->allocation_profiling_enabled = (allocation_profiling_enabled == Qtrue);
+  state->allocation_counting_enabled = (allocation_counting_enabled == Qtrue);
   state->skip_idle_samples_for_testing = (skip_idle_samples_for_testing == Qtrue);
 
   double total_overhead_target_percentage = NUM2DBL(dynamic_sampling_rate_overhead_target_percentage);
@@ -1040,7 +1046,9 @@ static void sleep_for(uint64_t time_ns) {
 }
 
 static VALUE _native_allocation_count(DDTRACE_UNUSED VALUE self) {
-  bool are_allocations_being_tracked = active_sampler_instance_state != NULL && active_sampler_instance_state->allocation_profiling_enabled;
+  struct cpu_and_wall_time_worker_state *state = active_sampler_instance_state;
+
+  bool are_allocations_being_tracked = state != NULL && state->allocation_profiling_enabled && state->allocation_counting_enabled;
 
   return are_allocations_being_tracked ? ULL2NUM(allocation_count) : Qnil;
 }
@@ -1067,19 +1075,21 @@ static VALUE _native_allocation_count(DDTRACE_UNUSED VALUE self) {
 // On big applications, path 1. is the hottest, since we don't sample every object. So it's quite important for it to
 // be as fast as possible.
 static void on_newobj_event(VALUE tracepoint_data, DDTRACE_UNUSED void *unused) {
-  // Update thread-local allocation count
-  if (RB_UNLIKELY(allocation_count == UINT64_MAX)) {
-    allocation_count = 0;
-  } else {
-    allocation_count++;
-  }
-
 
 struct cpu_and_wall_time_worker_state *state = active_sampler_instance_state; // Read from global variable, see "sampler global state safety" note above
 
 // This should not happen in a normal situation because the tracepoint is always enabled after the instance is set
 // and disabled before it is cleared, but just in case...
 if (state == NULL) return;
 
+  if (RB_UNLIKELY(state->allocation_counting_enabled)) {
+    // Update thread-local allocation count
+    if (RB_UNLIKELY(allocation_count == UINT64_MAX)) {
+      allocation_count = 0;
+    } else {
+      allocation_count++;
+    }
+  }
+
 // In rare cases, we may actually be allocating an object as part of profiler sampling. We don't want to recursively
 // sample, so we just return early
 if (state->during_sample) {
diff --git a/lib/datadog/core/configuration/settings.rb b/lib/datadog/core/configuration/settings.rb
index a241b73cc7..3512bd134d 100644
--- a/lib/datadog/core/configuration/settings.rb
+++ b/lib/datadog/core/configuration/settings.rb
@@ -301,6 +301,16 @@ def initialize(*_)
             o.default true
           end
 
+          # Can be used to enable/disable the Datadog::Profiling.allocation_count feature.
+          #
+          # Requires allocation profiling to be enabled.
+          #
+          # @default false
+          option :allocation_counting_enabled do |o|
+            o.type :bool
+            o.default false
+          end
+
           # Can be used to enable/disable the collection of heap profiles.
           #
           # This feature is alpha and disabled by default
diff --git a/lib/datadog/profiling.rb b/lib/datadog/profiling.rb
index 367ce82941..aa4d21f361 100644
--- a/lib/datadog/profiling.rb
+++ b/lib/datadog/profiling.rb
@@ -47,6 +47,7 @@ def self.start_if_enabled
     # (This is similar to some OS-based time representations.)
     #
     # Note 2: All fibers in the same thread will share the same counter values.
+    # Note 3: This counter is not accurate when using the M:N scheduler.
     #
     # Only available when the profiler is running, and allocation-related features are not disabled via configuration.
     #
diff --git a/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb b/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb
index de53333a09..39b98adca9 100644
--- a/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb
+++ b/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb
@@ -21,6 +21,7 @@ def initialize(
           thread_context_collector:,
           dynamic_sampling_rate_overhead_target_percentage:,
           allocation_profiling_enabled:,
+          allocation_counting_enabled:,
           # **NOTE**: This should only be used for testing; disabling the dynamic sampling rate will increase the
           # profiler overhead!
           dynamic_sampling_rate_enabled: true,
@@ -42,6 +43,7 @@ def initialize(
             dynamic_sampling_rate_enabled,
             dynamic_sampling_rate_overhead_target_percentage,
             allocation_profiling_enabled,
+            allocation_counting_enabled,
             skip_idle_samples_for_testing,
           )
           @worker_thread = nil
diff --git a/lib/datadog/profiling/component.rb b/lib/datadog/profiling/component.rb
index d08e161d35..bdac9c4eab 100644
--- a/lib/datadog/profiling/component.rb
+++ b/lib/datadog/profiling/component.rb
@@ -61,6 +61,7 @@ def self.build_profiler_component(settings:, agent_settings:, optional_tracer:)
           thread_context_collector: thread_context_collector,
           dynamic_sampling_rate_overhead_target_percentage: overhead_target_percentage,
           allocation_profiling_enabled: allocation_profiling_enabled,
+          allocation_counting_enabled: settings.profiling.advanced.allocation_counting_enabled,
         )
 
         internal_metadata = {
diff --git a/sig/datadog/profiling/collectors/cpu_and_wall_time_worker.rbs b/sig/datadog/profiling/collectors/cpu_and_wall_time_worker.rbs
index bea60e8971..3770a0ecd0 100644
--- a/sig/datadog/profiling/collectors/cpu_and_wall_time_worker.rbs
+++ b/sig/datadog/profiling/collectors/cpu_and_wall_time_worker.rbs
@@ -15,6 +15,7 @@ module Datadog
         ?idle_sampling_helper: Datadog::Profiling::Collectors::IdleSamplingHelper,
         ?dynamic_sampling_rate_enabled: bool,
         allocation_profiling_enabled: bool,
+        allocation_counting_enabled: bool,
         ?skip_idle_samples_for_testing: false,
       ) -> void
 
@@ -27,6 +28,7 @@ module Datadog
         bool dynamic_sampling_rate_enabled,
         Float dynamic_sampling_rate_overhead_target_percentage,
         bool allocation_profiling_enabled,
+        bool allocation_counting_enabled,
         bool skip_idle_samples_for_testing,
       ) -> true
 
diff --git a/spec/datadog/core/configuration/settings_spec.rb b/spec/datadog/core/configuration/settings_spec.rb
index 79077bb87c..9e7813eec0 100644
--- a/spec/datadog/core/configuration/settings_spec.rb
+++ b/spec/datadog/core/configuration/settings_spec.rb
@@ -526,6 +526,21 @@
         end
       end
 
+      describe '#allocation_counting_enabled' do
+        subject(:allocation_counting_enabled) { settings.profiling.advanced.allocation_counting_enabled }
+
+        it { is_expected.to be false }
+      end
+
+      describe '#allocation_counting_enabled=' do
+        it 'updates the #allocation_counting_enabled setting' do
+          expect { settings.profiling.advanced.allocation_counting_enabled = true }
+            .to change { settings.profiling.advanced.allocation_counting_enabled }
+            .from(false)
+            .to(true)
+        end
+      end
+
       describe '#experimental_heap_enabled' do
         subject(:experimental_heap_enabled) { settings.profiling.advanced.experimental_heap_enabled }
 
diff --git a/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb b/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb
index 633e1271ac..2f92bcfa07 100644
--- a/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb
+++ b/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb
@@ -15,6 +15,7 @@
   let(:no_signals_workaround_enabled) { false }
   let(:timeline_enabled) { false }
   let(:options) { {} }
+  let(:allocation_counting_enabled) { false }
   let(:worker_settings) do
     {
       gc_profiling_enabled: gc_profiling_enabled,
@@ -22,6 +23,7 @@
       thread_context_collector: build_thread_context_collector(recorder),
       dynamic_sampling_rate_overhead_target_percentage: 2.0,
       allocation_profiling_enabled: allocation_profiling_enabled,
+      allocation_counting_enabled: allocation_counting_enabled,
       **options
     }
   end
@@ -961,8 +963,9 @@
        cpu_and_wall_time_worker.stop
      end
 
-    context 'when allocation profiling is enabled' do
+    context 'when allocation profiling and allocation counting is enabled' do
       let(:allocation_profiling_enabled) { true }
+      let(:allocation_counting_enabled) { true }
 
       it 'always returns a >= 0 value' do
         expect(described_class._native_allocation_count).to be >= 0
@@ -1040,10 +1043,21 @@
         expect(after_t1 - before_t1).to be 100
         expect(after_allocations - before_allocations).to be < 10
       end
+
+      context 'when allocation profiling is enabled but allocation counting is disabled' do
+        let(:allocation_counting_enabled) { false }
+
+        it 'always returns a nil value' do
+          100.times { Object.new }
+
+          expect(described_class._native_allocation_count).to be nil
+        end
+      end
     end
 
     context 'when allocation profiling is disabled' do
       let(:allocation_profiling_enabled) { false }
+
       it 'always returns a nil value' do
         100.times { Object.new }
 
diff --git a/spec/datadog/profiling/component_spec.rb b/spec/datadog/profiling/component_spec.rb
index 8ae27da479..ec69456827 100644
--- a/spec/datadog/profiling/component_spec.rb
+++ b/spec/datadog/profiling/component_spec.rb
@@ -84,6 +84,8 @@
           .and_return(:overhead_target_percentage_config)
         expect(described_class).to receive(:valid_overhead_target)
           .with(:overhead_target_percentage_config).and_return(:overhead_target_percentage_config)
+        expect(settings.profiling.advanced)
+          .to receive(:allocation_counting_enabled).and_return(:allocation_counting_enabled_config)
 
         expect(Datadog::Profiling::Collectors::CpuAndWallTimeWorker).to receive(:new).with(
           gc_profiling_enabled: anything,
@@ -91,6 +93,7 @@
           thread_context_collector: instance_of(Datadog::Profiling::Collectors::ThreadContext),
           dynamic_sampling_rate_overhead_target_percentage: :overhead_target_percentage_config,
           allocation_profiling_enabled: false,
+          allocation_counting_enabled: :allocation_counting_enabled_config,
         )
 
         build_profiler_component
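To make the control-flow change in `on_newobj_event` easier to follow, here is a Ruby-flavoured sketch of the new hot path. This is illustrative only -- the real implementation is the C code in the patch above, and the class and method names below are invented for the example:

```ruby
# Illustrative Ruby translation of the new on_newobj_event hot path; the actual code is C.
class FakeAllocationTracepointHandler
  UINT64_MAX = (2**64) - 1

  def initialize(allocation_counting_enabled:)
    @allocation_counting_enabled = allocation_counting_enabled
    @allocation_count = 0 # stands in for the thread-local counter in the native extension
    @during_sample = false
  end

  attr_reader :allocation_count

  def on_newobj_event
    # Counting now only happens when the feature is explicitly enabled, so the default
    # configuration skips this work on every single allocation.
    if @allocation_counting_enabled
      @allocation_count = @allocation_count == UINT64_MAX ? 0 : @allocation_count + 1
    end

    return if @during_sample # never sample recursively while already sampling

    sample_allocation # dynamic sampling rate decision + actual sampling would happen here
  end

  private

  def sample_allocation
    # placeholder for the sampling logic, which is out of scope for this sketch
  end
end

handler = FakeAllocationTracepointHandler.new(allocation_counting_enabled: false)
3.times { handler.on_newobj_event }
puts handler.allocation_count # => 0, because counting is disabled by default
```

The point of the reordering is that, with the new default, the per-allocation counting work reduces to a single flag check that sits alongside the existing `during_sample` guard.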