From a1e2ead973f821fe85fbd8b4bf31300d2531e625 Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Thu, 2 Feb 2023 11:39:48 +0000 Subject: [PATCH] Fix profiler sometimes not adding the "In native code" placeholder **What does this PR do?**: In #2583 we added a very small check to our custom `ddtrace_rb_profile_frames` to match a similar fix added to upstream Ruby. But I've just spotted this caused one of the profiling tests to become flaky: ``` $ bundle exec rspec spec/datadog/profiling --seed 14514 Failures: 1) Datadog::Profiling::Collectors::Stack when sampling a thread with empty locations gathers a one-element stack with a "In native code" placeholder Failure/Error: expect(gathered_stack).to contain_exactly({ base_label: '', path: 'In native code', lineno: 0 }) expected collection contained: [{:base_label=>"", :lineno=>0, :path=>"In native code"}] actual collection contained: [] the missing elements were: [{:base_label=>"", :lineno=>0, :path=>"In native code"}] # ./spec/datadog/profiling/collectors/stack_spec.rb:373:in `block (3 levels) in ' # ./spec/spec_helper.rb:220:in `block (2 levels) in ' # ./spec/spec_helper.rb:112:in `block (2 levels) in ' # /home/ivo.anjo/.rvm/gems/ruby-2.7.7/gems/webmock-3.13.0/lib/webmock/rspec.rb:37:in `block (2 levels) in ' 2) Datadog::Profiling::Collectors::Stack when sampling a thread with empty locations when marking sample as being in garbage collection gathers a two-element stack with a placeholder for "In native code" and another for garbage collection Failure/Error: expect(gathered_stack).to contain_exactly( { base_label: '', path: 'Garbage Collection', lineno: 0 }, { base_label: '', path: 'In native code', lineno: 0 } ) expected collection contained: [{:base_label=>"", :lineno=>0, :path=>"Garbage Collection"}, {:base_label=>"", :lineno=>0, :path=>"In native code"}] actual collection contained: [{:base_label=>"", :lineno=>0, :path=>"Garbage Collection"}] the missing elements were: [{:base_label=>"", :lineno=>0, :path=>"In native code"}] # ./spec/datadog/profiling/collectors/stack_spec.rb:380:in `block (4 levels) in ' # ./spec/spec_helper.rb:220:in `block (2 levels) in ' # ./spec/spec_helper.rb:112:in `block (2 levels) in ' # /home/ivo.anjo/.rvm/gems/ruby-2.7.7/gems/webmock-3.13.0/lib/webmock/rspec.rb:37:in `block (2 levels) in ' Finished in 13.42 seconds (files took 0.44192 seconds to load) 563 examples, 2 failures, 15 pending Failed examples: rspec ./spec/datadog/profiling/collectors/stack_spec.rb:372 # Datadog::Profiling::Collectors::Stack when sampling a thread with empty locations gathers a one-element stack with a "In native code" placeholder rspec ./spec/datadog/profiling/collectors/stack_spec.rb:379 # Datadog::Profiling::Collectors::Stack when sampling a thread with empty locations when marking sample as being in garbage collection gathers a two-element stack with a placeholder for "In native code" and another for garbage collection ``` This issue was caused by the `cfp == NULL` check being added before the `end_cfp == NULL` check we have to detect when we should add the "In native code" placeholder. In some cases, `cfp` is set and the placeholder is added, but in others, it's not. To fix this, I've swapped the order of the checks, so that the check to introduce the placeholder gets higher priority. **Motivation**: Fix bug (that also causes flaky tests). **Additional Notes**: (N/A) **How to test the change?**: Validate that running profiling test suite works with for instance seed 14154. --- .../private_vm_api_access.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ext/ddtrace_profiling_native_extension/private_vm_api_access.c b/ext/ddtrace_profiling_native_extension/private_vm_api_access.c index 894d61a2e9..9e54d67e40 100644 --- a/ext/ddtrace_profiling_native_extension/private_vm_api_access.c +++ b/ext/ddtrace_profiling_native_extension/private_vm_api_access.c @@ -422,10 +422,6 @@ int ddtrace_rb_profile_frames(VALUE thread, int start, int limit, VALUE *buff, i const rb_control_frame_t *cfp = ec->cfp, *end_cfp = RUBY_VM_END_CONTROL_FRAME(ec); const rb_callable_method_entry_t *cme; - // This should not happen for ddtrace (it can only happen when a thread is still being created), but I've imported - // it from https://github.com/ruby/ruby/pull/7116 in a "just in case" kind of mindset. - if (cfp == NULL) return 0; - // Avoid sampling dead threads if (th->status == THREAD_KILLED) return 0; @@ -433,6 +429,10 @@ int ddtrace_rb_profile_frames(VALUE thread, int start, int limit, VALUE *buff, i // also (not entirely sure) happen on dead threads if (end_cfp == NULL) return PLACEHOLDER_STACK_IN_NATIVE_CODE; + // This should not happen for ddtrace (it can only happen when a thread is still being created), but I've imported + // it from https://github.com/ruby/ruby/pull/7116 in a "just in case" kind of mindset. + if (cfp == NULL) return 0; + // Fix: Skip dummy frame that shows up in main thread. // // According to a comment in `backtrace_each` (`vm_backtrace.c`), there's two dummy frames that we should ignore