Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[PROF-7307] Add support for allocation samples to ThreadContext #2657

Merged
merged 4 commits into from
Mar 3, 2023
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,7 @@ static void trace_identifiers_for(struct thread_context_collector_state *state,
static bool is_type_web(VALUE root_span_type);
static VALUE _native_reset_after_fork(DDTRACE_UNUSED VALUE self, VALUE collector_instance);
static VALUE thread_list(struct thread_context_collector_state *state);
static VALUE _native_sample_allocation(VALUE self, VALUE collector_instance, VALUE sample_weight);

void collectors_thread_context_init(VALUE profiling_module) {
VALUE collectors_module = rb_define_module_under(profiling_module, "Collectors");
Expand All @@ -201,6 +202,7 @@ void collectors_thread_context_init(VALUE profiling_module) {
rb_define_singleton_method(collectors_thread_context_class, "_native_inspect", _native_inspect, 1);
rb_define_singleton_method(collectors_thread_context_class, "_native_reset_after_fork", _native_reset_after_fork, 1);
rb_define_singleton_method(testing_module, "_native_sample", _native_sample, 2);
rb_define_singleton_method(testing_module, "_native_sample_allocation", _native_sample_allocation, 2);
rb_define_singleton_method(testing_module, "_native_on_gc_start", _native_on_gc_start, 1);
rb_define_singleton_method(testing_module, "_native_on_gc_finish", _native_on_gc_finish, 1);
rb_define_singleton_method(testing_module, "_native_sample_after_gc", _native_sample_after_gc, 1);
Expand Down Expand Up @@ -951,3 +953,26 @@ static VALUE thread_list(struct thread_context_collector_state *state) {
ddtrace_thread_list(result);
return result;
}

void thread_context_collector_sample_allocation(VALUE self_instance, unsigned int sample_weight) {
struct thread_context_collector_state *state;
TypedData_Get_Struct(self_instance, struct thread_context_collector_state, &thread_context_collector_typed_data, state);

VALUE current_thread = rb_thread_current();

trigger_sample_for_thread(
state,
/* thread: */ current_thread,
/* stack_from_thread: */ current_thread,
get_or_create_context_for(current_thread, state),
(sample_values) {.alloc_samples = sample_weight},
SAMPLE_REGULAR
);
}

// This method exists only to enable testing Datadog::Profiling::Collectors::ThreadContext behavior using RSpec.
// It SHOULD NOT be used for other purposes.
static VALUE _native_sample_allocation(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE sample_weight) {
thread_context_collector_sample_allocation(collector_instance, NUM2UINT(sample_weight));
return Qtrue;
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ void thread_context_collector_sample(
long current_monotonic_wall_time_ns,
VALUE profiler_overhead_stack_thread
);
void thread_context_collector_sample_allocation(VALUE self_instance, unsigned int sample_weight);
VALUE thread_context_collector_sample_after_gc(VALUE self_instance);
void thread_context_collector_on_gc_start(VALUE self_instance);
void thread_context_collector_on_gc_finish(VALUE self_instance);
Expand Down
73 changes: 73 additions & 0 deletions spec/datadog/profiling/collectors/thread_context_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,10 @@ def sample_after_gc
described_class::Testing._native_sample_after_gc(cpu_and_wall_time_collector)
end

def sample_allocation(weight:)
described_class::Testing._native_sample_allocation(cpu_and_wall_time_collector, weight)
end

def thread_list
described_class::Testing._native_thread_list
end
Expand Down Expand Up @@ -815,6 +819,75 @@ def another_way_of_calling_sample(profiler_overhead_stack_thread: Thread.current
end
end

describe '#sample_allocation' do
let(:single_sample) do
expect(samples.size).to be 1
samples.first
end

it 'samples the caller thread' do
sample_allocation(weight: 123)

expect(object_id_from(single_sample.labels.fetch(:'thread id'))).to be Thread.current.object_id
end

it 'tags the sample with the provided weight' do
sample_allocation(weight: 123)

expect(single_sample.values).to include(:'alloc-samples' => 123)
end

it 'includes the thread names, if available' do
thread_with_name = Thread.new do
Thread.current.name = 'thread_with_name'
sample_allocation(weight: 123)
end.join
thread_without_name = Thread.new { sample_allocation(weight: 123) }.join

sample_with_name = samples_for_thread(samples, thread_with_name).first
sample_without_name = samples_for_thread(samples, thread_without_name).first

expect(sample_with_name.labels).to include(:'thread name' => 'thread_with_name')
expect(sample_without_name.labels).to_not include(:'thread name')
end

describe 'code hotspots' do
# NOTE: To avoid duplicating all of the similar-but-slightly different tests from `#sample` (due to how
# `#sample` includes every thread, but `#sample_allocation` includes only the caller thread), here is a simpler
# test to make sure this works in the common case
context 'when there is an active trace on the sampled thread' do
let(:tracer) { Datadog::Tracing.send(:tracer) }
let(:t1) do
Thread.new(ready_queue) do |ready_queue|
inside_t1 do
Datadog::Tracing.trace('profiler.test', type: 'web') do |_span, trace|
trace.resource = 'trace_resource'

Datadog::Tracing.trace('profiler.test.inner') do |inner_span|
@t1_span_id = inner_span.id
@t1_local_root_span_id = trace.send(:root_span).id
sample_allocation(weight: 456)
ready_queue << true
sleep
end
end
end
end
end

after { Datadog::Tracing.shutdown! }

it 'gathers the "local root span id", "span id" and "trace endpoint"' do
expect(single_sample.labels).to include(
:'local root span id' => @t1_local_root_span_id.to_i,
:'span id' => @t1_span_id.to_i,
:'trace endpoint' => 'trace_resource',
)
end
end
end
end

describe '#thread_list' do
it "returns the same as Ruby's Thread.list" do
expect(thread_list).to eq Thread.list
Expand Down
3 changes: 3 additions & 0 deletions spec/spec_helper.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
$LOAD_PATH.unshift File.expand_path('..', __dir__)
$LOAD_PATH.unshift File.expand_path('../lib', __dir__)

Thread.main.name = 'Thread.main' unless Gem::Version.new(RUBY_VERSION) < Gem::Version.new('2.3')
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why are we adding this change?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's to make it easier to debug when we have multiple threads -- if you print/list the threads it's sometimes not obvious which thread is the main one (and main is the one driving the RSpec framework). It's unrelated to the other changes (which is why I put it on a separate commit -- 67b7fa8 ).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here's one example where this is useful. This spec failed and printed the list of threads:

[#<Thread:0x000055df58fcafc0@Thread.main run>, #<Thread:0x000055df5b3d1f90 /app/spec/spec_helper.rb:241 sleep>, #<Thread:0x000055df5d235c48 /usr/local/bundle/gems/webrick-1.7.0/lib/webrick/utils.rb:158 sleep_forever>]


require 'pry'
require 'rspec/collection_matchers'
require 'rspec/wait'
Expand Down