Skip to content

Commit

Permalink
Ensure that at most one gc_tracepoint is active
Browse files Browse the repository at this point in the history
There's two situations that can happen here (see comments in code),
and in either situation we want to ensure that at most one
`gc_tracepoint` is active (or none, if disabled).
  • Loading branch information
ivoanjo committed Nov 9, 2022
1 parent b4b0807 commit 105fc01
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -205,11 +205,25 @@ static VALUE _native_sampling_loop(DDTRACE_UNUSED VALUE _self, VALUE instance) {
struct cpu_and_wall_time_worker_state *state;
TypedData_Get_Struct(instance, struct cpu_and_wall_time_worker_state, &cpu_and_wall_time_worker_typed_data, state);

if (active_sampler_owner_thread != Qnil && is_thread_alive(active_sampler_owner_thread)) {
rb_raise(
rb_eRuntimeError,
"Could not start CpuAndWallTimeWorker: There's already another instance of CpuAndWallTimeWorker active in a different thread"
);
if (active_sampler_owner_thread != Qnil) {
if (is_thread_alive(active_sampler_owner_thread)) {
rb_raise(
rb_eRuntimeError,
"Could not start CpuAndWallTimeWorker: There's already another instance of CpuAndWallTimeWorker active in a different thread"
);
} else {
// The previously active thread seems to have died without cleaning up after itself.
// In this case, we can still go ahead and start the profiler BUT we make sure to disable any existing GC tracepoint
// first as:
// a) If this is a new instance of the CpuAndWallTimeWorker, we don't want the tracepoint from the old instance
// being kept around
// b) If this is the same instance of the CpuAndWallTimeWorker if we call enable on a tracepoint that is already
// enabled, it will start firing more than once, see https://bugs.ruby-lang.org/issues/19114 for details.

struct cpu_and_wall_time_worker_state *old_state;
TypedData_Get_Struct(active_sampler_instance, struct cpu_and_wall_time_worker_state, &cpu_and_wall_time_worker_typed_data, old_state);
rb_tracepoint_disable(old_state->gc_tracepoint);
}
}

// This write to a global is thread-safe BECAUSE we're still holding on to the global VM lock at this point
Expand Down
18 changes: 18 additions & 0 deletions spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,24 @@
try_wait_until(backoff: 0.01) { described_class::Testing._native_is_running?(another_instance) }
end
end

it 'disables the existing gc_tracepoint before starting another CpuAndWallTimeWorker' do
start

expect_in_fork do
another_instance = described_class.new(
recorder: Datadog::Profiling::StackRecorder.new,
max_frames: 400,
tracer: nil,
gc_profiling_enabled: gc_profiling_enabled,
)
another_instance.start

try_wait_until(backoff: 0.01) { described_class::Testing._native_is_running?(another_instance) }

expect(described_class::Testing._native_gc_tracepoint(cpu_and_wall_time_worker)).to_not be_enabled
end
end
end
end

Expand Down

0 comments on commit 105fc01

Please sign in to comment.