Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[PROF-9342] Introduce profiler workaround for Ruby Dir interruption bug #3720

Merged
merged 22 commits into from
Jul 2, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
b03696e
Expose methods for holding/resuming interruptions
ivoanjo Apr 11, 2024
aeb2db6
Bootstrap setting for controlling dir interruption workaround
ivoanjo Apr 24, 2024
88f1164
Add monkey patches for affected dir class APIs
ivoanjo May 10, 2024
3c51234
Add inline suggestion to function
ivoanjo Jun 12, 2024
1721270
Add benchmark for hold/resume interruptions
ivoanjo Jun 12, 2024
42c40b2
Add helper to load monkey patches (similar to our Kernel monkey patches)
ivoanjo Jun 13, 2024
25a96a1
Add type signatures for dir monkey patches
ivoanjo Jun 13, 2024
57e64bf
Load new file when loading profiler
ivoanjo Jun 13, 2024
0e80c77
Wire up dir interruption workaround to setting
ivoanjo Jun 13, 2024
577fdb0
Add test coverage for DirMonkeyPatches
ivoanjo Jun 13, 2024
43a3e5e
Add comments to make Rubocop happy
ivoanjo Jun 14, 2024
9152470
Add nice description to dir interruption workaround setting
ivoanjo Jun 14, 2024
b30ca79
Tweak spec for compatibility with Ruby 2.7
ivoanjo Jun 14, 2024
d6eebe9
Load `DirMonkeyPatches` before referencing it to avoid failing on JRuby
ivoanjo Jun 14, 2024
30601a2
Tweak spec to remove assumption that no signals workaround is disable…
ivoanjo Jun 14, 2024
9ad82fe
Make rubocop happy
ivoanjo Jun 14, 2024
8099c94
Tweak naming of functions to clarify that it's signals being held
ivoanjo Jun 18, 2024
ba0d6b5
Avoid ruby2_keywords and instead have variants for Ruby 2 and 3
ivoanjo Jun 18, 2024
a6366f9
Add test to make sure we don't forget to register new profiler benchm…
ivoanjo Jun 19, 2024
3501237
Add new benchmark to validate_benchmarks_spec
ivoanjo Jun 19, 2024
533eeca
Merge branch 'master' into ivoanjo/prof-9342-dir-interruption-workaround
ivoanjo Jul 1, 2024
259e134
Add clarification for expected behavior of yield
ivoanjo Jul 2, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
Expose methods for holding/resuming interruptions
These will be needed to implement the dir interruption workaround.
  • Loading branch information
ivoanjo committed Jun 13, 2024
commit b03696ef5ea475f8a60c0c69f91e7ffe59a6e4f2
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,8 @@ static VALUE _native_with_blocked_sigprof(DDTRACE_UNUSED VALUE self);
static VALUE rescued_sample_allocation(VALUE tracepoint_data);
static void delayed_error(struct cpu_and_wall_time_worker_state *state, const char *error);
static VALUE _native_delayed_error(DDTRACE_UNUSED VALUE self, VALUE instance, VALUE error_msg);
static VALUE _native_hold_interruptions(DDTRACE_UNUSED VALUE self);
static VALUE _native_resume_interruptions(DDTRACE_UNUSED VALUE self);

// Note on sampler global state safety:
//
Expand Down Expand Up @@ -285,7 +287,9 @@ void collectors_cpu_and_wall_time_worker_init(VALUE profiling_module) {
rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_allocation_count", _native_allocation_count, 0);
rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_is_running?", _native_is_running, 1);
rb_define_singleton_method(testing_module, "_native_current_sigprof_signal_handler", _native_current_sigprof_signal_handler, 0);
// TODO: Remove `_native_is_running` from `testing_module` once `prof-correctness` has been updated to not need it
rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_hold_interruptions", _native_hold_interruptions, 0);
rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_resume_interruptions", _native_resume_interruptions, 0);
// TODO: Remove `_native_is_running` from `testing_module` (should be in class) once `prof-correctness` has been updated to not need it
rb_define_singleton_method(testing_module, "_native_is_running?", _native_is_running, 1);
rb_define_singleton_method(testing_module, "_native_install_testing_signal_handler", _native_install_testing_signal_handler, 0);
rb_define_singleton_method(testing_module, "_native_remove_testing_signal_handler", _native_remove_testing_signal_handler, 0);
Expand Down Expand Up @@ -1159,3 +1163,17 @@ static VALUE _native_delayed_error(DDTRACE_UNUSED VALUE self, VALUE instance, VA

return Qnil;
}

// Masks SIGPROF interruptions for the current thread. Please don't use this -- you may end up with incomplete
// profiling data.
static VALUE _native_hold_interruptions(DDTRACE_UNUSED VALUE self) {
block_sigprof_signal_handler_from_running_in_current_thread();
return Qtrue;
}

// Unmasks SIGPROF interruptions for the current thread. If there's a pending sample, it'll be triggered inside this
// method.
static VALUE _native_resume_interruptions(DDTRACE_UNUSED VALUE self) {
unblock_sigprof_signal_handler_from_running_in_current_thread();
return Qtrue;
}
lloeki marked this conversation as resolved.
Show resolved Hide resolved
3 changes: 3 additions & 0 deletions sig/datadog/profiling.rbs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ module Datadog
def self.allocation_count: () -> ::Integer?
def self.enabled?: () -> bool
def self.wait_until_running: () -> true

private

def self.replace_noop_allocation_count: () -> void
def self.native_library_compilation_skipped?: () -> ::String?
def self.try_reading_skipped_reason_file: (?untyped file_api) -> ::String?
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ module Datadog
def self._native_is_running?: (CpuAndWallTimeWorker self_instance) -> bool
def self._native_allocation_count: () -> ::Integer?
def self._native_sampling_loop: (CpuAndWallTimeWorker self_instance) -> void
def self._native_hold_interruptions: () -> void
def self._native_resume_interruptions: () -> void

def wait_until_running: (timeout_seconds: ::Integer?) -> true
end
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1176,6 +1176,20 @@
end
end

describe '._native_hold_interruptions and _native_resume_interruptions' do
it 'blocks/unblocks interruptions for the current thread' do
expect(described_class::Testing._native_is_sigprof_blocked_in_current_thread).to be false

described_class._native_hold_interruptions

expect(described_class::Testing._native_is_sigprof_blocked_in_current_thread).to be true

described_class._native_resume_interruptions

expect(described_class::Testing._native_is_sigprof_blocked_in_current_thread).to be false
end
end

def wait_until_running
cpu_and_wall_time_worker.wait_until_running
end
Expand Down