From ea7b13b1a8ade4e4f1cead572cd35b378cd371e9 Mon Sep 17 00:00:00 2001 From: Ian Butterworth Date: Tue, 15 Feb 2022 00:53:44 -0500 Subject: [PATCH] Revert "Add profiling of already running tasks via SIGINFO/SIGUSR1 (#43179)" (#44184) This reverts commit a9aad9784b5f94a518263172702379b390e8856d. --- NEWS.md | 5 +- contrib/generate_precompile.jl | 1 - src/julia_internal.h | 4 - src/signal-handling.c | 29 -------- src/signals-mach.c | 1 - src/signals-unix.c | 35 ++------- src/signals-win.c | 1 - stdlib/Profile/docs/src/index.md | 75 ------------------- stdlib/Profile/src/Profile.jl | 124 +++++++++---------------------- stdlib/Profile/test/runtests.jl | 43 ----------- 10 files changed, 41 insertions(+), 277 deletions(-) diff --git a/NEWS.md b/NEWS.md index e159a47c9c4ee9..fd217eb7f954f0 100644 --- a/NEWS.md +++ b/NEWS.md @@ -146,7 +146,7 @@ Standard library changes * Now uses `textwidth` for formatting `%s` and `%c` widths ([#41085]). #### Profile -* CPU profiling now records sample metadata including thread and task. `Profile.print()` has a new `groupby` kwarg that allows +* Profiling now records sample metadata including thread and task. `Profile.print()` has a new `groupby` kwarg that allows grouping by thread, task, or nested thread/task, task/thread, and `threads` and `tasks` kwargs to allow filtering. Further, percent utilization is now reported as a total or per-thread, based on whether the thread is idle or not at each sample. `Profile.fetch()` by default strips out the new metadata to ensure backwards compatibility with external @@ -154,9 +154,6 @@ Standard library changes * The new `Profile.Allocs` module allows memory allocations to be profiled. The stack trace, type, and size of each allocation is recorded, and a `sample_rate` argument allows a tunable amount of allocations to be skipped, reducing performance overhead. ([#42768]) -* A fixed duration cpu profile can now be triggered by the user during running tasks without `Profile` being loaded - first and the report will show during execution. On MacOS & FreeBSD press `ctrl-t` or raise a `SIGINFO`. - For other platforms raise a `SIGUSR1` i.e. `% kill -USR1 $julia_pid`. Not currently available on windows. ([#43179]) #### Random diff --git a/contrib/generate_precompile.jl b/contrib/generate_precompile.jl index f2d39918ffe0d3..983f5c3c8ef363 100644 --- a/contrib/generate_precompile.jl +++ b/contrib/generate_precompile.jl @@ -221,7 +221,6 @@ Profile = get(Base.loaded_modules, Base.PkgId(Base.UUID("9abbd945-dff8-562f-b5e8-e1ebf5ef1b79"), "Profile"), nothing) if Profile !== nothing - repl_script *= Profile.precompile_script hardcoded_precompile_statements *= """ precompile(Tuple{typeof(Profile.tree!), Profile.StackFrameTree{UInt64}, Vector{UInt64}, Dict{UInt64, Vector{Base.StackTraces.StackFrame}}, Bool, Symbol, Int, UInt}) precompile(Tuple{typeof(Profile.tree!), Profile.StackFrameTree{UInt64}, Vector{UInt64}, Dict{UInt64, Vector{Base.StackTraces.StackFrame}}, Bool, Symbol, Int, UnitRange{UInt}}) diff --git a/src/julia_internal.h b/src/julia_internal.h index 759fd8e3c76547..96d064c9df067a 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -128,10 +128,6 @@ int jl_running_under_rr(int recheck) JL_NOTSAFEPOINT; // Returns time in nanosec JL_DLLEXPORT uint64_t jl_hrtime(void) JL_NOTSAFEPOINT; -JL_DLLEXPORT void jl_set_peek_cond(uintptr_t); -JL_DLLEXPORT double jl_get_profile_peek_duration(void); -JL_DLLEXPORT void jl_set_profile_peek_duration(double); - // number of cycles since power-on static inline uint64_t cycleclock(void) JL_NOTSAFEPOINT { diff --git a/src/signal-handling.c b/src/signal-handling.c index 142f03b6c899db..0f3b715d430611 100644 --- a/src/signal-handling.c +++ b/src/signal-handling.c @@ -129,35 +129,6 @@ static size_t jl_safe_read_mem(const volatile char *ptr, char *out, size_t len) return i; } -static double profile_autostop_time = -1.0; -static double profile_peek_duration = 1.0; // seconds - -double jl_get_profile_peek_duration(void) { - return profile_peek_duration; -} -void jl_set_profile_peek_duration(double t) { - profile_peek_duration = t; - return; -} - -uintptr_t profile_show_peek_cond_loc; -JL_DLLEXPORT void jl_set_peek_cond(uintptr_t cond) -{ - profile_show_peek_cond_loc = cond; - return; -} - -static void jl_check_profile_autostop(void) { - if ((profile_autostop_time != -1.0) && (jl_hrtime() > profile_autostop_time)) { - profile_autostop_time = -1.0; - jl_profile_stop_timer(); - jl_safe_printf("\n==============================================================\n"); - jl_safe_printf("Profile collected. A report will print at the next yield point\n"); - jl_safe_printf("==============================================================\n\n"); - uv_async_send((uv_async_t*)profile_show_peek_cond_loc); - } -} - #if defined(_WIN32) #include "signals-win.c" #else diff --git a/src/signals-mach.c b/src/signals-mach.c index 130603931c5aa2..4ddfad4e10600f 100644 --- a/src/signals-mach.c +++ b/src/signals-mach.c @@ -643,7 +643,6 @@ void *mach_profile_listener(void *arg) } jl_unlock_profile_mach(0, keymgr_locked); if (running) { - jl_check_profile_autostop(); // Reset the alarm kern_return_t ret = clock_alarm(clk, TIME_RELATIVE, timerprof, profile_port); HANDLE_MACH_ERROR("clock_alarm", ret) diff --git a/src/signals-unix.c b/src/signals-unix.c index b2e3ef29520299..67af5c0b1358de 100644 --- a/src/signals-unix.c +++ b/src/signals-unix.c @@ -61,6 +61,7 @@ bt_context_t *jl_to_bt_context(void *sigctx) #endif } + static int thread0_exit_count = 0; static void jl_exit_thread0(int exitstate, jl_bt_element_t *bt_data, size_t bt_size); @@ -518,22 +519,17 @@ JL_DLLEXPORT int jl_profile_start_timer(void) sigprof.sigev_notify = SIGEV_SIGNAL; sigprof.sigev_signo = SIGUSR1; sigprof.sigev_value.sival_ptr = &timerprof; - // Because SIGUSR1 is multipurpose, set `running` before so that we know that the first SIGUSR1 came from the timer - running = 1; - if (timer_create(CLOCK_REALTIME, &sigprof, &timerprof) == -1) { - running = 0; + if (timer_create(CLOCK_REALTIME, &sigprof, &timerprof) == -1) return -2; - } // Start the timer itsprof.it_interval.tv_sec = 0; itsprof.it_interval.tv_nsec = 0; itsprof.it_value.tv_sec = nsecprof / GIGA; itsprof.it_value.tv_nsec = nsecprof % GIGA; - if (timer_settime(timerprof, 0, &itsprof, NULL) == -1) { - running = 0; + if (timer_settime(timerprof, 0, &itsprof, NULL) == -1) return -3; - } + running = 1; return 0; } @@ -652,18 +648,6 @@ static void kqueue_signal(int *sigqueue, struct kevent *ev, int sig) } #endif -void trigger_profile_peek(void) -{ - jl_safe_printf("\n======================================================================================\n"); - jl_safe_printf("Information request received. A stacktrace will print followed by a %.1f second profile\n", profile_peek_duration); - jl_safe_printf("======================================================================================\n"); - bt_size_cur = 0; // clear profile buffer - if (jl_profile_start_timer() < 0) - jl_safe_printf("ERROR: Could not start profile timer\n"); - else - profile_autostop_time = jl_hrtime() + (profile_peek_duration * 1e9); -} - static void *signal_listener(void *arg) { static jl_bt_element_t bt_data[JL_MAX_BT_SIZE + 1]; @@ -769,17 +753,11 @@ static void *signal_listener(void *arg) int doexit = critical; #ifdef SIGINFO - if (sig == SIGINFO) { - if (running != 1) - trigger_profile_peek(); + if (sig == SIGINFO) doexit = 0; - } #else - if (sig == SIGUSR1) { - if (running != 1) - trigger_profile_peek(); + if (sig == SIGUSR1) doexit = 0; - } #endif bt_size = 0; @@ -857,7 +835,6 @@ static void *signal_listener(void *arg) } #ifndef HAVE_MACH if (profile && running) { - jl_check_profile_autostop(); #if defined(HAVE_TIMER) timer_settime(timerprof, 0, &itsprof, NULL); #elif defined(HAVE_ITIMER) diff --git a/src/signals-win.c b/src/signals-win.c index 178a7463b8d502..fefb29a529847d 100644 --- a/src/signals-win.c +++ b/src/signals-win.c @@ -388,7 +388,6 @@ static DWORD WINAPI profile_bt( LPVOID lparam ) jl_gc_debug_critical_error(); abort(); } - jl_check_profile_autostop(); } } } diff --git a/stdlib/Profile/docs/src/index.md b/stdlib/Profile/docs/src/index.md index 8701dded0d4275..89894723b11165 100644 --- a/stdlib/Profile/docs/src/index.md +++ b/stdlib/Profile/docs/src/index.md @@ -2,79 +2,6 @@ ## CPU Profiling -There are two main approaches to CPU profiling julia code: - -## Via `@profile` - -Where profiling is enabled for a given call via the `@profile` macro. - -```julia-repl -julia> using Profile - -julia> @profile foo() - -julia> Profile.print() -Overhead ╎ [+additional indent] Count File:Line; Function -========================================================= - ╎147 @Base/client.jl:506; _start() - ╎ 147 @Base/client.jl:318; exec_options(opts::Base.JLOptions) -... -``` - -## Triggered During Execution - -Tasks that are already running can also be profiled for a fixed time period at any user-triggered time. - -To trigger the profiling: -- MacOS & FreeBSD (BSD-based platforms): Use `ctrl-t` or pass a `SIGINFO` signal to the julia process i.e. `% kill -INFO $julia_pid` -- Linux: Pass a `SIGUSR1` signal to the julia process i.e. `% kill -USR1 $julia_pid` -- Windows: Not currently supported. - -First, a single stack trace at the instant that the signal was thrown is shown, then a 1 second profile is collected, -followed by the profile report at the next yield point, which may be at task completion for code without yield points -e.g. tight loops. - -```julia-repl -julia> foo() -##== the user sends a trigger while foo is running ==## -load: 2.53 cmd: julia 88903 running 6.16u 0.97s - -====================================================================================== -Information request received. A stacktrace will print followed by a 1.0 second profile -====================================================================================== - -signal (29): Information request: 29 -__psynch_cvwait at /usr/lib/system/libsystem_kernel.dylib (unknown line) -_pthread_cond_wait at /usr/lib/system/libsystem_pthread.dylib (unknown line) -... - -====================================================================== -Profile collected. A report will print if the Profile module is loaded -====================================================================== - -Overhead ╎ [+additional indent] Count File:Line; Function -========================================================= -Thread 1 Task 0x000000011687c010 Total snapshots: 572. Utilization: 100% - ╎147 @Base/client.jl:506; _start() - ╎ 147 @Base/client.jl:318; exec_options(opts::Base.JLOptions) -... - -Thread 2 Task 0x0000000116960010 Total snapshots: 572. Utilization: 0% - ╎572 @Base/task.jl:587; task_done_hook(t::Task) - ╎ 572 @Base/task.jl:879; wait() -... -``` - -### Customization - -The duration of the profiling can be adjusted via [`Profile.set_peek_duration`](@ref) - -The profile report is broken down by thread and task. Pass a no-arg function to `Profile.peek_report[]` to override this. -i.e. `Profile.peek_report[] = () -> Profile.print()` to remove any grouping. This could also be overridden by an external -profile data consumer. - -## Reference - ```@docs Profile.@profile ``` @@ -89,8 +16,6 @@ Profile.fetch Profile.retrieve Profile.callers Profile.clear_malloc_data -Profile.get_peek_duration -Profile.set_peek_duration ``` ## Memory profiling diff --git a/stdlib/Profile/src/Profile.jl b/stdlib/Profile/src/Profile.jl index a5f383b77efa1a..6916f400e16daa 100644 --- a/stdlib/Profile/src/Profile.jl +++ b/stdlib/Profile/src/Profile.jl @@ -34,52 +34,6 @@ macro profile(ex) end end -# triggers printing the report after a SIGINFO/SIGUSR1 profile request -const PROFILE_PRINT_COND = Ref{Base.AsyncCondition}() -function profile_printing_listener() - try - while true - wait(PROFILE_PRINT_COND[]) - peek_report[]() - end - catch ex - if !isa(ex, InterruptException) - @error "Profile printing listener crashed" exception=ex,catch_backtrace() - end - end -end - -# An internal function called to show the report after an information request (SIGINFO or SIGUSR1). -function _peek_report() - iob = IOBuffer() - ioc = IOContext(IOContext(iob, stdout), :displaysize=>displaysize(stdout)) - print(ioc, groupby = [:thread, :task]) - Base.print(stdout, String(resize!(iob.data, iob.size))) -end -# This is a ref so that it can be overridden by other profile info consumers. -const peek_report = Ref{Function}(_peek_report) - -""" - get_peek_duration() - -Get the duration in seconds of the profile "peek" that is triggered via `SIGINFO` or `SIGUSR1`, depending on platform. -""" -get_peek_duration() = ccall(:jl_get_profile_peek_duration, Float64, ()) -""" - set_peek_duration(t::Float64) - -Set the duration in seconds of the profile "peek" that is triggered via `SIGINFO` or `SIGUSR1`, depending on platform. -""" -set_peek_duration(t::Float64) = ccall(:jl_set_profile_peek_duration, Cvoid, (Float64,), t) - -precompile_script = """ -let _Profile = Base.PkgId(Base.UUID("9abbd945-dff8-562f-b5e8-e1ebf5ef1b79"), "Profile") - Base.loaded_modules[_Profile].@profile sleep(1) - Base.loaded_modules[_Profile].peek_report[]() - Base.loaded_modules[_Profile].clear() -end -""" - #### #### User-level functions #### @@ -127,23 +81,15 @@ function init(n::Integer, delay::Real; limitwarn::Bool = true) end end -function __init__() - # init with default values - # Use a max size of 10M profile samples, and fire timer every 1ms - # (that should typically give around 100 seconds of record) - @static if Sys.iswindows() && Sys.WORD_SIZE == 32 - # The Win32 unwinder is 1000x slower than elsewhere (around 1ms/frame), - # so we don't want to slow the program down by quite that much - n = 1_000_000 - delay = 0.01 - else - n = 10_000_000 - delay = 0.001 - end - init(n, delay, limitwarn = false) - PROFILE_PRINT_COND[] = Base.AsyncCondition() - ccall(:jl_set_peek_cond, Cvoid, (Ptr{Cvoid},), PROFILE_PRINT_COND[].handle) - errormonitor(Threads.@spawn(profile_printing_listener())) +# init with default values +# Use a max size of 10M profile samples, and fire timer every 1ms +# (that should typically give around 100 seconds of record) +if Sys.iswindows() && Sys.WORD_SIZE == 32 + # The Win32 unwinder is 1000x slower than elsewhere (around 1ms/frame), + # so we don't want to slow the program down by quite that much + __init__() = init(1_000_000, 0.01, limitwarn = false) +else + __init__() = init(10_000_000, 0.001, limitwarn = false) end """ @@ -183,7 +129,7 @@ const META_OFFSET_TASKID = 4 const META_OFFSET_THREADID = 5 """ - print([io::IO = stdout,] [data::Vector = fetch()], [lidict::Union{LineInfoDict, LineInfoFlatDict} = getdict(data)]; kwargs...) + print([io::IO = stdout,] [data::Vector]; kwargs...) Prints profiling results to `io` (by default, `stdout`). If you do not supply a `data` vector, the internal buffer of accumulated backtraces @@ -224,7 +170,7 @@ The keyword arguments can be any combination of: does not control which tasks samples are collected within. """ function print(io::IO, - data::Vector{<:Unsigned} = fetch(), + data::Vector{<:Unsigned} = fetch(include_meta = true), lidict::Union{LineInfoDict, LineInfoFlatDict} = getdict(data) ; format = :tree, @@ -260,7 +206,7 @@ function print(io::IO, nl = length(threadids) > 1 ? "\n" : "" printstyled(io, "Task $(Base.repr(taskid))$nl"; bold=true, color=Base.debug_color()) for threadid in threadids - printstyled(io, " Thread $threadid "; bold=true, color=Base.info_color()) + printstyled(io, " Thread $threadid\n"; bold=true, color=Base.info_color()) nosamples = print(io, data, lidict, pf, format, threadid, taskid, true) nosamples && (any_nosamples = true) println(io) @@ -276,7 +222,7 @@ function print(io::IO, nl = length(taskids) > 1 ? "\n" : "" printstyled(io, "Thread $threadid$nl"; bold=true, color=Base.info_color()) for taskid in taskids - printstyled(io, " Task $(Base.repr(taskid)) "; bold=true, color=Base.debug_color()) + printstyled(io, " Task $(Base.repr(taskid))\n"; bold=true, color=Base.debug_color()) nosamples = print(io, data, lidict, pf, format, threadid, taskid, true) nosamples && (any_nosamples = true) println(io) @@ -286,7 +232,7 @@ function print(io::IO, elseif groupby == :task threads = 1:typemax(Int) for taskid in intersect(get_task_ids(data), tasks) - printstyled(io, "Task $(Base.repr(taskid)) "; bold=true, color=Base.debug_color()) + printstyled(io, "Task $(Base.repr(taskid))\n"; bold=true, color=Base.debug_color()) nosamples = print(io, data, lidict, pf, format, threads, taskid, true) nosamples && (any_nosamples = true) println(io) @@ -294,7 +240,7 @@ function print(io::IO, elseif groupby == :thread tasks = 1:typemax(UInt) for threadid in intersect(get_thread_ids(data), threads) - printstyled(io, "Thread $threadid "; bold=true, color=Base.info_color()) + printstyled(io, "Thread $threadid\n"; bold=true, color=Base.info_color()) nosamples = print(io, data, lidict, pf, format, threadid, tasks, true) nosamples && (any_nosamples = true) println(io) @@ -305,18 +251,6 @@ function print(io::IO, return end -""" - print([io::IO = stdout,] data::Vector, lidict::LineInfoDict; kwargs...) - -Prints profiling results to `io`. This variant is used to examine results exported by a -previous call to [`retrieve`](@ref). Supply the vector `data` of backtraces and -a dictionary `lidict` of line information. - -See `Profile.print([io], data)` for an explanation of the valid keyword arguments. -""" -print(data::Vector{<:Unsigned} = fetch(), lidict::Union{LineInfoDict, LineInfoFlatDict} = getdict(data); kwargs...) = - print(stdout, data, lidict; kwargs...) - function print(io::IO, data::Vector{<:Unsigned}, lidict::Union{LineInfoDict, LineInfoFlatDict}, fmt::ProfileFormat, format::Symbol, threads::Union{Int,AbstractVector{Int}}, tasks::Union{UInt,AbstractVector{UInt}}, is_subsection::Bool = false) @@ -384,6 +318,18 @@ function has_meta(data) return false end +""" + print([io::IO = stdout,] data::Vector, lidict::LineInfoDict; kwargs...) + +Prints profiling results to `io`. This variant is used to examine results exported by a +previous call to [`retrieve`](@ref). Supply the vector `data` of backtraces and +a dictionary `lidict` of line information. + +See `Profile.print([io], data)` for an explanation of the valid keyword arguments. +""" +print(data::Vector{<:Unsigned} = fetch(include_meta = true), lidict::Union{LineInfoDict, LineInfoFlatDict} = getdict(data); kwargs...) = + print(stdout, data, lidict; kwargs...) + """ retrieve(; kwargs...) -> data, lidict @@ -595,10 +541,10 @@ depends on the exact memory addresses used in JIT-compiling. This function is pr internal use; [`retrieve`](@ref) may be a better choice for most users. By default metadata such as threadid and taskid is included. Set `include_meta` to `false` to strip metadata. """ -function fetch(;include_meta = true, limitwarn = true) +function fetch(;include_meta = true) maxlen = maxlen_data() len = len_data() - if limitwarn && is_buffer_full() + if is_buffer_full() @warn """The profile data buffer is full; profiling probably terminated before your program finished. To profile for longer runs, call `Profile.init()` with a larger buffer and/or larger delay.""" @@ -1112,24 +1058,22 @@ function tree(io::IO, data::Vector{UInt64}, lidict::Union{LineInfoFlatDict, Line root, nsleeping = tree!(StackFrameTree{UInt64}(), data, lidict, fmt.C, fmt.recur, threads, tasks) end util_perc = (1 - (nsleeping / root.count)) * 100 - !is_subsection && print_tree(io, root, cols, fmt, is_subsection) if isempty(root.down) if is_subsection Base.print(io, "Total snapshots: ") printstyled(io, "$(root.count)", color=Base.warn_color()) - Base.println(io, ". Utilization: ", round(Int, util_perc), "%") + Base.println(io, " (", round(Int, util_perc), "% utilization)") else warning_empty() end return true - else - Base.print(io, "Total snapshots: ", root.count, ". Utilization: ", round(Int, util_perc), "%") end + print_tree(io, root, cols, fmt, is_subsection) + Base.print(io, "Total snapshots: ", root.count, " (", round(Int, util_perc), "% utilization") if is_subsection - println(io) - print_tree(io, root, cols, fmt, is_subsection) + println(io, ")") else - println(io, " across all threads and tasks. Use the `groupby` kwarg to break down by thread and/or task") + println(io, " across all threads and tasks. Use the `groupby` kwarg to break down by thread and/or task)") end return false end diff --git a/stdlib/Profile/test/runtests.jl b/stdlib/Profile/test/runtests.jl index 50917b9797c7dc..ddddac55fcd931 100644 --- a/stdlib/Profile/test/runtests.jl +++ b/stdlib/Profile/test/runtests.jl @@ -187,49 +187,6 @@ let cmd = Base.julia_cmd() @test parse(Int, s) > 100 end -if Sys.isbsd() || Sys.islinux() - @testset "SIGINFO/SIGUSR1 profile triggering" begin - let cmd = Base.julia_cmd() - script = """ - x = rand(1000, 1000) - println("started") - while true - x * x - yield() - end - """ - iob = Base.BufferStream() - p = run(pipeline(`$cmd -e $script`, stderr = devnull, stdout = iob), wait = false) - t = Timer(60) do t # should be done in under 10 seconds - kill(p, Base.SIGKILL) - sleep(5) - close(iob) - end - try - s = readuntil(iob, "started", keep = true) - @assert occursin("started", s) - @assert process_running(p) - for _ in 1:2 - sleep(2) - if Sys.isbsd() - kill(p, 29) # SIGINFO - elseif Sys.islinux() - kill(p, 10) # SIGUSR1 - end - s = readuntil(iob, "Overhead ╎", keep = true) - @test process_running(p) - @test occursin("Overhead ╎", s) - end - finally - kill(p, Base.SIGKILL) - close(t) - end - end - end -else - @warn "Skipping \"SIGINFO/SIGUSR1 profile triggering\" test as it is not supported on this platform" -end - @testset "FlameGraphs" begin # FlameGraphs makes use of some Profile's internals. Detect possible breakage by mimicking some of its tests. # Breakage is acceptable since these internals are not part of the stable API, but it's better to know, and ideally