From a96ad143a409188f58e55d3f46f5bedda11a727c Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Thu, 10 Feb 2022 15:29:59 -0500 Subject: [PATCH] Backport latest src/gc-alloc-profiler.* stdlib/Profile/*/allocs.jl --- src/gc-alloc-profiler.h | 2 +- stdlib/Profile/src/Allocs.jl | 60 +++++------------ stdlib/Profile/test/allocs.jl | 122 ++++++++++++++++++++++++++++++++++ 3 files changed, 141 insertions(+), 43 deletions(-) create mode 100644 stdlib/Profile/test/allocs.jl diff --git a/src/gc-alloc-profiler.h b/src/gc-alloc-profiler.h index 3cfe7237dd183..8be6fed21a899 100644 --- a/src/gc-alloc-profiler.h +++ b/src/gc-alloc-profiler.h @@ -35,7 +35,7 @@ void _maybe_record_alloc_to_profile(jl_value_t *val, size_t size, jl_datatype_t extern int g_alloc_profile_enabled; -#define jl_gc_unknown_type_tag ((uintptr_t)0xdeadaa03) +#define jl_gc_unknown_type_tag ((jl_datatype_t*)0xdeadaa03) static inline void maybe_record_alloc_to_profile(jl_value_t *val, size_t size, jl_datatype_t *typ) JL_NOTSAFEPOINT { if (__unlikely(g_alloc_profile_enabled)) { diff --git a/stdlib/Profile/src/Allocs.jl b/stdlib/Profile/src/Allocs.jl index 70a9978b456ca..15a7afbff76fe 100644 --- a/stdlib/Profile/src/Allocs.jl +++ b/stdlib/Profile/src/Allocs.jl @@ -44,6 +44,18 @@ julia> results = Profile.Allocs.fetch() julia> last(sort(results.allocs, by=x->x.size)) Profile.Allocs.Alloc(Vector{Any}, Base.StackTraces.StackFrame[_new_array_ at array.c:127, ...], 5576) ``` + +!!! note + The current implementation of the Allocations Profiler does not + capture types for all allocations. Allocations for which the profiler + could not capture the type are represented as having type + `Profile.Allocs.UnknownType`. + + You can read more about the missing types and the plan to improve this, here: + https://github.com/JuliaLang/julia/issues/43688. + +!!! compat "Julia 1.8" + The allocation profiler was added in Julia 1.8. """ macro profile(opts, ex) _prof_expr(ex, opts) @@ -52,18 +64,14 @@ macro profile(ex) _prof_expr(ex, :(sample_rate=0.0001)) end -# globals used for tracking how many allocs we're missing -# vs the alloc counters used by @time -const _g_gc_num_before = Ref{Base.GC_Num}() -const _g_sample_rate = Ref{Real}() -const _g_expected_sampled_allocs = Ref{Float64}(0) - function _prof_expr(expr, opts) quote $start(; $(esc(opts))) - local res = $(esc(expr)) - $stop() - res + try + $(esc(expr)) + finally + $stop() + end end end @@ -75,9 +83,6 @@ A sample rate of 1.0 will record everything; 0.0 will record nothing. """ function start(; sample_rate::Real) ccall(:jl_start_alloc_profile, Cvoid, (Cdouble,), Float64(sample_rate)) - - _g_sample_rate[] = sample_rate - _g_gc_num_before[] = Base.gc_num() end """ @@ -87,15 +92,6 @@ Stop recording allocations. """ function stop() ccall(:jl_stop_alloc_profile, Cvoid, ()) - - # increment a counter of how many allocs we would expect - # the memory profiler to see, based on how many allocs - # actually happened. - gc_num_after = Base.gc_num() - gc_diff = Base.GC_Diff(gc_num_after, _g_gc_num_before[]) - alloc_count = Base.gc_alloc_count(gc_diff) - expected_samples = alloc_count * _g_sample_rate[] - _g_expected_sampled_allocs[] += expected_samples end """ @@ -105,8 +101,6 @@ Clear all previously profiled allocation information from memory. """ function clear() ccall(:jl_free_alloc_profile, Cvoid, ()) - - _g_expected_sampled_allocs[] = 0 return nothing end @@ -118,25 +112,7 @@ objects which can be analyzed. 
""" function fetch() raw_results = ccall(:jl_fetch_alloc_profile, RawResults, ()) - decoded_results = decode(raw_results) - - # avoid divide-by-0 errors - if _g_expected_sampled_allocs[] > 0 - missed_allocs = max(0, _g_expected_sampled_allocs[] - length(decoded_results.allocs)) - missed_percentage = max(0, round(Int, missed_allocs / _g_expected_sampled_allocs[] * 100)) - if missed_percentage > 0 - @warn("The allocation profiler is not fully implemented, and missed approximately" * - " $(missed_percentage)% (estimated $(round(Int, missed_allocs)) / $(round(Int, - _g_expected_sampled_allocs[]))) " * - "of sampled allocs in the last run. " * - "For more info see https://github.com/JuliaLang/julia/issues/43688") - else - @warn("The allocation profiler is not fully implemented, and may have missed" * - " some of the allocs. " * - "For more info see https://github.com/JuliaLang/julia/issues/43688") - end - end - return decoded_results + return decode(raw_results) end # decoded results diff --git a/stdlib/Profile/test/allocs.jl b/stdlib/Profile/test/allocs.jl new file mode 100644 index 0000000000000..b8d6222d07567 --- /dev/null +++ b/stdlib/Profile/test/allocs.jl @@ -0,0 +1,122 @@ +using Test +using Profile: Allocs + +@testset "alloc profiler doesn't segfault" begin + res = Allocs.@profile sample_rate=1.0 begin + # test the allocations during compilation + using Base64 + end + profile = Allocs.fetch() + + @test length(profile.allocs) > 0 + first_alloc = profile.allocs[1] + @test first_alloc.size > 0 + @test length(first_alloc.stacktrace) > 0 + @test length(string(first_alloc.type)) > 0 +end + +@testset "alloc profiler works when there are multiple tasks on multiple threads" begin + NUM_TASKS = 1000 + + # This test is only really meaningful if we're running on + # multiple threads, but this isn't true on the windows tests, + # causing them to fail. So, commenting this assertion out. + # @test Threads.nthreads() > 1 + + function do_work() + ch = Channel{Vector{Float64}}(Inf) + @sync for i in 1:NUM_TASKS + Threads.@spawn begin + # generate garbage + put!(ch, zeros(100)) + end + end + close(ch) + end + + # call once to make sure it's compiled + precompile(do_work, ()) + do_work() + + res = Allocs.@profile sample_rate=1 begin + do_work() + end + profile = Allocs.fetch() + + # expecting at least 2 allocations per task: + # 1. the task + # 2. 
the vector + @test length(profile.allocs) >= 2*NUM_TASKS + first_alloc = profile.allocs[1] + @test first_alloc.size > 0 + @test length(first_alloc.stacktrace) > 0 + @test length(string(first_alloc.type)) > 0 + + @testset for type in (Task, Vector{Float64},) + @test length(filter(a->a.type <: type, profile.allocs)) >= NUM_TASKS + end + + # TODO: it would be nice to assert that these tasks + # were actually scheduled onto multiple threads, + # and we see allocs from all threads in the profile +end + +@testset "alloc profiler start stop fetch clear" begin + function do_work() + # Compiling allocates a lot + for f in (gensym() for _ in 1:10) + @eval begin + $f() = 10 + $f() + end + end + end + + Allocs.@profile sample_rate=1 do_work() + @test length(Allocs.fetch().allocs) > 10 + + Allocs.clear() + @test length(Allocs.fetch().allocs) == 0 + Allocs.clear() + @test length(Allocs.fetch().allocs) == 0 + + Allocs.@profile sample_rate=1 do_work() + curr_allocs = length(Allocs.fetch().allocs) + @test curr_allocs > 10 + + # Do _more_ work, adding into the same profile + Allocs.@profile sample_rate=1 do_work() + @test length(Allocs.fetch().allocs) > curr_allocs + + Allocs.clear() + @test length(Allocs.fetch().allocs) == 0 + + # Clear without fetching + + Allocs.@profile sample_rate=1 do_work() + Allocs.clear() + @test length(Allocs.fetch().allocs) == 0 + + # And things still work like normal afterwards + + Allocs.@profile sample_rate=1 do_work() + Allocs.@profile sample_rate=1 do_work() + Allocs.@profile sample_rate=1 do_work() + @test length(Allocs.fetch().allocs) > 10 + + Allocs.@profile sample_rate=1 do_work() + Allocs.@profile sample_rate=1 do_work() + @test length(Allocs.fetch().allocs) > 10 + + Allocs.clear() +end + +@testset "alloc profiler catches strings" begin + Allocs.@profile sample_rate=1 "$(rand())" + + prof = Allocs.fetch() + Allocs.clear() + + @test length(prof.allocs) >= 1 + @test length([a for a in prof.allocs if a.type == String]) >= 1 +end
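
For reference, below is a minimal usage sketch of the `Profile.Allocs` API that this patch backports. The workload, the `sample_rate` value, and the post-processing (sorting by size, counting `UnknownType` entries) are illustrative choices, not taken from the patch; only the `@profile` / `fetch` / `clear` calls and the `type` / `size` / `stacktrace` fields come from the code above.

```julia
using Profile

# Sample roughly 1% of allocations while running an allocation-heavy workload.
# (The workload here is arbitrary; any Julia code can go in the block.)
Profile.Allocs.@profile sample_rate=0.01 begin
    map(collect, [1:1000 for _ in 1:100])
end

results = Profile.Allocs.fetch()   # decode the raw samples into Alloc objects

# Inspect the largest sampled allocation (size is in bytes).
biggest = last(sort(results.allocs, by = a -> a.size))
@show biggest.type biggest.size length(biggest.stacktrace)

# Allocations whose type could not be captured are reported as UnknownType
# (see https://github.com/JuliaLang/julia/issues/43688).
n_unknown = count(a -> a.type == Profile.Allocs.UnknownType, results.allocs)
@show n_unknown

# Reset the stored profile before the next run.
Profile.Allocs.clear()
```

Note that with the `try`/`finally` change to `_prof_expr` in this patch, `stop()` runs even if the profiled expression throws, so a later `fetch()` still returns the samples collected up to the error.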