Skip to content

Commit

Permalink
Backport latest src/gc-alloc-profiler.* stdlib/Profile/*/allocs.jl
Browse files Browse the repository at this point in the history
  • Loading branch information
NHDaly committed Feb 10, 2022
1 parent 7a65f99 commit a96ad14
Show file tree
Hide file tree
Showing 3 changed files with 141 additions and 43 deletions.
2 changes: 1 addition & 1 deletion src/gc-alloc-profiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ void _maybe_record_alloc_to_profile(jl_value_t *val, size_t size, jl_datatype_t

extern int g_alloc_profile_enabled;

#define jl_gc_unknown_type_tag ((uintptr_t)0xdeadaa03)
#define jl_gc_unknown_type_tag ((jl_datatype_t*)0xdeadaa03)

static inline void maybe_record_alloc_to_profile(jl_value_t *val, size_t size, jl_datatype_t *typ) JL_NOTSAFEPOINT {
if (__unlikely(g_alloc_profile_enabled)) {
Expand Down
60 changes: 18 additions & 42 deletions stdlib/Profile/src/Allocs.jl
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,18 @@ julia> results = Profile.Allocs.fetch()
julia> last(sort(results.allocs, by=x->x.size))
Profile.Allocs.Alloc(Vector{Any}, Base.StackTraces.StackFrame[_new_array_ at array.c:127, ...], 5576)
```
!!! note
The current implementation of the Allocations Profiler does not
capture types for all allocations. Allocations for which the profiler
could not capture the type are represented as having type
`Profile.Allocs.UnknownType`.
You can read more about the missing types and the plan to improve this, here:
https://github.com/JuliaLang/julia/issues/43688.
!!! compat "Julia 1.8"
The allocation profiler was added in Julia 1.8.
"""
macro profile(opts, ex)
_prof_expr(ex, opts)
Expand All @@ -52,18 +64,14 @@ macro profile(ex)
_prof_expr(ex, :(sample_rate=0.0001))
end

# globals used for tracking how many allocs we're missing
# vs the alloc counters used by @time
const _g_gc_num_before = Ref{Base.GC_Num}()
const _g_sample_rate = Ref{Real}()
const _g_expected_sampled_allocs = Ref{Float64}(0)

function _prof_expr(expr, opts)
quote
$start(; $(esc(opts)))
local res = $(esc(expr))
$stop()
res
try
$(esc(expr))
finally
$stop()
end
end
end

Expand All @@ -75,9 +83,6 @@ A sample rate of 1.0 will record everything; 0.0 will record nothing.
"""
function start(; sample_rate::Real)
ccall(:jl_start_alloc_profile, Cvoid, (Cdouble,), Float64(sample_rate))

_g_sample_rate[] = sample_rate
_g_gc_num_before[] = Base.gc_num()
end

"""
Expand All @@ -87,15 +92,6 @@ Stop recording allocations.
"""
function stop()
ccall(:jl_stop_alloc_profile, Cvoid, ())

# increment a counter of how many allocs we would expect
# the memory profiler to see, based on how many allocs
# actually happened.
gc_num_after = Base.gc_num()
gc_diff = Base.GC_Diff(gc_num_after, _g_gc_num_before[])
alloc_count = Base.gc_alloc_count(gc_diff)
expected_samples = alloc_count * _g_sample_rate[]
_g_expected_sampled_allocs[] += expected_samples
end

"""
Expand All @@ -105,8 +101,6 @@ Clear all previously profiled allocation information from memory.
"""
function clear()
ccall(:jl_free_alloc_profile, Cvoid, ())

_g_expected_sampled_allocs[] = 0
return nothing
end

Expand All @@ -118,25 +112,7 @@ objects which can be analyzed.
"""
function fetch()
raw_results = ccall(:jl_fetch_alloc_profile, RawResults, ())
decoded_results = decode(raw_results)

# avoid divide-by-0 errors
if _g_expected_sampled_allocs[] > 0
missed_allocs = max(0, _g_expected_sampled_allocs[] - length(decoded_results.allocs))
missed_percentage = max(0, round(Int, missed_allocs / _g_expected_sampled_allocs[] * 100))
if missed_percentage > 0
@warn("The allocation profiler is not fully implemented, and missed approximately" *
" $(missed_percentage)% (estimated $(round(Int, missed_allocs)) / $(round(Int,
_g_expected_sampled_allocs[]))) " *
"of sampled allocs in the last run. " *
"For more info see https://github.com/JuliaLang/julia/issues/43688")
else
@warn("The allocation profiler is not fully implemented, and may have missed" *
" some of the allocs. " *
"For more info see https://github.com/JuliaLang/julia/issues/43688")
end
end
return decoded_results
return decode(raw_results)
end

# decoded results
Expand Down
122 changes: 122 additions & 0 deletions stdlib/Profile/test/allocs.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
using Test
using Profile: Allocs

@testset "alloc profiler doesn't segfault" begin
res = Allocs.@profile sample_rate=1.0 begin
# test the allocations during compilation
using Base64
end
profile = Allocs.fetch()

@test length(profile.allocs) > 0
first_alloc = profile.allocs[1]
@test first_alloc.size > 0
@test length(first_alloc.stacktrace) > 0
@test length(string(first_alloc.type)) > 0
end

@testset "alloc profiler works when there are multiple tasks on multiple threads" begin
NUM_TASKS = 1000

# This test is only really meaningful if we're running on
# multiple threads, but this isn't true on the windows tests,
# causing them to fail. So, commenting this assertion out.
# @test Threads.nthreads() > 1

function do_work()
ch = Channel{Vector{Float64}}(Inf)
@sync for i in 1:NUM_TASKS
Threads.@spawn begin
# generate garbage
put!(ch, zeros(100))
end
end
close(ch)
end

# call once to make sure it's compiled
precompile(do_work, ())
do_work()

res = Allocs.@profile sample_rate=1 begin
do_work()
end
profile = Allocs.fetch()

# expecting at least 2 allocations per task:
# 1. the task
# 2. the vector
@test length(profile.allocs) >= 2*NUM_TASKS
first_alloc = profile.allocs[1]
@test first_alloc.size > 0
@test length(first_alloc.stacktrace) > 0
@test length(string(first_alloc.type)) > 0

@testset for type in (Task, Vector{Float64},)
@test length(filter(a->a.type <: type, profile.allocs)) >= NUM_TASKS
end

# TODO: it would be nice to assert that these tasks
# were actually scheduled onto multiple threads,
# and we see allocs from all threads in the profile
end

@testset "alloc profiler start stop fetch clear" begin
function do_work()
# Compiling allocates a lot
for f in (gensym() for _ in 1:10)
@eval begin
$f() = 10
$f()
end
end
end

Allocs.@profile sample_rate=1 do_work()
@test length(Allocs.fetch().allocs) > 10

Allocs.clear()
@test length(Allocs.fetch().allocs) == 0
Allocs.clear()
@test length(Allocs.fetch().allocs) == 0

Allocs.@profile sample_rate=1 do_work()
curr_allocs = length(Allocs.fetch().allocs)
@test curr_allocs > 10

# Do _more_ work, adding into the same profile
Allocs.@profile sample_rate=1 do_work()
@test length(Allocs.fetch().allocs) > curr_allocs

Allocs.clear()
@test length(Allocs.fetch().allocs) == 0

# Clear without fetching

Allocs.@profile sample_rate=1 do_work()
Allocs.clear()
@test length(Allocs.fetch().allocs) == 0

# And things still work like normal afterwards

Allocs.@profile sample_rate=1 do_work()
Allocs.@profile sample_rate=1 do_work()
Allocs.@profile sample_rate=1 do_work()
@test length(Allocs.fetch().allocs) > 10

Allocs.@profile sample_rate=1 do_work()
Allocs.@profile sample_rate=1 do_work()
@test length(Allocs.fetch().allocs) > 10

Allocs.clear()
end

@testset "alloc profiler catches strings" begin
Allocs.@profile sample_rate=1 "$(rand())"

prof = Allocs.fetch()
Allocs.clear()

@test length(prof.allocs) >= 1
@test length([a for a in prof.allocs if a.type == String]) >= 1
end

0 comments on commit a96ad14

Please sign in to comment.