Skip to content

Commit

Permalink
[Profile] add builtin Allocs.print and formatting equivalents to Prof…
Browse files Browse the repository at this point in the history
…ile.print

Defines a converter for Allocs to the types implemented by Profile for
printing, allowing flamegraph report generation of allocations with just
the stdlib tooling.

Make these methods public as well, so that users can access the
documentation without getting warnings.

Refs: #42768
  • Loading branch information
vtjnash committed Nov 1, 2023
1 parent 5db9dbd commit b6a843d
Show file tree
Hide file tree
Showing 5 changed files with 257 additions and 27 deletions.
1 change: 1 addition & 0 deletions stdlib/Profile/docs/src/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ The methods in `Profile.Allocs` are not exported and need to be called e.g. as `

```@docs
Profile.Allocs.clear
Profile.Allocs.print
Profile.Allocs.fetch
Profile.Allocs.start
Profile.Allocs.stop
Expand Down
206 changes: 205 additions & 1 deletion stdlib/Profile/src/Allocs.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
module Allocs

global print # Allocs.print is separate from both Base.print and Profile.print
public @profile,
clear,
print,
fetch

using ..Profile: Profile, ProfileFormat, StackFrameTree, print_flat, print_tree
using Base.StackTraces: StackTrace, StackFrame, lookup
using Base: InterpreterIP

Expand Down Expand Up @@ -138,7 +145,7 @@ end
# Without this, the Alloc's stacktrace prints for lines and lines and lines...
function Base.show(io::IO, a::Alloc)
stacktrace_sample = length(a.stacktrace) >= 1 ? "$(a.stacktrace[1]), ..." : ""
print(io, "$Alloc($(a.type), $StackFrame[$stacktrace_sample], $(a.size))")
Base.print(io, "$Alloc($(a.type), $StackFrame[$stacktrace_sample], $(a.size))")
end

const BacktraceCache = Dict{BTElement,Vector{StackFrame}}
Expand Down Expand Up @@ -216,4 +223,201 @@ function stacktrace_memoized(
return stack
end

function warning_empty()
@warn """
There were no samples collected.
Run your program longer (perhaps by running it multiple times),
or adjust the frequency of samples to record every event with
the `sample_rate=1.0` kwarg."""
end


"""
Profile.Allocs.print([io::IO = stdout,] [data::AllocResults = fetch()]; kwargs...)
Prints profiling results to `io` (by default, `stdout`). If you do not
supply a `data` vector, the internal buffer of accumulated backtraces
will be used.
See `Profile.print` for an explanation of the valid keyword arguments.
"""
print(; kwargs...) =
Profile.print(stdout, fetch(); kwargs...)
print(io::IO; kwargs...) =
Profile.print(io, fetch(); kwargs...)
print(io::IO, data::AllocResults; kwargs...) =
Profile.print(io, data; kwargs...)
Profile.print(data::AllocResults; kwargs...) =
Profile.print(stdout, data; kwargs...)

function Profile.print(io::IO,
data::AllocResults,
;
format = :tree,
C = false,
#combine = true,
maxdepth::Int = typemax(Int),
mincount::Int = 0,
noisefloor = 0,
sortedby::Symbol = :filefuncline,
groupby::Union{Symbol,AbstractVector{Symbol}} = :none,
recur::Symbol = :off,
)
pf = ProfileFormat(;C, maxdepth, mincount, noisefloor, sortedby, recur)
Profile.print(io, data, pf, format)
return
end

function Profile.print(io::IO, data::AllocResults, fmt::ProfileFormat, format::Symbol)
cols::Int = Base.displaysize(io)[2]
fmt.recur (:off, :flat, :flatc) || throw(ArgumentError("recur value not recognized"))
data = data.allocs
if format === :tree
tree(io, data, cols, fmt)
elseif format === :flat
fmt.recur === :off || throw(ArgumentError("format flat only implements recur=:off"))
flat(io, data, cols, fmt)
else
throw(ArgumentError("output format $(repr(format)) not recognized"))
end
nothing
end


function parse_flat(::Type{T}, data::Vector{Alloc}, C::Bool) where T
lilist = StackFrame[]
n = Int[]
m = Int[]
lilist_idx = Dict{T, Int}()
recursive = Set{T}()
totalbytes = 0
for r in data
first = true
empty!(recursive)
nb = r.size # or 1 for counting
totalbytes += nb
for frame in r.stacktrace
!C && frame.from_c && continue
key = (T === UInt64 ? ip : frame)
idx = get!(lilist_idx, key, length(lilist) + 1)
if idx > length(lilist)
push!(recursive, key)
push!(lilist, frame)
push!(n, nb)
push!(m, 0)
elseif !(key in recursive)
push!(recursive, key)
n[idx] += nb
end
if first
m[idx] += nb
first = false
end
end
end
@assert length(lilist) == length(n) == length(m) == length(lilist_idx)
return (lilist, n, m, totalbytes)
end

function flat(io::IO, data::Vector{Alloc}, cols::Int, fmt::ProfileFormat)
fmt.combine || error(ArgumentError("combine=false"))
lilist, n, m, totalbytes = parse_flat(fmt.combine ? StackFrame : UInt64, data, fmt.C)
filenamemap = Dict{Symbol,String}()
if isempty(lilist)
warning_empty()
return true
end
print_flat(io, lilist, n, m, cols, filenamemap, fmt)
Base.println(io, "Total snapshots: ", length(data))
Base.println(io, "Total bytes: ", totalbytes)
return false
end

function tree!(root::StackFrameTree{T}, all::Vector{Alloc}, C::Bool, recur::Symbol) where {T}
tops = Vector{StackFrameTree{T}}()
build = Dict{T, StackFrameTree{T}}()
for r in all
first = true
nb = r.size # or 1 for counting
root.recur = 0
root.count += nb
parent = root
for i in reverse(eachindex(r.stacktrace))
frame = r.stacktrace[i]
key = (T === UInt64 ? ip : frame)
if (recur === :flat && !frame.from_c) || recur === :flatc
# see if this frame already has a parent
this = get!(build, frame, parent)
if this !== parent
# Rewind the `parent` tree back, if this exact ip (FIXME) was already present *higher* in the current tree
push!(tops, parent)
parent = this
end
end
!C && frame.from_c && continue
this = get!(StackFrameTree{T}, parent.down, key)
if recur === :off || this.recur == 0
this.frame = frame
this.up = parent
this.count += nb
this.recur = 1
else
this.count_recur += 1
end
parent = this
end
parent.overhead += nb
if recur !== :off
# We mark all visited nodes to so we'll only count those branches
# once for each backtrace. Reset that now for the next backtrace.
empty!(build)
push!(tops, parent)
for top in tops
while top.recur != 0
top.max_recur < top.recur && (top.max_recur = top.recur)
top.recur = 0
top = top.up
end
end
empty!(tops)
end
let this = parent
while this !== root
this.flat_count += nb
this = this.up
end
end
end
function cleanup!(node::StackFrameTree)
stack = [node]
while !isempty(stack)
node = pop!(stack)
node.recur = 0
empty!(node.builder_key)
empty!(node.builder_value)
append!(stack, values(node.down))
end
nothing
end
cleanup!(root)
return root
end

function tree(io::IO, data::Vector{Alloc}, cols::Int, fmt::ProfileFormat)
fmt.combine || error(ArgumentError("combine=false"))
if fmt.combine
root = tree!(StackFrameTree{StackFrame}(), data, fmt.C, fmt.recur)
else
root = tree!(StackFrameTree{UInt64}(), data, fmt.C, fmt.recur)
end
print_tree(io, root, cols, fmt, false)
if isempty(root.down)
warning_empty()
return true
end
Base.println(io, "Total snapshots: ", length(data))
Base.println(io, "Total bytes: ", root.count)
return false
end

end
26 changes: 20 additions & 6 deletions stdlib/Profile/src/Profile.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,20 @@ Profiling support, main entry point is the [`@profile`](@ref) macro.
"""
module Profile

global print
public @profile,
clear,
print,
fetch,
retrieve,
add_fake_meta,
flatten,
callers,
init,
take_heap_snapshot,
clear_malloc_data,
Allocs

import Base.StackTraces: lookup, UNKNOWN, show_spec_linfo, StackFrame

const nmeta = 4 # number of metadata fields per block (threadid, taskid, cpu_cycle_clock, thread_sleeping)
Expand Down Expand Up @@ -220,7 +234,7 @@ function print(io::IO,

pf = ProfileFormat(;C, combine, maxdepth, mincount, noisefloor, sortedby, recur)
if groupby === :none
print(io, data, lidict, pf, format, threads, tasks, false)
print_group(io, data, lidict, pf, format, threads, tasks, false)
else
if !in(groupby, [:thread, :task, [:task, :thread], [:thread, :task]])
error(ArgumentError("Unrecognized groupby option: $groupby. Options are :none (default), :task, :thread, [:task, :thread], or [:thread, :task]"))
Expand All @@ -244,7 +258,7 @@ function print(io::IO,
printstyled(io, "Task $(Base.repr(taskid))$nl"; bold=true, color=Base.debug_color())
for threadid in threadids
printstyled(io, " Thread $threadid "; bold=true, color=Base.info_color())
nosamples = print(io, data, lidict, pf, format, threadid, taskid, true)
nosamples = print_group(io, data, lidict, pf, format, threadid, taskid, true)
nosamples && (any_nosamples = true)
println(io)
end
Expand All @@ -262,7 +276,7 @@ function print(io::IO,
printstyled(io, "Thread $threadid$nl"; bold=true, color=Base.info_color())
for taskid in taskids
printstyled(io, " Task $(Base.repr(taskid)) "; bold=true, color=Base.debug_color())
nosamples = print(io, data, lidict, pf, format, threadid, taskid, true)
nosamples = print_group(io, data, lidict, pf, format, threadid, taskid, true)
nosamples && (any_nosamples = true)
println(io)
end
Expand All @@ -274,7 +288,7 @@ function print(io::IO,
isempty(taskids) && (any_nosamples = true)
for taskid in taskids
printstyled(io, "Task $(Base.repr(taskid)) "; bold=true, color=Base.debug_color())
nosamples = print(io, data, lidict, pf, format, threads, taskid, true)
nosamples = print_group(io, data, lidict, pf, format, threads, taskid, true)
nosamples && (any_nosamples = true)
println(io)
end
Expand All @@ -284,7 +298,7 @@ function print(io::IO,
isempty(threadids) && (any_nosamples = true)
for threadid in threadids
printstyled(io, "Thread $threadid "; bold=true, color=Base.info_color())
nosamples = print(io, data, lidict, pf, format, threadid, tasks, true)
nosamples = print_group(io, data, lidict, pf, format, threadid, tasks, true)
nosamples && (any_nosamples = true)
println(io)
end
Expand All @@ -306,7 +320,7 @@ See `Profile.print([io], data)` for an explanation of the valid keyword argument
print(data::Vector{<:Unsigned} = fetch(), lidict::Union{LineInfoDict, LineInfoFlatDict} = getdict(data); kwargs...) =
print(stdout, data, lidict; kwargs...)

function print(io::IO, data::Vector{<:Unsigned}, lidict::Union{LineInfoDict, LineInfoFlatDict}, fmt::ProfileFormat,
function print_group(io::IO, data::Vector{<:Unsigned}, lidict::Union{LineInfoDict, LineInfoFlatDict}, fmt::ProfileFormat,
format::Symbol, threads::Union{Int,AbstractVector{Int}}, tasks::Union{UInt,AbstractVector{UInt}},
is_subsection::Bool = false)
cols::Int = Base.displaysize(io)[2]
Expand Down
21 changes: 21 additions & 0 deletions stdlib/Profile/test/allocs.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
using Test
using Profile: Allocs

Allocs.clear()
let iobuf = IOBuffer()
for format in (:tree, :flat)
Test.@test_logs (:warn, r"^There were no samples collected\.") Allocs.print(iobuf; format, C=true)
end
end

@testset "alloc profiler doesn't segfault" begin
res = Allocs.@profile sample_rate=1.0 begin
# test the allocations during compilation
Expand All @@ -13,6 +20,20 @@ using Profile: Allocs
@test first_alloc.size > 0
@test length(first_alloc.stacktrace) > 0
@test length(string(first_alloc.type)) > 0

# test printing options
for options in ((format=:tree, C=true),
(format=:tree, maxdepth=2),
(format=:flat, C=true),
(),
(format=:flat, sortedby=:count),
(format=:tree, recur=:flat),
)
iobuf = IOBuffer()
Allocs.print(iobuf; options...)
str = String(take!(iobuf))
@test !isempty(str)
end
end

@testset "alloc profiler works when there are multiple tasks on multiple threads" begin
Expand Down
30 changes: 10 additions & 20 deletions stdlib/Profile/test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -38,28 +38,18 @@ let r = Profile.retrieve()
end
end

let iobuf = IOBuffer()
Profile.print(iobuf, format=:tree, C=true)
str = String(take!(iobuf))
@test !isempty(str)
truncate(iobuf, 0)
Profile.print(iobuf, format=:tree, maxdepth=2)
str = String(take!(iobuf))
@test !isempty(str)
truncate(iobuf, 0)
Profile.print(iobuf, format=:flat, C=true)
str = String(take!(iobuf))
@test !isempty(str)
truncate(iobuf, 0)
Profile.print(iobuf)
@test !isempty(String(take!(iobuf)))
truncate(iobuf, 0)
Profile.print(iobuf, format=:flat, sortedby=:count)
@test !isempty(String(take!(iobuf)))
Profile.print(iobuf, format=:tree, recur=:flat)
# test printing options
for options in ((format=:tree, C=true),
(format=:tree, maxdepth=2),
(format=:flat, C=true),
(),
(format=:flat, sortedby=:count),
(format=:tree, recur=:flat),
)
iobuf = IOBuffer()
Profile.print(iobuf; options...)
str = String(take!(iobuf))
@test !isempty(str)
truncate(iobuf, 0)
end

@testset "Profile.print() groupby options" begin
Expand Down

0 comments on commit b6a843d

Please sign in to comment.