Skip to content

Commit c16472b

Browse files
Change to streaming out the heap snapshot data (#52854)
This PR is to continue the work on the following PR: Prevent OOMs during heap snapshot: Change to streaming out the snapshot data (#51518 ) Here are the commit history: ``` * Streaming the heap snapshot! This should prevent the engine from OOMing while recording the snapshot! Now we just need to sample the files, either online, before downloading, or offline after downloading :) If we're gonna do it offline, we'll want to gzip the files before downloading them. * Allow custom filename; use original API * Support legacy heap snapshot interface. Add reassembly function. * Add tests * Apply suggestions from code review * Update src/gc-heap-snapshot.cpp * Change to always save the parts in the same directory This way you can always recover from an OOM * Fix bug in reassembler: from_node and to_node were in the wrong order * Fix correctness mistake: The edges have to be reordered according to the node order. That's the whole reason this is tricky. But i'm not sure now whether the SoAs approach is actually an optimization.... It seems like we should probably prefer to inline the Edges right into the vector, rather than having to do another random lookup into the edges table? * Debugging messed up edge array idxs * Disable log message * Write the .nodes and .edges as binary data * Remove unnecessary logging * fix merge issues * attempt to add back the orphan node checking logic ``` --------- Co-authored-by: Nathan Daly <nathan.daly@relational.ai> Co-authored-by: Nathan Daly <NHDaly@gmail.com>
1 parent 8b88e3b commit c16472b

File tree

7 files changed

+571
-203
lines changed

7 files changed

+571
-203
lines changed

src/gc-heap-snapshot.cpp

+204-193
Large diffs are not rendered by default.

src/gc-heap-snapshot.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,8 @@ static inline void gc_heap_snapshot_record_finlist(jl_value_t *finlist, size_t i
120120
// ---------------------------------------------------------------------
121121
// Functions to call from Julia to take heap snapshot
122122
// ---------------------------------------------------------------------
123-
JL_DLLEXPORT void jl_gc_take_heap_snapshot(ios_t *stream, char all_one);
123+
JL_DLLEXPORT void jl_gc_take_heap_snapshot(ios_t *nodes, ios_t *edges,
124+
ios_t *strings, ios_t *json, char all_one);
124125

125126

126127
#ifdef __cplusplus

stdlib/Profile/docs/src/index.md

+19
Original file line numberDiff line numberDiff line change
@@ -134,5 +134,24 @@ Traces and records julia objects on the heap. This only records objects known to
134134
garbage collector. Memory allocated by external libraries not managed by the garbage
135135
collector will not show up in the snapshot.
136136

137+
To avoid OOMing while recording the snapshot, we added a streaming option to stream out the heap snapshot
138+
into four files,
139+
140+
```julia-repl
141+
julia> using Profile
142+
143+
julia> Profile.take_heap_snapshot("snapshot"; streaming=true)
144+
```
145+
146+
where "snapshot" is the filepath as the prefix for the generated files.
147+
148+
Once the snapshot files are generated, they could be assembled offline with the following command:
149+
150+
```julia-repl
151+
julia> using Profile
152+
153+
julia> Profile.HeapSnapshot.assemble_snapshot("snapshot", "snapshot.heapsnapshot")
154+
```
155+
137156
The resulting heap snapshot file can be uploaded to chrome devtools to be viewed.
138157
For more information, see the [chrome devtools docs](https://developer.chrome.com/docs/devtools/memory-problems/heap-snapshots/#view_snapshots).

stdlib/Profile/src/Profile.jl

+60-9
Original file line numberDiff line numberDiff line change
@@ -1245,9 +1245,8 @@ end
12451245

12461246

12471247
"""
1248-
Profile.take_heap_snapshot(io::IOStream, all_one::Bool=false)
1249-
Profile.take_heap_snapshot(filepath::String, all_one::Bool=false)
1250-
Profile.take_heap_snapshot(all_one::Bool=false; dir::String)
1248+
Profile.take_heap_snapshot(filepath::String, all_one::Bool=false, streaming=false)
1249+
Profile.take_heap_snapshot(all_one::Bool=false; dir::String, streaming=false)
12511250
12521251
Write a snapshot of the heap, in the JSON format expected by the Chrome
12531252
Devtools Heap Snapshot viewer (.heapsnapshot extension) to a file
@@ -1257,16 +1256,67 @@ full file path, or IO stream.
12571256
12581257
If `all_one` is true, then report the size of every object as one so they can be easily
12591258
counted. Otherwise, report the actual size.
1259+
1260+
If `streaming` is true, we will stream the snapshot data out into four files, using filepath
1261+
as the prefix, to avoid having to hold the entire snapshot in memory. This option should be
1262+
used for any setting where your memory is constrained. These files can then be reassembled
1263+
by calling Profile.HeapSnapshot.assemble_snapshot(), which can
1264+
be done offline.
1265+
1266+
NOTE: We strongly recommend setting streaming=true for performance reasons. Reconstructing
1267+
the snapshot from the parts requires holding the entire snapshot in memory, so if the
1268+
snapshot is large, you can run out of memory while processing it. Streaming allows you to
1269+
reconstruct the snapshot offline, after your workload is done running.
1270+
If you do attempt to collect a snapshot with streaming=false (the default, for
1271+
backwards-compatibility) and your process is killed, note that this will always save the
1272+
parts in the same directory as your provided filepath, so you can still reconstruct the
1273+
snapshot after the fact, via `assemble_snapshot()`.
12601274
"""
1261-
function take_heap_snapshot(io::IOStream, all_one::Bool=false)
1262-
Base.@_lock_ios(io, ccall(:jl_gc_take_heap_snapshot, Cvoid, (Ptr{Cvoid}, Cchar), io.handle, Cchar(all_one)))
1263-
end
1264-
function take_heap_snapshot(filepath::String, all_one::Bool=false)
1265-
open(filepath, "w") do io
1266-
take_heap_snapshot(io, all_one)
1275+
function take_heap_snapshot(filepath::AbstractString, all_one::Bool=false; streaming::Bool=false)
1276+
if streaming
1277+
_stream_heap_snapshot(filepath, all_one)
1278+
else
1279+
# Support the legacy, non-streaming mode, by first streaming the parts, then
1280+
# reassembling it after we're done.
1281+
prefix = filepath
1282+
_stream_heap_snapshot(prefix, all_one)
1283+
Profile.HeapSnapshot.assemble_snapshot(prefix, filepath)
12671284
end
12681285
return filepath
12691286
end
1287+
function take_heap_snapshot(io::IO, all_one::Bool=false)
1288+
# Support the legacy, non-streaming mode, by first streaming the parts to a tempdir,
1289+
# then reassembling it after we're done.
1290+
dir = tempdir()
1291+
prefix = joinpath(dir, "snapshot")
1292+
_stream_heap_snapshot(prefix, all_one)
1293+
Profile.HeapSnapshot.assemble_snapshot(prefix, io)
1294+
end
1295+
function _stream_heap_snapshot(prefix::AbstractString, all_one::Bool)
1296+
# Nodes and edges are binary files
1297+
open("$prefix.nodes", "w") do nodes
1298+
open("$prefix.edges", "w") do edges
1299+
open("$prefix.strings", "w") do strings
1300+
# The following file is json data
1301+
open("$prefix.metadata.json", "w") do json
1302+
Base.@_lock_ios(nodes,
1303+
Base.@_lock_ios(edges,
1304+
Base.@_lock_ios(strings,
1305+
Base.@_lock_ios(json,
1306+
ccall(:jl_gc_take_heap_snapshot,
1307+
Cvoid,
1308+
(Ptr{Cvoid},Ptr{Cvoid},Ptr{Cvoid},Ptr{Cvoid}, Cchar),
1309+
nodes.handle, edges.handle, strings.handle, json.handle,
1310+
Cchar(all_one))
1311+
)
1312+
)
1313+
)
1314+
)
1315+
end
1316+
end
1317+
end
1318+
end
1319+
end
12701320
function take_heap_snapshot(all_one::Bool=false; dir::Union{Nothing,S}=nothing) where {S <: AbstractString}
12711321
fname = "$(getpid())_$(time_ns()).heapsnapshot"
12721322
if isnothing(dir)
@@ -1302,6 +1352,7 @@ function take_page_profile(filepath::String)
13021352
end
13031353

13041354
include("Allocs.jl")
1355+
include("heapsnapshot_reassemble.jl")
13051356
include("precompile.jl")
13061357

13071358
end # module
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,231 @@
1+
# This file is a part of Julia. License is MIT: https://julialang.org/license
2+
3+
module HeapSnapshot
4+
5+
"""
6+
assemble_snapshot(filepath::AbstractString, out_file::AbstractString)
7+
8+
Assemble a .heapsnapshot file from the .json files produced by `Profile.take_snapshot`.
9+
"""
10+
11+
# SoA layout to reduce padding
12+
struct Edges
13+
type::Vector{Int8} # index into `snapshot.meta.edge_types`
14+
name_or_index::Vector{UInt} # Either an index into `snapshot.strings`, or the index in an array, depending on edge_type
15+
to_pos::Vector{UInt} # index into `snapshot.nodes`
16+
end
17+
function Edges(n::Int)
18+
Edges(
19+
Vector{Int8}(undef, n),
20+
Vector{UInt}(undef, n),
21+
Vector{UInt}(undef, n),
22+
)
23+
end
24+
Base.length(n::Edges) = length(n.type)
25+
26+
# trace_node_id and detachedness are always 0 in the snapshots Julia produces so we don't store them
27+
struct Nodes
28+
type::Vector{Int8} # index into `snapshot.meta.node_types`
29+
name_idx::Vector{UInt32} # index into `snapshot.strings`
30+
id::Vector{UInt} # unique id, in julia it is the address of the object
31+
self_size::Vector{Int} # size of the object itself, not including the size of its fields
32+
edge_count::Vector{UInt} # number of outgoing edges
33+
edges::Edges # outgoing edges
34+
# This is the main complexity of the .heapsnapshot format, and it's the reason we need
35+
# to read in all the data before writing it out. The edges vector contains all edges,
36+
# but organized by which node they came from. First, it contains all the edges coming
37+
# out of node 0, then all edges leaving node 1, etc. So we need to have visited all
38+
# edges, and assigned them to their corresponding nodes, before we can emit the file.
39+
edge_idxs::Vector{Vector{UInt}} # indexes into edges, keeping per-node outgoing edge ids
40+
end
41+
function Nodes(n::Int, e::Int)
42+
Nodes(
43+
Vector{Int8}(undef, n),
44+
Vector{UInt32}(undef, n),
45+
Vector{UInt}(undef, n),
46+
Vector{Int}(undef, n),
47+
Vector{UInt32}(undef, n),
48+
Edges(e),
49+
[Vector{UInt}() for _ in 1:n], # Take care to construct n separate empty vectors
50+
)
51+
end
52+
Base.length(n::Nodes) = length(n.type)
53+
54+
const k_node_number_of_fields = 7
55+
56+
# Like Base.dec, but doesn't allocate a string and writes directly to the io object
57+
# We know all of the numbers we're about to write fit into a UInt and are non-negative
58+
let _dec_d100 = UInt16[(0x30 + i % 10) << 0x8 + (0x30 + i ÷ 10) for i = 0:99]
59+
global _write_decimal_number
60+
_write_decimal_number(io, x::Integer, buf) = _write_decimal_number(io, unsigned(x), buf)
61+
function _write_decimal_number(io, x::Unsigned, digits_buf)
62+
buf = digits_buf
63+
n = ndigits(x)
64+
i = n
65+
@inbounds while i >= 2
66+
d, r = divrem(x, 0x64)
67+
d100 = _dec_d100[(r % Int)::Int + 1]
68+
buf[i-1] = d100 % UInt8
69+
buf[i] = (d100 >> 0x8) % UInt8
70+
x = oftype(x, d)
71+
i -= 2
72+
end
73+
if i > 0
74+
@inbounds buf[i] = 0x30 + (rem(x, 0xa) % UInt8)::UInt8
75+
end
76+
write(io, @view buf[max(i, 1):n])
77+
end
78+
end
79+
80+
function assemble_snapshot(in_prefix, out_file::AbstractString = in_prefix)
81+
open(out_file, "w") do io
82+
assemble_snapshot(in_prefix, io)
83+
end
84+
end
85+
86+
# Manually parse and write the .json files, given that we don't have JSON import/export in
87+
# julia's stdlibs.
88+
function assemble_snapshot(in_prefix, io::IO)
89+
preamble = read(string(in_prefix, ".metadata.json"), String)
90+
pos = last(findfirst("node_count\":", preamble)) + 1
91+
endpos = findnext(==(','), preamble, pos) - 1
92+
node_count = parse(Int, String(@view preamble[pos:endpos]))
93+
94+
pos = last(findnext("edge_count\":", preamble, endpos)) + 1
95+
endpos = findnext(==('}'), preamble, pos) - 1
96+
edge_count = parse(Int, String(@view preamble[pos:endpos]))
97+
98+
nodes = Nodes(node_count, edge_count)
99+
100+
orphans = Set{UInt}() # nodes that have no incoming edges
101+
# Parse nodes with empty edge counts that we need to fill later
102+
nodes_file = open(string(in_prefix, ".nodes"), "r")
103+
for i in 1:length(nodes)
104+
node_type = read(nodes_file, Int8)
105+
node_name_idx = read(nodes_file, UInt)
106+
id = read(nodes_file, UInt)
107+
self_size = read(nodes_file, Int)
108+
@assert read(nodes_file, Int) == 0 # trace_node_id
109+
@assert read(nodes_file, Int8) == 0 # detachedness
110+
111+
nodes.type[i] = node_type
112+
nodes.name_idx[i] = node_name_idx
113+
nodes.id[i] = id
114+
nodes.self_size[i] = self_size
115+
nodes.edge_count[i] = 0 # edge_count
116+
# populate the orphans set with node index
117+
push!(orphans, i-1)
118+
end
119+
120+
# Parse the edges to fill in the edge counts for nodes and correct the to_node offsets
121+
edges_file = open(string(in_prefix, ".edges"), "r")
122+
for i in 1:length(nodes.edges)
123+
edge_type = read(edges_file, Int8)
124+
edge_name_or_index = read(edges_file, UInt)
125+
from_node = read(edges_file, UInt)
126+
to_node = read(edges_file, UInt)
127+
128+
nodes.edges.type[i] = edge_type
129+
nodes.edges.name_or_index[i] = edge_name_or_index
130+
nodes.edges.to_pos[i] = to_node * k_node_number_of_fields # 7 fields per node, the streaming format doesn't multiply the offset by 7
131+
nodes.edge_count[from_node + 1] += UInt32(1) # C and JSON use 0-based indexing
132+
push!(nodes.edge_idxs[from_node + 1], i) # Index into nodes.edges
133+
# remove the node from the orphans if it has at least one incoming edge
134+
if to_node in orphans
135+
delete!(orphans, to_node)
136+
end
137+
end
138+
139+
_digits_buf = zeros(UInt8, ndigits(typemax(UInt)))
140+
println(io, @view(preamble[1:end-2]), ",") # remove trailing "}\n", we don't end the snapshot here
141+
println(io, "\"nodes\":[")
142+
for i in 1:length(nodes)
143+
i > 1 && println(io, ",")
144+
_write_decimal_number(io, nodes.type[i], _digits_buf)
145+
print(io, ",")
146+
_write_decimal_number(io, nodes.name_idx[i], _digits_buf)
147+
print(io, ",")
148+
_write_decimal_number(io, nodes.id[i], _digits_buf)
149+
print(io, ",")
150+
_write_decimal_number(io, nodes.self_size[i], _digits_buf)
151+
print(io, ",")
152+
_write_decimal_number(io, nodes.edge_count[i], _digits_buf)
153+
print(io, ",0,0")
154+
end
155+
print(io, "],\"edges\":[")
156+
e = 1
157+
for n in 1:length(nodes)
158+
count = nodes.edge_count[n]
159+
len_edges = length(nodes.edge_idxs[n])
160+
@assert count == len_edges "For node $n: $count != $len_edges"
161+
for i in nodes.edge_idxs[n]
162+
e > 1 && print(io, ",")
163+
println(io)
164+
_write_decimal_number(io, nodes.edges.type[i], _digits_buf)
165+
print(io, ",")
166+
_write_decimal_number(io, nodes.edges.name_or_index[i], _digits_buf)
167+
print(io, ",")
168+
_write_decimal_number(io, nodes.edges.to_pos[i], _digits_buf)
169+
if !(nodes.edges.to_pos[i] % k_node_number_of_fields == 0)
170+
@warn "Bug in to_pos for edge $i from node $n: $(nodes.edges.to_pos[i])"
171+
end
172+
e += 1
173+
end
174+
end
175+
println(io, "],")
176+
177+
println(io, "\"strings\":[")
178+
open(string(in_prefix, ".strings"), "r") do strings_io
179+
first = true
180+
while !eof(strings_io)
181+
str_size = read(strings_io, UInt)
182+
str_bytes = read(strings_io, str_size)
183+
str = String(str_bytes)
184+
if first
185+
print_str_escape_json(io, str)
186+
first = false
187+
else
188+
print(io, ",\n")
189+
print_str_escape_json(io, str)
190+
end
191+
end
192+
end
193+
print(io, "]}")
194+
195+
# remove the uber node from the orphans
196+
if 0 in orphans
197+
delete!(orphans, 0)
198+
end
199+
200+
@assert isempty(orphans) "Orphaned nodes: $(orphans), node count: $(length(nodes)), orphan node count: $(length(orphans))"
201+
202+
return nothing
203+
end
204+
205+
function print_str_escape_json(stream::IO, s::AbstractString)
206+
print(stream, '"')
207+
for c in s
208+
if c == '"'
209+
print(stream, "\\\"")
210+
elseif c == '\\'
211+
print(stream, "\\\\")
212+
elseif c == '\b'
213+
print(stream, "\\b")
214+
elseif c == '\f'
215+
print(stream, "\\f")
216+
elseif c == '\n'
217+
print(stream, "\\n")
218+
elseif c == '\r'
219+
print(stream, "\\r")
220+
elseif c == '\t'
221+
print(stream, "\\t")
222+
elseif '\x00' <= c <= '\x1f'
223+
print(stream, "\\u", lpad(string(UInt16(c), base=16), 4, '0'))
224+
else
225+
print(stream, c)
226+
end
227+
end
228+
print(stream, '"')
229+
end
230+
231+
end

0 commit comments

Comments
 (0)