| 
 | 1 | +# This file is a part of Julia. License is MIT: https://julialang.org/license  | 
 | 2 | + | 
 | 3 | +module HeapSnapshot  | 
 | 4 | + | 
 | 5 | +"""  | 
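    assemble_snapshot(in_prefix, out_file::AbstractString = in_prefix)
    assemble_snapshot(in_prefix, io::IO)

Assemble a `.heapsnapshot` file from the files sharing the path prefix `in_prefix`
(`.metadata.json`, `.nodes`, `.edges` and `.strings`) produced by `Profile.take_snapshot`,
and write the result to `out_file` or to `io`.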
 | 9 | +"""  | 
 | 10 | + | 
 | 11 | +# SoA layout to reduce padding  | 
 | 12 | +struct Edges  | 
 | 13 | +    type::Vector{Int8}       # index into `snapshot.meta.edge_types`  | 
 | 14 | +    name_or_index::Vector{UInt} # Either an index into `snapshot.strings`, or the index in an array, depending on edge_type  | 
 | 15 | +    to_pos::Vector{UInt}   # index into `snapshot.nodes`  | 
 | 16 | +end  | 
 | 17 | +function Edges(n::Int)  | 
 | 18 | +    Edges(  | 
 | 19 | +        Vector{Int8}(undef, n),  | 
 | 20 | +        Vector{UInt}(undef, n),  | 
 | 21 | +        Vector{UInt}(undef, n),  | 
 | 22 | +    )  | 
 | 23 | +end  | 
 | 24 | +Base.length(n::Edges) = length(n.type)  | 
 | 25 | + | 
 | 26 | +# trace_node_id and detachedness are always 0 in the snapshots Julia produces so we don't store them  | 
 | 27 | +struct Nodes  | 
 | 28 | +    type::Vector{Int8}         # index into `snapshot.meta.node_types`  | 
 | 29 | +    name_idx::Vector{UInt32} # index into `snapshot.strings`  | 
 | 30 | +    id::Vector{UInt}           # unique id, in julia it is the address of the object  | 
 | 31 | +    self_size::Vector{Int}     # size of the object itself, not including the size of its fields  | 
 | 32 | +    edge_count::Vector{UInt} # number of outgoing edges  | 
 | 33 | +    edges::Edges               # outgoing edges  | 
 | 34 | +    # This is the main complexity of the .heapsnapshot format, and it's the reason we need  | 
 | 35 | +    # to read in all the data before writing it out. The edges vector contains all edges,  | 
 | 36 | +    # but organized by which node they came from. First, it contains all the edges coming  | 
 | 37 | +    # out of node 0, then all edges leaving node 1, etc. So we need to have visited all  | 
 | 38 | +    # edges, and assigned them to their corresponding nodes, before we can emit the file.  | 
 | 39 | +    edge_idxs::Vector{Vector{UInt}} # indexes into edges, keeping per-node outgoing edge ids  | 
 | 40 | +end  | 
 | 41 | +function Nodes(n::Int, e::Int)  | 
 | 42 | +    Nodes(  | 
 | 43 | +        Vector{Int8}(undef, n),  | 
 | 44 | +        Vector{UInt32}(undef, n),  | 
 | 45 | +        Vector{UInt}(undef, n),  | 
 | 46 | +        Vector{Int}(undef, n),  | 
 | 47 | +        Vector{UInt32}(undef, n),  | 
 | 48 | +        Edges(e),  | 
 | 49 | +        [Vector{UInt}() for _ in 1:n],  # Take care to construct n separate empty vectors  | 
 | 50 | +    )  | 
 | 51 | +end  | 
 | 52 | +Base.length(n::Nodes) = length(n.type)  | 
 | 53 | + | 
 | 54 | +const k_node_number_of_fields = 7  | 
 | 55 | + | 
 | 56 | +# Like Base.dec, but doesn't allocate a string and writes directly to the io object  | 
 | 57 | +# We know all of the numbers we're about to write fit into a UInt and are non-negative  | 
 | 58 | +let _dec_d100 = UInt16[(0x30 + i % 10) << 0x8 + (0x30 + i ÷ 10) for i = 0:99]  | 
 | 59 | +    global _write_decimal_number  | 
 | 60 | +    _write_decimal_number(io, x::Integer, buf) = _write_decimal_number(io, unsigned(x), buf)  | 
 | 61 | +    function _write_decimal_number(io, x::Unsigned, digits_buf)  | 
 | 62 | +        buf = digits_buf  | 
 | 63 | +        n = ndigits(x)  | 
 | 64 | +        i = n  | 
 | 65 | +        @inbounds while i >= 2  | 
 | 66 | +            d, r = divrem(x, 0x64)  | 
 | 67 | +            d100 = _dec_d100[(r % Int)::Int + 1]  | 
 | 68 | +            buf[i-1] = d100 % UInt8  | 
 | 69 | +            buf[i] = (d100 >> 0x8) % UInt8  | 
 | 70 | +            x = oftype(x, d)  | 
 | 71 | +            i -= 2  | 
 | 72 | +        end  | 
 | 73 | +        if i > 0  | 
 | 74 | +            @inbounds buf[i] = 0x30 + (rem(x, 0xa) % UInt8)::UInt8  | 
 | 75 | +        end  | 
 | 76 | +        write(io, @view buf[max(i, 1):n])  | 
 | 77 | +    end  | 
 | 78 | +end  | 
 | 79 | + | 
 | 80 | +function assemble_snapshot(in_prefix, out_file::AbstractString = in_prefix)  | 
 | 81 | +    open(out_file, "w") do io  | 
 | 82 | +        assemble_snapshot(in_prefix, io)  | 
 | 83 | +    end  | 
 | 84 | +end  | 
 | 85 | + | 
 | 86 | +# Manually parse and write the .json files, given that we don't have JSON import/export in  | 
 | 87 | +# julia's stdlibs.  | 
 | 88 | +function assemble_snapshot(in_prefix, io::IO)  | 
 | 89 | +    preamble = read(string(in_prefix, ".metadata.json"), String)  | 
 | 90 | +    pos = last(findfirst("node_count\":", preamble)) + 1  | 
 | 91 | +    endpos = findnext(==(','), preamble, pos) - 1  | 
 | 92 | +    node_count = parse(Int, String(@view preamble[pos:endpos]))  | 
 | 93 | + | 
 | 94 | +    pos = last(findnext("edge_count\":", preamble, endpos)) + 1  | 
 | 95 | +    endpos = findnext(==('}'), preamble, pos) - 1  | 
 | 96 | +    edge_count = parse(Int, String(@view preamble[pos:endpos]))  | 
 | 97 | + | 
 | 98 | +    nodes = Nodes(node_count, edge_count)  | 
 | 99 | + | 
 | 100 | +    orphans = Set{UInt}() # nodes that have no incoming edges  | 
 | 101 | +    # Parse nodes with empty edge counts that we need to fill later  | 
 | 102 | +    nodes_file = open(string(in_prefix, ".nodes"), "r")  | 
 | 103 | +    for i in 1:length(nodes)  | 
 | 104 | +        node_type = read(nodes_file, Int8)  | 
 | 105 | +        node_name_idx = read(nodes_file, UInt)  | 
 | 106 | +        id = read(nodes_file, UInt)  | 
 | 107 | +        self_size = read(nodes_file, Int)  | 
 | 108 | +        @assert read(nodes_file, Int) == 0 # trace_node_id  | 
 | 109 | +        @assert read(nodes_file, Int8) == 0 # detachedness  | 
 | 110 | + | 
 | 111 | +        nodes.type[i] = node_type  | 
 | 112 | +        nodes.name_idx[i] = node_name_idx  | 
 | 113 | +        nodes.id[i] = id  | 
 | 114 | +        nodes.self_size[i] = self_size  | 
 | 115 | +        nodes.edge_count[i] = 0 # edge_count  | 
 | 116 | +        # populate the orphans set with node index  | 
 | 117 | +        push!(orphans, i-1)  | 
 | 118 | +    end  | 
 | 119 | + | 
 | 120 | +    # Parse the edges to fill in the edge counts for nodes and correct the to_node offsets  | 
 | 121 | +    edges_file = open(string(in_prefix, ".edges"), "r")  | 
 | 122 | +    for i in 1:length(nodes.edges)  | 
 | 123 | +        edge_type = read(edges_file, Int8)  | 
 | 124 | +        edge_name_or_index = read(edges_file, UInt)  | 
 | 125 | +        from_node = read(edges_file, UInt)  | 
 | 126 | +        to_node = read(edges_file, UInt)  | 
 | 127 | + | 
 | 128 | +        nodes.edges.type[i] = edge_type  | 
 | 129 | +        nodes.edges.name_or_index[i] = edge_name_or_index  | 
 | 130 | +        nodes.edges.to_pos[i] = to_node * k_node_number_of_fields # 7 fields per node, the streaming format doesn't multiply the offset by 7  | 
 | 131 | +        nodes.edge_count[from_node + 1] += UInt32(1)  # C and JSON use 0-based indexing  | 
 | 132 | +        push!(nodes.edge_idxs[from_node + 1], i) # Index into nodes.edges  | 
 | 133 | +        # remove the node from the orphans if it has at least one incoming edge  | 
 | 134 | +        if to_node in orphans  | 
 | 135 | +            delete!(orphans, to_node)  | 
 | 136 | +        end  | 
 | 137 | +    end  | 
 | 138 | + | 
 | 139 | +    _digits_buf = zeros(UInt8, ndigits(typemax(UInt)))  | 
 | 140 | +    println(io, @view(preamble[1:end-2]), ",") # remove trailing "}\n", we don't end the snapshot here  | 
 | 141 | +    println(io, "\"nodes\":[")  | 
 | 142 | +    for i in 1:length(nodes)  | 
 | 143 | +        i > 1 && println(io, ",")  | 
 | 144 | +        _write_decimal_number(io, nodes.type[i], _digits_buf)  | 
 | 145 | +        print(io, ",")  | 
 | 146 | +        _write_decimal_number(io, nodes.name_idx[i], _digits_buf)  | 
 | 147 | +        print(io, ",")  | 
 | 148 | +        _write_decimal_number(io, nodes.id[i], _digits_buf)  | 
 | 149 | +        print(io, ",")  | 
 | 150 | +        _write_decimal_number(io, nodes.self_size[i], _digits_buf)  | 
 | 151 | +        print(io, ",")  | 
 | 152 | +        _write_decimal_number(io, nodes.edge_count[i], _digits_buf)  | 
 | 153 | +        print(io, ",0,0")  | 
 | 154 | +    end  | 
 | 155 | +    print(io, "],\"edges\":[")  | 
 | 156 | +    e = 1  | 
 | 157 | +    for n in 1:length(nodes)  | 
 | 158 | +        count = nodes.edge_count[n]  | 
 | 159 | +        len_edges = length(nodes.edge_idxs[n])  | 
 | 160 | +        @assert count == len_edges "For node $n: $count != $len_edges"  | 
 | 161 | +        for i in nodes.edge_idxs[n]  | 
 | 162 | +            e > 1 && print(io, ",")  | 
 | 163 | +            println(io)  | 
 | 164 | +            _write_decimal_number(io, nodes.edges.type[i], _digits_buf)  | 
 | 165 | +            print(io, ",")  | 
 | 166 | +            _write_decimal_number(io, nodes.edges.name_or_index[i], _digits_buf)  | 
 | 167 | +            print(io, ",")  | 
 | 168 | +            _write_decimal_number(io, nodes.edges.to_pos[i], _digits_buf)  | 
 | 169 | +            if !(nodes.edges.to_pos[i] % k_node_number_of_fields == 0)  | 
 | 170 | +                @warn "Bug in to_pos for edge $i from node $n: $(nodes.edges.to_pos[i])"  | 
 | 171 | +            end  | 
 | 172 | +            e += 1  | 
 | 173 | +        end  | 
 | 174 | +    end  | 
 | 175 | +    println(io, "],")  | 
 | 176 | + | 
 | 177 | +    println(io, "\"strings\":[")  | 
 | 178 | +    open(string(in_prefix, ".strings"), "r") do strings_io  | 
 | 179 | +        first = true  | 
 | 180 | +        while !eof(strings_io)  | 
 | 181 | +            str_size = read(strings_io, UInt)  | 
 | 182 | +            str_bytes = read(strings_io, str_size)  | 
 | 183 | +            str = String(str_bytes)  | 
 | 184 | +            if first  | 
 | 185 | +                print_str_escape_json(io, str)  | 
 | 186 | +                first = false  | 
 | 187 | +            else  | 
 | 188 | +                print(io, ",\n")  | 
 | 189 | +                print_str_escape_json(io, str)  | 
 | 190 | +            end  | 
 | 191 | +        end  | 
 | 192 | +    end  | 
 | 193 | +    print(io, "]}")  | 
 | 194 | + | 
 | 195 | +    # remove the uber node from the orphans  | 
 | 196 | +    if 0 in orphans  | 
 | 197 | +        delete!(orphans, 0)  | 
 | 198 | +    end  | 
 | 199 | + | 
 | 200 | +    @assert isempty(orphans) "Orphaned nodes: $(orphans), node count: $(length(nodes)), orphan node count: $(length(orphans))"  | 
 | 201 | + | 
 | 202 | +    return nothing  | 
 | 203 | +end  | 
 | 204 | + | 
 | 205 | +function print_str_escape_json(stream::IO, s::AbstractString)  | 
 | 206 | +    print(stream, '"')  | 
 | 207 | +    for c in s  | 
 | 208 | +        if c == '"'  | 
 | 209 | +            print(stream, "\\\"")  | 
 | 210 | +        elseif c == '\\'  | 
 | 211 | +            print(stream, "\\\\")  | 
 | 212 | +        elseif c == '\b'  | 
 | 213 | +            print(stream, "\\b")  | 
 | 214 | +        elseif c == '\f'  | 
 | 215 | +            print(stream, "\\f")  | 
 | 216 | +        elseif c == '\n'  | 
 | 217 | +            print(stream, "\\n")  | 
 | 218 | +        elseif c == '\r'  | 
 | 219 | +            print(stream, "\\r")  | 
 | 220 | +        elseif c == '\t'  | 
 | 221 | +            print(stream, "\\t")  | 
 | 222 | +        elseif '\x00' <= c <= '\x1f'  | 
 | 223 | +            print(stream, "\\u", lpad(string(UInt16(c), base=16), 4, '0'))  | 
 | 224 | +        else  | 
 | 225 | +            print(stream, c)  | 
 | 226 | +        end  | 
 | 227 | +    end  | 
 | 228 | +    print(stream, '"')  | 
 | 229 | +end  | 
 | 230 | + | 
 | 231 | +end  | 
0 commit comments