//===- IndexedMemProfData.h - MemProf format support ------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// MemProf data is serialized in writeMemProf provided in this file.
//
//===----------------------------------------------------------------------===//

#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/MemProf.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/OnDiskHashTable.h"

namespace llvm {
| 20 | +// Serialize Schema. |
| 21 | +static void writeMemProfSchema(ProfOStream &OS, |
| 22 | + const memprof::MemProfSchema &Schema) { |
| 23 | + OS.write(static_cast<uint64_t>(Schema.size())); |
| 24 | + for (const auto Id : Schema) |
| 25 | + OS.write(static_cast<uint64_t>(Id)); |
| 26 | +} |
| 27 | + |
| 28 | +// Serialize MemProfRecordData. Return RecordTableOffset. |
| 29 | +static uint64_t writeMemProfRecords( |
| 30 | + ProfOStream &OS, |
| 31 | + llvm::MapVector<GlobalValue::GUID, memprof::IndexedMemProfRecord> |
| 32 | + &MemProfRecordData, |
| 33 | + memprof::MemProfSchema *Schema, memprof::IndexedVersion Version, |
| 34 | + llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId> |
| 35 | + *MemProfCallStackIndexes = nullptr) { |
| 36 | + memprof::RecordWriterTrait RecordWriter(Schema, Version, |
| 37 | + MemProfCallStackIndexes); |
| 38 | + OnDiskChainedHashTableGenerator<memprof::RecordWriterTrait> |
| 39 | + RecordTableGenerator; |
| 40 | + for (auto &[GUID, Record] : MemProfRecordData) { |
| 41 | + // Insert the key (func hash) and value (memprof record). |
| 42 | + RecordTableGenerator.insert(GUID, Record, RecordWriter); |
| 43 | + } |
| 44 | + // Release the memory of this MapVector as it is no longer needed. |
| 45 | + MemProfRecordData.clear(); |
| 46 | + |
| 47 | + // The call to Emit invokes RecordWriterTrait::EmitData which destructs |
| 48 | + // the memprof record copies owned by the RecordTableGenerator. This works |
| 49 | + // because the RecordTableGenerator is not used after this point. |
| 50 | + return RecordTableGenerator.Emit(OS.OS, RecordWriter); |
| 51 | +} |
| 52 | + |
| 53 | +// Serialize MemProfFrameData. Return FrameTableOffset. |
| 54 | +static uint64_t writeMemProfFrames( |
| 55 | + ProfOStream &OS, |
| 56 | + llvm::MapVector<memprof::FrameId, memprof::Frame> &MemProfFrameData) { |
| 57 | + OnDiskChainedHashTableGenerator<memprof::FrameWriterTrait> |
| 58 | + FrameTableGenerator; |
| 59 | + for (auto &[FrameId, Frame] : MemProfFrameData) { |
| 60 | + // Insert the key (frame id) and value (frame contents). |
| 61 | + FrameTableGenerator.insert(FrameId, Frame); |
| 62 | + } |
| 63 | + // Release the memory of this MapVector as it is no longer needed. |
| 64 | + MemProfFrameData.clear(); |
| 65 | + |
| 66 | + return FrameTableGenerator.Emit(OS.OS); |
| 67 | +} |
| 68 | + |
| 69 | +// Serialize MemProfFrameData. Return the mapping from FrameIds to their |
| 70 | +// indexes within the frame array. |
| 71 | +static llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId> |
| 72 | +writeMemProfFrameArray( |
| 73 | + ProfOStream &OS, |
| 74 | + llvm::MapVector<memprof::FrameId, memprof::Frame> &MemProfFrameData, |
| 75 | + llvm::DenseMap<memprof::FrameId, memprof::FrameStat> &FrameHistogram) { |
| 76 | + // Mappings from FrameIds to array indexes. |
| 77 | + llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId> MemProfFrameIndexes; |
| 78 | + |
| 79 | + // Compute the order in which we serialize Frames. The order does not matter |
| 80 | + // in terms of correctness, but we still compute it for deserialization |
| 81 | + // performance. Specifically, if we serialize frequently used Frames one |
| 82 | + // after another, we have better cache utilization. For two Frames that |
| 83 | + // appear equally frequently, we break a tie by serializing the one that tends |
| 84 | + // to appear earlier in call stacks. We implement the tie-breaking mechanism |
| 85 | + // by computing the sum of indexes within call stacks for each Frame. If we |
| 86 | + // still have a tie, then we just resort to compare two FrameIds, which is |
| 87 | + // just for stability of output. |
| 88 | + std::vector<std::pair<memprof::FrameId, const memprof::Frame *>> FrameIdOrder; |
| 89 | + FrameIdOrder.reserve(MemProfFrameData.size()); |
| 90 | + for (const auto &[Id, Frame] : MemProfFrameData) |
| 91 | + FrameIdOrder.emplace_back(Id, &Frame); |
| 92 | + assert(MemProfFrameData.size() == FrameIdOrder.size()); |
| 93 | + llvm::sort(FrameIdOrder, |
| 94 | + [&](const std::pair<memprof::FrameId, const memprof::Frame *> &L, |
| 95 | + const std::pair<memprof::FrameId, const memprof::Frame *> &R) { |
| 96 | + const auto &SL = FrameHistogram[L.first]; |
| 97 | + const auto &SR = FrameHistogram[R.first]; |
| 98 | + // Popular FrameIds should come first. |
| 99 | + if (SL.Count != SR.Count) |
| 100 | + return SL.Count > SR.Count; |
| 101 | + // If they are equally popular, then the one that tends to appear |
| 102 | + // earlier in call stacks should come first. |
| 103 | + if (SL.PositionSum != SR.PositionSum) |
| 104 | + return SL.PositionSum < SR.PositionSum; |
| 105 | + // Compare their FrameIds for sort stability. |
| 106 | + return L.first < R.first; |
| 107 | + }); |
| 108 | + |
| 109 | + // Serialize all frames while creating mappings from linear IDs to FrameIds. |
| 110 | + uint64_t Index = 0; |
| 111 | + MemProfFrameIndexes.reserve(FrameIdOrder.size()); |
| 112 | + for (const auto &[Id, F] : FrameIdOrder) { |
| 113 | + F->serialize(OS.OS); |
| 114 | + MemProfFrameIndexes.insert({Id, Index}); |
| 115 | + ++Index; |
| 116 | + } |
| 117 | + assert(MemProfFrameData.size() == Index); |
| 118 | + assert(MemProfFrameData.size() == MemProfFrameIndexes.size()); |
| 119 | + |
| 120 | + // Release the memory of this MapVector as it is no longer needed. |
| 121 | + MemProfFrameData.clear(); |
| 122 | + |
| 123 | + return MemProfFrameIndexes; |
| 124 | +} |
| 125 | + |
| 126 | +static uint64_t writeMemProfCallStacks( |
| 127 | + ProfOStream &OS, |
| 128 | + llvm::MapVector<memprof::CallStackId, llvm::SmallVector<memprof::FrameId>> |
| 129 | + &MemProfCallStackData) { |
| 130 | + OnDiskChainedHashTableGenerator<memprof::CallStackWriterTrait> |
| 131 | + CallStackTableGenerator; |
| 132 | + for (auto &[CSId, CallStack] : MemProfCallStackData) |
| 133 | + CallStackTableGenerator.insert(CSId, CallStack); |
| 134 | + // Release the memory of this vector as it is no longer needed. |
| 135 | + MemProfCallStackData.clear(); |
| 136 | + |
| 137 | + return CallStackTableGenerator.Emit(OS.OS); |
| 138 | +} |
| 139 | + |
| 140 | +static llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId> |
| 141 | +writeMemProfCallStackArray( |
| 142 | + ProfOStream &OS, |
| 143 | + llvm::MapVector<memprof::CallStackId, llvm::SmallVector<memprof::FrameId>> |
| 144 | + &MemProfCallStackData, |
| 145 | + llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId> |
| 146 | + &MemProfFrameIndexes, |
| 147 | + llvm::DenseMap<memprof::FrameId, memprof::FrameStat> &FrameHistogram, |
| 148 | + unsigned &NumElements) { |
| 149 | + llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId> |
| 150 | + MemProfCallStackIndexes; |
| 151 | + |
| 152 | + memprof::CallStackRadixTreeBuilder<memprof::FrameId> Builder; |
| 153 | + Builder.build(std::move(MemProfCallStackData), &MemProfFrameIndexes, |
| 154 | + FrameHistogram); |
| 155 | + for (auto I : Builder.getRadixArray()) |
| 156 | + OS.write32(I); |
| 157 | + NumElements = Builder.getRadixArray().size(); |
| 158 | + MemProfCallStackIndexes = Builder.takeCallStackPos(); |
| 159 | + |
| 160 | + // Release the memory of this vector as it is no longer needed. |
| 161 | + MemProfCallStackData.clear(); |
| 162 | + |
| 163 | + return MemProfCallStackIndexes; |
| 164 | +} |
| 165 | + |
| 166 | +// Write out MemProf Version2 as follows: |
| 167 | +// uint64_t Version |
| 168 | +// uint64_t RecordTableOffset = RecordTableGenerator.Emit |
| 169 | +// uint64_t FramePayloadOffset = Offset for the frame payload |
| 170 | +// uint64_t FrameTableOffset = FrameTableGenerator.Emit |
| 171 | +// uint64_t CallStackPayloadOffset = Offset for the call stack payload (NEW V2) |
| 172 | +// uint64_t CallStackTableOffset = CallStackTableGenerator.Emit (NEW in V2) |
| 173 | +// uint64_t Num schema entries |
| 174 | +// uint64_t Schema entry 0 |
| 175 | +// uint64_t Schema entry 1 |
| 176 | +// .... |
| 177 | +// uint64_t Schema entry N - 1 |
| 178 | +// OnDiskChainedHashTable MemProfRecordData |
| 179 | +// OnDiskChainedHashTable MemProfFrameData |
| 180 | +// OnDiskChainedHashTable MemProfCallStackData (NEW in V2) |
| 181 | +static Error writeMemProfV2(ProfOStream &OS, |
| 182 | + memprof::IndexedMemProfData &MemProfData, |
| 183 | + bool MemProfFullSchema) { |
| 184 | + OS.write(memprof::Version2); |
| 185 | + uint64_t HeaderUpdatePos = OS.tell(); |
| 186 | + OS.write(0ULL); // Reserve space for the memprof record table offset. |
| 187 | + OS.write(0ULL); // Reserve space for the memprof frame payload offset. |
| 188 | + OS.write(0ULL); // Reserve space for the memprof frame table offset. |
| 189 | + OS.write(0ULL); // Reserve space for the memprof call stack payload offset. |
| 190 | + OS.write(0ULL); // Reserve space for the memprof call stack table offset. |
| 191 | + |
| 192 | + auto Schema = memprof::getHotColdSchema(); |
| 193 | + if (MemProfFullSchema) |
| 194 | + Schema = memprof::getFullSchema(); |
| 195 | + writeMemProfSchema(OS, Schema); |
| 196 | + |
| 197 | + uint64_t RecordTableOffset = |
| 198 | + writeMemProfRecords(OS, MemProfData.Records, &Schema, memprof::Version2); |
| 199 | + |
| 200 | + uint64_t FramePayloadOffset = OS.tell(); |
| 201 | + uint64_t FrameTableOffset = writeMemProfFrames(OS, MemProfData.Frames); |
| 202 | + |
| 203 | + uint64_t CallStackPayloadOffset = OS.tell(); |
| 204 | + uint64_t CallStackTableOffset = |
| 205 | + writeMemProfCallStacks(OS, MemProfData.CallStacks); |
| 206 | + |
| 207 | + uint64_t Header[] = { |
| 208 | + RecordTableOffset, FramePayloadOffset, FrameTableOffset, |
| 209 | + CallStackPayloadOffset, CallStackTableOffset, |
| 210 | + }; |
| 211 | + OS.patch({{HeaderUpdatePos, Header}}); |
| 212 | + |
| 213 | + return Error::success(); |
| 214 | +} |
| 215 | + |
| 216 | +// Write out MemProf Version3 as follows: |
| 217 | +// uint64_t Version |
| 218 | +// uint64_t CallStackPayloadOffset = Offset for the call stack payload |
| 219 | +// uint64_t RecordPayloadOffset = Offset for the record payload |
| 220 | +// uint64_t RecordTableOffset = RecordTableGenerator.Emit |
| 221 | +// uint64_t Num schema entries |
| 222 | +// uint64_t Schema entry 0 |
| 223 | +// uint64_t Schema entry 1 |
| 224 | +// .... |
| 225 | +// uint64_t Schema entry N - 1 |
| 226 | +// Frames serialized one after another |
| 227 | +// Call stacks encoded as a radix tree |
| 228 | +// OnDiskChainedHashTable MemProfRecordData |
| 229 | +static Error writeMemProfV3(ProfOStream &OS, |
| 230 | + memprof::IndexedMemProfData &MemProfData, |
| 231 | + bool MemProfFullSchema) { |
| 232 | + OS.write(memprof::Version3); |
| 233 | + uint64_t HeaderUpdatePos = OS.tell(); |
| 234 | + OS.write(0ULL); // Reserve space for the memprof call stack payload offset. |
| 235 | + OS.write(0ULL); // Reserve space for the memprof record payload offset. |
| 236 | + OS.write(0ULL); // Reserve space for the memprof record table offset. |
| 237 | + |
| 238 | + auto Schema = memprof::getHotColdSchema(); |
| 239 | + if (MemProfFullSchema) |
| 240 | + Schema = memprof::getFullSchema(); |
| 241 | + writeMemProfSchema(OS, Schema); |
| 242 | + |
| 243 | + llvm::DenseMap<memprof::FrameId, memprof::FrameStat> FrameHistogram = |
| 244 | + memprof::computeFrameHistogram(MemProfData.CallStacks); |
| 245 | + assert(MemProfData.Frames.size() == FrameHistogram.size()); |
| 246 | + |
| 247 | + llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId> MemProfFrameIndexes = |
| 248 | + writeMemProfFrameArray(OS, MemProfData.Frames, FrameHistogram); |
| 249 | + |
| 250 | + uint64_t CallStackPayloadOffset = OS.tell(); |
| 251 | + // The number of elements in the call stack array. |
| 252 | + unsigned NumElements = 0; |
| 253 | + llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId> |
| 254 | + MemProfCallStackIndexes = |
| 255 | + writeMemProfCallStackArray(OS, MemProfData.CallStacks, |
| 256 | + MemProfFrameIndexes, FrameHistogram, |
| 257 | + NumElements); |
| 258 | + |
| 259 | + uint64_t RecordPayloadOffset = OS.tell(); |
| 260 | + uint64_t RecordTableOffset = |
| 261 | + writeMemProfRecords(OS, MemProfData.Records, &Schema, memprof::Version3, |
| 262 | + &MemProfCallStackIndexes); |
| 263 | + |
| 264 | + // IndexedMemProfReader::deserializeV3 computes the number of elements in the |
| 265 | + // call stack array from the difference between CallStackPayloadOffset and |
| 266 | + // RecordPayloadOffset. Verify that the computation works. |
| 267 | + assert(CallStackPayloadOffset + |
| 268 | + NumElements * sizeof(memprof::LinearFrameId) == |
| 269 | + RecordPayloadOffset); |
| 270 | + |
| 271 | + uint64_t Header[] = { |
| 272 | + CallStackPayloadOffset, |
| 273 | + RecordPayloadOffset, |
| 274 | + RecordTableOffset, |
| 275 | + }; |
| 276 | + OS.patch({{HeaderUpdatePos, Header}}); |
| 277 | + |
| 278 | + return Error::success(); |
| 279 | +} |
| 280 | + |
| 281 | +// Write out the MemProf data in a requested version. |
| 282 | +Error writeMemProf(ProfOStream &OS, memprof::IndexedMemProfData &MemProfData, |
| 283 | + memprof::IndexedVersion MemProfVersionRequested, |
| 284 | + bool MemProfFullSchema) { |
| 285 | + switch (MemProfVersionRequested) { |
| 286 | + case memprof::Version2: |
| 287 | + return writeMemProfV2(OS, MemProfData, MemProfFullSchema); |
| 288 | + case memprof::Version3: |
| 289 | + return writeMemProfV3(OS, MemProfData, MemProfFullSchema); |
| 290 | + } |
| 291 | + |
| 292 | + return make_error<InstrProfError>( |
| 293 | + instrprof_error::unsupported_version, |
| 294 | + formatv("MemProf version {} not supported; " |
| 295 | + "requires version between {} and {}, inclusive", |
| 296 | + MemProfVersionRequested, memprof::MinimumSupportedVersion, |
| 297 | + memprof::MaximumSupportedVersion)); |
| 298 | +} |

} // namespace llvm