
Commit 9a8f90d

[memprof] Move writeMemProf to a separate file (#137051)
This patch moves writeMemProf and its subroutines to a separate file. The intent is as follows:

- Reduce the size of InstrProfWriter.cpp.
- Move the subroutines to a separate file because they don't interact with anything else in InstrProfWriter.cpp.

Remarks:

- The new file is named IndexedMemProfData.cpp, without "Writer" in the name, so that we can move the reader code to this file in the future.
- This patch just moves code without changing the function signatures for now. It might make sense to implement a class encompassing "serialize" and "deserialize" methods for IndexedMemProfData, but that is left to subsequent patches.
1 parent b6f32ad commit 9a8f90d
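
As an aside on that last remark, here is a rough, hypothetical sketch of what such a serialize/deserialize wrapper for IndexedMemProfData might look like. The class name, the method signatures, and the Expected-based reader interface are assumptions for illustration only; they are not part of this commit or of any existing LLVM API.

// Hypothetical sketch only -- not part of this commit. One possible shape for
// a type that owns both directions of the on-disk conversion, as floated in
// the commit message above.
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/MemProf.h"
#include "llvm/Support/Error.h"

namespace llvm::memprof {
class IndexedMemProfDataCodec { // hypothetical name
public:
  // Could simply forward to the existing writeMemProf free function.
  static Error serialize(ProfOStream &OS, IndexedMemProfData &Data,
                         IndexedVersion Version, bool FullSchema);
  // Reader-side counterpart that a later patch could move into
  // IndexedMemProfData.cpp next to the writer.
  static Expected<IndexedMemProfData> deserialize(const unsigned char *Start,
                                                  IndexedVersion Version);
};
} // namespace llvm::memprof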

4 files changed: +325, -282 lines changed

IndexedMemProfData.h (new file)

Lines changed: 23 additions & 0 deletions
@@ -0,0 +1,23 @@
//===- IndexedMemProfData.h - MemProf format support ------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// MemProf data is serialized in writeMemProf provided in this header file.
//
//===----------------------------------------------------------------------===//

#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/MemProf.h"

namespace llvm {

// Write the MemProf data to OS.
Error writeMemProf(ProfOStream &OS, memprof::IndexedMemProfData &MemProfData,
                   memprof::IndexedVersion MemProfVersionRequested,
                   bool MemProfFullSchema);

} // namespace llvm
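
For orientation, a minimal caller-side sketch of this entry point follows. The helper name emitMemProfSection, the choice of Version3, and the hot/cold schema default are illustrative assumptions; only the writeMemProf signature above comes from this patch.

#include "llvm/Support/raw_ostream.h"

// Hypothetical caller, not part of this commit: serialize MemProfData into Out
// using the Version 3 layout and the default hot/cold schema. Assumes the
// writeMemProf declaration above is visible.
static llvm::Error emitMemProfSection(llvm::raw_fd_ostream &Out,
                                      llvm::memprof::IndexedMemProfData &Data) {
  // ProfOStream (declared in llvm/ProfileData/InstrProf.h) wraps the raw
  // stream and provides the little-endian write/tell/patch helpers the
  // MemProf writer relies on.
  llvm::ProfOStream OS(Out);
  return llvm::writeMemProf(OS, Data, llvm::memprof::Version3,
                            /*MemProfFullSchema=*/false);
}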

llvm/lib/ProfileData/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
@@ -1,5 +1,6 @@
 add_llvm_component_library(LLVMProfileData
   GCOV.cpp
+  IndexedMemProfData.cpp
   InstrProf.cpp
   InstrProfCorrelator.cpp
   InstrProfReader.cpp
IndexedMemProfData.cpp (new file)

Lines changed: 300 additions & 0 deletions
@@ -0,0 +1,300 @@
//===- IndexedMemProfData.cpp - MemProf format support ----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// MemProf data is serialized in writeMemProf provided in this file.
//
//===----------------------------------------------------------------------===//

#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/MemProf.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/OnDiskHashTable.h"

namespace llvm {

// Serialize Schema.
static void writeMemProfSchema(ProfOStream &OS,
                               const memprof::MemProfSchema &Schema) {
  OS.write(static_cast<uint64_t>(Schema.size()));
  for (const auto Id : Schema)
    OS.write(static_cast<uint64_t>(Id));
}

// Serialize MemProfRecordData. Return RecordTableOffset.
static uint64_t writeMemProfRecords(
    ProfOStream &OS,
    llvm::MapVector<GlobalValue::GUID, memprof::IndexedMemProfRecord>
        &MemProfRecordData,
    memprof::MemProfSchema *Schema, memprof::IndexedVersion Version,
    llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId>
        *MemProfCallStackIndexes = nullptr) {
  memprof::RecordWriterTrait RecordWriter(Schema, Version,
                                          MemProfCallStackIndexes);
  OnDiskChainedHashTableGenerator<memprof::RecordWriterTrait>
      RecordTableGenerator;
  for (auto &[GUID, Record] : MemProfRecordData) {
    // Insert the key (func hash) and value (memprof record).
    RecordTableGenerator.insert(GUID, Record, RecordWriter);
  }
  // Release the memory of this MapVector as it is no longer needed.
  MemProfRecordData.clear();

  // The call to Emit invokes RecordWriterTrait::EmitData which destructs
  // the memprof record copies owned by the RecordTableGenerator. This works
  // because the RecordTableGenerator is not used after this point.
  return RecordTableGenerator.Emit(OS.OS, RecordWriter);
}

// Serialize MemProfFrameData. Return FrameTableOffset.
static uint64_t writeMemProfFrames(
    ProfOStream &OS,
    llvm::MapVector<memprof::FrameId, memprof::Frame> &MemProfFrameData) {
  OnDiskChainedHashTableGenerator<memprof::FrameWriterTrait>
      FrameTableGenerator;
  for (auto &[FrameId, Frame] : MemProfFrameData) {
    // Insert the key (frame id) and value (frame contents).
    FrameTableGenerator.insert(FrameId, Frame);
  }
  // Release the memory of this MapVector as it is no longer needed.
  MemProfFrameData.clear();

  return FrameTableGenerator.Emit(OS.OS);
}

// Serialize MemProfFrameData. Return the mapping from FrameIds to their
// indexes within the frame array.
static llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId>
writeMemProfFrameArray(
    ProfOStream &OS,
    llvm::MapVector<memprof::FrameId, memprof::Frame> &MemProfFrameData,
    llvm::DenseMap<memprof::FrameId, memprof::FrameStat> &FrameHistogram) {
  // Mappings from FrameIds to array indexes.
  llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId> MemProfFrameIndexes;

  // Compute the order in which we serialize Frames. The order does not matter
  // in terms of correctness, but we still compute it for deserialization
  // performance. Specifically, if we serialize frequently used Frames one
  // after another, we have better cache utilization. For two Frames that
  // appear equally frequently, we break a tie by serializing the one that tends
  // to appear earlier in call stacks. We implement the tie-breaking mechanism
  // by computing the sum of indexes within call stacks for each Frame. If we
  // still have a tie, then we just resort to compare two FrameIds, which is
  // just for stability of output.
  std::vector<std::pair<memprof::FrameId, const memprof::Frame *>> FrameIdOrder;
  FrameIdOrder.reserve(MemProfFrameData.size());
  for (const auto &[Id, Frame] : MemProfFrameData)
    FrameIdOrder.emplace_back(Id, &Frame);
  assert(MemProfFrameData.size() == FrameIdOrder.size());
  llvm::sort(FrameIdOrder,
             [&](const std::pair<memprof::FrameId, const memprof::Frame *> &L,
                 const std::pair<memprof::FrameId, const memprof::Frame *> &R) {
               const auto &SL = FrameHistogram[L.first];
               const auto &SR = FrameHistogram[R.first];
               // Popular FrameIds should come first.
               if (SL.Count != SR.Count)
                 return SL.Count > SR.Count;
               // If they are equally popular, then the one that tends to appear
               // earlier in call stacks should come first.
               if (SL.PositionSum != SR.PositionSum)
                 return SL.PositionSum < SR.PositionSum;
               // Compare their FrameIds for sort stability.
               return L.first < R.first;
             });

  // Serialize all frames while creating mappings from linear IDs to FrameIds.
  uint64_t Index = 0;
  MemProfFrameIndexes.reserve(FrameIdOrder.size());
  for (const auto &[Id, F] : FrameIdOrder) {
    F->serialize(OS.OS);
    MemProfFrameIndexes.insert({Id, Index});
    ++Index;
  }
  assert(MemProfFrameData.size() == Index);
  assert(MemProfFrameData.size() == MemProfFrameIndexes.size());

  // Release the memory of this MapVector as it is no longer needed.
  MemProfFrameData.clear();

  return MemProfFrameIndexes;
}

static uint64_t writeMemProfCallStacks(
    ProfOStream &OS,
    llvm::MapVector<memprof::CallStackId, llvm::SmallVector<memprof::FrameId>>
        &MemProfCallStackData) {
  OnDiskChainedHashTableGenerator<memprof::CallStackWriterTrait>
      CallStackTableGenerator;
  for (auto &[CSId, CallStack] : MemProfCallStackData)
    CallStackTableGenerator.insert(CSId, CallStack);
  // Release the memory of this vector as it is no longer needed.
  MemProfCallStackData.clear();

  return CallStackTableGenerator.Emit(OS.OS);
}

static llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId>
writeMemProfCallStackArray(
    ProfOStream &OS,
    llvm::MapVector<memprof::CallStackId, llvm::SmallVector<memprof::FrameId>>
        &MemProfCallStackData,
    llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId>
        &MemProfFrameIndexes,
    llvm::DenseMap<memprof::FrameId, memprof::FrameStat> &FrameHistogram,
    unsigned &NumElements) {
  llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId>
      MemProfCallStackIndexes;

  memprof::CallStackRadixTreeBuilder<memprof::FrameId> Builder;
  Builder.build(std::move(MemProfCallStackData), &MemProfFrameIndexes,
                FrameHistogram);
  for (auto I : Builder.getRadixArray())
    OS.write32(I);
  NumElements = Builder.getRadixArray().size();
  MemProfCallStackIndexes = Builder.takeCallStackPos();

  // Release the memory of this vector as it is no longer needed.
  MemProfCallStackData.clear();

  return MemProfCallStackIndexes;
}

// Write out MemProf Version2 as follows:
// uint64_t Version
// uint64_t RecordTableOffset = RecordTableGenerator.Emit
// uint64_t FramePayloadOffset = Offset for the frame payload
// uint64_t FrameTableOffset = FrameTableGenerator.Emit
// uint64_t CallStackPayloadOffset = Offset for the call stack payload (NEW V2)
// uint64_t CallStackTableOffset = CallStackTableGenerator.Emit (NEW in V2)
// uint64_t Num schema entries
// uint64_t Schema entry 0
// uint64_t Schema entry 1
// ....
// uint64_t Schema entry N - 1
// OnDiskChainedHashTable MemProfRecordData
// OnDiskChainedHashTable MemProfFrameData
// OnDiskChainedHashTable MemProfCallStackData (NEW in V2)
static Error writeMemProfV2(ProfOStream &OS,
                            memprof::IndexedMemProfData &MemProfData,
                            bool MemProfFullSchema) {
  OS.write(memprof::Version2);
  uint64_t HeaderUpdatePos = OS.tell();
  OS.write(0ULL); // Reserve space for the memprof record table offset.
  OS.write(0ULL); // Reserve space for the memprof frame payload offset.
  OS.write(0ULL); // Reserve space for the memprof frame table offset.
  OS.write(0ULL); // Reserve space for the memprof call stack payload offset.
  OS.write(0ULL); // Reserve space for the memprof call stack table offset.

  auto Schema = memprof::getHotColdSchema();
  if (MemProfFullSchema)
    Schema = memprof::getFullSchema();
  writeMemProfSchema(OS, Schema);

  uint64_t RecordTableOffset =
      writeMemProfRecords(OS, MemProfData.Records, &Schema, memprof::Version2);

  uint64_t FramePayloadOffset = OS.tell();
  uint64_t FrameTableOffset = writeMemProfFrames(OS, MemProfData.Frames);

  uint64_t CallStackPayloadOffset = OS.tell();
  uint64_t CallStackTableOffset =
      writeMemProfCallStacks(OS, MemProfData.CallStacks);

  uint64_t Header[] = {
      RecordTableOffset,      FramePayloadOffset,   FrameTableOffset,
      CallStackPayloadOffset, CallStackTableOffset,
  };
  OS.patch({{HeaderUpdatePos, Header}});

  return Error::success();
}

// Write out MemProf Version3 as follows:
// uint64_t Version
// uint64_t CallStackPayloadOffset = Offset for the call stack payload
// uint64_t RecordPayloadOffset = Offset for the record payload
// uint64_t RecordTableOffset = RecordTableGenerator.Emit
// uint64_t Num schema entries
// uint64_t Schema entry 0
// uint64_t Schema entry 1
// ....
// uint64_t Schema entry N - 1
// Frames serialized one after another
// Call stacks encoded as a radix tree
// OnDiskChainedHashTable MemProfRecordData
static Error writeMemProfV3(ProfOStream &OS,
                            memprof::IndexedMemProfData &MemProfData,
                            bool MemProfFullSchema) {
  OS.write(memprof::Version3);
  uint64_t HeaderUpdatePos = OS.tell();
  OS.write(0ULL); // Reserve space for the memprof call stack payload offset.
  OS.write(0ULL); // Reserve space for the memprof record payload offset.
  OS.write(0ULL); // Reserve space for the memprof record table offset.

  auto Schema = memprof::getHotColdSchema();
  if (MemProfFullSchema)
    Schema = memprof::getFullSchema();
  writeMemProfSchema(OS, Schema);

  llvm::DenseMap<memprof::FrameId, memprof::FrameStat> FrameHistogram =
      memprof::computeFrameHistogram(MemProfData.CallStacks);
  assert(MemProfData.Frames.size() == FrameHistogram.size());

  llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId> MemProfFrameIndexes =
      writeMemProfFrameArray(OS, MemProfData.Frames, FrameHistogram);

  uint64_t CallStackPayloadOffset = OS.tell();
  // The number of elements in the call stack array.
  unsigned NumElements = 0;
  llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId>
      MemProfCallStackIndexes =
          writeMemProfCallStackArray(OS, MemProfData.CallStacks,
                                     MemProfFrameIndexes, FrameHistogram,
                                     NumElements);

  uint64_t RecordPayloadOffset = OS.tell();
  uint64_t RecordTableOffset =
      writeMemProfRecords(OS, MemProfData.Records, &Schema, memprof::Version3,
                          &MemProfCallStackIndexes);

  // IndexedMemProfReader::deserializeV3 computes the number of elements in the
  // call stack array from the difference between CallStackPayloadOffset and
  // RecordPayloadOffset. Verify that the computation works.
  assert(CallStackPayloadOffset +
             NumElements * sizeof(memprof::LinearFrameId) ==
         RecordPayloadOffset);

  uint64_t Header[] = {
      CallStackPayloadOffset,
      RecordPayloadOffset,
      RecordTableOffset,
  };
  OS.patch({{HeaderUpdatePos, Header}});

  return Error::success();
}

// Write out the MemProf data in a requested version.
Error writeMemProf(ProfOStream &OS, memprof::IndexedMemProfData &MemProfData,
                   memprof::IndexedVersion MemProfVersionRequested,
                   bool MemProfFullSchema) {
  switch (MemProfVersionRequested) {
  case memprof::Version2:
    return writeMemProfV2(OS, MemProfData, MemProfFullSchema);
  case memprof::Version3:
    return writeMemProfV3(OS, MemProfData, MemProfFullSchema);
  }

  return make_error<InstrProfError>(
      instrprof_error::unsupported_version,
      formatv("MemProf version {} not supported; "
              "requires version between {} and {}, inclusive",
              MemProfVersionRequested, memprof::MinimumSupportedVersion,
              memprof::MaximumSupportedVersion));
}

} // namespace llvm
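
As a companion to the assertion at the end of writeMemProfV3 above, a small sketch of the reader-side arithmetic: the number of radix-array elements is recoverable from the two header offsets alone. This assumes LinearFrameId remains the 32-bit type implied by the write32 calls; the helper name is illustrative and is not the actual IndexedMemProfReader::deserializeV3 code.

#include "llvm/ProfileData/MemProf.h"
#include <cstdint>

// Illustrative sketch, not the actual reader: recover the element count of the
// call stack radix array from the V3 header offsets.
static uint64_t numRadixArrayElements(uint64_t CallStackPayloadOffset,
                                      uint64_t RecordPayloadOffset) {
  // Each radix-array entry was emitted with OS.write32, i.e. one
  // memprof::LinearFrameId (4 bytes), so the payload divides evenly.
  return (RecordPayloadOffset - CallStackPayloadOffset) /
         sizeof(llvm::memprof::LinearFrameId);
}
// Example: CallStackPayloadOffset = 0x1000 and RecordPayloadOffset = 0x1028
// give (0x1028 - 0x1000) / 4 = 10 elements.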
