Skip to content

Commit

Permalink
[MemProf] Templatize CallStackRadixTreeBuilder (NFC) (#117014)
Browse files Browse the repository at this point in the history
Prepare for usage in the bitcode reader/writer where we already have a
LinearFrameId:
- templatize input frame id type in CallStackRadixTreeBuilder
- templatize input frame id type in computeFrameHistogram
- make the map from FrameId to LinearFrameId optional

We plan to use the same radix format in the ThinLTO summary records,
where we already have a LinearFrameId.
  • Loading branch information
teresajohnson authored Nov 20, 2024
1 parent 6473a36 commit e14827f
Show file tree
Hide file tree
Showing 4 changed files with 52 additions and 35 deletions.
25 changes: 14 additions & 11 deletions llvm/include/llvm/ProfileData/MemProf.h
Original file line number Diff line number Diff line change
Expand Up @@ -1050,8 +1050,9 @@ struct FrameStat {
};

// Compute a histogram of Frames in call stacks.
llvm::DenseMap<FrameId, FrameStat>
computeFrameHistogram(llvm::MapVector<CallStackId, llvm::SmallVector<FrameId>>
template <typename FrameIdTy>
llvm::DenseMap<FrameIdTy, FrameStat>
computeFrameHistogram(llvm::MapVector<CallStackId, llvm::SmallVector<FrameIdTy>>
&MemProfCallStackData);

// Construct a radix tree of call stacks.
Expand Down Expand Up @@ -1109,7 +1110,7 @@ computeFrameHistogram(llvm::MapVector<CallStackId, llvm::SmallVector<FrameId>>
// On-disk IndexedMemProfRecord will refer to call stacks by their indexes into
// the radix tree array, so we do not explicitly encode mappings like:
// "CallStackId 1 -> 11".
class CallStackRadixTreeBuilder {
template <typename FrameIdTy> class CallStackRadixTreeBuilder {
// The radix tree array.
std::vector<LinearFrameId> RadixArray;

Expand All @@ -1136,23 +1137,25 @@ class CallStackRadixTreeBuilder {
// RadixArray[Indexes[5 - 1]] is the last frame of the common prefix.
std::vector<LinearCallStackId> Indexes;

using CSIdPair = std::pair<CallStackId, llvm::SmallVector<FrameId>>;
using CSIdPair = std::pair<CallStackId, llvm::SmallVector<FrameIdTy>>;

// Encode a call stack into RadixArray. Return the starting index within
// RadixArray.
LinearCallStackId encodeCallStack(
const llvm::SmallVector<FrameId> *CallStack,
const llvm::SmallVector<FrameId> *Prev,
const llvm::DenseMap<FrameId, LinearFrameId> &MemProfFrameIndexes);
LinearCallStackId
encodeCallStack(const llvm::SmallVector<FrameIdTy> *CallStack,
const llvm::SmallVector<FrameIdTy> *Prev,
std::optional<const llvm::DenseMap<FrameIdTy, LinearFrameId>>
MemProfFrameIndexes);

public:
CallStackRadixTreeBuilder() = default;

// Build a radix tree array.
void build(llvm::MapVector<CallStackId, llvm::SmallVector<FrameId>>
void build(llvm::MapVector<CallStackId, llvm::SmallVector<FrameIdTy>>
&&MemProfCallStackData,
const llvm::DenseMap<FrameId, LinearFrameId> &MemProfFrameIndexes,
llvm::DenseMap<FrameId, FrameStat> &FrameHistogram);
std::optional<const llvm::DenseMap<FrameIdTy, LinearFrameId>>
MemProfFrameIndexes,
llvm::DenseMap<FrameIdTy, FrameStat> &FrameHistogram);

ArrayRef<LinearFrameId> getRadixArray() const { return RadixArray; }

Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/ProfileData/InstrProfWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -635,7 +635,7 @@ writeMemProfCallStackArray(
llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId>
MemProfCallStackIndexes;

memprof::CallStackRadixTreeBuilder Builder;
memprof::CallStackRadixTreeBuilder<memprof::FrameId> Builder;
Builder.build(std::move(MemProfCallStackData), MemProfFrameIndexes,
FrameHistogram);
for (auto I : Builder.getRadixArray())
Expand Down
44 changes: 29 additions & 15 deletions llvm/lib/ProfileData/MemProf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -436,10 +436,12 @@ CallStackId hashCallStack(ArrayRef<FrameId> CS) {
// To quickly determine the location of the common prefix within RadixArray,
// Indexes caches the indexes of the previous call stack's frames within
// RadixArray.
LinearCallStackId CallStackRadixTreeBuilder::encodeCallStack(
const llvm::SmallVector<FrameId> *CallStack,
const llvm::SmallVector<FrameId> *Prev,
const llvm::DenseMap<FrameId, LinearFrameId> &MemProfFrameIndexes) {
template <typename FrameIdTy>
LinearCallStackId CallStackRadixTreeBuilder<FrameIdTy>::encodeCallStack(
const llvm::SmallVector<FrameIdTy> *CallStack,
const llvm::SmallVector<FrameIdTy> *Prev,
std::optional<const llvm::DenseMap<FrameIdTy, LinearFrameId>>
MemProfFrameIndexes) {
// Compute the length of the common root prefix between Prev and CallStack.
uint32_t CommonLen = 0;
if (Prev) {
Expand All @@ -464,10 +466,11 @@ LinearCallStackId CallStackRadixTreeBuilder::encodeCallStack(

// Copy the part of the call stack beyond the common prefix to RadixArray.
assert(CommonLen <= CallStack->size());
for (FrameId F : llvm::drop_begin(llvm::reverse(*CallStack), CommonLen)) {
for (FrameIdTy F : llvm::drop_begin(llvm::reverse(*CallStack), CommonLen)) {
// Remember the index of F in RadixArray.
Indexes.push_back(RadixArray.size());
RadixArray.push_back(MemProfFrameIndexes.find(F)->second);
RadixArray.push_back(
MemProfFrameIndexes ? MemProfFrameIndexes->find(F)->second : F);
}
assert(CallStack->size() == Indexes.size());

Expand All @@ -479,11 +482,13 @@ LinearCallStackId CallStackRadixTreeBuilder::encodeCallStack(
return RadixArray.size() - 1;
}

void CallStackRadixTreeBuilder::build(
llvm::MapVector<CallStackId, llvm::SmallVector<FrameId>>
template <typename FrameIdTy>
void CallStackRadixTreeBuilder<FrameIdTy>::build(
llvm::MapVector<CallStackId, llvm::SmallVector<FrameIdTy>>
&&MemProfCallStackData,
const llvm::DenseMap<FrameId, LinearFrameId> &MemProfFrameIndexes,
llvm::DenseMap<FrameId, FrameStat> &FrameHistogram) {
std::optional<const llvm::DenseMap<FrameIdTy, LinearFrameId>>
MemProfFrameIndexes,
llvm::DenseMap<FrameIdTy, FrameStat> &FrameHistogram) {
// Take the vector portion of MemProfCallStackData. The vector is exactly
// what we need to sort. Also, we no longer need its lookup capability.
llvm::SmallVector<CSIdPair, 0> CallStacks = MemProfCallStackData.takeVector();
Expand Down Expand Up @@ -535,7 +540,7 @@ void CallStackRadixTreeBuilder::build(
// root.
return std::lexicographical_compare(
L.second.rbegin(), L.second.rend(), R.second.rbegin(), R.second.rend(),
[&](FrameId F1, FrameId F2) {
[&](FrameIdTy F1, FrameIdTy F2) {
uint64_t H1 = FrameHistogram[F1].Count;
uint64_t H2 = FrameHistogram[F2].Count;
// Popular frames should come later because we encode call stacks from
Expand Down Expand Up @@ -585,7 +590,7 @@ void CallStackRadixTreeBuilder::build(
// traverse CallStacks in the reverse order, then Call Stack 3 has the
// complete call stack encoded without any pointers. Call Stack 1 and 2 point
// to appropriate prefixes of Call Stack 3.
const llvm::SmallVector<FrameId> *Prev = nullptr;
const llvm::SmallVector<FrameIdTy> *Prev = nullptr;
for (const auto &[CSId, CallStack] : llvm::reverse(CallStacks)) {
LinearCallStackId Pos =
encodeCallStack(&CallStack, Prev, MemProfFrameIndexes);
Expand All @@ -608,10 +613,14 @@ void CallStackRadixTreeBuilder::build(
V = RadixArray.size() - 1 - V;
}

llvm::DenseMap<FrameId, FrameStat>
computeFrameHistogram(llvm::MapVector<CallStackId, llvm::SmallVector<FrameId>>
// Explicitly instantiate class with the utilized FrameIdTy.
template class CallStackRadixTreeBuilder<FrameId>;

template <typename FrameIdTy>
llvm::DenseMap<FrameIdTy, FrameStat>
computeFrameHistogram(llvm::MapVector<CallStackId, llvm::SmallVector<FrameIdTy>>
&MemProfCallStackData) {
llvm::DenseMap<FrameId, FrameStat> Histogram;
llvm::DenseMap<FrameIdTy, FrameStat> Histogram;

for (const auto &KV : MemProfCallStackData) {
const auto &CS = KV.second;
Expand All @@ -624,6 +633,11 @@ computeFrameHistogram(llvm::MapVector<CallStackId, llvm::SmallVector<FrameId>>
return Histogram;
}

// Explicitly instantiate function with the utilized FrameIdTy.
template llvm::DenseMap<FrameId, FrameStat> computeFrameHistogram<FrameId>(
llvm::MapVector<CallStackId, llvm::SmallVector<FrameId>>
&MemProfCallStackData);

void verifyIndexedMemProfRecord(const IndexedMemProfRecord &Record) {
for (const auto &AS : Record.AllocSites) {
assert(AS.CSId == hashCallStack(AS.CallStack));
Expand Down
16 changes: 8 additions & 8 deletions llvm/unittests/ProfileData/MemProfTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -659,8 +659,8 @@ TEST(MemProf, RadixTreeBuilderEmpty) {
llvm::MapVector<CallStackId, llvm::SmallVector<FrameId>> MemProfCallStackData;
llvm::DenseMap<llvm::memprof::FrameId, llvm::memprof::FrameStat>
FrameHistogram =
llvm::memprof::computeFrameHistogram(MemProfCallStackData);
llvm::memprof::CallStackRadixTreeBuilder Builder;
llvm::memprof::computeFrameHistogram<FrameId>(MemProfCallStackData);
llvm::memprof::CallStackRadixTreeBuilder<FrameId> Builder;
Builder.build(std::move(MemProfCallStackData), MemProfFrameIndexes,
FrameHistogram);
ASSERT_THAT(Builder.getRadixArray(), testing::IsEmpty());
Expand All @@ -677,8 +677,8 @@ TEST(MemProf, RadixTreeBuilderOne) {
MemProfCallStackData.insert({llvm::memprof::hashCallStack(CS1), CS1});
llvm::DenseMap<llvm::memprof::FrameId, llvm::memprof::FrameStat>
FrameHistogram =
llvm::memprof::computeFrameHistogram(MemProfCallStackData);
llvm::memprof::CallStackRadixTreeBuilder Builder;
llvm::memprof::computeFrameHistogram<FrameId>(MemProfCallStackData);
llvm::memprof::CallStackRadixTreeBuilder<FrameId> Builder;
Builder.build(std::move(MemProfCallStackData), MemProfFrameIndexes,
FrameHistogram);
EXPECT_THAT(Builder.getRadixArray(), testing::ElementsAreArray({
Expand All @@ -704,8 +704,8 @@ TEST(MemProf, RadixTreeBuilderTwo) {
MemProfCallStackData.insert({llvm::memprof::hashCallStack(CS2), CS2});
llvm::DenseMap<llvm::memprof::FrameId, llvm::memprof::FrameStat>
FrameHistogram =
llvm::memprof::computeFrameHistogram(MemProfCallStackData);
llvm::memprof::CallStackRadixTreeBuilder Builder;
llvm::memprof::computeFrameHistogram<FrameId>(MemProfCallStackData);
llvm::memprof::CallStackRadixTreeBuilder<FrameId> Builder;
Builder.build(std::move(MemProfCallStackData), MemProfFrameIndexes,
FrameHistogram);
EXPECT_THAT(Builder.getRadixArray(),
Expand Down Expand Up @@ -742,8 +742,8 @@ TEST(MemProf, RadixTreeBuilderSuccessiveJumps) {
MemProfCallStackData.insert({llvm::memprof::hashCallStack(CS4), CS4});
llvm::DenseMap<llvm::memprof::FrameId, llvm::memprof::FrameStat>
FrameHistogram =
llvm::memprof::computeFrameHistogram(MemProfCallStackData);
llvm::memprof::CallStackRadixTreeBuilder Builder;
llvm::memprof::computeFrameHistogram<FrameId>(MemProfCallStackData);
llvm::memprof::CallStackRadixTreeBuilder<FrameId> Builder;
Builder.build(std::move(MemProfCallStackData), MemProfFrameIndexes,
FrameHistogram);
EXPECT_THAT(Builder.getRadixArray(),
Expand Down

0 comments on commit e14827f

Please sign in to comment.