Skip to content

[memprof] Deduplicate alloc site matches #142334

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jun 2, 2025
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 25 additions & 7 deletions llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -816,6 +816,11 @@ static bool isAllocationWithHotColdVariant(const Function *Callee,
}
}

// Per-context record of an allocation match, collected so that match info can
// be printed after matching rather than at each individual match. Used as the
// mapped value of a map keyed by (full stack id, number of matched inlined
// call stack frames).
//
// Field order matters: instances are brace-initialized positionally as
// {TotalSize, AllocType}.
struct AllocMatchInfo {
// Total profiled size reported by the profile for this allocation context.
uint64_t TotalSize = 0;
// Allocation type assigned to the context; stays None until a match is
// recorded.
AllocationType AllocType = AllocationType::None;
};

DenseMap<uint64_t, SmallVector<CallEdgeTy, 0>>
memprof::extractCallsFromIR(Module &M, const TargetLibraryInfo &TLI,
function_ref<bool(uint64_t)> IsPresentInProfile) {
Expand Down Expand Up @@ -994,6 +999,8 @@ static void addVPMetadata(Module &M, Instruction &I,
static void readMemprof(Module &M, Function &F,
IndexedInstrProfReader *MemProfReader,
const TargetLibraryInfo &TLI,
std::map<std::pair<uint64_t, unsigned>, AllocMatchInfo>
&FullStackIdToAllocMatchInfo,
std::set<std::vector<uint64_t>> &MatchedCallSites,
DenseMap<uint64_t, LocToLocMap> &UndriftMaps,
OptimizationRemarkEmitter &ORE) {
Expand Down Expand Up @@ -1206,11 +1213,9 @@ static void readMemprof(Module &M, Function &F,
// was requested.
if (ClPrintMemProfMatchInfo) {
assert(FullStackId != 0);
errs() << "MemProf " << getAllocTypeAttributeString(AllocType)
<< " context with id " << FullStackId
<< " has total profiled size "
<< AllocInfo->Info.getTotalSize() << " is matched with "
<< InlinedCallStack.size() << " frames\n";
FullStackIdToAllocMatchInfo[std::make_pair(
FullStackId, InlinedCallStack.size())] = {
AllocInfo->Info.getTotalSize(), AllocType};
}
}
}
Expand Down Expand Up @@ -1325,6 +1330,12 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
if (SalvageStaleProfile)
UndriftMaps = computeUndriftMap(M, MemProfReader.get(), TLI);

// Map from the stack hash and matched frame count of each allocation context
// in the function profiles to the total profiled size (bytes) and allocation
// type.
std::map<std::pair<uint64_t, unsigned>, AllocMatchInfo>
FullStackIdToAllocMatchInfo;

// Set of the matched call sites, each expressed as a sequence of an inline
// call stack.
std::set<std::vector<uint64_t>> MatchedCallSites;
Expand All @@ -1335,11 +1346,18 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {

const TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
readMemprof(M, F, MemProfReader.get(), TLI, MatchedCallSites, UndriftMaps,
ORE);
readMemprof(M, F, MemProfReader.get(), TLI, FullStackIdToAllocMatchInfo,
MatchedCallSites, UndriftMaps, ORE);
}

if (ClPrintMemProfMatchInfo) {
for (const auto &[IdLengthPair, Info] : FullStackIdToAllocMatchInfo) {
auto [Id, Length] = IdLengthPair;
errs() << "MemProf " << getAllocTypeAttributeString(Info.AllocType)
<< " context with id " << Id << " has total profiled size "
<< Info.TotalSize << " is matched with " << Length << " frames\n";
}

for (const auto &CallStack : MatchedCallSites) {
errs() << "MemProf callsite match for inline call stack";
for (uint64_t StackId : CallStack)
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/Transforms/PGOProfile/memprof.ll
Original file line number Diff line number Diff line change
Expand Up @@ -111,13 +111,13 @@
; RUN: opt < %s -passes='memprof-use<profile-filename=%t.memprofdata>' -pgo-warn-missing-function -S -memprof-min-ave-lifetime-access-density-hot-threshold=0 2>&1 | FileCheck %s --check-prefixes=MEMPROF,ALL

; MEMPROFMATCHINFO: MemProf notcold context with id 1093248920606587996 has total profiled size 10 is matched with 1 frames
; MEMPROFMATCHINFO: MemProf cold context with id 8525406123785421946 has total profiled size 10 is matched with 1 frames
; MEMPROFMATCHINFO: MemProf cold context with id 16342802530253093571 has total profiled size 10 is matched with 1 frames
; MEMPROFMATCHINFO: MemProf cold context with id 18254812774972004394 has total profiled size 10 is matched with 1 frames
; MEMPROFMATCHINFO: MemProf cold context with id 11714230664165068698 has total profiled size 10 is matched with 1 frames
; MEMPROFMATCHINFO: MemProf notcold context with id 5725971306423925017 has total profiled size 10 is matched with 1 frames
; MEMPROFMATCHINFO: MemProf notcold context with id 6792096022461663180 has total profiled size 10 is matched with 1 frames
; MEMPROFMATCHINFO: MemProf cold context with id 8525406123785421946 has total profiled size 10 is matched with 1 frames
; MEMPROFMATCHINFO: MemProf cold context with id 11714230664165068698 has total profiled size 10 is matched with 1 frames
; MEMPROFMATCHINFO: MemProf cold context with id 15737101490731057601 has total profiled size 10 is matched with 1 frames
; MEMPROFMATCHINFO: MemProf cold context with id 16342802530253093571 has total profiled size 10 is matched with 1 frames
; MEMPROFMATCHINFO: MemProf cold context with id 18254812774972004394 has total profiled size 10 is matched with 1 frames
; MEMPROFMATCHINFO: MemProf callsite match for inline call stack 748269490701775343
; MEMPROFMATCHINFO: MemProf callsite match for inline call stack 1544787832369987002
; MEMPROFMATCHINFO: MemProf callsite match for inline call stack 2061451396820446691
Expand Down
Loading