-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[memprof] Deduplicate alloc site matches #142334
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[memprof] Deduplicate alloc site matches #142334
Conversation
With commit 2425626 (Author: Kazu Hirata <kazu@google.com>, Date: Sun Jun 1 08:09:58 2025 -0700), we print out a lot of duplicate alloc site matches. This patch partially reverts that commit. The core idea of using a map to deduplicate entries remains the same, but the details are different. Specifically: - This PR uses [FullStackID, MatchLength] as the key, where MatchLength is the length of an alloc site match. - AllocMatchInfo in this PR no longer has Matched because we always report matches. - AllocMatchInfo in this PR no longer has NumFramesMatched because it has become part of the key. This deduplication roughly halves the number of messages printed out.
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-pgo Author: Kazu Hirata (kazutakahirata) Changes: With commit 2425626 we print out a lot of duplicate alloc site matches. This patch partially reverts the patch above. The core idea of using a map to deduplicate entries remains the same, but details are different.
This deduplication roughly halves the amount of messages printed out. Full diff: https://github.com/llvm/llvm-project/pull/142334.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
index a64dfc02f6bf3..9075c2663b108 100644
--- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
@@ -816,6 +816,11 @@ static bool isAllocationWithHotColdVariant(const Function *Callee,
}
}
+struct AllocMatchInfo {
+ uint64_t TotalSize = 0; // Total profiled size reported for the matched context.
+ AllocationType AllocType = AllocationType::None; // Allocation type reported for the matched context.
+};
+
DenseMap<uint64_t, SmallVector<CallEdgeTy, 0>>
memprof::extractCallsFromIR(Module &M, const TargetLibraryInfo &TLI,
function_ref<bool(uint64_t)> IsPresentInProfile) {
@@ -994,6 +999,8 @@ static void addVPMetadata(Module &M, Instruction &I,
static void readMemprof(Module &M, Function &F,
IndexedInstrProfReader *MemProfReader,
const TargetLibraryInfo &TLI,
+ std::map<std::pair<uint64_t, unsigned>, AllocMatchInfo>
+ &FullStackIdToAllocMatchInfo,
std::set<std::vector<uint64_t>> &MatchedCallSites,
DenseMap<uint64_t, LocToLocMap> &UndriftMaps,
OptimizationRemarkEmitter &ORE) {
@@ -1206,11 +1213,9 @@ static void readMemprof(Module &M, Function &F,
// was requested.
if (ClPrintMemProfMatchInfo) {
assert(FullStackId != 0);
- errs() << "MemProf " << getAllocTypeAttributeString(AllocType)
- << " context with id " << FullStackId
- << " has total profiled size "
- << AllocInfo->Info.getTotalSize() << " is matched with "
- << InlinedCallStack.size() << " frames\n";
+ FullStackIdToAllocMatchInfo[std::make_pair(
+ FullStackId, InlinedCallStack.size())] = {
+ AllocInfo->Info.getTotalSize(), AllocType};
}
}
}
@@ -1325,6 +1330,12 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
if (SalvageStaleProfile)
UndriftMaps = computeUndriftMap(M, MemProfReader.get(), TLI);
+ // Map from the stack hash and matched frame count of each allocation context
+ // in the function profiles to the total profiled size (bytes) and allocation
+ // type.
+ std::map<std::pair<uint64_t, unsigned>, AllocMatchInfo>
+ FullStackIdToAllocMatchInfo;
+
// Set of the matched call sites, each expressed as a sequence of an inline
// call stack.
std::set<std::vector<uint64_t>> MatchedCallSites;
@@ -1335,11 +1346,18 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
const TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
- readMemprof(M, F, MemProfReader.get(), TLI, MatchedCallSites, UndriftMaps,
- ORE);
+ readMemprof(M, F, MemProfReader.get(), TLI, FullStackIdToAllocMatchInfo,
+ MatchedCallSites, UndriftMaps, ORE);
}
if (ClPrintMemProfMatchInfo) {
+ for (const auto &[IdLengthPair, Info] : FullStackIdToAllocMatchInfo) {
+ auto [Id, Length] = IdLengthPair;
+ errs() << "MemProf " << getAllocTypeAttributeString(Info.AllocType)
+ << " context with id " << Id << " has total profiled size "
+ << Info.TotalSize << " is matched with " << Length << " frames\n";
+ }
+
for (const auto &CallStack : MatchedCallSites) {
errs() << "MemProf callsite match for inline call stack";
for (uint64_t StackId : CallStack)
diff --git a/llvm/test/Transforms/PGOProfile/memprof.ll b/llvm/test/Transforms/PGOProfile/memprof.ll
index e48da36a6d97c..c69d0311e0388 100644
--- a/llvm/test/Transforms/PGOProfile/memprof.ll
+++ b/llvm/test/Transforms/PGOProfile/memprof.ll
@@ -111,13 +111,13 @@
; RUN: opt < %s -passes='memprof-use<profile-filename=%t.memprofdata>' -pgo-warn-missing-function -S -memprof-min-ave-lifetime-access-density-hot-threshold=0 2>&1 | FileCheck %s --check-prefixes=MEMPROF,ALL
; MEMPROFMATCHINFO: MemProf notcold context with id 1093248920606587996 has total profiled size 10 is matched with 1 frames
-; MEMPROFMATCHINFO: MemProf cold context with id 8525406123785421946 has total profiled size 10 is matched with 1 frames
-; MEMPROFMATCHINFO: MemProf cold context with id 16342802530253093571 has total profiled size 10 is matched with 1 frames
-; MEMPROFMATCHINFO: MemProf cold context with id 18254812774972004394 has total profiled size 10 is matched with 1 frames
-; MEMPROFMATCHINFO: MemProf cold context with id 11714230664165068698 has total profiled size 10 is matched with 1 frames
; MEMPROFMATCHINFO: MemProf notcold context with id 5725971306423925017 has total profiled size 10 is matched with 1 frames
; MEMPROFMATCHINFO: MemProf notcold context with id 6792096022461663180 has total profiled size 10 is matched with 1 frames
+; MEMPROFMATCHINFO: MemProf cold context with id 8525406123785421946 has total profiled size 10 is matched with 1 frames
+; MEMPROFMATCHINFO: MemProf cold context with id 11714230664165068698 has total profiled size 10 is matched with 1 frames
; MEMPROFMATCHINFO: MemProf cold context with id 15737101490731057601 has total profiled size 10 is matched with 1 frames
+; MEMPROFMATCHINFO: MemProf cold context with id 16342802530253093571 has total profiled size 10 is matched with 1 frames
+; MEMPROFMATCHINFO: MemProf cold context with id 18254812774972004394 has total profiled size 10 is matched with 1 frames
; MEMPROFMATCHINFO: MemProf callsite match for inline call stack 748269490701775343
; MEMPROFMATCHINFO: MemProf callsite match for inline call stack 1544787832369987002
; MEMPROFMATCHINFO: MemProf callsite match for inline call stack 2061451396820446691
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
lgtm with comment update noted below
With: commit 2425626 Author: Kazu Hirata <kazu@google.com> Date: Sun Jun 1 08:09:58 2025 -0700 we print out a lot of duplicate alloc site matches. This patch partially reverts the patch above. The core idea of using a map to deduplicate entries remains the same, but details are different. Specifically: - This PR uses the [FullStackID, MatchLength] as the key, where MatchLength is the length of an alloc site match. - AllocMatchInfo in this PR no longer has Matched because we always report matches. - AllocMatchInfo in this PR no longer has NumFramesMatched because it has become part of the key. This deduplication roughly halves the amount of messages printed out.
With:
commit 2425626
Author: Kazu Hirata kazu@google.com
Date: Sun Jun 1 08:09:58 2025 -0700
we print out a lot of duplicate alloc site matches.
This patch partially reverts the patch above. The core idea of using
a map to deduplicate entries remains the same, but details are
different. Specifically:
- This PR uses the [FullStackID, MatchLength] as the key, where
  MatchLength is the length of an alloc site match.
- AllocMatchInfo in this PR no longer has Matched because we always
  report matches.
- AllocMatchInfo in this PR no longer has NumFramesMatched because it
  has become part of the key.
This deduplication roughly halves the amount of messages printed out.