Skip to content

Commit 2425626

Browse files
[memprof] Print alloc site matches immediately (#142233)
Without this patch, we buffer alloc site matches in FullStackIdToAllocMatchInfo and then print them out at the end of MemProfUsePass. This practice is problematic when we have multiple matches per alloc site. Consider: char *f1() { return new char[3]; } char *f2() { return f1(); } __attribute__((noinline)) char *f3() { return f2(); } In this example, f1 contains an alloc site, of course, but so do f2 and f3 via inlining. When something like this happens, FullStackIdToAllocMatchInfo gets updated multiple times for the same full stack ID at: FullStackIdToAllocMatchInfo[FullStackId] = { ... }; with a different InlinedCallStack.size() each time, so each update overwrites the previous entry and only the last match is printed. This patch changes the behavior by printing out alloc site matches immediately, potentially printing out multiple matches for the same FullStackId. It is up to the consumer of the messages to figure out, for example, the length of the longest match. For the test, this patch adjusts an existing one, memprof-dump-matched-alloc-site.ll. Specifically, this patch "restores" the IR and corresponding profile for f2 and f1 so that the compiler generates a "MemProf notcold" message for each of f1, f2, and f3.
1 parent de7f2fb commit 2425626

File tree

3 files changed

+76
-48
lines changed

3 files changed

+76
-48
lines changed

llvm/lib/Transforms/Instrumentation/MemProfiler.cpp

Lines changed: 13 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -816,13 +816,6 @@ static bool isAllocationWithHotColdVariant(const Function *Callee,
816816
}
817817
}
818818

819-
struct AllocMatchInfo {
820-
uint64_t TotalSize = 0;
821-
size_t NumFramesMatched = 0;
822-
AllocationType AllocType = AllocationType::None;
823-
bool Matched = false;
824-
};
825-
826819
DenseMap<uint64_t, SmallVector<CallEdgeTy, 0>>
827820
memprof::extractCallsFromIR(Module &M, const TargetLibraryInfo &TLI,
828821
function_ref<bool(uint64_t)> IsPresentInProfile) {
@@ -998,13 +991,12 @@ static void addVPMetadata(Module &M, Instruction &I,
998991
}
999992
}
1000993

1001-
static void
1002-
readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
1003-
const TargetLibraryInfo &TLI,
1004-
std::map<uint64_t, AllocMatchInfo> &FullStackIdToAllocMatchInfo,
1005-
std::set<std::vector<uint64_t>> &MatchedCallSites,
1006-
DenseMap<uint64_t, LocToLocMap> &UndriftMaps,
1007-
OptimizationRemarkEmitter &ORE) {
994+
static void readMemprof(Module &M, Function &F,
995+
IndexedInstrProfReader *MemProfReader,
996+
const TargetLibraryInfo &TLI,
997+
std::set<std::vector<uint64_t>> &MatchedCallSites,
998+
DenseMap<uint64_t, LocToLocMap> &UndriftMaps,
999+
OptimizationRemarkEmitter &ORE) {
10081000
auto &Ctx = M.getContext();
10091001
// Previously we used getIRPGOFuncName() here. If F is local linkage,
10101002
// getIRPGOFuncName() returns FuncName with prefix 'FileName;'. But
@@ -1214,9 +1206,11 @@ readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
12141206
// was requested.
12151207
if (ClPrintMemProfMatchInfo) {
12161208
assert(FullStackId != 0);
1217-
FullStackIdToAllocMatchInfo[FullStackId] = {
1218-
AllocInfo->Info.getTotalSize(), InlinedCallStack.size(),
1219-
AllocType, /*Matched=*/true};
1209+
errs() << "MemProf " << getAllocTypeAttributeString(AllocType)
1210+
<< " context with id " << FullStackId
1211+
<< " has total profiled size "
1212+
<< AllocInfo->Info.getTotalSize() << " is matched with "
1213+
<< InlinedCallStack.size() << " frames\n";
12201214
}
12211215
}
12221216
}
@@ -1331,11 +1325,6 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
13311325
if (SalvageStaleProfile)
13321326
UndriftMaps = computeUndriftMap(M, MemProfReader.get(), TLI);
13331327

1334-
// Map from the stack has of each allocation context in the function profiles
1335-
// to the total profiled size (bytes), allocation type, and whether we matched
1336-
// it to an allocation in the IR.
1337-
std::map<uint64_t, AllocMatchInfo> FullStackIdToAllocMatchInfo;
1338-
13391328
// Set of the matched call sites, each expressed as a sequence of an inline
13401329
// call stack.
13411330
std::set<std::vector<uint64_t>> MatchedCallSites;
@@ -1346,17 +1335,11 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
13461335

13471336
const TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
13481337
auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
1349-
readMemprof(M, F, MemProfReader.get(), TLI, FullStackIdToAllocMatchInfo,
1350-
MatchedCallSites, UndriftMaps, ORE);
1338+
readMemprof(M, F, MemProfReader.get(), TLI, MatchedCallSites, UndriftMaps,
1339+
ORE);
13511340
}
13521341

13531342
if (ClPrintMemProfMatchInfo) {
1354-
for (const auto &[Id, Info] : FullStackIdToAllocMatchInfo)
1355-
errs() << "MemProf " << getAllocTypeAttributeString(Info.AllocType)
1356-
<< " context with id " << Id << " has total profiled size "
1357-
<< Info.TotalSize << (Info.Matched ? " is" : " not")
1358-
<< " matched with " << Info.NumFramesMatched << " frames\n";
1359-
13601343
for (const auto &CallStack : MatchedCallSites) {
13611344
errs() << "MemProf callsite match for inline call stack";
13621345
for (uint64_t StackId : CallStack)

llvm/test/Transforms/PGOProfile/memprof-dump-matched-alloc-site.ll

Lines changed: 59 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -31,11 +31,41 @@
3131
;--- memprof-dump-matched-alloc-site.yaml
3232
---
3333
HeapProfileRecords:
34+
- GUID: _Z2f2v
35+
AllocSites:
36+
- Callstack:
37+
- { Function: _Z2f1v, LineOffset: 0, Column: 21, IsInlineFrame: true }
38+
- { Function: _Z2f2v, LineOffset: 0, Column: 21, IsInlineFrame: true }
39+
- { Function: _Z2f3v, LineOffset: 0, Column: 47, IsInlineFrame: false }
40+
- { Function: main, LineOffset: 1, Column: 3, IsInlineFrame: false }
41+
MemInfoBlock:
42+
AllocCount: 1
43+
TotalSize: 3
44+
TotalLifetime: 0
45+
TotalLifetimeAccessDensity: 0
46+
CallSites:
47+
- Frames:
48+
- { Function: _Z2f1v, LineOffset: 0, Column: 21, IsInlineFrame: true }
49+
- { Function: _Z2f2v, LineOffset: 0, Column: 21, IsInlineFrame: true }
50+
- { Function: _Z2f3v, LineOffset: 0, Column: 47, IsInlineFrame: false }
51+
- GUID: _Z2f1v
52+
AllocSites:
53+
- Callstack:
54+
- { Function: _Z2f1v, LineOffset: 0, Column: 21, IsInlineFrame: true }
55+
- { Function: _Z2f2v, LineOffset: 0, Column: 21, IsInlineFrame: true }
56+
- { Function: _Z2f3v, LineOffset: 0, Column: 47, IsInlineFrame: false }
57+
- { Function: main, LineOffset: 1, Column: 3, IsInlineFrame: false }
58+
MemInfoBlock:
59+
AllocCount: 1
60+
TotalSize: 3
61+
TotalLifetime: 0
62+
TotalLifetimeAccessDensity: 0
63+
CallSites: []
3464
- GUID: _Z2f3v
3565
AllocSites:
3666
- Callstack:
37-
- { Function: _ZL2f1v, LineOffset: 0, Column: 35, IsInlineFrame: true }
38-
- { Function: _ZL2f2v, LineOffset: 0, Column: 35, IsInlineFrame: true }
67+
- { Function: _Z2f1v, LineOffset: 0, Column: 21, IsInlineFrame: true }
68+
- { Function: _Z2f2v, LineOffset: 0, Column: 21, IsInlineFrame: true }
3969
- { Function: _Z2f3v, LineOffset: 0, Column: 47, IsInlineFrame: false }
4070
- { Function: main, LineOffset: 1, Column: 3, IsInlineFrame: false }
4171
MemInfoBlock:
@@ -47,32 +77,47 @@ HeapProfileRecords:
4777
# Kept empty here because this section is irrelevant for this test.
4878
...
4979
;--- memprof-dump-matched-alloc-site.ll
50-
; CHECK: MemProf notcold context with id 12978026349401156968 has total profiled size 3 is matched with 3 frames
80+
; CHECK: MemProf notcold context with id 5736731103568718490 has total profiled size 3 is matched with 1 frames
81+
; CHECK: MemProf notcold context with id 5736731103568718490 has total profiled size 3 is matched with 2 frames
82+
; CHECK: MemProf notcold context with id 5736731103568718490 has total profiled size 3 is matched with 3 frames
5183

5284
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
5385
target triple = "x86_64-unknown-linux-gnu"
5486

55-
define ptr @_Z2f3v() {
87+
define ptr @_Z2f1v() {
5688
entry:
57-
%call.i.i = call ptr @_Znam(i64 0), !dbg !3
58-
ret ptr null
89+
%call = call ptr @_Znam(i64 0), !dbg !3
90+
ret ptr %call
5991
}
6092

6193
declare ptr @_Znam(i64)
6294

95+
define ptr @_Z2f2v() {
96+
entry:
97+
%call.i = call ptr @_Znam(i64 0), !dbg !7
98+
ret ptr %call.i
99+
}
100+
101+
define ptr @_Z2f3v() {
102+
entry:
103+
%call.i.i = call ptr @_Znam(i64 0), !dbg !10
104+
ret ptr %call.i.i
105+
}
106+
63107
!llvm.dbg.cu = !{!0}
64108
!llvm.module.flags = !{!2}
65109

66110
!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1)
67111
!1 = !DIFile(filename: "memprof-dump-matched-alloc-site.cc", directory: "/")
68112
!2 = !{i32 2, !"Debug Info Version", i32 3}
69-
!3 = !DILocation(line: 1, column: 35, scope: !4, inlinedAt: !7)
70-
!4 = distinct !DISubprogram(name: "f1", linkageName: "_ZL2f1v", scope: !1, file: !1, line: 1, type: !5, scopeLine: 1, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0)
113+
!3 = !DILocation(line: 1, column: 21, scope: !4)
114+
!4 = distinct !DISubprogram(name: "f1", linkageName: "_Z2f1v", scope: !1, file: !1, line: 1, type: !5, scopeLine: 1, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
71115
!5 = !DISubroutineType(types: !6)
72116
!6 = !{}
73-
!7 = distinct !DILocation(line: 2, column: 35, scope: !8, inlinedAt: !9)
74-
!8 = distinct !DISubprogram(name: "f2", linkageName: "_ZL2f2v", scope: !1, file: !1, line: 2, type: !5, scopeLine: 2, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0)
75-
!9 = distinct !DILocation(line: 3, column: 47, scope: !10)
76-
!10 = distinct !DISubprogram(name: "f3", linkageName: "_Z2f3v", scope: !1, file: !1, line: 3, type: !5, scopeLine: 3, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
77-
!11 = !DILocation(line: 6, column: 3, scope: !12)
78-
!12 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 5, type: !5, scopeLine: 5, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
117+
!7 = !DILocation(line: 1, column: 21, scope: !4, inlinedAt: !8)
118+
!8 = distinct !DILocation(line: 2, column: 21, scope: !9)
119+
!9 = distinct !DISubprogram(name: "f2", linkageName: "_Z2f2v", scope: !1, file: !1, line: 2, type: !5, scopeLine: 2, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
120+
!10 = !DILocation(line: 1, column: 21, scope: !4, inlinedAt: !11)
121+
!11 = distinct !DILocation(line: 2, column: 21, scope: !9, inlinedAt: !12)
122+
!12 = distinct !DILocation(line: 3, column: 47, scope: !13)
123+
!13 = distinct !DISubprogram(name: "f3", linkageName: "_Z2f3v", scope: !1, file: !1, line: 3, type: !5, scopeLine: 3, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)

llvm/test/Transforms/PGOProfile/memprof.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -111,13 +111,13 @@
111111
; RUN: opt < %s -passes='memprof-use<profile-filename=%t.memprofdata>' -pgo-warn-missing-function -S -memprof-min-ave-lifetime-access-density-hot-threshold=0 2>&1 | FileCheck %s --check-prefixes=MEMPROF,ALL
112112

113113
; MEMPROFMATCHINFO: MemProf notcold context with id 1093248920606587996 has total profiled size 10 is matched with 1 frames
114-
; MEMPROFMATCHINFO: MemProf notcold context with id 5725971306423925017 has total profiled size 10 is matched with 1 frames
115-
; MEMPROFMATCHINFO: MemProf notcold context with id 6792096022461663180 has total profiled size 10 is matched with 1 frames
116114
; MEMPROFMATCHINFO: MemProf cold context with id 8525406123785421946 has total profiled size 10 is matched with 1 frames
117-
; MEMPROFMATCHINFO: MemProf cold context with id 11714230664165068698 has total profiled size 10 is matched with 1 frames
118-
; MEMPROFMATCHINFO: MemProf cold context with id 15737101490731057601 has total profiled size 10 is matched with 1 frames
119115
; MEMPROFMATCHINFO: MemProf cold context with id 16342802530253093571 has total profiled size 10 is matched with 1 frames
120116
; MEMPROFMATCHINFO: MemProf cold context with id 18254812774972004394 has total profiled size 10 is matched with 1 frames
117+
; MEMPROFMATCHINFO: MemProf cold context with id 11714230664165068698 has total profiled size 10 is matched with 1 frames
118+
; MEMPROFMATCHINFO: MemProf notcold context with id 5725971306423925017 has total profiled size 10 is matched with 1 frames
119+
; MEMPROFMATCHINFO: MemProf notcold context with id 6792096022461663180 has total profiled size 10 is matched with 1 frames
120+
; MEMPROFMATCHINFO: MemProf cold context with id 15737101490731057601 has total profiled size 10 is matched with 1 frames
121121
; MEMPROFMATCHINFO: MemProf callsite match for inline call stack 748269490701775343
122122
; MEMPROFMATCHINFO: MemProf callsite match for inline call stack 1544787832369987002
123123
; MEMPROFMATCHINFO: MemProf callsite match for inline call stack 2061451396820446691

0 commit comments

Comments
 (0)