Skip to content

Commit 21ba91c

Browse files
authored
[SamplePGO] Add a cutoff for number of profile matching anchors (#95542)
The algorithm added by PR #87375 is potentially quadratic in the number of anchors. This is almost never a problem, because functions normally have a reasonable number of function calls. However, in some rare cases of auto-generated code, we observed very large functions that trigger quadratic behaviour here (resulting in >130GB of peak heap memory usage for clang). Let's add a knob for controlling the max number of callsites in a function above which stale profile matching won't be performed. The knob defaults to UINT_MAX, so existing behaviour is unchanged unless it is set explicitly.
1 parent 3a2e442 commit 21ba91c

File tree

2 files changed

+19
-0
lines changed

2 files changed

+19
-0
lines changed

llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include "llvm/Transforms/IPO/SampleProfileMatcher.h"
1515
#include "llvm/IR/IntrinsicInst.h"
1616
#include "llvm/IR/MDBuilder.h"
17+
#include "llvm/Support/CommandLine.h"
1718

1819
using namespace llvm;
1920
using namespace sampleprof;
@@ -24,6 +25,11 @@ extern cl::opt<bool> SalvageStaleProfile;
2425
extern cl::opt<bool> PersistProfileStaleness;
2526
extern cl::opt<bool> ReportProfileStaleness;
2627

28+
static cl::opt<unsigned> SalvageStaleProfileMaxCallsites(
29+
"salvage-stale-profile-max-callsites", cl::Hidden, cl::init(UINT_MAX),
30+
cl::desc("The maximum number of callsites in a function, above which stale "
31+
"profile matching will be skipped."));
32+
2733
void SampleProfileMatcher::findIRAnchors(const Function &F,
2834
AnchorMap &IRAnchors) {
2935
// For inlined code, recover the original callsite and callee by finding the
@@ -300,6 +306,16 @@ void SampleProfileMatcher::runStaleProfileMatching(
300306
if (FilteredIRAnchorsList.empty() || FilteredProfileAnchorList.empty())
301307
return;
302308

309+
if (FilteredIRAnchorsList.size() > SalvageStaleProfileMaxCallsites ||
310+
FilteredProfileAnchorList.size() > SalvageStaleProfileMaxCallsites) {
311+
LLVM_DEBUG(dbgs() << "Skip stale profile matching for " << F.getName()
312+
<< " because the number of callsites in the IR is "
313+
<< FilteredIRAnchorsList.size()
314+
<< " and in the profile is "
315+
<< FilteredProfileAnchorList.size() << "\n");
316+
return;
317+
}
318+
303319
// Match the callsite anchors by finding the longest common subsequence
304320
// between IR and profile. Note that we need to use IR anchor as base(A side)
305321
// to align with the order of IRToProfileLocationMap.

llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-matching-LCS.ll

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
; REQUIRES: x86_64-linux
22
; REQUIRES: asserts
33
; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-stale-profile-matching-LCS.prof --salvage-stale-profile -S --debug-only=sample-profile,sample-profile-matcher,sample-profile-impl 2>&1 | FileCheck %s
4+
; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-stale-profile-matching-LCS.prof --salvage-stale-profile -S --debug-only=sample-profile,sample-profile-matcher,sample-profile-impl --salvage-stale-profile-max-callsites=6 2>&1 | FileCheck %s -check-prefix=CHECK-MAX-CALLSITES
45

56
; CHECK: Run stale profile matching for test_direct_call
67
; CHECK: Location is matched from 1 to 1
@@ -27,6 +28,8 @@
2728
; CHECK: Callsite with callee:unknown.indirect.callee is matched from 9 to 6
2829
; CHECK: Callsite with callee:C is matched from 10 to 7
2930

31+
; CHECK-MAX-CALLSITES: Skip stale profile matching for test_direct_call
32+
; CHECK-MAX-CALLSITES-NOT: Skip stale profile matching for test_indirect_call
3033

3134
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
3235
target triple = "x86_64-unknown-linux-gnu"

0 commit comments

Comments
 (0)