Skip to content

Commit 17a2ba8

Browse files
committed
[ctx_prof] Handle select
1 parent 12d9485 commit 17a2ba8

File tree

5 files changed

+141
-3
lines changed

5 files changed

+141
-3
lines changed

llvm/include/llvm/Analysis/CtxProfAnalysis.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,9 @@ class CtxProfAnalysis : public AnalysisInfoMixin<CtxProfAnalysis> {
117117

118118
/// Get the instruction instrumenting a BB, or nullptr if not present.
119119
static InstrProfIncrementInst *getBBInstrumentation(BasicBlock &BB);
120+
121+
/// Get the step instrumentation associated with a `select`.
122+
static InstrProfIncrementInstStep *getSelectInstrumentation(SelectInst &SI);
120123
};
121124

122125
class CtxProfAnalysisPrinterPass

llvm/lib/Analysis/CtxProfAnalysis.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,15 @@ InstrProfIncrementInst *CtxProfAnalysis::getBBInstrumentation(BasicBlock &BB) {
254254
return nullptr;
255255
}
256256

257+
InstrProfIncrementInstStep *
258+
CtxProfAnalysis::getSelectInstrumentation(SelectInst &SI) {
259+
Instruction *Prev = &SI;
260+
while ((Prev = Prev->getPrevNode()))
261+
if (auto *Step = dyn_cast<InstrProfIncrementInstStep>(Prev))
262+
return Step;
263+
return nullptr;
264+
}
265+
257266
template <class ProfilesTy, class ProfTy>
258267
static void preorderVisit(ProfilesTy &Profiles,
259268
function_ref<void(ProfTy &)> Visitor,

llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp

Lines changed: 44 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,8 @@ class ProfileAnnotator final {
154154

155155
bool hasCount() const { return Count.has_value(); }
156156

157+
uint64_t getCount() const { return *Count; }
158+
157159
bool trySetSingleUnknownInEdgeCount() {
158160
if (UnknownCountInEdges == 1) {
159161
setSingleUnknownEdgeCount(InEdges);
@@ -266,6 +268,21 @@ class ProfileAnnotator final {
266268
return HitExit;
267269
}
268270

271+
bool allNonColdSelectsHaveProfile() const {
272+
for (const auto &BB : F) {
273+
if (getBBInfo(BB).getCount() > 0) {
274+
for (const auto &I : BB) {
275+
if (const auto *SI = dyn_cast<SelectInst>(&I)) {
276+
if (!SI->getMetadata(LLVMContext::MD_prof)) {
277+
return false;
278+
}
279+
}
280+
}
281+
}
282+
}
283+
return true;
284+
}
285+
269286
public:
270287
ProfileAnnotator(Function &F, const SmallVectorImpl<uint64_t> &Counters,
271288
InstrProfSummaryBuilder &PB)
@@ -324,12 +341,34 @@ class ProfileAnnotator final {
324341
PB.addEntryCount(Counters[0]);
325342

326343
for (auto &BB : F) {
344+
const auto &BBInfo = getBBInfo(BB);
345+
if (BBInfo.getCount() > 0) {
346+
for (auto &I : BB) {
347+
if (auto *SI = dyn_cast<SelectInst>(&I)) {
348+
if (auto *Step = CtxProfAnalysis::getSelectInstrumentation(*SI)) {
349+
auto Index = Step->getIndex()->getZExtValue();
350+
assert(
351+
Index < Counters.size() &&
352+
"The index of the step instruction must be inside the "
353+
"counters vector by "
354+
"construction - tripping this assertion indicates a bug in "
355+
"how the contextual profile is managed by IPO transforms");
356+
auto TotalCount = BBInfo.getCount();
357+
auto TrueCount = Counters[Index];
358+
auto FalseCount =
359+
(TotalCount > TrueCount ? TotalCount - TrueCount : 0U);
360+
setProfMetadata(F.getParent(), SI, {TrueCount, FalseCount},
361+
std::max(TrueCount, FalseCount));
362+
}
363+
}
364+
}
365+
}
327366
if (succ_size(&BB) < 2)
328367
continue;
329368
auto *Term = BB.getTerminator();
330369
SmallVector<uint64_t, 2> EdgeCounts(Term->getNumSuccessors(), 0);
331370
uint64_t MaxCount = 0;
332-
const auto &BBInfo = getBBInfo(BB);
371+
333372
for (unsigned SuccIdx = 0, Size = BBInfo.getNumOutEdges(); SuccIdx < Size;
334373
++SuccIdx) {
335374
uint64_t EdgeCount = BBInfo.getEdgeCount(SuccIdx);
@@ -343,12 +382,15 @@ class ProfileAnnotator final {
343382
setProfMetadata(F.getParent(), Term, EdgeCounts, MaxCount);
344383
}
345384
assert(allCountersAreAssigned() &&
346-
"Expected all counters have been assigned.");
385+
"[ctx-prof] Expected all counters have been assigned.");
347386
assert(allTakenPathsExit() &&
348387
"[ctx-prof] Encountered a BB with more than one successor, where "
349388
"all outgoing edges have a 0 count. This occurs in non-exiting "
350389
"functions (message pumps, usually) which are not supported in the "
351390
"contextual profiling case");
391+
assert(allNonColdSelectsHaveProfile() &&
392+
"[ctx-prof] All non-cold select instructions were expected to have "
393+
"a profile.");
352394
}
353395
};
354396

llvm/lib/Transforms/Utils/InlineFunction.cpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2211,7 +2211,15 @@ remapIndices(Function &Caller, BasicBlock *StartBB,
22112211
}
22122212
for (auto &I : llvm::make_early_inc_range(*BB)) {
22132213
if (auto *Inc = dyn_cast<InstrProfIncrementInst>(&I)) {
2214-
if (Inc != BBID) {
2214+
if (isa<InstrProfIncrementInstStep>(Inc)) {
2215+
if (isa<Constant>(Inc->getStep())) {
2216+
assert(!Inc->getNextNode() || !isa<SelectInst>(Inc->getNextNode()));
2217+
Inc->eraseFromParent();
2218+
} else {
2219+
assert(isa_and_nonnull<SelectInst>(Inc->getNextNode()));
2220+
RewriteInstrIfNeeded(*Inc);
2221+
}
2222+
} else if (Inc != BBID) {
22152223
// If we're here it means that the BB had more than 1 IDs, presumably
22162224
// some coming from the callee. We "made up our mind" to keep the
22172225
// first one (which may or may not have been originally the caller's).
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
; Check that we handle `step` instrumentations. These adorn `select`s.
2+
; We don't want to confuse the `step` with normal increments, the latter of which
3+
; we use for BB ID-ing: we want to keep the `step`s after inlining, except if
4+
; the `select` is elided.
5+
;
6+
; RUN: split-file %s %t
7+
; RUN: llvm-ctxprof-util fromJSON --input=%t/profile.json --output=%t/profile.ctxprofdata
8+
;
9+
; RUN: opt -passes=ctx-instr-gen %t/example.ll -use-ctx-profile=%t/profile.ctxprofdata -S -o - | FileCheck %s --check-prefix=INSTR
10+
; RUN: opt -passes=ctx-instr-gen,module-inline %t/example.ll -use-ctx-profile=%t/profile.ctxprofdata -S -o - | FileCheck %s --check-prefix=POST-INL
11+
; RUN: opt -passes=ctx-instr-gen,module-inline,ctx-prof-flatten %t/example.ll -use-ctx-profile=%t/profile.ctxprofdata -S -o - | FileCheck %s --check-prefix=FLATTEN
12+
13+
; INSTR-LABEL: yes:
14+
; INSTR-NEXT: call void @llvm.instrprof.increment(ptr @foo, i64 [[#]], i32 2, i32 1)
15+
; INSTR-NEXT: call void @llvm.instrprof.callsite(ptr @foo, i64 [[#]], i32 2, i32 0, ptr @bar)
16+
17+
; INSTR-LABEL: no:
18+
; INSTR-NEXT: call void @llvm.instrprof.callsite(ptr @foo, i64 [[#]], i32 2, i32 1, ptr @bar)
19+
20+
; INSTR-LABEL: define i32 @bar
21+
; INSTR-NEXT: call void @llvm.instrprof.increment(ptr @bar, i64 [[#]], i32 2, i32 0)
22+
; INSTR-NEXT: %inc =
23+
; INSTR: %test = icmp eq i32 %t, 0
24+
; INSTR-NEXT: %1 = zext i1 %test to i64
25+
; INSTR-NEXT: call void @llvm.instrprof.increment.step(ptr @bar, i64 [[#]], i32 2, i32 1, i64 %1)
26+
; INSTR-NEXT: %res = select
27+
28+
; POST-INL-LABEL: yes:
29+
; POST-INL-NEXT: call void @llvm.instrprof.increment
30+
; POST-INL: call void @llvm.instrprof.increment.step
31+
; POST-INL-NEXT: %res.i = select
32+
33+
; POST-INL-LABEL: no:
34+
; POST-INL-NEXT: call void @llvm.instrprof.increment
35+
; POST-INL-NEXT: br label
36+
37+
; POST-INL-LABEL: exit:
38+
; POST-INL-NEXT: %res = phi i32 [ %res.i, %yes ], [ 1, %no ]
39+
40+
; FLATTEN-LABEL: yes:
41+
; FLATTEN: %res.i = select i1 %test.i, i32 %inc.i, i32 %dec.i, !prof ![[SELPROF:[0-9]+]]
42+
; FLATTEN-LABEL: no:
43+
;
44+
; See the profile, in the "yes" case we set the step counter's value, in @bar, to 3. The total
45+
; entry count of that BB is 4.
46+
; ![[SELPROF]] = !{!"branch_weights", i32 3, i32 1}
47+
48+
;--- example.ll
49+
define i32 @foo(i32 %t) !guid !0 {
50+
%test = icmp slt i32 %t, 0
51+
br i1 %test, label %yes, label %no
52+
yes:
53+
%res1 = call i32 @bar(i32 %t) alwaysinline
54+
br label %exit
55+
no:
56+
; this will result in eliding the select in @bar, when inlined.
57+
%res2 = call i32 @bar(i32 0) alwaysinline
58+
br label %exit
59+
exit:
60+
%res = phi i32 [%res1, %yes], [%res2, %no]
61+
ret i32 %res
62+
}
63+
64+
define i32 @bar(i32 %t) !guid !1 {
65+
%inc = add i32 %t, 1
66+
%dec = sub i32 %t, 1
67+
%test = icmp eq i32 %t, 0
68+
%res = select i1 %test, i32 %inc, i32 %dec
69+
ret i32 %res
70+
}
71+
72+
!0 = !{i64 1234}
73+
!1 = !{i64 5678}
74+
75+
;--- profile.json
76+
[{"Guid":1234, "Counters":[10, 4], "Callsites":[[{"Guid": 5678, "Counters":[4,3]}],[{"Guid": 5678, "Counters":[6,6]}]]}]

0 commit comments

Comments
 (0)