Skip to content

Commit f7fe1e8

Browse files
committed
[Instrumentation] Fix EdgeCounts vector size in SetBranchWeights
setBranchWeights() calculates the size of the EdgeCounts vector using OutEdges.size(), but this is an underestimate with coroutines. Use the number of successors instead, as the vector will be indexed by the result of the GetSuccessorNumber() function. Rename the Size local to make it clear what it refers to. A unit test, provided by @ellishg, is included. Fixes #97962 (regression from ffd337b)
1 parent 79658d6 commit f7fe1e8

File tree

2 files changed

+52
-4
lines changed

2 files changed

+52
-4
lines changed

llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1628,11 +1628,17 @@ void PGOUseFunc::setBranchWeights() {
16281628
continue;
16291629

16301630
// We have a non-zero Branch BB.
1631-
unsigned Size = BBCountInfo.OutEdges.size();
1632-
SmallVector<uint64_t, 2> EdgeCounts(Size, 0);
1631+
1632+
// SuccessorCount can be greater than OutEdgesCount, because
1633+
// removed edges don't appear in OutEdges.
1634+
unsigned OutEdgesCount = BBCountInfo.OutEdges.size();
1635+
unsigned SuccessorCount = BB.getTerminator()->getNumSuccessors();
1636+
assert(OutEdgesCount <= SuccessorCount);
1637+
1638+
SmallVector<uint64_t, 2> EdgeCounts(SuccessorCount, 0);
16331639
uint64_t MaxCount = 0;
1634-
for (unsigned s = 0; s < Size; s++) {
1635-
const PGOUseEdge *E = BBCountInfo.OutEdges[s];
1640+
for (unsigned It = 0; It < OutEdgesCount; It++) {
1641+
const PGOUseEdge *E = BBCountInfo.OutEdges[It];
16361642
const BasicBlock *SrcBB = E->SrcBB;
16371643
const BasicBlock *DestBB = E->DestBB;
16381644
if (DestBB == nullptr)
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
; RUN: rm -rf %t && split-file %s %t
2+
3+
; RUN: llvm-profdata merge %t/a.proftext -o %t/a.profdata
4+
; RUN: opt < %t/a.ll --passes=pgo-instr-use -pgo-test-profile-file=%t/a.profdata
5+
6+
;--- a.ll
7+
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
8+
target triple = "x86_64-redhat-linux-gnu"
9+
10+
define void @_bar() presplitcoroutine personality ptr null {
11+
%1 = call token @llvm.coro.save(ptr null)
12+
%2 = call i8 @llvm.coro.suspend(token none, i1 false)
13+
switch i8 %2, label %5 [
14+
i8 0, label %3
15+
i8 1, label %4
16+
]
17+
18+
3: ; preds = %0
19+
ret void
20+
21+
4: ; preds = %0
22+
ret void
23+
24+
5: ; preds = %0
25+
ret void
26+
}
27+
28+
declare token @llvm.coro.save(ptr)
29+
30+
declare i8 @llvm.coro.suspend(token, i1)
31+
32+
;--- a.proftext
33+
# IR level Instrumentation Flag
34+
:ir
35+
36+
_bar
37+
# Func Hash:
38+
1063705160175073211
39+
# Num Counters:
40+
2
41+
1
42+
0

0 commit comments

Comments
 (0)