Skip to content

Commit a84d7ab

Browse files
committed
[DFAJumpThreading] Add an early exit heuristic for unpredictable values
Right now the algorithm does not exit on unpredictable values. It waits until all the paths have been enumerated to see if any of those paths have that value. Waiting this late leads to a lot of wasteful computation and higher compile time. In this patch I have added a heuristic that checks whether the value comes from the same inner loop as the switch; if so, it is likely that the value will also be seen on a threadable path and that the code in `getStateDefMap()` will return an empty map. I tested this on the llvm test suite and the only change in the number of threaded switches was in 7zip (before 23, after 18). In all of those cases the current algorithm was partially threading the loop because it was hitting a limit on the number of paths to be explored. On increasing this limit, even the current algorithm finds paths where the unpredictable value is seen. Compile time (with the pass enabled by default and this patch): https://llvm-compile-time-tracker.com/compare.php?from=8c5e9cf737138aba22a4a8f64ef2c5efc80dd7f9&to=42c75d888058b35c6d15901b34e36251d8f766b9&stat=instructions:u Change-Id: Id6b61a2ce177cdb433c97b7916218a7fc2092d73
1 parent 8c5e9cf commit a84d7ab

File tree

3 files changed

+163
-15
lines changed

3 files changed

+163
-15
lines changed

llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp

Lines changed: 38 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@
6565
#include "llvm/Analysis/AssumptionCache.h"
6666
#include "llvm/Analysis/CodeMetrics.h"
6767
#include "llvm/Analysis/DomTreeUpdater.h"
68+
#include "llvm/Analysis/LoopInfo.h"
6869
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
6970
#include "llvm/Analysis/TargetTransformInfo.h"
7071
#include "llvm/IR/CFG.h"
@@ -95,6 +96,11 @@ static cl::opt<bool>
9596
cl::desc("View the CFG before DFA Jump Threading"),
9697
cl::Hidden, cl::init(false));
9798

99+
static cl::opt<bool> EarlyExitHeuristic(
100+
"dfa-early-exit-heuristic",
101+
cl::desc("Exit early if an unpredictable value come from the same loop"),
102+
cl::Hidden, cl::init(true));
103+
98104
static cl::opt<unsigned> MaxPathLength(
99105
"dfa-max-path-length",
100106
cl::desc("Max number of blocks searched to find a threading path"),
@@ -131,9 +137,9 @@ void unfold(DomTreeUpdater *DTU, SelectInstToUnfold SIToUnfold,
131137

132138
class DFAJumpThreading {
133139
public:
134-
DFAJumpThreading(AssumptionCache *AC, DominatorTree *DT,
140+
DFAJumpThreading(AssumptionCache *AC, DominatorTree *DT, LoopInfo *LI,
135141
TargetTransformInfo *TTI, OptimizationRemarkEmitter *ORE)
136-
: AC(AC), DT(DT), TTI(TTI), ORE(ORE) {}
142+
: AC(AC), DT(DT), LI(LI), TTI(TTI), ORE(ORE) {}
137143

138144
bool run(Function &F);
139145

@@ -161,6 +167,7 @@ class DFAJumpThreading {
161167

162168
AssumptionCache *AC;
163169
DominatorTree *DT;
170+
LoopInfo *LI;
164171
TargetTransformInfo *TTI;
165172
OptimizationRemarkEmitter *ORE;
166173
};
@@ -378,7 +385,8 @@ inline raw_ostream &operator<<(raw_ostream &OS, const ThreadingPath &TPath) {
378385
#endif
379386

380387
struct MainSwitch {
381-
MainSwitch(SwitchInst *SI, OptimizationRemarkEmitter *ORE) {
388+
MainSwitch(SwitchInst *SI, LoopInfo *LI, OptimizationRemarkEmitter *ORE)
389+
: LI(LI) {
382390
if (isCandidate(SI)) {
383391
Instr = SI;
384392
} else {
@@ -402,7 +410,7 @@ struct MainSwitch {
402410
///
403411
/// Also, collect select instructions to unfold.
404412
bool isCandidate(const SwitchInst *SI) {
405-
std::deque<Value *> Q;
413+
std::deque<std::pair<Value *, BasicBlock *>> Q;
406414
SmallSet<Value *, 16> SeenValues;
407415
SelectInsts.clear();
408416

@@ -411,22 +419,24 @@ struct MainSwitch {
411419
if (!isa<PHINode>(SICond))
412420
return false;
413421

414-
addToQueue(SICond, Q, SeenValues);
422+
addToQueue(SICond, nullptr, Q, SeenValues);
415423

416424
while (!Q.empty()) {
417-
Value *Current = Q.front();
425+
Value *Current = Q.front().first;
426+
BasicBlock *CurrentIncomingBB = Q.front().second;
418427
Q.pop_front();
419428

420429
if (auto *Phi = dyn_cast<PHINode>(Current)) {
421-
for (Value *Incoming : Phi->incoming_values()) {
422-
addToQueue(Incoming, Q, SeenValues);
430+
for (BasicBlock *IncomingBB : Phi->blocks()) {
431+
Value *Incoming = Phi->getIncomingValueForBlock(IncomingBB);
432+
addToQueue(Incoming, IncomingBB, Q, SeenValues);
423433
}
424434
LLVM_DEBUG(dbgs() << "\tphi: " << *Phi << "\n");
425435
} else if (SelectInst *SelI = dyn_cast<SelectInst>(Current)) {
426436
if (!isValidSelectInst(SelI))
427437
return false;
428-
addToQueue(SelI->getTrueValue(), Q, SeenValues);
429-
addToQueue(SelI->getFalseValue(), Q, SeenValues);
438+
addToQueue(SelI->getTrueValue(), CurrentIncomingBB, Q, SeenValues);
439+
addToQueue(SelI->getFalseValue(), CurrentIncomingBB, Q, SeenValues);
430440
LLVM_DEBUG(dbgs() << "\tselect: " << *SelI << "\n");
431441
if (auto *SelIUse = dyn_cast<PHINode>(SelI->user_back()))
432442
SelectInsts.push_back(SelectInstToUnfold(SelI, SelIUse));
@@ -439,18 +449,30 @@ struct MainSwitch {
439449
// initial switch values that can be ignored (they will hit the
440450
// unthreaded switch) but this assumption will get checked later after
441451
// paths have been enumerated (in function getStateDefMap).
452+
453+
// If the unpredictable value comes from the same inner loop it is
454+
// likely that it will also be on the enumerated paths, causing us to
455+
// exit after we have enumerated all the paths. This heuristic saves
456+
// compile time because a search for all the paths can become expensive.
457+
if (EarlyExitHeuristic && LI->getLoopFor(SI->getParent()) ==
458+
LI->getLoopFor(CurrentIncomingBB)) {
459+
LLVM_DEBUG(dbgs() << "\tExiting early due to unpredictability heuristic.\n");
460+
return false;
461+
}
462+
442463
continue;
443464
}
444465
}
445466

446467
return true;
447468
}
448469

449-
void addToQueue(Value *Val, std::deque<Value *> &Q,
470+
void addToQueue(Value *Val, BasicBlock *BB,
471+
std::deque<std::pair<Value *, BasicBlock *>> &Q,
450472
SmallSet<Value *, 16> &SeenValues) {
451473
if (SeenValues.contains(Val))
452474
return;
453-
Q.push_back(Val);
475+
Q.push_back({Val, BB});
454476
SeenValues.insert(Val);
455477
}
456478

@@ -488,6 +510,7 @@ struct MainSwitch {
488510
return true;
489511
}
490512

513+
LoopInfo *LI;
491514
SwitchInst *Instr = nullptr;
492515
SmallVector<SelectInstToUnfold, 4> SelectInsts;
493516
};
@@ -1262,7 +1285,7 @@ bool DFAJumpThreading::run(Function &F) {
12621285

12631286
LLVM_DEBUG(dbgs() << "\nCheck if SwitchInst in BB " << BB.getName()
12641287
<< " is a candidate\n");
1265-
MainSwitch Switch(SI, ORE);
1288+
MainSwitch Switch(SI, LI, ORE);
12661289

12671290
if (!Switch.getInstr())
12681291
continue;
@@ -1315,10 +1338,11 @@ PreservedAnalyses DFAJumpThreadingPass::run(Function &F,
13151338
FunctionAnalysisManager &AM) {
13161339
AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(F);
13171340
DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
1341+
LoopInfo &LI = AM.getResult<LoopAnalysis>(F);
13181342
TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F);
13191343
OptimizationRemarkEmitter ORE(&F);
13201344

1321-
if (!DFAJumpThreading(&AC, &DT, &TTI, &ORE).run(F))
1345+
if (!DFAJumpThreading(&AC, &DT, &LI, &TTI, &ORE).run(F))
13221346
return PreservedAnalyses::all();
13231347

13241348
PreservedAnalyses PA;

llvm/test/Transforms/DFAJumpThreading/dfa-unfold-select.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2-
; RUN: opt -S -passes=dfa-jump-threading %s | FileCheck %s
2+
; RUN: opt -S -passes=dfa-jump-threading -dfa-early-exit-heuristic=false %s | FileCheck %s
33

44
; These tests check if selects are unfolded properly for jump threading
55
; opportunities. There are three different patterns to consider:
Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
; REQUIRES: asserts
2+
; RUN: opt -S -passes=dfa-jump-threading %s -debug-only=dfa-jump-threading 2>&1 | FileCheck %s
3+
4+
; CHECK-COUNT-3: Exiting early due to unpredictability heuristic.
5+
6+
@.str.1 = private unnamed_addr constant [3 x i8] c"10\00", align 1
7+
@.str.2 = private unnamed_addr constant [3 x i8] c"30\00", align 1
8+
@.str.3 = private unnamed_addr constant [3 x i8] c"20\00", align 1
9+
@.str.4 = private unnamed_addr constant [3 x i8] c"40\00", align 1
10+
11+
define void @test1(i32 noundef %num, i32 noundef %num2) {
12+
entry:
13+
br label %while.body
14+
15+
while.body: ; preds = %entry, %sw.epilog
16+
%num.addr.0 = phi i32 [ %num, %entry ], [ %num.addr.1, %sw.epilog ]
17+
switch i32 %num.addr.0, label %sw.default [
18+
i32 10, label %sw.bb
19+
i32 30, label %sw.bb1
20+
i32 20, label %sw.bb2
21+
i32 40, label %sw.bb3
22+
]
23+
24+
sw.bb: ; preds = %while.body
25+
%call.i = tail call i32 @bar(ptr noundef nonnull @.str.1)
26+
br label %sw.epilog
27+
28+
sw.bb1: ; preds = %while.body
29+
%call.i4 = tail call i32 @bar(ptr noundef nonnull @.str.2)
30+
br label %sw.epilog
31+
32+
sw.bb2: ; preds = %while.body
33+
%call.i5 = tail call i32 @bar(ptr noundef nonnull @.str.3)
34+
br label %sw.epilog
35+
36+
sw.bb3: ; preds = %while.body
37+
%call.i6 = tail call i32 @bar(ptr noundef nonnull @.str.4)
38+
%call = tail call noundef i32 @foo()
39+
%add = add nsw i32 %call, %num2
40+
br label %sw.epilog
41+
42+
sw.default: ; preds = %while.body
43+
ret void
44+
45+
sw.epilog: ; preds = %sw.bb3, %sw.bb2, %sw.bb1, %sw.bb
46+
%num.addr.1 = phi i32 [ %add, %sw.bb3 ], [ 40, %sw.bb2 ], [ 20, %sw.bb1 ], [ 30, %sw.bb ]
47+
br label %while.body
48+
}
49+
50+
51+
define void @test2(i32 noundef %num, i32 noundef %num2) {
52+
entry:
53+
br label %while.body
54+
55+
while.body: ; preds = %entry, %sw.epilog
56+
%num.addr.0 = phi i32 [ %num, %entry ], [ %num.addr.1, %sw.epilog ]
57+
switch i32 %num.addr.0, label %sw.default [
58+
i32 10, label %sw.epilog
59+
i32 30, label %sw.bb1
60+
i32 20, label %sw.bb2
61+
i32 40, label %sw.bb3
62+
]
63+
64+
sw.bb1: ; preds = %while.body
65+
br label %sw.epilog
66+
67+
sw.bb2: ; preds = %while.body
68+
br label %sw.epilog
69+
70+
sw.bb3: ; preds = %while.body
71+
br label %sw.epilog
72+
73+
sw.default: ; preds = %while.body
74+
ret void
75+
76+
sw.epilog: ; preds = %while.body, %sw.bb3, %sw.bb2, %sw.bb1
77+
%.str.4.sink = phi ptr [ @.str.4, %sw.bb3 ], [ @.str.3, %sw.bb2 ], [ @.str.2, %sw.bb1 ], [ @.str.1, %while.body ]
78+
%num.addr.1 = phi i32 [ %num2, %sw.bb3 ], [ 40, %sw.bb2 ], [ 20, %sw.bb1 ], [ 30, %while.body ]
79+
%call.i6 = tail call i32 @bar(ptr noundef nonnull %.str.4.sink)
80+
br label %while.body
81+
}
82+
83+
84+
define void @test3(i32 noundef %num, i32 noundef %num2) {
85+
entry:
86+
%add = add nsw i32 %num2, 40
87+
br label %while.body
88+
89+
while.body: ; preds = %entry, %sw.epilog
90+
%num.addr.0 = phi i32 [ %num, %entry ], [ %num.addr.1, %sw.epilog ]
91+
switch i32 %num.addr.0, label %sw.default [
92+
i32 10, label %sw.bb
93+
i32 30, label %sw.bb1
94+
i32 20, label %sw.bb2
95+
i32 40, label %sw.bb3
96+
]
97+
98+
sw.bb: ; preds = %while.body
99+
%call.i = tail call i32 @bar(ptr noundef nonnull @.str.1)
100+
br label %sw.epilog
101+
102+
sw.bb1: ; preds = %while.body
103+
%call.i5 = tail call i32 @bar(ptr noundef nonnull @.str.2)
104+
br label %sw.epilog
105+
106+
sw.bb2: ; preds = %while.body
107+
%call.i6 = tail call i32 @bar(ptr noundef nonnull @.str.3)
108+
br label %sw.epilog
109+
110+
sw.bb3: ; preds = %while.body
111+
%call.i7 = tail call i32 @bar(ptr noundef nonnull @.str.4)
112+
br label %sw.epilog
113+
114+
sw.default: ; preds = %while.body
115+
ret void
116+
117+
sw.epilog: ; preds = %sw.bb3, %sw.bb2, %sw.bb1, %sw.bb
118+
%num.addr.1 = phi i32 [ %add, %sw.bb3 ], [ 40, %sw.bb2 ], [ 20, %sw.bb1 ], [ 30, %sw.bb ]
119+
br label %while.body
120+
}
121+
122+
123+
declare noundef i32 @foo()
124+
declare noundef i32 @bar(ptr nocapture noundef readonly)

0 commit comments

Comments
 (0)