Skip to content

Commit 154eb5a

Browse files
author
Jingyue Wu
committed
Add a speculative execution pass
Summary: This is a pass for speculative execution of instructions for simple if-then (triangle) control flow. It's aimed at GPUs, but could perhaps be used in other contexts. Enabling this pass gives us a 1.0% geomean improvement on Google benchmark suites, with one benchmark improving 33%. Credit goes to Jingyue Wu for writing an earlier version of this pass. Patched by Bjarke Roune. Test Plan: This patch adds a set of tests in test/Transforms/SpeculativeExecution/spec.ll The pass is controlled by a flag which defaults to having the pass not run. Reviewers: eliben, dberlin, meheff, jingyue, hfinkel Reviewed By: jingyue, hfinkel Subscribers: majnemer, jholewinski, llvm-commits Differential Revision: http://reviews.llvm.org/D9360 llvm-svn: 237459
1 parent 1675b4a commit 154eb5a

File tree

10 files changed

+460
-3
lines changed

10 files changed

+460
-3
lines changed

llvm/include/llvm/IR/BasicBlock.h

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -206,9 +206,19 @@ class BasicBlock : public Value, // Basic blocks are data objects also
206206
return const_cast<BasicBlock*>(this)->getUniquePredecessor();
207207
}
208208

209-
/// Return the successor of this block if it has a unique successor.
210-
/// Otherwise return a null pointer. This method is analogous to
211-
/// getUniquePredeccessor above.
209+
/// \brief Return the successor of this block if it has a single successor.
210+
/// Otherwise return a null pointer.
211+
///
212+
/// This method is analogous to getSinglePredecessor above.
213+
BasicBlock *getSingleSuccessor();
214+
const BasicBlock *getSingleSuccessor() const {
215+
return const_cast<BasicBlock*>(this)->getSingleSuccessor();
216+
}
217+
218+
/// \brief Return the successor of this block if it has a unique successor.
219+
/// Otherwise return a null pointer.
220+
///
221+
/// This method is analogous to getUniquePredecessor above.
212222
BasicBlock *getUniqueSuccessor();
213223
const BasicBlock *getUniqueSuccessor() const {
214224
return const_cast<BasicBlock*>(this)->getUniqueSuccessor();

llvm/include/llvm/InitializePasses.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -256,6 +256,7 @@ void initializeSinkingPass(PassRegistry&);
256256
void initializeSeparateConstOffsetFromGEPPass(PassRegistry &);
257257
void initializeSlotIndexesPass(PassRegistry&);
258258
void initializeSpillPlacementPass(PassRegistry&);
259+
void initializeSpeculativeExecutionPass(PassRegistry&);
259260
void initializeStackProtectorPass(PassRegistry&);
260261
void initializeStackColoringPass(PassRegistry&);
261262
void initializeStackSlotColoringPass(PassRegistry&);

llvm/include/llvm/LinkAllPasses.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,7 @@ namespace {
170170
(void) llvm::createPartiallyInlineLibCallsPass();
171171
(void) llvm::createScalarizerPass();
172172
(void) llvm::createSeparateConstOffsetFromGEPPass();
173+
(void) llvm::createSpeculativeExecutionPass();
173174
(void) llvm::createRewriteSymbolsPass();
174175
(void) llvm::createStraightLineStrengthReducePass();
175176
(void) llvm::createMemDerefPrinter();

llvm/include/llvm/Transforms/Scalar.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -421,6 +421,13 @@ FunctionPass *
421421
createSeparateConstOffsetFromGEPPass(const TargetMachine *TM = nullptr,
422422
bool LowerGEP = false);
423423

424+
//===----------------------------------------------------------------------===//
425+
//
426+
// SpeculativeExecution - Aggressively hoist instructions to enable
427+
// speculative execution on targets where branches are expensive.
428+
//
429+
FunctionPass *createSpeculativeExecutionPass();
430+
424431
//===----------------------------------------------------------------------===//
425432
//
426433
// LoadCombine - Combine loads into bigger loads.

llvm/lib/IR/BasicBlock.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -238,6 +238,14 @@ BasicBlock *BasicBlock::getUniquePredecessor() {
238238
return PredBB;
239239
}
240240

241+
/// Return the block's successor when its successor list holds exactly one
/// entry; return null when the list is empty or holds two or more entries.
/// Entries are counted, so two edges to the same block also yield null.
BasicBlock *BasicBlock::getSingleSuccessor() {
  succ_iterator Cursor = succ_begin(this);
  succ_iterator End = succ_end(this);

  // Nothing to return when the block has no outgoing edge at all.
  if (Cursor == End)
    return nullptr;

  BasicBlock *FirstSucc = *Cursor;
  ++Cursor;

  // Any further entry means the successor is not the only one.
  if (Cursor != End)
    return nullptr;
  return FirstSucc;
}
248+
241249
BasicBlock *BasicBlock::getUniqueSuccessor() {
242250
succ_iterator SI = succ_begin(this), E = succ_end(this);
243251
if (SI == E) return NULL; // No successors

llvm/lib/Transforms/IPO/PassManagerBuilder.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -231,6 +231,7 @@ void PassManagerBuilder::populateModulePassManager(
231231
MPM.add(createSROAPass(/*RequiresDomTree*/ false));
232232
else
233233
MPM.add(createScalarReplAggregatesPass(-1, false));
234+
234235
MPM.add(createEarlyCSEPass()); // Catch trivial redundancies
235236
MPM.add(createJumpThreadingPass()); // Thread jumps.
236237
MPM.add(createCorrelatedValuePropagationPass()); // Propagate conditionals

llvm/lib/Transforms/Scalar/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ add_llvm_library(LLVMScalarOpts
4545
SeparateConstOffsetFromGEP.cpp
4646
SimplifyCFGPass.cpp
4747
Sink.cpp
48+
SpeculativeExecution.cpp
4849
StraightLineStrengthReduce.cpp
4950
StructurizeCFG.cpp
5051
TailRecursionElimination.cpp

llvm/lib/Transforms/Scalar/Scalar.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
7474
initializeSinkingPass(Registry);
7575
initializeTailCallElimPass(Registry);
7676
initializeSeparateConstOffsetFromGEPPass(Registry);
77+
initializeSpeculativeExecutionPass(Registry);
7778
initializeStraightLineStrengthReducePass(Registry);
7879
initializeLoadCombinePass(Registry);
7980
initializePlaceBackedgeSafepointsImplPass(Registry);
Lines changed: 232 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,232 @@
1+
//===- SpeculativeExecution.cpp ---------------------------------*- C++ -*-===//
2+
//
3+
// The LLVM Compiler Infrastructure
4+
//
5+
// This file is distributed under the University of Illinois Open Source
6+
// License. See LICENSE.TXT for details.
7+
//
8+
//===----------------------------------------------------------------------===//
9+
//
10+
// This pass hoists instructions to enable speculative execution on
11+
// targets where branches are expensive. This is aimed at GPUs. It
12+
// currently works on simple if-then and if-then-else
13+
// patterns.
14+
//
15+
// Removing branches is not the only motivation for this
16+
// pass. E.g. consider this code and assume that there is no
17+
// addressing mode for multiplying by sizeof(*a):
18+
//
19+
// if (b > 0)
20+
// c = a[i + 1]
21+
// if (d > 0)
22+
// e = a[i + 2]
23+
//
24+
// turns into
25+
//
26+
// p = &a[i + 1];
27+
// if (b > 0)
28+
// c = *p;
29+
// q = &a[i + 2];
30+
// if (d > 0)
31+
// e = *q;
32+
//
33+
// which could later be optimized to
34+
//
35+
// r = &a[i];
36+
// if (b > 0)
37+
// c = r[1];
38+
// if (d > 0)
39+
// e = r[2];
40+
//
41+
// Later passes sink back much of the speculated code that did not enable
42+
// further optimization.
43+
//
44+
//===----------------------------------------------------------------------===//
45+
46+
#include "llvm/ADT/SmallSet.h"
47+
#include "llvm/Analysis/TargetTransformInfo.h"
48+
#include "llvm/Analysis/ValueTracking.h"
49+
#include "llvm/IR/Instructions.h"
50+
#include "llvm/IR/Module.h"
51+
#include "llvm/IR/Operator.h"
52+
#include "llvm/Support/CommandLine.h"
53+
#include "llvm/Support/Debug.h"
54+
55+
using namespace llvm;
56+
57+
#define DEBUG_TYPE "speculative-execution"
58+
59+
// The risk that speculation will not pay off increases with the
60+
// number of instructions speculated, so we put a limit on that.
61+
static cl::opt<unsigned> SpecExecMaxSpeculationCost(
62+
"spec-exec-max-speculation-cost", cl::init(7), cl::Hidden,
63+
cl::desc("Speculative execution is not applied to basic blocks where "
64+
"the cost of the instructions to speculatively execute "
65+
"exceeds this limit."));
66+
67+
// Speculating just a few instructions from a larger block tends not
68+
// to be profitable and this limit prevents that. A reason for that is
69+
// that small basic blocks are more likely to be candidates for
70+
// further optimization.
71+
static cl::opt<unsigned> SpecExecMaxNotHoisted(
72+
"spec-exec-max-not-hoisted", cl::init(5), cl::Hidden,
73+
cl::desc("Speculative execution is not applied to basic blocks where the "
74+
"number of instructions that would not be speculatively executed "
75+
"exceeds this limit."));
76+
77+
class SpeculativeExecution : public FunctionPass {
78+
public:
79+
static char ID;
80+
SpeculativeExecution(): FunctionPass(ID) {}
81+
82+
void getAnalysisUsage(AnalysisUsage &AU) const override;
83+
bool runOnFunction(Function &F) override;
84+
85+
private:
86+
bool runOnBasicBlock(BasicBlock &B);
87+
bool considerHoistingFromTo(BasicBlock &FromBlock, BasicBlock &ToBlock);
88+
89+
const TargetTransformInfo *TTI = nullptr;
90+
};
91+
92+
// Storage for the pass identifier declared as a static member of
// SpeculativeExecution; the constructor passes it to FunctionPass.
char SpeculativeExecution::ID = 0;
93+
// Register the pass under the name "speculative-execution" and record that
// it depends on TargetTransformInfoWrapperPass, the same analysis that
// getAnalysisUsage() requires.
// NOTE(review): the two trailing `false` arguments are presumably the
// "CFG only" and "is analysis" flags of the macro -- confirm against
// PassSupport.h.
INITIALIZE_PASS_BEGIN(SpeculativeExecution, "speculative-execution",
94+
"Speculatively execute instructions", false, false)
95+
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
96+
INITIALIZE_PASS_END(SpeculativeExecution, "speculative-execution",
97+
"Speculatively execute instructions", false, false)
98+
99+
/// Declare the analyses this pass consumes and the ones it keeps valid.
///
/// TargetTransformInfo is required because it supplies the per-instruction
/// cost used to decide what to hoist.
void SpeculativeExecution::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<TargetTransformInfoWrapperPass>();
  // The pass only moves non-terminator instructions between existing blocks;
  // it never adds or removes blocks or edges. Saying so lets the pass manager
  // keep CFG-only analyses alive instead of recomputing them afterwards.
  AU.setPreservesCFG();
}
102+
103+
/// Run the pass over every basic block of \p F.
///
/// Returns false immediately when skipOptnoneFunction(F) asks to skip the
/// function; otherwise returns true if at least one block was changed.
bool SpeculativeExecution::runOnFunction(Function &F) {
  if (skipOptnoneFunction(F))
    return false;

  // Cache the cost model for this function; the per-block helpers read it.
  TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);

  bool MadeChange = false;
  for (BasicBlock &Block : F)
    if (runOnBasicBlock(Block))
      MadeChange = true;
  return MadeChange;
}
115+
116+
/// Try to hoist instructions into \p B from one of the successors of its
/// terminating branch.
///
/// Only branches with exactly two distinct successors, neither of which is
/// \p B itself, are considered. The first control-flow pattern that matches
/// decides the result; no further pattern is tried after a match.
bool SpeculativeExecution::runOnBasicBlock(BasicBlock &B) {
  BranchInst *Branch = dyn_cast<BranchInst>(B.getTerminator());
  if (!Branch || Branch->getNumSuccessors() != 2)
    return false;

  BasicBlock *First = Branch->getSuccessor(0);
  BasicBlock *Second = Branch->getSuccessor(1);

  // Self-loops and branches whose two edges reach the same block are out.
  if (First == &B || Second == &B || First == Second)
    return false;

  const bool FirstHasOnePred = First->getSinglePredecessor() != nullptr;
  const bool SecondHasOnePred = Second->getSinglePredecessor() != nullptr;

  // if-then triangle: the first successor falls through to the second.
  if (FirstHasOnePred && First->getSingleSuccessor() == Second)
    return considerHoistingFromTo(*First, B);

  // if-else triangle: the second successor falls through to the first.
  if (SecondHasOnePred && Second->getSingleSuccessor() == First)
    return considerHoistingFromTo(*Second, B);

  // if-then-else diamond: both arms rejoin in a common block other than B.
  // It is only handled when one arm is empty, which makes it equivalent to
  // one of the triangles above.
  if (FirstHasOnePred && SecondHasOnePred) {
    BasicBlock *Join = Second->getSingleSuccessor();
    if (Join != nullptr && Join != &B && Join == First->getSingleSuccessor()) {
      // A block of size one holds nothing but its terminator, i.e. it does
      // no work. This does happen.
      if (Second->size() == 1) // equivalent to if-then
        return considerHoistingFromTo(*First, B);
      if (First->size() == 1) // equivalent to if-else
        return considerHoistingFromTo(*Second, B);
    }
  }

  return false;
}
159+
160+
static unsigned ComputeSpeculationCost(const Instruction *I,
161+
const TargetTransformInfo &TTI) {
162+
switch (Operator::getOpcode(I)) {
163+
case Instruction::GetElementPtr:
164+
case Instruction::Add:
165+
case Instruction::Mul:
166+
case Instruction::And:
167+
case Instruction::Or:
168+
case Instruction::Select:
169+
case Instruction::Shl:
170+
case Instruction::Sub:
171+
case Instruction::LShr:
172+
case Instruction::AShr:
173+
case Instruction::Xor:
174+
case Instruction::ZExt:
175+
case Instruction::SExt:
176+
return TTI.getUserCost(I);
177+
178+
default:
179+
return UINT_MAX; // Disallow anything not whitelisted.
180+
}
181+
}
182+
183+
/// Hoist as many instructions as allowed from \p FromBlock to the end of
/// \p ToBlock, just before its terminator.
///
/// The block is scanned once to classify every instruction as hoistable or
/// not. The scan gives up (returning false, nothing moved) when the summed
/// cost of the hoistable instructions exceeds SpecExecMaxSpeculationCost or
/// when more than SpecExecMaxNotHoisted instructions would stay behind.
/// Nothing is moved either when the summed cost is zero. Otherwise the
/// hoistable instructions are moved in their original order and true is
/// returned.
bool SpeculativeExecution::considerHoistingFromTo(BasicBlock &FromBlock,
                                                  BasicBlock &ToBlock) {
  // Instructions of FromBlock that have to remain where they are.
  SmallSet<const Instruction *, 8> StayBehind;

  // True when some operand of Candidate is produced by an instruction that
  // stays behind; such a candidate cannot move above its operand.
  const auto DependsOnStayBehind = [&StayBehind](User *Candidate) {
    for (Value *Operand : Candidate->operand_values())
      if (Instruction *Def = dyn_cast<Instruction>(Operand))
        if (StayBehind.count(Def) != 0)
          return true;
    return false;
  };

  unsigned HoistCost = 0;
  for (Instruction &Inst : FromBlock) {
    const unsigned InstCost = ComputeSpeculationCost(&Inst, *TTI);
    const bool CanHoist = InstCost != UINT_MAX &&
                          isSafeToSpeculativelyExecute(&Inst) &&
                          !DependsOnStayBehind(&Inst);
    if (!CanHoist) {
      StayBehind.insert(&Inst);
      if (StayBehind.size() > SpecExecMaxNotHoisted)
        return false; // too much left behind
      continue;
    }

    HoistCost += InstCost;
    if (HoistCost > SpecExecMaxSpeculationCost)
      return false; // too much to hoist
  }

  if (HoistCost == 0)
    return false; // nothing to hoist

  // Moving an instruction unlinks it from FromBlock's list, so the cursor is
  // advanced before the instruction it pointed at is touched.
  Instruction *InsertBefore = ToBlock.getTerminator();
  auto Cursor = FromBlock.begin();
  while (Cursor != FromBlock.end()) {
    Instruction *Inst = &*Cursor;
    ++Cursor;
    if (StayBehind.count(Inst) == 0)
      Inst->moveBefore(InsertBefore);
  }
  return true;
}
225+
226+
namespace llvm {
227+
228+
FunctionPass *createSpeculativeExecutionPass() {
229+
return new SpeculativeExecution();
230+
}
231+
232+
} // namespace llvm

0 commit comments

Comments
 (0)