Skip to content

Commit f756f06

Browse files
committed
[SimpleLoopUnswitch] Skip non-trivial unswitching of cold loops
When profile data is available, non-trivial loop unswitching is applied only to non-cold loops, because unswitching a cold loop brings little benefit but can significantly increase code size. Reviewed By: aeubanks, asbirlea. Differential Revision: https://reviews.llvm.org/D129599
1 parent 6c52f82 commit f756f06

11 files changed

+38
-27
lines changed

llvm/lib/Passes/PassBuilder.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1399,8 +1399,10 @@ Error PassBuilder::parseFunctionPass(FunctionPassManager &FPM,
13991399
return Err;
14001400
// Add the nested pass manager with the appropriate adaptor.
14011401
bool UseMemorySSA = (Name == "loop-mssa");
1402-
bool UseBFI = llvm::any_of(
1403-
InnerPipeline, [](auto Pipeline) { return Pipeline.Name == "licm"; });
1402+
bool UseBFI = llvm::any_of(InnerPipeline, [](auto Pipeline) {
1403+
return Pipeline.Name.contains("licm") ||
1404+
Pipeline.Name.contains("simple-loop-unswitch");
1405+
});
14041406
bool UseBPI = llvm::any_of(InnerPipeline, [](auto Pipeline) {
14051407
return Pipeline.Name == "loop-predication";
14061408
});

llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include "llvm/ADT/Statistic.h"
1717
#include "llvm/ADT/Twine.h"
1818
#include "llvm/Analysis/AssumptionCache.h"
19+
#include "llvm/Analysis/BlockFrequencyInfo.h"
1920
#include "llvm/Analysis/CFG.h"
2021
#include "llvm/Analysis/CodeMetrics.h"
2122
#include "llvm/Analysis/GuardUtils.h"
@@ -26,6 +27,7 @@
2627
#include "llvm/Analysis/MemorySSA.h"
2728
#include "llvm/Analysis/MemorySSAUpdater.h"
2829
#include "llvm/Analysis/MustExecute.h"
30+
#include "llvm/Analysis/ProfileSummaryInfo.h"
2931
#include "llvm/Analysis/ScalarEvolution.h"
3032
#include "llvm/Analysis/TargetTransformInfo.h"
3133
#include "llvm/Analysis/ValueTracking.h"
@@ -3044,6 +3046,7 @@ unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
30443046
bool NonTrivial,
30453047
function_ref<void(bool, bool, ArrayRef<Loop *>)> UnswitchCB,
30463048
ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
3049+
ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI,
30473050
function_ref<void(Loop &, StringRef)> DestroyLoopCB) {
30483051
assert(L.isRecursivelyLCSSAForm(DT, LI) &&
30493052
"Loops must be in LCSSA form before unswitching.");
@@ -3080,6 +3083,14 @@ unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
30803083
if (L.getHeader()->getParent()->hasOptSize())
30813084
return false;
30823085

3086+
// Skip cold loops, as unswitching them brings little benefit
3087+
// but increases the code size
3088+
if (PSI && PSI->hasProfileSummary() && BFI &&
3089+
PSI->isColdBlock(L.getHeader(), BFI)) {
3090+
LLVM_DEBUG(dbgs() << " Skip cold loop: " << L << "\n");
3091+
return false;
3092+
}
3093+
30833094
// Skip non-trivial unswitching for loops that cannot be cloned.
30843095
if (!L.isSafeToClone())
30853096
return false;
@@ -3105,7 +3116,11 @@ PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM,
31053116
LPMUpdater &U) {
31063117
Function &F = *L.getHeader()->getParent();
31073118
(void)F;
3108-
3119+
ProfileSummaryInfo *PSI = nullptr;
3120+
if (auto OuterProxy =
3121+
AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR)
3122+
.getCachedResult<ModuleAnalysisManagerFunctionProxy>(F))
3123+
PSI = OuterProxy->getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
31093124
LLVM_DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << L
31103125
<< "\n");
31113126

@@ -3152,7 +3167,7 @@ PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM,
31523167
}
31533168
if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC, AR.AA, AR.TTI, Trivial, NonTrivial,
31543169
UnswitchCB, &AR.SE, MSSAU ? MSSAU.getPointer() : nullptr,
3155-
DestroyLoopCB))
3170+
PSI, AR.BFI, DestroyLoopCB))
31563171
return PreservedAnalyses::all();
31573172

31583173
if (AR.MSSA && VerifyMemorySSA)
@@ -3214,7 +3229,6 @@ bool SimpleLoopUnswitchLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) {
32143229

32153230
LLVM_DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << *L
32163231
<< "\n");
3217-
32183232
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
32193233
auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
32203234
auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
@@ -3251,9 +3265,9 @@ bool SimpleLoopUnswitchLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) {
32513265

32523266
if (VerifyMemorySSA)
32533267
MSSA->verifyMemorySSA();
3254-
3255-
bool Changed = unswitchLoop(*L, DT, LI, AC, AA, TTI, true, NonTrivial,
3256-
UnswitchCB, SE, &MSSAU, DestroyLoopCB);
3268+
bool Changed =
3269+
unswitchLoop(*L, DT, LI, AC, AA, TTI, true, NonTrivial, UnswitchCB, SE,
3270+
&MSSAU, nullptr, nullptr, DestroyLoopCB);
32573271

32583272
if (VerifyMemorySSA)
32593273
MSSA->verifyMemorySSA();

llvm/test/Other/new-pm-defaults.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,7 @@
174174
; CHECK-O-NEXT: Running pass: LoopRotatePass
175175
; CHECK-O-NEXT: Running pass: LICM
176176
; CHECK-O-NEXT: Running pass: SimpleLoopUnswitchPass
177+
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
177178
; CHECK-O-NEXT: Running pass: SimplifyCFGPass
178179
; CHECK-O-NEXT: Running pass: InstCombinePass
179180
; CHECK-O-NEXT: Running pass: LoopSimplifyPass

llvm/test/Other/new-pm-thinlto-defaults.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,7 @@
137137
; CHECK-O-NEXT: Running pass: LoopRotatePass
138138
; CHECK-O-NEXT: Running pass: LICM
139139
; CHECK-O-NEXT: Running pass: SimpleLoopUnswitchPass
140+
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
140141
; CHECK-O-NEXT: Running pass: SimplifyCFGPass
141142
; CHECK-O-NEXT: Running pass: InstCombinePass
142143
; CHECK-O-NEXT: Running pass: LoopSimplifyPass

llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,7 @@
110110
; CHECK-O-NEXT: Running pass: LoopRotatePass
111111
; CHECK-O-NEXT: Running pass: LICM
112112
; CHECK-O-NEXT: Running pass: SimpleLoopUnswitchPass
113+
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
113114
; CHECK-O-NEXT: Running pass: SimplifyCFGPass
114115
; CHECK-O-NEXT: Running pass: InstCombinePass
115116
; CHECK-O-NEXT: Running pass: LoopSimplifyPass

llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,7 @@
119119
; CHECK-O-NEXT: Running pass: LoopRotatePass
120120
; CHECK-O-NEXT: Running pass: LICM
121121
; CHECK-O-NEXT: Running pass: SimpleLoopUnswitchPass
122+
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
122123
; CHECK-O-NEXT: Running pass: SimplifyCFGPass
123124
; CHECK-O-NEXT: Running pass: InstCombinePass
124125
; CHECK-O-NEXT: Running pass: LoopSimplifyPass

llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,7 @@
148148
; CHECK-O-NEXT: Running pass: LoopRotatePass
149149
; CHECK-O-NEXT: Running pass: LICM
150150
; CHECK-O-NEXT: Running pass: SimpleLoopUnswitchPass
151+
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
151152
; CHECK-O-NEXT: Running pass: SimplifyCFGPass
152153
; CHECK-O-NEXT: Running pass: InstCombinePass
153154
; CHECK-O-NEXT: Running pass: LoopSimplifyPass

llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,7 @@
114114
; CHECK-O-NEXT: Running pass: LoopRotatePass
115115
; CHECK-O-NEXT: Running pass: LICM
116116
; CHECK-O-NEXT: Running pass: SimpleLoopUnswitchPass
117+
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
117118
; CHECK-O-NEXT: Running pass: SimplifyCFGPass
118119
; CHECK-O-NEXT: Running pass: InstCombinePass
119120
; CHECK-O-NEXT: Running pass: LoopSimplifyPass

llvm/test/Transforms/LoopPredication/preserve-bpi.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ declare void @llvm.experimental.guard(i1, ...)
1010
; CHECK: Running pass: LoopPredicationPass on Loop at depth 1
1111
; CHECK-NEXT: Running pass: LICMPass on Loop at depth 1
1212
; CHECK-NEXT: Running pass: SimpleLoopUnswitchPass on Loop at depth 1
13+
; CHECK-NEXT: Running analysis: OuterAnalysisManagerProxy
1314
; CHECK-NEXT: Running pass: LoopPredicationPass on Loop at depth 1
1415
; CHECK-NEXT: Running pass: LICMPass on Loop at depth 1
1516
; CHECK-NEXT: Running pass: SimpleLoopUnswitchPass on Loop at depth 1

llvm/test/Transforms/SimpleLoopUnswitch/PGO-nontrivial-unswitch.ll

Lines changed: 6 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -46,31 +46,18 @@ define void @f1(i32 %i, i1 %cond, i1 %hot_cond, i1 %cold_cond, i1* %ptr) !prof !
4646
; CHECK: entry_cold_loop:
4747
; CHECK-NEXT: br i1 [[COLD_COND:%.*]], label [[COLD_LOOP_BEGIN_PREHEADER:%.*]], label [[COLD_LOOP_EXIT:%.*]], !prof [[PROF16:![0-9]+]]
4848
; CHECK: cold_loop_begin.preheader:
49-
; CHECK-NEXT: br i1 [[COND]], label [[COLD_LOOP_BEGIN_PREHEADER_SPLIT_US:%.*]], label [[COLD_LOOP_BEGIN_PREHEADER_SPLIT:%.*]]
50-
; CHECK: cold_loop_begin.preheader.split.us:
51-
; CHECK-NEXT: br label [[COLD_LOOP_BEGIN_US:%.*]]
52-
; CHECK: cold_loop_begin.us:
53-
; CHECK-NEXT: br label [[COLD_LOOP_A_US:%.*]]
54-
; CHECK: cold_loop_a.us:
55-
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @a()
56-
; CHECK-NEXT: br label [[COLD_LOOP_LATCH_US:%.*]]
57-
; CHECK: cold_loop_latch.us:
58-
; CHECK-NEXT: [[V2_US:%.*]] = load i1, i1* [[PTR]], align 1
59-
; CHECK-NEXT: br i1 [[V2_US]], label [[COLD_LOOP_BEGIN_US]], label [[COLD_LOOP_EXIT_LOOPEXIT_SPLIT_US:%.*]]
60-
; CHECK: cold_loop_exit.loopexit.split.us:
61-
; CHECK-NEXT: br label [[COLD_LOOP_EXIT_LOOPEXIT:%.*]]
62-
; CHECK: cold_loop_begin.preheader.split:
6349
; CHECK-NEXT: br label [[COLD_LOOP_BEGIN:%.*]]
6450
; CHECK: cold_loop_begin:
65-
; CHECK-NEXT: br label [[COLD_LOOP_B:%.*]]
51+
; CHECK-NEXT: br i1 [[COND]], label [[COLD_LOOP_A:%.*]], label [[COLD_LOOP_B:%.*]]
52+
; CHECK: cold_loop_a:
53+
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @a()
54+
; CHECK-NEXT: br label [[COLD_LOOP_LATCH:%.*]]
6655
; CHECK: cold_loop_b:
6756
; CHECK-NEXT: [[TMP3:%.*]] = call i32 @b()
68-
; CHECK-NEXT: br label [[COLD_LOOP_LATCH:%.*]]
57+
; CHECK-NEXT: br label [[COLD_LOOP_LATCH]]
6958
; CHECK: cold_loop_latch:
7059
; CHECK-NEXT: [[V2:%.*]] = load i1, i1* [[PTR]], align 1
71-
; CHECK-NEXT: br i1 [[V2]], label [[COLD_LOOP_BEGIN]], label [[COLD_LOOP_EXIT_LOOPEXIT_SPLIT:%.*]]
72-
; CHECK: cold_loop_exit.loopexit.split:
73-
; CHECK-NEXT: br label [[COLD_LOOP_EXIT_LOOPEXIT]]
60+
; CHECK-NEXT: br i1 [[V2]], label [[COLD_LOOP_BEGIN]], label [[COLD_LOOP_EXIT_LOOPEXIT:%.*]]
7461
; CHECK: cold_loop_exit.loopexit:
7562
; CHECK-NEXT: br label [[COLD_LOOP_EXIT]]
7663
; CHECK: cold_loop_exit:

0 commit comments

Comments
 (0)