Skip to content

Commit a8ffa66

Browse files
committed
[CodeGen][NPM] Support CodeGenSCCOrder in pipeline
pb/codegenscc-order
1 parent f541a3a commit a8ffa66

File tree

3 files changed

+219
-16
lines changed

3 files changed

+219
-16
lines changed

llvm/include/llvm/Passes/CodeGenPassBuilder.h

Lines changed: 73 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include "llvm/ADT/StringRef.h"
1919
#include "llvm/Analysis/AliasAnalysis.h"
2020
#include "llvm/Analysis/BasicAliasAnalysis.h"
21+
#include "llvm/Analysis/CGSCCPassManager.h"
2122
#include "llvm/Analysis/ProfileSummaryInfo.h"
2223
#include "llvm/Analysis/ScopedNoAliasAA.h"
2324
#include "llvm/Analysis/TargetTransformInfo.h"
@@ -207,10 +208,7 @@ template <typename DerivedT, typename TargetMachineT> class CodeGenPassBuilder {
207208
class AddIRPass {
208209
public:
209210
AddIRPass(ModulePassManager &MPM, const DerivedT &PB) : MPM(MPM), PB(PB) {}
210-
~AddIRPass() {
211-
if (!FPM.isEmpty())
212-
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
213-
}
211+
~AddIRPass() { flushFPMToMPM(); }
214212

215213
template <typename PassT>
216214
void operator()(PassT &&Pass, StringRef Name = PassT::name()) {
@@ -228,16 +226,40 @@ template <typename DerivedT, typename TargetMachineT> class CodeGenPassBuilder {
228226
FPM.addPass(std::forward<PassT>(Pass));
229227
} else {
230228
// Add Module Pass
231-
if (!FPM.isEmpty()) {
232-
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
233-
FPM = FunctionPassManager();
234-
}
235-
229+
flushFPMToMPM();
236230
MPM.addPass(std::forward<PassT>(Pass));
237231
}
238232
}
239233

234+
/// Setting this will add passes to the CGSCC pass manager.
235+
void requireCGSCCOrder() {
236+
if (PB.AddInCGSCCOrder)
237+
return;
238+
flushFPMToMPM();
239+
PB.AddInCGSCCOrder = true;
240+
}
241+
242+
/// Flush passes queued in CGSCC order, then add later passes via the plain function adaptor.
243+
/// Existing passes won't be removed.
244+
void stopAddingInCGSCCOrder() {
245+
if (!PB.AddInCGSCCOrder)
246+
return;
247+
flushFPMToMPM();
248+
PB.AddInCGSCCOrder = false;
249+
}
250+
240251
private:
252+
void flushFPMToMPM() {
253+
if (!FPM.isEmpty()) {
254+
if (PB.AddInCGSCCOrder) {
255+
MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(
256+
createCGSCCToFunctionPassAdaptor(std::move(FPM))));
257+
} else {
258+
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
259+
}
260+
FPM = FunctionPassManager();
261+
}
262+
}
241263
ModulePassManager &MPM;
242264
FunctionPassManager FPM;
243265
const DerivedT &PB;
@@ -254,7 +276,11 @@ template <typename DerivedT, typename TargetMachineT> class CodeGenPassBuilder {
254276
FPM.addPass(
255277
createFunctionToMachineFunctionPassAdaptor(std::move(MFPM)));
256278
FPM.addPass(InvalidateAnalysisPass<MachineFunctionAnalysis>());
257-
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
279+
if (this->PB.AddInCGSCCOrder) {
280+
MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(
281+
createCGSCCToFunctionPassAdaptor(std::move(FPM))));
282+
} else
283+
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
258284
}
259285
}
260286

@@ -273,20 +299,47 @@ template <typename DerivedT, typename TargetMachineT> class CodeGenPassBuilder {
273299
MFPM.addPass(std::forward<PassT>(Pass));
274300
} else {
275301
// Add Module Pass
276-
if (!MFPM.isEmpty()) {
277-
MPM.addPass(createModuleToFunctionPassAdaptor(
278-
createFunctionToMachineFunctionPassAdaptor(std::move(MFPM))));
279-
MFPM = MachineFunctionPassManager();
280-
}
281-
302+
flushMFPMToMPM();
282303
MPM.addPass(std::forward<PassT>(Pass));
283304
}
284305

285306
for (auto &C : PB.AfterCallbacks)
286307
C(Name, MFPM);
287308
}
288309

310+
/// Flush queued machine passes, then wrap later machine passes in a post-order CGSCC adaptor.
311+
void requireCGSCCOrder() {
312+
if (PB.AddInCGSCCOrder)
313+
return;
314+
flushMFPMToMPM();
315+
PB.AddInCGSCCOrder = true;
316+
}
317+
318+
/// Flush machine passes queued in CGSCC order, then add later passes via the plain function adaptor.
319+
/// Existing passes won't be removed.
320+
void stopAddingInCGSCCOrder() {
321+
if (!PB.AddInCGSCCOrder)
322+
return;
323+
flushMFPMToMPM();
324+
PB.AddInCGSCCOrder = false;
325+
}
326+
289327
private:
328+
void flushMFPMToMPM() {
329+
if (!MFPM.isEmpty()) {
330+
if (PB.AddInCGSCCOrder) {
331+
MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(
332+
createCGSCCToFunctionPassAdaptor(
333+
createFunctionToMachineFunctionPassAdaptor(
334+
std::move(MFPM)))));
335+
} else {
336+
MPM.addPass(createModuleToFunctionPassAdaptor(
337+
createFunctionToMachineFunctionPassAdaptor(std::move(MFPM))));
338+
}
339+
MFPM = MachineFunctionPassManager();
340+
}
341+
}
342+
290343
ModulePassManager &MPM;
291344
MachineFunctionPassManager MFPM;
292345
const DerivedT &PB;
@@ -552,6 +605,7 @@ template <typename DerivedT, typename TargetMachineT> class CodeGenPassBuilder {
552605
/// Helper variable for `-start-before/-start-after/-stop-before/-stop-after`
553606
mutable bool Started = true;
554607
mutable bool Stopped = true;
608+
mutable bool AddInCGSCCOrder = false;
555609
};
556610

557611
template <typename Derived, typename TargetMachineT>
@@ -810,6 +864,9 @@ void CodeGenPassBuilder<Derived, TargetMachineT>::addISelPrepare(
810864
AddIRPass &addPass) const {
811865
derived().addPreISel(addPass);
812866

867+
if (Opt.RequiresCodeGenSCCOrder)
868+
addPass.requireCGSCCOrder();
869+
813870
addPass(CallBrPreparePass());
814871
// Add both the safe stack and the stack protection passes: each of them will
815872
// only protect functions that have corresponding attributes.

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2067,6 +2067,8 @@ void AMDGPUCodeGenPassBuilder::addCodeGenPrepare(AddIRPass &addPass) const {
20672067
// being run on them, which causes crashes in the resource usage analysis).
20682068
addPass(AMDGPULowerBufferFatPointersPass(TM));
20692069

2070+
addPass.requireCGSCCOrder();
2071+
20702072
Base::addCodeGenPrepare(addPass);
20712073

20722074
if (isPassEnabled(EnableLoadStoreVectorizer))
Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
; UNSUPPORTED: expensive_checks
2+
; RUN: llc -O3 -enable-new-pm -mtriple=amdgcn--amdhsa -disable-verify -print-pipeline-passes < %s 2>&1 \
3+
; RUN: | tr ',' '\n' | FileCheck -check-prefix=GCN-O3 %s
4+
5+
; REQUIRES: asserts
6+
7+
; GCN-O3: require<MachineModuleAnalysis>
8+
; GCN-O3-NEXT: require<profile-summary>
9+
; GCN-O3-NEXT: require<collector-metadata>
10+
; GCN-O3-NEXT: pre-isel-intrinsic-lowering
11+
; GCN-O3-NEXT: function(expand-large-div-rem
12+
; GCN-O3-NEXT: expand-fp)
13+
; GCN-O3-NEXT: amdgpu-remove-incompatible-functions
14+
; GCN-O3-NEXT: amdgpu-printf-runtime-binding
15+
; GCN-O3-NEXT: amdgpu-lower-ctor-dtor
16+
; GCN-O3-NEXT: function(amdgpu-image-intrinsic-opt)
17+
; GCN-O3-NEXT: expand-variadics
18+
; GCN-O3-NEXT: amdgpu-always-inline
19+
; GCN-O3-NEXT: always-inline
20+
; GCN-O3-NEXT: amdgpu-export-kernel-runtime-handles
21+
; GCN-O3-NEXT: amdgpu-sw-lower-lds
22+
; GCN-O3-NEXT: amdgpu-lower-module-lds
23+
; GCN-O3-NEXT: function(infer-address-spaces
24+
; GCN-O3-NEXT: amdgpu-atomic-optimizer
25+
; GCN-O3-NEXT: atomic-expand
26+
; GCN-O3-NEXT: amdgpu-promote-alloca
27+
; GCN-O3-NEXT: separate-const-offset-from-gep<>
28+
; GCN-O3-NEXT: slsr
29+
; GCN-O3-NEXT: gvn<>
30+
; GCN-O3-NEXT: nary-reassociate
31+
; GCN-O3-NEXT: early-cse<>
32+
; GCN-O3-NEXT: amdgpu-codegenprepare
33+
; GCN-O3-NEXT: loop-mssa(loop-reduce)
34+
; GCN-O3-NEXT: mergeicmps
35+
; GCN-O3-NEXT: expand-memcmp
36+
; GCN-O3-NEXT: gc-lowering
37+
; GCN-O3-NEXT: lower-constant-intrinsics
38+
; GCN-O3-NEXT: UnreachableBlockElimPass
39+
; GCN-O3-NEXT: consthoist
40+
; GCN-O3-NEXT: ReplaceWithVeclib
41+
; GCN-O3-NEXT: partially-inline-libcalls
42+
; GCN-O3-NEXT: ee-instrument<post-inline>
43+
; GCN-O3-NEXT: scalarize-masked-mem-intrin
44+
; GCN-O3-NEXT: ExpandReductionsPass
45+
; GCN-O3-NEXT: gvn<>
46+
; GCN-O3-NEXT: amdgpu-lower-kernel-arguments)
47+
; GCN-O3-NEXT: amdgpu-lower-buffer-fat-pointers
48+
; GCN-O3-NEXT: cgscc(function(codegenprepare
49+
; GCN-O3-NEXT: load-store-vectorizer
50+
; GCN-O3-NEXT: lower-switch
51+
; GCN-O3-NEXT: lower-invoke
52+
; GCN-O3-NEXT: UnreachableBlockElimPass
53+
; GCN-O3-NEXT: flatten-cfg
54+
; GCN-O3-NEXT: sink
55+
; GCN-O3-NEXT: amdgpu-late-codegenprepare
56+
; GCN-O3-NEXT: amdgpu-unify-divergent-exit-nodes
57+
; GCN-O3-NEXT: fix-irreducible
58+
; GCN-O3-NEXT: unify-loop-exits
59+
; GCN-O3-NEXT: StructurizeCFGPass
60+
; GCN-O3-NEXT: amdgpu-annotate-uniform
61+
; GCN-O3-NEXT: si-annotate-control-flow
62+
; GCN-O3-NEXT: amdgpu-rewrite-undef-for-phi
63+
; GCN-O3-NEXT: lcssa))
64+
; GCN-O3-NEXT: amdgpu-perf-hint
65+
; GCN-O3-NEXT: cgscc(function(require<uniformity>
66+
; GCN-O3-NEXT: callbr-prepare
67+
; GCN-O3-NEXT: safe-stack
68+
; GCN-O3-NEXT: stack-protector))
69+
; GCN-O3-NEXT: cgscc(function(machine-function(amdgpu-isel
70+
; GCN-O3-NEXT: si-fix-sgpr-copies
71+
; GCN-O3-NEXT: si-i1-copies
72+
; GCN-O3-NEXT: finalize-isel
73+
; GCN-O3-NEXT: early-tailduplication
74+
; GCN-O3-NEXT: opt-phis
75+
; GCN-O3-NEXT: stack-coloring
76+
; GCN-O3-NEXT: localstackalloc
77+
; GCN-O3-NEXT: dead-mi-elimination
78+
; GCN-O3-NEXT: early-machinelicm
79+
; GCN-O3-NEXT: machine-cse
80+
; GCN-O3-NEXT: machine-sink
81+
; GCN-O3-NEXT: peephole-opt
82+
; GCN-O3-NEXT: dead-mi-elimination
83+
; GCN-O3-NEXT: si-fold-operands
84+
; GCN-O3-NEXT: gcn-dpp-combine
85+
; GCN-O3-NEXT: si-load-store-opt
86+
; GCN-O3-NEXT: si-peephole-sdwa
87+
; GCN-O3-NEXT: early-machinelicm
88+
; GCN-O3-NEXT: machine-cse
89+
; GCN-O3-NEXT: si-fold-operands
90+
; GCN-O3-NEXT: dead-mi-elimination
91+
; GCN-O3-NEXT: si-shrink-instructions
92+
; GCN-O3-NEXT: detect-dead-lanes
93+
; GCN-O3-NEXT: InitUndefPass
94+
; GCN-O3-NEXT: ProcessImplicitDefsPass
95+
; GCN-O3-NEXT: unreachable-mbb-elimination
96+
; GCN-O3-NEXT: require<live-vars>
97+
; GCN-O3-NEXT: require<machine-loops>
98+
; GCN-O3-NEXT: phi-node-elimination
99+
; GCN-O3-NEXT: two-address-instruction
100+
; GCN-O3-NEXT: register-coalescer
101+
; GCN-O3-NEXT: rename-independent-subregs
102+
; GCN-O3-NEXT: machine-scheduler
103+
; GCN-O3-NEXT: greedy<all>
104+
; GCN-O3-NEXT: amdgpu-nsa-reassign
105+
; GCN-O3-NEXT: VirtRegRewriterPass
106+
; GCN-O3-NEXT: stack-slot-coloring
107+
; GCN-O3-NEXT: machine-cp
108+
; GCN-O3-NEXT: machinelicm
109+
; GCN-O3-NEXT: si-fix-vgpr-copies
110+
; GCN-O3-NEXT: si-optimize-exec-masking
111+
; GCN-O3-NEXT: remove-redundant-debug-values
112+
; GCN-O3-NEXT: fixup-statepoint-caller-saved
113+
; GCN-O3-NEXT: PostRAMachineSinkingPass
114+
; GCN-O3-NEXT: ShrinkWrapPass
115+
; GCN-O3-NEXT: PrologEpilogInserterPass
116+
; GCN-O3-NEXT: branch-folder
117+
; GCN-O3-NEXT: tailduplication
118+
; GCN-O3-NEXT: machine-latecleanup
119+
; GCN-O3-NEXT: machine-cp
120+
; GCN-O3-NEXT: post-ra-pseudos
121+
; GCN-O3-NEXT: postmisched
122+
; GCN-O3-NEXT: block-placement
123+
; GCN-O3-NEXT: fentry-insert
124+
; GCN-O3-NEXT: xray-instrumentation
125+
; GCN-O3-NEXT: patchable-function
126+
; GCN-O3-NEXT: gcn-create-vopd
127+
; GCN-O3-NEXT: si-memory-legalizer
128+
; GCN-O3-NEXT: si-insert-waitcnts
129+
; GCN-O3-NEXT: si-late-branch-lowering
130+
; GCN-O3-NEXT: si-pre-emit-peephole
131+
; GCN-O3-NEXT: post-RA-hazard-rec
132+
; GCN-O3-NEXT: AMDGPUWaitSGPRHazardsPass
133+
; GCN-O3-NEXT: amdgpu-insert-delay-alu
134+
; GCN-O3-NEXT: branch-relaxation
135+
; GCN-O3-NEXT: remove-loads-into-fake-uses
136+
; GCN-O3-NEXT: live-debug-values
137+
; GCN-O3-NEXT: machine-sanmd
138+
; GCN-O3-NEXT: stack-frame-layout)
139+
; GCN-O3-NEXT: invalidate<machine-function-info>))
140+
141+
142+
define void @empty() {
143+
ret void
144+
}

0 commit comments

Comments
 (0)