Skip to content

Commit 06a3abd

Browse files
authored
[AMDGPU][NewPM] Port "SIFormMemoryClauses" to NPM (#127181)
1 parent c3cae9d commit 06a3abd

10 files changed

+92
-46
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ FunctionPass *createLowerWWMCopiesPass();
5050
FunctionPass *createSIMemoryLegalizerPass();
5151
FunctionPass *createSIInsertWaitcntsPass();
5252
FunctionPass *createSIPreAllocateWWMRegsLegacyPass();
53-
FunctionPass *createSIFormMemoryClausesPass();
53+
FunctionPass *createSIFormMemoryClausesLegacyPass();
5454

5555
FunctionPass *createSIPostRABundlerPass();
5656
FunctionPass *createAMDGPUImageIntrinsicOptimizerPass(const TargetMachine *);
@@ -425,7 +425,7 @@ extern char &SIInsertHardClausesID;
425425
void initializeSIInsertWaitcntsPass(PassRegistry&);
426426
extern char &SIInsertWaitcntsID;
427427

428-
void initializeSIFormMemoryClausesPass(PassRegistry&);
428+
void initializeSIFormMemoryClausesLegacyPass(PassRegistry &);
429429
extern char &SIFormMemoryClausesID;
430430

431431
void initializeSIPostRABundlerPass(PassRegistry&);

llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@ MACHINE_FUNCTION_PASS("gcn-dpp-combine", GCNDPPCombinePass())
104104
MACHINE_FUNCTION_PASS("si-fix-sgpr-copies", SIFixSGPRCopiesPass())
105105
MACHINE_FUNCTION_PASS("si-fix-vgpr-copies", SIFixVGPRCopiesPass())
106106
MACHINE_FUNCTION_PASS("si-fold-operands", SIFoldOperandsPass());
107+
MACHINE_FUNCTION_PASS("si-form-memory-clauses", SIFormMemoryClausesPass())
107108
MACHINE_FUNCTION_PASS("si-i1-copies", SILowerI1CopiesPass())
108109
MACHINE_FUNCTION_PASS("si-load-store-opt", SILoadStoreOptimizerPass())
109110
MACHINE_FUNCTION_PASS("si-lower-control-flow", SILowerControlFlowPass())
@@ -124,7 +125,6 @@ DUMMY_MACHINE_FUNCTION_PASS("amdgpu-pre-ra-optimizations", GCNPreRAOptimizations
124125
DUMMY_MACHINE_FUNCTION_PASS("amdgpu-rewrite-partial-reg-uses", GCNRewritePartialRegUsesPass())
125126
DUMMY_MACHINE_FUNCTION_PASS("amdgpu-set-wave-priority", AMDGPUSetWavePriorityPass())
126127

127-
DUMMY_MACHINE_FUNCTION_PASS("si-form-memory-clauses", SIFormMemoryClausesPass())
128128
DUMMY_MACHINE_FUNCTION_PASS("si-insert-hard-clauses", SIInsertHardClausesPass())
129129
DUMMY_MACHINE_FUNCTION_PASS("si-insert-waitcnts", SIInsertWaitcntsPass())
130130
DUMMY_MACHINE_FUNCTION_PASS("si-late-branch-lowering", SILateBranchLoweringPass())

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
#include "SIFixSGPRCopies.h"
4343
#include "SIFixVGPRCopies.h"
4444
#include "SIFoldOperands.h"
45+
#include "SIFormMemoryClauses.h"
4546
#include "SILoadStoreOptimizer.h"
4647
#include "SILowerControlFlow.h"
4748
#include "SILowerSGPRSpills.h"
@@ -540,7 +541,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
540541
initializeSIMemoryLegalizerPass(*PR);
541542
initializeSIOptimizeExecMaskingLegacyPass(*PR);
542543
initializeSIPreAllocateWWMRegsLegacyPass(*PR);
543-
initializeSIFormMemoryClausesPass(*PR);
544+
initializeSIFormMemoryClausesLegacyPass(*PR);
544545
initializeSIPostRABundlerPass(*PR);
545546
initializeGCNCreateVOPDPass(*PR);
546547
initializeAMDGPUUnifyDivergentExitNodesPass(*PR);

llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp

Lines changed: 60 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
///
1515
//===----------------------------------------------------------------------===//
1616

17+
#include "SIFormMemoryClauses.h"
1718
#include "AMDGPU.h"
1819
#include "GCNRegPressure.h"
1920
#include "SIMachineFunctionInfo.h"
@@ -31,15 +32,37 @@ MaxClause("amdgpu-max-memory-clause", cl::Hidden, cl::init(15),
3132

3233
namespace {
3334

34-
class SIFormMemoryClauses : public MachineFunctionPass {
35+
class SIFormMemoryClausesImpl {
3536
using RegUse = DenseMap<unsigned, std::pair<unsigned, LaneBitmask>>;
3637

38+
bool canBundle(const MachineInstr &MI, const RegUse &Defs,
39+
const RegUse &Uses) const;
40+
bool checkPressure(const MachineInstr &MI, GCNDownwardRPTracker &RPT);
41+
void collectRegUses(const MachineInstr &MI, RegUse &Defs, RegUse &Uses) const;
42+
bool processRegUses(const MachineInstr &MI, RegUse &Defs, RegUse &Uses,
43+
GCNDownwardRPTracker &RPT);
44+
45+
const GCNSubtarget *ST;
46+
const SIRegisterInfo *TRI;
47+
const MachineRegisterInfo *MRI;
48+
SIMachineFunctionInfo *MFI;
49+
LiveIntervals *LIS;
50+
51+
unsigned LastRecordedOccupancy;
52+
unsigned MaxVGPRs;
53+
unsigned MaxSGPRs;
54+
3755
public:
38-
static char ID;
56+
SIFormMemoryClausesImpl(LiveIntervals *LS) : LIS(LS) {}
57+
bool run(MachineFunction &MF);
58+
};
3959

60+
class SIFormMemoryClausesLegacy : public MachineFunctionPass {
4061
public:
41-
SIFormMemoryClauses() : MachineFunctionPass(ID) {
42-
initializeSIFormMemoryClausesPass(*PassRegistry::getPassRegistry());
62+
static char ID;
63+
64+
SIFormMemoryClausesLegacy() : MachineFunctionPass(ID) {
65+
initializeSIFormMemoryClausesLegacyPass(*PassRegistry::getPassRegistry());
4366
}
4467

4568
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -58,40 +81,22 @@ class SIFormMemoryClauses : public MachineFunctionPass {
5881
return MachineFunctionProperties().set(
5982
MachineFunctionProperties::Property::IsSSA);
6083
}
61-
62-
private:
63-
bool canBundle(const MachineInstr &MI, const RegUse &Defs,
64-
const RegUse &Uses) const;
65-
bool checkPressure(const MachineInstr &MI, GCNDownwardRPTracker &RPT);
66-
void collectRegUses(const MachineInstr &MI, RegUse &Defs, RegUse &Uses) const;
67-
bool processRegUses(const MachineInstr &MI, RegUse &Defs, RegUse &Uses,
68-
GCNDownwardRPTracker &RPT);
69-
70-
const GCNSubtarget *ST;
71-
const SIRegisterInfo *TRI;
72-
const MachineRegisterInfo *MRI;
73-
SIMachineFunctionInfo *MFI;
74-
75-
unsigned LastRecordedOccupancy;
76-
unsigned MaxVGPRs;
77-
unsigned MaxSGPRs;
7884
};
7985

8086
} // End anonymous namespace.
8187

82-
INITIALIZE_PASS_BEGIN(SIFormMemoryClauses, DEBUG_TYPE,
88+
INITIALIZE_PASS_BEGIN(SIFormMemoryClausesLegacy, DEBUG_TYPE,
8389
"SI Form memory clauses", false, false)
8490
INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
85-
INITIALIZE_PASS_END(SIFormMemoryClauses, DEBUG_TYPE,
91+
INITIALIZE_PASS_END(SIFormMemoryClausesLegacy, DEBUG_TYPE,
8692
"SI Form memory clauses", false, false)
8793

94+
char SIFormMemoryClausesLegacy::ID = 0;
8895

89-
char SIFormMemoryClauses::ID = 0;
90-
91-
char &llvm::SIFormMemoryClausesID = SIFormMemoryClauses::ID;
96+
char &llvm::SIFormMemoryClausesID = SIFormMemoryClausesLegacy::ID;
9297

93-
FunctionPass *llvm::createSIFormMemoryClausesPass() {
94-
return new SIFormMemoryClauses();
98+
FunctionPass *llvm::createSIFormMemoryClausesLegacyPass() {
99+
return new SIFormMemoryClausesLegacy();
95100
}
96101

97102
static bool isVMEMClauseInst(const MachineInstr &MI) {
@@ -147,8 +152,9 @@ static unsigned getMopState(const MachineOperand &MO) {
147152

148153
// Returns false if there is a use of a def already in the map.
149154
// In this case we must break the clause.
150-
bool SIFormMemoryClauses::canBundle(const MachineInstr &MI, const RegUse &Defs,
151-
const RegUse &Uses) const {
155+
bool SIFormMemoryClausesImpl::canBundle(const MachineInstr &MI,
156+
const RegUse &Defs,
157+
const RegUse &Uses) const {
152158
// Check interference with defs.
153159
for (const MachineOperand &MO : MI.operands()) {
154160
// TODO: Prologue/Epilogue Insertion pass does not process bundled
@@ -184,8 +190,8 @@ bool SIFormMemoryClauses::canBundle(const MachineInstr &MI, const RegUse &Defs,
184190
// Since all defs in the clause are early clobber we can run out of registers.
185191
// Function returns false if pressure would hit the limit if instruction is
186192
// bundled into a memory clause.
187-
bool SIFormMemoryClauses::checkPressure(const MachineInstr &MI,
188-
GCNDownwardRPTracker &RPT) {
193+
bool SIFormMemoryClausesImpl::checkPressure(const MachineInstr &MI,
194+
GCNDownwardRPTracker &RPT) {
189195
// NB: skip advanceBeforeNext() call. Since all defs will be marked
190196
// early-clobber they will all stay alive at least to the end of the
191197
// clause. Therefore we should not decrease pressure even if load
@@ -213,8 +219,8 @@ bool SIFormMemoryClauses::checkPressure(const MachineInstr &MI,
213219
}
214220

215221
// Collect register defs and uses along with their lane masks and states.
216-
void SIFormMemoryClauses::collectRegUses(const MachineInstr &MI,
217-
RegUse &Defs, RegUse &Uses) const {
222+
void SIFormMemoryClausesImpl::collectRegUses(const MachineInstr &MI,
223+
RegUse &Defs, RegUse &Uses) const {
218224
for (const MachineOperand &MO : MI.operands()) {
219225
if (!MO.isReg())
220226
continue;
@@ -239,9 +245,9 @@ void SIFormMemoryClauses::collectRegUses(const MachineInstr &MI,
239245
// Check register def/use conflicts, occupancy limits and collect def/use maps.
240246
// Return true if instruction can be bundled with previous. If it cannot
241247
// def/use maps are not updated.
242-
bool SIFormMemoryClauses::processRegUses(const MachineInstr &MI,
243-
RegUse &Defs, RegUse &Uses,
244-
GCNDownwardRPTracker &RPT) {
248+
bool SIFormMemoryClausesImpl::processRegUses(const MachineInstr &MI,
249+
RegUse &Defs, RegUse &Uses,
250+
GCNDownwardRPTracker &RPT) {
245251
if (!canBundle(MI, Defs, Uses))
246252
return false;
247253

@@ -252,10 +258,7 @@ bool SIFormMemoryClauses::processRegUses(const MachineInstr &MI,
252258
return true;
253259
}
254260

255-
bool SIFormMemoryClauses::runOnMachineFunction(MachineFunction &MF) {
256-
if (skipFunction(MF.getFunction()))
257-
return false;
258-
261+
bool SIFormMemoryClausesImpl::run(MachineFunction &MF) {
259262
ST = &MF.getSubtarget<GCNSubtarget>();
260263
if (!ST->isXNACKEnabled())
261264
return false;
@@ -264,7 +267,6 @@ bool SIFormMemoryClauses::runOnMachineFunction(MachineFunction &MF) {
264267
TRI = ST->getRegisterInfo();
265268
MRI = &MF.getRegInfo();
266269
MFI = MF.getInfo<SIMachineFunctionInfo>();
267-
LiveIntervals *LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
268270
SlotIndexes *Ind = LIS->getSlotIndexes();
269271
bool Changed = false;
270272

@@ -416,3 +418,19 @@ bool SIFormMemoryClauses::runOnMachineFunction(MachineFunction &MF) {
416418

417419
return Changed;
418420
}
421+
422+
bool SIFormMemoryClausesLegacy::runOnMachineFunction(MachineFunction &MF) {
423+
if (skipFunction(MF.getFunction()))
424+
return false;
425+
426+
LiveIntervals *LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
427+
return SIFormMemoryClausesImpl(LIS).run(MF);
428+
}
429+
430+
PreservedAnalyses
431+
SIFormMemoryClausesPass::run(MachineFunction &MF,
432+
MachineFunctionAnalysisManager &MFAM) {
433+
LiveIntervals &LIS = MFAM.getResult<LiveIntervalsAnalysis>(MF);
434+
SIFormMemoryClausesImpl(&LIS).run(MF);
435+
return PreservedAnalyses::all();
436+
}
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
//===- SIFormMemoryClauses.h ------------------------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_LIB_TARGET_AMDGPU_SIFORMMEMORYCLAUSES_H
10+
#define LLVM_LIB_TARGET_AMDGPU_SIFORMMEMORYCLAUSES_H
11+
12+
#include "llvm/CodeGen/MachinePassManager.h"
13+
14+
namespace llvm {
15+
class SIFormMemoryClausesPass : public PassInfoMixin<SIFormMemoryClausesPass> {
16+
public:
17+
PreservedAnalyses run(MachineFunction &MF,
18+
MachineFunctionAnalysisManager &MFAM);
19+
};
20+
} // namespace llvm
21+
22+
#endif // LLVM_LIB_TARGET_AMDGPU_SIFORMMEMORYCLAUSES_H

llvm/test/CodeGen/AMDGPU/limit-soft-clause-reg-pressure.mir

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=+xnack -run-pass=si-form-memory-clauses -verify-machineinstrs -o - %s | FileCheck %s
2+
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=+xnack -passes="si-form-memory-clauses" -o - %s | FileCheck %s
23

34
# This previously would produce a bundle that could not be satisfied
45
# due to using nearly the entire register budget and not considering

llvm/test/CodeGen/AMDGPU/memory_clause.mir

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
# RUN: llc -mtriple=amdgcn -mcpu=gfx902 -verify-machineinstrs -run-pass=si-form-memory-clauses %s -o - | FileCheck -check-prefix=GCN %s
2+
# RUN: llc -mtriple=amdgcn -mcpu=gfx902 -passes="si-form-memory-clauses" %s -o - | FileCheck -check-prefix=GCN %s
23

34
# GCN-LABEL: {{^}}name: vector_clause{{$}}
45
# GCN: %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec

llvm/test/CodeGen/AMDGPU/reserved-reg-in-clause.mir

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -run-pass si-form-memory-clauses %s -o - | FileCheck -check-prefix=GCN %s
2+
# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -passes="si-form-memory-clauses" %s -o - | FileCheck -check-prefix=GCN %s
23

34
# Make sure we do not produce early-clobber list with odd subregs.
45

llvm/test/CodeGen/AMDGPU/smem-no-clause-coalesced.mir

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
# RUN: llc -mtriple=amdgcn -mcpu=gfx902 -o - %s -run-pass si-form-memory-clauses -verify-machineinstrs | FileCheck -check-prefix=XNACK %s
2+
# RUN: llc -mtriple=amdgcn -mcpu=gfx902 -o - %s -passes="si-form-memory-clauses" | FileCheck -check-prefix=XNACK %s
23

34
# The SIFormMemoryClauses pass must not form a clause (indicated by BUNDLE)
45
# from the two adjacent smem instructions, because the first one has its

llvm/test/CodeGen/AMDGPU/soft-clause-dbg-value.mir

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
22
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=+xnack -run-pass=si-form-memory-clauses -verify-machineinstrs -o - %s | FileCheck %s
3+
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=+xnack -passes="si-form-memory-clauses" -o - %s | FileCheck %s
34

45
# Make sure that debug instructions do not change the bundling, and
56
# the dbg_values which break the clause are inserted after the new

0 commit comments

Comments
 (0)