Skip to content

Commit c076481

Browse files
committed
Revert "Reland [AMDGPU] Support block load/store for CSR llvm#130013 (llvm#137169)"
The reverted change needs to be integrated into the downstream divergence. This reverts commit 5bad5d8.
1 parent 173de22 commit c076481

20 files changed

+56
-1110
lines changed

llvm/include/llvm/CodeGen/MachineFrameInfo.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,6 @@ class CalleeSavedInfo {
6161
MCRegister getReg() const { return Reg; }
6262
int getFrameIdx() const { return FrameIdx; }
6363
MCRegister getDstReg() const { return DstReg; }
64-
void setReg(MCRegister R) { Reg = R; }
6564
void setFrameIdx(int FI) {
6665
FrameIdx = FI;
6766
SpilledToReg = false;

llvm/include/llvm/CodeGen/TargetFrameLowering.h

Lines changed: 0 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -271,14 +271,6 @@ class TargetFrameLowering {
271271
return false;
272272
}
273273

274-
/// spillCalleeSavedRegister - Default implementation for spilling a single
275-
/// callee saved register.
276-
void spillCalleeSavedRegister(MachineBasicBlock &SaveBlock,
277-
MachineBasicBlock::iterator MI,
278-
const CalleeSavedInfo &CS,
279-
const TargetInstrInfo *TII,
280-
const TargetRegisterInfo *TRI) const;
281-
282274
/// restoreCalleeSavedRegisters - Issues instruction(s) to restore all callee
283275
/// saved registers and returns true if it isn't possible / profitable to do
284276
/// so by issuing a series of load instructions via loadRegToStackSlot().
@@ -293,15 +285,6 @@ class TargetFrameLowering {
293285
return false;
294286
}
295287

296-
// restoreCalleeSavedRegister - Default implementation for restoring a single
297-
// callee saved register. Should be called in reverse order. Can insert
298-
// multiple instructions.
299-
void restoreCalleeSavedRegister(MachineBasicBlock &MBB,
300-
MachineBasicBlock::iterator MI,
301-
const CalleeSavedInfo &CS,
302-
const TargetInstrInfo *TII,
303-
const TargetRegisterInfo *TRI) const;
304-
305288
/// hasFP - Return true if the specified function should have a dedicated
306289
/// frame pointer register. For most targets this is true only if the function
307290
/// has variable sized allocas or if frame pointer elimination is disabled.

llvm/lib/CodeGen/PrologEpilogInserter.cpp

Lines changed: 29 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -476,8 +476,8 @@ static void assignCalleeSavedSpillSlots(MachineFunction &F,
476476
// Now that we know which registers need to be saved and restored, allocate
477477
// stack slots for them.
478478
for (auto &CS : CSI) {
479-
// If the target has spilled this register to another register or already
480-
// handled it , we don't need to allocate a stack slot.
479+
// If the target has spilled this register to another register, we don't
480+
// need to allocate a stack slot.
481481
if (CS.isSpilledToReg())
482482
continue;
483483

@@ -597,14 +597,25 @@ static void updateLiveness(MachineFunction &MF) {
597597
static void insertCSRSaves(MachineBasicBlock &SaveBlock,
598598
ArrayRef<CalleeSavedInfo> CSI) {
599599
MachineFunction &MF = *SaveBlock.getParent();
600-
const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
600+
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
601601
const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
602602
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
603603

604604
MachineBasicBlock::iterator I = SaveBlock.begin();
605605
if (!TFI->spillCalleeSavedRegisters(SaveBlock, I, CSI, TRI)) {
606606
for (const CalleeSavedInfo &CS : CSI) {
607-
TFI->spillCalleeSavedRegister(SaveBlock, I, CS, TII, TRI);
607+
// Insert the spill to the stack frame.
608+
MCRegister Reg = CS.getReg();
609+
610+
if (CS.isSpilledToReg()) {
611+
BuildMI(SaveBlock, I, DebugLoc(), TII.get(TargetOpcode::COPY),
612+
CS.getDstReg())
613+
.addReg(Reg, getKillRegState(true));
614+
} else {
615+
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
616+
TII.storeRegToStackSlot(SaveBlock, I, Reg, true, CS.getFrameIdx(), RC,
617+
TRI, Register());
618+
}
608619
}
609620
}
610621
}
@@ -613,7 +624,7 @@ static void insertCSRSaves(MachineBasicBlock &SaveBlock,
613624
static void insertCSRRestores(MachineBasicBlock &RestoreBlock,
614625
std::vector<CalleeSavedInfo> &CSI) {
615626
MachineFunction &MF = *RestoreBlock.getParent();
616-
const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
627+
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
617628
const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
618629
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
619630

@@ -623,7 +634,19 @@ static void insertCSRRestores(MachineBasicBlock &RestoreBlock,
623634

624635
if (!TFI->restoreCalleeSavedRegisters(RestoreBlock, I, CSI, TRI)) {
625636
for (const CalleeSavedInfo &CI : reverse(CSI)) {
626-
TFI->restoreCalleeSavedRegister(RestoreBlock, I, CI, TII, TRI);
637+
MCRegister Reg = CI.getReg();
638+
if (CI.isSpilledToReg()) {
639+
BuildMI(RestoreBlock, I, DebugLoc(), TII.get(TargetOpcode::COPY), Reg)
640+
.addReg(CI.getDstReg(), getKillRegState(true));
641+
} else {
642+
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
643+
TII.loadRegFromStackSlot(RestoreBlock, I, Reg, CI.getFrameIdx(), RC,
644+
TRI, Register());
645+
assert(I != RestoreBlock.begin() &&
646+
"loadRegFromStackSlot didn't insert any code!");
647+
// Insert in reverse order. loadRegFromStackSlot can insert
648+
// multiple instructions.
649+
}
627650
}
628651
}
629652
}

llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp

Lines changed: 0 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
#include "llvm/CodeGen/MachineModuleInfo.h"
1717
#include "llvm/CodeGen/MachineRegisterInfo.h"
1818
#include "llvm/CodeGen/TargetFrameLowering.h"
19-
#include "llvm/CodeGen/TargetInstrInfo.h"
2019
#include "llvm/CodeGen/TargetSubtargetInfo.h"
2120
#include "llvm/IR/Attributes.h"
2221
#include "llvm/IR/CallingConv.h"
@@ -212,37 +211,3 @@ TargetFrameLowering::getDwarfFrameBase(const MachineFunction &MF) const {
212211
const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
213212
return DwarfFrameBase{DwarfFrameBase::Register, {RI->getFrameRegister(MF).id()}};
214213
}
215-
216-
void TargetFrameLowering::spillCalleeSavedRegister(
217-
MachineBasicBlock &SaveBlock, MachineBasicBlock::iterator MI,
218-
const CalleeSavedInfo &CS, const TargetInstrInfo *TII,
219-
const TargetRegisterInfo *TRI) const {
220-
// Insert the spill to the stack frame.
221-
MCRegister Reg = CS.getReg();
222-
223-
if (CS.isSpilledToReg()) {
224-
BuildMI(SaveBlock, MI, DebugLoc(), TII->get(TargetOpcode::COPY),
225-
CS.getDstReg())
226-
.addReg(Reg, getKillRegState(true));
227-
} else {
228-
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
229-
TII->storeRegToStackSlot(SaveBlock, MI, Reg, true, CS.getFrameIdx(), RC,
230-
TRI, Register());
231-
}
232-
}
233-
234-
void TargetFrameLowering::restoreCalleeSavedRegister(
235-
MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
236-
const CalleeSavedInfo &CS, const TargetInstrInfo *TII,
237-
const TargetRegisterInfo *TRI) const {
238-
MCRegister Reg = CS.getReg();
239-
if (CS.isSpilledToReg()) {
240-
BuildMI(MBB, MI, DebugLoc(), TII->get(TargetOpcode::COPY), Reg)
241-
.addReg(CS.getDstReg(), getKillRegState(true));
242-
} else {
243-
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
244-
TII->loadRegFromStackSlot(MBB, MI, Reg, CS.getFrameIdx(), RC, TRI,
245-
Register());
246-
assert(MI != MBB.begin() && "loadRegFromStackSlot didn't insert any code!");
247-
}
248-
}

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1275,14 +1275,6 @@ def FeatureDynamicVGPRBlockSize32 : SubtargetFeature<"dynamic-vgpr-block-size-32
12751275
"Use a block size of 32 for dynamic VGPR allocation (default is 16)"
12761276
>;
12771277

1278-
// Enable the use of SCRATCH_STORE/LOAD_BLOCK instructions for saving and
1279-
// restoring the callee-saved registers.
1280-
def FeatureUseBlockVGPROpsForCSR : SubtargetFeature<"block-vgpr-csr",
1281-
"UseBlockVGPROpsForCSR",
1282-
"true",
1283-
"Use block load/store for VGPR callee saved registers"
1284-
>;
1285-
12861278
def FeatureLshlAddU64Inst
12871279
: SubtargetFeature<"lshl-add-u64-inst", "HasLshlAddU64Inst", "true",
12881280
"Has v_lshl_add_u64 instruction">;

llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp

Lines changed: 0 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
#include "MCTargetDesc/AMDGPUInstPrinter.h"
2020
#include "MCTargetDesc/AMDGPUMCExpr.h"
2121
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
22-
#include "SIMachineFunctionInfo.h"
2322
#include "llvm/CodeGen/MachineBasicBlock.h"
2423
#include "llvm/CodeGen/MachineInstr.h"
2524
#include "llvm/IR/Constants.h"
@@ -244,36 +243,6 @@ const MCExpr *AMDGPUAsmPrinter::lowerConstant(const Constant *CV,
244243
return AsmPrinter::lowerConstant(CV, BaseCV, Offset);
245244
}
246245

247-
static void emitVGPRBlockComment(const MachineInstr *MI, const SIInstrInfo *TII,
248-
const TargetRegisterInfo *TRI,
249-
const SIMachineFunctionInfo *MFI,
250-
MCStreamer &OS) {
251-
// The instruction will only transfer a subset of the registers in the block,
252-
// based on the mask that is stored in m0. We could search for the instruction
253-
// that sets m0, but most of the time we'll already have the mask stored in
254-
// the machine function info. Try to use that. This assumes that we only use
255-
// block loads/stores for CSR spills.
256-
Register RegBlock =
257-
TII->getNamedOperand(*MI, MI->mayLoad() ? AMDGPU::OpName::vdst
258-
: AMDGPU::OpName::vdata)
259-
->getReg();
260-
Register FirstRegInBlock = TRI->getSubReg(RegBlock, AMDGPU::sub0);
261-
uint32_t Mask = MFI->getMaskForVGPRBlockOps(RegBlock);
262-
263-
if (!Mask)
264-
return; // Nothing to report
265-
266-
SmallString<512> TransferredRegs;
267-
for (unsigned I = 0; I < sizeof(Mask) * 8; ++I) {
268-
if (Mask & (1 << I)) {
269-
(llvm::Twine(" ") + TRI->getRegAsmName(FirstRegInBlock + I))
270-
.toVector(TransferredRegs);
271-
}
272-
}
273-
274-
OS.emitRawComment(" transferring at most " + TransferredRegs);
275-
}
276-
277246
void AMDGPUAsmPrinter::emitInstruction(const MachineInstr *MI) {
278247
switch (MI->getOpcode()) {
279248
case TargetOpcode::DBG_VALUE:
@@ -369,12 +338,6 @@ void AMDGPUAsmPrinter::emitInstruction(const MachineInstr *MI) {
369338
return;
370339
}
371340

372-
if (isVerbose())
373-
if (STI.getInstrInfo()->isBlockLoadStore(MI->getOpcode()))
374-
emitVGPRBlockComment(MI, STI.getInstrInfo(), STI.getRegisterInfo(),
375-
MF->getInfo<SIMachineFunctionInfo>(),
376-
*OutStreamer);
377-
378341
MCInst TmpInst;
379342
MCInstLowering.lower(MI, TmpInst);
380343
EmitToStreamer(*OutStreamer, TmpInst);

llvm/lib/Target/AMDGPU/GCNSubtarget.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -262,7 +262,6 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
262262
bool HasPointSampleAccel = false;
263263

264264
bool RequiresCOV6 = false;
265-
bool UseBlockVGPROpsForCSR = false;
266265

267266
// Dummy feature to use for assembler in tablegen.
268267
bool FeatureDisable = false;
@@ -1280,8 +1279,6 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
12801279

12811280
bool requiresCodeObjectV6() const { return RequiresCOV6; }
12821281

1283-
bool useVGPRBlockOpsForCSR() const { return UseBlockVGPROpsForCSR; }
1284-
12851282
bool hasVALUMaskWriteHazard() const { return getGeneration() == GFX11; }
12861283

12871284
bool hasVALUReadSGPRHazard() const { return getGeneration() == GFX12; }

0 commit comments

Comments
 (0)