Skip to content

Commit 6bb2f90

Browse files
authored
Revert "[AMDGPU] Support block load/store for CSR" (#136846)
Reverts #130013 because it causes failures when expensive checks are enabled.
1 parent 4e073a1 commit 6bb2f90

19 files changed

+41
-1065
lines changed

llvm/include/llvm/CodeGen/MachineFrameInfo.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,6 @@ class CalleeSavedInfo {
6161
MCRegister getReg() const { return Reg; }
6262
int getFrameIdx() const { return FrameIdx; }
6363
MCRegister getDstReg() const { return DstReg; }
64-
void setReg(MCRegister R) { Reg = R; }
6564
void setFrameIdx(int FI) {
6665
FrameIdx = FI;
6766
SpilledToReg = false;

llvm/include/llvm/CodeGen/TargetFrameLowering.h

Lines changed: 0 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -270,14 +270,6 @@ class TargetFrameLowering {
270270
return false;
271271
}
272272

273-
/// spillCalleeSavedRegister - Default implementation for spilling a single
274-
/// callee saved register.
275-
void spillCalleeSavedRegister(MachineBasicBlock &SaveBlock,
276-
MachineBasicBlock::iterator MI,
277-
const CalleeSavedInfo &CS,
278-
const TargetInstrInfo *TII,
279-
const TargetRegisterInfo *TRI) const;
280-
281273
/// restoreCalleeSavedRegisters - Issues instruction(s) to restore all callee
282274
/// saved registers and returns true if it isn't possible / profitable to do
283275
/// so by issuing a series of load instructions via loadRegToStackSlot().
@@ -292,15 +284,6 @@ class TargetFrameLowering {
292284
return false;
293285
}
294286

295-
// restoreCalleeSavedRegister - Default implementation for restoring a single
296-
// callee saved register. Should be called in reverse order. Can insert
297-
// multiple instructions.
298-
void restoreCalleeSavedRegister(MachineBasicBlock &MBB,
299-
MachineBasicBlock::iterator MI,
300-
const CalleeSavedInfo &CS,
301-
const TargetInstrInfo *TII,
302-
const TargetRegisterInfo *TRI) const;
303-
304287
/// hasFP - Return true if the specified function should have a dedicated
305288
/// frame pointer register. For most targets this is true only if the function
306289
/// has variable sized allocas or if frame pointer elimination is disabled.

llvm/lib/CodeGen/PrologEpilogInserter.cpp

Lines changed: 29 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -476,8 +476,8 @@ static void assignCalleeSavedSpillSlots(MachineFunction &F,
476476
// Now that we know which registers need to be saved and restored, allocate
477477
// stack slots for them.
478478
for (auto &CS : CSI) {
479-
// If the target has spilled this register to another register or already
480-
// handled it , we don't need to allocate a stack slot.
479+
// If the target has spilled this register to another register, we don't
480+
// need to allocate a stack slot.
481481
if (CS.isSpilledToReg())
482482
continue;
483483

@@ -597,14 +597,25 @@ static void updateLiveness(MachineFunction &MF) {
597597
static void insertCSRSaves(MachineBasicBlock &SaveBlock,
598598
ArrayRef<CalleeSavedInfo> CSI) {
599599
MachineFunction &MF = *SaveBlock.getParent();
600-
const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
600+
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
601601
const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
602602
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
603603

604604
MachineBasicBlock::iterator I = SaveBlock.begin();
605605
if (!TFI->spillCalleeSavedRegisters(SaveBlock, I, CSI, TRI)) {
606606
for (const CalleeSavedInfo &CS : CSI) {
607-
TFI->spillCalleeSavedRegister(SaveBlock, I, CS, TII, TRI);
607+
// Insert the spill to the stack frame.
608+
MCRegister Reg = CS.getReg();
609+
610+
if (CS.isSpilledToReg()) {
611+
BuildMI(SaveBlock, I, DebugLoc(), TII.get(TargetOpcode::COPY),
612+
CS.getDstReg())
613+
.addReg(Reg, getKillRegState(true));
614+
} else {
615+
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
616+
TII.storeRegToStackSlot(SaveBlock, I, Reg, true, CS.getFrameIdx(), RC,
617+
TRI, Register());
618+
}
608619
}
609620
}
610621
}
@@ -613,7 +624,7 @@ static void insertCSRSaves(MachineBasicBlock &SaveBlock,
613624
static void insertCSRRestores(MachineBasicBlock &RestoreBlock,
614625
std::vector<CalleeSavedInfo> &CSI) {
615626
MachineFunction &MF = *RestoreBlock.getParent();
616-
const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
627+
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
617628
const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
618629
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
619630

@@ -623,7 +634,19 @@ static void insertCSRRestores(MachineBasicBlock &RestoreBlock,
623634

624635
if (!TFI->restoreCalleeSavedRegisters(RestoreBlock, I, CSI, TRI)) {
625636
for (const CalleeSavedInfo &CI : reverse(CSI)) {
626-
TFI->restoreCalleeSavedRegister(RestoreBlock, I, CI, TII, TRI);
637+
MCRegister Reg = CI.getReg();
638+
if (CI.isSpilledToReg()) {
639+
BuildMI(RestoreBlock, I, DebugLoc(), TII.get(TargetOpcode::COPY), Reg)
640+
.addReg(CI.getDstReg(), getKillRegState(true));
641+
} else {
642+
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
643+
TII.loadRegFromStackSlot(RestoreBlock, I, Reg, CI.getFrameIdx(), RC,
644+
TRI, Register());
645+
assert(I != RestoreBlock.begin() &&
646+
"loadRegFromStackSlot didn't insert any code!");
647+
// Insert in reverse order. loadRegFromStackSlot can insert
648+
// multiple instructions.
649+
}
627650
}
628651
}
629652
}

llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp

Lines changed: 0 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515
#include "llvm/CodeGen/MachineFunction.h"
1616
#include "llvm/CodeGen/MachineRegisterInfo.h"
1717
#include "llvm/CodeGen/TargetFrameLowering.h"
18-
#include "llvm/CodeGen/TargetInstrInfo.h"
1918
#include "llvm/CodeGen/TargetSubtargetInfo.h"
2019
#include "llvm/IR/Attributes.h"
2120
#include "llvm/IR/Function.h"
@@ -183,37 +182,3 @@ TargetFrameLowering::getDwarfFrameBase(const MachineFunction &MF) const {
183182
const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
184183
return DwarfFrameBase{DwarfFrameBase::Register, {RI->getFrameRegister(MF).id()}};
185184
}
186-
187-
void TargetFrameLowering::spillCalleeSavedRegister(
188-
MachineBasicBlock &SaveBlock, MachineBasicBlock::iterator MI,
189-
const CalleeSavedInfo &CS, const TargetInstrInfo *TII,
190-
const TargetRegisterInfo *TRI) const {
191-
// Insert the spill to the stack frame.
192-
MCRegister Reg = CS.getReg();
193-
194-
if (CS.isSpilledToReg()) {
195-
BuildMI(SaveBlock, MI, DebugLoc(), TII->get(TargetOpcode::COPY),
196-
CS.getDstReg())
197-
.addReg(Reg, getKillRegState(true));
198-
} else {
199-
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
200-
TII->storeRegToStackSlot(SaveBlock, MI, Reg, true, CS.getFrameIdx(), RC,
201-
TRI, Register());
202-
}
203-
}
204-
205-
void TargetFrameLowering::restoreCalleeSavedRegister(
206-
MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
207-
const CalleeSavedInfo &CS, const TargetInstrInfo *TII,
208-
const TargetRegisterInfo *TRI) const {
209-
MCRegister Reg = CS.getReg();
210-
if (CS.isSpilledToReg()) {
211-
BuildMI(MBB, MI, DebugLoc(), TII->get(TargetOpcode::COPY), Reg)
212-
.addReg(CS.getDstReg(), getKillRegState(true));
213-
} else {
214-
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
215-
TII->loadRegFromStackSlot(MBB, MI, Reg, CS.getFrameIdx(), RC, TRI,
216-
Register());
217-
assert(MI != MBB.begin() && "loadRegFromStackSlot didn't insert any code!");
218-
}
219-
}

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1275,14 +1275,6 @@ def FeatureDynamicVGPRBlockSize32 : SubtargetFeature<"dynamic-vgpr-block-size-32
12751275
"Use a block size of 32 for dynamic VGPR allocation (default is 16)"
12761276
>;
12771277

1278-
// Enable the use of SCRATCH_STORE/LOAD_BLOCK instructions for saving and
1279-
// restoring the callee-saved registers.
1280-
def FeatureUseBlockVGPROpsForCSR : SubtargetFeature<"block-vgpr-csr",
1281-
"UseBlockVGPROpsForCSR",
1282-
"true",
1283-
"Use block load/store for VGPR callee saved registers"
1284-
>;
1285-
12861278
def FeatureLshlAddU64Inst
12871279
: SubtargetFeature<"lshl-add-u64-inst", "HasLshlAddU64Inst", "true",
12881280
"Has v_lshl_add_u64 instruction">;

llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp

Lines changed: 0 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
#include "MCTargetDesc/AMDGPUInstPrinter.h"
2020
#include "MCTargetDesc/AMDGPUMCExpr.h"
2121
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
22-
#include "SIMachineFunctionInfo.h"
2322
#include "llvm/CodeGen/MachineBasicBlock.h"
2423
#include "llvm/CodeGen/MachineInstr.h"
2524
#include "llvm/IR/Constants.h"
@@ -244,36 +243,6 @@ const MCExpr *AMDGPUAsmPrinter::lowerConstant(const Constant *CV,
244243
return AsmPrinter::lowerConstant(CV, BaseCV, Offset);
245244
}
246245

247-
static void emitVGPRBlockComment(const MachineInstr *MI, const SIInstrInfo *TII,
248-
const TargetRegisterInfo *TRI,
249-
const SIMachineFunctionInfo *MFI,
250-
MCStreamer &OS) {
251-
// The instruction will only transfer a subset of the registers in the block,
252-
// based on the mask that is stored in m0. We could search for the instruction
253-
// that sets m0, but most of the time we'll already have the mask stored in
254-
// the machine function info. Try to use that. This assumes that we only use
255-
// block loads/stores for CSR spills.
256-
Register RegBlock =
257-
TII->getNamedOperand(*MI, MI->mayLoad() ? AMDGPU::OpName::vdst
258-
: AMDGPU::OpName::vdata)
259-
->getReg();
260-
Register FirstRegInBlock = TRI->getSubReg(RegBlock, AMDGPU::sub0);
261-
uint32_t Mask = MFI->getMaskForVGPRBlockOps(RegBlock);
262-
263-
if (!Mask)
264-
return; // Nothing to report
265-
266-
SmallString<512> TransferredRegs;
267-
for (unsigned I = 0; I < sizeof(Mask) * 8; ++I) {
268-
if (Mask & (1 << I)) {
269-
(llvm::Twine(" ") + TRI->getRegAsmName(FirstRegInBlock + I))
270-
.toVector(TransferredRegs);
271-
}
272-
}
273-
274-
OS.emitRawComment(" transferring at most " + TransferredRegs);
275-
}
276-
277246
void AMDGPUAsmPrinter::emitInstruction(const MachineInstr *MI) {
278247
// FIXME: Enable feature predicate checks once all the test pass.
279248
// AMDGPU_MC::verifyInstructionPredicates(MI->getOpcode(),
@@ -362,12 +331,6 @@ void AMDGPUAsmPrinter::emitInstruction(const MachineInstr *MI) {
362331
return;
363332
}
364333

365-
if (isVerbose())
366-
if (STI.getInstrInfo()->isBlockLoadStore(MI->getOpcode()))
367-
emitVGPRBlockComment(MI, STI.getInstrInfo(), STI.getRegisterInfo(),
368-
MF->getInfo<SIMachineFunctionInfo>(),
369-
*OutStreamer);
370-
371334
MCInst TmpInst;
372335
MCInstLowering.lower(MI, TmpInst);
373336
EmitToStreamer(*OutStreamer, TmpInst);

llvm/lib/Target/AMDGPU/GCNSubtarget.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -262,7 +262,6 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
262262
bool HasPointSampleAccel = false;
263263

264264
bool RequiresCOV6 = false;
265-
bool UseBlockVGPROpsForCSR = false;
266265

267266
// Dummy feature to use for assembler in tablegen.
268267
bool FeatureDisable = false;
@@ -1278,8 +1277,6 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
12781277

12791278
bool requiresCodeObjectV6() const { return RequiresCOV6; }
12801279

1281-
bool useVGPRBlockOpsForCSR() const { return UseBlockVGPROpsForCSR; }
1282-
12831280
bool hasVALUMaskWriteHazard() const { return getGeneration() == GFX11; }
12841281

12851282
bool hasVALUReadSGPRHazard() const { return getGeneration() == GFX12; }

0 commit comments

Comments
 (0)