Skip to content

Commit e4df867

Browse files
IanWood1 authored and Muzammiluddin-Syed-ECE committed
Revert "[AMDGPU] SIPeepholeSDWA: Handle V_CNDMASK_B32_e64 (llvm#137930)"
This reverts commit 721cba4. Signed-off-by: Ian Wood <ianwood2024@u.northwestern.edu>
1 parent 6ae4479 commit e4df867

21 files changed

+2017
-2543
lines changed

llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp

Lines changed: 9 additions & 69 deletions
Original file line number | Diff line number | Diff line change
@@ -62,7 +62,6 @@ class SIPeepholeSDWA {
6262
std::unique_ptr<SDWAOperand> matchSDWAOperand(MachineInstr &MI);
6363
void pseudoOpConvertToVOP2(MachineInstr &MI,
6464
const GCNSubtarget &ST) const;
65-
void convertVcndmaskToVOP2(MachineInstr &MI, const GCNSubtarget &ST) const;
6665
MachineInstr *createSDWAVersion(MachineInstr &MI);
6766
bool convertToSDWA(MachineInstr &MI, const SDWAOperandsVector &SDWAOperands);
6867
void legalizeScalarOperands(MachineInstr &MI, const GCNSubtarget &ST) const;
@@ -1038,8 +1037,7 @@ void SIPeepholeSDWA::pseudoOpConvertToVOP2(MachineInstr &MI,
10381037
return;
10391038
// Make sure VCC or its subregs are dead before MI.
10401039
MachineBasicBlock &MBB = *MI.getParent();
1041-
MachineBasicBlock::LivenessQueryResult Liveness =
1042-
MBB.computeRegisterLiveness(TRI, AMDGPU::VCC, MI, 25);
1040+
auto Liveness = MBB.computeRegisterLiveness(TRI, AMDGPU::VCC, MI, 25);
10431041
if (Liveness != MachineBasicBlock::LQR_Dead)
10441042
return;
10451043
// Check if VCC is referenced in range of (MI,MISucc].
@@ -1063,53 +1061,6 @@ void SIPeepholeSDWA::pseudoOpConvertToVOP2(MachineInstr &MI,
10631061
MISucc.substituteRegister(CarryIn->getReg(), TRI->getVCC(), 0, *TRI);
10641062
}
10651063

1066-
/// Try to convert an \p MI in VOP3 which takes an src2 carry-in
1067-
/// operand into the corresponding VOP2 form which expects the
1068-
/// argument in VCC. To this end, add a copy from the carry-in to
1069-
/// VCC. The conversion will only be applied if \p MI can be shrunk
1070-
/// to VOP2 and if VCC can be proven to be dead before \p MI.
1071-
void SIPeepholeSDWA::convertVcndmaskToVOP2(MachineInstr &MI,
1072-
const GCNSubtarget &ST) const {
1073-
assert(MI.getOpcode() == AMDGPU::V_CNDMASK_B32_e64);
1074-
1075-
LLVM_DEBUG(dbgs() << "Attempting VOP2 conversion: " << MI);
1076-
if (!TII->canShrink(MI, *MRI)) {
1077-
LLVM_DEBUG(dbgs() << "Cannot shrink instruction\n");
1078-
return;
1079-
}
1080-
1081-
const MachineOperand &CarryIn =
1082-
*TII->getNamedOperand(MI, AMDGPU::OpName::src2);
1083-
Register CarryReg = CarryIn.getReg();
1084-
MachineInstr *CarryDef = MRI->getVRegDef(CarryReg);
1085-
if (!CarryDef) {
1086-
LLVM_DEBUG(dbgs() << "Missing carry-in operand definition\n");
1087-
return;
1088-
}
1089-
1090-
// Make sure VCC or its subregs are dead before MI.
1091-
MCRegister Vcc = TRI->getVCC();
1092-
MachineBasicBlock &MBB = *MI.getParent();
1093-
MachineBasicBlock::LivenessQueryResult Liveness =
1094-
MBB.computeRegisterLiveness(TRI, Vcc, MI);
1095-
if (Liveness != MachineBasicBlock::LQR_Dead) {
1096-
LLVM_DEBUG(dbgs() << "VCC not known to be dead before instruction\n");
1097-
return;
1098-
}
1099-
1100-
BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(AMDGPU::COPY), Vcc).add(CarryIn);
1101-
1102-
auto Converted = BuildMI(MBB, MI, MI.getDebugLoc(),
1103-
TII->get(AMDGPU::getVOPe32(MI.getOpcode())))
1104-
.add(*TII->getNamedOperand(MI, AMDGPU::OpName::vdst))
1105-
.add(*TII->getNamedOperand(MI, AMDGPU::OpName::src0))
1106-
.add(*TII->getNamedOperand(MI, AMDGPU::OpName::src1))
1107-
.setMIFlags(MI.getFlags());
1108-
TII->fixImplicitOperands(*Converted);
1109-
LLVM_DEBUG(dbgs() << "Converted to VOP2: " << *Converted);
1110-
MI.eraseFromParent();
1111-
}
1112-
11131064
namespace {
11141065
bool isConvertibleToSDWA(MachineInstr &MI,
11151066
const GCNSubtarget &ST,
@@ -1119,11 +1070,6 @@ bool isConvertibleToSDWA(MachineInstr &MI,
11191070
if (TII->isSDWA(Opc))
11201071
return true;
11211072

1122-
// Can only be handled after earlier conversion to
1123-
// AMDGPU::V_CNDMASK_B32_e32 which is not always possible.
1124-
if (Opc == AMDGPU::V_CNDMASK_B32_e64)
1125-
return false;
1126-
11271073
// Check if this instruction has opcode that supports SDWA
11281074
if (AMDGPU::getSDWAOp(Opc) == -1)
11291075
Opc = AMDGPU::getVOPe32(Opc);
@@ -1162,6 +1108,10 @@ bool isConvertibleToSDWA(MachineInstr &MI,
11621108
if (TII->pseudoToMCOpcode(Opc) == -1)
11631109
return false;
11641110

1111+
// FIXME: has SDWA but requires handling of implicit VCC use
1112+
if (Opc == AMDGPU::V_CNDMASK_B32_e32)
1113+
return false;
1114+
11651115
if (MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0)) {
11661116
if (!Src0->isReg() && !Src0->isImm())
11671117
return false;
@@ -1316,9 +1266,7 @@ MachineInstr *SIPeepholeSDWA::createSDWAVersion(MachineInstr &MI) {
13161266
SDWAInst->tieOperands(PreserveDstIdx, SDWAInst->getNumOperands() - 1);
13171267
}
13181268

1319-
MachineInstr *Ret = SDWAInst.getInstr();
1320-
TII->fixImplicitOperands(*Ret);
1321-
return Ret;
1269+
return SDWAInst.getInstr();
13221270
}
13231271

13241272
bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
@@ -1436,18 +1384,10 @@ bool SIPeepholeSDWA::run(MachineFunction &MF) {
14361384
for (const auto &OperandPair : SDWAOperands) {
14371385
const auto &Operand = OperandPair.second;
14381386
MachineInstr *PotentialMI = Operand->potentialToConvert(TII, ST);
1439-
if (!PotentialMI)
1440-
continue;
1441-
1442-
switch (PotentialMI->getOpcode()) {
1443-
case AMDGPU::V_ADD_CO_U32_e64:
1444-
case AMDGPU::V_SUB_CO_U32_e64:
1387+
if (PotentialMI &&
1388+
(PotentialMI->getOpcode() == AMDGPU::V_ADD_CO_U32_e64 ||
1389+
PotentialMI->getOpcode() == AMDGPU::V_SUB_CO_U32_e64))
14451390
pseudoOpConvertToVOP2(*PotentialMI, ST);
1446-
break;
1447-
case AMDGPU::V_CNDMASK_B32_e64:
1448-
convertVcndmaskToVOP2(*PotentialMI, ST);
1449-
break;
1450-
};
14511391
}
14521392
SDWAOperands.clear();
14531393

0 commit comments

Comments
 (0)