@@ -62,7 +62,6 @@ class SIPeepholeSDWA {
   std::unique_ptr<SDWAOperand> matchSDWAOperand(MachineInstr &MI);
   void pseudoOpConvertToVOP2(MachineInstr &MI,
                              const GCNSubtarget &ST) const;
-  void convertVcndmaskToVOP2(MachineInstr &MI, const GCNSubtarget &ST) const;
   MachineInstr *createSDWAVersion(MachineInstr &MI);
   bool convertToSDWA(MachineInstr &MI, const SDWAOperandsVector &SDWAOperands);
   void legalizeScalarOperands(MachineInstr &MI, const GCNSubtarget &ST) const;
@@ -1038,8 +1037,7 @@ void SIPeepholeSDWA::pseudoOpConvertToVOP2(MachineInstr &MI,
     return;
   // Make sure VCC or its subregs are dead before MI.
   MachineBasicBlock &MBB = *MI.getParent();
-  MachineBasicBlock::LivenessQueryResult Liveness =
-      MBB.computeRegisterLiveness(TRI, AMDGPU::VCC, MI, 25);
+  auto Liveness = MBB.computeRegisterLiveness(TRI, AMDGPU::VCC, MI, 25);
   if (Liveness != MachineBasicBlock::LQR_Dead)
     return;
   // Check if VCC is referenced in range of (MI,MISucc].
@@ -1063,53 +1061,6 @@ void SIPeepholeSDWA::pseudoOpConvertToVOP2(MachineInstr &MI,
   MISucc.substituteRegister(CarryIn->getReg(), TRI->getVCC(), 0, *TRI);
 }
 
-/// Try to convert an \p MI in VOP3 which takes an src2 carry-in
-/// operand into the corresponding VOP2 form which expects the
-/// argument in VCC. To this end, add an copy from the carry-in to
-/// VCC. The conversion will only be applied if \p MI can be shrunk
-/// to VOP2 and if VCC can be proven to be dead before \p MI.
-void SIPeepholeSDWA::convertVcndmaskToVOP2(MachineInstr &MI,
-                                           const GCNSubtarget &ST) const {
-  assert(MI.getOpcode() == AMDGPU::V_CNDMASK_B32_e64);
-
-  LLVM_DEBUG(dbgs() << "Attempting VOP2 conversion: " << MI);
-  if (!TII->canShrink(MI, *MRI)) {
-    LLVM_DEBUG(dbgs() << "Cannot shrink instruction\n");
-    return;
-  }
-
-  const MachineOperand &CarryIn =
-      *TII->getNamedOperand(MI, AMDGPU::OpName::src2);
-  Register CarryReg = CarryIn.getReg();
-  MachineInstr *CarryDef = MRI->getVRegDef(CarryReg);
-  if (!CarryDef) {
-    LLVM_DEBUG(dbgs() << "Missing carry-in operand definition\n");
-    return;
-  }
-
-  // Make sure VCC or its subregs are dead before MI.
-  MCRegister Vcc = TRI->getVCC();
-  MachineBasicBlock &MBB = *MI.getParent();
-  MachineBasicBlock::LivenessQueryResult Liveness =
-      MBB.computeRegisterLiveness(TRI, Vcc, MI);
-  if (Liveness != MachineBasicBlock::LQR_Dead) {
-    LLVM_DEBUG(dbgs() << "VCC not known to be dead before instruction\n");
-    return;
-  }
-
-  BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(AMDGPU::COPY), Vcc).add(CarryIn);
-
-  auto Converted = BuildMI(MBB, MI, MI.getDebugLoc(),
-                           TII->get(AMDGPU::getVOPe32(MI.getOpcode())))
-                       .add(*TII->getNamedOperand(MI, AMDGPU::OpName::vdst))
-                       .add(*TII->getNamedOperand(MI, AMDGPU::OpName::src0))
-                       .add(*TII->getNamedOperand(MI, AMDGPU::OpName::src1))
-                       .setMIFlags(MI.getFlags());
-  TII->fixImplicitOperands(*Converted);
-  LLVM_DEBUG(dbgs() << "Converted to VOP2: " << *Converted);
-  MI.eraseFromParent();
-}
-
 namespace {
 bool isConvertibleToSDWA(MachineInstr &MI,
                          const GCNSubtarget &ST,
@@ -1119,11 +1070,6 @@ bool isConvertibleToSDWA(MachineInstr &MI,
   if (TII->isSDWA(Opc))
     return true;
 
-  // Can only be handled after ealier conversion to
-  // AMDGPU::V_CNDMASK_B32_e32 which is not always possible.
-  if (Opc == AMDGPU::V_CNDMASK_B32_e64)
-    return false;
-
   // Check if this instruction has opcode that supports SDWA
   if (AMDGPU::getSDWAOp(Opc) == -1)
     Opc = AMDGPU::getVOPe32(Opc);
@@ -1162,6 +1108,10 @@ bool isConvertibleToSDWA(MachineInstr &MI,
   if (TII->pseudoToMCOpcode(Opc) == -1)
     return false;
 
+  // FIXME: has SDWA but require handling of implicit VCC use
+  if (Opc == AMDGPU::V_CNDMASK_B32_e32)
+    return false;
+
   if (MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0)) {
     if (!Src0->isReg() && !Src0->isImm())
       return false;
@@ -1316,9 +1266,7 @@ MachineInstr *SIPeepholeSDWA::createSDWAVersion(MachineInstr &MI) {
     SDWAInst->tieOperands(PreserveDstIdx, SDWAInst->getNumOperands() - 1);
   }
 
-  MachineInstr *Ret = SDWAInst.getInstr();
-  TII->fixImplicitOperands(*Ret);
-  return Ret;
+  return SDWAInst.getInstr();
 }
 
 bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
@@ -1436,18 +1384,10 @@ bool SIPeepholeSDWA::run(MachineFunction &MF) {
       for (const auto &OperandPair : SDWAOperands) {
        const auto &Operand = OperandPair.second;
        MachineInstr *PotentialMI = Operand->potentialToConvert(TII, ST);
-        if (!PotentialMI)
-          continue;
-
-        switch (PotentialMI->getOpcode()) {
-        case AMDGPU::V_ADD_CO_U32_e64:
-        case AMDGPU::V_SUB_CO_U32_e64:
+        if (PotentialMI &&
+            (PotentialMI->getOpcode() == AMDGPU::V_ADD_CO_U32_e64 ||
+             PotentialMI->getOpcode() == AMDGPU::V_SUB_CO_U32_e64))
          pseudoOpConvertToVOP2(*PotentialMI, ST);
-          break;
-        case AMDGPU::V_CNDMASK_B32_e64:
-          convertVcndmaskToVOP2(*PotentialMI, ST);
-          break;
-        };
      }
      SDWAOperands.clear();