Skip to content

Commit 5236c8b

Browse files
bcahoon and akadutta authored
[AMDGPU]: Unpack packed instructions overlapped by MFMAs post-RA scheduling (llvm#157968) (llvm#4137) (llvm#4355)
This is a cleaned up version of PR llvm#151704. These optimizations are now performed post-RA scheduling. (cherry picked from commit c256966) (cherry picked from commit 2e67921) Co-authored-by: Akash Dutta <137309513+akadutta@users.noreply.github.com>
1 parent d619709 commit 5236c8b

File tree

5 files changed

+1588
-14
lines changed

5 files changed

+1588
-14
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6081,6 +6081,66 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
60816081
return isImmOperandLegal(MI, OpIdx, *MO);
60826082
}
60836083

6084+
/// Classify \p MI as a "never co-issue" instruction.
///
/// The result is true only when the subtarget reports GFX940 or GFX950
/// instructions, \p MI is a VALU instruction, and \p MI is either a TRANS,
/// DOT, or MFMA instruction or one of the individually listed opcodes below.
/// In every other case the result is false.
///
/// NOTE(review): the hardware meaning of "never co-issue" is taken from the
/// function name; confirm against the ISA documentation.
bool SIInstrInfo::isNeverCoissue(MachineInstr &MI) const {
  // The classification only applies to subtargets that have the GFX940 or
  // GFX950 instruction sets, and only to VALU instructions.
  const bool HasGFX940OrGFX950Insts =
      ST.hasGFX950Insts() || ST.hasGFX940Insts();
  if (!HasGFX940OrGFX950Insts || !isVALU(MI))
    return false;

  // Whole instruction classes:
  //   TRANS - V_COS, V_EXP, V_RCP, etc.
  //   DOT   - DOT2, DOT2C, DOT4, etc.
  //   MFMA  - MFMA, SMFMA
  if (isTRANS(MI) || isDOT(MI) || isMFMA(MI))
    return true;

  // Individual opcodes that are not covered by the class predicates above.
  switch (MI.getOpcode()) {
  case AMDGPU::V_CVT_PK_BF8_F32_e64:
  case AMDGPU::V_CVT_PK_FP8_F32_e64:
  case AMDGPU::V_MQSAD_PK_U16_U8_e64:
  case AMDGPU::V_MQSAD_U32_U8_e64:
  case AMDGPU::V_PK_ADD_F16:
  case AMDGPU::V_PK_ADD_F32:
  case AMDGPU::V_PK_ADD_I16:
  case AMDGPU::V_PK_ADD_U16:
  case AMDGPU::V_PK_ASHRREV_I16:
  case AMDGPU::V_PK_FMA_F16:
  case AMDGPU::V_PK_FMA_F32:
  case AMDGPU::V_PK_FMAC_F16_e32:
  case AMDGPU::V_PK_FMAC_F16_e64:
  case AMDGPU::V_PK_LSHLREV_B16:
  case AMDGPU::V_PK_LSHRREV_B16:
  case AMDGPU::V_PK_MAD_I16:
  case AMDGPU::V_PK_MAD_U16:
  case AMDGPU::V_PK_MAX_F16:
  case AMDGPU::V_PK_MAX_I16:
  case AMDGPU::V_PK_MAX_U16:
  case AMDGPU::V_PK_MIN_F16:
  case AMDGPU::V_PK_MIN_I16:
  case AMDGPU::V_PK_MIN_U16:
  case AMDGPU::V_PK_MOV_B32:
  case AMDGPU::V_PK_MUL_F16:
  case AMDGPU::V_PK_MUL_F32:
  case AMDGPU::V_PK_MUL_LO_U16:
  case AMDGPU::V_PK_SUB_I16:
  case AMDGPU::V_PK_SUB_U16:
  case AMDGPU::V_QSAD_PK_U16_U8_e64:
    return true;
  default:
    break;
  }

  return false;
}
6143+
60846144
void SIInstrInfo::legalizeOperandsVOP2(MachineRegisterInfo &MRI,
60856145
MachineInstr &MI) const {
60866146
unsigned Opc = MI.getOpcode();

llvm/lib/Target/AMDGPU/SIInstrInfo.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1139,6 +1139,8 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
11391139
bool isImmOperandLegal(const MachineInstr &MI, unsigned OpNo,
11401140
const MachineOperand &MO) const;
11411141

1142+
/// Return true if \p MI is classified as a "never co-issue" instruction.
/// Only VALU instructions on subtargets with GFX940 or GFX950 instructions
/// can qualify: TRANS, DOT and MFMA instructions, plus a fixed list of
/// packed (V_PK_*), V_CVT_PK_* and (M)QSAD opcodes. Returns false otherwise.
/// NOTE(review): the hardware meaning of "never co-issue" is inferred from
/// the name; confirm against the ISA documentation.
bool isNeverCoissue(MachineInstr &MI) const;
1143+
11421144
/// Return true if this 64-bit VALU instruction has a 32-bit encoding.
11431145
/// This function will return false if you pass it a 32-bit instruction.
11441146
bool hasVALU32BitEncoding(unsigned Opcode) const;

0 commit comments

Comments
 (0)