Skip to content

Commit ff2a819

Browse files
committed
WIP: AMDGPU: Always select the VGPR version of MFMAs
We do not want to use AGPRs unless register pressure makes them absolutely necessary. Instead, rely on a post-register-allocation pass to replace VGPR MFMAs with their AGPR versions when doing so avoids the copies introduced by live-range splitting.
1 parent c8ea114 commit ff2a819

File tree

5 files changed

+35
-62
lines changed

5 files changed

+35
-62
lines changed

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4867,31 +4867,29 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
48674867
// for srcA/srcB?
48684868
//
48694869
// vdst, srcA, srcB, srcC
4870-
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
48714870
OpdsMapping[0] =
4872-
Info->mayNeedAGPRs()
4871+
!Subtarget.hasGFX90AInsts()
48734872
? getAGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI)
48744873
: getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
48754874
OpdsMapping[2] = getVGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
48764875
OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);
48774876
OpdsMapping[4] =
4878-
Info->mayNeedAGPRs()
4877+
!Subtarget.hasGFX90AInsts()
48794878
? getAGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI)
48804879
: getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
48814880
break;
48824881
}
48834882
case Intrinsic::amdgcn_mfma_scale_f32_16x16x128_f8f6f4:
48844883
case Intrinsic::amdgcn_mfma_scale_f32_32x32x64_f8f6f4: {
4885-
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
48864884
OpdsMapping[0] =
4887-
Info->mayNeedAGPRs()
4885+
!Subtarget.hasGFX90AInsts()
48884886
? getAGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI)
48894887
: getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
48904888

48914889
OpdsMapping[2] = getVGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
48924890
OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);
48934891
OpdsMapping[4] =
4894-
Info->mayNeedAGPRs()
4892+
!Subtarget.hasGFX90AInsts()
48954893
? getAGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI)
48964894
: getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
48974895

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 1 addition & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -16227,7 +16227,6 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
1622716227

1622816228
MachineFunction *MF = MI.getParent()->getParent();
1622916229
MachineRegisterInfo &MRI = MF->getRegInfo();
16230-
SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();
1623116230

1623216231
if (TII->isVOP3(MI.getOpcode())) {
1623316232
// Make sure constant bus requirements are respected.
@@ -16238,15 +16237,14 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
1623816237
// use between vgpr and agpr as agpr tuples tend to be big.
1623916238
if (!MI.getDesc().operands().empty()) {
1624016239
unsigned Opc = MI.getOpcode();
16241-
bool HasAGPRs = Info->mayNeedAGPRs();
1624216240
const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
1624316241
int16_t Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
1624416242
for (auto I :
1624516243
{AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0),
1624616244
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1), Src2Idx}) {
1624716245
if (I == -1)
1624816246
break;
16249-
if ((I == Src2Idx) && (HasAGPRs))
16247+
if (I == Src2Idx)
1625016248
break;
1625116249
MachineOperand &Op = MI.getOperand(I);
1625216250
if (!Op.isReg() || !Op.getReg().isVirtual())
@@ -16280,22 +16278,6 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
1628016278
TII->legalizeOpWithMove(MI, Src1Idx);
1628116279
}
1628216280
}
16283-
16284-
if (!HasAGPRs)
16285-
return;
16286-
16287-
// Resolve the rest of AV operands to AGPRs.
16288-
if (auto *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2)) {
16289-
if (Src2->isReg() && Src2->getReg().isVirtual()) {
16290-
auto *RC = TRI->getRegClassForReg(MRI, Src2->getReg());
16291-
if (TRI->isVectorSuperClass(RC)) {
16292-
auto *NewRC = TRI->getEquivalentAGPRClass(RC);
16293-
MRI.setRegClass(Src2->getReg(), NewRC);
16294-
if (Src2->isTied())
16295-
MRI.setRegClass(MI.getOperand(0).getReg(), NewRC);
16296-
}
16297-
}
16298-
}
1629916281
}
1630016282

1630116283
return;

llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -69,12 +69,6 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F,
6969
PSInputAddr = AMDGPU::getInitialPSInputAddr(F);
7070
}
7171

72-
MayNeedAGPRs = ST.hasMAIInsts();
73-
if (ST.hasGFX90AInsts() &&
74-
ST.getMaxNumVGPRs(F) <= AMDGPU::VGPR_32RegClass.getNumRegs() &&
75-
!mayUseAGPRs(F))
76-
MayNeedAGPRs = false; // We will select all MAI with VGPR operands.
77-
7872
if (AMDGPU::isChainCC(CC)) {
7973
// Chain functions don't receive an SP from their caller, but are free to
8074
// set one up. For now, we can use s32 to match what amdgpu_gfx functions

llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -497,8 +497,6 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
497497
// user arguments. This is an offset from the KernargSegmentPtr.
498498
bool ImplicitArgPtr : 1;
499499

500-
bool MayNeedAGPRs : 1;
501-
502500
// The hard-wired high half of the address of the global information table
503501
// for AMDPAL OS type. 0xffffffff represents no hard-wired high half, since
504502
// current hardware only allows a 16 bit value.
@@ -1172,10 +1170,6 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
11721170

11731171
unsigned getMaxMemoryClusterDWords() const { return MaxMemoryClusterDWords; }
11741172

1175-
bool mayNeedAGPRs() const {
1176-
return MayNeedAGPRs;
1177-
}
1178-
11791173
// \returns true if a function has a use of AGPRs via inline asm or
11801174
// has a call which may use it.
11811175
bool mayUseAGPRs(const Function &F) const;

llvm/lib/Target/AMDGPU/VOP3PInstructions.td

Lines changed: 30 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -856,17 +856,11 @@ defvar MayNotNeedAGPRs_gisel = [{
856856
return !MF.getInfo<SIMachineFunctionInfo>()->mayNeedAGPRs();
857857
}];
858858

859-
class AgprMAIFrag<SDPatternOperator Op, bit HasAbid = true,
860-
bit Scaled = false> :
861-
MAIFrag<Op, MayNeedAGPRs, HasAbid, Scaled> {
862-
let GISelPredicateCode = MayNeedAGPRs_gisel;
863-
}
859+
class AgprMAIFrag<SDPatternOperator Op, bit HasAbid = true, bit Scaled = false>
860+
: MAIFrag<Op, [{}], HasAbid, Scaled> {}
864861

865-
class VgprMAIFrag<SDPatternOperator Op, bit HasAbid = true,
866-
bit Scaled = false> :
867-
MAIFrag<Op, MayNotNeedAGPRs, HasAbid, Scaled> {
868-
let GISelPredicateCode = MayNotNeedAGPRs_gisel;
869-
}
862+
class VgprMAIFrag<SDPatternOperator Op, bit HasAbid = true, bit Scaled = false>
863+
: MAIFrag<Op, [{}], HasAbid, Scaled> {}
870864

871865
let isAsCheapAsAMove = 1, isReMaterializable = 1 in {
872866
defm V_ACCVGPR_READ_B32 : VOP3Inst<"v_accvgpr_read_b32", VOPProfileAccRead>;
@@ -917,10 +911,14 @@ multiclass MAIInst<string OpName, string P, SDPatternOperator node = null_frag,
917911
!if(!or(NoDstOverlap, !eq(node, null_frag)), null_frag, AgprMAIFrag<node, HasAbid, Scaled>), Scaled>,
918912
MFMATable<0, "AGPR", NAME # "_e64">;
919913

920-
let OtherPredicates = [isGFX90APlus], Mnemonic = OpName in
921-
def _vgprcd_e64 : MAIInst<OpName # "_vgprcd", !cast<VOPProfileMAI>("VOPProfileMAI_" # P # "_VCD"),
922-
!if(!or(NoDstOverlap, !eq(node, null_frag)), null_frag, VgprMAIFrag<node, HasAbid, Scaled>), Scaled>,
923-
MFMATable<0, "VGPR", NAME # "_vgprcd_e64", NAME # "_e64">;
914+
let OtherPredicates = [isGFX90APlus], Mnemonic = OpName,
915+
AddedComplexity = 10 in def _vgprcd_e64
916+
: MAIInst<OpName#"_vgprcd",
917+
!cast<VOPProfileMAI>("VOPProfileMAI_"#P#"_VCD"),
918+
!if(!or(NoDstOverlap, !eq(node, null_frag)), null_frag,
919+
VgprMAIFrag<node, HasAbid, Scaled>),
920+
Scaled>,
921+
MFMATable<0, "VGPR", NAME#"_vgprcd_e64", NAME#"_e64">;
924922
}
925923

926924
if NoDstOverlap then {
@@ -931,16 +929,22 @@ multiclass MAIInst<string OpName, string P, SDPatternOperator node = null_frag,
931929
!if(!eq(node, null_frag), null_frag, AgprMAIFrag<node, HasAbid, Scaled>), Scaled>,
932930
MFMATable<1, "AGPR", NAME # "_e64", NAME # "_mac_e64">;
933931

934-
let OtherPredicates = [isGFX90APlus] in
935-
def _mac_vgprcd_e64 : MAIInst<OpName # "_mac_vgprcd", !cast<VOPProfileMAI>("VOPProfileMAI_" # P # "_VCD"),
936-
!if(!eq(node, null_frag), null_frag, VgprMAIFrag<node, HasAbid, Scaled>), Scaled>,
937-
MFMATable<1, "VGPR", NAME # "_vgprcd_e64", NAME # "_mac_e64">;
932+
let OtherPredicates = [isGFX90APlus],
933+
AddedComplexity = 10 in def _mac_vgprcd_e64
934+
: MAIInst<OpName#"_mac_vgprcd",
935+
!cast<VOPProfileMAI>("VOPProfileMAI_"#P#"_VCD"),
936+
!if(!eq(node, null_frag), null_frag,
937+
VgprMAIFrag<node, HasAbid, Scaled>),
938+
Scaled>,
939+
MFMATable<1, "VGPR", NAME#"_vgprcd_e64", NAME#"_mac_e64">;
938940
}
939941
}
940942
} // End isConvergent = 1, mayRaiseFPException = 0, ReadsModeReg = 1
941943
}
942944

943-
// Provide a wrapper around MAIInst that provides the appended operands from V_MFMA_LD_SCALE_B32
945+
// Provide a wrapper around MAIInst that provides the appended operands from
946+
// V_MFMA_LD_SCALE_B32. AGPR variants are never selected; VGPR is selected and
947+
// may later be rewritten to AGPR.
944948
multiclass ScaledMAIInst_mc<string OpName, string UnscaledOpName_, SDPatternOperator node> {
945949
defvar VariantSuffix = !subst(!toupper(OpName), "", NAME); // Drop the main opcode name prefix to get the "_fN_fM" suffix.
946950
defvar UnscaledOpName = UnscaledOpName_#VariantSuffix;
@@ -949,9 +953,9 @@ multiclass ScaledMAIInst_mc<string OpName, string UnscaledOpName_, SDPatternOper
949953

950954
defvar NoDstOverlap = !cast<VOPProfileMAI>(!cast<MAIInst>(UnscaledOpName#"_e64").Pfl).NoDstOverlap;
951955

952-
def _e64 : ScaledMAIInst<OpName,
953-
!cast<MAIInst>(UnscaledOpName#"_e64"), !if(NoDstOverlap, null_frag, AgprMAIFrag<node, HasAbid, true>)>,
954-
MFMATable<0, "AGPR", NAME # "_e64">;
956+
def _e64
957+
: ScaledMAIInst<OpName, !cast<MAIInst>(UnscaledOpName#"_e64"), null_frag>,
958+
MFMATable<0, "AGPR", NAME#"_e64">;
955959

956960
def _vgprcd_e64 : ScaledMAIInst<OpName # "_vgprcd",
957961
!cast<MAIInst>(UnscaledOpName#"_vgprcd_e64"), !if(NoDstOverlap, null_frag, VgprMAIFrag<node, HasAbid, true>)>,
@@ -961,9 +965,10 @@ multiclass ScaledMAIInst_mc<string OpName, string UnscaledOpName_, SDPatternOper
961965
let Constraints = !if(NoDstOverlap, "$vdst = $src2", ""),
962966
isConvertibleToThreeAddress = NoDstOverlap,
963967
Mnemonic = UnscaledOpName_ in {
964-
def _mac_e64 : ScaledMAIInst<OpName # "_mac",
965-
!cast<MAIInst>(UnscaledOpName # "_mac_e64"), AgprMAIFrag<node, HasAbid, true>>,
966-
MFMATable<1, "AGPR", NAME # "_e64">;
968+
def _mac_e64
969+
: ScaledMAIInst<OpName#"_mac",
970+
!cast<MAIInst>(UnscaledOpName#"_mac_e64"), null_frag>,
971+
MFMATable<1, "AGPR", NAME#"_e64">;
967972

968973
def _mac_vgprcd_e64 : ScaledMAIInst<OpName # " _mac_vgprcd",
969974
!cast<MAIInst>(UnscaledOpName # "_mac_vgprcd_e64"), VgprMAIFrag<node, HasAbid, true>>,

0 commit comments

Comments
 (0)