Skip to content

Commit 7f38e2b

Browse files
committed
WIP: AMDGPU: Always select the VGPR version of MFMAs
We do not want to use AGPRs unless absolutely required due to register pressure. Rely on a post-regalloc pass to replace VGPR MFMAs with the AGPR version if it avoids the copies introduced due to live range splitting.
1 parent cfdee22 commit 7f38e2b

File tree

5 files changed

+35
-62
lines changed

5 files changed

+35
-62
lines changed

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4865,31 +4865,29 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
48654865
// for srcA/srcB?
48664866
//
48674867
// vdst, srcA, srcB, srcC
4868-
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
48694868
OpdsMapping[0] =
4870-
Info->mayNeedAGPRs()
4869+
!Subtarget.hasGFX90AInsts()
48714870
? getAGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI)
48724871
: getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
48734872
OpdsMapping[2] = getVGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
48744873
OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);
48754874
OpdsMapping[4] =
4876-
Info->mayNeedAGPRs()
4875+
!Subtarget.hasGFX90AInsts()
48774876
? getAGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI)
48784877
: getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
48794878
break;
48804879
}
48814880
case Intrinsic::amdgcn_mfma_scale_f32_16x16x128_f8f6f4:
48824881
case Intrinsic::amdgcn_mfma_scale_f32_32x32x64_f8f6f4: {
4883-
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
48844882
OpdsMapping[0] =
4885-
Info->mayNeedAGPRs()
4883+
!Subtarget.hasGFX90AInsts()
48864884
? getAGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI)
48874885
: getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
48884886

48894887
OpdsMapping[2] = getVGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
48904888
OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);
48914889
OpdsMapping[4] =
4892-
Info->mayNeedAGPRs()
4890+
!Subtarget.hasGFX90AInsts()
48934891
? getAGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI)
48944892
: getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
48954893

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 1 addition & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -16076,7 +16076,6 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
1607616076

1607716077
MachineFunction *MF = MI.getParent()->getParent();
1607816078
MachineRegisterInfo &MRI = MF->getRegInfo();
16079-
SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();
1608016079

1608116080
if (TII->isVOP3(MI.getOpcode())) {
1608216081
// Make sure constant bus requirements are respected.
@@ -16087,15 +16086,14 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
1608716086
// use between vgpr and agpr as agpr tuples tend to be big.
1608816087
if (!MI.getDesc().operands().empty()) {
1608916088
unsigned Opc = MI.getOpcode();
16090-
bool HasAGPRs = Info->mayNeedAGPRs();
1609116089
const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
1609216090
int16_t Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
1609316091
for (auto I :
1609416092
{AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0),
1609516093
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1), Src2Idx}) {
1609616094
if (I == -1)
1609716095
break;
16098-
if ((I == Src2Idx) && (HasAGPRs))
16096+
if (I == Src2Idx)
1609916097
break;
1610016098
MachineOperand &Op = MI.getOperand(I);
1610116099
if (!Op.isReg() || !Op.getReg().isVirtual())
@@ -16129,22 +16127,6 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
1612916127
TII->legalizeOpWithMove(MI, Src1Idx);
1613016128
}
1613116129
}
16132-
16133-
if (!HasAGPRs)
16134-
return;
16135-
16136-
// Resolve the rest of AV operands to AGPRs.
16137-
if (auto *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2)) {
16138-
if (Src2->isReg() && Src2->getReg().isVirtual()) {
16139-
auto *RC = TRI->getRegClassForReg(MRI, Src2->getReg());
16140-
if (TRI->isVectorSuperClass(RC)) {
16141-
auto *NewRC = TRI->getEquivalentAGPRClass(RC);
16142-
MRI.setRegClass(Src2->getReg(), NewRC);
16143-
if (Src2->isTied())
16144-
MRI.setRegClass(MI.getOperand(0).getReg(), NewRC);
16145-
}
16146-
}
16147-
}
1614816130
}
1614916131

1615016132
return;

llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -63,12 +63,6 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F,
6363
PSInputAddr = AMDGPU::getInitialPSInputAddr(F);
6464
}
6565

66-
MayNeedAGPRs = ST.hasMAIInsts();
67-
if (ST.hasGFX90AInsts() &&
68-
ST.getMaxNumVGPRs(F) <= AMDGPU::VGPR_32RegClass.getNumRegs() &&
69-
!mayUseAGPRs(F))
70-
MayNeedAGPRs = false; // We will select all MAI with VGPR operands.
71-
7266
if (AMDGPU::isChainCC(CC)) {
7367
// Chain functions don't receive an SP from their caller, but are free to
7468
// set one up. For now, we can use s32 to match what amdgpu_gfx functions

llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -493,8 +493,6 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
493493
// user arguments. This is an offset from the KernargSegmentPtr.
494494
bool ImplicitArgPtr : 1;
495495

496-
bool MayNeedAGPRs : 1;
497-
498496
// The hard-wired high half of the address of the global information table
499497
// for AMDPAL OS type. 0xffffffff represents no hard-wired high half, since
500498
// current hardware only allows a 16 bit value.
@@ -1165,10 +1163,6 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
11651163

11661164
unsigned getMaxMemoryClusterDWords() const { return MaxMemoryClusterDWords; }
11671165

1168-
bool mayNeedAGPRs() const {
1169-
return MayNeedAGPRs;
1170-
}
1171-
11721166
// \returns true if a function has a use of AGPRs via inline asm or
11731167
// has a call which may use it.
11741168
bool mayUseAGPRs(const Function &F) const;

llvm/lib/Target/AMDGPU/VOP3PInstructions.td

Lines changed: 30 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -856,17 +856,11 @@ defvar MayNotNeedAGPRs_gisel = [{
856856
return !MF.getInfo<SIMachineFunctionInfo>()->mayNeedAGPRs();
857857
}];
858858

859-
class AgprMAIFrag<SDPatternOperator Op, bit HasAbid = true,
860-
bit Scaled = false> :
861-
MAIFrag<Op, MayNeedAGPRs, HasAbid, Scaled> {
862-
let GISelPredicateCode = MayNeedAGPRs_gisel;
863-
}
859+
class AgprMAIFrag<SDPatternOperator Op, bit HasAbid = true, bit Scaled = false>
860+
: MAIFrag<Op, [{}], HasAbid, Scaled> {}
864861

865-
class VgprMAIFrag<SDPatternOperator Op, bit HasAbid = true,
866-
bit Scaled = false> :
867-
MAIFrag<Op, MayNotNeedAGPRs, HasAbid, Scaled> {
868-
let GISelPredicateCode = MayNotNeedAGPRs_gisel;
869-
}
862+
class VgprMAIFrag<SDPatternOperator Op, bit HasAbid = true, bit Scaled = false>
863+
: MAIFrag<Op, [{}], HasAbid, Scaled> {}
870864

871865
let isAsCheapAsAMove = 1, isReMaterializable = 1 in {
872866
defm V_ACCVGPR_READ_B32 : VOP3Inst<"v_accvgpr_read_b32", VOPProfileAccRead>;
@@ -917,10 +911,14 @@ multiclass MAIInst<string OpName, string P, SDPatternOperator node = null_frag,
917911
!if(!or(NoDstOverlap, !eq(node, null_frag)), null_frag, AgprMAIFrag<node, HasAbid, Scaled>), Scaled>,
918912
MFMATable<0, "AGPR", NAME # "_e64">;
919913

920-
let OtherPredicates = [isGFX90APlus], Mnemonic = OpName in
921-
def _vgprcd_e64 : MAIInst<OpName # "_vgprcd", !cast<VOPProfileMAI>("VOPProfileMAI_" # P # "_VCD"),
922-
!if(!or(NoDstOverlap, !eq(node, null_frag)), null_frag, VgprMAIFrag<node, HasAbid, Scaled>), Scaled>,
923-
MFMATable<0, "VGPR", NAME # "_vgprcd_e64", NAME # "_e64">;
914+
let OtherPredicates = [isGFX90APlus], Mnemonic = OpName,
915+
AddedComplexity = 10 in def _vgprcd_e64
916+
: MAIInst<OpName#"_vgprcd",
917+
!cast<VOPProfileMAI>("VOPProfileMAI_"#P#"_VCD"),
918+
!if(!or(NoDstOverlap, !eq(node, null_frag)), null_frag,
919+
VgprMAIFrag<node, HasAbid, Scaled>),
920+
Scaled>,
921+
MFMATable<0, "VGPR", NAME#"_vgprcd_e64", NAME#"_e64">;
924922
}
925923

926924
if NoDstOverlap then {
@@ -931,16 +929,22 @@ multiclass MAIInst<string OpName, string P, SDPatternOperator node = null_frag,
931929
!if(!eq(node, null_frag), null_frag, AgprMAIFrag<node, HasAbid, Scaled>), Scaled>,
932930
MFMATable<1, "AGPR", NAME # "_e64", NAME # "_mac_e64">;
933931

934-
let OtherPredicates = [isGFX90APlus] in
935-
def _mac_vgprcd_e64 : MAIInst<OpName # "_mac_vgprcd", !cast<VOPProfileMAI>("VOPProfileMAI_" # P # "_VCD"),
936-
!if(!eq(node, null_frag), null_frag, VgprMAIFrag<node, HasAbid, Scaled>), Scaled>,
937-
MFMATable<1, "VGPR", NAME # "_vgprcd_e64", NAME # "_mac_e64">;
932+
let OtherPredicates = [isGFX90APlus],
933+
AddedComplexity = 10 in def _mac_vgprcd_e64
934+
: MAIInst<OpName#"_mac_vgprcd",
935+
!cast<VOPProfileMAI>("VOPProfileMAI_"#P#"_VCD"),
936+
!if(!eq(node, null_frag), null_frag,
937+
VgprMAIFrag<node, HasAbid, Scaled>),
938+
Scaled>,
939+
MFMATable<1, "VGPR", NAME#"_vgprcd_e64", NAME#"_mac_e64">;
938940
}
939941
}
940942
} // End isConvergent = 1, mayRaiseFPException = 0, ReadsModeReg = 1
941943
}
942944

943-
// Provide a wrapper around MAIInst that provides the appended operands from V_MFMA_LD_SCALE_B32
945+
// Provide a wrapper around MAIInst that provides the appended operands from
946+
// V_MFMA_LD_SCALE_B32. AGPR variants are never selected; VGPR is selected and
947+
// may later be rewritten to AGPR.
944948
multiclass ScaledMAIInst_mc<string OpName, string UnscaledOpName_, SDPatternOperator node> {
945949
defvar VariantSuffix = !subst(!toupper(OpName), "", NAME); // Drop the main opcode name prefix to get the "_fN_fM" suffix.
946950
defvar UnscaledOpName = UnscaledOpName_#VariantSuffix;
@@ -949,9 +953,9 @@ multiclass ScaledMAIInst_mc<string OpName, string UnscaledOpName_, SDPatternOper
949953

950954
defvar NoDstOverlap = !cast<VOPProfileMAI>(!cast<MAIInst>(UnscaledOpName#"_e64").Pfl).NoDstOverlap;
951955

952-
def _e64 : ScaledMAIInst<OpName,
953-
!cast<MAIInst>(UnscaledOpName#"_e64"), !if(NoDstOverlap, null_frag, AgprMAIFrag<node, HasAbid, true>)>,
954-
MFMATable<0, "AGPR", NAME # "_e64">;
956+
def _e64
957+
: ScaledMAIInst<OpName, !cast<MAIInst>(UnscaledOpName#"_e64"), null_frag>,
958+
MFMATable<0, "AGPR", NAME#"_e64">;
955959

956960
def _vgprcd_e64 : ScaledMAIInst<OpName # "_vgprcd",
957961
!cast<MAIInst>(UnscaledOpName#"_vgprcd_e64"), !if(NoDstOverlap, null_frag, VgprMAIFrag<node, HasAbid, true>)>,
@@ -961,9 +965,10 @@ multiclass ScaledMAIInst_mc<string OpName, string UnscaledOpName_, SDPatternOper
961965
let Constraints = !if(NoDstOverlap, "$vdst = $src2", ""),
962966
isConvertibleToThreeAddress = NoDstOverlap,
963967
Mnemonic = UnscaledOpName_ in {
964-
def _mac_e64 : ScaledMAIInst<OpName # "_mac",
965-
!cast<MAIInst>(UnscaledOpName # "_mac_e64"), AgprMAIFrag<node, HasAbid, true>>,
966-
MFMATable<1, "AGPR", NAME # "_e64">;
968+
def _mac_e64
969+
: ScaledMAIInst<OpName#"_mac",
970+
!cast<MAIInst>(UnscaledOpName#"_mac_e64"), null_frag>,
971+
MFMATable<1, "AGPR", NAME#"_e64">;
967972

968973
def _mac_vgprcd_e64 : ScaledMAIInst<OpName # " _mac_vgprcd",
969974
!cast<MAIInst>(UnscaledOpName # "_mac_vgprcd_e64"), VgprMAIFrag<node, HasAbid, true>>,

0 commit comments

Comments
 (0)