Commit d3d3005

AMDGPU: Use helper function for use/def chain walk
PeepholeOpt has a nicer version of this which handles more cases.
1 parent a88f4f1

1 file changed: 37 additions, 18 deletions
llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

@@ -781,6 +781,27 @@ bool SIFoldOperandsImpl::isUseSafeToFold(const MachineInstr &MI,
   return !TII->isSDWA(MI);
 }
 
+static MachineOperand *lookUpCopyChain(const SIInstrInfo &TII,
+                                       const MachineRegisterInfo &MRI,
+                                       Register SrcReg) {
+  MachineOperand *Sub = nullptr;
+  for (MachineInstr *SubDef = MRI.getVRegDef(SrcReg);
+       SubDef && TII.isFoldableCopy(*SubDef);
+       SubDef = MRI.getVRegDef(Sub->getReg())) {
+    MachineOperand &SrcOp = SubDef->getOperand(1);
+    if (SrcOp.isImm())
+      return &SrcOp;
+    if (!SrcOp.isReg() || SrcOp.getReg().isPhysical())
+      break;
+    Sub = &SrcOp;
+    // TODO: Support compose
+    if (SrcOp.getSubReg())
+      break;
+  }
+
+  return Sub;
+}
+
 // Find a def of the UseReg, check if it is a reg_sequence and find initializers
 // for each subreg, tracking it to foldable inline immediate if possible.
 // Returns true on success.
@@ -791,26 +812,24 @@ bool SIFoldOperandsImpl::getRegSeqInit(
   if (!Def || !Def->isRegSequence())
     return false;
 
-  for (unsigned I = 1, E = Def->getNumExplicitOperands(); I < E; I += 2) {
-    MachineOperand *Sub = &Def->getOperand(I);
-    assert(Sub->isReg());
-
-    for (MachineInstr *SubDef = MRI->getVRegDef(Sub->getReg());
-         SubDef && Sub->isReg() && Sub->getReg().isVirtual() &&
-         !Sub->getSubReg() && TII->isFoldableCopy(*SubDef);
-         SubDef = MRI->getVRegDef(Sub->getReg())) {
-      MachineOperand *Op = &SubDef->getOperand(1);
-      if (Op->isImm()) {
-        if (TII->isInlineConstant(*Op, OpTy))
-          Sub = Op;
-        break;
-      }
-      if (!Op->isReg() || Op->getReg().isPhysical())
-        break;
-      Sub = Op;
+  for (unsigned I = 1, E = Def->getNumExplicitOperands(); I != E; I += 2) {
+    MachineOperand &SrcOp = Def->getOperand(I);
+    unsigned SubRegIdx = Def->getOperand(I + 1).getImm();
+
+    if (SrcOp.getSubReg()) {
+      // TODO: Handle subregister compose
+      Defs.emplace_back(&SrcOp, SubRegIdx);
+      continue;
+    }
+
+    MachineOperand *DefSrc = lookUpCopyChain(*TII, *MRI, SrcOp.getReg());
+    if (DefSrc && (DefSrc->isReg() ||
+                   (DefSrc->isImm() && TII->isInlineConstant(*DefSrc, OpTy)))) {
+      Defs.emplace_back(DefSrc, SubRegIdx);
+      continue;
     }
 
-    Defs.emplace_back(Sub, Def->getOperand(I + 1).getImm());
+    Defs.emplace_back(&SrcOp, SubRegIdx);
   }
 
   return true;
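
To make the behavior of the new helper concrete, below is a minimal self-contained sketch of the same walk using invented ToyOperand / ToyInst / DefMap stand-ins (hypothetical names, not LLVM's types and not part of this patch): follow a chain of foldable copies from a virtual register and report either the immediate that ultimately feeds it or the last plain register operand seen, stopping at physical registers and at subregister uses (the patch's "TODO: Support compose").

#include <cassert>
#include <map>
#include <variant>

// Hypothetical stand-ins for MachineOperand / MachineInstr; not LLVM types.
struct ToyOperand {
  std::variant<int, long long> V;  // int = virtual register id, long long = immediate
  bool HasSubReg = false;          // stands in for MachineOperand::getSubReg()
  bool IsPhysical = false;         // stands in for Register::isPhysical()
  bool isImm() const { return std::holds_alternative<long long>(V); }
};

struct ToyInst {
  bool IsFoldableCopy = false;     // stands in for SIInstrInfo::isFoldableCopy()
  ToyOperand Src;                  // source operand of a copy/mov-like instruction
};

// DefMap plays the role of MachineRegisterInfo::getVRegDef(): vreg id -> defining inst.
using DefMap = std::map<int, ToyInst>;

// Same shape as the patch's lookUpCopyChain: walk foldable copies from SrcReg and
// return the immediate that ultimately feeds it, the last plain register operand
// seen, or nullptr if the first def is not a foldable copy.
const ToyOperand *lookUpCopyChainSketch(const DefMap &Defs, int SrcReg) {
  const ToyOperand *Sub = nullptr;
  auto It = Defs.find(SrcReg);
  while (It != Defs.end() && It->second.IsFoldableCopy) {
    const ToyOperand &SrcOp = It->second.Src;
    if (SrcOp.isImm())
      return &SrcOp;               // resolved all the way to an immediate
    if (SrcOp.IsPhysical)
      break;                       // stop at physical registers
    Sub = &SrcOp;
    if (SrcOp.HasSubReg)
      break;                       // subregister compose not handled (matches the TODO)
    It = Defs.find(std::get<int>(SrcOp.V));  // follow the chain to the next def
  }
  return Sub;
}

int main() {
  // Roughly: %1 = MOV 42 ; %2 = COPY %1 ; querying the chain at %2 yields the imm 42.
  DefMap Defs;
  Defs[1] = ToyInst{true, ToyOperand{42LL}};
  Defs[2] = ToyInst{true, ToyOperand{1}};
  const ToyOperand *Res = lookUpCopyChainSketch(Defs, 2);
  assert(Res && Res->isImm() && std::get<long long>(Res->V) == 42);
  (void)Res;
}

In the patch itself, getRegSeqInit only substitutes a non-null result when it is a register or an inline-constant immediate; otherwise the original REG_SEQUENCE source operand is kept.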
