Skip to content

Commit 67d0f18

Browse files
authored
[AMDGPU] Delete redundant s_or_b32 (#165261)
Transform sequences like:

```
s_cselect_b64 s[12:13], -1, 0
s_or_b32 s6, s12, s13
```

where s6 is dead, to:

```
s_cselect_b64 s[12:13], -1, 0
```

---------

Signed-off-by: John Lu <John.Lu@amd.com>
1 parent 7377ac0 commit 67d0f18

File tree

12 files changed

+1249
-1209
lines changed

12 files changed

+1249
-1209
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 33 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10163,7 +10163,7 @@ static bool followSubRegDef(MachineInstr &MI,
1016310163
}
1016410164

1016510165
MachineInstr *llvm::getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P,
10166-
MachineRegisterInfo &MRI) {
10166+
const MachineRegisterInfo &MRI) {
1016710167
assert(MRI.isSSA());
1016810168
if (!P.Reg.isVirtual())
1016910169
return nullptr;
@@ -10628,6 +10628,8 @@ bool SIInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
1062810628
static bool optimizeSCC(MachineInstr *SCCValid, MachineInstr *SCCRedefine,
1062910629
const SIRegisterInfo &RI) {
1063010630
MachineInstr *KillsSCC = nullptr;
10631+
if (SCCValid->getParent() != SCCRedefine->getParent())
10632+
return false;
1063110633
for (MachineInstr &MI : make_range(std::next(SCCValid->getIterator()),
1063210634
SCCRedefine->getIterator())) {
1063310635
if (MI.modifiesRegister(AMDGPU::SCC, &RI))
@@ -10672,8 +10674,8 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
1067210674
if (CmpValue != 0)
1067310675
return false;
1067410676

10675-
MachineInstr *Def = MRI->getUniqueVRegDef(SrcReg);
10676-
if (!Def || Def->getParent() != CmpInstr.getParent())
10677+
MachineInstr *Def = MRI->getVRegDef(SrcReg);
10678+
if (!Def)
1067710679
return false;
1067810680

1067910681
// For S_OP that set SCC = DST!=0, do the transformation
@@ -10692,6 +10694,32 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
1069210694
if (!optimizeSCC(Def, &CmpInstr, RI))
1069310695
return false;
1069410696

10697+
// If s_or_b32 result, sY, is unused (i.e. it is effectively a 64-bit
10698+
// s_cmp_lg of a register pair) and the inputs are the hi and lo-halves of a
10699+
// 64-bit foldableSelect then delete s_or_b32 in the sequence:
10700+
// sX = s_cselect_b64 (non-zero imm), 0
10701+
// sLo = copy sX.sub0
10702+
// sHi = copy sX.sub1
10703+
// sY = s_or_b32 sLo, sHi
10704+
if (Def->getOpcode() == AMDGPU::S_OR_B32 &&
10705+
MRI->use_nodbg_empty(Def->getOperand(0).getReg())) {
10706+
const MachineOperand &OrOpnd1 = Def->getOperand(1);
10707+
const MachineOperand &OrOpnd2 = Def->getOperand(2);
10708+
if (OrOpnd1.isReg() && OrOpnd2.isReg()) {
10709+
MachineInstr *Def1 = MRI->getVRegDef(OrOpnd1.getReg());
10710+
MachineInstr *Def2 = MRI->getVRegDef(OrOpnd2.getReg());
10711+
if (Def1 && Def1->getOpcode() == AMDGPU::COPY && Def2 &&
10712+
Def2->getOpcode() == AMDGPU::COPY && Def1->getOperand(1).isReg() &&
10713+
Def2->getOperand(1).isReg() &&
10714+
Def1->getOperand(1).getSubReg() == AMDGPU::sub0 &&
10715+
Def2->getOperand(1).getSubReg() == AMDGPU::sub1 &&
10716+
Def1->getOperand(1).getReg() == Def2->getOperand(1).getReg()) {
10717+
MachineInstr *Select = MRI->getVRegDef(Def1->getOperand(1).getReg());
10718+
if (Select && foldableSelect(*Select))
10719+
optimizeSCC(Select, Def, RI);
10720+
}
10721+
}
10722+
}
1069510723
return true;
1069610724
};
1069710725

@@ -10721,8 +10749,8 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
1072110749
// s_cmp_lg_i32 (s_and_b32 $src, 1 << n), 1 << n => s_bitcmp0_b32 $src, n
1072210750
// s_cmp_lg_u64 (s_and_b64 $src, 1 << n), 1 << n => s_bitcmp0_b64 $src, n
1072310751

10724-
MachineInstr *Def = MRI->getUniqueVRegDef(SrcReg);
10725-
if (!Def || Def->getParent() != CmpInstr.getParent())
10752+
MachineInstr *Def = MRI->getVRegDef(SrcReg);
10753+
if (!Def)
1072610754
return false;
1072710755

1072810756
if (Def->getOpcode() != AMDGPU::S_AND_B32 &&

llvm/lib/Target/AMDGPU/SIInstrInfo.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1687,7 +1687,7 @@ TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI,
16871687
/// skipping copy like instructions and subreg-manipulation pseudos.
16881688
/// Following another subreg of a reg:subreg isn't supported.
16891689
MachineInstr *getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P,
1690-
MachineRegisterInfo &MRI);
1690+
const MachineRegisterInfo &MRI);
16911691

16921692
/// \brief Return false if EXEC is not changed between the def of \p VReg at \p
16931693
/// DefMI and the use at \p UseMI. Should be run on SSA. Currently does not

0 commit comments

Comments
 (0)