Skip to content

Commit ba282ce

Browse files
committed
Delete s_cmp sX, 0 if it is redundant
Signed-off-by: John Lu <John.Lu@amd.com>
1 parent 7122185 commit ba282ce

29 files changed

+1263
-1779
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 69 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10608,6 +10608,73 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
1060810608
if (SrcReg2 && !getFoldableImm(SrcReg2, *MRI, CmpValue))
1060910609
return false;
1061010610

10611+
const auto optimizeCmpSelect = [&CmpInstr, SrcReg, CmpValue, MRI,
10612+
this]() -> bool {
10613+
if (CmpValue != 0)
10614+
return false;
10615+
10616+
MachineInstr *Def = MRI->getUniqueVRegDef(SrcReg);
10617+
if (!Def || Def->getParent() != CmpInstr.getParent())
10618+
return false;
10619+
10620+
if (!(Def->getOpcode() == AMDGPU::S_LSHL_B32 ||
10621+
Def->getOpcode() == AMDGPU::S_LSHL_B64 ||
10622+
Def->getOpcode() == AMDGPU::S_LSHR_B32 ||
10623+
Def->getOpcode() == AMDGPU::S_LSHR_B64 ||
10624+
Def->getOpcode() == AMDGPU::S_AND_B32 ||
10625+
Def->getOpcode() == AMDGPU::S_AND_B64 ||
10626+
Def->getOpcode() == AMDGPU::S_OR_B32 ||
10627+
Def->getOpcode() == AMDGPU::S_OR_B64 ||
10628+
Def->getOpcode() == AMDGPU::S_XOR_B32 ||
10629+
Def->getOpcode() == AMDGPU::S_XOR_B64 ||
10630+
Def->getOpcode() == AMDGPU::S_NAND_B32 ||
10631+
Def->getOpcode() == AMDGPU::S_NAND_B64 ||
10632+
Def->getOpcode() == AMDGPU::S_NOR_B32 ||
10633+
Def->getOpcode() == AMDGPU::S_NOR_B64 ||
10634+
Def->getOpcode() == AMDGPU::S_XNOR_B32 ||
10635+
Def->getOpcode() == AMDGPU::S_XNOR_B64 ||
10636+
Def->getOpcode() == AMDGPU::S_ANDN2_B32 ||
10637+
Def->getOpcode() == AMDGPU::S_ANDN2_B64 ||
10638+
Def->getOpcode() == AMDGPU::S_ORN2_B32 ||
10639+
Def->getOpcode() == AMDGPU::S_ORN2_B64 ||
10640+
Def->getOpcode() == AMDGPU::S_BFE_I32 ||
10641+
Def->getOpcode() == AMDGPU::S_BFE_I64 ||
10642+
Def->getOpcode() == AMDGPU::S_BFE_U32 ||
10643+
Def->getOpcode() == AMDGPU::S_BFE_U64 ||
10644+
Def->getOpcode() == AMDGPU::S_BCNT0_I32_B32 ||
10645+
Def->getOpcode() == AMDGPU::S_BCNT0_I32_B64 ||
10646+
Def->getOpcode() == AMDGPU::S_BCNT1_I32_B32 ||
10647+
Def->getOpcode() == AMDGPU::S_BCNT1_I32_B64 ||
10648+
Def->getOpcode() == AMDGPU::S_QUADMASK_B32 ||
10649+
Def->getOpcode() == AMDGPU::S_QUADMASK_B64 ||
10650+
Def->getOpcode() == AMDGPU::S_NOT_B32 ||
10651+
Def->getOpcode() == AMDGPU::S_NOT_B64 ||
10652+
10653+
((Def->getOpcode() == AMDGPU::S_CSELECT_B32 ||
10654+
Def->getOpcode() == AMDGPU::S_CSELECT_B64) &&
10655+
Def->getOperand(1).isImm() && Def->getOperand(1).getImm() &&
10656+
!Def->getOperand(2).isImm() && !Def->getOperand(2).getImm())))
10657+
return false;
10658+
10659+
for (auto I = std::next(Def->getIterator()), E = CmpInstr.getIterator();
10660+
I != E; ++I) {
10661+
if (I->modifiesRegister(AMDGPU::SCC, &RI) ||
10662+
I->killsRegister(AMDGPU::SCC, &RI))
10663+
return false;
10664+
}
10665+
10666+
if (!(Def->getOpcode() == AMDGPU::S_CSELECT_B32 ||
10667+
Def->getOpcode() == AMDGPU::S_CSELECT_B64)) {
10668+
MachineOperand *SccDef =
10669+
Def->findRegisterDefOperand(AMDGPU::SCC, /*TRI=*/nullptr);
10670+
assert(SccDef && "Def instruction must define SCC");
10671+
SccDef->setIsDead(false);
10672+
}
10673+
10674+
CmpInstr.eraseFromParent();
10675+
return true;
10676+
};
10677+
1061110678
const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue, MRI,
1061210679
this](int64_t ExpectedValue, unsigned SrcSize,
1061310680
bool IsReversible, bool IsSigned) -> bool {
@@ -10735,15 +10802,15 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
1073510802
case AMDGPU::S_CMP_LG_I32:
1073610803
case AMDGPU::S_CMPK_LG_U32:
1073710804
case AMDGPU::S_CMPK_LG_I32:
10738-
return optimizeCmpAnd(0, 32, true, false);
10805+
return optimizeCmpAnd(0, 32, true, false) || optimizeCmpSelect();
1073910806
case AMDGPU::S_CMP_GT_U32:
1074010807
case AMDGPU::S_CMPK_GT_U32:
1074110808
return optimizeCmpAnd(0, 32, false, false);
1074210809
case AMDGPU::S_CMP_GT_I32:
1074310810
case AMDGPU::S_CMPK_GT_I32:
1074410811
return optimizeCmpAnd(0, 32, false, true);
1074510812
case AMDGPU::S_CMP_LG_U64:
10746-
return optimizeCmpAnd(0, 64, true, false);
10813+
return optimizeCmpAnd(0, 64, true, false) || optimizeCmpSelect();
1074710814
}
1074810815

1074910816
return false;

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i32.ll

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,6 @@ define amdgpu_cs i32 @branch_divergent_ballot_eq_zero_non_compare(i32 %v) {
140140
; CHECK-NEXT: v_and_b32_e32 v0, 1, v0
141141
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
142142
; CHECK-NEXT: s_and_b32 s0, vcc_lo, exec_lo
143-
; CHECK-NEXT: s_cmp_lg_u32 s0, 0
144143
; CHECK-NEXT: s_cbranch_scc0 .LBB9_2
145144
; CHECK-NEXT: ; %bb.1: ; %false
146145
; CHECK-NEXT: s_mov_b32 s0, 33
@@ -345,7 +344,6 @@ define amdgpu_cs i32 @branch_divergent_ballot_eq_zero_and(i32 %v1, i32 %v2) {
345344
; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, 12, v0
346345
; CHECK-NEXT: v_cmp_lt_u32_e64 s0, 34, v1
347346
; CHECK-NEXT: s_and_b32 s0, vcc_lo, s0
348-
; CHECK-NEXT: s_cmp_lg_u32 s0, 0
349347
; CHECK-NEXT: s_cbranch_scc0 .LBB17_2
350348
; CHECK-NEXT: ; %bb.1: ; %false
351349
; CHECK-NEXT: s_mov_b32 s0, 33

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i64.ll

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,6 @@ define amdgpu_cs i32 @branch_divergent_ballot_eq_zero_non_compare(i32 %v) {
143143
; CHECK-NEXT: v_and_b32_e32 v0, 1, v0
144144
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
145145
; CHECK-NEXT: s_and_b64 s[0:1], vcc, exec
146-
; CHECK-NEXT: s_cmp_lg_u64 s[0:1], 0
147146
; CHECK-NEXT: s_cbranch_scc0 .LBB9_2
148147
; CHECK-NEXT: ; %bb.1: ; %false
149148
; CHECK-NEXT: s_mov_b32 s0, 33
@@ -348,7 +347,6 @@ define amdgpu_cs i32 @branch_divergent_ballot_eq_zero_and(i32 %v1, i32 %v2) {
348347
; CHECK-NEXT: v_cmp_gt_u32_e32 vcc, 12, v0
349348
; CHECK-NEXT: v_cmp_lt_u32_e64 s[0:1], 34, v1
350349
; CHECK-NEXT: s_and_b64 s[0:1], vcc, s[0:1]
351-
; CHECK-NEXT: s_cmp_lg_u64 s[0:1], 0
352350
; CHECK-NEXT: s_cbranch_scc0 .LBB17_2
353351
; CHECK-NEXT: ; %bb.1: ; %false
354352
; CHECK-NEXT: s_mov_b32 s0, 33

0 commit comments

Comments
 (0)