Skip to content

Commit 1f57ce2

Browse files
committed
Streamline code and handle more opcodes
Signed-off-by: John Lu <John.Lu@amd.com>
1 parent ba282ce commit 1f57ce2

File tree

2 files changed

+81
-43
lines changed

2 files changed

+81
-43
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 33 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -10617,43 +10617,38 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
1061710617
if (!Def || Def->getParent() != CmpInstr.getParent())
1061810618
return false;
1061910619

10620-
if (!(Def->getOpcode() == AMDGPU::S_LSHL_B32 ||
10621-
Def->getOpcode() == AMDGPU::S_LSHL_B64 ||
10622-
Def->getOpcode() == AMDGPU::S_LSHR_B32 ||
10623-
Def->getOpcode() == AMDGPU::S_LSHR_B64 ||
10624-
Def->getOpcode() == AMDGPU::S_AND_B32 ||
10625-
Def->getOpcode() == AMDGPU::S_AND_B64 ||
10626-
Def->getOpcode() == AMDGPU::S_OR_B32 ||
10627-
Def->getOpcode() == AMDGPU::S_OR_B64 ||
10628-
Def->getOpcode() == AMDGPU::S_XOR_B32 ||
10629-
Def->getOpcode() == AMDGPU::S_XOR_B64 ||
10630-
Def->getOpcode() == AMDGPU::S_NAND_B32 ||
10631-
Def->getOpcode() == AMDGPU::S_NAND_B64 ||
10632-
Def->getOpcode() == AMDGPU::S_NOR_B32 ||
10633-
Def->getOpcode() == AMDGPU::S_NOR_B64 ||
10634-
Def->getOpcode() == AMDGPU::S_XNOR_B32 ||
10635-
Def->getOpcode() == AMDGPU::S_XNOR_B64 ||
10636-
Def->getOpcode() == AMDGPU::S_ANDN2_B32 ||
10637-
Def->getOpcode() == AMDGPU::S_ANDN2_B64 ||
10638-
Def->getOpcode() == AMDGPU::S_ORN2_B32 ||
10639-
Def->getOpcode() == AMDGPU::S_ORN2_B64 ||
10640-
Def->getOpcode() == AMDGPU::S_BFE_I32 ||
10641-
Def->getOpcode() == AMDGPU::S_BFE_I64 ||
10642-
Def->getOpcode() == AMDGPU::S_BFE_U32 ||
10643-
Def->getOpcode() == AMDGPU::S_BFE_U64 ||
10644-
Def->getOpcode() == AMDGPU::S_BCNT0_I32_B32 ||
10645-
Def->getOpcode() == AMDGPU::S_BCNT0_I32_B64 ||
10646-
Def->getOpcode() == AMDGPU::S_BCNT1_I32_B32 ||
10647-
Def->getOpcode() == AMDGPU::S_BCNT1_I32_B64 ||
10648-
Def->getOpcode() == AMDGPU::S_QUADMASK_B32 ||
10649-
Def->getOpcode() == AMDGPU::S_QUADMASK_B64 ||
10650-
Def->getOpcode() == AMDGPU::S_NOT_B32 ||
10651-
Def->getOpcode() == AMDGPU::S_NOT_B64 ||
10652-
10653-
((Def->getOpcode() == AMDGPU::S_CSELECT_B32 ||
10654-
Def->getOpcode() == AMDGPU::S_CSELECT_B64) &&
10655-
Def->getOperand(1).isImm() && Def->getOperand(1).getImm() &&
10656-
!Def->getOperand(2).isImm() && !Def->getOperand(2).getImm())))
10620+
bool CanOptimize = false;
10621+
MachineOperand *SccDef =
10622+
Def->findRegisterDefOperand(AMDGPU::SCC, /*TRI=*/nullptr);
10623+
10624+
// For any S_OP that sets SCC = (DST != 0), do the transformation
10625+
//
10626+
// s_cmp_lg_* (S_OP ...), 0 => (S_OP ...)
10627+
if (SccDef && Def->getOpcode() != AMDGPU::S_ADD_I32 &&
10628+
Def->getOpcode() != AMDGPU::S_ADD_U32 &&
10629+
Def->getOpcode() != AMDGPU::S_ADDC_U32 &&
10630+
Def->getOpcode() != AMDGPU::S_SUB_I32 &&
10631+
Def->getOpcode() != AMDGPU::S_SUB_U32 &&
10632+
Def->getOpcode() != AMDGPU::S_SUBB_U32 &&
10633+
Def->getOpcode() != AMDGPU::S_MIN_I32 &&
10634+
Def->getOpcode() != AMDGPU::S_MIN_U32 &&
10635+
Def->getOpcode() != AMDGPU::S_MAX_I32 &&
10636+
Def->getOpcode() != AMDGPU::S_MAX_U32 &&
10637+
Def->getOpcode() != AMDGPU::S_ADDK_I32)
10638+
CanOptimize = true;
10639+
10640+
// s_cmp_lg_* is redundant because the SCC input value of S_CSELECT* already
10641+
// holds the value that s_cmp_lg_* would compute.
10642+
//
10643+
// s_cmp_lg_* (S_CSELECT* (non-zero imm), 0), 0 => (S_CSELECT* (non-zero
10644+
// imm), 0)
10645+
if ((Def->getOpcode() == AMDGPU::S_CSELECT_B32 ||
10646+
Def->getOpcode() == AMDGPU::S_CSELECT_B64) &&
10647+
Def->getOperand(1).isImm() && Def->getOperand(1).getImm() &&
10648+
!Def->getOperand(2).isImm() && !Def->getOperand(2).getImm())
10649+
CanOptimize = true;
10650+
10651+
if (!CanOptimize)
1065710652
return false;
1065810653

1065910654
for (auto I = std::next(Def->getIterator()), E = CmpInstr.getIterator();
@@ -10663,13 +10658,8 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
1066310658
return false;
1066410659
}
1066510660

10666-
if (!(Def->getOpcode() == AMDGPU::S_CSELECT_B32 ||
10667-
Def->getOpcode() == AMDGPU::S_CSELECT_B64)) {
10668-
MachineOperand *SccDef =
10669-
Def->findRegisterDefOperand(AMDGPU::SCC, /*TRI=*/nullptr);
10670-
assert(SccDef && "Def instruction must define SCC");
10661+
if (SccDef)
1067110662
SccDef->setIsDead(false);
10672-
}
1067310663

1067410664
CmpInstr.eraseFromParent();
1067510665
return true;

llvm/test/CodeGen/AMDGPU/s_cmp_0.ll

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,54 @@ define amdgpu_ps i32 @lshr64(i64 inreg %val0, i64 inreg %val1) {
6262
ret i32 %zext
6363
}
6464

65+
define amdgpu_ps i32 @ashr32(i32 inreg %val0, i32 inreg %val1) {
66+
; CHECK-LABEL: ashr32:
67+
; CHECK: ; %bb.0:
68+
; CHECK-NEXT: s_ashr_i32 s0, s0, s1
69+
; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0
70+
; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
71+
; CHECK-NEXT: v_readfirstlane_b32 s0, v0
72+
; CHECK-NEXT: ; return to shader part epilog
73+
%result = ashr i32 %val0, %val1
74+
%cmp = icmp ne i32 %result, 0
75+
%zext = zext i1 %cmp to i32
76+
ret i32 %zext
77+
}
78+
79+
define amdgpu_ps i32 @ashr64(i64 inreg %val0, i64 inreg %val1) {
80+
; CHECK-LABEL: ashr64:
81+
; CHECK: ; %bb.0:
82+
; CHECK-NEXT: s_ashr_i64 s[0:1], s[0:1], s2
83+
; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0
84+
; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
85+
; CHECK-NEXT: v_readfirstlane_b32 s0, v0
86+
; CHECK-NEXT: ; return to shader part epilog
87+
%result = ashr i64 %val0, %val1
88+
%cmp = icmp ne i64 %result, 0
89+
%zext = zext i1 %cmp to i32
90+
ret i32 %zext
91+
}
92+
93+
define amdgpu_ps i32 @abs32(i32 inreg %val0, ptr addrspace(1) %ptr) {
94+
; CHECK-LABEL: abs32:
95+
; CHECK: ; %bb.0:
96+
; CHECK-NEXT: s_abs_i32 s0, s0
97+
; CHECK-NEXT: v_mov_b32_e32 v2, s0
98+
; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0
99+
; CHECK-NEXT: global_store_dword v[0:1], v2, off
100+
; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
101+
; CHECK-NEXT: v_readfirstlane_b32 s0, v0
102+
; CHECK-NEXT: s_waitcnt vmcnt(0)
103+
; CHECK-NEXT: ; return to shader part epilog
104+
%neg = sub i32 0, %val0
105+
%cond = icmp sgt i32 %val0, %neg
106+
%result = select i1 %cond, i32 %val0, i32 %neg
107+
store i32 %result, ptr addrspace(1) %ptr
108+
%cmp = icmp ne i32 %result, 0
109+
%zext = zext i1 %cmp to i32
110+
ret i32 %zext
111+
}
112+
65113
define amdgpu_ps i32 @and32(i32 inreg %val0, i32 inreg %val1) {
66114
; CHECK-LABEL: and32:
67115
; CHECK: ; %bb.0:

0 commit comments

Comments
 (0)