Skip to content

Commit

Permalink
[X86] combineSelect fold 'smin' style pattern select(pcmpgt(RHS, LHS), LHS, RHS) -> select(pcmpgt(LHS, RHS), RHS, LHS) if pcmpgt(LHS, RHS) already exists
Browse files Browse the repository at this point in the history

Avoids repeated commuted comparisons when performing min/max and clamp patterns.
  • Loading branch information
RKSimon committed Jul 30, 2022
1 parent d4b4747 commit 813459e
Show file tree
Hide file tree
Showing 4 changed files with 129 additions and 136 deletions.
29 changes: 23 additions & 6 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44857,12 +44857,29 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
if (SDValue CondNot = IsNOT(Cond, DAG))
return DAG.getNode(N->getOpcode(), DL, VT,
DAG.getBitcast(CondVT, CondNot), RHS, LHS);
// pcmpgt(X, -1) -> pcmpgt(0, X) to help select/blendv just use the signbit.
if (Cond.getOpcode() == X86ISD::PCMPGT && Cond.hasOneUse() &&
ISD::isBuildVectorAllOnes(Cond.getOperand(1).getNode())) {
Cond = DAG.getNode(X86ISD::PCMPGT, DL, CondVT,
DAG.getConstant(0, DL, CondVT), Cond.getOperand(0));
return DAG.getNode(N->getOpcode(), DL, VT, Cond, RHS, LHS);

if (Cond.getOpcode() == X86ISD::PCMPGT && Cond.hasOneUse()) {
// pcmpgt(X, -1) -> pcmpgt(0, X) to help select/blendv just use the
// signbit.
if (ISD::isBuildVectorAllOnes(Cond.getOperand(1).getNode())) {
Cond = DAG.getNode(X86ISD::PCMPGT, DL, CondVT,
DAG.getConstant(0, DL, CondVT), Cond.getOperand(0));
return DAG.getNode(N->getOpcode(), DL, VT, Cond, RHS, LHS);
}

// smin(LHS, RHS) : select(pcmpgt(RHS, LHS), LHS, RHS)
// -> select(pcmpgt(LHS, RHS), RHS, LHS)
// iff the commuted pcmpgt() already exists.
// TODO: Could DAGCombiner::combine cse search for SETCC nodes, like it
// does for commutative binops?
if (Cond.getOperand(0) == RHS && Cond.getOperand(1) == LHS) {
if (SDNode *FlipCond =
DAG.getNodeIfExists(X86ISD::PCMPGT, DAG.getVTList(CondVT),
{Cond.getOperand(1), Cond.getOperand(0)})) {
return DAG.getNode(N->getOpcode(), DL, VT, SDValue(FlipCond, 0), RHS,
LHS);
}
}
}
}

Expand Down
24 changes: 8 additions & 16 deletions llvm/test/CodeGen/X86/midpoint-int-vec-128.ll
Original file line number Diff line number Diff line change
Expand Up @@ -956,8 +956,7 @@ define <2 x i64> @vec128_i64_signed_reg_reg(<2 x i64> %a1, <2 x i64> %a2) nounwi
; AVX1-FALLBACK: # %bb.0:
; AVX1-FALLBACK-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
; AVX1-FALLBACK-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
; AVX1-FALLBACK-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm4
; AVX1-FALLBACK-NEXT: vblendvpd %xmm4, %xmm0, %xmm1, %xmm4
; AVX1-FALLBACK-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm4
; AVX1-FALLBACK-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm1
; AVX1-FALLBACK-NEXT: vpsubq %xmm4, %xmm1, %xmm1
; AVX1-FALLBACK-NEXT: vpsrlq $1, %xmm1, %xmm2
Expand All @@ -976,8 +975,7 @@ define <2 x i64> @vec128_i64_signed_reg_reg(<2 x i64> %a1, <2 x i64> %a2) nounwi
; AVX2-FALLBACK: # %bb.0:
; AVX2-FALLBACK-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
; AVX2-FALLBACK-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
; AVX2-FALLBACK-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm4
; AVX2-FALLBACK-NEXT: vblendvpd %xmm4, %xmm0, %xmm1, %xmm4
; AVX2-FALLBACK-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm4
; AVX2-FALLBACK-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm1
; AVX2-FALLBACK-NEXT: vpsubq %xmm4, %xmm1, %xmm1
; AVX2-FALLBACK-NEXT: vpsrlq $1, %xmm1, %xmm2
Expand Down Expand Up @@ -1401,8 +1399,7 @@ define <2 x i64> @vec128_i64_signed_mem_reg(ptr %a1_addr, <2 x i64> %a2) nounwin
; AVX1-FALLBACK-NEXT: vmovdqa (%rdi), %xmm1
; AVX1-FALLBACK-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
; AVX1-FALLBACK-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
; AVX1-FALLBACK-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm4
; AVX1-FALLBACK-NEXT: vblendvpd %xmm4, %xmm1, %xmm0, %xmm4
; AVX1-FALLBACK-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm4
; AVX1-FALLBACK-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0
; AVX1-FALLBACK-NEXT: vpsubq %xmm4, %xmm0, %xmm0
; AVX1-FALLBACK-NEXT: vpsrlq $1, %xmm0, %xmm2
Expand All @@ -1422,8 +1419,7 @@ define <2 x i64> @vec128_i64_signed_mem_reg(ptr %a1_addr, <2 x i64> %a2) nounwin
; AVX2-FALLBACK-NEXT: vmovdqa (%rdi), %xmm1
; AVX2-FALLBACK-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
; AVX2-FALLBACK-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
; AVX2-FALLBACK-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm4
; AVX2-FALLBACK-NEXT: vblendvpd %xmm4, %xmm1, %xmm0, %xmm4
; AVX2-FALLBACK-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm4
; AVX2-FALLBACK-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0
; AVX2-FALLBACK-NEXT: vpsubq %xmm4, %xmm0, %xmm0
; AVX2-FALLBACK-NEXT: vpsrlq $1, %xmm0, %xmm2
Expand Down Expand Up @@ -1624,8 +1620,7 @@ define <2 x i64> @vec128_i64_signed_reg_mem(<2 x i64> %a1, ptr %a2_addr) nounwin
; AVX1-FALLBACK-NEXT: vmovdqa (%rdi), %xmm1
; AVX1-FALLBACK-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
; AVX1-FALLBACK-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
; AVX1-FALLBACK-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm4
; AVX1-FALLBACK-NEXT: vblendvpd %xmm4, %xmm0, %xmm1, %xmm4
; AVX1-FALLBACK-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm4
; AVX1-FALLBACK-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm1
; AVX1-FALLBACK-NEXT: vpsubq %xmm4, %xmm1, %xmm1
; AVX1-FALLBACK-NEXT: vpsrlq $1, %xmm1, %xmm2
Expand All @@ -1645,8 +1640,7 @@ define <2 x i64> @vec128_i64_signed_reg_mem(<2 x i64> %a1, ptr %a2_addr) nounwin
; AVX2-FALLBACK-NEXT: vmovdqa (%rdi), %xmm1
; AVX2-FALLBACK-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
; AVX2-FALLBACK-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
; AVX2-FALLBACK-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm4
; AVX2-FALLBACK-NEXT: vblendvpd %xmm4, %xmm0, %xmm1, %xmm4
; AVX2-FALLBACK-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm4
; AVX2-FALLBACK-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm1
; AVX2-FALLBACK-NEXT: vpsubq %xmm4, %xmm1, %xmm1
; AVX2-FALLBACK-NEXT: vpsrlq $1, %xmm1, %xmm2
Expand Down Expand Up @@ -1850,8 +1844,7 @@ define <2 x i64> @vec128_i64_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind
; AVX1-FALLBACK-NEXT: vmovdqa (%rsi), %xmm1
; AVX1-FALLBACK-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
; AVX1-FALLBACK-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
; AVX1-FALLBACK-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm4
; AVX1-FALLBACK-NEXT: vblendvpd %xmm4, %xmm0, %xmm1, %xmm4
; AVX1-FALLBACK-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm4
; AVX1-FALLBACK-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm1
; AVX1-FALLBACK-NEXT: vpsubq %xmm4, %xmm1, %xmm1
; AVX1-FALLBACK-NEXT: vpsrlq $1, %xmm1, %xmm2
Expand All @@ -1872,8 +1865,7 @@ define <2 x i64> @vec128_i64_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind
; AVX2-FALLBACK-NEXT: vmovdqa (%rsi), %xmm1
; AVX2-FALLBACK-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
; AVX2-FALLBACK-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
; AVX2-FALLBACK-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm4
; AVX2-FALLBACK-NEXT: vblendvpd %xmm4, %xmm0, %xmm1, %xmm4
; AVX2-FALLBACK-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm4
; AVX2-FALLBACK-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm1
; AVX2-FALLBACK-NEXT: vpsubq %xmm4, %xmm1, %xmm1
; AVX2-FALLBACK-NEXT: vpsrlq $1, %xmm1, %xmm2
Expand Down
Loading

0 comments on commit 813459e

Please sign in to comment.