[X86] Add checkSignTestSetCCCombine - if X86ISD::CMP/OR is testing for signbits, attempt to test for the signbit source instead. #97433
Conversation
@llvm/pr-subscribers-backend-x86

Author: Simon Pilgrim (RKSimon)

Changes

There's a lot more we could do here (including the reverse fold back to X86::COND_S/NS with some other X86ISD nodes), but I wanted to address the MOVMSK issue initially.

Fixes #66191

Full diff: https://github.com/llvm/llvm-project/pull/97433.diff

3 Files Affected:
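For reference, a minimal reproducer in the spirit of the movmsk-bittest.ll tests below (the function name is illustrative, not from the patch): testing the top bit of a MOVMSK result previously emitted a shl/sar pair just to move that bit into the sign flag; with this combine it becomes a single TEST of the relevant bit.

```llvm
; Tests bit 1 (the MSB of the i2 mask) of a movmskpd result.
define i32 @movmsk_bit1(<2 x i64> %v, i32 %a, i32 %b) {
  %cmp = icmp slt <2 x i64> %v, zeroinitializer
  %msk = bitcast <2 x i1> %cmp to i2
  %sign = icmp slt i2 %msk, 0           ; signbit test on the mask
  %sel = select i1 %sign, i32 %a, i32 %b
  ret i32 %sel
}
; Before: shlb $6, %cl ; sarb $6, %cl ; cmovnsl %esi, %eax
; After:  testb $2, %cl ; cmovel %esi, %eax
```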
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 8eadf079d4f2f..b8351f35daa9a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -46433,6 +46433,57 @@ static SDValue combineSetCCAtomicArith(SDValue Cmp, X86::CondCode &CC,
return LockOp;
}
+// Check whether we're just testing the signbit, and whether we can simplify
+// this by tracking where the signbit came from.
+static SDValue checkSignTestSetCCCombine(SDValue Cmp, X86::CondCode &CC,
+ SelectionDAG &DAG) {
+ if (CC != X86::COND_S && CC != X86::COND_NS)
+ return SDValue();
+
+ SDValue Src;
+ if (Cmp.getOpcode() == X86ISD::CMP) {
+ // CMP(X,0) -> signbit test
+ if (!isNullConstant(Cmp.getOperand(1)) || !Cmp->hasOneUse())
+ return SDValue();
+ Src = Cmp.getOperand(0);
+ // Peek through a SRA node as we just need the signbit.
+ // TODO: Use SimplifyDemandedBits instead of just SRA?
+ if (Src.getOpcode() != ISD::SRA || !Src->hasOneUse())
+ return SDValue();
+ Src = Src.getOperand(0);
+ } else if (Cmp.getOpcode() == X86ISD::OR && Cmp->hasOneUse()) {
+ // OR(X,Y) -> see if only one operand contributes to the signbit.
+ if (DAG.SignBitIsZero(Cmp.getOperand(0)))
+ Src = Cmp.getOperand(1);
+ else if (DAG.SignBitIsZero(Cmp.getOperand(1)))
+ Src = Cmp.getOperand(0);
+ else
+ return SDValue();
+ } else {
+ return SDValue();
+ }
+
+ // Replace with a TEST on the MSB.
+ SDLoc DL(Cmp);
+ MVT SrcVT = Src.getSimpleValueType();
+ APInt BitMask = APInt::getSignMask(SrcVT.getScalarSizeInBits());
+
+ // If Src came from a SHL (probably from an expanded SIGN_EXTEND_INREG), then
+ // peek through and adjust the TEST bit.
+ if (Src.getOpcode() == ISD::SHL) {
+ if (std::optional<uint64_t> ShiftAmt = DAG.getValidShiftAmount(Src)) {
+ Src = Src.getOperand(0);
+ BitMask.lshrInPlace(*ShiftAmt);
+ }
+ }
+
+ SDValue Mask = DAG.getNode(ISD::AND, DL, SrcVT, Src,
+ DAG.getConstant(BitMask, DL, SrcVT));
+ CC = CC == X86::COND_S ? X86::COND_NE : X86::COND_E;
+ return DAG.getNode(X86ISD::CMP, DL, MVT::i32, Mask,
+ DAG.getConstant(0, DL, SrcVT));
+}
+
// Check whether a boolean test is testing a boolean value generated by
// X86ISD::SETCC. If so, return the operand of that SETCC and proper condition
// code.
@@ -47072,6 +47123,9 @@ static SDValue combineSetCCEFLAGS(SDValue EFLAGS, X86::CondCode &CC,
if (SDValue Flags = combineCarryThroughADD(EFLAGS, DAG))
return Flags;
+ if (SDValue R = checkSignTestSetCCCombine(EFLAGS, CC, DAG))
+ return R;
+
if (SDValue R = checkBoolTestSetCCCombine(EFLAGS, CC))
return R;
diff --git a/llvm/test/CodeGen/X86/is_fpclass-fp80.ll b/llvm/test/CodeGen/X86/is_fpclass-fp80.ll
index ec2323ac2250c..7d0c5838c1554 100644
--- a/llvm/test/CodeGen/X86/is_fpclass-fp80.ll
+++ b/llvm/test/CodeGen/X86/is_fpclass-fp80.ll
@@ -363,10 +363,9 @@ define i1 @is_posnormal_f80(x86_fp80 %x) {
; CHECK-32-NEXT: pushl %esi
; CHECK-32-NEXT: .cfi_def_cfa_offset 8
; CHECK-32-NEXT: .cfi_offset %esi, -8
-; CHECK-32-NEXT: movzwl {{[0-9]+}}(%esp), %edx
-; CHECK-32-NEXT: movswl %dx, %ecx
-; CHECK-32-NEXT: sarl $15, %ecx
; CHECK-32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-32-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; CHECK-32-NEXT: movl %ecx, %edx
; CHECK-32-NEXT: andl $32767, %edx # imm = 0x7FFF
; CHECK-32-NEXT: decl %edx
; CHECK-32-NEXT: movzwl %dx, %edx
@@ -374,8 +373,8 @@ define i1 @is_posnormal_f80(x86_fp80 %x) {
; CHECK-32-NEXT: cmpl $32766, %edx # imm = 0x7FFE
; CHECK-32-NEXT: sbbl %esi, %esi
; CHECK-32-NEXT: setb %dl
-; CHECK-32-NEXT: testl %ecx, %ecx
-; CHECK-32-NEXT: setns %cl
+; CHECK-32-NEXT: testl $32768, %ecx # imm = 0x8000
+; CHECK-32-NEXT: sete %cl
; CHECK-32-NEXT: shrl $31, %eax
; CHECK-32-NEXT: andb %cl, %al
; CHECK-32-NEXT: andb %dl, %al
@@ -411,10 +410,9 @@ define i1 @is_negnormal_f80(x86_fp80 %x) {
; CHECK-32-NEXT: pushl %esi
; CHECK-32-NEXT: .cfi_def_cfa_offset 8
; CHECK-32-NEXT: .cfi_offset %esi, -8
-; CHECK-32-NEXT: movzwl {{[0-9]+}}(%esp), %edx
-; CHECK-32-NEXT: movswl %dx, %ecx
-; CHECK-32-NEXT: sarl $15, %ecx
; CHECK-32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-32-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; CHECK-32-NEXT: movl %ecx, %edx
; CHECK-32-NEXT: andl $32767, %edx # imm = 0x7FFF
; CHECK-32-NEXT: decl %edx
; CHECK-32-NEXT: movzwl %dx, %edx
@@ -422,8 +420,8 @@ define i1 @is_negnormal_f80(x86_fp80 %x) {
; CHECK-32-NEXT: cmpl $32766, %edx # imm = 0x7FFE
; CHECK-32-NEXT: sbbl %esi, %esi
; CHECK-32-NEXT: setb %dl
-; CHECK-32-NEXT: testl %ecx, %ecx
-; CHECK-32-NEXT: sets %cl
+; CHECK-32-NEXT: testl $32768, %ecx # imm = 0x8000
+; CHECK-32-NEXT: setne %cl
; CHECK-32-NEXT: shrl $31, %eax
; CHECK-32-NEXT: andb %cl, %al
; CHECK-32-NEXT: andb %dl, %al
@@ -543,24 +541,23 @@ define i1 @is_negsubnormal_f80(x86_fp80 %x) {
; CHECK-32-NEXT: .cfi_def_cfa_offset 12
; CHECK-32-NEXT: .cfi_offset %esi, -12
; CHECK-32-NEXT: .cfi_offset %edi, -8
-; CHECK-32-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
-; CHECK-32-NEXT: movswl %cx, %eax
-; CHECK-32-NEXT: sarl $15, %eax
-; CHECK-32-NEXT: movl {{[0-9]+}}(%esp), %esi
; CHECK-32-NEXT: movl {{[0-9]+}}(%esp), %edi
+; CHECK-32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; CHECK-32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; CHECK-32-NEXT: movl %eax, %ecx
; CHECK-32-NEXT: andl $32767, %ecx # imm = 0x7FFF
-; CHECK-32-NEXT: xorl %edx, %edx
-; CHECK-32-NEXT: addl $-1, %esi
-; CHECK-32-NEXT: adcl $-1, %edi
-; CHECK-32-NEXT: adcl $-1, %ecx
+; CHECK-32-NEXT: xorl %esi, %esi
+; CHECK-32-NEXT: addl $-1, %edi
; CHECK-32-NEXT: adcl $-1, %edx
-; CHECK-32-NEXT: cmpl $-1, %esi
-; CHECK-32-NEXT: sbbl $2147483647, %edi # imm = 0x7FFFFFFF
+; CHECK-32-NEXT: adcl $-1, %ecx
+; CHECK-32-NEXT: adcl $-1, %esi
+; CHECK-32-NEXT: cmpl $-1, %edi
+; CHECK-32-NEXT: sbbl $2147483647, %edx # imm = 0x7FFFFFFF
; CHECK-32-NEXT: sbbl $0, %ecx
-; CHECK-32-NEXT: sbbl $0, %edx
+; CHECK-32-NEXT: sbbl $0, %esi
; CHECK-32-NEXT: setb %cl
-; CHECK-32-NEXT: testl %eax, %eax
-; CHECK-32-NEXT: sets %al
+; CHECK-32-NEXT: testl $32768, %eax # imm = 0x8000
+; CHECK-32-NEXT: setne %al
; CHECK-32-NEXT: andb %cl, %al
; CHECK-32-NEXT: popl %esi
; CHECK-32-NEXT: .cfi_def_cfa_offset 8
diff --git a/llvm/test/CodeGen/X86/movmsk-bittest.ll b/llvm/test/CodeGen/X86/movmsk-bittest.ll
index 7c8fe03ff4741..b67e70e71c3d5 100644
--- a/llvm/test/CodeGen/X86/movmsk-bittest.ll
+++ b/llvm/test/CodeGen/X86/movmsk-bittest.ll
@@ -37,18 +37,16 @@ define i32 @movmsk_slt_v2i64_1(<2 x i64> %v, i32 %a, i32 %b) {
; SSE: # %bb.0:
; SSE-NEXT: movl %edi, %eax
; SSE-NEXT: movmskpd %xmm0, %ecx
-; SSE-NEXT: shlb $6, %cl
-; SSE-NEXT: sarb $6, %cl
-; SSE-NEXT: cmovnsl %esi, %eax
+; SSE-NEXT: testb $2, %cl
+; SSE-NEXT: cmovel %esi, %eax
; SSE-NEXT: retq
;
; AVX-LABEL: movmsk_slt_v2i64_1:
; AVX: # %bb.0:
; AVX-NEXT: movl %edi, %eax
; AVX-NEXT: vmovmskpd %xmm0, %ecx
-; AVX-NEXT: shlb $6, %cl
-; AVX-NEXT: sarb $6, %cl
-; AVX-NEXT: cmovnsl %esi, %eax
+; AVX-NEXT: testb $2, %cl
+; AVX-NEXT: cmovel %esi, %eax
; AVX-NEXT: retq
%cmp = icmp slt <2 x i64> %v, zeroinitializer
%msk = bitcast <2 x i1> %cmp to i2
@@ -62,18 +60,16 @@ define i32 @movmsk_sgt_v2i64_1(<2 x i64> %v, i32 %a, i32 %b) {
; SSE: # %bb.0:
; SSE-NEXT: movl %edi, %eax
; SSE-NEXT: movmskpd %xmm0, %ecx
-; SSE-NEXT: shlb $6, %cl
-; SSE-NEXT: sarb $6, %cl
-; SSE-NEXT: cmovsl %esi, %eax
+; SSE-NEXT: testb $2, %cl
+; SSE-NEXT: cmovnel %esi, %eax
; SSE-NEXT: retq
;
; AVX-LABEL: movmsk_sgt_v2i64_1:
; AVX: # %bb.0:
; AVX-NEXT: movl %edi, %eax
; AVX-NEXT: vmovmskpd %xmm0, %ecx
-; AVX-NEXT: shlb $6, %cl
-; AVX-NEXT: sarb $6, %cl
-; AVX-NEXT: cmovsl %esi, %eax
+; AVX-NEXT: testb $2, %cl
+; AVX-NEXT: cmovnel %esi, %eax
; AVX-NEXT: retq
%cmp = icmp slt <2 x i64> %v, zeroinitializer
%msk = bitcast <2 x i1> %cmp to i2
@@ -111,18 +107,16 @@ define i32 @movmsk_slt_v4i32_3(<4 x i32> %v, i32 %a, i32 %b) {
; SSE: # %bb.0:
; SSE-NEXT: movl %edi, %eax
; SSE-NEXT: movmskps %xmm0, %ecx
-; SSE-NEXT: shlb $4, %cl
-; SSE-NEXT: sarb $4, %cl
-; SSE-NEXT: cmovnsl %esi, %eax
+; SSE-NEXT: testb $8, %cl
+; SSE-NEXT: cmovel %esi, %eax
; SSE-NEXT: retq
;
; AVX-LABEL: movmsk_slt_v4i32_3:
; AVX: # %bb.0:
; AVX-NEXT: movl %edi, %eax
; AVX-NEXT: vmovmskps %xmm0, %ecx
-; AVX-NEXT: shlb $4, %cl
-; AVX-NEXT: sarb $4, %cl
-; AVX-NEXT: cmovnsl %esi, %eax
+; AVX-NEXT: testb $8, %cl
+; AVX-NEXT: cmovel %esi, %eax
; AVX-NEXT: retq
%cmp = icmp slt <4 x i32> %v, zeroinitializer
%msk = bitcast <4 x i1> %cmp to i4
@@ -136,18 +130,16 @@ define i32 @movmsk_sgt_v4i32_3(<4 x i32> %v, i32 %a, i32 %b) {
; SSE: # %bb.0:
; SSE-NEXT: movl %edi, %eax
; SSE-NEXT: movmskps %xmm0, %ecx
-; SSE-NEXT: shlb $4, %cl
-; SSE-NEXT: sarb $4, %cl
-; SSE-NEXT: cmovsl %esi, %eax
+; SSE-NEXT: testb $8, %cl
+; SSE-NEXT: cmovnel %esi, %eax
; SSE-NEXT: retq
;
; AVX-LABEL: movmsk_sgt_v4i32_3:
; AVX: # %bb.0:
; AVX-NEXT: movl %edi, %eax
; AVX-NEXT: vmovmskps %xmm0, %ecx
-; AVX-NEXT: shlb $4, %cl
-; AVX-NEXT: sarb $4, %cl
-; AVX-NEXT: cmovsl %esi, %eax
+; AVX-NEXT: testb $8, %cl
+; AVX-NEXT: cmovnel %esi, %eax
; AVX-NEXT: retq
%cmp = icmp slt <4 x i32> %v, zeroinitializer
%msk = bitcast <4 x i1> %cmp to i4
@@ -256,20 +248,17 @@ define i32 @movmsk_slt_v4i64_3(<4 x i64> %v, i32 %a, i32 %b) {
; SSE-LABEL: movmsk_slt_v4i64_3:
; SSE: # %bb.0:
; SSE-NEXT: movl %edi, %eax
-; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
-; SSE-NEXT: movmskps %xmm0, %ecx
-; SSE-NEXT: shlb $4, %cl
-; SSE-NEXT: sarb $4, %cl
-; SSE-NEXT: cmovnsl %esi, %eax
+; SSE-NEXT: movmskps %xmm1, %ecx
+; SSE-NEXT: testb $8, %cl
+; SSE-NEXT: cmovel %esi, %eax
; SSE-NEXT: retq
;
; AVX-LABEL: movmsk_slt_v4i64_3:
; AVX: # %bb.0:
; AVX-NEXT: movl %edi, %eax
; AVX-NEXT: vmovmskpd %ymm0, %ecx
-; AVX-NEXT: shlb $4, %cl
-; AVX-NEXT: sarb $4, %cl
-; AVX-NEXT: cmovnsl %esi, %eax
+; AVX-NEXT: testb $8, %cl
+; AVX-NEXT: cmovel %esi, %eax
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
%cmp = icmp slt <4 x i64> %v, zeroinitializer
@@ -283,20 +272,17 @@ define i32 @movmsk_sgt_v4i64_3(<4 x i64> %v, i32 %a, i32 %b) {
; SSE-LABEL: movmsk_sgt_v4i64_3:
; SSE: # %bb.0:
; SSE-NEXT: movl %edi, %eax
-; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
-; SSE-NEXT: movmskps %xmm0, %ecx
-; SSE-NEXT: shlb $4, %cl
-; SSE-NEXT: sarb $4, %cl
-; SSE-NEXT: cmovsl %esi, %eax
+; SSE-NEXT: movmskps %xmm1, %ecx
+; SSE-NEXT: testb $8, %cl
+; SSE-NEXT: cmovnel %esi, %eax
; SSE-NEXT: retq
;
; AVX-LABEL: movmsk_sgt_v4i64_3:
; AVX: # %bb.0:
; AVX-NEXT: movl %edi, %eax
; AVX-NEXT: vmovmskpd %ymm0, %ecx
-; AVX-NEXT: shlb $4, %cl
-; AVX-NEXT: sarb $4, %cl
-; AVX-NEXT: cmovsl %esi, %eax
+; AVX-NEXT: testb $8, %cl
+; AVX-NEXT: cmovnel %esi, %eax
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
%cmp = icmp slt <4 x i64> %v, zeroinitializer
@@ -487,22 +473,18 @@ define i32 @movmsk_slt_v32i8_31(<32 x i8> %v, i32 %a, i32 %b) {
; SSE-LABEL: movmsk_slt_v32i8_31:
; SSE: # %bb.0:
; SSE-NEXT: movl %edi, %eax
-; SSE-NEXT: pmovmskb %xmm0, %ecx
-; SSE-NEXT: pmovmskb %xmm1, %edx
-; SSE-NEXT: shll $16, %edx
-; SSE-NEXT: orl %ecx, %edx
-; SSE-NEXT: cmovnsl %esi, %eax
+; SSE-NEXT: pmovmskb %xmm1, %ecx
+; SSE-NEXT: testl $32768, %ecx # imm = 0x8000
+; SSE-NEXT: cmovel %esi, %eax
; SSE-NEXT: retq
;
; AVX1-LABEL: movmsk_slt_v32i8_31:
; AVX1: # %bb.0:
; AVX1-NEXT: movl %edi, %eax
-; AVX1-NEXT: vpmovmskb %xmm0, %ecx
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpmovmskb %xmm0, %edx
-; AVX1-NEXT: shll $16, %edx
-; AVX1-NEXT: orl %ecx, %edx
-; AVX1-NEXT: cmovnsl %esi, %eax
+; AVX1-NEXT: vpmovmskb %xmm0, %ecx
+; AVX1-NEXT: testl $32768, %ecx # imm = 0x8000
+; AVX1-NEXT: cmovel %esi, %eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
@@ -534,22 +516,18 @@ define i32 @movmsk_sgt_v32i8_31(<32 x i8> %v, i32 %a, i32 %b) {
; SSE-LABEL: movmsk_sgt_v32i8_31:
; SSE: # %bb.0:
; SSE-NEXT: movl %edi, %eax
-; SSE-NEXT: pmovmskb %xmm0, %ecx
-; SSE-NEXT: pmovmskb %xmm1, %edx
-; SSE-NEXT: shll $16, %edx
-; SSE-NEXT: orl %ecx, %edx
-; SSE-NEXT: cmovsl %esi, %eax
+; SSE-NEXT: pmovmskb %xmm1, %ecx
+; SSE-NEXT: testl $32768, %ecx # imm = 0x8000
+; SSE-NEXT: cmovnel %esi, %eax
; SSE-NEXT: retq
;
; AVX1-LABEL: movmsk_sgt_v32i8_31:
; AVX1: # %bb.0:
; AVX1-NEXT: movl %edi, %eax
-; AVX1-NEXT: vpmovmskb %xmm0, %ecx
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpmovmskb %xmm0, %edx
-; AVX1-NEXT: shll $16, %edx
-; AVX1-NEXT: orl %ecx, %edx
-; AVX1-NEXT: cmovsl %esi, %eax
+; AVX1-NEXT: vpmovmskb %xmm0, %ecx
+; AVX1-NEXT: testl $32768, %ecx # imm = 0x8000
+; AVX1-NEXT: cmovnel %esi, %eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
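The is_fpclass-fp80.ll changes come from the same combine applied to a scalar sign test: an expanded SIGN_EXTEND_INREG (shl+ashr) feeding a sign-flag compare is replaced by a TEST of the originating bit, with the sign mask shifted right by the SHL amount (the `BitMask.lshrInPlace(*ShiftAmt)` step above). A hedged sketch of the pattern — a standalone example, not taken from the patch:

```llvm
; shl+ashr sign-extends the low i16 in an i32; the compare only needs
; the sign bit of the result, i.e. bit 15 of %x.
define i1 @signbit_of_i16(i32 %x) {
  %shl = shl i32 %x, 16
  %sra = ashr i32 %shl, 16
  %cmp = icmp slt i32 %sra, 0
  ret i1 %cmp
}
; Expected lowering with this combine (x86-64, registers may vary):
;   testl $32768, %edi  # imm = 0x8000
;   setne %al
```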
LGTM.

LGTM