-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[DAGCombiner][X86] Correctly clean up high bits in combinei64TruncSrlAdd
#128353
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-x86 Author: Yingwei Zheng (dtcxzyw) ChangesA counterexample for original implementation: https://alive2.llvm.org/ce/z/YowPZY Full diff: https://github.com/llvm/llvm-project/pull/128353.diff 2 Files Affected:
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 95fd7f9b94282..0bd0dbeac2087 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -53712,23 +53712,20 @@ static SDValue combinei64TruncSrlAdd(SDValue N, EVT VT, SelectionDAG &DAG,
m_ConstInt(SrlConst)))))
return SDValue();
- if (SrlConst.ule(32) || AddConst.lshr(SrlConst).shl(SrlConst) != AddConst)
+ if (SrlConst.ule(32) || AddConst.countr_zero() < SrlConst.getZExtValue())
return SDValue();
SDValue AddLHSSrl =
DAG.getNode(ISD::SRL, DL, MVT::i64, AddLhs, N.getOperand(1));
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, AddLHSSrl);
- APInt NewAddConstVal =
- (~((~AddConst).lshr(SrlConst))).trunc(VT.getSizeInBits());
+ APInt NewAddConstVal = AddConst.lshr(SrlConst).trunc(VT.getSizeInBits());
SDValue NewAddConst = DAG.getConstant(NewAddConstVal, DL, VT);
SDValue NewAddNode = DAG.getNode(ISD::ADD, DL, VT, Trunc, NewAddConst);
- APInt CleanupSizeConstVal = (SrlConst - 32).zextOrTrunc(VT.getSizeInBits());
EVT CleanUpVT =
- EVT::getIntegerVT(*DAG.getContext(), CleanupSizeConstVal.getZExtValue());
- SDValue CleanUp = DAG.getAnyExtOrTrunc(NewAddNode, DL, CleanUpVT);
- return DAG.getAnyExtOrTrunc(CleanUp, DL, VT);
+ EVT::getIntegerVT(*DAG.getContext(), 64 - SrlConst.getZExtValue());
+ return DAG.getZeroExtendInReg(NewAddNode, DL, CleanUpVT);
}
/// Attempt to pre-truncate inputs to arithmetic ops if it will simplify
diff --git a/llvm/test/CodeGen/X86/combine-i64-trunc-srl-add.ll b/llvm/test/CodeGen/X86/combine-i64-trunc-srl-add.ll
index 41e1a24b239a6..14992ca5bf488 100644
--- a/llvm/test/CodeGen/X86/combine-i64-trunc-srl-add.ll
+++ b/llvm/test/CodeGen/X86/combine-i64-trunc-srl-add.ll
@@ -7,8 +7,9 @@ define i1 @test_ult_trunc_add(i64 %x) {
; X64-LABEL: test_ult_trunc_add:
; X64: # %bb.0:
; X64-NEXT: shrq $48, %rdi
-; X64-NEXT: addl $-65522, %edi # imm = 0xFFFF000E
-; X64-NEXT: cmpl $3, %edi
+; X64-NEXT: addl $14, %edi
+; X64-NEXT: movzwl %di, %eax
+; X64-NEXT: cmpl $3, %eax
; X64-NEXT: setb %al
; X64-NEXT: retq
%add = add i64 %x, 3940649673949184
@@ -22,8 +23,9 @@ define i1 @test_ult_add(i64 %x) {
; X64-LABEL: test_ult_add:
; X64: # %bb.0:
; X64-NEXT: shrq $48, %rdi
-; X64-NEXT: addl $-65522, %edi # imm = 0xFFFF000E
-; X64-NEXT: cmpl $3, %edi
+; X64-NEXT: addl $14, %edi
+; X64-NEXT: movzwl %di, %eax
+; X64-NEXT: cmpl $3, %eax
; X64-NEXT: setb %al
; X64-NEXT: retq
%add = add i64 3940649673949184, %x
@@ -35,8 +37,9 @@ define i1 @test_ugt_trunc_add(i64 %x) {
; X64-LABEL: test_ugt_trunc_add:
; X64: # %bb.0:
; X64-NEXT: shrq $48, %rdi
-; X64-NEXT: addl $-65522, %edi # imm = 0xFFFF000E
-; X64-NEXT: cmpl $4, %edi
+; X64-NEXT: addl $14, %edi
+; X64-NEXT: movzwl %di, %eax
+; X64-NEXT: cmpl $4, %eax
; X64-NEXT: setae %al
; X64-NEXT: retq
%add = add i64 %x, 3940649673949184
@@ -116,7 +119,8 @@ define i32 @test_trunc_add(i64 %x) {
; X64-LABEL: test_trunc_add:
; X64: # %bb.0:
; X64-NEXT: shrq $48, %rdi
-; X64-NEXT: leal -65522(%rdi), %eax
+; X64-NEXT: addl $14, %edi
+; X64-NEXT: movzwl %di, %eax
; X64-NEXT: retq
%add = add i64 %x, 3940649673949184
%shr = lshr i64 %add, 48
@@ -151,3 +155,20 @@ for.body:
exit:
ret i32 0
}
+
+define i64 @pr128309(i64 %x) {
+; X64-LABEL: pr128309:
+; X64: # %bb.0: # %entry
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: andl $18114, %eax # imm = 0x46C2
+; X64-NEXT: addl $6, %eax
+; X64-NEXT: andl %edi, %eax
+; X64-NEXT: retq
+entry:
+ %shl = shl i64 %x, 48
+ %and = and i64 %shl, 5098637728136822784
+ %add = add i64 %and, 1688849860263936
+ %lshr = lshr i64 %add, 48
+ %res = and i64 %lshr, %x
+ ret i64 %res
+}
|
The alive2 link only proves the problem in the calculation of NewAddConstVal, but is the problem in #128309 actually caused by zext vs. anyext? |
We had discussions regarding eliminating |
One proof: https://alive2.llvm.org/ce/z/xMsYxr |
Yeah, the original issue can be fixed by using zext instead of anyext. But alive2 still complains about correctness, so I also have to fix the number of low bits that should be kept.
We can hardcode c2 into the type and divide anyext into sext+zext (2 pairs): https://alive2.llvm.org/ce/z/7ieYLg |
Thanks for the pointer! I finally proved we must use zext as long as
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, thanks!
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/30/builds/16286 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/73/builds/13479 Here is the relevant piece of the build log for the reference
|
… `xor` (#128435) As discussed in #126448, the fold implemented by #126448 / #128353 can be extended to operations other than `add`. This patch extends the fold performed by `combinei64TruncSrlAdd` to include `or` and `xor` (proof: https://alive2.llvm.org/ce/z/AXuaQu). There's no need to extend it to `sub` and `and`, as similar folds are already being performed for those operations. CC: @phoebewang @RKSimon
… `xor` (llvm#128435) As discussed in llvm#126448, the fold implemented by llvm#126448 / llvm#128353 can be extended to operations other than `add`. This patch extends the fold performed by `combinei64TruncSrlAdd` to include `or` and `xor` (proof: https://alive2.llvm.org/ce/z/AXuaQu). There's no need to extend it to `sub` and `and`, as similar folds are already being performed for those operations. CC: @phoebewang @RKSimon
A counterexample for the original implementation: https://alive2.llvm.org/ce/z/7ieYLg
This patch uses zext instead of anyext to fix the original issue.
BTW, we should keep the low `64 - shamt` bits instead of the low `shamt - 32` bits: https://alive2.llvm.org/ce/z/ruQP_Z. Some code has been simplified to avoid confusion.
Proof: https://alive2.llvm.org/ce/z/z_jdHD
Closes #128309.