-
Notifications
You must be signed in to change notification settings - Fork 15.1k
[DAGCombiner] Fold vector subtraction if above threshold to umin
#148834
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
@llvm/pr-subscribers-llvm-selectiondag @llvm/pr-subscribers-backend-risc-v Author: Piotr Fusik (pfusik) ChangesThis extends #134235 to vectors. Full diff: https://github.com/llvm/llvm-project/pull/148834.diff 2 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 23812d795f5fa..27e9b3ccc47e3 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -4093,6 +4093,26 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
return N0;
}
+ // (sub x, ([v]select (ult x, y), 0, y)) -> (umin x, (sub x, y))
+ // (sub x, ([v]select (uge x, y), y, 0)) -> (umin x, (sub x, y))
+ if (N1.hasOneUse() && hasUMin(VT)) {
+ SDValue Y;
+ if (sd_match(N1, m_Select(m_SetCC(m_Specific(N0), m_Value(Y),
+ m_SpecificCondCode(ISD::SETULT)),
+ m_Zero(), m_Deferred(Y))) ||
+ sd_match(N1, m_Select(m_SetCC(m_Specific(N0), m_Value(Y),
+ m_SpecificCondCode(ISD::SETUGE)),
+ m_Deferred(Y), m_Zero())) ||
+ sd_match(N1, m_VSelect(m_SetCC(m_Specific(N0), m_Value(Y),
+ m_SpecificCondCode(ISD::SETULT)),
+ m_Zero(), m_Deferred(Y))) ||
+ sd_match(N1, m_VSelect(m_SetCC(m_Specific(N0), m_Value(Y),
+ m_SpecificCondCode(ISD::SETUGE)),
+ m_Deferred(Y), m_Zero())))
+ return DAG.getNode(ISD::UMIN, DL, VT, N0,
+ DAG.getNode(ISD::SUB, DL, VT, N0, Y));
+ }
+
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
@@ -4442,20 +4462,6 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
sd_match(N1, m_UMaxLike(m_Specific(A), m_Specific(B))))
return DAG.getNegative(DAG.getNode(ISD::ABDU, DL, VT, A, B), DL, VT);
- // (sub x, (select (ult x, y), 0, y)) -> (umin x, (sub x, y))
- // (sub x, (select (uge x, y), y, 0)) -> (umin x, (sub x, y))
- if (hasUMin(VT)) {
- SDValue Y;
- if (sd_match(N1, m_OneUse(m_Select(m_SetCC(m_Specific(N0), m_Value(Y),
- m_SpecificCondCode(ISD::SETULT)),
- m_Zero(), m_Deferred(Y)))) ||
- sd_match(N1, m_OneUse(m_Select(m_SetCC(m_Specific(N0), m_Value(Y),
- m_SpecificCondCode(ISD::SETUGE)),
- m_Deferred(Y), m_Zero()))))
- return DAG.getNode(ISD::UMIN, DL, VT, N0,
- DAG.getNode(ISD::SUB, DL, VT, N0, Y));
- }
-
return SDValue();
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vminu-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vminu-sdnode.ll
index e3b2d6c1efe1f..98bfa8d359e21 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vminu-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vminu-sdnode.ll
@@ -893,3 +893,107 @@ define <vscale x 8 x i32> @vmin_vi_mask_nxv8i32(<vscale x 8 x i32> %va, <vscale
%vc = select <vscale x 8 x i1> %cmp, <vscale x 8 x i32> %va, <vscale x 8 x i32> %vs
ret <vscale x 8 x i32> %vc
}
+
+define <vscale x 2 x i8> @vsub_if_uge_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb) {
+; CHECK-LABEL: vsub_if_uge_nxv2i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vsub.vv v9, v8, v9
+; CHECK-NEXT: vminu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %cmp = icmp ult <vscale x 2 x i8> %va, %vb
+ %select = select <vscale x 2 x i1> %cmp, <vscale x 2 x i8> zeroinitializer, <vscale x 2 x i8> %vb
+ %sub = sub nuw <vscale x 2 x i8> %va, %select
+ ret <vscale x 2 x i8> %sub
+}
+
+define <vscale x 2 x i8> @vsub_if_uge_swapped_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb) {
+; CHECK-LABEL: vsub_if_uge_swapped_nxv2i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vsub.vv v9, v8, v9
+; CHECK-NEXT: vminu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %cmp = icmp uge <vscale x 2 x i8> %va, %vb
+ %select = select <vscale x 2 x i1> %cmp, <vscale x 2 x i8> %vb, <vscale x 2 x i8> zeroinitializer
+ %sub = sub nuw <vscale x 2 x i8> %va, %select
+ ret <vscale x 2 x i8> %sub
+}
+
+define <vscale x 2 x i16> @vsub_if_uge_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb) {
+; CHECK-LABEL: vsub_if_uge_nxv2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vsub.vv v9, v8, v9
+; CHECK-NEXT: vminu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %cmp = icmp ult <vscale x 2 x i16> %va, %vb
+ %select = select <vscale x 2 x i1> %cmp, <vscale x 2 x i16> zeroinitializer, <vscale x 2 x i16> %vb
+ %sub = sub nuw <vscale x 2 x i16> %va, %select
+ ret <vscale x 2 x i16> %sub
+}
+
+define <vscale x 2 x i16> @vsub_if_uge_swapped_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb) {
+; CHECK-LABEL: vsub_if_uge_swapped_nxv2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vsub.vv v9, v8, v9
+; CHECK-NEXT: vminu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %cmp = icmp uge <vscale x 2 x i16> %va, %vb
+ %select = select <vscale x 2 x i1> %cmp, <vscale x 2 x i16> %vb, <vscale x 2 x i16> zeroinitializer
+ %sub = sub nuw <vscale x 2 x i16> %va, %select
+ ret <vscale x 2 x i16> %sub
+}
+
+define <vscale x 2 x i32> @vsub_if_uge_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb) {
+; CHECK-LABEL: vsub_if_uge_nxv2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT: vsub.vv v9, v8, v9
+; CHECK-NEXT: vminu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %cmp = icmp ult <vscale x 2 x i32> %va, %vb
+ %select = select <vscale x 2 x i1> %cmp, <vscale x 2 x i32> zeroinitializer, <vscale x 2 x i32> %vb
+ %sub = sub nuw <vscale x 2 x i32> %va, %select
+ ret <vscale x 2 x i32> %sub
+}
+
+define <vscale x 2 x i32> @vsub_if_uge_swapped_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb) {
+; CHECK-LABEL: vsub_if_uge_swapped_nxv2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT: vsub.vv v9, v8, v9
+; CHECK-NEXT: vminu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %cmp = icmp uge <vscale x 2 x i32> %va, %vb
+ %select = select <vscale x 2 x i1> %cmp, <vscale x 2 x i32> %vb, <vscale x 2 x i32> zeroinitializer
+ %sub = sub nuw <vscale x 2 x i32> %va, %select
+ ret <vscale x 2 x i32> %sub
+}
+
+define <vscale x 2 x i64> @vsub_if_uge_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb) {
+; CHECK-LABEL: vsub_if_uge_nxv2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma
+; CHECK-NEXT: vsub.vv v10, v8, v10
+; CHECK-NEXT: vminu.vv v8, v8, v10
+; CHECK-NEXT: ret
+ %cmp = icmp ult <vscale x 2 x i64> %va, %vb
+ %select = select <vscale x 2 x i1> %cmp, <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i64> %vb
+ %sub = sub nuw <vscale x 2 x i64> %va, %select
+ ret <vscale x 2 x i64> %sub
+}
+
+define <vscale x 2 x i64> @vsub_if_uge_swapped_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb) {
+; CHECK-LABEL: vsub_if_uge_swapped_nxv2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma
+; CHECK-NEXT: vsub.vv v10, v8, v10
+; CHECK-NEXT: vminu.vv v8, v8, v10
+; CHECK-NEXT: ret
+ %cmp = icmp uge <vscale x 2 x i64> %va, %vb
+ %select = select <vscale x 2 x i1> %cmp, <vscale x 2 x i64> %vb, <vscale x 2 x i64> zeroinitializer
+ %sub = sub nuw <vscale x 2 x i64> %va, %select
+ ret <vscale x 2 x i64> %sub
+}
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Moved before:
if (SDValue NewSel = foldBinOpIntoSelect(N))which transforms this sub-maybe-zero pattern to a conditional sub in foldSelectWithIdentityConstant.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Unfortunately, fixed vectors are not matched on RISC-V.
TLI.isOperationLegal(ISD::UMIN, LK.second) in hasUMin yields false because operation action is Custom.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Most TLI.isOperationLegal checks should be isOperationLegalOrCustom
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done, thanks!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
|
Tests merged into main branch as 4166df2. PR rebased. |
This extends #134235 and #135194 to vectors.