-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[InstCombine] Canonicalize max(min(X, MinC), MaxC) -> min(max(X, MaxC), MinC)
#136665
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-transforms Author: Iris (el-ev) Changes: Closes #121870. Patch is 28.65 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/136665.diff 8 Files Affected:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 38519d81fce8d..3dd7a7a35bcdc 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1924,6 +1924,26 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
}
}
+ // Canonicalize smax(smin(X, MinC), MaxC) to smin(smax(X, MaxC), MinC)
+ if (IID == Intrinsic::smax) {
+ Constant *MinC, *MaxC;
+ if ((match(I1, m_Constant(MaxC)) &&
+ (match(I0, m_OneUse(m_Intrinsic<Intrinsic::smin>(
+ m_Value(X), m_Constant(MinC)))) ||
+ match(I0, m_OneUse(m_Intrinsic<Intrinsic::smin>(m_Constant(MinC),
+ m_Value(X)))))) ||
+ (match(I0, m_Constant(MaxC)) &&
+ (match(I1, m_OneUse(m_Intrinsic<Intrinsic::smin>(
+ m_Value(X), m_Constant(MinC)))) ||
+ match(I1, m_OneUse(m_Intrinsic<Intrinsic::smin>(m_Constant(MinC),
+ m_Value(X))))))) {
+ Value *NewSMax =
+ Builder.CreateBinaryIntrinsic(Intrinsic::smax, X, MaxC);
+ return replaceInstUsesWith(
+ *II, Builder.CreateBinaryIntrinsic(Intrinsic::smin, NewSMax, MinC));
+ }
+ }
+
// umin(i1 X, i1 Y) -> and i1 X, Y
// smax(i1 X, i1 Y) -> and i1 X, Y
if ((IID == Intrinsic::umin || IID == Intrinsic::smax) &&
diff --git a/llvm/test/Transforms/InstCombine/clamp-to-minmax.ll b/llvm/test/Transforms/InstCombine/clamp-to-minmax.ll
index 478d437847127..b557c0dbe2629 100644
--- a/llvm/test/Transforms/InstCombine/clamp-to-minmax.ll
+++ b/llvm/test/Transforms/InstCombine/clamp-to-minmax.ll
@@ -83,7 +83,7 @@ define float @clamp_float_fast_unordered_strict_maxmin(float %x) {
; (X <= C1) ? C1 : MIN(X, C2)
define float @clamp_float_fast_unordered_nonstrict_maxmin(float %x) {
; CHECK-LABEL: @clamp_float_fast_unordered_nonstrict_maxmin(
-; CHECK-NEXT: [[MIN:%.*]] = call fast float @llvm.minnum.f32(float [[X:%.*]], float 2.550000e+02)
+; CHECK-NEXT: [[MIN:%.*]] = call fast float @llvm.minnum.f32(float [[X:%.*]], float 2.550000e+02)
; CHECK-NEXT: [[CMP1:%.*]] = fcmp fast ule float [[X]], 1.000000e+00
; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP1]], float 1.000000e+00, float [[MIN]]
; CHECK-NEXT: ret float [[R]]
@@ -98,7 +98,7 @@ define float @clamp_float_fast_unordered_nonstrict_maxmin(float %x) {
; (X > C1) ? C1 : MAX(X, C2)
define float @clamp_float_fast_unordered_strict_minmax(float %x) {
; CHECK-LABEL: @clamp_float_fast_unordered_strict_minmax(
-; CHECK-NEXT: [[MAX:%.*]] = call fast float @llvm.maxnum.f32(float [[X:%.*]], float 1.000000e+00)
+; CHECK-NEXT: [[MAX:%.*]] = call fast float @llvm.maxnum.f32(float [[X:%.*]], float 1.000000e+00)
; CHECK-NEXT: [[CMP1:%.*]] = fcmp fast ugt float [[X]], 2.550000e+02
; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP1]], float 2.550000e+02, float [[MAX]]
; CHECK-NEXT: ret float [[R]]
@@ -113,7 +113,7 @@ define float @clamp_float_fast_unordered_strict_minmax(float %x) {
; (X >= C1) ? C1 : MAX(X, C2)
define float @clamp_float_fast_unordered_nonstrict_minmax(float %x) {
; CHECK-LABEL: @clamp_float_fast_unordered_nonstrict_minmax(
-; CHECK-NEXT: [[MAX:%.*]] = call fast float @llvm.maxnum.f32(float [[X:%.*]], float 1.000000e+00)
+; CHECK-NEXT: [[MAX:%.*]] = call fast float @llvm.maxnum.f32(float [[X:%.*]], float 1.000000e+00)
; CHECK-NEXT: [[CMP1:%.*]] = fcmp fast uge float [[X]], 2.550000e+02
; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP1]], float 2.550000e+02, float [[MAX]]
; CHECK-NEXT: ret float [[R]]
@@ -147,7 +147,7 @@ define float @clamp_test_1(float %x) {
; Like @clamp_test_1 but HighConst < LowConst
define float @clamp_negative_wrong_const(float %x) {
; CHECK-LABEL: @clamp_negative_wrong_const(
-; CHECK-NEXT: [[INNER_SEL:%.*]] = call fast float @llvm.minnum.f32(float [[X:%.*]], float 2.550000e+02)
+; CHECK-NEXT: [[INNER_SEL:%.*]] = call fast float @llvm.minnum.f32(float [[X:%.*]], float 2.550000e+02)
; CHECK-NEXT: [[OUTER_CMP:%.*]] = fcmp fast ugt float [[X]], 5.120000e+02
; CHECK-NEXT: [[R:%.*]] = select i1 [[OUTER_CMP]], float [[INNER_SEL]], float 5.120000e+02
; CHECK-NEXT: ret float [[R]]
@@ -162,7 +162,7 @@ define float @clamp_negative_wrong_const(float %x) {
; Like @clamp_test_1 but both are min
define float @clamp_negative_same_op(float %x) {
; CHECK-LABEL: @clamp_negative_same_op(
-; CHECK-NEXT: [[INNER_SEL:%.*]] = call fast float @llvm.minnum.f32(float [[X:%.*]], float 2.550000e+02)
+; CHECK-NEXT: [[INNER_SEL:%.*]] = call fast float @llvm.minnum.f32(float [[X:%.*]], float 2.550000e+02)
; CHECK-NEXT: [[OUTER_CMP:%.*]] = fcmp fast ult float [[X]], 1.000000e+00
; CHECK-NEXT: [[R:%.*]] = select i1 [[OUTER_CMP]], float [[INNER_SEL]], float 1.000000e+00
; CHECK-NEXT: ret float [[R]]
@@ -500,9 +500,9 @@ define float @ui64_clamp_and_cast_to_float(i64 %x) {
define float @mixed_clamp_to_float_1(i32 %x) {
; CHECK-LABEL: @mixed_clamp_to_float_1(
-; CHECK-NEXT: [[SI_MIN:%.*]] = call i32 @llvm.smin.i32(i32 [[X:%.*]], i32 255)
-; CHECK-NEXT: [[R1:%.*]] = call i32 @llvm.smax.i32(i32 [[SI_MIN]], i32 1)
-; CHECK-NEXT: [[R:%.*]] = uitofp nneg i32 [[R1]] to float
+; CHECK-NEXT: [[R1:%.*]] = call i32 @llvm.smax.i32(i32 [[SI_MIN:%.*]], i32 1)
+; CHECK-NEXT: [[R2:%.*]] = call i32 @llvm.smin.i32(i32 [[R1]], i32 255)
+; CHECK-NEXT: [[R:%.*]] = uitofp nneg i32 [[R2]] to float
; CHECK-NEXT: ret float [[R]]
;
%si_min_cmp = icmp sgt i32 %x, 255
@@ -535,9 +535,9 @@ define i32 @mixed_clamp_to_i32_1(float %x) {
define float @mixed_clamp_to_float_2(i32 %x) {
; CHECK-LABEL: @mixed_clamp_to_float_2(
-; CHECK-NEXT: [[SI_MIN:%.*]] = call i32 @llvm.smin.i32(i32 [[X:%.*]], i32 255)
-; CHECK-NEXT: [[R1:%.*]] = call i32 @llvm.smax.i32(i32 [[SI_MIN]], i32 1)
-; CHECK-NEXT: [[R:%.*]] = uitofp nneg i32 [[R1]] to float
+; CHECK-NEXT: [[R1:%.*]] = call i32 @llvm.smax.i32(i32 [[SI_MIN:%.*]], i32 1)
+; CHECK-NEXT: [[R2:%.*]] = call i32 @llvm.smin.i32(i32 [[R1]], i32 255)
+; CHECK-NEXT: [[R:%.*]] = uitofp nneg i32 [[R2]] to float
; CHECK-NEXT: ret float [[R]]
;
%si_min_cmp = icmp sgt i32 %x, 255
@@ -568,9 +568,9 @@ define i32 @mixed_clamp_to_i32_2(float %x) {
define <2 x float> @mixed_clamp_to_float_vec(<2 x i32> %x) {
; CHECK-LABEL: @mixed_clamp_to_float_vec(
-; CHECK-NEXT: [[SI_MIN:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[X:%.*]], <2 x i32> splat (i32 255))
-; CHECK-NEXT: [[R1:%.*]] = call <2 x i32> @llvm.smax.v2i32(<2 x i32> [[SI_MIN]], <2 x i32> splat (i32 1))
-; CHECK-NEXT: [[R:%.*]] = uitofp nneg <2 x i32> [[R1]] to <2 x float>
+; CHECK-NEXT: [[R1:%.*]] = call <2 x i32> @llvm.smax.v2i32(<2 x i32> [[SI_MIN:%.*]], <2 x i32> splat (i32 1))
+; CHECK-NEXT: [[R2:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[R1]], <2 x i32> splat (i32 255))
+; CHECK-NEXT: [[R:%.*]] = uitofp nneg <2 x i32> [[R2]] to <2 x float>
; CHECK-NEXT: ret <2 x float> [[R]]
;
%si_min_cmp = icmp sgt <2 x i32> %x, <i32 255, i32 255>
diff --git a/llvm/test/Transforms/InstCombine/max_known_bits.ll b/llvm/test/Transforms/InstCombine/max_known_bits.ll
index 3eb53b32efecc..162abf0efb7cc 100644
--- a/llvm/test/Transforms/InstCombine/max_known_bits.ll
+++ b/llvm/test/Transforms/InstCombine/max_known_bits.ll
@@ -35,9 +35,9 @@ define i16 @min_max_clamp(i16 %x) {
; Same as above with min/max reversed.
define i16 @min_max_clamp_2(i16 %x) {
; CHECK-LABEL: @min_max_clamp_2(
-; CHECK-NEXT: [[B:%.*]] = call i16 @llvm.smin.i16(i16 [[X:%.*]], i16 2047)
-; CHECK-NEXT: [[D:%.*]] = call i16 @llvm.smax.i16(i16 [[B]], i16 -2048)
-; CHECK-NEXT: [[E:%.*]] = add nsw i16 [[D]], 1
+; CHECK-NEXT: [[D:%.*]] = call i16 @llvm.smax.i16(i16 [[B:%.*]], i16 -2048)
+; CHECK-NEXT: [[D1:%.*]] = call i16 @llvm.smin.i16(i16 [[D]], i16 2047)
+; CHECK-NEXT: [[E:%.*]] = add nsw i16 [[D1]], 1
; CHECK-NEXT: ret i16 [[E]]
;
%a = icmp slt i16 %x, 2047
@@ -71,9 +71,9 @@ define i32 @min_max_clamp_3(i16 %x) {
; Same as above with min/max order reversed
define i32 @min_max_clamp_4(i16 %x) {
; CHECK-LABEL: @min_max_clamp_4(
-; CHECK-NEXT: [[B:%.*]] = call i16 @llvm.smin.i16(i16 [[X:%.*]], i16 2047)
-; CHECK-NEXT: [[D:%.*]] = call i16 @llvm.smax.i16(i16 [[B]], i16 -2048)
-; CHECK-NEXT: [[TMP1:%.*]] = sext i16 [[D]] to i32
+; CHECK-NEXT: [[D:%.*]] = call i16 @llvm.smax.i16(i16 [[B:%.*]], i16 -2048)
+; CHECK-NEXT: [[D1:%.*]] = call i16 @llvm.smin.i16(i16 [[D]], i16 2047)
+; CHECK-NEXT: [[TMP1:%.*]] = sext i16 [[D1]] to i32
; CHECK-NEXT: ret i32 [[TMP1]]
;
%a = icmp slt i16 %x, 2047
@@ -106,9 +106,9 @@ define i16 @min_max_clamp_intrinsic(i16 %x) {
define i16 @min_max_clamp_intrinsic_2(i16 %x) {
; CHECK-LABEL: @min_max_clamp_intrinsic_2(
-; CHECK-NEXT: [[A:%.*]] = call i16 @llvm.smin.i16(i16 [[X:%.*]], i16 2047)
-; CHECK-NEXT: [[B:%.*]] = call i16 @llvm.smax.i16(i16 [[A]], i16 -2048)
-; CHECK-NEXT: [[C:%.*]] = add nsw i16 [[B]], 1
+; CHECK-NEXT: [[B:%.*]] = call i16 @llvm.smax.i16(i16 [[A:%.*]], i16 -2048)
+; CHECK-NEXT: [[B1:%.*]] = call i16 @llvm.smin.i16(i16 [[B]], i16 2047)
+; CHECK-NEXT: [[C:%.*]] = add nsw i16 [[B1]], 1
; CHECK-NEXT: ret i16 [[C]]
;
%a = call i16 @llvm.smin.i16(i16 %x, i16 2047)
@@ -134,9 +134,9 @@ define i32 @min_max_clamp_intrinsic_3(i16 %x) {
define i32 @min_max_clamp_intrinsic_4(i16 %x) {
; CHECK-LABEL: @min_max_clamp_intrinsic_4(
-; CHECK-NEXT: [[A:%.*]] = call i16 @llvm.smin.i16(i16 [[X:%.*]], i16 2047)
-; CHECK-NEXT: [[B:%.*]] = call i16 @llvm.smax.i16(i16 [[A]], i16 -2048)
-; CHECK-NEXT: [[TMP1:%.*]] = sext i16 [[B]] to i32
+; CHECK-NEXT: [[B:%.*]] = call i16 @llvm.smax.i16(i16 [[A:%.*]], i16 -2048)
+; CHECK-NEXT: [[B1:%.*]] = call i16 @llvm.smin.i16(i16 [[B]], i16 2047)
+; CHECK-NEXT: [[TMP1:%.*]] = sext i16 [[B1]] to i32
; CHECK-NEXT: ret i32 [[TMP1]]
;
%a = call i16 @llvm.smin.i16(i16 %x, i16 2047)
diff --git a/llvm/test/Transforms/InstCombine/minmax-fold.ll b/llvm/test/Transforms/InstCombine/minmax-fold.ll
index 4d66e261c649c..0f73dc0e23d2f 100644
--- a/llvm/test/Transforms/InstCombine/minmax-fold.ll
+++ b/llvm/test/Transforms/InstCombine/minmax-fold.ll
@@ -346,9 +346,9 @@ define i32 @test75(i32 %x) {
define i32 @clamp_signed1(i32 %x) {
; CHECK-LABEL: @clamp_signed1(
-; CHECK-NEXT: [[MIN:%.*]] = call i32 @llvm.smin.i32(i32 [[X:%.*]], i32 255)
-; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.smax.i32(i32 [[MIN]], i32 15)
-; CHECK-NEXT: ret i32 [[R]]
+; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.smax.i32(i32 [[MIN:%.*]], i32 15)
+; CHECK-NEXT: [[R1:%.*]] = call i32 @llvm.smin.i32(i32 [[R]], i32 255)
+; CHECK-NEXT: ret i32 [[R1]]
;
%cmp2 = icmp slt i32 %x, 255
%min = select i1 %cmp2, i32 %x, i32 255
@@ -376,9 +376,9 @@ define i32 @clamp_signed2(i32 %x) {
define i32 @clamp_signed3(i32 %x) {
; CHECK-LABEL: @clamp_signed3(
-; CHECK-NEXT: [[MIN:%.*]] = call i32 @llvm.smin.i32(i32 [[X:%.*]], i32 255)
-; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.smax.i32(i32 [[MIN]], i32 15)
-; CHECK-NEXT: ret i32 [[R]]
+; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.smax.i32(i32 [[MIN:%.*]], i32 15)
+; CHECK-NEXT: [[R1:%.*]] = call i32 @llvm.smin.i32(i32 [[R]], i32 255)
+; CHECK-NEXT: ret i32 [[R1]]
;
%cmp2 = icmp slt i32 %x, 255
%min = select i1 %cmp2, i32 %x, i32 255
@@ -467,9 +467,9 @@ define i32 @clamp_unsigned4(i32 %x) {
; (icmp sgt smin(PositiveA, B) 0) -> (icmp sgt B 0)
define i32 @clamp_check_for_no_infinite_loop1(i32 %i) {
; CHECK-LABEL: @clamp_check_for_no_infinite_loop1(
-; CHECK-NEXT: [[SEL1:%.*]] = call i32 @llvm.smin.i32(i32 [[I:%.*]], i32 255)
-; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.smax.i32(i32 [[SEL1]], i32 0)
-; CHECK-NEXT: ret i32 [[RES]]
+; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.smax.i32(i32 [[SEL1:%.*]], i32 0)
+; CHECK-NEXT: [[RES1:%.*]] = call i32 @llvm.smin.i32(i32 [[RES]], i32 255)
+; CHECK-NEXT: ret i32 [[RES1]]
;
%cmp1 = icmp slt i32 %i, 255
%sel1 = select i1 %cmp1, i32 %i, i32 255
@@ -1429,8 +1429,8 @@ define i8 @PR46271(<2 x i8> %x) {
define i32 @twoway_clamp_lt(i32 %num) {
; CHECK-LABEL: @twoway_clamp_lt(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[NUM:%.*]], 13767
-; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP0]], i32 13768, i32 13767
+; CHECK-NEXT: [[TMP0:%.*]] = icmp slt i32 [[NUM:%.*]], 13768
+; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP0]], i32 13767, i32 13768
; CHECK-NEXT: ret i32 [[R]]
;
entry:
diff --git a/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll b/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll
index 0b7127f82b612..8532da966399a 100644
--- a/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll
@@ -774,8 +774,8 @@ define i8 @clamp_two_vals_smax_smin(i8 %x) {
define <3 x i8> @clamp_two_vals_smin_smax(<3 x i8> %x) {
; CHECK-LABEL: @clamp_two_vals_smin_smax(
-; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <3 x i8> [[X:%.*]], splat (i8 41)
-; CHECK-NEXT: [[R:%.*]] = select <3 x i1> [[TMP1]], <3 x i8> splat (i8 42), <3 x i8> splat (i8 41)
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <3 x i8> [[X:%.*]], splat (i8 42)
+; CHECK-NEXT: [[R:%.*]] = select <3 x i1> [[TMP1]], <3 x i8> splat (i8 41), <3 x i8> splat (i8 42)
; CHECK-NEXT: ret <3 x i8> [[R]]
;
%m = call <3 x i8> @llvm.smin.v3i8(<3 x i8> %x, <3 x i8> <i8 42, i8 42, i8 42>)
@@ -2192,9 +2192,9 @@ define i8 @umin_umin_reassoc_constants(i8 %x) {
define i8 @smin_smax_reassoc_constants(i8 %x) {
; CHECK-LABEL: @smin_smax_reassoc_constants(
-; CHECK-NEXT: [[M1:%.*]] = call i8 @llvm.smin.i8(i8 [[X:%.*]], i8 97)
-; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.smax.i8(i8 [[M1]], i8 -3)
-; CHECK-NEXT: ret i8 [[M2]]
+; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.smax.i8(i8 [[M1:%.*]], i8 -3)
+; CHECK-NEXT: [[M3:%.*]] = call i8 @llvm.smin.i8(i8 [[M2]], i8 97)
+; CHECK-NEXT: ret i8 [[M3]]
;
%m1 = call i8 @llvm.smin.i8(i8 %x, i8 97)
%m2 = call i8 @llvm.smax.i8(i8 %m1, i8 -3)
diff --git a/llvm/test/Transforms/InstCombine/sadd_sat.ll b/llvm/test/Transforms/InstCombine/sadd_sat.ll
index d27e7aa28d62c..6afb77d975b8c 100644
--- a/llvm/test/Transforms/InstCombine/sadd_sat.ll
+++ b/llvm/test/Transforms/InstCombine/sadd_sat.ll
@@ -77,8 +77,8 @@ define i32 @smul_sat32(i32 %a, i32 %b) {
; CHECK-NEXT: [[CONV:%.*]] = sext i32 [[A:%.*]] to i64
; CHECK-NEXT: [[CONV1:%.*]] = sext i32 [[B:%.*]] to i64
; CHECK-NEXT: [[ADD:%.*]] = mul nsw i64 [[CONV1]], [[CONV]]
-; CHECK-NEXT: [[SPEC_STORE_SELECT:%.*]] = call i64 @llvm.smin.i64(i64 [[ADD]], i64 2147483647)
-; CHECK-NEXT: [[SPEC_STORE_SELECT8:%.*]] = call i64 @llvm.smax.i64(i64 [[SPEC_STORE_SELECT]], i64 -2147483648)
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.smax.i64(i64 [[ADD]], i64 -2147483648)
+; CHECK-NEXT: [[SPEC_STORE_SELECT8:%.*]] = call i64 @llvm.smin.i64(i64 [[TMP0]], i64 2147483647)
; CHECK-NEXT: [[CONV7:%.*]] = trunc nsw i64 [[SPEC_STORE_SELECT8]] to i32
; CHECK-NEXT: ret i32 [[CONV7]]
;
@@ -100,8 +100,8 @@ define i32 @smul_sat32_mm(i32 %a, i32 %b) {
; CHECK-NEXT: [[CONV:%.*]] = sext i32 [[A:%.*]] to i64
; CHECK-NEXT: [[CONV1:%.*]] = sext i32 [[B:%.*]] to i64
; CHECK-NEXT: [[ADD:%.*]] = mul nsw i64 [[CONV1]], [[CONV]]
-; CHECK-NEXT: [[SPEC_STORE_SELECT:%.*]] = call i64 @llvm.smin.i64(i64 [[ADD]], i64 2147483647)
-; CHECK-NEXT: [[SPEC_STORE_SELECT8:%.*]] = call i64 @llvm.smax.i64(i64 [[SPEC_STORE_SELECT]], i64 -2147483648)
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.smax.i64(i64 [[ADD]], i64 -2147483648)
+; CHECK-NEXT: [[SPEC_STORE_SELECT8:%.*]] = call i64 @llvm.smin.i64(i64 [[TMP0]], i64 2147483647)
; CHECK-NEXT: [[CONV7:%.*]] = trunc nsw i64 [[SPEC_STORE_SELECT8]] to i32
; CHECK-NEXT: ret i32 [[CONV7]]
;
@@ -293,8 +293,8 @@ define signext i4 @sadd_sat4(i4 signext %a, i4 signext %b) {
; CHECK-NEXT: [[CONV:%.*]] = sext i4 [[A:%.*]] to i32
; CHECK-NEXT: [[CONV1:%.*]] = sext i4 [[B:%.*]] to i32
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], [[CONV]]
-; CHECK-NEXT: [[SPEC_STORE_SELECT:%.*]] = call i32 @llvm.smin.i32(i32 [[ADD]], i32 7)
-; CHECK-NEXT: [[SPEC_STORE_SELECT10:%.*]] = call i32 @llvm.smax.i32(i32 [[SPEC_STORE_SELECT]], i32 -8)
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.smax.i32(i32 [[ADD]], i32 -8)
+; CHECK-NEXT: [[SPEC_STORE_SELECT10:%.*]] = call i32 @llvm.smin.i32(i32 [[TMP0]], i32 7)
; CHECK-NEXT: [[CONV9:%.*]] = trunc nsw i32 [[SPEC_STORE_SELECT10]] to i4
; CHECK-NEXT: ret i4 [[CONV9]]
;
@@ -316,8 +316,8 @@ define signext i4 @ssub_sat4(i4 signext %a, i4 signext %b) {
; CHECK-NEXT: [[CONV:%.*]] = sext i4 [[A:%.*]] to i32
; CHECK-NEXT: [[CONV1:%.*]] = sext i4 [[B:%.*]] to i32
; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[CONV]], [[CONV1]]
-; CHECK-NEXT: [[SPEC_STORE_SELECT:%.*]] = call i32 @llvm.smin.i32(i32 [[SUB]], i32 7)
-; CHECK-NEXT: [[SPEC_STORE_SELECT10:%.*]] = call i32 @llvm.smax.i32(i32 [[SPEC_STORE_SELECT]], i32 -8)
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.smax.i32(i32 [[SUB]], i32 -8)
+; CHECK-NEXT: [[SPEC_STORE_SELECT10:%.*]] = call i32 @llvm.smin.i32(i32 [[TMP0]], i32 7)
; CHECK-NEXT: [[CONV9:%.*]] = trunc nsw i32 [[SPEC_STORE_SELECT10]] to i4
; CHECK-NEXT: ret i4 [[CONV9]]
;
@@ -405,8 +405,8 @@ define <4 x i32> @sadd_satv4i4(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: @sadd_satv4i4(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ADD:%.*]] = add <4 x i32> [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT: [[SPEC_STORE_SELECT:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[ADD]], <4 x i32> splat (i32 15))
-; CHECK-NEXT: [[SPEC_STORE_SELECT8:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[SPEC_STORE_SELECT]], <4 x i32> splat (i32 -16))
+; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[ADD]], <4 x i32> splat (i32 -16))
+; CHECK-NEXT: [[SPEC_STORE_SELECT8:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[TMP0]], <4 x i32> splat (i32 15))
; CHECK-NEXT: ret <4 x i32> [[SPEC_STORE_SELECT8]]
;
entry:
@@ -422,8 +422,8 @@ define <4 x i32> @ssub_satv4i4(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: @ssub_satv4i4(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ADD:%.*]] = sub <4 x i32> [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT: [[SPEC_STORE_SELECT:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[ADD]], <4 x i32> splat (i32 15))
-; CHECK-NEXT: [[SPEC_STORE_SELECT8:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[SPEC_STORE_SELECT]], <4 x i32> splat (i32 -16))
+; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[ADD]], <4 x i32> splat (i32 -16))
+; CHECK-NEXT: [[SPEC_STORE_SELECT8:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[TMP0]], <4 x i32> splat (i32 15))
; CHECK-NEXT: ret <4 x i32> [[SPEC_STORE_SELECT8]]
;
entry:
@@ -511,8 +511,8 @@ define i32 @sadd_sat32_extrause_3(i32 %a, i32 %b) {
; CHECK-NEXT: [[CONV:%.*]] = sext i32 [[A:%.*]] to i64
; CHECK-NEXT: [[CONV1:%.*]] = sext i32 [[B:%.*]] to i64
; CHECK-NEXT: [[ADD:%.*]] = add nsw i64 [[CONV1]], [[CONV]]
-; CHECK-NEXT: [[SPEC_STORE_SELECT:%.*]] = call i64 @llvm.smin.i64(i64 [[ADD]], i64 2147483647)
-; CHECK-NEXT: [[SPEC_STORE_SELECT8:%.*]] = call i64 @llvm.smax.i64(i64 [[SPEC_STORE_SELECT]], i64 -2147483648)
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.smax.i64(i64 [[ADD]], i64 -2147483648)
+; CHECK-NEXT: [[SPEC_STORE_SELECT8:%.*]] = call i64 @llvm.smin.i64(i64 [[TMP0]], i64 2147483647)
; CHECK-NEXT: [[CONV7:%.*]] = trunc nsw i64 [[SPEC_STORE_SELECT8]] to i32
; CHECK-NEXT: call void @use64(i64 [[ADD]])
; CHECK-NEXT: ret i32 [[CONV7]]
@@ -536,8 +536,8 @@ define i32 @sadd_sat32_extrause_3_mm(i32 %a, i32 %b) {
; CHECK-NEXT: [[CONV:%.*]] = sext i32 [[A:%.*]] to i64
; CHECK-NEXT: [[CONV1:%.*]] = sext i32 [[B:%.*]] to i64
; CHECK-NEXT: [[ADD:%.*]] = add nsw i64 [[CONV1]], [[CONV]]
-; CHECK-NEXT: [[SPEC_STORE_SELECT:%.*]] = call i64 @llvm.smin.i64(i64 [[ADD]], i64 2147483647)
-; CHECK-NEXT: [[SPEC_STORE_SELECT8:%.*]] = call i64 @llvm.smax.i64(i64 [[SPEC_STORE_SELECT]], i64 -2147483648)
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.smax.i64(i64 [[ADD]], i64 -2147483648)
+; CHECK-NEXT: [[SPEC_S...
[truncated]
|
smax(smin(X, MinC), MaxC) -> smin(smax(X, MaxC), MinC)
smax(smin(X, MinC), MaxC) -> smin(smax(X, MaxC), MinC)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
BTW, the downside of this canonicalization is that clamp idioms may be broken by another smin -> umin
canonicalization.
Consider the following pattern:
%min = smin(X, MinC)
%max = smax(%min, PosMaxC) where MinC s>= PosMaxC
After this canonicalization, we get:
%max = smax(X, PosMaxC)
%min = smin(%max, MinC) ->
%min = umin(%max, MinC) since we know both %max and MinC are non-negative.
Please check if this pattern is handled in DAGCombine, or if this transform is reverted somewhere.
f1f2d44
to
ed285b0
Compare
The pattern has been handled in DAGCombiner by you... That's amazing! |
The unsigned version works as well. |
51fa11d
to
3d13662
Compare
smax(smin(X, MinC), MaxC) -> smin(smax(X, MaxC), MinC)
max(min(X, MinC), MaxC) -> min(max(X, MaxC), MinC)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM. Thank you!
Please wait for additional approval from other reviewers.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks reasonable. Is there any particular reason why we expect min(max) to be better than max(min), or is this basically chosen at random?
Mostly because it was suggested in the original issue. I'm not quite aware of the performance impact. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/59/builds/16486 Here is the relevant piece of the build log for reference:
|
Closes #121870.
alive2.llvm.org/ce/z/WjmAjz