-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[ValueTracking] Handle recursive phis in knownFPClass #114008
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-analysis @llvm/pr-subscribers-llvm-transforms Author: David Green (davemgreen) ChangesAs a follow-on to 113686, this breaks the recursion between phi nodes that have p1 = phi(x, p2) and p2 = phi(y, p1). The knownFPClass can be calculated from the classes of p1 and p2. Currently includes #113686, see 50686f5 for the individual commit. Patch is 29.09 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/114008.diff 5 Files Affected:
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index e9ed8b3c862b55..877525e0315084 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -5999,6 +5999,21 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts,
if (IncValue == P)
continue;
+ // If the Use is a select of this phi, use the fp class of the other
+ // operand to break the recursion.
+ Value *V;
+ if (match(IncValue, m_Select(m_Value(), m_Specific(P), m_Value(V))) ||
+ match(IncValue, m_Select(m_Value(), m_Value(V), m_Specific(P))))
+ IncValue = V;
+ // Same around 2-operand phi nodes
+ if (auto *IncPhi = dyn_cast<PHINode>(IncValue);
+ IncPhi && IncPhi->getNumIncomingValues() == 2) {
+ if (IncPhi->getIncomingValue(0) == P)
+ IncValue = IncPhi->getIncomingValue(1);
+ if (IncPhi->getIncomingValue(1) == P)
+ IncValue = IncPhi->getIncomingValue(0);
+ }
+
KnownFPClass KnownSrc;
// Recurse, but cap the recursion to two levels, because we don't want
// to waste time spinning around in loops. We need at least depth 2 to
diff --git a/llvm/test/Transforms/Attributor/nofpclass-phiselect.ll b/llvm/test/Transforms/Attributor/nofpclass-phiselect.ll
new file mode 100644
index 00000000000000..21d4c10d37a703
--- /dev/null
+++ b/llvm/test/Transforms/Attributor/nofpclass-phiselect.ll
@@ -0,0 +1,176 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -S < %s | FileCheck %s
+
+define float @phi_select(i1 %c, float nofpclass(inf) %base, float nofpclass(inf) %arg) {
+; CHECK-LABEL: define nofpclass(inf) float @phi_select
+; CHECK-SAME: (i1 [[C:%.*]], float nofpclass(inf) [[BASE:%.*]], float nofpclass(inf) [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[PHI:%.*]] = phi float [ [[BASE]], [[ENTRY:%.*]] ], [ [[SELECT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[SELECT]] = select i1 [[C]], float [[PHI]], float [[ARG]]
+; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: ret float [[SELECT]]
+;
+entry:
+ br label %loop
+
+loop:
+ %phi = phi float [ %base, %entry ], [ %select, %loop ]
+ %select = select i1 %c, float %phi, float %arg
+ br i1 %c, label %loop, label %exit
+
+exit:
+ ret float %select
+}
+
+define float @phi_select_onlybase(i1 %c, float nofpclass(inf) %base, float %arg) {
+; CHECK-LABEL: define float @phi_select_onlybase
+; CHECK-SAME: (i1 [[C:%.*]], float nofpclass(inf) [[BASE:%.*]], float [[ARG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[PHI:%.*]] = phi float [ [[BASE]], [[ENTRY:%.*]] ], [ [[SELECT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[SELECT]] = select i1 [[C]], float [[PHI]], float [[ARG]]
+; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: ret float [[SELECT]]
+;
+entry:
+ br label %loop
+
+loop:
+ %phi = phi float [ %base, %entry ], [ %select, %loop ]
+ %select = select i1 %c, float %phi, float %arg
+ br i1 %c, label %loop, label %exit
+
+exit:
+ ret float %select
+}
+
+define float @phi_select_onlyarg(i1 %c, float %base, float nofpclass(inf) %arg) {
+; CHECK-LABEL: define float @phi_select_onlyarg
+; CHECK-SAME: (i1 [[C:%.*]], float [[BASE:%.*]], float nofpclass(inf) [[ARG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[PHI:%.*]] = phi float [ [[BASE]], [[ENTRY:%.*]] ], [ [[SELECT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[SELECT]] = select i1 [[C]], float [[PHI]], float [[ARG]]
+; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: ret float [[SELECT]]
+;
+entry:
+ br label %loop
+
+loop:
+ %phi = phi float [ %base, %entry ], [ %select, %loop ]
+ %select = select i1 %c, float %phi, float %arg
+ br i1 %c, label %loop, label %exit
+
+exit:
+ ret float %select
+}
+
+define float @phi_phi(i1 %c, float nofpclass(inf) %base, float nofpclass(inf) %arg) {
+; CHECK-LABEL: define nofpclass(inf) float @phi_phi
+; CHECK-SAME: (i1 [[C:%.*]], float nofpclass(inf) [[BASE:%.*]], float nofpclass(inf) [[ARG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[PHI:%.*]] = phi float [ [[BASE]], [[ENTRY:%.*]] ], [ [[PHI2:%.*]], [[EXIT1:%.*]] ]
+; CHECK-NEXT: br label [[INNER:%.*]]
+; CHECK: inner:
+; CHECK-NEXT: [[PHI2]] = phi float [ [[PHI]], [[LOOP]] ], [ [[ARG]], [[INNER]] ]
+; CHECK-NEXT: br i1 [[C]], label [[INNER]], label [[EXIT1]]
+; CHECK: exit1:
+; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: ret float [[PHI2]]
+;
+entry:
+ br label %loop
+
+loop:
+ %phi = phi float [ %base, %entry ], [ %phi2, %exit1 ]
+ br label %inner
+
+inner:
+ %phi2 = phi float [ %phi, %loop ], [ %arg, %inner ]
+ br i1 %c, label %inner, label %exit1
+
+exit1:
+ br i1 %c, label %loop, label %exit
+
+exit:
+ ret float %phi2
+}
+
+define float @phi_phi_onlybase(i1 %c, float nofpclass(inf) %base, float %arg) {
+; CHECK-LABEL: define float @phi_phi_onlybase
+; CHECK-SAME: (i1 [[C:%.*]], float nofpclass(inf) [[BASE:%.*]], float [[ARG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[PHI:%.*]] = phi float [ [[BASE]], [[ENTRY:%.*]] ], [ [[PHI2:%.*]], [[EXIT1:%.*]] ]
+; CHECK-NEXT: br label [[INNER:%.*]]
+; CHECK: inner:
+; CHECK-NEXT: [[PHI2]] = phi float [ [[PHI]], [[LOOP]] ], [ [[ARG]], [[INNER]] ]
+; CHECK-NEXT: br i1 [[C]], label [[INNER]], label [[EXIT1]]
+; CHECK: exit1:
+; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: ret float [[PHI2]]
+;
+entry:
+ br label %loop
+
+loop:
+ %phi = phi float [ %base, %entry ], [ %phi2, %exit1 ]
+ br label %inner
+
+inner:
+ %phi2 = phi float [ %phi, %loop ], [ %arg, %inner ]
+ br i1 %c, label %inner, label %exit1
+
+exit1:
+ br i1 %c, label %loop, label %exit
+
+exit:
+ ret float %phi2
+}
+
+define float @phi_phi_onlyarg(i1 %c, float %base, float nofpclass(inf) %arg) {
+; CHECK-LABEL: define float @phi_phi_onlyarg
+; CHECK-SAME: (i1 [[C:%.*]], float [[BASE:%.*]], float nofpclass(inf) [[ARG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[PHI:%.*]] = phi float [ [[BASE]], [[ENTRY:%.*]] ], [ [[PHI2:%.*]], [[EXIT1:%.*]] ]
+; CHECK-NEXT: br label [[INNER:%.*]]
+; CHECK: inner:
+; CHECK-NEXT: [[PHI2]] = phi float [ [[PHI]], [[LOOP]] ], [ [[ARG]], [[INNER]] ]
+; CHECK-NEXT: br i1 [[C]], label [[INNER]], label [[EXIT1]]
+; CHECK: exit1:
+; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: ret float [[PHI2]]
+;
+entry:
+ br label %loop
+
+loop:
+ %phi = phi float [ %base, %entry ], [ %phi2, %exit1 ]
+ br label %inner
+
+inner:
+ %phi2 = phi float [ %phi, %loop ], [ %arg, %inner ]
+ br i1 %c, label %inner, label %exit1
+
+exit1:
+ br i1 %c, label %loop, label %exit
+
+exit:
+ ret float %phi2
+}
diff --git a/llvm/test/Transforms/EarlyCSE/nofpclass-phi-regression.ll b/llvm/test/Transforms/EarlyCSE/nofpclass-phi-regression.ll
index 81cead29ac9104..6fc6b735049828 100644
--- a/llvm/test/Transforms/EarlyCSE/nofpclass-phi-regression.ll
+++ b/llvm/test/Transforms/EarlyCSE/nofpclass-phi-regression.ll
@@ -20,7 +20,6 @@ define void @compute_known_fpclass_phi_assert(i1 %cmp46, i1 %tobool51, ptr %a01)
; CHECK-NEXT: [[TMP1]] = phi double [ 0.000000e+00, [[IF_THEN52]] ], [ [[TMP0]], [[FOR_BODY48]] ]
; CHECK-NEXT: br label [[FOR_COND45]]
; CHECK: for.end82:
-; CHECK-NEXT: [[MUL84:%.*]] = fmul double [[TMP0]], 0.000000e+00
; CHECK-NEXT: ret void
;
entry:
diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-selectandorcost.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-selectandorcost.ll
index fc56754166d609..b54d4a97d07be7 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/mve-selectandorcost.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-selectandorcost.ll
@@ -45,8 +45,8 @@ define float @test(ptr nocapture readonly %pA, ptr nocapture readonly %pB, i32 %
; CHECK-NEXT: [[TMP7:%.*]] = fsub fast <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD7]]
; CHECK-NEXT: [[TMP8:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP7]])
; CHECK-NEXT: [[TMP9:%.*]] = fdiv fast <4 x float> [[TMP8]], [[TMP6]]
-; CHECK-NEXT: [[TMP10:%.*]] = fadd fast <4 x float> [[TMP9]], [[VEC_PHI]]
-; CHECK-NEXT: [[PREDPHI]] = select <4 x i1> [[DOTNOT9]], <4 x float> [[VEC_PHI]], <4 x float> [[TMP10]]
+; CHECK-NEXT: [[TMP10:%.*]] = select <4 x i1> [[DOTNOT9]], <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, <4 x float> [[TMP9]]
+; CHECK-NEXT: [[PREDPHI]] = fadd fast <4 x float> [[VEC_PHI]], [[TMP10]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/predicated-reduction.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/predicated-reduction.ll
index 7274e952567693..549e50fe9230b4 100644
--- a/llvm/test/Transforms/PhaseOrdering/AArch64/predicated-reduction.ll
+++ b/llvm/test/Transforms/PhaseOrdering/AArch64/predicated-reduction.ll
@@ -12,26 +12,79 @@ define nofpclass(nan inf) double @monte_simple(i32 noundef %nblocks, i32 noundef
; CHECK-NEXT: br i1 [[CMP8]], label %[[FOR_BODY_PREHEADER:.*]], label %[[FOR_END:.*]]
; CHECK: [[FOR_BODY_PREHEADER]]:
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[RAND_BLOCK_LENGTH]] to i64
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[RAND_BLOCK_LENGTH]], 4
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[FOR_BODY_PREHEADER23:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 2147483644
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[Y]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT19:%.*]] = insertelement <2 x double> poison, double [[Z]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT20:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT19]], <2 x double> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDVARS_IV1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x double> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP18:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI15:%.*]] = phi <2 x double> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP19:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI16:%.*]] = phi <2 x double> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP14:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI17:%.*]] = phi <2 x double> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[SAMPLES]], i64 [[INDVARS_IV1]]
+; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i8, ptr [[ARRAYIDX1]], i64 8
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[ARRAYIDX1]], align 4
+; CHECK-NEXT: [[WIDE_LOAD18:%.*]] = load <2 x float>, ptr [[TMP23]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = fpext <2 x float> [[WIDE_LOAD]] to <2 x double>
+; CHECK-NEXT: [[TMP3:%.*]] = fpext <2 x float> [[WIDE_LOAD18]] to <2 x double>
+; CHECK-NEXT: [[TMP4:%.*]] = fmul fast <2 x double> [[BROADCAST_SPLAT]], [[TMP2]]
+; CHECK-NEXT: [[TMP5:%.*]] = fmul fast <2 x double> [[BROADCAST_SPLAT]], [[TMP3]]
+; CHECK-NEXT: [[TMP6:%.*]] = fsub fast <2 x double> [[TMP4]], [[BROADCAST_SPLAT20]]
+; CHECK-NEXT: [[TMP7:%.*]] = fsub fast <2 x double> [[TMP5]], [[BROADCAST_SPLAT20]]
+; CHECK-NEXT: [[TMP8:%.*]] = fcmp fast ogt <2 x double> [[TMP6]], zeroinitializer
+; CHECK-NEXT: [[TMP9:%.*]] = fcmp fast ogt <2 x double> [[TMP7]], zeroinitializer
+; CHECK-NEXT: [[TMP10:%.*]] = fmul fast <2 x double> [[TMP6]], [[TMP6]]
+; CHECK-NEXT: [[TMP11:%.*]] = fmul fast <2 x double> [[TMP7]], [[TMP7]]
+; CHECK-NEXT: [[TMP12:%.*]] = tail call fast <2 x double> @llvm.maxnum.v2f64(<2 x double> [[TMP6]], <2 x double> <double -0.000000e+00, double -0.000000e+00>)
+; CHECK-NEXT: [[TMP13:%.*]] = tail call fast <2 x double> @llvm.maxnum.v2f64(<2 x double> [[TMP7]], <2 x double> <double -0.000000e+00, double -0.000000e+00>)
+; CHECK-NEXT: [[TMP14]] = fadd fast <2 x double> [[TMP12]], [[VEC_PHI16]]
+; CHECK-NEXT: [[TMP15]] = fadd fast <2 x double> [[TMP13]], [[VEC_PHI17]]
+; CHECK-NEXT: [[TMP16:%.*]] = select <2 x i1> [[TMP8]], <2 x double> [[TMP10]], <2 x double> <double -0.000000e+00, double -0.000000e+00>
+; CHECK-NEXT: [[TMP17:%.*]] = select <2 x i1> [[TMP9]], <2 x double> [[TMP11]], <2 x double> <double -0.000000e+00, double -0.000000e+00>
+; CHECK-NEXT: [[TMP18]] = fadd fast <2 x double> [[TMP16]], [[VEC_PHI]]
+; CHECK-NEXT: [[TMP19]] = fadd fast <2 x double> [[TMP17]], [[VEC_PHI15]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDVARS_IV1]], 4
+; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <2 x double> [[TMP19]], [[TMP18]]
+; CHECK-NEXT: [[TMP21:%.*]] = tail call fast double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> [[BIN_RDX]])
+; CHECK-NEXT: [[BIN_RDX21:%.*]] = fadd fast <2 x double> [[TMP15]], [[TMP14]]
+; CHECK-NEXT: [[TMP22:%.*]] = tail call fast double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> [[BIN_RDX21]])
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[WIDE_TRIP_COUNT]]
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END_LOOPEXIT:.*]], label %[[FOR_BODY_PREHEADER23]]
+; CHECK: [[FOR_BODY_PREHEADER23]]:
+; CHECK-NEXT: [[INDVARS_IV_PH:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[N_VEC]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[V1_012_PH:%.*]] = phi double [ 0.000000e+00, %[[FOR_BODY_PREHEADER]] ], [ [[TMP21]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[V0_011_PH:%.*]] = phi double [ 0.000000e+00, %[[FOR_BODY_PREHEADER]] ], [ [[TMP22]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
-; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
-; CHECK-NEXT: [[V1_011:%.*]] = phi double [ 0.000000e+00, %[[FOR_BODY_PREHEADER]] ], [ [[V1_1:%.*]], %[[FOR_BODY]] ]
-; CHECK-NEXT: [[V0_010:%.*]] = phi double [ 0.000000e+00, %[[FOR_BODY_PREHEADER]] ], [ [[V0_1:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ [[INDVARS_IV_PH]], %[[FOR_BODY_PREHEADER23]] ]
+; CHECK-NEXT: [[V1_012:%.*]] = phi double [ [[V1_2:%.*]], %[[FOR_BODY]] ], [ [[V1_012_PH]], %[[FOR_BODY_PREHEADER23]] ]
+; CHECK-NEXT: [[V0_011:%.*]] = phi double [ [[V0_2:%.*]], %[[FOR_BODY]] ], [ [[V0_011_PH]], %[[FOR_BODY_PREHEADER23]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[SAMPLES]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[CONV:%.*]] = fpext float [[TMP0]] to double
; CHECK-NEXT: [[MUL:%.*]] = fmul fast double [[Y]], [[CONV]]
; CHECK-NEXT: [[SUB:%.*]] = fsub fast double [[MUL]], [[Z]]
; CHECK-NEXT: [[CMP1:%.*]] = fcmp fast ogt double [[SUB]], 0.000000e+00
-; CHECK-NEXT: [[ADD:%.*]] = fadd fast double [[SUB]], [[V0_010]]
-; CHECK-NEXT: [[MUL3:%.*]] = fmul fast double [[SUB]], [[SUB]]
-; CHECK-NEXT: [[ADD4:%.*]] = fadd fast double [[MUL3]], [[V1_011]]
-; CHECK-NEXT: [[V0_1]] = select i1 [[CMP1]], double [[ADD]], double [[V0_010]]
-; CHECK-NEXT: [[V1_1]] = select i1 [[CMP1]], double [[ADD4]], double [[V1_011]]
+; CHECK-NEXT: [[ADD4:%.*]] = fmul fast double [[SUB]], [[SUB]]
+; CHECK-NEXT: [[ADD8:%.*]] = tail call fast double @llvm.maxnum.f64(double [[SUB]], double -0.000000e+00)
+; CHECK-NEXT: [[V0_2]] = fadd fast double [[ADD8]], [[V0_011]]
+; CHECK-NEXT: [[ADD6:%.*]] = select i1 [[CMP1]], double [[ADD4]], double -0.000000e+00
+; CHECK-NEXT: [[V1_2]] = fadd fast double [[ADD6]], [[V1_012]]
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
-; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END_LOOPEXIT:.*]], label %[[FOR_BODY]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: [[FOR_END_LOOPEXIT]]:
+; CHECK-NEXT: [[V0_1:%.*]] = phi double [ [[TMP22]], %[[MIDDLE_BLOCK]] ], [ [[V0_2]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[V1_1:%.*]] = phi double [ [[TMP21]], %[[MIDDLE_BLOCK]] ], [ [[V1_2]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[TMP1:%.*]] = fadd fast double [[V1_1]], [[V0_1]]
; CHECK-NEXT: br label %[[FOR_END]]
; CHECK: [[FOR_END]]:
@@ -136,33 +189,90 @@ define nofpclass(nan inf) double @monte_exp(i32 noundef %nblocks, i32 noundef %R
; CHECK-NEXT: br i1 [[CMP211]], label %[[FOR_BODY_US_PREHEADER:.*]], label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY_US_PREHEADER]]:
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[RAND_BLOCK_LENGTH]] to i64
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[RAND_BLOCK_LENGTH]], 4
+; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 2147483644
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[Y]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT35:%.*]] = insertelement <2 x double> poison, double [[Z]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT36:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT35]], <2 x double> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT: br label %[[FOR_BODY_US:.*]]
; CHECK: [[FOR_BODY_US]]:
-; CHECK-NEXT: [[V1_019_US:%.*]] = phi double [ [[V1_2_US:%.*]], %[[FOR_COND1_FOR_INC8_CRIT_EDGE_US:.*]] ], [ 0.000000e+00, %[[FOR_BODY_US_PREHEADER]] ]
-; CHECK-NEXT: [[V0_018_US:%.*]] = phi double [ [[V0_2_US:%.*]], %[[FOR_COND1_FOR_INC8_CRIT_EDGE_US]] ], [ 0.000000e+00, %[[FOR_BODY_US_PREHEADER]] ]
+; CHECK-NEXT: [[V1_021_US:%.*]] = phi double [ [[V1_2_US_LCSSA:%.*]], %[[FOR_COND1_FOR_INC8_CRIT_EDGE_US:.*]] ], [ 0.000000e+00, %[[FOR_BODY_US_PREHEADER]] ]
+; CHECK-NEXT: [[V0_020_US:%.*]] = phi double [ [[V0_2_US_LCSSA:%.*]], %[[FOR_COND1_FOR_INC8_CRIT_EDGE_US]] ], [ 0.000000e+00, %[[FOR_BODY_US_PREHEADER]] ]
; CHECK-NEXT: [[BLOCK_017_US:%.*]] = phi i32 [ [[INC9_US:%.*]], %[[FOR_COND1_FOR_INC8_CRIT_EDGE_US]] ], [ 0, %[[FOR_BODY_US_PREHEADER]] ]
; CHECK-NEXT: tail call void @resample(i32 noundef [[RAND_BLOCK_LENGTH]], ptr noundef [[SAMPLES]])
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[FOR_BODY3_US_PREHEADER:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> <double poison, double 0.000000e+00>, double [[V1_021_US]], i64 0
+; CHECK-NEXT: [[TMP27:%.*]] = insertelement <2 x double> <double poison, double 0.000000e+00>, double [[V0_020_US]], i64 0
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDVARS_...
[truncated]
|
50686f5
to
5210091
Compare
As a follow-on to llvm#113686, this breaks the recursion between phi nodes that have p1 = phi(x, p2) and p2 = phi(y, p1). The knownFPClass can be calculated from the classes of p1 and p2.
5210091
to
549fb64
Compare
Finish porting llvm#114008 to `KnownBits` (Follow up to llvm#113707).
Finish porting llvm#114008 to `KnownBits` (Follow up to llvm#113707).
As a follow-on to 113686, this breaks the recursion between phi nodes that have p1 = phi(x, p2) and p2 = phi(y, p1). The knownFPClass can be calculated from the classes of p1 and p2.
Finish porting llvm#114008 to `KnownBits` (Follow up to llvm#113707).
As a follow-on to 113686, this breaks the recursion between phi nodes that have p1 = phi(x, p2) and p2 = phi(y, p1). The knownFPClass can be calculated from the classes of p1 and p2.
Finish porting llvm#114008 to `KnownBits` (Follow up to llvm#113707).
As a follow-on to 113686, this breaks the recursion between phi nodes that have p1 = phi(x, p2) and p2 = phi(y, p1). The knownFPClass can be calculated from the classes of p1 and p2.
Currently includes #113686, see 50686f5 for the individual commit.