Skip to content

Commit 65031f4

Browse files
committed
!fixup address latest comments, thanks
1 parent 78b687c commit 65031f4

File tree

4 files changed

+73
-81
lines changed

4 files changed

+73
-81
lines changed

llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp

Lines changed: 40 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -823,7 +823,8 @@ bool VPlanTransforms::handleMaxMinNumReductions(VPlan &Plan) {
823823
};
824824

825825
VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
826-
SmallVector<VPReductionPHIRecipe *> ReductionsToConvert;
826+
SmallVector<std::pair<VPReductionPHIRecipe *, VPValue *>>
827+
MinMaxNumReductionsToHandle;
827828
bool HasUnsupportedPhi = false;
828829
for (auto &R : LoopRegion->getEntryBasicBlock()->phis()) {
829830
if (isa<VPCanonicalIVPHIRecipe, VPWidenIntOrFpInductionRecipe>(&R))
@@ -839,10 +840,15 @@ bool VPlanTransforms::handleMaxMinNumReductions(VPlan &Plan) {
839840
HasUnsupportedPhi = true;
840841
continue;
841842
}
842-
ReductionsToConvert.push_back(Cur);
843+
844+
VPValue *MinMaxOp = GetMinMaxCompareValue(Cur);
845+
if (!MinMaxOp)
846+
return false;
847+
848+
MinMaxNumReductionsToHandle.emplace_back(Cur, MinMaxOp);
843849
}
844850

845-
if (ReductionsToConvert.empty())
851+
if (MinMaxNumReductionsToHandle.empty())
846852
return true;
847853

848854
// We won't be able to resume execution in the scalar tail, if there are
@@ -867,32 +873,29 @@ bool VPlanTransforms::handleMaxMinNumReductions(VPlan &Plan) {
867873
}
868874

869875
VPBasicBlock *LatchVPBB = LoopRegion->getExitingBasicBlock();
870-
VPBasicBlock *MiddleVPBB = Plan.getMiddleBlock();
871-
VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->begin());
872-
VPBuilder Builder(LatchVPBB->getTerminator());
873-
VPValue *AnyNaN = nullptr;
876+
VPBuilder LatchBuilder(LatchVPBB->getTerminator());
877+
VPValue *IsNaNLane = nullptr;
874878
SmallPtrSet<VPValue *, 2> RdxResults;
875-
for (VPReductionPHIRecipe *RedPhiR : ReductionsToConvert) {
879+
for (const auto &[RedPhiR, MinMaxOp] : MinMaxNumReductionsToHandle) {
876880
assert(RecurrenceDescriptor::isFPMinMaxNumRecurrenceKind(
877881
RedPhiR->getRecurrenceKind()) &&
878882
"unsupported reduction");
879883

880-
VPValue *MinMaxOp = GetMinMaxCompareValue(RedPhiR);
881-
if (!MinMaxOp)
882-
return false;
883-
884-
VPValue *IsNaN = Builder.createFCmp(CmpInst::FCMP_UNO, MinMaxOp, MinMaxOp);
885-
VPValue *HasNaN = Builder.createNaryOp(VPInstruction::AnyOf, {IsNaN});
886-
if (AnyNaN)
887-
AnyNaN = Builder.createOr(AnyNaN, HasNaN);
888-
else
889-
AnyNaN = HasNaN;
884+
VPValue *IsNaN =
885+
LatchBuilder.createFCmp(CmpInst::FCMP_UNO, MinMaxOp, MinMaxOp);
886+
IsNaNLane = IsNaNLane ? LatchBuilder.createOr(IsNaNLane, IsNaN) : IsNaN;
887+
}
890888

889+
VPValue *AnyNaNLane =
890+
LatchBuilder.createNaryOp(VPInstruction::AnyOf, {IsNaNLane});
891+
VPBasicBlock *MiddleVPBB = Plan.getMiddleBlock();
892+
VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->begin());
893+
for (const auto &[RedPhiR, MinMaxOp] : MinMaxNumReductionsToHandle) {
891894
// If we exit early due to NaNs, compute the final reduction result based
892895
// on the reduction phi at the beginning of the last vector iteration.
893896
auto *RdxResult = find_singleton<VPSingleDefRecipe>(
894897
RedPhiR->getBackedgeValue()->users(),
895-
[RedPhiR](VPUser *U, bool) -> VPSingleDefRecipe * {
898+
[RedPhiR = RedPhiR](VPUser *U, bool) -> VPSingleDefRecipe * {
896899
auto *VPI = dyn_cast<VPInstruction>(U);
897900
if (VPI && VPI->getOpcode() == VPInstruction::ComputeReductionResult)
898901
return VPI;
@@ -902,24 +905,25 @@ bool VPlanTransforms::handleMaxMinNumReductions(VPlan &Plan) {
902905
return nullptr;
903906
});
904907

905-
auto *NewSel =
906-
MiddleBuilder.createSelect(HasNaN, RedPhiR, RdxResult->getOperand(1));
908+
auto *NewSel = MiddleBuilder.createSelect(AnyNaNLane, RedPhiR,
909+
RdxResult->getOperand(1));
907910
RdxResult->setOperand(1, NewSel);
911+
assert(!RdxResults.contains(RdxResult) && "RdxResult already used");
908912
RdxResults.insert(RdxResult);
909913
}
910914

911915
auto *LatchExitingBranch = LatchVPBB->getTerminator();
912916
assert(match(LatchExitingBranch, m_BranchOnCount(m_VPValue(), m_VPValue())) &&
913917
"Unexpected terminator");
914-
auto *IsLatchExitTaken =
915-
Builder.createICmp(CmpInst::ICMP_EQ, LatchExitingBranch->getOperand(0),
916-
LatchExitingBranch->getOperand(1));
917-
auto *AnyExitTaken =
918-
Builder.createNaryOp(Instruction::Or, {AnyNaN, IsLatchExitTaken});
919-
Builder.createNaryOp(VPInstruction::BranchOnCond, AnyExitTaken);
918+
auto *IsLatchExitTaken = LatchBuilder.createICmp(
919+
CmpInst::ICMP_EQ, LatchExitingBranch->getOperand(0),
920+
LatchExitingBranch->getOperand(1));
921+
auto *AnyExitTaken = LatchBuilder.createNaryOp(
922+
Instruction::Or, {AnyNaNLane, IsLatchExitTaken});
923+
LatchBuilder.createNaryOp(VPInstruction::BranchOnCond, AnyExitTaken);
920924
LatchExitingBranch->eraseFromParent();
921925

922-
// Update resume phis for inductions in the scalar preheader. If AnyNaN is
926+
// Update resume phis for inductions in the scalar preheader. If AnyNaNLane is
923927
// true, then resume from the start of the last vector iteration via the
924928
// canonical IV, otherwise from the original value.
925929
for (auto &R : Plan.getScalarPreheader()->phis()) {
@@ -930,8 +934,9 @@ bool VPlanTransforms::handleMaxMinNumReductions(VPlan &Plan) {
930934
if (auto *DerivedIV = dyn_cast<VPDerivedIVRecipe>(VecV)) {
931935
if (DerivedIV->getNumUsers() == 1 &&
932936
DerivedIV->getOperand(1) == &Plan.getVectorTripCount()) {
933-
auto *NewSel = MiddleBuilder.createSelect(
934-
AnyNaN, LoopRegion->getCanonicalIV(), &Plan.getVectorTripCount());
937+
auto *NewSel =
938+
MiddleBuilder.createSelect(AnyNaNLane, LoopRegion->getCanonicalIV(),
939+
&Plan.getVectorTripCount());
935940
DerivedIV->moveAfter(&*MiddleBuilder.getInsertPoint());
936941
DerivedIV->setOperand(1, NewSel);
937942
continue;
@@ -944,15 +949,16 @@ bool VPlanTransforms::handleMaxMinNumReductions(VPlan &Plan) {
944949
"FMaxNum/FMinNum reduction.\n");
945950
return false;
946951
}
947-
auto *NewSel =
948-
MiddleBuilder.createSelect(AnyNaN, LoopRegion->getCanonicalIV(), VecV);
952+
auto *NewSel = MiddleBuilder.createSelect(
953+
AnyNaNLane, LoopRegion->getCanonicalIV(), VecV);
949954
ResumeR->setOperand(0, NewSel);
950955
}
951956

952957
auto *MiddleTerm = MiddleVPBB->getTerminator();
953-
Builder.setInsertPoint(MiddleTerm);
958+
MiddleBuilder.setInsertPoint(MiddleTerm);
954959
VPValue *MiddleCond = MiddleTerm->getOperand(0);
955-
VPValue *NewCond = Builder.createAnd(MiddleCond, Builder.createNot(AnyNaN));
960+
VPValue *NewCond =
961+
MiddleBuilder.createAnd(MiddleCond, MiddleBuilder.createNot(AnyNaNLane));
956962
MiddleTerm->setOperand(0, NewCond);
957963
return true;
958964
}

llvm/test/Transforms/LoopVectorize/AArch64/fmax-without-fast-math-flags.ll

Lines changed: 12 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -145,47 +145,42 @@ define float @test_fmax_and_fmin(ptr %src.0, ptr %src.1, i64 %n) {
145145
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 8
146146
; CHECK-NEXT: [[TMP8:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD5]], [[WIDE_LOAD5]]
147147
; CHECK-NEXT: [[TMP9:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD6]], [[WIDE_LOAD6]]
148-
; CHECK-NEXT: [[TMP10:%.*]] = freeze <4 x i1> [[TMP8]]
149-
; CHECK-NEXT: [[TMP11:%.*]] = freeze <4 x i1> [[TMP9]]
150-
; CHECK-NEXT: [[TMP12:%.*]] = or <4 x i1> [[TMP10]], [[TMP11]]
151-
; CHECK-NEXT: [[TMP13:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP12]])
152-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP13]], i64 0
153-
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer
154148
; CHECK-NEXT: [[TMP14:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD]]
155149
; CHECK-NEXT: [[TMP15:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD4]], [[WIDE_LOAD4]]
156-
; CHECK-NEXT: [[TMP16:%.*]] = freeze <4 x i1> [[TMP14]]
157-
; CHECK-NEXT: [[TMP17:%.*]] = freeze <4 x i1> [[TMP15]]
150+
; CHECK-NEXT: [[TMP12:%.*]] = or <4 x i1> [[TMP8]], [[TMP14]]
151+
; CHECK-NEXT: [[TMP13:%.*]] = or <4 x i1> [[TMP9]], [[TMP15]]
152+
; CHECK-NEXT: [[TMP16:%.*]] = freeze <4 x i1> [[TMP12]]
153+
; CHECK-NEXT: [[TMP17:%.*]] = freeze <4 x i1> [[TMP13]]
158154
; CHECK-NEXT: [[TMP18:%.*]] = or <4 x i1> [[TMP16]], [[TMP17]]
159155
; CHECK-NEXT: [[TMP19:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP18]])
160156
; CHECK-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <4 x i1> poison, i1 [[TMP19]], i64 0
161157
; CHECK-NEXT: [[BROADCAST_SPLAT8:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT7]], <4 x i1> poison, <4 x i32> zeroinitializer
162-
; CHECK-NEXT: [[TMP20:%.*]] = or i1 [[TMP13]], [[TMP19]]
163158
; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
164-
; CHECK-NEXT: [[TMP22:%.*]] = or i1 [[TMP20]], [[TMP21]]
165-
; CHECK-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
159+
; CHECK-NEXT: [[TMP20:%.*]] = or i1 [[TMP19]], [[TMP21]]
160+
; CHECK-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
166161
; CHECK: [[MIDDLE_BLOCK]]:
167-
; CHECK-NEXT: [[TMP23:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x float> [[VEC_PHI]], <4 x float> [[TMP6]]
168-
; CHECK-NEXT: [[TMP24:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x float> [[VEC_PHI1]], <4 x float> [[TMP7]]
162+
; CHECK-NEXT: [[TMP23:%.*]] = select <4 x i1> [[BROADCAST_SPLAT8]], <4 x float> [[VEC_PHI]], <4 x float> [[TMP6]]
163+
; CHECK-NEXT: [[TMP24:%.*]] = select <4 x i1> [[BROADCAST_SPLAT8]], <4 x float> [[VEC_PHI1]], <4 x float> [[TMP7]]
169164
; CHECK-NEXT: [[TMP25:%.*]] = select <4 x i1> [[BROADCAST_SPLAT8]], <4 x float> [[VEC_PHI2]], <4 x float> [[TMP4]]
170165
; CHECK-NEXT: [[TMP26:%.*]] = select <4 x i1> [[BROADCAST_SPLAT8]], <4 x float> [[VEC_PHI3]], <4 x float> [[TMP5]]
171-
; CHECK-NEXT: [[TMP27:%.*]] = select i1 [[TMP20]], i64 [[IV]], i64 [[N_VEC]]
166+
; CHECK-NEXT: [[TMP27:%.*]] = select i1 [[TMP19]], i64 [[IV]], i64 [[N_VEC]]
172167
; CHECK-NEXT: [[RDX_MINMAX:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[TMP23]], <4 x float> [[TMP24]])
173168
; CHECK-NEXT: [[TMP28:%.*]] = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[RDX_MINMAX]])
174169
; CHECK-NEXT: [[RDX_MINMAX9:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[TMP25]], <4 x float> [[TMP26]])
175170
; CHECK-NEXT: [[TMP29:%.*]] = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[RDX_MINMAX9]])
176171
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
177-
; CHECK-NEXT: [[TMP30:%.*]] = xor i1 [[TMP20]], true
172+
; CHECK-NEXT: [[TMP30:%.*]] = xor i1 [[TMP19]], true
178173
; CHECK-NEXT: [[TMP31:%.*]] = and i1 [[CMP_N]], [[TMP30]]
179174
; CHECK-NEXT: br i1 [[TMP31]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
180175
; CHECK: [[SCALAR_PH]]:
181176
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP27]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
182177
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP28]], %[[MIDDLE_BLOCK]] ], [ 0.000000e+00, %[[ENTRY]] ]
183-
; CHECK-NEXT: [[BC_MERGE_RDX10:%.*]] = phi float [ [[TMP29]], %[[MIDDLE_BLOCK]] ], [ 0.000000e+00, %[[ENTRY]] ]
178+
; CHECK-NEXT: [[BC_MERGE_RDX8:%.*]] = phi float [ [[TMP29]], %[[MIDDLE_BLOCK]] ], [ 0.000000e+00, %[[ENTRY]] ]
184179
; CHECK-NEXT: br label %[[LOOP:.*]]
185180
; CHECK: [[LOOP]]:
186181
; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
187182
; CHECK-NEXT: [[MIN:%.*]] = phi float [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MIN_NEXT:%.*]], %[[LOOP]] ]
188-
; CHECK-NEXT: [[MAX:%.*]] = phi float [ [[BC_MERGE_RDX10]], %[[SCALAR_PH]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ]
183+
; CHECK-NEXT: [[MAX:%.*]] = phi float [ [[BC_MERGE_RDX8]], %[[SCALAR_PH]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ]
189184
; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr inbounds nuw float, ptr [[SRC_0]], i64 [[IV1]]
190185
; CHECK-NEXT: [[GEP_SRC_3:%.*]] = getelementptr inbounds nuw float, ptr [[SRC_1]], i64 [[IV1]]
191186
; CHECK-NEXT: [[L_0:%.*]] = load float, ptr [[GEP_SRC_2]], align 4

llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags-interleave.ll

Lines changed: 12 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -145,47 +145,42 @@ define float @test_fmax_and_fmin(ptr %src.0, ptr %src.1, i64 %n) {
145145
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 8
146146
; CHECK-NEXT: [[TMP8:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD5]], [[WIDE_LOAD5]]
147147
; CHECK-NEXT: [[TMP9:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD6]], [[WIDE_LOAD6]]
148-
; CHECK-NEXT: [[TMP10:%.*]] = freeze <4 x i1> [[TMP8]]
149-
; CHECK-NEXT: [[TMP11:%.*]] = freeze <4 x i1> [[TMP9]]
150-
; CHECK-NEXT: [[TMP12:%.*]] = or <4 x i1> [[TMP10]], [[TMP11]]
151-
; CHECK-NEXT: [[TMP13:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP12]])
152-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP13]], i64 0
153-
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer
154148
; CHECK-NEXT: [[TMP14:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD]]
155149
; CHECK-NEXT: [[TMP15:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD4]], [[WIDE_LOAD4]]
156-
; CHECK-NEXT: [[TMP16:%.*]] = freeze <4 x i1> [[TMP14]]
157-
; CHECK-NEXT: [[TMP17:%.*]] = freeze <4 x i1> [[TMP15]]
150+
; CHECK-NEXT: [[TMP12:%.*]] = or <4 x i1> [[TMP8]], [[TMP14]]
151+
; CHECK-NEXT: [[TMP13:%.*]] = or <4 x i1> [[TMP9]], [[TMP15]]
152+
; CHECK-NEXT: [[TMP16:%.*]] = freeze <4 x i1> [[TMP12]]
153+
; CHECK-NEXT: [[TMP17:%.*]] = freeze <4 x i1> [[TMP13]]
158154
; CHECK-NEXT: [[TMP18:%.*]] = or <4 x i1> [[TMP16]], [[TMP17]]
159155
; CHECK-NEXT: [[TMP19:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP18]])
160156
; CHECK-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <4 x i1> poison, i1 [[TMP19]], i64 0
161157
; CHECK-NEXT: [[BROADCAST_SPLAT8:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT7]], <4 x i1> poison, <4 x i32> zeroinitializer
162-
; CHECK-NEXT: [[TMP20:%.*]] = or i1 [[TMP13]], [[TMP19]]
163158
; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
164-
; CHECK-NEXT: [[TMP22:%.*]] = or i1 [[TMP20]], [[TMP21]]
165-
; CHECK-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
159+
; CHECK-NEXT: [[TMP20:%.*]] = or i1 [[TMP19]], [[TMP21]]
160+
; CHECK-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
166161
; CHECK: [[MIDDLE_BLOCK]]:
167-
; CHECK-NEXT: [[TMP23:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x float> [[VEC_PHI]], <4 x float> [[TMP6]]
168-
; CHECK-NEXT: [[TMP24:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x float> [[VEC_PHI1]], <4 x float> [[TMP7]]
162+
; CHECK-NEXT: [[TMP23:%.*]] = select <4 x i1> [[BROADCAST_SPLAT8]], <4 x float> [[VEC_PHI]], <4 x float> [[TMP6]]
163+
; CHECK-NEXT: [[TMP24:%.*]] = select <4 x i1> [[BROADCAST_SPLAT8]], <4 x float> [[VEC_PHI1]], <4 x float> [[TMP7]]
169164
; CHECK-NEXT: [[TMP25:%.*]] = select <4 x i1> [[BROADCAST_SPLAT8]], <4 x float> [[VEC_PHI2]], <4 x float> [[TMP4]]
170165
; CHECK-NEXT: [[TMP26:%.*]] = select <4 x i1> [[BROADCAST_SPLAT8]], <4 x float> [[VEC_PHI3]], <4 x float> [[TMP5]]
171-
; CHECK-NEXT: [[TMP27:%.*]] = select i1 [[TMP20]], i64 [[IV]], i64 [[N_VEC]]
166+
; CHECK-NEXT: [[TMP27:%.*]] = select i1 [[TMP19]], i64 [[IV]], i64 [[N_VEC]]
172167
; CHECK-NEXT: [[RDX_MINMAX:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[TMP23]], <4 x float> [[TMP24]])
173168
; CHECK-NEXT: [[TMP28:%.*]] = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[RDX_MINMAX]])
174169
; CHECK-NEXT: [[RDX_MINMAX9:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[TMP25]], <4 x float> [[TMP26]])
175170
; CHECK-NEXT: [[TMP29:%.*]] = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[RDX_MINMAX9]])
176171
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
177-
; CHECK-NEXT: [[TMP30:%.*]] = xor i1 [[TMP20]], true
172+
; CHECK-NEXT: [[TMP30:%.*]] = xor i1 [[TMP19]], true
178173
; CHECK-NEXT: [[TMP31:%.*]] = and i1 [[CMP_N]], [[TMP30]]
179174
; CHECK-NEXT: br i1 [[TMP31]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
180175
; CHECK: [[SCALAR_PH]]:
181176
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP27]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
182177
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP28]], %[[MIDDLE_BLOCK]] ], [ 0.000000e+00, %[[ENTRY]] ]
183-
; CHECK-NEXT: [[BC_MERGE_RDX10:%.*]] = phi float [ [[TMP29]], %[[MIDDLE_BLOCK]] ], [ 0.000000e+00, %[[ENTRY]] ]
178+
; CHECK-NEXT: [[BC_MERGE_RDX8:%.*]] = phi float [ [[TMP29]], %[[MIDDLE_BLOCK]] ], [ 0.000000e+00, %[[ENTRY]] ]
184179
; CHECK-NEXT: br label %[[LOOP:.*]]
185180
; CHECK: [[LOOP]]:
186181
; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
187182
; CHECK-NEXT: [[MIN:%.*]] = phi float [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MIN_NEXT:%.*]], %[[LOOP]] ]
188-
; CHECK-NEXT: [[MAX:%.*]] = phi float [ [[BC_MERGE_RDX10]], %[[SCALAR_PH]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ]
183+
; CHECK-NEXT: [[MAX:%.*]] = phi float [ [[BC_MERGE_RDX8]], %[[SCALAR_PH]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ]
189184
; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr inbounds nuw float, ptr [[SRC_0]], i64 [[IV1]]
190185
; CHECK-NEXT: [[GEP_SRC_3:%.*]] = getelementptr inbounds nuw float, ptr [[SRC_1]], i64 [[IV1]]
191186
; CHECK-NEXT: [[L_0:%.*]] = load float, ptr [[GEP_SRC_2]], align 4

0 commit comments

Comments
 (0)