Skip to content

Commit dfca6c0

Browse files
authored
[VPlan] Remove no-op SCALAR-STEPS after unrolling. (#123655)
After unrolling, there may be additional simplifications that can be applied. One example is removing SCALAR-STEPS for the first part where only the first lane is demanded. This removes redundant adds of 0 from a large number of tests (~200), many which I am still working on updating. In preparation for removing redundant WideIV steps added in #119284. PR: #123655
1 parent cca9b55 commit dfca6c0

File tree

229 files changed

+2310
-3326
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

229 files changed

+2310
-3326
lines changed

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3062,6 +3062,10 @@ class VPScalarIVStepsRecipe : public VPRecipeWithIRFlags,
30623062
hasFastMathFlags() ? getFastMathFlags() : FastMathFlags());
30633063
}
30643064

3065+
/// Return true if this VPScalarIVStepsRecipe corresponds to part 0. Note that
3066+
/// this is only accurate after the VPlan has been unrolled.
3067+
bool isPart0() { return getUnrollPart(*this) == 0; }
3068+
30653069
VP_CLASSOF_IMPL(VPDef::VPScalarIVStepsSC)
30663070

30673071
/// Generate the scalarized versions of the phi node as needed by their users.
@@ -3593,6 +3597,9 @@ class VPlan {
35933597
UFs.insert(UF);
35943598
}
35953599

3600+
/// Returns true if the VPlan already has been unrolled, i.e. it has UF = 1.
3601+
bool isUnrolled() const { return UFs.size() == 1 && UFs.back() == 1; }
3602+
35963603
/// Return a string with the name of the plan and the applicable VFs and UFs.
35973604
std::string getName() const;
35983605

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -924,6 +924,17 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
924924
return;
925925
}
926926

927+
// VPScalarIVSteps can only be simplified after unrolling. VPScalarIVSteps for
928+
// part 0 can be replaced by their start value, if only the first lane is
929+
// demanded.
930+
if (auto *Steps = dyn_cast<VPScalarIVStepsRecipe>(&R)) {
931+
if (Steps->getParent()->getPlan()->isUnrolled() && Steps->isPart0() &&
932+
vputils::onlyFirstLaneUsed(Steps)) {
933+
Steps->replaceAllUsesWith(Steps->getOperand(0));
934+
return;
935+
}
936+
}
937+
927938
VPValue *A;
928939
if (match(&R, m_Trunc(m_ZExtOrSExt(m_VPValue(A))))) {
929940
VPValue *Trunc = R.getVPSingleValue();

llvm/test/Transforms/LoopLoadElim/versioning-scev-invalidation.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,8 +64,7 @@ define void @g(ptr %dst.1, ptr %start, i64 %N) {
6464
; CHECK: vector.body:
6565
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
6666
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
67-
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 0
68-
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[LCSSA_PTR_IV_1]], i64 [[TMP4]]
67+
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[LCSSA_PTR_IV_1]], i64 [[OFFSET_IDX]]
6968
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr double, ptr [[NEXT_GEP]], i32 0
7069
; CHECK-NEXT: store <4 x double> zeroinitializer, ptr [[TMP5]], align 8
7170
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4

llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,7 @@ define void @test_blend_feeding_replicated_store_1(i64 %N, ptr noalias %src, ptr
2121
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
2222
; CHECK: [[VECTOR_BODY]]:
2323
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE30:.*]] ]
24-
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
25-
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP3]]
24+
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[INDEX]]
2625
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 0
2726
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i32>, ptr [[TMP5]], align 4
2827
; CHECK-NEXT: [[TMP6:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD]], zeroinitializer

llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,7 @@ define void @clamped_tc_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range(1,1
3030
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
3131
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
3232
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 8 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
33-
; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 0
34-
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP13]]
33+
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]]
3534
; CHECK-NEXT: [[TMP10:%.*]] = shl nuw nsw <vscale x 8 x i64> [[VEC_IND]], splat (i64 3)
3635
; CHECK-NEXT: [[TMP11:%.*]] = lshr <vscale x 8 x i64> [[BROADCAST_SPLAT]], [[TMP10]]
3736
; CHECK-NEXT: [[TMP14:%.*]] = trunc <vscale x 8 x i64> [[TMP11]] to <vscale x 8 x i8>
@@ -116,8 +115,7 @@ define void @clamped_tc_max_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range
116115
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
117116
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
118117
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 8 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
119-
; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 0
120-
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP13]]
118+
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]]
121119
; CHECK-NEXT: [[TMP10:%.*]] = shl nuw nsw <vscale x 8 x i64> [[VEC_IND]], splat (i64 3)
122120
; CHECK-NEXT: [[TMP11:%.*]] = lshr <vscale x 8 x i64> [[BROADCAST_SPLAT]], [[TMP10]]
123121
; CHECK-NEXT: [[TMP14:%.*]] = trunc <vscale x 8 x i64> [[TMP11]] to <vscale x 8 x i8>

llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -359,8 +359,7 @@ define void @latch_branch_cost(ptr %dst) {
359359
; DEFAULT-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
360360
; DEFAULT: [[VEC_EPILOG_VECTOR_BODY]]:
361361
; DEFAULT-NEXT: [[INDEX1:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT2:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
362-
; DEFAULT-NEXT: [[TMP7:%.*]] = add i64 [[INDEX1]], 0
363-
; DEFAULT-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP7]]
362+
; DEFAULT-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX1]]
364363
; DEFAULT-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP8]], i32 0
365364
; DEFAULT-NEXT: store <4 x i8> zeroinitializer, ptr [[TMP9]], align 1
366365
; DEFAULT-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], 4
@@ -538,7 +537,6 @@ define i32 @header_mask_and_invariant_compare(ptr %A, ptr %B, ptr %C, ptr %D, pt
538537
; DEFAULT-NEXT: br label %[[VECTOR_BODY:.*]]
539538
; DEFAULT: [[VECTOR_BODY]]:
540539
; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE37:.*]] ]
541-
; DEFAULT-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 0
542540
; DEFAULT-NEXT: [[TMP9:%.*]] = load i32, ptr [[A]], align 4, !alias.scope [[META7:![0-9]+]]
543541
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT28:%.*]] = insertelement <4 x i32> poison, i32 [[TMP9]], i64 0
544542
; DEFAULT-NEXT: [[BROADCAST_SPLAT29:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT28]], <4 x i32> poison, <4 x i32> zeroinitializer
@@ -550,7 +548,7 @@ define i32 @header_mask_and_invariant_compare(ptr %A, ptr %B, ptr %C, ptr %D, pt
550548
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT30:%.*]] = insertelement <4 x i32> poison, i32 [[TMP7]], i64 0
551549
; DEFAULT-NEXT: [[BROADCAST_SPLAT31:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT30]], <4 x i32> poison, <4 x i32> zeroinitializer
552550
; DEFAULT-NEXT: [[TMP8:%.*]] = icmp ugt <4 x i32> [[BROADCAST_SPLAT31]], [[TMP6]]
553-
; DEFAULT-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[D]], i64 [[TMP10]]
551+
; DEFAULT-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[D]], i64 [[INDEX]]
554552
; DEFAULT-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP8]], i32 0
555553
; DEFAULT-NEXT: br i1 [[TMP20]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
556554
; DEFAULT: [[PRED_STORE_IF]]:
@@ -672,8 +670,7 @@ define void @multiple_exit_conditions(ptr %src, ptr noalias %dst) #1 {
672670
; DEFAULT: [[VECTOR_BODY]]:
673671
; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
674672
; DEFAULT-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
675-
; DEFAULT-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
676-
; DEFAULT-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP0]]
673+
; DEFAULT-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[OFFSET_IDX]]
677674
; DEFAULT-NEXT: [[TMP1:%.*]] = load i16, ptr [[SRC]], align 2
678675
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[TMP1]], i64 0
679676
; DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
@@ -732,8 +729,7 @@ define void @multiple_exit_conditions(ptr %src, ptr noalias %dst) #1 {
732729
; PRED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
733730
; PRED-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], %[[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ]
734731
; PRED-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
735-
; PRED-NEXT: [[TMP11:%.*]] = add i64 [[OFFSET_IDX]], 0
736-
; PRED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP11]]
732+
; PRED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[OFFSET_IDX]]
737733
; PRED-NEXT: [[TMP12:%.*]] = load i16, ptr [[SRC]], align 2
738734
; PRED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i16> poison, i16 [[TMP12]], i64 0
739735
; PRED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i16> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer

llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -137,15 +137,14 @@ define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i
137137
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
138138
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], %[[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ]
139139
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
140-
; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[INDEX]], 0
141140
; CHECK-NEXT: [[TMP22:%.*]] = icmp ule <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
142141
; CHECK-NEXT: [[TMP23:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i1> [[TMP22]], <vscale x 2 x i1> zeroinitializer
143142
; CHECK-NEXT: [[TMP24:%.*]] = extractelement <vscale x 2 x i1> [[TMP23]], i32 0
144143
; CHECK-NEXT: [[TMP25:%.*]] = select i1 [[TMP24]], i64 [[CONV6]], i64 1
145144
; CHECK-NEXT: [[TMP26:%.*]] = sdiv i64 [[M]], [[TMP25]]
146145
; CHECK-NEXT: [[TMP27:%.*]] = trunc i64 [[TMP26]] to i32
147146
; CHECK-NEXT: [[TMP28:%.*]] = mul i64 [[TMP26]], [[CONV61]]
148-
; CHECK-NEXT: [[TMP29:%.*]] = sub i64 [[TMP21]], [[TMP28]]
147+
; CHECK-NEXT: [[TMP29:%.*]] = sub i64 [[INDEX]], [[TMP28]]
149148
; CHECK-NEXT: [[TMP30:%.*]] = trunc i64 [[TMP29]] to i32
150149
; CHECK-NEXT: [[TMP31:%.*]] = mul i32 [[X]], [[TMP27]]
151150
; CHECK-NEXT: [[TMP32:%.*]] = add i32 [[TMP31]], [[TMP30]]
@@ -261,9 +260,8 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) {
261260
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
262261
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], %[[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ]
263262
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
264-
; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[INDEX]], 0
265263
; CHECK-NEXT: [[TMP23:%.*]] = udiv <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
266-
; CHECK-NEXT: [[TMP24:%.*]] = urem i64 [[TMP21]], [[MUL_2_I]]
264+
; CHECK-NEXT: [[TMP24:%.*]] = urem i64 [[INDEX]], [[MUL_2_I]]
267265
; CHECK-NEXT: [[TMP25:%.*]] = udiv i64 [[TMP24]], [[MUL_1_I]]
268266
; CHECK-NEXT: [[TMP26:%.*]] = urem i64 [[TMP24]], [[MUL_1_I]]
269267
; CHECK-NEXT: [[TMP27:%.*]] = udiv i64 [[TMP26]], [[X]]

llvm/test/Transforms/LoopVectorize/AArch64/drop-poison-generating-flags.ll

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,14 +14,13 @@ define void @check_widen_intrinsic_with_nnan(ptr noalias %dst.0, ptr noalias %ds
1414
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
1515
; CHECK: [[VECTOR_BODY]]:
1616
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE6:.*]] ]
17-
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
18-
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds double, ptr [[SRC_1]], i64 [[TMP0]]
17+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds double, ptr [[SRC_1]], i64 [[INDEX]]
1918
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i32 0
2019
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x double>, ptr [[TMP2]], align 8
2120
; CHECK-NEXT: [[TMP3:%.*]] = call <4 x double> @llvm.fabs.v4f64(<4 x double> [[WIDE_LOAD]])
2221
; CHECK-NEXT: [[TMP4:%.*]] = fcmp olt <4 x double> [[TMP3]], splat (double 1.000000e+00)
2322
; CHECK-NEXT: [[TMP5:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true)
24-
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[TMP0]], -1
23+
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], -1
2524
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr double, ptr [[DST_0]], i64 [[TMP6]]
2625
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr double, ptr [[TMP7]], i32 0
2726
; CHECK-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> zeroinitializer, ptr [[TMP8]], i32 8, <4 x i1> [[TMP5]])
@@ -57,7 +56,7 @@ define void @check_widen_intrinsic_with_nnan(ptr noalias %dst.0, ptr noalias %ds
5756
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE6]]
5857
; CHECK: [[PRED_LOAD_CONTINUE6]]:
5958
; CHECK-NEXT: [[TMP24:%.*]] = phi <4 x double> [ [[TMP20]], %[[PRED_LOAD_CONTINUE4]] ], [ [[TMP23]], %[[PRED_LOAD_IF5]] ]
60-
; CHECK-NEXT: [[TMP25:%.*]] = add i64 [[TMP0]], -1
59+
; CHECK-NEXT: [[TMP25:%.*]] = add i64 [[INDEX]], -1
6160
; CHECK-NEXT: [[TMP26:%.*]] = getelementptr double, ptr [[DST_0]], i64 [[TMP25]]
6261
; CHECK-NEXT: [[TMP27:%.*]] = getelementptr double, ptr [[TMP26]], i32 0
6362
; CHECK-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> zeroinitializer, ptr [[TMP27]], i32 8, <4 x i1> [[TMP4]])

llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,7 @@ define void @f1(ptr %A) #0 {
2222
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
2323
; CHECK: vector.body:
2424
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
25-
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0
26-
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP6]]
25+
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]]
2726
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0
2827
; CHECK-NEXT: store <vscale x 4 x i32> splat (i32 1), ptr [[TMP8]], align 4
2928
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]

llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-factors.ll

Lines changed: 9 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -67,15 +67,14 @@ define void @add_i8(ptr noalias nocapture noundef writeonly %A, ptr nocapture no
6767
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
6868
; CHECK: vec.epilog.vector.body:
6969
; CHECK-NEXT: [[INDEX11:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT14:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
70-
; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[INDEX11]], 0
71-
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP21]]
70+
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[INDEX11]]
7271
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i8, ptr [[TMP22]], i32 0
7372
; CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load <8 x i8>, ptr [[TMP23]], align 1
74-
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 [[TMP21]]
73+
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 [[INDEX11]]
7574
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i8, ptr [[TMP24]], i32 0
7675
; CHECK-NEXT: [[WIDE_LOAD13:%.*]] = load <8 x i8>, ptr [[TMP25]], align 1
7776
; CHECK-NEXT: [[TMP26:%.*]] = add <8 x i8> [[WIDE_LOAD13]], [[WIDE_LOAD12]]
78-
; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP21]]
77+
; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX11]]
7978
; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[TMP27]], i32 0
8079
; CHECK-NEXT: store <8 x i8> [[TMP26]], ptr [[TMP28]], align 1
8180
; CHECK-NEXT: [[INDEX_NEXT14]] = add nuw i64 [[INDEX11]], 8
@@ -185,15 +184,14 @@ define void @add_i16(ptr noalias nocapture noundef writeonly %A, ptr nocapture n
185184
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
186185
; CHECK: vec.epilog.vector.body:
187186
; CHECK-NEXT: [[INDEX11:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT14:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
188-
; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[INDEX11]], 0
189-
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i16, ptr [[B]], i64 [[TMP21]]
187+
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i16, ptr [[B]], i64 [[INDEX11]]
190188
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i16, ptr [[TMP22]], i32 0
191189
; CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load <4 x i16>, ptr [[TMP23]], align 1
192-
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i16, ptr [[C]], i64 [[TMP21]]
190+
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i16, ptr [[C]], i64 [[INDEX11]]
193191
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i16, ptr [[TMP24]], i32 0
194192
; CHECK-NEXT: [[WIDE_LOAD13:%.*]] = load <4 x i16>, ptr [[TMP25]], align 1
195193
; CHECK-NEXT: [[TMP26:%.*]] = add <4 x i16> [[WIDE_LOAD13]], [[WIDE_LOAD12]]
196-
; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[TMP21]]
194+
; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[INDEX11]]
197195
; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i16, ptr [[TMP27]], i32 0
198196
; CHECK-NEXT: store <4 x i16> [[TMP26]], ptr [[TMP28]], align 1
199197
; CHECK-NEXT: [[INDEX_NEXT14]] = add nuw i64 [[INDEX11]], 4
@@ -303,15 +301,14 @@ define void @add_i32(ptr noalias nocapture noundef writeonly %A, ptr nocapture n
303301
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
304302
; CHECK: vec.epilog.vector.body:
305303
; CHECK-NEXT: [[INDEX11:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT14:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
306-
; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[INDEX11]], 0
307-
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP21]]
304+
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX11]]
308305
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[TMP22]], i32 0
309306
; CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load <4 x i32>, ptr [[TMP23]], align 1
310-
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP21]]
307+
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDEX11]]
311308
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[TMP24]], i32 0
312309
; CHECK-NEXT: [[WIDE_LOAD13:%.*]] = load <4 x i32>, ptr [[TMP25]], align 1
313310
; CHECK-NEXT: [[TMP26:%.*]] = add <4 x i32> [[WIDE_LOAD13]], [[WIDE_LOAD12]]
314-
; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP21]]
311+
; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX11]]
315312
; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, ptr [[TMP27]], i32 0
316313
; CHECK-NEXT: store <4 x i32> [[TMP26]], ptr [[TMP28]], align 1
317314
; CHECK-NEXT: [[INDEX_NEXT14]] = add nuw i64 [[INDEX11]], 4

0 commit comments

Comments
 (0)