Skip to content

Commit 6ebc786

Browse files
committed
[VPlan] Implementation of onlyFirstLaneUsed for VPLiveOut class
Following up on #83068: when scalarizing a VPWidenPointerInductionRecipe, `onlyScalarsGenerated` checks whether the VF is scalable. With a scalable VF, it requires all users to use only the first lane. However, if any user happens to be a VPLiveOut, the check inevitably fails. This patch addresses this by implementing `onlyFirstLaneUsed` for the VPLiveOut class. It ensures that if the operand is a VPWidenPointerInductionRecipe, it returns true.
1 parent 3bde379 commit 6ebc786

File tree

3 files changed

+17
-12
lines changed

3 files changed

+17
-12
lines changed

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -693,6 +693,8 @@ class VPLiveOut : public VPUser {
693693
return true;
694694
}
695695

696+
bool onlyFirstLaneUsed(const VPValue *Op) const override;
697+
696698
PHINode *getPhi() const { return Phi; }
697699

698700
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,13 @@ bool VPRecipeBase::mayHaveSideEffects() const {
186186
}
187187
}
188188

189+
bool VPLiveOut::onlyFirstLaneUsed(const VPValue *Op) const {
190+
assert(is_contained(operands(), Op) &&
191+
"Op must be an operand of the recipe");
192+
193+
return vputils::isUniformAfterVectorization(getOperand(0)) || isa<VPWidenPointerInductionRecipe>(Op);
194+
}
195+
189196
void VPLiveOut::fixPhi(VPlan &Plan, VPTransformState &State) {
190197
auto Lane = VPLane::getLastLaneForVF(State.VF);
191198
VPValue *ExitValue = getOperand(0);

llvm/test/Transforms/LoopVectorize/vplan-optimize-ptr-induction.ll

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -21,26 +21,22 @@ define ptr @foo(ptr %y, float %alpha, i32 %N) {
2121
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x float> [[BROADCAST_SPLATINSERT]], <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer
2222
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
2323
; CHECK: [[VECTOR_BODY]]:
24-
; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[Y]], %[[VECTOR_PH]] ], [ [[PTR_IND:%.*]], %[[VECTOR_BODY]] ]
25-
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
26-
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
27-
; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 1
28-
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 1, [[TMP4]]
29-
; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP3]], 0
24+
; CHECK-NEXT: [[TMP6:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
25+
; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.experimental.stepvector.nxv1i64()
3026
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[TMP6]], i64 0
3127
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[DOTSPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
32-
; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 1 x i64> @llvm.experimental.stepvector.nxv1i64()
33-
; CHECK-NEXT: [[TMP8:%.*]] = add <vscale x 1 x i64> [[DOTSPLAT]], [[TMP7]]
28+
; CHECK-NEXT: [[TMP8:%.*]] = add <vscale x 1 x i64> zeroinitializer, [[TMP3]]
3429
; CHECK-NEXT: [[VECTOR_GEP:%.*]] = mul <vscale x 1 x i64> [[TMP8]], shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 1, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer)
35-
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 1 x i64> [[VECTOR_GEP]]
36-
; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 0
30+
; CHECK-NEXT: [[TMP9:%.*]] = add <vscale x 1 x i64> [[DOTSPLAT]], [[VECTOR_GEP]]
31+
; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], 0
32+
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[Y]], i64 [[TMP7]]
33+
; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[TMP6]], 0
3734
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[Y]], i64 [[TMP10]]
3835
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i32 0
3936
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 1 x float>, ptr [[TMP12]], align 4
4037
; CHECK-NEXT: [[TMP13:%.*]] = fadd fast <vscale x 1 x float> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
4138
; CHECK-NEXT: store <vscale x 1 x float> [[TMP13]], ptr [[TMP12]], align 4
42-
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP2]]
43-
; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP5]]
39+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP6]], [[TMP2]]
4440
; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
4541
; CHECK-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
4642
; CHECK: [[MIDDLE_BLOCK]]:

0 commit comments

Comments
 (0)