Skip to content

[VPlan] Consistently use (Part, 0) for first lane scalar values #80271

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 25 commits into from
Feb 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
f4dabdf
[VPlan] Update VPInst::onlyFirstLaneUsed to check users.
fhahn Jan 31, 2024
b08e892
[VPlan] Consistently use (Part, 0) for first lane scalar values
fhahn Jan 31, 2024
f56e217
Merge branch 'main' into users/fhahn/vplan-uniform-scalar-lanes
fhahn Feb 3, 2024
172dbf6
!fixup fix merge
fhahn Feb 3, 2024
d2c51ec
Merge branch 'main' into users/fhahn/vplan-uniform-scalar-lanes
fhahn Feb 6, 2024
82d74df
Merge branch 'main' into users/fhahn/vplan-uniform-scalar-lanes
fhahn Feb 7, 2024
c6797e6
!fixup address latest comments, thanks!
fhahn Feb 7, 2024
53f2937
!fixup fix formatting
fhahn Feb 7, 2024
a166da5
Merge branch 'main' into users/fhahn/vplan-uniform-scalar-lanes
fhahn Feb 8, 2024
8b6fd60
Merge remote-tracking branch 'origin/main' into users/fhahn/vplan-uni…
fhahn Feb 17, 2024
865da64
!fixup update remaining places to use (Part, 0).
fhahn Feb 18, 2024
f71f752
Merge remote-tracking branch 'origin/main' into users/fhahn/vplan-uni…
fhahn Feb 18, 2024
ddf5f75
Merge remote-tracking branch 'origin/users/fhahn/vplan-uniform-scalar…
fhahn Feb 18, 2024
d6538e0
Merge remote-tracking branch 'origin/main' into users/fhahn/vplan-uni…
fhahn Feb 18, 2024
6429fdb
!fixup address parts of outstanding comments, thanks!
fhahn Feb 19, 2024
89f7a80
Merge remote-tracking branch 'origin/main' into users/fhahn/vplan-uni…
fhahn Feb 19, 2024
567faea
!fixup address remaining comments
fhahn Feb 19, 2024
d9760f1
!fixup add TODOs for reduction recipes.
fhahn Feb 19, 2024
d72a629
Merge branch 'main' into users/fhahn/vplan-uniform-scalar-lanes
fhahn Feb 21, 2024
fa8f747
!fixup also use new scheme for in-loop reductions.
fhahn Feb 21, 2024
57b4229
Merge remote-tracking branch 'origin/main' into users/fhahn/vplan-uni…
fhahn Feb 21, 2024
8b48685
!fixup address remaining comments, thanks
fhahn Feb 22, 2024
e038070
Merge branch 'main' into users/fhahn/vplan-uniform-scalar-lanes
fhahn Feb 26, 2024
031df8e
!fixup address remaining comments, thanks!
fhahn Feb 26, 2024
4c2f243
Fix formatting.
fhahn Feb 26, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 7 additions & 8 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9127,7 +9127,7 @@ void VPWidenPointerInductionRecipe::execute(VPTransformState &State) {
"Unexpected type.");

auto *IVR = getParent()->getPlan()->getCanonicalIV();
PHINode *CanonicalIV = cast<PHINode>(State.get(IVR, 0));
PHINode *CanonicalIV = cast<PHINode>(State.get(IVR, 0, /*IsScalar*/ true));

if (onlyScalarsGenerated(State.VF.isScalable())) {
// This is the normalized GEP that starts counting at zero.
Expand Down Expand Up @@ -9243,7 +9243,7 @@ void VPInterleaveRecipe::execute(VPTransformState &State) {

void VPReductionRecipe::execute(VPTransformState &State) {
assert(!State.Instance && "Reduction being replicated.");
Value *PrevInChain = State.get(getChainOp(), 0);
Value *PrevInChain = State.get(getChainOp(), 0, /*IsScalar*/ true);
RecurKind Kind = RdxDesc.getRecurrenceKind();
bool IsOrdered = State.ILV->useOrderedReductions(RdxDesc);
// Propagate the fast-math flags carried by the underlying instruction.
Expand All @@ -9252,8 +9252,7 @@ void VPReductionRecipe::execute(VPTransformState &State) {
for (unsigned Part = 0; Part < State.UF; ++Part) {
Value *NewVecOp = State.get(getVecOp(), Part);
if (VPValue *Cond = getCondOp()) {
Value *NewCond = State.VF.isVector() ? State.get(Cond, Part)
: State.get(Cond, {Part, 0});
Value *NewCond = State.get(Cond, Part, State.VF.isScalar());
VectorType *VecTy = dyn_cast<VectorType>(NewVecOp->getType());
Type *ElementTy = VecTy ? VecTy->getElementType() : NewVecOp->getType();
Value *Iden = RdxDesc.getRecurrenceIdentity(Kind, ElementTy,
Expand All @@ -9278,7 +9277,7 @@ void VPReductionRecipe::execute(VPTransformState &State) {
NewVecOp);
PrevInChain = NewRed;
} else {
PrevInChain = State.get(getChainOp(), Part);
PrevInChain = State.get(getChainOp(), Part, /*IsScalar*/ true);
NewRed = createTargetReduction(State.Builder, RdxDesc, NewVecOp);
}
if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind)) {
Expand All @@ -9289,7 +9288,7 @@ void VPReductionRecipe::execute(VPTransformState &State) {
else
NextInChain = State.Builder.CreateBinOp(
(Instruction::BinaryOps)RdxDesc.getOpcode(Kind), NewRed, PrevInChain);
State.set(this, NextInChain, Part);
State.set(this, NextInChain, Part, /*IsScalar*/ true);
}
}

Expand Down Expand Up @@ -9404,7 +9403,7 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
// We don't want to update the value in the map as it might be used in
// another expression. So don't call resetVectorValue(StoredVal).
}
auto *VecPtr = State.get(getAddr(), Part);
auto *VecPtr = State.get(getAddr(), Part, /*IsScalar*/ true);
if (isMaskRequired)
NewSI = Builder.CreateMaskedStore(StoredVal, VecPtr, Alignment,
BlockInMaskParts[Part]);
Expand All @@ -9428,7 +9427,7 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
nullptr, "wide.masked.gather");
State.addMetadata(NewLI, LI);
} else {
auto *VecPtr = State.get(getAddr(), Part);
auto *VecPtr = State.get(getAddr(), Part, /*IsScalar*/ true);
if (isMaskRequired)
NewLI = Builder.CreateMaskedLoad(
DataTy, VecPtr, Alignment, BlockInMaskParts[Part],
Expand Down
35 changes: 21 additions & 14 deletions llvm/lib/Transforms/Vectorize/VPlan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,16 @@ Value *VPTransformState::get(VPValue *Def, const VPIteration &Instance) {
return Extract;
}

Value *VPTransformState::get(VPValue *Def, unsigned Part) {
Value *VPTransformState::get(VPValue *Def, unsigned Part, bool NeedsScalar) {
if (NeedsScalar) {
assert((VF.isScalar() || Def->isLiveIn() ||
(hasScalarValue(Def, VPIteration(Part, 0)) &&
Data.PerPartScalars[Def][Part].size() == 1)) &&
"Trying to access a single scalar per part but has multiple scalars "
"per part.");
return get(Def, VPIteration(Part, 0));
}

// If Values have been set for this Def return the one relevant for \p Part.
if (hasVectorValue(Def, Part))
return Data.PerPartOutput[Def][Part];
Expand Down Expand Up @@ -789,21 +798,15 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
auto *TCMO = Builder.CreateSub(TripCountV,
ConstantInt::get(TripCountV->getType(), 1),
"trip.count.minus.1");
auto VF = State.VF;
Value *VTCMO =
VF.isScalar() ? TCMO : Builder.CreateVectorSplat(VF, TCMO, "broadcast");
for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
State.set(BackedgeTakenCount, VTCMO, Part);
BackedgeTakenCount->setUnderlyingValue(TCMO);
}

for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
State.set(&VectorTripCount, VectorTripCountV, Part);
VectorTripCount.setUnderlyingValue(VectorTripCountV);

IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
// FIXME: Model VF * UF computation completely in VPlan.
State.set(&VFxUF,
createStepForVF(Builder, TripCountV->getType(), State.VF, State.UF),
0);
VFxUF.setUnderlyingValue(
createStepForVF(Builder, TripCountV->getType(), State.VF, State.UF));

// When vectorizing the epilogue loop, the canonical induction start value
// needs to be changed from zero to the value after the main vector loop.
Expand Down Expand Up @@ -884,12 +887,16 @@ void VPlan::execute(VPTransformState *State) {
isa<VPFirstOrderRecurrencePHIRecipe>(PhiR) ||
(isa<VPReductionPHIRecipe>(PhiR) &&
cast<VPReductionPHIRecipe>(PhiR)->isOrdered());
bool NeedsScalar = isa<VPCanonicalIVPHIRecipe>(PhiR) ||
(isa<VPReductionPHIRecipe>(PhiR) &&
cast<VPReductionPHIRecipe>(PhiR)->isInLoop());
unsigned LastPartForNewPhi = SinglePartNeeded ? 1 : State->UF;

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
bool IsScalarNeeded = isa<VPCanonicalIVPHIRecipe>(PhiR);

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added, thanks!

for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
Value *Phi = State->get(PhiR, Part);
Value *Val = State->get(PhiR->getBackedgeValue(),
SinglePartNeeded ? State->UF - 1 : Part);
Value *Phi = State->get(PhiR, Part, NeedsScalar);
Value *Val =
State->get(PhiR->getBackedgeValue(),
SinglePartNeeded ? State->UF - 1 : Part, NeedsScalar);
cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB);
}
}
Expand Down
26 changes: 21 additions & 5 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -259,9 +259,10 @@ struct VPTransformState {
DenseMap<VPValue *, ScalarsPerPartValuesTy> PerPartScalars;
} Data;

/// Get the generated Value for the given VPValue \p Def and the given \p Part.
/// \see set.
Value *get(VPValue *Def, unsigned Part);
/// Get the generated vector Value for a given VPValue \p Def and a given \p
/// Part if \p IsScalar is false, otherwise return the generated scalar
/// for \p Part. \See set.
Value *get(VPValue *Def, unsigned Part, bool IsScalar = false);

/// Get the generated Value for a given VPValue and given Part and Lane.
Value *get(VPValue *Def, const VPIteration &Instance);
Expand All @@ -282,14 +283,22 @@ struct VPTransformState {
I->second[Instance.Part][CacheIdx];
}

/// Set the generated Value for a given VPValue and a given Part.
void set(VPValue *Def, Value *V, unsigned Part) {
/// Set the generated vector Value for a given VPValue and a given Part, if \p
/// IsScalar is false. If \p IsScalar is true, set the scalar in (Part, 0).
void set(VPValue *Def, Value *V, unsigned Part, bool IsScalar = false) {
if (IsScalar) {
set(Def, V, VPIteration(Part, 0));
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: can early return, leaving the rest intact, consistent with get() above.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done, thanks!

return;
}
assert((VF.isScalar() || V->getType()->isVectorTy()) &&
"scalar values must be stored as (Part, 0)");
if (!Data.PerPartOutput.count(Def)) {
DataState::PerPartValuesTy Entry(UF);
Data.PerPartOutput[Def] = Entry;
}
Data.PerPartOutput[Def][Part] = V;
}

/// Reset an existing vector value for \p Def and a given \p Part.
void reset(VPValue *Def, Value *V, unsigned Part) {
auto Iter = Data.PerPartOutput.find(Def);
Expand Down Expand Up @@ -1376,6 +1385,13 @@ class VPScalarCastRecipe : public VPSingleDefRecipe {

/// Returns the result type of the cast.
Type *getResultType() const { return ResultTy; }

bool onlyFirstLaneUsed(const VPValue *Op) const override {
// At the moment, only uniform codegen is implemented.
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
return true;
}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can be pushed separately but only testable with rest of patch?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes exactly.

};

/// A recipe for widening Call instructions.
Expand Down
36 changes: 21 additions & 15 deletions llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -279,11 +279,12 @@ Value *VPInstruction::generateInstruction(VPTransformState &State,
Builder.SetCurrentDebugLocation(getDebugLoc());

if (Instruction::isBinaryOp(getOpcode())) {
bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
if (Part != 0 && vputils::onlyFirstPartUsed(this))
return State.get(this, 0);
return State.get(this, 0, OnlyFirstLaneUsed);

Value *A = State.get(getOperand(0), Part);
Value *B = State.get(getOperand(1), Part);
Value *A = State.get(getOperand(0), Part, OnlyFirstLaneUsed);
Value *B = State.get(getOperand(1), Part, OnlyFirstLaneUsed);
auto *Res =
Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B, Name);
if (auto *I = dyn_cast<Instruction>(Res))
Expand Down Expand Up @@ -385,8 +386,8 @@ Value *VPInstruction::generateInstruction(VPTransformState &State,
if (Part != 0)
return nullptr;
// First create the compare.
Value *IV = State.get(getOperand(0), Part);
Value *TC = State.get(getOperand(1), Part);
Value *IV = State.get(getOperand(0), Part, /*IsScalar*/ true);
Value *TC = State.get(getOperand(1), Part, /*IsScalar*/ true);
Value *Cond = Builder.CreateICmpEQ(IV, TC);

// Now create the branch.
Expand All @@ -407,7 +408,7 @@ Value *VPInstruction::generateInstruction(VPTransformState &State,
}
case VPInstruction::ComputeReductionResult: {
if (Part != 0)
return State.get(this, 0);
return State.get(this, 0, /*IsScalar*/ true);

// FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary
// and will be removed by breaking up the recipe further.
Expand All @@ -424,7 +425,7 @@ Value *VPInstruction::generateInstruction(VPTransformState &State,
Type *PhiTy = OrigPhi->getType();
VectorParts RdxParts(State.UF);
for (unsigned Part = 0; Part < State.UF; ++Part)
RdxParts[Part] = State.get(LoopExitingDef, Part);
RdxParts[Part] = State.get(LoopExitingDef, Part, PhiR->isInLoop());

// If the vector reduction can be performed in a smaller type, we truncate
// then extend the loop exit value to enable InstCombine to evaluate the
Expand Down Expand Up @@ -512,9 +513,15 @@ void VPInstruction::execute(VPTransformState &State) {
if (!hasResult())
continue;
assert(GeneratedValue && "generateInstruction must produce a value");
State.set(this, GeneratedValue, Part);

bool IsVector = GeneratedValue->getType()->isVectorTy();
State.set(this, GeneratedValue, Part, !IsVector);
assert((IsVector || getOpcode() == VPInstruction::ComputeReductionResult ||
State.VF.isScalar() || vputils::onlyFirstLaneUsed(this)) &&
"scalar value but not only first lane used");
}
}

bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
if (Instruction::isBinaryOp(getOpcode()))
Expand All @@ -530,8 +537,7 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
case VPInstruction::CalculateTripCountMinusVF:
case VPInstruction::CanonicalIVIncrementForPart:
case VPInstruction::BranchOnCount:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add case ComputeReductionResult?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ComputeReductionResult combines the partial reduction vector values, so it should use more than the first lane per operand. It produces a single scalar value though.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ahh, right. Got confused with onlyFirstLaneDefined.

// TODO: Cover additional operands.
return getOperand(0) == Op;
return true;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is fine, can probably be pushed separately, if testable. (written before the separate push ;-)
CalculateTripCountMinusVF and CanonicalIVIncrementForPart have a single operand, so NFC for them.
ActiveLaneMask and BranchOnCount have two operands, the (first lane of the) IV and the scalar uniform trip count.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Left as is for now, as this only is needed for this patch at the moment.

};
llvm_unreachable("switch should return");
}
Expand Down Expand Up @@ -1344,7 +1350,7 @@ void VPVectorPointerRecipe ::execute(VPTransformState &State) {
PartPtr = Builder.CreateGEP(IndexedTy, Ptr, Increment, "", InBounds);
}

State.set(this, PartPtr, Part);
State.set(this, PartPtr, Part, /*IsScalar*/ true);
}
}

Expand Down Expand Up @@ -1640,7 +1646,7 @@ void VPCanonicalIVPHIRecipe::execute(VPTransformState &State) {
EntryPart->addIncoming(Start, VectorPH);
EntryPart->setDebugLoc(getDebugLoc());
for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
State.set(this, EntryPart, Part);
State.set(this, EntryPart, Part, /*IsScalar*/ true);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
Expand Down Expand Up @@ -1711,7 +1717,7 @@ void VPExpandSCEVRecipe::print(raw_ostream &O, const Twine &Indent,
#endif

void VPWidenCanonicalIVRecipe::execute(VPTransformState &State) {
Value *CanonicalIV = State.get(getOperand(0), 0);
Value *CanonicalIV = State.get(getOperand(0), 0, /*IsScalar*/ true);
Type *STy = CanonicalIV->getType();
IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
ElementCount VF = State.VF;
Expand Down Expand Up @@ -1801,7 +1807,7 @@ void VPReductionPHIRecipe::execute(VPTransformState &State) {
for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
Instruction *EntryPart = PHINode::Create(VecTy, 2, "vec.phi");
EntryPart->insertBefore(HeaderBB->getFirstInsertionPt());
State.set(this, EntryPart, Part);
State.set(this, EntryPart, Part, IsInLoop);
}

BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
Expand Down Expand Up @@ -1833,7 +1839,7 @@ void VPReductionPHIRecipe::execute(VPTransformState &State) {
}

for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
Value *EntryPart = State.get(this, Part);
Value *EntryPart = State.get(this, Part, IsInLoop);
// Make sure to add the reduction start value only to the
// first unroll part.
Value *StartVal = (Part == 0) ? StartV : Iden;
Expand Down
12 changes: 6 additions & 6 deletions llvm/lib/Transforms/Vectorize/VPlanValue.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,12 +73,6 @@ class VPValue {
// for multiple underlying IRs (Polly?) by providing a new VPlan front-end,
// back-end and analysis information for the new IR.

// Set \p Val as the underlying Value of this VPValue.
void setUnderlyingValue(Value *Val) {
assert(!UnderlyingVal && "Underlying Value is already set.");
UnderlyingVal = Val;
}

public:
/// Return the underlying Value attached to this VPValue.
Value *getUnderlyingValue() { return UnderlyingVal; }
Expand Down Expand Up @@ -192,6 +186,12 @@ class VPValue {
/// is a live-in value.
/// TODO: Also handle recipes defined in pre-header blocks.
bool isDefinedOutsideVectorRegions() const { return !hasDefiningRecipe(); }

// Set \p Val as the underlying Value of this VPValue.
void setUnderlyingValue(Value *Val) {
assert(!UnderlyingVal && "Underlying Value is already set.");
UnderlyingVal = Val;
}
};

typedef DenseMap<Value *, VPValue *> Value2VPValueTy;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -117,10 +117,10 @@ define void @simple_memset_tailfold(i32 %val, ptr %ptr, i64 %n) "target-features
; DATA_NO_LANEMASK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
; DATA_NO_LANEMASK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; DATA_NO_LANEMASK-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[UMAX]], 1
; DATA_NO_LANEMASK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
; DATA_NO_LANEMASK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; DATA_NO_LANEMASK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
; DATA_NO_LANEMASK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 4
; DATA_NO_LANEMASK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
; DATA_NO_LANEMASK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; DATA_NO_LANEMASK-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[VAL:%.*]], i64 0
; DATA_NO_LANEMASK-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT4]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; DATA_NO_LANEMASK-NEXT: br label [[VECTOR_BODY:%.*]]
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/Transforms/LoopVectorize/X86/small-size.ll
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ define void @example2(i32 %n, i32 %x) optsize {
; CHECK-NEXT: [[BROADCAST_SPLATINSERT17:%.*]] = insertelement <4 x i64> poison, i64 [[TRIP_COUNT_MINUS_116]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT18:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT17]], <4 x i64> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY19:%.*]]
; CHECK: vector.body19:
; CHECK: vector.body17:
; CHECK-NEXT: [[INDEX20:%.*]] = phi i64 [ 0, [[VECTOR_PH9]] ], [ [[INDEX_NEXT31:%.*]], [[PRED_STORE_CONTINUE30:%.*]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[I_0_LCSSA]], [[INDEX20]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT21:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX20]], i64 0
Expand Down