[LV, VP]VP intrinsics support for the Loop Vectorizer + adding new tail-folding mode using EVL. #76172

Merged: 17 commits, Apr 4, 2024
5 changes: 4 additions & 1 deletion llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -190,7 +190,10 @@ enum class TailFoldingStyle {
/// Use predicate to control both data and control flow, but modify
/// the trip count so that a runtime overflow check can be avoided
/// and such that the scalar epilogue loop can always be removed.
DataAndControlFlowWithoutRuntimeCheck
DataAndControlFlowWithoutRuntimeCheck,
/// Use predicated EVL instructions for tail-folding.
/// Indicates that VP intrinsics should be used.
DataWithEVL,
};

struct TailFoldingInfo {
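To make the new DataWithEVL style concrete, here is a rough scalar C++ sketch (purely illustrative, not from the patch) contrasting mask-based tail folding with EVL-based tail folding; the function names and loop shapes are hypothetical.

#include <algorithm>

// Mask-based tail folding: every iteration processes a full VF-wide vector,
// and a per-lane predicate (I + L < N) disables the lanes past the trip count.
void maskedTailFoldedCopy(int *A, const int *B, int N, int VF) {
  for (int I = 0; I < N; I += VF)
    for (int L = 0; L < VF; ++L)
      if (I + L < N) // lane mask
        A[I + L] = B[I + L];
}

// EVL-based tail folding (DataWithEVL): each iteration requests an explicit
// vector length, and only the first EVL lanes are active, so no lane mask is
// needed to guard the data flow.
void evlTailFoldedCopy(int *A, const int *B, int N, int VF) {
  for (int I = 0; I < N;) {
    int EVL = std::min(VF, N - I); // explicit vector length for this step
    for (int L = 0; L < EVL; ++L)
      A[I + L] = B[I + L];
    I += EVL;
  }
}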
4 changes: 4 additions & 0 deletions llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -245,6 +245,10 @@ RISCVTTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
return TTI::TCC_Free;
}

bool RISCVTTIImpl::hasActiveVectorLength(unsigned, Type *DataTy, Align) const {
return ST->hasVInstructions();
}

TargetTransformInfo::PopcntSupportKind
RISCVTTIImpl::getPopcntSupport(unsigned TyWidth) {
assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
16 changes: 16 additions & 0 deletions llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -78,6 +78,22 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
const APInt &Imm, Type *Ty,
TTI::TargetCostKind CostKind);

/// \name EVL Support for predicated vectorization.
/// Whether the target supports the %evl parameter of VP intrinsic efficiently
/// in hardware, for the given opcode and type/alignment. (see LLVM Language
/// Reference - "Vector Predication Intrinsics",
/// https://llvm.org/docs/LangRef.html#vector-predication-intrinsics and
/// "IR-level VP intrinsics",
/// https://llvm.org/docs/Proposals/VectorPredication.html#ir-level-vp-intrinsics).
/// \param Opcode the opcode of the instruction checked for predicated version
/// support.
/// \param DataType the type of the instruction with the \p Opcode checked for
/// prediction support.
/// \param Alignment the alignment for memory access operation checked for
/// predicated version support.
bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
Align Alignment) const;

TargetTransformInfo::PopcntSupportKind getPopcntSupport(unsigned TyWidth);

bool shouldExpandReduction(const IntrinsicInst *II) const;
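As a minimal sketch of how the two sides connect (the wrapper function and its name are hypothetical), the query below probes the hook the same way the cost-model code later in this patch does, passing a zero opcode, a null type, and a default alignment; on RISC-V with V instructions the override above returns true, while targets without an override keep the base-class default, which is assumed to return false.

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Support/Alignment.h"

// Hypothetical helper: returns true when the target reports efficient support
// for the %evl parameter of VP intrinsics, mirroring the query used by
// setTailFoldingStyles() in this patch.
static bool targetSupportsEVL(const llvm::TargetTransformInfo &TTI) {
  return TTI.hasActiveVectorLength(/*Opcode=*/0, /*DataType=*/nullptr,
                                   llvm::Align());
}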
205 changes: 180 additions & 25 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -124,6 +124,7 @@
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/IR/VectorBuilder.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
@@ -248,10 +249,12 @@ static cl::opt<TailFoldingStyle> ForceTailFoldingStyle(
clEnumValN(TailFoldingStyle::DataAndControlFlow, "data-and-control",
"Create lane mask using active.lane.mask intrinsic, and use "
"it for both data and control flow"),
clEnumValN(
TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck,
"data-and-control-without-rt-check",
"Similar to data-and-control, but remove the runtime check")));
clEnumValN(TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck,
"data-and-control-without-rt-check",
"Similar to data-and-control, but remove the runtime check"),
clEnumValN(TailFoldingStyle::DataWithEVL, "data-with-evl",
"Use predicated EVL instructions for tail folding. If EVL "
"is unsupported, fallback to data-without-lane-mask.")));

static cl::opt<bool> MaximizeBandwidth(
"vectorizer-maximize-bandwidth", cl::init(false), cl::Hidden,
@@ -1505,29 +1508,62 @@ class LoopVectorizationCostModel {

/// Returns the TailFoldingStyle that is best for the current loop.
TailFoldingStyle getTailFoldingStyle(bool IVUpdateMayOverflow = true) const {
return IVUpdateMayOverflow ? ChosenTailFoldingStyle.first
: ChosenTailFoldingStyle.second;
if (!ChosenTailFoldingStyle)
return TailFoldingStyle::None;
return IVUpdateMayOverflow ? ChosenTailFoldingStyle->first
: ChosenTailFoldingStyle->second;
}

/// Selects and saves TailFoldingStyle for 2 options - if IV update may
/// overflow or not.
void setTailFoldingStyles() {
assert(ChosenTailFoldingStyle.first == TailFoldingStyle::None &&
ChosenTailFoldingStyle.second == TailFoldingStyle::None &&
"Tail folding must not be selected yet.");
if (!Legal->prepareToFoldTailByMasking())
/// \param IsScalableVF true if scalable vector factors enabled.
/// \param UserIC User specific interleave count.
void setTailFoldingStyles(bool IsScalableVF, unsigned UserIC) {
assert(!ChosenTailFoldingStyle && "Tail folding must not be selected yet.");
if (!Legal->prepareToFoldTailByMasking()) {
ChosenTailFoldingStyle =
std::make_pair(TailFoldingStyle::None, TailFoldingStyle::None);
return;
}

if (ForceTailFoldingStyle.getNumOccurrences()) {
ChosenTailFoldingStyle.first = ChosenTailFoldingStyle.second =
ForceTailFoldingStyle;
if (!ForceTailFoldingStyle.getNumOccurrences()) {
ChosenTailFoldingStyle = std::make_pair(
TTI.getPreferredTailFoldingStyle(/*IVUpdateMayOverflow=*/true),
TTI.getPreferredTailFoldingStyle(/*IVUpdateMayOverflow=*/false));
return;
}

ChosenTailFoldingStyle.first =
TTI.getPreferredTailFoldingStyle(/*IVUpdateMayOverflow=*/true);
ChosenTailFoldingStyle.second =
TTI.getPreferredTailFoldingStyle(/*IVUpdateMayOverflow=*/false);
// Set styles when forced.
ChosenTailFoldingStyle = std::make_pair(ForceTailFoldingStyle.getValue(),
ForceTailFoldingStyle.getValue());
if (ForceTailFoldingStyle != TailFoldingStyle::DataWithEVL)
return;
// Override forced styles if needed.
// FIXME: use actual opcode/data type for analysis here.
// FIXME: Investigate opportunity for fixed vector factor.
bool EVLIsLegal =
IsScalableVF && UserIC <= 1 &&
TTI.hasActiveVectorLength(0, nullptr, Align()) &&
!EnableVPlanNativePath &&
// FIXME: implement support for max safe dependency distance.
Legal->isSafeForAnyVectorWidth() &&
// FIXME: remove this once reductions are supported.
Legal->getReductionVars().empty();
if (!EVLIsLegal) {
// If for some reason EVL mode is unsupported, fallback to
// DataWithoutLaneMask to try to vectorize the loop with folded tail
// in a generic way.
ChosenTailFoldingStyle =
std::make_pair(TailFoldingStyle::DataWithoutLaneMask,
TailFoldingStyle::DataWithoutLaneMask);
LLVM_DEBUG(
dbgs()
<< "LV: Preference for VP intrinsics indicated. Will "
"not try to generate VP Intrinsics "
<< (UserIC > 1
? "since interleave count specified is greater than 1.\n"
: "due to non-interleaving reasons.\n"));
}
}

/// Returns true if all loop blocks should be masked to fold tail loop.
@@ -1544,6 +1580,18 @@
return foldTailByMasking() || Legal->blockNeedsPredication(BB);
}

/// Returns true if VP intrinsics with explicit vector length support should
/// be generated in the tail folded loop.
bool foldTailWithEVL() const {
return getTailFoldingStyle() == TailFoldingStyle::DataWithEVL &&
// FIXME: remove this once vp_reverse is supported.
none_of(
WideningDecisions,
[](const std::pair<std::pair<Instruction *, ElementCount>,
std::pair<InstWidening, InstructionCost>>
&Data) { return Data.second.first == CM_Widen_Reverse; });
}

/// Returns true if the Phi is part of an inloop reduction.
bool isInLoopReduction(PHINode *Phi) const {
return InLoopReductions.contains(Phi);
@@ -1688,8 +1736,8 @@ class LoopVectorizationCostModel {

/// Control finally chosen tail folding style. The first element is used if
/// the IV update may overflow, the second element - if it does not.
std::pair<TailFoldingStyle, TailFoldingStyle> ChosenTailFoldingStyle =
std::make_pair(TailFoldingStyle::None, TailFoldingStyle::None);
std::optional<std::pair<TailFoldingStyle, TailFoldingStyle>>
ChosenTailFoldingStyle;

/// A map holding scalar costs for different vectorization factors. The
/// presence of a cost for an instruction in the mapping indicates that the
@@ -4647,9 +4695,24 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
// found modulo the vectorization factor is not zero, try to fold the tail
// by masking.
// FIXME: look for a smaller MaxVF that does divide TC rather than masking.
setTailFoldingStyles();
if (foldTailByMasking())
setTailFoldingStyles(MaxFactors.ScalableVF.isScalable(), UserIC);
if (foldTailByMasking()) {
if (getTailFoldingStyle() == TailFoldingStyle::DataWithEVL) {
LLVM_DEBUG(
dbgs()
<< "LV: tail is folded with EVL, forcing unroll factor to be 1. Will "
"try to generate VP Intrinsics with scalable vector "
"factors only.\n");
// Tail folded loop using VP intrinsics restricts the VF to be scalable
// for now.
// TODO: extend it for fixed vectors, if required.
assert(MaxFactors.ScalableVF.isScalable() &&
"Expected scalable vector factor.");

MaxFactors.FixedVF = ElementCount::getFixed(1);
}
return MaxFactors;
}

// If there was a tail-folding hint/switch, but we can't fold the tail by
// masking, fallback to a vectorization with a scalar epilogue.
@@ -5257,6 +5320,13 @@ LoopVectorizationCostModel::selectInterleaveCount(ElementCount VF,
if (!isScalarEpilogueAllowed())
return 1;

// Do not interleave if EVL is preferred and no User IC is specified.
if (foldTailWithEVL()) {
LLVM_DEBUG(dbgs() << "LV: Preference for VP intrinsics indicated. "
"Unroll factor forced to be 1.\n");
return 1;
}

// We used the distance for the interleave count.
if (!Legal->isSafeForAnyVectorWidth())
return 1;
@@ -8487,6 +8557,9 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
VPlanTransforms::truncateToMinimalBitwidths(
*Plan, CM.getMinimalBitwidths(), PSE.getSE()->getContext());
VPlanTransforms::optimize(*Plan, *PSE.getSE());
// TODO: try to put it close to addActiveLaneMask().
if (CM.foldTailWithEVL())
VPlanTransforms::addExplicitVectorLength(*Plan);
assert(verifyVPlanIsValid(*Plan) && "VPlan is invalid");
VPlans.push_back(std::move(Plan));
}
@@ -9179,7 +9252,7 @@ void VPDerivedIVRecipe::execute(VPTransformState &State) {
State.Builder.setFastMathFlags(FPBinOp->getFastMathFlags());

Value *Step = State.get(getStepValue(), VPIteration(0, 0));
Value *CanonicalIV = State.get(getCanonicalIV(), VPIteration(0, 0));
Value *CanonicalIV = State.get(getOperand(1), VPIteration(0, 0));
Value *DerivedIV = emitTransformedIndex(
State.Builder, CanonicalIV, getStartValue()->getLiveInIRValue(), Step,
Kind, cast_if_present<BinaryOperator>(FPBinOp));
@@ -9307,6 +9380,52 @@ void VPReplicateRecipe::execute(VPTransformState &State) {
State.ILV->scalarizeInstruction(UI, this, VPIteration(Part, Lane), State);
}

/// Creates either vp_store or vp_scatter intrinsics calls to represent
/// predicated store/scatter.
static Instruction *
lowerStoreUsingVectorIntrinsics(IRBuilderBase &Builder, Value *Addr,
Value *StoredVal, bool IsScatter, Value *Mask,
Value *EVL, const Align &Alignment) {
CallInst *Call;
if (IsScatter) {
Call = Builder.CreateIntrinsic(Type::getVoidTy(EVL->getContext()),
Intrinsic::vp_scatter,
{StoredVal, Addr, Mask, EVL});
} else {
VectorBuilder VBuilder(Builder);
VBuilder.setEVL(EVL).setMask(Mask);
Call = cast<CallInst>(VBuilder.createVectorInstruction(
Instruction::Store, Type::getVoidTy(EVL->getContext()),
{StoredVal, Addr}));
}
Call->addParamAttr(
1, Attribute::getWithAlignment(Call->getContext(), Alignment));
return Call;
}

/// Creates either vp_load or vp_gather intrinsics calls to represent
/// predicated load/gather.
static Instruction *lowerLoadUsingVectorIntrinsics(IRBuilderBase &Builder,
VectorType *DataTy,
Value *Addr, bool IsGather,
Value *Mask, Value *EVL,
const Align &Alignment) {
CallInst *Call;
if (IsGather) {
Call =
Builder.CreateIntrinsic(DataTy, Intrinsic::vp_gather, {Addr, Mask, EVL},
nullptr, "wide.masked.gather");
} else {
VectorBuilder VBuilder(Builder);
VBuilder.setEVL(EVL).setMask(Mask);
Call = cast<CallInst>(VBuilder.createVectorInstruction(
Instruction::Load, DataTy, Addr, "vp.op.load"));
}
Call->addParamAttr(
0, Attribute::getWithAlignment(Call->getContext(), Alignment));
return Call;
}

void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
VPValue *StoredValue = isStore() ? getStoredValue() : nullptr;

@@ -9345,7 +9464,25 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
for (unsigned Part = 0; Part < State.UF; ++Part) {
Instruction *NewSI = nullptr;
Value *StoredVal = State.get(StoredValue, Part);
if (CreateGatherScatter) {
// TODO: split this into several classes for better design.
if (State.EVL) {
Contributor:

Still accessing EVL (as per https://reviews.llvm.org/D99750?id=558054#inline-1551722).

To move forward and make progress, I think this suggests leaving it as is for now, with a TODO to model this explicitly in the recipe.

I am planning on splitting up the memory recipes soon, now that address generation has been moved.

Member Author:

Need to check that EVL is accessible; cannot remove it now.

Collaborator:

Is there a plan to remove it later? This complication of a recipe's execute() goes against VPlan's refactoring guideline, and complicates the abovementioned plan to split up these overly complex recipes. (See "Simplify VPlan execution" in https://llvm.org/devmtg/2023-10/slides/techtalks/Hahn-VPlan-StatusUpdateAndRoadmap.pdf#page=32.)

Collaborator:

The above was raised earlier in https://reviews.llvm.org/D99750#inline-1551721:

Recipes should strive to have straightforward code-gen as much as possible (contrary to the "smart vector instructions/vp intrinsics emission" of the 2nd bullet in the summary's Tentative Development Roadmap). This is already challenged by the existing (non-EVL) VPWidenMemoryInstructionRecipe::execute.

Design dedicated recipe(s) for widening memory instructions under EVL, and introduce them instead of the existing non-EVL recipes, preferably as a VPlan-to-VPlan transformation, rather than trying to fit everything here, and potentially elsewhere? Also discussed below in https://reviews.llvm.org/D99750#inline-967127, and iinm in earlier revisions.

> Why? It already handles masking, so why should it not be extended to handle EVL? A new recipe will not add anything new here; it will just be a copy/paste of the existing recipes.

Reason explained above: execute() of recipes should be straightforward. This is one of the main guidelines outlined in the VPlan roadmap. This recipe is getting too complicated; it should probably separate gather/scatter from wide load/store, and separate the pointer setting (as in [VPlan] Model address separately. #72164), independent of this patch. A recipe should indicate statically whether EVL is used or not, to simplify code-gen and facilitate cost estimation, rather than having to check State.EVL during execute(). If multiple recipes share a common core, it can be shared via a common base class, as with VPHeaderPHIRecipe and VPRecipeWithIRFlags.

Member Author:

(Quoting the comment above.) Do you want me to spend another year right now discussing the design of this new class?

Contributor:

As mentioned earlier, I think we are in agreement that the recipe should be split up (also independent of EVL support). It's something I am planning to get around to soon, but there are a number of other changes I would like to wrap up first.

So I don't think we need to block the current patch on the refactoring; let's leave a TODO to split up the recipe in general.

Collaborator:

Thanks @fhahn for answering the "Is there a plan to remove it later?" question, constructively! The excerpts from D99750 (without a response there, sigh) were provided as context.

assert(State.UF == 1 && "Expected only UF == 1 when vectorizing with "
"explicit vector length.");
assert(cast<VPInstruction>(State.EVL)->getOpcode() ==
VPInstruction::ExplicitVectorLength &&
"EVL must be VPInstruction::ExplicitVectorLength.");
Value *EVL = State.get(State.EVL, VPIteration(0, 0));
// If EVL is not nullptr, then EVL must be a valid value set during plan
// creation, possibly default value = whole vector register length. EVL
// is created only if TTI prefers predicated vectorization, thus if EVL
// is not nullptr it also implies preference for predicated
// vectorization.
// FIXME: Support reverse store after vp_reverse is added.
Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
Collaborator:

Is this exercised or currently dead code? Additional occurrences below. It treats all-true masks as unmasked, i.e., an absent mask operand passes a null MaskPart, and should be used as argued below.

Member Author:

For consecutive non-masked loads it returns nullptr; VectorBuilder then handles it and generates an all-true mask.

NewSI = lowerStoreUsingVectorIntrinsics(
Builder, State.get(getAddr(), Part, !CreateGatherScatter),
StoredVal, CreateGatherScatter, MaskPart, EVL, Alignment);
} else if (CreateGatherScatter) {
Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
Value *VectorGep = State.get(getAddr(), Part);
NewSI = Builder.CreateMaskedScatter(StoredVal, VectorGep, Alignment,
@@ -9375,7 +9512,25 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
State.setDebugLocFrom(getDebugLoc());
for (unsigned Part = 0; Part < State.UF; ++Part) {
Value *NewLI;
if (CreateGatherScatter) {
// TODO: split this into several classes for better design.
if (State.EVL) {
assert(State.UF == 1 && "Expected only UF == 1 when vectorizing with "
"explicit vector length.");
assert(cast<VPInstruction>(State.EVL)->getOpcode() ==
VPInstruction::ExplicitVectorLength &&
"EVL must be VPInstruction::ExplicitVectorLength.");
Value *EVL = State.get(State.EVL, VPIteration(0, 0));
// If EVL is not nullptr, then EVL must be a valid value set during plan
// creation, possibly default value = whole vector register length. EVL
// is created only if TTI prefers predicated vectorization, thus if EVL
// is not nullptr it also implies preference for predicated
// vectorization.
// FIXME: Support reverse loading after vp_reverse is added.
Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
NewLI = lowerLoadUsingVectorIntrinsics(
Builder, DataTy, State.get(getAddr(), Part, !CreateGatherScatter),
CreateGatherScatter, MaskPart, EVL, Alignment);
} else if (CreateGatherScatter) {
Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
Value *VectorGep = State.get(getAddr(), Part);
NewLI = Builder.CreateMaskedGather(DataTy, VectorGep, Alignment, MaskPart,
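For reference, a minimal standalone sketch of the VectorBuilder pattern used by the two helpers above; the wrapper function and its name are hypothetical, and the commented IR shape is approximate. The builder is seeded with the mask and EVL and then expands an ordinary opcode into the corresponding VP intrinsic call.

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/VectorBuilder.h"

// Hypothetical helper mirroring lowerLoadUsingVectorIntrinsics: emits a call
// roughly of the form
//   %vp.load = call <N x T> @llvm.vp.load(ptr %Addr, <N x i1> %Mask, i32 %EVL)
static llvm::Value *emitVPLoad(llvm::IRBuilderBase &Builder,
                               llvm::VectorType *DataTy, llvm::Value *Addr,
                               llvm::Value *Mask, llvm::Value *EVL) {
  llvm::VectorBuilder VBuilder(Builder);
  // Mask and EVL become the trailing operands of the VP intrinsic; a null
  // mask is expanded by VectorBuilder into an all-true mask.
  VBuilder.setEVL(EVL).setMask(Mask);
  return VBuilder.createVectorInstruction(llvm::Instruction::Load, DataTy,
                                          Addr, "vp.load");
}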
16 changes: 9 additions & 7 deletions llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -871,13 +871,15 @@ void VPlan::execute(VPTransformState *State) {
// only a single part is generated, which provides the last part from the
// previous iteration. For non-ordered reductions all UF parts are
// generated.
bool SinglePartNeeded = isa<VPCanonicalIVPHIRecipe>(PhiR) ||
isa<VPFirstOrderRecurrencePHIRecipe>(PhiR) ||
(isa<VPReductionPHIRecipe>(PhiR) &&
cast<VPReductionPHIRecipe>(PhiR)->isOrdered());
bool NeedsScalar = isa<VPCanonicalIVPHIRecipe>(PhiR) ||
(isa<VPReductionPHIRecipe>(PhiR) &&
cast<VPReductionPHIRecipe>(PhiR)->isInLoop());
bool SinglePartNeeded =
isa<VPCanonicalIVPHIRecipe>(PhiR) ||
isa<VPFirstOrderRecurrencePHIRecipe, VPEVLBasedIVPHIRecipe>(PhiR) ||
(isa<VPReductionPHIRecipe>(PhiR) &&
cast<VPReductionPHIRecipe>(PhiR)->isOrdered());
bool NeedsScalar =
isa<VPCanonicalIVPHIRecipe, VPEVLBasedIVPHIRecipe>(PhiR) ||
(isa<VPReductionPHIRecipe>(PhiR) &&
cast<VPReductionPHIRecipe>(PhiR)->isInLoop());
unsigned LastPartForNewPhi = SinglePartNeeded ? 1 : State->UF;

for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {