Skip to content

[VPlan] Add new VPScalarCastRecipe, use for IV & step trunc. #78113

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 15 commits into from
Jan 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 0 additions & 6 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9277,12 +9277,6 @@ void VPDerivedIVRecipe::execute(VPTransformState &State) {
State.Builder, CanonicalIV, getStartValue()->getLiveInIRValue(), Step,
Kind, cast_if_present<BinaryOperator>(FPBinOp));
DerivedIV->setName("offset.idx");
if (TruncResultTy) {
assert(TruncResultTy != DerivedIV->getType() &&
Step->getType()->isIntegerTy() &&
"Truncation requires an integer step");
DerivedIV = State.Builder.CreateTrunc(DerivedIV, TruncResultTy);
}
assert(DerivedIV != CanonicalIV && "IV didn't need transforming?");

State.set(this, DerivedIV, VPIteration(0, 0));
Expand Down
46 changes: 34 additions & 12 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -859,6 +859,7 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
case VPRecipeBase::VPWidenIntOrFpInductionSC:
case VPRecipeBase::VPWidenPointerInductionSC:
case VPRecipeBase::VPReductionPHISC:
case VPRecipeBase::VPScalarCastSC:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit (independent of this patch): would be good to keep in order.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, will do separately.

return true;
case VPRecipeBase::VPInterleaveSC:
case VPRecipeBase::VPBranchOnMaskSC:
Expand Down Expand Up @@ -1338,6 +1339,34 @@ class VPWidenCastRecipe : public VPRecipeWithIRFlags {
Type *getResultType() const { return ResultTy; }
};

/// VPScalarCastRecipe is a recipe to create scalar cast instructions.
class VPScalarCastRecipe : public VPSingleDefRecipe {
Instruction::CastOps Opcode;

Type *ResultTy;

Value *generate(VPTransformState &State, unsigned Part);

public:
VPScalarCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
: VPSingleDefRecipe(VPDef::VPScalarCastSC, {Op}), Opcode(Opcode),
ResultTy(ResultTy) {}

~VPScalarCastRecipe() override = default;

VP_CLASSOF_IMPL(VPDef::VPScalarCastSC)

void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const override;
#endif

/// Returns the result type of the cast.
Type *getResultType() const { return ResultTy; }
};

/// A recipe for widening Call instructions.
class VPWidenCallRecipe : public VPSingleDefRecipe {
/// ID of the vector intrinsic to call when widening the call. If set the
Expand Down Expand Up @@ -2254,10 +2283,9 @@ class VPCanonicalIVPHIRecipe : public VPHeaderPHIRecipe {
}

/// Check if the induction described by \p Kind, /p Start and \p Step is
/// canonical, i.e. has the same start, step (of 1), and type as the
/// canonical IV.
/// canonical, i.e. has the same start and step (of 1) as the canonical IV.
bool isCanonical(InductionDescriptor::InductionKind Kind, VPValue *Start,
VPValue *Step, Type *Ty) const;
VPValue *Step) const;
};

/// A recipe for generating the active lane mask for the vector loop that is
Expand Down Expand Up @@ -2320,10 +2348,6 @@ class VPWidenCanonicalIVRecipe : public VPSingleDefRecipe {
/// an IV with different start and step values, using Start + CanonicalIV *
/// Step.
class VPDerivedIVRecipe : public VPSingleDefRecipe {
/// If not nullptr, the result of the induction will get truncated to
/// TruncResultTy.
Type *TruncResultTy;

/// Kind of the induction.
const InductionDescriptor::InductionKind Kind;
/// If not nullptr, the floating point induction binary operator. Must be set
Expand All @@ -2332,10 +2356,9 @@ class VPDerivedIVRecipe : public VPSingleDefRecipe {

public:
VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPValue *Start,
VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step,
Type *TruncResultTy)
VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step)
: VPSingleDefRecipe(VPDef::VPDerivedIVSC, {Start, CanonicalIV, Step}),
TruncResultTy(TruncResultTy), Kind(IndDesc.getKind()),
Kind(IndDesc.getKind()),
FPBinOp(dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp())) {
}

Expand All @@ -2354,8 +2377,7 @@ class VPDerivedIVRecipe : public VPSingleDefRecipe {
#endif

Type *getScalarType() const {
return TruncResultTy ? TruncResultTy
: getStartValue()->getLiveInIRValue()->getType();
return getStartValue()->getLiveInIRValue()->getType();
}

VPValue *getStartValue() const { return getOperand(0); }
Expand Down
8 changes: 7 additions & 1 deletion llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,13 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
return V->getUnderlyingValue()->getType();
})
.Case<VPWidenCastRecipe>(
[](const VPWidenCastRecipe *R) { return R->getResultType(); });
[](const VPWidenCastRecipe *R) { return R->getResultType(); })
.Case<VPScalarCastRecipe>(
[](const VPScalarCastRecipe *R) { return R->getResultType(); })
.Case<VPExpandSCEVRecipe>([](const VPExpandSCEVRecipe *R) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Adding Expand SCEV case to inferScalarType is independent of this patch?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is needed to infer the type of the step in the new code in VPlanTransforms.cpp (if (TypeInfo.inferScalarType(BaseIV) != TypeInfo.inferScalarType(Step)) {)).

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So now that VPDerivedIVRecipe simply mirrors the type of its first start-value operand, should it join VPCanonicalIVPHIRecipe et al.'s or VPPredInstPHIRecipe et al.'s cases above and remove its getScalarType() method? Possibly as a follow-up.

Should VPWidenIntOrFpInductionRecipe's trunc also utilize VPScalarCastRecipe, and retire its getScalarType(), possibly as a follow-up?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

VPDerivedIVRecipe getScalarType would be good to remove, will do as follow-up.

VPWidenIntOrFpInductionRecipe's trunc drives truncating the start value and step, effectively creating a narrow phi. I'll check to see if this can also be modeled with the new recipe.

return R->getSCEV()->getType();
});

assert(ResultTy && "could not infer type for the given VPValue");
CachedTypes[V] = ResultTy;
return ResultTy;
Expand Down
72 changes: 58 additions & 14 deletions llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ bool VPRecipeBase::mayHaveSideEffects() const {
switch (getVPDefID()) {
case VPDerivedIVSC:
case VPPredInstPHISC:
case VPScalarCastSC:
return false;
case VPInstructionSC:
switch (cast<VPInstruction>(this)->getOpcode()) {
Expand Down Expand Up @@ -1096,9 +1097,6 @@ void VPDerivedIVRecipe::print(raw_ostream &O, const Twine &Indent,
getCanonicalIV()->printAsOperand(O, SlotTracker);
O << " * ";
getStepValue()->printAsOperand(O, SlotTracker);

if (TruncResultTy)
O << " (truncated to " << *TruncResultTy << ")";
}
#endif

Expand All @@ -1117,13 +1115,7 @@ void VPScalarIVStepsRecipe::execute(VPTransformState &State) {

// Ensure step has the same type as that of scalar IV.
Type *BaseIVTy = BaseIV->getType()->getScalarType();
if (BaseIVTy != Step->getType()) {
// TODO: Also use VPDerivedIVRecipe when only the step needs truncating, to
// avoid separate truncate here.
assert(Step->getType()->isIntegerTy() &&
"Truncation requires an integer step");
Step = State.Builder.CreateTrunc(Step, BaseIVTy);
}
assert(BaseIVTy == Step->getType() && "Types of BaseIV and Step must match!");

// We build scalar steps for both integer and floating-point induction
// variables. Here, we determine the kind of arithmetic we will perform.
Expand Down Expand Up @@ -1469,6 +1461,58 @@ void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent,
}
#endif

/// Checks if \p C is uniform across all VFs and UFs. It is considered as such
/// if it is either defined outside the vector region or its operand is known to
/// be uniform across all VFs and UFs (e.g. VPDerivedIV or VPCanonicalIVPHI).
/// TODO: Uniformity should be associated with a VPValue and there should be a
/// generic way to check.
static bool isUniformAcrossVFsAndUFs(VPScalarCastRecipe *C) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Worth documenting, potentially leaving behind a note that uniformity should essentially be associated with VPValues.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added comment + TODO, thanks!

return C->isDefinedOutsideVectorRegions() ||
isa<VPDerivedIVRecipe>(C->getOperand(0)) ||
isa<VPCanonicalIVPHIRecipe>(C->getOperand(0));
}

Value *VPScalarCastRecipe ::generate(VPTransformState &State, unsigned Part) {
assert(vputils::onlyFirstLaneUsed(this) &&
"Codegen only implemented for first lane.");
switch (Opcode) {
case Instruction::SExt:
case Instruction::ZExt:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Only Trunc is currently being used, still.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, could remove if preferred, but State.Builder.CreateCast generically supports any cast

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would be good to either remove or make a note that SExt and ZExt are currently unused/dead cases.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added a note, thanks!

case Instruction::Trunc: {
// Note: SExt/ZExt not used yet.
Value *Op = State.get(getOperand(0), VPIteration(Part, 0));
return State.Builder.CreateCast(Instruction::CastOps(Opcode), Op, ResultTy);
}
default:
llvm_unreachable("opcode not implemented yet");
}
}

void VPScalarCastRecipe ::execute(VPTransformState &State) {
bool IsUniformAcrossVFsAndUFs = isUniformAcrossVFsAndUFs(this);
for (unsigned Part = 0; Part != State.UF; ++Part) {
Value *Res;
// Only generate a single instance, if the recipe is uniform across UFs and
// VFs.
if (Part > 0 && IsUniformAcrossVFsAndUFs)
Res = State.get(this, VPIteration(0, 0));
else
Res = generate(State, Part);
State.set(this, Res, VPIteration(Part, 0));
}
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPScalarCastRecipe ::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent << "SCALAR-CAST ";
printAsOperand(O, SlotTracker);
O << " = " << Instruction::getOpcodeName(Opcode) << " ";
printOperands(O, SlotTracker);
O << " to " << *ResultTy;
}
#endif

void VPBranchOnMaskRecipe::execute(VPTransformState &State) {
assert(State.Instance && "Branch on Mask works only on single instance.");

Expand Down Expand Up @@ -1587,10 +1631,10 @@ void VPCanonicalIVPHIRecipe::print(raw_ostream &O, const Twine &Indent,
#endif

bool VPCanonicalIVPHIRecipe::isCanonical(
InductionDescriptor::InductionKind Kind, VPValue *Start, VPValue *Step,
Type *Ty) const {
// The types must match and it must be an integer induction.
if (Ty != getScalarType() || Kind != InductionDescriptor::IK_IntInduction)
InductionDescriptor::InductionKind Kind, VPValue *Start,
VPValue *Step) const {
// Must be an integer induction.
if (Kind != InductionDescriptor::IK_IntInduction)
return false;
// Start must match the start value of this canonical induction.
if (Start != getStartValue())
Expand Down
44 changes: 33 additions & 11 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -491,17 +491,39 @@ void VPlanTransforms::removeDeadRecipes(VPlan &Plan) {

static VPValue *createScalarIVSteps(VPlan &Plan, const InductionDescriptor &ID,
ScalarEvolution &SE, Instruction *TruncI,
Type *IVTy, VPValue *StartV,
VPValue *Step) {
VPValue *StartV, VPValue *Step) {
VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
auto IP = HeaderVPBB->getFirstNonPhi();
VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV();
Type *TruncTy = TruncI ? TruncI->getType() : IVTy;
VPValue *BaseIV = CanonicalIV;
if (!CanonicalIV->isCanonical(ID.getKind(), StartV, Step, TruncTy)) {
BaseIV = new VPDerivedIVRecipe(ID, StartV, CanonicalIV, Step,
TruncI ? TruncI->getType() : nullptr);
HeaderVPBB->insert(BaseIV->getDefiningRecipe(), IP);
VPSingleDefRecipe *BaseIV = CanonicalIV;
if (!CanonicalIV->isCanonical(ID.getKind(), StartV, Step)) {
BaseIV = new VPDerivedIVRecipe(ID, StartV, CanonicalIV, Step);
HeaderVPBB->insert(BaseIV, IP);
}

// Truncate base induction if needed.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  VPTypeAnalysis TypeInfo(SE.getContext());
  Type *ResultTy = TypeInfo.inferScalarType(BaseIV);

both BaseIV and Step are subject to redefinition and truncation, perhaps aim to define the type of the result which they may truncate to.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done, thanks! (missed originally)

VPTypeAnalysis TypeInfo(SE.getContext());
Type *ResultTy = TypeInfo.inferScalarType(BaseIV);
if (TruncI) {
Type *TruncTy = TruncI->getType();
assert(ResultTy->getScalarSizeInBits() > TruncTy->getScalarSizeInBits() &&
"Not truncating.");
assert(ResultTy->isIntegerTy() && "Truncation requires an integer type");
BaseIV = new VPScalarCastRecipe(Instruction::Trunc, BaseIV, TruncTy);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, ResultTy should be reset to TruncTy here, so that Step will be truncated to the updated type of BaseIV, rather than its original type. (Or reset right after the asserts, so that both truncations use ResultTy.) Wonder about test coverage if all pass?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Argh, sent the update too early (or too late in the day)! Should be fixed now., there were a number of test failures.

HeaderVPBB->insert(BaseIV, IP);
ResultTy = TruncTy;
}

// Truncate step if needed.
Type *StepTy = TypeInfo.inferScalarType(Step);
if (ResultTy != StepTy) {
assert(StepTy->getScalarSizeInBits() > ResultTy->getScalarSizeInBits() &&
"Not truncating.");
assert(StepTy->isIntegerTy() && "Truncation requires an integer type");
Step = new VPScalarCastRecipe(Instruction::Trunc, Step, ResultTy);
auto *VecPreheader =
cast<VPBasicBlock>(HeaderVPBB->getSingleHierarchicalPredecessor());
VecPreheader->appendRecipe(Step->getDefiningRecipe());
}

VPScalarIVStepsRecipe *Steps = new VPScalarIVStepsRecipe(ID, BaseIV, Step);
Expand All @@ -523,9 +545,9 @@ void VPlanTransforms::optimizeInductions(VPlan &Plan, ScalarEvolution &SE) {
continue;

const InductionDescriptor &ID = WideIV->getInductionDescriptor();
VPValue *Steps = createScalarIVSteps(
Plan, ID, SE, WideIV->getTruncInst(), WideIV->getPHINode()->getType(),
WideIV->getStartValue(), WideIV->getStepValue());
VPValue *Steps =
createScalarIVSteps(Plan, ID, SE, WideIV->getTruncInst(),
WideIV->getStartValue(), WideIV->getStepValue());

// Update scalar users of IV to use Step instead.
if (!HasOnlyVectorVFs)
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Transforms/Vectorize/VPlanValue.h
Original file line number Diff line number Diff line change
Expand Up @@ -350,6 +350,7 @@ class VPDef {
VPInterleaveSC,
VPReductionSC,
VPReplicateSC,
VPScalarCastSC,
VPScalarIVStepsSC,
VPVectorPointerSC,
VPWidenCallSC,
Expand Down
4 changes: 3 additions & 1 deletion llvm/test/Transforms/LoopVectorize/cast-induction.ll
Original file line number Diff line number Diff line change
Expand Up @@ -83,12 +83,14 @@ define void @cast_variable_step(i64 %step) {
; VF4: middle.block:
;
; IC2-LABEL: @cast_variable_step(
; IC2: [[TRUNC_STEP:%.+]] = trunc i64 %step to i32
; IC2: br label %vector.body

; IC2-LABEL: vector.body:
; IC2-NEXT: [[INDEX:%.+]] = phi i64 [ 0, %vector.ph ]
; IC2-NEXT: [[MUL:%.+]] = mul i64 %index, %step
; IC2-NEXT: [[OFFSET_IDX:%.+]] = add i64 10, [[MUL]]
; IC2-NEXT: [[TRUNC_OFF:%.+]] = trunc i64 [[OFFSET_IDX]] to i32
; IC2-NEXT: [[TRUNC_STEP:%.+]] = trunc i64 %step to i32
; IC2-NEXT: [[STEP0:%.+]] = mul i32 0, [[TRUNC_STEP]]
; IC2-NEXT: [[T0:%.+]] = add i32 [[TRUNC_OFF]], [[STEP0]]
; IC2-NEXT: [[STEP1:%.+]] = mul i32 1, [[TRUNC_STEP]]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -184,14 +184,15 @@ exit:
; DBG-NEXT: No successors
; DBG-EMPTY:
; DBG-NEXT: vector.ph:
; DBG-NEXT: SCALAR-CAST vp<[[CAST:%.+]]> = trunc ir<1> to i32
; DBG-NEXT: Successor(s): vector loop
; DBG-EMPTY:
; DBG-NEXT: <x1> vector loop: {
; DBG-NEXT: vector.body:
; DBG-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
; DBG-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%for> = phi ir<0>, vp<[[SCALAR_STEPS:.+]]>
; DBG-NEXT: vp<[[DERIVED_IV:%.+]]> = DERIVED-IV ir<0> + vp<[[CAN_IV]]> * ir<1> (truncated to i32)
; DBG-NEXT: vp<[[SCALAR_STEPS]]> = SCALAR-STEPS vp<[[DERIVED_IV]]>, ir<1>
; DBG-NEXT: SCALAR-CAST vp<[[TRUNC_IV:%.+]]> = trunc vp<[[CAN_IV]]> to i32
; DBG-NEXT: vp<[[SCALAR_STEPS]]> = SCALAR-STEPS vp<[[TRUNC_IV]]>, vp<[[CAST]]>
; DBG-NEXT: EMIT vp<[[SPLICE:%.+]]> = first-order splice ir<%for>, vp<[[SCALAR_STEPS]]>
; DBG-NEXT: CLONE store vp<[[SPLICE]]>, ir<%dst>
; DBG-NEXT: EMIT vp<[[IV_INC:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ define void @test(i16 %x, i64 %y, ptr %ptr) {
; CHECK-NEXT: [[V3:%.*]] = add i8 [[V2]], 1
; CHECK-NEXT: [[CMP15:%.*]] = icmp slt i8 [[V3]], 5
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], [[INC]]
; CHECK-NEXT: br i1 [[CMP15]], label [[LOOP]], label [[LOOP_EXIT]], !llvm.loop [[LOOP2:![0-9]+]]
; CHECK-NEXT: br i1 [[CMP15]], label [[LOOP]], label [[LOOP_EXIT]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: loop.exit:
; CHECK-NEXT: [[DIV_1:%.*]] = udiv i64 [[Y]], [[ADD]]
; CHECK-NEXT: [[V1:%.*]] = add i64 [[DIV_1]], 1
Expand Down