Skip to content

Commit

Permalink
[VPlan] Make sure last IV increment value is available if needed.
Browse files Browse the repository at this point in the history
Legalize extract-from-ends using uniform VPReplicateRecipe of wide
inductions to use regular VPReplicateRecipe, so the correct end value
is available.

Fixes llvm#121745.
  • Loading branch information
fhahn committed Jan 6, 2025
1 parent 3874c64 commit f9369cc
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 19 deletions.
62 changes: 46 additions & 16 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -562,21 +562,63 @@ createScalarIVSteps(VPlan &Plan, InductionDescriptor::InductionKind Kind,
return Builder.createScalarIVSteps(InductionOpcode, FPBinOp, BaseIV, Step);
}

/// Collect the transitive users of \p V: its direct users, plus the users of
/// every value defined by those users, and so on. Header phi recipes that are
/// reached are kept in the result, but their own users are not followed. Each
/// user appears once in the returned vector.
static SmallVector<VPUser *> collectUsersRecursively(VPValue *V) {
  // The set doubles as worklist and result: entries appended while scanning
  // are visited by later iterations, so iterate by index instead of iterator.
  SetVector<VPUser *> Worklist(V->user_begin(), V->user_end());
  unsigned Idx = 0;
  while (Idx != Worklist.size()) {
    // Every user is expected to be a recipe (enforced by cast<>).
    auto *R = cast<VPRecipeBase>(Worklist[Idx++]);
    // Do not look through header phis; only expand other recipes.
    if (!isa<VPHeaderPHIRecipe>(R)) {
      for (VPValue *Def : R->definedValues())
        Worklist.insert(Def->user_begin(), Def->user_end());
    }
  }
  return Worklist.takeVector();
}

/// Legalize VPWidenPointerInductionRecipe, by replacing it with a PtrAdd
/// (IndStart, ScalarIVSteps (0, Step)) if only its scalar values are used, as
/// VPWidenPointerInductionRecipe will generate vectors only. If some users
/// require vectors while others require scalars, the scalar uses need to extract
/// the scalars from the generated vectors (Note that this is different to how
/// int/fp inductions are handled). Also optimize VPWidenIntOrFpInductionRecipe,
/// if any of its users needs scalar values, by providing them scalar steps
/// built on the canonical scalar IV and update the original IV's users. This is
/// an optional optimization to reduce the needs of vector extracts.
/// int/fp inductions are handled). Legalize extract-from-ends using uniform
/// VPReplicateRecipe of wide inductions to use regular VPReplicateRecipe, so
/// the correct end value is available. Also optimize
/// VPWidenIntOrFpInductionRecipe, if any of its users needs scalar values, by
/// providing them scalar steps built on the canonical scalar IV and update the
/// original IV's users. This is an optional optimization to reduce the need for
/// vector extracts.
static void legalizeAndOptimizeInductions(VPlan &Plan) {
using namespace llvm::VPlanPatternMatch;
SmallVector<VPRecipeBase *> ToRemove;
VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
bool HasOnlyVectorVFs = !Plan.hasVF(ElementCount::getFixed(1));
VPBuilder Builder(HeaderVPBB, HeaderVPBB->getFirstNonPhi());
for (VPRecipeBase &Phi : HeaderVPBB->phis()) {
auto *PhiR = dyn_cast<VPHeaderPHIRecipe>(&Phi);
if (!PhiR)
break;

// Check if any uniform VPReplicateRecipes using the phi recipe are used by
// ExtractFromEnd. Those must be replaced by a regular VPReplicateRecipe to
// ensure the final value is available.
// TODO: Remove once uniformity analysis is done on VPlan.
for (VPUser *U : collectUsersRecursively(PhiR)) {
auto *ExitIRI = dyn_cast<VPIRInstruction>(U);
VPValue *Op;
if (!ExitIRI || !match(ExitIRI->getOperand(0),
m_VPInstruction<VPInstruction::ExtractFromEnd>(
m_VPValue(Op), m_VPValue())))
continue;
auto *RepR = dyn_cast<VPReplicateRecipe>(Op);
if (!RepR || !RepR->isUniform())
continue;
assert(!RepR->isPredicated() && "RepR must not be predicated");
Instruction *I = RepR->getUnderlyingInstr();
auto *Clone =
new VPReplicateRecipe(I, RepR->operands(), /*IsUniform*/ false);
Clone->insertAfter(RepR);
RepR->replaceAllUsesWith(Clone);
}

// Replace wide pointer inductions which have only their scalars used by
// PtrAdd(IndStart, ScalarIVSteps (0, Step)).
if (auto *PtrIV = dyn_cast<VPWidenPointerInductionRecipe>(&Phi)) {
Expand Down Expand Up @@ -1086,18 +1128,6 @@ bool VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan,
return true;
}

/// Collect the transitive users of \p V: its direct users, plus the users of
/// every value defined by those users, and so on. Header phi recipes that are
/// reached are kept in the result, but their own users are not followed. Each
/// user appears once in the returned vector.
static SmallVector<VPUser *> collectUsersRecursively(VPValue *V) {
  // The set doubles as worklist and result: entries appended while scanning
  // are visited by later iterations, so iterate by index instead of iterator.
  SetVector<VPUser *> Worklist(V->user_begin(), V->user_end());
  unsigned Idx = 0;
  while (Idx != Worklist.size()) {
    // Every user is expected to be a recipe (enforced by cast<>).
    auto *R = cast<VPRecipeBase>(Worklist[Idx++]);
    // Do not look through header phis; only expand other recipes.
    if (!isa<VPHeaderPHIRecipe>(R)) {
      for (VPValue *Def : R->definedValues())
        Worklist.insert(Def->user_begin(), Def->user_end());
    }
  }
  return Worklist.takeVector();
}

void VPlanTransforms::clearReductionWrapFlags(VPlan &Plan) {
for (VPRecipeBase &R :
Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/Transforms/LoopVectorize/iv_outside_user.ll
Original file line number Diff line number Diff line change
Expand Up @@ -981,7 +981,6 @@ exit:
}

; Test case for https://github.com/llvm/llvm-project/issues/121745.
; FIXME: At the moment an incorrect exit value is used for %iv.next.
define i32 @test_iv_uniform_with_outside_use_scev_simplification(ptr %dst) {
; VEC-LABEL: define i32 @test_iv_uniform_with_outside_use_scev_simplification(
; VEC-SAME: ptr [[DST:%.*]]) {
Expand All @@ -994,10 +993,12 @@ define i32 @test_iv_uniform_with_outside_use_scev_simplification(ptr %dst) {
; VEC: [[VECTOR_BODY]]:
; VEC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; VEC-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
; VEC-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 1
; VEC-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[TMP0]]
; VEC-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 0
; VEC-NEXT: store <2 x i16> zeroinitializer, ptr [[TMP2]], align 2
; VEC-NEXT: [[TMP4:%.*]] = add i32 [[STEP_2]], [[TMP0]]
; VEC-NEXT: [[TMP5:%.*]] = add i32 [[STEP_2]], [[TMP6]]
; VEC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; VEC-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 8
; VEC-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], {{!llvm.loop ![0-9]+}}
Expand All @@ -1014,7 +1015,7 @@ define i32 @test_iv_uniform_with_outside_use_scev_simplification(ptr %dst) {
; VEC-NEXT: [[CMP_I:%.*]] = icmp slt i32 [[IV_NEXT]], 8
; VEC-NEXT: br i1 [[CMP_I]], label %[[LOOP]], label %[[E_EXIT]], {{!llvm.loop ![0-9]+}}
; VEC: [[E_EXIT]]:
; VEC-NEXT: [[RES:%.*]] = phi i32 [ [[IV_NEXT]], %[[LOOP]] ], [ [[TMP4]], %[[MIDDLE_BLOCK]] ]
; VEC-NEXT: [[RES:%.*]] = phi i32 [ [[IV_NEXT]], %[[LOOP]] ], [ [[TMP5]], %[[MIDDLE_BLOCK]] ]
; VEC-NEXT: ret i32 [[RES]]
;
; INTERLEAVE-LABEL: define i32 @test_iv_uniform_with_outside_use_scev_simplification(
Expand Down Expand Up @@ -1071,7 +1072,6 @@ e.exit:
ret i32 %res
}

; FIXME: At the moment an incorrect exit value is used for %iv.next.
define i32 @test_iv_uniform_with_outside_use_scev_simplification_2(ptr %dst) {
; VEC-LABEL: define i32 @test_iv_uniform_with_outside_use_scev_simplification_2(
; VEC-SAME: ptr [[DST:%.*]]) {
Expand Down

0 comments on commit f9369cc

Please sign in to comment.