
[LV][VPlan] Use VF VPValue in VPVectorPointerRecipe #110974

Open
arcbbb wants to merge 8 commits into main
Conversation

arcbbb (Contributor) commented on Oct 3, 2024

Refactors VPVectorPointerRecipe to use the VF VPValue to obtain the runtime VF, similar to #95305.

Since only reverse vector pointers require the runtime VF, the patch sets VPUnrollPart::PartOpIndex to 1 for vector pointers and 2 for reverse vector pointers. As a result, the generation of reverse vector pointers is moved into a separate recipe.
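
In effect, each unroll part's pointer is advanced to the last element of that part's chunk, and the wide access is then reversed. A standalone sketch of the index arithmetic the new recipe emits (plain integers standing in for the two GEPs; the function name is illustrative only):

#include <cstdint>
#include <cstdio>

// Start index for the reversed chunk of a given unroll part:
//   NumElt   = -Part * RunTimeVF  (skip the chunks of earlier parts)
//   LastLane = 1 - RunTimeVF      (step back to the lowest lane of the chunk)
int64_t reversePartStart(int64_t Base, int64_t RunTimeVF, int64_t Part) {
  return Base + (-Part * RunTimeVF) + (1 - RunTimeVF);
}

int main() {
  // With a runtime VF of 4 and base index 100: part 0 covers [97, 100] and
  // part 1 covers [93, 96]; each wide load/store is reversed afterwards.
  for (int64_t Part = 0; Part < 2; ++Part)
    printf("part %lld starts at %lld\n", (long long)Part,
           (long long)reversePartStart(100, 4, Part));
  return 0;
}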

llvmbot (Collaborator) commented on Oct 3, 2024

@llvm/pr-subscribers-llvm-transforms

@llvm/pr-subscribers-backend-risc-v

Author: Shih-Po Hung (arcbbb)

Changes

Refactors VPVectorPointerRecipe to use the VF VPValue to obtain the runtime VF, similar to #95305.

Since only reverse vector pointers require the runtime VF, the patch sets VPUnrollPart::PartOpIndex to 1 for vector pointers and 2 for reverse vector pointers. As a result, the generation of reverse vector pointers is moved into a separate recipe.


Patch is 48.21 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/110974.diff

11 Files Affected:

  • (modified) llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (+10-3)
  • (modified) llvm/lib/Transforms/Vectorize/VPlan.h (+55-8)
  • (modified) llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp (+4-3)
  • (modified) llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp (+43-22)
  • (modified) llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp (+2-2)
  • (modified) llvm/lib/Transforms/Vectorize/VPlanValue.h (+1)
  • (modified) llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll (+68-64)
  • (modified) llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll (+56-64)
  • (modified) llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-uniform-store.ll (+2-4)
  • (modified) llvm/test/Transforms/LoopVectorize/reverse_induction.ll (+12-12)
  • (modified) llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll (+3-2)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 6e082b1c134dee..220625908dff9e 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4442,6 +4442,7 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
       case VPDef::VPInstructionSC:
       case VPDef::VPCanonicalIVPHISC:
       case VPDef::VPVectorPointerSC:
+      case VPDef::VPReverseVectorPointerSC:
       case VPDef::VPExpandSCEVSC:
       case VPDef::VPEVLBasedIVPHISC:
       case VPDef::VPPredInstPHISC:
@@ -8160,9 +8161,15 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
   if (Consecutive) {
     auto *GEP = dyn_cast<GetElementPtrInst>(
         Ptr->getUnderlyingValue()->stripPointerCasts());
-    auto *VectorPtr = new VPVectorPointerRecipe(
-        Ptr, getLoadStoreType(I), Reverse, GEP ? GEP->isInBounds() : false,
-        I->getDebugLoc());
+    VPSingleDefRecipe *VectorPtr;
+    if (Reverse)
+      VectorPtr = new VPReverseVectorPointerRecipe(
+          Ptr, &Plan.getVF(), getLoadStoreType(I),
+          GEP ? GEP->isInBounds() : false, I->getDebugLoc());
+    else
+      VectorPtr = new VPVectorPointerRecipe(Ptr, getLoadStoreType(I),
+                                            GEP ? GEP->isInBounds() : false,
+                                            I->getDebugLoc());
     Builder.getInsertBlock()->appendRecipe(VectorPtr);
     Ptr = VectorPtr;
   }
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 594492344d43ce..24e066753f9da4 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -882,6 +882,7 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
     case VPRecipeBase::VPReplicateSC:
     case VPRecipeBase::VPScalarIVStepsSC:
     case VPRecipeBase::VPVectorPointerSC:
+    case VPRecipeBase::VPReverseVectorPointerSC:
     case VPRecipeBase::VPWidenCallSC:
     case VPRecipeBase::VPWidenCanonicalIVSC:
     case VPRecipeBase::VPWidenCastSC:
@@ -1078,6 +1079,7 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe {
            R->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
            R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
            R->getVPDefID() == VPRecipeBase::VPReplicateSC ||
+           R->getVPDefID() == VPRecipeBase::VPReverseVectorPointerSC ||
            R->getVPDefID() == VPRecipeBase::VPVectorPointerSC;
   }
 
@@ -1785,20 +1787,65 @@ class VPWidenGEPRecipe : public VPRecipeWithIRFlags {
 #endif
 };
 
+/// A recipe to compute the pointers for widened memory accesses of IndexTy
+/// in reverse order per part.
+class VPReverseVectorPointerRecipe : public VPRecipeWithIRFlags,
+                                     public VPUnrollPartAccessor<2> {
+  Type *IndexedTy;
+
+public:
+  VPReverseVectorPointerRecipe(VPValue *Ptr, VPValue *VF, Type *IndexedTy,
+                               bool IsInBounds, DebugLoc DL)
+      : VPRecipeWithIRFlags(VPDef::VPReverseVectorPointerSC,
+                            ArrayRef<VPValue *>({Ptr, VF}),
+                            GEPFlagsTy(IsInBounds), DL),
+        IndexedTy(IndexedTy) {}
+
+  VP_CLASSOF_IMPL(VPDef::VPReverseVectorPointerSC)
+
+  VPValue *getVFValue() { return getOperand(1); }
+  const VPValue *getVFValue() const { return getOperand(1); }
+
+  void execute(VPTransformState &State) override;
+
+  bool onlyFirstLaneUsed(const VPValue *Op) const override {
+    assert(is_contained(operands(), Op) &&
+           "Op must be an operand of the recipe");
+    return true;
+  }
+
+  /// Returns true if the recipe only uses the first part of operand \p Op.
+  bool onlyFirstPartUsed(const VPValue *Op) const override {
+    assert(is_contained(operands(), Op) &&
+           "Op must be an operand of the recipe");
+    assert(getNumOperands() <= 2 && "must have at most two operands");
+    return true;
+  }
+
+  VPReverseVectorPointerRecipe *clone() override {
+    return new VPReverseVectorPointerRecipe(
+        getOperand(0), getVFValue(), IndexedTy, isInBounds(), getDebugLoc());
+  }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+  /// Print the recipe.
+  void print(raw_ostream &O, const Twine &Indent,
+             VPSlotTracker &SlotTracker) const override;
+#endif
+};
+
 /// A recipe to compute the pointers for widened memory accesses of IndexTy for
-/// all parts. If IsReverse is true, compute pointers for accessing the input in
-/// reverse order per part.
+/// all parts.
 class VPVectorPointerRecipe : public VPRecipeWithIRFlags,
                               public VPUnrollPartAccessor<1> {
   Type *IndexedTy;
-  bool IsReverse;
 
 public:
-  VPVectorPointerRecipe(VPValue *Ptr, Type *IndexedTy, bool IsReverse,
-                        bool IsInBounds, DebugLoc DL)
+  VPVectorPointerRecipe(VPValue *Ptr, Type *IndexedTy, bool IsInBounds,
+                        DebugLoc DL)
       : VPRecipeWithIRFlags(VPDef::VPVectorPointerSC, ArrayRef<VPValue *>(Ptr),
                             GEPFlagsTy(IsInBounds), DL),
-        IndexedTy(IndexedTy), IsReverse(IsReverse) {}
+        IndexedTy(IndexedTy) {}
 
   VP_CLASSOF_IMPL(VPDef::VPVectorPointerSC)
 
@@ -1819,8 +1866,8 @@ class VPVectorPointerRecipe : public VPRecipeWithIRFlags,
   }
 
   VPVectorPointerRecipe *clone() override {
-    return new VPVectorPointerRecipe(getOperand(0), IndexedTy, IsReverse,
-                                     isInBounds(), getDebugLoc());
+    return new VPVectorPointerRecipe(getOperand(0), IndexedTy, isInBounds(),
+                                     getDebugLoc());
   }
 
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index 277df0637372d8..7bfbcc3dff0e1f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -261,9 +261,10 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
               [](const auto *R) { return R->getScalarType(); })
           .Case<VPReductionRecipe, VPPredInstPHIRecipe, VPWidenPHIRecipe,
                 VPScalarIVStepsRecipe, VPWidenGEPRecipe, VPVectorPointerRecipe,
-                VPWidenCanonicalIVRecipe>([this](const VPRecipeBase *R) {
-            return inferScalarType(R->getOperand(0));
-          })
+                VPReverseVectorPointerRecipe, VPWidenCanonicalIVRecipe>(
+              [this](const VPRecipeBase *R) {
+                return inferScalarType(R->getOperand(0));
+              })
           .Case<VPBlendRecipe, VPInstruction, VPWidenRecipe, VPWidenEVLRecipe,
                 VPReplicateRecipe, VPWidenCallRecipe, VPWidenMemoryRecipe,
                 VPWidenSelectRecipe>(
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 0d092b9c10acc8..bcf7b6d9b64c24 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -1813,6 +1813,46 @@ void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent,
 }
 #endif
 
+void VPReverseVectorPointerRecipe ::execute(VPTransformState &State) {
+  auto &Builder = State.Builder;
+  State.setDebugLocFrom(getDebugLoc());
+  unsigned CurrentPart = getUnrollPart(*this);
+  // Use i32 for the gep index type when the value is constant,
+  // or query DataLayout for a more suitable index type otherwise.
+  const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout();
+  Type *IndexTy = State.VF.isScalable()
+                      ? DL.getIndexType(IndexedTy->getPointerTo())
+                      : Builder.getInt32Ty();
+  Value *Ptr = State.get(getOperand(0), VPLane(0));
+  bool InBounds = isInBounds();
+
+  // the wide store needs to start at the last vector element.
+  // RunTimeVF =  VScale * VF.getKnownMinValue()
+  // For fixed-width VScale is 1, then RunTimeVF = VF.getKnownMinValue()
+  Value *RunTimeVF = State.get(getVFValue(), VPLane(0));
+  if (IndexTy != RunTimeVF->getType())
+    RunTimeVF = Builder.CreateZExtOrTrunc(RunTimeVF, IndexTy);
+  // NumElt = -CurrentPart * RunTimeVF
+  Value *NumElt = Builder.CreateMul(
+      ConstantInt::get(IndexTy, -(int64_t)CurrentPart), RunTimeVF);
+  // LastLane = 1 - RunTimeVF
+  Value *LastLane = Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF);
+  Value *ResultPtr = Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", InBounds);
+  ResultPtr = Builder.CreateGEP(IndexedTy, ResultPtr, LastLane, "", InBounds);
+
+  State.set(this, ResultPtr, /*IsScalar*/ true);
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void VPReverseVectorPointerRecipe::print(raw_ostream &O, const Twine &Indent,
+                                         VPSlotTracker &SlotTracker) const {
+  O << Indent;
+  printAsOperand(O, SlotTracker);
+  O << " = reverse-vector-pointer ";
+  printOperands(O, SlotTracker);
+}
+#endif
+
 void VPVectorPointerRecipe ::execute(VPTransformState &State) {
   auto &Builder = State.Builder;
   State.setDebugLocFrom(getDebugLoc());
@@ -1820,31 +1860,14 @@ void VPVectorPointerRecipe ::execute(VPTransformState &State) {
   // Use i32 for the gep index type when the value is constant,
   // or query DataLayout for a more suitable index type otherwise.
   const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout();
-  Type *IndexTy = State.VF.isScalable() && (IsReverse || CurrentPart > 0)
+  Type *IndexTy = State.VF.isScalable() && (CurrentPart > 0)
                       ? DL.getIndexType(Builder.getPtrTy(0))
                       : Builder.getInt32Ty();
   Value *Ptr = State.get(getOperand(0), VPLane(0));
   bool InBounds = isInBounds();
 
-  Value *ResultPtr = nullptr;
-  if (IsReverse) {
-    // If the address is consecutive but reversed, then the
-    // wide store needs to start at the last vector element.
-    // RunTimeVF =  VScale * VF.getKnownMinValue()
-    // For fixed-width VScale is 1, then RunTimeVF = VF.getKnownMinValue()
-    Value *RunTimeVF = getRuntimeVF(Builder, IndexTy, State.VF);
-    // NumElt = -CurrentPart * RunTimeVF
-    Value *NumElt = Builder.CreateMul(
-        ConstantInt::get(IndexTy, -(int64_t)CurrentPart), RunTimeVF);
-    // LastLane = 1 - RunTimeVF
-    Value *LastLane =
-        Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF);
-    ResultPtr = Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", InBounds);
-    ResultPtr = Builder.CreateGEP(IndexedTy, ResultPtr, LastLane, "", InBounds);
-  } else {
-    Value *Increment = createStepForVF(Builder, IndexTy, State.VF, CurrentPart);
-    ResultPtr = Builder.CreateGEP(IndexedTy, Ptr, Increment, "", InBounds);
-  }
+  Value *Increment = createStepForVF(Builder, IndexTy, State.VF, CurrentPart);
+  Value *ResultPtr = Builder.CreateGEP(IndexedTy, Ptr, Increment, "", InBounds);
 
   State.set(this, ResultPtr, /*IsScalar*/ true);
 }
@@ -1855,8 +1878,6 @@ void VPVectorPointerRecipe::print(raw_ostream &O, const Twine &Indent,
   O << Indent;
   printAsOperand(O, SlotTracker);
   O << " = vector-pointer ";
-  if (IsReverse)
-    O << "(reverse) ";
 
   printOperands(O, SlotTracker);
 }
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index ca78f32506ef71..1e32865e8ee576 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -316,12 +316,12 @@ void UnrollState::unrollRecipeByUF(VPRecipeBase &R) {
     // Add operand indicating the part to generate code for, to recipes still
     // requiring it.
     if (isa<VPScalarIVStepsRecipe, VPWidenCanonicalIVRecipe,
-            VPVectorPointerRecipe>(Copy) ||
+            VPVectorPointerRecipe, VPReverseVectorPointerRecipe>(Copy) ||
         match(Copy, m_VPInstruction<VPInstruction::CanonicalIVIncrementForPart>(
                         m_VPValue())))
       Copy->addOperand(getConstantVPV(Part));
 
-    if (isa<VPVectorPointerRecipe>(R))
+    if (isa<VPVectorPointerRecipe, VPReverseVectorPointerRecipe>(R))
       Copy->setOperand(0, R.getOperand(0));
   }
 }
diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h
index 4c383244f96f1a..d7626e437db338 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -346,6 +346,7 @@ class VPDef {
     VPScalarCastSC,
     VPScalarIVStepsSC,
     VPVectorPointerSC,
+    VPReverseVectorPointerSC,
     VPWidenCallSC,
     VPWidenCanonicalIVSC,
     VPWidenCastSC,
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
index b6a9fed507acd3..9631dfd5f15913 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
@@ -53,13 +53,14 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
 ; CHECK-NEXT:  LV: Scalarizing: %cmp = icmp ugt i64 %indvars.iv, 1
 ; CHECK-NEXT:  LV: Scalarizing: %indvars.iv.next = add nsw i64 %indvars.iv, -1
 ; CHECK-NEXT:  VPlan 'Initial VPlan for VF={vscale x 4},UF>=1' {
-; CHECK-NEXT:  Live-in vp<%0> = VF * UF
-; CHECK-NEXT:  Live-in vp<%1> = vector-trip-count
-; CHECK-NEXT:  vp<%2> = original trip-count
+; CHECK-NEXT:  Live-in vp<%0> = VF
+; CHECK-NEXT:  Live-in vp<%1> = VF * UF
+; CHECK-NEXT:  Live-in vp<%2> = vector-trip-count
+; CHECK-NEXT:  vp<%3> = original trip-count
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  ir-bb<for.body.preheader>:
 ; CHECK-NEXT:    IR %0 = zext i32 %n to i64
-; CHECK-NEXT:    EMIT vp<%2> = EXPAND SCEV (zext i32 %n to i64)
+; CHECK-NEXT:    EMIT vp<%3> = EXPAND SCEV (zext i32 %n to i64)
 ; CHECK-NEXT:  No successors
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  vector.ph:
@@ -67,27 +68,27 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  <x1> vector loop: {
 ; CHECK-NEXT:    vector.body:
-; CHECK-NEXT:      EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%8>
-; CHECK-NEXT:      vp<%4> = DERIVED-IV ir<%n> + vp<%3> * ir<-1>
-; CHECK-NEXT:      vp<%5> = SCALAR-STEPS vp<%4>, ir<-1>
-; CHECK-NEXT:      CLONE ir<%i.0> = add nsw vp<%5>, ir<-1>
+; CHECK-NEXT:      EMIT vp<%4> = CANONICAL-INDUCTION ir<0>, vp<%9>
+; CHECK-NEXT:      vp<%5> = DERIVED-IV ir<%n> + vp<%4> * ir<-1>
+; CHECK-NEXT:      vp<%6> = SCALAR-STEPS vp<%5>, ir<-1>
+; CHECK-NEXT:      CLONE ir<%i.0> = add nsw vp<%6>, ir<-1>
 ; CHECK-NEXT:      CLONE ir<%idxprom> = zext ir<%i.0>
 ; CHECK-NEXT:      CLONE ir<%arrayidx> = getelementptr inbounds ir<%B>, ir<%idxprom>
-; CHECK-NEXT:      vp<%6> = vector-pointer (reverse) ir<%arrayidx>
-; CHECK-NEXT:      WIDEN ir<%1> = load vp<%6>
+; CHECK-NEXT:      vp<%7> = reverse-vector-pointer ir<%arrayidx>, vp<%0>
+; CHECK-NEXT:      WIDEN ir<%1> = load vp<%7>
 ; CHECK-NEXT:      WIDEN ir<%add9> = add ir<%1>, ir<1>
 ; CHECK-NEXT:      CLONE ir<%arrayidx3> = getelementptr inbounds ir<%A>, ir<%idxprom>
-; CHECK-NEXT:      vp<%7> = vector-pointer (reverse) ir<%arrayidx3>
-; CHECK-NEXT:      WIDEN store vp<%7>, ir<%add9>
-; CHECK-NEXT:      EMIT vp<%8> = add nuw vp<%3>, vp<%0>
-; CHECK-NEXT:      EMIT branch-on-count vp<%8>, vp<%1>
+; CHECK-NEXT:      vp<%8> = reverse-vector-pointer ir<%arrayidx3>, vp<%0>
+; CHECK-NEXT:      WIDEN store vp<%8>, ir<%add9>
+; CHECK-NEXT:      EMIT vp<%9> = add nuw vp<%4>, vp<%1>
+; CHECK-NEXT:      EMIT branch-on-count vp<%9>, vp<%2>
 ; CHECK-NEXT:    No successors
 ; CHECK-NEXT:  }
 ; CHECK-NEXT:  Successor(s): middle.block
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  middle.block:
-; CHECK-NEXT:    EMIT vp<%10> = icmp eq vp<%2>, vp<%1>
-; CHECK-NEXT:    EMIT branch-on-cond vp<%10>
+; CHECK-NEXT:    EMIT vp<%11> = icmp eq vp<%3>, vp<%2>
+; CHECK-NEXT:    EMIT branch-on-cond vp<%11>
 ; CHECK-NEXT:  Successor(s): ir-bb<for.cond.cleanup.loopexit>, scalar.ph
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  ir-bb<for.cond.cleanup.loopexit>:
@@ -137,13 +138,14 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
 ; CHECK-NEXT:  LEV: Epilogue vectorization is not profitable for this loop
 ; CHECK-NEXT:  Executing best plan with VF=vscale x 4, UF=1
 ; CHECK-NEXT:  VPlan 'Final VPlan for VF={vscale x 4},UF={1}' {
-; CHECK-NEXT:  Live-in vp<%0> = VF * UF
-; CHECK-NEXT:  Live-in vp<%1> = vector-trip-count
-; CHECK-NEXT:  vp<%2> = original trip-count
+; CHECK-NEXT:  Live-in vp<%0> = VF
+; CHECK-NEXT:  Live-in vp<%1> = VF * UF
+; CHECK-NEXT:  Live-in vp<%2> = vector-trip-count
+; CHECK-NEXT:  vp<%3> = original trip-count
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  ir-bb<for.body.preheader>:
 ; CHECK-NEXT:    IR %0 = zext i32 %n to i64
-; CHECK-NEXT:    EMIT vp<%2> = EXPAND SCEV (zext i32 %n to i64)
+; CHECK-NEXT:    EMIT vp<%3> = EXPAND SCEV (zext i32 %n to i64)
 ; CHECK-NEXT:  No successors
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  vector.ph:
@@ -151,27 +153,27 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  <x1> vector loop: {
 ; CHECK-NEXT:    vector.body:
-; CHECK-NEXT:      EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%8>
-; CHECK-NEXT:      vp<%4> = DERIVED-IV ir<%n> + vp<%3> * ir<-1>
-; CHECK-NEXT:      vp<%5> = SCALAR-STEPS vp<%4>, ir<-1>
-; CHECK-NEXT:      CLONE ir<%i.0> = add nsw vp<%5>, ir<-1>
+; CHECK-NEXT:      EMIT vp<%4> = CANONICAL-INDUCTION ir<0>, vp<%9>
+; CHECK-NEXT:      vp<%5> = DERIVED-IV ir<%n> + vp<%4> * ir<-1>
+; CHECK-NEXT:      vp<%6> = SCALAR-STEPS vp<%5>, ir<-1>
+; CHECK-NEXT:      CLONE ir<%i.0> = add nsw vp<%6>, ir<-1>
 ; CHECK-NEXT:      CLONE ir<%idxprom> = zext ir<%i.0>
 ; CHECK-NEXT:      CLONE ir<%arrayidx> = getelementptr inbounds ir<%B>, ir<%idxprom>
-; CHECK-NEXT:      vp<%6> = vector-pointer (reverse) ir<%arrayidx>
-; CHECK-NEXT:      WIDEN ir<%13> = load vp<%6>
+; CHECK-NEXT:      vp<%7> = reverse-vector-pointer ir<%arrayidx>, vp<%0>
+; CHECK-NEXT:      WIDEN ir<%13> = load vp<%7>
 ; CHECK-NEXT:      WIDEN ir<%add9> = add ir<%13>, ir<1>
 ; CHECK-NEXT:      CLONE ir<%arrayidx3> = getelementptr inbounds ir<%A>, ir<%idxprom>
-; CHECK-NEXT:      vp<%7> = vector-pointer (reverse) ir<%arrayidx3>
-; CHECK-NEXT:      WIDEN store vp<%7>, ir<%add9>
-; CHECK-NEXT:      EMIT vp<%8> = add nuw vp<%3>, vp<%0>
-; CHECK-NEXT:      EMIT branch-on-count vp<%8>, vp<%1>
+; CHECK-NEXT:      vp<%8> = reverse-vector-pointer ir<%arrayidx3>, vp<%0>
+; CHECK-NEXT:      WIDEN store vp<%8>, ir<%add9>
+; CHECK-NEXT:      EMIT vp<%9> = add nuw vp<%4>, vp<%1>
+; CHECK-NEXT:      EMIT branch-on-count vp<%9>, vp<%2>
 ; CHECK-NEXT:    No successors
 ; CHECK-NEXT:  }
 ; CHECK-NEXT:  Successor(s): middle.block
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  middle.block:
-; CHECK-NEXT:    EMIT vp<%10> = icmp eq vp<%2>, vp<%1>
-; CHECK-NEXT:    EMIT branch-on-cond vp<%10>
+; CHECK-NEXT:    EMIT vp<%11> = icmp eq vp<%3>, vp<%2>
+; CHECK-NEXT:    EMIT branch-on-cond vp<%11>
 ; CHECK-NEXT:  Successor(s): ir-bb<for.cond.cleanup.loopexit>, scalar.ph
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  ir-bb<for.cond.cleanup.loopexit>:
@@ -257,13 +259,14 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur
 ; CHECK-NEXT:  LV: Scalarizing: %cmp = icmp ugt i64 %indvars.iv, 1
 ; CHECK-NEXT:  LV: Scalarizing: %indvars.iv.next = add nsw i64 %indvars.iv, -1
 ; CHECK-NEXT:  VPlan 'Initial VPlan for VF={vscale x 4},UF>=1' {
-; CHECK-NEXT:  Live-in vp<%0> = VF * UF
-; CHECK-NEXT:  Live-in vp<%1> = vector-trip-count
-; CHECK-NEXT:  vp<%2> = original trip-count
+; CHECK-NEXT:  Live-in vp<%0> = VF
+; CHECK-NEXT:  Live-in vp<%1> = VF * UF
+; CHECK-NEXT:  Live-in vp<%2> = vector-trip-count
+; CHECK-NEXT:  vp<%3> = original trip-count
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  ir-bb<for.body.preheader>:
 ; CHECK-NEXT:    IR %0 = zext i32 %n to i64
-; CHECK-NEXT:    EMIT vp<%2> = EXPAND SCEV (zext i32 %n to i64)
+; CHECK-NEXT:    EMIT vp<%3> = EXPAND SCEV (zext i32 %n to i64)
 ; CHECK-NEXT:  No successors
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  vector.ph:
@@ -271,27 +274,27 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  <x1> vector loop: {
 ; CHECK-NEXT:    vector.body:
-; CHECK-NEXT:      EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%8>
-; CHECK-NEXT:      vp<%4> = DERIVED-IV ir<%n> + vp<%3> * ir<-1>
-; CHECK-NEXT:      vp<%5> = SCALAR-STEPS vp<%4>, ir<-1>
-; CHECK-NEXT:      CLONE ir<%i.0> = add nsw vp<...
[truncated]
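
To make the PartOpIndex convention concrete: the unroller appends each copy's part number as a trailing constant operand, and VPUnrollPartAccessor<PartOpIdx> reads it back in execute(). A toy standalone model of that lookup (simplified types, not LLVM's actual classes):

#include <cassert>
#include <vector>

struct ToyRecipe {
  std::vector<unsigned> Operands; // stand-in for the recipe's VPValue operands
};

// The part operand lives at a fixed index; its absence means the recipe has
// not been unrolled yet, i.e. part 0.
template <unsigned PartOpIdx> unsigned getUnrollPart(const ToyRecipe &R) {
  if (R.Operands.size() == PartOpIdx)
    return 0;
  assert(R.Operands.size() == PartOpIdx + 1 && "unexpected operand count");
  return R.Operands[PartOpIdx];
}

int main() {
  ToyRecipe VecPtr{{/*Ptr*/ 0, /*Part*/ 3}};              // PartOpIdx = 1
  ToyRecipe RevVecPtr{{/*Ptr*/ 0, /*VF*/ 4, /*Part*/ 3}}; // PartOpIdx = 2
  assert(getUnrollPart<1>(VecPtr) == 3);
  assert(getUnrollPart<2>(RevVecPtr) == 3);
  return 0;
}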

Comment on lines 71 to 74
; CHECK-NEXT: EMIT vp<%4> = CANONICAL-INDUCTION ir<0>, vp<%9>
; CHECK-NEXT: vp<%5> = DERIVED-IV ir<%n> + vp<%4> * ir<-1>
; CHECK-NEXT: vp<%6> = SCALAR-STEPS vp<%5>, ir<-1>
; CHECK-NEXT: CLONE ir<%i.0> = add nsw vp<%6>, ir<-1>
Contributor:
Would it be possible to update the test to use patterns for the VPValue numbers separately, to avoid the unrelated changes?

arcbbb (Author):
Sure, I separated it into #111086.

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp (outdated review thread, resolved)
Refactors VPVectorPointerRecipe to use the VF VPValue to obtain the
runtime VF, similar to llvm#95305.

Since only reverse vector pointers require the runtime VF, the patch
sets VPUnrollPart::PartOpIndex to 1 for vector pointers and 2 for reverse vector
pointers. As a result, the generation of reverse vector pointers is
moved into a separate recipe.
arcbbb (Author) commented on Oct 4, 2024

Rebased to avoid unrelated changes in the tests.

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp (two outdated review threads, resolved)
Comment on lines 1822 to 1825
const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout();
Type *IndexTy = State.VF.isScalable()
? DL.getIndexType(IndexedTy->getPointerTo())
: Builder.getInt32Ty();
Contributor:
Might be good to share this code between both vector pointer recipes?

arcbbb (Author):
Sure, I moved them into getGEPIndexTy().
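
For reference, a plausible shape for that shared helper, pieced together from the index-type logic visible in the patch. It assumes LLVM's IRBuilderBase/DataLayout APIs, and everything past the first line of the signature (quoted in a later thread below) is an assumption rather than the committed code:

static Type *getGEPIndexTy(bool IsScalable, bool IsReverse,
                           unsigned CurrentPart, IRBuilderBase &Builder) {
  // Use i32 for the GEP index when the value is constant; scalable VFs with
  // a reverse access or a non-zero part need a DataLayout-provided index type.
  return IsScalable && (IsReverse || CurrentPart > 0)
             ? Builder.GetInsertBlock()->getDataLayout().getIndexType(
                   Builder.getPtrTy(0))
             : Builder.getInt32Ty();
}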

Resolved review threads (outdated): llvm/lib/Transforms/Vectorize/VPlan.h (x2), llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp (x2)
arcbbb and others added 5 commits October 15, 2024 17:10
Co-authored-by: Florian Hahn <flo@fhahn.com>
Co-authored-by: Florian Hahn <flo@fhahn.com>
Co-authored-by: Florian Hahn <flo@fhahn.com>
- refine stale comments
- sink to use
- add inbounds in print()
auto &Builder = State.Builder;
State.setDebugLocFrom(getDebugLoc());
unsigned CurrentPart = getUnrollPart(*this);
static Type *getGEPIndexTy(bool IsScalable, bool IsReverse,
Contributor:
Could you also add VPReverseVectorPointerSC to mayHaveSideEffects & co?

arcbbb (Author):
Nice catch! Will do it for VPVectorPointerSC separately.
