@@ -2640,17 +2640,6 @@ void VPWidenLoadRecipe::print(raw_ostream &O, const Twine &Indent,
 }
 #endif
 
-/// Use all-true mask for reverse rather than actual mask, as it avoids a
-/// dependence w/o affecting the result.
-static Instruction *createReverseEVL(IRBuilderBase &Builder, Value *Operand,
-                                     Value *EVL, const Twine &Name) {
-  VectorType *ValTy = cast<VectorType>(Operand->getType());
-  Value *AllTrueMask =
-      Builder.CreateVectorSplat(ValTy->getElementCount(), Builder.getTrue());
-  return Builder.CreateIntrinsic(ValTy, Intrinsic::experimental_vp_reverse,
-                                 {Operand, AllTrueMask, EVL}, nullptr, Name);
-}
-
 void VPWidenLoadEVLRecipe::execute(VPTransformState &State) {
   auto *LI = cast<LoadInst>(&Ingredient);
 
@@ -2665,19 +2654,26 @@ void VPWidenLoadEVLRecipe::execute(VPTransformState &State)
   Value *EVL = State.get(getEVL(), VPLane(0));
   Value *Addr = State.get(getAddr(), !CreateGather);
   Value *Mask = nullptr;
-  if (VPValue *VPMask = getMask()) {
+  if (VPValue *VPMask = getMask())
     Mask = State.get(VPMask);
-    if (isReverse())
-      Mask = createReverseEVL(Builder, Mask, EVL, "vp.reverse.mask");
-  } else {
+  else
     Mask = Builder.CreateVectorSplat(State.VF, Builder.getTrue());
-  }
 
   if (CreateGather) {
     NewLI =
         Builder.CreateIntrinsic(DataTy, Intrinsic::vp_gather, {Addr, Mask, EVL},
                                 nullptr, "wide.masked.gather");
   } else {
+    if (isReverse()) {
+      auto *EltTy = DataTy->getElementType();
+      // if (EltTy->getScalarSizeInBits() !=
+      //     EVL->getType()->getScalarSizeInBits())
+      //   EVL = ConstantInt::getSigned(EVL->getType(),
+      //       static_cast<int64_t>(EltTy->getScalarSizeInBits()) / 8);
+      auto *GEP = dyn_cast<GetElementPtrInst>(Addr->stripPointerCasts());
+      Value *Offset = Builder.CreateSub(State.Builder.getInt32(1), EVL);
+      Addr = Builder.CreateGEP(EltTy, Addr, Offset, "", GEP->isInBounds());
+    }
     VectorBuilder VBuilder(Builder);
     VBuilder.setEVL(EVL).setMask(Mask);
     NewLI = cast<CallInst>(VBuilder.createVectorInstruction(
@@ -2686,10 +2682,7 @@ void VPWidenLoadEVLRecipe::execute(VPTransformState &State)
   NewLI->addParamAttr(
       0, Attribute::getWithAlignment(NewLI->getContext(), Alignment));
   State.addMetadata(NewLI, LI);
-  Instruction *Res = NewLI;
-  if (isReverse())
-    Res = createReverseEVL(Builder, Res, EVL, "vp.reverse");
-  State.set(this, Res);
+  State.set(this, NewLI);
 }
 
 InstructionCost VPWidenLoadEVLRecipe::computeCost(ElementCount VF,
@@ -2707,14 +2700,13 @@ InstructionCost VPWidenLoadEVLRecipe::computeCost(ElementCount VF,
       getLoadStoreAlignment(const_cast<Instruction *>(&Ingredient));
   unsigned AS =
       getLoadStoreAddressSpace(const_cast<Instruction *>(&Ingredient));
-  InstructionCost Cost = Ctx.TTI.getMaskedMemoryOpCost(
-      Ingredient.getOpcode(), Ty, Alignment, AS, Ctx.CostKind);
   if (!Reverse)
-    return Cost;
+    return Ctx.TTI.getMaskedMemoryOpCost(Ingredient.getOpcode(), Ty, Alignment,
+                                         AS, Ctx.CostKind);
 
-  return Cost + Ctx.TTI.getShuffleCost(TargetTransformInfo::SK_Reverse,
-                                       cast<VectorType>(Ty), {}, Ctx.CostKind,
-                                       0);
+  return Ctx.TTI.getStridedMemoryOpCost(Ingredient.getOpcode(), Ty,
+                                        getAddr()->getUnderlyingValue(), false,
+                                        Alignment, Ctx.CostKind);
 }
 
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -2775,7 +2767,8 @@ void VPWidenStoreRecipe::print(raw_ostream &O, const Twine &Indent,
 
 void VPWidenStoreEVLRecipe::execute(VPTransformState &State) {
   auto *SI = cast<StoreInst>(&Ingredient);
-
+  Type *ScalarDataTy = getLoadStoreType(&Ingredient);
+  auto *DataTy = VectorType::get(ScalarDataTy, State.VF);
   VPValue *StoredValue = getStoredValue();
   bool CreateScatter = !isConsecutive();
   const Align Alignment = getLoadStoreAlignment(&Ingredient);
@@ -2786,22 +2779,32 @@ void VPWidenStoreEVLRecipe::execute(VPTransformState &State)
   CallInst *NewSI = nullptr;
   Value *StoredVal = State.get(StoredValue);
   Value *EVL = State.get(getEVL(), VPLane(0));
-  if (isReverse())
-    StoredVal = createReverseEVL(Builder, StoredVal, EVL, "vp.reverse");
   Value *Mask = nullptr;
-  if (VPValue *VPMask = getMask()) {
+  if (VPValue *VPMask = getMask())
     Mask = State.get(VPMask);
-    if (isReverse())
-      Mask = createReverseEVL(Builder, Mask, EVL, "vp.reverse.mask");
-  } else {
+  else
     Mask = Builder.CreateVectorSplat(State.VF, Builder.getTrue());
-  }
+
   Value *Addr = State.get(getAddr(), !CreateScatter);
   if (CreateScatter) {
     NewSI = Builder.CreateIntrinsic(Type::getVoidTy(EVL->getContext()),
                                     Intrinsic::vp_scatter,
                                     {StoredVal, Addr, Mask, EVL});
   } else {
+    if (isReverse()) {
+      auto *EltTy = DataTy->getElementType();
+      // FIXME: we may need not deal with the size, the InstCombine will deal
+      // with the Offset Type if (EltTy->getScalarSizeInBits() !=
+      //     EVL->getType()->getScalarSizeInBits())
+      //   EVL = ConstantInt::getSigned(EVL->getType(),
+      //       static_cast<int64_t>(EltTy->getScalarSizeInBits()) / 8);
+      auto *GEP = dyn_cast<GetElementPtrInst>(Addr->stripPointerCasts());
+      // Value *Offset =
+      //     Builder.CreateSub(State.Builder.getIntN(EVL->getType()->getScalarSizeInBits(),
+      //     1), EVL);
+      Value *Offset = Builder.CreateSub(State.Builder.getInt32(1), EVL);
+      Addr = Builder.CreateGEP(EltTy, Addr, Offset, "", GEP->isInBounds());
+    }
     VectorBuilder VBuilder(Builder);
     VBuilder.setEVL(EVL).setMask(Mask);
     NewSI = cast<CallInst>(VBuilder.createVectorInstruction(
@@ -2828,14 +2831,13 @@ InstructionCost VPWidenStoreEVLRecipe::computeCost(ElementCount VF,
       getLoadStoreAlignment(const_cast<Instruction *>(&Ingredient));
   unsigned AS =
       getLoadStoreAddressSpace(const_cast<Instruction *>(&Ingredient));
-  InstructionCost Cost = Ctx.TTI.getMaskedMemoryOpCost(
-      Ingredient.getOpcode(), Ty, Alignment, AS, Ctx.CostKind);
   if (!Reverse)
-    return Cost;
+    return Ctx.TTI.getMaskedMemoryOpCost(Ingredient.getOpcode(), Ty, Alignment,
+                                         AS, Ctx.CostKind);
 
-  return Cost + Ctx.TTI.getShuffleCost(TargetTransformInfo::SK_Reverse,
-                                       cast<VectorType>(Ty), {}, Ctx.CostKind,
-                                       0);
+  return Ctx.TTI.getStridedMemoryOpCost(Ingredient.getOpcode(), Ty,
+                                        getAddr()->getUnderlyingValue(), false,
+                                        Alignment, Ctx.CostKind);
 }
 
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)