@@ -3098,61 +3098,6 @@ bool VPReplicateRecipe::shouldPack() const {
   });
 }
 
-/// Returns true if \p Ptr is a pointer computation for which the legacy cost
-/// model computes a SCEV expression when computing the address cost.
-static bool shouldUseAddressAccessSCEV(const VPValue *Ptr) {
-  auto *PtrR = Ptr->getDefiningRecipe();
-  if (!PtrR || !((isa<VPReplicateRecipe>(PtrR) &&
-                  cast<VPReplicateRecipe>(PtrR)->getOpcode() ==
-                      Instruction::GetElementPtr) ||
-                 isa<VPWidenGEPRecipe>(PtrR)))
-    return false;
-
-  // We are looking for a GEP where all indices are either loop invariant or
-  // inductions.
-  for (VPValue *Opd : drop_begin(PtrR->operands())) {
-    if (!Opd->isDefinedOutsideLoopRegions() &&
-        !isa<VPScalarIVStepsRecipe, VPWidenIntOrFpInductionRecipe>(Opd))
-      return false;
-  }
-
-  return true;
-}
-
-/// Returns true if \p V is used as part of the address of another load or
-/// store.
-static bool isUsedByLoadStoreAddress(const VPUser *V) {
-  SmallPtrSet<const VPUser *, 4> Seen;
-  SmallVector<const VPUser *> WorkList = {V};
-
-  while (!WorkList.empty()) {
-    auto *Cur = dyn_cast<VPSingleDefRecipe>(WorkList.pop_back_val());
-    if (!Cur || !Seen.insert(Cur).second)
-      continue;
-
-    for (VPUser *U : Cur->users()) {
-      if (auto *InterleaveR = dyn_cast<VPInterleaveBase>(U))
-        if (InterleaveR->getAddr() == Cur)
-          return true;
-      if (auto *RepR = dyn_cast<VPReplicateRecipe>(U)) {
-        if (RepR->getOpcode() == Instruction::Load &&
-            RepR->getOperand(0) == Cur)
-          return true;
-        if (RepR->getOpcode() == Instruction::Store &&
-            RepR->getOperand(1) == Cur)
-          return true;
-      }
-      if (auto *MemR = dyn_cast<VPWidenMemoryRecipe>(U)) {
-        if (MemR->getAddr() == Cur && MemR->isConsecutive())
-          return true;
-      }
-    }
-
-    append_range(WorkList, cast<VPSingleDefRecipe>(Cur)->users());
-  }
-  return false;
-}
-
 InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
                                                VPCostContext &Ctx) const {
   Instruction *UI = cast<Instruction>(getUnderlyingValue());
@@ -3260,58 +3205,21 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
   }
   case Instruction::Load:
   case Instruction::Store: {
-    if (VF.isScalable() && !isSingleScalar())
-      return InstructionCost::getInvalid();
-
+    if (isSingleScalar()) {
+      bool IsLoad = UI->getOpcode() == Instruction::Load;
+      Type *ValTy = Ctx.Types.inferScalarType(IsLoad ? this : getOperand(0));
+      Type *ScalarPtrTy = Ctx.Types.inferScalarType(getOperand(IsLoad ? 0 : 1));
+      const Align Alignment = getLoadStoreAlignment(UI);
+      unsigned AS = getLoadStoreAddressSpace(UI);
+      TTI::OperandValueInfo OpInfo = TTI::getOperandInfo(UI->getOperand(0));
+      InstructionCost ScalarMemOpCost = Ctx.TTI.getMemoryOpCost(
+          UI->getOpcode(), ValTy, Alignment, AS, Ctx.CostKind, OpInfo, UI);
+      return ScalarMemOpCost + Ctx.TTI.getAddressComputationCost(
+                                   ScalarPtrTy, nullptr, nullptr, Ctx.CostKind);
+    }
     // TODO: See getMemInstScalarizationCost for how to handle replicating and
     // predicated cases.
-    const VPRegionBlock *ParentRegion = getParent()->getParent();
-    if (ParentRegion && ParentRegion->isReplicator())
-      break;
-
-    bool IsLoad = UI->getOpcode() == Instruction::Load;
-    const VPValue *PtrOp = getOperand(!IsLoad);
-    // TODO: Handle cases where we need to pass a SCEV to
-    // getAddressComputationCost.
-    if (shouldUseAddressAccessSCEV(PtrOp))
-      break;
-
-    Type *ValTy = Ctx.Types.inferScalarType(IsLoad ? this : getOperand(0));
-    Type *ScalarPtrTy = Ctx.Types.inferScalarType(PtrOp);
-    const Align Alignment = getLoadStoreAlignment(UI);
-    unsigned AS = getLoadStoreAddressSpace(UI);
-    TTI::OperandValueInfo OpInfo = TTI::getOperandInfo(UI->getOperand(0));
-    InstructionCost ScalarMemOpCost = Ctx.TTI.getMemoryOpCost(
-        UI->getOpcode(), ValTy, Alignment, AS, Ctx.CostKind, OpInfo);
-
-    Type *PtrTy = isSingleScalar() ? ScalarPtrTy : toVectorTy(ScalarPtrTy, VF);
-
-    InstructionCost ScalarCost =
-        ScalarMemOpCost + Ctx.TTI.getAddressComputationCost(
-                              PtrTy, &Ctx.SE, nullptr, Ctx.CostKind);
-    if (isSingleScalar())
-      return ScalarCost;
-
-    SmallVector<const VPValue *> OpsToScalarize;
-    Type *ResultTy = Type::getVoidTy(PtrTy->getContext());
-    // Set ResultTy and OpsToScalarize, if scalarization is needed. Currently we
-    // don't assign scalarization overhead in general, if the target prefers
-    // vectorized addressing or the loaded value is used as part of an address
-    // of another load or store.
-    bool PreferVectorizedAddressing = Ctx.TTI.prefersVectorizedAddressing();
-    if (PreferVectorizedAddressing || !isUsedByLoadStoreAddress(this)) {
-      bool EfficientVectorLoadStore =
-          Ctx.TTI.supportsEfficientVectorElementLoadStore();
-      if (!(IsLoad && !PreferVectorizedAddressing) &&
-          !(!IsLoad && EfficientVectorLoadStore))
-        append_range(OpsToScalarize, operands());
-
-      if (!EfficientVectorLoadStore)
-        ResultTy = Ctx.Types.inferScalarType(this);
-    }
-
-    return (ScalarCost * VF.getFixedValue()) +
-           Ctx.getScalarizationOverhead(ResultTy, OpsToScalarize, VF, true);
+    break;
   }
   }
 
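For context on the pattern the removed shouldUseAddressAccessSCEV() helper matched, a minimal sketch (not part of the patch, plain C++ rather than LLVM IR): the scalarized load/store address is a GEP whose base is loop invariant and whose only index is the loop induction variable, which is the kind of address the legacy cost model prices via a SCEV expression per the helper's comment.

#include <cstddef>

// Illustration only: &A[I] is a GEP with a loop-invariant base (A) and an
// induction index (I), so each replicated lane's load/store address falls
// into the category the removed helper detected.
void scale(float *A, float S, size_t N) {
  for (size_t I = 0; I < N; ++I)
    A[I] *= S; // replicated load + store through the induction-based address
}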