23 changes: 9 additions & 14 deletions llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -135,6 +135,9 @@ class IntrinsicCostAttributes {
InstructionCost ScalarizationCost = InstructionCost::getInvalid();
TargetLibraryInfo const *LibInfo = nullptr;

+ MaybeAlign Alignment;
+ bool VariableMask = false;

public:
LLVM_ABI IntrinsicCostAttributes(
Intrinsic::ID Id, const CallBase &CI,
@@ -146,6 +149,10 @@ class IntrinsicCostAttributes {
FastMathFlags Flags = FastMathFlags(), const IntrinsicInst *I = nullptr,
InstructionCost ScalarCost = InstructionCost::getInvalid());

+ LLVM_ABI IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
+ ArrayRef<Type *> Tys, Align Alignment,
+ bool VariableMask = false);

LLVM_ABI IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
ArrayRef<const Value *> Args);

@@ -160,6 +167,8 @@ class IntrinsicCostAttributes {
const IntrinsicInst *getInst() const { return II; }
Type *getReturnType() const { return RetTy; }
FastMathFlags getFlags() const { return FMF; }
+ MaybeAlign getAlign() const { return Alignment; }
+ bool getVariableMask() const { return VariableMask; }
InstructionCost getScalarizationCost() const { return ScalarizationCost; }
const SmallVectorImpl<const Value *> &getArgs() const { return Arguments; }
const SmallVectorImpl<Type *> &getArgTypes() const { return ParamTys; }
@@ -1586,20 +1595,6 @@ class TargetTransformInfo {
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
const Instruction *I = nullptr) const;

- /// \return The cost of strided memory operations.
- /// \p Opcode - is a type of memory access Load or Store
- /// \p DataTy - a vector type of the data to be loaded or stored
- /// \p Ptr - pointer [or vector of pointers] - address[es] in memory
- /// \p VariableMask - true when the memory access is predicated with a mask
- /// that is not a compile-time constant
- /// \p Alignment - alignment of single element
- /// \p I - the optional original context instruction, if one exists, e.g. the
- /// load/store to transform or the call to the gather/scatter intrinsic
- LLVM_ABI InstructionCost getStridedMemoryOpCost(
- unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
- Align Alignment, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
- const Instruction *I = nullptr) const;

/// \return The cost of the interleaved memory operation.
/// \p Opcode is the memory operation code
/// \p VecTy is the vector type of the interleaved access.
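Note: the headline change in this header is that strided-memory cost queries now travel through getIntrinsicInstrCost, with the alignment and mask variability carried on IntrinsicCostAttributes. A minimal sketch of a caller, mirroring the SLPVectorizer hunks below; the helper name is illustrative, not part of the patch:

```cpp
#include "llvm/Analysis/TargetTransformInfo.h"
using namespace llvm;

// Cost of a fixed-stride vector load of type VecTy. An empty Tys list is
// enough because the return type fully describes the loaded value; the new
// IntrinsicCostAttributes fields carry the alignment and mask kind.
static InstructionCost
getStridedLoadCost(const TargetTransformInfo &TTI, VectorType *VecTy,
                   Align Alignment,
                   TargetTransformInfo::TargetCostKind CostKind) {
  IntrinsicCostAttributes Attrs(Intrinsic::experimental_vp_strided_load,
                                VecTy, /*Tys=*/{}, Alignment,
                                /*VariableMask=*/false);
  return TTI.getIntrinsicInstrCost(Attrs, CostKind);
}
```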
8 changes: 0 additions & 8 deletions llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -862,14 +862,6 @@ class TargetTransformInfoImplBase {
return 1;
}

- virtual InstructionCost
- getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
- bool VariableMask, Align Alignment,
- TTI::TargetCostKind CostKind,
- const Instruction *I = nullptr) const {
- return InstructionCost::getInvalid();
- }

virtual InstructionCost getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
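Note: with the default hook deleted (it returned an invalid cost), any out-of-tree target that overrode getStridedMemoryOpCost now has to answer these queries from getIntrinsicInstrCost, as the RISC-V change below does. A sketch of the migration shape; MyTTIImpl and its cost values are hypothetical:

```cpp
// Hypothetical out-of-tree backend after this patch: strided-access costs
// are reported from getIntrinsicInstrCost rather than a dedicated hook.
InstructionCost
MyTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                 TTI::TargetCostKind CostKind) const {
  switch (ICA.getID()) {
  case Intrinsic::experimental_vp_strided_load:
  case Intrinsic::experimental_vp_strided_store:
    // Target-specific strided costing goes here, typically keyed on
    // ICA.getAlign() and ICA.getVariableMask(); deferring to the base
    // class instead prices the operation like a gather/scatter.
    return ICA.getVariableMask() ? 4 * TTI::TCC_Basic : TTI::TCC_Basic;
  default:
    return BaseT::getIntrinsicInstrCost(ICA, CostKind);
  }
}
```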
45 changes: 18 additions & 27 deletions llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -1574,18 +1574,6 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
/*IsGatherScatter*/ true, CostKind);
}

- InstructionCost getStridedMemoryOpCost(unsigned Opcode, Type *DataTy,
- const Value *Ptr, bool VariableMask,
- Align Alignment,
- TTI::TargetCostKind CostKind,
- const Instruction *I) const override {
- // For a target without strided memory operations (or for an illegal
- // operation type on one which does), assume we lower to a gather/scatter
- // operation. (Which may in turn be scalarized.)
- return thisT()->getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
- Alignment, CostKind, I);
- }

InstructionCost getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
@@ -1958,27 +1946,26 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
}
case Intrinsic::experimental_vp_strided_store: {
const Value *Data = Args[0];
- const Value *Ptr = Args[1];
const Value *Mask = Args[3];
const Value *EVL = Args[4];
bool VarMask = !isa<Constant>(Mask) || !isa<Constant>(EVL);
Type *EltTy = cast<VectorType>(Data->getType())->getElementType();
Align Alignment =
I->getParamAlign(1).value_or(thisT()->DL.getABITypeAlign(EltTy));
- return thisT()->getStridedMemoryOpCost(Instruction::Store,
- Data->getType(), Ptr, VarMask,
- Alignment, CostKind, I);
+ return thisT()->getCommonMaskedMemoryOpCost(
+ Instruction::Store, Data->getType(), Alignment, VarMask,
+ /*IsGatherScatter*/ true, CostKind);
}
case Intrinsic::experimental_vp_strided_load: {
- const Value *Ptr = Args[0];
const Value *Mask = Args[2];
const Value *EVL = Args[3];
bool VarMask = !isa<Constant>(Mask) || !isa<Constant>(EVL);
Type *EltTy = cast<VectorType>(RetTy)->getElementType();
Align Alignment =
I->getParamAlign(0).value_or(thisT()->DL.getABITypeAlign(EltTy));
- return thisT()->getStridedMemoryOpCost(Instruction::Load, RetTy, Ptr,
- VarMask, Alignment, CostKind, I);
+ return thisT()->getCommonMaskedMemoryOpCost(
+ Instruction::Load, RetTy, Alignment, VarMask,
+ /*IsGatherScatter*/ true, CostKind);
}
case Intrinsic::stepvector: {
if (isa<ScalableVectorType>(RetTy))
@@ -2418,17 +2405,21 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
}
case Intrinsic::experimental_vp_strided_store: {
auto *Ty = cast<VectorType>(ICA.getArgTypes()[0]);
- Align Alignment = thisT()->DL.getABITypeAlign(Ty->getElementType());
- return thisT()->getStridedMemoryOpCost(
- Instruction::Store, Ty, /*Ptr=*/nullptr, /*VariableMask=*/true,
- Alignment, CostKind, ICA.getInst());
+ Align Alignment = ICA.getAlign().value_or(
+ thisT()->DL.getABITypeAlign(Ty->getElementType()));
+ return thisT()->getCommonMaskedMemoryOpCost(
+ Instruction::Store, Ty, Alignment,
+ /*VariableMask=*/true,
+ /*IsGatherScatter*/ true, CostKind);
}
case Intrinsic::experimental_vp_strided_load: {
auto *Ty = cast<VectorType>(ICA.getReturnType());
- Align Alignment = thisT()->DL.getABITypeAlign(Ty->getElementType());
- return thisT()->getStridedMemoryOpCost(
- Instruction::Load, Ty, /*Ptr=*/nullptr, /*VariableMask=*/true,
- Alignment, CostKind, ICA.getInst());
+ Align Alignment = ICA.getAlign().value_or(
+ thisT()->DL.getABITypeAlign(Ty->getElementType()));
+ return thisT()->getCommonMaskedMemoryOpCost(
+ Instruction::Load, Ty, Alignment,
+ /*VariableMask=*/true,
+ /*IsGatherScatter*/ true, CostKind);
}
case Intrinsic::vector_reduce_add:
case Intrinsic::vector_reduce_mul:
17 changes: 8 additions & 9 deletions llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -96,6 +96,14 @@ IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
ParamTys.insert(ParamTys.begin(), Tys.begin(), Tys.end());
}

+ IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
+ ArrayRef<Type *> Tys,
+ Align Alignment,
+ bool VariableMask)
+ : RetTy(RTy), IID(Id), Alignment(Alignment), VariableMask(VariableMask) {
+ ParamTys.insert(ParamTys.begin(), Tys.begin(), Tys.end());
+ }

IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, Type *Ty,
ArrayRef<const Value *> Args)
: RetTy(Ty), IID(Id) {
@@ -1210,15 +1218,6 @@ InstructionCost TargetTransformInfo::getExpandCompressMemoryOpCost(
return Cost;
}

- InstructionCost TargetTransformInfo::getStridedMemoryOpCost(
- unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
- Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) const {
- InstructionCost Cost = TTIImpl->getStridedMemoryOpCost(
- Opcode, DataTy, Ptr, VariableMask, Alignment, CostKind, I);
- assert(Cost >= 0 && "TTI should not produce negative costs!");
- return Cost;
- }

InstructionCost TargetTransformInfo::getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
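Note: this constructor attaches neither a call instruction nor argument values, so a query built with it is type-based, and the RISC-V hunk below keys on exactly that. A small sketch of the observable behaviour; VecTy and the alignment value are assumed for illustration:

```cpp
// Arguments stays empty and II stays null, so isTypeBasedOnly() is true and
// getInst() is nullptr; backends must take the alignment from getAlign()
// rather than from a pointer argument's getParamAlign().
IntrinsicCostAttributes Attrs(Intrinsic::experimental_vp_strided_load,
                              VecTy, /*Tys=*/{}, Align(16),
                              /*VariableMask=*/false);
assert(Attrs.isTypeBasedOnly() && Attrs.getInst() == nullptr);
assert(Attrs.getAlign() && *Attrs.getAlign() == Align(16));
```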
60 changes: 37 additions & 23 deletions llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1172,29 +1172,6 @@ InstructionCost RISCVTTIImpl::getExpandCompressMemoryOpCost(
LT.first * getRISCVInstructionCost(Opcodes, LT.second, CostKind);
}

- InstructionCost RISCVTTIImpl::getStridedMemoryOpCost(
- unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
- Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) const {
- if (((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
- !isLegalStridedLoadStore(DataTy, Alignment)) ||
- (Opcode != Instruction::Load && Opcode != Instruction::Store))
- return BaseT::getStridedMemoryOpCost(Opcode, DataTy, Ptr, VariableMask,
- Alignment, CostKind, I);
-
- if (CostKind == TTI::TCK_CodeSize)
- return TTI::TCC_Basic;
-
- // Cost is proportional to the number of memory operations implied. For
- // scalable vectors, we use an estimate on that number since we don't
- // know exactly what VL will be.
- auto &VTy = *cast<VectorType>(DataTy);
- InstructionCost MemOpCost =
- getMemoryOpCost(Opcode, VTy.getElementType(), Alignment, 0, CostKind,
- {TTI::OK_AnyValue, TTI::OP_None}, I);
- unsigned NumLoads = getEstimatedVLFor(&VTy);
- return NumLoads * MemOpCost;
- }

InstructionCost
RISCVTTIImpl::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const {
// FIXME: This is a property of the default vector convention, not
@@ -1561,6 +1538,43 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
cast<VectorType>(ICA.getArgTypes()[0]), {}, CostKind,
0, cast<VectorType>(ICA.getReturnType()));
}
+ case Intrinsic::experimental_vp_strided_load:
+ case Intrinsic::experimental_vp_strided_store: {
+ if (CostKind == TTI::TCK_CodeSize)
+ return TTI::TCC_Basic;
+
+ auto *DataTy = (ICA.getID() == Intrinsic::experimental_vp_strided_load)
+ ? cast<VectorType>(ICA.getReturnType())
+ : cast<VectorType>(ICA.getArgTypes()[0]);
+ Type *EltTy = DataTy->getElementType();
+
+ Align ABITyAlign = DL.getABITypeAlign(EltTy);
+
+ const IntrinsicInst *I = ICA.getInst();
+ Align Alignment;
+ if (ICA.isTypeBasedOnly())
+ Alignment = ICA.getAlign().value_or(ABITyAlign);
+ else {
+ unsigned Index =
+ (ICA.getID() == Intrinsic::experimental_vp_strided_load) ? 0 : 1;
+ Alignment = I->getParamAlign(Index).value_or(ABITyAlign);
+ }
+
+ if (!isLegalStridedLoadStore(DataTy, Alignment))
+ return BaseT::getIntrinsicInstrCost(ICA, CostKind);
+
+ unsigned Opcode = ICA.getID() == Intrinsic::experimental_vp_strided_load
+ ? Instruction::Load
+ : Instruction::Store;
+ // Cost is proportional to the number of memory operations implied. For
+ // scalable vectors, we use an estimate on that number since we don't
+ // know exactly what VL will be.
+ InstructionCost MemOpCost =
+ getMemoryOpCost(Opcode, EltTy, Alignment, 0, CostKind,
+ {TTI::OK_AnyValue, TTI::OP_None}, I);
+ unsigned NumLoads = getEstimatedVLFor(DataTy);
+ return NumLoads * MemOpCost;
+ }
case Intrinsic::fptoui_sat:
case Intrinsic::fptosi_sat: {
InstructionCost Cost = 0;
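Note: the RISC-V model keeps the old formula: when the strided form is legal, cost ≈ estimated VL × per-element memory-op cost, on the assumption that the access executes roughly one element operation per lane. A worked example under assumed numbers, purely illustrative:

```cpp
// If getEstimatedVLFor(DataTy) were 8 and the per-element load cost 1, a
// strided load would be costed at 8 * 1 = 8; with TCK_CodeSize the early
// return above prices it at TCC_Basic (1) instead.
unsigned NumOps = 8;          // assumed getEstimatedVLFor() result
InstructionCost PerElem = 1;  // assumed getMemoryOpCost() result
InstructionCost StridedCost = NumOps * PerElem; // == 8
```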
6 changes: 0 additions & 6 deletions llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -202,12 +202,6 @@ class RISCVTTIImpl final : public BasicTTIImplBase<RISCVTTIImpl> {
Align Alignment, TTI::TargetCostKind CostKind,
const Instruction *I = nullptr) const override;

- InstructionCost getStridedMemoryOpCost(unsigned Opcode, Type *DataTy,
- const Value *Ptr, bool VariableMask,
- Align Alignment,
- TTI::TargetCostKind CostKind,
- const Instruction *I) const override;

InstructionCost
getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const override;

51 changes: 35 additions & 16 deletions llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -7224,10 +7224,13 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
VectorGEPCost;
break;
case LoadsState::StridedVectorize:
- VecLdCost += TTI.getStridedMemoryOpCost(Instruction::Load, SubVecTy,
- LI0->getPointerOperand(),
- /*VariableMask=*/false,
- CommonAlignment, CostKind) +
+ VecLdCost += TTI.getIntrinsicInstrCost(
+ {Intrinsic::experimental_vp_strided_load,
+ SubVecTy,
+ {},
+ CommonAlignment,
+ /*VariableMask=*/false},
+ CostKind) +
VectorGEPCost;
break;
case LoadsState::CompressVectorize:
@@ -13191,9 +13194,13 @@ void BoUpSLP::transformNodes() {
BaseLI->getPointerAddressSpace(), CostKind,
TTI::OperandValueInfo()) +
::getShuffleCost(*TTI, TTI::SK_Reverse, VecTy, Mask, CostKind);
- InstructionCost StridedCost = TTI->getStridedMemoryOpCost(
- Instruction::Load, VecTy, BaseLI->getPointerOperand(),
- /*VariableMask=*/false, CommonAlignment, CostKind, BaseLI);
+ InstructionCost StridedCost =
+ TTI->getIntrinsicInstrCost({Intrinsic::experimental_vp_strided_load,
+ VecTy,
+ {},
+ CommonAlignment,
+ /*VariableMask=*/false},
+ CostKind);
if (StridedCost < OriginalVecCost || ForceStridedLoads) {
// Strided load is more profitable than consecutive load + reverse -
// transform the node to strided load.
@@ -13226,9 +13233,13 @@ void BoUpSLP::transformNodes() {
BaseSI->getPointerAddressSpace(), CostKind,
TTI::OperandValueInfo()) +
::getShuffleCost(*TTI, TTI::SK_Reverse, VecTy, Mask, CostKind);
- InstructionCost StridedCost = TTI->getStridedMemoryOpCost(
- Instruction::Store, VecTy, BaseSI->getPointerOperand(),
- /*VariableMask=*/false, CommonAlignment, CostKind, BaseSI);
+ InstructionCost StridedCost = TTI->getIntrinsicInstrCost(
+ {Intrinsic::experimental_vp_strided_store,
+ Type::getVoidTy(VecTy->getContext()),
+ {VecTy},
+ CommonAlignment,
+ /*VariableMask=*/false},
+ CostKind);
if (StridedCost < OriginalVecCost)
// Strided store is more profitable than reverse + consecutive store -
// transform the node to strided store.
@@ -14991,9 +15002,13 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
case TreeEntry::StridedVectorize: {
Align CommonAlignment =
computeCommonAlignment<LoadInst>(UniqueValues.getArrayRef());
- VecLdCost = TTI->getStridedMemoryOpCost(
- Instruction::Load, VecTy, LI0->getPointerOperand(),
- /*VariableMask=*/false, CommonAlignment, CostKind);
+ VecLdCost =
+ TTI->getIntrinsicInstrCost({Intrinsic::experimental_vp_strided_load,
+ VecTy,
+ {},
+ CommonAlignment,
+ /*VariableMask=*/false},
+ CostKind);
break;
}
case TreeEntry::CompressVectorize: {
@@ -15084,9 +15099,13 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
if (E->State == TreeEntry::StridedVectorize) {
Align CommonAlignment =
computeCommonAlignment<StoreInst>(UniqueValues.getArrayRef());
- VecStCost = TTI->getStridedMemoryOpCost(
- Instruction::Store, VecTy, BaseSI->getPointerOperand(),
- /*VariableMask=*/false, CommonAlignment, CostKind);
+ VecStCost = TTI->getIntrinsicInstrCost(
+ {Intrinsic::experimental_vp_strided_store,
+ Type::getVoidTy(VecTy->getContext()),
+ {VecTy},
+ CommonAlignment,
+ /*VariableMask=*/false},
+ CostKind);
} else {
assert(E->State == TreeEntry::Vectorize &&
"Expected either strided or consecutive stores.");
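Note: one subtlety at the SLP call sites: a strided load is described by its vector return type with an empty Tys list, while a strided store uses a void return type and passes the stored vector type in Tys, matching the intrinsic signatures. Both shapes side by side; TTI, VecTy, Ctx, CommonAlignment, and CostKind are assumed from the surrounding context:

```cpp
// Load: the vector type is the return type; no argument types needed.
InstructionCost LdCost = TTI.getIntrinsicInstrCost(
    {Intrinsic::experimental_vp_strided_load, VecTy, /*Tys=*/{},
     CommonAlignment, /*VariableMask=*/false},
    CostKind);

// Store: void return type; the stored vector type travels in Tys.
InstructionCost StCost = TTI.getIntrinsicInstrCost(
    {Intrinsic::experimental_vp_strided_store, Type::getVoidTy(Ctx),
     /*Tys=*/{VecTy}, CommonAlignment, /*VariableMask=*/false},
    CostKind);
```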