Skip to content

Commit bc5bdea

Browse files
[RISCV][CostModel] Recommit VPIntrinsics have same cost as their non-vp counterparts
This was reverted in commit 0abaf3c (llvm#67178). This version of the patch includes a fix for a problem caused by vp-reductions having an extra start value argument which their non-vp counterparts do not have.
1 parent 56a3e49 commit bc5bdea

File tree

3 files changed

+429
-5
lines changed

3 files changed

+429
-5
lines changed

llvm/include/llvm/CodeGen/BasicTTIImpl.h

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1691,6 +1691,62 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
16911691
}
16921692
}
16931693

1694+
// VP Intrinsics should have the same cost as their non-vp counterparts.
1695+
// TODO: Adjust the cost to make the vp intrinsic cheaper than its non-vp
1696+
// counterpart when the vector length argument is smaller than the maximum
1697+
// vector length.
1698+
if (VPIntrinsic::isVPIntrinsic(ICA.getID())) {
1699+
std::optional<unsigned> FOp =
1700+
VPIntrinsic::getFunctionalOpcodeForVP(ICA.getID());
1701+
if (FOp) {
1702+
// TODO: Support other kinds of Intrinsics (e.g. reductions)
1703+
if (ICA.getID() == Intrinsic::vp_load) {
1704+
Align Alignment;
1705+
if (auto *VPI = dyn_cast_or_null<VPIntrinsic>(ICA.getInst()))
1706+
Alignment = VPI->getPointerAlignment().valueOrOne();
1707+
unsigned AS = 0;
1708+
if (ICA.getArgs().size() > 1)
1709+
if (auto *PtrTy =
1710+
dyn_cast<PointerType>(ICA.getArgs()[0]->getType()))
1711+
AS = PtrTy->getAddressSpace();
1712+
return thisT()->getMemoryOpCost(*FOp, ICA.getReturnType(), Alignment,
1713+
AS, CostKind);
1714+
}
1715+
if (ICA.getID() == Intrinsic::vp_store) {
1716+
Align Alignment;
1717+
if (auto *VPI = dyn_cast_or_null<VPIntrinsic>(ICA.getInst()))
1718+
Alignment = VPI->getPointerAlignment().valueOrOne();
1719+
unsigned AS = 0;
1720+
if (ICA.getArgs().size() >= 2)
1721+
if (auto *PtrTy =
1722+
dyn_cast<PointerType>(ICA.getArgs()[1]->getType()))
1723+
AS = PtrTy->getAddressSpace();
1724+
return thisT()->getMemoryOpCost(*FOp, Args[0]->getType(), Alignment,
1725+
AS, CostKind);
1726+
}
1727+
if (VPBinOpIntrinsic::isVPBinOp(ICA.getID())) {
1728+
return thisT()->getArithmeticInstrCost(*FOp, ICA.getReturnType(),
1729+
CostKind);
1730+
}
1731+
}
1732+
1733+
std::optional<Intrinsic::ID> FID =
1734+
VPIntrinsic::getFunctionalIntrinsicIDForVP(ICA.getID());
1735+
if (FID) {
1736+
// Non-vp version will have same Args/Tys except mask and vector length.
1737+
assert(ICA.getArgs().size() >= 2 && ICA.getArgTypes().size() >= 2 &&
1738+
"Expected VPIntrinsic to have Mask and Vector Length args and "
1739+
"types");
1740+
ArrayRef<const Value *> NewArgs = ArrayRef(ICA.getArgs()).drop_back(2);
1741+
ArrayRef<Type *> NewTys = ArrayRef(ICA.getArgTypes()).drop_back(2);
1742+
1743+
IntrinsicCostAttributes NewICA(*FID, ICA.getReturnType(), NewArgs,
1744+
NewTys, ICA.getFlags(), ICA.getInst(),
1745+
ICA.getScalarizationCost());
1746+
return thisT()->getIntrinsicInstrCost(NewICA, CostKind);
1747+
}
1748+
}
1749+
16941750
// Assume that we need to scalarize this intrinsic.
16951751
// Compute the scalarization overhead based on Args for a vector
16961752
// intrinsic.

llvm/test/Analysis/CostModel/RISCV/gep.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -270,7 +270,7 @@ define void @non_foldable_vector_uses(ptr %base, <2 x ptr> %base.vec) {
270270
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = getelementptr i8, ptr %base, i32 42
271271
; RVI-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %x4 = call <2 x i8> @llvm.masked.expandload.v2i8(ptr %4, <2 x i1> undef, <2 x i8> undef)
272272
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = getelementptr i8, ptr %base, i32 42
273-
; RVI-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %x5 = call <2 x i8> @llvm.vp.load.v2i8.p0(ptr %5, <2 x i1> undef, i32 undef)
273+
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %x5 = call <2 x i8> @llvm.vp.load.v2i8.p0(ptr %5, <2 x i1> undef, i32 undef)
274274
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = getelementptr i8, ptr %base, i32 42
275275
; RVI-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %x6 = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr %6, i64 undef, <2 x i1> undef, i32 undef)
276276
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = getelementptr i8, ptr %base, i32 42
@@ -282,7 +282,7 @@ define void @non_foldable_vector_uses(ptr %base, <2 x ptr> %base.vec) {
282282
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = getelementptr i8, ptr %base, i32 42
283283
; RVI-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v2i8(<2 x i8> undef, ptr %10, <2 x i1> undef)
284284
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = getelementptr i8, ptr %base, i32 42
285-
; RVI-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.vp.store.v2i8.p0(<2 x i8> undef, ptr %11, <2 x i1> undef, i32 undef)
285+
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.vp.store.v2i8.p0(<2 x i8> undef, ptr %11, <2 x i1> undef, i32 undef)
286286
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = getelementptr i8, ptr %base, i32 42
287287
; RVI-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.experimental.vp.strided.store.v2i8.p0.i64(<2 x i8> undef, ptr %12, i64 undef, <2 x i1> undef, i32 undef)
288288
; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
@@ -340,7 +340,7 @@ define void @foldable_vector_uses(ptr %base, <2 x ptr> %base.vec) {
340340
; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %4 = getelementptr i8, ptr %base, i32 0
341341
; RVI-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %x4 = call <2 x i8> @llvm.masked.expandload.v2i8(ptr %4, <2 x i1> undef, <2 x i8> undef)
342342
; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %5 = getelementptr i8, ptr %base, i32 0
343-
; RVI-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %x5 = call <2 x i8> @llvm.vp.load.v2i8.p0(ptr %5, <2 x i1> undef, i32 undef)
343+
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %x5 = call <2 x i8> @llvm.vp.load.v2i8.p0(ptr %5, <2 x i1> undef, i32 undef)
344344
; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %6 = getelementptr i8, ptr %base, i32 0
345345
; RVI-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %x6 = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr %6, i64 undef, <2 x i1> undef, i32 undef)
346346
; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %7 = getelementptr i8, ptr %base, i32 0
@@ -352,7 +352,7 @@ define void @foldable_vector_uses(ptr %base, <2 x ptr> %base.vec) {
352352
; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %10 = getelementptr i8, ptr %base, i32 0
353353
; RVI-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v2i8(<2 x i8> undef, ptr %10, <2 x i1> undef)
354354
; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %11 = getelementptr i8, ptr %base, i32 0
355-
; RVI-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.vp.store.v2i8.p0(<2 x i8> undef, ptr %11, <2 x i1> undef, i32 undef)
355+
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.vp.store.v2i8.p0(<2 x i8> undef, ptr %11, <2 x i1> undef, i32 undef)
356356
; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %12 = getelementptr i8, ptr %base, i32 0
357357
; RVI-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.experimental.vp.strided.store.v2i8.p0.i64(<2 x i8> undef, ptr %12, i64 undef, <2 x i1> undef, i32 undef)
358358
; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void

0 commit comments

Comments
 (0)