Skip to content

Commit

Permalink
[LV] Add initial support for vectorizing literal struct return values
Browse files Browse the repository at this point in the history
This patch adds initial support for vectorizing literal struct return
values. Currently, this is limited to the case where the struct is
homogeneous (all elements have the same type) and not packed.

The intended use case for this is vectorizing intrinsics such as:

```
declare { float, float } @llvm.sincos.f32(float %x)
```

Mapping them to structure-returning library calls such as:

```
declare { <4 x float>, <4 x float> } @Sleef_sincosf4_u10advsimd(<4 x float>)
```

It could also be possible to vectorize the intrinsic (without a libcall)
and then later lower the intrinsic to a library call. This may be
desired if the only library calls available take output pointers rather
than return multiple values.

Implementing this required two main changes:

1. Supporting widening `extractvalue`
2. Adding support for "wide" types (in LV and parts of the cost model)

The first change is relatively straightforward, the second is larger as
it requires changing assumptions that types are always scalars or
vectors.

In this patch, a "wide" type is defined as a vector, or a struct literal
where all elements are vectors (of the same element count).

To help with the second change some helpers for wide types have been
added (that work similarly to existing vector helpers). These have been
used along the paths needed to support vectorizing calls, however, I
expect there are many places that still only expect vector types.
  • Loading branch information
MacDue committed Sep 24, 2024
1 parent 7773243 commit 59b3fa9
Show file tree
Hide file tree
Showing 16 changed files with 506 additions and 79 deletions.
15 changes: 3 additions & 12 deletions llvm/include/llvm/Analysis/VectorUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/VFABIDemangler.h"
#include "llvm/IR/VectorUtils.h"
#include "llvm/Support/CheckedArithmetic.h"

namespace llvm {
Expand Down Expand Up @@ -127,18 +128,8 @@ namespace Intrinsic {
typedef unsigned ID;
}

/// Widen the scalar type \p Scalar to a vector with element count \p EC.
/// Void and metadata types are returned unchanged, as is any type when
/// \p EC describes a scalar.
inline Type *ToVectorTy(Type *Scalar, ElementCount EC) {
  const bool CanBeVectorElement =
      !Scalar->isVoidTy() && !Scalar->isMetadataTy();
  if (CanBeVectorElement && !EC.isScalar())
    return VectorType::get(Scalar, EC);
  return Scalar;
}

/// Convenience overload: widen \p Scalar using a fixed element count of
/// \p VF lanes.
inline Type *ToVectorTy(Type *Scalar, unsigned VF) {
  const ElementCount FixedEC = ElementCount::getFixed(VF);
  return ToVectorTy(Scalar, FixedEC);
}
/// Returns true if `Ty` can be widened by the loop vectorizer.
bool canWidenType(Type *Ty);

/// Identify if the intrinsic is trivially vectorizable.
/// This method returns true if the intrinsic's argument types are all scalars
Expand Down
42 changes: 25 additions & 17 deletions llvm/include/llvm/CodeGen/BasicTTIImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -1561,8 +1561,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
Type *RetTy = ICA.getReturnType();

ElementCount RetVF =
(RetTy->isVectorTy() ? cast<VectorType>(RetTy)->getElementCount()
: ElementCount::getFixed(1));
isWideTy(RetTy) ? getWideTypeVF(RetTy) : ElementCount::getFixed(1);

const IntrinsicInst *I = ICA.getInst();
const SmallVectorImpl<const Value *> &Args = ICA.getArgs();
FastMathFlags FMF = ICA.getFlags();
Expand Down Expand Up @@ -1883,10 +1883,13 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
InstructionCost ScalarizationCost = InstructionCost::getInvalid();
if (RetVF.isVector() && !RetVF.isScalable()) {
ScalarizationCost = 0;
if (!RetTy->isVoidTy())
ScalarizationCost += getScalarizationOverhead(
cast<VectorType>(RetTy),
/*Insert*/ true, /*Extract*/ false, CostKind);
if (!RetTy->isVoidTy()) {
for (Type *VectorTy : getContainedTypes(RetTy)) {
ScalarizationCost += getScalarizationOverhead(
cast<VectorType>(VectorTy),
/*Insert*/ true, /*Extract*/ false, CostKind);
}
}
ScalarizationCost +=
getOperandsScalarizationOverhead(Args, ICA.getArgTypes(), CostKind);
}
Expand Down Expand Up @@ -2477,27 +2480,32 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
// Else, assume that we need to scalarize this intrinsic. For math builtins
// this will emit a costly libcall, adding call overhead and spills. Make it
// very expensive.
if (auto *RetVTy = dyn_cast<VectorType>(RetTy)) {
if (isWideTy(RetTy)) {
const SmallVector<Type *, 2> RetVTys = getContainedTypes(RetTy);

// Scalable vectors cannot be scalarized, so return Invalid.
if (isa<ScalableVectorType>(RetTy) || any_of(Tys, [](const Type *Ty) {
return isa<ScalableVectorType>(Ty);
}))
if (any_of(concat<Type *const>(RetVTys, Tys),
[](Type *Ty) { return isa<ScalableVectorType>(Ty); }))
return InstructionCost::getInvalid();

InstructionCost ScalarizationCost =
SkipScalarizationCost
? ScalarizationCostPassed
: getScalarizationOverhead(RetVTy, /*Insert*/ true,
/*Extract*/ false, CostKind);
InstructionCost ScalarizationCost = ScalarizationCostPassed;
if (!SkipScalarizationCost) {
ScalarizationCost = 0;
for (Type *RetVTy : RetVTys) {
ScalarizationCost += getScalarizationOverhead(
cast<VectorType>(RetVTy), /*Insert*/ true,
/*Extract*/ false, CostKind);
}
}

unsigned ScalarCalls = cast<FixedVectorType>(RetVTy)->getNumElements();
unsigned ScalarCalls = getWideTypeVF(RetTy).getFixedValue();
SmallVector<Type *, 4> ScalarTys;
for (Type *Ty : Tys) {
if (Ty->isVectorTy())
Ty = Ty->getScalarType();
ScalarTys.push_back(Ty);
}
IntrinsicCostAttributes Attrs(IID, RetTy->getScalarType(), ScalarTys, FMF);
IntrinsicCostAttributes Attrs(IID, ToNarrowTy(RetTy), ScalarTys, FMF);
InstructionCost ScalarCost =
thisT()->getIntrinsicInstrCost(Attrs, CostKind);
for (Type *Ty : Tys) {
Expand Down
4 changes: 4 additions & 0 deletions llvm/include/llvm/IR/DerivedTypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,10 @@ class StructType : public Type {
/// {<vscale x 2 x i32>, <vscale x 4 x i64>}}
bool containsHomogeneousScalableVectorTypes() const;

/// Return true if this struct is non-empty and all element types are the
/// same.
bool containsHomogeneousTypes() const;

/// Return true if this is a named struct that has a non-empty name.
bool hasName() const { return SymbolTableEntry != nullptr; }

Expand Down
53 changes: 53 additions & 0 deletions llvm/include/llvm/IR/VectorUtils.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
//===----------- VectorUtils.h - Vector type utility functions -*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// Include guard: this header defines inline functions, so including it more
// than once in a translation unit (directly and via another header) would
// otherwise produce redefinition errors.
#ifndef LLVM_IR_VECTORUTILS_H
#define LLVM_IR_VECTORUTILS_H

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/DerivedTypes.h"

namespace llvm {

/// A helper function for converting Scalar types to vector types. If
/// the incoming type is void or metadata, it is returned unchanged. If the EC
/// represents a scalar, we return the scalar type.
inline Type *ToVectorTy(Type *Scalar, ElementCount EC) {
  if (Scalar->isVoidTy() || Scalar->isMetadataTy() || EC.isScalar())
    return Scalar;
  return VectorType::get(Scalar, EC);
}

/// Overload of ToVectorTy taking a fixed (non-scalable) vectorization factor.
inline Type *ToVectorTy(Type *Scalar, unsigned VF) {
  return ToVectorTy(Scalar, ElementCount::getFixed(VF));
}

/// A helper for converting to wider (vector) types. For scalar types, this is
/// equivalent to calling `ToVectorTy`. For struct types, this returns a new
/// struct where each element type has been widened to a vector type. Note: Only
/// unpacked literal struct types are supported.
Type *ToWideTy(Type *Ty, ElementCount EC);

/// A helper for converting wide types to narrow (non-vector) types. For vector
/// types, this is equivalent to calling .getScalarType(). For struct types,
/// this returns a new struct where each element type has been converted to a
/// scalar type. Note: Only unpacked literal struct types are supported.
Type *ToNarrowTy(Type *Ty);

/// Returns the types contained in `Ty`. For struct types, it returns the
/// elements, all other types are returned directly.
SmallVector<Type *, 2> getContainedTypes(Type *Ty);

/// Returns true if `Ty` is a vector type or a struct of vector types where all
/// vector types share the same VF.
bool isWideTy(Type *Ty);

/// Returns the vectorization factor for a widened type. The VF is read from
/// the first contained type; `Ty` must satisfy `isWideTy`.
inline ElementCount getWideTypeVF(Type *Ty) {
  assert(isWideTy(Ty) && "expected widened type!");
  return cast<VectorType>(getContainedTypes(Ty).front())->getElementCount();
}

} // namespace llvm

#endif // LLVM_IR_VECTORUTILS_H
14 changes: 14 additions & 0 deletions llvm/lib/Analysis/VectorUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,20 @@ static cl::opt<unsigned> MaxInterleaveGroupFactor(
cl::desc("Maximum factor for an interleaved access group (default = 8)"),
cl::init(8));

/// Returns true if `Ty` can be widened by the loop vectorizer.
///
/// A type qualifies when it is a valid vector element type, or when it is an
/// unpacked literal struct whose (identical) element types are valid vector
/// element types.
bool llvm::canWidenType(Type *Ty) {
  auto *STy = dyn_cast<StructType>(Ty);
  // Restricting structs to unpacked, homogeneous literals keeps the cost model
  // and the scalar <-> wide type conversions simple for now.
  const bool IsSupportedStruct = STy && !STy->isPacked() && STy->isLiteral() &&
                                 STy->containsHomogeneousTypes();
  // Any other struct is checked as-is and fails the element-type test below.
  Type *ElementTy = IsSupportedStruct ? STy->elements().front() : Ty;
  return VectorType::isValidElementType(ElementTy);
}

/// Return true if all of the intrinsic's arguments and return type are scalars
/// for the scalar form of the intrinsic, and vectors for the vector form of the
/// intrinsic (except operands that are marked as always being scalar by
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/IR/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ add_llvm_component_library(LLVMCore
Value.cpp
ValueSymbolTable.cpp
VectorBuilder.cpp
VectorUtils.cpp
Verifier.cpp
VFABIDemangler.cpp
RuntimeLibcalls.cpp
Expand Down
10 changes: 6 additions & 4 deletions llvm/lib/IR/Type.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -433,10 +433,12 @@ bool StructType::containsHomogeneousScalableVectorTypes() const {
Type *FirstTy = getNumElements() > 0 ? elements()[0] : nullptr;
if (!FirstTy || !isa<ScalableVectorType>(FirstTy))
return false;
for (Type *Ty : elements())
if (Ty != FirstTy)
return false;
return true;
return containsHomogeneousTypes();
}

bool StructType::containsHomogeneousTypes() const {
ArrayRef<Type *> ElementTys = elements();
return !ElementTys.empty() && all_equal(ElementTys);
}

void StructType::setBody(ArrayRef<Type*> Elements, bool isPacked) {
Expand Down
18 changes: 11 additions & 7 deletions llvm/lib/IR/VFABIDemangler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/VectorUtils.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <limits>
Expand Down Expand Up @@ -346,12 +347,15 @@ getScalableECFromSignature(const FunctionType *Signature, const VFISAKind ISA,
// Also check the return type if not void.
Type *RetTy = Signature->getReturnType();
if (!RetTy->isVoidTy()) {
std::optional<ElementCount> ReturnEC = getElementCountForTy(ISA, RetTy);
// If we have an unknown scalar element type we can't find a reasonable VF.
if (!ReturnEC)
return std::nullopt;
if (ElementCount::isKnownLT(*ReturnEC, MinEC))
MinEC = *ReturnEC;
for (Type *RetTy : getContainedTypes(RetTy)) {
std::optional<ElementCount> ReturnEC = getElementCountForTy(ISA, RetTy);
// If we have an unknown scalar element type we can't find a reasonable
// VF.
if (!ReturnEC)
return std::nullopt;
if (ElementCount::isKnownLT(*ReturnEC, MinEC))
MinEC = *ReturnEC;
}
}

// The SVE Vector function call ABI bases the VF on the widest element types
Expand Down Expand Up @@ -566,7 +570,7 @@ FunctionType *VFABI::createFunctionType(const VFInfo &Info,

auto *RetTy = ScalarFTy->getReturnType();
if (!RetTy->isVoidTy())
RetTy = VectorType::get(RetTy, VF);
RetTy = ToWideTy(RetTy, VF);
return FunctionType::get(RetTy, VecTypes, false);
}

Expand Down
69 changes: 69 additions & 0 deletions llvm/lib/IR/VectorUtils.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
//===----------- VectorUtils.cpp - Vector type utility functions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "llvm/IR/VectorUtils.h"
#include "llvm/ADT/SmallVectorExtras.h"

using namespace llvm;

/// Widen `Ty` to the element count `EC`. Non-struct types are handled by
/// `ToVectorTy`; for struct types a new literal struct is returned in which
/// every element type has been replaced by a vector of `EC` elements. A scalar
/// `EC` returns `Ty` unchanged. Note: Only unpacked literal struct types are
/// supported.
Type *llvm::ToWideTy(Type *Ty, ElementCount EC) {
  // Nothing to widen for a scalar element count.
  if (EC.isScalar())
    return Ty;

  if (auto *STy = dyn_cast<StructType>(Ty)) {
    assert(STy->isLiteral() && !STy->isPacked() &&
           "expected unpacked struct literal");
    // Widen each member individually and rebuild the struct from the results.
    SmallVector<Type *, 2> WideElTys;
    for (Type *ElTy : STy->elements())
      WideElTys.push_back(VectorType::get(ElTy, EC));
    return StructType::get(Ty->getContext(), WideElTys);
  }

  return ToVectorTy(Ty, EC);
}

/// Convert a wide type back to its narrow (non-vector) form. Non-struct types
/// yield `getScalarType()`; struct types yield a new literal struct whose
/// elements are the scalar types of the original elements. Note: Only unpacked
/// literal struct types are supported.
Type *llvm::ToNarrowTy(Type *Ty) {
  if (auto *STy = dyn_cast<StructType>(Ty)) {
    assert(STy->isLiteral() && !STy->isPacked() &&
           "expected unpacked struct literal");
    // Narrow each member individually and rebuild the struct from the results.
    SmallVector<Type *, 2> NarrowElTys;
    for (Type *ElTy : STy->elements())
      NarrowElTys.push_back(ElTy->getScalarType());
    return StructType::get(Ty->getContext(), NarrowElTys);
  }

  return Ty->getScalarType();
}

/// Returns the types contained in `Ty`: the element types when `Ty` is a
/// struct, otherwise a single-entry list holding `Ty` itself.
SmallVector<Type *, 2> llvm::getContainedTypes(Type *Ty) {
  if (auto *STy = dyn_cast<StructType>(Ty)) {
    ArrayRef<Type *> Elts = STy->elements();
    return SmallVector<Type *, 2>(Elts.begin(), Elts.end());
  }
  return {Ty};
}

/// Returns true if `Ty` is a vector type, or an unpacked literal struct whose
/// elements are all vector types sharing the same VF.
bool llvm::isWideTy(Type *Ty) {
  // ToWideTy/ToNarrowTy only support unpacked literal structs, so named or
  // packed structs are not wide types even when every element is a vector.
  // Rejecting them here stops callers that narrow a "wide" type from hitting
  // those helpers' assertions.
  auto *StructTy = dyn_cast<StructType>(Ty);
  if (StructTy && (!StructTy->isLiteral() || StructTy->isPacked()))
    return false;

  auto ContainedTys = getContainedTypes(Ty);
  // An empty struct, or one whose first element is not a vector, is not wide.
  if (ContainedTys.empty() || !ContainedTys.front()->isVectorTy())
    return false;

  // Every contained type must be a vector with the same element count as the
  // first one.
  ElementCount VF = cast<VectorType>(ContainedTys.front())->getElementCount();
  return all_of(ContainedTys, [&](Type *ElTy) {
    return ElTy->isVectorTy() &&
           cast<VectorType>(ElTy)->getElementCount() == VF;
  });
}
4 changes: 2 additions & 2 deletions llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -945,8 +945,8 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// Check that the instruction return type is vectorizable.
// We can't vectorize casts from vector type to scalar type.
// Also, we can't vectorize extractelement instructions.
if ((!VectorType::isValidElementType(I.getType()) &&
!I.getType()->isVoidTy()) ||
Type* InstTy = I.getType();
if (!(InstTy->isVoidTy() || canWidenType(InstTy)) ||
(isa<CastInst>(I) &&
!VectorType::isValidElementType(I.getOperand(0)->getType())) ||
isa<ExtractElementInst>(I)) {
Expand Down
Loading

0 comments on commit 59b3fa9

Please sign in to comment.