From d2643e2ee18045065f40683e544cb286f6e17986 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell
Date: Tue, 15 Oct 2024 15:50:03 +0000
Subject: [PATCH] Initial changes picked over from #109833

Move ToVectorTy out of llvm/Analysis/VectorUtils.h into a new
llvm/IR/VectorUtils.h and add helpers for widening literal struct types
(ToWideTy, ToNarrowTy, getContainedTypes, isWideTy, getWideTypeVF).
Add canWidenType() and use the new helpers in the VFABI demangler, the
loop vectorizer legality/cost model and VPWidenIntrinsicRecipe.

---
 llvm/include/llvm/Analysis/VectorUtils.h      | 15 +---
 llvm/include/llvm/IR/VectorUtils.h            | 58 ++++++++++++++++
 llvm/lib/Analysis/VectorUtils.cpp             | 14 ++++
 llvm/lib/IR/CMakeLists.txt                    |  1 +
 llvm/lib/IR/VFABIDemangler.cpp                | 18 +++--
 llvm/lib/IR/VectorUtils.cpp                   | 69 +++++++++++++++++++
 .../Vectorize/LoopVectorizationLegality.cpp   |  4 +-
 .../Transforms/Vectorize/LoopVectorize.cpp    | 46 +++++++------
 .../lib/Transforms/Vectorize/VPlanRecipes.cpp |  2 +-
 9 files changed, 185 insertions(+), 42 deletions(-)
 create mode 100644 llvm/include/llvm/IR/VectorUtils.h
 create mode 100644 llvm/lib/IR/VectorUtils.cpp

diff --git a/llvm/include/llvm/Analysis/VectorUtils.h b/llvm/include/llvm/Analysis/VectorUtils.h
index e2dd4976f39065..2a419560be3030 100644
--- a/llvm/include/llvm/Analysis/VectorUtils.h
+++ b/llvm/include/llvm/Analysis/VectorUtils.h
@@ -18,6 +18,7 @@
 #include "llvm/Analysis/LoopAccessAnalysis.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/VFABIDemangler.h"
+#include "llvm/IR/VectorUtils.h"
 #include "llvm/Support/CheckedArithmetic.h"
 
 namespace llvm {
@@ -127,18 +128,8 @@ namespace Intrinsic {
 typedef unsigned ID;
 }
 
-/// A helper function for converting Scalar types to vector types. If
-/// the incoming type is void, we return void. If the EC represents a
-/// scalar, we return the scalar type.
-inline Type *ToVectorTy(Type *Scalar, ElementCount EC) {
-  if (Scalar->isVoidTy() || Scalar->isMetadataTy() || EC.isScalar())
-    return Scalar;
-  return VectorType::get(Scalar, EC);
-}
-
-inline Type *ToVectorTy(Type *Scalar, unsigned VF) {
-  return ToVectorTy(Scalar, ElementCount::getFixed(VF));
-}
+/// Returns true if `Ty` can be widened by the loop vectorizer.
+bool canWidenType(Type *Ty);
 
 /// Identify if the intrinsic is trivially vectorizable.
 /// This method returns true if the intrinsic's argument types are all scalars
diff --git a/llvm/include/llvm/IR/VectorUtils.h b/llvm/include/llvm/IR/VectorUtils.h
new file mode 100644
index 00000000000000..e8e838d8287c42
--- /dev/null
+++ b/llvm/include/llvm/IR/VectorUtils.h
@@ -0,0 +1,58 @@
+//===----------- VectorUtils.h - Vector type utility functions -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_IR_VECTORUTILS_H
+#define LLVM_IR_VECTORUTILS_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/DerivedTypes.h"
+
+namespace llvm {
+
+/// A helper function for converting Scalar types to vector types. If
+/// the incoming type is void, we return void. If the EC represents a
+/// scalar, we return the scalar type.
+inline Type *ToVectorTy(Type *Scalar, ElementCount EC) {
+  if (Scalar->isVoidTy() || Scalar->isMetadataTy() || EC.isScalar())
+    return Scalar;
+  return VectorType::get(Scalar, EC);
+}
+
+inline Type *ToVectorTy(Type *Scalar, unsigned VF) {
+  return ToVectorTy(Scalar, ElementCount::getFixed(VF));
+}
+
+/// A helper for converting to wider (vector) types. For scalar types, this is
+/// equivalent to calling `ToVectorTy`. For struct types, this returns a new
+/// struct where each element type has been widened to a vector type. Note: Only
+/// unpacked literal struct types are supported.
+Type *ToWideTy(Type *Ty, ElementCount EC);
+
+/// A helper for converting wide types to narrow (non-vector) types. For vector
+/// types, this is equivalent to calling .getScalarType(). For struct types,
+/// this returns a new struct where each element type has been converted to a
+/// scalar type. Note: Only unpacked literal struct types are supported.
+Type *ToNarrowTy(Type *Ty);
+
+/// Returns the types contained in `Ty`. For struct types, it returns the
+/// elements, all other types are returned directly.
+SmallVector<Type *, 2> getContainedTypes(Type *Ty);
+
+/// Returns true if `Ty` is a vector type or a struct of vector types where all
+/// vector types share the same VF.
+bool isWideTy(Type *Ty);
+
+/// Returns the vectorization factor for a widened type.
+inline ElementCount getWideTypeVF(Type *Ty) {
+  assert(isWideTy(Ty) && "expected widened type!");
+  return cast<VectorType>(getContainedTypes(Ty).front())->getElementCount();
+}
+
+} // namespace llvm
+
+#endif // LLVM_IR_VECTORUTILS_H
diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp
index 6b5251e0ad34eb..0e99abd48991e3 100644
--- a/llvm/lib/Analysis/VectorUtils.cpp
+++ b/llvm/lib/Analysis/VectorUtils.cpp
@@ -39,6 +39,20 @@ static cl::opt<unsigned> MaxInterleaveGroupFactor(
     cl::desc("Maximum factor for an interleaved access group (default = 8)"),
     cl::init(8));
 
+/// Returns true if `Ty` can be widened by the loop vectorizer.
+bool llvm::canWidenType(Type *Ty) {
+  Type *ElTy = Ty;
+  // For now, only allow widening non-packed literal structs where all
+  // element types are the same. This simplifies the cost model and
+  // conversion between scalar and wide types.
+  if (auto *StructTy = dyn_cast<StructType>(Ty);
+      StructTy && !StructTy->isPacked() && StructTy->isLiteral() &&
+      StructTy->containsHomogeneousTypes()) {
+    ElTy = StructTy->elements().front();
+  }
+  return VectorType::isValidElementType(ElTy);
+}
+
 /// Return true if all of the intrinsic's arguments and return type are scalars
 /// for the scalar form of the intrinsic, and vectors for the vector form of the
 /// intrinsic (except operands that are marked as always being scalar by
diff --git a/llvm/lib/IR/CMakeLists.txt b/llvm/lib/IR/CMakeLists.txt
index 544f4ea9223d0e..7eaf35e10ebc67 100644
--- a/llvm/lib/IR/CMakeLists.txt
+++ b/llvm/lib/IR/CMakeLists.txt
@@ -73,6 +73,7 @@ add_llvm_component_library(LLVMCore
   Value.cpp
   ValueSymbolTable.cpp
   VectorBuilder.cpp
+  VectorUtils.cpp
   Verifier.cpp
   VFABIDemangler.cpp
   RuntimeLibcalls.cpp
diff --git a/llvm/lib/IR/VFABIDemangler.cpp b/llvm/lib/IR/VFABIDemangler.cpp
index cdfb9fbfaa084d..6ccd77fd23793a 100644
--- a/llvm/lib/IR/VFABIDemangler.cpp
+++ b/llvm/lib/IR/VFABIDemangler.cpp
@@ -11,6 +11,7 @@
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/IR/Module.h"
+#include "llvm/IR/VectorUtils.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include <vector>
@@ -346,12 +347,15 @@ getScalableECFromSignature(const FunctionType *Signature, const VFISAKind ISA,
   // Also check the return type if not void.
   Type *RetTy = Signature->getReturnType();
   if (!RetTy->isVoidTy()) {
-    std::optional<ElementCount> ReturnEC = getElementCountForTy(ISA, RetTy);
-    // If we have an unknown scalar element type we can't find a reasonable VF.
-    if (!ReturnEC)
-      return std::nullopt;
-    if (ElementCount::isKnownLT(*ReturnEC, MinEC))
-      MinEC = *ReturnEC;
+    for (Type *Ty : getContainedTypes(RetTy)) {
+      std::optional<ElementCount> ReturnEC = getElementCountForTy(ISA, Ty);
+      // If we have an unknown scalar element type we can't find a reasonable
+      // VF.
+      if (!ReturnEC)
+        return std::nullopt;
+      if (ElementCount::isKnownLT(*ReturnEC, MinEC))
+        MinEC = *ReturnEC;
+    }
   }
 
   // The SVE Vector function call ABI bases the VF on the widest element types
@@ -566,7 +570,7 @@ FunctionType *VFABI::createFunctionType(const VFInfo &Info,
 
   auto *RetTy = ScalarFTy->getReturnType();
   if (!RetTy->isVoidTy())
-    RetTy = VectorType::get(RetTy, VF);
+    RetTy = ToWideTy(RetTy, VF);
   return FunctionType::get(RetTy, VecTypes, false);
 }
 
diff --git a/llvm/lib/IR/VectorUtils.cpp b/llvm/lib/IR/VectorUtils.cpp
new file mode 100644
index 00000000000000..c89a8eaf2ad1e0
--- /dev/null
+++ b/llvm/lib/IR/VectorUtils.cpp
@@ -0,0 +1,69 @@
+//===----------- VectorUtils.cpp - Vector type utility functions ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/VectorUtils.h"
+#include "llvm/ADT/SmallVectorExtras.h"
+
+using namespace llvm;
+
+/// A helper for converting to wider (vector) types. For scalar types, this is
+/// equivalent to calling `ToVectorTy`. For struct types, this returns a new
+/// struct where each element type has been widened to a vector type. Note: Only
+/// unpacked literal struct types are supported.
+Type *llvm::ToWideTy(Type *Ty, ElementCount EC) {
+  if (EC.isScalar())
+    return Ty;
+  auto *StructTy = dyn_cast<StructType>(Ty);
+  if (!StructTy)
+    return ToVectorTy(Ty, EC);
+  assert(StructTy->isLiteral() && !StructTy->isPacked() &&
+         "expected unpacked struct literal");
+  return StructType::get(
+      Ty->getContext(),
+      map_to_vector(StructTy->elements(), [&](Type *ElTy) -> Type * {
+        return VectorType::get(ElTy, EC);
+      }));
+}
+
+/// A helper for converting wide types to narrow (non-vector) types. For vector
+/// types, this is equivalent to calling .getScalarType(). For struct types,
+/// this returns a new struct where each element type has been converted to a
+/// scalar type. Note: Only unpacked literal struct types are supported.
+Type *llvm::ToNarrowTy(Type *Ty) {
+  auto *StructTy = dyn_cast<StructType>(Ty);
+  if (!StructTy)
+    return Ty->getScalarType();
+  assert(StructTy->isLiteral() && !StructTy->isPacked() &&
+         "expected unpacked struct literal");
+  return StructType::get(
+      Ty->getContext(),
+      map_to_vector(StructTy->elements(), [](Type *ElTy) -> Type * {
+        return ElTy->getScalarType();
+      }));
+}
+
+/// Returns the types contained in `Ty`. For struct types, it returns the
+/// elements, all other types are returned directly.
+SmallVector<Type *, 2> llvm::getContainedTypes(Type *Ty) {
+  auto *StructTy = dyn_cast<StructType>(Ty);
+  if (StructTy)
+    return to_vector<2>(StructTy->elements());
+  return {Ty};
+}
+
+/// Returns true if `Ty` is a vector type or a struct of vector types where all
+/// vector types share the same VF.
+bool llvm::isWideTy(Type *Ty) {
+  auto ContainedTys = getContainedTypes(Ty);
+  if (ContainedTys.empty() || !ContainedTys.front()->isVectorTy())
+    return false;
+  ElementCount VF = cast<VectorType>(ContainedTys.front())->getElementCount();
+  return all_of(ContainedTys, [&](Type *Ty) {
+    return Ty->isVectorTy() && cast<VectorType>(Ty)->getElementCount() == VF;
+  });
+}
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 43be72f0f34d45..cb6327640dbdbb 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -949,8 +949,8 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
       // Check that the instruction return type is vectorizable.
       // We can't vectorize casts from vector type to scalar type.
       // Also, we can't vectorize extractelement instructions.
-      if ((!VectorType::isValidElementType(I.getType()) &&
-           !I.getType()->isVoidTy()) ||
+      Type *InstTy = I.getType();
+      if (!(InstTy->isVoidTy() || canWidenType(InstTy)) ||
           (isa<CastInst>(I) &&
            !VectorType::isValidElementType(I.getOperand(0)->getType())) ||
           isa<ExtractElementInst>(I)) {
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 8bf92f3480620a..d8247d1d3bff72 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2861,10 +2861,10 @@ LoopVectorizationCostModel::getVectorCallCost(CallInst *CI,
   return ScalarCallCost;
 }
 
-static Type *maybeVectorizeType(Type *Elt, ElementCount VF) {
-  if (VF.isScalar() || (!Elt->isIntOrPtrTy() && !Elt->isFloatingPointTy()))
-    return Elt;
-  return VectorType::get(Elt, VF);
+static Type *maybeVectorizeType(Type *Ty, ElementCount VF) {
+  if (VF.isScalar() || !canWidenType(Ty))
+    return Ty;
+  return ToWideTy(Ty, VF);
 }
 
 InstructionCost
@@ -3635,9 +3635,8 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
 
       // ExtractValue instructions must be uniform, because the operands are
       // known to be loop-invariant.
-      if (auto *EVI = dyn_cast<ExtractValueInst>(&I)) {
-        assert(IsOutOfScope(EVI->getAggregateOperand()) &&
-               "Expected aggregate value to be loop invariant");
+      if (auto *EVI = dyn_cast<ExtractValueInst>(&I);
+          EVI && IsOutOfScope(EVI->getAggregateOperand())) {
         AddToWorklistIfAllowed(EVI);
         continue;
       }
@@ -5461,10 +5460,13 @@ InstructionCost LoopVectorizationCostModel::computePredInstDiscount(
     // and phi nodes.
     TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
     if (isScalarWithPredication(I, VF) && !I->getType()->isVoidTy()) {
-      ScalarCost += TTI.getScalarizationOverhead(
-          cast<VectorType>(ToVectorTy(I->getType(), VF)),
-          APInt::getAllOnes(VF.getFixedValue()), /*Insert*/ true,
-          /*Extract*/ false, CostKind);
+      Type *WideTy = ToWideTy(I->getType(), VF);
+      for (Type *VectorTy : getContainedTypes(WideTy)) {
+        ScalarCost += TTI.getScalarizationOverhead(
+            cast<VectorType>(VectorTy), APInt::getAllOnes(VF.getFixedValue()),
+            /*Insert*/ true,
+            /*Extract*/ false, CostKind);
+      }
       ScalarCost +=
           VF.getFixedValue() * TTI.getCFInstrCost(Instruction::PHI, CostKind);
     }
@@ -5953,13 +5955,17 @@ InstructionCost LoopVectorizationCostModel::getScalarizationOverhead(
     return 0;
 
   InstructionCost Cost = 0;
-  Type *RetTy = ToVectorTy(I->getType(), VF);
+  Type *RetTy = ToWideTy(I->getType(), VF);
   if (!RetTy->isVoidTy() &&
-      (!isa<LoadInst>(I) || !TTI.supportsEfficientVectorElementLoadStore()))
-    Cost += TTI.getScalarizationOverhead(
-        cast<VectorType>(RetTy), APInt::getAllOnes(VF.getKnownMinValue()),
-        /*Insert*/ true,
-        /*Extract*/ false, CostKind);
+      (!isa<LoadInst>(I) || !TTI.supportsEfficientVectorElementLoadStore())) {
+
+    for (Type *VectorTy : getContainedTypes(RetTy)) {
+      Cost += TTI.getScalarizationOverhead(
+          cast<VectorType>(VectorTy), APInt::getAllOnes(VF.getKnownMinValue()),
+          /*Insert*/ true,
+          /*Extract*/ false, CostKind);
+    }
+  }
 
   // Some targets keep addresses scalar.
   if (isa<LoadInst>(I) && !TTI.prefersVectorizedAddressing())
@@ -6219,9 +6225,9 @@ void LoopVectorizationCostModel::setVectorizedCallDecision(ElementCount VF) {
 
       bool MaskRequired = Legal->isMaskRequired(CI);
       // Compute corresponding vector type for return value and arguments.
-      Type *RetTy = ToVectorTy(ScalarRetTy, VF);
+      Type *RetTy = ToWideTy(ScalarRetTy, VF);
       for (Type *ScalarTy : ScalarTys)
-        Tys.push_back(ToVectorTy(ScalarTy, VF));
+        Tys.push_back(ToWideTy(ScalarTy, VF));
 
       // An in-loop reduction using an fmuladd intrinsic is a special case;
       // we don't want the normal cost for that intrinsic.
@@ -6398,7 +6404,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I,
            HasSingleCopyAfterVectorization(I, VF));
     VectorTy = RetTy;
   } else
-    VectorTy = ToVectorTy(RetTy, VF);
+    VectorTy = ToWideTy(RetTy, VF);
 
   if (VF.isVector() && VectorTy->isVectorTy() &&
       !TTI.getNumberOfParts(VectorTy))
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 6fe30356e8c912..4ca9e43b63aea6 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -1026,7 +1026,7 @@ InstructionCost VPWidenIntrinsicRecipe::computeCost(ElementCount VF,
     Arguments.push_back(V);
   }
 
-  Type *RetTy = ToVectorTy(Ctx.Types.inferScalarType(this), VF);
+  Type *RetTy = ToWideTy(Ctx.Types.inferScalarType(this), VF);
   SmallVector<Type *> ParamTys;
   for (unsigned I = 0; I != getNumOperands(); ++I)
     ParamTys.push_back(