-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[SLP][NFC]Store operand entries in the map #140549
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SLP][NFC]Store operand entries in the map #140549
Conversation
Created using spr 1.3.5
@llvm/pr-subscribers-vectorizers @llvm/pr-subscribers-llvm-transforms Author: Alexey Bataev (alexey-bataev) Changes: Instead of looking through the whole vectorizable tree to find the operand entry, it is better to store it in a separate map and perform a quick lookup based on the user tree entry and operand index. Full diff: https://github.com/llvm/llvm-project/pull/140549.diff 1 File Affected:
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index db4a5713a49a2..5b9ced4561a0c 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1886,6 +1886,7 @@ class BoUpSLP {
void deleteTree() {
VectorizableTree.clear();
ScalarToTreeEntries.clear();
+ OperandsToTreeEntry.clear();
ScalarsInSplitNodes.clear();
MustGather.clear();
NonScheduledFirst.clear();
@@ -3401,54 +3402,23 @@ class BoUpSLP {
const SmallDenseSet<unsigned, 8> &NodesToKeepBWs, unsigned &MaxDepthLevel,
bool &IsProfitableToDemote, bool IsTruncRoot) const;
- /// Check if the operands on the edges \p Edges of the \p UserTE allows
- /// reordering (i.e. the operands can be reordered because they have only one
- /// user and reordarable).
+ /// Builds the list of reorderable operands on the edges \p Edges of the \p
+ /// UserTE, which allow reordering (i.e. the operands can be reordered because
+ /// they have only one user and are reorderable).
/// \param ReorderableGathers List of all gather nodes that require reordering
/// (e.g., gather of extractlements or partially vectorizable loads).
/// \param GatherOps List of gather operand nodes for \p UserTE that require
/// reordering, subset of \p NonVectorized.
- bool
- canReorderOperands(TreeEntry *UserTE,
- SmallVectorImpl<std::pair<unsigned, TreeEntry *>> &Edges,
- ArrayRef<TreeEntry *> ReorderableGathers,
- SmallVectorImpl<TreeEntry *> &GatherOps);
+ void buildReorderableOperands(
+ TreeEntry *UserTE,
+ SmallVectorImpl<std::pair<unsigned, TreeEntry *>> &Edges,
+ const SmallPtrSetImpl<const TreeEntry *> &ReorderableGathers,
+ SmallVectorImpl<TreeEntry *> &GatherOps);
/// Checks if the given \p TE is a gather node with clustered reused scalars
/// and reorders it per given \p Mask.
void reorderNodeWithReuses(TreeEntry &TE, ArrayRef<int> Mask) const;
- /// Returns vectorized operand \p OpIdx of the node \p UserTE from the graph,
- /// if any. If it is not vectorized (gather node), returns nullptr.
- TreeEntry *getVectorizedOperand(TreeEntry *UserTE, unsigned OpIdx) {
- ArrayRef<Value *> VL = UserTE->getOperand(OpIdx);
- TreeEntry *TE = nullptr;
- const auto *It = find_if(VL, [&](Value *V) {
- if (!isa<Instruction>(V))
- return false;
- for (TreeEntry *E : getTreeEntries(V)) {
- if (E->UserTreeIndex == EdgeInfo(UserTE, OpIdx)) {
- TE = E;
- return true;
- }
- }
- return false;
- });
- if (It != VL.end()) {
- assert(TE->isSame(VL) && "Expected same scalars.");
- return TE;
- }
- return nullptr;
- }
-
- /// Returns vectorized operand \p OpIdx of the node \p UserTE from the graph,
- /// if any. If it is not vectorized (gather node), returns nullptr.
- const TreeEntry *getVectorizedOperand(const TreeEntry *UserTE,
- unsigned OpIdx) const {
- return const_cast<BoUpSLP *>(this)->getVectorizedOperand(
- const_cast<TreeEntry *>(UserTE), OpIdx);
- }
-
/// Checks if all users of \p I are the part of the vectorization tree.
bool areAllUsersVectorized(
Instruction *I,
@@ -3509,19 +3479,6 @@ class BoUpSLP {
/// Vectorize a single entry in the tree.
Value *vectorizeTree(TreeEntry *E);
- /// Returns vectorized operand node, that matches the order of the scalars
- /// operand number \p NodeIdx in entry \p E.
- TreeEntry *getMatchedVectorizedOperand(const TreeEntry *E, unsigned NodeIdx,
- ArrayRef<Value *> VL,
- const InstructionsState &S);
- const TreeEntry *
- getMatchedVectorizedOperand(const TreeEntry *E, unsigned NodeIdx,
- ArrayRef<Value *> VL,
- const InstructionsState &S) const {
- return const_cast<BoUpSLP *>(this)->getMatchedVectorizedOperand(E, NodeIdx,
- VL, S);
- }
-
/// Vectorize a single entry in the tree, the \p Idx-th operand of the entry
/// \p E.
Value *vectorizeOperand(TreeEntry *E, unsigned NodeIdx);
@@ -3715,11 +3672,6 @@ class BoUpSLP {
return IsSame(Scalars, ReuseShuffleIndices);
}
- bool isOperandGatherNode(const EdgeInfo &UserEI) const {
- return isGather() && UserTreeIndex.EdgeIdx == UserEI.EdgeIdx &&
- UserTreeIndex.UserTE == UserEI.UserTE;
- }
-
/// \returns true if current entry has same operands as \p TE.
bool hasEqualOperands(const TreeEntry &TE) const {
if (TE.getNumOperands() != getNumOperands())
@@ -4107,6 +4059,9 @@ class BoUpSLP {
TreeEntry *Last = VectorizableTree.back().get();
Last->Idx = VectorizableTree.size() - 1;
Last->State = EntryState;
+ if (UserTreeIdx.UserTE)
+ OperandsToTreeEntry.try_emplace(
+ std::make_pair(UserTreeIdx.UserTE, UserTreeIdx.EdgeIdx), Last);
// FIXME: Remove once support for ReuseShuffleIndices has been implemented
// for non-power-of-two vectors.
assert(
@@ -4298,6 +4253,10 @@ class BoUpSLP {
/// Maps a specific scalar to its tree entry(ies).
SmallDenseMap<Value *, SmallVector<TreeEntry *>> ScalarToTreeEntries;
+ /// Maps the operand index and entry to the corresponding tree entry.
+ SmallDenseMap<std::pair<const TreeEntry *, unsigned>, TreeEntry *>
+ OperandsToTreeEntry;
+
/// Scalars, used in split vectorize nodes.
SmallDenseMap<Value *, SmallVector<TreeEntry *>> ScalarsInSplitNodes;
@@ -7411,11 +7370,11 @@ void BoUpSLP::reorderTopToBottom() {
}
}
-bool BoUpSLP::canReorderOperands(
+void BoUpSLP::buildReorderableOperands(
TreeEntry *UserTE, SmallVectorImpl<std::pair<unsigned, TreeEntry *>> &Edges,
- ArrayRef<TreeEntry *> ReorderableGathers,
+ const SmallPtrSetImpl<const TreeEntry *> &ReorderableGathers,
SmallVectorImpl<TreeEntry *> &GatherOps) {
- for (unsigned I = 0, E = UserTE->getNumOperands(); I < E; ++I) {
+ for (unsigned I : seq<unsigned>(UserTE->getNumOperands())) {
if (any_of(Edges, [I](const std::pair<unsigned, TreeEntry *> &OpData) {
return OpData.first == I &&
(OpData.second->State == TreeEntry::Vectorize ||
@@ -7424,7 +7383,25 @@ bool BoUpSLP::canReorderOperands(
OpData.second->State == TreeEntry::SplitVectorize);
}))
continue;
- if (TreeEntry *TE = getVectorizedOperand(UserTE, I)) {
+ // Do not request operands, if they do not exist.
+ if (UserTE->hasState()) {
+ if (UserTE->getOpcode() == Instruction::ExtractElement ||
+ UserTE->getOpcode() == Instruction::ExtractValue)
+ continue;
+ if (UserTE->getOpcode() == Instruction::InsertElement && I == 0)
+ continue;
+ if (UserTE->getOpcode() == Instruction::Store &&
+ UserTE->State == TreeEntry::Vectorize && I == 1)
+ continue;
+ if (UserTE->getOpcode() == Instruction::Load &&
+ (UserTE->State == TreeEntry::Vectorize ||
+ UserTE->State == TreeEntry::StridedVectorize ||
+ UserTE->State == TreeEntry::CompressVectorize))
+ continue;
+ }
+ TreeEntry *TE = getOperandEntry(UserTE, I);
+ assert(TE && "Expected operand entry.");
+ if (!TE->isGather()) {
// Add the node to the list of the ordered nodes with the identity
// order.
Edges.emplace_back(I, TE);
@@ -7433,37 +7410,14 @@ bool BoUpSLP::canReorderOperands(
// simply add to the list of gathered ops.
// If there are reused scalars, process this node as a regular vectorize
// node, just reorder reuses mask.
- if (TE->State != TreeEntry::Vectorize &&
- TE->State != TreeEntry::StridedVectorize &&
- TE->State != TreeEntry::CompressVectorize &&
- TE->State != TreeEntry::SplitVectorize &&
+ if (TE->State == TreeEntry::ScatterVectorize &&
TE->ReuseShuffleIndices.empty() && TE->ReorderIndices.empty())
GatherOps.push_back(TE);
continue;
}
- TreeEntry *Gather = nullptr;
- if (count_if(ReorderableGathers,
- [&Gather, UserTE, I](TreeEntry *TE) {
- assert(TE->State != TreeEntry::Vectorize &&
- TE->State != TreeEntry::StridedVectorize &&
- TE->State != TreeEntry::CompressVectorize &&
- TE->State != TreeEntry::SplitVectorize &&
- "Only non-vectorized nodes are expected.");
- if (TE->UserTreeIndex.UserTE == UserTE &&
- TE->UserTreeIndex.EdgeIdx == I) {
- assert(TE->isSame(UserTE->getOperand(I)) &&
- "Operand entry does not match operands.");
- Gather = TE;
- return true;
- }
- return false;
- }) > 1 &&
- !allConstant(UserTE->getOperand(I)))
- return false;
- if (Gather)
- GatherOps.push_back(Gather);
+ if (ReorderableGathers.contains(TE))
+ GatherOps.push_back(TE);
}
- return true;
}
void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
@@ -7479,13 +7433,13 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
// Find all reorderable leaf nodes with the given VF.
// Currently the are vectorized loads,extracts without alternate operands +
// some gathering of extracts.
- SmallVector<TreeEntry *> NonVectorized;
+ SmallPtrSet<const TreeEntry *, 4> NonVectorized;
for (const std::unique_ptr<TreeEntry> &TE : VectorizableTree) {
if (TE->State != TreeEntry::Vectorize &&
TE->State != TreeEntry::StridedVectorize &&
TE->State != TreeEntry::CompressVectorize &&
TE->State != TreeEntry::SplitVectorize)
- NonVectorized.push_back(TE.get());
+ NonVectorized.insert(TE.get());
if (std::optional<OrdersType> CurrentOrder =
getReorderingData(*TE, /*TopToBottom=*/false, IgnoreReorder)) {
Queue.push(TE.get());
@@ -7584,11 +7538,8 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
}
// Check that operands are used only in the User node.
SmallVector<TreeEntry *> GatherOps;
- if (!canReorderOperands(Data.first, Data.second, NonVectorized,
- GatherOps)) {
- Visited.insert_range(llvm::make_second_range(Data.second));
- continue;
- }
+ buildReorderableOperands(Data.first, Data.second, NonVectorized,
+ GatherOps);
// All operands are reordered and used only in this node - propagate the
// most used order to the user node.
MapVector<OrdersType, unsigned,
@@ -12916,33 +12867,9 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
const BoUpSLP::TreeEntry *BoUpSLP::getOperandEntry(const TreeEntry *E,
unsigned Idx) const {
- ArrayRef<Value *> VL = E->getOperand(Idx);
- InstructionsState S = getSameOpcode(VL, *TLI);
- // Special processing for GEPs bundle, which may include non-gep values.
- if (!S && VL.front()->getType()->isPointerTy()) {
- const auto *It = find_if(VL, IsaPred<GetElementPtrInst>);
- if (It != VL.end())
- S = getSameOpcode(*It, *TLI);
- }
- if (const TreeEntry *VE = getMatchedVectorizedOperand(E, Idx, VL, S))
- return VE;
- if (S || !isConstant(VL.front())) {
- for (const TreeEntry *VE :
- ValueToGatherNodes.lookup(S ? S.getMainOp() : VL.front()))
- if (VE->UserTreeIndex.EdgeIdx == Idx && VE->UserTreeIndex.UserTE == E) {
- assert(VE->isSame(VL) && "Expected gather node with same values.");
- return VE;
- }
- }
- const auto *It = find_if(ArrayRef(VectorizableTree).drop_front(E->Idx + 1),
- [&](const std::unique_ptr<TreeEntry> &TE) {
- return (TE->isGather() ||
- TE->State == TreeEntry::SplitVectorize) &&
- TE->UserTreeIndex.EdgeIdx == Idx &&
- TE->UserTreeIndex.UserTE == E;
- });
- assert(It != VectorizableTree.end() && "Expected vectorizable entry.");
- return It->get();
+ TreeEntry *Op = OperandsToTreeEntry.at({E, Idx});
+ assert(Op->isSame(E->getOperand(Idx)) && "Operands mismatch!");
+ return Op;
}
TTI::CastContextHint BoUpSLP::getCastContextHint(const TreeEntry &TE) const {
@@ -16914,121 +16841,8 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
}
};
-BoUpSLP::TreeEntry *
-BoUpSLP::getMatchedVectorizedOperand(const TreeEntry *E, unsigned NodeIdx,
- ArrayRef<Value *> VL,
- const InstructionsState &S) {
- if (!S)
- return nullptr;
- for (TreeEntry *TE : ScalarToTreeEntries.lookup(S.getMainOp()))
- if (TE->UserTreeIndex.UserTE == E && TE->UserTreeIndex.EdgeIdx == NodeIdx &&
- TE->isSame(VL))
- return TE;
- return nullptr;
-}
-
Value *BoUpSLP::vectorizeOperand(TreeEntry *E, unsigned NodeIdx) {
- ValueList &VL = E->getOperand(NodeIdx);
- InstructionsState S = getSameOpcode(VL, *TLI);
- // Special processing for GEPs bundle, which may include non-gep values.
- if (!S && VL.front()->getType()->isPointerTy()) {
- const auto *It = find_if(VL, IsaPred<GetElementPtrInst>);
- if (It != VL.end())
- S = getSameOpcode(*It, *TLI);
- }
- const unsigned VF = VL.size();
- if (TreeEntry *VE = getMatchedVectorizedOperand(E, NodeIdx, VL, S)) {
- auto FinalShuffle = [&](Value *V, ArrayRef<int> Mask) {
- // V may be affected by MinBWs.
- // We want ShuffleInstructionBuilder to correctly support REVEC. The key
- // factor is the number of elements, not their type.
- Type *ScalarTy = cast<VectorType>(V->getType())->getElementType();
- unsigned NumElements = getNumElements(VL.front()->getType());
- ShuffleInstructionBuilder ShuffleBuilder(
- NumElements != 1 ? FixedVectorType::get(ScalarTy, NumElements)
- : ScalarTy,
- Builder, *this);
- ShuffleBuilder.add(V, Mask);
- SmallVector<std::pair<const TreeEntry *, unsigned>> SubVectors(
- E->CombinedEntriesWithIndices.size());
- transform(E->CombinedEntriesWithIndices, SubVectors.begin(),
- [&](const auto &P) {
- return std::make_pair(VectorizableTree[P.first].get(),
- P.second);
- });
- assert((E->CombinedEntriesWithIndices.empty() ||
- E->ReorderIndices.empty()) &&
- "Expected either combined subnodes or reordering");
- return ShuffleBuilder.finalize({}, SubVectors, {});
- };
- Value *V = vectorizeTree(VE);
- if (VF * getNumElements(VL[0]->getType()) !=
- cast<FixedVectorType>(V->getType())->getNumElements()) {
- if (!VE->ReuseShuffleIndices.empty()) {
- // Reshuffle to get only unique values.
- // If some of the scalars are duplicated in the vectorization
- // tree entry, we do not vectorize them but instead generate a
- // mask for the reuses. But if there are several users of the
- // same entry, they may have different vectorization factors.
- // This is especially important for PHI nodes. In this case, we
- // need to adapt the resulting instruction for the user
- // vectorization factor and have to reshuffle it again to take
- // only unique elements of the vector. Without this code the
- // function incorrectly returns reduced vector instruction with
- // the same elements, not with the unique ones.
-
- // block:
- // %phi = phi <2 x > { .., %entry} {%shuffle, %block}
- // %2 = shuffle <2 x > %phi, poison, <4 x > <1, 1, 0, 0>
- // ... (use %2)
- // %shuffle = shuffle <2 x> %2, poison, <2 x> {2, 0}
- // br %block
- SmallVector<int> Mask(VF, PoisonMaskElem);
- for (auto [I, V] : enumerate(VL)) {
- if (isa<PoisonValue>(V))
- continue;
- Mask[I] = VE->findLaneForValue(V);
- }
- V = FinalShuffle(V, Mask);
- } else {
- assert(VF < cast<FixedVectorType>(V->getType())->getNumElements() &&
- "Expected vectorization factor less "
- "than original vector size.");
- SmallVector<int> UniformMask(VF, 0);
- std::iota(UniformMask.begin(), UniformMask.end(), 0);
- V = FinalShuffle(V, UniformMask);
- }
- }
- // Need to update the operand gather node, if actually the operand is not a
- // vectorized node, but the buildvector/gather node, which matches one of
- // the vectorized nodes.
- if (VE->UserTreeIndex.UserTE != E || VE->UserTreeIndex.EdgeIdx != NodeIdx) {
- auto *It = find_if(ArrayRef(VectorizableTree).drop_front(E->Idx + 1),
- [&](const std::unique_ptr<TreeEntry> &TE) {
- return TE->isGather() &&
- TE->UserTreeIndex.UserTE == E &&
- TE->UserTreeIndex.EdgeIdx == NodeIdx;
- });
- assert(It != VectorizableTree.end() && "Expected gather node operand.");
- (*It)->VectorizedValue = V;
- }
- return V;
- }
-
- // Find the corresponding gather entry and vectorize it.
- // Allows to be more accurate with tree/graph transformations, checks for the
- // correctness of the transformations in many cases.
- auto *I = find_if(ArrayRef(VectorizableTree).drop_front(E->Idx + 1),
- [E, NodeIdx](const std::unique_ptr<TreeEntry> &TE) {
- return TE->isOperandGatherNode({E, NodeIdx}) ||
- (TE->State == TreeEntry::SplitVectorize &&
- TE->UserTreeIndex == EdgeInfo(E, NodeIdx));
- });
- assert(I != VectorizableTree.end() && "Gather node is not in the graph.");
- assert(I->get()->UserTreeIndex &&
- "Expected only single user for the gather node.");
- assert(I->get()->isSame(VL) && "Expected same list of scalars.");
- return vectorizeTree(I->get());
+ return vectorizeTree(getOperandEntry(E, NodeIdx));
}
template <typename BVTy, typename ResTy, typename... Args>
|
Instead of looking through the whole vectorizable tree to find the operand entry, it is better to store it in a separate map and perform a quick lookup based on the user tree entry and operand index. This allows removing a lot of duplicated code, simplifies processing, and fixes potential future issues where the analysis is affected by the codegen. It also improves compile time. Reviewers: HanKuanChen, RKSimon, hiraditya. Reviewed By: hiraditya. Pull Request: llvm/llvm-project#140549
Instead of looking through the whole vectorizable tree to find the operand entry, it is better to store it in a separate map and perform a quick lookup based on the user tree entry and operand index. This allows removing a lot of duplicated code, simplifies processing, and fixes potential future issues where the analysis is affected by the codegen. It also improves compile time. Reviewers: HanKuanChen, RKSimon, hiraditya. Reviewed By: hiraditya. Pull Request: llvm#140549
Instead of looking through the whole vectorizable tree to find the operand entry, it is better to store it in a separate map and perform a quick lookup based on the user tree entry and operand index. This allows removing a lot of duplicated code, simplifies processing, and fixes potential future issues where the analysis is affected by the codegen. It also improves compile time. Reviewers: HanKuanChen, RKSimon, hiraditya. Reviewed By: hiraditya. Pull Request: llvm#140549
Instead of looking through the whole vectorizable tree to find the operand
entry, it is better to store it in a separate map and perform a quick lookup
based on the user tree entry and operand index.
This allows removing a lot of duplicated code, simplifies processing, and
fixes potential future issues where the analysis is affected by the codegen.
It also improves compile time.