-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[PromoteMem2Reg] Optimize memory usage in PromoteMem2Reg #142474
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[PromoteMem2Reg] Optimize memory usage in PromoteMem2Reg #142474
Conversation
Created using spr 1.3.6 [skip ci]
Created using spr 1.3.6
When BasicBlock has a large number of allocas, and successors, we had to copy entire IncomingVals and IncomingLocs vectors for successors. Additional changes in IncomingVals and IncomingLocs are infrequent (only Load/Store into alloc affect arrays). Given the nature of DFS traversal, instead of copying the entire vector, we can keep track of the changes and undo all changes done by successors. Pull Request: llvm#142474
@llvm/pr-subscribers-llvm-transforms Author: Vitaly Buka (vitalybuka) ChangesWhen BasicBlock has a large number of allocas, and Additional changes in IncomingVals and Given the nature of DFS traversal, instead of copying Full diff: https://github.com/llvm/llvm-project/pull/142474.diff 1 Files Affected:
diff --git a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index 9ddcbd516e00a..3220f57aeeade 100644
--- a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -281,18 +281,44 @@ struct AllocaInfo {
}
};
+template <typename T> class VectorWithUndo {
+ SmallVector<T, 8> Vals;
+ SmallVector<std::pair<size_t, T>, 8> Undo;
+
+public:
+ void undo(size_t S) {
+ while (S < Undo.size()) {
+ Vals[Undo.back().first] = Undo.back().second;
+ Undo.pop_back();
+ }
+ }
+
+ void assign(size_t Sz, const T &Val) { Vals.assign(Sz, Val); }
+
+ size_t size() const { return Undo.size(); }
+
+ const T &operator[](size_t Idx) const { return Vals[Idx]; }
+
+ void set(size_t Idx, const T &Val) {
+ if (Vals[Idx] == Val)
+ return;
+ Undo.emplace_back(Idx, Vals[Idx]);
+ Vals[Idx] = Val;
+ }
+
+ void init(size_t Idx, const T &Val) { Vals[Idx] = Val; }
+};
+
/// Data package used by RenamePass().
struct RenamePassData {
- using ValVector = std::vector<Value *>;
- using LocationVector = std::vector<DebugLoc>;
-
- RenamePassData(BasicBlock *B, BasicBlock *P, ValVector V, LocationVector L)
- : BB(B), Pred(P), Values(std::move(V)), Locations(std::move(L)) {}
+ RenamePassData(BasicBlock *B, BasicBlock *P, size_t V, size_t L)
+ : BB(B), Pred(P), UndoVals(V), UndoLocs(L) {}
BasicBlock *BB;
BasicBlock *Pred;
- ValVector Values;
- LocationVector Locations;
+
+ size_t UndoVals;
+ size_t UndoLocs;
};
/// This assigns and keeps a per-bb relative ordering of load/store
@@ -393,10 +419,10 @@ struct PromoteMem2Reg {
SmallVector<unsigned> BBNumPreds;
/// The state of incoming values for the current DFS step.
- RenamePassData::ValVector IncomingVals;
+ VectorWithUndo<Value *> IncomingVals;
/// The state of incoming locations for the current DFS step.
- RenamePassData::LocationVector IncomingLocs;
+ VectorWithUndo<DebugLoc> IncomingLocs;
// DFS work stack.
SmallVector<RenamePassData, 8> WorkList;
@@ -445,17 +471,15 @@ struct PromoteMem2Reg {
DVRAssignsToDelete.clear();
}
- void pushToWorklist(BasicBlock *BB, BasicBlock *Pred,
- RenamePassData::ValVector IncVals,
- RenamePassData::LocationVector IncLocs) {
- WorkList.emplace_back(BB, Pred, std::move(IncVals), std::move(IncLocs));
+ void pushToWorklist(BasicBlock *BB, BasicBlock *Pred) {
+ WorkList.emplace_back(BB, Pred, IncomingVals.size(), IncomingVals.size());
}
RenamePassData popFromWorklist() {
- RenamePassData R = std::move(WorkList.back());
+ RenamePassData R = WorkList.back();
WorkList.pop_back();
- IncomingVals = std::move(R.Values);
- IncomingLocs = std::move(R.Locations);
+ IncomingVals.undo(R.UndoVals);
+ IncomingLocs.undo(R.UndoLocs);
return R;
}
};
@@ -871,22 +895,20 @@ void PromoteMem2Reg::run() {
// been stored yet. In this case, it will get this null value.
IncomingVals.assign(Allocas.size(), nullptr);
for (unsigned i = 0, e = Allocas.size(); i != e; ++i)
- IncomingVals[i] = UndefValue::get(Allocas[i]->getAllocatedType());
+ IncomingVals.init(i, UndefValue::get(Allocas[i]->getAllocatedType()));
// When handling debug info, treat all incoming values as if they have unknown
// locations until proven otherwise.
IncomingLocs.assign(Allocas.size(), {});
// The renamer uses the Visited set to avoid infinite loops.
- Visited.resize(F.getMaxBlockNumber());
+ Visited.resize(F.getMaxBlockNumber(), false);
+
+ // Add the entry block to the worklist, with a null predecessor.
+ pushToWorklist(&F.front(), nullptr);
- // Walks all basic blocks in the function performing the SSA rename algorithm
- // and inserting the phi nodes we marked as necessary
- pushToWorklist(&F.front(), nullptr, std::move(IncomingVals),
- std::move(IncomingLocs));
do {
RenamePassData RPD = popFromWorklist();
- // RenamePass may add new worklist entries.
RenamePass(RPD.BB, RPD.Pred);
} while (!WorkList.empty());
@@ -1153,7 +1175,7 @@ void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred) {
APN->setHasNoSignedZeros(true);
// The currently active variable for this block is now the PHI.
- IncomingVals[AllocaNo] = APN;
+ IncomingVals.set(AllocaNo, APN);
AllocaATInfo[AllocaNo].updateForNewPhi(APN, DIB);
auto ConvertDbgDeclares = [&](auto &Container) {
for (auto *DbgItem : Container)
@@ -1211,10 +1233,10 @@ void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred) {
// what value were we writing?
unsigned AllocaNo = ai->second;
- IncomingVals[AllocaNo] = SI->getOperand(0);
+ IncomingVals.set(AllocaNo, SI->getOperand(0));
// Record debuginfo for the store before removing it.
- IncomingLocs[AllocaNo] = SI->getDebugLoc();
+ IncomingLocs.set(AllocaNo, SI->getDebugLoc());
AllocaATInfo[AllocaNo].updateForDeletedStore(SI, DIB, &DbgAssignsToDelete,
&DVRAssignsToDelete);
auto ConvertDbgDeclares = [&](auto &Container) {
@@ -1234,16 +1256,8 @@ void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred) {
SmallPtrSet<BasicBlock *, 8> VisitedSuccs;
for (BasicBlock *S : reverse(successors(BB)))
- if (VisitedSuccs.insert(S).second) {
- if (VisitedSuccs.size() == 1) {
- // Let the first successor to own allocated arrays.
- pushToWorklist(S, BB, std::move(IncomingVals), std::move(IncomingLocs));
- } else {
- // Other successors have to make a copy.
- pushToWorklist(S, BB, WorkList.back().Values,
- WorkList.back().Locations);
- }
- }
+ if (VisitedSuccs.insert(S).second)
+ pushToWorklist(S, BB);
}
void llvm::PromoteMemToReg(ArrayRef<AllocaInst *> Allocas, DominatorTree &DT,
|
Created using spr 1.3.6 [skip ci]
When BasicBlock has a large number of allocas, and successors, we had to copy entire IncomingVals and IncomingLocs vectors for successors. Additional changes in IncomingVals and IncomingLocs are infrequent (only Load/Store into alloc affect arrays). Given the nature of DFS traversal, instead of copying the entire vector, we can keep track of the changes and undo all changes done by successors. Pull Request: llvm#142474
When BasicBlock has a large number of allocas, and successors, we had to copy entire IncomingVals and IncomingLocs vectors for successors. Additional changes in IncomingVals and IncomingLocs are infrequent (only Load/Store into alloc affect arrays). Given the nature of DFS traversal, instead of copying the entire vector, we can keep track of the changes and undo all changes done by successors. Pull Request: llvm#142474
When BasicBlock has a large number of allocas, and successors, we had to copy entire IncomingVals and IncomingLocs vectors for successors. Additional changes in IncomingVals and IncomingLocs are infrequent (only Load/Store into alloc affect arrays). Given the nature of DFS traversal, instead of copying the entire vector, we can keep track of the changes and undo all changes done by successors. Pull Request: llvm#142474
Created using spr 1.3.6
Created using spr 1.3.6 [skip ci]
When BasicBlock has a large number of allocas, and successors, we had to copy entire IncomingVals and IncomingLocs vectors for successors. Additional changes in IncomingVals and IncomingLocs are infrequent (only Load/Store into alloc affect arrays). Given the nature of DFS traversal, instead of copying the entire vector, we can keep track of the changes and undo all changes done by successors. Pull Request: llvm#142474
Created using spr 1.3.6 [skip ci]
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
Created using spr 1.3.6 [skip ci]
When BasicBlock has a large number of allocas, and successors, we had to copy entire IncomingVals and IncomingLocs vectors for successors. Additional changes in IncomingVals and IncomingLocs are infrequent (only Load/Store into alloc affect arrays). Given the nature of DFS traversal, instead of copying the entire vector, we can keep track of the changes and undo all changes done by successors. Pull Request: llvm#142474
Created using spr 1.3.6 [skip ci]
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/186/builds/9601 Here is the relevant piece of the build log for the reference
|
When BasicBlock has a large number of allocas, and
successors, we had to copy entire IncomingVals and
IncomingLocs vectors for successors.
Additional changes in IncomingVals and
IncomingLocs are infrequent (only Load/Store into
alloc affect arrays).
Given the nature of DFS traversal, instead of copying
the entire vector, we can keep track of the changes
and undo all changes done by successors.
Fixes #142461
On the attached to issue #142461 IR RSS drops from 35Gb to 2Gb.
But it does not affect compile time on average
https://llvm-compile-time-tracker.com/compare.php?from=2e98ed8caa0b47ee79af4ad24b5436a89fe49dfa&to=effac6d1fd600e544f8bc21382c7e541973b1378&stat=instructions:u