Revert "[MemProf] Reduce cloning overhead by sharing nodes when possi…
Browse files Browse the repository at this point in the history
…ble" (#102932)

Reverts llvm/llvm-project#99832

This caused a couple of failures in wider testing; reverting for now and
will recommit once they are addressed.
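For context, the reverted patch avoided creating a separate graph node for every callsite by letting calls in the same function with identical (possibly pruned) stack ids share a single context node, recording the extra calls in a MatchingCalls list so they are updated alongside the primary call during clone assignment. The following is a minimal, self-contained sketch of that idea only; the types, container choices, and names below are simplified stand-ins for illustration, not the actual LLVM classes touched by this commit.

#include <cstdint>
#include <iostream>
#include <map>
#include <string>
#include <utility>
#include <vector>

struct CallInfo {
  std::string Name; // stand-in for the actual call instruction
};

struct ContextNode {
  CallInfo Call;                       // primary call for this node
  std::vector<CallInfo> MatchingCalls; // other calls sharing the node
};

int main() {
  // Key: (enclosing function, stack-id sequence). The first call seen with a
  // given key becomes the node's primary call; later matching calls attach to
  // the same node instead of getting nodes of their own.
  std::map<std::pair<std::string, std::vector<uint64_t>>, ContextNode> Nodes;

  struct Candidate {
    std::string Func;
    std::vector<uint64_t> StackIds;
    CallInfo Call;
  };
  std::vector<Candidate> Calls = {
      {"foo", {1, 2}, {"call A"}},
      {"foo", {1, 2}, {"call B"}}, // same function + stack ids: shares node
      {"bar", {1, 2}, {"call C"}}, // same stack ids, different function: new node
  };

  for (const auto &C : Calls) {
    auto Key = std::make_pair(C.Func, C.StackIds);
    auto It = Nodes.find(Key);
    if (It == Nodes.end())
      Nodes.emplace(Key, ContextNode{C.Call, {}}); // first call becomes primary
    else
      It->second.MatchingCalls.push_back(C.Call);  // later calls just attach
  }

  for (const auto &[Key, Node] : Nodes)
    std::cout << Key.first << ": primary=" << Node.Call.Name
              << " matching=" << Node.MatchingCalls.size() << "\n";
  return 0;
}

When a function clone is later assigned, walking the node's MatchingCalls list lets every attached call be rewritten together with the primary call, which is what the hunks removed below in assignFunctions() did.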
teresajohnson authored Aug 12, 2024
1 parent d9caea1 commit 11aa31f
Showing 1 changed file with 15 additions and 109 deletions.
124 changes: 15 additions & 109 deletions llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
@@ -242,16 +242,9 @@ class CallsiteContextGraph {
// recursion.
bool Recursive = false;

// The corresponding allocation or interior call. This is the primary call
// for which we have created this node.
// The corresponding allocation or interior call.
CallInfo Call;

// List of other calls that can be treated the same as the primary call
// through cloning. I.e. located in the same function and have the same
// (possibly pruned) stack ids. They will be updated the same way as the
// primary call when assigning to function clones.
std::vector<CallInfo> MatchingCalls;

// For alloc nodes this is a unique id assigned when constructed, and for
// callsite stack nodes it is the original stack id when the node is
// constructed from the memprof MIB metadata on the alloc nodes. Note that
@@ -464,9 +457,6 @@ class CallsiteContextGraph {
/// iteration.
MapVector<FuncTy *, std::vector<CallInfo>> FuncToCallsWithMetadata;

/// Records the function each call is located in.
DenseMap<CallInfo, const FuncTy *> CallToFunc;

/// Map from callsite node to the enclosing caller function.
std::map<const ContextNode *, const FuncTy *> NodeToCallingFunc;

@@ -484,8 +474,7 @@
/// StackIdToMatchingCalls map.
void assignStackNodesPostOrder(
ContextNode *Node, DenseSet<const ContextNode *> &Visited,
DenseMap<uint64_t, std::vector<CallContextInfo>> &StackIdToMatchingCalls,
DenseMap<CallInfo, CallInfo> &CallToMatchingCall);
DenseMap<uint64_t, std::vector<CallContextInfo>> &StackIdToMatchingCalls);

/// Duplicates the given set of context ids, updating the provided
/// map from each original id with the newly generated context ids,
@@ -1241,11 +1230,10 @@ static void checkNode(const ContextNode<DerivedCCG, FuncTy, CallTy> *Node,

template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
assignStackNodesPostOrder(
ContextNode *Node, DenseSet<const ContextNode *> &Visited,
DenseMap<uint64_t, std::vector<CallContextInfo>>
&StackIdToMatchingCalls,
DenseMap<CallInfo, CallInfo> &CallToMatchingCall) {
assignStackNodesPostOrder(ContextNode *Node,
DenseSet<const ContextNode *> &Visited,
DenseMap<uint64_t, std::vector<CallContextInfo>>
&StackIdToMatchingCalls) {
auto Inserted = Visited.insert(Node);
if (!Inserted.second)
return;
@@ -1258,8 +1246,7 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
// Skip any that have been removed during the recursion.
if (!Edge)
continue;
assignStackNodesPostOrder(Edge->Caller, Visited, StackIdToMatchingCalls,
CallToMatchingCall);
assignStackNodesPostOrder(Edge->Caller, Visited, StackIdToMatchingCalls);
}

// If this node's stack id is in the map, update the graph to contain new
@@ -1302,19 +1289,8 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
auto &[Call, Ids, Func, SavedContextIds] = Calls[I];
// Skip any for which we didn't assign any ids, these don't get a node in
// the graph.
if (SavedContextIds.empty()) {
// If this call has a matching call (located in the same function and
// having the same stack ids), simply add it to the context node created
// for its matching call earlier. These can be treated the same through
// cloning and get updated at the same time.
if (!CallToMatchingCall.contains(Call))
continue;
auto MatchingCall = CallToMatchingCall[Call];
assert(NonAllocationCallToContextNodeMap.contains(MatchingCall));
NonAllocationCallToContextNodeMap[MatchingCall]->MatchingCalls.push_back(
Call);
if (SavedContextIds.empty())
continue;
}

assert(LastId == Ids.back());

@@ -1446,10 +1422,6 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
// there is more than one call with the same stack ids. Their (possibly newly
// duplicated) context ids are saved in the StackIdToMatchingCalls map.
DenseMap<uint32_t, DenseSet<uint32_t>> OldToNewContextIds;
// Save a map from each call to any that are found to match it. I.e. located
// in the same function and have the same (possibly pruned) stack ids. We use
// this to avoid creating extra graph nodes as they can be treated the same.
DenseMap<CallInfo, CallInfo> CallToMatchingCall;
for (auto &It : StackIdToMatchingCalls) {
auto &Calls = It.getSecond();
// Skip single calls with a single stack id. These don't need a new node.
@@ -1488,13 +1460,6 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
DenseSet<uint32_t> LastNodeContextIds = LastNode->getContextIds();
assert(!LastNodeContextIds.empty());

// Map from function to the first call from the below list (with matching
// stack ids) found in that function. Note that calls from different
// functions can have the same stack ids because this is the list of stack
// ids that had (possibly pruned) nodes after building the graph from the
// allocation MIBs.
DenseMap<const FuncTy *, CallInfo> FuncToCallMap;

for (unsigned I = 0; I < Calls.size(); I++) {
auto &[Call, Ids, Func, SavedContextIds] = Calls[I];
assert(SavedContextIds.empty());
@@ -1568,18 +1533,6 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
continue;
}

const FuncTy *CallFunc = CallToFunc[Call];

// If the prior call had the same stack ids this map would not be empty.
// Check if we already have a call that "matches" because it is located
// in the same function.
if (FuncToCallMap.contains(CallFunc)) {
// Record the matching call found for this call, and skip it. We
// will subsequently combine it into the same node.
CallToMatchingCall[Call] = FuncToCallMap[CallFunc];
continue;
}

// Check if the next set of stack ids is the same (since the Calls vector
// of tuples is sorted by the stack ids we can just look at the next one).
bool DuplicateContextIds = false;
@@ -1609,14 +1562,7 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
set_subtract(LastNodeContextIds, StackSequenceContextIds);
if (LastNodeContextIds.empty())
break;
// No longer possibly in a sequence of calls with duplicate stack ids,
// clear the map.
FuncToCallMap.clear();
} else
// Record the call with its function, so we can locate it the next time
// we find a call from this function when processing the calls with the
// same stack ids.
FuncToCallMap[CallFunc] = Call;
}
}
}

@@ -1633,8 +1579,7 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
// associated context ids over to the new nodes.
DenseSet<const ContextNode *> Visited;
for (auto &Entry : AllocationCallToContextNodeMap)
assignStackNodesPostOrder(Entry.second, Visited, StackIdToMatchingCalls,
CallToMatchingCall);
assignStackNodesPostOrder(Entry.second, Visited, StackIdToMatchingCalls);
if (VerifyCCG)
check();
}
@@ -1734,7 +1679,6 @@ ModuleCallsiteContextGraph::ModuleCallsiteContextGraph(
continue;
if (auto *MemProfMD = I.getMetadata(LLVMContext::MD_memprof)) {
CallsWithMetadata.push_back(&I);
CallToFunc[&I] = &F;
auto *AllocNode = addAllocNode(&I, &F);
auto *CallsiteMD = I.getMetadata(LLVMContext::MD_callsite);
assert(CallsiteMD);
@@ -1756,10 +1700,8 @@
I.setMetadata(LLVMContext::MD_callsite, nullptr);
}
// For callsite metadata, add to list for this function for later use.
else if (I.getMetadata(LLVMContext::MD_callsite)) {
else if (I.getMetadata(LLVMContext::MD_callsite))
CallsWithMetadata.push_back(&I);
CallToFunc[&I] = &F;
}
}
}
if (!CallsWithMetadata.empty())
@@ -1814,10 +1756,8 @@ IndexCallsiteContextGraph::IndexCallsiteContextGraph(
// correlate properly in applyImport in the backends.
if (AN.MIBs.empty())
continue;
IndexCall AllocCall(&AN);
CallsWithMetadata.push_back(AllocCall);
CallToFunc[AllocCall] = FS;
auto *AllocNode = addAllocNode(AllocCall, FS);
CallsWithMetadata.push_back({&AN});
auto *AllocNode = addAllocNode({&AN}, FS);
// Pass an empty CallStack to the CallsiteContext (second)
// parameter, since for ThinLTO we already collapsed out the inlined
// stack ids on the allocation call during ModuleSummaryAnalysis.
@@ -1848,11 +1788,8 @@ IndexCallsiteContextGraph::IndexCallsiteContextGraph(
}
// For callsite metadata, add to list for this function for later use.
if (!FS->callsites().empty())
for (auto &SN : FS->mutableCallsites()) {
IndexCall StackNodeCall(&SN);
CallsWithMetadata.push_back(StackNodeCall);
CallToFunc[StackNodeCall] = FS;
}
for (auto &SN : FS->mutableCallsites())
CallsWithMetadata.push_back({&SN});

if (!CallsWithMetadata.empty())
FuncToCallsWithMetadata[FS] = CallsWithMetadata;
@@ -2288,14 +2225,6 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::print(
if (Recursive)
OS << " (recursive)";
OS << "\n";
if (!MatchingCalls.empty()) {
OS << "\tMatchingCalls:\n";
for (auto &MatchingCall : MatchingCalls) {
OS << "\t";
MatchingCall.print(OS);
OS << "\n";
}
}
OS << "\tAllocTypes: " << getAllocTypeString(AllocTypes) << "\n";
OS << "\tContextIds:";
// Make a copy of the computed context ids that we can sort for stability.
@@ -2549,7 +2478,6 @@ CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::moveEdgeToNewCalleeClone(
std::make_unique<ContextNode>(Node->IsAllocation, Node->Call));
ContextNode *Clone = NodeOwner.back().get();
Node->addClone(Clone);
Clone->MatchingCalls = Node->MatchingCalls;
assert(NodeToCallingFunc.count(Node));
NodeToCallingFunc[Clone] = NodeToCallingFunc[Node];
moveEdgeToExistingCalleeClone(Edge, Clone, CallerEdgeI, /*NewClone=*/true,
@@ -3093,14 +3021,6 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
if (CallMap.count(Call))
CallClone = CallMap[Call];
CallsiteClone->setCall(CallClone);
// Need to do the same for all matching calls.
for (auto &MatchingCall : Node->MatchingCalls) {
CallInfo CallClone(MatchingCall);
if (CallMap.count(MatchingCall))
CallClone = CallMap[MatchingCall];
// Updates the call in the list.
MatchingCall = CallClone;
}
};

// Keep track of the clones of callsite Node that need to be assigned to
@@ -3267,16 +3187,6 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
CallInfo NewCall(CallMap[OrigCall]);
assert(NewCall);
NewClone->setCall(NewCall);
// Need to do the same for all matching calls.
for (auto &MatchingCall : NewClone->MatchingCalls) {
CallInfo OrigMatchingCall(MatchingCall);
OrigMatchingCall.setCloneNo(0);
assert(CallMap.count(OrigMatchingCall));
CallInfo NewCall(CallMap[OrigMatchingCall]);
assert(NewCall);
// Updates the call in the list.
MatchingCall = NewCall;
}
}
}
// Fall through to handling below to perform the recording of the
@@ -3463,7 +3373,6 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {

if (Node->IsAllocation) {
updateAllocationCall(Node->Call, allocTypeToUse(Node->AllocTypes));
assert(Node->MatchingCalls.empty());
return;
}

@@ -3472,9 +3381,6 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {

auto CalleeFunc = CallsiteToCalleeFuncCloneMap[Node];
updateCall(Node->Call, CalleeFunc);
// Update all the matching calls as well.
for (auto &Call : Node->MatchingCalls)
updateCall(Call, CalleeFunc);
};

// Performs DFS traversal starting from allocation nodes to update calls to
