From 02d6aad5cc940f17904c1288dfabc3fd2d439279 Mon Sep 17 00:00:00 2001 From: Teresa Johnson Date: Tue, 24 Sep 2024 16:18:48 -0700 Subject: [PATCH] [MemProf] Reduce unnecessary context id computation (NFC) (#109857) One of the memory reduction techniques was to compute node context ids on the fly. This reduced memory at the expense of some compile time increase. For a large binary we were spending a lot of time invoking getContextIds on the node during assignStackNodesPostOrder, because we were iterating through the stack ids for a call from leaf to root (first to last node in the parlance used in that code). However, all calls for a given entry in the StackIdToMatchingCalls map share the same last node, so we can borrow the approach used by similar code in updateStackNodes and compute the context ids on the last node once, then iterate each call's stack ids in reverse order while reusing the last node's context ids. This reduced the thin link time by 43% for a large target. It isn't clear why there wasn't a similar increase measured when introducing the node context id recomputation, but the compile time was longer to start with then. --- .../IPO/MemProfContextDisambiguation.cpp | 44 +++++++++++++------ 1 file changed, 30 insertions(+), 14 deletions(-) diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp index 6927fe538e367..576a31f8b86ae 100644 --- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp +++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp @@ -1362,12 +1362,22 @@ void CallsiteContextGraph:: } } +#ifndef NDEBUG // Find the node for the last stack id, which should be the same // across all calls recorded for this id, and is this node's id. uint64_t LastId = Node->OrigStackOrAllocId; ContextNode *LastNode = getNodeForStackId(LastId); // We should only have kept stack ids that had nodes. assert(LastNode); + assert(LastNode == Node); +#else + ContextNode *LastNode = Node; +#endif + + // Compute the last node's context ids once, as it is shared by all calls in + // this entry. + DenseSet LastNodeContextIds = LastNode->getContextIds(); + assert(!LastNodeContextIds.empty()); for (unsigned I = 0; I < Calls.size(); I++) { auto &[Call, Ids, Func, SavedContextIds] = Calls[I]; @@ -1389,40 +1399,43 @@ void CallsiteContextGraph:: assert(LastId == Ids.back()); - ContextNode *FirstNode = getNodeForStackId(Ids[0]); - assert(FirstNode); - // Recompute the context ids for this stack id sequence (the // intersection of the context ids of the corresponding nodes). // Start with the ids we saved in the map for this call, which could be // duplicated context ids. We have to recompute as we might have overlap // overlap between the saved context ids for different last nodes, and // removed them already during the post order traversal. - set_intersect(SavedContextIds, FirstNode->getContextIds()); - ContextNode *PrevNode = nullptr; - for (auto Id : Ids) { + set_intersect(SavedContextIds, LastNodeContextIds); + ContextNode *PrevNode = LastNode; + bool Skip = false; + // Iterate backwards through the stack Ids, starting after the last Id + // in the list, which was handled once outside for all Calls. + for (auto IdIter = Ids.rbegin() + 1; IdIter != Ids.rend(); IdIter++) { + auto Id = *IdIter; ContextNode *CurNode = getNodeForStackId(Id); // We should only have kept stack ids that had nodes and weren't // recursive. assert(CurNode); assert(!CurNode->Recursive); - if (!PrevNode) { - PrevNode = CurNode; - continue; - } - auto *Edge = CurNode->findEdgeFromCallee(PrevNode); + + auto *Edge = CurNode->findEdgeFromCaller(PrevNode); if (!Edge) { - SavedContextIds.clear(); + Skip = true; break; } PrevNode = CurNode; + + // Update the context ids, which is the intersection of the ids along + // all edges in the sequence. set_intersect(SavedContextIds, Edge->getContextIds()); // If we now have no context ids for clone, skip this call. - if (SavedContextIds.empty()) + if (SavedContextIds.empty()) { + Skip = true; break; + } } - if (SavedContextIds.empty()) + if (Skip) continue; // Create new context node. @@ -1433,6 +1446,9 @@ void CallsiteContextGraph:: NonAllocationCallToContextNodeMap[Call] = NewNode; NewNode->AllocTypes = computeAllocType(SavedContextIds); + ContextNode *FirstNode = getNodeForStackId(Ids[0]); + assert(FirstNode); + // Connect to callees of innermost stack frame in inlined call chain. // This updates context ids for FirstNode's callee's to reflect those // moved to NewNode.