
Revert "[MemProf] Optionally save context size info on largest cold allocations" #142688


Merged: 1 commit merged into main from revert-142507-memprof_max_cold_thresh on Jun 3, 2025

Conversation

teresajohnson (Contributor)

Reverts #142507 due to buildbot failures that I will look into tomorrow.

@llvmbot added the PGO (Profile Guided Optimizations), LTO (Link time optimization, regular/full LTO or ThinLTO), llvm:analysis, and llvm:transforms labels on Jun 3, 2025
@teresajohnson merged commit 6c1091e into main on Jun 3, 2025
11 of 14 checks passed
@teresajohnson deleted the revert-142507-memprof_max_cold_thresh branch on Jun 3, 2025 at 23:05
@llvmbot (Member) commented on Jun 3, 2025

@llvm/pr-subscribers-llvm-analysis
@llvm/pr-subscribers-lto

@llvm/pr-subscribers-llvm-transforms

Author: Teresa Johnson (teresajohnson)

Changes

Reverts llvm/llvm-project#142507 due to buildbot failures that I will look into tomorrow.


Patch is 29.01 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/142688.diff

9 Files Affected:

  • (modified) llvm/include/llvm/Analysis/MemoryProfileInfo.h (+1-18)
  • (modified) llvm/lib/Analysis/MemoryProfileInfo.cpp (+9-39)
  • (modified) llvm/lib/Analysis/ModuleSummaryAnalysis.cpp (+3-22)
  • (modified) llvm/lib/Bitcode/Reader/BitcodeReader.cpp (-8)
  • (modified) llvm/lib/Bitcode/Writer/BitcodeWriter.cpp (+8-18)
  • (modified) llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp (+3-2)
  • (modified) llvm/lib/Transforms/Instrumentation/MemProfiler.cpp (+16-9)
  • (removed) llvm/test/ThinLTO/X86/memprof-report-hinted-partial.ll (-73)
  • (removed) llvm/test/Transforms/PGOProfile/memprof_max_cold_threshold.test (-163)
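For orientation before reading the diff: the revert removes three small predicates from MemoryProfileInfo that decide when context size information is recorded and emitted. Below is a minimal standalone sketch of how they relate, reconstructed from the removed lines in the diff that follows; the plain variables are simplified stand-ins for the real cl::opt flags and are included only so the sketch compiles on its own.

// Simplified stand-ins for the cl::opt flags used by the removed predicates;
// a value of 100 (or false) means the corresponding feature is disabled.
static bool MemProfReportHintedSizes = false;
static unsigned MinClonedColdBytePercent = 100;
static unsigned MinCallsiteColdBytePercent = 100;
static unsigned MinPercentMaxColdSize = 100;

// Context size info is attached to every MIB when hinted-size reporting is on
// or a cloned-cold byte threshold has been set.
bool metadataIncludesAllContextSizeInfo() {
  return MemProfReportHintedSizes || MinClonedColdBytePercent < 100;
}

// Some (but possibly not all) MIBs may carry context size info once the
// largest-cold-context percentage is also in play.
bool metadataMayIncludeContextSizeInfo() {
  return metadataIncludesAllContextSizeInfo() || MinPercentMaxColdSize < 100;
}

// The allocation trie must record sizes whenever any consumer above, or the
// callsite cold-byte threshold, needs them.
bool recordContextSizeInfoForAnalysis() {
  return metadataMayIncludeContextSizeInfo() ||
         MinCallsiteColdBytePercent < 100;
}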
diff --git a/llvm/include/llvm/Analysis/MemoryProfileInfo.h b/llvm/include/llvm/Analysis/MemoryProfileInfo.h
index b042a717e4e49..8cbb8673b69f5 100644
--- a/llvm/include/llvm/Analysis/MemoryProfileInfo.h
+++ b/llvm/include/llvm/Analysis/MemoryProfileInfo.h
@@ -24,18 +24,6 @@ class OptimizationRemarkEmitter;
 
 namespace memprof {
 
-/// Whether the alloc memeprof metadata will include context size info for all
-/// MIBs.
-LLVM_ABI bool metadataIncludesAllContextSizeInfo();
-
-/// Whether the alloc memprof metadata may include context size info for some
-/// MIBs (but possibly not all).
-LLVM_ABI bool metadataMayIncludeContextSizeInfo();
-
-/// Whether we need to record the context size info in the alloc trie used to
-/// build metadata.
-LLVM_ABI bool recordContextSizeInfoForAnalysis();
-
 /// Build callstack metadata from the provided list of call stack ids. Returns
 /// the resulting metadata node.
 LLVM_ABI MDNode *buildCallstackMetadata(ArrayRef<uint64_t> CallStack,
@@ -99,9 +87,6 @@ class CallStackTrie {
   // allocations for which we apply non-context sensitive allocation hints.
   OptimizationRemarkEmitter *ORE;
 
-  // The maximum size of a cold allocation context, from the profile summary.
-  uint64_t MaxColdSize;
-
   void deleteTrieNode(CallStackTrieNode *Node) {
     if (!Node)
       return;
@@ -128,9 +113,7 @@ class CallStackTrie {
                      uint64_t &ColdBytes);
 
 public:
-  CallStackTrie(OptimizationRemarkEmitter *ORE = nullptr,
-                uint64_t MaxColdSize = 0)
-      : ORE(ORE), MaxColdSize(MaxColdSize) {}
+  CallStackTrie(OptimizationRemarkEmitter *ORE = nullptr) : ORE(ORE) {}
   ~CallStackTrie() { deleteTrieNode(Alloc); }
 
   bool empty() const { return Alloc == nullptr; }
diff --git a/llvm/lib/Analysis/MemoryProfileInfo.cpp b/llvm/lib/Analysis/MemoryProfileInfo.cpp
index c08024a38ffc2..347377522101a 100644
--- a/llvm/lib/Analysis/MemoryProfileInfo.cpp
+++ b/llvm/lib/Analysis/MemoryProfileInfo.cpp
@@ -46,25 +46,6 @@ cl::opt<unsigned> MinCallsiteColdBytePercent(
     cl::desc("Min percent of cold bytes at a callsite to discard non-cold "
              "contexts"));
 
-// Enable saving context size information for largest cold contexts, which can
-// be used to flag contexts for more aggressive cloning and reporting.
-cl::opt<unsigned> MinPercentMaxColdSize(
-    "memprof-min-percent-max-cold-size", cl::init(100), cl::Hidden,
-    cl::desc("Min percent of max cold bytes for critical cold context"));
-
-bool llvm::memprof::metadataIncludesAllContextSizeInfo() {
-  return MemProfReportHintedSizes || MinClonedColdBytePercent < 100;
-}
-
-bool llvm::memprof::metadataMayIncludeContextSizeInfo() {
-  return metadataIncludesAllContextSizeInfo() || MinPercentMaxColdSize < 100;
-}
-
-bool llvm::memprof::recordContextSizeInfoForAnalysis() {
-  return metadataMayIncludeContextSizeInfo() ||
-         MinCallsiteColdBytePercent < 100;
-}
-
 MDNode *llvm::memprof::buildCallstackMetadata(ArrayRef<uint64_t> CallStack,
                                               LLVMContext &Ctx) {
   SmallVector<Metadata *, 8> StackVals;
@@ -187,8 +168,7 @@ void CallStackTrie::addCallStack(MDNode *MIB) {
 static MDNode *createMIBNode(LLVMContext &Ctx, ArrayRef<uint64_t> MIBCallStack,
                              AllocationType AllocType,
                              ArrayRef<ContextTotalSize> ContextSizeInfo,
-                             const uint64_t MaxColdSize, uint64_t &TotalBytes,
-                             uint64_t &ColdBytes) {
+                             uint64_t &TotalBytes, uint64_t &ColdBytes) {
   SmallVector<Metadata *> MIBPayload(
       {buildCallstackMetadata(MIBCallStack, Ctx)});
   MIBPayload.push_back(
@@ -204,21 +184,12 @@ static MDNode *createMIBNode(LLVMContext &Ctx, ArrayRef<uint64_t> MIBCallStack,
 
   for (const auto &[FullStackId, TotalSize] : ContextSizeInfo) {
     TotalBytes += TotalSize;
-    bool LargeColdContext = false;
-    if (AllocType == AllocationType::Cold) {
+    if (AllocType == AllocationType::Cold)
       ColdBytes += TotalSize;
-      // If we have the max cold context size from summary information and have
-      // requested identification of contexts above a percentage of the max, see
-      // if this context qualifies.
-      if (MaxColdSize > 0 && MinPercentMaxColdSize < 100 &&
-          TotalSize * 100 >= MaxColdSize * MinPercentMaxColdSize)
-        LargeColdContext = true;
-    }
     // Only add the context size info as metadata if we need it in the thin
-    // link (currently if reporting of hinted sizes is enabled, we have
-    // specified a threshold for marking allocations cold after cloning, or we
-    // have identified this as a large cold context of interest above).
-    if (metadataIncludesAllContextSizeInfo() || LargeColdContext) {
+    // link (currently if reporting of hinted sizes is enabled or we have
+    // specified a threshold for marking allocations cold after cloning).
+    if (MemProfReportHintedSizes || MinClonedColdBytePercent < 100) {
       auto *FullStackIdMD = ValueAsMetadata::get(
           ConstantInt::get(Type::getInt64Ty(Ctx), FullStackId));
       auto *TotalSizeMD = ValueAsMetadata::get(
@@ -386,9 +357,9 @@ bool CallStackTrie::buildMIBNodes(CallStackTrieNode *Node, LLVMContext &Ctx,
   if (hasSingleAllocType(Node->AllocTypes)) {
     std::vector<ContextTotalSize> ContextSizeInfo;
     collectContextSizeInfo(Node, ContextSizeInfo);
-    MIBNodes.push_back(
-        createMIBNode(Ctx, MIBCallStack, (AllocationType)Node->AllocTypes,
-                      ContextSizeInfo, MaxColdSize, TotalBytes, ColdBytes));
+    MIBNodes.push_back(createMIBNode(Ctx, MIBCallStack,
+                                     (AllocationType)Node->AllocTypes,
+                                     ContextSizeInfo, TotalBytes, ColdBytes));
     return true;
   }
 
@@ -442,8 +413,7 @@ bool CallStackTrie::buildMIBNodes(CallStackTrieNode *Node, LLVMContext &Ctx,
   std::vector<ContextTotalSize> ContextSizeInfo;
   collectContextSizeInfo(Node, ContextSizeInfo);
   MIBNodes.push_back(createMIBNode(Ctx, MIBCallStack, AllocationType::NotCold,
-                                   ContextSizeInfo, MaxColdSize, TotalBytes,
-                                   ColdBytes));
+                                   ContextSizeInfo, TotalBytes, ColdBytes));
   return true;
 }
 
diff --git a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
index a317ac471a231..59fa1a4b03c37 100644
--- a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
+++ b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
@@ -525,7 +525,6 @@ static void computeFunctionSummary(
       if (MemProfMD) {
         std::vector<MIBInfo> MIBs;
         std::vector<std::vector<ContextTotalSize>> ContextSizeInfos;
-        bool HasNonZeroContextSizeInfos = false;
         for (auto &MDOp : MemProfMD->operands()) {
           auto *MIBMD = cast<const MDNode>(MDOp);
           MDNode *StackNode = getMIBStackNode(MIBMD);
@@ -545,8 +544,7 @@ static void computeFunctionSummary(
           }
           // If we have context size information, collect it for inclusion in
           // the summary.
-          assert(MIBMD->getNumOperands() > 2 ||
-                 !metadataIncludesAllContextSizeInfo());
+          assert(MIBMD->getNumOperands() > 2 || !MemProfReportHintedSizes);
           if (MIBMD->getNumOperands() > 2) {
             std::vector<ContextTotalSize> ContextSizes;
             for (unsigned I = 2; I < MIBMD->getNumOperands(); I++) {
@@ -560,31 +558,14 @@ static void computeFunctionSummary(
                                 ->getZExtValue();
               ContextSizes.push_back({FullStackId, TS});
             }
-            // Flag that we need to keep the ContextSizeInfos array for this
-            // alloc as it now contains non-zero context info sizes.
-            HasNonZeroContextSizeInfos = true;
             ContextSizeInfos.push_back(std::move(ContextSizes));
-          } else {
-            // The ContextSizeInfos must be in the same relative position as the
-            // associated MIB. In some cases we only include a ContextSizeInfo
-            // for a subset of MIBs in an allocation. To handle that, eagerly
-            // fill any MIB entries that don't have context size info metadata
-            // with a pair of 0s. Later on we will only use this array if it
-            // ends up containing any non-zero entries (see where we set
-            // HasNonZeroContextSizeInfos above).
-            ContextSizeInfos.push_back({{0, 0}});
           }
           MIBs.push_back(
               MIBInfo(getMIBAllocType(MIBMD), std::move(StackIdIndices)));
         }
         Allocs.push_back(AllocInfo(std::move(MIBs)));
-        assert(HasNonZeroContextSizeInfos ||
-               !metadataIncludesAllContextSizeInfo());
-        // We eagerly build the ContextSizeInfos array, but it will be filled
-        // with sub arrays of pairs of 0s if no MIBs on this alloc actually
-        // contained context size info metadata. Only save it if any MIBs had
-        // any such metadata.
-        if (HasNonZeroContextSizeInfos) {
+        assert(!ContextSizeInfos.empty() || !MemProfReportHintedSizes);
+        if (!ContextSizeInfos.empty()) {
           assert(Allocs.back().MIBs.size() == ContextSizeInfos.size());
           Allocs.back().ContextSizeInfos = std::move(ContextSizeInfos);
         }
diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index 105edb943eb7f..47388c232b1ad 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -8164,14 +8164,6 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
           ContextSizes.reserve(NumContextSizeInfoEntries);
           for (unsigned J = 0; J < NumContextSizeInfoEntries; J++) {
             assert(ContextIdIndex < PendingContextIds.size());
-            // Skip any 0 entries for MIBs without the context size info.
-            if (PendingContextIds[ContextIdIndex] == 0) {
-              // The size should also be 0 if the context was 0.
-              assert(!Record[I]);
-              ContextIdIndex++;
-              I++;
-              continue;
-            }
             // PendingContextIds read from the preceding FS_ALLOC_CONTEXT_IDS
             // should be in the same order as the total sizes.
             ContextSizes.push_back(
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index fad8ebfad9f9a..8789b3123cd60 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -23,7 +23,6 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
-#include "llvm/Analysis/MemoryProfileInfo.h"
 #include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/Bitcode/BitcodeCommon.h"
 #include "llvm/Bitcode/BitcodeReader.h"
@@ -4586,23 +4585,14 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() {
     Stream.EmitRecord(bitc::FS_STACK_IDS, Vals, StackIdAbbvId);
   }
 
-  unsigned ContextIdAbbvId = 0;
-  if (metadataMayIncludeContextSizeInfo()) {
-    // n x context id
-    auto ContextIdAbbv = std::make_shared<BitCodeAbbrev>();
-    ContextIdAbbv->Add(BitCodeAbbrevOp(bitc::FS_ALLOC_CONTEXT_IDS));
-    ContextIdAbbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
-    // The context ids are hashes that are close to 64 bits in size, so emitting
-    // as a pair of 32-bit fixed-width values is more efficient than a VBR if we
-    // are emitting them for all MIBs. Otherwise we use VBR to better compress 0
-    // values that are expected to more frequently occur in an alloc's memprof
-    // summary.
-    if (metadataIncludesAllContextSizeInfo())
-      ContextIdAbbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
-    else
-      ContextIdAbbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
-    ContextIdAbbvId = Stream.EmitAbbrev(std::move(ContextIdAbbv));
-  }
+  // n x context id
+  auto ContextIdAbbv = std::make_shared<BitCodeAbbrev>();
+  ContextIdAbbv->Add(BitCodeAbbrevOp(bitc::FS_ALLOC_CONTEXT_IDS));
+  ContextIdAbbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+  // The context ids are hashes that are close to 64 bits in size, so emitting
+  // as a pair of 32-bit fixed-width values is more efficient than a VBR.
+  ContextIdAbbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
+  unsigned ContextIdAbbvId = Stream.EmitAbbrev(std::move(ContextIdAbbv));
 
   // Abbrev for FS_PERMODULE_PROFILE.
   Abbv = std::make_shared<BitCodeAbbrev>();
diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
index cff38a8e68c6a..5b4350845b726 100644
--- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
+++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
@@ -2232,8 +2232,9 @@ IndexCallsiteContextGraph::IndexCallsiteContextGraph(
           CallStack<MIBInfo, SmallVector<unsigned>::const_iterator>
               EmptyContext;
           unsigned I = 0;
-          assert(!metadataMayIncludeContextSizeInfo() ||
-                 AN.ContextSizeInfos.size() == AN.MIBs.size());
+          assert(
+              (!MemProfReportHintedSizes && MinClonedColdBytePercent >= 100) ||
+              AN.ContextSizeInfos.size() == AN.MIBs.size());
           // Now add all of the MIBs and their stack nodes.
           for (auto &MIB : AN.MIBs) {
             CallStack<MIBInfo, SmallVector<unsigned>::const_iterator>
diff --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
index 3f277b1b84ac1..e06add7cf313b 100644
--- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
@@ -184,6 +184,10 @@ static cl::opt<bool> ClMemProfAttachCalleeGuids(
         "Attach calleeguids as value profile metadata for indirect calls."),
     cl::init(true), cl::Hidden);
 
+extern cl::opt<bool> MemProfReportHintedSizes;
+extern cl::opt<unsigned> MinClonedColdBytePercent;
+extern cl::opt<unsigned> MinCallsiteColdBytePercent;
+
 static cl::opt<unsigned> MinMatchedColdBytePercent(
     "memprof-matching-cold-threshold", cl::init(100), cl::Hidden,
     cl::desc("Min percent of cold bytes matched to hint allocation cold"));
@@ -295,6 +299,13 @@ class ModuleMemProfiler {
   Function *MemProfCtorFunction = nullptr;
 };
 
+// Options under which we need to record the context size info in the alloc trie
+// used to build metadata.
+bool recordContextSizeInfo() {
+  return MemProfReportHintedSizes || MinClonedColdBytePercent < 100 ||
+         MinCallsiteColdBytePercent < 100;
+}
+
 } // end anonymous namespace
 
 MemProfilerPass::MemProfilerPass() = default;
@@ -747,7 +758,7 @@ static AllocationType addCallStack(CallStackTrie &AllocTrie,
                                 AllocInfo->Info.getAllocCount(),
                                 AllocInfo->Info.getTotalLifetime());
   std::vector<ContextTotalSize> ContextSizeInfo;
-  if (recordContextSizeInfoForAnalysis()) {
+  if (recordContextSizeInfo()) {
     auto TotalSize = AllocInfo->Info.getTotalSize();
     assert(TotalSize);
     assert(FullStackId != 0);
@@ -992,7 +1003,7 @@ static void readMemprof(Module &M, Function &F,
                             &FullStackIdToAllocMatchInfo,
                         std::set<std::vector<uint64_t>> &MatchedCallSites,
                         DenseMap<uint64_t, LocToLocMap> &UndriftMaps,
-                        OptimizationRemarkEmitter &ORE, uint64_t MaxColdSize) {
+                        OptimizationRemarkEmitter &ORE) {
   auto &Ctx = M.getContext();
   // Previously we used getIRPGOFuncName() here. If F is local linkage,
   // getIRPGOFuncName() returns FuncName with prefix 'FileName;'. But
@@ -1181,7 +1192,7 @@ static void readMemprof(Module &M, Function &F,
         // We may match this instruction's location list to multiple MIB
         // contexts. Add them to a Trie specialized for trimming the contexts to
         // the minimal needed to disambiguate contexts with unique behavior.
-        CallStackTrie AllocTrie(&ORE, MaxColdSize);
+        CallStackTrie AllocTrie(&ORE);
         uint64_t TotalSize = 0;
         uint64_t TotalColdSize = 0;
         for (auto *AllocInfo : AllocInfoIter->second) {
@@ -1192,7 +1203,7 @@ static void readMemprof(Module &M, Function &F,
                                                  InlinedCallStack)) {
             NumOfMemProfMatchedAllocContexts++;
             uint64_t FullStackId = 0;
-            if (ClPrintMemProfMatchInfo || recordContextSizeInfoForAnalysis())
+            if (ClPrintMemProfMatchInfo || recordContextSizeInfo())
               FullStackId = computeFullStackId(AllocInfo->CallStack);
             auto AllocType = addCallStack(AllocTrie, AllocInfo, FullStackId);
             TotalSize += AllocInfo->Info.getTotalSize();
@@ -1329,10 +1340,6 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
   // call stack.
   std::set<std::vector<uint64_t>> MatchedCallSites;
 
-  uint64_t MaxColdSize = 0;
-  if (auto *MemProfSum = MemProfReader->getMemProfSummary())
-    MaxColdSize = MemProfSum->getMaxColdTotalSize();
-
   for (auto &F : M) {
     if (F.isDeclaration())
       continue;
@@ -1340,7 +1347,7 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
     const TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
     auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
     readMemprof(M, F, MemProfReader.get(), TLI, FullStackIdToAllocMatchInfo,
-                MatchedCallSites, UndriftMaps, ORE, MaxColdSize);
+                MatchedCallSites, UndriftMaps, ORE);
   }
 
   if (ClPrintMemProfMatchInfo) {
diff --git a/llvm/test/ThinLTO/X86/memprof-report-hinted-partial.ll b/llvm/test/ThinLTO/X86/memprof-report-hinted-partial.ll
deleted file mode 100644
index d4a3f9bca2cab..0000000000000
--- a/llvm/test/ThinLTO/X86/memprof-report-hinted-partial.ll
+++ /dev/null
@@ -1,73 +0,0 @@
-;; Test that we get hinted size reporting for just the subset of MIBs that
-;; contain context size info in the metadata.
-
-;; Generate the bitcode including ThinLTO summary. Specify
-;; -memprof-min-percent-max-cold-size (value doesn't matter) to indicate to
-;; the bitcode writer that it should expect and optimize for partial context
-;; size info.
-; RUN: opt -thinlto-bc -memprof-min-percent-max-cold-size=50 %s >%t.o
-
-; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
-; RUN:	-supports-hot-cold-new \
-; RUN:	-r=%t.o,main,plx \
-; RUN:	-r=%t.o,_Znam, \
-; RUN:	-memprof-report-hinted-sizes \
-; RUN:	-o %t.out 2>&1 | FileCheck %s --check-prefix=SIZES
-
-;; We should only get these two messages from -memprof-report-hinted-sizes
-;; as they are the only MIBs with recorded context size info.
-; SIZES-NOT: full allocation context
-; SIZES: Cold full allocation context 456 with total size 200 is Cold after cloning (context id 2)
-; SIZES: Cold full allocation context 789 with total size 300 is Cold after cloning (context id 2)
-; SIZES-NOT: full allocation context
-
-source_filename = "memprof-report-hinted-partial.ll"
-target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-define i32 @main() #0 {
-entry:
-  %call = call ptr @_Z3foov(), !callsite !0
-  %call1 = call ptr @_Z3foov(), !callsite !1
-  ret i32 0
-}
-
-define internal ptr @_Z3barv() #0 {
-entry:
-  %call = call ptr @_Znam(i64 0), !memprof !2, !callsite !7
-  ret ptr null
-}
-
-declare ptr @_Znam(i64)
-
-define internal ptr @_Z3bazv() #0 {
-entry:
-  %call = call ptr @_Z3barv(), !callsite !8
-  ret ptr null
-}
-
-define internal ptr @_Z3foov() #0 {
-entry:
-  %call = call ptr @_Z3bazv(), !callsite !9
-  ret ptr null
-}
-
-; uselistorder directives
-uselistorder ptr @_Z3foov, { 1, 0 }
-
-attributes #0 = { noinline optnone }
-
-!0 = !{i64 8632435727821051414}
-!1 = !{i64 -3421689549917153178}
-!2 = !{!3, !5, !13}
-!3 = !{!4, !"notcold"}
-!4 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414}
-!5 = !{!6, !"cold", !11, !12}
-!6 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178}
-!7 = !{i64 9086428284934...
[truncated]
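The central check that the revert removes from createMIBNode (visible in the MemoryProfileInfo.cpp hunk above) compares each cold context's size against the largest cold context size from the profile summary. The sketch below isolates that arithmetic; the helper name isLargeColdContext and the plain parameters are introduced here purely for illustration, since the original code inlines this test.

#include <cstdint>

// A cold context is flagged as large when its total size reaches
// MinPercentMaxColdSize percent of the largest cold context size recorded in
// the profile summary (MaxColdSize); leaving either value at its default
// (0 or 100) disables the check.
bool isLargeColdContext(uint64_t TotalSize, uint64_t MaxColdSize,
                        unsigned MinPercentMaxColdSize) {
  return MaxColdSize > 0 && MinPercentMaxColdSize < 100 &&
         TotalSize * 100 >= MaxColdSize * MinPercentMaxColdSize;
}

// Example: with MaxColdSize = 1000 and MinPercentMaxColdSize = 50, any cold
// context totalling 500 bytes or more qualifies and keeps its context size
// metadata in the thin link.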

@llvmbot (Member) commented on Jun 3, 2025

@llvm/pr-subscribers-pgo

