Skip to content

[MemProf] Add matching statistics and tracing #94814

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 81 additions & 8 deletions llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -149,11 +149,33 @@ static cl::opt<bool> ClMemProfMatchHotColdNew(
"Match allocation profiles onto existing hot/cold operator new calls"),
cl::Hidden, cl::init(false));

// Hidden option, off by default. When set, each allocation context matched
// while reading profiles is recorded, and once all functions in the module
// have been processed one summary line per recorded context (allocation type,
// full stack id, total profiled size, match status) is printed to errs().
static cl::opt<bool>
ClPrintMemProfMatchInfo("memprof-print-match-info",
cl::desc("Print matching stats for each allocation "
"context in this module's profiles"),
cl::Hidden, cl::init(false));

// Instrumentation statistics
STATISTIC(NumInstrumentedReads, "Number of instrumented reads");
STATISTIC(NumInstrumentedWrites, "Number of instrumented writes");
STATISTIC(NumSkippedStackReads, "Number of non-instrumented stack reads");
STATISTIC(NumSkippedStackWrites, "Number of non-instrumented stack writes");

// Matching statistics
STATISTIC(NumOfMemProfMissing, "Number of functions without memory profile.");
// Incremented when the profile lookup for a function reports a hash mismatch.
STATISTIC(NumOfMemProfMismatch,
"Number of functions having mismatched memory profile hash.");
// Incremented once per function whose memory profile record is processed.
STATISTIC(NumOfMemProfFunc, "Number of functions having valid memory profile.");
// Incremented once per entry in a processed record's AllocSites.
STATISTIC(NumOfMemProfAllocContextProfiles,
"Number of alloc contexts in memory profile.");
// Incremented once per entry in a processed record's CallSites.
STATISTIC(NumOfMemProfCallSiteProfiles,
"Number of callsites in memory profile.");
// Incremented once per profiled allocation context whose call stack includes
// the inlined call stack of the allocation call being matched.
STATISTIC(NumOfMemProfMatchedAllocContexts,
"Number of matched memory profile alloc contexts.");
// Incremented once per allocation call that ended up with at least one matched
// context (i.e. its call stack trie is non-empty).
STATISTIC(NumOfMemProfMatchedAllocs,
"Number of matched memory profile allocs.");
// Incremented when a profiled callsite matches an instruction's inlined call
// stack and callsite metadata is added for it.
STATISTIC(NumOfMemProfMatchedCallSites,
"Number of matched memory profile callsites.");

namespace {

Expand Down Expand Up @@ -637,15 +659,30 @@ static uint64_t computeStackId(const memprof::Frame &Frame) {
return computeStackId(Frame.Function, Frame.LineOffset, Frame.Column);
}

static void addCallStack(CallStackTrie &AllocTrie,
const AllocationInfo *AllocInfo) {
// Helper to generate a single hash id for a given callstack, used for emitting
// matching statistics and useful for uniquing such statistics across modules.
//
// The (Function, LineOffset, Column) triple of every frame in CallStack is
// fed, in order, into a BLAKE3 hash truncated to 8 bytes. Those 8 bytes are
// then copied verbatim into the returned uint64_t.
static uint64_t
computeFullStackId(const SmallVectorImpl<memprof::Frame> &CallStack) {
  llvm::HashBuilder<llvm::TruncatedBLAKE3<8>, llvm::endianness::little>
      HashBuilder;
  for (const auto &F : CallStack)
    HashBuilder.add(F.Function, F.LineOffset, F.Column);
  llvm::BLAKE3Result<8> Hash = HashBuilder.final();
  uint64_t Id;
  // Bound the copy by the destination and check at compile time that the
  // truncated hash fills it exactly, so that changing the hash width can
  // neither overflow nor leave part of Id uninitialized.
  static_assert(sizeof(Hash) == sizeof(Id),
                "truncated hash must be exactly as wide as the returned id");
  std::memcpy(&Id, Hash.data(), sizeof(Id));
  return Id;
}

// Records the profiled allocation context described by AllocInfo in AllocTrie.
// The allocation type is obtained from getAllocType using the context's total
// lifetime access density, allocation count and total lifetime; each frame of
// the context's call stack is converted to a stack id via computeStackId.
// Returns the allocation type that was recorded for the context.
static AllocationType addCallStack(CallStackTrie &AllocTrie,
                                   const AllocationInfo *AllocInfo) {
  const auto &MIB = AllocInfo->Info;
  const auto ContextType =
      getAllocType(MIB.getTotalLifetimeAccessDensity(), MIB.getAllocCount(),
                   MIB.getTotalLifetime());

  SmallVector<uint64_t> FrameIds;
  for (const auto &Entry : AllocInfo->CallStack)
    FrameIds.push_back(computeStackId(Entry));

  AllocTrie.addCallStack(ContextType, FrameIds);
  return ContextType;
}

// Helper to compare the InlinedCallStack computed from an instruction's debug
Expand Down Expand Up @@ -701,9 +738,16 @@ static bool isNewWithHotColdVariant(Function *Callee,
}
}

static void readMemprof(Module &M, Function &F,
IndexedInstrProfReader *MemProfReader,
const TargetLibraryInfo &TLI) {
// Information recorded per allocation context for match-info printing. Entries
// are stored in a map keyed by the context's full stack id. Field order
// matters: entries are created with positional brace initialization.
struct AllocMatchInfo {
// Total profiled size of the context, in bytes.
uint64_t TotalSize = 0;
// Allocation type that was assigned to the context when it was matched.
AllocationType AllocType = AllocationType::None;
// Whether the context was matched to an allocation in the IR.
bool Matched = false;
};

static void
readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
const TargetLibraryInfo &TLI,
std::map<uint64_t, AllocMatchInfo> &FullStackIdToAllocMatchInfo) {
auto &Ctx = M.getContext();
// Previously we used getIRPGOFuncName() here. If F is local linkage,
// getIRPGOFuncName() returns FuncName with prefix 'FileName;'. But
Expand All @@ -727,6 +771,7 @@ static void readMemprof(Module &M, Function &F,
SkipWarning = !PGOWarnMissing;
LLVM_DEBUG(dbgs() << "unknown function");
} else if (Err == instrprof_error::hash_mismatch) {
NumOfMemProfMismatch++;
SkipWarning =
NoPGOWarnMismatch ||
(NoPGOWarnMismatchComdatWeak &&
Expand All @@ -748,6 +793,8 @@ static void readMemprof(Module &M, Function &F,
return;
}

NumOfMemProfFunc++;

// Detect if there are non-zero column numbers in the profile. If not,
// treat all column numbers as 0 when matching (i.e. ignore any non-zero
// columns in the IR). The profiled binary might have been built with
Expand All @@ -762,6 +809,7 @@ static void readMemprof(Module &M, Function &F,
std::map<uint64_t, std::set<std::pair<const SmallVector<Frame> *, unsigned>>>
LocHashToCallSites;
for (auto &AI : MemProfRec->AllocSites) {
NumOfMemProfAllocContextProfiles++;
// Associate the allocation info with the leaf frame. The later matching
// code will match any inlined call sequences in the IR with a longer prefix
// of call stack frames.
Expand All @@ -770,6 +818,7 @@ static void readMemprof(Module &M, Function &F,
ProfileHasColumns |= AI.CallStack[0].Column;
}
for (auto &CS : MemProfRec->CallSites) {
NumOfMemProfCallSiteProfiles++;
// Need to record all frames from leaf up to and including this function,
// as any of these may or may not have been inlined at this point.
unsigned Idx = 0;
Expand Down Expand Up @@ -863,13 +912,23 @@ static void readMemprof(Module &M, Function &F,
// If we found and thus matched all frames on the call, include
// this MIB.
if (stackFrameIncludesInlinedCallStack(AllocInfo->CallStack,
InlinedCallStack))
addCallStack(AllocTrie, AllocInfo);
InlinedCallStack)) {
NumOfMemProfMatchedAllocContexts++;
auto AllocType = addCallStack(AllocTrie, AllocInfo);
// Record information about the allocation if match info printing
// was requested.
if (ClPrintMemProfMatchInfo) {
auto FullStackId = computeFullStackId(AllocInfo->CallStack);
FullStackIdToAllocMatchInfo[FullStackId] = {
AllocInfo->Info.getTotalSize(), AllocType, /*Matched=*/true};
}
}
}
// We might not have matched any to the full inlined call stack.
// But if we did, create and attach metadata, or a function attribute if
// all contexts have identical profiled behavior.
if (!AllocTrie.empty()) {
NumOfMemProfMatchedAllocs++;
// MemprofMDAttached will be false if a function attribute was
// attached.
bool MemprofMDAttached = AllocTrie.buildAndAttachMIBMetadata(CI);
Expand Down Expand Up @@ -897,6 +956,7 @@ static void readMemprof(Module &M, Function &F,
// attach call stack metadata.
if (stackFrameIncludesInlinedCallStack(
*CallStackIdx.first, InlinedCallStack, CallStackIdx.second)) {
NumOfMemProfMatchedCallSites++;
addCallsiteMetadata(I, InlinedCallStack, Ctx);
// Only need to find one with a matching call stack and add a single
// callsite metadata.
Expand Down Expand Up @@ -942,12 +1002,25 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {

auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();

// Map from the stack hash of each allocation context in the function
// profiles to the total profiled size (bytes), allocation type, and whether
// we matched it to an allocation in the IR.
std::map<uint64_t, AllocMatchInfo> FullStackIdToAllocMatchInfo;

for (auto &F : M) {
if (F.isDeclaration())
continue;

const TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
readMemprof(M, F, MemProfReader.get(), TLI);
readMemprof(M, F, MemProfReader.get(), TLI, FullStackIdToAllocMatchInfo);
}

if (ClPrintMemProfMatchInfo) {
for (const auto &[Id, Info] : FullStackIdToAllocMatchInfo)
errs() << "MemProf " << getAllocTypeAttributeString(Info.AllocType)
<< " context with id " << Id << " has total profiled size "
<< Info.TotalSize << (Info.Matched ? " is" : " not")
<< " matched\n";
}

return PreservedAnalyses::none();
Expand Down
21 changes: 20 additions & 1 deletion llvm/test/Transforms/PGOProfile/memprof.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
; REQUIRES: zlib
;; Avoid failures on big-endian systems that can't read the profile properly
; REQUIRES: x86_64-linux
;; -stats requires asserts
; REQUIRES: asserts

;; TODO: Use text profile inputs once that is available for memprof.
;; # To update the Inputs below, run Inputs/update_memprof_inputs.sh.
Expand All @@ -25,7 +27,7 @@
; ALL-NOT: no profile data available for function

;; Using a memprof-only profile for memprof-use should only give memprof metadata
; RUN: opt < %s -passes='memprof-use<profile-filename=%t.memprofdata>' -pgo-warn-missing-function -S 2>&1 | FileCheck %s --check-prefixes=MEMPROF,ALL,MEMPROFONLY
; RUN: opt < %s -passes='memprof-use<profile-filename=%t.memprofdata>' -pgo-warn-missing-function -S -memprof-print-match-info -stats 2>&1 | FileCheck %s --check-prefixes=MEMPROF,ALL,MEMPROFONLY,MEMPROFMATCHINFO,MEMPROFSTATS
; There should not be any PGO metadata
; MEMPROFONLY-NOT: !prof

Expand Down Expand Up @@ -61,6 +63,15 @@
;; give both memprof and pgo metadata.
; RUN: opt < %s -passes='pgo-instr-use,memprof-use<profile-filename=%t.pgomemprofdata>' -pgo-test-profile-file=%t.pgomemprofdata -pgo-warn-missing-function -S 2>&1 | FileCheck %s --check-prefixes=MEMPROF,ALL,PGO

; MEMPROFMATCHINFO: MemProf notcold context with id 1093248920606587996 has total profiled size 10 is matched
; MEMPROFMATCHINFO: MemProf notcold context with id 5725971306423925017 has total profiled size 10 is matched
; MEMPROFMATCHINFO: MemProf notcold context with id 6792096022461663180 has total profiled size 10 is matched
; MEMPROFMATCHINFO: MemProf cold context with id 8525406123785421946 has total profiled size 10 is matched
; MEMPROFMATCHINFO: MemProf cold context with id 11714230664165068698 has total profiled size 10 is matched
; MEMPROFMATCHINFO: MemProf cold context with id 15737101490731057601 has total profiled size 10 is matched
; MEMPROFMATCHINFO: MemProf cold context with id 16342802530253093571 has total profiled size 10 is matched
; MEMPROFMATCHINFO: MemProf cold context with id 18254812774972004394 has total profiled size 10 is matched

; ModuleID = 'memprof.cc'
source_filename = "memprof.cc"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
Expand Down Expand Up @@ -346,6 +357,14 @@ for.end: ; preds = %for.cond
; MEMPROFNOCOLINFO: ![[C10]] = !{i64 -4535090212904553409}
; MEMPROFNOCOLINFO: ![[C11]] = !{i64 3577763375057267810}

; MEMPROFSTATS: 8 memprof - Number of alloc contexts in memory profile.
; MEMPROFSTATS: 10 memprof - Number of callsites in memory profile.
; MEMPROFSTATS: 6 memprof - Number of functions having valid memory profile.
; MEMPROFSTATS: 8 memprof - Number of matched memory profile alloc contexts.
; MEMPROFSTATS: 3 memprof - Number of matched memory profile allocs.
; MEMPROFSTATS: 10 memprof - Number of matched memory profile callsites.


; Function Attrs: argmemonly nofree nounwind willreturn writeonly
declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #3

Expand Down
Loading