Skip to content

Commit

Permalink
[MemProf] Fix the option to disable memprof ICP (#112917)
Browse files Browse the repository at this point in the history
The -enable-memprof-indirect-call-support meant to guard the recently
added memprof ICP support was not used in enough places. Specifically,
it was not checked in mayHaveMemprofSummary, which is called from the
ThinLTO backend applyImports. This led to failures when checking the
callsite records, as we incorrectly expected records for indirect calls.

Fix the option to be checked in all necessary locations, and add
testing.
  • Loading branch information
teresajohnson authored Oct 18, 2024
1 parent 6c60ead commit 6264288
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 4 deletions.
10 changes: 9 additions & 1 deletion llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -503,6 +503,10 @@ static void computeFunctionSummary(
if (!IsThinLTO)
continue;

// Skip indirect calls if we haven't enabled memprof ICP.
if (!CalledFunction && !EnableMemProfIndirectCallSupport)
continue;

// Ensure we keep this analysis in sync with the handling in the ThinLTO
// backend (see MemProfContextDisambiguation::applyImport). Save this call
// so that we can skip it in checking the reverse case later.
Expand Down Expand Up @@ -561,7 +565,8 @@ static void computeFunctionSummary(
auto CalleeValueInfo =
Index.getOrInsertValueInfo(cast<GlobalValue>(CalledValue));
Callsites.push_back({CalleeValueInfo, StackIdIndices});
} else if (EnableMemProfIndirectCallSupport) {
} else {
assert(EnableMemProfIndirectCallSupport);
// For indirect callsites, create multiple Callsites, one per target.
// This enables having a different set of clone versions per target,
// and we will apply the cloning decisions while speculatively
Expand Down Expand Up @@ -1223,6 +1228,9 @@ bool llvm::mayHaveMemprofSummary(const CallBase *CB) {
if (CI && CalledFunction->isIntrinsic())
return false;
} else {
// Skip indirect calls if we haven't enabled memprof ICP.
if (!EnableMemProfIndirectCallSupport)
return false;
// Skip inline assembly calls.
if (CI && CI->isInlineAsm())
return false;
Expand Down
45 changes: 42 additions & 3 deletions llvm/test/ThinLTO/X86/memprof-icp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -76,17 +76,18 @@
;; for each profiled target in the VP metadata. They will have the same stackIds
;; since the debug information for the callsite is the same.
; RUN: llvm-dis %t/foo.o -o - | FileCheck %s --check-prefix=CALLSITES
; CALLSITES: gv: (name: "_Z3fooR2B0j", {{.*}} callsites: ((callee: ^{{[0-9]+}}, clones: (0), stackIds: (16345663650247127235)), (callee: ^{{[0-9]+}}, clones: (0), stackIds: (16345663650247127235)))
; CALLSITES: gv: (name: "_Z3fooR2B0j", {{.*}} callsites: ((callee: ^{{[0-9]+}}, clones: (0), stackIds: (16345663650247127235)), (callee: ^{{[0-9]+}}, clones: (0), stackIds: (16345663650247127235))

;; Make sure that we don't get the synthesized callsite records if the
;; -enable-memprof-indirect-call-support flag is false.
; RUN: opt -thinlto-bc %t/foo.ll -enable-memprof-indirect-call-support=false -o - \
; RUN: | llvm-dis -o - | FileCheck %s --implicit-check-not callsites
; RUN: opt -thinlto-bc %t/foo.ll -enable-memprof-indirect-call-support=false >%t/foo.noicp.o
; RUN: llvm-dis %t/foo.noicp.o -o - | FileCheck %s --implicit-check-not "stackIds: (16345663650247127235)"

;; First perform in-process ThinLTO
; RUN: llvm-lto2 run %t/main.o %t/foo.o -enable-memprof-context-disambiguation \
; RUN: -supports-hot-cold-new \
; RUN: -r=%t/foo.o,_Z3fooR2B0j,plx \
; RUN: -r=%t/foo.o,_Z3xyzR2B0j, \
; RUN: -r=%t/main.o,_Z3fooR2B0j, \
; RUN: -r=%t/main.o,_Znwm, \
; RUN: -r=%t/main.o,_ZdlPvm, \
Expand Down Expand Up @@ -116,6 +117,7 @@
; RUN: -supports-hot-cold-new \
; RUN: -thinlto-distributed-indexes \
; RUN: -r=%t/foo.o,_Z3fooR2B0j,plx \
; RUN: -r=%t/foo.o,_Z3xyzR2B0j, \
; RUN: -r=%t/main.o,_Z3fooR2B0j, \
; RUN: -r=%t/main.o,_Znwm, \
; RUN: -r=%t/main.o,_ZdlPvm, \
Expand All @@ -141,6 +143,36 @@
; RUN: %t/foo.o -S 2>&1 | FileCheck %s --check-prefix=IR \
; RUN: --check-prefix=STATS-BE-DISTRIB --check-prefix=REMARKS-FOO

;; Retry with the ICP-disabled object file, and make sure we disable it again
;; so we don't look for the synthesized callsite records when applying imports.
;; We should not get any cloning.
; RUN: llvm-lto2 run %t/main.o %t/foo.noicp.o -enable-memprof-context-disambiguation \
; RUN: -enable-memprof-indirect-call-support=false \
; RUN: -supports-hot-cold-new \
; RUN: -r=%t/foo.noicp.o,_Z3fooR2B0j,plx \
; RUN: -r=%t/foo.noicp.o,_Z3xyzR2B0j, \
; RUN: -r=%t/main.o,_Z3fooR2B0j, \
; RUN: -r=%t/main.o,_Znwm, \
; RUN: -r=%t/main.o,_ZdlPvm, \
; RUN: -r=%t/main.o,_Z8externalPi, \
; RUN: -r=%t/main.o,main,plx \
; RUN: -r=%t/main.o,_ZN2B03barEj,plx \
; RUN: -r=%t/main.o,_ZN1B3barEj,plx \
; RUN: -r=%t/main.o,_ZTV1B,plx \
; RUN: -r=%t/main.o,_ZTVN10__cxxabiv120__si_class_type_infoE,plx \
; RUN: -r=%t/main.o,_ZTS1B,plx \
; RUN: -r=%t/main.o,_ZTVN10__cxxabiv117__class_type_infoE,plx \
; RUN: -r=%t/main.o,_ZTS2B0,plx \
; RUN: -r=%t/main.o,_ZTI2B0,plx \
; RUN: -r=%t/main.o,_ZTI1B,plx \
; RUN: -r=%t/main.o,_ZTV2B0,plx \
; RUN: -thinlto-threads=1 \
; RUN: -memprof-verify-ccg -memprof-verify-nodes \
; RUN: -pass-remarks=. -save-temps \
; RUN: -o %t.noicp.out 2>&1 | FileCheck %s --implicit-check-not "created clone"

; RUN: llvm-dis %t.noicp.out.2.4.opt.bc -o - | FileCheck %s --implicit-check-not "_Z3fooR2B0j.memprof"

; REMARKS-MAIN: call in clone main assigned to call function clone _Z3fooR2B0j.memprof.1
; REMARKS-MAIN: call in clone main assigned to call function clone _Z3fooR2B0j.memprof.1
; REMARKS-MAIN: created clone _ZN2B03barEj.memprof.1
Expand Down Expand Up @@ -215,15 +247,22 @@
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

declare i32 @_Z3xyzR2B0j(ptr %b)

define i32 @_Z3fooR2B0j(ptr %b) {
entry:
%0 = load ptr, ptr %b, align 8
%call = tail call i32 %0(ptr null, i32 0), !prof !0, !callsite !1
;; Add a dummy call to ensure that we have some callsite metadata,
;; which triggers callsite record checking in the ThinLTO backend
;; even with -enable-memprof-indirect-call-support=false.
%call2 = call i32 @_Z3xyzR2B0j(ptr null, i32 0), !callsite !2
ret i32 0
}

!0 = !{!"VP", i32 0, i64 4, i64 4445083295448962937, i64 2, i64 -2718743882639408571, i64 2}
!1 = !{i64 -2101080423462424381}
!2 = !{i64 1234}

;--- main.ll
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
Expand Down

0 comments on commit 6264288

Please sign in to comment.