Skip to content

[Inline]Update value profile for non-call instructions #83769

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions llvm/include/llvm/IR/ProfDataUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -108,5 +108,8 @@ bool extractProfTotalWeight(const Instruction &I, uint64_t &TotalWeights);
/// a `prof` metadata reference to instruction `I`.
void setBranchWeights(Instruction &I, ArrayRef<uint32_t> Weights);

/// Scale the counts stored in the value profile metadata \p ProfData by the
/// ratio \p S / \p T and return the resulting metadata node.
/// Returns nullptr when \p ProfData is null.
MDNode *scaleValueProfile(const MDNode *ProfData, uint64_t S, uint64_t T);

} // namespace llvm
#endif
32 changes: 32 additions & 0 deletions llvm/lib/IR/ProfDataUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -190,4 +190,36 @@ void setBranchWeights(Instruction &I, ArrayRef<uint32_t> Weights) {
I.setMetadata(LLVMContext::MD_prof, BranchWeights);
}

/// Scale the counts of the value profile metadata \p ProfData by the ratio
/// \p S / \p T.
///
/// Operand 0 (the "VP" tag) is copied as is. The remaining operands are
/// walked in pairs starting at operand 1: the first operand of each pair is
/// copied unchanged and the second is read as an integer count and scaled.
/// Counts equal to NOMORE_ICP_MAGICNUM are kept verbatim.
///
/// \returns the scaled metadata node, or nullptr if \p ProfData is null. When
/// \p T is zero there is no ratio to apply, so the counts are left unscaled.
MDNode *scaleValueProfile(const MDNode *ProfData, uint64_t S, uint64_t T) {
  if (ProfData == nullptr)
    return nullptr;
  // cast<> (rather than dyn_cast<>) so that a non-string tag trips the cast
  // assertion instead of dereferencing a null pointer.
  assert(cast<MDString>(ProfData->getOperand(0))->getString() == "VP" &&
         "Expects value profile metadata");
  LLVMContext &C = ProfData->getContext();
  MDBuilder MDB(C);
  auto *Int64Ty = Type::getInt64Ty(C);
  // 128 bits are enough to hold the product of two 64-bit values.
  const APInt APS(128, S);
  const APInt APT(128, T);
  const unsigned NumOperands = ProfData->getNumOperands();

  SmallVector<Metadata *, 8> Vals;
  Vals.reserve(NumOperands);
  Vals.push_back(ProfData->getOperand(0));
  for (unsigned I = 1; I < NumOperands; I += 2) {
    Vals.push_back(ProfData->getOperand(I));
    const MDOperand &CountOp = ProfData->getOperand(I + 1);
    const uint64_t Count =
        mdconst::extract<ConstantInt>(CountOp)->getZExtValue();
    // Don't scale the magic number, and don't divide by a zero denominator:
    // in both cases the original count operand is reused untouched.
    if (T == 0 || Count == NOMORE_ICP_MAGICNUM) {
      Vals.push_back(CountOp);
      continue;
    }
    // Using APInt::udiv may be expensive, but most cases should fit 64 bits.
    APInt Val(128, Count);
    Val *= APS;
    // getLimitedValue() saturates at UINT64_MAX if the quotient is too wide.
    Vals.push_back(MDB.createConstant(
        ConstantInt::get(Int64Ty, Val.udiv(APT).getLimitedValue())));
  }
  return MDNode::get(C, Vals);
}

} // namespace llvm
26 changes: 23 additions & 3 deletions llvm/lib/Transforms/Utils/InlineFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,16 @@
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/IndirectCallVisitor.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryProfileInfo.h"
#include "llvm/Analysis/ObjCARCAnalysisUtils.h"
#include "llvm/Analysis/ObjCARCUtil.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/AttributeMask.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/AttributeMask.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constant.h"
Expand All @@ -55,6 +56,7 @@
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ProfDataUtils.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
Expand Down Expand Up @@ -1910,9 +1912,18 @@ void llvm::updateProfileCallee(
if (VMap) {
uint64_t CloneEntryCount = PriorEntryCount - NewEntryCount;
for (auto Entry : *VMap)
// FIXME: Update the profiles for invoke instruction after inline
if (isa<CallInst>(Entry.first))
if (auto *CI = dyn_cast_or_null<CallInst>(Entry.second))
if (auto *CI = dyn_cast_or_null<CallInst>(Entry.second)) {
CI->updateProfWeight(CloneEntryCount, PriorEntryCount);
Instruction *VPtr =
PGOIndirectCallVisitor::tryGetVTableInstruction(CI);
if (VPtr)
VPtr->setMetadata(
LLVMContext::MD_prof,
scaleValueProfile(VPtr->getMetadata(LLVMContext::MD_prof),
CloneEntryCount, PriorEntryCount));
}
}

if (EntryDelta) {
Expand All @@ -1922,8 +1933,17 @@ void llvm::updateProfileCallee(
// No need to update the callsite if it is pruned during inlining.
if (!VMap || VMap->count(&BB))
for (Instruction &I : BB)
if (CallInst *CI = dyn_cast<CallInst>(&I))
// FIXME: Update the profiles for invoke instruction after inline
if (CallInst *CI = dyn_cast<CallInst>(&I)) {
CI->updateProfWeight(NewEntryCount, PriorEntryCount);
Instruction *VPtr =
PGOIndirectCallVisitor::tryGetVTableInstruction(CI);
if (VPtr)
VPtr->setMetadata(
LLVMContext::MD_prof,
scaleValueProfile(VPtr->getMetadata(LLVMContext::MD_prof),
NewEntryCount, PriorEntryCount));
}
}
}

Expand Down
89 changes: 89 additions & 0 deletions llvm/test/Transforms/Inline/update_value_profile.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
; RUN: opt < %s -passes='require<profile-summary>,cgscc(inline)' -inline-threshold=100 -S | FileCheck %s
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

;@_ZTV4Base = constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr @_ZN4Base3keyEv, ptr @_ZN4Base4funcEi] }
;@_ZTV7Derived = constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr @_ZN4Base3keyEv, ptr @_ZN7Derived4funcEi] }

; Function called directly by both @caller1 and @caller2. Its first load
; carries "VP" metadata !21 and its indirect call carries "VP" metadata !17;
; the autogenerated assertions below expect both to remain attached with
; updated counts.
define i32 @callee(ptr %0, i32 %1) !prof !20 {
; CHECK-LABEL: define i32 @callee(
; CHECK-SAME: ptr [[TMP0:%.*]], i32 [[TMP1:%.*]]) !prof [[PROF0:![0-9]+]] {
; CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP0]], align 8, !prof [[PROF1:![0-9]+]]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 8
; CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
; CHECK-NEXT: [[TMP6:%.*]] = tail call i32 [[TMP5]](ptr [[TMP0]], i32 [[TMP1]]), !prof [[PROF2:![0-9]+]]
; CHECK-NEXT: ret i32 [[TMP6]]
;
%3 = load ptr, ptr %0, !prof !21
%5 = getelementptr inbounds i8, ptr %3, i64 8
%6 = load ptr, ptr %5
%7 = tail call i32 %6(ptr %0, i32 %1), !prof !17
ret i32 %7
}

; Caller annotated with !18 (function_entry_count 1000). The assertions below
; expect the direct call to @callee to be replaced by the callee's load /
; getelementptr / load / indirect-call sequence, with "VP" metadata on the
; first load and on the indirect call.
define i32 @caller1(i32 %0) !prof !18 {
; CHECK-LABEL: define i32 @caller1(
; CHECK-SAME: i32 [[TMP0:%.*]]) !prof [[PROF3:![0-9]+]] {
; CHECK-NEXT: [[TMP2:%.*]] = tail call ptr @_Z10createTypei(i32 [[TMP0]])
; CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !prof [[PROF4:![0-9]+]]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 8
; CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
; CHECK-NEXT: [[TMP6:%.*]] = tail call i32 [[TMP5]](ptr [[TMP2]], i32 [[TMP0]]), !prof [[PROF5:![0-9]+]]
; CHECK-NEXT: ret i32 [[TMP6]]
;
%2 = tail call ptr @_Z10createTypei(i32 %0)
%3 = tail call i32 @callee(ptr %2, i32 %0)
ret i32 %3
}

; Caller annotated with !19 (function_entry_count 600). Same shape as
; @caller1: the assertions below expect the direct call to @callee to be
; replaced by the callee's body, with "VP" metadata on the first load and on
; the indirect call.
define i32 @caller2(i32 %0) !prof !19 {
; CHECK-LABEL: define i32 @caller2(
; CHECK-SAME: i32 [[TMP0:%.*]]) !prof [[PROF6:![0-9]+]] {
; CHECK-NEXT: [[TMP2:%.*]] = tail call ptr @_Z10createTypei(i32 [[TMP0]])
; CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !prof [[PROF7:![0-9]+]]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 8
; CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
; CHECK-NEXT: [[TMP6:%.*]] = tail call i32 [[TMP5]](ptr [[TMP2]], i32 [[TMP0]]), !prof [[PROF8:![0-9]+]]
; CHECK-NEXT: ret i32 [[TMP6]]
;
%2 = tail call ptr @_Z10createTypei(i32 %0)
%3 = tail call i32 @callee(ptr %2, i32 %0)
ret i32 %3
}

declare ptr @_Z10createTypei(i32)

!1 = !{i32 1, !"ProfileSummary", !2}
!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
!3 = !{!"ProfileFormat", !"InstrProf"}
!4 = !{!"TotalCount", i64 10000}
!5 = !{!"MaxCount", i64 10}
!6 = !{!"MaxInternalCount", i64 1}
!7 = !{!"MaxFunctionCount", i64 1000}
!8 = !{!"NumCounts", i64 3}
!9 = !{!"NumFunctions", i64 3}
!10 = !{!"DetailedSummary", !11}
!11 = !{!12, !13, !14}
!12 = !{i32 10000, i64 100, i32 1}
!13 = !{i32 999000, i64 100, i32 1}
!14 = !{i32 999999, i64 1, i32 2}
;!15 = !{i64 16, !"_ZTS4Base"}
;!16= !{i64 16, !"_ZTS7Derived"}
!17 = !{!"VP", i32 0, i64 1600, i64 15186643663281959480, i64 1000, i64 15101948577241817854, i64 600}
!18 = !{!"function_entry_count", i64 1000}
!19 = !{!"function_entry_count", i64 600}
!20 = !{!"function_entry_count", i64 1700}
!21 = !{!"VP", i32 2, i64 1600, i64 1960855528937986108, i64 1000, i64 13870436605473471591, i64 600}

;.
; CHECK: [[PROF0]] = !{!"function_entry_count", i64 100}
; CHECK: [[PROF1]] = !{!"VP", i32 2, i64 94, i64 1960855528937986108, i64 58, i64 -4576307468236080025, i64 35}
; CHECK: [[PROF2]] = !{!"VP", i32 0, i64 94, i64 -3260100410427592136, i64 58, i64 -3344795496467733762, i64 35}
; CHECK: [[PROF3]] = !{!"function_entry_count", i64 1000}
; CHECK: [[PROF4]] = !{!"VP", i32 2, i64 941, i64 1960855528937986108, i64 588, i64 -4576307468236080025, i64 352}
; CHECK: [[PROF5]] = !{!"VP", i32 0, i64 941, i64 -3260100410427592136, i64 588, i64 -3344795496467733762, i64 352}
; CHECK: [[PROF6]] = !{!"function_entry_count", i64 600}
; CHECK: [[PROF7]] = !{!"VP", i32 2, i64 564, i64 1960855528937986108, i64 352, i64 -4576307468236080025, i64 211}
; CHECK: [[PROF8]] = !{!"VP", i32 0, i64 564, i64 -3260100410427592136, i64 352, i64 -3344795496467733762, i64 211}
;.