-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[BOLT] Expose external entry count for functions #141674
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
[BOLT] Expose external entry count for functions #141674
Conversation
Created using spr 1.3.4 [skip ci]
Created using spr 1.3.4
@llvm/pr-subscribers-bolt Author: Amir Ayupov (aaupov) Changes: Record the number of function invocations from external code - code outside the binary, which may include JIT code and DSOs. The purpose of it is to exclude external entry counts from call graph flow conservation analysis. Test Plan: updated shrinkwrapping.test Full diff: https://github.com/llvm/llvm-project/pull/141674.diff 9 Files Affected:
diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h
index 6f3b5923d3ef4..54187b32968a5 100644
--- a/bolt/include/bolt/Core/BinaryFunction.h
+++ b/bolt/include/bolt/Core/BinaryFunction.h
@@ -388,6 +388,10 @@ class BinaryFunction {
/// The profile data for the number of times the function was executed.
uint64_t ExecutionCount{COUNT_NO_PROFILE};
+ /// Profile data for the number of times this function was entered from
+ /// external code (DSO, JIT, etc).
+ uint64_t ExternEntryCount{0};
+
/// Profile match ratio.
float ProfileMatchRatio{0.0f};
@@ -1864,6 +1868,10 @@ class BinaryFunction {
return *this;
}
+ /// Set the profile data for the number of times the function was entered from
+ /// external code (DSO/JIT).
+ void setExternEntryCount(uint64_t Count) { ExternEntryCount = Count; }
+
/// Adjust execution count for the function by a given \p Count. The value
/// \p Count will be subtracted from the current function count.
///
@@ -1891,6 +1899,10 @@ class BinaryFunction {
/// Return COUNT_NO_PROFILE if there's no profile info.
uint64_t getExecutionCount() const { return ExecutionCount; }
+ /// Return the profile information about the number of times the function was
+ /// entered from external code (DSO/JIT).
+ uint64_t getExternEntryCount() const { return ExternEntryCount; }
+
/// Return the raw profile information about the number of branch
/// executions corresponding to this function.
uint64_t getRawSampleCount() const { return RawSampleCount; }
diff --git a/bolt/include/bolt/Profile/DataReader.h b/bolt/include/bolt/Profile/DataReader.h
index 80031f8f6ef4a..9bc61ec83364f 100644
--- a/bolt/include/bolt/Profile/DataReader.h
+++ b/bolt/include/bolt/Profile/DataReader.h
@@ -109,6 +109,9 @@ struct FuncBranchData {
/// Total execution count for the function.
int64_t ExecutionCount{0};
+ /// Total entry count from external code for the function.
+ uint64_t ExternEntryCount{0};
+
/// Indicate if the data was used.
bool Used{false};
diff --git a/bolt/include/bolt/Profile/ProfileYAMLMapping.h b/bolt/include/bolt/Profile/ProfileYAMLMapping.h
index a8d9a15311d94..41e2bd1651efd 100644
--- a/bolt/include/bolt/Profile/ProfileYAMLMapping.h
+++ b/bolt/include/bolt/Profile/ProfileYAMLMapping.h
@@ -206,6 +206,7 @@ struct BinaryFunctionProfile {
uint32_t Id{0};
llvm::yaml::Hex64 Hash{0};
uint64_t ExecCount{0};
+ uint64_t ExternEntryCount{0};
std::vector<BinaryBasicBlockProfile> Blocks;
std::vector<InlineTreeNode> InlineTree;
bool Used{false};
@@ -218,6 +219,7 @@ template <> struct MappingTraits<bolt::BinaryFunctionProfile> {
YamlIO.mapRequired("fid", BFP.Id);
YamlIO.mapRequired("hash", BFP.Hash);
YamlIO.mapRequired("exec", BFP.ExecCount);
+ YamlIO.mapOptional("extern", BFP.ExternEntryCount, 0);
YamlIO.mapRequired("nblocks", BFP.NumBasicBlocks);
YamlIO.mapOptional("blocks", BFP.Blocks,
std::vector<bolt::BinaryBasicBlockProfile>());
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index 851fa36a6b4b7..68477f778470c 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -476,6 +476,8 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation) {
OS << "\n Sample Count: " << RawSampleCount;
OS << "\n Profile Acc : " << format("%.1f%%", ProfileMatchRatio * 100.0f);
}
+ if (ExternEntryCount)
+ OS << "\n Extern Entry Count: " << ExternEntryCount;
if (opts::PrintDynoStats && !getLayout().block_empty()) {
OS << '\n';
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index 4b7a9fd912869..7d62dadff887a 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -2289,6 +2289,7 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
YamlBF.Id = BF->getFunctionNumber();
YamlBF.Hash = BAT->getBFHash(FuncAddress);
YamlBF.ExecCount = BF->getKnownExecutionCount();
+ YamlBF.ExternEntryCount = BF->getExternEntryCount();
YamlBF.NumBasicBlocks = BAT->getNumBasicBlocks(FuncAddress);
const BoltAddressTranslation::BBHashMapTy &BlockMap =
BAT->getBBHashMap(FuncAddress);
diff --git a/bolt/lib/Profile/DataReader.cpp b/bolt/lib/Profile/DataReader.cpp
index 198f7d8642738..9c9d9ca9ef7dd 100644
--- a/bolt/lib/Profile/DataReader.cpp
+++ b/bolt/lib/Profile/DataReader.cpp
@@ -85,6 +85,7 @@ void FuncBranchData::appendFrom(const FuncBranchData &FBD, uint64_t Offset) {
}
llvm::stable_sort(Data);
ExecutionCount += FBD.ExecutionCount;
+ ExternEntryCount += FBD.ExternEntryCount;
for (auto I = FBD.EntryData.begin(), E = FBD.EntryData.end(); I != E; ++I) {
assert(I->To.Name == FBD.Name);
auto NewElmt = EntryData.insert(EntryData.end(), *I);
@@ -269,6 +270,7 @@ Error DataReader::preprocessProfile(BinaryContext &BC) {
if (FuncBranchData *FuncData = getBranchDataForNames(Function.getNames())) {
setBranchData(Function, FuncData);
Function.ExecutionCount = FuncData->ExecutionCount;
+ Function.ExternEntryCount = FuncData->ExternEntryCount;
FuncData->Used = true;
}
}
@@ -419,6 +421,7 @@ void DataReader::matchProfileData(BinaryFunction &BF) {
if (fetchProfileForOtherEntryPoints(BF)) {
BF.ProfileMatchRatio = evaluateProfileData(BF, *FBD);
BF.ExecutionCount = FBD->ExecutionCount;
+ BF.ExternEntryCount = FBD->ExternEntryCount;
BF.RawSampleCount = FBD->getNumExecutedBranches();
}
return;
@@ -449,6 +452,7 @@ void DataReader::matchProfileData(BinaryFunction &BF) {
setBranchData(BF, NewBranchData);
NewBranchData->Used = true;
BF.ExecutionCount = NewBranchData->ExecutionCount;
+ BF.ExternEntryCount = NewBranchData->ExternEntryCount;
BF.ProfileMatchRatio = 1.0f;
break;
}
@@ -1220,6 +1224,8 @@ std::error_code DataReader::parse() {
if (BI.To.IsSymbol && BI.To.Offset == 0) {
I = GetOrCreateFuncEntry(BI.To.Name);
I->second.ExecutionCount += BI.Branches;
+ if (!BI.From.IsSymbol)
+ I->second.ExternEntryCount += BI.Branches;
}
}
diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp
index 33ce40ac2eeec..086e47b661e10 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -176,6 +176,7 @@ bool YAMLProfileReader::parseFunctionProfile(
uint64_t FunctionExecutionCount = 0;
BF.setExecutionCount(YamlBF.ExecCount);
+ BF.setExternEntryCount(YamlBF.ExternEntryCount);
uint64_t FuncRawBranchCount = 0;
for (const yaml::bolt::BinaryBasicBlockProfile &YamlBB : YamlBF.Blocks)
diff --git a/bolt/lib/Profile/YAMLProfileWriter.cpp b/bolt/lib/Profile/YAMLProfileWriter.cpp
index f1fe45f21a0f6..f4308d6fc1992 100644
--- a/bolt/lib/Profile/YAMLProfileWriter.cpp
+++ b/bolt/lib/Profile/YAMLProfileWriter.cpp
@@ -226,6 +226,7 @@ YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS,
YamlBF.Hash = BF.getHash();
YamlBF.NumBasicBlocks = BF.size();
YamlBF.ExecCount = BF.getKnownExecutionCount();
+ YamlBF.ExternEntryCount = BF.getExternEntryCount();
DenseMap<const MCDecodedPseudoProbeInlineTree *, uint32_t> InlineTreeNodeId;
if (PseudoProbeDecoder && BF.getGUID()) {
std::tie(YamlBF.InlineTree, InlineTreeNodeId) =
diff --git a/bolt/test/X86/shrinkwrapping.test b/bolt/test/X86/shrinkwrapping.test
index 8581d7e0c0f7b..521b4561b3ba6 100644
--- a/bolt/test/X86/shrinkwrapping.test
+++ b/bolt/test/X86/shrinkwrapping.test
@@ -8,6 +8,7 @@ REQUIRES: shell
RUN: %clangxx %cxxflags -no-pie %S/Inputs/exc4sw.S -o %t.exe -Wl,-q
RUN: llvm-bolt %t.exe -o %t --relocs --frame-opt=all \
+RUN: --print-only=main --print-cfg \
RUN: --data=%p/Inputs/exc4sw.fdata --reorder-blocks=cache 2>&1 | \
RUN: FileCheck %s --check-prefix=CHECK-BOLT
@@ -19,6 +20,7 @@ RUN: llvm-objdump --dwarf=frames %t | grep -A20 -e \
RUN: `llvm-nm --numeric-sort %t | grep main | tail -n 1 | cut -f1 -d' ' | \
RUN: tail -c9` 2>&1 | FileCheck %s --check-prefix=CHECK-OUTPUT
+CHECK-BOLT: Extern Entry Count: 100
CHECK-BOLT: Shrink wrapping moved 2 spills inserting load/stores and 0 spills inserting push/pops
CHECK-INPUT: DW_CFA_advance_loc: 2
|
Created using spr 1.3.4 [skip ci]
Created using spr 1.3.4
Record the number of function invocations from external code - code
outside the binary, which may include JIT code and DSOs. Accounting for
external entry counts improves the fidelity of call graph flow
conservation analysis.
Test Plan: updated shrinkwrapping.test