-
Notifications
You must be signed in to change notification settings - Fork 11.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[PGO] Sampled instrumentation in PGO to speed up instrumentation binary #69535
Changes from 3 commits
451d818
64cbd8f
c207bfb
9c795d9
b8204b0
27d0a6c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -170,6 +170,29 @@ cl::opt<bool> SkipRetExitBlock( | |
"skip-ret-exit-block", cl::init(true), | ||
cl::desc("Suppress counter promotion if exit blocks contain ret.")); | ||
|
||
static cl::opt<bool> SampledInstr("sampled-instr", cl::ZeroOrMore, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 'instr' can be confused with 'instruction'. We should just spell it out as 'sampled-instrumentation'. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ack. |
||
cl::init(false), | ||
cl::desc("Do PGO instrumentation sampling")); | ||
|
||
static cl::opt<unsigned> SampledInstrPeriod( | ||
"sampled-instr-period", | ||
cl::desc("Set the profile instrumentation sample period. For each sample " | ||
"period, the 'sampled-instr-burst-duration' number of consecutive " | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. A fixed number of consecutive samples will be recorded. The number is controlled by the 'sampled-instr-burst-duration' flag. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ack. |
||
"samples will be recorded. The default sample period of 65535 is " | ||
"optimized for generating efficient code that leverages unsigned " | ||
"integer wrapping in overflow."), | ||
cl::init(65535)); | ||
|
||
// Command-line knob for the burst length used by instrumentation sampling.
// doSampling() reads this value: a value of 1 selects the "simple sampling"
// code shape, any other value selects burst sampling, and the value is asserted
// to be smaller than 'sampled-instr-period'. Defaults to 200.
static cl::opt<unsigned> SampledInstrBurstDuration( | ||
"sampled-instr-burst-duration", | ||
cl::desc("Set the profile instrumentation burst duration, which can range " | ||
"from 0 to one less than the value of 'sampled-instr-period'. " | ||
"This number of samples will be recorded for each " | ||
"'sampled-instr-period' count update. Setting to 1 enables " | ||
"simple sampling, in which case it is recommended to set " | ||
"'sampled-instr-period' to a prime number."), | ||
cl::init(200)); | ||
|
||
using LoadStorePair = std::pair<Instruction *, Instruction *>; | ||
|
||
static uint64_t getIntModuleFlagOrZero(const Module &M, StringRef Flag) { | ||
|
@@ -260,6 +283,9 @@ class InstrLowerer final { | |
/// Returns true if profile counter update register promotion is enabled. | ||
bool isCounterPromotionEnabled() const; | ||
|
||
/// Return true if profile sampling is enabled. | ||
bool isSamplingEnabled() const; | ||
|
||
/// Count the number of instrumented value sites for the function. | ||
void computeNumValueSiteCounts(InstrProfValueProfileInst *Ins); | ||
|
||
|
@@ -291,6 +317,9 @@ class InstrLowerer final { | |
/// acts on. | ||
Value *getCounterAddress(InstrProfCntrInstBase *I); | ||
|
||
/// Lower the incremental instructions under profile sampling predicates. | ||
void doSampling(Instruction *I); | ||
|
||
/// Get the region counters for an increment, creating them if necessary. | ||
/// | ||
/// If the counter array doesn't yet exist, the profile data variables | ||
|
@@ -635,33 +664,161 @@ PreservedAnalyses InstrProfilingLoweringPass::run(Module &M, | |
return PreservedAnalyses::none(); | ||
} | ||
|
||
// | ||
// Perform instrumentation sampling. | ||
// | ||
// There are 3 flavors of sampling: | ||
// (1) Full burst sampling: We transform: | ||
// Increment_Instruction; | ||
// to: | ||
// if (__llvm_profile_sampling__ < SampledInstrBurstDuration) { | ||
// Increment_Instruction; | ||
// } | ||
// __llvm_profile_sampling__ += 1; | ||
// if (__llvm_profile_sampling__ >= SampledInstrPeriod) { | ||
// __llvm_profile_sampling__ = 0; | ||
// } | ||
// | ||
// "__llvm_profile_sampling__" is a thread-local global shared by all PGO | ||
// counters (value-instrumentation and edge instrumentation). | ||
// | ||
// (2) Fast burst sampling: | ||
// The __llvm_profile_sampling__ variable is an unsigned type, meaning it will wrap around to zero when | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. llvm_profile_sampling variable is .. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ack. |
||
// it overflows. In this case, a second check (check2) is unnecessary, so we | ||
// won't generate check2 when the SampledInstrPeriod is set to 65535 (64K - 1). | ||
// The code after: | ||
// if (__llvm_profile_sampling__ < SampledInstrBurstDuration) { | ||
// Increment_Instruction; | ||
// } | ||
// __llvm_profile_sampling__ += 1; | ||
// | ||
// (3) Simple sampling: | ||
// When SampledInstrBurstDuration is set to 1, we do a simple sampling: | ||
// __llvm_profile_sampling__ += 1; | ||
// if (__llvm_profile_sampling__ >= SampledInstrPeriod) { | ||
// __llvm_profile_sampling__ = 0; | ||
// Increment_Instruction; | ||
// } | ||
// | ||
// Note that, the code snippet after the transformation can still be counter | ||
// promoted. However, with sampling enabled, counter updates are expected to | ||
// be infrequent, making the benefits of counter promotion negligible. | ||
// Moreover, counter promotion can potentially cause issues in server | ||
// applications, particularly when the counters are dumped without a clean | ||
// exit. To mitigate this risk, counter promotion is disabled by default when | ||
// sampling is enabled. This behavior can be overridden using the internal | ||
// option. | ||
void InstrLowerer::doSampling(Instruction *I) {
  // Wraps the profiling intrinsic \p I in the sampling predicate so that it is
  // only executed for a fraction of the times control reaches it. Does nothing
  // when sampling is disabled. \p I itself is not lowered here; it is only
  // moved into the conditionally executed block.
  if (!isSamplingEnabled())
    return;

  const unsigned SampledBurstDuration = SampledInstrBurstDuration.getValue();
  const unsigned SampledPeriod = SampledInstrPeriod.getValue();
  // TODO: Both values come straight from user-supplied options; review
  // feedback asked for a proper error here because an assertion vanishes in
  // release builds.
  assert(SampledBurstDuration < SampledPeriod &&
         "sampled-instr-burst-duration must be less than sampled-instr-period");

  // A 16-bit sampling variable is used whenever the period fits in one.
  const bool UseShort = (SampledPeriod <= USHRT_MAX);
  const bool IsSimpleSampling = (SampledBurstDuration == 1);
  // Fast sampling drops the explicit period check and relies on the 16-bit
  // variable wrapping to zero. Simple sampling takes precedence when both
  // BurstDuration == 1 and Period == 65535 are requested, because the simple
  // form performs the counter update inside the period check.
  const bool IsFastSampling = (!IsSimpleSampling && SampledPeriod == 65535);

  // Builds an integer constant of the same width as the sampling variable.
  auto GetConstant = [UseShort](IRBuilder<> &Builder, uint32_t C) {
    return UseShort ? Builder.getInt16(C) : Builder.getInt32(C);
  };

  IntegerType *SamplingVarTy = UseShort ? Type::getInt16Ty(M.getContext())
                                        : Type::getInt32Ty(M.getContext());
  auto *SamplingVar =
      M.getGlobalVariable(INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_SAMPLING_VAR));
  assert(SamplingVar && "SamplingVar not set properly");

  // Create the condition for checking the burst duration.
  Instruction *SamplingVarIncr = nullptr;
  Value *NewSamplingVarVal = nullptr;
  MDBuilder MDB(I->getContext());
  IRBuilder<> CondBuilder(I);
  auto *LoadSamplingVar = CondBuilder.CreateLoad(SamplingVarTy, SamplingVar);
  if (IsSimpleSampling) {
    // For the simple sampling, just create the load and increments.
    IRBuilder<> IncBuilder(I);
    NewSamplingVarVal =
        IncBuilder.CreateAdd(LoadSamplingVar, GetConstant(IncBuilder, 1));
    SamplingVarIncr = IncBuilder.CreateStore(NewSamplingVarVal, SamplingVar);
  } else {
    // For the burst-sampling, create the conditional update.
    // The loaded value is the pre-increment value and starts at 0, so a strict
    // '<' records exactly SampledBurstDuration updates per period ('<=' would
    // record one extra).
    auto *DurationCond = CondBuilder.CreateICmpULT(
        LoadSamplingVar, GetConstant(CondBuilder, SampledBurstDuration));
    // In fast mode the variable only resets when the 16-bit value wraps, so
    // one full cycle is SampledPeriod + 1 executions.
    const unsigned CycleLength =
        IsFastSampling ? SampledPeriod + 1 : SampledPeriod;
    MDNode *BurstWeights = MDB.createBranchWeights(
        SampledBurstDuration, CycleLength - SampledBurstDuration);
    Instruction *BurstThenTerm = SplitBlockAndInsertIfThen(
        DurationCond, I, /* Unreachable */ false, BurstWeights);
    // After the split, I heads the tail block; the sampling-variable update is
    // emitted there so that it runs unconditionally.
    IRBuilder<> IncBuilder(I);
    NewSamplingVarVal =
        IncBuilder.CreateAdd(LoadSamplingVar, GetConstant(IncBuilder, 1));
    SamplingVarIncr = IncBuilder.CreateStore(NewSamplingVarVal, SamplingVar);
    I->moveBefore(BurstThenTerm);
  }

  if (IsFastSampling)
    return;

  // Create the condition for checking the period.
  Instruction *ThenTerm = nullptr;
  Instruction *ElseTerm = nullptr;
  IRBuilder<> PeriodCondBuilder(SamplingVarIncr);
  auto *PeriodCond = PeriodCondBuilder.CreateICmpUGE(
      NewSamplingVarVal, GetConstant(PeriodCondBuilder, SampledPeriod));
  MDNode *PeriodWeights = MDB.createBranchWeights(1, SampledPeriod);
  SplitBlockAndInsertIfThenElse(PeriodCond, SamplingVarIncr, &ThenTerm,
                                &ElseTerm, PeriodWeights);

  // For the simple sampling, the counter update happens in sampling var reset.
  if (IsSimpleSampling)
    I->moveBefore(ThenTerm);

  // Then-branch: the period has elapsed, reset the sampling variable to zero.
  // Else-branch: keep the incremented value.
  IRBuilder<> ResetBuilder(ThenTerm);
  ResetBuilder.CreateStore(GetConstant(ResetBuilder, 0), SamplingVar);
  SamplingVarIncr->moveBefore(ElseTerm);
}
|
||
bool InstrLowerer::lowerIntrinsics(Function *F) { | ||
bool MadeChange = false; | ||
PromotionCandidates.clear(); | ||
SmallVector<InstrProfInstBase *, 8> InstrProfInsts; | ||
|
||
for (BasicBlock &BB : *F) { | ||
for (Instruction &Instr : llvm::make_early_inc_range(BB)) { | ||
if (auto *IPIS = dyn_cast<InstrProfIncrementInstStep>(&Instr)) { | ||
lowerIncrement(IPIS); | ||
MadeChange = true; | ||
} else if (auto *IPI = dyn_cast<InstrProfIncrementInst>(&Instr)) { | ||
lowerIncrement(IPI); | ||
MadeChange = true; | ||
} else if (auto *IPC = dyn_cast<InstrProfTimestampInst>(&Instr)) { | ||
lowerTimestamp(IPC); | ||
MadeChange = true; | ||
} else if (auto *IPC = dyn_cast<InstrProfCoverInst>(&Instr)) { | ||
lowerCover(IPC); | ||
MadeChange = true; | ||
} else if (auto *IPVP = dyn_cast<InstrProfValueProfileInst>(&Instr)) { | ||
lowerValueProfileInst(IPVP); | ||
MadeChange = true; | ||
} else if (auto *IPMP = dyn_cast<InstrProfMCDCBitmapParameters>(&Instr)) { | ||
IPMP->eraseFromParent(); | ||
MadeChange = true; | ||
} else if (auto *IPBU = dyn_cast<InstrProfMCDCTVBitmapUpdate>(&Instr)) { | ||
lowerMCDCTestVectorBitmapUpdate(IPBU); | ||
MadeChange = true; | ||
} | ||
if (auto *IP = dyn_cast<InstrProfInstBase>(&Instr)) | ||
InstrProfInsts.push_back(IP); | ||
xur-llvm marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
} | ||
|
||
for (auto *Instr : InstrProfInsts) { | ||
doSampling(Instr); | ||
if (auto *IPIS = dyn_cast<InstrProfIncrementInstStep>(Instr)) { | ||
lowerIncrement(IPIS); | ||
MadeChange = true; | ||
} else if (auto *IPI = dyn_cast<InstrProfIncrementInst>(Instr)) { | ||
lowerIncrement(IPI); | ||
MadeChange = true; | ||
} else if (auto *IPC = dyn_cast<InstrProfTimestampInst>(Instr)) { | ||
lowerTimestamp(IPC); | ||
MadeChange = true; | ||
} else if (auto *IPC = dyn_cast<InstrProfCoverInst>(Instr)) { | ||
lowerCover(IPC); | ||
MadeChange = true; | ||
} else if (auto *IPVP = dyn_cast<InstrProfValueProfileInst>(Instr)) { | ||
lowerValueProfileInst(IPVP); | ||
MadeChange = true; | ||
} else if (auto *IPMP = dyn_cast<InstrProfMCDCBitmapParameters>(Instr)) { | ||
IPMP->eraseFromParent(); | ||
MadeChange = true; | ||
} else if (auto *IPBU = dyn_cast<InstrProfMCDCTVBitmapUpdate>(Instr)) { | ||
lowerMCDCTestVectorBitmapUpdate(IPBU); | ||
MadeChange = true; | ||
} | ||
} | ||
|
||
|
@@ -684,6 +841,12 @@ bool InstrLowerer::isRuntimeCounterRelocationEnabled() const { | |
return TT.isOSFuchsia(); | ||
} | ||
|
||
bool InstrLowerer::isSamplingEnabled() const { | ||
if (SampledInstr.getNumOccurrences() > 0) | ||
return SampledInstr; | ||
return Options.Sampling; | ||
} | ||
|
||
bool InstrLowerer::isCounterPromotionEnabled() const { | ||
if (DoCounterPromotion.getNumOccurrences() > 0) | ||
return DoCounterPromotion; | ||
|
@@ -754,6 +917,9 @@ bool InstrLowerer::lower() { | |
if (NeedsRuntimeHook) | ||
MadeChange = emitRuntimeHook(); | ||
|
||
if (!IsCS && isSamplingEnabled()) | ||
createProfileSamplingVar(M); | ||
|
||
bool ContainsProfiling = containsProfilingIntrinsics(M); | ||
GlobalVariable *CoverageNamesVar = | ||
M.getNamedGlobal(getCoverageUnusedNamesVarName()); | ||
|
@@ -1952,3 +2118,29 @@ void InstrLowerer::emitInitialization() { | |
|
||
appendToGlobalCtors(M, F, 0); | ||
} | ||
|
||
namespace llvm { | ||
// Create the variable for profile sampling. | ||
void createProfileSamplingVar(Module &M) { | ||
const StringRef VarName(INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_SAMPLING_VAR)); | ||
IntegerType *SamplingVarTy; | ||
Constant *ValueZero; | ||
if (SampledInstrPeriod.getValue() <= USHRT_MAX) { | ||
SamplingVarTy = Type::getInt16Ty(M.getContext()); | ||
ValueZero = Constant::getIntegerValue(SamplingVarTy, APInt(16, 0)); | ||
} else { | ||
SamplingVarTy = Type::getInt32Ty(M.getContext()); | ||
ValueZero = Constant::getIntegerValue(SamplingVarTy, APInt(32, 0)); | ||
} | ||
auto SamplingVar = new GlobalVariable( | ||
M, SamplingVarTy, false, GlobalValue::WeakAnyLinkage, ValueZero, VarName); | ||
SamplingVar->setVisibility(GlobalValue::DefaultVisibility); | ||
SamplingVar->setThreadLocal(true); | ||
Triple TT(M.getTargetTriple()); | ||
if (TT.supportsCOMDAT()) { | ||
SamplingVar->setLinkage(GlobalValue::ExternalLinkage); | ||
SamplingVar->setComdat(M.getOrInsertComdat(VarName)); | ||
} | ||
appendToCompilerUsed(M, SamplingVar); | ||
} | ||
} // namespace llvm |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you add a comment on why counter promotion is turned off?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The reason is mentioned in InstrProfling.C:400.