@@ -170,6 +170,30 @@ cl::opt<bool> SkipRetExitBlock(
170170 " skip-ret-exit-block" , cl::init(true ),
171171 cl::desc(" Suppress counter promotion if exit blocks contain ret." ));
172172
173+ static cl::opt<bool > SampledInstr (" sampled-instrumentation" , cl::ZeroOrMore,
174+ cl::init (false ),
175+ cl::desc(" Do PGO instrumentation sampling" ));
176+
177+ static cl::opt<unsigned > SampledInstrPeriod (
178+ " sampled-instr-period" ,
179+ cl::desc (" Set the profile instrumentation sample period. For each sample "
180+ " period, a fixed number of consecutive samples will be recorded. "
181+ " The number is controlled by 'sampled-instr-burst-duration' flag. "
182+ " The default sample period of 65535 is optimized for generating "
183+ " efficient code that leverages unsigned integer wrapping in "
184+ " overflow." ),
185+ cl::init(65535 ));
186+
187+ static cl::opt<unsigned > SampledInstrBurstDuration (
188+ " sampled-instr-burst-duration" ,
189+ cl::desc (" Set the profile instrumentation burst duration, which can range "
190+ " from 0 to one less than the value of 'sampled-instr-period'. "
191+ " This number of samples will be recorded for each "
192+ " 'sampled-instr-period' count update. Setting to 1 enables "
193+ " simple sampling, in which case it is recommended to set "
194+ " 'sampled-instr-period' to a prime number." ),
195+ cl::init(200 ));
196+
173197using LoadStorePair = std::pair<Instruction *, Instruction *>;
174198
175199static uint64_t getIntModuleFlagOrZero (const Module &M, StringRef Flag) {
@@ -260,6 +284,9 @@ class InstrLowerer final {
260284 // / Returns true if profile counter update register promotion is enabled.
261285 bool isCounterPromotionEnabled () const ;
262286
287+ // / Return true if profile sampling is enabled.
288+ bool isSamplingEnabled () const ;
289+
263290 // / Count the number of instrumented value sites for the function.
264291 void computeNumValueSiteCounts (InstrProfValueProfileInst *Ins);
265292
@@ -291,6 +318,9 @@ class InstrLowerer final {
291318 // / acts on.
292319 Value *getCounterAddress (InstrProfCntrInstBase *I);
293320
  /// Guard the profiling instruction \p I with the profile sampling predicates.
322+ void doSampling (Instruction *I);
323+
294324 // / Get the region counters for an increment, creating them if necessary.
295325 // /
296326 // / If the counter array doesn't yet exist, the profile data variables
@@ -635,33 +665,169 @@ PreservedAnalyses InstrProfilingLoweringPass::run(Module &M,
635665 return PreservedAnalyses::none ();
636666}
637667
668+ //
669+ // Perform instrumentation sampling.
670+ //
671+ // There are 3 favors of sampling:
672+ // (1) Full burst sampling: We transform:
673+ // Increment_Instruction;
674+ // to:
675+ // if (__llvm_profile_sampling__ < SampledInstrBurstDuration) {
676+ // Increment_Instruction;
677+ // }
678+ // __llvm_profile_sampling__ += 1;
679+ // if (__llvm_profile_sampling__ >= SampledInstrPeriod) {
680+ // __llvm_profile_sampling__ = 0;
681+ // }
682+ //
683+ // "__llvm_profile_sampling__" is a thread-local global shared by all PGO
684+ // counters (value-instrumentation and edge instrumentation).
685+ //
686+ // (2) Fast burst sampling:
687+ // "__llvm_profile_sampling__" variable is an unsigned type, meaning it will
688+ // wrap around to zero when overflows. In this case, the second check is
689+ // unnecessary, so we won't generate check2 when the SampledInstrPeriod is
690+ // set to 65535 (64K - 1). The code after:
691+ // if (__llvm_profile_sampling__ < SampledInstrBurstDuration) {
692+ // Increment_Instruction;
693+ // }
694+ // __llvm_profile_sampling__ += 1;
695+ //
696+ // (3) Simple sampling:
697+ // When SampledInstrBurstDuration sets to 1, we do a simple sampling:
698+ // __llvm_profile_sampling__ += 1;
699+ // if (__llvm_profile_sampling__ >= SampledInstrPeriod) {
700+ // __llvm_profile_sampling__ = 0;
701+ // Increment_Instruction;
702+ // }
703+ //
704+ // Note that, the code snippet after the transformation can still be counter
705+ // promoted. However, with sampling enabled, counter updates are expected to
706+ // be infrequent, making the benefits of counter promotion negligible.
707+ // Moreover, counter promotion can potentially cause issues in server
708+ // applications, particularly when the counters are dumped without a clean
709+ // exit. To mitigate this risk, counter promotion is disabled by default when
710+ // sampling is enabled. This behavior can be overridden using the internal
711+ // option.
712+ void InstrLowerer::doSampling (Instruction *I) {
713+ if (!isSamplingEnabled ())
714+ return ;
715+
716+ unsigned SampledBurstDuration = SampledInstrBurstDuration.getValue ();
717+ unsigned SampledPeriod = SampledInstrPeriod.getValue ();
718+ if (SampledBurstDuration >= SampledPeriod) {
719+ report_fatal_error (
720+ " SampledPeriod needs to be greater than SampledBurstDuration" );
721+ }
722+ bool UseShort = (SampledPeriod <= USHRT_MAX);
723+ bool IsSimpleSampling = (SampledBurstDuration == 1 );
724+ // If (SampledBurstDuration == 1 && SampledPeriod == 65535), generate
725+ // the simple sampling style code.
726+ bool IsFastSampling = (!IsSimpleSampling && SampledPeriod == 65535 );
727+
728+ auto GetConstant = [UseShort](IRBuilder<> &Builder, uint32_t C) {
729+ if (UseShort)
730+ return Builder.getInt16 (C);
731+ else
732+ return Builder.getInt32 (C);
733+ };
734+
735+ IntegerType *SamplingVarTy;
736+ if (UseShort)
737+ SamplingVarTy = Type::getInt16Ty (M.getContext ());
738+ else
739+ SamplingVarTy = Type::getInt32Ty (M.getContext ());
740+ auto *SamplingVar =
741+ M.getGlobalVariable (INSTR_PROF_QUOTE (INSTR_PROF_PROFILE_SAMPLING_VAR));
742+ assert (SamplingVar && " SamplingVar not set properly" );
743+
744+ // Create the condition for checking the burst duration.
745+ Instruction *SamplingVarIncr;
746+ Value *NewSamplingVarVal;
747+ MDBuilder MDB (I->getContext ());
748+ MDNode *BranchWeight;
749+ IRBuilder<> CondBuilder (I);
750+ auto *LoadSamplingVar = CondBuilder.CreateLoad (SamplingVarTy, SamplingVar);
751+ if (IsSimpleSampling) {
752+ // For the simple sampling, just create the load and increments.
753+ IRBuilder<> IncBuilder (I);
754+ NewSamplingVarVal =
755+ IncBuilder.CreateAdd (LoadSamplingVar, GetConstant (IncBuilder, 1 ));
756+ SamplingVarIncr = IncBuilder.CreateStore (NewSamplingVarVal, SamplingVar);
757+ } else {
758+ // For the bust-sampling, create the conditonal update.
759+ auto *DurationCond = CondBuilder.CreateICmpULE (
760+ LoadSamplingVar, GetConstant (CondBuilder, SampledBurstDuration));
761+ BranchWeight = MDB.createBranchWeights (
762+ SampledBurstDuration, SampledPeriod + 1 - SampledBurstDuration);
763+ Instruction *ThenTerm = SplitBlockAndInsertIfThen (
764+ DurationCond, I, /* Unreachable */ false , BranchWeight);
765+ IRBuilder<> IncBuilder (I);
766+ NewSamplingVarVal =
767+ IncBuilder.CreateAdd (LoadSamplingVar, GetConstant (IncBuilder, 1 ));
768+ SamplingVarIncr = IncBuilder.CreateStore (NewSamplingVarVal, SamplingVar);
769+ I->moveBefore (ThenTerm);
770+ }
771+
772+ if (IsFastSampling)
773+ return ;
774+
775+ // Create the condtion for checking the period.
776+ Instruction *ThenTerm, *ElseTerm;
777+ IRBuilder<> PeriodCondBuilder (SamplingVarIncr);
778+ auto *PeriodCond = PeriodCondBuilder.CreateICmpUGE (
779+ NewSamplingVarVal, GetConstant (PeriodCondBuilder, SampledPeriod));
780+ BranchWeight = MDB.createBranchWeights (1 , SampledPeriod);
781+ SplitBlockAndInsertIfThenElse (PeriodCond, SamplingVarIncr, &ThenTerm,
782+ &ElseTerm, BranchWeight);
783+
784+ // For the simple sampling, the counter update happens in sampling var reset.
785+ if (IsSimpleSampling)
786+ I->moveBefore (ThenTerm);
787+
788+ IRBuilder<> ResetBuilder (ThenTerm);
789+ ResetBuilder.CreateStore (GetConstant (ResetBuilder, 0 ), SamplingVar);
790+ SamplingVarIncr->moveBefore (ElseTerm);
791+ }
792+
638793bool InstrLowerer::lowerIntrinsics (Function *F) {
639794 bool MadeChange = false ;
640795 PromotionCandidates.clear ();
796+ SmallVector<InstrProfInstBase *, 8 > InstrProfInsts;
797+
798+ // To ensure compatibility with sampling, we save the intrinsics into
799+ // a buffer to prevent potential breakage of the iterator (as the
800+ // intrinsics will be moved to a different BB).
641801 for (BasicBlock &BB : *F) {
642802 for (Instruction &Instr : llvm::make_early_inc_range (BB)) {
643- if (auto *IPIS = dyn_cast<InstrProfIncrementInstStep>(&Instr)) {
644- lowerIncrement (IPIS);
645- MadeChange = true ;
646- } else if (auto *IPI = dyn_cast<InstrProfIncrementInst>(&Instr)) {
647- lowerIncrement (IPI);
648- MadeChange = true ;
649- } else if (auto *IPC = dyn_cast<InstrProfTimestampInst>(&Instr)) {
650- lowerTimestamp (IPC);
651- MadeChange = true ;
652- } else if (auto *IPC = dyn_cast<InstrProfCoverInst>(&Instr)) {
653- lowerCover (IPC);
654- MadeChange = true ;
655- } else if (auto *IPVP = dyn_cast<InstrProfValueProfileInst>(&Instr)) {
656- lowerValueProfileInst (IPVP);
657- MadeChange = true ;
658- } else if (auto *IPMP = dyn_cast<InstrProfMCDCBitmapParameters>(&Instr)) {
659- IPMP->eraseFromParent ();
660- MadeChange = true ;
661- } else if (auto *IPBU = dyn_cast<InstrProfMCDCTVBitmapUpdate>(&Instr)) {
662- lowerMCDCTestVectorBitmapUpdate (IPBU);
663- MadeChange = true ;
664- }
803+ if (auto *IP = dyn_cast<InstrProfInstBase>(&Instr))
804+ InstrProfInsts.push_back (IP);
805+ }
806+ }
807+
808+ for (auto *Instr : InstrProfInsts) {
809+ doSampling (Instr);
810+ if (auto *IPIS = dyn_cast<InstrProfIncrementInstStep>(Instr)) {
811+ lowerIncrement (IPIS);
812+ MadeChange = true ;
813+ } else if (auto *IPI = dyn_cast<InstrProfIncrementInst>(Instr)) {
814+ lowerIncrement (IPI);
815+ MadeChange = true ;
816+ } else if (auto *IPC = dyn_cast<InstrProfTimestampInst>(Instr)) {
817+ lowerTimestamp (IPC);
818+ MadeChange = true ;
819+ } else if (auto *IPC = dyn_cast<InstrProfCoverInst>(Instr)) {
820+ lowerCover (IPC);
821+ MadeChange = true ;
822+ } else if (auto *IPVP = dyn_cast<InstrProfValueProfileInst>(Instr)) {
823+ lowerValueProfileInst (IPVP);
824+ MadeChange = true ;
825+ } else if (auto *IPMP = dyn_cast<InstrProfMCDCBitmapParameters>(Instr)) {
826+ IPMP->eraseFromParent ();
827+ MadeChange = true ;
828+ } else if (auto *IPBU = dyn_cast<InstrProfMCDCTVBitmapUpdate>(Instr)) {
829+ lowerMCDCTestVectorBitmapUpdate (IPBU);
830+ MadeChange = true ;
665831 }
666832 }
667833
@@ -684,6 +850,12 @@ bool InstrLowerer::isRuntimeCounterRelocationEnabled() const {
684850 return TT.isOSFuchsia ();
685851}
686852
853+ bool InstrLowerer::isSamplingEnabled () const {
854+ if (SampledInstr.getNumOccurrences () > 0 )
855+ return SampledInstr;
856+ return Options.Sampling ;
857+ }
858+
687859bool InstrLowerer::isCounterPromotionEnabled () const {
688860 if (DoCounterPromotion.getNumOccurrences () > 0 )
689861 return DoCounterPromotion;
@@ -754,6 +926,9 @@ bool InstrLowerer::lower() {
754926 if (NeedsRuntimeHook)
755927 MadeChange = emitRuntimeHook ();
756928
929+ if (!IsCS && isSamplingEnabled ())
930+ createProfileSamplingVar (M);
931+
757932 bool ContainsProfiling = containsProfilingIntrinsics (M);
758933 GlobalVariable *CoverageNamesVar =
759934 M.getNamedGlobal (getCoverageUnusedNamesVarName ());
@@ -1955,3 +2130,29 @@ void InstrLowerer::emitInitialization() {
19552130
19562131 appendToGlobalCtors (M, F, 0 );
19572132}
2133+
2134+ namespace llvm {
2135+ // Create the variable for profile sampling.
2136+ void createProfileSamplingVar (Module &M) {
2137+ const StringRef VarName (INSTR_PROF_QUOTE (INSTR_PROF_PROFILE_SAMPLING_VAR));
2138+ IntegerType *SamplingVarTy;
2139+ Constant *ValueZero;
2140+ if (SampledInstrPeriod.getValue () <= USHRT_MAX) {
2141+ SamplingVarTy = Type::getInt16Ty (M.getContext ());
2142+ ValueZero = Constant::getIntegerValue (SamplingVarTy, APInt (16 , 0 ));
2143+ } else {
2144+ SamplingVarTy = Type::getInt32Ty (M.getContext ());
2145+ ValueZero = Constant::getIntegerValue (SamplingVarTy, APInt (32 , 0 ));
2146+ }
2147+ auto SamplingVar = new GlobalVariable (
2148+ M, SamplingVarTy, false , GlobalValue::WeakAnyLinkage, ValueZero, VarName);
2149+ SamplingVar->setVisibility (GlobalValue::DefaultVisibility);
2150+ SamplingVar->setThreadLocal (true );
2151+ Triple TT (M.getTargetTriple ());
2152+ if (TT.supportsCOMDAT ()) {
2153+ SamplingVar->setLinkage (GlobalValue::ExternalLinkage);
2154+ SamplingVar->setComdat (M.getOrInsertComdat (VarName));
2155+ }
2156+ appendToCompilerUsed (M, SamplingVar);
2157+ }
2158+ } // namespace llvm
0 commit comments