@@ -82,6 +82,22 @@ namespace PAuthGadgetScanner {
8282 dbgs () << " \n " ;
8383}
8484
85+ // Iterates over BinaryFunction's instructions like a range-based for loop:
86+ //
87+ // iterateOverInstrs(BF, [&](MCInstReference Inst) {
88+ // // loop body
89+ // });
90+ template <typename T> static void iterateOverInstrs (BinaryFunction &BF, T Fn) {
91+ if (BF.hasCFG ()) {
92+ for (BinaryBasicBlock &BB : BF)
93+ for (int64_t I = 0 , E = BB.size (); I < E; ++I)
94+ Fn (MCInstInBBReference (&BB, I));
95+ } else {
96+ for (auto I : BF.instrs ())
97+ Fn (MCInstInBFReference (&BF, I.first ));
98+ }
99+ }
100+
85101// This class represents mapping from a set of arbitrary physical registers to
86102// consecutive array indexes.
87103class TrackedRegisters {
@@ -342,6 +358,29 @@ class SrcSafetyAnalysis {
342358 return S;
343359 }
344360
361+ // / Computes a reasonably pessimistic estimation of the register state when
362+ // / the previous instruction is not known for sure. Takes the set of registers
363+ // / which are trusted at function entry and removes all registers that can be
364+ // / clobbered inside this function.
365+ SrcState computePessimisticState (BinaryFunction &BF) {
366+ BitVector ClobberedRegs (NumRegs);
367+ iterateOverInstrs (BF, [&](MCInstReference Inst) {
368+ BC.MIB ->getClobberedRegs (Inst, ClobberedRegs);
369+
370+ // If this is a call instruction, no register is safe anymore, unless
371+ // it is a tail call. Ignore tail calls for the purpose of estimating the
372+ // worst-case scenario, assuming no instructions are executed in the
373+ // caller after this point anyway.
374+ if (BC.MIB ->isCall (Inst) && !BC.MIB ->isTailCall (Inst))
375+ ClobberedRegs.set ();
376+ });
377+
378+ SrcState S = createEntryState ();
379+ S.SafeToDerefRegs .reset (ClobberedRegs);
380+ S.TrustedRegs .reset (ClobberedRegs);
381+ return S;
382+ }
383+
345384 BitVector getClobberedRegs (const MCInst &Point) const {
346385 BitVector Clobbered (NumRegs);
347386 // Assume a call can clobber all registers, including callee-saved
@@ -545,6 +584,10 @@ class DataflowSrcSafetyAnalysis
545584 using SrcSafetyAnalysis::BC;
546585 using SrcSafetyAnalysis::computeNext;
547586
587+ // Pessimistic initial state for basic blocks without any predecessors
588+ // (not needed for most functions, thus initialized lazily).
589+ SrcState PessimisticState;
590+
548591public:
549592 DataflowSrcSafetyAnalysis (BinaryFunction &BF,
550593 MCPlusBuilder::AllocatorIdTy AllocId,
@@ -585,6 +628,18 @@ class DataflowSrcSafetyAnalysis
585628 if (BB.isEntryPoint ())
586629 return createEntryState ();
587630
631+ // If a basic block without any predecessors is found in an optimized code,
632+ // this likely means that some CFG edges were not detected. Pessimistically
633+ // assume any register that can ever be clobbered in this function to be
634+ // unsafe before this basic block.
635+ // Warn about this fact in FunctionAnalysis::findUnsafeUses(), as it likely
636+ // means imprecise CFG information.
637+ if (BB.pred_empty ()) {
638+ if (PessimisticState.empty ())
639+ PessimisticState = computePessimisticState (*BB.getParent ());
640+ return PessimisticState;
641+ }
642+
588643 return SrcState ();
589644 }
590645
@@ -682,19 +737,14 @@ template <typename StateTy> class CFGUnawareAnalysis {
682737//
683738// Then, a function can be split into a number of disjoint contiguous sequences
684739// of instructions without labels in between. These sequences can be processed
685- // the same way basic blocks are processed by data-flow analysis, assuming
686- // pessimistically that all registers are unsafe at the start of each sequence.
740+ // the same way basic blocks are processed by data-flow analysis, with the same
741+ // pessimistic estimation of the initial state at the start of each sequence
742+ // (except the first instruction of the function).
687743class CFGUnawareSrcSafetyAnalysis : public SrcSafetyAnalysis ,
688744 public CFGUnawareAnalysis<SrcState> {
689745 using SrcSafetyAnalysis::BC;
690746 BinaryFunction &BF;
691747
692- // / Creates a state with all registers marked unsafe (not to be confused
693- // / with empty state).
694- SrcState createUnsafeState () const {
695- return SrcState (NumRegs, RegsToTrackInstsFor.getNumTrackedRegisters ());
696- }
697-
698748public:
699749 CFGUnawareSrcSafetyAnalysis (BinaryFunction &BF,
700750 MCPlusBuilder::AllocatorIdTy AllocId,
@@ -704,6 +754,7 @@ class CFGUnawareSrcSafetyAnalysis : public SrcSafetyAnalysis,
704754 }
705755
706756 void run () override {
757+ const SrcState DefaultState = computePessimisticState (BF);
707758 SrcState S = createEntryState ();
708759 for (auto &I : BF.instrs ()) {
709760 MCInst &Inst = I.second ;
@@ -718,7 +769,7 @@ class CFGUnawareSrcSafetyAnalysis : public SrcSafetyAnalysis,
718769 LLVM_DEBUG ({
719770 traceInst (BC, " Due to label, resetting the state before" , Inst);
720771 });
721- S = createUnsafeState () ;
772+ S = DefaultState ;
722773 }
723774
724775 // Attach the state *before* this instruction executes.
@@ -1344,17 +1395,6 @@ shouldReportAuthOracle(const BinaryContext &BC, const MCInstReference &Inst,
13441395 return make_gadget_report (AuthOracleKind, Inst, *AuthReg);
13451396}
13461397
1347- template <typename T> static void iterateOverInstrs (BinaryFunction &BF, T Fn) {
1348- if (BF.hasCFG ()) {
1349- for (BinaryBasicBlock &BB : BF)
1350- for (int64_t I = 0 , E = BB.size (); I < E; ++I)
1351- Fn (MCInstInBBReference (&BB, I));
1352- } else {
1353- for (auto I : BF.instrs ())
1354- Fn (MCInstInBFReference (&BF, I.first ));
1355- }
1356- }
1357-
13581398static SmallVector<MCPhysReg>
13591399collectRegsToTrack (ArrayRef<PartialReport<MCPhysReg>> Reports) {
13601400 SmallSet<MCPhysReg, 4 > RegsToTrack;
@@ -1375,17 +1415,60 @@ void FunctionAnalysisContext::findUnsafeUses(
13751415 BF.dump ();
13761416 });
13771417
1418+ bool UnreachableBBReported = false ;
1419+ if (BF.hasCFG ()) {
1420+ // Warn on basic blocks being unreachable according to BOLT (at most once
1421+ // per BinaryFunction), as this likely means the CFG reconstructed by BOLT
1422+ // is imprecise. A basic block can be
1423+ // * reachable from an entry basic block - a hopefully correct non-empty
1424+ // state is propagated to that basic block sooner or later. All basic
1425+ // blocks are expected to belong to this category under normal conditions.
1426+ // * reachable from a "directly unreachable" BB (a basic block that has no
1427+ // direct predecessors and this is not because it is an entry BB) - *some*
1428+ // non-empty state is propagated to this basic block sooner or later, as
1429+ // the initial state of directly unreachable basic blocks is
1430+ // pessimistically initialized to "all registers are unsafe"
1431+ // - a warning can be printed for the "directly unreachable" basic block
1432+ // * neither reachable from an entry nor from a "directly unreachable" BB
1433+ // (such as if this BB is in an isolated loop of basic blocks) - the final
1434+ // state is computed to be empty for this basic block
1435+ // - a warning can be printed for this basic block
1436+ for (BinaryBasicBlock &BB : BF) {
1437+ MCInst *FirstInst = BB.getFirstNonPseudoInstr ();
1438+ // Skip empty basic block early for simplicity.
1439+ if (!FirstInst)
1440+ continue ;
1441+
1442+ bool IsDirectlyUnreachable = BB.pred_empty () && !BB.isEntryPoint ();
1443+ bool HasNoStateComputed = Analysis->getStateBefore (*FirstInst).empty ();
1444+ if (!IsDirectlyUnreachable && !HasNoStateComputed)
1445+ continue ;
1446+
1447+ // Arbitrarily attach the report to the first instruction of BB.
1448+ // This is printed as "[message] in function [name], basic block ...,
1449+ // at address ..." when the issue is reported to the user.
1450+ Reports.push_back (make_generic_report (
1451+ MCInstReference::get (FirstInst, BF),
1452+ " Warning: possibly imprecise CFG, the analysis quality may be "
1453+ " degraded in this function. According to BOLT, unreachable code is "
1454+ " found" /* in function [name]... */ ));
1455+ UnreachableBBReported = true ;
1456+ break ; // One warning per function.
1457+ }
1458+ }
1459+ // FIXME: Warn the user about imprecise analysis when the function has no CFG
1460+ // information at all.
1461+
13781462 iterateOverInstrs (BF, [&](MCInstReference Inst) {
13791463 if (BC.MIB ->isCFI (Inst))
13801464 return ;
13811465
13821466 const SrcState &S = Analysis->getStateBefore (Inst);
1383-
1384- // If non-empty state was never propagated from the entry basic block
1385- // to Inst, assume it to be unreachable and report a warning.
13861467 if (S.empty ()) {
1387- Reports.push_back (
1388- make_generic_report (Inst, " Warning: unreachable instruction found" ));
1468+ LLVM_DEBUG (
1469+ { traceInst (BC, " Instruction has no state, skipping" , Inst); });
1470+ assert (UnreachableBBReported && " Should be reported at least once" );
1471+ (void )UnreachableBBReported;
13891472 return ;
13901473 }
13911474
0 commit comments