Skip to content

Commit c410222

Browse files
committed
[BOLT] Factor out MCInstReference from gadget scanner (NFC)
Move MCInstReference out of the gadget scanner header (bolt/Passes/PAuthGadgetScanner.h) into a new header, bolt/Core/MCInstUtils.h. MCInstReference represents a constant reference to an instruction inside a parent entity: either a basic block (which has a reference to its parent function) or, when CFG information is not available, the function itself.
1 parent ff3dc1d commit c410222

File tree

5 files changed

+269
-237
lines changed

5 files changed

+269
-237
lines changed

bolt/include/bolt/Core/MCInstUtils.h

Lines changed: 168 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,168 @@
1+
//===- bolt/Core/MCInstUtils.h ----------------------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef BOLT_CORE_MCINSTUTILS_H
10+
#define BOLT_CORE_MCINSTUTILS_H
11+
12+
#include "bolt/Core/BinaryBasicBlock.h"
13+
14+
#include <map>
15+
#include <tuple>
16+
#include <variant>
17+
18+
namespace llvm {
19+
namespace bolt {
20+
21+
class BinaryFunction;
22+
23+
/// MCInstReference represents a reference to a constant MCInst as stored either
24+
/// in a BinaryFunction (i.e. before a CFG is created), or in a BinaryBasicBlock
25+
/// (after a CFG is created).
26+
class MCInstReference {
27+
using nocfg_const_iterator = std::map<uint32_t, MCInst>::const_iterator;
28+
29+
// Two cases are possible:
30+
// * functions with CFG reconstructed - a function stores a collection of
31+
// basic blocks, each basic block stores a contiguous vector of MCInst
32+
// * functions without CFG - there are no basic blocks created,
33+
// the instructions are directly stored in std::map in BinaryFunction
34+
//
35+
// In both cases, the direct parent of MCInst is stored together with an
36+
// iterator pointing to the instruction.
37+
38+
// Helper struct: CFG is available, the direct parent is a basic block,
39+
// iterator's type is `BinaryBasicBlock::const_iterator` (from `const MCInst *`).
40+
struct RefInBB {
41+
RefInBB(const BinaryBasicBlock *BB, const MCInst *Inst)
42+
: BB(BB), It(Inst) {}
43+
RefInBB(const RefInBB &Other) = default;
44+
RefInBB &operator=(const RefInBB &Other) = default;
45+
46+
const BinaryBasicBlock *BB;
47+
BinaryBasicBlock::const_iterator It;
48+
49+
bool operator<(const RefInBB &Other) const {
50+
return std::tie(BB, It) < std::tie(Other.BB, Other.It);
51+
}
52+
53+
bool operator==(const RefInBB &Other) const {
54+
return BB == Other.BB && It == Other.It;
55+
}
56+
};
57+
58+
// Helper struct: CFG is *not* available, the direct parent is a function,
59+
// iterator's type is std::map<uint32_t, MCInst>::const_iterator (the key is
60+
// the instruction's offset, the mapped value is the instruction itself).
61+
struct RefInBF {
62+
RefInBF(const BinaryFunction *BF, nocfg_const_iterator It)
63+
: BF(BF), It(It) {}
64+
RefInBF(const RefInBF &Other) = default;
65+
RefInBF &operator=(const RefInBF &Other) = default;
66+
67+
const BinaryFunction *BF;
68+
nocfg_const_iterator It;
69+
70+
bool operator<(const RefInBF &Other) const {
71+
return std::tie(BF, It->first) < std::tie(Other.BF, Other.It->first);
72+
}
73+
74+
bool operator==(const RefInBF &Other) const {
75+
return BF == Other.BF && It->first == Other.It->first;
76+
}
77+
};
78+
79+
std::variant<RefInBB, RefInBF> Reference;
80+
81+
// Utility methods to be used like this:
82+
//
83+
// if (auto *Ref = tryGetRefInBB())
84+
// return Ref->doSomething(...);
85+
// return getRefInBF().doSomethingElse(...);
86+
const RefInBB *tryGetRefInBB() const {
87+
assert(std::get_if<RefInBB>(&Reference) ||
88+
std::get_if<RefInBF>(&Reference));
89+
return std::get_if<RefInBB>(&Reference);
90+
}
91+
const RefInBF &getRefInBF() const {
92+
assert(std::get_if<RefInBF>(&Reference));
93+
return *std::get_if<RefInBF>(&Reference);
94+
}
95+
96+
public:
97+
/// Constructs an empty reference.
98+
MCInstReference() : Reference(RefInBB(nullptr, nullptr)) {}
99+
/// Constructs a reference to the instruction inside the basic block.
100+
MCInstReference(const BinaryBasicBlock *BB, const MCInst *Inst)
101+
: Reference(RefInBB(BB, Inst)) {
102+
assert(BB && Inst && "Neither BB nor Inst should be nullptr");
103+
}
104+
/// Constructs a reference to the instruction inside the basic block.
105+
MCInstReference(const BinaryBasicBlock *BB, unsigned Index)
106+
: Reference(RefInBB(BB, &BB->getInstructionAtIndex(Index))) {
107+
assert(BB && "Basic block should not be nullptr");
108+
}
109+
/// Constructs a reference to the instruction inside the function without
110+
/// CFG information.
111+
MCInstReference(const BinaryFunction *BF, nocfg_const_iterator It)
112+
: Reference(RefInBF(BF, It)) {
113+
assert(BF && "Function should not be nullptr");
114+
}
115+
116+
/// Locates an instruction inside a function and returns a reference.
117+
static MCInstReference get(const MCInst *Inst, const BinaryFunction &BF);
118+
119+
bool operator<(const MCInstReference &Other) const {
120+
return Reference < Other.Reference;
121+
}
122+
123+
bool operator==(const MCInstReference &Other) const {
124+
return Reference == Other.Reference;
125+
}
126+
127+
const MCInst &getMCInst() const {
128+
if (auto *Ref = tryGetRefInBB())
129+
return *Ref->It;
130+
return getRefInBF().It->second;
131+
}
132+
133+
operator const MCInst &() const { return getMCInst(); }
134+
135+
operator bool() const {
136+
if (auto *Ref = tryGetRefInBB())
137+
return Ref->BB != nullptr;
138+
return getRefInBF().BF != nullptr;
139+
}
140+
141+
bool hasCFG() const {
142+
return static_cast<bool>(*this) && tryGetRefInBB() != nullptr;
143+
}
144+
145+
const BinaryFunction *getFunction() const {
146+
if (auto *Ref = tryGetRefInBB())
147+
return Ref->BB->getFunction();
148+
return getRefInBF().BF;
149+
}
150+
151+
const BinaryBasicBlock *getBasicBlock() const {
152+
if (auto *Ref = tryGetRefInBB())
153+
return Ref->BB;
154+
return nullptr;
155+
}
156+
157+
raw_ostream &print(raw_ostream &OS) const;
158+
};
159+
160+
static inline raw_ostream &operator<<(raw_ostream &OS,
161+
const MCInstReference &Ref) {
162+
return Ref.print(OS);
163+
}
164+
165+
} // namespace bolt
166+
} // namespace llvm
167+
168+
#endif

bolt/include/bolt/Passes/PAuthGadgetScanner.h

Lines changed: 1 addition & 177 deletions
Original file line numberDiff line numberDiff line change
@@ -11,189 +11,13 @@
1111

1212
#include "bolt/Core/BinaryContext.h"
1313
#include "bolt/Core/BinaryFunction.h"
14+
#include "bolt/Core/MCInstUtils.h"
1415
#include "bolt/Passes/BinaryPasses.h"
1516
#include "llvm/Support/raw_ostream.h"
1617
#include <memory>
1718

1819
namespace llvm {
1920
namespace bolt {
20-
21-
/// @brief MCInstReference represents a reference to an MCInst as stored either
22-
/// in a BinaryFunction (i.e. before a CFG is created), or in a BinaryBasicBlock
23-
/// (after a CFG is created). It aims to store the necessary information to be
24-
/// able to find the specific MCInst in either the BinaryFunction or
25-
/// BinaryBasicBlock data structures later, so that e.g. the InputAddress of
26-
/// the corresponding instruction can be computed.
27-
28-
struct MCInstInBBReference {
29-
BinaryBasicBlock *BB;
30-
int64_t BBIndex;
31-
MCInstInBBReference(BinaryBasicBlock *BB, int64_t BBIndex)
32-
: BB(BB), BBIndex(BBIndex) {}
33-
MCInstInBBReference() : BB(nullptr), BBIndex(0) {}
34-
static MCInstInBBReference get(const MCInst *Inst, BinaryFunction &BF) {
35-
for (BinaryBasicBlock &BB : BF)
36-
for (size_t I = 0; I < BB.size(); ++I)
37-
if (Inst == &BB.getInstructionAtIndex(I))
38-
return MCInstInBBReference(&BB, I);
39-
return {};
40-
}
41-
bool operator==(const MCInstInBBReference &RHS) const {
42-
return BB == RHS.BB && BBIndex == RHS.BBIndex;
43-
}
44-
bool operator<(const MCInstInBBReference &RHS) const {
45-
return std::tie(BB, BBIndex) < std::tie(RHS.BB, RHS.BBIndex);
46-
}
47-
operator MCInst &() const {
48-
assert(BB != nullptr);
49-
return BB->getInstructionAtIndex(BBIndex);
50-
}
51-
uint64_t getAddress() const {
52-
// 4 bytes per instruction on AArch64.
53-
// FIXME: the assumption of 4 byte per instruction needs to be fixed before
54-
// this method gets used on any non-AArch64 binaries (but should be fine for
55-
// pac-ret analysis, as that is an AArch64-specific feature).
56-
return BB->getFunction()->getAddress() + BB->getOffset() + BBIndex * 4;
57-
}
58-
};
59-
60-
raw_ostream &operator<<(raw_ostream &OS, const MCInstInBBReference &);
61-
62-
struct MCInstInBFReference {
63-
BinaryFunction *BF;
64-
uint64_t Offset;
65-
MCInstInBFReference(BinaryFunction *BF, uint64_t Offset)
66-
: BF(BF), Offset(Offset) {}
67-
68-
static MCInstInBFReference get(const MCInst *Inst, BinaryFunction &BF) {
69-
for (auto &I : BF.instrs())
70-
if (Inst == &I.second)
71-
return MCInstInBFReference(&BF, I.first);
72-
return {};
73-
}
74-
75-
MCInstInBFReference() : BF(nullptr), Offset(0) {}
76-
bool operator==(const MCInstInBFReference &RHS) const {
77-
return BF == RHS.BF && Offset == RHS.Offset;
78-
}
79-
bool operator<(const MCInstInBFReference &RHS) const {
80-
if (BF != RHS.BF)
81-
return BF < RHS.BF;
82-
return Offset < RHS.Offset;
83-
}
84-
operator MCInst &() const {
85-
assert(BF != nullptr);
86-
return *BF->getInstructionAtOffset(Offset);
87-
}
88-
89-
uint64_t getOffset() const { return Offset; }
90-
91-
uint64_t getAddress() const { return BF->getAddress() + getOffset(); }
92-
};
93-
94-
raw_ostream &operator<<(raw_ostream &OS, const MCInstInBFReference &);
95-
96-
struct MCInstReference {
97-
enum Kind { FunctionParent, BasicBlockParent };
98-
Kind ParentKind;
99-
union U {
100-
MCInstInBBReference BBRef;
101-
MCInstInBFReference BFRef;
102-
U(MCInstInBBReference BBRef) : BBRef(BBRef) {}
103-
U(MCInstInBFReference BFRef) : BFRef(BFRef) {}
104-
} U;
105-
MCInstReference(MCInstInBBReference BBRef)
106-
: ParentKind(BasicBlockParent), U(BBRef) {}
107-
MCInstReference(MCInstInBFReference BFRef)
108-
: ParentKind(FunctionParent), U(BFRef) {}
109-
MCInstReference(BinaryBasicBlock *BB, int64_t BBIndex)
110-
: MCInstReference(MCInstInBBReference(BB, BBIndex)) {}
111-
MCInstReference(BinaryFunction *BF, uint32_t Offset)
112-
: MCInstReference(MCInstInBFReference(BF, Offset)) {}
113-
114-
static MCInstReference get(const MCInst *Inst, BinaryFunction &BF) {
115-
if (BF.hasCFG())
116-
return MCInstInBBReference::get(Inst, BF);
117-
return MCInstInBFReference::get(Inst, BF);
118-
}
119-
120-
bool operator<(const MCInstReference &RHS) const {
121-
if (ParentKind != RHS.ParentKind)
122-
return ParentKind < RHS.ParentKind;
123-
switch (ParentKind) {
124-
case BasicBlockParent:
125-
return U.BBRef < RHS.U.BBRef;
126-
case FunctionParent:
127-
return U.BFRef < RHS.U.BFRef;
128-
}
129-
llvm_unreachable("");
130-
}
131-
132-
bool operator==(const MCInstReference &RHS) const {
133-
if (ParentKind != RHS.ParentKind)
134-
return false;
135-
switch (ParentKind) {
136-
case BasicBlockParent:
137-
return U.BBRef == RHS.U.BBRef;
138-
case FunctionParent:
139-
return U.BFRef == RHS.U.BFRef;
140-
}
141-
llvm_unreachable("");
142-
}
143-
144-
operator MCInst &() const {
145-
switch (ParentKind) {
146-
case BasicBlockParent:
147-
return U.BBRef;
148-
case FunctionParent:
149-
return U.BFRef;
150-
}
151-
llvm_unreachable("");
152-
}
153-
154-
operator bool() const {
155-
switch (ParentKind) {
156-
case BasicBlockParent:
157-
return U.BBRef.BB != nullptr;
158-
case FunctionParent:
159-
return U.BFRef.BF != nullptr;
160-
}
161-
llvm_unreachable("");
162-
}
163-
164-
uint64_t getAddress() const {
165-
switch (ParentKind) {
166-
case BasicBlockParent:
167-
return U.BBRef.getAddress();
168-
case FunctionParent:
169-
return U.BFRef.getAddress();
170-
}
171-
llvm_unreachable("");
172-
}
173-
174-
BinaryFunction *getFunction() const {
175-
switch (ParentKind) {
176-
case FunctionParent:
177-
return U.BFRef.BF;
178-
case BasicBlockParent:
179-
return U.BBRef.BB->getFunction();
180-
}
181-
llvm_unreachable("");
182-
}
183-
184-
BinaryBasicBlock *getBasicBlock() const {
185-
switch (ParentKind) {
186-
case FunctionParent:
187-
return nullptr;
188-
case BasicBlockParent:
189-
return U.BBRef.BB;
190-
}
191-
llvm_unreachable("");
192-
}
193-
};
194-
195-
raw_ostream &operator<<(raw_ostream &OS, const MCInstReference &);
196-
19721
namespace PAuthGadgetScanner {
19822

19923
// The report classes are designed to be used in an immutable manner.

bolt/lib/Core/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ add_llvm_library(LLVMBOLTCore
3131
GDBIndex.cpp
3232
HashUtilities.cpp
3333
JumpTable.cpp
34+
MCInstUtils.cpp
3435
MCPlusBuilder.cpp
3536
ParallelUtilities.cpp
3637
Relocation.cpp

0 commit comments

Comments
 (0)