forked from chromium/chromium
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathassembly_program.h
254 lines (191 loc) · 9.27 KB
/
assembly_program.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef COURGETTE_ASSEMBLY_PROGRAM_H_
#define COURGETTE_ASSEMBLY_PROGRAM_H_
#include <stddef.h>
#include <stdint.h>
#include <map>
#include <memory>
#include <set>
#include <vector>
#include "base/bind.h"
#include "base/macros.h"
#include "base/memory/free_deleter.h"
#include "courgette/courgette.h"
#include "courgette/image_utils.h"
#include "courgette/label_manager.h"
#include "courgette/memory_allocator.h"
namespace courgette {
class EncodedProgram;
// Opcodes of the simple assembly language. Values are consecutive, starting at
// 0. Instruction stores the opcode in a 4-bit field (Instruction::op_), so
// every value here has to fit in 4 bits.
enum OP {
  // ORIGIN <rva> - set current address for assembly.
  ORIGIN = 0,
  // Generates a base relocation table.
  MAKEPERELOCS = 1,
  // Generates a base relocation table.
  MAKEELFRELOCS = 2,
  // DEFBYTE <value> - emit a byte literal.
  DEFBYTE = 3,
  // REL32 <label> - emit a rel32 encoded reference to 'label'.
  REL32 = 4,
  // ABS32 <label> - emit an abs32 encoded reference to 'label'.
  ABS32 = 5,
  // REL32ARM <c_op> <label> - arm-specific rel32 reference
  REL32ARM = 6,
  // Generates a base relocation table.
  MAKEELFARMRELOCS = 7,
  // Emits any number of byte literals
  DEFBYTES = 8,
  // ABS64 <label> - emit an abs64 encoded reference to 'label'.
  ABS64 = 9,
  // Follows the final opcode, so its value equals the number of opcodes above.
  LAST_OP = 10
};
// Common base of all instructions. A program holds a great many instructions,
// so each one is kept as small as possible: there are no virtual functions,
// and the opcode shares a single 32-bit word with a subclass-defined payload.
class Instruction {
 public:
  // Returns the opcode stored in |op_|.
  OP op() const {
    const OP opcode = static_cast<OP>(op_);
    return opcode;
  }

 protected:
  // Construction is reserved for subclasses. |info| is the subclass payload;
  // it is kept in the 28-bit |info_| field.
  Instruction(OP op, unsigned int info) : op_(op), info_(info) {}

  // Same as above with a zero payload.
  explicit Instruction(OP op) : Instruction(op, 0) {}

  uint32_t op_ : 4;     // A few bits to store the OP code.
  uint32_t info_ : 28;  // Remaining bits in first word available to subclass.

 private:
  DISALLOW_COPY_AND_ASSIGN(Instruction);
};
// An interface to receive emitted instructions parsed from an executable.
//
// AssemblyProgram::GenerateInstructions() passes an InstructionReceptor to its
// InstructionGenerator callback, which reports each instruction by calling the
// matching Emit*() method below. All Emit*() methods are pure virtual, so what
// "emitting" does (e.g. counting space vs. storing instructions) is decided by
// the concrete receptor.
//
// NOTE(review): the CheckBool results presumably follow the convention stated
// for InstructionGenerator (true on success, false otherwise) -- confirm
// against the implementations.
class InstructionReceptor {
public:
InstructionReceptor() = default;
// Virtual so that a concrete receptor can be destroyed through this interface.
virtual ~InstructionReceptor() = default;
// Generates an entire base relocation table.
virtual CheckBool EmitPeRelocs() = 0;
// Generates an ELF style relocation table for X86.
virtual CheckBool EmitElfRelocation() = 0;
// Generates an ELF style relocation table for ARM.
virtual CheckBool EmitElfARMRelocation() = 0;
// Following instruction will be assembled at address 'rva'.
virtual CheckBool EmitOrigin(RVA rva) = 0;
// Generates a single byte of data or machine instruction.
virtual CheckBool EmitSingleByte(uint8_t byte) = 0;
// Generates multiple bytes of data or machine instructions.
// NOTE(review): presumably the |len| bytes starting at |bytes| -- confirm.
virtual CheckBool EmitMultipleBytes(const uint8_t* bytes, size_t len) = 0;
// Generates a 4-byte relative reference to address of 'label'.
virtual CheckBool EmitRel32(Label* label) = 0;
// Generates a 4-byte relative reference to address of 'label' for ARM.
// NOTE(review): the roles of |op|, |arm_op| and |op_size| are not documented
// here. |op| presumably matches the <c_op> operand of the REL32ARM opcode, and
// |arm_op| / |op_size| presumably describe the original instruction bytes and
// their length -- TODO confirm against the ARM disassembler.
virtual CheckBool EmitRel32ARM(uint16_t op,
Label* label,
const uint8_t* arm_op,
uint16_t op_size) = 0;
// Generates a 4-byte absolute reference to address of 'label'.
virtual CheckBool EmitAbs32(Label* label) = 0;
// Generates an 8-byte absolute reference to address of 'label'.
virtual CheckBool EmitAbs64(Label* label) = 0;
private:
DISALLOW_COPY_AND_ASSIGN(InstructionReceptor);
};
// An AssemblyProgram is the result of disassembling an executable file.
//
// * The disassembler creates labels in the AssemblyProgram and emits
// 'Instructions'.
// * The disassembler then calls DefaultAssignIndexes to assign
// addresses to positions in the address tables.
// * [Optional step]
// * At this point the AssemblyProgram can be converted into an
// EncodedProgram and serialized to an output stream.
// * Later, the EncodedProgram can be deserialized and assembled into
// the original file.
//
// The optional step is to modify the AssemblyProgram. One form of modification
// is to assign indexes in such a way as to make the EncodedProgram for this
// AssemblyProgram look more like the EncodedProgram for some other
// AssemblyProgram. The modification process should call UnassignIndexes, do
// its own assignment, and then call AssignRemainingIndexes to ensure all
// indexes are assigned.
class AssemblyProgram {
public:
// Callback that receives the Label of an instruction; used by
// HandleInstructionLabels().
using LabelHandler = base::Callback<void(Label*)>;
// Maps an opcode to the LabelHandler to invoke for instructions having that
// opcode.
using LabelHandlerMap = std::map<OP, LabelHandler>;
// A callback for GenerateInstructions() to emit instructions. The first
// argument (AssemblyProgram*) is provided for Label-related feature access.
// The second argument (InstructionReceptor*) is a receptor for instructions.
// The callback (which gets called in 2 passes) should return true on success,
// and false otherwise.
using InstructionGenerator =
base::Callback<CheckBool(AssemblyProgram*, InstructionReceptor*)>;
// |kind| is the type of executable this program represents (presumably stored
// in |kind_|; the constructor is defined outside this header).
explicit AssemblyProgram(ExecutableType kind);
~AssemblyProgram();
// Returns the stored executable type, |kind_|.
ExecutableType kind() const { return kind_; }
// Sets |image_base_|, the desired or mandated base address of the image.
void set_image_base(uint64_t image_base) { image_base_ = image_base; }
// Traverses RVAs in |abs32_visitor| and |rel32_visitor| to precompute Labels.
void PrecomputeLabels(RvaVisitor* abs32_visitor, RvaVisitor* rel32_visitor);
// Removes underused Labels. The threshold used (0 = no trimming) is
// architecture-dependent.
void TrimLabels();
// Label index management. See the comment above this class for the intended
// call sequence: DefaultAssignIndexes() after disassembly; or, when adjusting
// a program, UnassignIndexes(), custom assignment, then
// AssignRemainingIndexes() to ensure all indexes are assigned.
void UnassignIndexes();
void DefaultAssignIndexes();
void AssignRemainingIndexes();
// Looks up abs32 label. Returns null if none found.
Label* FindAbs32Label(RVA rva);
// Looks up rel32 label. Returns null if none found.
Label* FindRel32Label(RVA rva);
// Produces an EncodedProgram from this program.
// NOTE(review): presumably returns null on failure -- confirm in the .cc.
std::unique_ptr<EncodedProgram> Encode() const;
// For each |instruction| in |instructions_|, looks up its opcode in
// |handler_map| for a handler. If a handler exists, invokes it, passing the
// |instruction|'s label. We assume that |handler_map| has correct keys, i.e.,
// only opcodes of instructions that have a label.
void HandleInstructionLabels(const LabelHandlerMap& handler_map) const;
// Calls |gen| in 2 passes to emit instructions. In pass 1 we provide a
// receptor to count space requirement. In pass 2 we provide a receptor to
// store instructions.
CheckBool GenerateInstructions(const InstructionGenerator& gen);
// TODO(huangs): Implement these in InstructionStoreReceptor.
// Instructions will be assembled in the order they are emitted.
// Each Emit*() result is marked WARN_UNUSED_RESULT, so callers must check it.
// Generates an entire base relocation table.
CheckBool EmitPeRelocs() WARN_UNUSED_RESULT;
// Generates an ELF style relocation table for X86.
CheckBool EmitElfRelocation() WARN_UNUSED_RESULT;
// Generates an ELF style relocation table for ARM.
CheckBool EmitElfARMRelocation() WARN_UNUSED_RESULT;
// Following instruction will be assembled at address 'rva'.
CheckBool EmitOrigin(RVA rva) WARN_UNUSED_RESULT;
// Generates a single byte of data or machine instruction.
CheckBool EmitSingleByte(uint8_t byte) WARN_UNUSED_RESULT;
// Generates multiple bytes of data or machine instructions.
CheckBool EmitMultipleBytes(const uint8_t* bytes,
size_t len) WARN_UNUSED_RESULT;
// Generates a 4-byte relative reference to address of 'label'.
CheckBool EmitRel32(Label* label) WARN_UNUSED_RESULT;
// Generates a 4-byte relative reference to address of 'label' for ARM.
CheckBool EmitRel32ARM(uint16_t op,
Label* label,
const uint8_t* arm_op,
uint16_t op_size) WARN_UNUSED_RESULT;
// Generates a 4-byte absolute reference to address of 'label'.
CheckBool EmitAbs32(Label* label) WARN_UNUSED_RESULT;
// Generates an 8-byte absolute reference to address of 'label'.
CheckBool EmitAbs64(Label* label) WARN_UNUSED_RESULT;
private:
// Container holding the program's instructions as raw pointers.
using InstructionVector = NoThrowBuffer<Instruction*>;
// Owning pointer to a single Instruction, released via UncheckedDeleter.
using ScopedInstruction =
std::unique_ptr<Instruction, UncheckedDeleter<Instruction>>;
// Type of executable this program represents; exposed through kind().
ExecutableType kind_;
// Emit() receives |instruction| by owning pointer, i.e. ownership is passed
// in. EmitShared() receives a raw pointer.
// NOTE(review): EmitShared() is presumably for instructions shared between
// several positions, such as those from GetByteInstruction() -- confirm in
// the .cc.
CheckBool Emit(ScopedInstruction instruction) WARN_UNUSED_RESULT;
CheckBool EmitShared(Instruction* instruction) WARN_UNUSED_RESULT;
// Defined outside this header.
// NOTE(review): presumably a label-count threshold related to TrimLabels() --
// TODO confirm.
static const int kLabelLowerLimit;
// Looks up a label or creates a new one. Might return NULL.
Label* FindLabel(RVA rva, RVAToLabel* labels);
// Helper methods for the public versions.
static void UnassignIndexes(RVAToLabel* labels);
static void DefaultAssignIndexes(RVAToLabel* labels);
static void AssignRemainingIndexes(RVAToLabel* labels);
// Sharing instructions that emit a single byte saves a lot of space.
Instruction* GetByteInstruction(uint8_t byte);
// Array of Instruction pointers backing GetByteInstruction(); the array itself
// is released with base::FreeDeleter.
std::unique_ptr<Instruction* [], base::FreeDeleter> byte_instruction_cache_;
uint64_t image_base_; // Desired or mandated base address of image.
InstructionVector instructions_; // All the instructions in program.
// Storage and lookup of Labels associated with target addresses. We use
// separate abs32 and rel32 labels.
LabelManager abs32_label_manager_;
LabelManager rel32_label_manager_;
DISALLOW_COPY_AND_ASSIGN(AssemblyProgram);
};
// Converts |program| into encoded form and hands the result back through
// |*output|.
// Returns C_OK on success. On failure, returns an error status and sets
// |*output| to null.
Status Encode(const AssemblyProgram& program,
std::unique_ptr<EncodedProgram>* output);
} // namespace courgette
#endif // COURGETTE_ASSEMBLY_PROGRAM_H_