1
- use std:: { borrow:: Cow , collections:: BTreeMap } ;
1
+ use std:: {
2
+ borrow:: Cow ,
3
+ collections:: { BTreeMap , HashMap } ,
4
+ } ;
2
5
3
6
use anyhow:: { bail, ensure, Result } ;
4
7
use byteorder:: BigEndian ;
@@ -7,7 +10,7 @@ use object::{
7
10
elf, File , Object , ObjectSection , ObjectSymbol , Relocation , RelocationFlags , RelocationTarget ,
8
11
Symbol , SymbolKind ,
9
12
} ;
10
- use ppc750cl:: { Argument , InsIter , Opcode , GPR } ;
13
+ use ppc750cl:: { Argument , InsIter , Opcode , ParsedIns , GPR } ;
11
14
12
15
use crate :: {
13
16
arch:: { DataType , ObjArch , ProcessCodeResult } ,
@@ -27,6 +30,180 @@ fn is_rel_abs_arg(arg: &Argument) -> bool {
27
30
28
31
// Returns true when `arg` is the `Argument::Offset` variant, whatever value it carries.
fn is_offset_arg ( arg : & Argument ) -> bool { matches ! ( arg, Argument :: Offset ( _) ) }
29
32
33
+ fn guess_data_type_from_load_store_inst_op ( inst_op : Opcode ) -> Option < DataType > {
34
+ match inst_op {
35
+ Opcode :: Lbz | Opcode :: Lbzu | Opcode :: Lbzux | Opcode :: Lbzx => Some ( DataType :: Int8 ) ,
36
+ Opcode :: Lhz | Opcode :: Lhzu | Opcode :: Lhzux | Opcode :: Lhzx => Some ( DataType :: Int16 ) ,
37
+ Opcode :: Lha | Opcode :: Lhau | Opcode :: Lhaux | Opcode :: Lhax => Some ( DataType :: Int16 ) ,
38
+ Opcode :: Lwz | Opcode :: Lwzu | Opcode :: Lwzux | Opcode :: Lwzx => Some ( DataType :: Int32 ) ,
39
+ Opcode :: Lfs | Opcode :: Lfsu | Opcode :: Lfsux | Opcode :: Lfsx => Some ( DataType :: Float ) ,
40
+ Opcode :: Lfd | Opcode :: Lfdu | Opcode :: Lfdux | Opcode :: Lfdx => Some ( DataType :: Double ) ,
41
+
42
+ Opcode :: Stb | Opcode :: Stbu | Opcode :: Stbux | Opcode :: Stbx => Some ( DataType :: Int8 ) ,
43
+ Opcode :: Sth | Opcode :: Sthu | Opcode :: Sthux | Opcode :: Sthx => Some ( DataType :: Int16 ) ,
44
+ Opcode :: Stw | Opcode :: Stwu | Opcode :: Stwux | Opcode :: Stwx => Some ( DataType :: Int32 ) ,
45
+ Opcode :: Stfs | Opcode :: Stfsu | Opcode :: Stfsux | Opcode :: Stfsx => Some ( DataType :: Float ) ,
46
+ Opcode :: Stfd | Opcode :: Stfdu | Opcode :: Stfdux | Opcode :: Stfdx => Some ( DataType :: Double ) ,
47
+ _ => None ,
48
+ }
49
+ }
50
+
51
+ // Given an instruction, determine if it could accessing data at the address in a register.
52
+ // If so, return the offset added to the register's address, the register containing that address,
53
+ // and (optionally) which destination register the address is being copied into.
54
+ fn get_offset_and_addr_gpr_for_possible_pool_reference (
55
+ opcode : Opcode ,
56
+ simplified : & ParsedIns ,
57
+ ) -> Option < ( i16 , GPR , Option < GPR > ) > {
58
+ let args = & simplified. args ;
59
+ if guess_data_type_from_load_store_inst_op ( opcode) . is_some ( ) {
60
+ match ( args[ 1 ] , args[ 2 ] ) {
61
+ ( Argument :: Offset ( offset) , Argument :: GPR ( addr_src_gpr) ) => {
62
+ // e.g. lwz. Immediate offset.
63
+ Some ( ( offset. 0 , addr_src_gpr, None ) )
64
+ }
65
+ ( Argument :: GPR ( addr_src_gpr) , Argument :: GPR ( _offset_gpr) ) => {
66
+ // e.g. lwzx. The offset is in a register and was likely calculated from an index.
67
+ // Treat the offset as being 0 in this case to show the first element of the array.
68
+ // It may be possible to show all elements by figuring out the stride of the array
69
+ // from the calculations performed on the index before it's put into offset_gpr, but
70
+ // this would be much more complicated, so it's not currently done.
71
+ Some ( ( 0 , addr_src_gpr, None ) )
72
+ }
73
+ _ => None ,
74
+ }
75
+ } else {
76
+ // If it's not a load/store instruction, there's two more possibilities we need to handle.
77
+ // 1. It could be a reference to @stringBase.
78
+ // 2. It could be moving the relocation address plus an offset into a different register to
79
+ // load from later.
80
+ // If either of these match, we also want to return the destination register that the
81
+ // address is being copied into so that we can detect any future references to that new
82
+ // register as well.
83
+ match ( opcode, args[ 0 ] , args[ 1 ] , args[ 2 ] ) {
84
+ (
85
+ Opcode :: Addi ,
86
+ Argument :: GPR ( addr_dst_gpr) ,
87
+ Argument :: GPR ( addr_src_gpr) ,
88
+ Argument :: Simm ( simm) ,
89
+ ) => Some ( ( simm. 0 , addr_src_gpr, Some ( addr_dst_gpr) ) ) ,
90
+ (
91
+ Opcode :: Or ,
92
+ Argument :: GPR ( addr_dst_gpr) ,
93
+ Argument :: GPR ( addr_src_gpr) ,
94
+ Argument :: None ,
95
+ ) => Some ( ( 0 , addr_src_gpr, Some ( addr_dst_gpr) ) ) , // `mr` or `mr.`
96
+ _ => None ,
97
+ }
98
+ }
99
+ }
100
+
101
+ // We create a fake relocation for an instruction, vaguely simulating what the actual relocation
102
+ // might have looked like if it wasn't pooled. This is so minimal changes are needed to display
103
+ // pooled accesses vs non-pooled accesses. We set the relocation type to R_PPC_NONE to indicate that
104
+ // there isn't really a relocation here, as copying the pool relocation's type wouldn't make sense.
105
+ // Also, if this instruction is accessing the middle of a symbol instead of the start, we add an
106
+ // addend to indicate that.
107
+ fn make_fake_pool_reloc (
108
+ offset : i16 ,
109
+ cur_addr : u32 ,
110
+ pool_reloc : & ObjReloc ,
111
+ sections : & [ ObjSection ] ,
112
+ ) -> Option < ObjReloc > {
113
+ let offset_from_pool = pool_reloc. addend + offset as i64 ;
114
+ let target_address = pool_reloc. target . address . checked_add_signed ( offset_from_pool) ?;
115
+ let orig_section_index = pool_reloc. target . orig_section_index ?;
116
+ let section = sections. iter ( ) . find ( |s| s. orig_index == orig_section_index) ?;
117
+ let target_symbol = section
118
+ . symbols
119
+ . iter ( )
120
+ . find ( |s| s. size > 0 && ( s. address ..s. address + s. size ) . contains ( & target_address) ) ?;
121
+ let addend = ( target_address - target_symbol. address ) as i64 ;
122
+ Some ( ObjReloc {
123
+ flags : RelocationFlags :: Elf { r_type : elf:: R_PPC_NONE } ,
124
+ address : cur_addr as u64 ,
125
+ target : target_symbol. clone ( ) ,
126
+ addend,
127
+ } )
128
+ }
129
+
130
+ // Searches through all instructions in a function, determining which registers have the addresses
131
+ // of pooled data relocations in them, finding which instructions load data from those addresses,
132
+ // and constructing a mapping of the address of that instruction to a "fake pool relocation" that
133
+ // simulates what that instruction's relocation would look like if data hadn't been pooled.
134
+ // Limitations: This method currently only goes through the instructions in a function in linear
135
+ // order, from start to finish. It does *not* follow any branches. This means that it could have
136
+ // false positives or false negatives in determining which relocation is currently loaded in which
137
+ // register at any given point in the function, as control flow is not respected.
138
+ // There are currently no known examples of this method producing inaccurate results in reality, but
139
+ // if examples are found, it may be possible to update this method to also follow all branches so
140
+ // that it produces more accurate results.
141
+ fn generate_fake_pool_reloc_for_addr_mapping (
142
+ address : u64 ,
143
+ code : & [ u8 ] ,
144
+ relocations : & [ ObjReloc ] ,
145
+ sections : & [ ObjSection ] ,
146
+ ) -> HashMap < u32 , ObjReloc > {
147
+ let mut active_pool_relocs = HashMap :: new ( ) ;
148
+ let mut pool_reloc_for_addr = HashMap :: new ( ) ;
149
+ for ( cur_addr, ins) in InsIter :: new ( code, address as u32 ) {
150
+ let simplified = ins. simplified ( ) ;
151
+ let reloc = relocations. iter ( ) . find ( |r| ( r. address as u32 & !3 ) == cur_addr) ;
152
+
153
+ if let Some ( reloc) = reloc {
154
+ // This instruction has a real relocation, so it may be a pool load we want to keep
155
+ // track of.
156
+ let args = & simplified. args ;
157
+ match ( ins. op , args[ 0 ] , args[ 1 ] , args[ 2 ] ) {
158
+ (
159
+ Opcode :: Addi ,
160
+ Argument :: GPR ( addr_dst_gpr) ,
161
+ Argument :: GPR ( _addr_src_gpr) ,
162
+ Argument :: Simm ( _simm) ,
163
+ ) => {
164
+ active_pool_relocs. insert ( addr_dst_gpr. 0 , reloc. clone ( ) ) ; // `lis` + `addi`
165
+ }
166
+ (
167
+ Opcode :: Ori ,
168
+ Argument :: GPR ( addr_dst_gpr) ,
169
+ Argument :: GPR ( _addr_src_gpr) ,
170
+ Argument :: Uimm ( _uimm) ,
171
+ ) => {
172
+ active_pool_relocs. insert ( addr_dst_gpr. 0 , reloc. clone ( ) ) ; // `lis` + `ori`
173
+ }
174
+ _ => { }
175
+ }
176
+ } else if let Some ( ( offset, addr_src_gpr, addr_dst_gpr) ) =
177
+ get_offset_and_addr_gpr_for_possible_pool_reference ( ins. op , & simplified)
178
+ {
179
+ // This instruction doesn't have a real relocation, so it may be a reference to one of
180
+ // the already-loaded pools.
181
+ if let Some ( pool_reloc) = active_pool_relocs. get ( & addr_src_gpr. 0 ) {
182
+ if let Some ( fake_pool_reloc) =
183
+ make_fake_pool_reloc ( offset, cur_addr, pool_reloc, sections)
184
+ {
185
+ pool_reloc_for_addr. insert ( cur_addr, fake_pool_reloc) ;
186
+ }
187
+ if let Some ( addr_dst_gpr) = addr_dst_gpr {
188
+ // If the address of the pool relocation got copied into another register, we
189
+ // need to keep track of it in that register too as future instructions may
190
+ // reference the symbol indirectly via this new register, instead of the
191
+ // register the symbol's address was originally loaded into.
192
+ // For example, the start of the function might `lis` + `addi` the start of the
193
+ // ...data pool into r25, and then later the start of a loop will `addi` r25
194
+ // with the offset within the .data section of an array variable into r21.
195
+ // Then the body of the loop will `lwzx` one of the array elements from r21.
196
+ let mut new_reloc = pool_reloc. clone ( ) ;
197
+ new_reloc. addend += offset as i64 ;
198
+ active_pool_relocs. insert ( addr_dst_gpr. 0 , new_reloc) ;
199
+ }
200
+ }
201
+ }
202
+ }
203
+
204
+ pool_reloc_for_addr
205
+ }
206
+
30
207
pub struct ObjArchPpc {
31
208
/// Exception info
32
209
pub extab : Option < BTreeMap < usize , ExceptionInfo > > ,
@@ -45,10 +222,13 @@ impl ObjArch for ObjArchPpc {
45
222
relocations : & [ ObjReloc ] ,
46
223
line_info : & BTreeMap < u64 , u32 > ,
47
224
config : & DiffObjConfig ,
225
+ sections : & [ ObjSection ] ,
48
226
) -> Result < ProcessCodeResult > {
49
227
let ins_count = code. len ( ) / 4 ;
50
228
let mut ops = Vec :: < u16 > :: with_capacity ( ins_count) ;
51
229
let mut insts = Vec :: < ObjIns > :: with_capacity ( ins_count) ;
230
+ let fake_pool_reloc_for_addr =
231
+ generate_fake_pool_reloc_for_addr_mapping ( address, code, relocations, sections) ;
52
232
for ( cur_addr, mut ins) in InsIter :: new ( code, address as u32 ) {
53
233
let reloc = relocations. iter ( ) . find ( |r| ( r. address as u32 & !3 ) == cur_addr) ;
54
234
if let Some ( reloc) = reloc {
@@ -146,6 +326,7 @@ impl ObjArch for ObjArchPpc {
146
326
mnemonic : Cow :: Borrowed ( simplified. mnemonic ) ,
147
327
args,
148
328
reloc : reloc. cloned ( ) ,
329
+ fake_pool_reloc : fake_pool_reloc_for_addr. get ( & cur_addr) . cloned ( ) ,
149
330
op : ins. op as u16 ,
150
331
branch_dest,
151
332
line,
@@ -173,6 +354,7 @@ impl ObjArch for ObjArchPpc {
173
354
fn display_reloc ( & self , flags : RelocationFlags ) -> Cow < ' static , str > {
174
355
match flags {
175
356
RelocationFlags :: Elf { r_type } => match r_type {
357
+ elf:: R_PPC_NONE => Cow :: Borrowed ( "R_PPC_NONE" ) , // We use this for fake pool relocs
176
358
elf:: R_PPC_ADDR16_LO => Cow :: Borrowed ( "R_PPC_ADDR16_LO" ) ,
177
359
elf:: R_PPC_ADDR16_HI => Cow :: Borrowed ( "R_PPC_ADDR16_HI" ) ,
178
360
elf:: R_PPC_ADDR16_HA => Cow :: Borrowed ( "R_PPC_ADDR16_HA" ) ,
@@ -188,27 +370,16 @@ impl ObjArch for ObjArchPpc {
188
370
}
189
371
190
372
fn guess_data_type ( & self , instruction : & ObjIns ) -> Option < super :: DataType > {
191
- // Always shows the first string of the table. Not ideal, but it's really hard to find
192
- // the actual string being referenced.
193
- if instruction. reloc . as_ref ( ) . is_some_and ( |r| r. target . name . starts_with ( "@stringBase" ) ) {
373
+ if instruction
374
+ . reloc
375
+ . as_ref ( )
376
+ . or ( instruction. fake_pool_reloc . as_ref ( ) )
377
+ . is_some_and ( |r| r. target . name . starts_with ( "@stringBase" ) )
378
+ {
194
379
return Some ( DataType :: String ) ;
195
380
}
196
381
197
- match Opcode :: from ( instruction. op as u8 ) {
198
- Opcode :: Lbz | Opcode :: Lbzu | Opcode :: Lbzux | Opcode :: Lbzx => Some ( DataType :: Int8 ) ,
199
- Opcode :: Lhz | Opcode :: Lhzu | Opcode :: Lhzux | Opcode :: Lhzx => Some ( DataType :: Int16 ) ,
200
- Opcode :: Lha | Opcode :: Lhau | Opcode :: Lhaux | Opcode :: Lhax => Some ( DataType :: Int16 ) ,
201
- Opcode :: Lwz | Opcode :: Lwzu | Opcode :: Lwzux | Opcode :: Lwzx => Some ( DataType :: Int32 ) ,
202
- Opcode :: Lfs | Opcode :: Lfsu | Opcode :: Lfsux | Opcode :: Lfsx => Some ( DataType :: Float ) ,
203
- Opcode :: Lfd | Opcode :: Lfdu | Opcode :: Lfdux | Opcode :: Lfdx => Some ( DataType :: Double ) ,
204
-
205
- Opcode :: Stb | Opcode :: Stbu | Opcode :: Stbux | Opcode :: Stbx => Some ( DataType :: Int8 ) ,
206
- Opcode :: Sth | Opcode :: Sthu | Opcode :: Sthux | Opcode :: Sthx => Some ( DataType :: Int16 ) ,
207
- Opcode :: Stw | Opcode :: Stwu | Opcode :: Stwux | Opcode :: Stwx => Some ( DataType :: Int32 ) ,
208
- Opcode :: Stfs | Opcode :: Stfsu | Opcode :: Stfsux | Opcode :: Stfsx => Some ( DataType :: Float ) ,
209
- Opcode :: Stfd | Opcode :: Stfdu | Opcode :: Stfdux | Opcode :: Stfdx => Some ( DataType :: Double ) ,
210
- _ => None ,
211
- }
382
+ guess_data_type_from_load_store_inst_op ( Opcode :: from ( instruction. op as u8 ) )
212
383
}
213
384
214
385
fn display_data_type ( & self , ty : DataType , bytes : & [ u8 ] ) -> Option < String > {
0 commit comments