1
- use std:: { borrow:: Cow , collections:: BTreeMap } ;
1
+ use std:: {
2
+ borrow:: Cow ,
3
+ collections:: { BTreeMap , HashMap } ,
4
+ } ;
2
5
3
6
use anyhow:: { bail, ensure, Result } ;
4
7
use byteorder:: BigEndian ;
@@ -7,7 +10,7 @@ use object::{
7
10
elf, File , Object , ObjectSection , ObjectSymbol , Relocation , RelocationFlags , RelocationTarget ,
8
11
Symbol , SymbolKind ,
9
12
} ;
10
- use ppc750cl:: { Argument , InsIter , Opcode , GPR } ;
13
+ use ppc750cl:: { Argument , InsIter , Opcode , ParsedIns , GPR } ;
11
14
12
15
use crate :: {
13
16
arch:: { DataType , ObjArch , ProcessCodeResult } ,
@@ -49,6 +52,8 @@ impl ObjArch for ObjArchPpc {
49
52
let ins_count = code. len ( ) / 4 ;
50
53
let mut ops = Vec :: < u16 > :: with_capacity ( ins_count) ;
51
54
let mut insts = Vec :: < ObjIns > :: with_capacity ( ins_count) ;
55
+ let fake_pool_reloc_for_addr =
56
+ generate_fake_pool_reloc_for_addr_mapping ( address, code, relocations) ;
52
57
for ( cur_addr, mut ins) in InsIter :: new ( code, address as u32 ) {
53
58
let reloc = relocations. iter ( ) . find ( |r| ( r. address as u32 & !3 ) == cur_addr) ;
54
59
if let Some ( reloc) = reloc {
@@ -145,7 +150,7 @@ impl ObjArch for ObjArchPpc {
145
150
size : 4 ,
146
151
mnemonic : Cow :: Borrowed ( simplified. mnemonic ) ,
147
152
args,
148
- reloc : reloc. cloned ( ) ,
153
+ reloc : reloc. or ( fake_pool_reloc_for_addr . get ( & cur_addr ) ) . cloned ( ) ,
149
154
op : ins. op as u16 ,
150
155
branch_dest,
151
156
line,
@@ -173,6 +178,7 @@ impl ObjArch for ObjArchPpc {
173
178
fn display_reloc ( & self , flags : RelocationFlags ) -> Cow < ' static , str > {
174
179
match flags {
175
180
RelocationFlags :: Elf { r_type } => match r_type {
181
+ elf:: R_PPC_NONE => Cow :: Borrowed ( "R_PPC_NONE" ) , // We use this for fake pool relocs
176
182
elf:: R_PPC_ADDR16_LO => Cow :: Borrowed ( "R_PPC_ADDR16_LO" ) ,
177
183
elf:: R_PPC_ADDR16_HI => Cow :: Borrowed ( "R_PPC_ADDR16_HI" ) ,
178
184
elf:: R_PPC_ADDR16_HA => Cow :: Borrowed ( "R_PPC_ADDR16_HA" ) ,
@@ -188,26 +194,22 @@ impl ObjArch for ObjArchPpc {
188
194
}
189
195
190
196
fn guess_data_type ( & self , instruction : & ObjIns ) -> Option < super :: DataType > {
191
- // Always shows the first string of the table. Not ideal, but it's really hard to find
192
- // the actual string being referenced.
193
197
if instruction. reloc . as_ref ( ) . is_some_and ( |r| r. target . name . starts_with ( "@stringBase" ) ) {
194
198
return Some ( DataType :: String ) ;
195
199
}
196
200
197
- match Opcode :: from ( instruction. op as u8 ) {
198
- Opcode :: Lbz | Opcode :: Lbzu | Opcode :: Lbzux | Opcode :: Lbzx => Some ( DataType :: Int8 ) ,
199
- Opcode :: Lhz | Opcode :: Lhzu | Opcode :: Lhzux | Opcode :: Lhzx => Some ( DataType :: Int16 ) ,
200
- Opcode :: Lha | Opcode :: Lhau | Opcode :: Lhaux | Opcode :: Lhax => Some ( DataType :: Int16 ) ,
201
- Opcode :: Lwz | Opcode :: Lwzu | Opcode :: Lwzux | Opcode :: Lwzx => Some ( DataType :: Int32 ) ,
202
- Opcode :: Lfs | Opcode :: Lfsu | Opcode :: Lfsux | Opcode :: Lfsx => Some ( DataType :: Float ) ,
203
- Opcode :: Lfd | Opcode :: Lfdu | Opcode :: Lfdux | Opcode :: Lfdx => Some ( DataType :: Double ) ,
204
-
205
- Opcode :: Stb | Opcode :: Stbu | Opcode :: Stbux | Opcode :: Stbx => Some ( DataType :: Int8 ) ,
206
- Opcode :: Sth | Opcode :: Sthu | Opcode :: Sthux | Opcode :: Sthx => Some ( DataType :: Int16 ) ,
207
- Opcode :: Stw | Opcode :: Stwu | Opcode :: Stwux | Opcode :: Stwx => Some ( DataType :: Int32 ) ,
208
- Opcode :: Stfs | Opcode :: Stfsu | Opcode :: Stfsux | Opcode :: Stfsx => Some ( DataType :: Float ) ,
209
- Opcode :: Stfd | Opcode :: Stfdu | Opcode :: Stfdux | Opcode :: Stfdx => Some ( DataType :: Double ) ,
210
- _ => None ,
201
+ let op = Opcode :: from ( instruction. op as u8 ) ;
202
+ if let Some ( ty) = guess_data_type_from_load_store_inst_op ( op) {
203
+ Some ( ty)
204
+ } else if op == Opcode :: Addi {
205
+ // Assume that any addi instruction that references a local symbol is loading a string.
206
+ // This hack is not ideal and results in tons of false positives where it will show
207
+ // garbage strings (e.g. misinterpreting arrays, float literals, etc).
208
+ // But not all strings are in the @stringBase pool, so the condition above that checks
209
+ // the target symbol name would miss some.
210
+ Some ( DataType :: String )
211
+ } else {
212
+ None
211
213
}
212
214
}
213
215
@@ -381,3 +383,196 @@ fn make_symbol_ref(symbol: &Symbol) -> Result<ExtabSymbolRef> {
381
383
let demangled_name = cwdemangle:: demangle ( & name, & cwdemangle:: DemangleOptions :: default ( ) ) ;
382
384
Ok ( ExtabSymbolRef { original_index : symbol. index ( ) . 0 , name, demangled_name } )
383
385
}
386
+
387
+ fn guess_data_type_from_load_store_inst_op ( inst_op : Opcode ) -> Option < DataType > {
388
+ match inst_op {
389
+ Opcode :: Lbz | Opcode :: Lbzu | Opcode :: Lbzux | Opcode :: Lbzx => Some ( DataType :: Int8 ) ,
390
+ Opcode :: Lhz | Opcode :: Lhzu | Opcode :: Lhzux | Opcode :: Lhzx => Some ( DataType :: Int16 ) ,
391
+ Opcode :: Lha | Opcode :: Lhau | Opcode :: Lhaux | Opcode :: Lhax => Some ( DataType :: Int16 ) ,
392
+ Opcode :: Lwz | Opcode :: Lwzu | Opcode :: Lwzux | Opcode :: Lwzx => Some ( DataType :: Int32 ) ,
393
+ Opcode :: Lfs | Opcode :: Lfsu | Opcode :: Lfsux | Opcode :: Lfsx => Some ( DataType :: Float ) ,
394
+ Opcode :: Lfd | Opcode :: Lfdu | Opcode :: Lfdux | Opcode :: Lfdx => Some ( DataType :: Double ) ,
395
+
396
+ Opcode :: Stb | Opcode :: Stbu | Opcode :: Stbux | Opcode :: Stbx => Some ( DataType :: Int8 ) ,
397
+ Opcode :: Sth | Opcode :: Sthu | Opcode :: Sthux | Opcode :: Sthx => Some ( DataType :: Int16 ) ,
398
+ Opcode :: Stw | Opcode :: Stwu | Opcode :: Stwux | Opcode :: Stwx => Some ( DataType :: Int32 ) ,
399
+ Opcode :: Stfs | Opcode :: Stfsu | Opcode :: Stfsux | Opcode :: Stfsx => Some ( DataType :: Float ) ,
400
+ Opcode :: Stfd | Opcode :: Stfdu | Opcode :: Stfdux | Opcode :: Stfdx => Some ( DataType :: Double ) ,
401
+ _ => None ,
402
+ }
403
+ }
404
+
405
+ // Given an instruction, determine if it could accessing data at the address in a register.
406
+ // If so, return the offset added to the register's address, the register containing that address,
407
+ // and (optionally) which destination register the address is being copied into.
408
+ fn get_offset_and_addr_gpr_for_possible_pool_reference (
409
+ opcode : Opcode ,
410
+ simplified : & ParsedIns ,
411
+ ) -> Option < ( i16 , GPR , Option < GPR > ) > {
412
+ let args = & simplified. args ;
413
+ if guess_data_type_from_load_store_inst_op ( opcode) . is_some ( ) {
414
+ match ( args[ 1 ] , args[ 2 ] ) {
415
+ ( Argument :: Offset ( offset) , Argument :: GPR ( addr_src_gpr) ) => {
416
+ // e.g. lwz. Immediate offset.
417
+ Some ( ( offset. 0 , addr_src_gpr, None ) )
418
+ }
419
+ ( Argument :: GPR ( addr_src_gpr) , Argument :: GPR ( _offset_gpr) ) => {
420
+ // e.g. lwzx. The offset is in a register and was likely calculated from an index.
421
+ // Treat the offset as being 0 in this case to show the first element of the array.
422
+ // It may be possible to show all elements by figuring out the stride of the array
423
+ // from the calculations performed on the index before it's put into offset_gpr, but
424
+ // this would be much more complicated, so it's not currently done.
425
+ Some ( ( 0 , addr_src_gpr, None ) )
426
+ }
427
+ _ => None ,
428
+ }
429
+ } else {
430
+ // If it's not a load/store instruction, there's two more possibilities we need to handle.
431
+ // 1. It could be loading a pointer to a string.
432
+ // 2. It could be moving the relocation address plus an offset into a different register to
433
+ // load from later.
434
+ // If either of these match, we also want to return the destination register that the
435
+ // address is being copied into so that we can detect any future references to that new
436
+ // register as well.
437
+ match ( opcode, args[ 0 ] , args[ 1 ] , args[ 2 ] ) {
438
+ (
439
+ Opcode :: Addi ,
440
+ Argument :: GPR ( addr_dst_gpr) ,
441
+ Argument :: GPR ( addr_src_gpr) ,
442
+ Argument :: Simm ( simm) ,
443
+ ) => Some ( ( simm. 0 , addr_src_gpr, Some ( addr_dst_gpr) ) ) ,
444
+ (
445
+ Opcode :: Or ,
446
+ Argument :: GPR ( addr_dst_gpr) ,
447
+ Argument :: GPR ( addr_src_gpr) ,
448
+ Argument :: None ,
449
+ ) => Some ( ( 0 , addr_src_gpr, Some ( addr_dst_gpr) ) ) , // `mr` or `mr.`
450
+ _ => None ,
451
+ }
452
+ }
453
+ }
454
+
455
+ // We create a fake relocation for an instruction, vaguely simulating what the actual relocation
456
+ // might have looked like if it wasn't pooled. This is so minimal changes are needed to display
457
+ // pooled accesses vs non-pooled accesses. We set the relocation type to R_PPC_NONE to indicate that
458
+ // there isn't really a relocation here, as copying the pool relocation's type wouldn't make sense.
459
+ // Also, if this instruction is accessing the middle of a symbol instead of the start, we add an
460
+ // addend to indicate that.
461
+ fn make_fake_pool_reloc ( offset : i16 , cur_addr : u32 , pool_reloc : & ObjReloc ) -> Option < ObjReloc > {
462
+ let offset_from_pool = pool_reloc. addend + offset as i64 ;
463
+ let target_address = pool_reloc. target . address . checked_add_signed ( offset_from_pool) ?;
464
+ let orig_section_index = pool_reloc. target . orig_section_index ?;
465
+ // We also need to create a fake target symbol to go inside our fake relocation.
466
+ // This is because we don't have access to list of all symbols in this section, so we can't find
467
+ // the real symbol yet. Instead we make a placeholder that has the correct `orig_section_index`
468
+ // and `address` fields, and then later on when this information is displayed to the user, we
469
+ // can find the real symbol by searching through the object's section's symbols for one that
470
+ // contains this address.
471
+ let fake_target_symbol = ObjSymbol {
472
+ name : "" . to_string ( ) ,
473
+ demangled_name : None ,
474
+ address : target_address,
475
+ section_address : 0 ,
476
+ size : 0 ,
477
+ size_known : false ,
478
+ kind : Default :: default ( ) ,
479
+ flags : Default :: default ( ) ,
480
+ orig_section_index : Some ( orig_section_index) ,
481
+ virtual_address : None ,
482
+ original_index : None ,
483
+ bytes : vec ! [ ] ,
484
+ } ;
485
+ // The addend is also fake because we don't know yet if the `target_address` here is the exact
486
+ // start of the symbol or if it's in the middle of it.
487
+ let fake_addend = 0 ;
488
+ Some ( ObjReloc {
489
+ flags : RelocationFlags :: Elf { r_type : elf:: R_PPC_NONE } ,
490
+ address : cur_addr as u64 ,
491
+ target : fake_target_symbol,
492
+ addend : fake_addend,
493
+ } )
494
+ }
495
+
496
+ // Searches through all instructions in a function, determining which registers have the addresses
497
+ // of pooled data relocations in them, finding which instructions load data from those addresses,
498
+ // and constructing a mapping of the address of that instruction to a "fake pool relocation" that
499
+ // simulates what that instruction's relocation would look like if data hadn't been pooled.
500
+ // Limitations: This method currently only goes through the instructions in a function in linear
501
+ // order, from start to finish. It does *not* follow any branches. This means that it could have
502
+ // false positives or false negatives in determining which relocation is currently loaded in which
503
+ // register at any given point in the function, as control flow is not respected.
504
+ // There are currently no known examples of this method producing inaccurate results in reality, but
505
+ // if examples are found, it may be possible to update this method to also follow all branches so
506
+ // that it produces more accurate results.
507
+ fn generate_fake_pool_reloc_for_addr_mapping (
508
+ address : u64 ,
509
+ code : & [ u8 ] ,
510
+ relocations : & [ ObjReloc ] ,
511
+ ) -> HashMap < u32 , ObjReloc > {
512
+ let mut active_pool_relocs = HashMap :: new ( ) ;
513
+ let mut pool_reloc_for_addr = HashMap :: new ( ) ;
514
+ for ( cur_addr, ins) in InsIter :: new ( code, address as u32 ) {
515
+ let simplified = ins. simplified ( ) ;
516
+ let reloc = relocations. iter ( ) . find ( |r| ( r. address as u32 & !3 ) == cur_addr) ;
517
+
518
+ if let Some ( reloc) = reloc {
519
+ // This instruction has a real relocation, so it may be a pool load we want to keep
520
+ // track of.
521
+ let args = & simplified. args ;
522
+ match ( ins. op , args[ 0 ] , args[ 1 ] , args[ 2 ] ) {
523
+ (
524
+ Opcode :: Addi ,
525
+ Argument :: GPR ( addr_dst_gpr) ,
526
+ Argument :: GPR ( _addr_src_gpr) ,
527
+ Argument :: Simm ( _simm) ,
528
+ ) => {
529
+ active_pool_relocs. insert ( addr_dst_gpr. 0 , reloc. clone ( ) ) ; // `lis` + `addi`
530
+ }
531
+ (
532
+ Opcode :: Ori ,
533
+ Argument :: GPR ( addr_dst_gpr) ,
534
+ Argument :: GPR ( _addr_src_gpr) ,
535
+ Argument :: Uimm ( _uimm) ,
536
+ ) => {
537
+ active_pool_relocs. insert ( addr_dst_gpr. 0 , reloc. clone ( ) ) ; // `lis` + `ori`
538
+ }
539
+ ( Opcode :: B , _, _, _) => {
540
+ if simplified. mnemonic == "bl" {
541
+ // When encountering a function call, clear any active pool relocations from
542
+ // the volatile registers (r0, r3-r12), but not the nonvolatile registers.
543
+ active_pool_relocs. remove ( & 0 ) ;
544
+ for gpr in 3 ..12 {
545
+ active_pool_relocs. remove ( & gpr) ;
546
+ }
547
+ }
548
+ }
549
+ _ => { }
550
+ }
551
+ } else if let Some ( ( offset, addr_src_gpr, addr_dst_gpr) ) =
552
+ get_offset_and_addr_gpr_for_possible_pool_reference ( ins. op , & simplified)
553
+ {
554
+ // This instruction doesn't have a real relocation, so it may be a reference to one of
555
+ // the already-loaded pools.
556
+ if let Some ( pool_reloc) = active_pool_relocs. get ( & addr_src_gpr. 0 ) {
557
+ if let Some ( fake_pool_reloc) = make_fake_pool_reloc ( offset, cur_addr, pool_reloc) {
558
+ pool_reloc_for_addr. insert ( cur_addr, fake_pool_reloc) ;
559
+ }
560
+ if let Some ( addr_dst_gpr) = addr_dst_gpr {
561
+ // If the address of the pool relocation got copied into another register, we
562
+ // need to keep track of it in that register too as future instructions may
563
+ // reference the symbol indirectly via this new register, instead of the
564
+ // register the symbol's address was originally loaded into.
565
+ // For example, the start of the function might `lis` + `addi` the start of the
566
+ // ...data pool into r25, and then later the start of a loop will `addi` r25
567
+ // with the offset within the .data section of an array variable into r21.
568
+ // Then the body of the loop will `lwzx` one of the array elements from r21.
569
+ let mut new_reloc = pool_reloc. clone ( ) ;
570
+ new_reloc. addend += offset as i64 ;
571
+ active_pool_relocs. insert ( addr_dst_gpr. 0 , new_reloc) ;
572
+ }
573
+ }
574
+ }
575
+ }
576
+
577
+ pool_reloc_for_addr
578
+ }
0 commit comments