Skip to content

Commit 10b2a9c

Browse files
authored
PPC: Display data values on hover for pools as well (#140)
* Fix missing dependency feature for objdiff-gui * Update .gitignore * PPC: Display data values on hover for pools as well * Tooltip data display: Format floats and doubles better Floats and doubles will now always be displayed with a decimal point and one digit after it, even if they are whole numbers. Floats will also have the f suffix. This is so you can tell the data type just by glancing at the value. * Move big functions to bottom ppc.rs * Clear pool relocs in volatile registers on function call This fixes some false positives. * Revert ObjArch API changes, add fake target symbol hack Because we no longer have access to the actual symbol name via sections, guess_data_type can no longer detect the String data type for pooled references. * Add hack to detect strings via the addi opcode * Move hack to resolve placeholder symbol into process_code_symbol * Merge reloc and fake_pool_reloc fields of ObjIns
1 parent abe68ef commit 10b2a9c

File tree

4 files changed

+279
-38
lines changed

4 files changed

+279
-38
lines changed

objdiff-core/src/arch/mod.rs

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -36,13 +36,22 @@ pub enum DataType {
3636

3737
impl DataType {
3838
pub fn display_bytes<Endian: ByteOrder>(&self, bytes: &[u8]) -> Option<String> {
39-
// TODO: Attempt to interpret large symbols as arrays of a smaller type,
40-
// fallback to intrepreting it as bytes.
41-
// https://github.com/encounter/objdiff/issues/124
42-
if self.required_len().is_some_and(|l| bytes.len() != l) {
43-
log::warn!("Failed to display a symbol value for a symbol whose size doesn't match the instruction referencing it.");
39+
if self.required_len().is_some_and(|l| bytes.len() < l) {
40+
log::warn!("Failed to display a symbol value for a symbol whose size is too small for instruction referencing it.");
4441
return None;
4542
}
43+
let mut bytes = bytes;
44+
if self.required_len().is_some_and(|l| bytes.len() > l) {
45+
// If the symbol's size is larger than a single instance of this data type, we take just the
46+
// bytes necessary for one of them in order to display the first element of the array.
47+
bytes = &bytes[0..self.required_len().unwrap()];
48+
// TODO: Attempt to interpret large symbols as arrays of a smaller type and show all
49+
// elements of the array instead. https://github.com/encounter/objdiff/issues/124
50+
// However, note that the stride of an array can not always be determined just by the
51+
// data type guessed by the single instruction accessing it. There can also be arrays of
52+
// structs that contain multiple elements of different types, so if other elements after
53+
// the first one were to be displayed in this manner, they may be inaccurate.
54+
}
4655

4756
match self {
4857
DataType::Int8 => {
@@ -86,10 +95,10 @@ impl DataType {
8695
}
8796
}
8897
DataType::Float => {
89-
format!("Float: {}", Endian::read_f32(bytes))
98+
format!("Float: {:?}f", Endian::read_f32(bytes))
9099
}
91100
DataType::Double => {
92-
format!("Double: {}", Endian::read_f64(bytes))
101+
format!("Double: {:?}", Endian::read_f64(bytes))
93102
}
94103
DataType::Bytes => {
95104
format!("Bytes: {:#?}", bytes)

objdiff-core/src/arch/ppc.rs

Lines changed: 214 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
1-
use std::{borrow::Cow, collections::BTreeMap};
1+
use std::{
2+
borrow::Cow,
3+
collections::{BTreeMap, HashMap},
4+
};
25

36
use anyhow::{bail, ensure, Result};
47
use byteorder::BigEndian;
@@ -7,7 +10,7 @@ use object::{
710
elf, File, Object, ObjectSection, ObjectSymbol, Relocation, RelocationFlags, RelocationTarget,
811
Symbol, SymbolKind,
912
};
10-
use ppc750cl::{Argument, InsIter, Opcode, GPR};
13+
use ppc750cl::{Argument, InsIter, Opcode, ParsedIns, GPR};
1114

1215
use crate::{
1316
arch::{DataType, ObjArch, ProcessCodeResult},
@@ -49,6 +52,8 @@ impl ObjArch for ObjArchPpc {
4952
let ins_count = code.len() / 4;
5053
let mut ops = Vec::<u16>::with_capacity(ins_count);
5154
let mut insts = Vec::<ObjIns>::with_capacity(ins_count);
55+
let fake_pool_reloc_for_addr =
56+
generate_fake_pool_reloc_for_addr_mapping(address, code, relocations);
5257
for (cur_addr, mut ins) in InsIter::new(code, address as u32) {
5358
let reloc = relocations.iter().find(|r| (r.address as u32 & !3) == cur_addr);
5459
if let Some(reloc) = reloc {
@@ -145,7 +150,7 @@ impl ObjArch for ObjArchPpc {
145150
size: 4,
146151
mnemonic: Cow::Borrowed(simplified.mnemonic),
147152
args,
148-
reloc: reloc.cloned(),
153+
reloc: reloc.or(fake_pool_reloc_for_addr.get(&cur_addr)).cloned(),
149154
op: ins.op as u16,
150155
branch_dest,
151156
line,
@@ -173,6 +178,7 @@ impl ObjArch for ObjArchPpc {
173178
fn display_reloc(&self, flags: RelocationFlags) -> Cow<'static, str> {
174179
match flags {
175180
RelocationFlags::Elf { r_type } => match r_type {
181+
elf::R_PPC_NONE => Cow::Borrowed("R_PPC_NONE"), // We use this for fake pool relocs
176182
elf::R_PPC_ADDR16_LO => Cow::Borrowed("R_PPC_ADDR16_LO"),
177183
elf::R_PPC_ADDR16_HI => Cow::Borrowed("R_PPC_ADDR16_HI"),
178184
elf::R_PPC_ADDR16_HA => Cow::Borrowed("R_PPC_ADDR16_HA"),
@@ -188,26 +194,22 @@ impl ObjArch for ObjArchPpc {
188194
}
189195

190196
fn guess_data_type(&self, instruction: &ObjIns) -> Option<super::DataType> {
191-
// Always shows the first string of the table. Not ideal, but it's really hard to find
192-
// the actual string being referenced.
193197
if instruction.reloc.as_ref().is_some_and(|r| r.target.name.starts_with("@stringBase")) {
194198
return Some(DataType::String);
195199
}
196200

197-
match Opcode::from(instruction.op as u8) {
198-
Opcode::Lbz | Opcode::Lbzu | Opcode::Lbzux | Opcode::Lbzx => Some(DataType::Int8),
199-
Opcode::Lhz | Opcode::Lhzu | Opcode::Lhzux | Opcode::Lhzx => Some(DataType::Int16),
200-
Opcode::Lha | Opcode::Lhau | Opcode::Lhaux | Opcode::Lhax => Some(DataType::Int16),
201-
Opcode::Lwz | Opcode::Lwzu | Opcode::Lwzux | Opcode::Lwzx => Some(DataType::Int32),
202-
Opcode::Lfs | Opcode::Lfsu | Opcode::Lfsux | Opcode::Lfsx => Some(DataType::Float),
203-
Opcode::Lfd | Opcode::Lfdu | Opcode::Lfdux | Opcode::Lfdx => Some(DataType::Double),
204-
205-
Opcode::Stb | Opcode::Stbu | Opcode::Stbux | Opcode::Stbx => Some(DataType::Int8),
206-
Opcode::Sth | Opcode::Sthu | Opcode::Sthux | Opcode::Sthx => Some(DataType::Int16),
207-
Opcode::Stw | Opcode::Stwu | Opcode::Stwux | Opcode::Stwx => Some(DataType::Int32),
208-
Opcode::Stfs | Opcode::Stfsu | Opcode::Stfsux | Opcode::Stfsx => Some(DataType::Float),
209-
Opcode::Stfd | Opcode::Stfdu | Opcode::Stfdux | Opcode::Stfdx => Some(DataType::Double),
210-
_ => None,
201+
let op = Opcode::from(instruction.op as u8);
202+
if let Some(ty) = guess_data_type_from_load_store_inst_op(op) {
203+
Some(ty)
204+
} else if op == Opcode::Addi {
205+
// Assume that any addi instruction that references a local symbol is loading a string.
206+
// This hack is not ideal and results in tons of false positives where it will show
207+
// garbage strings (e.g. misinterpreting arrays, float literals, etc).
208+
// But not all strings are in the @stringBase pool, so the condition above that checks
209+
// the target symbol name would miss some.
210+
Some(DataType::String)
211+
} else {
212+
None
211213
}
212214
}
213215

@@ -381,3 +383,196 @@ fn make_symbol_ref(symbol: &Symbol) -> Result<ExtabSymbolRef> {
381383
let demangled_name = cwdemangle::demangle(&name, &cwdemangle::DemangleOptions::default());
382384
Ok(ExtabSymbolRef { original_index: symbol.index().0, name, demangled_name })
383385
}
386+
387+
fn guess_data_type_from_load_store_inst_op(inst_op: Opcode) -> Option<DataType> {
388+
match inst_op {
389+
Opcode::Lbz | Opcode::Lbzu | Opcode::Lbzux | Opcode::Lbzx => Some(DataType::Int8),
390+
Opcode::Lhz | Opcode::Lhzu | Opcode::Lhzux | Opcode::Lhzx => Some(DataType::Int16),
391+
Opcode::Lha | Opcode::Lhau | Opcode::Lhaux | Opcode::Lhax => Some(DataType::Int16),
392+
Opcode::Lwz | Opcode::Lwzu | Opcode::Lwzux | Opcode::Lwzx => Some(DataType::Int32),
393+
Opcode::Lfs | Opcode::Lfsu | Opcode::Lfsux | Opcode::Lfsx => Some(DataType::Float),
394+
Opcode::Lfd | Opcode::Lfdu | Opcode::Lfdux | Opcode::Lfdx => Some(DataType::Double),
395+
396+
Opcode::Stb | Opcode::Stbu | Opcode::Stbux | Opcode::Stbx => Some(DataType::Int8),
397+
Opcode::Sth | Opcode::Sthu | Opcode::Sthux | Opcode::Sthx => Some(DataType::Int16),
398+
Opcode::Stw | Opcode::Stwu | Opcode::Stwux | Opcode::Stwx => Some(DataType::Int32),
399+
Opcode::Stfs | Opcode::Stfsu | Opcode::Stfsux | Opcode::Stfsx => Some(DataType::Float),
400+
Opcode::Stfd | Opcode::Stfdu | Opcode::Stfdux | Opcode::Stfdx => Some(DataType::Double),
401+
_ => None,
402+
}
403+
}
404+
405+
// Given an instruction, determine if it could accessing data at the address in a register.
406+
// If so, return the offset added to the register's address, the register containing that address,
407+
// and (optionally) which destination register the address is being copied into.
408+
fn get_offset_and_addr_gpr_for_possible_pool_reference(
409+
opcode: Opcode,
410+
simplified: &ParsedIns,
411+
) -> Option<(i16, GPR, Option<GPR>)> {
412+
let args = &simplified.args;
413+
if guess_data_type_from_load_store_inst_op(opcode).is_some() {
414+
match (args[1], args[2]) {
415+
(Argument::Offset(offset), Argument::GPR(addr_src_gpr)) => {
416+
// e.g. lwz. Immediate offset.
417+
Some((offset.0, addr_src_gpr, None))
418+
}
419+
(Argument::GPR(addr_src_gpr), Argument::GPR(_offset_gpr)) => {
420+
// e.g. lwzx. The offset is in a register and was likely calculated from an index.
421+
// Treat the offset as being 0 in this case to show the first element of the array.
422+
// It may be possible to show all elements by figuring out the stride of the array
423+
// from the calculations performed on the index before it's put into offset_gpr, but
424+
// this would be much more complicated, so it's not currently done.
425+
Some((0, addr_src_gpr, None))
426+
}
427+
_ => None,
428+
}
429+
} else {
430+
// If it's not a load/store instruction, there's two more possibilities we need to handle.
431+
// 1. It could be loading a pointer to a string.
432+
// 2. It could be moving the relocation address plus an offset into a different register to
433+
// load from later.
434+
// If either of these match, we also want to return the destination register that the
435+
// address is being copied into so that we can detect any future references to that new
436+
// register as well.
437+
match (opcode, args[0], args[1], args[2]) {
438+
(
439+
Opcode::Addi,
440+
Argument::GPR(addr_dst_gpr),
441+
Argument::GPR(addr_src_gpr),
442+
Argument::Simm(simm),
443+
) => Some((simm.0, addr_src_gpr, Some(addr_dst_gpr))),
444+
(
445+
Opcode::Or,
446+
Argument::GPR(addr_dst_gpr),
447+
Argument::GPR(addr_src_gpr),
448+
Argument::None,
449+
) => Some((0, addr_src_gpr, Some(addr_dst_gpr))), // `mr` or `mr.`
450+
_ => None,
451+
}
452+
}
453+
}
454+
455+
// We create a fake relocation for an instruction, vaguely simulating what the actual relocation
456+
// might have looked like if it wasn't pooled. This is so minimal changes are needed to display
457+
// pooled accesses vs non-pooled accesses. We set the relocation type to R_PPC_NONE to indicate that
458+
// there isn't really a relocation here, as copying the pool relocation's type wouldn't make sense.
459+
// Also, if this instruction is accessing the middle of a symbol instead of the start, we add an
460+
// addend to indicate that.
461+
fn make_fake_pool_reloc(offset: i16, cur_addr: u32, pool_reloc: &ObjReloc) -> Option<ObjReloc> {
462+
let offset_from_pool = pool_reloc.addend + offset as i64;
463+
let target_address = pool_reloc.target.address.checked_add_signed(offset_from_pool)?;
464+
let orig_section_index = pool_reloc.target.orig_section_index?;
465+
// We also need to create a fake target symbol to go inside our fake relocation.
466+
// This is because we don't have access to list of all symbols in this section, so we can't find
467+
// the real symbol yet. Instead we make a placeholder that has the correct `orig_section_index`
468+
// and `address` fields, and then later on when this information is displayed to the user, we
469+
// can find the real symbol by searching through the object's section's symbols for one that
470+
// contains this address.
471+
let fake_target_symbol = ObjSymbol {
472+
name: "".to_string(),
473+
demangled_name: None,
474+
address: target_address,
475+
section_address: 0,
476+
size: 0,
477+
size_known: false,
478+
kind: Default::default(),
479+
flags: Default::default(),
480+
orig_section_index: Some(orig_section_index),
481+
virtual_address: None,
482+
original_index: None,
483+
bytes: vec![],
484+
};
485+
// The addend is also fake because we don't know yet if the `target_address` here is the exact
486+
// start of the symbol or if it's in the middle of it.
487+
let fake_addend = 0;
488+
Some(ObjReloc {
489+
flags: RelocationFlags::Elf { r_type: elf::R_PPC_NONE },
490+
address: cur_addr as u64,
491+
target: fake_target_symbol,
492+
addend: fake_addend,
493+
})
494+
}
495+
496+
// Searches through all instructions in a function, determining which registers have the addresses
497+
// of pooled data relocations in them, finding which instructions load data from those addresses,
498+
// and constructing a mapping of the address of that instruction to a "fake pool relocation" that
499+
// simulates what that instruction's relocation would look like if data hadn't been pooled.
500+
// Limitations: This method currently only goes through the instructions in a function in linear
501+
// order, from start to finish. It does *not* follow any branches. This means that it could have
502+
// false positives or false negatives in determining which relocation is currently loaded in which
503+
// register at any given point in the function, as control flow is not respected.
504+
// There are currently no known examples of this method producing inaccurate results in reality, but
505+
// if examples are found, it may be possible to update this method to also follow all branches so
506+
// that it produces more accurate results.
507+
fn generate_fake_pool_reloc_for_addr_mapping(
508+
address: u64,
509+
code: &[u8],
510+
relocations: &[ObjReloc],
511+
) -> HashMap<u32, ObjReloc> {
512+
let mut active_pool_relocs = HashMap::new();
513+
let mut pool_reloc_for_addr = HashMap::new();
514+
for (cur_addr, ins) in InsIter::new(code, address as u32) {
515+
let simplified = ins.simplified();
516+
let reloc = relocations.iter().find(|r| (r.address as u32 & !3) == cur_addr);
517+
518+
if let Some(reloc) = reloc {
519+
// This instruction has a real relocation, so it may be a pool load we want to keep
520+
// track of.
521+
let args = &simplified.args;
522+
match (ins.op, args[0], args[1], args[2]) {
523+
(
524+
Opcode::Addi,
525+
Argument::GPR(addr_dst_gpr),
526+
Argument::GPR(_addr_src_gpr),
527+
Argument::Simm(_simm),
528+
) => {
529+
active_pool_relocs.insert(addr_dst_gpr.0, reloc.clone()); // `lis` + `addi`
530+
}
531+
(
532+
Opcode::Ori,
533+
Argument::GPR(addr_dst_gpr),
534+
Argument::GPR(_addr_src_gpr),
535+
Argument::Uimm(_uimm),
536+
) => {
537+
active_pool_relocs.insert(addr_dst_gpr.0, reloc.clone()); // `lis` + `ori`
538+
}
539+
(Opcode::B, _, _, _) => {
540+
if simplified.mnemonic == "bl" {
541+
// When encountering a function call, clear any active pool relocations from
542+
// the volatile registers (r0, r3-r12), but not the nonvolatile registers.
543+
active_pool_relocs.remove(&0);
544+
for gpr in 3..12 {
545+
active_pool_relocs.remove(&gpr);
546+
}
547+
}
548+
}
549+
_ => {}
550+
}
551+
} else if let Some((offset, addr_src_gpr, addr_dst_gpr)) =
552+
get_offset_and_addr_gpr_for_possible_pool_reference(ins.op, &simplified)
553+
{
554+
// This instruction doesn't have a real relocation, so it may be a reference to one of
555+
// the already-loaded pools.
556+
if let Some(pool_reloc) = active_pool_relocs.get(&addr_src_gpr.0) {
557+
if let Some(fake_pool_reloc) = make_fake_pool_reloc(offset, cur_addr, pool_reloc) {
558+
pool_reloc_for_addr.insert(cur_addr, fake_pool_reloc);
559+
}
560+
if let Some(addr_dst_gpr) = addr_dst_gpr {
561+
// If the address of the pool relocation got copied into another register, we
562+
// need to keep track of it in that register too as future instructions may
563+
// reference the symbol indirectly via this new register, instead of the
564+
// register the symbol's address was originally loaded into.
565+
// For example, the start of the function might `lis` + `addi` the start of the
566+
// ...data pool into r25, and then later the start of a loop will `addi` r25
567+
// with the offset within the .data section of an array variable into r21.
568+
// Then the body of the loop will `lwzx` one of the array elements from r21.
569+
let mut new_reloc = pool_reloc.clone();
570+
new_reloc.addend += offset as i64;
571+
active_pool_relocs.insert(addr_dst_gpr.0, new_reloc);
572+
}
573+
}
574+
}
575+
}
576+
577+
pool_reloc_for_addr
578+
}

objdiff-core/src/diff/code.rs

Lines changed: 32 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ use crate::{
99
DiffObjConfig, ObjInsArgDiff, ObjInsBranchFrom, ObjInsBranchTo, ObjInsDiff, ObjInsDiffKind,
1010
ObjSymbolDiff,
1111
},
12-
obj::{ObjInfo, ObjInsArg, ObjReloc, ObjSymbolFlags, SymbolRef},
12+
obj::{ObjInfo, ObjInsArg, ObjReloc, ObjSection, ObjSymbol, ObjSymbolFlags, SymbolRef},
1313
};
1414

1515
pub fn process_code_symbol(
@@ -21,14 +21,30 @@ pub fn process_code_symbol(
2121
let section = section.ok_or_else(|| anyhow!("Code symbol section not found"))?;
2222
let code = &section.data
2323
[symbol.section_address as usize..(symbol.section_address + symbol.size) as usize];
24-
obj.arch.process_code(
24+
let mut res = obj.arch.process_code(
2525
symbol.address,
2626
code,
2727
section.orig_index,
2828
&section.relocations,
2929
&section.line_info,
3030
config,
31-
)
31+
)?;
32+
33+
for inst in res.insts.iter_mut() {
34+
if let Some(reloc) = &mut inst.reloc {
35+
if reloc.target.size == 0 && reloc.target.name.is_empty() {
36+
// Fake target symbol we added as a placeholder. We need to find the real one.
37+
if let Some(real_target) =
38+
find_symbol_matching_fake_symbol_in_sections(&reloc.target, &obj.sections)
39+
{
40+
reloc.addend = (reloc.target.address - real_target.address) as i64;
41+
reloc.target = real_target;
42+
}
43+
}
44+
}
45+
}
46+
47+
Ok(res)
3248
}
3349

3450
pub fn no_diff_code(out: &ProcessCodeResult, symbol_ref: SymbolRef) -> Result<ObjSymbolDiff> {
@@ -369,3 +385,16 @@ fn compare_ins(
369385
}
370386
Ok(result)
371387
}
388+
389+
fn find_symbol_matching_fake_symbol_in_sections(
390+
fake_symbol: &ObjSymbol,
391+
sections: &[ObjSection],
392+
) -> Option<ObjSymbol> {
393+
let orig_section_index = fake_symbol.orig_section_index?;
394+
let section = sections.iter().find(|s| s.orig_index == orig_section_index)?;
395+
let real_symbol = section
396+
.symbols
397+
.iter()
398+
.find(|s| s.size > 0 && (s.address..s.address + s.size).contains(&fake_symbol.address))?;
399+
Some(real_symbol.clone())
400+
}

0 commit comments

Comments
 (0)