Skip to content

Commit d8ead1f

Browse files
committed
PPC: Display data values on hover for pools as well
1 parent bd7ccbe commit d8ead1f

File tree

9 files changed

+237
-35
lines changed

9 files changed

+237
-35
lines changed

objdiff-core/src/arch/arm.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,7 @@ impl ObjArch for ObjArchArm {
113113
relocations: &[ObjReloc],
114114
line_info: &BTreeMap<u64, u32>,
115115
config: &DiffObjConfig,
116+
_sections: &[ObjSection],
116117
) -> Result<ProcessCodeResult> {
117118
let start_addr = address as u32;
118119
let end_addr = start_addr + code.len() as u32;
@@ -216,6 +217,7 @@ impl ObjArch for ObjArchArm {
216217
mnemonic: Cow::Borrowed(parsed_ins.mnemonic),
217218
args,
218219
reloc,
220+
fake_pool_reloc: None,
219221
branch_dest,
220222
line,
221223
formatted: parsed_ins.display(display_options).to_string(),

objdiff-core/src/arch/arm64.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ impl ObjArch for ObjArchArm64 {
2929
relocations: &[ObjReloc],
3030
line_info: &BTreeMap<u64, u32>,
3131
config: &DiffObjConfig,
32+
_sections: &[ObjSection],
3233
) -> Result<ProcessCodeResult> {
3334
let start_address = address;
3435
let end_address = address + code.len() as u64;
@@ -59,6 +60,7 @@ impl ObjArch for ObjArchArm64 {
5960
mnemonic: Cow::Borrowed("<invalid>"),
6061
args: vec![],
6162
reloc: None,
63+
fake_pool_reloc: None,
6264
branch_dest: None,
6365
line: None,
6466
formatted: "".to_string(),
@@ -121,6 +123,7 @@ impl ObjArch for ObjArchArm64 {
121123
mnemonic: Cow::Borrowed(mnemonic),
122124
args,
123125
reloc,
126+
fake_pool_reloc: None,
124127
branch_dest,
125128
line,
126129
formatted: ins.to_string(),

objdiff-core/src/arch/mips.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@ impl ObjArch for ObjArchMips {
8787
relocations: &[ObjReloc],
8888
line_info: &BTreeMap<u64, u32>,
8989
config: &DiffObjConfig,
90+
_sections: &[ObjSection],
9091
) -> Result<ProcessCodeResult> {
9192
let _guard = RABBITIZER_MUTEX.lock().map_err(|e| anyhow!("Failed to lock mutex: {e}"))?;
9293
configure_rabbitizer(match config.mips_abi {
@@ -205,6 +206,7 @@ impl ObjArch for ObjArchMips {
205206
mnemonic: Cow::Borrowed(mnemonic),
206207
args,
207208
reloc: reloc.cloned(),
209+
fake_pool_reloc: None,
208210
branch_dest,
209211
line,
210212
formatted,

objdiff-core/src/arch/mod.rs

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -36,13 +36,22 @@ pub enum DataType {
3636

3737
impl DataType {
3838
pub fn display_bytes<Endian: ByteOrder>(&self, bytes: &[u8]) -> Option<String> {
39-
// TODO: Attempt to interpret large symbols as arrays of a smaller type,
40-
// fallback to intrepreting it as bytes.
41-
// https://github.com/encounter/objdiff/issues/124
42-
if self.required_len().is_some_and(|l| bytes.len() != l) {
43-
log::warn!("Failed to display a symbol value for a symbol whose size doesn't match the instruction referencing it.");
39+
if self.required_len().is_some_and(|l| bytes.len() < l) {
40+
log::warn!("Failed to display a symbol value for a symbol whose size is too small for instruction referencing it.");
4441
return None;
4542
}
43+
let mut bytes = bytes;
44+
if self.required_len().is_some_and(|l| bytes.len() > l) {
45+
// If the symbol's size is larger than a single instance of this data type, we take just the
46+
// bytes necessary for one of them in order to display the first element of the array.
47+
bytes = &bytes[0..self.required_len().unwrap()];
48+
// TODO: Attempt to interpret large symbols as arrays of a smaller type and show all
49+
// elements of the array instead. https://github.com/encounter/objdiff/issues/124
50+
// However, note that the stride of an array can not always be determined just by the
51+
// data type guessed by the single instruction accessing it. There can also be arrays of
52+
// structs that contain multiple elements of different types, so if other elements after
53+
// the first one were to be displayed in this manner, they may be inaccurate.
54+
}
4655

4756
match self {
4857
DataType::Int8 => {
@@ -117,6 +126,7 @@ impl DataType {
117126
}
118127

119128
pub trait ObjArch: Send + Sync {
129+
#[expect(clippy::too_many_arguments)]
120130
fn process_code(
121131
&self,
122132
address: u64,
@@ -125,6 +135,7 @@ pub trait ObjArch: Send + Sync {
125135
relocations: &[ObjReloc],
126136
line_info: &BTreeMap<u64, u32>,
127137
config: &DiffObjConfig,
138+
sections: &[ObjSection],
128139
) -> Result<ProcessCodeResult>;
129140

130141
fn implcit_addend(

objdiff-core/src/arch/ppc.rs

Lines changed: 191 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
1-
use std::{borrow::Cow, collections::BTreeMap};
1+
use std::{
2+
borrow::Cow,
3+
collections::{BTreeMap, HashMap},
4+
};
25

36
use anyhow::{bail, ensure, Result};
47
use byteorder::BigEndian;
@@ -7,7 +10,7 @@ use object::{
710
elf, File, Object, ObjectSection, ObjectSymbol, Relocation, RelocationFlags, RelocationTarget,
811
Symbol, SymbolKind,
912
};
10-
use ppc750cl::{Argument, InsIter, Opcode, GPR};
13+
use ppc750cl::{Argument, InsIter, Opcode, ParsedIns, GPR};
1114

1215
use crate::{
1316
arch::{DataType, ObjArch, ProcessCodeResult},
@@ -27,6 +30,180 @@ fn is_rel_abs_arg(arg: &Argument) -> bool {
2730

2831
fn is_offset_arg(arg: &Argument) -> bool { matches!(arg, Argument::Offset(_)) }
2932

33+
fn guess_data_type_from_load_store_inst_op(inst_op: Opcode) -> Option<DataType> {
34+
match inst_op {
35+
Opcode::Lbz | Opcode::Lbzu | Opcode::Lbzux | Opcode::Lbzx => Some(DataType::Int8),
36+
Opcode::Lhz | Opcode::Lhzu | Opcode::Lhzux | Opcode::Lhzx => Some(DataType::Int16),
37+
Opcode::Lha | Opcode::Lhau | Opcode::Lhaux | Opcode::Lhax => Some(DataType::Int16),
38+
Opcode::Lwz | Opcode::Lwzu | Opcode::Lwzux | Opcode::Lwzx => Some(DataType::Int32),
39+
Opcode::Lfs | Opcode::Lfsu | Opcode::Lfsux | Opcode::Lfsx => Some(DataType::Float),
40+
Opcode::Lfd | Opcode::Lfdu | Opcode::Lfdux | Opcode::Lfdx => Some(DataType::Double),
41+
42+
Opcode::Stb | Opcode::Stbu | Opcode::Stbux | Opcode::Stbx => Some(DataType::Int8),
43+
Opcode::Sth | Opcode::Sthu | Opcode::Sthux | Opcode::Sthx => Some(DataType::Int16),
44+
Opcode::Stw | Opcode::Stwu | Opcode::Stwux | Opcode::Stwx => Some(DataType::Int32),
45+
Opcode::Stfs | Opcode::Stfsu | Opcode::Stfsux | Opcode::Stfsx => Some(DataType::Float),
46+
Opcode::Stfd | Opcode::Stfdu | Opcode::Stfdux | Opcode::Stfdx => Some(DataType::Double),
47+
_ => None,
48+
}
49+
}
50+
51+
// Given an instruction, determine if it could be accessing data at the address in a register.
52+
// If so, return the offset added to the register's address, the register containing that address,
53+
// and (optionally) which destination register the address is being copied into.
54+
fn get_offset_and_addr_gpr_for_possible_pool_reference(
55+
opcode: Opcode,
56+
simplified: &ParsedIns,
57+
) -> Option<(i16, GPR, Option<GPR>)> {
58+
let args = &simplified.args;
59+
if guess_data_type_from_load_store_inst_op(opcode).is_some() {
60+
match (args[1], args[2]) {
61+
(Argument::Offset(offset), Argument::GPR(addr_src_gpr)) => {
62+
// e.g. lwz. Immediate offset.
63+
Some((offset.0, addr_src_gpr, None))
64+
}
65+
(Argument::GPR(addr_src_gpr), Argument::GPR(_offset_gpr)) => {
66+
// e.g. lwzx. The offset is in a register and was likely calculated from an index.
67+
// Treat the offset as being 0 in this case to show the first element of the array.
68+
// It may be possible to show all elements by figuring out the stride of the array
69+
// from the calculations performed on the index before it's put into offset_gpr, but
70+
// this would be much more complicated, so it's not currently done.
71+
Some((0, addr_src_gpr, None))
72+
}
73+
_ => None,
74+
}
75+
} else {
76+
// If it's not a load/store instruction, there are two more possibilities we need to handle.
77+
// 1. It could be a reference to @stringBase.
78+
// 2. It could be moving the relocation address plus an offset into a different register to
79+
// load from later.
80+
// If either of these match, we also want to return the destination register that the
81+
// address is being copied into so that we can detect any future references to that new
82+
// register as well.
83+
match (opcode, args[0], args[1], args[2]) {
84+
(
85+
Opcode::Addi,
86+
Argument::GPR(addr_dst_gpr),
87+
Argument::GPR(addr_src_gpr),
88+
Argument::Simm(simm),
89+
) => Some((simm.0, addr_src_gpr, Some(addr_dst_gpr))),
90+
(
91+
Opcode::Or,
92+
Argument::GPR(addr_dst_gpr),
93+
Argument::GPR(addr_src_gpr),
94+
Argument::None,
95+
) => Some((0, addr_src_gpr, Some(addr_dst_gpr))), // `mr` or `mr.`
96+
_ => None,
97+
}
98+
}
99+
}
100+
101+
// We create a fake relocation for an instruction, vaguely simulating what the actual relocation
102+
// might have looked like if it wasn't pooled. This is so minimal changes are needed to display
103+
// pooled accesses vs non-pooled accesses. We set the relocation type to R_PPC_NONE to indicate that
104+
// there isn't really a relocation here, as copying the pool relocation's type wouldn't make sense.
105+
// Also, if this instruction is accessing the middle of a symbol instead of the start, we add an
106+
// addend to indicate that.
107+
fn make_fake_pool_reloc(
108+
offset: i16,
109+
cur_addr: u32,
110+
pool_reloc: &ObjReloc,
111+
sections: &[ObjSection],
112+
) -> Option<ObjReloc> {
113+
let offset_from_pool = pool_reloc.addend + offset as i64;
114+
let target_address = pool_reloc.target.address.checked_add_signed(offset_from_pool)?;
115+
let orig_section_index = pool_reloc.target.orig_section_index?;
116+
let section = sections.iter().find(|s| s.orig_index == orig_section_index)?;
117+
let target_symbol = section
118+
.symbols
119+
.iter()
120+
.find(|s| s.size > 0 && (s.address..s.address + s.size).contains(&target_address))?;
121+
let addend = (target_address - target_symbol.address) as i64;
122+
Some(ObjReloc {
123+
flags: RelocationFlags::Elf { r_type: elf::R_PPC_NONE },
124+
address: cur_addr as u64,
125+
target: target_symbol.clone(),
126+
addend,
127+
})
128+
}
129+
130+
// Searches through all instructions in a function, determining which registers have the addresses
131+
// of pooled data relocations in them, finding which instructions load data from those addresses,
132+
// and constructing a mapping of the address of that instruction to a "fake pool relocation" that
133+
// simulates what that instruction's relocation would look like if data hadn't been pooled.
134+
// Limitations: This method currently only goes through the instructions in a function in linear
135+
// order, from start to finish. It does *not* follow any branches. This means that it could have
136+
// false positives or false negatives in determining which relocation is currently loaded in which
137+
// register at any given point in the function, as control flow is not respected.
138+
// There are currently no known examples of this method producing inaccurate results in reality, but
139+
// if examples are found, it may be possible to update this method to also follow all branches so
140+
// that it produces more accurate results.
141+
fn generate_fake_pool_reloc_for_addr_mapping(
142+
address: u64,
143+
code: &[u8],
144+
relocations: &[ObjReloc],
145+
sections: &[ObjSection],
146+
) -> HashMap<u32, ObjReloc> {
147+
let mut active_pool_relocs = HashMap::new();
148+
let mut pool_reloc_for_addr = HashMap::new();
149+
for (cur_addr, ins) in InsIter::new(code, address as u32) {
150+
let simplified = ins.simplified();
151+
let reloc = relocations.iter().find(|r| (r.address as u32 & !3) == cur_addr);
152+
153+
if let Some(reloc) = reloc {
154+
// This instruction has a real relocation, so it may be a pool load we want to keep
155+
// track of.
156+
let args = &simplified.args;
157+
match (ins.op, args[0], args[1], args[2]) {
158+
(
159+
Opcode::Addi,
160+
Argument::GPR(addr_dst_gpr),
161+
Argument::GPR(_addr_src_gpr),
162+
Argument::Simm(_simm),
163+
) => {
164+
active_pool_relocs.insert(addr_dst_gpr.0, reloc.clone()); // `lis` + `addi`
165+
}
166+
(
167+
Opcode::Ori,
168+
Argument::GPR(addr_dst_gpr),
169+
Argument::GPR(_addr_src_gpr),
170+
Argument::Uimm(_uimm),
171+
) => {
172+
active_pool_relocs.insert(addr_dst_gpr.0, reloc.clone()); // `lis` + `ori`
173+
}
174+
_ => {}
175+
}
176+
} else if let Some((offset, addr_src_gpr, addr_dst_gpr)) =
177+
get_offset_and_addr_gpr_for_possible_pool_reference(ins.op, &simplified)
178+
{
179+
// This instruction doesn't have a real relocation, so it may be a reference to one of
180+
// the already-loaded pools.
181+
if let Some(pool_reloc) = active_pool_relocs.get(&addr_src_gpr.0) {
182+
if let Some(fake_pool_reloc) =
183+
make_fake_pool_reloc(offset, cur_addr, pool_reloc, sections)
184+
{
185+
pool_reloc_for_addr.insert(cur_addr, fake_pool_reloc);
186+
}
187+
if let Some(addr_dst_gpr) = addr_dst_gpr {
188+
// If the address of the pool relocation got copied into another register, we
189+
// need to keep track of it in that register too as future instructions may
190+
// reference the symbol indirectly via this new register, instead of the
191+
// register the symbol's address was originally loaded into.
192+
// For example, the start of the function might `lis` + `addi` the start of the
193+
// ...data pool into r25, and then later the start of a loop will `addi` r25
194+
// with the offset within the .data section of an array variable into r21.
195+
// Then the body of the loop will `lwzx` one of the array elements from r21.
196+
let mut new_reloc = pool_reloc.clone();
197+
new_reloc.addend += offset as i64;
198+
active_pool_relocs.insert(addr_dst_gpr.0, new_reloc);
199+
}
200+
}
201+
}
202+
}
203+
204+
pool_reloc_for_addr
205+
}
206+
30207
pub struct ObjArchPpc {
31208
/// Exception info
32209
pub extab: Option<BTreeMap<usize, ExceptionInfo>>,
@@ -45,10 +222,13 @@ impl ObjArch for ObjArchPpc {
45222
relocations: &[ObjReloc],
46223
line_info: &BTreeMap<u64, u32>,
47224
config: &DiffObjConfig,
225+
sections: &[ObjSection],
48226
) -> Result<ProcessCodeResult> {
49227
let ins_count = code.len() / 4;
50228
let mut ops = Vec::<u16>::with_capacity(ins_count);
51229
let mut insts = Vec::<ObjIns>::with_capacity(ins_count);
230+
let fake_pool_reloc_for_addr =
231+
generate_fake_pool_reloc_for_addr_mapping(address, code, relocations, sections);
52232
for (cur_addr, mut ins) in InsIter::new(code, address as u32) {
53233
let reloc = relocations.iter().find(|r| (r.address as u32 & !3) == cur_addr);
54234
if let Some(reloc) = reloc {
@@ -146,6 +326,7 @@ impl ObjArch for ObjArchPpc {
146326
mnemonic: Cow::Borrowed(simplified.mnemonic),
147327
args,
148328
reloc: reloc.cloned(),
329+
fake_pool_reloc: fake_pool_reloc_for_addr.get(&cur_addr).cloned(),
149330
op: ins.op as u16,
150331
branch_dest,
151332
line,
@@ -173,6 +354,7 @@ impl ObjArch for ObjArchPpc {
173354
fn display_reloc(&self, flags: RelocationFlags) -> Cow<'static, str> {
174355
match flags {
175356
RelocationFlags::Elf { r_type } => match r_type {
357+
elf::R_PPC_NONE => Cow::Borrowed("R_PPC_NONE"), // We use this for fake pool relocs
176358
elf::R_PPC_ADDR16_LO => Cow::Borrowed("R_PPC_ADDR16_LO"),
177359
elf::R_PPC_ADDR16_HI => Cow::Borrowed("R_PPC_ADDR16_HI"),
178360
elf::R_PPC_ADDR16_HA => Cow::Borrowed("R_PPC_ADDR16_HA"),
@@ -188,27 +370,16 @@ impl ObjArch for ObjArchPpc {
188370
}
189371

190372
fn guess_data_type(&self, instruction: &ObjIns) -> Option<super::DataType> {
191-
// Always shows the first string of the table. Not ideal, but it's really hard to find
192-
// the actual string being referenced.
193-
if instruction.reloc.as_ref().is_some_and(|r| r.target.name.starts_with("@stringBase")) {
373+
if instruction
374+
.reloc
375+
.as_ref()
376+
.or(instruction.fake_pool_reloc.as_ref())
377+
.is_some_and(|r| r.target.name.starts_with("@stringBase"))
378+
{
194379
return Some(DataType::String);
195380
}
196381

197-
match Opcode::from(instruction.op as u8) {
198-
Opcode::Lbz | Opcode::Lbzu | Opcode::Lbzux | Opcode::Lbzx => Some(DataType::Int8),
199-
Opcode::Lhz | Opcode::Lhzu | Opcode::Lhzux | Opcode::Lhzx => Some(DataType::Int16),
200-
Opcode::Lha | Opcode::Lhau | Opcode::Lhaux | Opcode::Lhax => Some(DataType::Int16),
201-
Opcode::Lwz | Opcode::Lwzu | Opcode::Lwzux | Opcode::Lwzx => Some(DataType::Int32),
202-
Opcode::Lfs | Opcode::Lfsu | Opcode::Lfsux | Opcode::Lfsx => Some(DataType::Float),
203-
Opcode::Lfd | Opcode::Lfdu | Opcode::Lfdux | Opcode::Lfdx => Some(DataType::Double),
204-
205-
Opcode::Stb | Opcode::Stbu | Opcode::Stbux | Opcode::Stbx => Some(DataType::Int8),
206-
Opcode::Sth | Opcode::Sthu | Opcode::Sthux | Opcode::Sthx => Some(DataType::Int16),
207-
Opcode::Stw | Opcode::Stwu | Opcode::Stwux | Opcode::Stwx => Some(DataType::Int32),
208-
Opcode::Stfs | Opcode::Stfsu | Opcode::Stfsux | Opcode::Stfsx => Some(DataType::Float),
209-
Opcode::Stfd | Opcode::Stfdu | Opcode::Stfdux | Opcode::Stfdx => Some(DataType::Double),
210-
_ => None,
211-
}
382+
guess_data_type_from_load_store_inst_op(Opcode::from(instruction.op as u8))
212383
}
213384

214385
fn display_data_type(&self, ty: DataType, bytes: &[u8]) -> Option<String> {

objdiff-core/src/arch/x86.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ impl ObjArch for ObjArchX86 {
3434
relocations: &[ObjReloc],
3535
line_info: &BTreeMap<u64, u32>,
3636
config: &DiffObjConfig,
37+
_sections: &[ObjSection],
3738
) -> Result<ProcessCodeResult> {
3839
let mut result = ProcessCodeResult { ops: Vec::new(), insts: Vec::new() };
3940
let mut decoder = Decoder::with_ip(self.bits, code, address, DecoderOptions::NONE);
@@ -54,6 +55,7 @@ impl ObjArch for ObjArchX86 {
5455
mnemonic: Cow::Borrowed("<invalid>"),
5556
args: vec![],
5657
reloc: None,
58+
fake_pool_reloc: None,
5759
branch_dest: None,
5860
line: None,
5961
formatted: String::new(),
@@ -79,6 +81,7 @@ impl ObjArch for ObjArchX86 {
7981
mnemonic: Cow::Borrowed("<invalid>"),
8082
args: vec![],
8183
reloc: reloc.cloned(),
84+
fake_pool_reloc: None,
8285
branch_dest: None,
8386
line,
8487
formatted: String::new(),

objdiff-core/src/diff/code.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ pub fn process_code_symbol(
2828
&section.relocations,
2929
&section.line_info,
3030
config,
31+
&obj.sections,
3132
)
3233
}
3334

0 commit comments

Comments
 (0)