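//! Workflow activity that removes calls to Objective-C memory management
//! runtime functions (`objc_retain`, `objc_release`, and friends) from a
//! function's LLIL, replacing them with nops, returns, or gotos as
//! appropriate so that reference-counting traffic does not clutter analysis.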
use binaryninja::{
    architecture::{Architecture as _, CoreRegister, Register as _, RegisterInfo as _},
    binary_view::{BinaryView, BinaryViewExt as _},
    low_level_il::{
        expression::{ExpressionHandler, LowLevelILExpressionKind},
        function::{LowLevelILFunction, Mutable, NonSSA},
        instruction::{
            InstructionHandler, LowLevelILInstruction, LowLevelILInstructionKind,
            LowLevelInstructionIndex,
        },
        lifting::LowLevelILLabel,
        LowLevelILRegisterKind,
    },
    workflow::AnalysisContext,
};

use crate::{error::ILLevel, metadata::GlobalState, Error};

// TODO: We should also handle `objc_retain_x` / `objc_release_x` variants
// that use a custom calling convention.
const IGNORABLE_MEMORY_MANAGEMENT_FUNCTIONS: &[&[u8]] = &[
    b"_objc_autorelease",
    b"_objc_autoreleaseReturnValue",
    b"_objc_release",
    b"_objc_retain",
    b"_objc_retainAutorelease",
    b"_objc_retainAutoreleaseReturnValue",
    b"_objc_retainAutoreleasedReturnValue",
    b"_objc_retainBlock",
    b"_objc_unsafeClaimAutoreleasedReturnValue",
];

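/// Returns `true` if `instr` is a call, tail call, or goto whose constant
/// target resolves to a symbol listed in
/// [`IGNORABLE_MEMORY_MANAGEMENT_FUNCTIONS`], after stripping any `j_` stub
/// prefix added by the shared cache workflow.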
fn is_call_to_ignorable_memory_management_function(
    view: &BinaryView,
    instr: &LowLevelILInstruction<Mutable, NonSSA>,
) -> bool {
    let target = match instr.kind() {
        LowLevelILInstructionKind::Call(call) | LowLevelILInstructionKind::TailCall(call) => {
            let LowLevelILExpressionKind::ConstPtr(address) = call.target().kind() else {
                return false;
            };
            address.value()
        }
        LowLevelILInstructionKind::Goto(target) => target.address(),
        _ => return false,
    };
    let Some(symbol) = view.symbol_by_address(target) else {
        return false;
    };

    let symbol_name = symbol.full_name();
    let symbol_name = symbol_name.to_bytes();

    // Remove any `j_` prefix that the shared cache workflow adds to stub functions.
    let symbol_name = symbol_name.strip_prefix(b"j_").unwrap_or(symbol_name);

    IGNORABLE_MEMORY_MANAGEMENT_FUNCTIONS.contains(&symbol_name)
}

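/// Rewrites `insn` in place if it is a call to an ignorable memory management
/// function: tail calls become returns through the link register, ordinary
/// calls become nops, and inlined stub `goto` sequences are redirected to the
/// instruction the call would have returned to. Returns `Ok(true)` when the
/// instruction was rewritten.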
fn process_instruction(
    bv: &BinaryView,
    llil: &LowLevelILFunction<Mutable, NonSSA>,
    insn: &LowLevelILInstruction<Mutable, NonSSA>,
    link_register: LowLevelILRegisterKind<CoreRegister>,
    link_register_size: usize,
) -> Result<bool, &'static str> {
    if !is_call_to_ignorable_memory_management_function(bv, insn) {
        return Ok(false);
    }

    // TODO: Removing calls to `objc_release` can sometimes leave behind a load of a struct field
    // that appears to be unused. It's not clear whether we should be trying to detect and remove
    // those here, or if some later analysis pass should be cleaning them up but isn't.

    match insn.kind() {
        LowLevelILInstructionKind::TailCall(_) => unsafe {
            // A tail call never returns here, so it can be replaced with a plain
            // return through the link register.
            llil.set_current_address(insn.address());
            llil.replace_expression(
                insn.expr_idx(),
                llil.ret(llil.reg(link_register_size, link_register)),
            );
        },
        LowLevelILInstructionKind::Call(_) => unsafe {
            // The memory management functions that are currently supported either return void
            // or return their first argument. For arm64, the first argument is passed in `x0`
            // and results are returned in `x0`, so we can replace the call with a nop. We'll need
            // to revisit this to support other architectures, and to support the
            // `objc_retain_x` / `objc_release_x` functions that accept their argument in a
            // different register.
            llil.set_current_address(insn.address());
            llil.replace_expression(insn.expr_idx(), llil.nop());
        },
        LowLevelILInstructionKind::Goto(_) if insn.index.0 == 0 => unsafe {
            // If the goto to the memory management function is the first instruction in the
            // function, the function can only consist of that call: when the memory
            // management function returns, it will return directly to this function's
            // caller. A return through the link register therefore preserves behavior.
            llil.set_current_address(insn.address());
            llil.replace_expression(
                insn.expr_idx(),
                llil.ret(llil.reg(link_register_size, link_register)),
            );
        },
        LowLevelILInstructionKind::Goto(_) => {
            // The shared cache workflow inlines calls to stub functions, which causes them
            // to show up as a `lr = <next instruction>; goto <stub function instruction>;`
            // sequence. We need to remove the load of `lr` and update the `goto` to jump
            // to the next instruction.

            // `insn.index.0 > 0` here because the index-zero case is handled by the arm above,
            // so the subtraction cannot underflow.
            let Some(prev) =
                llil.instruction_from_index(LowLevelInstructionIndex(insn.index.0 - 1))
            else {
                return Ok(false);
            };

            let target = match prev.kind() {
                LowLevelILInstructionKind::SetReg(op) if op.dest_reg() == link_register => {
                    let LowLevelILExpressionKind::ConstPtr(value) = op.source_expr().kind() else {
                        return Ok(false);
                    };
                    value.value()
                }
                _ => return Ok(false),
            };

            let Some(LowLevelInstructionIndex(target_idx)) = llil.instruction_index_at(target)
            else {
                return Ok(false);
            };

            // TODO: Manually creating a label like this is fragile and relies on a) knowledge of
            // how labels are used by core, and b) the target being the first instruction in
            // a basic block. We should do this differently.
            let mut label = LowLevelILLabel::new();
            label.operand = target_idx;

            unsafe {
                llil.set_current_address(prev.address());
                llil.replace_expression(prev.expr_idx(), llil.nop());
                llil.set_current_address(insn.address());
                llil.replace_expression(insn.expr_idx(), llil.goto(&mut label));
            }
        }
        _ => return Ok(false),
    }

    Ok(true)
}

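/// Workflow activity entry point. Scans every LLIL instruction in the current
/// function, removes calls to ignorable memory management functions, and
/// regenerates SSA form if anything changed.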
pub fn process(ac: &AnalysisContext) -> Result<(), Error> {
    let view = ac.view();
    if GlobalState::should_ignore_view(&view) {
        return Ok(());
    }

    let func = ac.function();

    let Some(link_register) = func.arch().link_reg() else {
        return Ok(());
    };
    let link_register_size = link_register.info().size();
    let link_register = LowLevelILRegisterKind::Arch(link_register);

    let Some(llil) = (unsafe { ac.llil_function() }) else {
        return Err(Error::MissingIL {
            level: ILLevel::Low,
            func_start: func.start(),
        });
    };

    let mut function_changed = false;
    for block in llil.basic_blocks().iter() {
        for insn in block.iter() {
            match process_instruction(&view, &llil, &insn, link_register, link_register_size) {
                Ok(true) => function_changed = true,
                Ok(false) => {}
                Err(err) => {
                    log::error!(
                        "Error processing instruction at {:#x}: {}",
                        insn.address(),
                        err
                    );
                }
            }
        }
    }

    if function_changed {
        // Regenerate SSA form after modifications.
        llil.generate_ssa_form();
    }
    Ok(())
}
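
#[cfg(test)]
mod tests {
    use super::IGNORABLE_MEMORY_MANAGEMENT_FUNCTIONS;

    // A minimal, illustrative sketch of the symbol-matching step used by
    // `is_call_to_ignorable_memory_management_function`: shared cache stubs
    // carry a `j_` prefix that must be stripped before the table lookup. This
    // exercises only the prefix handling, not the BinaryView / LLIL machinery.
    #[test]
    fn stub_prefix_is_stripped_before_lookup() {
        let stub: &[u8] = b"j__objc_retain";
        let stripped = stub.strip_prefix(b"j_").unwrap_or(stub);
        assert!(IGNORABLE_MEMORY_MANAGEMENT_FUNCTIONS.contains(&stripped));

        // Symbols without the stub prefix are matched unchanged.
        let plain: &[u8] = b"_objc_release";
        let stripped = plain.strip_prefix(b"j_").unwrap_or(plain);
        assert!(IGNORABLE_MEMORY_MANAGEMENT_FUNCTIONS.contains(&stripped));
    }
}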