From fdfbae02a6e012c2498e2eccf51899a5206d0601 Mon Sep 17 00:00:00 2001 From: Philip Craig Date: Sat, 25 Apr 2020 15:58:58 +1000 Subject: [PATCH] write: implement expressions Expressions are now a Vec of operations, instead of raw bytecode. This is required to support addresses and references within the expression. --- src/read/op.rs | 36 + src/write/cfi.rs | 69 +- src/write/dwarf.rs | 6 +- src/write/loc.rs | 105 ++- src/write/mod.rs | 28 +- src/write/op.rs | 1558 +++++++++++++++++++++++++++++++++++++++++ src/write/section.rs | 14 +- src/write/unit.rs | 273 +++++--- tests/convert_self.rs | 2 +- 9 files changed, 1907 insertions(+), 184 deletions(-) create mode 100644 src/write/op.rs diff --git a/src/read/op.rs b/src/read/op.rs index e0432adf9..336329cbf 100644 --- a/src/read/op.rs +++ b/src/read/op.rs @@ -885,6 +885,42 @@ impl Expression { pub fn evaluation(self, encoding: Encoding) -> Evaluation { Evaluation::new(self.0, encoding) } + + /// Return an iterator for the operations in the expression. + pub fn operations(self, encoding: Encoding) -> OperationIter { + OperationIter { + input: self.0, + encoding, + } + } +} + +/// An iterator for the operations in an expression. +#[derive(Debug, Clone, Copy)] +pub struct OperationIter { + input: R, + encoding: Encoding, +} + +impl OperationIter { + /// Read the next operation in an expression. + pub fn next(&mut self) -> Result>> { + if self.input.is_empty() { + return Ok(None); + } + match Operation::parse(&mut self.input, self.encoding) { + Ok(op) => Ok(Some(op)), + Err(e) => { + self.input.empty(); + Err(e) + } + } + } + + /// Return the current byte offset of the iterator. + pub fn offset_from(&self, expression: &Expression) -> R::Offset { + self.input.offset_from(&expression.0) + } } /// A DWARF expression evaluator. 
diff --git a/src/write/cfi.rs b/src/write/cfi.rs index a5e00bd78..c58eb1b1d 100644 --- a/src/write/cfi.rs +++ b/src/write/cfi.rs @@ -251,7 +251,7 @@ impl CommonInformationEntry { } for instruction in &self.instructions { - instruction.write(w, self)?; + instruction.write(w, encoding, self)?; } write_nop( @@ -357,7 +357,7 @@ impl FrameDescriptionEntry { for (offset, instruction) in &self.instructions { write_advance_loc(w, cie.code_alignment_factor, prev_offset, *offset)?; prev_offset = *offset; - instruction.write(w, cie)?; + instruction.write(w, encoding, cie)?; } write_nop( @@ -413,7 +413,12 @@ pub enum CallFrameInstruction { } impl CallFrameInstruction { - fn write(&self, w: &mut W, cie: &CommonInformationEntry) -> Result<()> { + fn write( + &self, + w: &mut W, + encoding: Encoding, + cie: &CommonInformationEntry, + ) -> Result<()> { match *self { CallFrameInstruction::Cfa(register, offset) => { if offset < 0 { @@ -445,8 +450,8 @@ impl CallFrameInstruction { } CallFrameInstruction::CfaExpression(ref expression) => { w.write_u8(constants::DW_CFA_def_cfa_expression.0)?; - w.write_uleb128(expression.0.len() as u64)?; - w.write(&expression.0)?; + w.write_uleb128(expression.size(encoding, None) as u64)?; + expression.write(w, None, encoding, None)?; } CallFrameInstruction::Restore(register) => { if register.0 < 0x40 { @@ -499,14 +504,14 @@ impl CallFrameInstruction { CallFrameInstruction::Expression(register, ref expression) => { w.write_u8(constants::DW_CFA_expression.0)?; w.write_uleb128(register.0.into())?; - w.write_uleb128(expression.0.len() as u64)?; - w.write(&expression.0)?; + w.write_uleb128(expression.size(encoding, None) as u64)?; + expression.write(w, None, encoding, None)?; } CallFrameInstruction::ValExpression(register, ref expression) => { w.write_u8(constants::DW_CFA_val_expression.0)?; w.write_uleb128(register.0.into())?; - w.write_uleb128(expression.0.len() as u64)?; - w.write(&expression.0)?; + w.write_uleb128(expression.size(encoding, None) as 
u64)?; + expression.write(w, None, encoding, None)?; } CallFrameInstruction::RememberState => { w.write_u8(constants::DW_CFA_remember_state.0)?; @@ -675,9 +680,12 @@ pub(crate) mod convert { let mut offset = 0; let mut from_instructions = from_cie.instructions(frame, bases); while let Some(from_instruction) = from_instructions.next()? { - if let Some(instruction) = - CallFrameInstruction::from(from_instruction, from_cie, &mut offset)? - { + if let Some(instruction) = CallFrameInstruction::from( + from_instruction, + from_cie, + convert_address, + &mut offset, + )? { cie.instructions.push(instruction); } } @@ -716,9 +724,12 @@ pub(crate) mod convert { let mut offset = 0; let mut from_instructions = from_fde.instructions(frame, bases); while let Some(from_instruction) = from_instructions.next()? { - if let Some(instruction) = - CallFrameInstruction::from(from_instruction, from_cie, &mut offset)? - { + if let Some(instruction) = CallFrameInstruction::from( + from_instruction, + from_cie, + convert_address, + &mut offset, + )? { fde.instructions.push((offset, instruction)); } } @@ -731,8 +742,11 @@ pub(crate) mod convert { fn from>( from_instruction: read::CallFrameInstruction, from_cie: &read::CommonInformationEntry, + convert_address: &dyn Fn(u64) -> Option
, offset: &mut u32, ) -> ConvertResult> { + let convert_expression = + |x| Expression::from(x, from_cie.encoding(), None, None, None, convert_address); // TODO: validate integer type conversions Ok(Some(match from_instruction { read::CallFrameInstruction::SetLoc { .. } => { @@ -764,8 +778,7 @@ pub(crate) mod convert { CallFrameInstruction::CfaOffset(offset as i32) } read::CallFrameInstruction::DefCfaExpression { expression } => { - let expression = Expression(expression.0.to_slice()?.into()); - CallFrameInstruction::CfaExpression(expression) + CallFrameInstruction::CfaExpression(convert_expression(expression)?) } read::CallFrameInstruction::Undefined { register } => { CallFrameInstruction::Undefined(register) @@ -808,17 +821,11 @@ pub(crate) mod convert { read::CallFrameInstruction::Expression { register, expression, - } => { - let expression = Expression(expression.0.to_slice()?.into()); - CallFrameInstruction::Expression(register, expression) - } + } => CallFrameInstruction::Expression(register, convert_expression(expression)?), read::CallFrameInstruction::ValExpression { register, expression, - } => { - let expression = Expression(expression.0.to_slice()?.into()); - CallFrameInstruction::ValExpression(register, expression) - } + } => CallFrameInstruction::ValExpression(register, convert_expression(expression)?), read::CallFrameInstruction::Restore { register } => { CallFrameInstruction::Restore(register) } @@ -922,6 +929,9 @@ mod tests { #[test] fn test_frame_instruction() { + let mut expression = Expression::new(); + expression.op_constu(0); + let cie_instructions = [ CallFrameInstruction::Cfa(X86_64::RSP, 8), CallFrameInstruction::Offset(X86_64::RA, -8), @@ -934,10 +944,7 @@ mod tests { (4, CallFrameInstruction::CfaOffset(8)), (4, CallFrameInstruction::CfaOffset(0)), (4, CallFrameInstruction::CfaOffset(-8)), - ( - 6, - CallFrameInstruction::CfaExpression(Expression(vec![1, 2, 3])), - ), + (6, CallFrameInstruction::CfaExpression(expression.clone())), (8, 
CallFrameInstruction::Restore(Register(1))), (8, CallFrameInstruction::Restore(Register(101))), (10, CallFrameInstruction::Undefined(Register(2))), @@ -949,11 +956,11 @@ mod tests { (18, CallFrameInstruction::Register(Register(6), Register(7))), ( 20, - CallFrameInstruction::Expression(Register(8), Expression(vec![2, 3, 4])), + CallFrameInstruction::Expression(Register(8), expression.clone()), ), ( 22, - CallFrameInstruction::ValExpression(Register(9), Expression(vec![3, 4, 5])), + CallFrameInstruction::ValExpression(Register(9), expression.clone()), ), (24 + 0x80, CallFrameInstruction::RememberState), (26 + 0x280, CallFrameInstruction::RestoreState), diff --git a/src/write/dwarf.rs b/src/write/dwarf.rs index bd7b3dfec..ea507126a 100644 --- a/src/write/dwarf.rs +++ b/src/write/dwarf.rs @@ -87,17 +87,17 @@ impl DwarfUnit { let abbrev_offset = sections.debug_abbrev.offset(); let mut abbrevs = AbbreviationTable::default(); - let mut debug_info_refs = Vec::new(); self.unit.write( sections, abbrev_offset, &mut abbrevs, &line_strings, &strings, - &mut debug_info_refs, )?; // None should exist because we didn't give out any UnitId. 
- assert!(debug_info_refs.is_empty()); + assert!(sections.debug_info_refs.is_empty()); + assert!(sections.debug_loc_refs.is_empty()); + assert!(sections.debug_loclists_refs.is_empty()); abbrevs.write(&mut sections.debug_abbrev)?; Ok(()) diff --git a/src/write/loc.rs b/src/write/loc.rs index 397bd82b6..2e742f76b 100644 --- a/src/write/loc.rs +++ b/src/write/loc.rs @@ -3,7 +3,10 @@ use indexmap::IndexSet; use std::ops::{Deref, DerefMut}; use crate::common::{Encoding, LocationListsOffset, SectionId}; -use crate::write::{Address, BaseId, Error, Expression, Result, Section, Sections, Writer}; +use crate::write::{ + Address, BaseId, DebugInfoReference, Error, Expression, Result, Section, Sections, UnitOffsets, + Writer, +}; define_section!( DebugLoc, @@ -45,14 +48,25 @@ impl LocationListTable { &self, sections: &mut Sections, encoding: Encoding, + unit_offsets: Option<&UnitOffsets>, ) -> Result { if self.locations.is_empty() { return Ok(LocationListOffsets::none()); } match encoding.version { - 2..=4 => self.write_loc(&mut sections.debug_loc, encoding.address_size), - 5 => self.write_loclists(&mut sections.debug_loclists, encoding), + 2..=4 => self.write_loc( + &mut sections.debug_loc, + &mut sections.debug_loc_refs, + encoding, + unit_offsets, + ), + 5 => self.write_loclists( + &mut sections.debug_loclists, + &mut sections.debug_loclists_refs, + encoding, + unit_offsets, + ), _ => Err(Error::UnsupportedVersion(encoding.version)), } } @@ -61,8 +75,11 @@ impl LocationListTable { fn write_loc( &self, w: &mut DebugLoc, - address_size: u8, + refs: &mut Vec, + encoding: Encoding, + unit_offsets: Option<&UnitOffsets>, ) -> Result { + let address_size = encoding.address_size; let mut offsets = Vec::new(); for loc_list in self.locations.iter() { offsets.push(w.offset()); @@ -86,8 +103,7 @@ impl LocationListTable { } w.write_udata(begin, address_size)?; w.write_udata(end, address_size)?; - w.write_u16(data.0.len() as u16)?; - w.write(&data.0)?; + write_expression(&mut w.0, refs, 
encoding, unit_offsets, data)?; } Location::StartEnd { begin, @@ -99,8 +115,7 @@ impl LocationListTable { } w.write_address(begin, address_size)?; w.write_address(end, address_size)?; - w.write_u16(data.0.len() as u16)?; - w.write(&data.0)?; + write_expression(&mut w.0, refs, encoding, unit_offsets, data)?; } Location::StartLength { begin, @@ -119,8 +134,7 @@ impl LocationListTable { } w.write_address(begin, address_size)?; w.write_address(end, address_size)?; - w.write_u16(data.0.len() as u16)?; - w.write(&data.0)?; + write_expression(&mut w.0, refs, encoding, unit_offsets, data)?; } Location::DefaultLocation { .. } => { return Err(Error::InvalidRange); @@ -140,7 +154,9 @@ impl LocationListTable { fn write_loclists( &self, w: &mut DebugLocLists, + refs: &mut Vec, encoding: Encoding, + unit_offsets: Option<&UnitOffsets>, ) -> Result { let mut offsets = Vec::new(); @@ -173,8 +189,7 @@ impl LocationListTable { w.write_u8(crate::constants::DW_LLE_offset_pair.0)?; w.write_uleb128(begin)?; w.write_uleb128(end)?; - w.write_uleb128(data.0.len() as u64)?; - w.write(&data.0)?; + write_expression(&mut w.0, refs, encoding, unit_offsets, data)?; } Location::StartEnd { begin, @@ -184,8 +199,7 @@ impl LocationListTable { w.write_u8(crate::constants::DW_LLE_start_end.0)?; w.write_address(begin, encoding.address_size)?; w.write_address(end, encoding.address_size)?; - w.write_uleb128(data.0.len() as u64)?; - w.write(&data.0)?; + write_expression(&mut w.0, refs, encoding, unit_offsets, data)?; } Location::StartLength { begin, @@ -195,13 +209,11 @@ impl LocationListTable { w.write_u8(crate::constants::DW_LLE_start_length.0)?; w.write_address(begin, encoding.address_size)?; w.write_uleb128(length)?; - w.write_uleb128(data.0.len() as u64)?; - w.write(&data.0)?; + write_expression(&mut w.0, refs, encoding, unit_offsets, data)?; } Location::DefaultLocation { ref data } => { w.write_u8(crate::constants::DW_LLE_default_location.0)?; - w.write_uleb128(data.0.len() as u64)?; - 
w.write(&data.0)?; + write_expression(&mut w.0, refs, encoding, unit_offsets, data)?; } } } @@ -265,6 +277,23 @@ pub enum Location { }, } +fn write_expression( + w: &mut W, + refs: &mut Vec, + encoding: Encoding, + unit_offsets: Option<&UnitOffsets>, + val: &Expression, +) -> Result<()> { + let size = val.size(encoding, unit_offsets) as u64; + if encoding.version <= 4 { + w.write_udata(size, 2)?; + } else { + w.write_uleb128(size)?; + } + val.write(w, Some(refs), encoding, unit_offsets)?; + Ok(()) +} + #[cfg(feature = "read")] mod convert { use super::*; @@ -281,6 +310,16 @@ mod convert { let mut have_base_address = context.base_address != Address::Constant(0); let convert_address = |x| (context.convert_address)(x).ok_or(ConvertError::InvalidAddress); + let convert_expression = |x| { + Expression::from( + x, + context.unit.encoding(), + Some(context.dwarf), + Some(context.unit), + Some(context.entry_ids), + context.convert_address, + ) + }; let mut loc_list = Vec::new(); while let Some(from_loc) = from.next()? { let loc = match from_loc { @@ -288,7 +327,7 @@ mod convert { // These were parsed as addresses, even if they are offsets. 
let begin = convert_address(begin)?; let end = convert_address(end)?; - let data = Expression(data.0.to_slice()?.into()); + let data = convert_expression(data)?; match (begin, end) { (Address::Constant(begin_offset), Address::Constant(end_offset)) => { if have_base_address { @@ -324,7 +363,7 @@ mod convert { read::RawLocListEntry::StartxEndx { begin, end, data } => { let begin = convert_address(context.dwarf.address(context.unit, begin)?)?; let end = convert_address(context.dwarf.address(context.unit, end)?)?; - let data = Expression(data.0.to_slice()?.into()); + let data = convert_expression(data)?; Location::StartEnd { begin, end, data } } read::RawLocListEntry::StartxLength { @@ -333,7 +372,7 @@ mod convert { data, } => { let begin = convert_address(context.dwarf.address(context.unit, begin)?)?; - let data = Expression(data.0.to_slice()?.into()); + let data = convert_expression(data)?; Location::StartLength { begin, length, @@ -341,13 +380,13 @@ mod convert { } } read::RawLocListEntry::OffsetPair { begin, end, data } => { - let data = Expression(data.0.to_slice()?.into()); + let data = convert_expression(data)?; Location::OffsetPair { begin, end, data } } read::RawLocListEntry::StartEnd { begin, end, data } => { let begin = convert_address(begin)?; let end = convert_address(end)?; - let data = Expression(data.0.to_slice()?.into()); + let data = convert_expression(data)?; Location::StartEnd { begin, end, data } } read::RawLocListEntry::StartLength { @@ -356,7 +395,7 @@ mod convert { data, } => { let begin = convert_address(begin)?; - let data = Expression(data.0.to_slice()?.into()); + let data = convert_expression(data)?; Location::StartLength { begin, length, @@ -364,7 +403,7 @@ mod convert { } } read::RawLocListEntry::DefaultLocation { data } => { - let data = Expression(data.0.to_slice()?.into()); + let data = convert_expression(data)?; Location::DefaultLocation { data } } }; @@ -402,6 +441,8 @@ mod tests { fn test_loc_list() { let mut line_strings = 
LineStringTable::default(); let mut strings = StringTable::default(); + let mut expression = Expression::new(); + expression.op_constu(0); for &version in &[2, 3, 4, 5] { for &address_size in &[4, 8] { @@ -416,12 +457,12 @@ mod tests { Location::StartLength { begin: Address::Constant(6666), length: 7777, - data: Expression(vec![1, 0, 0, 0]), + data: expression.clone(), }, Location::StartEnd { begin: Address::Constant(4444), end: Address::Constant(5555), - data: Expression(vec![2, 0, 0, 0]), + data: expression.clone(), }, Location::BaseAddress { address: Address::Constant(1111), @@ -429,12 +470,12 @@ mod tests { Location::OffsetPair { begin: 2222, end: 3333, - data: Expression(vec![3, 0, 0, 0]), + data: expression.clone(), }, ]); if version >= 5 { loc_list.0.push(Location::DefaultLocation { - data: Expression(vec![4, 0, 0, 0]), + data: expression.clone(), }); } @@ -442,7 +483,9 @@ mod tests { let loc_list_id = locations.add(loc_list.clone()); let mut sections = Sections::new(EndianVec::new(LittleEndian)); - let loc_list_offsets = locations.write(&mut sections, encoding).unwrap(); + let loc_list_offsets = locations.write(&mut sections, encoding, None).unwrap(); + assert!(sections.debug_loc_refs.is_empty()); + assert!(sections.debug_loclists_refs.is_empty()); let read_debug_loc = read::DebugLoc::new(sections.debug_loc.slice(), LittleEndian); @@ -493,7 +536,7 @@ mod tests { loc_list.0[0] = Location::StartEnd { begin: Address::Constant(6666), end: Address::Constant(6666 + 7777), - data: Expression(vec![1, 0, 0, 0]), + data: expression.clone(), }; } assert_eq!(loc_list, convert_loc_list); diff --git a/src/write/mod.rs b/src/write/mod.rs index 8c1d3ab3a..bf36de48c 100644 --- a/src/write/mod.rs +++ b/src/write/mod.rs @@ -144,12 +144,15 @@ pub use self::dwarf::*; mod line; pub use self::line::*; -mod range; -pub use self::range::*; - mod loc; pub use self::loc::*; +mod op; +pub use self::op::*; + +mod range; +pub use self::range::*; + mod str; pub use self::str::*; @@ 
-191,6 +194,10 @@ pub enum Error { InvalidFrameDataOffset(i32), /// Unsupported eh_frame pointer encoding. UnsupportedPointerEncoding(constants::DwEhPe), + /// Unsupported reference in CFI expression. + UnsupportedCfiExpressionReference, + /// Unsupported forward reference in expression. + UnsupportedExpressionForwardReference, } impl fmt::Display for Error { @@ -238,6 +245,12 @@ impl fmt::Display for Error { Error::UnsupportedPointerEncoding(eh_pe) => { write!(f, "Unsupported eh_frame pointer encoding ({}).", eh_pe) } + Error::UnsupportedCfiExpressionReference => { + write!(f, "Unsupported reference in CFI expression.") + } + Error::UnsupportedExpressionForwardReference => { + write!(f, "Unsupported forward reference in expression.") + } } } } @@ -339,6 +352,10 @@ mod convert { UnsupportedCfiInstruction, /// Writing indirect pointers is not implemented yet. UnsupportedIndirectAddress, + /// Writing this expression operation is not implemented yet. + UnsupportedOperation, + /// Operation branch target is invalid. + InvalidBranchTarget, } impl fmt::Display for ConvertError { @@ -381,6 +398,11 @@ mod convert { UnsupportedIndirectAddress => { write!(f, "Writing indirect pointers is not implemented yet.") } + UnsupportedOperation => write!( + f, + "Writing this expression operation is not implemented yet." + ), + InvalidBranchTarget => write!(f, "Operation branch target is invalid."), } } } diff --git a/src/write/op.rs b/src/write/op.rs new file mode 100644 index 000000000..3e65f590b --- /dev/null +++ b/src/write/op.rs @@ -0,0 +1,1558 @@ +use alloc::boxed::Box; +use alloc::vec::Vec; + +use crate::common::{Encoding, Register}; +use crate::constants::{self, DwOp}; +use crate::leb128::write::{sleb128_size, uleb128_size}; +use crate::write::{ + Address, DebugInfoReference, Error, Reference, Result, UnitEntryId, UnitOffsets, Writer, +}; + +/// The bytecode for a DWARF expression or location description. 
+#[derive(Debug, Default, Clone, PartialEq, Eq, Hash)] +pub struct Expression { + operations: Vec, +} + +impl Expression { + /// Create an empty expression. + #[inline] + pub fn new() -> Self { + Self::default() + } + + /// Create an expression from raw bytecode. + /// + /// This does not support operations that require references, such as `DW_OP_addr`. + #[inline] + pub fn raw(bytecode: Vec) -> Self { + Expression { + operations: vec![Operation::Raw(bytecode)], + } + } + + /// Add an operation to the expression. + /// + /// This should only be used for operations that have no explicit operands. + pub fn op(&mut self, opcode: DwOp) { + self.operations.push(Operation::Simple(opcode)); + } + + /// Add a `DW_OP_addr` operation to the expression. + pub fn op_addr(&mut self, address: Address) { + self.operations.push(Operation::Address(address)); + } + + /// Add a `DW_OP_constu` operation to the expression. + /// + /// This may be emitted as a smaller equivalent operation. + pub fn op_constu(&mut self, value: u64) { + self.operations.push(Operation::UnsignedConstant(value)); + } + + /// Add a `DW_OP_consts` operation to the expression. + /// + /// This may be emitted as a smaller equivalent operation. + pub fn op_consts(&mut self, value: i64) { + self.operations.push(Operation::SignedConstant(value)); + } + + /// Add a `DW_OP_const_type` operation to the expression. + pub fn op_const_type(&mut self, base: UnitEntryId, value: Box<[u8]>) { + self.operations.push(Operation::ConstantType(base, value)); + } + + /// Add a `DW_OP_fbreg` operation to the expression. + pub fn op_fbreg(&mut self, offset: i64) { + self.operations.push(Operation::FrameOffset(offset)); + } + + /// Add a `DW_OP_bregx` operation to the expression. + /// + /// This may be emitted as a smaller equivalent operation. 
+ pub fn op_breg(&mut self, register: Register, offset: i64) { + self.operations + .push(Operation::RegisterOffset(register, offset)); + } + + /// Add a `DW_OP_regval_type` operation to the expression. + /// + /// This may be emitted as a smaller equivalent operation. + pub fn op_regval_type(&mut self, register: Register, base: UnitEntryId) { + self.operations + .push(Operation::RegisterType(register, base)); + } + + /// Add a `DW_OP_pick` operation to the expression. + /// + /// This may be emitted as a `DW_OP_dup` or `DW_OP_over` operation. + pub fn op_pick(&mut self, index: u8) { + self.operations.push(Operation::Pick(index)); + } + + /// Add a `DW_OP_deref` operation to the expression. + pub fn op_deref(&mut self) { + self.operations.push(Operation::Deref { space: false }); + } + + /// Add a `DW_OP_xderef` operation to the expression. + pub fn op_xderef(&mut self) { + self.operations.push(Operation::Deref { space: true }); + } + + /// Add a `DW_OP_deref_size` operation to the expression. + pub fn op_deref_size(&mut self, size: u8) { + self.operations + .push(Operation::DerefSize { size, space: false }); + } + + /// Add a `DW_OP_xderef_size` operation to the expression. + pub fn op_xderef_size(&mut self, size: u8) { + self.operations + .push(Operation::DerefSize { size, space: true }); + } + + /// Add a `DW_OP_deref_type` operation to the expression. + pub fn op_deref_type(&mut self, size: u8, base: UnitEntryId) { + self.operations.push(Operation::DerefType { + size, + base, + space: false, + }); + } + + /// Add a `DW_OP_xderef_type` operation to the expression. + pub fn op_xderef_type(&mut self, size: u8, base: UnitEntryId) { + self.operations.push(Operation::DerefType { + size, + base, + space: true, + }); + } + + /// Add a `DW_OP_plus_uconst` operation to the expression. + pub fn op_plus_uconst(&mut self, value: u64) { + self.operations.push(Operation::PlusConstant(value)); + } + + /// Add a `DW_OP_skip` operation to the expression. 
+ /// + /// Returns the index of the operation. The caller must call `set_target` with + /// this index to set the target of the branch. + pub fn op_skip(&mut self) -> usize { + let index = self.next_index(); + self.operations.push(Operation::Skip(!0)); + index + } + + /// Add a `DW_OP_bra` operation to the expression. + /// + /// Returns the index of the operation. The caller must call `set_target` with + /// this index to set the target of the branch. + pub fn op_bra(&mut self) -> usize { + let index = self.next_index(); + self.operations.push(Operation::Branch(!0)); + index + } + + /// Return the index that will be assigned to the next operation. + /// + /// This can be passed to `set_target`. + #[inline] + pub fn next_index(&self) -> usize { + self.operations.len() + } + + /// Set the target of a `DW_OP_skip` or `DW_OP_bra` operation . + pub fn set_target(&mut self, operation: usize, new_target: usize) { + debug_assert!(new_target <= self.next_index()); + debug_assert_ne!(operation, new_target); + match self.operations[operation] { + Operation::Skip(ref mut target) | Operation::Branch(ref mut target) => { + *target = new_target; + } + _ => unimplemented!(), + } + } + + /// Add a `DW_OP_call4` operation to the expression. + pub fn op_call(&mut self, entry: UnitEntryId) { + self.operations.push(Operation::Call(entry)); + } + + /// Add a `DW_OP_call_ref` operation to the expression. + pub fn op_call_ref(&mut self, entry: Reference) { + self.operations.push(Operation::CallRef(entry)); + } + + /// Add a `DW_OP_convert` operation to the expression. + /// + /// `base` is the DIE of the base type, or `None` for the generic type. + pub fn op_convert(&mut self, base: Option) { + self.operations.push(Operation::Convert(base)); + } + + /// Add a `DW_OP_reinterpret` operation to the expression. + /// + /// `base` is the DIE of the base type, or `None` for the generic type. 
+ pub fn op_reinterpret(&mut self, base: Option) { + self.operations.push(Operation::Reinterpret(base)); + } + + /// Add a `DW_OP_entry_value operation to the expression. + pub fn op_entry_value(&mut self, expression: Expression) { + self.operations.push(Operation::EntryValue(expression)); + } + + /// Add a `DW_OP_regx` operation to the expression. + /// + /// This may be emitted as a smaller equivalent operation. + pub fn op_reg(&mut self, register: Register) { + self.operations.push(Operation::Register(register)); + } + + /// Add a `DW_OP_implicit_value` operation to the expression. + pub fn op_implicit_value(&mut self, data: Box<[u8]>) { + self.operations.push(Operation::ImplicitValue(data)); + } + + /// Add a `DW_OP_implicit_pointer` operation to the expression. + pub fn op_implicit_pointer(&mut self, entry: Reference, byte_offset: i64) { + self.operations + .push(Operation::ImplicitPointer { entry, byte_offset }); + } + + /// Add a `DW_OP_piece` operation to the expression. + pub fn op_piece(&mut self, size_in_bytes: u64) { + self.operations.push(Operation::Piece { size_in_bytes }); + } + + /// Add a `DW_OP_bit_piece` operation to the expression. + pub fn op_bit_piece(&mut self, size_in_bits: u64, bit_offset: u64) { + self.operations.push(Operation::BitPiece { + size_in_bits, + bit_offset, + }); + } + + /// Add a `DW_OP_GNU_parameter_ref` operation to the expression. + pub fn op_parameter_ref(&mut self, entry: UnitEntryId) { + self.operations.push(Operation::ParameterRef(entry)); + } + + pub(crate) fn size(&self, encoding: Encoding, unit_offsets: Option<&UnitOffsets>) -> usize { + let mut size = 0; + for operation in &self.operations { + size += operation.size(encoding, unit_offsets); + } + size + } + + pub(crate) fn write( + &self, + w: &mut W, + mut refs: Option<&mut Vec>, + encoding: Encoding, + unit_offsets: Option<&UnitOffsets>, + ) -> Result<()> { + // TODO: only calculate offsets if needed? 
+ let mut offsets = Vec::with_capacity(self.operations.len()); + let mut offset = w.len(); + for operation in &self.operations { + offsets.push(offset); + offset += operation.size(encoding, unit_offsets); + } + offsets.push(offset); + for (operation, offset) in self.operations.iter().zip(offsets.iter().copied()) { + let refs = match refs { + Some(ref mut refs) => Some(&mut **refs), + None => None, + }; + debug_assert_eq!(w.len(), offset); + operation.write(w, refs, encoding, unit_offsets, &offsets)?; + } + Ok(()) + } +} + +/// A single DWARF operation. +// +// This type is intentionally not public so that we can change the +// representation of expressions as needed. +// +// Variants are listed in the order they appear in Section 2.5. +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +enum Operation { + /// Raw bytecode. + /// + /// Does not support references. + Raw(Vec), + /// An operation that has no explicit operands. + /// + /// Represents: + /// - `DW_OP_drop`, `DW_OP_swap`, `DW_OP_rot` + /// - `DW_OP_push_object_address`, `DW_OP_form_tls_address`, `DW_OP_call_frame_cfa` + /// - `DW_OP_abs`, `DW_OP_and`, `DW_OP_div`, `DW_OP_minus`, `DW_OP_mod`, `DW_OP_mul`, + /// `DW_OP_neg`, `DW_OP_not`, `DW_OP_or`, `DW_OP_plus`, `DW_OP_shl`, `DW_OP_shr`, + /// `DW_OP_shra`, `DW_OP_xor` + /// - `DW_OP_le`, `DW_OP_ge`, `DW_OP_eq`, `DW_OP_lt`, `DW_OP_gt`, `DW_OP_ne` + /// - `DW_OP_nop` + /// - `DW_OP_stack_value` + Simple(DwOp), + /// Relocate the address if needed, and push it on the stack. + /// + /// Represents `DW_OP_addr`. + Address(Address), + /// Push an unsigned constant value on the stack. + /// + /// Represents `DW_OP_constu`. + UnsignedConstant(u64), + /// Push a signed constant value on the stack. + /// + /// Represents `DW_OP_consts`. + SignedConstant(i64), + /* TODO: requires .debug_addr write support + /// Read the address at the given index in `.debug_addr, relocate the address if needed, + /// and push it on the stack. + /// + /// Represents `DW_OP_addrx`. 
+ AddressIndex(DebugAddrIndex), + /// Read the address at the given index in `.debug_addr, and push it on the stack. + /// Do not relocate the address. + /// + /// Represents `DW_OP_constx`. + ConstantIndex(DebugAddrIndex), + */ + /// Interpret the value bytes as a constant of a given type, and push it on the stack. + /// + /// Represents `DW_OP_const_type`. + ConstantType(UnitEntryId, Box<[u8]>), + /// Compute the frame base (using `DW_AT_frame_base`), add the + /// given offset, and then push the resulting sum on the stack. + /// + /// Represents `DW_OP_fbreg`. + FrameOffset(i64), + /// Find the contents of the given register, add the offset, and then + /// push the resulting sum on the stack. + /// + /// Represents `DW_OP_bregx`. + RegisterOffset(Register, i64), + /// Interpret the contents of the given register as a value of the given type, + /// and push it on the stack. + /// + /// Represents `DW_OP_regval_type`. + RegisterType(Register, UnitEntryId), + /// Copy the item at a stack index and push it on top of the stack. + /// + /// Represents `DW_OP_pick`, `DW_OP_dup`, and `DW_OP_over`. + Pick(u8), + /// Pop the topmost value of the stack, dereference it, and push the + /// resulting value. + /// + /// Represents `DW_OP_deref` and `DW_OP_xderef`. + Deref { + /// True if the dereference operation takes an address space + /// argument from the stack; false otherwise. + space: bool, + }, + /// Pop the topmost value of the stack, dereference it to obtain a value + /// of the given size, and push the resulting value. + /// + /// Represents `DW_OP_deref_size` and `DW_OP_xderef_size`. + DerefSize { + /// True if the dereference operation takes an address space + /// argument from the stack; false otherwise. + space: bool, + /// The size of the data to dereference. + size: u8, + }, + /// Pop the topmost value of the stack, dereference it to obtain a value + /// of the given type, and push the resulting value. 
+ /// + /// Represents `DW_OP_deref_type` and `DW_OP_xderef_type`. + DerefType { + /// True if the dereference operation takes an address space + /// argument from the stack; false otherwise. + space: bool, + /// The size of the data to dereference. + size: u8, + /// The DIE of the base type, or `None` for the generic type. + base: UnitEntryId, + }, + /// Add an unsigned constant to the topmost value on the stack. + /// + /// Represents `DW_OP_plus_uconst`. + PlusConstant(u64), + /// Unconditional branch to the target location. + /// + /// The value is the index within the expression of the operation to branch to. + /// This will be converted to a relative offset when writing. + /// + /// Represents `DW_OP_skip`. + Skip(usize), + /// Branch to the target location if the top of stack is nonzero. + /// + /// The value is the index within the expression of the operation to branch to. + /// This will be converted to a relative offset when writing. + /// + /// Represents `DW_OP_bra`. + Branch(usize), + /// Evaluate a DWARF expression as a subroutine. + /// + /// The expression comes from the `DW_AT_location` attribute of the indicated DIE. + /// + /// Represents `DW_OP_call4`. + Call(UnitEntryId), + /// Evaluate an external DWARF expression as a subroutine. + /// + /// The expression comes from the `DW_AT_location` attribute of the indicated DIE, + /// which may be in another compilation unit or shared object. + /// + /// Represents `DW_OP_call_ref`. + CallRef(Reference), + /// Pop the top stack entry, convert it to a different type, and push it on the stack. + /// + /// Represents `DW_OP_convert`. + Convert(Option), + /// Pop the top stack entry, reinterpret the bits in its value as a different type, + /// and push it on the stack. + /// + /// Represents `DW_OP_reinterpret`. + Reinterpret(Option), + /// Evaluate an expression at the entry to the current subprogram, and push it on the stack. + /// + /// Represents `DW_OP_entry_value`. 
+ EntryValue(Expression), + // FIXME: EntryRegister + /// Indicate that this piece's location is in the given register. + /// + /// Completes the piece or expression. + /// + /// Represents `DW_OP_reg0` through `DW_OP_reg31`, and `DW_OP_regx`. + Register(Register), + /// The object has no location, but has a known constant value. + /// + /// Completes the piece or expression. + /// + /// Represents `DW_OP_implicit_value`. + ImplicitValue(Box<[u8]>), + /// The object is a pointer to a value which has no actual location, such as + /// an implicit value or a stack value. + /// + /// Completes the piece or expression. + /// + /// Represents `DW_OP_implicit_pointer`. + ImplicitPointer { + /// The DIE of the value that this is an implicit pointer into. + entry: Reference, + /// The byte offset into the value that the implicit pointer points to. + byte_offset: i64, + }, + /// Terminate a piece. + /// + /// Represents `DW_OP_piece`. + Piece { + /// The size of this piece in bytes. + size_in_bytes: u64, + }, + /// Terminate a piece with a size in bits. + /// + /// Represents `DW_OP_bit_piece`. + BitPiece { + /// The size of this piece in bits. + size_in_bits: u64, + /// The bit offset of this piece. + bit_offset: u64, + }, + /// This represents a parameter that was optimized out. + /// + /// The entry is the definition of the parameter, and is matched to + /// the `DW_TAG_GNU_call_site_parameter` in the caller that also + /// points to the same definition of the parameter. + /// + /// Represents `DW_OP_GNU_parameter_ref`. + ParameterRef(UnitEntryId), +} + +impl Operation { + /// Return the number of bytes that `write` emits for this operation, + /// including the opcode byte. + /// + /// `Raw` bytecode is counted as is, with no opcode byte added. When + /// `unit_offsets` is `None`, references to unit entries are counted as + /// zero bytes; `write` returns an error for such references instead. + fn size(&self, encoding: Encoding, unit_offsets: Option<&UnitOffsets>) -> usize { + let base_size = |base| { + // Errors are handled during writes. 
+ match unit_offsets { + Some(offsets) => uleb128_size(offsets.unit_offset(base)), + None => 0, + } + }; + 1 + match *self { + Operation::Raw(ref bytecode) => return bytecode.len(), + Operation::Simple(_) => 0, + Operation::Address(_) => encoding.address_size as usize, + Operation::UnsignedConstant(value) => { + if value < 32 { + 0 + } else { + uleb128_size(value) + } + } + Operation::SignedConstant(value) => sleb128_size(value), + Operation::ConstantType(base, ref value) => base_size(base) + 1 + value.len(), + Operation::FrameOffset(offset) => sleb128_size(offset), + Operation::RegisterOffset(register, offset) => { + if register.0 < 32 { + sleb128_size(offset) + } else { + uleb128_size(register.0.into()) + sleb128_size(offset) + } + } + Operation::RegisterType(register, base) => { + uleb128_size(register.0.into()) + base_size(base) + } + Operation::Pick(index) => { + if index > 1 { + 1 + } else { + 0 + } + } + Operation::Deref { .. } => 0, + Operation::DerefSize { .. } => 1, + Operation::DerefType { base, .. } => 1 + base_size(base), + Operation::PlusConstant(value) => uleb128_size(value), + Operation::Skip(_) => 2, + Operation::Branch(_) => 2, + Operation::Call(_) => 4, + Operation::CallRef(_) => encoding.format.word_size() as usize, + Operation::Convert(base) => match base { + Some(base) => base_size(base), + None => 1, + }, + Operation::Reinterpret(base) => match base { + Some(base) => base_size(base), + None => 1, + }, + Operation::EntryValue(ref expression) => { + let length = expression.size(encoding, unit_offsets); + uleb128_size(length as u64) + length + } + Operation::Register(register) => { + if register.0 < 32 { + 0 + } else { + uleb128_size(register.0.into()) + } + } + Operation::ImplicitValue(ref data) => uleb128_size(data.len() as u64) + data.len(), + Operation::ImplicitPointer { byte_offset, .. 
} => { + encoding.format.word_size() as usize + sleb128_size(byte_offset) + } + Operation::Piece { size_in_bytes } => uleb128_size(size_in_bytes), + Operation::BitPiece { + size_in_bits, + bit_offset, + } => uleb128_size(size_in_bits) + uleb128_size(bit_offset), + Operation::ParameterRef(_) => 4, + } + } + + /** Encode this operation into `w`. `offsets` gives the start position of every operation of the enclosing expression, indexed by operation index and measured like `w.len()`; `Skip` and `Branch` use it to turn their target index into a 2-byte relative displacement, and panic if the target is not a valid index into `offsets`. References to unit entries need `unit_offsets`: `None` yields `Error::UnsupportedCfiExpressionReference`, and an entry offset of 0 yields `Error::UnsupportedExpressionForwardReference`. `Reference::Entry` targets are written as zero placeholders and recorded in `refs`; `Error::InvalidReference` is returned if `refs` is `None`. */ + pub(crate) fn write( + &self, + w: &mut W, + refs: Option<&mut Vec>, + encoding: Encoding, + unit_offsets: Option<&UnitOffsets>, + offsets: &[usize], + ) -> Result<()> { + let entry_offset = |entry| match unit_offsets { + Some(offsets) => { + let offset = offsets.unit_offset(entry); + if offset == 0 { + Err(Error::UnsupportedExpressionForwardReference) + } else { + Ok(offset) + } + } + None => Err(Error::UnsupportedCfiExpressionReference), + }; + match *self { + Operation::Raw(ref bytecode) => w.write(bytecode)?, + Operation::Simple(opcode) => w.write_u8(opcode.0)?, + Operation::Address(address) => { + w.write_u8(constants::DW_OP_addr.0)?; + w.write_address(address, encoding.address_size)?; + } + Operation::UnsignedConstant(value) => { + if value < 32 { + w.write_u8(constants::DW_OP_lit0.0 + value as u8)?; + } else { + w.write_u8(constants::DW_OP_constu.0)?; + w.write_uleb128(value)?; + } + } + Operation::SignedConstant(value) => { + w.write_u8(constants::DW_OP_consts.0)?; + w.write_sleb128(value)?; + } + Operation::ConstantType(base, ref value) => { + if encoding.version >= 5 { + w.write_u8(constants::DW_OP_const_type.0)?; + } else { + w.write_u8(constants::DW_OP_GNU_const_type.0)?; + } + w.write_uleb128(entry_offset(base)?)?; + w.write_udata(value.len() as u64, 1)?; + w.write(&value)?; + } + Operation::FrameOffset(offset) => { + w.write_u8(constants::DW_OP_fbreg.0)?; + w.write_sleb128(offset)?; + } + Operation::RegisterOffset(register, offset) => { + if register.0 < 32 { + w.write_u8(constants::DW_OP_breg0.0 + register.0 as u8)?; + } else { + w.write_u8(constants::DW_OP_bregx.0)?; + w.write_uleb128(register.0.into())?; + } + w.write_sleb128(offset)?; + } + 
Operation::RegisterType(register, base) => { + if encoding.version >= 5 { + w.write_u8(constants::DW_OP_regval_type.0)?; + } else { + w.write_u8(constants::DW_OP_GNU_regval_type.0)?; + } + w.write_uleb128(register.0.into())?; + w.write_uleb128(entry_offset(base)?)?; + } + Operation::Pick(index) => match index { + 0 => w.write_u8(constants::DW_OP_dup.0)?, + 1 => w.write_u8(constants::DW_OP_over.0)?, + _ => { + w.write_u8(constants::DW_OP_pick.0)?; + w.write_u8(index)?; + } + }, + Operation::Deref { space } => { + if space { + w.write_u8(constants::DW_OP_xderef.0)?; + } else { + w.write_u8(constants::DW_OP_deref.0)?; + } + } + Operation::DerefSize { space, size } => { + if space { + w.write_u8(constants::DW_OP_xderef_size.0)?; + } else { + w.write_u8(constants::DW_OP_deref_size.0)?; + } + w.write_u8(size)?; + } + Operation::DerefType { space, size, base } => { + if space { + w.write_u8(constants::DW_OP_xderef_type.0)?; + } else { + if encoding.version >= 5 { + w.write_u8(constants::DW_OP_deref_type.0)?; + } else { + w.write_u8(constants::DW_OP_GNU_deref_type.0)?; + } + } + w.write_u8(size)?; + w.write_uleb128(entry_offset(base)?)?; + } + Operation::PlusConstant(value) => { + w.write_u8(constants::DW_OP_plus_uconst.0)?; + w.write_uleb128(value)?; + } + Operation::Skip(target) => { + w.write_u8(constants::DW_OP_skip.0)?; + let offset = offsets[target] as i64 - (w.len() as i64 + 2); + w.write_sdata(offset, 2)?; + } + Operation::Branch(target) => { + w.write_u8(constants::DW_OP_bra.0)?; + let offset = offsets[target] as i64 - (w.len() as i64 + 2); + w.write_sdata(offset, 2)?; + } + Operation::Call(entry) => { + w.write_u8(constants::DW_OP_call4.0)?; + // TODO: this probably won't work in practice, because we may + // only know the offsets of base type DIEs at this point. 
+ w.write_udata(entry_offset(entry)?, 4)?; + } + Operation::CallRef(entry) => { + w.write_u8(constants::DW_OP_call_ref.0)?; + let size = encoding.format.word_size(); + match entry { + Reference::Symbol(symbol) => w.write_reference(symbol, size)?, + Reference::Entry(unit, entry) => { + let refs = refs.ok_or(Error::InvalidReference)?; + refs.push(DebugInfoReference { + offset: w.len(), + unit, + entry, + size, + }); + w.write_udata(0, size)?; + } + } + } + Operation::Convert(base) => { + if encoding.version >= 5 { + w.write_u8(constants::DW_OP_convert.0)?; + } else { + w.write_u8(constants::DW_OP_GNU_convert.0)?; + } + match base { + Some(base) => w.write_uleb128(entry_offset(base)?)?, + None => w.write_u8(0)?, + } + } + Operation::Reinterpret(base) => { + if encoding.version >= 5 { + w.write_u8(constants::DW_OP_reinterpret.0)?; + } else { + w.write_u8(constants::DW_OP_GNU_reinterpret.0)?; + } + match base { + Some(base) => w.write_uleb128(entry_offset(base)?)?, + None => w.write_u8(0)?, + } + } + Operation::EntryValue(ref expression) => { + if encoding.version >= 5 { + w.write_u8(constants::DW_OP_entry_value.0)?; + } else { + w.write_u8(constants::DW_OP_GNU_entry_value.0)?; + } + let length = expression.size(encoding, unit_offsets); + w.write_uleb128(length as u64)?; + expression.write(w, refs, encoding, unit_offsets)?; + } + Operation::Register(register) => { + if register.0 < 32 { + w.write_u8(constants::DW_OP_reg0.0 + register.0 as u8)?; + } else { + w.write_u8(constants::DW_OP_regx.0)?; + w.write_uleb128(register.0.into())?; + } + } + Operation::ImplicitValue(ref data) => { + w.write_u8(constants::DW_OP_implicit_value.0)?; + w.write_uleb128(data.len() as u64)?; + w.write(&data)?; + } + Operation::ImplicitPointer { entry, byte_offset } => { + if encoding.version >= 5 { + w.write_u8(constants::DW_OP_implicit_pointer.0)?; + } else { + w.write_u8(constants::DW_OP_GNU_implicit_pointer.0)?; + } + let size = encoding.format.word_size(); + match entry { + 
Reference::Symbol(symbol) => { + w.write_reference(symbol, size)?; + } + Reference::Entry(unit, entry) => { + let refs = refs.ok_or(Error::InvalidReference)?; + refs.push(DebugInfoReference { + offset: w.len(), + unit, + entry, + size, + }); + w.write_udata(0, size)?; + } + } + w.write_sleb128(byte_offset)?; + } + Operation::Piece { size_in_bytes } => { + w.write_u8(constants::DW_OP_piece.0)?; + w.write_uleb128(size_in_bytes)?; + } + Operation::BitPiece { + size_in_bits, + bit_offset, + } => { + w.write_u8(constants::DW_OP_bit_piece.0)?; + w.write_uleb128(size_in_bits)?; + w.write_uleb128(bit_offset)?; + } + Operation::ParameterRef(entry) => { + w.write_u8(constants::DW_OP_GNU_parameter_ref.0)?; + w.write_udata(entry_offset(entry)?, 4)?; + } + } + Ok(()) + } +} + +#[cfg(feature = "read")] +pub(crate) mod convert { + use super::*; + use crate::common::UnitSectionOffset; + use crate::read::{self, Reader}; + use crate::write::{ConvertError, ConvertResult, UnitEntryId, UnitId}; + use std::collections::HashMap; + + impl Expression { + /// Create an expression from the input expression. + /// + /// `dwarf` and `unit` are needed to resolve `.debug_addr` indices, and + /// `entry_ids` (together with `unit` for unit-relative offsets) to + /// translate DIE references. If an operation needs one of these and it + /// is `None`, `ConvertError::UnsupportedOperation` is returned. + /// + /// `convert_address` maps each input address; when it returns `None` + /// the conversion fails with `ConvertError::InvalidAddress`. + pub fn from>( + from_expression: read::Expression, + encoding: Encoding, + dwarf: Option<&read::Dwarf>, + unit: Option<&read::Unit>, + entry_ids: Option<&HashMap>, + convert_address: &dyn Fn(u64) -> Option
, + ) -> ConvertResult { + let convert_unit_offset = |offset: read::UnitOffset| -> ConvertResult<_> { + let entry_ids = entry_ids.ok_or(ConvertError::UnsupportedOperation)?; + let unit = unit.ok_or(ConvertError::UnsupportedOperation)?; + let id = entry_ids + .get(&offset.to_unit_section_offset(unit)) + .ok_or(ConvertError::InvalidUnitRef)?; + Ok(id.1) + }; + let convert_debug_info_offset = |offset| -> ConvertResult<_> { + // TODO: support relocations + let entry_ids = entry_ids.ok_or(ConvertError::UnsupportedOperation)?; + let id = entry_ids + .get(&UnitSectionOffset::DebugInfoOffset(offset)) + .ok_or(ConvertError::InvalidDebugInfoRef)?; + Ok(Reference::Entry(id.0, id.1)) + }; + + // Calculate offsets for use in branch/skip operations. + // `offsets[i]` is the byte offset at which input operation `i` starts; + // the expression length is appended so that a branch to the end of + // the expression also maps to an index. + let mut offsets = Vec::new(); + let mut offset = 0; + let mut from_operations = from_expression.clone().operations(encoding); + while let Some(_) = from_operations.next()? { + offsets.push(offset); + offset = from_operations.offset_from(&from_expression); + } + offsets.push(from_expression.0.len()); + + // Second pass: convert each operation. Branch targets are turned from + // byte offsets into operation indices by searching `offsets`. + let mut from_operations = from_expression.clone().operations(encoding); + let mut operations = Vec::new(); + while let Some(from_operation) = from_operations.next()? 
{ + let operation = match from_operation { + read::Operation::Deref { + base_type, + size, + space, + } => { + if base_type.0 != 0 { + let base = convert_unit_offset(base_type)?; + Operation::DerefType { space, size, base } + } else if size != encoding.address_size { + Operation::DerefSize { space, size } + } else { + Operation::Deref { space } + } + } + read::Operation::Drop => Operation::Simple(constants::DW_OP_drop), + read::Operation::Pick { index } => Operation::Pick(index), + read::Operation::Swap => Operation::Simple(constants::DW_OP_swap), + read::Operation::Rot => Operation::Simple(constants::DW_OP_rot), + read::Operation::Abs => Operation::Simple(constants::DW_OP_abs), + read::Operation::And => Operation::Simple(constants::DW_OP_and), + read::Operation::Div => Operation::Simple(constants::DW_OP_div), + read::Operation::Minus => Operation::Simple(constants::DW_OP_minus), + read::Operation::Mod => Operation::Simple(constants::DW_OP_mod), + read::Operation::Mul => Operation::Simple(constants::DW_OP_mul), + read::Operation::Neg => Operation::Simple(constants::DW_OP_neg), + read::Operation::Not => Operation::Simple(constants::DW_OP_not), + read::Operation::Or => Operation::Simple(constants::DW_OP_or), + read::Operation::Plus => Operation::Simple(constants::DW_OP_plus), + read::Operation::PlusConstant { value } => Operation::PlusConstant(value), + read::Operation::Shl => Operation::Simple(constants::DW_OP_shl), + read::Operation::Shr => Operation::Simple(constants::DW_OP_shr), + read::Operation::Shra => Operation::Simple(constants::DW_OP_shra), + read::Operation::Xor => Operation::Simple(constants::DW_OP_xor), + read::Operation::Eq => Operation::Simple(constants::DW_OP_eq), + read::Operation::Ge => Operation::Simple(constants::DW_OP_ge), + read::Operation::Gt => Operation::Simple(constants::DW_OP_gt), + read::Operation::Le => Operation::Simple(constants::DW_OP_le), + read::Operation::Lt => Operation::Simple(constants::DW_OP_lt), + read::Operation::Ne => 
Operation::Simple(constants::DW_OP_ne), + read::Operation::Bra { target } => { + let offset = from_operations + .offset_from(&from_expression) + .wrapping_add(i64::from(target) as usize); + let index = offsets + .binary_search(&offset) + .map_err(|_| ConvertError::InvalidBranchTarget)?; + Operation::Branch(index) + } + read::Operation::Skip { target } => { + let offset = from_operations + .offset_from(&from_expression) + .wrapping_add(i64::from(target) as usize); + let index = offsets + .binary_search(&offset) + .map_err(|_| ConvertError::InvalidBranchTarget)?; + Operation::Skip(index) + } + read::Operation::UnsignedConstant { value } => { + Operation::UnsignedConstant(value) + } + read::Operation::SignedConstant { value } => Operation::SignedConstant(value), + read::Operation::Register { register } => Operation::Register(register), + read::Operation::RegisterOffset { + register, + offset, + base_type, + } => { + if base_type.0 != 0 { + Operation::RegisterType(register, convert_unit_offset(base_type)?) + } else { + Operation::RegisterOffset(register, offset) + } + } + read::Operation::FrameOffset { offset } => Operation::FrameOffset(offset), + read::Operation::Nop => Operation::Simple(constants::DW_OP_nop), + read::Operation::PushObjectAddress => { + Operation::Simple(constants::DW_OP_push_object_address) + } + read::Operation::Call { offset } => match offset { + read::DieReference::UnitRef(offset) => { + Operation::Call(convert_unit_offset(offset)?) + } + read::DieReference::DebugInfoRef(offset) => { + Operation::CallRef(convert_debug_info_offset(offset)?) 
+ } + }, + read::Operation::TLS => Operation::Simple(constants::DW_OP_form_tls_address), + read::Operation::CallFrameCFA => { + Operation::Simple(constants::DW_OP_call_frame_cfa) + } + read::Operation::Piece { + size_in_bits, + bit_offset: None, + } => Operation::Piece { + size_in_bytes: size_in_bits / 8, + }, + read::Operation::Piece { + size_in_bits, + bit_offset: Some(bit_offset), + } => Operation::BitPiece { + size_in_bits, + bit_offset, + }, + read::Operation::ImplicitValue { data } => { + Operation::ImplicitValue(data.to_slice()?.into_owned().into()) + } + read::Operation::StackValue => Operation::Simple(constants::DW_OP_stack_value), + read::Operation::ImplicitPointer { value, byte_offset } => { + let entry = convert_debug_info_offset(value)?; + Operation::ImplicitPointer { entry, byte_offset } + } + read::Operation::EntryValue { expression } => { + let expression = Expression::from( + read::Expression(expression), + encoding, + dwarf, + unit, + entry_ids, + convert_address, + )?; + Operation::EntryValue(expression) + } + read::Operation::ParameterRef { offset } => { + let entry = convert_unit_offset(offset)?; + Operation::ParameterRef(entry) + } + read::Operation::Address { address } => { + let address = + convert_address(address).ok_or(ConvertError::InvalidAddress)?; + Operation::Address(address) + } + read::Operation::AddressIndex { index } => { + let dwarf = dwarf.ok_or(ConvertError::UnsupportedOperation)?; + let unit = unit.ok_or(ConvertError::UnsupportedOperation)?; + let val = dwarf.address(unit, index)?; + let address = convert_address(val).ok_or(ConvertError::InvalidAddress)?; + Operation::Address(address) + } + read::Operation::ConstantIndex { index } => { + let dwarf = dwarf.ok_or(ConvertError::UnsupportedOperation)?; + let unit = unit.ok_or(ConvertError::UnsupportedOperation)?; + let val = dwarf.address(unit, index)?; + Operation::UnsignedConstant(val) + } + read::Operation::TypedLiteral { base_type, value } => { + let entry = 
convert_unit_offset(base_type)?; + Operation::ConstantType(entry, value.to_slice()?.into_owned().into()) + } + read::Operation::Convert { base_type } => { + if base_type.0 == 0 { + Operation::Convert(None) + } else { + let entry = convert_unit_offset(base_type)?; + Operation::Convert(Some(entry)) + } + } + read::Operation::Reinterpret { base_type } => { + if base_type.0 == 0 { + Operation::Reinterpret(None) + } else { + let entry = convert_unit_offset(base_type)?; + Operation::Reinterpret(Some(entry)) + } + } + }; + operations.push(operation); + } + Ok(Expression { operations }) + } + } +} + +#[cfg(test)] +#[cfg(feature = "read")] +mod tests { + use super::*; + use crate::common::{ + DebugAbbrevOffset, DebugAddrBase, DebugInfoOffset, DebugLocListsBase, DebugRngListsBase, + DebugStrOffsetsBase, Format, SectionId, UnitSectionOffset, + }; + use crate::read; + use crate::write::{ + DebugLineStrOffsets, DebugStrOffsets, EndianVec, LineProgram, Sections, Unit, UnitTable, + }; + use crate::LittleEndian; + use std::collections::HashMap; + + #[test] + fn test_operation() { + for &version in &[3, 4, 5] { + for &address_size in &[4, 8] { + for &format in &[Format::Dwarf32, Format::Dwarf64] { + let encoding = Encoding { + format, + version, + address_size, + }; + + let mut units = UnitTable::default(); + let unit_id = units.add(Unit::new(encoding, LineProgram::none())); + let unit = units.get_mut(unit_id); + let entry_id = unit.add(unit.root(), constants::DW_TAG_base_type); + let reference = Reference::Entry(unit_id, entry_id); + + let mut sections = Sections::new(EndianVec::new(LittleEndian)); + let debug_line_str_offsets = DebugLineStrOffsets::none(); + let debug_str_offsets = DebugStrOffsets::none(); + let debug_info_offsets = units + .write(&mut sections, &debug_line_str_offsets, &debug_str_offsets) + .unwrap(); + let unit_offsets = debug_info_offsets.unit_offsets(unit_id); + let debug_info_offset = unit_offsets.debug_info_offset(entry_id); + let entry_offset = + 
read::UnitOffset(unit_offsets.unit_offset(entry_id) as usize); + + let mut reg_expression = Expression::new(); + reg_expression.op_reg(Register(23)); + + let operations: &[(&dyn Fn(&mut Expression), Operation, read::Operation<_>)] = + &[ + ( + &|x| x.op_deref(), + Operation::Deref { space: false }, + read::Operation::Deref { + base_type: read::UnitOffset(0), + size: address_size, + space: false, + }, + ), + ( + &|x| x.op_xderef(), + Operation::Deref { space: true }, + read::Operation::Deref { + base_type: read::UnitOffset(0), + size: address_size, + space: true, + }, + ), + ( + &|x| x.op_deref_size(2), + Operation::DerefSize { + space: false, + size: 2, + }, + read::Operation::Deref { + base_type: read::UnitOffset(0), + size: 2, + space: false, + }, + ), + ( + &|x| x.op_xderef_size(2), + Operation::DerefSize { + space: true, + size: 2, + }, + read::Operation::Deref { + base_type: read::UnitOffset(0), + size: 2, + space: true, + }, + ), + ( + &|x| x.op_deref_type(2, entry_id), + Operation::DerefType { + space: false, + size: 2, + base: entry_id, + }, + read::Operation::Deref { + base_type: entry_offset, + size: 2, + space: false, + }, + ), + ( + &|x| x.op_xderef_type(2, entry_id), + Operation::DerefType { + space: true, + size: 2, + base: entry_id, + }, + read::Operation::Deref { + base_type: entry_offset, + size: 2, + space: true, + }, + ), + ( + &|x| x.op(constants::DW_OP_drop), + Operation::Simple(constants::DW_OP_drop), + read::Operation::Drop, + ), + ( + &|x| x.op_pick(0), + Operation::Pick(0), + read::Operation::Pick { index: 0 }, + ), + ( + &|x| x.op_pick(1), + Operation::Pick(1), + read::Operation::Pick { index: 1 }, + ), + ( + &|x| x.op_pick(2), + Operation::Pick(2), + read::Operation::Pick { index: 2 }, + ), + ( + &|x| x.op(constants::DW_OP_swap), + Operation::Simple(constants::DW_OP_swap), + read::Operation::Swap, + ), + ( + &|x| x.op(constants::DW_OP_rot), + Operation::Simple(constants::DW_OP_rot), + read::Operation::Rot, + ), + ( + &|x| 
x.op(constants::DW_OP_abs), + Operation::Simple(constants::DW_OP_abs), + read::Operation::Abs, + ), + ( + &|x| x.op(constants::DW_OP_and), + Operation::Simple(constants::DW_OP_and), + read::Operation::And, + ), + ( + &|x| x.op(constants::DW_OP_div), + Operation::Simple(constants::DW_OP_div), + read::Operation::Div, + ), + ( + &|x| x.op(constants::DW_OP_minus), + Operation::Simple(constants::DW_OP_minus), + read::Operation::Minus, + ), + ( + &|x| x.op(constants::DW_OP_mod), + Operation::Simple(constants::DW_OP_mod), + read::Operation::Mod, + ), + ( + &|x| x.op(constants::DW_OP_mul), + Operation::Simple(constants::DW_OP_mul), + read::Operation::Mul, + ), + ( + &|x| x.op(constants::DW_OP_neg), + Operation::Simple(constants::DW_OP_neg), + read::Operation::Neg, + ), + ( + &|x| x.op(constants::DW_OP_not), + Operation::Simple(constants::DW_OP_not), + read::Operation::Not, + ), + ( + &|x| x.op(constants::DW_OP_or), + Operation::Simple(constants::DW_OP_or), + read::Operation::Or, + ), + ( + &|x| x.op(constants::DW_OP_plus), + Operation::Simple(constants::DW_OP_plus), + read::Operation::Plus, + ), + ( + &|x| x.op_plus_uconst(23), + Operation::PlusConstant(23), + read::Operation::PlusConstant { value: 23 }, + ), + ( + &|x| x.op(constants::DW_OP_shl), + Operation::Simple(constants::DW_OP_shl), + read::Operation::Shl, + ), + ( + &|x| x.op(constants::DW_OP_shr), + Operation::Simple(constants::DW_OP_shr), + read::Operation::Shr, + ), + ( + &|x| x.op(constants::DW_OP_shra), + Operation::Simple(constants::DW_OP_shra), + read::Operation::Shra, + ), + ( + &|x| x.op(constants::DW_OP_xor), + Operation::Simple(constants::DW_OP_xor), + read::Operation::Xor, + ), + ( + &|x| x.op(constants::DW_OP_eq), + Operation::Simple(constants::DW_OP_eq), + read::Operation::Eq, + ), + ( + &|x| x.op(constants::DW_OP_ge), + Operation::Simple(constants::DW_OP_ge), + read::Operation::Ge, + ), + ( + &|x| x.op(constants::DW_OP_gt), + Operation::Simple(constants::DW_OP_gt), + read::Operation::Gt, + ), + ( + 
&|x| x.op(constants::DW_OP_le), + Operation::Simple(constants::DW_OP_le), + read::Operation::Le, + ), + ( + &|x| x.op(constants::DW_OP_lt), + Operation::Simple(constants::DW_OP_lt), + read::Operation::Lt, + ), + ( + &|x| x.op(constants::DW_OP_ne), + Operation::Simple(constants::DW_OP_ne), + read::Operation::Ne, + ), + ( + &|x| x.op_constu(23), + Operation::UnsignedConstant(23), + read::Operation::UnsignedConstant { value: 23 }, + ), + ( + &|x| x.op_consts(-23), + Operation::SignedConstant(-23), + read::Operation::SignedConstant { value: -23 }, + ), + ( + &|x| x.op_reg(Register(23)), + Operation::Register(Register(23)), + read::Operation::Register { + register: Register(23), + }, + ), + ( + &|x| x.op_reg(Register(123)), + Operation::Register(Register(123)), + read::Operation::Register { + register: Register(123), + }, + ), + ( + &|x| x.op_breg(Register(23), 34), + Operation::RegisterOffset(Register(23), 34), + read::Operation::RegisterOffset { + register: Register(23), + offset: 34, + base_type: read::UnitOffset(0), + }, + ), + ( + &|x| x.op_breg(Register(123), 34), + Operation::RegisterOffset(Register(123), 34), + read::Operation::RegisterOffset { + register: Register(123), + offset: 34, + base_type: read::UnitOffset(0), + }, + ), + ( + &|x| x.op_regval_type(Register(23), entry_id), + Operation::RegisterType(Register(23), entry_id), + read::Operation::RegisterOffset { + register: Register(23), + offset: 0, + base_type: entry_offset, + }, + ), + ( + &|x| x.op_fbreg(34), + Operation::FrameOffset(34), + read::Operation::FrameOffset { offset: 34 }, + ), + ( + &|x| x.op(constants::DW_OP_nop), + Operation::Simple(constants::DW_OP_nop), + read::Operation::Nop, + ), + ( + &|x| x.op(constants::DW_OP_push_object_address), + Operation::Simple(constants::DW_OP_push_object_address), + read::Operation::PushObjectAddress, + ), + ( + &|x| x.op_call(entry_id), + Operation::Call(entry_id), + read::Operation::Call { + offset: read::DieReference::UnitRef(entry_offset), + }, + ), + ( + 
&|x| x.op_call_ref(reference), + Operation::CallRef(reference), + read::Operation::Call { + offset: read::DieReference::DebugInfoRef(debug_info_offset), + }, + ), + ( + &|x| x.op(constants::DW_OP_form_tls_address), + Operation::Simple(constants::DW_OP_form_tls_address), + read::Operation::TLS, + ), + ( + &|x| x.op(constants::DW_OP_call_frame_cfa), + Operation::Simple(constants::DW_OP_call_frame_cfa), + read::Operation::CallFrameCFA, + ), + ( + &|x| x.op_piece(23), + Operation::Piece { size_in_bytes: 23 }, + read::Operation::Piece { + size_in_bits: 23 * 8, + bit_offset: None, + }, + ), + ( + &|x| x.op_bit_piece(23, 34), + Operation::BitPiece { + size_in_bits: 23, + bit_offset: 34, + }, + read::Operation::Piece { + size_in_bits: 23, + bit_offset: Some(34), + }, + ), + ( + &|x| x.op_implicit_value(vec![23].into()), + Operation::ImplicitValue(vec![23].into()), + read::Operation::ImplicitValue { + data: read::EndianSlice::new(&[23], LittleEndian), + }, + ), + ( + &|x| x.op(constants::DW_OP_stack_value), + Operation::Simple(constants::DW_OP_stack_value), + read::Operation::StackValue, + ), + ( + &|x| x.op_implicit_pointer(reference, 23), + Operation::ImplicitPointer { + entry: reference, + byte_offset: 23, + }, + read::Operation::ImplicitPointer { + value: debug_info_offset, + byte_offset: 23, + }, + ), + ( + &|x| x.op_entry_value(reg_expression.clone()), + Operation::EntryValue(reg_expression.clone()), + read::Operation::EntryValue { + expression: read::EndianSlice::new( + &[constants::DW_OP_reg23.0], + LittleEndian, + ), + }, + ), + ( + &|x| x.op_parameter_ref(entry_id), + Operation::ParameterRef(entry_id), + read::Operation::ParameterRef { + offset: entry_offset, + }, + ), + ( + &|x| x.op_addr(Address::Constant(23)), + Operation::Address(Address::Constant(23)), + read::Operation::Address { address: 23 }, + ), + ( + &|x| x.op_const_type(entry_id, vec![23].into()), + Operation::ConstantType(entry_id, vec![23].into()), + read::Operation::TypedLiteral { + base_type: 
entry_offset, + value: read::EndianSlice::new(&[23], LittleEndian), + }, + ), + ( + &|x| x.op_convert(None), + Operation::Convert(None), + read::Operation::Convert { + base_type: read::UnitOffset(0), + }, + ), + ( + &|x| x.op_convert(Some(entry_id)), + Operation::Convert(Some(entry_id)), + read::Operation::Convert { + base_type: entry_offset, + }, + ), + ( + &|x| x.op_reinterpret(None), + Operation::Reinterpret(None), + read::Operation::Reinterpret { + base_type: read::UnitOffset(0), + }, + ), + ( + &|x| x.op_reinterpret(Some(entry_id)), + Operation::Reinterpret(Some(entry_id)), + read::Operation::Reinterpret { + base_type: entry_offset, + }, + ), + ]; + + let mut expression = Expression::new(); + let start_index = expression.next_index(); + for (f, o, _) in operations { + f(&mut expression); + assert_eq!(expression.operations.last(), Some(o)); + } + + let bra_index = expression.op_bra(); + let skip_index = expression.op_skip(); + expression.op(constants::DW_OP_nop); + let end_index = expression.next_index(); + expression.set_target(bra_index, start_index); + expression.set_target(skip_index, end_index); + + let mut w = EndianVec::new(LittleEndian); + let mut refs = Vec::new(); + expression + .write(&mut w, Some(&mut refs), encoding, Some(&unit_offsets)) + .unwrap(); + for r in &refs { + assert_eq!(r.unit, unit_id); + assert_eq!(r.entry, entry_id); + w.write_offset_at( + r.offset, + debug_info_offset.0, + SectionId::DebugInfo, + r.size, + ) + .unwrap(); + } + + let read_expression = + read::Expression(read::EndianSlice::new(w.slice(), LittleEndian)); + let mut read_operations = read_expression.operations(encoding); + for (_, _, operation) in operations { + assert_eq!(read_operations.next(), Ok(Some(*operation))); + } + + // 4 = DW_OP_skip + i16 + DW_OP_nop + assert_eq!( + read_operations.next(), + Ok(Some(read::Operation::Bra { + target: -(w.len() as i16) + 4 + })) + ); + // 1 = DW_OP_nop + assert_eq!( + read_operations.next(), + Ok(Some(read::Operation::Skip { 
target: 1 })) + ); + assert_eq!(read_operations.next(), Ok(Some(read::Operation::Nop))); + assert_eq!(read_operations.next(), Ok(None)); + + // Fake the unit. + // NOTE(review): it is placed at `.debug_info` offset 0, presumably so + // that the unit-relative offsets read back above resolve to the same + // key that is inserted into `entry_ids` below; confirm. + let unit = read::Unit { + offset: UnitSectionOffset::DebugInfoOffset(DebugInfoOffset(0)), + header: read::UnitHeader::new( + encoding, + 0, + DebugAbbrevOffset(0), + read::EndianSlice::new(&[], LittleEndian), + ), + abbreviations: read::Abbreviations::default(), + name: None, + comp_dir: None, + low_pc: 0, + str_offsets_base: DebugStrOffsetsBase(0), + addr_base: DebugAddrBase(0), + loclists_base: DebugLocListsBase(0), + rnglists_base: DebugRngListsBase(0), + line_program: None, + }; + + // Let the conversion map the DIE's `.debug_info` offset back to the + // unit and entry ids it was written from. + let mut entry_ids = HashMap::new(); + entry_ids.insert( + UnitSectionOffset::DebugInfoOffset(debug_info_offset), + (unit_id, entry_id), + ); + // Convert the parsed expression back and check that it yields the + // same operations that were written, followed by the branch, skip + // and nop appended after the table. + let convert_expression = Expression::from( + read_expression, + encoding, + None, /* dwarf */ + Some(&unit), + Some(&entry_ids), + &|address| Some(Address::Constant(address)), + ) + .unwrap(); + let mut convert_operations = convert_expression.operations.iter(); + for (_, operation, _) in operations { + assert_eq!(convert_operations.next(), Some(operation)); + } + assert_eq!( + convert_operations.next(), + Some(&Operation::Branch(start_index)) + ); + assert_eq!(convert_operations.next(), Some(&Operation::Skip(end_index))); + assert_eq!( + convert_operations.next(), + Some(&Operation::Simple(constants::DW_OP_nop)) + ); + } + } + } + } +} diff --git a/src/write/section.rs b/src/write/section.rs index 1bbf49c64..fa233858f 100644 --- a/src/write/section.rs +++ b/src/write/section.rs @@ -1,10 +1,11 @@ use std::ops::DerefMut; use std::result; +use std::vec::Vec; use crate::common::SectionId; use crate::write::{ - DebugAbbrev, DebugInfo, DebugLine, DebugLineStr, DebugLoc, DebugLocLists, DebugRanges, - DebugRngLists, DebugStr, Writer, + DebugAbbrev, DebugInfo, DebugInfoReference, DebugLine, DebugLineStr, DebugLoc, DebugLocLists, + DebugRanges, DebugRngLists, DebugStr, Writer, }; macro_rules! 
define_section { @@ -84,6 +85,12 @@ pub struct Sections { pub debug_loclists: DebugLocLists, /// The `.debug_str` section. pub debug_str: DebugStr, + /// Unresolved references in the `.debug_info` section. + pub(crate) debug_info_refs: Vec, + /// Unresolved references in the `.debug_loc` section. + pub(crate) debug_loc_refs: Vec, + /// Unresolved references in the `.debug_loclists` section. + pub(crate) debug_loclists_refs: Vec, } impl Sections { @@ -99,6 +106,9 @@ impl Sections { debug_loc: DebugLoc(section.clone()), debug_loclists: DebugLocLists(section.clone()), debug_str: DebugStr(section.clone()), + debug_info_refs: Vec::new(), + debug_loc_refs: Vec::new(), + debug_loclists_refs: Vec::new(), } } } diff --git a/src/write/unit.rs b/src/write/unit.rs index fd95bbabd..3ca4394a4 100644 --- a/src/write/unit.rs +++ b/src/write/unit.rs @@ -10,20 +10,15 @@ use crate::constants; use crate::leb128::write::{sleb128_size, uleb128_size}; use crate::write::{ Abbreviation, AbbreviationTable, Address, AttributeSpecification, BaseId, DebugLineStrOffsets, - DebugStrOffsets, Error, FileId, LineProgram, LineStringId, LocationListId, LocationListOffsets, - LocationListTable, RangeListId, RangeListOffsets, RangeListTable, Reference, Result, Section, - Sections, StringId, Writer, + DebugStrOffsets, Error, Expression, FileId, LineProgram, LineStringId, LocationListId, + LocationListOffsets, LocationListTable, RangeListId, RangeListOffsets, RangeListTable, + Reference, Result, Section, Sections, StringId, Writer, }; define_id!(UnitId, "An identifier for a unit in a `UnitTable`."); define_id!(UnitEntryId, "An identifier for an entry in a `Unit`."); -/// The bytecode for a DWARF expression or location description. -// TODO: this needs to be a `Vec` so we can handle relocations -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct Expression(pub Vec); - /// A table of units that will be stored in the `.debug_info` section. 
#[derive(Debug, Default)] pub struct UnitTable { @@ -93,7 +88,6 @@ impl UnitTable { line_strings: &DebugLineStrOffsets, strings: &DebugStrOffsets, ) -> Result { - let mut debug_info_refs = Vec::new(); let mut offsets = DebugInfoOffsets { base_id: self.base_id, units: Vec::new(), @@ -109,27 +103,44 @@ impl UnitTable { &mut abbrevs, line_strings, strings, - &mut debug_info_refs, )?); abbrevs.write(&mut sections.debug_abbrev)?; } - for (offset, (unit, entry), size) in debug_info_refs { - let entry_offset = offsets.entry(unit, entry).0; - debug_assert_ne!(entry_offset, 0); - sections.debug_info.write_offset_at( - offset.0, - entry_offset, - SectionId::DebugInfo, - size, - )?; - } + write_section_refs( + &mut sections.debug_info_refs, + &mut sections.debug_info.0, + &offsets, + )?; + write_section_refs( + &mut sections.debug_loc_refs, + &mut sections.debug_loc.0, + &offsets, + )?; + write_section_refs( + &mut sections.debug_loclists_refs, + &mut sections.debug_loclists.0, + &offsets, + )?; Ok(offsets) } } +fn write_section_refs( + references: &mut Vec, + w: &mut W, + offsets: &DebugInfoOffsets, +) -> Result<()> { + for r in references.drain(..) { + let entry_offset = offsets.entry(r.unit, r.entry).0; + debug_assert_ne!(entry_offset, 0); + w.write_offset_at(r.offset, entry_offset, SectionId::DebugInfo, r.size)?; + } + Ok(()) +} + /// A unit's debugging information. 
#[derive(Debug)] pub struct Unit { @@ -280,7 +291,6 @@ impl Unit { abbrevs: &mut AbbreviationTable, line_strings: &DebugLineStrOffsets, strings: &DebugStrOffsets, - debug_info_refs: &mut Vec<(DebugInfoOffset, (UnitId, UnitEntryId), u8)>, ) -> Result { let line_program = if self.line_program_in_use() { self.entries[self.root.index] @@ -295,8 +305,6 @@ impl Unit { self.entries[self.root.index].delete(constants::DW_AT_stmt_list); None }; - let range_lists = self.ranges.write(sections, self.encoding)?; - let loc_lists = self.locations.write(sections, self.encoding)?; // TODO: use .debug_types for type units in DWARF v4. let w = &mut sections.debug_info; @@ -343,10 +351,18 @@ impl Unit { abbrevs, )?; + let range_lists = self.ranges.write(sections, self.encoding)?; + // Location lists can't be written until we have DIE offsets. + let loc_lists = self + .locations + .write(sections, self.encoding, Some(&offsets))?; + let w = &mut sections.debug_info; let mut unit_refs = Vec::new(); self.entries[self.root.index].write( w, + &mut sections.debug_info_refs, + &mut unit_refs, self, &mut offsets, abbrevs, @@ -355,8 +371,6 @@ impl Unit { strings, &range_lists, &loc_lists, - &mut unit_refs, - debug_info_refs, )?; let length = (w.len() - length_base) as u64; @@ -590,6 +604,8 @@ impl DebuggingInformationEntry { fn write( &self, w: &mut DebugInfo, + debug_info_refs: &mut Vec, + unit_refs: &mut Vec<(DebugInfoOffset, UnitEntryId)>, unit: &Unit, offsets: &mut UnitOffsets, abbrevs: &mut AbbreviationTable, @@ -598,8 +614,6 @@ impl DebuggingInformationEntry { strings: &DebugStrOffsets, range_lists: &RangeListOffsets, loc_lists: &LocationListOffsets, - unit_refs: &mut Vec<(DebugInfoOffset, UnitEntryId)>, - debug_info_refs: &mut Vec<(DebugInfoOffset, (UnitId, UnitEntryId), u8)>, ) -> Result<()> { debug_assert_eq!(offsets.debug_info_offset(self.id), w.offset()); w.write_uleb128(offsets.abbrev(self.id))?; @@ -613,16 +627,17 @@ impl DebuggingInformationEntry { }; for attr in &self.attrs { - 
attr.write( + attr.value.write( w, + debug_info_refs, + unit_refs, unit, + offsets, line_program, line_strings, strings, range_lists, loc_lists, - unit_refs, - debug_info_refs, )?; } @@ -630,6 +645,8 @@ impl DebuggingInformationEntry { for child in &self.children { unit.entries[child.index].write( w, + debug_info_refs, + unit_refs, unit, offsets, abbrevs, @@ -638,8 +655,6 @@ impl DebuggingInformationEntry { strings, range_lists, loc_lists, - unit_refs, - debug_info_refs, )?; } // Null child @@ -689,34 +704,6 @@ impl Attribute { self.value.form(encoding)?, )) } - - /// Write the attribute to the given sections. - #[inline] - #[allow(clippy::too_many_arguments)] - fn write( - &self, - w: &mut DebugInfo, - unit: &Unit, - line_program: Option, - line_strings: &DebugLineStrOffsets, - strings: &DebugStrOffsets, - range_lists: &RangeListOffsets, - loc_lists: &LocationListOffsets, - unit_refs: &mut Vec<(DebugInfoOffset, UnitEntryId)>, - debug_info_refs: &mut Vec<(DebugInfoOffset, (UnitId, UnitEntryId), u8)>, - ) -> Result<()> { - self.value.write( - w, - unit, - line_program, - line_strings, - strings, - range_lists, - loc_lists, - unit_refs, - debug_info_refs, - ) - } } /// The value of an attribute in a `DebuggingInformationEntry`. 
@@ -997,7 +984,7 @@ impl AttributeValue { } AttributeValue::Exprloc(ref val) => { debug_assert_form!(constants::DW_FORM_exprloc); - let size = val.0.len(); + let size = val.size(unit.encoding(), Some(offsets)); uleb128_size(size as u64) + size } AttributeValue::Flag(_) => { @@ -1140,14 +1127,15 @@ impl AttributeValue { fn write( &self, w: &mut DebugInfo, + debug_info_refs: &mut Vec, + unit_refs: &mut Vec<(DebugInfoOffset, UnitEntryId)>, unit: &Unit, + offsets: &UnitOffsets, line_program: Option, line_strings: &DebugLineStrOffsets, strings: &DebugStrOffsets, range_lists: &RangeListOffsets, loc_lists: &LocationListOffsets, - unit_refs: &mut Vec<(DebugInfoOffset, UnitEntryId)>, - debug_info_refs: &mut Vec<(DebugInfoOffset, (UnitId, UnitEntryId), u8)>, ) -> Result<()> { macro_rules! debug_assert_form { ($form:expr) => { @@ -1190,8 +1178,13 @@ impl AttributeValue { } AttributeValue::Exprloc(ref val) => { debug_assert_form!(constants::DW_FORM_exprloc); - w.write_uleb128(val.0.len() as u64)?; - w.write(&val.0)?; + w.write_uleb128(val.size(unit.encoding(), Some(offsets)) as u64)?; + val.write( + &mut w.0, + Some(debug_info_refs), + unit.encoding(), + Some(offsets), + )?; } AttributeValue::Flag(val) => { debug_assert_form!(constants::DW_FORM_flag); @@ -1218,7 +1211,12 @@ impl AttributeValue { match reference { Reference::Symbol(symbol) => w.write_reference(symbol, size)?, Reference::Entry(unit, entry) => { - debug_info_refs.push((w.offset(), (unit, entry), size)); + debug_info_refs.push(DebugInfoReference { + offset: w.len(), + unit, + entry, + size, + }); w.write_udata(0, size)?; } } @@ -1379,6 +1377,12 @@ pub struct DebugInfoOffsets { } impl DebugInfoOffsets { + #[cfg(test)] + pub(crate) fn unit_offsets(&self, unit: UnitId) -> &UnitOffsets { + debug_assert_eq!(self.base_id, unit.base_id); + &self.units[unit.index] + } + /// Get the `.debug_info` section offset for the given unit. 
#[inline] pub fn unit(&self, unit: UnitId) -> DebugInfoOffset { @@ -1403,9 +1407,18 @@ pub(crate) struct UnitOffsets { } impl UnitOffsets { + #[cfg(test)] + fn none() -> Self { + UnitOffsets { + base_id: BaseId::default(), + unit: DebugInfoOffset(0), + entries: Vec::new(), + } + } + /// Get the .debug_info offset for the given entry. #[inline] - fn debug_info_offset(&self, entry: UnitEntryId) -> DebugInfoOffset { + pub(crate) fn debug_info_offset(&self, entry: UnitEntryId) -> DebugInfoOffset { debug_assert_eq!(self.base_id, entry.base_id); let offset = self.entries[entry.index].offset; debug_assert_ne!(offset.0, 0); @@ -1442,6 +1455,19 @@ impl EntryOffset { } } +/// A reference to a `.debug_info` entry that has yet to be resolved. +#[derive(Debug, Clone, Copy)] +pub(crate) struct DebugInfoReference { + /// The offset within the section of the reference. + pub offset: usize, + /// The size of the reference. + pub size: u8, + /// The unit containing the entry. + pub unit: UnitId, + /// The entry being referenced. + pub entry: UnitEntryId, +} + #[cfg(feature = "read")] pub(crate) mod convert { use super::*; @@ -1717,9 +1743,16 @@ pub(crate) mod convert { read::AttributeValue::Data8(val) => AttributeValue::Data8(val), read::AttributeValue::Sdata(val) => AttributeValue::Sdata(val), read::AttributeValue::Udata(val) => AttributeValue::Udata(val), - // TODO: addresses and offsets in expressions need special handling. - read::AttributeValue::Exprloc(read::Expression(val)) => { - AttributeValue::Exprloc(Expression(val.to_slice()?.into())) + read::AttributeValue::Exprloc(expression) => { + let expression = Expression::from( + expression, + context.unit.encoding(), + Some(context.dwarf), + Some(context.unit), + Some(context.entry_ids), + context.convert_address, + )?; + AttributeValue::Exprloc(expression) } // TODO: it would be nice to preserve the flag form. 
read::AttributeValue::Flag(val) => AttributeValue::Flag(val), @@ -2200,18 +2233,6 @@ mod tests { let mut strings = StringTable::default(); strings.add("string one"); let string_id = strings.add("string two"); - let mut ranges = RangeListTable::default(); - let range_id = ranges.add(RangeList(vec![Range::StartEnd { - begin: Address::Constant(0x1234), - end: Address::Constant(0x2345), - }])); - let mut locations = LocationListTable::default(); - let loc_id = locations.add(LocationList(vec![Location::StartEnd { - begin: Address::Constant(0x1234), - end: Address::Constant(0x2345), - data: Expression(vec![1, 0, 0, 0]), - }])); - let mut debug_str = DebugStr::from(EndianVec::new(LittleEndian)); let debug_str_offsets = strings.write(&mut debug_str).unwrap(); let read_debug_str = read::DebugStr::new(debug_str.slice(), LittleEndian); @@ -2227,6 +2248,26 @@ mod tests { let data = vec![1, 2, 3, 4]; let read_data = read::EndianSlice::new(&[1, 2, 3, 4], LittleEndian); + let mut expression = Expression::new(); + expression.op_constu(57); + let read_expression = read::Expression(read::EndianSlice::new( + &[constants::DW_OP_constu.0, 57], + LittleEndian, + )); + + let mut ranges = RangeListTable::default(); + let range_id = ranges.add(RangeList(vec![Range::StartEnd { + begin: Address::Constant(0x1234), + end: Address::Constant(0x2345), + }])); + + let mut locations = LocationListTable::default(); + let loc_id = locations.add(LocationList(vec![Location::StartEnd { + begin: Address::Constant(0x1234), + end: Address::Constant(0x2345), + data: expression.clone(), + }])); + for &version in &[2, 3, 4, 5] { for &address_size in &[4, 8] { for &format in &[Format::Dwarf32, Format::Dwarf64] { @@ -2238,7 +2279,7 @@ mod tests { let mut sections = Sections::new(EndianVec::new(LittleEndian)); let range_list_offsets = ranges.write(&mut sections, encoding).unwrap(); - let loc_list_offsets = locations.write(&mut sections, encoding).unwrap(); + let loc_list_offsets = locations.write(&mut sections, 
encoding, None).unwrap(); let read_debug_ranges = read::DebugRanges::new(sections.debug_ranges.slice(), LittleEndian); @@ -2308,8 +2349,8 @@ mod tests { ), ( constants::DW_AT_name, - AttributeValue::Exprloc(Expression(data.clone())), - read::AttributeValue::Exprloc(read::Expression(read_data)), + AttributeValue::Exprloc(expression.clone()), + read::AttributeValue::Exprloc(read_expression), ), ( constants::DW_AT_name, @@ -2443,22 +2484,25 @@ mod tests { value: value.clone(), }; + let offsets = UnitOffsets::none(); let line_program_offset = None; - let mut unit_refs = Vec::new(); let mut debug_info_refs = Vec::new(); + let mut unit_refs = Vec::new(); let mut debug_info = DebugInfo::from(EndianVec::new(LittleEndian)); - attr.write( - &mut debug_info, - &unit, - line_program_offset, - &debug_line_str_offsets, - &debug_str_offsets, - &range_list_offsets, - &loc_list_offsets, - &mut unit_refs, - &mut debug_info_refs, - ) - .unwrap(); + attr.value + .write( + &mut debug_info, + &mut debug_info_refs, + &mut unit_refs, + &unit, + &offsets, + line_program_offset, + &debug_line_str_offsets, + &debug_str_offsets, + &range_list_offsets, + &loc_list_offsets, + ) + .unwrap(); let spec = read::AttributeSpecification::new(*name, form, None); let mut r = read::EndianSlice::new(debug_info.slice(), LittleEndian); @@ -2905,9 +2949,7 @@ mod tests { let mut ranges = RangeListTable::default(); let mut locations = LocationListTable::default(); let mut strings = StringTable::default(); - let debug_str_offsets = DebugStrOffsets::none(); let mut line_strings = LineStringTable::default(); - let debug_line_str_offsets = DebugLineStrOffsets::none(); let form = value.form(encoding).unwrap(); let attr = Attribute { @@ -2915,23 +2957,28 @@ mod tests { value: value.clone(), }; - let mut unit_refs = Vec::new(); let mut debug_info_refs = Vec::new(); + let mut unit_refs = Vec::new(); let mut debug_info = DebugInfo::from(EndianVec::new(LittleEndian)); + let offsets = UnitOffsets::none(); + let 
debug_line_str_offsets = DebugLineStrOffsets::none(); + let debug_str_offsets = DebugStrOffsets::none(); let range_list_offsets = RangeListOffsets::none(); let loc_list_offsets = LocationListOffsets::none(); - attr.write( - &mut debug_info, - &unit, - Some(line_program_offset), - &debug_line_str_offsets, - &debug_str_offsets, - &range_list_offsets, - &loc_list_offsets, - &mut unit_refs, - &mut debug_info_refs, - ) - .unwrap(); + attr.value + .write( + &mut debug_info, + &mut debug_info_refs, + &mut unit_refs, + &unit, + &offsets, + Some(line_program_offset), + &debug_line_str_offsets, + &debug_str_offsets, + &range_list_offsets, + &loc_list_offsets, + ) + .unwrap(); let spec = read::AttributeSpecification::new(*name, form, None); let mut r = read::EndianSlice::new(debug_info.slice(), LittleEndian); diff --git a/tests/convert_self.rs b/tests/convert_self.rs index d8d8411ad..ec8b592d3 100644 --- a/tests/convert_self.rs +++ b/tests/convert_self.rs @@ -91,7 +91,7 @@ fn test_convert_debug_info() { assert_eq!(debug_abbrev_data.len(), 9701); assert_eq!(debug_line_data.len(), 105_797); assert_eq!(debug_ranges_data.len(), 155_712); - assert_eq!(debug_loc_data.len(), 245_768); + assert_eq!(debug_loc_data.len(), 245_168); assert_eq!(debug_str_data.len(), 144_731); // Convert new sections