From af8a5bbe828f354c0ef171f68d44400bffa5d485 Mon Sep 17 00:00:00 2001 From: Yury Delendik Date: Fri, 5 Jul 2019 09:29:15 -0500 Subject: [PATCH] Properly collect/identify used DWARF entries. --- wasmtime-debug/src/address_transform.rs | 4 + wasmtime-debug/src/gc.rs | 232 ++++++++++++++++++++++++ wasmtime-debug/src/lib.rs | 1 + wasmtime-debug/src/transform.rs | 50 ++--- 4 files changed, 264 insertions(+), 23 deletions(-) create mode 100644 wasmtime-debug/src/gc.rs diff --git a/wasmtime-debug/src/address_transform.rs b/wasmtime-debug/src/address_transform.rs index 427b1f68255f..663b7549f0f7 100644 --- a/wasmtime-debug/src/address_transform.rs +++ b/wasmtime-debug/src/address_transform.rs @@ -83,6 +83,10 @@ impl AddressTransform { } } + pub fn can_translate_address(&self, addr: u64) -> bool { + self.translate(addr).is_some() + } + pub fn translate(&self, addr: u64) -> Option { if addr == 0 { // It's normally 0 for debug info without the linked code. diff --git a/wasmtime-debug/src/gc.rs b/wasmtime-debug/src/gc.rs new file mode 100644 index 000000000000..16a54d1a63a2 --- /dev/null +++ b/wasmtime-debug/src/gc.rs @@ -0,0 +1,232 @@ +use crate::address_transform::AddressTransform; +use gimli::constants; +use gimli::read; +use gimli::{Reader, UnitSectionOffset}; +use std::collections::{HashMap, HashSet}; +use std::vec::Vec; + +#[derive(Debug)] +pub struct Dependencies { + edges: HashMap>, + roots: HashSet, +} + +impl Dependencies { + fn new() -> Dependencies { + Dependencies { + edges: HashMap::new(), + roots: HashSet::new(), + } + } + + fn add_edge(&mut self, a: UnitSectionOffset, b: UnitSectionOffset) { + use std::collections::hash_map::Entry; + match self.edges.entry(a) { + Entry::Occupied(mut o) => { + o.get_mut().insert(b); + } + Entry::Vacant(v) => { + let mut set = HashSet::new(); + set.insert(b); + v.insert(set); + } + } + } + + fn add_root(&mut self, root: UnitSectionOffset) { + self.roots.insert(root); + } + + pub fn get_reachable(&self) -> HashSet { + let mut reachable = self.roots.clone(); + let mut queue = Vec::new(); + for i in self.roots.iter() { + if let Some(deps) = self.edges.get(i) { + for j in deps { + if reachable.contains(j) { + continue; + } + reachable.insert(*j); + queue.push(*j); + } + } + } + while let Some(i) = queue.pop() { + if let Some(deps) = self.edges.get(&i) { + for j in deps { + if reachable.contains(j) { + continue; + } + reachable.insert(*j); + queue.push(*j); + } + } + } + reachable + } +} + +pub fn build_dependencies>( + dwarf: &read::Dwarf, + at: &AddressTransform, +) -> read::Result { + let mut deps = Dependencies::new(); + let mut units = dwarf.units(); + while let Some(unit) = units.next()? { + build_unit_dependencies(unit, dwarf, at, &mut deps)?; + } + Ok(deps) +} + +fn build_unit_dependencies>( + header: read::CompilationUnitHeader, + dwarf: &read::Dwarf, + at: &AddressTransform, + deps: &mut Dependencies, +) -> read::Result<()> { + let unit = dwarf.unit(header)?; + let mut tree = unit.entries_tree(None)?; + let root = tree.root()?; + build_die_dependencies(root, dwarf, &unit, at, deps)?; + Ok(()) +} + +fn has_die_back_edge>(die: &read::DebuggingInformationEntry) -> bool { + match die.tag() { + constants::DW_TAG_variable + | constants::DW_TAG_constant + | constants::DW_TAG_inlined_subroutine + | constants::DW_TAG_lexical_block + | constants::DW_TAG_label + | constants::DW_TAG_with_stmt + | constants::DW_TAG_try_block + | constants::DW_TAG_catch_block + | constants::DW_TAG_template_type_parameter + | constants::DW_TAG_member + | constants::DW_TAG_formal_parameter => true, + _ => false, + } +} + +fn has_valid_code_range>( + die: &read::DebuggingInformationEntry, + dwarf: &read::Dwarf, + unit: &read::Unit, + at: &AddressTransform, +) -> read::Result { + match die.tag() { + constants::DW_TAG_subprogram => { + if let Some(ranges_attr) = die.attr_value(constants::DW_AT_ranges)? { + let offset = match ranges_attr { + read::AttributeValue::RangeListsRef(val) => val, + read::AttributeValue::DebugRngListsIndex(index) => { + dwarf.ranges_offset(unit, index)? + } + _ => return Ok(false), + }; + let mut has_valid_base = if let Some(read::AttributeValue::Addr(low_pc)) = + die.attr_value(constants::DW_AT_low_pc)? + { + Some(at.can_translate_address(low_pc)) + } else { + None + }; + let mut it = dwarf.ranges.raw_ranges(offset, unit.encoding())?; + while let Some(range) = it.next()? { + // If at least one of the range addresses can be converted, + // declaring code range as valid. + match range { + read::RawRngListEntry::AddressOrOffsetPair { .. } + if has_valid_base.is_some() => + { + if has_valid_base.unwrap() { + return Ok(true); + } + } + read::RawRngListEntry::StartEnd { begin, .. } + | read::RawRngListEntry::StartLength { begin, .. } + | read::RawRngListEntry::AddressOrOffsetPair { begin, .. } => { + if at.can_translate_address(begin) { + return Ok(true); + } + } + read::RawRngListEntry::StartxEndx { begin, .. } + | read::RawRngListEntry::StartxLength { begin, .. } => { + let addr = dwarf.address(unit, begin)?; + if at.can_translate_address(addr) { + return Ok(true); + } + } + read::RawRngListEntry::BaseAddress { addr } => { + has_valid_base = Some(at.can_translate_address(addr)); + } + read::RawRngListEntry::BaseAddressx { addr } => { + let addr = dwarf.address(unit, addr)?; + has_valid_base = Some(at.can_translate_address(addr)); + } + read::RawRngListEntry::OffsetPair { .. } => (), + } + } + return Ok(false); + } else if let Some(low_pc) = die.attr_value(constants::DW_AT_low_pc)? { + if let read::AttributeValue::Addr(a) = low_pc { + return Ok(at.can_translate_address(a)); + } + } + } + _ => (), + } + Ok(false) +} + +fn build_die_dependencies>( + die: read::EntriesTreeNode, + dwarf: &read::Dwarf, + unit: &read::Unit, + at: &AddressTransform, + deps: &mut Dependencies, +) -> read::Result<()> { + let entry = die.entry(); + let offset = entry.offset().to_unit_section_offset(unit); + let mut attrs = entry.attrs(); + while let Some(attr) = attrs.next()? { + build_attr_dependencies(&attr, offset, dwarf, unit, at, deps)?; + } + + let mut children = die.children(); + while let Some(child) = children.next()? { + let child_entry = child.entry(); + let child_offset = child_entry.offset().to_unit_section_offset(unit); + deps.add_edge(child_offset, offset); + if has_die_back_edge(child_entry) { + deps.add_edge(offset, child_offset); + } + if has_valid_code_range(child_entry, dwarf, unit, at)? { + deps.add_root(child_offset); + } + build_die_dependencies(child, dwarf, unit, at, deps)?; + } + Ok(()) +} + +fn build_attr_dependencies>( + attr: &read::Attribute, + offset: UnitSectionOffset, + _dwarf: &read::Dwarf, + unit: &read::Unit, + _at: &AddressTransform, + deps: &mut Dependencies, +) -> read::Result<()> { + match attr.value() { + read::AttributeValue::UnitRef(val) => { + let ref_offset = val.to_unit_section_offset(unit); + deps.add_edge(offset, ref_offset); + } + read::AttributeValue::DebugInfoRef(val) => { + let ref_offset = UnitSectionOffset::DebugInfoOffset(val); + deps.add_edge(offset, ref_offset); + } + _ => (), + } + Ok(()) +} diff --git a/wasmtime-debug/src/lib.rs b/wasmtime-debug/src/lib.rs index 49ff0c28404d..c72294a375ac 100644 --- a/wasmtime-debug/src/lib.rs +++ b/wasmtime-debug/src/lib.rs @@ -12,6 +12,7 @@ pub use crate::transform::{ pub use crate::write_debuginfo::{emit_dwarf, ResolvedSymbol, SymbolResolver}; mod address_transform; +mod gc; mod read_debuginfo; mod transform; mod write_debuginfo; diff --git a/wasmtime-debug/src/transform.rs b/wasmtime-debug/src/transform.rs index bb04b89323c6..2ba67712efb0 100644 --- a/wasmtime-debug/src/transform.rs +++ b/wasmtime-debug/src/transform.rs @@ -1,19 +1,20 @@ use crate::address_transform::AddressTransform; +use crate::gc::build_dependencies; pub use crate::read_debuginfo::DebugInfoData; use cranelift_codegen::ir; use cranelift_codegen::isa::TargetFrontendConfig; use cranelift_entity::{EntityRef, PrimaryMap}; use cranelift_wasm::DefinedFuncIndex; use failure::Error; -use std::collections::{BTreeMap, HashMap}; +use std::collections::{BTreeMap, HashMap, HashSet}; use std::ops::Bound::{Included, Unbounded}; use gimli; use gimli::{ - AttributeValue, CompilationUnitHeader, DebugAbbrev, DebugAddr, DebugAddrBase, DebugLine, - DebugLineOffset, DebugStr, DebuggingInformationEntry, LineEncoding, LocationLists, RangeLists, - UnitOffset, + AttributeValue, DebugAddr, DebugAddrBase, DebugLine, DebugLineOffset, DebugStr, + DebuggingInformationEntry, LineEncoding, LocationLists, RangeLists, Unit, UnitOffset, + UnitSectionOffset, }; use gimli::write; @@ -69,13 +70,13 @@ struct DebugInputContext<'a, R> where R: Reader, { - debug_abbrev: &'a DebugAbbrev, debug_str: &'a DebugStr, debug_line: &'a DebugLine, debug_addr: &'a DebugAddr, debug_addr_base: DebugAddrBase, rnglists: &'a RangeLists, loclists: &'a LocationLists, + reachable: &'a HashSet, } type PendingDieRef = (write::UnitEntryId, gimli::DwAt, UnitOffset); @@ -276,7 +277,7 @@ enum ReadLineProgramState { } fn clone_line_program( - unit: &CompilationUnitHeader, + unit: &Unit, root: &DebuggingInformationEntry, addr_tr: &AddressTransform, out_encoding: &gimli::Encoding, @@ -310,7 +311,7 @@ where let program = debug_line.program( offset, - unit.address_size(), + unit.header.address_size(), comp_dir.and_then(|val| val.string_value(&debug_str)), comp_name.and_then(|val| val.string_value(&debug_str)), ); @@ -487,7 +488,7 @@ where } fn clone_unit<'a, R>( - unit: &CompilationUnitHeader, + unit: Unit, context: &DebugInputContext, addr_tr: &'a AddressTransform, out_encoding: &gimli::Encoding, @@ -497,18 +498,16 @@ fn clone_unit<'a, R>( where R: Reader, { - let abbrevs = unit.abbreviations(context.debug_abbrev)?; - let mut die_ref_map = HashMap::new(); let mut pending_die_refs = Vec::new(); let mut stack = Vec::new(); // Iterate over all of this compilation unit's entries. - let mut entries = unit.entries(&abbrevs); + let mut entries = unit.entries(); let (comp_unit, file_map) = if let Some((depth_delta, entry)) = entries.next_dfs()? { assert!(depth_delta == 0); let (out_line_program, debug_line_offset, file_map) = clone_line_program( - unit, + &unit, entry, addr_tr, out_encoding, @@ -559,14 +558,17 @@ where } else { depth_delta }; + if !context + .reachable + .contains(&entry.offset().to_unit_section_offset(&unit)) + { + // entry is not reachable: discarding all its info. + skip_at_depth = Some((0, depth_delta)); + continue; + } + let range = if entry.tag() == gimli::DW_TAG_subprogram { - let range = get_subprogram_range(entry, addr_tr)?; - if range.is_none() { - // Subprogram was not compiled: discarding all its info. - skip_at_depth = Some((0, depth_delta)); - continue; - } - range + get_subprogram_range(entry, addr_tr)? } else { None }; @@ -615,14 +617,17 @@ pub fn transform_dwarf( di: &DebugInfoData, at: &ModuleAddressMap, ) -> Result { + let addr_tr = AddressTransform::new(at, &di.wasm_file); + let reachable = build_dependencies(&di.dwarf, &addr_tr)?.get_reachable(); + let context = DebugInputContext { - debug_abbrev: &di.dwarf.debug_abbrev, debug_str: &di.dwarf.debug_str, debug_line: &di.dwarf.debug_line, debug_addr: &di.dwarf.debug_addr, debug_addr_base: DebugAddrBase(0), rnglists: &di.dwarf.ranges, loclists: &di.dwarf.locations, + reachable: &reachable, }; let out_encoding = gimli::Encoding { @@ -633,15 +638,14 @@ pub fn transform_dwarf( address_size: target_config.pointer_bytes(), }; - let addr_tr = AddressTransform::new(at, &di.wasm_file); - let mut out_strings = write::StringTable::default(); let mut out_units = write::UnitTable::default(); let out_line_strings = write::LineStringTable::default(); let mut iter = di.dwarf.debug_info.units(); - while let Some(ref unit) = iter.next().unwrap_or(None) { + while let Some(unit) = iter.next().unwrap_or(None) { + let unit = di.dwarf.unit(unit)?; clone_unit( unit, &context,