Display correct line numbers for multiple .text sections (#63)

AetiasHax · encounter · web-flow · commit f5b5a612fc5a · 2024-05-21T09:55:39.000-06:00
* Support multiple DWARF sequences

* Rework DWARF line info parsing

- Properly handles multiple sections
  in DWARF 1
- line_info moved into ObjSection
- DWARF 2 parser no longer errors with
  no .text section
- Both parsers properly skip empty
  sections

* Simplify line_info (no Option)

---------

Co-authored-by: Luke Street &lt;luke.street@encounterpc.com&gt;
diff --git a/objdiff-core/src/arch/mips.rs b/objdiff-core/src/arch/mips.rs
@@ -112,10 +112,7 @@ impl ObjArch for ObjArchMips {
                     }
                 }
             }
-            let line = obj
-                .line_info
-                .as_ref()
-                .and_then(|map| map.range(..=cur_addr as u64).last().map(|(_, &b)| b));
+            let line = section.line_info.range(..=cur_addr as u64).last().map(|(_, &b)| b);
             insts.push(ObjIns {
                 address: cur_addr as u64,
                 size: 4,
diff --git a/objdiff-core/src/arch/ppc.rs b/objdiff-core/src/arch/ppc.rs
@@ -133,10 +133,7 @@ impl ObjArch for ObjArchPpc {
             }
 
             ops.push(ins.op as u16);
-            let line = obj
-                .line_info
-                .as_ref()
-                .and_then(|map| map.range(..=cur_addr as u64).last().map(|(_, &b)| b));
+            let line = section.line_info.range(..=cur_addr as u64).last().map(|(_, &b)| b);
             insts.push(ObjIns {
                 address: cur_addr as u64,
                 size: 4,
diff --git a/objdiff-core/src/arch/x86.rs b/objdiff-core/src/arch/x86.rs
@@ -74,6 +74,7 @@ impl ObjArch for ObjArchX86 {
                 .relocations
                 .iter()
                 .find(|r| r.address >= address && r.address < address + instruction.len() as u64);
+            let line = section.line_info.range(..=address).last().map(|(_, &b)| b);
             output.ins = ObjIns {
                 address,
                 size: instruction.len() as u8,
@@ -82,7 +83,7 @@ impl ObjArch for ObjArchX86 {
                 args: vec![],
                 reloc: reloc.cloned(),
                 branch_dest: None,
-                line: obj.line_info.as_ref().and_then(|m| m.get(&address).cloned()),
+                line,
                 formatted: String::new(),
                 orig: None,
             };
diff --git a/objdiff-core/src/obj/mod.rs b/objdiff-core/src/obj/mod.rs
@@ -39,6 +39,8 @@ pub struct ObjSection {
     pub symbols: Vec<ObjSymbol>,
     pub relocations: Vec<ObjReloc>,
     pub virtual_address: Option<u64>,
+    /// Line number info (.line or .debug_line section)
+    pub line_info: BTreeMap<u64, u64>,
 }
 
 #[derive(Debug, Clone, Eq, PartialEq)]
@@ -128,8 +130,6 @@ pub struct ObjInfo {
     pub sections: Vec<ObjSection>,
     /// Common BSS symbols
     pub common: Vec<ObjSymbol>,
-    /// Line number info (.line or .debug_line section)
-    pub line_info: Option<BTreeMap<u64, u64>>,
     /// Split object metadata (.note.split section)
     pub split_meta: Option<SplitMeta>,
 }
diff --git a/objdiff-core/src/obj/read.rs b/objdiff-core/src/obj/read.rs
@@ -1,4 +1,4 @@
-use std::{collections::BTreeMap, fs, io::Cursor, path::Path};
+use std::{fs, io::Cursor, path::Path};
 
 use anyhow::{anyhow, bail, ensure, Context, Result};
 use byteorder::{BigEndian, ReadBytesExt};
@@ -111,6 +111,7 @@ fn filter_sections(obj_file: &File<'_>, split_meta: Option<&SplitMeta>) -> Resul
             symbols: Vec::new(),
             relocations: Vec::new(),
             virtual_address,
+            line_info: Default::default(),
         });
     }
     result.sort_by(|a, b| a.name.cmp(&b.name));
@@ -268,26 +269,40 @@ fn relocations_by_section(
     Ok(relocations)
 }
 
-fn line_info(obj_file: &File<'_>) -> Result<Option<BTreeMap<u64, u64>>> {
+fn line_info(obj_file: &File<'_>, sections: &mut [ObjSection]) -> Result<()> {
     // DWARF 1.1
-    let mut map = BTreeMap::new();
     if let Some(section) = obj_file.section_by_name(".line") {
-        if section.size() == 0 {
-            return Ok(None);
-        }
         let data = section.uncompressed_data()?;
         let mut reader = Cursor::new(data.as_ref());
 
-        let size = reader.read_u32::<BigEndian>()?;
-        let base_address = reader.read_u32::<BigEndian>()? as u64;
-        while reader.position() < size as u64 {
-            let line_number = reader.read_u32::<BigEndian>()? as u64;
-            let statement_pos = reader.read_u16::<BigEndian>()?;
-            if statement_pos != 0xFFFF {
-                log::warn!("Unhandled statement pos {}", statement_pos);
+        let mut text_sections = obj_file.sections().filter(|s| s.kind() == SectionKind::Text);
+        while reader.position() < data.len() as u64 {
+            let text_section_index = text_sections
+                .next()
+                .ok_or_else(|| anyhow!("Next text section not found for line info"))?
+                .index()
+                .0;
+            let start = reader.position();
+            let size = reader.read_u32::<BigEndian>()?;
+            let base_address = reader.read_u32::<BigEndian>()? as u64;
+            let Some(out_section) =
+                sections.iter_mut().find(|s| s.orig_index == text_section_index)
+            else {
+                // Skip line info for sections we filtered out
+                reader.set_position(start + size as u64);
+                continue;
+            };
+            let end = start + size as u64;
+            while reader.position() < end {
+                let line_number = reader.read_u32::<BigEndian>()? as u64;
+                let statement_pos = reader.read_u16::<BigEndian>()?;
+                if statement_pos != 0xFFFF {
+                    log::warn!("Unhandled statement pos {}", statement_pos);
+                }
+                let address_delta = reader.read_u32::<BigEndian>()? as u64;
+                out_section.line_info.insert(base_address + address_delta, line_number);
+                log::debug!("Line: {:#x} -> {}", base_address + address_delta, line_number);
             }
-            let address_delta = reader.read_u32::<BigEndian>()? as u64;
-            map.insert(base_address + address_delta, line_number);
         }
     }
 
@@ -308,22 +323,48 @@ fn line_info(obj_file: &File<'_>) -> Result<Option<BTreeMap<u64, u64>>> {
         };
         let dwarf = dwarf_cow.borrow(|section| gimli::EndianSlice::new(section, endian));
         let mut iter = dwarf.units();
-        while let Some(header) = iter.next()? {
+        if let Some(header) = iter.next()? {
             let unit = dwarf.unit(header)?;
             if let Some(program) = unit.line_program.clone() {
+                let mut text_sections =
+                    obj_file.sections().filter(|s| s.kind() == SectionKind::Text);
+                let section_index = text_sections
+                    .next()
+                    .ok_or_else(|| anyhow!("Next text section not found for line info"))?
+                    .index()
+                    .0;
+                let mut lines = sections
+                    .iter_mut()
+                    .find(|s| s.orig_index == section_index)
+                    .map(|s| &mut s.line_info);
+
                 let mut rows = program.rows();
                 while let Some((_header, row)) = rows.next_row()? {
-                    if let Some(line) = row.line() {
-                        map.insert(row.address(), line.get());
+                    if let (Some(line), Some(lines)) = (row.line(), &mut lines) {
+                        lines.insert(row.address(), line.get());
+                    }
+                    if row.end_sequence() {
+                        // The next row is the start of a new sequence, which means we must
+                        // advance to the next .text section.
+                        let section_index = text_sections
+                            .next()
+                            .ok_or_else(|| anyhow!("Next text section not found for line info"))?
+                            .index()
+                            .0;
+                        lines = sections
+                            .iter_mut()
+                            .find(|s| s.orig_index == section_index)
+                            .map(|s| &mut s.line_info);
                     }
                 }
             }
         }
+        if iter.next()?.is_some() {
+            log::warn!("Multiple units found in DWARF data, only processing the first");
+        }
     }
-    if map.is_empty() {
-        return Ok(None);
-    }
-    Ok(Some(map))
+
+    Ok(())
 }
 
 pub fn read(obj_path: &Path) -> Result<ObjInfo> {
@@ -342,16 +383,9 @@ pub fn read(obj_path: &Path) -> Result<ObjInfo> {
         section.relocations =
             relocations_by_section(arch.as_ref(), &obj_file, section, split_meta.as_ref())?;
     }
+    line_info(&obj_file, &mut sections)?;
     let common = common_symbols(arch.as_ref(), &obj_file, split_meta.as_ref())?;
-    Ok(ObjInfo {
-        arch,
-        path: obj_path.to_owned(),
-        timestamp,
-        sections,
-        common,
-        line_info: line_info(&obj_file)?,
-        split_meta,
-    })
+    Ok(ObjInfo { arch, path: obj_path.to_owned(), timestamp, sections, common, split_meta })
 }
 
 pub fn has_function(obj_path: &Path, symbol_name: &str) -> Result<bool> {

Original file line number	Diff line number	Diff line change
`@@ -112,10 +112,7 @@ impl ObjArch for ObjArchMips {`
`112`	`112`	`}`
`113`	`113`	`}`
`114`	`114`	`}`
`115`		`- let line = obj`
`116`		`- .line_info`
`117`		`- .as_ref()`
`118`		`- .and_then(\|map\| map.range(..=cur_addr as u64).last().map(\|(_, &b)\| b));`
	`115`	`+ let line = section.line_info.range(..=cur_addr as u64).last().map(\|(_, &b)\| b);`
`119`	`116`	`insts.push(ObjIns {`
`120`	`117`	`address: cur_addr as u64,`
`121`	`118`	`size: 4,`