diff --git a/elftools/dwarf/callframe.py b/elftools/dwarf/callframe.py index 8fa9990f..2caaf2f7 100644 --- a/elftools/dwarf/callframe.py +++ b/elftools/dwarf/callframe.py @@ -6,7 +6,7 @@ # Eli Bendersky (eliben@gmail.com) # This code is in the public domain #------------------------------------------------------------------------------- -import copy +import copy, os from collections import namedtuple from ..common.utils import ( struct_parse, dwarf_assert, preserve_stream_pos, iterbytes) @@ -84,10 +84,13 @@ def _parse_entries(self): def _parse_entry_at(self, offset): """ Parse an entry from self.stream starting with the given offset. Return the entry object. self.stream will point right after the - entry. + entry (even if pulled from the cache). """ if offset in self._entry_cache: - return self._entry_cache[offset] + entry = self._entry_cache[offset] + self.stream.seek(entry.header.length + + entry.structs.initial_length_field_size(), os.SEEK_CUR) + return entry entry_length = struct_parse( self.base_structs.the_Dwarf_uint32, self.stream, offset) @@ -97,6 +100,9 @@ def _parse_entry_at(self, offset): dwarf_format = 64 if entry_length == 0xFFFFFFFF else 32 + # Theoretically possible to have a DWARF bitness transition here. + # DWARF version doesn't matter (CIEs are versioned separately), endianness can't change. + # The structs are cached though, so no extraneous creation. entry_structs = DWARFStructs( little_endian=self.base_structs.little_endian, dwarf_format=dwarf_format, @@ -124,15 +130,6 @@ def _parse_entry_at(self, offset): else: header = self._parse_fde_header(entry_structs, offset) - - # If this is DWARF version 4 or later, we can have a more precise - # address size, read from the CIE header. - if not self.for_eh_frame and entry_structs.dwarf_version >= 4: - entry_structs = DWARFStructs( - little_endian=entry_structs.little_endian, - dwarf_format=entry_structs.dwarf_format, - address_size=header.address_size) - # If the augmentation string is not empty, hope to find a length field # in order to skip the data specified augmentation. if is_CIE: @@ -161,7 +158,7 @@ def _parse_entry_at(self, offset): entry_structs, self.stream.tell(), end_offset) if is_CIE: - self._entry_cache[offset] = CIE( + entry = CIE( header=header, instructions=instructions, offset=offset, augmentation_dict=aug_dict, augmentation_bytes=aug_bytes, @@ -169,13 +166,14 @@ def _parse_entry_at(self, offset): else: # FDE cie = self._parse_cie_for_fde(offset, header, entry_structs) - self._entry_cache[offset] = FDE( + entry = FDE( header=header, instructions=instructions, offset=offset, structs=entry_structs, cie=cie, augmentation_bytes=aug_bytes, lsda_pointer=lsda_pointer, ) - return self._entry_cache[offset] + self._entry_cache[offset] = entry + return entry def _parse_instructions(self, structs, offset, end_offset): """ Parse a list of CFI instructions from self.stream, starting with diff --git a/elftools/dwarf/descriptions.py b/elftools/dwarf/descriptions.py index 2da46556..a1ebf4b9 100644 --- a/elftools/dwarf/descriptions.py +++ b/elftools/dwarf/descriptions.py @@ -100,6 +100,8 @@ def _full_reg_name(regnum): instr.args[1] * cie['data_alignment_factor']) elif name in ('DW_CFA_def_cfa_offset', 'DW_CFA_GNU_args_size'): s += ' %s: %s\n' % (name, instr.args[0]) + elif name == 'DW_CFA_def_cfa_offset_sf': + s += ' %s: %s\n' % (name, instr.args[0]*entry.cie['data_alignment_factor']) elif name == 'DW_CFA_def_cfa_expression': expr_dumper = ExprDumper(entry.structs) # readelf output is missing a colon for DW_CFA_def_cfa_expression @@ -618,7 +620,7 @@ def _init_lookups(self): for n in range(0, 32): self._ops_with_decimal_arg.add('DW_OP_breg%s' % n) - self._ops_with_two_decimal_args = set(['DW_OP_bregx', 'DW_OP_bit_piece']) + self._ops_with_two_decimal_args = set(['DW_OP_bregx']) self._ops_with_hex_arg = set( ['DW_OP_addr', 'DW_OP_call2', 'DW_OP_call4', 'DW_OP_call_ref']) @@ -674,5 +676,7 @@ def _dump_to_string(self, opcode, opcode_name, args, cu_offset=None): return "%s: <0x%x> %d byte block: %s " % (opcode_name, args[0] + cu_offset, len(args[1]), ' '.join("%x" % b for b in args[1])) elif opcode_name in ('DW_OP_GNU_regval_type', 'DW_OP_regval_type'): return "%s: %d (%s) <0x%x>" % (opcode_name, args[0], describe_reg_name(args[0], _MACHINE_ARCH), args[1] + cu_offset) + elif opcode_name == 'DW_OP_bit_piece': + return '%s: size: %s offset: %s' % (opcode_name, args[0], args[1]) else: return '' % opcode_name diff --git a/elftools/dwarf/structs.py b/elftools/dwarf/structs.py index df6075e8..75c1e797 100644 --- a/elftools/dwarf/structs.py +++ b/elftools/dwarf/structs.py @@ -449,23 +449,19 @@ def _create_callframe_entry_headers(self): self.Dwarf_offset('CIE_id'), self.Dwarf_uint8('version'), CString('augmentation'), + If(lambda ctx: ctx.version >= 4, self.Dwarf_uint8('address_size')), + If(lambda ctx: ctx.version >= 4, self.Dwarf_uint8('segment_size')), self.Dwarf_uleb128('code_alignment_factor'), self.Dwarf_sleb128('data_alignment_factor'), - self.Dwarf_uleb128('return_address_register')) + IfThenElse('return_address_register', lambda ctx: ctx.version > 1, + self.Dwarf_uleb128(''), + self.Dwarf_uint8(''))) self.EH_CIE_header = self.Dwarf_CIE_header - # The CIE header was modified in DWARFv4. - if self.dwarf_version == 4: - self.Dwarf_CIE_header = Struct('Dwarf_CIE_header', - self.Dwarf_initial_length('length'), - self.Dwarf_offset('CIE_id'), - self.Dwarf_uint8('version'), - CString('augmentation'), - self.Dwarf_uint8('address_size'), - self.Dwarf_uint8('segment_size'), - self.Dwarf_uleb128('code_alignment_factor'), - self.Dwarf_sleb128('data_alignment_factor'), - self.Dwarf_uleb128('return_address_register')) + # The CIE header was modified in DWARFv4, but the + # CIE header version is driven by the version # in the header + # itself, independent of the DWARF version + # in the CUs. self.Dwarf_FDE_header = Struct('Dwarf_FDE_header', self.Dwarf_initial_length('length'), diff --git a/elftools/elf/segments.py b/elftools/elf/segments.py index 0c318e17..9ddda235 100644 --- a/elftools/elf/segments.py +++ b/elftools/elf/segments.py @@ -69,9 +69,14 @@ def section_in_segment(self, section): # The third condition is the 'strict' one - an empty section will # not match at the very end of the segment (unless the segment is # also zero size, which is handled by the second condition). + + # Seva 2024-07-12: a zero length section at a zero offset + # in a zero length segment should match - in GNU readelf, p_memsz + # is unsigned, on a zero length segment p_memsz-1 wraps around + # and the third condition matches. if not (secaddr >= vaddr and secaddr - vaddr + section['sh_size'] <= self['p_memsz'] and - secaddr - vaddr <= self['p_memsz'] - 1): + (self['p_memsz'] == 0 or secaddr - vaddr <= self['p_memsz'] - 1)): return False # If we've come this far and it's a NOBITS section, it's in the segment @@ -83,9 +88,10 @@ def section_in_segment(self, section): # Same logic as with secaddr vs. vaddr checks above, just on offsets in # the file + # Seva 2024-07-12: similar discrepancy with readelf from unsignedness of p_filesz return (secoffset >= poffset and secoffset - poffset + section['sh_size'] <= self['p_filesz'] and - secoffset - poffset <= self['p_filesz'] - 1) + (self['p_filesz'] == 0 or secoffset - poffset <= self['p_filesz'] - 1)) class InterpSegment(Segment): diff --git a/scripts/dwarfdump.py b/scripts/dwarfdump.py index 6d46ba59..d92f18be 100644 --- a/scripts/dwarfdump.py +++ b/scripts/dwarfdump.py @@ -84,6 +84,8 @@ def _safe_DIE_linkage_name(die, default=None): def _desc_ref(attr, die, extra=''): if extra: extra = " \"%s\"" % extra + # TODO: leading zeros on the addend to CU - sometimes present, sometimes not. + # Check by the LLVM sources. return "cu + 0x%04x => {0x%08x}%s" % ( attr.raw_value, die.cu.cu_offset + attr.raw_value, @@ -99,7 +101,7 @@ def _desc_strx(attr, die): return "indexed (%08x) string = \"%s\"" % (attr.raw_value, bytes2str(attr.value).replace("\\", "\\\\")) FORM_DESCRIPTIONS = dict( - DW_FORM_string=lambda attr, die: "\"%s\"" % (bytes2str(attr.value),), + DW_FORM_string=lambda attr, die: "\"%s\"" % (bytes2str(attr.value).replace("\\", "\\\\"),), DW_FORM_strp=lambda attr, die: " .debug_str[0x%08x] = \"%s\"" % (attr.raw_value, bytes2str(attr.value).replace("\\", "\\\\")), DW_FORM_strx1=_desc_strx, DW_FORM_strx2=_desc_strx, @@ -391,7 +393,10 @@ def dump_info(self): '(0x%08x)' % die.get_parent().offset if die.get_parent() is not None else '')) for attr_name in die.attributes: attr = die.attributes[attr_name] - self._emitline(" %s [%s] (%s)" % (attr_name, attr.form, self.describe_attr_value(die, attr))) + self._emitline(" %s [%s] (%s)" % ( + attr_name if isinstance(attr_name, str) else "DW_AT_unknown_%x" % (attr_name,), + attr.form, + self.describe_attr_value(die, attr))) else: self._emitline("0x%08x: NULL" % (die.offset,)) parent = die.get_parent() diff --git a/scripts/readelf.py b/scripts/readelf.py index 32496eb9..2a7d49c8 100755 --- a/scripts/readelf.py +++ b/scripts/readelf.py @@ -1282,6 +1282,9 @@ def _dump_frames_info(self, section, cfi_entries): self._format_hex(entry['CIE_id'], fieldsize=8, lead0x=False))) self._emitline(' Version: %d' % entry['version']) self._emitline(' Augmentation: "%s"' % bytes2str(entry['augmentation'])) + if(entry['version'] >= 4): + self._emitline(' Pointer Size: %d' % entry['address_size']) + self._emitline(' Segment Size: %d' % entry['segment_size']) self._emitline(' Code alignment factor: %u' % entry['code_alignment_factor']) self._emitline(' Data alignment factor: %d' % entry['data_alignment_factor']) self._emitline(' Return address column: %d' % entry['return_address_register']) @@ -1293,9 +1296,11 @@ def _dump_frames_info(self, section, cfi_entries): self._emitline() elif isinstance(entry, FDE): + # Readelf bug #31973 + length = entry['length'] if entry.cie.offset < entry.offset else entry.cie['length'] self._emitline('\n%08x %s %s FDE cie=%08x pc=%s..%s' % ( entry.offset, - self._format_hex(entry['length'], fullhex=True, lead0x=False), + self._format_hex(length, fullhex=True, lead0x=False), self._format_hex(entry['CIE_pointer'], fieldsize=8, lead0x=False), entry.cie.offset, self._format_hex(entry['initial_location'], fullhex=True, lead0x=False), @@ -1428,9 +1433,11 @@ def _dump_frames_interp_info(self, section, cfi_entries): ra_regnum = entry['return_address_register'] elif isinstance(entry, FDE): + # Readelf bug #31973 - FDE length misreported if FDE precedes its CIE + length = entry['length'] if entry.cie.offset < entry.offset else entry.cie['length'] self._emitline('\n%08x %s %s FDE cie=%08x pc=%s..%s' % ( entry.offset, - self._format_hex(entry['length'], fullhex=True, lead0x=False), + self._format_hex(length, fullhex=True, lead0x=False), self._format_hex(entry['CIE_pointer'], fieldsize=8, lead0x=False), entry.cie.offset, self._format_hex(entry['initial_location'], fullhex=True, lead0x=False), diff --git a/test/run_readelf_tests.py b/test/run_readelf_tests.py index c1fc48c2..9c6747fb 100755 --- a/test/run_readelf_tests.py +++ b/test/run_readelf_tests.py @@ -84,6 +84,15 @@ def run_test_on_file(filename, verbose=False, opt=None): testlog.info('.......................SKIPPED') continue + # TODO(sevaa): excluding the binary with CIE ahead of FDE until binutils' bug #31975 is fixed + if "dwarf_v4cie" in filename and option == "--debug-dump=frames-interp": + continue + + # TODO(sevaa): excluding the binary with unaligned aranges entries. Readelf tried to recover + # but produces nonsensical output, but ultimately it's a toolchain bug (in IAR I presume). + if "dwarf_v4cie" in filename and option == "--debug-dump=aranges": + continue + # sevaa says: there is another shorted out test; in dwarf_lineprogramv5.elf, the two bytes at 0x2072 were # patched from 0x07 0x10 to 00 00. # Those represented the second instruction in the first FDE in .eh_frame. This changed the instruction diff --git a/test/test_dwarf_expr.py b/test/test_dwarf_expr.py index 93dc30f8..0d228d83 100644 --- a/test/test_dwarf_expr.py +++ b/test/test_dwarf_expr.py @@ -38,7 +38,8 @@ def test_basic_single(self): 'DW_OP_regx: 16 (rip)') self.assertEqual(self.visitor.dump_expr([0x9d, 0x8f, 0x0A, 0x90, 0x01]), - 'DW_OP_bit_piece: 1295 144') + # Explaining the arguments is what the latest readelf does + 'DW_OP_bit_piece: size: 1295 offset: 144') self.assertEqual(self.visitor.dump_expr([0x0e, 0xff, 0x00, 0xff, 0x00, 0xff, 0x00, 0xff, 0x00]), 'DW_OP_const8u: 71777214294589695') diff --git a/test/testfiles_for_readelf/dwarf_v4cie.elf b/test/testfiles_for_readelf/dwarf_v4cie.elf new file mode 100644 index 00000000..f3cc7b87 Binary files /dev/null and b/test/testfiles_for_readelf/dwarf_v4cie.elf differ