Skip to content

Commit

Permalink
CIEv4 and FDE ahead of its CIE (#563)
Browse files Browse the repository at this point in the history
* CIEv4 and FDE ahead of its CIE

* Comment
  • Loading branch information
sevaa authored Jul 16, 2024
1 parent a86debb commit 0f72cb7
Show file tree
Hide file tree
Showing 9 changed files with 62 additions and 36 deletions.
28 changes: 13 additions & 15 deletions elftools/dwarf/callframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
# Eli Bendersky ([email protected])
# This code is in the public domain
#-------------------------------------------------------------------------------
import copy
import copy, os
from collections import namedtuple
from ..common.utils import (
struct_parse, dwarf_assert, preserve_stream_pos, iterbytes)
Expand Down Expand Up @@ -84,10 +84,13 @@ def _parse_entries(self):
def _parse_entry_at(self, offset):
""" Parse an entry from self.stream starting with the given offset.
Return the entry object. self.stream will point right after the
entry.
entry (even if pulled from the cache).
"""
if offset in self._entry_cache:
return self._entry_cache[offset]
entry = self._entry_cache[offset]
self.stream.seek(entry.header.length +
entry.structs.initial_length_field_size(), os.SEEK_CUR)
return entry

entry_length = struct_parse(
self.base_structs.the_Dwarf_uint32, self.stream, offset)
Expand All @@ -97,6 +100,9 @@ def _parse_entry_at(self, offset):

dwarf_format = 64 if entry_length == 0xFFFFFFFF else 32

# Theoretically possible to have a DWARF bitness transition here.
# DWARF version doesn't matter (CIEs are versioned separately), endianness can't change.
# The structs are cached though, so no extraneous creation.
entry_structs = DWARFStructs(
little_endian=self.base_structs.little_endian,
dwarf_format=dwarf_format,
Expand Down Expand Up @@ -124,15 +130,6 @@ def _parse_entry_at(self, offset):
else:
header = self._parse_fde_header(entry_structs, offset)


# If this is DWARF version 4 or later, we can have a more precise
# address size, read from the CIE header.
if not self.for_eh_frame and entry_structs.dwarf_version >= 4:
entry_structs = DWARFStructs(
little_endian=entry_structs.little_endian,
dwarf_format=entry_structs.dwarf_format,
address_size=header.address_size)

# If the augmentation string is not empty, hope to find a length field
# in order to skip the data specified augmentation.
if is_CIE:
Expand Down Expand Up @@ -161,21 +158,22 @@ def _parse_entry_at(self, offset):
entry_structs, self.stream.tell(), end_offset)

if is_CIE:
self._entry_cache[offset] = CIE(
entry = CIE(
header=header, instructions=instructions, offset=offset,
augmentation_dict=aug_dict,
augmentation_bytes=aug_bytes,
structs=entry_structs)

else: # FDE
cie = self._parse_cie_for_fde(offset, header, entry_structs)
self._entry_cache[offset] = FDE(
entry = FDE(
header=header, instructions=instructions, offset=offset,
structs=entry_structs, cie=cie,
augmentation_bytes=aug_bytes,
lsda_pointer=lsda_pointer,
)
return self._entry_cache[offset]
self._entry_cache[offset] = entry
return entry

def _parse_instructions(self, structs, offset, end_offset):
""" Parse a list of CFI instructions from self.stream, starting with
Expand Down
6 changes: 5 additions & 1 deletion elftools/dwarf/descriptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,8 @@ def _full_reg_name(regnum):
instr.args[1] * cie['data_alignment_factor'])
elif name in ('DW_CFA_def_cfa_offset', 'DW_CFA_GNU_args_size'):
s += ' %s: %s\n' % (name, instr.args[0])
elif name == 'DW_CFA_def_cfa_offset_sf':
s += ' %s: %s\n' % (name, instr.args[0]*entry.cie['data_alignment_factor'])
elif name == 'DW_CFA_def_cfa_expression':
expr_dumper = ExprDumper(entry.structs)
# readelf output is missing a colon for DW_CFA_def_cfa_expression
Expand Down Expand Up @@ -618,7 +620,7 @@ def _init_lookups(self):
for n in range(0, 32):
self._ops_with_decimal_arg.add('DW_OP_breg%s' % n)

self._ops_with_two_decimal_args = set(['DW_OP_bregx', 'DW_OP_bit_piece'])
self._ops_with_two_decimal_args = set(['DW_OP_bregx'])

self._ops_with_hex_arg = set(
['DW_OP_addr', 'DW_OP_call2', 'DW_OP_call4', 'DW_OP_call_ref'])
Expand Down Expand Up @@ -674,5 +676,7 @@ def _dump_to_string(self, opcode, opcode_name, args, cu_offset=None):
return "%s: <0x%x> %d byte block: %s " % (opcode_name, args[0] + cu_offset, len(args[1]), ' '.join("%x" % b for b in args[1]))
elif opcode_name in ('DW_OP_GNU_regval_type', 'DW_OP_regval_type'):
return "%s: %d (%s) <0x%x>" % (opcode_name, args[0], describe_reg_name(args[0], _MACHINE_ARCH), args[1] + cu_offset)
elif opcode_name == 'DW_OP_bit_piece':
return '%s: size: %s offset: %s' % (opcode_name, args[0], args[1])
else:
return '<unknown %s>' % opcode_name
22 changes: 9 additions & 13 deletions elftools/dwarf/structs.py
Original file line number Diff line number Diff line change
Expand Up @@ -449,23 +449,19 @@ def _create_callframe_entry_headers(self):
self.Dwarf_offset('CIE_id'),
self.Dwarf_uint8('version'),
CString('augmentation'),
If(lambda ctx: ctx.version >= 4, self.Dwarf_uint8('address_size')),
If(lambda ctx: ctx.version >= 4, self.Dwarf_uint8('segment_size')),
self.Dwarf_uleb128('code_alignment_factor'),
self.Dwarf_sleb128('data_alignment_factor'),
self.Dwarf_uleb128('return_address_register'))
IfThenElse('return_address_register', lambda ctx: ctx.version > 1,
self.Dwarf_uleb128(''),
self.Dwarf_uint8('')))
self.EH_CIE_header = self.Dwarf_CIE_header

# The CIE header was modified in DWARFv4.
if self.dwarf_version == 4:
self.Dwarf_CIE_header = Struct('Dwarf_CIE_header',
self.Dwarf_initial_length('length'),
self.Dwarf_offset('CIE_id'),
self.Dwarf_uint8('version'),
CString('augmentation'),
self.Dwarf_uint8('address_size'),
self.Dwarf_uint8('segment_size'),
self.Dwarf_uleb128('code_alignment_factor'),
self.Dwarf_sleb128('data_alignment_factor'),
self.Dwarf_uleb128('return_address_register'))
# The CIE header was modified in DWARFv4, but the
# CIE header version is driven by the version # in the header
# itself, independent of the DWARF version
# in the CUs.

self.Dwarf_FDE_header = Struct('Dwarf_FDE_header',
self.Dwarf_initial_length('length'),
Expand Down
10 changes: 8 additions & 2 deletions elftools/elf/segments.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,14 @@ def section_in_segment(self, section):
# The third condition is the 'strict' one - an empty section will
# not match at the very end of the segment (unless the segment is
# also zero size, which is handled by the second condition).

# Seva 2024-07-12: a zero length section at a zero offset
# in a zero length segment should match - in GNU readelf, p_memsz
# is unsigned, on a zero length segment p_memsz-1 wraps around
# and the third condition matches.
if not (secaddr >= vaddr and
secaddr - vaddr + section['sh_size'] <= self['p_memsz'] and
secaddr - vaddr <= self['p_memsz'] - 1):
(self['p_memsz'] == 0 or secaddr - vaddr <= self['p_memsz'] - 1)):
return False

# If we've come this far and it's a NOBITS section, it's in the segment
Expand All @@ -83,9 +88,10 @@ def section_in_segment(self, section):

# Same logic as with secaddr vs. vaddr checks above, just on offsets in
# the file
# Seva 2024-07-12: similar discrepancy with readelf from unsignedness of p_filesz
return (secoffset >= poffset and
secoffset - poffset + section['sh_size'] <= self['p_filesz'] and
secoffset - poffset <= self['p_filesz'] - 1)
(self['p_filesz'] == 0 or secoffset - poffset <= self['p_filesz'] - 1))


class InterpSegment(Segment):
Expand Down
9 changes: 7 additions & 2 deletions scripts/dwarfdump.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,8 @@ def _safe_DIE_linkage_name(die, default=None):
def _desc_ref(attr, die, extra=''):
if extra:
extra = " \"%s\"" % extra
# TODO: leading zeros on the addend to CU - sometimes present, sometimes not.
# Check against the LLVM sources.
return "cu + 0x%04x => {0x%08x}%s" % (
attr.raw_value,
die.cu.cu_offset + attr.raw_value,
Expand All @@ -99,7 +101,7 @@ def _desc_strx(attr, die):
return "indexed (%08x) string = \"%s\"" % (attr.raw_value, bytes2str(attr.value).replace("\\", "\\\\"))

FORM_DESCRIPTIONS = dict(
DW_FORM_string=lambda attr, die: "\"%s\"" % (bytes2str(attr.value),),
DW_FORM_string=lambda attr, die: "\"%s\"" % (bytes2str(attr.value).replace("\\", "\\\\"),),
DW_FORM_strp=lambda attr, die: " .debug_str[0x%08x] = \"%s\"" % (attr.raw_value, bytes2str(attr.value).replace("\\", "\\\\")),
DW_FORM_strx1=_desc_strx,
DW_FORM_strx2=_desc_strx,
Expand Down Expand Up @@ -391,7 +393,10 @@ def dump_info(self):
'(0x%08x)' % die.get_parent().offset if die.get_parent() is not None else ''))
for attr_name in die.attributes:
attr = die.attributes[attr_name]
self._emitline(" %s [%s] (%s)" % (attr_name, attr.form, self.describe_attr_value(die, attr)))
self._emitline(" %s [%s] (%s)" % (
attr_name if isinstance(attr_name, str) else "DW_AT_unknown_%x" % (attr_name,),
attr.form,
self.describe_attr_value(die, attr)))
else:
self._emitline("0x%08x: NULL" % (die.offset,))
parent = die.get_parent()
Expand Down
11 changes: 9 additions & 2 deletions scripts/readelf.py
Original file line number Diff line number Diff line change
Expand Up @@ -1282,6 +1282,9 @@ def _dump_frames_info(self, section, cfi_entries):
self._format_hex(entry['CIE_id'], fieldsize=8, lead0x=False)))
self._emitline(' Version: %d' % entry['version'])
self._emitline(' Augmentation: "%s"' % bytes2str(entry['augmentation']))
if(entry['version'] >= 4):
self._emitline(' Pointer Size: %d' % entry['address_size'])
self._emitline(' Segment Size: %d' % entry['segment_size'])
self._emitline(' Code alignment factor: %u' % entry['code_alignment_factor'])
self._emitline(' Data alignment factor: %d' % entry['data_alignment_factor'])
self._emitline(' Return address column: %d' % entry['return_address_register'])
Expand All @@ -1293,9 +1296,11 @@ def _dump_frames_info(self, section, cfi_entries):
self._emitline()

elif isinstance(entry, FDE):
# Readelf bug #31973 - FDE length misreported if FDE precedes its CIE
length = entry['length'] if entry.cie.offset < entry.offset else entry.cie['length']
self._emitline('\n%08x %s %s FDE cie=%08x pc=%s..%s' % (
entry.offset,
self._format_hex(entry['length'], fullhex=True, lead0x=False),
self._format_hex(length, fullhex=True, lead0x=False),
self._format_hex(entry['CIE_pointer'], fieldsize=8, lead0x=False),
entry.cie.offset,
self._format_hex(entry['initial_location'], fullhex=True, lead0x=False),
Expand Down Expand Up @@ -1428,9 +1433,11 @@ def _dump_frames_interp_info(self, section, cfi_entries):
ra_regnum = entry['return_address_register']

elif isinstance(entry, FDE):
# Readelf bug #31973 - FDE length misreported if FDE precedes its CIE
length = entry['length'] if entry.cie.offset < entry.offset else entry.cie['length']
self._emitline('\n%08x %s %s FDE cie=%08x pc=%s..%s' % (
entry.offset,
self._format_hex(entry['length'], fullhex=True, lead0x=False),
self._format_hex(length, fullhex=True, lead0x=False),
self._format_hex(entry['CIE_pointer'], fieldsize=8, lead0x=False),
entry.cie.offset,
self._format_hex(entry['initial_location'], fullhex=True, lead0x=False),
Expand Down
9 changes: 9 additions & 0 deletions test/run_readelf_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,15 @@ def run_test_on_file(filename, verbose=False, opt=None):
testlog.info('.......................SKIPPED')
continue

# TODO(sevaa): excluding the binary with an FDE ahead of its CIE until binutils' bug #31975 is fixed
if "dwarf_v4cie" in filename and option == "--debug-dump=frames-interp":
continue

# TODO(sevaa): excluding the binary with unaligned aranges entries. Readelf tries to recover
# but produces nonsensical output; ultimately it's a toolchain bug (in IAR, I presume).
if "dwarf_v4cie" in filename and option == "--debug-dump=aranges":
continue

# sevaa says: there is another shorted out test; in dwarf_lineprogramv5.elf, the two bytes at 0x2072 were
# patched from 0x07 0x10 to 00 00.
# Those represented the second instruction in the first FDE in .eh_frame. This changed the instruction
Expand Down
3 changes: 2 additions & 1 deletion test/test_dwarf_expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@ def test_basic_single(self):
'DW_OP_regx: 16 (rip)')

self.assertEqual(self.visitor.dump_expr([0x9d, 0x8f, 0x0A, 0x90, 0x01]),
'DW_OP_bit_piece: 1295 144')
# Explaining the arguments is what the latest readelf does
'DW_OP_bit_piece: size: 1295 offset: 144')

self.assertEqual(self.visitor.dump_expr([0x0e, 0xff, 0x00, 0xff, 0x00, 0xff, 0x00, 0xff, 0x00]),
'DW_OP_const8u: 71777214294589695')
Expand Down
Binary file added test/testfiles_for_readelf/dwarf_v4cie.elf
Binary file not shown.

0 comments on commit 0f72cb7

Please sign in to comment.