diff --git a/README.md b/README.md index 1dc5d7c..0f6f787 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,7 @@ debugging information in executable files, built on top of [pyelftools](https:// - Mach-O (MacOS X, iOS) - PE (Windows, Cygwin) - WASM (aka WebAssembly) + - ar (.a files, Linux/Unix/MacOS X static libraries) This project came from my desire to see and navigate the DWARF tree of compiled Android and iOS binaries. Seeing the DIEs is easy enough with utilities like `readelf` or `dwarfdump`. However, chasing inter-DIE references back and forth is not straightforward with those. @@ -16,6 +17,8 @@ Note that regular Windows executables (EXE/DLL files) are PE files but don't, as The pyelftools library that dwex is based on supports DWARF versions 2-5, and so does dwex. DWARFv5 support might be unstable. DWARF v1 is supported experimentally, in ELF files only. +There is a known issue with incorrect parsing of DWARF in .o files and static libraries that contain them. See [eliben/pyelftools#564](https://github.com/eliben/pyelftools/issues/564). Mach-O fat binary object files inside static libraries are not supported. + Requirements and Dependencies ------------ - Python 3.6.1+ @@ -49,7 +52,7 @@ On the most basic level, the debug information in a compiled file is an array of The UI of DWARF Explorer was meant for eyeballing that data structure: -![dwex](https://user-images.githubusercontent.com/5807738/77756810-510ad300-7006-11ea-8d97-b7c109d050b1.png) +![image](https://github.com/user-attachments/assets/2c2f426a-be59-437d-98bb-1520231641f5) The left hand tree displays the DIEs, with CU root DIEs on the top level. Expand the tree and click on DIEs to see their attributes. DIE attributes that have a substructure or point at larger data structures are clickable. @@ -69,3 +72,8 @@ Prior art There is also a GUI DWARF visualizer at [simark/dwarftree](https://github.com/simark/dwarftree). Also based on pyelftools, with gtk based UI. It's been inactive since 2015. 
I didn't know about it when I started. + +Pairs well with +--------------- + +For a free general purpose GUI ELF file visualizer, see [horsicq/XELFViewer](https://github.com/horsicq/XELFViewer). diff --git a/dwex/__main__.py b/dwex/__main__.py index 0966071..0d7f774 100644 --- a/dwex/__main__.py +++ b/dwex/__main__.py @@ -11,7 +11,7 @@ from .locals import LocalsDlg # Sync with version in setup.py -version = (3, 26) +version = (4, 0) # TODO: # On MacOS, start without a main window, instead show the Open dialog diff --git a/dwex/formats.py b/dwex/formats.py index c9c059e..afac7e9 100644 --- a/dwex/formats.py +++ b/dwex/formats.py @@ -4,7 +4,7 @@ from elftools.dwarf.dwarfinfo import DWARFInfo, DebugSectionDescriptor, DwarfConfig # This doesn't depend on Qt # The dependency on filebytes only lives here -# Format codes: 0 = ELF, 1 = MACHO, 2 = PE, 3 - WASM, 4 - ELF inside A, 1 - MachO inside A +# Format codes: 0 = ELF, 1 = MACHO, 2 = PE, 3 - WASM, 4 - ELF inside A, 5 - arch specific MachO inside A class FormatError(Exception): def __init__(self, s): @@ -12,19 +12,35 @@ def __init__(self, s): def read_pe(filename): from filebytes.pe import PE, IMAGE_FILE_MACHINE + import struct, zlib pefile = PE(filename) + # TODO: debug import section in b.exe # Section's real size might be padded - see https://github.com/sashs/filebytes/issues/28 - sections = [(section.name, section, + sections = [(section.name if section.name[1] != 'z' else '.' 
+ section.name[2:], + section.name[1] == 'z', + section, section.header.PhysicalAddress_or_VirtualSize, section.header.SizeOfRawData) for section in pefile.sections - if section.name.startswith('.debug')] - - data = {name: DebugSectionDescriptor(io.BytesIO(section.bytes), name, None, - raw_size if virtual_size == 0 else min((raw_size, virtual_size)), 0) - for (name, section, virtual_size, raw_size) in sections} + if section.name.startswith('.debug') or section.name.startswith('.zdebug')] + + def read_section(name, is_compressed, section, virtual_size, raw_size): + data = section.bytes + size = raw_size if virtual_size == 0 else min((raw_size, virtual_size)) + if is_compressed: + if size < 12: + raise FormatError("Compressed section %s is unexpectedly short." % (name,)) + if data[0:4] != b'ZLIB': + raise FormatError("Unsupported format in compressed section %s, ZLIB is expected." % (name,)) + (size,) = struct.unpack('>Q', data[4:12]) + data = zlib.decompress(data[12:]) + if len(data) != size: + raise FormatError("Wrong uncompressed size in compressed section %s: expected %d, got %d."
% (name, size, len(data))) + return DebugSectionDescriptor(io.BytesIO(data), name, None, size, 0) + + data = {sec[0]: read_section(*sec) for sec in sections} if not '.debug_info' in data: return None @@ -249,7 +265,7 @@ def read_elf(file, filename): _ar_file_header = namedtuple('ARHeader', ('header_offset', 'data_offset', 'name', # Don't care for the metadata - #'mod', 'uid', 'gid', 'mode', + #'last_mod_date', 'user_id', 'group_id', 'mode', 'size')) # resolve_slice takes a list of files in the archive, and returns @@ -268,7 +284,7 @@ def read_header(): name = file.read(name_len).rstrip(b'\0') data_size -= name_len # Resolve GNU style long file names - if name.startswith(b'/') and len(name) > 1 and ord(b'0') <= name[1] <= ord(b'9'): + elif name.startswith(b'/') and len(name) > 1 and ord(b'0') <= name[1] <= ord(b'9'): if not long_names: FormatError("Long file name in a static library, but no long name section was found.") str_offset = int(name[1:]) @@ -280,7 +296,7 @@ def read_header(): #int(b[34:40]), int(b[40:48], 8), data_size) - # Not used. Just in case. + # Not used. Just in case. GNU symtab only. 
def read_symtab(size, is64): ilen = 8 if is64 else 4 length = int.from_bytes(file.read(ilen), 'big') @@ -289,7 +305,12 @@ def read_symtab(size, is64): d = file.read(size - (length+1)*ilen) symbols = d.split(b'\0')[:-1] return zip(offsets, symbols) + + def skip_content(header): + file.seek(((header.size + 1) // 2) * 2, os.SEEK_CUR) + ############################ + # read_staticlib starts here file.seek(0, os.SEEK_END) size = file.tell() file.seek(8) # Past the magic signature @@ -297,7 +318,7 @@ def read_symtab(size, is64): # First section most likely a symtab - skip header = read_header() if header.name == b'/' or header.name == b'/SYM64/' or header.name == b'__.SYMDEF': - file.seek(((header.size + 1) // 2) * 2, os.SEEK_CUR) + skip_content(header) # read_symtab(header.size, header.name == b'/SYM64/') # if header.size % 2 == 1: # file.seek(1, os.SEEK_CUR) @@ -318,11 +339,11 @@ def read_symtab(size, is64): while file.tell() < size: header = read_header() headers.append(header) - file.seek(((header.size + 1) // 2) * 2, os.SEEK_CUR) + skip_content(header) # Present the user with slice choice # TODO: encoding? - names = list(h.name.rstrip(b'/').decode('ASCII') for h in headers) + names = tuple(h.name.rstrip(b'/').decode('ASCII') for h in headers) slice = resolve_slice(names, 'Static Library', 'Choose an object file:') if slice is None: return False # Cancellation @@ -338,7 +359,7 @@ def read_symtab(size, is64): macho = MachO(None, b) di = get_macho_dwarf(macho, None) elif b[:4] == b'\xCA\xFE\xBA\xBE': - raise FormatError("The selected slice of the static library is a MachO fat binary. Those are not supported. Let the author know.") + raise FormatError("The selected slice of the static library is a Mach-O fat binary. Those are not supported. Let the author know.") else: raise FormatError("The selected slice of the static library is not a supported object file. 
Let the author know.") diff --git a/dwex/patch.py b/dwex/patch.py index 4b2d43a..43e2b6d 100644 --- a/dwex/patch.py +++ b/dwex/patch.py @@ -6,6 +6,7 @@ import elftools.dwarf.locationlists import elftools.elf.elffile import elftools.dwarf.dwarfinfo +import filebytes.pe from elftools.common.utils import struct_parse from elftools.common.exceptions import DWARFError from elftools.dwarf.descriptions import _DESCR_DW_CC @@ -205,3 +206,6 @@ def _create_structs(self): # Fix for #1588 elftools.dwarf.enums.ENUM_DW_LNCT['DW_LNCT_LLVM_source'] = 0x2001 elftools.dwarf.enums.ENUM_DW_LNCT['DW_LNCT_LLVM_is_MD5'] = 0x2002 + + # Short out import directory parsing for now + filebytes.pe.PE._parseDataDirectory = lambda self,a,b,c: None diff --git a/setup.py b/setup.py index 463cf04..d3fcceb 100644 --- a/setup.py +++ b/setup.py @@ -69,7 +69,7 @@ def run(self): setup( name='dwex', - version='3.26', # Sync with version in __main__ + version='4.0', # Sync with version in __main__ packages=['dwex'], url="https://github.com/sevaa/dwex/", entry_points={"gui_scripts": ["dwex = dwex.__main__:main"]},