Skip to content

Commit

Permalink
Compressed sections in PE
Browse files Browse the repository at this point in the history
  • Loading branch information
sevaa committed Aug 18, 2024
1 parent 4cf1063 commit 2f2fa15
Show file tree
Hide file tree
Showing 5 changed files with 50 additions and 17 deletions.
10 changes: 9 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ debugging information in executable files, built on top of [pyelftools](https://
- Mach-O (MacOS X, iOS)
- PE (Windows, Cygwin)
- WASM (aka WebAssembly)
- ar (.a files, Linux/Unix/MacOS X static libraries)

This project came from my desire to see and navigate the DWARF tree of compiled Android and iOS binaries. Seeing the DIEs is easy enough with utilities like `readelf` or `dwarfdump`. However, chasing inter-DIE references back and forth is not straightforward with those.

Expand All @@ -16,6 +17,8 @@ Note that regular Windows executables (EXE/DLL files) are PE files but don't, as

The pyelftools library that dwex is based on supports DWARF versions 2-5, and so does dwex. DWARFv5 support might be unstable. DWARF v1 is supported experimentally, in ELF files only.

There is a known issue with incorrect parsing of DWARF in .o files and static libraries that contain them. See [eliben/pyelftools#564](https://github.com/eliben/pyelftools/issues/564). Mach-O fat binary object files inside static libraries are not supported.

Requirements and Dependencies
------------
- Python 3.6.1+
Expand Down Expand Up @@ -49,7 +52,7 @@ On the most basic level, the debug information in a compiled file is an array of

The UI of DWARF Explorer was meant for eyeballing that data structure:

![dwex](https://user-images.githubusercontent.com/5807738/77756810-510ad300-7006-11ea-8d97-b7c109d050b1.png)
![image](https://github.com/user-attachments/assets/2c2f426a-be59-437d-98bb-1520231641f5)

The left-hand tree displays the DIEs, with CU root DIEs on the top level. Expand the tree and click on DIEs to see their attributes. DIE attributes that have a substructure or point at larger data structures are clickable.

Expand All @@ -69,3 +72,8 @@ Prior art

There is also a GUI DWARF visualizer at [simark/dwarftree](https://github.com/simark/dwarftree). It is also based on pyelftools,
with a GTK-based UI. It has been inactive since 2015. I didn't know about it when I started.

Pairs well with
---------------

For a free general purpose GUI ELF file visualizer, see [horsicq/XELFViewer](https://github.com/horsicq/XELFViewer).
2 changes: 1 addition & 1 deletion dwex/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from .locals import LocalsDlg

# Sync with version in setup.py
version = (3, 26)
version = (4, 0)

# TODO:
# On MacOS, start without a main window, instead show the Open dialog
Expand Down
49 changes: 35 additions & 14 deletions dwex/formats.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,27 +4,43 @@
from elftools.dwarf.dwarfinfo import DWARFInfo, DebugSectionDescriptor, DwarfConfig
# This doesn't depend on Qt
# The dependency on filebytes only lives here
# Format codes: 0 = ELF, 1 = MACHO, 2 = PE, 3 - WASM, 4 - ELF inside A, 1 - MachO inside A
# Format codes: 0 = ELF, 1 = MACHO, 2 = PE, 3 - WASM, 4 - ELF inside A, 5 - arch specific MachO inside A

class FormatError(Exception):
    """Raised when an input file cannot be parsed as a supported format."""

    def __init__(self, s):
        # s is the human-readable message; it becomes the sole exception argument.
        super().__init__(s)

def read_pe(filename):
from filebytes.pe import PE, IMAGE_FILE_MACHINE
import struct, zlib

pefile = PE(filename)
# TODO: debug import section in b.exe

# Section's real size might be padded - see https://github.com/sashs/filebytes/issues/28
sections = [(section.name, section,
sections = [(section.name if section.name[1] != 'z' else '.' + section.name[2:],
section.name[1] == 'z',
section,
section.header.PhysicalAddress_or_VirtualSize,
section.header.SizeOfRawData)
for section in pefile.sections
if section.name.startswith('.debug')]

data = {name: DebugSectionDescriptor(io.BytesIO(section.bytes), name, None,
raw_size if virtual_size == 0 else min((raw_size, virtual_size)), 0)
for (name, section, virtual_size, raw_size) in sections}
if section.name.startswith('.debug') or section.name.startswith('.zdebug')]

def read_section(name, is_compressed, section, virtual_size, raw_size):
    """Build a DebugSectionDescriptor for one PE debug section.

    name          -- section name to report in the descriptor
    is_compressed -- True if the section payload is a ZLIB-wrapped stream
    section       -- section object; its raw contents are read from .bytes
    virtual_size  -- virtual size from the section header (0 means "not set")
    raw_size      -- size of raw data from the section header

    Compressed sections are expected to start with the 4-byte magic b'ZLIB',
    followed by the uncompressed size as a big-endian 64-bit integer, followed
    by the zlib stream itself.

    Raises FormatError if a compressed section is too short, has the wrong
    magic, fails to inflate, or inflates to a size other than the declared one.
    """
    data = section.bytes
    # The raw size may include padding; when a virtual size is present,
    # trust the smaller of the two.
    size = raw_size if virtual_size == 0 else min((raw_size, virtual_size))
    if is_compressed:
        # 4 bytes of magic + 8 bytes of uncompressed size
        if size < 12:
            raise FormatError("Compressed section %s is unexpectedly short." % (name,))
        if data[0:4] != b'ZLIB':
            raise FormatError("Unsupported format in compressed section %s, ZLIB is expected." % (name,))
        (size,) = struct.unpack('>Q', data[4:12])
        try:
            data = zlib.decompress(data[12:])
        except zlib.error as exc:
            # Report a corrupt stream the same way as every other malformed-input case
            raise FormatError("Corrupt ZLIB stream in compressed section %s: %s" % (name, exc)) from exc
        if len(data) != size:
            raise FormatError("Wrong uncompressed size in compressed section %s: expected %d, got %d." % (name, size, len(data)))
    return DebugSectionDescriptor(io.BytesIO(data), name, None, size, 0)

data = {sec[0]: read_section(*sec) for sec in sections}

if not '.debug_info' in data:
return None
Expand Down Expand Up @@ -249,7 +265,7 @@ def read_elf(file, filename):
_ar_file_header = namedtuple('ARHeader', ('header_offset', 'data_offset',
'name',
# Don't care for the metadata
#'mod', 'uid', 'gid', 'mode',
#'last_mod_date', 'user_id', 'group_id', 'mode',
'size'))

# resolve_slice takes a list of files in the archive, and returns
Expand All @@ -268,7 +284,7 @@ def read_header():
name = file.read(name_len).rstrip(b'\0')
data_size -= name_len
# Resolve GNU style long file names
if name.startswith(b'/') and len(name) > 1 and ord(b'0') <= name[1] <= ord(b'9'):
elif name.startswith(b'/') and len(name) > 1 and ord(b'0') <= name[1] <= ord(b'9'):
if not long_names:
FormatError("Long file name in a static library, but no long name section was found.")
str_offset = int(name[1:])
Expand All @@ -280,7 +296,7 @@ def read_header():
#int(b[34:40]), int(b[40:48], 8),
data_size)

# Not used. Just in case.
# Not used. Just in case. GNU symtab only.
def read_symtab(size, is64):
ilen = 8 if is64 else 4
length = int.from_bytes(file.read(ilen), 'big')
Expand All @@ -289,15 +305,20 @@ def read_symtab(size, is64):
d = file.read(size - (length+1)*ilen)
symbols = d.split(b'\0')[:-1]
return zip(offsets, symbols)

def skip_content(header):
    """Move the file position forward past the data described by header.

    The distance skipped is header.size rounded up to the next even number.
    """
    padded_size = header.size + (header.size % 2)
    file.seek(padded_size, os.SEEK_CUR)

############################
# read_staticlib starts here
file.seek(0, os.SEEK_END)
size = file.tell()
file.seek(8) # Past the magic signature

# First section most likely a symtab - skip
header = read_header()
if header.name == b'/' or header.name == b'/SYM64/' or header.name == b'__.SYMDEF':
file.seek(((header.size + 1) // 2) * 2, os.SEEK_CUR)
skip_content(header)
# read_symtab(header.size, header.name == b'/SYM64/')
# if header.size % 2 == 1:
# file.seek(1, os.SEEK_CUR)
Expand All @@ -318,11 +339,11 @@ def read_symtab(size, is64):
while file.tell() < size:
header = read_header()
headers.append(header)
file.seek(((header.size + 1) // 2) * 2, os.SEEK_CUR)
skip_content(header)

# Present the user with slice choice
# TODO: encoding?
names = list(h.name.rstrip(b'/').decode('ASCII') for h in headers)
names = tuple(h.name.rstrip(b'/').decode('ASCII') for h in headers)
slice = resolve_slice(names, 'Static Library', 'Choose an object file:')
if slice is None:
return False # Cancellation
Expand All @@ -338,7 +359,7 @@ def read_symtab(size, is64):
macho = MachO(None, b)
di = get_macho_dwarf(macho, None)
elif b[:4] == b'\xCA\xFE\xBA\xBE':
raise FormatError("The selected slice of the static library is a MachO fat binary. Those are not supported. Let the author know.")
raise FormatError("The selected slice of the static library is a Mach-O fat binary. Those are not supported. Let the author know.")
else:
raise FormatError("The selected slice of the static library is not a supported object file. Let the author know.")

Expand Down
4 changes: 4 additions & 0 deletions dwex/patch.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import elftools.dwarf.locationlists
import elftools.elf.elffile
import elftools.dwarf.dwarfinfo
import filebytes.pe
from elftools.common.utils import struct_parse
from elftools.common.exceptions import DWARFError
from elftools.dwarf.descriptions import _DESCR_DW_CC
Expand Down Expand Up @@ -205,3 +206,6 @@ def _create_structs(self):
# Fix for #1588
elftools.dwarf.enums.ENUM_DW_LNCT['DW_LNCT_LLVM_source'] = 0x2001
elftools.dwarf.enums.ENUM_DW_LNCT['DW_LNCT_LLVM_is_MD5'] = 0x2002

# Short out import directory parsing for now
filebytes.pe.PE._parseDataDirectory = lambda self,a,b,c: None
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def run(self):

setup(
name='dwex',
version='3.26', # Sync with version in __main__
version='4.0', # Sync with version in __main__
packages=['dwex'],
url="https://github.com/sevaa/dwex/",
entry_points={"gui_scripts": ["dwex = dwex.__main__:main"]},
Expand Down

0 comments on commit 2f2fa15

Please sign in to comment.