mirror of
https://github.com/bminor/binutils-gdb.git
synced 2025-12-13 19:12:59 +00:00
I ran gdb/contrib/dwarf-to-dwarf-assembler.py on a hello world compiled with
gcc 15, and ran into:
...
Traceback (most recent call last):
File "/data/vries/gdb/./src/gdb/contrib/dwarf-to-dwarf-assembler.py", line 642, in <module>
main(sys.argv)
~~~~^^^^^^^^^^
File "/data/vries/gdb/./src/gdb/contrib/dwarf-to-dwarf-assembler.py", line 638, in main
generator.generate()
~~~~~~~~~~~~~~~~~~^^
File "/data/vries/gdb/./src/gdb/contrib/dwarf-to-dwarf-assembler.py", line 610, in generate
self.generate_die(die, indent_count)
~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^
File "/data/vries/gdb/./src/gdb/contrib/dwarf-to-dwarf-assembler.py", line 589, in generate_die
die_lines = die.format(self.dwarf_parser.offset_to_die, indent_count)
File "/data/vries/gdb/./src/gdb/contrib/dwarf-to-dwarf-assembler.py", line 279, in format
return "\n".join(self.format_lines(offset_die_lookup, indent_count))
~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/vries/gdb/./src/gdb/contrib/dwarf-to-dwarf-assembler.py", line 376, in format_lines
inner_lines = super().format_lines(offset_die_lookup, indent_count + 1)
File "/data/vries/gdb/./src/gdb/contrib/dwarf-to-dwarf-assembler.py", line 251, in format_lines
attr_line = attr.format(
offset_die_lookup, indent_count=indent_count + 1
)
File "/data/vries/gdb/./src/gdb/contrib/dwarf-to-dwarf-assembler.py", line 199, in format
s += self.name + " "
~~~~~~~~~~^~~~~
TypeError: unsupported operand type(s) for +: 'int' and 'str'
...
because of trying to print DWARF v6 attributes DW_AT_language_name (0x90) and
DW_AT_language_version (0x91).
Fix this by printing the number if the name is not known:
...
{DW_AT_0x90 3 DW_FORM_data1}
{DW_AT_0x91 202311 DW_FORM_data4}
...
647 lines
23 KiB
Python
Executable File
647 lines
23 KiB
Python
Executable File
#!/usr/bin/env python3
# pyright: strict

# Copyright 2024 Free Software Foundation, Inc.

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

# Due to the pyelftools dependency, this script requires Python version
# 3.10 or greater to run.

"""A utility to convert ELF files with DWARF info to Dwarf::assemble code.

Usage:
    python ./dwarf-to-dwarf-assembler.py <path/to/elf/file>

Dependencies:
    Python >= 3.10
    pyelftools >= 0.31

Notes:
- Line tables are not currently supported.
- Non-contiguous subprograms are not currently supported.
- If you want to use $srcfile or similar, you must edit the references to the
  file name manually, including DW_AT_name attributes on compile units.
- If run with binaries generated by make check-gdb, it may include an
  additional compile_unit before and after the actual compile units.  This is
  an artifact of the normal compilation process, as these CUs are indeed in
  the generated DWARF in some cases.
"""

import errno
import re
import sys
from copy import copy
from dataclasses import dataclass
from datetime import datetime
from functools import cache
from io import BytesIO, IOBase
from logging import getLogger
from typing import Annotated, Optional, TextIO

from elftools.dwarf.compileunit import CompileUnit as RawCompileUnit
from elftools.dwarf.die import DIE as RawDIE
from elftools.dwarf.die import AttributeValue
from elftools.elf.elffile import ELFFile

# Name the logger after the module rather than after the script's path: the
# logging module treats dots in a logger name as hierarchy separators, so a
# path such as "dwarf-to-dwarf-assembler.py" would create a bogus parent/child
# pair of loggers.
logger = getLogger(__name__)

# While these aren't supported, their detection is important for replacing them
# with SPECIAL_expr and for writing the placeholder {MANUAL} expr list.
EXPR_ATTRIBUTE_FORMS = [
    "DW_FORM_exprloc",
    "DW_FORM_block",
    "DW_FORM_block1",
    "DW_FORM_block2",
    "DW_FORM_block4",
]

# Workaround for my editor not to freak out over unclosed braces.
lbrace, rbrace = "{", "}"

@cache
def get_indent_str(indent_count: int) -> str:
    """Get whitespace string to prepend to another for indenting.

    Each pair of indentation levels is emitted as one tab character; an odd
    remaining level is emitted as the space literal below.  Results are
    memoized per indent_count by functools.cache.
    """
    indent = (indent_count // 2) * "\t"
    if indent_count % 2 == 1:
        # NOTE(review): this literal is reproduced exactly as it appears in
        # the copy of the file this was restored from, which had its
        # whitespace mangled; presumably it is meant to be half of a tab stop.
        # Confirm against the pristine file.
        indent += " "
    return indent

def indent(line: str, indent_count: int) -> str:
    """Indent line by indent_count levels.

    The whitespace prefix comes from get_indent_str; line itself is returned
    unmodified after that prefix.
    """
    return get_indent_str(indent_count) + line

def labelify_str(s: str) -> str:
    """Make s appropriate for a label name.

    Every "*" becomes the word "ptr", every remaining non-word character
    becomes "_", and runs of consecutive underscores are collapsed into one.
    """
    # Replace "*" with the literal word "ptr".
    s = s.replace("*", "ptr")

    # Replace any non-"word" characters by "_".
    s = re.sub(r"\W", "_", s)

    # Remove consecutive "_"s.
    s = re.sub(r"__+", "_", s)

    return s

class DWARFAttribute:
|
|
"""Storage unit for a single DWARF attribute.
|
|
|
|
All its values are strings that are usually passed on
|
|
directly to format. The exceptions to this are attributes
|
|
with int values with DW_FORM_ref4 or DW_FORM_ref_addr form.
|
|
Their values are interpreted as the global offset of the DIE
|
|
being referenced, which are looked up dynamically to fetch
|
|
their labels.
|
|
"""
|
|
|
|
def __init__(
|
|
self,
|
|
die_offset: int,
|
|
name: str,
|
|
value: str | bytes | int | bool,
|
|
form=None,
|
|
):
|
|
self.die_offset = die_offset
|
|
self.name = name
|
|
self.value = value
|
|
self.form = form
|
|
|
|
def _format_expr_value(self) -> str:
|
|
self.form = "SPECIAL_expr"
|
|
return "{ MANUAL: Fill expr list }"
|
|
|
|
def _needs_escaping(self, str_value: str) -> bool:
|
|
charset = set(str_value)
|
|
return bool(charset.intersection({"{", "}", " ", "\t"}))
|
|
|
|
def _format_str(self, str_value: str) -> str:
|
|
if self._needs_escaping(str_value):
|
|
escaped_str = str(str_value)
|
|
# Replace single escape (which is itself escaped because of regex)
|
|
# with a double escape (which doesn't mean anything to regex so
|
|
# it doesn't need escaping).
|
|
escaped_str = re.sub(r"\\", r"\\", escaped_str)
|
|
escaped_str = re.sub("([{}])", r"\\\1", escaped_str)
|
|
return "{" + escaped_str + "}"
|
|
else:
|
|
return str_value
|
|
|
|
def _format_value(
|
|
self, offset_die_lookup: dict[int, "DWARFDIE"], indent_count: int = 0
|
|
) -> str:
|
|
if self.form in EXPR_ATTRIBUTE_FORMS:
|
|
return self._format_expr_value()
|
|
elif isinstance(self.value, bool):
|
|
return str(int(self.value))
|
|
elif isinstance(self.value, int):
|
|
if self.form == "DW_FORM_ref4":
|
|
# ref4-style referencing label.
|
|
die = offset_die_lookup[self.value]
|
|
return ":$" + die.tcl_label
|
|
elif self.form == "DW_FORM_ref_addr":
|
|
# ref_addr-style referencing label.
|
|
die = offset_die_lookup[self.value]
|
|
return "%$" + die.tcl_label
|
|
else:
|
|
return str(self.value)
|
|
elif isinstance(self.value, bytes):
|
|
return self._format_str(self.value.decode("ascii"))
|
|
elif isinstance(self.value, str):
|
|
return self._format_str(self.value)
|
|
else:
|
|
raise NotImplementedError(f"Unknown data type: {type(self.value)}")
|
|
|
|
def format(
|
|
self, offset_die_lookup: dict[int, "DWARFDIE"], indent_count: int = 0
|
|
) -> str:
|
|
"""Format the attribute in the form {name value form}.
|
|
|
|
If form is DW_FORM_exprloc or DW_FORM_block, see next section on
|
|
DWARFOperations.
|
|
|
|
If it isn't, value is formatted as follows:
|
|
If bool, use "1" if True, "0" if False.
|
|
If int:
|
|
If form is DW_FORM_ref4, use ":$label" where label is the
|
|
tcl_label of the DWARFDIE at offset "value".
|
|
If form is DW_FORM_ref_addr, use "%$label" where label is
|
|
the tcl_label of the DWARFDIE at offset "value".
|
|
Else, use value directly.
|
|
If bytes, use value.decode("ascii")
|
|
If str, use value directly.
|
|
Any other type results in a NotImplementedError being raised.
|
|
|
|
Regarding DW_FORM_exprloc and DW_FORM_block:
|
|
The form is replaced with SPECIAL_expr.
|
|
The entries in the value are interpreted and decoded using the
|
|
dwarf_operations dictionary, and replaced with their names where
|
|
applicable.
|
|
"""
|
|
s = lbrace
|
|
if isinstance(self.name, int):
|
|
s += "DW_AT_" + hex(self.name)
|
|
else:
|
|
s += self.name
|
|
s += " "
|
|
s += self._format_value(offset_die_lookup)
|
|
|
|
# Only explicitly state form if it's not a reference.
|
|
if self.form not in [None, "DW_FORM_ref4", "DW_FORM_ref_addr"]:
|
|
s += " " + self.form
|
|
|
|
s += rbrace
|
|
return indent(s, indent_count)
|
|
|
|
|
|
class DWARFDIE:
|
|
"""This script's parsed version of a RawDIE."""
|
|
|
|
def __init__(
|
|
self,
|
|
offset: int,
|
|
tag: str,
|
|
attrs: dict[str, DWARFAttribute],
|
|
tcl_label: Optional[str] = None,
|
|
):
|
|
self.offset: Annotated[int, "Global offset of the DIE."] = offset
|
|
self.tag: Annotated[str, "DWARF tag for this DIE."] = tag
|
|
self.attrs: Annotated[
|
|
dict[str, DWARFAttribute], "Dict of attributes for this DIE."
|
|
] = copy(attrs)
|
|
self.children: Annotated[list[DWARFDIE], "List of child DIEs of this DIE."] = []
|
|
self.tcl_label: Annotated[
|
|
str,
|
|
"Label used by the Tcl code to reference this DIE, if any. These "
|
|
'take the form of "label: " before the actual DIE definition.',
|
|
] = tcl_label
|
|
|
|
def format_lines(
|
|
self, offset_die_lookup: dict[int, "DWARFDIE"], indent_count: int = 0
|
|
) -> list[str]:
|
|
"""Get the list of lines that represent this DIE in Dwarf assembler."""
|
|
die_lines = []
|
|
|
|
# Prepend label to first line, if it's set.
|
|
if self.tcl_label:
|
|
first_line_start = self.tcl_label + ": "
|
|
else:
|
|
first_line_start = ""
|
|
|
|
# First line, including label.
|
|
first_line = indent(first_line_start + self.tag + " " + lbrace, indent_count)
|
|
die_lines.append(first_line)
|
|
|
|
# Format attributes, if any.
|
|
if self.attrs:
|
|
for attr_name, attr in self.attrs.items():
|
|
attr_line = attr.format(
|
|
offset_die_lookup, indent_count=indent_count + 1
|
|
)
|
|
die_lines.append(attr_line)
|
|
die_lines.append(indent(rbrace, indent_count))
|
|
else:
|
|
# Don't create a new line, just append and immediately close the
|
|
# brace on the last line.
|
|
die_lines[-1] += rbrace
|
|
|
|
# Format children, if any.
|
|
if self.children:
|
|
# Only open a new brace if there are any children for the
|
|
# current DIE.
|
|
die_lines[-1] += " " + lbrace
|
|
for child in self.children:
|
|
child_lines = child.format_lines(
|
|
offset_die_lookup, indent_count=indent_count + 1
|
|
)
|
|
die_lines.extend(child_lines)
|
|
die_lines.append(indent(rbrace, indent_count))
|
|
|
|
return die_lines
|
|
|
|
def format(
|
|
self, offset_die_lookup: dict[int, "DWARFDIE"], indent_count: int = 0
|
|
) -> str:
|
|
"""Join result from format_lines into a single str."""
|
|
return "\n".join(self.format_lines(offset_die_lookup, indent_count))
|
|
|
|
def name(self) -> Optional[str]:
|
|
"""Get DW_AT_name (if present) decoded as ASCII."""
|
|
raw_value = self.attrs.get("DW_AT_name")
|
|
if raw_value is None:
|
|
return None
|
|
else:
|
|
return raw_value.value.decode("ascii")
|
|
|
|
def type_name(self) -> str:
|
|
"""Name of Dwarf tag, with the "DW_TAG_" prefix removed."""
|
|
return re.sub("DW_TAG_", "", self.tag)
|
|
|
|
|
|
class DWARFCompileUnit(DWARFDIE):
    """Wrapper subclass for CU DIEs.

    This is necessary due to the special format CUs take in Dwarf::assemble.

    Instead of simply:
        DW_TAG_compile_unit {
            <attributes>
        } {
            <children>
        }

    CUs are formatted as:
        cu { <cu_special_vars> } {
            DW_TAG_compile_unit {
                <attributes>
            } {
                <children>
            }
        }
    """

    # Default value for parameter is_64 defined in dwarf.exp line 1553.
    # This value is converted to 0/1 automatically when emitting
    # Dwarf::assemble code.
    default_is_64 = False

    # Default value for parameter dwarf_version defined in dwarf.exp line 1552.
    default_dwarf_version = 4

    # Default value for parameter is_fission defined in dwarf.exp line 1556.
    # Currently not implemented, see comment below.
    # default_is_fission = False

    # Tag that signifies a DIE is a compile unit.
    compile_unit_tag = "DW_TAG_compile_unit"

    def __init__(
        self,
        raw_die: RawDIE,
        raw_cu: RawCompileUnit,
        attrs: dict[str, DWARFAttribute],
    ):
        """Initialize additional instance variables for CU encoding.

        The additional instance variables are:
        - is_64_bit: bool
          Whether this CU is 64 bit or not.
        - dwarf_version: int
          default DWARFCompileUnit.default_dwarf_version
          Version of DWARF this CU is using.
        - addr_size: Optional[int]
          default None
          Size of an address in bytes.

        These variables are used to configure the first parameter of the cu
        proc (which contains calls to the compile_unit proc in the body of
        Dwarf::assemble).
        """
        super().__init__(raw_die.offset, DWARFCompileUnit.compile_unit_tag, attrs)
        self.raw_cu = raw_cu
        self.dwarf_version: int = raw_cu.header.get(
            "version", DWARFCompileUnit.default_dwarf_version
        )
        self.addr_size: Optional[int] = raw_cu.header.get("address_size")
        self.is_64_bit: bool = raw_cu.dwarf_format() == 64

        # Fission is not currently implemented because I don't know where to
        # fetch this information from.
        # self.is_fission: bool = self.default_is_fission

        # CU labels are not currently implemented because I haven't found where
        # pyelftools exposes this information.
        # self.cu_label: Optional[str] = None

    def format_lines(
        self,
        offset_die_lookup: dict[int, DWARFDIE],
        indent_count: int = 0,
    ) -> list[str]:
        """Get the lines for this CU, wrapped in its 'cu {} {}' proc call.

        The regular DIE lines produced by DWARFDIE.format_lines are placed
        one indentation level deeper, between the header line and a closing
        brace.
        """
        lines: list[str] = []
        lines.append(self._get_header(indent_count))
        inner_lines = super().format_lines(offset_die_lookup, indent_count + 1)
        lines += inner_lines
        lines.append(indent(rbrace, indent_count))
        return lines

    def _get_header(self, indent_count: int = 0) -> str:
        """Assemble the first line of the surrounding 'cu {} {}' proc call.

        Only the parameters that differ from the class defaults are listed,
        except addr_size, which is listed whenever it is known.
        """
        header = indent("cu " + lbrace, indent_count)
        cu_params: list[str] = []

        if self.is_64_bit != DWARFCompileUnit.default_is_64:
            # Convert from True/False to 1/0.
            param_value = int(self.is_64_bit)
            cu_params += ["is_64", str(param_value)]

        if self.dwarf_version != DWARFCompileUnit.default_dwarf_version:
            cu_params += ["version", str(self.dwarf_version)]

        if self.addr_size is not None:
            cu_params += ["addr_size", str(self.addr_size)]

        # Fission is not currently implemented, see comment above.
        # if self.is_fission != DWARFCompileUnit.default_is_fission:
        #     # Same as is_64_bit conversion, True/False -> 1/0.
        #     param_value = int(self.is_fission)
        #     cu_params += ["fission", str(param_value)]

        # CU labels are not currently implemented, see comment above.
        # if self.cu_label is not None:
        #     cu_params += ["label", self.cu_label]

        if cu_params:
            header += " ".join(cu_params)

        header += rbrace + " " + lbrace
        return header

class DWARFParser:
    """Converter from pyelftools's DWARF representation to this script's."""

    def __init__(self, elf_file: IOBase):
        """Init parser with file opened in binary mode.

        The whole file is read into memory, all CUs are parsed, and labels
        are created for every DIE referenced by another one.

        File can be closed after this function is called.
        """
        self.raw_data = BytesIO(elf_file.read())
        self.elf_data = ELFFile(self.raw_data)
        self.dwarf_info = self.elf_data.get_dwarf_info()
        self.offset_to_die: dict[int, DWARFDIE] = {}
        self.label_to_die: dict[str, DWARFDIE] = {}
        self.referenced_offsets: Annotated[
            set[int], "The set of all offsets that were referenced by some DIE."
        ] = set()
        self.raw_cu_list: list[RawCompileUnit] = []
        self.top_level_dies: list[DWARFDIE] = []
        self.subprograms: list[DWARFDIE] = []
        self.taken_labels: set[str] = set()

        self._read_all_cus()
        self._create_necessary_labels()

    def _read_all_cus(self):
        """Populate self.raw_cu_list with all CUs in self.dwarf_info."""
        for cu in self.dwarf_info.iter_CUs():
            self._read_cu(cu)

    def _read_cu(self, raw_cu: "RawCompileUnit"):
        """Add raw_cu to self.raw_cu_list and parse its non-null DIEs."""
        self.raw_cu_list.append(raw_cu)
        for raw_die in raw_cu.iter_DIEs():
            if not raw_die.is_null():
                self._parse_die(raw_cu, raw_die)

    def _parse_die(self, die_cu: "RawCompileUnit", raw_die: "RawDIE") -> "DWARFDIE":
        """Process a single DIE and add it to offset_to_die.

        Look for DW_FORM_ref4 and DW_FORM_ref_addr form attributes and replace
        them with the global offset of the referenced DIE, and adding the
        referenced DIE to a set.  This will be used later to assign and use
        labels only to DIEs that need it.

        In case the DIE is a top-level DIE, add it to self.top_level_dies.

        In case the DIE is a subprogram, add it to self.subprograms and call
        self._use_vars_for_low_and_high_pc_attr with it.
        """
        processed_attrs = {}
        attr_value: AttributeValue
        for attr_name, attr_value in raw_die.attributes.items():
            actual_value = attr_value.value
            if attr_value.form in ("DW_FORM_ref4", "DW_FORM_ref_addr"):
                referenced_die = raw_die.get_DIE_from_attribute(attr_name)
                actual_value = referenced_die.offset
                self.referenced_offsets.add(referenced_die.offset)

            processed_attrs[attr_name] = DWARFAttribute(
                raw_die.offset, attr_name, actual_value, attr_value.form
            )

        if raw_die.tag == DWARFCompileUnit.compile_unit_tag:
            processed_die = DWARFCompileUnit(raw_die, die_cu, processed_attrs)
        else:
            processed_die = DWARFDIE(raw_die.offset, raw_die.tag, processed_attrs, None)

        if raw_die.get_parent() is None:
            # Top level DIE
            self.top_level_dies.append(processed_die)
        else:
            # Setting the parent here assumes the parent was already processed
            # prior to this DIE being found.
            # As far as I'm aware, this is always true in DWARF.
            processed_parent = self.offset_to_die[raw_die.get_parent().offset]
            processed_parent.children.append(processed_die)

        if processed_die.tag == "DW_TAG_subprogram":
            self.subprograms.append(processed_die)
            self._use_vars_for_low_and_high_pc_attr(processed_die)

        self.offset_to_die[processed_die.offset] = processed_die
        return processed_die

    def _create_necessary_labels(self):
        """Create labels to DIEs that were referenced by others.

        Offsets are visited in ascending order, so when several DIEs compete
        for the same label the numeric suffixes follow the DIEs' order in the
        file instead of the set's iteration order.
        """
        for offset in sorted(self.referenced_offsets):
            die = self.offset_to_die[offset]
            self._create_label_for_die(die)

    def _use_vars_for_low_and_high_pc_attr(self, subprogram: "DWARFDIE") -> None:
        """Replace existing PC attributes with Tcl variables.

        If DW_AT_low_pc exists for this DIE, replace it with accessing the
        variable whose name is given by self.subprogram_start_var(subprogram).

        If DW_AT_high_pc exists for this DIE, replace it with accessing the
        variable whose name is given by self.subprogram_end_var(subprogram).

        Subprograms without a DW_AT_name are left untouched: the variable
        names are derived from the name, and there is none to derive from.
        """
        if subprogram.name() is None:
            # Without this guard the attributes would be replaced with the
            # meaningless "$None_start" and "$None_end".
            return

        low_pc_attr_name = "DW_AT_low_pc"
        if low_pc_attr_name in subprogram.attrs:
            start = self.subprogram_start_var(subprogram)
            subprogram.attrs[low_pc_attr_name].value = start

        high_pc_attr_name = "DW_AT_high_pc"
        if high_pc_attr_name in subprogram.attrs:
            end = self.subprogram_end_var(subprogram)
            subprogram.attrs[high_pc_attr_name].value = end

    def _create_label_for_die(self, die: "DWARFDIE") -> None:
        """Set tcl_label to a unique string among other DIEs for this parser.

        As a first attempt, use labelify_str(die.name()).  If the DIE does not
        have a name, use labelify_str(die.type_name()).

        If the chosen initial label is already taken, try again appending "_2".
        While the attempt is still taken, try again replacing it with "_3", then
        "_4", and so on.

        This function also creates an entry on self.label_to_die.  DIEs that
        already have a label are left alone.
        """
        if die.tcl_label is not None:
            return

        label = labelify_str(die.name() or die.type_name())

        # Deduplicate label in case of collision
        if label in self.taken_labels:
            suffix_nr = 2

            # Walrus operator to prevent writing the assembled label_suffix
            # string literal twice.  This could be rewritten by copying the
            # string literal to the line after the end of the while loop,
            # but I deemed it would be too frail in case one of them needs
            # to be changed and the other is forgotten.
            while (new_label := f"{label}_{suffix_nr}") in self.taken_labels:
                suffix_nr += 1
            label = new_label

        die.tcl_label = label
        self.label_to_die[label] = die
        self.taken_labels.add(label)

    def subprogram_start_var(self, subprogram: "DWARFDIE") -> str:
        """Name of the Tcl variable that holds the low PC for a subprogram."""
        return f"${subprogram.name()}_start"

    def subprogram_end_var(self, subprogram: "DWARFDIE") -> str:
        """Name of the Tcl variable that holds the high PC for a subprogram."""
        return f"${subprogram.name()}_end"

    def all_labels(self) -> set[str]:
        """Get a copy of the set of all labels known to the parser so far."""
        return copy(self.taken_labels)

class DWARFAssemblerGenerator:
    """Class that generates Dwarf::assemble code out of a DWARFParser."""

    def __init__(self, dwarf_parser: "DWARFParser", output: TextIO = sys.stdout):
        """Remember the parser to read from and the stream to print to."""
        self.dwarf_parser = dwarf_parser
        self.output = output

    def emit(self, line: str, indent_count: int) -> None:
        """Print a single line indented indent_count times to self.output.

        If line is empty, it will always print an empty line, even with nonzero
        indent_count.
        """
        if line:
            line = get_indent_str(indent_count) + line
        print(line, file=self.output)

    def generate_die(self, die: "DWARFDIE", indent_count: int):
        """Generate the lines that represent a DIE."""
        die_lines = die.format(self.dwarf_parser.offset_to_die, indent_count)
        # The text is already indented line by line by die.format, so it is
        # emitted without any additional indentation.
        self.emit(die_lines, 0)

    def generate(self):
        """Print the whole Dwarf::assemble call to self.output.

        The body consists of the global declarations, a declare_labels line
        (if any label exists), one get_func_info line per named subprogram
        and then every top-level DIE.
        """
        indent_count = 0

        self.emit("Dwarf::assemble $asm_file {", indent_count)

        # Begin Dwarf::assemble body.
        indent_count += 1
        self.emit("global srcdir subdir srcfile", indent_count)

        all_labels = self.dwarf_parser.all_labels()
        if all_labels:
            # Sorted, because the iteration order of a set of strings may
            # change from one run of the interpreter to the next, which would
            # make the output differ between runs on the same input.
            self.emit("declare_labels " + " ".join(sorted(all_labels)), indent_count)

        self.emit("", 0)
        for subprogram in self.dwarf_parser.subprograms:
            subprogram_name = subprogram.name()
            if subprogram_name is None:
                # Nothing to look up for a subprogram without DW_AT_name;
                # without this check "get_func_info None" would be printed.
                continue
            self.emit(f"get_func_info {subprogram_name}", indent_count)

        for die in self.dwarf_parser.top_level_dies:
            self.generate_die(die, indent_count)

        # TODO: line table, if it's within scope (it probably isn't).

        # End Dwarf::assemble body.
        indent_count -= 1
        self.emit(rbrace, indent_count)

def main(argv: list[str]) -> None:
    """Convert the ELF file named by argv[1] and print the result to stdout.

    Exits with status errno.EOPNOTSUPP after printing a usage message if no
    file name was given, and with status errno.ENODATA if the file cannot be
    opened or parsed.
    """
    try:
        filename = argv[1]
    except IndexError:
        print("Usage:", file=sys.stderr)
        print("python ./dwarf-to-dwarf-assembler.py <path/to/elf/file>", file=sys.stderr)
        sys.exit(errno.EOPNOTSUPP)

    try:
        with open(filename, "rb") as elf_file:
            parser = DWARFParser(elf_file)
    except OSError as e:
        # A missing or unreadable file is not a DWARF problem; say so instead
        # of suggesting that the file lacks debug information.
        print(f"Error opening file {filename}.", file=sys.stderr)
        print(str(e), file=sys.stderr)
        sys.exit(errno.ENODATA)
    except Exception as e:
        # Top-level boundary: report any parsing failure and exit, rather
        # than showing a traceback.
        print(
            "Error parsing ELF file. Does it contain DWARF information?",
            file=sys.stderr,
        )
        print(str(e), file=sys.stderr)
        sys.exit(errno.ENODATA)
    generator = DWARFAssemblerGenerator(parser)
    generator.generate()

# Only run the conversion when executed as a script, not when imported.
if __name__ == "__main__":
    main(sys.argv)