scripts: Adopted ctx.py-related changes in other result scripts

- Adopted higher-level collect data structures:

  - high-level DwarfEntry/DwarfInfo class
  - high-level SymInfo class
  - high-level LineInfo class

  Note these had to be moved out of function scope due to pickling
  issues in perf.py/perfbd.py. These were only function-local to
  minimize scope leak so this fortunately was an easy change.

- Adopted better list-default patterns in Result types:

    def __new__(..., children=None):
        return Result(..., children if children is not None else [])

  A classic python footgun.

- Adopted notes rendering, though this is only used by ctx.py at the
  moment.

- Reverted to sorting children entries, for now.

  Unfortunately there's no easy way to sort the result entries in
  perf.py/perfbd.py before folding. Folding is going to make a mess
  of more complicated children anyways, so another solution is
  needed...

And some other shared miscellany.
This commit is contained in:
Christopher Haster
2024-12-01 16:59:51 -06:00
parent b4c79c53d2
commit 512cf5ad4b
9 changed files with 805 additions and 510 deletions

View File

@@ -17,8 +17,8 @@ __import__('sys').path.pop(0)
import collections as co
import csv
import difflib
import itertools as it
import functools as ft
import math as mt
import os
import re
@@ -168,8 +168,8 @@ def collect_dwarf_files(obj_path, *,
'.*\s+(?P<path>[^\s]+)\s*$')
# find source paths
dirs = {}
files = {}
dirs = co.OrderedDict()
files = co.OrderedDict()
# note objdump-path may contain extra args
cmd = objdump_path + ['--dwarf=rawline', obj_path]
if args.get('verbose'):
@@ -202,7 +202,7 @@ def collect_dwarf_files(obj_path, *,
raise sp.CalledProcessError(proc.returncode, proc.args)
# simplify paths
files_ = {}
files_ = co.OrderedDict()
for no, file in files.items():
if os.path.commonpath([
os.getcwd(),
@@ -214,34 +214,102 @@ def collect_dwarf_files(obj_path, *,
return files
# each dwarf entry can have attrs and children entries
class DwarfEntry:
def __init__(self, level, off, tag, ats={}, children=[]):
self.level = level
self.off = off
self.tag = tag
self.ats = ats or {}
self.children = children or []
def get(self, k, d=None):
return self.ats.get(k, d)
def __getitem__(self, k):
return self.ats[k]
def __contains__(self, k):
return k in self.ats
def __repr__(self):
return '%s(%d, 0x%x, %r, %r)' % (
self.__class__.__name__,
self.level,
self.off,
self.tag,
self.ats)
@ft.cached_property
def name(self):
if 'DW_AT_name' in self:
name = self['DW_AT_name'].split(':')[-1].strip()
# prefix with struct/union/enum
if self.tag == 'DW_TAG_structure_type':
name = 'struct ' + name
elif self.tag == 'DW_TAG_union_type':
name = 'union ' + name
elif self.tag == 'DW_TAG_enumeration_type':
name = 'enum ' + name
return name
else:
return None
# a collection of dwarf entries
class DwarfInfo:
def __init__(self, entries):
self.entries = entries
def get(self, k, d=None):
# allow lookup by both offset and dwarf name
if not isinstance(k, str):
return self.entries.get(k, d)
else:
import difflib
# organize entries by name
if not hasattr(self, '_by_name'):
self._by_name = {}
for entry in self.entries.values():
if entry.name is not None:
self._by_name[entry.name] = entry
# exact match? avoid difflib if we can for speed
if k in self._by_name:
return self._by_name[k]
# find the best matching dwarf entry with difflib
#
# this can be different from the actual symbol because
# of optimization passes
else:
name, entry = max(
self._by_name.items(),
key=lambda entry: difflib.SequenceMatcher(
None, entry[0], k, False).ratio(),
default=(None, None))
return entry
def __getitem__(self, k):
v = self.get(k)
if v is None:
raise KeyError(k)
return v
def __contains__(self, k):
return self.get(k) is not None
def __len__(self):
return len(self.entries)
def __iter__(self):
return (v for k, v in self.entries.items())
def collect_dwarf_info(obj_path, filter=None, *,
objdump_path=OBJDUMP_PATH,
**args):
filter_, filter = filter, __builtins__.filter
# each dwarf entry can have attrs and children entries
class DwarfEntry:
def __init__(self, level, off, tag, ats={}, children=[]):
self.level = level
self.off = off
self.tag = tag
self.ats = ats or {}
self.children = children or []
def __getitem__(self, k):
return self.ats[k]
def __contains__(self, k):
return k in self.ats
def __repr__(self):
return '%s(%d, 0x%x, %r, %r)' % (
self.__class__.__name__,
self.level,
self.off,
self.tag,
self.ats)
info_pattern = re.compile(
'^\s*(?:<(?P<level>[^>]*)>'
'\s*<(?P<off>[^>]*)>'
@@ -251,7 +319,7 @@ def collect_dwarf_info(obj_path, filter=None, *,
'\s*:(?P<v>.*))\s*$')
# collect dwarf entries
entries = co.OrderedDict()
info = co.OrderedDict()
entry = None
levels = {}
# note objdump-path may contain extra args
@@ -277,7 +345,7 @@ def collect_dwarf_info(obj_path, filter=None, *,
if (entry.level == 1 and (
# unless this entry is filtered
filter_ is None or entry.tag in filter_)):
entries[entry.off] = entry
info[entry.off] = entry
# store entry in parent
levels[entry.level] = entry
if entry.level-1 in levels:
@@ -290,7 +358,7 @@ def collect_dwarf_info(obj_path, filter=None, *,
if proc.returncode != 0:
raise sp.CalledProcessError(proc.returncode, proc.args)
return entries
return DwarfInfo(info)
def collect_sizes(obj_path, *,
nm_path=NM_PATH,
@@ -331,33 +399,21 @@ def collect(obj_paths, *,
**args):
results = []
for obj_path in obj_paths:
# guess the source, if we have debug-info we'll replace this later
file = re.sub('(\.o)?$', '.c', obj_path, 1)
# find sizes
sizes = collect_sizes(obj_path, everything=everything, **args)
# try to figure out the source file if we have debug-info
defs = {}
try:
files = collect_dwarf_files(obj_path, **args)
info = collect_dwarf_info(obj_path,
filter={'DW_TAG_subprogram', 'DW_TAG_variable'},
**args)
for no, entry in info.items():
# skip funcs with no name or no file
if ('DW_AT_name' not in entry
or 'DW_AT_decl_file' not in entry):
continue
name_ = entry['DW_AT_name'].split(':')[-1].strip()
file_ = files.get(int(entry['DW_AT_decl_file']), '?')
defs[name_] = file_
except sp.CalledProcessError:
# do nothing on error, we don't need objdump to work, source
# files may just be inaccurate
pass
files = {}
info = {}
# map function sizes to debug symbols
for func, size in sizes.items():
@@ -365,34 +421,28 @@ def collect(obj_paths, *,
if not everything and func.startswith('__'):
continue
# find best matching debug symbol, this may be slightly different
# find best matching dwarf entry, this may be slightly different
# due to optimizations
if defs:
# exact match? avoid difflib if we can for speed
if func in defs:
file_ = defs[func]
else:
_, file_ = max(
defs.items(),
key=lambda d: difflib.SequenceMatcher(None,
d[0],
func, False).ratio())
entry = info.get(func)
if entry is not None and 'DW_AT_decl_file' in entry:
file = files.get(int(entry['DW_AT_decl_file']), '?')
else:
file_ = file
file = re.sub('(\.o)?$', '.c', obj_path, 1)
# ignore filtered sources
if sources is not None:
if not any(os.path.abspath(file_) == os.path.abspath(s)
if not any(os.path.abspath(file) == os.path.abspath(s)
for s in sources):
continue
else:
# default to only cwd
if not everything and not os.path.commonpath([
os.getcwd(),
os.path.abspath(file_)]) == os.getcwd():
os.path.abspath(file)]) == os.getcwd():
continue
results.append(CodeResult(file_, func, size))
results.append(CodeResult(file, func, size))
return results
@@ -624,6 +674,9 @@ def table(Result, results, diff_results=None, *,
types[k].ratio(
getattr(r, k, None),
getattr(diff_r, k, None)))))
# append any notes
if hasattr(r, 'notes'):
entry[-1][1].extend(r.notes)
return entry
# recursive entry helper, only used by some scripts