scripts: Adopted ctx.py-related changes in other result scripts

- Adopted higher-level collect data structures:

  - high-level DwarfEntry/DwarfInfo classes
  - high-level SymInfo class
  - high-level LineInfo class

  Note these had to be moved out of function scope due to pickling
  issues in perf.py/perfbd.py (see the sketch below). They were only
  function-local to minimize scope leak, so fortunately this was an
  easy change.
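
  As a rough illustration of the pickling issue (a minimal sketch, not
  the actual perf.py code): instances of a class defined inside a
  function can't be pickled, which breaks multiprocessing, while
  module-level classes work fine:

    import pickle

    class TopLevel:
        pass

    def make_local():
        # defining the class inside a function keeps its scope small...
        class Local:
            pass
        return Local()

    pickle.dumps(TopLevel())  # fine, the class is importable by name
    try:
        pickle.dumps(make_local())
    except (AttributeError, pickle.PicklingError):
        # ...but pickle can't reference a function-local class by name,
        # which breaks multiprocessing-based scripts
        pass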

- Adopted better list-default patterns in Result types:

    def __new__(..., children=None):
        return Result(..., children if children is not None else [])

  A classic Python footgun; a minimal demonstration follows.
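
  For reference (hypothetical Result type, not from these scripts): a
  mutable default argument is created once at definition time and
  shared across every call that omits it:

    import collections as co

    class Result(co.namedtuple('Result', ['name', 'children'])):
        __slots__ = ()
        def __new__(cls, name='', children=[]):  # one shared list!
            return super().__new__(cls, name, children)

    a = Result('a')
    b = Result('b')
    a.children.append('oops')
    assert b.children == ['oops']  # b sees a's mutation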

- Adopted notes rendering, though this is only used by ctx.py at the
  moment (see the sketch below).
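
  A simplified sketch of the mechanism (the result type, render
  function, and formatting here are illustrative; only the
  hasattr(r, 'notes') hook comes from this change): results may carry
  a notes list, and table rendering appends those notes to the entry's
  trailing annotations:

    class CtxResult:
        def __init__(self, name, size, notes=None):
            self.name = name
            self.size = size
            self.notes = notes if notes is not None else []

    def render(r):
        line = '%-16s %5d' % (r.name, r.size)
        # append any notes as a trailing comma-separated annotation
        notes = []
        if hasattr(r, 'notes'):
            notes.extend(r.notes)
        if notes:
            line += ' (%s)' % ', '.join(notes)
        return line

    print(render(CtxResult('lfs_ctx', 512, ['zero-sized'])))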

- Reverted to sorting children entries, for now.

  Unfortunately there's no easy way to sort the result entries in
  perf.py/perfbd.py before folding. Folding is going to make a mess of
  more complicated children anyway, so another solution is needed...
  A sketch of the sorting pattern follows.
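
  The change itself is just sorting children names before applying any
  explicitly requested sort keys; since Python's sort is stable, the
  later sorts only reorder ties left by the earlier ones. A rough
  sketch of the pattern (field values made up):

    names = ['lfs_file_write', 'lfs_dir_fetch', 'lfs_file_read']
    fields = {'size': {
        'lfs_dir_fetch': 320,
        'lfs_file_read': 464,
        'lfs_file_write': 592}}

    # sort by name first so children have a stable default order
    names.sort()
    # then apply requested sort keys, least significant first
    sort = [('size', True)]
    for k, reverse in reversed(sort):
        names.sort(key=lambda n: fields[k][n], reverse=reverse)
    # -> ['lfs_file_write', 'lfs_file_read', 'lfs_dir_fetch']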

And some other shared miscellany.
Author: Christopher Haster
Date:   2024-12-01 16:59:51 -06:00
Parent: b4c79c53d2
Commit: 512cf5ad4b
9 changed files with 805 additions and 510 deletions

View File

@@ -17,8 +17,8 @@ __import__('sys').path.pop(0)
import collections as co import collections as co
import csv import csv
import difflib
import itertools as it import itertools as it
import functools as ft
import math as mt import math as mt
import os import os
import re import re
@@ -168,8 +168,8 @@ def collect_dwarf_files(obj_path, *,
'.*\s+(?P<path>[^\s]+)\s*$') '.*\s+(?P<path>[^\s]+)\s*$')
# find source paths # find source paths
dirs = {} dirs = co.OrderedDict()
files = {} files = co.OrderedDict()
# note objdump-path may contain extra args # note objdump-path may contain extra args
cmd = objdump_path + ['--dwarf=rawline', obj_path] cmd = objdump_path + ['--dwarf=rawline', obj_path]
if args.get('verbose'): if args.get('verbose'):
@@ -202,7 +202,7 @@ def collect_dwarf_files(obj_path, *,
raise sp.CalledProcessError(proc.returncode, proc.args) raise sp.CalledProcessError(proc.returncode, proc.args)
# simplify paths # simplify paths
files_ = {} files_ = co.OrderedDict()
for no, file in files.items(): for no, file in files.items():
if os.path.commonpath([ if os.path.commonpath([
os.getcwd(), os.getcwd(),
@@ -214,34 +214,102 @@ def collect_dwarf_files(obj_path, *,
return files return files
# each dwarf entry can have attrs and children entries
class DwarfEntry:
def __init__(self, level, off, tag, ats={}, children=[]):
self.level = level
self.off = off
self.tag = tag
self.ats = ats or {}
self.children = children or []
def get(self, k, d=None):
return self.ats.get(k, d)
def __getitem__(self, k):
return self.ats[k]
def __contains__(self, k):
return k in self.ats
def __repr__(self):
return '%s(%d, 0x%x, %r, %r)' % (
self.__class__.__name__,
self.level,
self.off,
self.tag,
self.ats)
@ft.cached_property
def name(self):
if 'DW_AT_name' in self:
name = self['DW_AT_name'].split(':')[-1].strip()
# prefix with struct/union/enum
if self.tag == 'DW_TAG_structure_type':
name = 'struct ' + name
elif self.tag == 'DW_TAG_union_type':
name = 'union ' + name
elif self.tag == 'DW_TAG_enumeration_type':
name = 'enum ' + name
return name
else:
return None
# a collection of dwarf entries
class DwarfInfo:
def __init__(self, entries):
self.entries = entries
def get(self, k, d=None):
# allow lookup by both offset and dwarf name
if not isinstance(k, str):
return self.entries.get(k, d)
else:
import difflib
# organize entries by name
if not hasattr(self, '_by_name'):
self._by_name = {}
for entry in self.entries.values():
if entry.name is not None:
self._by_name[entry.name] = entry
# exact match? avoid difflib if we can for speed
if k in self._by_name:
return self._by_name[k]
# find the best matching dwarf entry with difflib
#
# this can be different from the actual symbol because
# of optimization passes
else:
name, entry = max(
self._by_name.items(),
key=lambda entry: difflib.SequenceMatcher(
None, entry[0], k, False).ratio(),
default=(None, None))
return entry
def __getitem__(self, k):
v = self.get(k)
if v is None:
raise KeyError(k)
return v
def __contains__(self, k):
return self.get(k) is not None
def __len__(self):
return len(self.entries)
def __iter__(self):
return (v for k, v in self.entries.items())
def collect_dwarf_info(obj_path, filter=None, *, def collect_dwarf_info(obj_path, filter=None, *,
objdump_path=OBJDUMP_PATH, objdump_path=OBJDUMP_PATH,
**args): **args):
filter_, filter = filter, __builtins__.filter filter_, filter = filter, __builtins__.filter
# each dwarf entry can have attrs and children entries
class DwarfEntry:
def __init__(self, level, off, tag, ats={}, children=[]):
self.level = level
self.off = off
self.tag = tag
self.ats = ats or {}
self.children = children or []
def __getitem__(self, k):
return self.ats[k]
def __contains__(self, k):
return k in self.ats
def __repr__(self):
return '%s(%d, 0x%x, %r, %r)' % (
self.__class__.__name__,
self.level,
self.off,
self.tag,
self.ats)
info_pattern = re.compile( info_pattern = re.compile(
'^\s*(?:<(?P<level>[^>]*)>' '^\s*(?:<(?P<level>[^>]*)>'
'\s*<(?P<off>[^>]*)>' '\s*<(?P<off>[^>]*)>'
@@ -251,7 +319,7 @@ def collect_dwarf_info(obj_path, filter=None, *,
'\s*:(?P<v>.*))\s*$') '\s*:(?P<v>.*))\s*$')
# collect dwarf entries # collect dwarf entries
entries = co.OrderedDict() info = co.OrderedDict()
entry = None entry = None
levels = {} levels = {}
# note objdump-path may contain extra args # note objdump-path may contain extra args
@@ -277,7 +345,7 @@ def collect_dwarf_info(obj_path, filter=None, *,
if (entry.level == 1 and ( if (entry.level == 1 and (
# unless this entry is filtered # unless this entry is filtered
filter_ is None or entry.tag in filter_)): filter_ is None or entry.tag in filter_)):
entries[entry.off] = entry info[entry.off] = entry
# store entry in parent # store entry in parent
levels[entry.level] = entry levels[entry.level] = entry
if entry.level-1 in levels: if entry.level-1 in levels:
@@ -290,7 +358,7 @@ def collect_dwarf_info(obj_path, filter=None, *,
if proc.returncode != 0: if proc.returncode != 0:
raise sp.CalledProcessError(proc.returncode, proc.args) raise sp.CalledProcessError(proc.returncode, proc.args)
return entries return DwarfInfo(info)
def collect_sizes(obj_path, *, def collect_sizes(obj_path, *,
nm_path=NM_PATH, nm_path=NM_PATH,
@@ -331,33 +399,21 @@ def collect(obj_paths, *,
**args): **args):
results = [] results = []
for obj_path in obj_paths: for obj_path in obj_paths:
# guess the source, if we have debug-info we'll replace this later
file = re.sub('(\.o)?$', '.c', obj_path, 1)
# find sizes # find sizes
sizes = collect_sizes(obj_path, everything=everything, **args) sizes = collect_sizes(obj_path, everything=everything, **args)
# try to figure out the source file if we have debug-info # try to figure out the source file if we have debug-info
defs = {}
try: try:
files = collect_dwarf_files(obj_path, **args) files = collect_dwarf_files(obj_path, **args)
info = collect_dwarf_info(obj_path, info = collect_dwarf_info(obj_path,
filter={'DW_TAG_subprogram', 'DW_TAG_variable'}, filter={'DW_TAG_subprogram', 'DW_TAG_variable'},
**args) **args)
for no, entry in info.items():
# skip funcs with no name or no file
if ('DW_AT_name' not in entry
or 'DW_AT_decl_file' not in entry):
continue
name_ = entry['DW_AT_name'].split(':')[-1].strip()
file_ = files.get(int(entry['DW_AT_decl_file']), '?')
defs[name_] = file_
except sp.CalledProcessError: except sp.CalledProcessError:
# do nothing on error, we don't need objdump to work, source # do nothing on error, we don't need objdump to work, source
# files may just be inaccurate # files may just be inaccurate
pass files = {}
info = {}
# map function sizes to debug symbols # map function sizes to debug symbols
for func, size in sizes.items(): for func, size in sizes.items():
@@ -365,34 +421,28 @@ def collect(obj_paths, *,
if not everything and func.startswith('__'): if not everything and func.startswith('__'):
continue continue
# find best matching debug symbol, this may be slightly different # find best matching dwarf entry, this may be slightly different
# due to optimizations # due to optimizations
if defs: entry = info.get(func)
# exact match? avoid difflib if we can for speed
if func in defs: if entry is not None and 'DW_AT_decl_file' in entry:
file_ = defs[func] file = files.get(int(entry['DW_AT_decl_file']), '?')
else:
_, file_ = max(
defs.items(),
key=lambda d: difflib.SequenceMatcher(None,
d[0],
func, False).ratio())
else: else:
file_ = file file = re.sub('(\.o)?$', '.c', obj_path, 1)
# ignore filtered sources # ignore filtered sources
if sources is not None: if sources is not None:
if not any(os.path.abspath(file_) == os.path.abspath(s) if not any(os.path.abspath(file) == os.path.abspath(s)
for s in sources): for s in sources):
continue continue
else: else:
# default to only cwd # default to only cwd
if not everything and not os.path.commonpath([ if not everything and not os.path.commonpath([
os.getcwd(), os.getcwd(),
os.path.abspath(file_)]) == os.getcwd(): os.path.abspath(file)]) == os.getcwd():
continue continue
results.append(CodeResult(file_, func, size)) results.append(CodeResult(file, func, size))
return results return results
@@ -624,6 +674,9 @@ def table(Result, results, diff_results=None, *,
types[k].ratio( types[k].ratio(
getattr(r, k, None), getattr(r, k, None),
getattr(diff_r, k, None))))) getattr(diff_r, k, None)))))
# append any notes
if hasattr(r, 'notes'):
entry[-1][1].extend(r.notes)
return entry return entry
# recursive entry helper, only used by some scripts # recursive entry helper, only used by some scripts

View File

@@ -578,6 +578,9 @@ def table(Result, results, diff_results=None, *,
types[k].ratio( types[k].ratio(
getattr(r, k, None), getattr(r, k, None),
getattr(diff_r, k, None))))) getattr(diff_r, k, None)))))
# append any notes
if hasattr(r, 'notes'):
entry[-1][1].extend(r.notes)
return entry return entry
# recursive entry helper, only used by some scripts # recursive entry helper, only used by some scripts

View File

@@ -1590,6 +1590,9 @@ def table(Result, results, diff_results=None, *,
types[k].ratio( types[k].ratio(
getattr(r, k, None), getattr(r, k, None),
getattr(diff_r, k, None))))) getattr(diff_r, k, None)))))
# append any notes
if hasattr(r, 'notes'):
entry[-1][1].extend(r.notes)
return entry return entry
# recursive entry helper, only used by some scripts # recursive entry helper, only used by some scripts

View File

@@ -160,61 +160,67 @@ def openio(path, mode='r', buffering=-1):
else: else:
return open(path, mode, buffering) return open(path, mode, buffering)
def collect_syms(obj_path, global_only=False, *, class SymInfo:
objdump_path=None, def __init__(self, syms):
**args): self.syms = syms
class SymInfo:
def __init__(self, syms):
self.syms = syms
def get(self, k, d=None): def get(self, k, d=None):
# allow lookup by both symbol and address # allow lookup by both symbol and address
if isinstance(k, str): if isinstance(k, str):
# organize by symbol, note multiple symbols can share a name # organize by symbol, note multiple symbols can share a name
if not hasattr(self, '_by_sym'): if not hasattr(self, '_by_sym'):
self._by_sym = {} by_sym = {}
for sym, addr, size in self.syms: for sym, addr, size in self.syms:
self._by_sym[sym] = (addr, size) if sym not in by_sym:
return self._by_sym.get(k, d) by_sym[sym] = []
if (addr, size) not in by_sym[sym]:
by_sym[sym].append((addr, size))
self._by_sym = by_sym
return self._by_sym.get(k, d)
else:
import bisect
# organize by address
if not hasattr(self, '_by_addr'):
# sort and keep largest/first when duplicates
syms = self.syms.copy()
syms.sort(key=lambda x: (x[1], -x[2], x[0]))
by_addr = []
for name, addr, size in syms:
if (len(by_addr) == 0
or by_addr[-1][0] != addr):
by_addr.append((name, addr, size))
self._by_addr = by_addr
# find sym by range
i = bisect.bisect(self._by_addr, k,
key=lambda x: x[1])
# check that we're actually in this sym's size
if i > 0 and k < self._by_addr[i-1][1]+self._by_addr[i-1][2]:
return self._by_addr[i-1][0]
else: else:
import bisect return d
# organize by address def __getitem__(self, k):
if not hasattr(self, '_by_addr'): v = self.get(k)
# sort and keep largest/first when duplicates if v is None:
syms = self.syms.copy() raise KeyError(k)
syms.sort(key=lambda x: (x[1], -x[2], x[0])) return v
self._by_addr = [] def __contains__(self, k):
for name, addr, size in syms: return self.get(k) is not None
if (len(self._by_addr) == 0
or self._by_addr[-1][0] != addr):
self._by_addr.append((name, addr, size))
# find sym by range def __len__(self):
i = bisect.bisect(self._by_addr, k, key=lambda x: x[1]) return len(self.syms)
# check that we're actually in this sym's size
if i > 0 and k < self._by_addr[i-1][1]+self._by_addr[i-1][2]:
return self._by_addr[i-1]
else:
return None
def __getitem__(self, k): def __iter__(self):
v = self.get(k) return iter(self.syms)
if v is None:
raise KeyError(k)
return v
def __contains__(self, k):
return self.get(k) is not None
def __len__(self):
return len(self.syms)
def __iter__(self):
return iter(self.syms)
def collect_syms(obj_path, global_only=False, *,
objdump_path=OBJDUMP_PATH,
**args):
symbol_pattern = re.compile( symbol_pattern = re.compile(
'^(?P<addr>[0-9a-fA-F]+)' '^(?P<addr>[0-9a-fA-F]+)'
' (?P<scope>.).*' ' (?P<scope>.).*'
@@ -260,28 +266,6 @@ def collect_syms(obj_path, global_only=False, *,
def collect_dwarf_files(obj_path, *, def collect_dwarf_files(obj_path, *,
objdump_path=OBJDUMP_PATH, objdump_path=OBJDUMP_PATH,
**args): **args):
class FileInfo:
def __init__(self, files):
self.files = files
def get(self, k, d=None):
return self.files.get(k, d)
def __getitem__(self, k):
v = self.get(k)
if v is None:
raise KeyError(k)
return v
def __contains__(self, k):
return self.get(k) is not None
def __len__(self):
return len(self.files)
def __iter__(self):
return (v for k, v in self.files.items())
line_pattern = re.compile( line_pattern = re.compile(
'^\s*(?P<no>[0-9]+)' '^\s*(?P<no>[0-9]+)'
'(?:\s+(?P<dir>[0-9]+))?' '(?:\s+(?P<dir>[0-9]+))?'
@@ -322,7 +306,7 @@ def collect_dwarf_files(obj_path, *,
raise sp.CalledProcessError(proc.returncode, proc.args) raise sp.CalledProcessError(proc.returncode, proc.args)
# simplify paths # simplify paths
files_ = {} files_ = co.OrderedDict()
for no, file in files.items(): for no, file in files.items():
if os.path.commonpath([ if os.path.commonpath([
os.getcwd(), os.getcwd(),
@@ -332,104 +316,104 @@ def collect_dwarf_files(obj_path, *,
files_[no] = os.path.abspath(file) files_[no] = os.path.abspath(file)
files = files_ files = files_
return FileInfo(files) return files
# each dwarf entry can have attrs and children entries
class DwarfEntry:
def __init__(self, level, off, tag, ats={}, children=[]):
self.level = level
self.off = off
self.tag = tag
self.ats = ats or {}
self.children = children or []
def get(self, k, d=None):
return self.ats.get(k, d)
def __getitem__(self, k):
return self.ats[k]
def __contains__(self, k):
return k in self.ats
def __repr__(self):
return '%s(%d, 0x%x, %r, %r)' % (
self.__class__.__name__,
self.level,
self.off,
self.tag,
self.ats)
@ft.cached_property
def name(self):
if 'DW_AT_name' in self:
name = self['DW_AT_name'].split(':')[-1].strip()
# prefix with struct/union/enum
if self.tag == 'DW_TAG_structure_type':
name = 'struct ' + name
elif self.tag == 'DW_TAG_union_type':
name = 'union ' + name
elif self.tag == 'DW_TAG_enumeration_type':
name = 'enum ' + name
return name
else:
return None
# a collection of dwarf entries
class DwarfInfo:
def __init__(self, entries):
self.entries = entries
def get(self, k, d=None):
# allow lookup by both offset and dwarf name
if not isinstance(k, str):
return self.entries.get(k, d)
else:
import difflib
# organize entries by name
if not hasattr(self, '_by_name'):
self._by_name = {}
for entry in self.entries.values():
if entry.name is not None:
self._by_name[entry.name] = entry
# exact match? avoid difflib if we can for speed
if k in self._by_name:
return self._by_name[k]
# find the best matching dwarf entry with difflib
#
# this can be different from the actual symbol because
# of optimization passes
else:
name, entry = max(
self._by_name.items(),
key=lambda entry: difflib.SequenceMatcher(
None, entry[0], k, False).ratio(),
default=(None, None))
return entry
def __getitem__(self, k):
v = self.get(k)
if v is None:
raise KeyError(k)
return v
def __contains__(self, k):
return self.get(k) is not None
def __len__(self):
return len(self.entries)
def __iter__(self):
return (v for k, v in self.entries.items())
def collect_dwarf_info(obj_path, filter=None, *, def collect_dwarf_info(obj_path, filter=None, *,
objdump_path=OBJDUMP_PATH, objdump_path=OBJDUMP_PATH,
**args): **args):
filter_, filter = filter, __builtins__.filter filter_, filter = filter, __builtins__.filter
# each dwarf entry can have attrs and children entries
class DwarfEntry:
def __init__(self, level, off, tag, ats={}, children=[]):
self.level = level
self.off = off
self.tag = tag
self.ats = ats or {}
self.children = children or []
def get(self, k, d=None):
return self.ats.get(k, d)
def __getitem__(self, k):
return self.ats[k]
def __contains__(self, k):
return k in self.ats
def __repr__(self):
return '%s(%d, 0x%x, %r, %r)' % (
self.__class__.__name__,
self.level,
self.off,
self.tag,
self.ats)
@ft.cached_property
def name(self):
if 'DW_AT_name' in self:
name = self['DW_AT_name'].split(':')[-1].strip()
# prefix with struct/union/enum
if self.tag == 'DW_TAG_structure_type':
name = 'struct ' + name
elif self.tag == 'DW_TAG_union_type':
name = 'union ' + name
elif self.tag == 'DW_TAG_enumeration_type':
name = 'enum ' + name
return name
else:
return None
# a collection of dwarf entries
class DwarfInfo:
def __init__(self, entries):
self.entries = entries
def get(self, k, d=None):
# allow lookup by both offset and dwarf name
if not isinstance(k, str):
return self.entries.get(k, d)
else:
import difflib
# organize entries by name
if not hasattr(self, '_by_name'):
self._by_name = {}
for entry in self.entries.values():
if entry.name is not None:
self._by_name[entry.name] = entry
# exact match? avoid difflib if we can for speed
if k in self._by_name:
return self._by_name[k]
# find the best matching dwarf entry with difflib
#
# this can be different from the actual symbol because
# of optimization passes
else:
name, entry = max(
self._by_name.items(),
key=lambda entry: difflib.SequenceMatcher(
None, entry[0], k, False).ratio(),
default=(None, None))
return entry
def __getitem__(self, k):
v = self.get(k)
if v is None:
raise KeyError(k)
return v
def __contains__(self, k):
return self.get(k) is not None
def __len__(self):
return len(self.entries)
def __iter__(self):
return (v for k, v in self.entries.items())
info_pattern = re.compile( info_pattern = re.compile(
'^\s*(?:<(?P<level>[^>]*)>' '^\s*(?:<(?P<level>[^>]*)>'
'\s*<(?P<off>[^>]*)>' '\s*<(?P<off>[^>]*)>'
@@ -925,7 +909,8 @@ def table(Result, results, diff_results=None, *,
for r in results_} for r in results_}
names_ = list(table_.keys()) names_ = list(table_.keys())
# only sort the children layer if explicitly requested # sort the children layer
names_.sort()
if sort: if sort:
for k, reverse in reversed(sort): for k, reverse in reversed(sort):
names_.sort( names_.sort(

View File

@@ -17,8 +17,8 @@ __import__('sys').path.pop(0)
import collections as co import collections as co
import csv import csv
import difflib
import itertools as it import itertools as it
import functools as ft
import math as mt import math as mt
import os import os
import re import re
@@ -168,8 +168,8 @@ def collect_dwarf_files(obj_path, *,
'.*\s+(?P<path>[^\s]+)\s*$') '.*\s+(?P<path>[^\s]+)\s*$')
# find source paths # find source paths
dirs = {} dirs = co.OrderedDict()
files = {} files = co.OrderedDict()
# note objdump-path may contain extra args # note objdump-path may contain extra args
cmd = objdump_path + ['--dwarf=rawline', obj_path] cmd = objdump_path + ['--dwarf=rawline', obj_path]
if args.get('verbose'): if args.get('verbose'):
@@ -202,7 +202,7 @@ def collect_dwarf_files(obj_path, *,
raise sp.CalledProcessError(proc.returncode, proc.args) raise sp.CalledProcessError(proc.returncode, proc.args)
# simplify paths # simplify paths
files_ = {} files_ = co.OrderedDict()
for no, file in files.items(): for no, file in files.items():
if os.path.commonpath([ if os.path.commonpath([
os.getcwd(), os.getcwd(),
@@ -214,34 +214,102 @@ def collect_dwarf_files(obj_path, *,
return files return files
# each dwarf entry can have attrs and children entries
class DwarfEntry:
def __init__(self, level, off, tag, ats={}, children=[]):
self.level = level
self.off = off
self.tag = tag
self.ats = ats or {}
self.children = children or []
def get(self, k, d=None):
return self.ats.get(k, d)
def __getitem__(self, k):
return self.ats[k]
def __contains__(self, k):
return k in self.ats
def __repr__(self):
return '%s(%d, 0x%x, %r, %r)' % (
self.__class__.__name__,
self.level,
self.off,
self.tag,
self.ats)
@ft.cached_property
def name(self):
if 'DW_AT_name' in self:
name = self['DW_AT_name'].split(':')[-1].strip()
# prefix with struct/union/enum
if self.tag == 'DW_TAG_structure_type':
name = 'struct ' + name
elif self.tag == 'DW_TAG_union_type':
name = 'union ' + name
elif self.tag == 'DW_TAG_enumeration_type':
name = 'enum ' + name
return name
else:
return None
# a collection of dwarf entries
class DwarfInfo:
def __init__(self, entries):
self.entries = entries
def get(self, k, d=None):
# allow lookup by both offset and dwarf name
if not isinstance(k, str):
return self.entries.get(k, d)
else:
import difflib
# organize entries by name
if not hasattr(self, '_by_name'):
self._by_name = {}
for entry in self.entries.values():
if entry.name is not None:
self._by_name[entry.name] = entry
# exact match? avoid difflib if we can for speed
if k in self._by_name:
return self._by_name[k]
# find the best matching dwarf entry with difflib
#
# this can be different from the actual symbol because
# of optimization passes
else:
name, entry = max(
self._by_name.items(),
key=lambda entry: difflib.SequenceMatcher(
None, entry[0], k, False).ratio(),
default=(None, None))
return entry
def __getitem__(self, k):
v = self.get(k)
if v is None:
raise KeyError(k)
return v
def __contains__(self, k):
return self.get(k) is not None
def __len__(self):
return len(self.entries)
def __iter__(self):
return (v for k, v in self.entries.items())
def collect_dwarf_info(obj_path, filter=None, *, def collect_dwarf_info(obj_path, filter=None, *,
objdump_path=OBJDUMP_PATH, objdump_path=OBJDUMP_PATH,
**args): **args):
filter_, filter = filter, __builtins__.filter filter_, filter = filter, __builtins__.filter
# each dwarf entry can have attrs and children entries
class DwarfEntry:
def __init__(self, level, off, tag, ats={}, children=[]):
self.level = level
self.off = off
self.tag = tag
self.ats = ats or {}
self.children = children or []
def __getitem__(self, k):
return self.ats[k]
def __contains__(self, k):
return k in self.ats
def __repr__(self):
return '%s(%d, 0x%x, %r, %r)' % (
self.__class__.__name__,
self.level,
self.off,
self.tag,
self.ats)
info_pattern = re.compile( info_pattern = re.compile(
'^\s*(?:<(?P<level>[^>]*)>' '^\s*(?:<(?P<level>[^>]*)>'
'\s*<(?P<off>[^>]*)>' '\s*<(?P<off>[^>]*)>'
@@ -251,7 +319,7 @@ def collect_dwarf_info(obj_path, filter=None, *,
'\s*:(?P<v>.*))\s*$') '\s*:(?P<v>.*))\s*$')
# collect dwarf entries # collect dwarf entries
entries = co.OrderedDict() info = co.OrderedDict()
entry = None entry = None
levels = {} levels = {}
# note objdump-path may contain extra args # note objdump-path may contain extra args
@@ -277,7 +345,7 @@ def collect_dwarf_info(obj_path, filter=None, *,
if (entry.level == 1 and ( if (entry.level == 1 and (
# unless this entry is filtered # unless this entry is filtered
filter_ is None or entry.tag in filter_)): filter_ is None or entry.tag in filter_)):
entries[entry.off] = entry info[entry.off] = entry
# store entry in parent # store entry in parent
levels[entry.level] = entry levels[entry.level] = entry
if entry.level-1 in levels: if entry.level-1 in levels:
@@ -290,7 +358,7 @@ def collect_dwarf_info(obj_path, filter=None, *,
if proc.returncode != 0: if proc.returncode != 0:
raise sp.CalledProcessError(proc.returncode, proc.args) raise sp.CalledProcessError(proc.returncode, proc.args)
return entries return DwarfInfo(info)
def collect_sizes(obj_path, *, def collect_sizes(obj_path, *,
nm_path=NM_PATH, nm_path=NM_PATH,
@@ -331,33 +399,21 @@ def collect(obj_paths, *,
**args): **args):
results = [] results = []
for obj_path in obj_paths: for obj_path in obj_paths:
# guess the source, if we have debug-info we'll replace this later
file = re.sub('(\.o)?$', '.c', obj_path, 1)
# find sizes # find sizes
sizes = collect_sizes(obj_path, everything=everything, **args) sizes = collect_sizes(obj_path, everything=everything, **args)
# try to figure out the source file if we have debug-info # try to figure out the source file if we have debug-info
defs = {}
try: try:
files = collect_dwarf_files(obj_path, **args) files = collect_dwarf_files(obj_path, **args)
info = collect_dwarf_info(obj_path, info = collect_dwarf_info(obj_path,
filter={'DW_TAG_subprogram', 'DW_TAG_variable'}, filter={'DW_TAG_subprogram', 'DW_TAG_variable'},
**args) **args)
for no, entry in info.items():
# skip funcs with no name or no file
if ('DW_AT_name' not in entry
or 'DW_AT_decl_file' not in entry):
continue
name_ = entry['DW_AT_name'].split(':')[-1].strip()
file_ = files.get(int(entry['DW_AT_decl_file']), '?')
defs[name_] = file_
except sp.CalledProcessError: except sp.CalledProcessError:
# do nothing on error, we don't need objdump to work, source # do nothing on error, we don't need objdump to work, source
# files may just be inaccurate # files may just be inaccurate
pass files = {}
info = {}
# map function sizes to debug symbols # map function sizes to debug symbols
for func, size in sizes.items(): for func, size in sizes.items():
@@ -365,34 +421,28 @@ def collect(obj_paths, *,
if not everything and func.startswith('__'): if not everything and func.startswith('__'):
continue continue
# find best matching debug symbol, this may be slightly different # find best matching dwarf entry, this may be slightly different
# due to optimizations # due to optimizations
if defs: entry = info.get(func)
# exact match? avoid difflib if we can for speed
if func in defs: if entry is not None and 'DW_AT_decl_file' in entry:
file_ = defs[func] file = files.get(int(entry['DW_AT_decl_file']), '?')
else:
_, file_ = max(
defs.items(),
key=lambda d: difflib.SequenceMatcher(None,
d[0],
func, False).ratio())
else: else:
file_ = file file = re.sub('(\.o)?$', '.c', obj_path, 1)
# ignore filtered sources # ignore filtered sources
if sources is not None: if sources is not None:
if not any(os.path.abspath(file_) == os.path.abspath(s) if not any(os.path.abspath(file) == os.path.abspath(s)
for s in sources): for s in sources):
continue continue
else: else:
# default to only cwd # default to only cwd
if not everything and not os.path.commonpath([ if not everything and not os.path.commonpath([
os.getcwd(), os.getcwd(),
os.path.abspath(file_)]) == os.getcwd(): os.path.abspath(file)]) == os.getcwd():
continue continue
results.append(DataResult(file_, func, size)) results.append(CodeResult(file, func, size))
return results return results
@@ -624,6 +674,9 @@ def table(Result, results, diff_results=None, *,
types[k].ratio( types[k].ratio(
getattr(r, k, None), getattr(r, k, None),
getattr(diff_r, k, None))))) getattr(diff_r, k, None)))))
# append any notes
if hasattr(r, 'notes'):
entry[-1][1].extend(r.notes)
return entry return entry
# recursive entry helper, only used by some scripts # recursive entry helper, only used by some scripts

View File

@@ -155,12 +155,12 @@ class PerfResult(co.namedtuple('PerfResult', [
__slots__ = () __slots__ = ()
def __new__(cls, file='', function='', line=0, def __new__(cls, file='', function='', line=0,
cycles=0, bmisses=0, branches=0, cmisses=0, caches=0, cycles=0, bmisses=0, branches=0, cmisses=0, caches=0,
children=[]): children=None):
return super().__new__(cls, file, function, int(RInt(line)), return super().__new__(cls, file, function, int(RInt(line)),
RInt(cycles), RInt(cycles),
RInt(bmisses), RInt(branches), RInt(bmisses), RInt(branches),
RInt(cmisses), RInt(caches), RInt(cmisses), RInt(caches),
children) children if children is not None else [])
def __add__(self, other): def __add__(self, other):
return PerfResult(self.file, self.function, self.line, return PerfResult(self.file, self.function, self.line,
@@ -259,19 +259,76 @@ def multiprocessing_cache(f):
return multiprocessing_cache return multiprocessing_cache
class SymInfo:
def __init__(self, syms):
self.syms = syms
def get(self, k, d=None):
# allow lookup by both symbol and address
if isinstance(k, str):
# organize by symbol, note multiple symbols can share a name
if not hasattr(self, '_by_sym'):
by_sym = {}
for sym, addr, size in self.syms:
if sym not in by_sym:
by_sym[sym] = []
if (addr, size) not in by_sym[sym]:
by_sym[sym].append((addr, size))
self._by_sym = by_sym
return self._by_sym.get(k, d)
else:
import bisect
# organize by address
if not hasattr(self, '_by_addr'):
# sort and keep largest/first when duplicates
syms = self.syms.copy()
syms.sort(key=lambda x: (x[1], -x[2], x[0]))
by_addr = []
for name, addr, size in syms:
if (len(by_addr) == 0
or by_addr[-1][0] != addr):
by_addr.append((name, addr, size))
self._by_addr = by_addr
# find sym by range
i = bisect.bisect(self._by_addr, k,
key=lambda x: x[1])
# check that we're actually in this sym's size
if i > 0 and k < self._by_addr[i-1][1]+self._by_addr[i-1][2]:
return self._by_addr[i-1][0]
else:
return d
def __getitem__(self, k):
v = self.get(k)
if v is None:
raise KeyError(k)
return v
def __contains__(self, k):
return self.get(k) is not None
def __len__(self):
return len(self.syms)
def __iter__(self):
return iter(self.syms)
@multiprocessing_cache @multiprocessing_cache
def collect_syms(obj_path, *, def collect_syms(obj_path, global_only=False, *,
objdump_path=None, objdump_path=OBJDUMP_PATH,
**args): **args):
symbol_pattern = re.compile( symbol_pattern = re.compile(
'^(?P<addr>[0-9a-fA-F]+)' '^(?P<addr>[0-9a-fA-F]+)'
'.*' ' (?P<scope>.).*'
'\s+(?P<size>[0-9a-fA-F]+)' '\s+(?P<size>[0-9a-fA-F]+)'
'\s+(?P<name>[^\s]+)\s*$') '\s+(?P<name>[^\s]+)\s*$')
# figure out symbol addresses # find symbol addresses and sizes
syms = {} syms = []
sym_at = []
cmd = objdump_path + ['-t', obj_path] cmd = objdump_path + ['-t', obj_path]
if args.get('verbose'): if args.get('verbose'):
print(' '.join(shlex.quote(c) for c in cmd)) print(' '.join(shlex.quote(c) for c in cmd))
@@ -284,33 +341,102 @@ def collect_syms(obj_path, *,
m = symbol_pattern.match(line) m = symbol_pattern.match(line)
if m: if m:
name = m.group('name') name = m.group('name')
scope = m.group('scope')
addr = int(m.group('addr'), 16) addr = int(m.group('addr'), 16)
size = int(m.group('size'), 16) size = int(m.group('size'), 16)
# skip non-globals?
# l => local
# g => global
# u => unique global
# => neither
# ! => local + global
if global_only and scope in 'l ':
continue
# ignore zero-sized symbols # ignore zero-sized symbols
if not size: if not size:
continue continue
# note multiple symbols can share a name # note multiple symbols can share a name
if name not in syms: syms.append((name, addr, size))
syms[name] = set()
syms[name].add((addr, size))
sym_at.append((addr, name, size))
proc.wait() proc.wait()
if proc.returncode != 0: if proc.returncode != 0:
raise sp.CalledProcessError(proc.returncode, proc.args) raise sp.CalledProcessError(proc.returncode, proc.args)
# sort and keep largest/first when duplicates return SymInfo(syms)
sym_at.sort(key=lambda x: (x[0], -x[2], x[1]))
sym_at_ = []
for addr, name, size in sym_at:
if len(sym_at_) == 0 or sym_at_[-1][0] != addr:
sym_at_.append((addr, name, size))
sym_at = sym_at_
return syms, sym_at class LineInfo:
def __init__(self, lines):
self.lines = lines
def get(self, k, d=None):
# allow lookup by both address and file+line tuple
if not isinstance(k, tuple):
import bisect
# organize by address
if not hasattr(self, '_by_addr'):
# sort and keep first when duplicates
lines = self.lines.copy()
lines.sort(key=lambda x: (x[2], x[0], x[1]))
by_addr = []
for file, line, addr in lines:
if (len(by_addr) == 0
or by_addr[-1][2] != addr):
by_addr.append((file, line, addr))
self._by_addr = by_addr
# find file+line by addr
i = bisect.bisect(self._by_addr, k,
key=lambda x: x[2])
if i > 0:
return self._by_addr[i-1][0], self._by_addr[i-1][1]
else:
return d
else:
import bisect
# organize by file+line
if not hasattr(self, '_by_line'):
# sort and keep first when duplicates
lines = self.lines.copy()
lines.sort()
by_line = []
for file, line, addr in lines:
if (len(by_line) == 0
or by_line[-1][0] != file
or by_line[-1][1] != line):
by_line.append((file, line, addr))
self._by_line = by_line
# find addr by file+line tuple
i = bisect.bisect(self._by_line, k,
key=lambda x: (x[0], x[1]))
# make sure file at least matches!
if i > 0 and self._by_line[i-1][0] == k[0]:
return self._by_line[i-1][2]
else:
return d
def __getitem__(self, k):
v = self.get(k)
if v is None:
raise KeyError(k)
return v
def __contains__(self, k):
return self.get(k) is not None
def __len__(self):
return len(self.lines)
def __iter__(self):
return iter(self.lines)
@multiprocessing_cache @multiprocessing_cache
def collect_dwarf_lines(obj_path, *, def collect_dwarf_lines(obj_path, *,
objdump_path=None, objdump_path=OBJDUMP_PATH,
**args): **args):
line_pattern = re.compile( line_pattern = re.compile(
'^\s*(?:' '^\s*(?:'
@@ -334,9 +460,8 @@ def collect_dwarf_lines(obj_path, *,
# decodedline seems to have issues with multiple dir/file # decodedline seems to have issues with multiple dir/file
# tables, which is why we need this # tables, which is why we need this
lines = [] lines = []
line_at = [] dirs = co.OrderedDict()
dirs = {} files = co.OrderedDict()
files = {}
op_file = 1 op_file = 1
op_line = 1 op_line = 1
op_addr = 0 op_addr = 0
@@ -377,7 +502,6 @@ def collect_dwarf_lines(obj_path, *,
or m.group('op_end')): or m.group('op_end')):
file = os.path.abspath(files.get(op_file, '?')) file = os.path.abspath(files.get(op_file, '?'))
lines.append((file, op_line, op_addr)) lines.append((file, op_line, op_addr))
line_at.append((op_addr, file, op_line))
if m.group('op_end'): if m.group('op_end'):
op_file = 1 op_file = 1
@@ -387,23 +511,7 @@ def collect_dwarf_lines(obj_path, *,
if proc.returncode != 0: if proc.returncode != 0:
raise sp.CalledProcessError(proc.returncode, proc.args) raise sp.CalledProcessError(proc.returncode, proc.args)
# sort and keep first when duplicates return LineInfo(lines)
lines.sort()
lines_ = []
for file, line, addr in lines:
if len(lines_) == 0 or lines_[-1][0] != file or lines[-1][1] != line:
lines_.append((file, line, addr))
lines = lines_
# sort and keep first when duplicates
line_at.sort()
line_at_ = []
for addr, file, line in line_at:
if len(line_at_) == 0 or line_at_[-1][0] != addr:
line_at_.append((addr, file, line))
line_at = line_at_
return lines, line_at
def collect_decompressed(path, *, def collect_decompressed(path, *,
@@ -502,8 +610,8 @@ def collect_decompressed(path, *,
addr_ = int(m.group('addr'), 16) addr_ = int(m.group('addr'), 16)
# get the syms/lines for the dso, this is cached # get the syms/lines for the dso, this is cached
syms, sym_at = collect_syms(dso, **args) syms = collect_syms(dso, **args)
lines, line_at = collect_dwarf_lines(dso, **args) lines = collect_dwarf_lines(dso, **args)
# ASLR is tricky, we have symbols+offsets, but static symbols # ASLR is tricky, we have symbols+offsets, but static symbols
# means we may have multiple options for each symbol. # means we may have multiple options for each symbol.
@@ -541,9 +649,9 @@ def collect_decompressed(path, *,
file, line = cached file, line = cached
else: else:
# find file+line # find file+line
i = bisect.bisect(line_at, addr, key=lambda x: x[0]) line_ = lines.get(addr)
if i > 0: if line_ is not None:
_, file, line = line_at[i-1] file, line = line_
else: else:
file, line = re.sub('(\.o)?$', '.c', dso, 1), 0 file, line = re.sub('(\.o)?$', '.c', dso, 1), 0
@@ -872,6 +980,9 @@ def table(Result, results, diff_results=None, *,
types[k].ratio( types[k].ratio(
getattr(r, k, None), getattr(r, k, None),
getattr(diff_r, k, None))))) getattr(diff_r, k, None)))))
# append any notes
if hasattr(r, 'notes'):
entry[-1][1].extend(r.notes)
return entry return entry
# recursive entry helper, only used by some scripts # recursive entry helper, only used by some scripts

View File

@@ -143,10 +143,10 @@ class PerfBdResult(co.namedtuple('PerfBdResult', [
__slots__ = () __slots__ = ()
def __new__(cls, file='', function='', line=0, def __new__(cls, file='', function='', line=0,
readed=0, proged=0, erased=0, readed=0, proged=0, erased=0,
children=[]): children=None):
return super().__new__(cls, file, function, int(RInt(line)), return super().__new__(cls, file, function, int(RInt(line)),
RInt(readed), RInt(proged), RInt(erased), RInt(readed), RInt(proged), RInt(erased),
children) children if children is not None else [])
def __add__(self, other): def __add__(self, other):
return PerfBdResult(self.file, self.function, self.line, return PerfBdResult(self.file, self.function, self.line,
@@ -166,18 +166,75 @@ def openio(path, mode='r', buffering=-1):
else: else:
return open(path, mode, buffering) return open(path, mode, buffering)
def collect_syms(obj_path, *, class SymInfo:
objdump_path=None, def __init__(self, syms):
self.syms = syms
def get(self, k, d=None):
# allow lookup by both symbol and address
if isinstance(k, str):
# organize by symbol, note multiple symbols can share a name
if not hasattr(self, '_by_sym'):
by_sym = {}
for sym, addr, size in self.syms:
if sym not in by_sym:
by_sym[sym] = []
if (addr, size) not in by_sym[sym]:
by_sym[sym].append((addr, size))
self._by_sym = by_sym
return self._by_sym.get(k, d)
else:
import bisect
# organize by address
if not hasattr(self, '_by_addr'):
# sort and keep largest/first when duplicates
syms = self.syms.copy()
syms.sort(key=lambda x: (x[1], -x[2], x[0]))
by_addr = []
for name, addr, size in syms:
if (len(by_addr) == 0
or by_addr[-1][0] != addr):
by_addr.append((name, addr, size))
self._by_addr = by_addr
# find sym by range
i = bisect.bisect(self._by_addr, k,
key=lambda x: x[1])
# check that we're actually in this sym's size
if i > 0 and k < self._by_addr[i-1][1]+self._by_addr[i-1][2]:
return self._by_addr[i-1][0]
else:
return d
def __getitem__(self, k):
v = self.get(k)
if v is None:
raise KeyError(k)
return v
def __contains__(self, k):
return self.get(k) is not None
def __len__(self):
return len(self.syms)
def __iter__(self):
return iter(self.syms)
def collect_syms(obj_path, global_only=False, *,
objdump_path=OBJDUMP_PATH,
**args): **args):
symbol_pattern = re.compile( symbol_pattern = re.compile(
'^(?P<addr>[0-9a-fA-F]+)' '^(?P<addr>[0-9a-fA-F]+)'
'.*' ' (?P<scope>.).*'
'\s+(?P<size>[0-9a-fA-F]+)' '\s+(?P<size>[0-9a-fA-F]+)'
'\s+(?P<name>[^\s]+)\s*$') '\s+(?P<name>[^\s]+)\s*$')
# figure out symbol addresses # find symbol addresses and sizes
syms = {} syms = []
sym_at = []
cmd = objdump_path + ['-t', obj_path] cmd = objdump_path + ['-t', obj_path]
if args.get('verbose'): if args.get('verbose'):
print(' '.join(shlex.quote(c) for c in cmd)) print(' '.join(shlex.quote(c) for c in cmd))
@@ -190,32 +247,101 @@ def collect_syms(obj_path, *,
m = symbol_pattern.match(line) m = symbol_pattern.match(line)
if m: if m:
name = m.group('name') name = m.group('name')
scope = m.group('scope')
addr = int(m.group('addr'), 16) addr = int(m.group('addr'), 16)
size = int(m.group('size'), 16) size = int(m.group('size'), 16)
# skip non-globals?
# l => local
# g => global
# u => unique global
# => neither
# ! => local + global
if global_only and scope in 'l ':
continue
# ignore zero-sized symbols # ignore zero-sized symbols
if not size: if not size:
continue continue
# note multiple symbols can share a name # note multiple symbols can share a name
if name not in syms: syms.append((name, addr, size))
syms[name] = set()
syms[name].add((addr, size))
sym_at.append((addr, name, size))
proc.wait() proc.wait()
if proc.returncode != 0: if proc.returncode != 0:
raise sp.CalledProcessError(proc.returncode, proc.args) raise sp.CalledProcessError(proc.returncode, proc.args)
# sort and keep largest/first when duplicates return SymInfo(syms)
sym_at.sort(key=lambda x: (x[0], -x[2], x[1]))
sym_at_ = []
for addr, name, size in sym_at:
if len(sym_at_) == 0 or sym_at_[-1][0] != addr:
sym_at_.append((addr, name, size))
sym_at = sym_at_
return syms, sym_at class LineInfo:
def __init__(self, lines):
self.lines = lines
def get(self, k, d=None):
# allow lookup by both address and file+line tuple
if not isinstance(k, tuple):
import bisect
# organize by address
if not hasattr(self, '_by_addr'):
# sort and keep first when duplicates
lines = self.lines.copy()
lines.sort(key=lambda x: (x[2], x[0], x[1]))
by_addr = []
for file, line, addr in lines:
if (len(by_addr) == 0
or by_addr[-1][2] != addr):
by_addr.append((file, line, addr))
self._by_addr = by_addr
# find file+line by addr
i = bisect.bisect(self._by_addr, k,
key=lambda x: x[2])
if i > 0:
return self._by_addr[i-1][0], self._by_addr[i-1][1]
else:
return d
else:
import bisect
# organize by file+line
if not hasattr(self, '_by_line'):
# sort and keep first when duplicates
lines = self.lines.copy()
lines.sort()
by_line = []
for file, line, addr in lines:
if (len(by_line) == 0
or by_line[-1][0] != file
or by_line[-1][1] != line):
by_line.append((file, line, addr))
self._by_line = by_line
# find addr by file+line tuple
i = bisect.bisect(self._by_line, k,
key=lambda x: (x[0], x[1]))
# make sure file at least matches!
if i > 0 and self._by_line[i-1][0] == k[0]:
return self._by_line[i-1][2]
else:
return d
def __getitem__(self, k):
v = self.get(k)
if v is None:
raise KeyError(k)
return v
def __contains__(self, k):
return self.get(k) is not None
def __len__(self):
return len(self.lines)
def __iter__(self):
return iter(self.lines)
def collect_dwarf_lines(obj_path, *, def collect_dwarf_lines(obj_path, *,
objdump_path=None, objdump_path=OBJDUMP_PATH,
**args): **args):
line_pattern = re.compile( line_pattern = re.compile(
'^\s*(?:' '^\s*(?:'
@@ -239,9 +365,8 @@ def collect_dwarf_lines(obj_path, *,
# decodedline seems to have issues with multiple dir/file # decodedline seems to have issues with multiple dir/file
# tables, which is why we need this # tables, which is why we need this
lines = [] lines = []
line_at = [] dirs = co.OrderedDict()
dirs = {} files = co.OrderedDict()
files = {}
op_file = 1 op_file = 1
op_line = 1 op_line = 1
op_addr = 0 op_addr = 0
@@ -282,7 +407,6 @@ def collect_dwarf_lines(obj_path, *,
or m.group('op_end')): or m.group('op_end')):
file = os.path.abspath(files.get(op_file, '?')) file = os.path.abspath(files.get(op_file, '?'))
lines.append((file, op_line, op_addr)) lines.append((file, op_line, op_addr))
line_at.append((op_addr, file, op_line))
if m.group('op_end'): if m.group('op_end'):
op_file = 1 op_file = 1
@@ -292,26 +416,10 @@ def collect_dwarf_lines(obj_path, *,
if proc.returncode != 0: if proc.returncode != 0:
raise sp.CalledProcessError(proc.returncode, proc.args) raise sp.CalledProcessError(proc.returncode, proc.args)
# sort and keep first when duplicates return LineInfo(lines)
lines.sort()
lines_ = []
for file, line, addr in lines:
if len(lines_) == 0 or lines_[-1][0] != file or lines[-1][1] != line:
lines_.append((file, line, addr))
lines = lines_
# sort and keep first when duplicates
line_at.sort()
line_at_ = []
for addr, file, line in line_at:
if len(line_at_) == 0 or line_at_[-1][0] != addr:
line_at_.append((addr, file, line))
line_at = line_at_
return lines, line_at
def collect_job(path, start, stop, syms, sym_at, lines, line_at, *, def collect_job(path, start, stop, syms, lines, *,
sources=None, sources=None,
everything=False, everything=False,
propagate=0, propagate=0,
@@ -465,10 +573,9 @@ def collect_job(path, start, stop, syms, sym_at, lines, line_at, *,
# the first stack frame, so we can use that as a point # the first stack frame, so we can use that as a point
# of reference # of reference
if last_delta is None: if last_delta is None:
i = bisect.bisect(lines, (last_file, last_line), addr__ = lines.get((last_file, last_line))
key=lambda x: (x[0], x[1])) if addr__ is not None:
if i > 0: last_delta = addr__ - addr_
last_delta = lines[i-1][2] - addr_
else: else:
# can't reverse ASLR, give up on backtrace # can't reverse ASLR, give up on backtrace
commit() commit()
@@ -486,11 +593,8 @@ def collect_job(path, start, stop, syms, sym_at, lines, line_at, *,
file, sym, line = cached file, sym, line = cached
else: else:
# find sym # find sym
i = bisect.bisect(sym_at, addr, key=lambda x: x[0]) sym = syms.get(addr)
# check that we're actually in the sym's size if sym is None:
if i > 0 and addr < sym_at[i-1][0] + sym_at[i-1][2]:
_, sym, _ = sym_at[i-1]
else:
sym = hex(addr) sym = hex(addr)
# filter out internal/unknown functions # filter out internal/unknown functions
@@ -503,9 +607,9 @@ def collect_job(path, start, stop, syms, sym_at, lines, line_at, *,
continue continue
# find file+line # find file+line
i = bisect.bisect(line_at, addr, key=lambda x: x[0]) line_ = lines.get(addr)
if i > 0: if line_ is not None:
_, file, line = line_at[i-1] file, line = line_
elif len(last_stack) == 0: elif len(last_stack) == 0:
file, line = last_file, last_line file, line = last_file, last_line
else: else:
@@ -568,8 +672,8 @@ def collect(obj_path, trace_paths, *,
jobs = len(os.sched_getaffinity(0)) jobs = len(os.sched_getaffinity(0))
# find sym/line info to reverse ASLR # find sym/line info to reverse ASLR
syms, sym_at = collect_syms(obj_path, **args) syms = collect_syms(obj_path, **args)
lines, line_at = collect_dwarf_lines(obj_path, **args) lines = collect_dwarf_lines(obj_path, **args)
if jobs is not None: if jobs is not None:
# try to split up files so that even single files can be processed # try to split up files so that even single files can be processed
@@ -596,8 +700,7 @@ def collect(obj_path, trace_paths, *,
for results_ in p.imap_unordered( for results_ in p.imap_unordered(
starapply, starapply,
((collect_job, ((collect_job,
(path, start, stop, (path, start, stop, syms, lines),
syms, sym_at, lines, line_at),
args) args)
for path, ranges in zip(trace_paths, trace_ranges) for path, ranges in zip(trace_paths, trace_ranges)
for start, stop in ranges)): for start, stop in ranges)):
@@ -607,8 +710,7 @@ def collect(obj_path, trace_paths, *,
results = [] results = []
for path in trace_paths: for path in trace_paths:
results.extend(collect_job( results.extend(collect_job(
path, None, None, path, None, None, syms, lines,
syms, sym_at, lines, line_at,
**args)) **args))
return results return results
@@ -841,6 +943,9 @@ def table(Result, results, diff_results=None, *,
types[k].ratio( types[k].ratio(
getattr(r, k, None), getattr(r, k, None),
getattr(diff_r, k, None))))) getattr(diff_r, k, None)))))
# append any notes
if hasattr(r, 'notes'):
entry[-1][1].extend(r.notes)
return entry return entry
# recursive entry helper, only used by some scripts # recursive entry helper, only used by some scripts

View File

@@ -129,12 +129,11 @@ class StackResult(co.namedtuple('StackResult', [
_types = {'frame': RInt, 'limit': RInt} _types = {'frame': RInt, 'limit': RInt}
__slots__ = () __slots__ = ()
def __new__(cls, file='', function='', def __new__(cls, file='', function='', frame=0, limit=0,
frame=0, limit=0, children=None):
children=[]):
return super().__new__(cls, file, function, return super().__new__(cls, file, function,
RInt(frame), RInt(limit), RInt(frame), RInt(limit),
children) children if children is not None else [])
def __add__(self, other): def __add__(self, other):
return StackResult(self.file, self.function, return StackResult(self.file, self.function,
@@ -527,6 +526,9 @@ def table(Result, results, diff_results=None, *,
types[k].ratio( types[k].ratio(
getattr(r, k, None), getattr(r, k, None),
getattr(diff_r, k, None))))) getattr(diff_r, k, None)))))
# append any notes
if hasattr(r, 'notes'):
entry[-1][1].extend(r.notes)
return entry return entry
# recursive entry helper, only used by some scripts # recursive entry helper, only used by some scripts

View File

@@ -136,7 +136,8 @@ class StructResult(co.namedtuple('StructResult', [
_types = {'size': RInt, 'align': RInt} _types = {'size': RInt, 'align': RInt}
__slots__ = () __slots__ = ()
def __new__(cls, file='', struct='', size=0, align=0, children=None): def __new__(cls, file='', struct='', size=0, align=0,
children=None):
return super().__new__(cls, file, struct, return super().__new__(cls, file, struct,
RInt(size), RInt(align), RInt(size), RInt(align),
children if children is not None else []) children if children is not None else [])
@@ -161,28 +162,6 @@ def openio(path, mode='r', buffering=-1):
def collect_dwarf_files(obj_path, *, def collect_dwarf_files(obj_path, *,
objdump_path=OBJDUMP_PATH, objdump_path=OBJDUMP_PATH,
**args): **args):
class FileInfo:
def __init__(self, files):
self.files = files
def get(self, k, d=None):
return self.files.get(k, d)
def __getitem__(self, k):
v = self.get(k)
if v is None:
raise KeyError(k)
return v
def __contains__(self, k):
return self.get(k) is not None
def __len__(self):
return len(self.files)
def __iter__(self):
return (v for k, v in self.files.items())
line_pattern = re.compile( line_pattern = re.compile(
'^\s*(?P<no>[0-9]+)' '^\s*(?P<no>[0-9]+)'
'(?:\s+(?P<dir>[0-9]+))?' '(?:\s+(?P<dir>[0-9]+))?'
@@ -223,7 +202,7 @@ def collect_dwarf_files(obj_path, *,
raise sp.CalledProcessError(proc.returncode, proc.args) raise sp.CalledProcessError(proc.returncode, proc.args)
# simplify paths # simplify paths
files_ = {} files_ = co.OrderedDict()
for no, file in files.items(): for no, file in files.items():
if os.path.commonpath([ if os.path.commonpath([
os.getcwd(), os.getcwd(),
@@ -233,104 +212,104 @@ def collect_dwarf_files(obj_path, *,
files_[no] = os.path.abspath(file) files_[no] = os.path.abspath(file)
files = files_ files = files_
return FileInfo(files) return files
# each dwarf entry can have attrs and children entries
class DwarfEntry:
def __init__(self, level, off, tag, ats={}, children=[]):
self.level = level
self.off = off
self.tag = tag
self.ats = ats or {}
self.children = children or []
def get(self, k, d=None):
return self.ats.get(k, d)
def __getitem__(self, k):
return self.ats[k]
def __contains__(self, k):
return k in self.ats
def __repr__(self):
return '%s(%d, 0x%x, %r, %r)' % (
self.__class__.__name__,
self.level,
self.off,
self.tag,
self.ats)
@ft.cached_property
def name(self):
if 'DW_AT_name' in self:
name = self['DW_AT_name'].split(':')[-1].strip()
# prefix with struct/union
if self.tag == 'DW_TAG_structure_type':
name = 'struct ' + name
elif self.tag == 'DW_TAG_union_type':
name = 'union ' + name
elif self.tag == 'DW_TAG_enumeration_type':
name = 'enum ' + name
return name
else:
return None
# a collection of dwarf entries
class DwarfInfo:
def __init__(self, entries):
self.entries = entries
def get(self, k, d=None):
# allow lookup by both offset and dwarf name
if not isinstance(k, str):
return self.entries.get(k, d)
else:
import difflib
# organize entries by name
if not hasattr(self, '_by_name'):
self._by_name = {}
for entry in self.entries.values():
if entry.name is not None:
self._by_name[entry.name] = entry
# exact match? avoid difflib if we can for speed
if k in self._by_name:
return self._by_name[k]
# find the best matching dwarf entry with difflib
#
# this can be different from the actual symbol because
# of optimization passes
else:
name, entry = max(
self._by_name.items(),
key=lambda entry: difflib.SequenceMatcher(
None, entry[0], k, False).ratio(),
default=(None, None))
return entry
def __getitem__(self, k):
v = self.get(k)
if v is None:
raise KeyError(k)
return v
def __contains__(self, k):
return self.get(k) is not None
def __len__(self):
return len(self.entries)
def __iter__(self):
return (v for k, v in self.entries.items())
def collect_dwarf_info(obj_path, filter=None, *, def collect_dwarf_info(obj_path, filter=None, *,
objdump_path=OBJDUMP_PATH, objdump_path=OBJDUMP_PATH,
**args): **args):
filter_, filter = filter, __builtins__.filter filter_, filter = filter, __builtins__.filter
# each dwarf entry can have attrs and children entries
class DwarfEntry:
def __init__(self, level, off, tag, ats={}, children=[]):
self.level = level
self.off = off
self.tag = tag
self.ats = ats or {}
self.children = children or []
def get(self, k, d=None):
return self.ats.get(k, d)
def __getitem__(self, k):
return self.ats[k]
def __contains__(self, k):
return k in self.ats
def __repr__(self):
return '%s(%d, 0x%x, %r, %r)' % (
self.__class__.__name__,
self.level,
self.off,
self.tag,
self.ats)
@ft.cached_property
def name(self):
if 'DW_AT_name' in self:
name = self['DW_AT_name'].split(':')[-1].strip()
# prefix with struct/union
if self.tag == 'DW_TAG_structure_type':
name = 'struct ' + name
elif self.tag == 'DW_TAG_union_type':
name = 'union ' + name
elif self.tag == 'DW_TAG_enumeration_type':
name = 'enum ' + name
return name
else:
return None
# a collection of dwarf entries
class DwarfInfo:
def __init__(self, entries):
self.entries = entries
def get(self, k, d=None):
# allow lookup by both offset and dwarf name
if not isinstance(k, str):
return self.entries.get(k, d)
else:
import difflib
# organize entries by name
if not hasattr(self, '_by_name'):
self._by_name = {}
for entry in self.entries.values():
if entry.name is not None:
self._by_name[entry.name] = entry
# exact match? avoid difflib if we can for speed
if k in self._by_name:
return self._by_name[k]
# find the best matching dwarf entry with difflib
#
# this can be different from the actual symbol because
# of optimization passes
else:
name, entry = max(
self._by_name.items(),
key=lambda entry: difflib.SequenceMatcher(
None, entry[0], k, False).ratio(),
default=(None, None))
return entry
def __getitem__(self, k):
v = self.get(k)
if v is None:
raise KeyError(k)
return v
def __contains__(self, k):
return self.get(k) is not None
def __len__(self):
return len(self.entries)
def __iter__(self):
return (v for k, v in self.entries.items())
info_pattern = re.compile( info_pattern = re.compile(
'^\s*(?:<(?P<level>[^>]*)>' '^\s*(?:<(?P<level>[^>]*)>'
'\s*<(?P<off>[^>]*)>' '\s*<(?P<off>[^>]*)>'
@@ -797,7 +776,8 @@ def table(Result, results, diff_results=None, *,
for r in results_} for r in results_}
names_ = list(table_.keys()) names_ = list(table_.keys())
# only sort the children layer if explicitly requested # sort the children layer
names_.sort()
if sort: if sort:
for k, reverse in reversed(sort): for k, reverse in reversed(sort):
names_.sort( names_.sort(