mirror of
https://github.com/littlefs-project/littlefs.git
synced 2025-12-26 17:18:26 +00:00
scripts: Adopted ctx.py-related changes in other result scripts
- Adopted higher-level collect data structures:
- high-level DwarfEntry/DwarfInfo class
- high-level SymInfo class
- high-level LineInfo class
Note that these had to be moved out of function scope due to pickling
issues in perf.py/perfbd.py (pickle cannot serialize instances of
function-local classes). These were only function-local to minimize
scope leak, so fortunately this was an easy change.
- Adopted better list-default patterns in Result types:
def __new__(..., children=None):
return Result(..., children if children is not None else [])
A classic Python footgun: a mutable default argument (children=[]) is
created once and shared across every call.
- Adopted notes rendering, though this is only used by ctx.py at the
moment.
- Reverted to sorting children entries, for now.
Unfortunately there's no easy way to sort the result entries in
perf.py/perfbd.py before folding. Folding is going to make a mess
of more complicated children anyway, so another solution is
needed...
And some other shared miscellany.
This commit is contained in:
171
scripts/code.py
171
scripts/code.py
@@ -17,8 +17,8 @@ __import__('sys').path.pop(0)
|
|||||||
|
|
||||||
import collections as co
|
import collections as co
|
||||||
import csv
|
import csv
|
||||||
import difflib
|
|
||||||
import itertools as it
|
import itertools as it
|
||||||
|
import functools as ft
|
||||||
import math as mt
|
import math as mt
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
@@ -168,8 +168,8 @@ def collect_dwarf_files(obj_path, *,
|
|||||||
'.*\s+(?P<path>[^\s]+)\s*$')
|
'.*\s+(?P<path>[^\s]+)\s*$')
|
||||||
|
|
||||||
# find source paths
|
# find source paths
|
||||||
dirs = {}
|
dirs = co.OrderedDict()
|
||||||
files = {}
|
files = co.OrderedDict()
|
||||||
# note objdump-path may contain extra args
|
# note objdump-path may contain extra args
|
||||||
cmd = objdump_path + ['--dwarf=rawline', obj_path]
|
cmd = objdump_path + ['--dwarf=rawline', obj_path]
|
||||||
if args.get('verbose'):
|
if args.get('verbose'):
|
||||||
@@ -202,7 +202,7 @@ def collect_dwarf_files(obj_path, *,
|
|||||||
raise sp.CalledProcessError(proc.returncode, proc.args)
|
raise sp.CalledProcessError(proc.returncode, proc.args)
|
||||||
|
|
||||||
# simplify paths
|
# simplify paths
|
||||||
files_ = {}
|
files_ = co.OrderedDict()
|
||||||
for no, file in files.items():
|
for no, file in files.items():
|
||||||
if os.path.commonpath([
|
if os.path.commonpath([
|
||||||
os.getcwd(),
|
os.getcwd(),
|
||||||
@@ -214,34 +214,102 @@ def collect_dwarf_files(obj_path, *,
|
|||||||
|
|
||||||
return files
|
return files
|
||||||
|
|
||||||
|
# each dwarf entry can have attrs and children entries
|
||||||
|
class DwarfEntry:
|
||||||
|
def __init__(self, level, off, tag, ats={}, children=[]):
|
||||||
|
self.level = level
|
||||||
|
self.off = off
|
||||||
|
self.tag = tag
|
||||||
|
self.ats = ats or {}
|
||||||
|
self.children = children or []
|
||||||
|
|
||||||
|
def get(self, k, d=None):
|
||||||
|
return self.ats.get(k, d)
|
||||||
|
|
||||||
|
def __getitem__(self, k):
|
||||||
|
return self.ats[k]
|
||||||
|
|
||||||
|
def __contains__(self, k):
|
||||||
|
return k in self.ats
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
return '%s(%d, 0x%x, %r, %r)' % (
|
||||||
|
self.__class__.__name__,
|
||||||
|
self.level,
|
||||||
|
self.off,
|
||||||
|
self.tag,
|
||||||
|
self.ats)
|
||||||
|
|
||||||
|
@ft.cached_property
|
||||||
|
def name(self):
|
||||||
|
if 'DW_AT_name' in self:
|
||||||
|
name = self['DW_AT_name'].split(':')[-1].strip()
|
||||||
|
# prefix with struct/union/enum
|
||||||
|
if self.tag == 'DW_TAG_structure_type':
|
||||||
|
name = 'struct ' + name
|
||||||
|
elif self.tag == 'DW_TAG_union_type':
|
||||||
|
name = 'union ' + name
|
||||||
|
elif self.tag == 'DW_TAG_enumeration_type':
|
||||||
|
name = 'enum ' + name
|
||||||
|
return name
|
||||||
|
else:
|
||||||
|
return None
|
||||||
|
|
||||||
|
# a collection of dwarf entries
|
||||||
|
class DwarfInfo:
|
||||||
|
def __init__(self, entries):
|
||||||
|
self.entries = entries
|
||||||
|
|
||||||
|
def get(self, k, d=None):
|
||||||
|
# allow lookup by both offset and dwarf name
|
||||||
|
if not isinstance(k, str):
|
||||||
|
return self.entries.get(k, d)
|
||||||
|
|
||||||
|
else:
|
||||||
|
import difflib
|
||||||
|
|
||||||
|
# organize entries by name
|
||||||
|
if not hasattr(self, '_by_name'):
|
||||||
|
self._by_name = {}
|
||||||
|
for entry in self.entries.values():
|
||||||
|
if entry.name is not None:
|
||||||
|
self._by_name[entry.name] = entry
|
||||||
|
|
||||||
|
# exact match? avoid difflib if we can for speed
|
||||||
|
if k in self._by_name:
|
||||||
|
return self._by_name[k]
|
||||||
|
# find the best matching dwarf entry with difflib
|
||||||
|
#
|
||||||
|
# this can be different from the actual symbol because
|
||||||
|
# of optimization passes
|
||||||
|
else:
|
||||||
|
name, entry = max(
|
||||||
|
self._by_name.items(),
|
||||||
|
key=lambda entry: difflib.SequenceMatcher(
|
||||||
|
None, entry[0], k, False).ratio(),
|
||||||
|
default=(None, None))
|
||||||
|
return entry
|
||||||
|
|
||||||
|
def __getitem__(self, k):
|
||||||
|
v = self.get(k)
|
||||||
|
if v is None:
|
||||||
|
raise KeyError(k)
|
||||||
|
return v
|
||||||
|
|
||||||
|
def __contains__(self, k):
|
||||||
|
return self.get(k) is not None
|
||||||
|
|
||||||
|
def __len__(self):
|
||||||
|
return len(self.entries)
|
||||||
|
|
||||||
|
def __iter__(self):
|
||||||
|
return (v for k, v in self.entries.items())
|
||||||
|
|
||||||
def collect_dwarf_info(obj_path, filter=None, *,
|
def collect_dwarf_info(obj_path, filter=None, *,
|
||||||
objdump_path=OBJDUMP_PATH,
|
objdump_path=OBJDUMP_PATH,
|
||||||
**args):
|
**args):
|
||||||
filter_, filter = filter, __builtins__.filter
|
filter_, filter = filter, __builtins__.filter
|
||||||
|
|
||||||
# each dwarf entry can have attrs and children entries
|
|
||||||
class DwarfEntry:
|
|
||||||
def __init__(self, level, off, tag, ats={}, children=[]):
|
|
||||||
self.level = level
|
|
||||||
self.off = off
|
|
||||||
self.tag = tag
|
|
||||||
self.ats = ats or {}
|
|
||||||
self.children = children or []
|
|
||||||
|
|
||||||
def __getitem__(self, k):
|
|
||||||
return self.ats[k]
|
|
||||||
|
|
||||||
def __contains__(self, k):
|
|
||||||
return k in self.ats
|
|
||||||
|
|
||||||
def __repr__(self):
|
|
||||||
return '%s(%d, 0x%x, %r, %r)' % (
|
|
||||||
self.__class__.__name__,
|
|
||||||
self.level,
|
|
||||||
self.off,
|
|
||||||
self.tag,
|
|
||||||
self.ats)
|
|
||||||
|
|
||||||
info_pattern = re.compile(
|
info_pattern = re.compile(
|
||||||
'^\s*(?:<(?P<level>[^>]*)>'
|
'^\s*(?:<(?P<level>[^>]*)>'
|
||||||
'\s*<(?P<off>[^>]*)>'
|
'\s*<(?P<off>[^>]*)>'
|
||||||
@@ -251,7 +319,7 @@ def collect_dwarf_info(obj_path, filter=None, *,
|
|||||||
'\s*:(?P<v>.*))\s*$')
|
'\s*:(?P<v>.*))\s*$')
|
||||||
|
|
||||||
# collect dwarf entries
|
# collect dwarf entries
|
||||||
entries = co.OrderedDict()
|
info = co.OrderedDict()
|
||||||
entry = None
|
entry = None
|
||||||
levels = {}
|
levels = {}
|
||||||
# note objdump-path may contain extra args
|
# note objdump-path may contain extra args
|
||||||
@@ -277,7 +345,7 @@ def collect_dwarf_info(obj_path, filter=None, *,
|
|||||||
if (entry.level == 1 and (
|
if (entry.level == 1 and (
|
||||||
# unless this entry is filtered
|
# unless this entry is filtered
|
||||||
filter_ is None or entry.tag in filter_)):
|
filter_ is None or entry.tag in filter_)):
|
||||||
entries[entry.off] = entry
|
info[entry.off] = entry
|
||||||
# store entry in parent
|
# store entry in parent
|
||||||
levels[entry.level] = entry
|
levels[entry.level] = entry
|
||||||
if entry.level-1 in levels:
|
if entry.level-1 in levels:
|
||||||
@@ -290,7 +358,7 @@ def collect_dwarf_info(obj_path, filter=None, *,
|
|||||||
if proc.returncode != 0:
|
if proc.returncode != 0:
|
||||||
raise sp.CalledProcessError(proc.returncode, proc.args)
|
raise sp.CalledProcessError(proc.returncode, proc.args)
|
||||||
|
|
||||||
return entries
|
return DwarfInfo(info)
|
||||||
|
|
||||||
def collect_sizes(obj_path, *,
|
def collect_sizes(obj_path, *,
|
||||||
nm_path=NM_PATH,
|
nm_path=NM_PATH,
|
||||||
@@ -331,33 +399,21 @@ def collect(obj_paths, *,
|
|||||||
**args):
|
**args):
|
||||||
results = []
|
results = []
|
||||||
for obj_path in obj_paths:
|
for obj_path in obj_paths:
|
||||||
# guess the source, if we have debug-info we'll replace this later
|
|
||||||
file = re.sub('(\.o)?$', '.c', obj_path, 1)
|
|
||||||
|
|
||||||
# find sizes
|
# find sizes
|
||||||
sizes = collect_sizes(obj_path, everything=everything, **args)
|
sizes = collect_sizes(obj_path, everything=everything, **args)
|
||||||
|
|
||||||
# try to figure out the source file if we have debug-info
|
# try to figure out the source file if we have debug-info
|
||||||
defs = {}
|
|
||||||
try:
|
try:
|
||||||
files = collect_dwarf_files(obj_path, **args)
|
files = collect_dwarf_files(obj_path, **args)
|
||||||
info = collect_dwarf_info(obj_path,
|
info = collect_dwarf_info(obj_path,
|
||||||
filter={'DW_TAG_subprogram', 'DW_TAG_variable'},
|
filter={'DW_TAG_subprogram', 'DW_TAG_variable'},
|
||||||
**args)
|
**args)
|
||||||
|
|
||||||
for no, entry in info.items():
|
|
||||||
# skip funcs with no name or no file
|
|
||||||
if ('DW_AT_name' not in entry
|
|
||||||
or 'DW_AT_decl_file' not in entry):
|
|
||||||
continue
|
|
||||||
name_ = entry['DW_AT_name'].split(':')[-1].strip()
|
|
||||||
file_ = files.get(int(entry['DW_AT_decl_file']), '?')
|
|
||||||
defs[name_] = file_
|
|
||||||
|
|
||||||
except sp.CalledProcessError:
|
except sp.CalledProcessError:
|
||||||
# do nothing on error, we don't need objdump to work, source
|
# do nothing on error, we don't need objdump to work, source
|
||||||
# files may just be inaccurate
|
# files may just be inaccurate
|
||||||
pass
|
files = {}
|
||||||
|
info = {}
|
||||||
|
|
||||||
# map function sizes to debug symbols
|
# map function sizes to debug symbols
|
||||||
for func, size in sizes.items():
|
for func, size in sizes.items():
|
||||||
@@ -365,34 +421,28 @@ def collect(obj_paths, *,
|
|||||||
if not everything and func.startswith('__'):
|
if not everything and func.startswith('__'):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# find best matching debug symbol, this may be slightly different
|
# find best matching dwarf entry, this may be slightly different
|
||||||
# due to optimizations
|
# due to optimizations
|
||||||
if defs:
|
entry = info.get(func)
|
||||||
# exact match? avoid difflib if we can for speed
|
|
||||||
if func in defs:
|
if entry is not None and 'DW_AT_decl_file' in entry:
|
||||||
file_ = defs[func]
|
file = files.get(int(entry['DW_AT_decl_file']), '?')
|
||||||
else:
|
|
||||||
_, file_ = max(
|
|
||||||
defs.items(),
|
|
||||||
key=lambda d: difflib.SequenceMatcher(None,
|
|
||||||
d[0],
|
|
||||||
func, False).ratio())
|
|
||||||
else:
|
else:
|
||||||
file_ = file
|
file = re.sub('(\.o)?$', '.c', obj_path, 1)
|
||||||
|
|
||||||
# ignore filtered sources
|
# ignore filtered sources
|
||||||
if sources is not None:
|
if sources is not None:
|
||||||
if not any(os.path.abspath(file_) == os.path.abspath(s)
|
if not any(os.path.abspath(file) == os.path.abspath(s)
|
||||||
for s in sources):
|
for s in sources):
|
||||||
continue
|
continue
|
||||||
else:
|
else:
|
||||||
# default to only cwd
|
# default to only cwd
|
||||||
if not everything and not os.path.commonpath([
|
if not everything and not os.path.commonpath([
|
||||||
os.getcwd(),
|
os.getcwd(),
|
||||||
os.path.abspath(file_)]) == os.getcwd():
|
os.path.abspath(file)]) == os.getcwd():
|
||||||
continue
|
continue
|
||||||
|
|
||||||
results.append(CodeResult(file_, func, size))
|
results.append(CodeResult(file, func, size))
|
||||||
|
|
||||||
return results
|
return results
|
||||||
|
|
||||||
@@ -624,6 +674,9 @@ def table(Result, results, diff_results=None, *,
|
|||||||
types[k].ratio(
|
types[k].ratio(
|
||||||
getattr(r, k, None),
|
getattr(r, k, None),
|
||||||
getattr(diff_r, k, None)))))
|
getattr(diff_r, k, None)))))
|
||||||
|
# append any notes
|
||||||
|
if hasattr(r, 'notes'):
|
||||||
|
entry[-1][1].extend(r.notes)
|
||||||
return entry
|
return entry
|
||||||
|
|
||||||
# recursive entry helper, only used by some scripts
|
# recursive entry helper, only used by some scripts
|
||||||
|
|||||||
@@ -578,6 +578,9 @@ def table(Result, results, diff_results=None, *,
|
|||||||
types[k].ratio(
|
types[k].ratio(
|
||||||
getattr(r, k, None),
|
getattr(r, k, None),
|
||||||
getattr(diff_r, k, None)))))
|
getattr(diff_r, k, None)))))
|
||||||
|
# append any notes
|
||||||
|
if hasattr(r, 'notes'):
|
||||||
|
entry[-1][1].extend(r.notes)
|
||||||
return entry
|
return entry
|
||||||
|
|
||||||
# recursive entry helper, only used by some scripts
|
# recursive entry helper, only used by some scripts
|
||||||
|
|||||||
@@ -1590,6 +1590,9 @@ def table(Result, results, diff_results=None, *,
|
|||||||
types[k].ratio(
|
types[k].ratio(
|
||||||
getattr(r, k, None),
|
getattr(r, k, None),
|
||||||
getattr(diff_r, k, None)))))
|
getattr(diff_r, k, None)))))
|
||||||
|
# append any notes
|
||||||
|
if hasattr(r, 'notes'):
|
||||||
|
entry[-1][1].extend(r.notes)
|
||||||
return entry
|
return entry
|
||||||
|
|
||||||
# recursive entry helper, only used by some scripts
|
# recursive entry helper, only used by some scripts
|
||||||
|
|||||||
311
scripts/ctx.py
311
scripts/ctx.py
@@ -160,61 +160,67 @@ def openio(path, mode='r', buffering=-1):
|
|||||||
else:
|
else:
|
||||||
return open(path, mode, buffering)
|
return open(path, mode, buffering)
|
||||||
|
|
||||||
def collect_syms(obj_path, global_only=False, *,
|
class SymInfo:
|
||||||
objdump_path=None,
|
def __init__(self, syms):
|
||||||
**args):
|
self.syms = syms
|
||||||
class SymInfo:
|
|
||||||
def __init__(self, syms):
|
|
||||||
self.syms = syms
|
|
||||||
|
|
||||||
def get(self, k, d=None):
|
def get(self, k, d=None):
|
||||||
# allow lookup by both symbol and address
|
# allow lookup by both symbol and address
|
||||||
if isinstance(k, str):
|
if isinstance(k, str):
|
||||||
# organize by symbol, note multiple symbols can share a name
|
# organize by symbol, note multiple symbols can share a name
|
||||||
if not hasattr(self, '_by_sym'):
|
if not hasattr(self, '_by_sym'):
|
||||||
self._by_sym = {}
|
by_sym = {}
|
||||||
for sym, addr, size in self.syms:
|
for sym, addr, size in self.syms:
|
||||||
self._by_sym[sym] = (addr, size)
|
if sym not in by_sym:
|
||||||
return self._by_sym.get(k, d)
|
by_sym[sym] = []
|
||||||
|
if (addr, size) not in by_sym[sym]:
|
||||||
|
by_sym[sym].append((addr, size))
|
||||||
|
self._by_sym = by_sym
|
||||||
|
return self._by_sym.get(k, d)
|
||||||
|
|
||||||
|
else:
|
||||||
|
import bisect
|
||||||
|
|
||||||
|
# organize by address
|
||||||
|
if not hasattr(self, '_by_addr'):
|
||||||
|
# sort and keep largest/first when duplicates
|
||||||
|
syms = self.syms.copy()
|
||||||
|
syms.sort(key=lambda x: (x[1], -x[2], x[0]))
|
||||||
|
|
||||||
|
by_addr = []
|
||||||
|
for name, addr, size in syms:
|
||||||
|
if (len(by_addr) == 0
|
||||||
|
or by_addr[-1][0] != addr):
|
||||||
|
by_addr.append((name, addr, size))
|
||||||
|
self._by_addr = by_addr
|
||||||
|
|
||||||
|
# find sym by range
|
||||||
|
i = bisect.bisect(self._by_addr, k,
|
||||||
|
key=lambda x: x[1])
|
||||||
|
# check that we're actually in this sym's size
|
||||||
|
if i > 0 and k < self._by_addr[i-1][1]+self._by_addr[i-1][2]:
|
||||||
|
return self._by_addr[i-1][0]
|
||||||
else:
|
else:
|
||||||
import bisect
|
return d
|
||||||
|
|
||||||
# organize by address
|
def __getitem__(self, k):
|
||||||
if not hasattr(self, '_by_addr'):
|
v = self.get(k)
|
||||||
# sort and keep largest/first when duplicates
|
if v is None:
|
||||||
syms = self.syms.copy()
|
raise KeyError(k)
|
||||||
syms.sort(key=lambda x: (x[1], -x[2], x[0]))
|
return v
|
||||||
|
|
||||||
self._by_addr = []
|
def __contains__(self, k):
|
||||||
for name, addr, size in syms:
|
return self.get(k) is not None
|
||||||
if (len(self._by_addr) == 0
|
|
||||||
or self._by_addr[-1][0] != addr):
|
|
||||||
self._by_addr.append((name, addr, size))
|
|
||||||
|
|
||||||
# find sym by range
|
def __len__(self):
|
||||||
i = bisect.bisect(self._by_addr, k, key=lambda x: x[1])
|
return len(self.syms)
|
||||||
# check that we're actually in this sym's size
|
|
||||||
if i > 0 and k < self._by_addr[i-1][1]+self._by_addr[i-1][2]:
|
|
||||||
return self._by_addr[i-1]
|
|
||||||
else:
|
|
||||||
return None
|
|
||||||
|
|
||||||
def __getitem__(self, k):
|
def __iter__(self):
|
||||||
v = self.get(k)
|
return iter(self.syms)
|
||||||
if v is None:
|
|
||||||
raise KeyError(k)
|
|
||||||
return v
|
|
||||||
|
|
||||||
def __contains__(self, k):
|
|
||||||
return self.get(k) is not None
|
|
||||||
|
|
||||||
def __len__(self):
|
|
||||||
return len(self.syms)
|
|
||||||
|
|
||||||
def __iter__(self):
|
|
||||||
return iter(self.syms)
|
|
||||||
|
|
||||||
|
def collect_syms(obj_path, global_only=False, *,
|
||||||
|
objdump_path=OBJDUMP_PATH,
|
||||||
|
**args):
|
||||||
symbol_pattern = re.compile(
|
symbol_pattern = re.compile(
|
||||||
'^(?P<addr>[0-9a-fA-F]+)'
|
'^(?P<addr>[0-9a-fA-F]+)'
|
||||||
' (?P<scope>.).*'
|
' (?P<scope>.).*'
|
||||||
@@ -260,28 +266,6 @@ def collect_syms(obj_path, global_only=False, *,
|
|||||||
def collect_dwarf_files(obj_path, *,
|
def collect_dwarf_files(obj_path, *,
|
||||||
objdump_path=OBJDUMP_PATH,
|
objdump_path=OBJDUMP_PATH,
|
||||||
**args):
|
**args):
|
||||||
class FileInfo:
|
|
||||||
def __init__(self, files):
|
|
||||||
self.files = files
|
|
||||||
|
|
||||||
def get(self, k, d=None):
|
|
||||||
return self.files.get(k, d)
|
|
||||||
|
|
||||||
def __getitem__(self, k):
|
|
||||||
v = self.get(k)
|
|
||||||
if v is None:
|
|
||||||
raise KeyError(k)
|
|
||||||
return v
|
|
||||||
|
|
||||||
def __contains__(self, k):
|
|
||||||
return self.get(k) is not None
|
|
||||||
|
|
||||||
def __len__(self):
|
|
||||||
return len(self.files)
|
|
||||||
|
|
||||||
def __iter__(self):
|
|
||||||
return (v for k, v in self.files.items())
|
|
||||||
|
|
||||||
line_pattern = re.compile(
|
line_pattern = re.compile(
|
||||||
'^\s*(?P<no>[0-9]+)'
|
'^\s*(?P<no>[0-9]+)'
|
||||||
'(?:\s+(?P<dir>[0-9]+))?'
|
'(?:\s+(?P<dir>[0-9]+))?'
|
||||||
@@ -322,7 +306,7 @@ def collect_dwarf_files(obj_path, *,
|
|||||||
raise sp.CalledProcessError(proc.returncode, proc.args)
|
raise sp.CalledProcessError(proc.returncode, proc.args)
|
||||||
|
|
||||||
# simplify paths
|
# simplify paths
|
||||||
files_ = {}
|
files_ = co.OrderedDict()
|
||||||
for no, file in files.items():
|
for no, file in files.items():
|
||||||
if os.path.commonpath([
|
if os.path.commonpath([
|
||||||
os.getcwd(),
|
os.getcwd(),
|
||||||
@@ -332,104 +316,104 @@ def collect_dwarf_files(obj_path, *,
|
|||||||
files_[no] = os.path.abspath(file)
|
files_[no] = os.path.abspath(file)
|
||||||
files = files_
|
files = files_
|
||||||
|
|
||||||
return FileInfo(files)
|
return files
|
||||||
|
|
||||||
|
# each dwarf entry can have attrs and children entries
|
||||||
|
class DwarfEntry:
|
||||||
|
def __init__(self, level, off, tag, ats={}, children=[]):
|
||||||
|
self.level = level
|
||||||
|
self.off = off
|
||||||
|
self.tag = tag
|
||||||
|
self.ats = ats or {}
|
||||||
|
self.children = children or []
|
||||||
|
|
||||||
|
def get(self, k, d=None):
|
||||||
|
return self.ats.get(k, d)
|
||||||
|
|
||||||
|
def __getitem__(self, k):
|
||||||
|
return self.ats[k]
|
||||||
|
|
||||||
|
def __contains__(self, k):
|
||||||
|
return k in self.ats
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
return '%s(%d, 0x%x, %r, %r)' % (
|
||||||
|
self.__class__.__name__,
|
||||||
|
self.level,
|
||||||
|
self.off,
|
||||||
|
self.tag,
|
||||||
|
self.ats)
|
||||||
|
|
||||||
|
@ft.cached_property
|
||||||
|
def name(self):
|
||||||
|
if 'DW_AT_name' in self:
|
||||||
|
name = self['DW_AT_name'].split(':')[-1].strip()
|
||||||
|
# prefix with struct/union/enum
|
||||||
|
if self.tag == 'DW_TAG_structure_type':
|
||||||
|
name = 'struct ' + name
|
||||||
|
elif self.tag == 'DW_TAG_union_type':
|
||||||
|
name = 'union ' + name
|
||||||
|
elif self.tag == 'DW_TAG_enumeration_type':
|
||||||
|
name = 'enum ' + name
|
||||||
|
return name
|
||||||
|
else:
|
||||||
|
return None
|
||||||
|
|
||||||
|
# a collection of dwarf entries
|
||||||
|
class DwarfInfo:
|
||||||
|
def __init__(self, entries):
|
||||||
|
self.entries = entries
|
||||||
|
|
||||||
|
def get(self, k, d=None):
|
||||||
|
# allow lookup by both offset and dwarf name
|
||||||
|
if not isinstance(k, str):
|
||||||
|
return self.entries.get(k, d)
|
||||||
|
|
||||||
|
else:
|
||||||
|
import difflib
|
||||||
|
|
||||||
|
# organize entries by name
|
||||||
|
if not hasattr(self, '_by_name'):
|
||||||
|
self._by_name = {}
|
||||||
|
for entry in self.entries.values():
|
||||||
|
if entry.name is not None:
|
||||||
|
self._by_name[entry.name] = entry
|
||||||
|
|
||||||
|
# exact match? avoid difflib if we can for speed
|
||||||
|
if k in self._by_name:
|
||||||
|
return self._by_name[k]
|
||||||
|
# find the best matching dwarf entry with difflib
|
||||||
|
#
|
||||||
|
# this can be different from the actual symbol because
|
||||||
|
# of optimization passes
|
||||||
|
else:
|
||||||
|
name, entry = max(
|
||||||
|
self._by_name.items(),
|
||||||
|
key=lambda entry: difflib.SequenceMatcher(
|
||||||
|
None, entry[0], k, False).ratio(),
|
||||||
|
default=(None, None))
|
||||||
|
return entry
|
||||||
|
|
||||||
|
def __getitem__(self, k):
|
||||||
|
v = self.get(k)
|
||||||
|
if v is None:
|
||||||
|
raise KeyError(k)
|
||||||
|
return v
|
||||||
|
|
||||||
|
def __contains__(self, k):
|
||||||
|
return self.get(k) is not None
|
||||||
|
|
||||||
|
def __len__(self):
|
||||||
|
return len(self.entries)
|
||||||
|
|
||||||
|
def __iter__(self):
|
||||||
|
return (v for k, v in self.entries.items())
|
||||||
|
|
||||||
def collect_dwarf_info(obj_path, filter=None, *,
|
def collect_dwarf_info(obj_path, filter=None, *,
|
||||||
objdump_path=OBJDUMP_PATH,
|
objdump_path=OBJDUMP_PATH,
|
||||||
**args):
|
**args):
|
||||||
filter_, filter = filter, __builtins__.filter
|
filter_, filter = filter, __builtins__.filter
|
||||||
|
|
||||||
# each dwarf entry can have attrs and children entries
|
|
||||||
class DwarfEntry:
|
|
||||||
def __init__(self, level, off, tag, ats={}, children=[]):
|
|
||||||
self.level = level
|
|
||||||
self.off = off
|
|
||||||
self.tag = tag
|
|
||||||
self.ats = ats or {}
|
|
||||||
self.children = children or []
|
|
||||||
|
|
||||||
def get(self, k, d=None):
|
|
||||||
return self.ats.get(k, d)
|
|
||||||
|
|
||||||
def __getitem__(self, k):
|
|
||||||
return self.ats[k]
|
|
||||||
|
|
||||||
def __contains__(self, k):
|
|
||||||
return k in self.ats
|
|
||||||
|
|
||||||
def __repr__(self):
|
|
||||||
return '%s(%d, 0x%x, %r, %r)' % (
|
|
||||||
self.__class__.__name__,
|
|
||||||
self.level,
|
|
||||||
self.off,
|
|
||||||
self.tag,
|
|
||||||
self.ats)
|
|
||||||
|
|
||||||
@ft.cached_property
|
|
||||||
def name(self):
|
|
||||||
if 'DW_AT_name' in self:
|
|
||||||
name = self['DW_AT_name'].split(':')[-1].strip()
|
|
||||||
# prefix with struct/union/enum
|
|
||||||
if self.tag == 'DW_TAG_structure_type':
|
|
||||||
name = 'struct ' + name
|
|
||||||
elif self.tag == 'DW_TAG_union_type':
|
|
||||||
name = 'union ' + name
|
|
||||||
elif self.tag == 'DW_TAG_enumeration_type':
|
|
||||||
name = 'enum ' + name
|
|
||||||
return name
|
|
||||||
else:
|
|
||||||
return None
|
|
||||||
|
|
||||||
# a collection of dwarf entries
|
|
||||||
class DwarfInfo:
|
|
||||||
def __init__(self, entries):
|
|
||||||
self.entries = entries
|
|
||||||
|
|
||||||
def get(self, k, d=None):
|
|
||||||
# allow lookup by both offset and dwarf name
|
|
||||||
if not isinstance(k, str):
|
|
||||||
return self.entries.get(k, d)
|
|
||||||
|
|
||||||
else:
|
|
||||||
import difflib
|
|
||||||
|
|
||||||
# organize entries by name
|
|
||||||
if not hasattr(self, '_by_name'):
|
|
||||||
self._by_name = {}
|
|
||||||
for entry in self.entries.values():
|
|
||||||
if entry.name is not None:
|
|
||||||
self._by_name[entry.name] = entry
|
|
||||||
|
|
||||||
# exact match? avoid difflib if we can for speed
|
|
||||||
if k in self._by_name:
|
|
||||||
return self._by_name[k]
|
|
||||||
# find the best matching dwarf entry with difflib
|
|
||||||
#
|
|
||||||
# this can be different from the actual symbol because
|
|
||||||
# of optimization passes
|
|
||||||
else:
|
|
||||||
name, entry = max(
|
|
||||||
self._by_name.items(),
|
|
||||||
key=lambda entry: difflib.SequenceMatcher(
|
|
||||||
None, entry[0], k, False).ratio(),
|
|
||||||
default=(None, None))
|
|
||||||
return entry
|
|
||||||
|
|
||||||
def __getitem__(self, k):
|
|
||||||
v = self.get(k)
|
|
||||||
if v is None:
|
|
||||||
raise KeyError(k)
|
|
||||||
return v
|
|
||||||
|
|
||||||
def __contains__(self, k):
|
|
||||||
return self.get(k) is not None
|
|
||||||
|
|
||||||
def __len__(self):
|
|
||||||
return len(self.entries)
|
|
||||||
|
|
||||||
def __iter__(self):
|
|
||||||
return (v for k, v in self.entries.items())
|
|
||||||
|
|
||||||
info_pattern = re.compile(
|
info_pattern = re.compile(
|
||||||
'^\s*(?:<(?P<level>[^>]*)>'
|
'^\s*(?:<(?P<level>[^>]*)>'
|
||||||
'\s*<(?P<off>[^>]*)>'
|
'\s*<(?P<off>[^>]*)>'
|
||||||
@@ -925,7 +909,8 @@ def table(Result, results, diff_results=None, *,
|
|||||||
for r in results_}
|
for r in results_}
|
||||||
names_ = list(table_.keys())
|
names_ = list(table_.keys())
|
||||||
|
|
||||||
# only sort the children layer if explicitly requested
|
# sort the children layer
|
||||||
|
names_.sort()
|
||||||
if sort:
|
if sort:
|
||||||
for k, reverse in reversed(sort):
|
for k, reverse in reversed(sort):
|
||||||
names_.sort(
|
names_.sort(
|
||||||
|
|||||||
171
scripts/data.py
171
scripts/data.py
@@ -17,8 +17,8 @@ __import__('sys').path.pop(0)
|
|||||||
|
|
||||||
import collections as co
|
import collections as co
|
||||||
import csv
|
import csv
|
||||||
import difflib
|
|
||||||
import itertools as it
|
import itertools as it
|
||||||
|
import functools as ft
|
||||||
import math as mt
|
import math as mt
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
@@ -168,8 +168,8 @@ def collect_dwarf_files(obj_path, *,
|
|||||||
'.*\s+(?P<path>[^\s]+)\s*$')
|
'.*\s+(?P<path>[^\s]+)\s*$')
|
||||||
|
|
||||||
# find source paths
|
# find source paths
|
||||||
dirs = {}
|
dirs = co.OrderedDict()
|
||||||
files = {}
|
files = co.OrderedDict()
|
||||||
# note objdump-path may contain extra args
|
# note objdump-path may contain extra args
|
||||||
cmd = objdump_path + ['--dwarf=rawline', obj_path]
|
cmd = objdump_path + ['--dwarf=rawline', obj_path]
|
||||||
if args.get('verbose'):
|
if args.get('verbose'):
|
||||||
@@ -202,7 +202,7 @@ def collect_dwarf_files(obj_path, *,
|
|||||||
raise sp.CalledProcessError(proc.returncode, proc.args)
|
raise sp.CalledProcessError(proc.returncode, proc.args)
|
||||||
|
|
||||||
# simplify paths
|
# simplify paths
|
||||||
files_ = {}
|
files_ = co.OrderedDict()
|
||||||
for no, file in files.items():
|
for no, file in files.items():
|
||||||
if os.path.commonpath([
|
if os.path.commonpath([
|
||||||
os.getcwd(),
|
os.getcwd(),
|
||||||
@@ -214,34 +214,102 @@ def collect_dwarf_files(obj_path, *,
|
|||||||
|
|
||||||
return files
|
return files
|
||||||
|
|
||||||
|
# each dwarf entry can have attrs and children entries
|
||||||
|
class DwarfEntry:
|
||||||
|
def __init__(self, level, off, tag, ats={}, children=[]):
|
||||||
|
self.level = level
|
||||||
|
self.off = off
|
||||||
|
self.tag = tag
|
||||||
|
self.ats = ats or {}
|
||||||
|
self.children = children or []
|
||||||
|
|
||||||
|
def get(self, k, d=None):
|
||||||
|
return self.ats.get(k, d)
|
||||||
|
|
||||||
|
def __getitem__(self, k):
|
||||||
|
return self.ats[k]
|
||||||
|
|
||||||
|
def __contains__(self, k):
|
||||||
|
return k in self.ats
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
return '%s(%d, 0x%x, %r, %r)' % (
|
||||||
|
self.__class__.__name__,
|
||||||
|
self.level,
|
||||||
|
self.off,
|
||||||
|
self.tag,
|
||||||
|
self.ats)
|
||||||
|
|
||||||
|
@ft.cached_property
|
||||||
|
def name(self):
|
||||||
|
if 'DW_AT_name' in self:
|
||||||
|
name = self['DW_AT_name'].split(':')[-1].strip()
|
||||||
|
# prefix with struct/union/enum
|
||||||
|
if self.tag == 'DW_TAG_structure_type':
|
||||||
|
name = 'struct ' + name
|
||||||
|
elif self.tag == 'DW_TAG_union_type':
|
||||||
|
name = 'union ' + name
|
||||||
|
elif self.tag == 'DW_TAG_enumeration_type':
|
||||||
|
name = 'enum ' + name
|
||||||
|
return name
|
||||||
|
else:
|
||||||
|
return None
|
||||||
|
|
||||||
|
# a collection of dwarf entries
|
||||||
|
class DwarfInfo:
|
||||||
|
def __init__(self, entries):
|
||||||
|
self.entries = entries
|
||||||
|
|
||||||
|
def get(self, k, d=None):
|
||||||
|
# allow lookup by both offset and dwarf name
|
||||||
|
if not isinstance(k, str):
|
||||||
|
return self.entries.get(k, d)
|
||||||
|
|
||||||
|
else:
|
||||||
|
import difflib
|
||||||
|
|
||||||
|
# organize entries by name
|
||||||
|
if not hasattr(self, '_by_name'):
|
||||||
|
self._by_name = {}
|
||||||
|
for entry in self.entries.values():
|
||||||
|
if entry.name is not None:
|
||||||
|
self._by_name[entry.name] = entry
|
||||||
|
|
||||||
|
# exact match? avoid difflib if we can for speed
|
||||||
|
if k in self._by_name:
|
||||||
|
return self._by_name[k]
|
||||||
|
# find the best matching dwarf entry with difflib
|
||||||
|
#
|
||||||
|
# this can be different from the actual symbol because
|
||||||
|
# of optimization passes
|
||||||
|
else:
|
||||||
|
name, entry = max(
|
||||||
|
self._by_name.items(),
|
||||||
|
key=lambda entry: difflib.SequenceMatcher(
|
||||||
|
None, entry[0], k, False).ratio(),
|
||||||
|
default=(None, None))
|
||||||
|
return entry
|
||||||
|
|
||||||
|
def __getitem__(self, k):
|
||||||
|
v = self.get(k)
|
||||||
|
if v is None:
|
||||||
|
raise KeyError(k)
|
||||||
|
return v
|
||||||
|
|
||||||
|
def __contains__(self, k):
|
||||||
|
return self.get(k) is not None
|
||||||
|
|
||||||
|
def __len__(self):
|
||||||
|
return len(self.entries)
|
||||||
|
|
||||||
|
def __iter__(self):
|
||||||
|
return (v for k, v in self.entries.items())
|
||||||
|
|
||||||
def collect_dwarf_info(obj_path, filter=None, *,
|
def collect_dwarf_info(obj_path, filter=None, *,
|
||||||
objdump_path=OBJDUMP_PATH,
|
objdump_path=OBJDUMP_PATH,
|
||||||
**args):
|
**args):
|
||||||
filter_, filter = filter, __builtins__.filter
|
filter_, filter = filter, __builtins__.filter
|
||||||
|
|
||||||
# each dwarf entry can have attrs and children entries
|
|
||||||
class DwarfEntry:
|
|
||||||
def __init__(self, level, off, tag, ats={}, children=[]):
|
|
||||||
self.level = level
|
|
||||||
self.off = off
|
|
||||||
self.tag = tag
|
|
||||||
self.ats = ats or {}
|
|
||||||
self.children = children or []
|
|
||||||
|
|
||||||
def __getitem__(self, k):
|
|
||||||
return self.ats[k]
|
|
||||||
|
|
||||||
def __contains__(self, k):
|
|
||||||
return k in self.ats
|
|
||||||
|
|
||||||
def __repr__(self):
|
|
||||||
return '%s(%d, 0x%x, %r, %r)' % (
|
|
||||||
self.__class__.__name__,
|
|
||||||
self.level,
|
|
||||||
self.off,
|
|
||||||
self.tag,
|
|
||||||
self.ats)
|
|
||||||
|
|
||||||
info_pattern = re.compile(
|
info_pattern = re.compile(
|
||||||
'^\s*(?:<(?P<level>[^>]*)>'
|
'^\s*(?:<(?P<level>[^>]*)>'
|
||||||
'\s*<(?P<off>[^>]*)>'
|
'\s*<(?P<off>[^>]*)>'
|
||||||
@@ -251,7 +319,7 @@ def collect_dwarf_info(obj_path, filter=None, *,
|
|||||||
'\s*:(?P<v>.*))\s*$')
|
'\s*:(?P<v>.*))\s*$')
|
||||||
|
|
||||||
# collect dwarf entries
|
# collect dwarf entries
|
||||||
entries = co.OrderedDict()
|
info = co.OrderedDict()
|
||||||
entry = None
|
entry = None
|
||||||
levels = {}
|
levels = {}
|
||||||
# note objdump-path may contain extra args
|
# note objdump-path may contain extra args
|
||||||
@@ -277,7 +345,7 @@ def collect_dwarf_info(obj_path, filter=None, *,
|
|||||||
if (entry.level == 1 and (
|
if (entry.level == 1 and (
|
||||||
# unless this entry is filtered
|
# unless this entry is filtered
|
||||||
filter_ is None or entry.tag in filter_)):
|
filter_ is None or entry.tag in filter_)):
|
||||||
entries[entry.off] = entry
|
info[entry.off] = entry
|
||||||
# store entry in parent
|
# store entry in parent
|
||||||
levels[entry.level] = entry
|
levels[entry.level] = entry
|
||||||
if entry.level-1 in levels:
|
if entry.level-1 in levels:
|
||||||
@@ -290,7 +358,7 @@ def collect_dwarf_info(obj_path, filter=None, *,
|
|||||||
if proc.returncode != 0:
|
if proc.returncode != 0:
|
||||||
raise sp.CalledProcessError(proc.returncode, proc.args)
|
raise sp.CalledProcessError(proc.returncode, proc.args)
|
||||||
|
|
||||||
return entries
|
return DwarfInfo(info)
|
||||||
|
|
||||||
def collect_sizes(obj_path, *,
|
def collect_sizes(obj_path, *,
|
||||||
nm_path=NM_PATH,
|
nm_path=NM_PATH,
|
||||||
@@ -331,33 +399,21 @@ def collect(obj_paths, *,
|
|||||||
**args):
|
**args):
|
||||||
results = []
|
results = []
|
||||||
for obj_path in obj_paths:
|
for obj_path in obj_paths:
|
||||||
# guess the source, if we have debug-info we'll replace this later
|
|
||||||
file = re.sub('(\.o)?$', '.c', obj_path, 1)
|
|
||||||
|
|
||||||
# find sizes
|
# find sizes
|
||||||
sizes = collect_sizes(obj_path, everything=everything, **args)
|
sizes = collect_sizes(obj_path, everything=everything, **args)
|
||||||
|
|
||||||
# try to figure out the source file if we have debug-info
|
# try to figure out the source file if we have debug-info
|
||||||
defs = {}
|
|
||||||
try:
|
try:
|
||||||
files = collect_dwarf_files(obj_path, **args)
|
files = collect_dwarf_files(obj_path, **args)
|
||||||
info = collect_dwarf_info(obj_path,
|
info = collect_dwarf_info(obj_path,
|
||||||
filter={'DW_TAG_subprogram', 'DW_TAG_variable'},
|
filter={'DW_TAG_subprogram', 'DW_TAG_variable'},
|
||||||
**args)
|
**args)
|
||||||
|
|
||||||
for no, entry in info.items():
|
|
||||||
# skip funcs with no name or no file
|
|
||||||
if ('DW_AT_name' not in entry
|
|
||||||
or 'DW_AT_decl_file' not in entry):
|
|
||||||
continue
|
|
||||||
name_ = entry['DW_AT_name'].split(':')[-1].strip()
|
|
||||||
file_ = files.get(int(entry['DW_AT_decl_file']), '?')
|
|
||||||
defs[name_] = file_
|
|
||||||
|
|
||||||
except sp.CalledProcessError:
|
except sp.CalledProcessError:
|
||||||
# do nothing on error, we don't need objdump to work, source
|
# do nothing on error, we don't need objdump to work, source
|
||||||
# files may just be inaccurate
|
# files may just be inaccurate
|
||||||
pass
|
files = {}
|
||||||
|
info = {}
|
||||||
|
|
||||||
# map function sizes to debug symbols
|
# map function sizes to debug symbols
|
||||||
for func, size in sizes.items():
|
for func, size in sizes.items():
|
||||||
@@ -365,34 +421,28 @@ def collect(obj_paths, *,
|
|||||||
if not everything and func.startswith('__'):
|
if not everything and func.startswith('__'):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# find best matching debug symbol, this may be slightly different
|
# find best matching dwarf entry, this may be slightly different
|
||||||
# due to optimizations
|
# due to optimizations
|
||||||
if defs:
|
entry = info.get(func)
|
||||||
# exact match? avoid difflib if we can for speed
|
|
||||||
if func in defs:
|
if entry is not None and 'DW_AT_decl_file' in entry:
|
||||||
file_ = defs[func]
|
file = files.get(int(entry['DW_AT_decl_file']), '?')
|
||||||
else:
|
|
||||||
_, file_ = max(
|
|
||||||
defs.items(),
|
|
||||||
key=lambda d: difflib.SequenceMatcher(None,
|
|
||||||
d[0],
|
|
||||||
func, False).ratio())
|
|
||||||
else:
|
else:
|
||||||
file_ = file
|
file = re.sub('(\.o)?$', '.c', obj_path, 1)
|
||||||
|
|
||||||
# ignore filtered sources
|
# ignore filtered sources
|
||||||
if sources is not None:
|
if sources is not None:
|
||||||
if not any(os.path.abspath(file_) == os.path.abspath(s)
|
if not any(os.path.abspath(file) == os.path.abspath(s)
|
||||||
for s in sources):
|
for s in sources):
|
||||||
continue
|
continue
|
||||||
else:
|
else:
|
||||||
# default to only cwd
|
# default to only cwd
|
||||||
if not everything and not os.path.commonpath([
|
if not everything and not os.path.commonpath([
|
||||||
os.getcwd(),
|
os.getcwd(),
|
||||||
os.path.abspath(file_)]) == os.getcwd():
|
os.path.abspath(file)]) == os.getcwd():
|
||||||
continue
|
continue
|
||||||
|
|
||||||
results.append(DataResult(file_, func, size))
|
results.append(CodeResult(file, func, size))
|
||||||
|
|
||||||
return results
|
return results
|
||||||
|
|
||||||
@@ -624,6 +674,9 @@ def table(Result, results, diff_results=None, *,
|
|||||||
types[k].ratio(
|
types[k].ratio(
|
||||||
getattr(r, k, None),
|
getattr(r, k, None),
|
||||||
getattr(diff_r, k, None)))))
|
getattr(diff_r, k, None)))))
|
||||||
|
# append any notes
|
||||||
|
if hasattr(r, 'notes'):
|
||||||
|
entry[-1][1].extend(r.notes)
|
||||||
return entry
|
return entry
|
||||||
|
|
||||||
# recursive entry helper, only used by some scripts
|
# recursive entry helper, only used by some scripts
|
||||||
|
|||||||
205
scripts/perf.py
205
scripts/perf.py
@@ -155,12 +155,12 @@ class PerfResult(co.namedtuple('PerfResult', [
|
|||||||
__slots__ = ()
|
__slots__ = ()
|
||||||
def __new__(cls, file='', function='', line=0,
|
def __new__(cls, file='', function='', line=0,
|
||||||
cycles=0, bmisses=0, branches=0, cmisses=0, caches=0,
|
cycles=0, bmisses=0, branches=0, cmisses=0, caches=0,
|
||||||
children=[]):
|
children=None):
|
||||||
return super().__new__(cls, file, function, int(RInt(line)),
|
return super().__new__(cls, file, function, int(RInt(line)),
|
||||||
RInt(cycles),
|
RInt(cycles),
|
||||||
RInt(bmisses), RInt(branches),
|
RInt(bmisses), RInt(branches),
|
||||||
RInt(cmisses), RInt(caches),
|
RInt(cmisses), RInt(caches),
|
||||||
children)
|
children if children is not None else [])
|
||||||
|
|
||||||
def __add__(self, other):
|
def __add__(self, other):
|
||||||
return PerfResult(self.file, self.function, self.line,
|
return PerfResult(self.file, self.function, self.line,
|
||||||
@@ -259,19 +259,76 @@ def multiprocessing_cache(f):
|
|||||||
|
|
||||||
return multiprocessing_cache
|
return multiprocessing_cache
|
||||||
|
|
||||||
|
class SymInfo:
|
||||||
|
def __init__(self, syms):
|
||||||
|
self.syms = syms
|
||||||
|
|
||||||
|
def get(self, k, d=None):
|
||||||
|
# allow lookup by both symbol and address
|
||||||
|
if isinstance(k, str):
|
||||||
|
# organize by symbol, note multiple symbols can share a name
|
||||||
|
if not hasattr(self, '_by_sym'):
|
||||||
|
by_sym = {}
|
||||||
|
for sym, addr, size in self.syms:
|
||||||
|
if sym not in by_sym:
|
||||||
|
by_sym[sym] = []
|
||||||
|
if (addr, size) not in by_sym[sym]:
|
||||||
|
by_sym[sym].append((addr, size))
|
||||||
|
self._by_sym = by_sym
|
||||||
|
return self._by_sym.get(k, d)
|
||||||
|
|
||||||
|
else:
|
||||||
|
import bisect
|
||||||
|
|
||||||
|
# organize by address
|
||||||
|
if not hasattr(self, '_by_addr'):
|
||||||
|
# sort and keep largest/first when duplicates
|
||||||
|
syms = self.syms.copy()
|
||||||
|
syms.sort(key=lambda x: (x[1], -x[2], x[0]))
|
||||||
|
|
||||||
|
by_addr = []
|
||||||
|
for name, addr, size in syms:
|
||||||
|
if (len(by_addr) == 0
|
||||||
|
or by_addr[-1][0] != addr):
|
||||||
|
by_addr.append((name, addr, size))
|
||||||
|
self._by_addr = by_addr
|
||||||
|
|
||||||
|
# find sym by range
|
||||||
|
i = bisect.bisect(self._by_addr, k,
|
||||||
|
key=lambda x: x[1])
|
||||||
|
# check that we're actually in this sym's size
|
||||||
|
if i > 0 and k < self._by_addr[i-1][1]+self._by_addr[i-1][2]:
|
||||||
|
return self._by_addr[i-1][0]
|
||||||
|
else:
|
||||||
|
return d
|
||||||
|
|
||||||
|
def __getitem__(self, k):
|
||||||
|
v = self.get(k)
|
||||||
|
if v is None:
|
||||||
|
raise KeyError(k)
|
||||||
|
return v
|
||||||
|
|
||||||
|
def __contains__(self, k):
|
||||||
|
return self.get(k) is not None
|
||||||
|
|
||||||
|
def __len__(self):
|
||||||
|
return len(self.syms)
|
||||||
|
|
||||||
|
def __iter__(self):
|
||||||
|
return iter(self.syms)
|
||||||
|
|
||||||
@multiprocessing_cache
|
@multiprocessing_cache
|
||||||
def collect_syms(obj_path, *,
|
def collect_syms(obj_path, global_only=False, *,
|
||||||
objdump_path=None,
|
objdump_path=OBJDUMP_PATH,
|
||||||
**args):
|
**args):
|
||||||
symbol_pattern = re.compile(
|
symbol_pattern = re.compile(
|
||||||
'^(?P<addr>[0-9a-fA-F]+)'
|
'^(?P<addr>[0-9a-fA-F]+)'
|
||||||
'.*'
|
' (?P<scope>.).*'
|
||||||
'\s+(?P<size>[0-9a-fA-F]+)'
|
'\s+(?P<size>[0-9a-fA-F]+)'
|
||||||
'\s+(?P<name>[^\s]+)\s*$')
|
'\s+(?P<name>[^\s]+)\s*$')
|
||||||
|
|
||||||
# figure out symbol addresses
|
# find symbol addresses and sizes
|
||||||
syms = {}
|
syms = []
|
||||||
sym_at = []
|
|
||||||
cmd = objdump_path + ['-t', obj_path]
|
cmd = objdump_path + ['-t', obj_path]
|
||||||
if args.get('verbose'):
|
if args.get('verbose'):
|
||||||
print(' '.join(shlex.quote(c) for c in cmd))
|
print(' '.join(shlex.quote(c) for c in cmd))
|
||||||
@@ -284,33 +341,102 @@ def collect_syms(obj_path, *,
|
|||||||
m = symbol_pattern.match(line)
|
m = symbol_pattern.match(line)
|
||||||
if m:
|
if m:
|
||||||
name = m.group('name')
|
name = m.group('name')
|
||||||
|
scope = m.group('scope')
|
||||||
addr = int(m.group('addr'), 16)
|
addr = int(m.group('addr'), 16)
|
||||||
size = int(m.group('size'), 16)
|
size = int(m.group('size'), 16)
|
||||||
|
# skip non-globals?
|
||||||
|
# l => local
|
||||||
|
# g => global
|
||||||
|
# u => unique global
|
||||||
|
# => neither
|
||||||
|
# ! => local + global
|
||||||
|
if global_only and scope in 'l ':
|
||||||
|
continue
|
||||||
# ignore zero-sized symbols
|
# ignore zero-sized symbols
|
||||||
if not size:
|
if not size:
|
||||||
continue
|
continue
|
||||||
# note multiple symbols can share a name
|
# note multiple symbols can share a name
|
||||||
if name not in syms:
|
syms.append((name, addr, size))
|
||||||
syms[name] = set()
|
|
||||||
syms[name].add((addr, size))
|
|
||||||
sym_at.append((addr, name, size))
|
|
||||||
proc.wait()
|
proc.wait()
|
||||||
if proc.returncode != 0:
|
if proc.returncode != 0:
|
||||||
raise sp.CalledProcessError(proc.returncode, proc.args)
|
raise sp.CalledProcessError(proc.returncode, proc.args)
|
||||||
|
|
||||||
# sort and keep largest/first when duplicates
|
return SymInfo(syms)
|
||||||
sym_at.sort(key=lambda x: (x[0], -x[2], x[1]))
|
|
||||||
sym_at_ = []
|
|
||||||
for addr, name, size in sym_at:
|
|
||||||
if len(sym_at_) == 0 or sym_at_[-1][0] != addr:
|
|
||||||
sym_at_.append((addr, name, size))
|
|
||||||
sym_at = sym_at_
|
|
||||||
|
|
||||||
return syms, sym_at
|
class LineInfo:
|
||||||
|
def __init__(self, lines):
|
||||||
|
self.lines = lines
|
||||||
|
|
||||||
|
def get(self, k, d=None):
|
||||||
|
# allow lookup by both address and file+line tuple
|
||||||
|
if not isinstance(k, tuple):
|
||||||
|
import bisect
|
||||||
|
|
||||||
|
# organize by address
|
||||||
|
if not hasattr(self, '_by_addr'):
|
||||||
|
# sort and keep first when duplicates
|
||||||
|
lines = self.lines.copy()
|
||||||
|
lines.sort(key=lambda x: (x[2], x[0], x[1]))
|
||||||
|
|
||||||
|
by_addr = []
|
||||||
|
for file, line, addr in lines:
|
||||||
|
if (len(by_addr) == 0
|
||||||
|
or by_addr[-1][2] != addr):
|
||||||
|
by_addr.append((file, line, addr))
|
||||||
|
self._by_addr = by_addr
|
||||||
|
|
||||||
|
# find file+line by addr
|
||||||
|
i = bisect.bisect(self._by_addr, k,
|
||||||
|
key=lambda x: x[2])
|
||||||
|
if i > 0:
|
||||||
|
return self._by_addr[i-1][0], self._by_addr[i-1][1]
|
||||||
|
else:
|
||||||
|
return d
|
||||||
|
|
||||||
|
else:
|
||||||
|
import bisect
|
||||||
|
|
||||||
|
# organize by file+line
|
||||||
|
if not hasattr(self, '_by_line'):
|
||||||
|
# sort and keep first when duplicates
|
||||||
|
lines = self.lines.copy()
|
||||||
|
lines.sort()
|
||||||
|
|
||||||
|
by_line = []
|
||||||
|
for file, line, addr in lines:
|
||||||
|
if (len(by_line) == 0
|
||||||
|
or by_line[-1][0] != file
|
||||||
|
or by_line[-1][1] != line):
|
||||||
|
by_line.append((file, line, addr))
|
||||||
|
self._by_line = by_line
|
||||||
|
|
||||||
|
# find addr by file+line tuple
|
||||||
|
i = bisect.bisect(self._by_line, k,
|
||||||
|
key=lambda x: (x[0], x[1]))
|
||||||
|
# make sure file at least matches!
|
||||||
|
if i > 0 and self._by_line[i-1][0] == k[0]:
|
||||||
|
return self._by_line[i-1][2]
|
||||||
|
else:
|
||||||
|
return d
|
||||||
|
|
||||||
|
def __getitem__(self, k):
|
||||||
|
v = self.get(k)
|
||||||
|
if v is None:
|
||||||
|
raise KeyError(k)
|
||||||
|
return v
|
||||||
|
|
||||||
|
def __contains__(self, k):
|
||||||
|
return self.get(k) is not None
|
||||||
|
|
||||||
|
def __len__(self):
|
||||||
|
return len(self.lines)
|
||||||
|
|
||||||
|
def __iter__(self):
|
||||||
|
return iter(self.lines)
|
||||||
|
|
||||||
@multiprocessing_cache
|
@multiprocessing_cache
|
||||||
def collect_dwarf_lines(obj_path, *,
|
def collect_dwarf_lines(obj_path, *,
|
||||||
objdump_path=None,
|
objdump_path=OBJDUMP_PATH,
|
||||||
**args):
|
**args):
|
||||||
line_pattern = re.compile(
|
line_pattern = re.compile(
|
||||||
'^\s*(?:'
|
'^\s*(?:'
|
||||||
@@ -334,9 +460,8 @@ def collect_dwarf_lines(obj_path, *,
|
|||||||
# decodedline seems to have issues with multiple dir/file
|
# decodedline seems to have issues with multiple dir/file
|
||||||
# tables, which is why we need this
|
# tables, which is why we need this
|
||||||
lines = []
|
lines = []
|
||||||
line_at = []
|
dirs = co.OrderedDict()
|
||||||
dirs = {}
|
files = co.OrderedDict()
|
||||||
files = {}
|
|
||||||
op_file = 1
|
op_file = 1
|
||||||
op_line = 1
|
op_line = 1
|
||||||
op_addr = 0
|
op_addr = 0
|
||||||
@@ -377,7 +502,6 @@ def collect_dwarf_lines(obj_path, *,
|
|||||||
or m.group('op_end')):
|
or m.group('op_end')):
|
||||||
file = os.path.abspath(files.get(op_file, '?'))
|
file = os.path.abspath(files.get(op_file, '?'))
|
||||||
lines.append((file, op_line, op_addr))
|
lines.append((file, op_line, op_addr))
|
||||||
line_at.append((op_addr, file, op_line))
|
|
||||||
|
|
||||||
if m.group('op_end'):
|
if m.group('op_end'):
|
||||||
op_file = 1
|
op_file = 1
|
||||||
@@ -387,23 +511,7 @@ def collect_dwarf_lines(obj_path, *,
|
|||||||
if proc.returncode != 0:
|
if proc.returncode != 0:
|
||||||
raise sp.CalledProcessError(proc.returncode, proc.args)
|
raise sp.CalledProcessError(proc.returncode, proc.args)
|
||||||
|
|
||||||
# sort and keep first when duplicates
|
return LineInfo(lines)
|
||||||
lines.sort()
|
|
||||||
lines_ = []
|
|
||||||
for file, line, addr in lines:
|
|
||||||
if len(lines_) == 0 or lines_[-1][0] != file or lines[-1][1] != line:
|
|
||||||
lines_.append((file, line, addr))
|
|
||||||
lines = lines_
|
|
||||||
|
|
||||||
# sort and keep first when duplicates
|
|
||||||
line_at.sort()
|
|
||||||
line_at_ = []
|
|
||||||
for addr, file, line in line_at:
|
|
||||||
if len(line_at_) == 0 or line_at_[-1][0] != addr:
|
|
||||||
line_at_.append((addr, file, line))
|
|
||||||
line_at = line_at_
|
|
||||||
|
|
||||||
return lines, line_at
|
|
||||||
|
|
||||||
|
|
||||||
def collect_decompressed(path, *,
|
def collect_decompressed(path, *,
|
||||||
@@ -502,8 +610,8 @@ def collect_decompressed(path, *,
|
|||||||
addr_ = int(m.group('addr'), 16)
|
addr_ = int(m.group('addr'), 16)
|
||||||
|
|
||||||
# get the syms/lines for the dso, this is cached
|
# get the syms/lines for the dso, this is cached
|
||||||
syms, sym_at = collect_syms(dso, **args)
|
syms = collect_syms(dso, **args)
|
||||||
lines, line_at = collect_dwarf_lines(dso, **args)
|
lines = collect_dwarf_lines(dso, **args)
|
||||||
|
|
||||||
# ASLR is tricky, we have symbols+offsets, but static symbols
|
# ASLR is tricky, we have symbols+offsets, but static symbols
|
||||||
# means we may have multiple options for each symbol.
|
# means we may have multiple options for each symbol.
|
||||||
@@ -541,9 +649,9 @@ def collect_decompressed(path, *,
|
|||||||
file, line = cached
|
file, line = cached
|
||||||
else:
|
else:
|
||||||
# find file+line
|
# find file+line
|
||||||
i = bisect.bisect(line_at, addr, key=lambda x: x[0])
|
line_ = lines.get(addr)
|
||||||
if i > 0:
|
if line_ is not None:
|
||||||
_, file, line = line_at[i-1]
|
file, line = line_
|
||||||
else:
|
else:
|
||||||
file, line = re.sub('(\.o)?$', '.c', dso, 1), 0
|
file, line = re.sub('(\.o)?$', '.c', dso, 1), 0
|
||||||
|
|
||||||
@@ -872,6 +980,9 @@ def table(Result, results, diff_results=None, *,
|
|||||||
types[k].ratio(
|
types[k].ratio(
|
||||||
getattr(r, k, None),
|
getattr(r, k, None),
|
||||||
getattr(diff_r, k, None)))))
|
getattr(diff_r, k, None)))))
|
||||||
|
# append any notes
|
||||||
|
if hasattr(r, 'notes'):
|
||||||
|
entry[-1][1].extend(r.notes)
|
||||||
return entry
|
return entry
|
||||||
|
|
||||||
# recursive entry helper, only used by some scripts
|
# recursive entry helper, only used by some scripts
|
||||||
|
|||||||
@@ -143,10 +143,10 @@ class PerfBdResult(co.namedtuple('PerfBdResult', [
|
|||||||
__slots__ = ()
|
__slots__ = ()
|
||||||
def __new__(cls, file='', function='', line=0,
|
def __new__(cls, file='', function='', line=0,
|
||||||
readed=0, proged=0, erased=0,
|
readed=0, proged=0, erased=0,
|
||||||
children=[]):
|
children=None):
|
||||||
return super().__new__(cls, file, function, int(RInt(line)),
|
return super().__new__(cls, file, function, int(RInt(line)),
|
||||||
RInt(readed), RInt(proged), RInt(erased),
|
RInt(readed), RInt(proged), RInt(erased),
|
||||||
children)
|
children if children is not None else [])
|
||||||
|
|
||||||
def __add__(self, other):
|
def __add__(self, other):
|
||||||
return PerfBdResult(self.file, self.function, self.line,
|
return PerfBdResult(self.file, self.function, self.line,
|
||||||
@@ -166,18 +166,75 @@ def openio(path, mode='r', buffering=-1):
|
|||||||
else:
|
else:
|
||||||
return open(path, mode, buffering)
|
return open(path, mode, buffering)
|
||||||
|
|
||||||
def collect_syms(obj_path, *,
|
class SymInfo:
|
||||||
objdump_path=None,
|
def __init__(self, syms):
|
||||||
|
self.syms = syms
|
||||||
|
|
||||||
|
def get(self, k, d=None):
|
||||||
|
# allow lookup by both symbol and address
|
||||||
|
if isinstance(k, str):
|
||||||
|
# organize by symbol, note multiple symbols can share a name
|
||||||
|
if not hasattr(self, '_by_sym'):
|
||||||
|
by_sym = {}
|
||||||
|
for sym, addr, size in self.syms:
|
||||||
|
if sym not in by_sym:
|
||||||
|
by_sym[sym] = []
|
||||||
|
if (addr, size) not in by_sym[sym]:
|
||||||
|
by_sym[sym].append((addr, size))
|
||||||
|
self._by_sym = by_sym
|
||||||
|
return self._by_sym.get(k, d)
|
||||||
|
|
||||||
|
else:
|
||||||
|
import bisect
|
||||||
|
|
||||||
|
# organize by address
|
||||||
|
if not hasattr(self, '_by_addr'):
|
||||||
|
# sort and keep largest/first when duplicates
|
||||||
|
syms = self.syms.copy()
|
||||||
|
syms.sort(key=lambda x: (x[1], -x[2], x[0]))
|
||||||
|
|
||||||
|
by_addr = []
|
||||||
|
for name, addr, size in syms:
|
||||||
|
if (len(by_addr) == 0
|
||||||
|
or by_addr[-1][0] != addr):
|
||||||
|
by_addr.append((name, addr, size))
|
||||||
|
self._by_addr = by_addr
|
||||||
|
|
||||||
|
# find sym by range
|
||||||
|
i = bisect.bisect(self._by_addr, k,
|
||||||
|
key=lambda x: x[1])
|
||||||
|
# check that we're actually in this sym's size
|
||||||
|
if i > 0 and k < self._by_addr[i-1][1]+self._by_addr[i-1][2]:
|
||||||
|
return self._by_addr[i-1][0]
|
||||||
|
else:
|
||||||
|
return d
|
||||||
|
|
||||||
|
def __getitem__(self, k):
|
||||||
|
v = self.get(k)
|
||||||
|
if v is None:
|
||||||
|
raise KeyError(k)
|
||||||
|
return v
|
||||||
|
|
||||||
|
def __contains__(self, k):
|
||||||
|
return self.get(k) is not None
|
||||||
|
|
||||||
|
def __len__(self):
|
||||||
|
return len(self.syms)
|
||||||
|
|
||||||
|
def __iter__(self):
|
||||||
|
return iter(self.syms)
|
||||||
|
|
||||||
|
def collect_syms(obj_path, global_only=False, *,
|
||||||
|
objdump_path=OBJDUMP_PATH,
|
||||||
**args):
|
**args):
|
||||||
symbol_pattern = re.compile(
|
symbol_pattern = re.compile(
|
||||||
'^(?P<addr>[0-9a-fA-F]+)'
|
'^(?P<addr>[0-9a-fA-F]+)'
|
||||||
'.*'
|
' (?P<scope>.).*'
|
||||||
'\s+(?P<size>[0-9a-fA-F]+)'
|
'\s+(?P<size>[0-9a-fA-F]+)'
|
||||||
'\s+(?P<name>[^\s]+)\s*$')
|
'\s+(?P<name>[^\s]+)\s*$')
|
||||||
|
|
||||||
# figure out symbol addresses
|
# find symbol addresses and sizes
|
||||||
syms = {}
|
syms = []
|
||||||
sym_at = []
|
|
||||||
cmd = objdump_path + ['-t', obj_path]
|
cmd = objdump_path + ['-t', obj_path]
|
||||||
if args.get('verbose'):
|
if args.get('verbose'):
|
||||||
print(' '.join(shlex.quote(c) for c in cmd))
|
print(' '.join(shlex.quote(c) for c in cmd))
|
||||||
@@ -190,32 +247,101 @@ def collect_syms(obj_path, *,
|
|||||||
m = symbol_pattern.match(line)
|
m = symbol_pattern.match(line)
|
||||||
if m:
|
if m:
|
||||||
name = m.group('name')
|
name = m.group('name')
|
||||||
|
scope = m.group('scope')
|
||||||
addr = int(m.group('addr'), 16)
|
addr = int(m.group('addr'), 16)
|
||||||
size = int(m.group('size'), 16)
|
size = int(m.group('size'), 16)
|
||||||
|
# skip non-globals?
|
||||||
|
# l => local
|
||||||
|
# g => global
|
||||||
|
# u => unique global
|
||||||
|
# => neither
|
||||||
|
# ! => local + global
|
||||||
|
if global_only and scope in 'l ':
|
||||||
|
continue
|
||||||
# ignore zero-sized symbols
|
# ignore zero-sized symbols
|
||||||
if not size:
|
if not size:
|
||||||
continue
|
continue
|
||||||
# note multiple symbols can share a name
|
# note multiple symbols can share a name
|
||||||
if name not in syms:
|
syms.append((name, addr, size))
|
||||||
syms[name] = set()
|
|
||||||
syms[name].add((addr, size))
|
|
||||||
sym_at.append((addr, name, size))
|
|
||||||
proc.wait()
|
proc.wait()
|
||||||
if proc.returncode != 0:
|
if proc.returncode != 0:
|
||||||
raise sp.CalledProcessError(proc.returncode, proc.args)
|
raise sp.CalledProcessError(proc.returncode, proc.args)
|
||||||
|
|
||||||
# sort and keep largest/first when duplicates
|
return SymInfo(syms)
|
||||||
sym_at.sort(key=lambda x: (x[0], -x[2], x[1]))
|
|
||||||
sym_at_ = []
|
|
||||||
for addr, name, size in sym_at:
|
|
||||||
if len(sym_at_) == 0 or sym_at_[-1][0] != addr:
|
|
||||||
sym_at_.append((addr, name, size))
|
|
||||||
sym_at = sym_at_
|
|
||||||
|
|
||||||
return syms, sym_at
|
class LineInfo:
|
||||||
|
def __init__(self, lines):
|
||||||
|
self.lines = lines
|
||||||
|
|
||||||
|
def get(self, k, d=None):
|
||||||
|
# allow lookup by both address and file+line tuple
|
||||||
|
if not isinstance(k, tuple):
|
||||||
|
import bisect
|
||||||
|
|
||||||
|
# organize by address
|
||||||
|
if not hasattr(self, '_by_addr'):
|
||||||
|
# sort and keep first when duplicates
|
||||||
|
lines = self.lines.copy()
|
||||||
|
lines.sort(key=lambda x: (x[2], x[0], x[1]))
|
||||||
|
|
||||||
|
by_addr = []
|
||||||
|
for file, line, addr in lines:
|
||||||
|
if (len(by_addr) == 0
|
||||||
|
or by_addr[-1][2] != addr):
|
||||||
|
by_addr.append((file, line, addr))
|
||||||
|
self._by_addr = by_addr
|
||||||
|
|
||||||
|
# find file+line by addr
|
||||||
|
i = bisect.bisect(self._by_addr, k,
|
||||||
|
key=lambda x: x[2])
|
||||||
|
if i > 0:
|
||||||
|
return self._by_addr[i-1][0], self._by_addr[i-1][1]
|
||||||
|
else:
|
||||||
|
return d
|
||||||
|
|
||||||
|
else:
|
||||||
|
import bisect
|
||||||
|
|
||||||
|
# organize by file+line
|
||||||
|
if not hasattr(self, '_by_line'):
|
||||||
|
# sort and keep first when duplicates
|
||||||
|
lines = self.lines.copy()
|
||||||
|
lines.sort()
|
||||||
|
|
||||||
|
by_line = []
|
||||||
|
for file, line, addr in lines:
|
||||||
|
if (len(by_line) == 0
|
||||||
|
or by_line[-1][0] != file
|
||||||
|
or by_line[-1][1] != line):
|
||||||
|
by_line.append((file, line, addr))
|
||||||
|
self._by_line = by_line
|
||||||
|
|
||||||
|
# find addr by file+line tuple
|
||||||
|
i = bisect.bisect(self._by_line, k,
|
||||||
|
key=lambda x: (x[0], x[1]))
|
||||||
|
# make sure file at least matches!
|
||||||
|
if i > 0 and self._by_line[i-1][0] == k[0]:
|
||||||
|
return self._by_line[i-1][2]
|
||||||
|
else:
|
||||||
|
return d
|
||||||
|
|
||||||
|
def __getitem__(self, k):
|
||||||
|
v = self.get(k)
|
||||||
|
if v is None:
|
||||||
|
raise KeyError(k)
|
||||||
|
return v
|
||||||
|
|
||||||
|
def __contains__(self, k):
|
||||||
|
return self.get(k) is not None
|
||||||
|
|
||||||
|
def __len__(self):
|
||||||
|
return len(self.lines)
|
||||||
|
|
||||||
|
def __iter__(self):
|
||||||
|
return iter(self.lines)
|
||||||
|
|
||||||
def collect_dwarf_lines(obj_path, *,
|
def collect_dwarf_lines(obj_path, *,
|
||||||
objdump_path=None,
|
objdump_path=OBJDUMP_PATH,
|
||||||
**args):
|
**args):
|
||||||
line_pattern = re.compile(
|
line_pattern = re.compile(
|
||||||
'^\s*(?:'
|
'^\s*(?:'
|
||||||
@@ -239,9 +365,8 @@ def collect_dwarf_lines(obj_path, *,
|
|||||||
# decodedline seems to have issues with multiple dir/file
|
# decodedline seems to have issues with multiple dir/file
|
||||||
# tables, which is why we need this
|
# tables, which is why we need this
|
||||||
lines = []
|
lines = []
|
||||||
line_at = []
|
dirs = co.OrderedDict()
|
||||||
dirs = {}
|
files = co.OrderedDict()
|
||||||
files = {}
|
|
||||||
op_file = 1
|
op_file = 1
|
||||||
op_line = 1
|
op_line = 1
|
||||||
op_addr = 0
|
op_addr = 0
|
||||||
@@ -282,7 +407,6 @@ def collect_dwarf_lines(obj_path, *,
|
|||||||
or m.group('op_end')):
|
or m.group('op_end')):
|
||||||
file = os.path.abspath(files.get(op_file, '?'))
|
file = os.path.abspath(files.get(op_file, '?'))
|
||||||
lines.append((file, op_line, op_addr))
|
lines.append((file, op_line, op_addr))
|
||||||
line_at.append((op_addr, file, op_line))
|
|
||||||
|
|
||||||
if m.group('op_end'):
|
if m.group('op_end'):
|
||||||
op_file = 1
|
op_file = 1
|
||||||
@@ -292,26 +416,10 @@ def collect_dwarf_lines(obj_path, *,
|
|||||||
if proc.returncode != 0:
|
if proc.returncode != 0:
|
||||||
raise sp.CalledProcessError(proc.returncode, proc.args)
|
raise sp.CalledProcessError(proc.returncode, proc.args)
|
||||||
|
|
||||||
# sort and keep first when duplicates
|
return LineInfo(lines)
|
||||||
lines.sort()
|
|
||||||
lines_ = []
|
|
||||||
for file, line, addr in lines:
|
|
||||||
if len(lines_) == 0 or lines_[-1][0] != file or lines[-1][1] != line:
|
|
||||||
lines_.append((file, line, addr))
|
|
||||||
lines = lines_
|
|
||||||
|
|
||||||
# sort and keep first when duplicates
|
|
||||||
line_at.sort()
|
|
||||||
line_at_ = []
|
|
||||||
for addr, file, line in line_at:
|
|
||||||
if len(line_at_) == 0 or line_at_[-1][0] != addr:
|
|
||||||
line_at_.append((addr, file, line))
|
|
||||||
line_at = line_at_
|
|
||||||
|
|
||||||
return lines, line_at
|
|
||||||
|
|
||||||
|
|
||||||
def collect_job(path, start, stop, syms, sym_at, lines, line_at, *,
|
def collect_job(path, start, stop, syms, lines, *,
|
||||||
sources=None,
|
sources=None,
|
||||||
everything=False,
|
everything=False,
|
||||||
propagate=0,
|
propagate=0,
|
||||||
@@ -465,10 +573,9 @@ def collect_job(path, start, stop, syms, sym_at, lines, line_at, *,
|
|||||||
# the first stack frame, so we can use that as a point
|
# the first stack frame, so we can use that as a point
|
||||||
# of reference
|
# of reference
|
||||||
if last_delta is None:
|
if last_delta is None:
|
||||||
i = bisect.bisect(lines, (last_file, last_line),
|
addr__ = lines.get((last_file, last_line))
|
||||||
key=lambda x: (x[0], x[1]))
|
if addr__ is not None:
|
||||||
if i > 0:
|
last_delta = addr__ - addr_
|
||||||
last_delta = lines[i-1][2] - addr_
|
|
||||||
else:
|
else:
|
||||||
# can't reverse ASLR, give up on backtrace
|
# can't reverse ASLR, give up on backtrace
|
||||||
commit()
|
commit()
|
||||||
@@ -486,11 +593,8 @@ def collect_job(path, start, stop, syms, sym_at, lines, line_at, *,
|
|||||||
file, sym, line = cached
|
file, sym, line = cached
|
||||||
else:
|
else:
|
||||||
# find sym
|
# find sym
|
||||||
i = bisect.bisect(sym_at, addr, key=lambda x: x[0])
|
sym = syms.get(addr)
|
||||||
# check that we're actually in the sym's size
|
if sym is None:
|
||||||
if i > 0 and addr < sym_at[i-1][0] + sym_at[i-1][2]:
|
|
||||||
_, sym, _ = sym_at[i-1]
|
|
||||||
else:
|
|
||||||
sym = hex(addr)
|
sym = hex(addr)
|
||||||
|
|
||||||
# filter out internal/unknown functions
|
# filter out internal/unknown functions
|
||||||
@@ -503,9 +607,9 @@ def collect_job(path, start, stop, syms, sym_at, lines, line_at, *,
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
# find file+line
|
# find file+line
|
||||||
i = bisect.bisect(line_at, addr, key=lambda x: x[0])
|
line_ = lines.get(addr)
|
||||||
if i > 0:
|
if line_ is not None:
|
||||||
_, file, line = line_at[i-1]
|
file, line = line_
|
||||||
elif len(last_stack) == 0:
|
elif len(last_stack) == 0:
|
||||||
file, line = last_file, last_line
|
file, line = last_file, last_line
|
||||||
else:
|
else:
|
||||||
@@ -568,8 +672,8 @@ def collect(obj_path, trace_paths, *,
|
|||||||
jobs = len(os.sched_getaffinity(0))
|
jobs = len(os.sched_getaffinity(0))
|
||||||
|
|
||||||
# find sym/line info to reverse ASLR
|
# find sym/line info to reverse ASLR
|
||||||
syms, sym_at = collect_syms(obj_path, **args)
|
syms = collect_syms(obj_path, **args)
|
||||||
lines, line_at = collect_dwarf_lines(obj_path, **args)
|
lines = collect_dwarf_lines(obj_path, **args)
|
||||||
|
|
||||||
if jobs is not None:
|
if jobs is not None:
|
||||||
# try to split up files so that even single files can be processed
|
# try to split up files so that even single files can be processed
|
||||||
@@ -596,8 +700,7 @@ def collect(obj_path, trace_paths, *,
|
|||||||
for results_ in p.imap_unordered(
|
for results_ in p.imap_unordered(
|
||||||
starapply,
|
starapply,
|
||||||
((collect_job,
|
((collect_job,
|
||||||
(path, start, stop,
|
(path, start, stop, syms, lines),
|
||||||
syms, sym_at, lines, line_at),
|
|
||||||
args)
|
args)
|
||||||
for path, ranges in zip(trace_paths, trace_ranges)
|
for path, ranges in zip(trace_paths, trace_ranges)
|
||||||
for start, stop in ranges)):
|
for start, stop in ranges)):
|
||||||
@@ -607,8 +710,7 @@ def collect(obj_path, trace_paths, *,
|
|||||||
results = []
|
results = []
|
||||||
for path in trace_paths:
|
for path in trace_paths:
|
||||||
results.extend(collect_job(
|
results.extend(collect_job(
|
||||||
path, None, None,
|
path, None, None, syms, lines,
|
||||||
syms, sym_at, lines, line_at,
|
|
||||||
**args))
|
**args))
|
||||||
|
|
||||||
return results
|
return results
|
||||||
@@ -841,6 +943,9 @@ def table(Result, results, diff_results=None, *,
|
|||||||
types[k].ratio(
|
types[k].ratio(
|
||||||
getattr(r, k, None),
|
getattr(r, k, None),
|
||||||
getattr(diff_r, k, None)))))
|
getattr(diff_r, k, None)))))
|
||||||
|
# append any notes
|
||||||
|
if hasattr(r, 'notes'):
|
||||||
|
entry[-1][1].extend(r.notes)
|
||||||
return entry
|
return entry
|
||||||
|
|
||||||
# recursive entry helper, only used by some scripts
|
# recursive entry helper, only used by some scripts
|
||||||
|
|||||||
@@ -129,12 +129,11 @@ class StackResult(co.namedtuple('StackResult', [
|
|||||||
_types = {'frame': RInt, 'limit': RInt}
|
_types = {'frame': RInt, 'limit': RInt}
|
||||||
|
|
||||||
__slots__ = ()
|
__slots__ = ()
|
||||||
def __new__(cls, file='', function='',
|
def __new__(cls, file='', function='', frame=0, limit=0,
|
||||||
frame=0, limit=0,
|
children=None):
|
||||||
children=[]):
|
|
||||||
return super().__new__(cls, file, function,
|
return super().__new__(cls, file, function,
|
||||||
RInt(frame), RInt(limit),
|
RInt(frame), RInt(limit),
|
||||||
children)
|
children if children is not None else [])
|
||||||
|
|
||||||
def __add__(self, other):
|
def __add__(self, other):
|
||||||
return StackResult(self.file, self.function,
|
return StackResult(self.file, self.function,
|
||||||
@@ -527,6 +526,9 @@ def table(Result, results, diff_results=None, *,
|
|||||||
types[k].ratio(
|
types[k].ratio(
|
||||||
getattr(r, k, None),
|
getattr(r, k, None),
|
||||||
getattr(diff_r, k, None)))))
|
getattr(diff_r, k, None)))))
|
||||||
|
# append any notes
|
||||||
|
if hasattr(r, 'notes'):
|
||||||
|
entry[-1][1].extend(r.notes)
|
||||||
return entry
|
return entry
|
||||||
|
|
||||||
# recursive entry helper, only used by some scripts
|
# recursive entry helper, only used by some scripts
|
||||||
|
|||||||
@@ -136,7 +136,8 @@ class StructResult(co.namedtuple('StructResult', [
|
|||||||
_types = {'size': RInt, 'align': RInt}
|
_types = {'size': RInt, 'align': RInt}
|
||||||
|
|
||||||
__slots__ = ()
|
__slots__ = ()
|
||||||
def __new__(cls, file='', struct='', size=0, align=0, children=None):
|
def __new__(cls, file='', struct='', size=0, align=0,
|
||||||
|
children=None):
|
||||||
return super().__new__(cls, file, struct,
|
return super().__new__(cls, file, struct,
|
||||||
RInt(size), RInt(align),
|
RInt(size), RInt(align),
|
||||||
children if children is not None else [])
|
children if children is not None else [])
|
||||||
@@ -161,28 +162,6 @@ def openio(path, mode='r', buffering=-1):
|
|||||||
def collect_dwarf_files(obj_path, *,
|
def collect_dwarf_files(obj_path, *,
|
||||||
objdump_path=OBJDUMP_PATH,
|
objdump_path=OBJDUMP_PATH,
|
||||||
**args):
|
**args):
|
||||||
class FileInfo:
|
|
||||||
def __init__(self, files):
|
|
||||||
self.files = files
|
|
||||||
|
|
||||||
def get(self, k, d=None):
|
|
||||||
return self.files.get(k, d)
|
|
||||||
|
|
||||||
def __getitem__(self, k):
|
|
||||||
v = self.get(k)
|
|
||||||
if v is None:
|
|
||||||
raise KeyError(k)
|
|
||||||
return v
|
|
||||||
|
|
||||||
def __contains__(self, k):
|
|
||||||
return self.get(k) is not None
|
|
||||||
|
|
||||||
def __len__(self):
|
|
||||||
return len(self.files)
|
|
||||||
|
|
||||||
def __iter__(self):
|
|
||||||
return (v for k, v in self.files.items())
|
|
||||||
|
|
||||||
line_pattern = re.compile(
|
line_pattern = re.compile(
|
||||||
'^\s*(?P<no>[0-9]+)'
|
'^\s*(?P<no>[0-9]+)'
|
||||||
'(?:\s+(?P<dir>[0-9]+))?'
|
'(?:\s+(?P<dir>[0-9]+))?'
|
||||||
@@ -223,7 +202,7 @@ def collect_dwarf_files(obj_path, *,
|
|||||||
raise sp.CalledProcessError(proc.returncode, proc.args)
|
raise sp.CalledProcessError(proc.returncode, proc.args)
|
||||||
|
|
||||||
# simplify paths
|
# simplify paths
|
||||||
files_ = {}
|
files_ = co.OrderedDict()
|
||||||
for no, file in files.items():
|
for no, file in files.items():
|
||||||
if os.path.commonpath([
|
if os.path.commonpath([
|
||||||
os.getcwd(),
|
os.getcwd(),
|
||||||
@@ -233,104 +212,104 @@ def collect_dwarf_files(obj_path, *,
|
|||||||
files_[no] = os.path.abspath(file)
|
files_[no] = os.path.abspath(file)
|
||||||
files = files_
|
files = files_
|
||||||
|
|
||||||
return FileInfo(files)
|
return files
|
||||||
|
|
||||||
|
# each dwarf entry can have attrs and children entries
|
||||||
|
class DwarfEntry:
|
||||||
|
def __init__(self, level, off, tag, ats={}, children=[]):
|
||||||
|
self.level = level
|
||||||
|
self.off = off
|
||||||
|
self.tag = tag
|
||||||
|
self.ats = ats or {}
|
||||||
|
self.children = children or []
|
||||||
|
|
||||||
|
def get(self, k, d=None):
|
||||||
|
return self.ats.get(k, d)
|
||||||
|
|
||||||
|
def __getitem__(self, k):
|
||||||
|
return self.ats[k]
|
||||||
|
|
||||||
|
def __contains__(self, k):
|
||||||
|
return k in self.ats
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
return '%s(%d, 0x%x, %r, %r)' % (
|
||||||
|
self.__class__.__name__,
|
||||||
|
self.level,
|
||||||
|
self.off,
|
||||||
|
self.tag,
|
||||||
|
self.ats)
|
||||||
|
|
||||||
|
@ft.cached_property
|
||||||
|
def name(self):
|
||||||
|
if 'DW_AT_name' in self:
|
||||||
|
name = self['DW_AT_name'].split(':')[-1].strip()
|
||||||
|
# prefix with struct/union
|
||||||
|
if self.tag == 'DW_TAG_structure_type':
|
||||||
|
name = 'struct ' + name
|
||||||
|
elif self.tag == 'DW_TAG_union_type':
|
||||||
|
name = 'union ' + name
|
||||||
|
elif self.tag == 'DW_TAG_enumeration_type':
|
||||||
|
name = 'enum ' + name
|
||||||
|
return name
|
||||||
|
else:
|
||||||
|
return None
|
||||||
|
|
||||||
|
# a collection of dwarf entries
|
||||||
|
class DwarfInfo:
|
||||||
|
def __init__(self, entries):
|
||||||
|
self.entries = entries
|
||||||
|
|
||||||
|
def get(self, k, d=None):
|
||||||
|
# allow lookup by both offset and dwarf name
|
||||||
|
if not isinstance(k, str):
|
||||||
|
return self.entries.get(k, d)
|
||||||
|
|
||||||
|
else:
|
||||||
|
import difflib
|
||||||
|
|
||||||
|
# organize entries by name
|
||||||
|
if not hasattr(self, '_by_name'):
|
||||||
|
self._by_name = {}
|
||||||
|
for entry in self.entries.values():
|
||||||
|
if entry.name is not None:
|
||||||
|
self._by_name[entry.name] = entry
|
||||||
|
|
||||||
|
# exact match? avoid difflib if we can for speed
|
||||||
|
if k in self._by_name:
|
||||||
|
return self._by_name[k]
|
||||||
|
# find the best matching dwarf entry with difflib
|
||||||
|
#
|
||||||
|
# this can be different from the actual symbol because
|
||||||
|
# of optimization passes
|
||||||
|
else:
|
||||||
|
name, entry = max(
|
||||||
|
self._by_name.items(),
|
||||||
|
key=lambda entry: difflib.SequenceMatcher(
|
||||||
|
None, entry[0], k, False).ratio(),
|
||||||
|
default=(None, None))
|
||||||
|
return entry
|
||||||
|
|
||||||
|
def __getitem__(self, k):
|
||||||
|
v = self.get(k)
|
||||||
|
if v is None:
|
||||||
|
raise KeyError(k)
|
||||||
|
return v
|
||||||
|
|
||||||
|
def __contains__(self, k):
|
||||||
|
return self.get(k) is not None
|
||||||
|
|
||||||
|
def __len__(self):
|
||||||
|
return len(self.entries)
|
||||||
|
|
||||||
|
def __iter__(self):
|
||||||
|
return (v for k, v in self.entries.items())
|
||||||
|
|
||||||
def collect_dwarf_info(obj_path, filter=None, *,
|
def collect_dwarf_info(obj_path, filter=None, *,
|
||||||
objdump_path=OBJDUMP_PATH,
|
objdump_path=OBJDUMP_PATH,
|
||||||
**args):
|
**args):
|
||||||
filter_, filter = filter, __builtins__.filter
|
filter_, filter = filter, __builtins__.filter
|
||||||
|
|
||||||
# each dwarf entry can have attrs and children entries
|
|
||||||
class DwarfEntry:
|
|
||||||
def __init__(self, level, off, tag, ats={}, children=[]):
|
|
||||||
self.level = level
|
|
||||||
self.off = off
|
|
||||||
self.tag = tag
|
|
||||||
self.ats = ats or {}
|
|
||||||
self.children = children or []
|
|
||||||
|
|
||||||
def get(self, k, d=None):
|
|
||||||
return self.ats.get(k, d)
|
|
||||||
|
|
||||||
def __getitem__(self, k):
|
|
||||||
return self.ats[k]
|
|
||||||
|
|
||||||
def __contains__(self, k):
|
|
||||||
return k in self.ats
|
|
||||||
|
|
||||||
def __repr__(self):
|
|
||||||
return '%s(%d, 0x%x, %r, %r)' % (
|
|
||||||
self.__class__.__name__,
|
|
||||||
self.level,
|
|
||||||
self.off,
|
|
||||||
self.tag,
|
|
||||||
self.ats)
|
|
||||||
|
|
||||||
@ft.cached_property
|
|
||||||
def name(self):
|
|
||||||
if 'DW_AT_name' in self:
|
|
||||||
name = self['DW_AT_name'].split(':')[-1].strip()
|
|
||||||
# prefix with struct/union
|
|
||||||
if self.tag == 'DW_TAG_structure_type':
|
|
||||||
name = 'struct ' + name
|
|
||||||
elif self.tag == 'DW_TAG_union_type':
|
|
||||||
name = 'union ' + name
|
|
||||||
elif self.tag == 'DW_TAG_enumeration_type':
|
|
||||||
name = 'enum ' + name
|
|
||||||
return name
|
|
||||||
else:
|
|
||||||
return None
|
|
||||||
|
|
||||||
# a collection of dwarf entries
|
|
||||||
class DwarfInfo:
|
|
||||||
def __init__(self, entries):
|
|
||||||
self.entries = entries
|
|
||||||
|
|
||||||
def get(self, k, d=None):
|
|
||||||
# allow lookup by both offset and dwarf name
|
|
||||||
if not isinstance(k, str):
|
|
||||||
return self.entries.get(k, d)
|
|
||||||
|
|
||||||
else:
|
|
||||||
import difflib
|
|
||||||
|
|
||||||
# organize entries by name
|
|
||||||
if not hasattr(self, '_by_name'):
|
|
||||||
self._by_name = {}
|
|
||||||
for entry in self.entries.values():
|
|
||||||
if entry.name is not None:
|
|
||||||
self._by_name[entry.name] = entry
|
|
||||||
|
|
||||||
# exact match? avoid difflib if we can for speed
|
|
||||||
if k in self._by_name:
|
|
||||||
return self._by_name[k]
|
|
||||||
# find the best matching dwarf entry with difflib
|
|
||||||
#
|
|
||||||
# this can be different from the actual symbol because
|
|
||||||
# of optimization passes
|
|
||||||
else:
|
|
||||||
name, entry = max(
|
|
||||||
self._by_name.items(),
|
|
||||||
key=lambda entry: difflib.SequenceMatcher(
|
|
||||||
None, entry[0], k, False).ratio(),
|
|
||||||
default=(None, None))
|
|
||||||
return entry
|
|
||||||
|
|
||||||
def __getitem__(self, k):
|
|
||||||
v = self.get(k)
|
|
||||||
if v is None:
|
|
||||||
raise KeyError(k)
|
|
||||||
return v
|
|
||||||
|
|
||||||
def __contains__(self, k):
|
|
||||||
return self.get(k) is not None
|
|
||||||
|
|
||||||
def __len__(self):
|
|
||||||
return len(self.entries)
|
|
||||||
|
|
||||||
def __iter__(self):
|
|
||||||
return (v for k, v in self.entries.items())
|
|
||||||
|
|
||||||
info_pattern = re.compile(
|
info_pattern = re.compile(
|
||||||
'^\s*(?:<(?P<level>[^>]*)>'
|
'^\s*(?:<(?P<level>[^>]*)>'
|
||||||
'\s*<(?P<off>[^>]*)>'
|
'\s*<(?P<off>[^>]*)>'
|
||||||
@@ -797,7 +776,8 @@ def table(Result, results, diff_results=None, *,
|
|||||||
for r in results_}
|
for r in results_}
|
||||||
names_ = list(table_.keys())
|
names_ = list(table_.keys())
|
||||||
|
|
||||||
# only sort the children layer if explicitly requested
|
# sort the children layer
|
||||||
|
names_.sort()
|
||||||
if sort:
|
if sort:
|
||||||
for k, reverse in reversed(sort):
|
for k, reverse in reversed(sort):
|
||||||
names_.sort(
|
names_.sort(
|
||||||
|
|||||||
Reference in New Issue
Block a user