From 512cf5ad4b178f3f62de26defb4634c2258eecc9 Mon Sep 17 00:00:00 2001 From: Christopher Haster Date: Sun, 1 Dec 2024 16:59:51 -0600 Subject: [PATCH] scripts: Adopted ctx.py-related changes in other result scripts - Adopted higher-level collect data structures: - high-level DwarfEntry/DwarfInfo class - high-level SymInfo class - high-level LineInfo class Note these had to be moved out of function scope due to pickling issues in perf.py/perfbd.py. These were only function-local to minimize scope leak so this fortunately was an easy change. - Adopted better list-default patterns in Result types: def __new__(..., children=None): return Result(..., children if children is not None else []) A classic python footgun. - Adopted notes rendering, though this is only used by ctx.py at the moment. - Reverted to sorting children entries, for now. Unfortunately there's no easy way to sort the result entries in perf.py/perfbd.py before folding. Folding is going to make a mess of more complicated children anyways, so another solution is needed... And some other shared miscellany. 
--- scripts/code.py | 171 ++++++++++++++++--------- scripts/cov.py | 3 + scripts/csv.py | 3 + scripts/ctx.py | 311 +++++++++++++++++++++------------------------ scripts/data.py | 171 ++++++++++++++++--------- scripts/perf.py | 205 +++++++++++++++++++++++------- scripts/perfbd.py | 227 ++++++++++++++++++++++++--------- scripts/stack.py | 10 +- scripts/structs.py | 214 ++++++++++++++----------------- 9 files changed, 805 insertions(+), 510 deletions(-) diff --git a/scripts/code.py b/scripts/code.py index 119487ab..e65d76e9 100755 --- a/scripts/code.py +++ b/scripts/code.py @@ -17,8 +17,8 @@ __import__('sys').path.pop(0) import collections as co import csv -import difflib import itertools as it +import functools as ft import math as mt import os import re @@ -168,8 +168,8 @@ def collect_dwarf_files(obj_path, *, '.*\s+(?P[^\s]+)\s*$') # find source paths - dirs = {} - files = {} + dirs = co.OrderedDict() + files = co.OrderedDict() # note objdump-path may contain extra args cmd = objdump_path + ['--dwarf=rawline', obj_path] if args.get('verbose'): @@ -202,7 +202,7 @@ def collect_dwarf_files(obj_path, *, raise sp.CalledProcessError(proc.returncode, proc.args) # simplify paths - files_ = {} + files_ = co.OrderedDict() for no, file in files.items(): if os.path.commonpath([ os.getcwd(), @@ -214,34 +214,102 @@ def collect_dwarf_files(obj_path, *, return files +# each dwarf entry can have attrs and children entries +class DwarfEntry: + def __init__(self, level, off, tag, ats={}, children=[]): + self.level = level + self.off = off + self.tag = tag + self.ats = ats or {} + self.children = children or [] + + def get(self, k, d=None): + return self.ats.get(k, d) + + def __getitem__(self, k): + return self.ats[k] + + def __contains__(self, k): + return k in self.ats + + def __repr__(self): + return '%s(%d, 0x%x, %r, %r)' % ( + self.__class__.__name__, + self.level, + self.off, + self.tag, + self.ats) + + @ft.cached_property + def name(self): + if 'DW_AT_name' in self: + name = 
self['DW_AT_name'].split(':')[-1].strip() + # prefix with struct/union/enum + if self.tag == 'DW_TAG_structure_type': + name = 'struct ' + name + elif self.tag == 'DW_TAG_union_type': + name = 'union ' + name + elif self.tag == 'DW_TAG_enumeration_type': + name = 'enum ' + name + return name + else: + return None + +# a collection of dwarf entries +class DwarfInfo: + def __init__(self, entries): + self.entries = entries + + def get(self, k, d=None): + # allow lookup by both offset and dwarf name + if not isinstance(k, str): + return self.entries.get(k, d) + + else: + import difflib + + # organize entries by name + if not hasattr(self, '_by_name'): + self._by_name = {} + for entry in self.entries.values(): + if entry.name is not None: + self._by_name[entry.name] = entry + + # exact match? avoid difflib if we can for speed + if k in self._by_name: + return self._by_name[k] + # find the best matching dwarf entry with difflib + # + # this can be different from the actual symbol because + # of optimization passes + else: + name, entry = max( + self._by_name.items(), + key=lambda entry: difflib.SequenceMatcher( + None, entry[0], k, False).ratio(), + default=(None, None)) + return entry + + def __getitem__(self, k): + v = self.get(k) + if v is None: + raise KeyError(k) + return v + + def __contains__(self, k): + return self.get(k) is not None + + def __len__(self): + return len(self.entries) + + def __iter__(self): + return (v for k, v in self.entries.items()) + def collect_dwarf_info(obj_path, filter=None, *, objdump_path=OBJDUMP_PATH, **args): filter_, filter = filter, __builtins__.filter - # each dwarf entry can have attrs and children entries - class DwarfEntry: - def __init__(self, level, off, tag, ats={}, children=[]): - self.level = level - self.off = off - self.tag = tag - self.ats = ats or {} - self.children = children or [] - - def __getitem__(self, k): - return self.ats[k] - - def __contains__(self, k): - return k in self.ats - - def __repr__(self): - return 
'%s(%d, 0x%x, %r, %r)' % ( - self.__class__.__name__, - self.level, - self.off, - self.tag, - self.ats) - info_pattern = re.compile( '^\s*(?:<(?P[^>]*)>' '\s*<(?P[^>]*)>' @@ -251,7 +319,7 @@ def collect_dwarf_info(obj_path, filter=None, *, '\s*:(?P.*))\s*$') # collect dwarf entries - entries = co.OrderedDict() + info = co.OrderedDict() entry = None levels = {} # note objdump-path may contain extra args @@ -277,7 +345,7 @@ def collect_dwarf_info(obj_path, filter=None, *, if (entry.level == 1 and ( # unless this entry is filtered filter_ is None or entry.tag in filter_)): - entries[entry.off] = entry + info[entry.off] = entry # store entry in parent levels[entry.level] = entry if entry.level-1 in levels: @@ -290,7 +358,7 @@ def collect_dwarf_info(obj_path, filter=None, *, if proc.returncode != 0: raise sp.CalledProcessError(proc.returncode, proc.args) - return entries + return DwarfInfo(info) def collect_sizes(obj_path, *, nm_path=NM_PATH, @@ -331,33 +399,21 @@ def collect(obj_paths, *, **args): results = [] for obj_path in obj_paths: - # guess the source, if we have debug-info we'll replace this later - file = re.sub('(\.o)?$', '.c', obj_path, 1) - # find sizes sizes = collect_sizes(obj_path, everything=everything, **args) # try to figure out the source file if we have debug-info - defs = {} try: files = collect_dwarf_files(obj_path, **args) info = collect_dwarf_info(obj_path, filter={'DW_TAG_subprogram', 'DW_TAG_variable'}, **args) - for no, entry in info.items(): - # skip funcs with no name or no file - if ('DW_AT_name' not in entry - or 'DW_AT_decl_file' not in entry): - continue - name_ = entry['DW_AT_name'].split(':')[-1].strip() - file_ = files.get(int(entry['DW_AT_decl_file']), '?') - defs[name_] = file_ - except sp.CalledProcessError: # do nothing on error, we don't need objdump to work, source # files may just be inaccurate - pass + files = {} + info = {} # map function sizes to debug symbols for func, size in sizes.items(): @@ -365,34 +421,28 @@ def 
collect(obj_paths, *, if not everything and func.startswith('__'): continue - # find best matching debug symbol, this may be slightly different + # find best matching dwarf entry, this may be slightly different # due to optimizations - if defs: - # exact match? avoid difflib if we can for speed - if func in defs: - file_ = defs[func] - else: - _, file_ = max( - defs.items(), - key=lambda d: difflib.SequenceMatcher(None, - d[0], - func, False).ratio()) + entry = info.get(func) + + if entry is not None and 'DW_AT_decl_file' in entry: + file = files.get(int(entry['DW_AT_decl_file']), '?') else: - file_ = file + file = re.sub('(\.o)?$', '.c', obj_path, 1) # ignore filtered sources if sources is not None: - if not any(os.path.abspath(file_) == os.path.abspath(s) + if not any(os.path.abspath(file) == os.path.abspath(s) for s in sources): continue else: # default to only cwd if not everything and not os.path.commonpath([ os.getcwd(), - os.path.abspath(file_)]) == os.getcwd(): + os.path.abspath(file)]) == os.getcwd(): continue - results.append(CodeResult(file_, func, size)) + results.append(CodeResult(file, func, size)) return results @@ -624,6 +674,9 @@ def table(Result, results, diff_results=None, *, types[k].ratio( getattr(r, k, None), getattr(diff_r, k, None))))) + # append any notes + if hasattr(r, 'notes'): + entry[-1][1].extend(r.notes) return entry # recursive entry helper, only used by some scripts diff --git a/scripts/cov.py b/scripts/cov.py index 8e245ec5..730c7ff8 100755 --- a/scripts/cov.py +++ b/scripts/cov.py @@ -578,6 +578,9 @@ def table(Result, results, diff_results=None, *, types[k].ratio( getattr(r, k, None), getattr(diff_r, k, None))))) + # append any notes + if hasattr(r, 'notes'): + entry[-1][1].extend(r.notes) return entry # recursive entry helper, only used by some scripts diff --git a/scripts/csv.py b/scripts/csv.py index 866f1e5a..582f0d70 100755 --- a/scripts/csv.py +++ b/scripts/csv.py @@ -1590,6 +1590,9 @@ def table(Result, results, 
diff_results=None, *, types[k].ratio( getattr(r, k, None), getattr(diff_r, k, None))))) + # append any notes + if hasattr(r, 'notes'): + entry[-1][1].extend(r.notes) return entry # recursive entry helper, only used by some scripts diff --git a/scripts/ctx.py b/scripts/ctx.py index 428aa877..2e82a21d 100755 --- a/scripts/ctx.py +++ b/scripts/ctx.py @@ -160,61 +160,67 @@ def openio(path, mode='r', buffering=-1): else: return open(path, mode, buffering) -def collect_syms(obj_path, global_only=False, *, - objdump_path=None, - **args): - class SymInfo: - def __init__(self, syms): - self.syms = syms +class SymInfo: + def __init__(self, syms): + self.syms = syms - def get(self, k, d=None): - # allow lookup by both symbol and address - if isinstance(k, str): - # organize by symbol, note multiple symbols can share a name - if not hasattr(self, '_by_sym'): - self._by_sym = {} - for sym, addr, size in self.syms: - self._by_sym[sym] = (addr, size) - return self._by_sym.get(k, d) + def get(self, k, d=None): + # allow lookup by both symbol and address + if isinstance(k, str): + # organize by symbol, note multiple symbols can share a name + if not hasattr(self, '_by_sym'): + by_sym = {} + for sym, addr, size in self.syms: + if sym not in by_sym: + by_sym[sym] = [] + if (addr, size) not in by_sym[sym]: + by_sym[sym].append((addr, size)) + self._by_sym = by_sym + return self._by_sym.get(k, d) + else: + import bisect + + # organize by address + if not hasattr(self, '_by_addr'): + # sort and keep largest/first when duplicates + syms = self.syms.copy() + syms.sort(key=lambda x: (x[1], -x[2], x[0])) + + by_addr = [] + for name, addr, size in syms: + if (len(by_addr) == 0 + or by_addr[-1][1] != addr): + by_addr.append((name, addr, size)) + self._by_addr = by_addr + + # find sym by range + i = bisect.bisect(self._by_addr, k, + key=lambda x: x[1]) + # check that we're actually in this sym's size + if i > 0 and k < self._by_addr[i-1][1]+self._by_addr[i-1][2]: + return self._by_addr[i-1][0] 
else: - import bisect + return d - # organize by address - if not hasattr(self, '_by_addr'): - # sort and keep largest/first when duplicates - syms = self.syms.copy() - syms.sort(key=lambda x: (x[1], -x[2], x[0])) + def __getitem__(self, k): + v = self.get(k) + if v is None: + raise KeyError(k) + return v - self._by_addr = [] - for name, addr, size in syms: - if (len(self._by_addr) == 0 - or self._by_addr[-1][0] != addr): - self._by_addr.append((name, addr, size)) + def __contains__(self, k): + return self.get(k) is not None - # find sym by range - i = bisect.bisect(self._by_addr, k, key=lambda x: x[1]) - # check that we're actually in this sym's size - if i > 0 and k < self._by_addr[i-1][1]+self._by_addr[i-1][2]: - return self._by_addr[i-1] - else: - return None + def __len__(self): + return len(self.syms) - def __getitem__(self, k): - v = self.get(k) - if v is None: - raise KeyError(k) - return v - - def __contains__(self, k): - return self.get(k) is not None - - def __len__(self): - return len(self.syms) - - def __iter__(self): - return iter(self.syms) + def __iter__(self): + return iter(self.syms) +def collect_syms(obj_path, global_only=False, *, + objdump_path=OBJDUMP_PATH, + **args): symbol_pattern = re.compile( '^(?P[0-9a-fA-F]+)' ' (?P.).*' @@ -260,28 +266,6 @@ def collect_syms(obj_path, global_only=False, *, def collect_dwarf_files(obj_path, *, objdump_path=OBJDUMP_PATH, **args): - class FileInfo: - def __init__(self, files): - self.files = files - - def get(self, k, d=None): - return self.files.get(k, d) - - def __getitem__(self, k): - v = self.get(k) - if v is None: - raise KeyError(k) - return v - - def __contains__(self, k): - return self.get(k) is not None - - def __len__(self): - return len(self.files) - - def __iter__(self): - return (v for k, v in self.files.items()) - line_pattern = re.compile( '^\s*(?P[0-9]+)' '(?:\s+(?P[0-9]+))?' 
@@ -322,7 +306,7 @@ def collect_dwarf_files(obj_path, *, raise sp.CalledProcessError(proc.returncode, proc.args) # simplify paths - files_ = {} + files_ = co.OrderedDict() for no, file in files.items(): if os.path.commonpath([ os.getcwd(), @@ -332,104 +316,104 @@ def collect_dwarf_files(obj_path, *, files_[no] = os.path.abspath(file) files = files_ - return FileInfo(files) + return files + +# each dwarf entry can have attrs and children entries +class DwarfEntry: + def __init__(self, level, off, tag, ats={}, children=[]): + self.level = level + self.off = off + self.tag = tag + self.ats = ats or {} + self.children = children or [] + + def get(self, k, d=None): + return self.ats.get(k, d) + + def __getitem__(self, k): + return self.ats[k] + + def __contains__(self, k): + return k in self.ats + + def __repr__(self): + return '%s(%d, 0x%x, %r, %r)' % ( + self.__class__.__name__, + self.level, + self.off, + self.tag, + self.ats) + + @ft.cached_property + def name(self): + if 'DW_AT_name' in self: + name = self['DW_AT_name'].split(':')[-1].strip() + # prefix with struct/union/enum + if self.tag == 'DW_TAG_structure_type': + name = 'struct ' + name + elif self.tag == 'DW_TAG_union_type': + name = 'union ' + name + elif self.tag == 'DW_TAG_enumeration_type': + name = 'enum ' + name + return name + else: + return None + +# a collection of dwarf entries +class DwarfInfo: + def __init__(self, entries): + self.entries = entries + + def get(self, k, d=None): + # allow lookup by both offset and dwarf name + if not isinstance(k, str): + return self.entries.get(k, d) + + else: + import difflib + + # organize entries by name + if not hasattr(self, '_by_name'): + self._by_name = {} + for entry in self.entries.values(): + if entry.name is not None: + self._by_name[entry.name] = entry + + # exact match? 
avoid difflib if we can for speed + if k in self._by_name: + return self._by_name[k] + # find the best matching dwarf entry with difflib + # + # this can be different from the actual symbol because + # of optimization passes + else: + name, entry = max( + self._by_name.items(), + key=lambda entry: difflib.SequenceMatcher( + None, entry[0], k, False).ratio(), + default=(None, None)) + return entry + + def __getitem__(self, k): + v = self.get(k) + if v is None: + raise KeyError(k) + return v + + def __contains__(self, k): + return self.get(k) is not None + + def __len__(self): + return len(self.entries) + + def __iter__(self): + return (v for k, v in self.entries.items()) def collect_dwarf_info(obj_path, filter=None, *, objdump_path=OBJDUMP_PATH, **args): filter_, filter = filter, __builtins__.filter - # each dwarf entry can have attrs and children entries - class DwarfEntry: - def __init__(self, level, off, tag, ats={}, children=[]): - self.level = level - self.off = off - self.tag = tag - self.ats = ats or {} - self.children = children or [] - - def get(self, k, d=None): - return self.ats.get(k, d) - - def __getitem__(self, k): - return self.ats[k] - - def __contains__(self, k): - return k in self.ats - - def __repr__(self): - return '%s(%d, 0x%x, %r, %r)' % ( - self.__class__.__name__, - self.level, - self.off, - self.tag, - self.ats) - - @ft.cached_property - def name(self): - if 'DW_AT_name' in self: - name = self['DW_AT_name'].split(':')[-1].strip() - # prefix with struct/union/enum - if self.tag == 'DW_TAG_structure_type': - name = 'struct ' + name - elif self.tag == 'DW_TAG_union_type': - name = 'union ' + name - elif self.tag == 'DW_TAG_enumeration_type': - name = 'enum ' + name - return name - else: - return None - - # a collection of dwarf entries - class DwarfInfo: - def __init__(self, entries): - self.entries = entries - - def get(self, k, d=None): - # allow lookup by both offset and dwarf name - if not isinstance(k, str): - return self.entries.get(k, d) 
- - else: - import difflib - - # organize entries by name - if not hasattr(self, '_by_name'): - self._by_name = {} - for entry in self.entries.values(): - if entry.name is not None: - self._by_name[entry.name] = entry - - # exact match? avoid difflib if we can for speed - if k in self._by_name: - return self._by_name[k] - # find the best matching dwarf entry with difflib - # - # this can be different from the actual symbol because - # of optimization passes - else: - name, entry = max( - self._by_name.items(), - key=lambda entry: difflib.SequenceMatcher( - None, entry[0], k, False).ratio(), - default=(None, None)) - return entry - - def __getitem__(self, k): - v = self.get(k) - if v is None: - raise KeyError(k) - return v - - def __contains__(self, k): - return self.get(k) is not None - - def __len__(self): - return len(self.entries) - - def __iter__(self): - return (v for k, v in self.entries.items()) - info_pattern = re.compile( '^\s*(?:<(?P[^>]*)>' '\s*<(?P[^>]*)>' @@ -925,7 +909,8 @@ def table(Result, results, diff_results=None, *, for r in results_} names_ = list(table_.keys()) - # only sort the children layer if explicitly requested + # sort the children layer + names_.sort() if sort: for k, reverse in reversed(sort): names_.sort( diff --git a/scripts/data.py b/scripts/data.py index 1aafac67..3b5fd438 100755 --- a/scripts/data.py +++ b/scripts/data.py @@ -17,8 +17,8 @@ __import__('sys').path.pop(0) import collections as co import csv -import difflib import itertools as it +import functools as ft import math as mt import os import re @@ -168,8 +168,8 @@ def collect_dwarf_files(obj_path, *, '.*\s+(?P[^\s]+)\s*$') # find source paths - dirs = {} - files = {} + dirs = co.OrderedDict() + files = co.OrderedDict() # note objdump-path may contain extra args cmd = objdump_path + ['--dwarf=rawline', obj_path] if args.get('verbose'): @@ -202,7 +202,7 @@ def collect_dwarf_files(obj_path, *, raise sp.CalledProcessError(proc.returncode, proc.args) # simplify paths - files_ 
= {} + files_ = co.OrderedDict() for no, file in files.items(): if os.path.commonpath([ os.getcwd(), @@ -214,34 +214,102 @@ def collect_dwarf_files(obj_path, *, return files +# each dwarf entry can have attrs and children entries +class DwarfEntry: + def __init__(self, level, off, tag, ats={}, children=[]): + self.level = level + self.off = off + self.tag = tag + self.ats = ats or {} + self.children = children or [] + + def get(self, k, d=None): + return self.ats.get(k, d) + + def __getitem__(self, k): + return self.ats[k] + + def __contains__(self, k): + return k in self.ats + + def __repr__(self): + return '%s(%d, 0x%x, %r, %r)' % ( + self.__class__.__name__, + self.level, + self.off, + self.tag, + self.ats) + + @ft.cached_property + def name(self): + if 'DW_AT_name' in self: + name = self['DW_AT_name'].split(':')[-1].strip() + # prefix with struct/union/enum + if self.tag == 'DW_TAG_structure_type': + name = 'struct ' + name + elif self.tag == 'DW_TAG_union_type': + name = 'union ' + name + elif self.tag == 'DW_TAG_enumeration_type': + name = 'enum ' + name + return name + else: + return None + +# a collection of dwarf entries +class DwarfInfo: + def __init__(self, entries): + self.entries = entries + + def get(self, k, d=None): + # allow lookup by both offset and dwarf name + if not isinstance(k, str): + return self.entries.get(k, d) + + else: + import difflib + + # organize entries by name + if not hasattr(self, '_by_name'): + self._by_name = {} + for entry in self.entries.values(): + if entry.name is not None: + self._by_name[entry.name] = entry + + # exact match? 
avoid difflib if we can for speed + if k in self._by_name: + return self._by_name[k] + # find the best matching dwarf entry with difflib + # + # this can be different from the actual symbol because + # of optimization passes + else: + name, entry = max( + self._by_name.items(), + key=lambda entry: difflib.SequenceMatcher( + None, entry[0], k, False).ratio(), + default=(None, None)) + return entry + + def __getitem__(self, k): + v = self.get(k) + if v is None: + raise KeyError(k) + return v + + def __contains__(self, k): + return self.get(k) is not None + + def __len__(self): + return len(self.entries) + + def __iter__(self): + return (v for k, v in self.entries.items()) + def collect_dwarf_info(obj_path, filter=None, *, objdump_path=OBJDUMP_PATH, **args): filter_, filter = filter, __builtins__.filter - # each dwarf entry can have attrs and children entries - class DwarfEntry: - def __init__(self, level, off, tag, ats={}, children=[]): - self.level = level - self.off = off - self.tag = tag - self.ats = ats or {} - self.children = children or [] - - def __getitem__(self, k): - return self.ats[k] - - def __contains__(self, k): - return k in self.ats - - def __repr__(self): - return '%s(%d, 0x%x, %r, %r)' % ( - self.__class__.__name__, - self.level, - self.off, - self.tag, - self.ats) - info_pattern = re.compile( '^\s*(?:<(?P[^>]*)>' '\s*<(?P[^>]*)>' @@ -251,7 +319,7 @@ def collect_dwarf_info(obj_path, filter=None, *, '\s*:(?P.*))\s*$') # collect dwarf entries - entries = co.OrderedDict() + info = co.OrderedDict() entry = None levels = {} # note objdump-path may contain extra args @@ -277,7 +345,7 @@ def collect_dwarf_info(obj_path, filter=None, *, if (entry.level == 1 and ( # unless this entry is filtered filter_ is None or entry.tag in filter_)): - entries[entry.off] = entry + info[entry.off] = entry # store entry in parent levels[entry.level] = entry if entry.level-1 in levels: @@ -290,7 +358,7 @@ def collect_dwarf_info(obj_path, filter=None, *, if proc.returncode 
!= 0: raise sp.CalledProcessError(proc.returncode, proc.args) - return entries + return DwarfInfo(info) def collect_sizes(obj_path, *, nm_path=NM_PATH, @@ -331,33 +399,21 @@ def collect(obj_paths, *, **args): results = [] for obj_path in obj_paths: - # guess the source, if we have debug-info we'll replace this later - file = re.sub('(\.o)?$', '.c', obj_path, 1) - # find sizes sizes = collect_sizes(obj_path, everything=everything, **args) # try to figure out the source file if we have debug-info - defs = {} try: files = collect_dwarf_files(obj_path, **args) info = collect_dwarf_info(obj_path, filter={'DW_TAG_subprogram', 'DW_TAG_variable'}, **args) - for no, entry in info.items(): - # skip funcs with no name or no file - if ('DW_AT_name' not in entry - or 'DW_AT_decl_file' not in entry): - continue - name_ = entry['DW_AT_name'].split(':')[-1].strip() - file_ = files.get(int(entry['DW_AT_decl_file']), '?') - defs[name_] = file_ - except sp.CalledProcessError: # do nothing on error, we don't need objdump to work, source # files may just be inaccurate - pass + files = {} + info = {} # map function sizes to debug symbols for func, size in sizes.items(): @@ -365,34 +421,28 @@ def collect(obj_paths, *, if not everything and func.startswith('__'): continue - # find best matching debug symbol, this may be slightly different + # find best matching dwarf entry, this may be slightly different # due to optimizations - if defs: - # exact match? 
avoid difflib if we can for speed - if func in defs: - file_ = defs[func] - else: - _, file_ = max( - defs.items(), - key=lambda d: difflib.SequenceMatcher(None, - d[0], - func, False).ratio()) + entry = info.get(func) + + if entry is not None and 'DW_AT_decl_file' in entry: + file = files.get(int(entry['DW_AT_decl_file']), '?') else: - file_ = file + file = re.sub('(\.o)?$', '.c', obj_path, 1) # ignore filtered sources if sources is not None: - if not any(os.path.abspath(file_) == os.path.abspath(s) + if not any(os.path.abspath(file) == os.path.abspath(s) for s in sources): continue else: # default to only cwd if not everything and not os.path.commonpath([ os.getcwd(), - os.path.abspath(file_)]) == os.getcwd(): + os.path.abspath(file)]) == os.getcwd(): continue - results.append(DataResult(file_, func, size)) + results.append(DataResult(file, func, size)) return results @@ -624,6 +674,9 @@ def table(Result, results, diff_results=None, *, types[k].ratio( getattr(r, k, None), getattr(diff_r, k, None))))) + # append any notes + if hasattr(r, 'notes'): + entry[-1][1].extend(r.notes) return entry # recursive entry helper, only used by some scripts diff --git a/scripts/perf.py b/scripts/perf.py index 4df12e14..f63c8d7f 100755 --- a/scripts/perf.py +++ b/scripts/perf.py @@ -155,12 +155,12 @@ class PerfResult(co.namedtuple('PerfResult', [ __slots__ = () def __new__(cls, file='', function='', line=0, cycles=0, bmisses=0, branches=0, cmisses=0, caches=0, - children=[]): + children=None): return super().__new__(cls, file, function, int(RInt(line)), RInt(cycles), RInt(bmisses), RInt(branches), RInt(cmisses), RInt(caches), - children) + children if children is not None else []) def __add__(self, other): return PerfResult(self.file, self.function, self.line, @@ -259,19 +259,76 @@ def multiprocessing_cache(f): return multiprocessing_cache +class SymInfo: + def __init__(self, syms): + self.syms = syms + + def get(self, k, d=None): + # allow lookup by both symbol and address + if 
isinstance(k, str): + # organize by symbol, note multiple symbols can share a name + if not hasattr(self, '_by_sym'): + by_sym = {} + for sym, addr, size in self.syms: + if sym not in by_sym: + by_sym[sym] = [] + if (addr, size) not in by_sym[sym]: + by_sym[sym].append((addr, size)) + self._by_sym = by_sym + return self._by_sym.get(k, d) + + else: + import bisect + + # organize by address + if not hasattr(self, '_by_addr'): + # sort and keep largest/first when duplicates + syms = self.syms.copy() + syms.sort(key=lambda x: (x[1], -x[2], x[0])) + + by_addr = [] + for name, addr, size in syms: + if (len(by_addr) == 0 + or by_addr[-1][1] != addr): + by_addr.append((name, addr, size)) + self._by_addr = by_addr + + # find sym by range + i = bisect.bisect(self._by_addr, k, + key=lambda x: x[1]) + # check that we're actually in this sym's size + if i > 0 and k < self._by_addr[i-1][1]+self._by_addr[i-1][2]: + return self._by_addr[i-1][0] + else: + return d + + def __getitem__(self, k): + v = self.get(k) + if v is None: + raise KeyError(k) + return v + + def __contains__(self, k): + return self.get(k) is not None + + def __len__(self): + return len(self.syms) + + def __iter__(self): + return iter(self.syms) + @multiprocessing_cache -def collect_syms(obj_path, *, - objdump_path=None, +def collect_syms(obj_path, global_only=False, *, + objdump_path=OBJDUMP_PATH, **args): symbol_pattern = re.compile( '^(?P[0-9a-fA-F]+)' - '.*' + ' (?P.).*' '\s+(?P[0-9a-fA-F]+)' '\s+(?P[^\s]+)\s*$') - # figure out symbol addresses - syms = {} - sym_at = [] + # find symbol addresses and sizes + syms = [] cmd = objdump_path + ['-t', obj_path] if args.get('verbose'): print(' '.join(shlex.quote(c) for c in cmd)) @@ -284,33 +341,102 @@ def collect_syms(obj_path, *, m = symbol_pattern.match(line) if m: name = m.group('name') + scope = m.group('scope') addr = int(m.group('addr'), 16) size = int(m.group('size'), 16) + # skip non-globals? 
+ # l => local + # g => global + # u => unique global + # => neither + # ! => local + global + if global_only and scope in 'l ': + continue # ignore zero-sized symbols if not size: continue # note multiple symbols can share a name - if name not in syms: - syms[name] = set() - syms[name].add((addr, size)) - sym_at.append((addr, name, size)) + syms.append((name, addr, size)) proc.wait() if proc.returncode != 0: raise sp.CalledProcessError(proc.returncode, proc.args) - # sort and keep largest/first when duplicates - sym_at.sort(key=lambda x: (x[0], -x[2], x[1])) - sym_at_ = [] - for addr, name, size in sym_at: - if len(sym_at_) == 0 or sym_at_[-1][0] != addr: - sym_at_.append((addr, name, size)) - sym_at = sym_at_ + return SymInfo(syms) - return syms, sym_at +class LineInfo: + def __init__(self, lines): + self.lines = lines + + def get(self, k, d=None): + # allow lookup by both address and file+line tuple + if not isinstance(k, tuple): + import bisect + + # organize by address + if not hasattr(self, '_by_addr'): + # sort and keep first when duplicates + lines = self.lines.copy() + lines.sort(key=lambda x: (x[2], x[0], x[1])) + + by_addr = [] + for file, line, addr in lines: + if (len(by_addr) == 0 + or by_addr[-1][2] != addr): + by_addr.append((file, line, addr)) + self._by_addr = by_addr + + # find file+line by addr + i = bisect.bisect(self._by_addr, k, + key=lambda x: x[2]) + if i > 0: + return self._by_addr[i-1][0], self._by_addr[i-1][1] + else: + return d + + else: + import bisect + + # organize by file+line + if not hasattr(self, '_by_line'): + # sort and keep first when duplicates + lines = self.lines.copy() + lines.sort() + + by_line = [] + for file, line, addr in lines: + if (len(by_line) == 0 + or by_line[-1][0] != file + or by_line[-1][1] != line): + by_line.append((file, line, addr)) + self._by_line = by_line + + # find addr by file+line tuple + i = bisect.bisect(self._by_line, k, + key=lambda x: (x[0], x[1])) + # make sure file at least matches! 
+ if i > 0 and self._by_line[i-1][0] == k[0]: + return self._by_line[i-1][2] + else: + return d + + def __getitem__(self, k): + v = self.get(k) + if v is None: + raise KeyError(k) + return v + + def __contains__(self, k): + return self.get(k) is not None + + def __len__(self): + return len(self.lines) + + def __iter__(self): + return iter(self.lines) @multiprocessing_cache def collect_dwarf_lines(obj_path, *, - objdump_path=None, + objdump_path=OBJDUMP_PATH, **args): line_pattern = re.compile( '^\s*(?:' @@ -334,9 +460,8 @@ def collect_dwarf_lines(obj_path, *, # decodedline seems to have issues with multiple dir/file # tables, which is why we need this lines = [] - line_at = [] - dirs = {} - files = {} + dirs = co.OrderedDict() + files = co.OrderedDict() op_file = 1 op_line = 1 op_addr = 0 @@ -377,7 +502,6 @@ def collect_dwarf_lines(obj_path, *, or m.group('op_end')): file = os.path.abspath(files.get(op_file, '?')) lines.append((file, op_line, op_addr)) - line_at.append((op_addr, file, op_line)) if m.group('op_end'): op_file = 1 @@ -387,23 +511,7 @@ def collect_dwarf_lines(obj_path, *, if proc.returncode != 0: raise sp.CalledProcessError(proc.returncode, proc.args) - # sort and keep first when duplicates - lines.sort() - lines_ = [] - for file, line, addr in lines: - if len(lines_) == 0 or lines_[-1][0] != file or lines[-1][1] != line: - lines_.append((file, line, addr)) - lines = lines_ - - # sort and keep first when duplicates - line_at.sort() - line_at_ = [] - for addr, file, line in line_at: - if len(line_at_) == 0 or line_at_[-1][0] != addr: - line_at_.append((addr, file, line)) - line_at = line_at_ - - return lines, line_at + return LineInfo(lines) def collect_decompressed(path, *, @@ -502,8 +610,8 @@ def collect_decompressed(path, *, addr_ = int(m.group('addr'), 16) # get the syms/lines for the dso, this is cached - syms, sym_at = collect_syms(dso, **args) - lines, line_at = collect_dwarf_lines(dso, **args) + syms = collect_syms(dso, **args) + lines = 
collect_dwarf_lines(dso, **args) # ASLR is tricky, we have symbols+offsets, but static symbols # means we may have multiple options for each symbol. @@ -541,9 +649,9 @@ def collect_decompressed(path, *, file, line = cached else: # find file+line - i = bisect.bisect(line_at, addr, key=lambda x: x[0]) - if i > 0: - _, file, line = line_at[i-1] + line_ = lines.get(addr) + if line_ is not None: + file, line = line_ else: file, line = re.sub('(\.o)?$', '.c', dso, 1), 0 @@ -872,6 +980,9 @@ def table(Result, results, diff_results=None, *, types[k].ratio( getattr(r, k, None), getattr(diff_r, k, None))))) + # append any notes + if hasattr(r, 'notes'): + entry[-1][1].extend(r.notes) return entry # recursive entry helper, only used by some scripts diff --git a/scripts/perfbd.py b/scripts/perfbd.py index 751cda85..88cb941f 100755 --- a/scripts/perfbd.py +++ b/scripts/perfbd.py @@ -143,10 +143,10 @@ class PerfBdResult(co.namedtuple('PerfBdResult', [ __slots__ = () def __new__(cls, file='', function='', line=0, readed=0, proged=0, erased=0, - children=[]): + children=None): return super().__new__(cls, file, function, int(RInt(line)), RInt(readed), RInt(proged), RInt(erased), - children) + children if children is not None else []) def __add__(self, other): return PerfBdResult(self.file, self.function, self.line, @@ -166,18 +166,75 @@ def openio(path, mode='r', buffering=-1): else: return open(path, mode, buffering) -def collect_syms(obj_path, *, - objdump_path=None, +class SymInfo: + def __init__(self, syms): + self.syms = syms + + def get(self, k, d=None): + # allow lookup by both symbol and address + if isinstance(k, str): + # organize by symbol, note multiple symbols can share a name + if not hasattr(self, '_by_sym'): + by_sym = {} + for sym, addr, size in self.syms: + if sym not in by_sym: + by_sym[sym] = [] + if (addr, size) not in by_sym[sym]: + by_sym[sym].append((addr, size)) + self._by_sym = by_sym + return self._by_sym.get(k, d) + + else: + import bisect + + # organize 
by address + if not hasattr(self, '_by_addr'): + # sort and keep largest/first when duplicates + syms = self.syms.copy() + syms.sort(key=lambda x: (x[1], -x[2], x[0])) + + by_addr = [] + for name, addr, size in syms: + if (len(by_addr) == 0 + or by_addr[-1][0] != addr): + by_addr.append((name, addr, size)) + self._by_addr = by_addr + + # find sym by range + i = bisect.bisect(self._by_addr, k, + key=lambda x: x[1]) + # check that we're actually in this sym's size + if i > 0 and k < self._by_addr[i-1][1]+self._by_addr[i-1][2]: + return self._by_addr[i-1][0] + else: + return d + + def __getitem__(self, k): + v = self.get(k) + if v is None: + raise KeyError(k) + return v + + def __contains__(self, k): + return self.get(k) is not None + + def __len__(self): + return len(self.syms) + + def __iter__(self): + return iter(self.syms) + +def collect_syms(obj_path, global_only=False, *, + objdump_path=OBJDUMP_PATH, **args): symbol_pattern = re.compile( '^(?P[0-9a-fA-F]+)' - '.*' + ' (?P.).*' '\s+(?P[0-9a-fA-F]+)' '\s+(?P[^\s]+)\s*$') - # figure out symbol addresses - syms = {} - sym_at = [] + # find symbol addresses and sizes + syms = [] cmd = objdump_path + ['-t', obj_path] if args.get('verbose'): print(' '.join(shlex.quote(c) for c in cmd)) @@ -190,32 +247,101 @@ def collect_syms(obj_path, *, m = symbol_pattern.match(line) if m: name = m.group('name') + scope = m.group('scope') addr = int(m.group('addr'), 16) size = int(m.group('size'), 16) + # skip non-globals? + # l => local + # g => global + # u => unique global + # => neither + # ! 
=> local + global + if global_only and scope in 'l ': + continue # ignore zero-sized symbols if not size: continue # note multiple symbols can share a name - if name not in syms: - syms[name] = set() - syms[name].add((addr, size)) - sym_at.append((addr, name, size)) + syms.append((name, addr, size)) proc.wait() if proc.returncode != 0: raise sp.CalledProcessError(proc.returncode, proc.args) - # sort and keep largest/first when duplicates - sym_at.sort(key=lambda x: (x[0], -x[2], x[1])) - sym_at_ = [] - for addr, name, size in sym_at: - if len(sym_at_) == 0 or sym_at_[-1][0] != addr: - sym_at_.append((addr, name, size)) - sym_at = sym_at_ + return SymInfo(syms) - return syms, sym_at +class LineInfo: + def __init__(self, lines): + self.lines = lines + + def get(self, k, d=None): + # allow lookup by both address and file+line tuple + if not isinstance(k, tuple): + import bisect + + # organize by address + if not hasattr(self, '_by_addr'): + # sort and keep first when duplicates + lines = self.lines.copy() + lines.sort(key=lambda x: (x[2], x[0], x[1])) + + by_addr = [] + for file, line, addr in lines: + if (len(by_addr) == 0 + or by_addr[-1][2] != addr): + by_addr.append((file, line, addr)) + self._by_addr = by_addr + + # find file+line by addr + i = bisect.bisect(self._by_addr, k, + key=lambda x: x[2]) + if i > 0: + return self._by_addr[i-1][0], self._by_addr[i-1][1] + else: + return d + + else: + import bisect + + # organize by file+line + if not hasattr(self, '_by_line'): + # sort and keep first when duplicates + lines = self.lines.copy() + lines.sort() + + by_line = [] + for file, line, addr in lines: + if (len(by_line) == 0 + or by_line[-1][0] != file + or by_line[-1][1] != line): + by_line.append((file, line, addr)) + self._by_line = by_line + + # find addr by file+line tuple + i = bisect.bisect(self._by_line, k, + key=lambda x: (x[0], x[1])) + # make sure file at least matches! 
+ if i > 0 and self._by_line[i-1][0] == k[0]: + return self._by_line[i-1][2] + else: + return d + + def __getitem__(self, k): + v = self.get(k) + if v is None: + raise KeyError(k) + return v + + def __contains__(self, k): + return self.get(k) is not None + + def __len__(self): + return len(self.lines) + + def __iter__(self): + return iter(self.lines) def collect_dwarf_lines(obj_path, *, - objdump_path=None, + objdump_path=OBJDUMP_PATH, **args): line_pattern = re.compile( '^\s*(?:' @@ -239,9 +365,8 @@ def collect_dwarf_lines(obj_path, *, # decodedline seems to have issues with multiple dir/file # tables, which is why we need this lines = [] - line_at = [] - dirs = {} - files = {} + dirs = co.OrderedDict() + files = co.OrderedDict() op_file = 1 op_line = 1 op_addr = 0 @@ -282,7 +407,6 @@ def collect_dwarf_lines(obj_path, *, or m.group('op_end')): file = os.path.abspath(files.get(op_file, '?')) lines.append((file, op_line, op_addr)) - line_at.append((op_addr, file, op_line)) if m.group('op_end'): op_file = 1 @@ -292,26 +416,10 @@ def collect_dwarf_lines(obj_path, *, if proc.returncode != 0: raise sp.CalledProcessError(proc.returncode, proc.args) - # sort and keep first when duplicates - lines.sort() - lines_ = [] - for file, line, addr in lines: - if len(lines_) == 0 or lines_[-1][0] != file or lines[-1][1] != line: - lines_.append((file, line, addr)) - lines = lines_ - - # sort and keep first when duplicates - line_at.sort() - line_at_ = [] - for addr, file, line in line_at: - if len(line_at_) == 0 or line_at_[-1][0] != addr: - line_at_.append((addr, file, line)) - line_at = line_at_ - - return lines, line_at + return LineInfo(lines) -def collect_job(path, start, stop, syms, sym_at, lines, line_at, *, +def collect_job(path, start, stop, syms, lines, *, sources=None, everything=False, propagate=0, @@ -465,10 +573,9 @@ def collect_job(path, start, stop, syms, sym_at, lines, line_at, *, # the first stack frame, so we can use that as a point # of reference if last_delta 
is None: - i = bisect.bisect(lines, (last_file, last_line), - key=lambda x: (x[0], x[1])) - if i > 0: - last_delta = lines[i-1][2] - addr_ + addr__ = lines.get((last_file, last_line)) + if addr__ is not None: + last_delta = addr__ - addr_ else: # can't reverse ASLR, give up on backtrace commit() @@ -486,11 +593,8 @@ def collect_job(path, start, stop, syms, sym_at, lines, line_at, *, file, sym, line = cached else: # find sym - i = bisect.bisect(sym_at, addr, key=lambda x: x[0]) - # check that we're actually in the sym's size - if i > 0 and addr < sym_at[i-1][0] + sym_at[i-1][2]: - _, sym, _ = sym_at[i-1] - else: + sym = syms.get(addr) + if sym is None: sym = hex(addr) # filter out internal/unknown functions @@ -503,9 +607,9 @@ def collect_job(path, start, stop, syms, sym_at, lines, line_at, *, continue # find file+line - i = bisect.bisect(line_at, addr, key=lambda x: x[0]) - if i > 0: - _, file, line = line_at[i-1] + line_ = lines.get(addr) + if line_ is not None: + file, line = line_ elif len(last_stack) == 0: file, line = last_file, last_line else: @@ -568,8 +672,8 @@ def collect(obj_path, trace_paths, *, jobs = len(os.sched_getaffinity(0)) # find sym/line info to reverse ASLR - syms, sym_at = collect_syms(obj_path, **args) - lines, line_at = collect_dwarf_lines(obj_path, **args) + syms = collect_syms(obj_path, **args) + lines = collect_dwarf_lines(obj_path, **args) if jobs is not None: # try to split up files so that even single files can be processed @@ -596,8 +700,7 @@ def collect(obj_path, trace_paths, *, for results_ in p.imap_unordered( starapply, ((collect_job, - (path, start, stop, - syms, sym_at, lines, line_at), + (path, start, stop, syms, lines), args) for path, ranges in zip(trace_paths, trace_ranges) for start, stop in ranges)): @@ -607,8 +710,7 @@ def collect(obj_path, trace_paths, *, results = [] for path in trace_paths: results.extend(collect_job( - path, None, None, - syms, sym_at, lines, line_at, + path, None, None, syms, lines, **args)) return 
results @@ -841,6 +943,9 @@ def table(Result, results, diff_results=None, *, types[k].ratio( getattr(r, k, None), getattr(diff_r, k, None))))) + # append any notes + if hasattr(r, 'notes'): + entry[-1][1].extend(r.notes) return entry # recursive entry helper, only used by some scripts diff --git a/scripts/stack.py b/scripts/stack.py index fb4ef917..b9d65ed1 100755 --- a/scripts/stack.py +++ b/scripts/stack.py @@ -129,12 +129,11 @@ class StackResult(co.namedtuple('StackResult', [ _types = {'frame': RInt, 'limit': RInt} __slots__ = () - def __new__(cls, file='', function='', - frame=0, limit=0, - children=[]): + def __new__(cls, file='', function='', frame=0, limit=0, + children=None): return super().__new__(cls, file, function, RInt(frame), RInt(limit), - children) + children if children is not None else []) def __add__(self, other): return StackResult(self.file, self.function, @@ -527,6 +526,9 @@ def table(Result, results, diff_results=None, *, types[k].ratio( getattr(r, k, None), getattr(diff_r, k, None))))) + # append any notes + if hasattr(r, 'notes'): + entry[-1][1].extend(r.notes) return entry # recursive entry helper, only used by some scripts diff --git a/scripts/structs.py b/scripts/structs.py index d8f1f32f..b0e68f50 100755 --- a/scripts/structs.py +++ b/scripts/structs.py @@ -136,7 +136,8 @@ class StructResult(co.namedtuple('StructResult', [ _types = {'size': RInt, 'align': RInt} __slots__ = () - def __new__(cls, file='', struct='', size=0, align=0, children=None): + def __new__(cls, file='', struct='', size=0, align=0, + children=None): return super().__new__(cls, file, struct, RInt(size), RInt(align), children if children is not None else []) @@ -161,28 +162,6 @@ def openio(path, mode='r', buffering=-1): def collect_dwarf_files(obj_path, *, objdump_path=OBJDUMP_PATH, **args): - class FileInfo: - def __init__(self, files): - self.files = files - - def get(self, k, d=None): - return self.files.get(k, d) - - def __getitem__(self, k): - v = self.get(k) - if 
v is None: - raise KeyError(k) - return v - - def __contains__(self, k): - return self.get(k) is not None - - def __len__(self): - return len(self.files) - - def __iter__(self): - return (v for k, v in self.files.items()) - line_pattern = re.compile( '^\s*(?P[0-9]+)' '(?:\s+(?P[0-9]+))?' @@ -223,7 +202,7 @@ def collect_dwarf_files(obj_path, *, raise sp.CalledProcessError(proc.returncode, proc.args) # simplify paths - files_ = {} + files_ = co.OrderedDict() for no, file in files.items(): if os.path.commonpath([ os.getcwd(), @@ -233,104 +212,104 @@ def collect_dwarf_files(obj_path, *, files_[no] = os.path.abspath(file) files = files_ - return FileInfo(files) + return files + +# each dwarf entry can have attrs and children entries +class DwarfEntry: + def __init__(self, level, off, tag, ats={}, children=[]): + self.level = level + self.off = off + self.tag = tag + self.ats = ats or {} + self.children = children or [] + + def get(self, k, d=None): + return self.ats.get(k, d) + + def __getitem__(self, k): + return self.ats[k] + + def __contains__(self, k): + return k in self.ats + + def __repr__(self): + return '%s(%d, 0x%x, %r, %r)' % ( + self.__class__.__name__, + self.level, + self.off, + self.tag, + self.ats) + + @ft.cached_property + def name(self): + if 'DW_AT_name' in self: + name = self['DW_AT_name'].split(':')[-1].strip() + # prefix with struct/union + if self.tag == 'DW_TAG_structure_type': + name = 'struct ' + name + elif self.tag == 'DW_TAG_union_type': + name = 'union ' + name + elif self.tag == 'DW_TAG_enumeration_type': + name = 'enum ' + name + return name + else: + return None + +# a collection of dwarf entries +class DwarfInfo: + def __init__(self, entries): + self.entries = entries + + def get(self, k, d=None): + # allow lookup by both offset and dwarf name + if not isinstance(k, str): + return self.entries.get(k, d) + + else: + import difflib + + # organize entries by name + if not hasattr(self, '_by_name'): + self._by_name = {} + for entry in 
self.entries.values(): + if entry.name is not None: + self._by_name[entry.name] = entry + + # exact match? avoid difflib if we can for speed + if k in self._by_name: + return self._by_name[k] + # find the best matching dwarf entry with difflib + # + # this can be different from the actual symbol because + # of optimization passes + else: + name, entry = max( + self._by_name.items(), + key=lambda entry: difflib.SequenceMatcher( + None, entry[0], k, False).ratio(), + default=(None, None)) + return entry + + def __getitem__(self, k): + v = self.get(k) + if v is None: + raise KeyError(k) + return v + + def __contains__(self, k): + return self.get(k) is not None + + def __len__(self): + return len(self.entries) + + def __iter__(self): + return (v for k, v in self.entries.items()) def collect_dwarf_info(obj_path, filter=None, *, objdump_path=OBJDUMP_PATH, **args): filter_, filter = filter, __builtins__.filter - # each dwarf entry can have attrs and children entries - class DwarfEntry: - def __init__(self, level, off, tag, ats={}, children=[]): - self.level = level - self.off = off - self.tag = tag - self.ats = ats or {} - self.children = children or [] - - def get(self, k, d=None): - return self.ats.get(k, d) - - def __getitem__(self, k): - return self.ats[k] - - def __contains__(self, k): - return k in self.ats - - def __repr__(self): - return '%s(%d, 0x%x, %r, %r)' % ( - self.__class__.__name__, - self.level, - self.off, - self.tag, - self.ats) - - @ft.cached_property - def name(self): - if 'DW_AT_name' in self: - name = self['DW_AT_name'].split(':')[-1].strip() - # prefix with struct/union - if self.tag == 'DW_TAG_structure_type': - name = 'struct ' + name - elif self.tag == 'DW_TAG_union_type': - name = 'union ' + name - elif self.tag == 'DW_TAG_enumeration_type': - name = 'enum ' + name - return name - else: - return None - - # a collection of dwarf entries - class DwarfInfo: - def __init__(self, entries): - self.entries = entries - - def get(self, k, d=None): - # 
allow lookup by both offset and dwarf name - if not isinstance(k, str): - return self.entries.get(k, d) - - else: - import difflib - - # organize entries by name - if not hasattr(self, '_by_name'): - self._by_name = {} - for entry in self.entries.values(): - if entry.name is not None: - self._by_name[entry.name] = entry - - # exact match? avoid difflib if we can for speed - if k in self._by_name: - return self._by_name[k] - # find the best matching dwarf entry with difflib - # - # this can be different from the actual symbol because - # of optimization passes - else: - name, entry = max( - self._by_name.items(), - key=lambda entry: difflib.SequenceMatcher( - None, entry[0], k, False).ratio(), - default=(None, None)) - return entry - - def __getitem__(self, k): - v = self.get(k) - if v is None: - raise KeyError(k) - return v - - def __contains__(self, k): - return self.get(k) is not None - - def __len__(self): - return len(self.entries) - - def __iter__(self): - return (v for k, v in self.entries.items()) - info_pattern = re.compile( '^\s*(?:<(?P[^>]*)>' '\s*<(?P[^>]*)>' @@ -797,7 +776,8 @@ def table(Result, results, diff_results=None, *, for r in results_} names_ = list(table_.keys()) - # only sort the children layer if explicitly requested + # sort the children layer + names_.sort() if sort: for k, reverse in reversed(sort): names_.sort(