diff --git a/scripts/structs.py b/scripts/structs.py index 1ad1b0a7..4fa75a28 100755 --- a/scripts/structs.py +++ b/scripts/structs.py @@ -128,20 +128,23 @@ class RInt(co.namedtuple('RInt', 'x')): # struct size results class StructResult(co.namedtuple('StructResult', [ 'file', 'struct', - 'size'])): + 'size', + 'children'])): _by = ['file', 'struct'] _fields = ['size'] _sort = ['size'] _types = {'size': RInt} __slots__ = () - def __new__(cls, file='', struct='', size=0): + def __new__(cls, file='', struct='', size=0, children=[]): return super().__new__(cls, file, struct, - RInt(size)) + RInt(size), + children or []) def __add__(self, other): return StructResult(self.file, self.struct, - self.size + other.size) + self.size + other.size, + self.children + other.children) def openio(path, mode='r', buffering=-1): @@ -154,136 +157,239 @@ def openio(path, mode='r', buffering=-1): else: return open(path, mode, buffering) -def collect(obj_paths, *, +def collect_dwarf_files(obj_path, *, objdump_path=OBJDUMP_PATH, + **args): + line_pattern = re.compile( + '^\s*(?P[0-9]+)' + '(?:\s+(?P[0-9]+))?' 
+ '.*\s+(?P[^\s]+)\s*$') + + # find source paths + dirs = {} + files = {} + # note objdump-path may contain extra args + cmd = objdump_path + ['--dwarf=rawline', obj_path] + if args.get('verbose'): + print(' '.join(shlex.quote(c) for c in cmd)) + proc = sp.Popen(cmd, + stdout=sp.PIPE, + stderr=None if args.get('verbose') else sp.DEVNULL, + universal_newlines=True, + errors='replace', + close_fds=False) + for line in proc.stdout: + # note that files contain references to dirs, which we + # dereference as soon as we see them as each file table + # follows a dir table + m = line_pattern.match(line) + if m: + if not m.group('dir'): + # found a directory entry + dirs[int(m.group('no'))] = m.group('path') + else: + # found a file entry + dir = int(m.group('dir')) + if dir in dirs: + files[int(m.group('no'))] = os.path.join( + dirs[dir], + m.group('path')) + else: + files[int(m.group('no'))] = m.group('path') + proc.wait() + if proc.returncode != 0: + if not args.get('verbose'): + for line in proc.stderr: + sys.stderr.write(line) + raise sp.CalledProcessError(proc.returncode, proc.args) + + # simplify paths + files_ = {} + for no, file in files.items(): + if os.path.commonpath([ + os.getcwd(), + os.path.abspath(file)]) == os.getcwd(): + files_[no] = os.path.relpath(file) + else: + files_[no] = os.path.abspath(file) + files = files_ + + return files + +def collect_dwarf_info(obj_path, filter=None, *, + objdump_path=OBJDUMP_PATH, + **args): + filter_, filter = filter, __builtins__.filter + + # each dwarf entry can have attrs and children entries + class DwarfEntry: + def __init__(self, level, off, tag, ats={}, children=[]): + self.level = level + self.off = off + self.tag = tag + self.ats = ats or {} + self.children = children or [] + + def __getitem__(self, k): + return self.ats[k] + + def __contains__(self, k): + return k in self.ats + + info_pattern = re.compile( + '^\s*(?:<(?P[^>]*)>' + '\s*<(?P[^>]*)>' + '.*\(\s*(?P[^)]*?)\s*\)' + '|\s*<(?P[^>]*)>' + '\s*(?P[^>:]*?)' + 
'\s*:(?P.*))\s*$') + + # collect dwarf entries + entries = co.OrderedDict() + entry = None + levels = {} + # note objdump-path may contain extra args + cmd = objdump_path + ['--dwarf=info', obj_path] + if args.get('verbose'): + print(' '.join(shlex.quote(c) for c in cmd)) + proc = sp.Popen(cmd, + stdout=sp.PIPE, + stderr=None if args.get('verbose') else sp.DEVNULL, + universal_newlines=True, + errors='replace', + close_fds=False) + for line in proc.stdout: + # state machine here to find dwarf entries + m = info_pattern.match(line) + if m: + if m.group('tag'): + entry = DwarfEntry( + level=int(m.group('level'), 0), + off=int(m.group('off'), 16), + tag=m.group('tag').strip(), + ) + # keep track of top-level entries + if (entry.level == 1 and ( + # unless this entry is filtered + filter_ is None or entry.tag in filter_)): + entries[entry.off] = entry + # store entry in parent + levels[entry.level] = entry + if entry.level-1 in levels: + levels[entry.level-1].children.append(entry) + elif m.group('at'): + if entry: + entry.ats[m.group('at').strip()] = ( + m.group('v').strip()) + proc.wait() + if proc.returncode != 0: + if not args.get('verbose'): + for line in proc.stderr: + sys.stderr.write(line) + raise sp.CalledProcessError(proc.returncode, proc.args) + + return entries + +def collect(obj_paths, *, sources=None, everything=False, internal=False, **args): - line_pattern = re.compile( - '^\s+(?P[0-9]+)' - '(?:\s+(?P[0-9]+))?' 
- '\s+.*' - '\s+(?P[^\s]+)$') - info_pattern = re.compile( - '^(?:.*(?PDW_TAG_[a-z_]+).*' - '|.*DW_AT_name.*:\s*(?P[^:\s]+)\s*' - '|.*DW_AT_decl_file.*:\s*(?P[0-9]+)\s*' - '|.*DW_AT_byte_size.*:\s*(?P[0-9]+)\s*)$') - results = [] - for path in obj_paths: - # find files, we want to filter by structs in .h files - dirs = {} - files = {} - # note objdump-path may contain extra args - cmd = objdump_path + ['--dwarf=rawline', path] - if args.get('verbose'): - print(' '.join(shlex.quote(c) for c in cmd)) - proc = sp.Popen(cmd, - stdout=sp.PIPE, - stderr=None if args.get('verbose') else sp.DEVNULL, - universal_newlines=True, - errors='replace', - close_fds=False) - for line in proc.stdout: - # note that files contain references to dirs, which we - # dereference as soon as we see them as each file table follows a - # dir table - m = line_pattern.match(line) - if m: - if not m.group('dir'): - # found a directory entry - dirs[int(m.group('no'))] = m.group('path') - else: - # found a file entry - dir = int(m.group('dir')) - if dir in dirs: - files[int(m.group('no'))] = os.path.join( - dirs[dir], - m.group('path')) - else: - files[int(m.group('no'))] = m.group('path') - proc.wait() - if proc.returncode != 0: - if not args.get('verbose'): - for line in proc.stderr: - sys.stderr.write(line) - sys.exit(-1) + for obj_path in obj_paths: + # find source paths + files = collect_dwarf_files(obj_path, **args) - # collect structs as we parse dwarf info - results_ = [] - is_struct = False - s_name = None - s_file = None - s_size = None - def append(): - # ignore non-structs and unnamed files - if is_struct and s_name: - file = files.get(s_file, '?') - results_.append(StructResult(file, s_name, s_size)) - # note objdump-path may contain extra args - cmd = objdump_path + ['--dwarf=info', path] - if args.get('verbose'): - print(' '.join(shlex.quote(c) for c in cmd)) - proc = sp.Popen(cmd, - stdout=sp.PIPE, - stderr=None if args.get('verbose') else sp.DEVNULL, - universal_newlines=True, - 
errors='replace', - close_fds=False) - for i, line in enumerate(proc.stdout): - # state machine here to find structs - m = info_pattern.match(line) - if m: - if m.group('tag'): - append() - is_struct = (m.group('tag') == 'DW_TAG_structure_type' - or m.group('tag') == 'DW_TAG_union_type') - s_name = None - s_file = None - s_size = None - elif m.group('name'): - s_name = m.group('name') - elif m.group('file'): - s_file = int(m.group('file')) - elif m.group('size'): - s_size = int(m.group('size')) - # don't forget the last struct - append() - proc.wait() - if proc.returncode != 0: - if not args.get('verbose'): - for line in proc.stderr: - sys.stderr.write(line) - sys.exit(-1) + # find dwarf info + info = collect_dwarf_info(obj_path, **args) + + # collect structs and other types + typedefs = {} + typedefed = set() + types = {} + for no, entry in info.items(): + # skip non-types + if entry.tag not in { + 'DW_TAG_typedef', + 'DW_TAG_structure_type', + 'DW_TAG_union_type', + 'DW_TAG_enumeration_type'}: + continue - for r in results_: # ignore filtered sources + file = files.get(int(entry['DW_AT_decl_file']), '?') if sources is not None: - if not any(os.path.abspath(r.file) == os.path.abspath(s) + if not any(os.path.abspath(file) == os.path.abspath(s) for s in sources): continue else: # default to only cwd - if not everything and not os.path.commonpath([ - os.getcwd(), - os.path.abspath(r.file)]) == os.getcwd(): + if (not everything and not os.path.commonpath([ + os.getcwd(), + os.path.abspath(file)]) == os.getcwd()): continue # limit to .h files unless --internal - if not internal and not r.file.endswith('.h'): + if not internal and not file.endswith('.h'): continue - # simplify path - if os.path.commonpath([ - os.getcwd(), - os.path.abspath(r.file)]) == os.getcwd(): - file = os.path.relpath(r.file) - else: - file = os.path.abspath(r.file) + # skip types with no names + if 'DW_AT_name' not in entry: + continue + name = entry['DW_AT_name'].split(':')[-1].strip() - 
results.append(r._replace(file=file)) + # find the size of a type, recursing if necessary + def sizeof(entry): + # explicit size? + if 'DW_AT_byte_size' in entry: + return int(entry['DW_AT_byte_size']) + # indirect type? + elif 'DW_AT_type' in entry: + type = int(entry['DW_AT_type'].strip('<>'), 0) + size = sizeof(info[type]) + # wait are we an array? + if entry.tag == 'DW_TAG_array_type': + for child in entry.children: + if child.tag == 'DW_TAG_subrange_type': + size *= int(child['DW_AT_upper_bound']) + 1 + return size + else: + assert False + size = sizeof(entry) + + # find children, recursing if necessary + def childrenof(entry): + # pointer? these end up recursive but the underlying + # type doesn't really matter here + if entry.tag == 'DW_TAG_pointer_type': + return [] + # indirect type? + elif 'DW_AT_type' in entry: + type = int(entry['DW_AT_type'].strip('<>'), 0) + return childrenof(info[type]) + else: + children = [] + for child in entry.children: + name = child['DW_AT_name'].split(':')[-1].strip() + size = sizeof(child) + children.append(StructResult(file, name, size, + childrenof(child))) + return children + children = childrenof(entry) + + # typedefs exist in a separate namespace, so we need to track + # these separately + if entry.tag == 'DW_TAG_typedef': + typedefs[no] = StructResult(file, name, size, children) + typedefed.add(int(entry['DW_AT_type'].strip('<>'), 0)) + else: + types[no] = StructResult(file, name, size, children) + + # let typedefs take priority + results.extend(typedefs.values()) + results.extend(type + for no, type in types.items() + if no not in typedefed) return results @@ -330,6 +436,8 @@ def table(Result, results, diff_results=None, *, all=False, compare=None, summary=False, + depth=None, + hot=None, **_): all_, all = all, __builtins__.all @@ -344,6 +452,34 @@ def table(Result, results, diff_results=None, *, if diff_results is not None: diff_results = fold(Result, diff_results, by=by) + # reduce children to hot paths? 
+ if hot: + def rec_hot(results_, seen=set()): + if not results_: + return [] + + r = max(results_, + key=lambda r: tuple( + tuple((getattr(r, k),) + if getattr(r, k, None) is not None + else () + for k in ( + [k] if k else [ + k for k in Result._sort + if k in fields]) + if k in fields) + for k in it.chain(hot, [None]))) + + # found a cycle? + if id(r) in seen: + return [] + + return [r._replace(children=[])] + rec_hot( + r.children, + seen | {id(r)}) + + results = [r._replace(children=rec_hot(r.children)) for r in results] + # organize by name table = { ','.join(str(getattr(r, k) or '') for k in by): r @@ -485,6 +621,59 @@ def table(Result, results, diff_results=None, *, getattr(diff_r, k, None))))) return entry + # recursive entry helper + def recurse(results_, depth_, seen=set(), + prefixes=('', '', '', '')): + # build the children table at each layer + results_ = fold(Result, results_, by=by) + table_ = { + ','.join(str(getattr(r, k) or '') for k in by): r + for r in results_} + names_ = list(table_.keys()) + + # sort the children layer + names_.sort() + if sort: + for k, reverse in reversed(sort): + names_.sort( + key=lambda n: tuple( + (getattr(table_[n], k),) + if getattr(table_.get(n), k, None) + is not None + else () + for k in ( + [k] if k else [ + k for k in Result._sort + if k in fields])), + reverse=reverse ^ (not k or k in Result._fields)) + + for i, name in enumerate(names_): + r = table_[name] + is_last = (i == len(names_)-1) + + line = table_entry(name, r) + line = [x if isinstance(x, tuple) else (x, []) for x in line] + # add prefixes + line[0] = (prefixes[0+is_last] + line[0][0], line[0][1]) + # add cycle detection + if id(r) in seen: + line[-1] = (line[-1][0], line[-1][1] + ['cycle detected']) + lines.append(line) + + # found a cycle? + if id(r) in seen: + continue + + # recurse? 
+ if depth_ > 1: + recurse(r.children, + depth_-1, + seen | {id(r)}, + (prefixes[2+is_last] + "|-> ", + prefixes[2+is_last] + "'-> ", + prefixes[2+is_last] + "| ", + prefixes[2+is_last] + " ")) + # entries if (not summary) or compare: for name in names: @@ -495,6 +684,16 @@ def table(Result, results, diff_results=None, *, diff_r = diff_table.get(name) lines.append(table_entry(name, r, diff_r)) + # recursive entries + if name in table and depth > 1: + recurse(table[name].children, + depth-1, + {id(r)}, + ("|-> ", + "'-> ", + "| ", + " ")) + # total, unless we're comparing if not (compare and not percent and not diff): r = next(iter(fold(Result, results, by=[])), None) @@ -534,6 +733,12 @@ def main(obj_paths, *, defines=[], sort=None, **args): + # figure out depth + if args.get('depth') is None: + args['depth'] = mt.inf if args.get('hot') else 1 + elif args.get('depth') == 0: + args['depth'] = mt.inf + # find sizes if not args.get('use', None): results = collect(obj_paths, **args) @@ -721,6 +926,18 @@ if __name__ == "__main__": '--internal', action='store_true', help="Also show structs in .c files.") + parser.add_argument( + '-z', '--depth', + nargs='?', + type=lambda x: int(x, 0), + const=0, + help="Depth of function calls to show. 0 shows all calls unless " + "we find a cycle. Defaults to 0.") + parser.add_argument( + '-t', '--hot', + nargs='?', + action='append', + help="Show only the hot path for each function call.") parser.add_argument( '--objdump-path', type=lambda x: x.split(),