From 512cf5ad4b178f3f62de26defb4634c2258eecc9 Mon Sep 17 00:00:00 2001
From: Christopher Haster <geky@geky.net>
Date: Sun, 1 Dec 2024 16:59:51 -0600
Subject: [PATCH] scripts: Adopted ctx.py-related changes in other result
 scripts

- Adopted higher-level collect data structures:

  - high-level DwarfEntry/DwarfInfo class
  - high-level SymInfo class
  - high-level LineInfo class

  Note these had to be moved out of function scope due to pickling
  issues in perf.py/perfbd.py. These were only function-local to
  minimize scope leak so this fortunately was an easy change.

- Adopted better list-default patterns in Result types:

    def __new__(..., children=None):
        return Result(..., children if children is not None else [])

  A classic Python footgun: a mutable default is shared across calls.

- Adopted notes rendering, though this is only used by ctx.py at the
  moment.

- Reverted to sorting children entries, for now.

  Unfortunately there's no easy way to sort the result entries in
  perf.py/perfbd.py before folding. Folding is going to make a mess
  of more complicated children anyways, so another solution is
  needed...

And some other shared miscellany.
---
 scripts/code.py    | 171 ++++++++++++++++---------
 scripts/cov.py     |   3 +
 scripts/csv.py     |   3 +
 scripts/ctx.py     | 311 +++++++++++++++++++++------------------------
 scripts/data.py    | 171 ++++++++++++++++---------
 scripts/perf.py    | 205 +++++++++++++++++++++++-------
 scripts/perfbd.py  | 227 ++++++++++++++++++++++++---------
 scripts/stack.py   |  10 +-
 scripts/structs.py | 214 ++++++++++++++-----------------
 9 files changed, 805 insertions(+), 510 deletions(-)
diff --git a/scripts/code.py b/scripts/code.py
index 119487ab..e65d76e9 100755
--- a/scripts/code.py
+++ b/scripts/code.py
@@ -17,8 +17,8 @@ __import__('sys').path.pop(0)
 
 import collections as co
 import csv
-import difflib
 import itertools as it
+import functools as ft
 import math as mt
 import os
 import re
@@ -168,8 +168,8 @@ def collect_dwarf_files(obj_path, *,
                 '.*\s+(?P<path>[^\s]+)\s*$')
 
     # find source paths
-    dirs = {}
-    files = {}
+    dirs = co.OrderedDict()
+    files = co.OrderedDict()
     # note objdump-path may contain extra args
     cmd = objdump_path + ['--dwarf=rawline', obj_path]
     if args.get('verbose'):
@@ -202,7 +202,7 @@ def collect_dwarf_files(obj_path, *,
         raise sp.CalledProcessError(proc.returncode, proc.args)
 
     # simplify paths
-    files_ = {}
+    files_ = co.OrderedDict()
     for no, file in files.items():
         if os.path.commonpath([
                     os.getcwd(),
@@ -214,34 +214,102 @@ def collect_dwarf_files(obj_path, *,
 
     return files
 
+# each dwarf entry can have attrs and children entries
+class DwarfEntry:
+    def __init__(self, level, off, tag, ats={}, children=[]):
+        self.level = level
+        self.off = off
+        self.tag = tag
+        self.ats = ats or {}
+        self.children = children or []
+
+    def get(self, k, d=None):
+        return self.ats.get(k, d)
+
+    def __getitem__(self, k):
+        return self.ats[k]
+
+    def __contains__(self, k):
+        return k in self.ats
+
+    def __repr__(self):
+        return '%s(%d, 0x%x, %r, %r)' % (
+                self.__class__.__name__,
+                self.level,
+                self.off,
+                self.tag,
+                self.ats)
+
+    @ft.cached_property
+    def name(self):
+        if 'DW_AT_name' in self:
+            name = self['DW_AT_name'].split(':')[-1].strip()
+            # prefix with struct/union/enum
+            if self.tag == 'DW_TAG_structure_type':
+                name = 'struct ' + name
+            elif self.tag == 'DW_TAG_union_type':
+                name = 'union ' + name
+            elif self.tag == 'DW_TAG_enumeration_type':
+                name = 'enum ' + name
+            return name
+        else:
+            return None
+
+# a collection of dwarf entries
+class DwarfInfo:
+    def __init__(self, entries):
+        self.entries = entries
+
+    def get(self, k, d=None):
+        # allow lookup by both offset and dwarf name
+        if not isinstance(k, str):
+            return self.entries.get(k, d)
+
+        else:
+            import difflib
+
+            # organize entries by name
+            if not hasattr(self, '_by_name'):
+                self._by_name = {}
+                for entry in self.entries.values():
+                    if entry.name is not None:
+                        self._by_name[entry.name] = entry
+
+            # exact match? avoid difflib if we can for speed
+            if k in self._by_name:
+                return self._by_name[k]
+            # find the best matching dwarf entry with difflib
+            #
+            # this can be different from the actual symbol because
+            # of optimization passes
+            else:
+                name, entry = max(
+                        self._by_name.items(),
+                        key=lambda entry: difflib.SequenceMatcher(
+                            None, entry[0], k, False).ratio(),
+                        default=(None, d))
+                return entry
+
+    def __getitem__(self, k):
+        v = self.get(k)
+        if v is None:
+            raise KeyError(k)
+        return v
+
+    def __contains__(self, k):
+        return self.get(k) is not None
+
+    def __len__(self):
+        return len(self.entries)
+
+    def __iter__(self):
+        return (v for k, v in self.entries.items())
+
 def collect_dwarf_info(obj_path, filter=None, *,
         objdump_path=OBJDUMP_PATH,
         **args):
     filter_, filter = filter, __builtins__.filter
 
-    # each dwarf entry can have attrs and children entries
-    class DwarfEntry:
-        def __init__(self, level, off, tag, ats={}, children=[]):
-            self.level = level
-            self.off = off
-            self.tag = tag
-            self.ats = ats or {}
-            self.children = children or []
-
-        def __getitem__(self, k):
-            return self.ats[k]
-
-        def __contains__(self, k):
-            return k in self.ats
-
-        def __repr__(self):
-            return '%s(%d, 0x%x, %r, %r)' % (
-                    self.__class__.__name__,
-                    self.level,
-                    self.off,
-                    self.tag,
-                    self.ats)
-
     info_pattern = re.compile(
             '^\s*(?:<(?P<level>[^>]*)>'
                     '\s*<(?P<off>[^>]*)>'
@@ -251,7 +319,7 @@ def collect_dwarf_info(obj_path, filter=None, *,
                     '\s*:(?P<v>.*))\s*$')
 
     # collect dwarf entries
-    entries = co.OrderedDict()
+    info = co.OrderedDict()
     entry = None
     levels = {}
     # note objdump-path may contain extra args
@@ -277,7 +345,7 @@ def collect_dwarf_info(obj_path, filter=None, *,
                 if (entry.level == 1 and (
                         # unless this entry is filtered
                         filter_ is None or entry.tag in filter_)):
-                    entries[entry.off] = entry
+                    info[entry.off] = entry
                 # store entry in parent
                 levels[entry.level] = entry
                 if entry.level-1 in levels:
@@ -290,7 +358,7 @@ def collect_dwarf_info(obj_path, filter=None, *,
     if proc.returncode != 0:
         raise sp.CalledProcessError(proc.returncode, proc.args)
 
-    return entries
+    return DwarfInfo(info)
 
 def collect_sizes(obj_path, *,
         nm_path=NM_PATH,
@@ -331,33 +399,21 @@ def collect(obj_paths, *,
         **args):
     results = []
     for obj_path in obj_paths:
-        # guess the source, if we have debug-info we'll replace this later
-        file = re.sub('(\.o)?$', '.c', obj_path, 1)
-
         # find sizes
         sizes = collect_sizes(obj_path, everything=everything, **args)
 
         # try to figure out the source file if we have debug-info
-        defs = {}
         try:
             files = collect_dwarf_files(obj_path, **args)
             info = collect_dwarf_info(obj_path,
                     filter={'DW_TAG_subprogram', 'DW_TAG_variable'},
                     **args)
 
-            for no, entry in info.items():
-                # skip funcs with no name or no file
-                if ('DW_AT_name' not in entry
-                        or 'DW_AT_decl_file' not in entry):
-                    continue
-                name_ = entry['DW_AT_name'].split(':')[-1].strip()
-                file_ = files.get(int(entry['DW_AT_decl_file']), '?')
-                defs[name_] = file_
-
         except sp.CalledProcessError:
             # do nothing on error, we don't need objdump to work, source
             # files may just be inaccurate
-            pass
+            files = {}
+            info = {}
 
         # map function sizes to debug symbols
         for func, size in sizes.items():
@@ -365,34 +421,28 @@ def collect(obj_paths, *,
             if not everything and func.startswith('__'):
                 continue
 
-            # find best matching debug symbol, this may be slightly different
+            # find best matching dwarf entry, this may be slightly different
             # due to optimizations
-            if defs:
-                # exact match? avoid difflib if we can for speed
-                if func in defs:
-                    file_ = defs[func]
-                else:
-                    _, file_ = max(
-                            defs.items(),
-                            key=lambda d: difflib.SequenceMatcher(None,
-                                d[0],
-                                func, False).ratio())
+            entry = info.get(func)
+
+            if entry is not None and 'DW_AT_decl_file' in entry:
+                file = files.get(int(entry['DW_AT_decl_file']), '?')
             else:
-                file_ = file
+                file = re.sub('(\.o)?$', '.c', obj_path, 1)
 
             # ignore filtered sources
             if sources is not None:
-                if not any(os.path.abspath(file_) == os.path.abspath(s)
+                if not any(os.path.abspath(file) == os.path.abspath(s)
                         for s in sources):
                     continue
             else:
                 # default to only cwd
                 if not everything and not os.path.commonpath([
                         os.getcwd(),
-                        os.path.abspath(file_)]) == os.getcwd():
+                        os.path.abspath(file)]) == os.getcwd():
                     continue
 
-            results.append(CodeResult(file_, func, size))
+            results.append(CodeResult(file, func, size))
 
     return results
 
@@ -624,6 +674,9 @@ def table(Result, results, diff_results=None, *,
                                 types[k].ratio(
                                     getattr(r, k, None),
                                     getattr(diff_r, k, None)))))
+        # append any notes
+        if hasattr(r, 'notes'):
+            entry[-1][1].extend(r.notes)
         return entry
 
     # recursive entry helper, only used by some scripts
diff --git a/scripts/cov.py b/scripts/cov.py
index 8e245ec5..730c7ff8 100755
--- a/scripts/cov.py
+++ b/scripts/cov.py
@@ -578,6 +578,9 @@ def table(Result, results, diff_results=None, *,
                                 types[k].ratio(
                                     getattr(r, k, None),
                                     getattr(diff_r, k, None)))))
+        # append any notes
+        if hasattr(r, 'notes'):
+            entry[-1][1].extend(r.notes)
         return entry
 
     # recursive entry helper, only used by some scripts
diff --git a/scripts/csv.py b/scripts/csv.py
index 866f1e5a..582f0d70 100755
--- a/scripts/csv.py
+++ b/scripts/csv.py
@@ -1590,6 +1590,9 @@ def table(Result, results, diff_results=None, *,
                                 types[k].ratio(
                                     getattr(r, k, None),
                                     getattr(diff_r, k, None)))))
+        # append any notes
+        if hasattr(r, 'notes'):
+            entry[-1][1].extend(r.notes)
         return entry
 
     # recursive entry helper, only used by some scripts
diff --git a/scripts/ctx.py b/scripts/ctx.py
index 428aa877..2e82a21d 100755
--- a/scripts/ctx.py
+++ b/scripts/ctx.py
@@ -160,61 +160,67 @@ def openio(path, mode='r', buffering=-1):
     else:
         return open(path, mode, buffering)
 
-def collect_syms(obj_path, global_only=False, *,
-        objdump_path=None,
-        **args):
-    class SymInfo:
-        def __init__(self, syms):
-            self.syms = syms
+class SymInfo:
+    def __init__(self, syms):
+        self.syms = syms
 
-        def get(self, k, d=None):
-            # allow lookup by both symbol and address
-            if isinstance(k, str):
-                # organize by symbol, note multiple symbols can share a name
-                if not hasattr(self, '_by_sym'):
-                    self._by_sym = {}
-                    for sym, addr, size in self.syms:
-                        self._by_sym[sym] = (addr, size)
-                return self._by_sym.get(k, d)
+    def get(self, k, d=None):
+        # allow lookup by both symbol and address
+        if isinstance(k, str):
+            # organize by symbol, note multiple symbols can share a name
+            if not hasattr(self, '_by_sym'):
+                by_sym = {}
+                for sym, addr, size in self.syms:
+                    if sym not in by_sym:
+                        by_sym[sym] = []
+                    if (addr, size) not in by_sym[sym]:
+                        by_sym[sym].append((addr, size))
+                self._by_sym = by_sym
+            return self._by_sym.get(k, d)
 
+        else:
+            import bisect
+
+            # organize by address
+            if not hasattr(self, '_by_addr'):
+                # sort and keep largest/first when duplicates
+                syms = self.syms.copy()
+                syms.sort(key=lambda x: (x[1], -x[2], x[0]))
+
+                by_addr = []
+                for name, addr, size in syms:
+                    if (len(by_addr) == 0
+                            or by_addr[-1][1] != addr):
+                        by_addr.append((name, addr, size))
+                self._by_addr = by_addr
+
+            # find sym by range
+            i = bisect.bisect(self._by_addr, k,
+                    key=lambda x: x[1])
+            # check that we're actually in this sym's size
+            if i > 0 and k < self._by_addr[i-1][1]+self._by_addr[i-1][2]:
+                return self._by_addr[i-1][0]
             else:
-                import bisect
+                return d
 
-                # organize by address
-                if not hasattr(self, '_by_addr'):
-                    # sort and keep largest/first when duplicates
-                    syms = self.syms.copy()
-                    syms.sort(key=lambda x: (x[1], -x[2], x[0]))
+    def __getitem__(self, k):
+        v = self.get(k)
+        if v is None:
+            raise KeyError(k)
+        return v
 
-                    self._by_addr = []
-                    for name, addr, size in syms:
-                        if (len(self._by_addr) == 0
-                                or self._by_addr[-1][0] != addr):
-                            self._by_addr.append((name, addr, size))
+    def __contains__(self, k):
+        return self.get(k) is not None
 
-                # find sym by range
-                i = bisect.bisect(self._by_addr, k, key=lambda x: x[1])
-                # check that we're actually in this sym's size
-                if i > 0 and k < self._by_addr[i-1][1]+self._by_addr[i-1][2]:
-                    return self._by_addr[i-1]
-                else:
-                    return None
+    def __len__(self):
+        return len(self.syms)
 
-        def __getitem__(self, k):
-            v = self.get(k)
-            if v is None:
-                raise KeyError(k)
-            return v
-
-        def __contains__(self, k):
-            return self.get(k) is not None
-
-        def __len__(self):
-            return len(self.syms)
-
-        def __iter__(self):
-            return iter(self.syms)
+    def __iter__(self):
+        return iter(self.syms)
 
+def collect_syms(obj_path, global_only=False, *,
+        objdump_path=OBJDUMP_PATH,
+        **args):
     symbol_pattern = re.compile(
             '^(?P<addr>[0-9a-fA-F]+)'
                 ' (?P<scope>.).*'
@@ -260,28 +266,6 @@ def collect_syms(obj_path, global_only=False, *,
 def collect_dwarf_files(obj_path, *,
         objdump_path=OBJDUMP_PATH,
         **args):
-    class FileInfo:
-        def __init__(self, files):
-            self.files = files
-
-        def get(self, k, d=None):
-            return self.files.get(k, d)
-
-        def __getitem__(self, k):
-            v = self.get(k)
-            if v is None:
-                raise KeyError(k)
-            return v
-
-        def __contains__(self, k):
-            return self.get(k) is not None
-
-        def __len__(self):
-            return len(self.files)
-
-        def __iter__(self):
-            return (v for k, v in self.files.items())
-
     line_pattern = re.compile(
             '^\s*(?P<no>[0-9]+)'
                 '(?:\s+(?P<dir>[0-9]+))?'
@@ -322,7 +306,7 @@ def collect_dwarf_files(obj_path, *,
         raise sp.CalledProcessError(proc.returncode, proc.args)
 
     # simplify paths
-    files_ = {}
+    files_ = co.OrderedDict()
     for no, file in files.items():
         if os.path.commonpath([
                     os.getcwd(),
@@ -332,104 +316,104 @@ def collect_dwarf_files(obj_path, *,
             files_[no] = os.path.abspath(file)
     files = files_
 
-    return FileInfo(files)
+    return files
+
+# each dwarf entry can have attrs and children entries
+class DwarfEntry:
+    def __init__(self, level, off, tag, ats={}, children=[]):
+        self.level = level
+        self.off = off
+        self.tag = tag
+        self.ats = ats or {}
+        self.children = children or []
+
+    def get(self, k, d=None):
+        return self.ats.get(k, d)
+
+    def __getitem__(self, k):
+        return self.ats[k]
+
+    def __contains__(self, k):
+        return k in self.ats
+
+    def __repr__(self):
+        return '%s(%d, 0x%x, %r, %r)' % (
+                self.__class__.__name__,
+                self.level,
+                self.off,
+                self.tag,
+                self.ats)
+
+    @ft.cached_property
+    def name(self):
+        if 'DW_AT_name' in self:
+            name = self['DW_AT_name'].split(':')[-1].strip()
+            # prefix with struct/union/enum
+            if self.tag == 'DW_TAG_structure_type':
+                name = 'struct ' + name
+            elif self.tag == 'DW_TAG_union_type':
+                name = 'union ' + name
+            elif self.tag == 'DW_TAG_enumeration_type':
+                name = 'enum ' + name
+            return name
+        else:
+            return None
+
+# a collection of dwarf entries
+class DwarfInfo:
+    def __init__(self, entries):
+        self.entries = entries
+
+    def get(self, k, d=None):
+        # allow lookup by both offset and dwarf name
+        if not isinstance(k, str):
+            return self.entries.get(k, d)
+
+        else:
+            import difflib
+
+            # organize entries by name
+            if not hasattr(self, '_by_name'):
+                self._by_name = {}
+                for entry in self.entries.values():
+                    if entry.name is not None:
+                        self._by_name[entry.name] = entry
+
+            # exact match? avoid difflib if we can for speed
+            if k in self._by_name:
+                return self._by_name[k]
+            # find the best matching dwarf entry with difflib
+            #
+            # this can be different from the actual symbol because
+            # of optimization passes
+            else:
+                name, entry = max(
+                        self._by_name.items(),
+                        key=lambda entry: difflib.SequenceMatcher(
+                            None, entry[0], k, False).ratio(),
+                        default=(None, d))
+                return entry
+
+    def __getitem__(self, k):
+        v = self.get(k)
+        if v is None:
+            raise KeyError(k)
+        return v
+
+    def __contains__(self, k):
+        return self.get(k) is not None
+
+    def __len__(self):
+        return len(self.entries)
+
+    def __iter__(self):
+        return (v for k, v in self.entries.items())
 
 def collect_dwarf_info(obj_path, filter=None, *,
         objdump_path=OBJDUMP_PATH,
         **args):
     filter_, filter = filter, __builtins__.filter
 
-    # each dwarf entry can have attrs and children entries
-    class DwarfEntry:
-        def __init__(self, level, off, tag, ats={}, children=[]):
-            self.level = level
-            self.off = off
-            self.tag = tag
-            self.ats = ats or {}
-            self.children = children or []
-
-        def get(self, k, d=None):
-            return self.ats.get(k, d)
-
-        def __getitem__(self, k):
-            return self.ats[k]
-
-        def __contains__(self, k):
-            return k in self.ats
-
-        def __repr__(self):
-            return '%s(%d, 0x%x, %r, %r)' % (
-                    self.__class__.__name__,
-                    self.level,
-                    self.off,
-                    self.tag,
-                    self.ats)
-
-        @ft.cached_property
-        def name(self):
-            if 'DW_AT_name' in self:
-                name = self['DW_AT_name'].split(':')[-1].strip()
-                # prefix with struct/union/enum
-                if self.tag == 'DW_TAG_structure_type':
-                    name = 'struct ' + name
-                elif self.tag == 'DW_TAG_union_type':
-                    name = 'union ' + name
-                elif self.tag == 'DW_TAG_enumeration_type':
-                    name = 'enum ' + name
-                return name
-            else:
-                return None
-
-    # a collection of dwarf entries
-    class DwarfInfo:
-        def __init__(self, entries):
-            self.entries = entries
-
-        def get(self, k, d=None):
-            # allow lookup by both offset and dwarf name
-            if not isinstance(k, str):
-                return self.entries.get(k, d)
-
-            else:
-                import difflib
-
-                # organize entries by name
-                if not hasattr(self, '_by_name'):
-                    self._by_name = {}
-                    for entry in self.entries.values():
-                        if entry.name is not None:
-                            self._by_name[entry.name] = entry
-
-                # exact match? avoid difflib if we can for speed
-                if k in self._by_name:
-                    return self._by_name[k]
-                # find the best matching dwarf entry with difflib
-                #
-                # this can be different from the actual symbol because
-                # of optimization passes
-                else:
-                    name, entry = max(
-                            self._by_name.items(),
-                            key=lambda entry: difflib.SequenceMatcher(
-                                None, entry[0], k, False).ratio(),
-                            default=(None, None))
-                    return entry
-
-        def __getitem__(self, k):
-            v = self.get(k)
-            if v is None:
-                raise KeyError(k)
-            return v
-
-        def __contains__(self, k):
-            return self.get(k) is not None
-
-        def __len__(self):
-            return len(self.entries)
-
-        def __iter__(self):
-            return (v for k, v in self.entries.items())
-
     info_pattern = re.compile(
             '^\s*(?:<(?P<level>[^>]*)>'
                     '\s*<(?P<off>[^>]*)>'
@@ -925,7 +909,8 @@ def table(Result, results, diff_results=None, *,
                     for r in results_}
         names_ = list(table_.keys())
 
-        # only sort the children layer if explicitly requested
+        # sort the children layer
+        names_.sort()
         if sort:
             for k, reverse in reversed(sort):
                 names_.sort(
diff --git a/scripts/data.py b/scripts/data.py
index 1aafac67..3b5fd438 100755
--- a/scripts/data.py
+++ b/scripts/data.py
@@ -17,8 +17,8 @@ __import__('sys').path.pop(0)
 
 import collections as co
 import csv
-import difflib
 import itertools as it
+import functools as ft
 import math as mt
 import os
 import re
@@ -168,8 +168,8 @@ def collect_dwarf_files(obj_path, *,
                 '.*\s+(?P<path>[^\s]+)\s*$')
 
     # find source paths
-    dirs = {}
-    files = {}
+    dirs = co.OrderedDict()
+    files = co.OrderedDict()
     # note objdump-path may contain extra args
     cmd = objdump_path + ['--dwarf=rawline', obj_path]
     if args.get('verbose'):
@@ -202,7 +202,7 @@ def collect_dwarf_files(obj_path, *,
         raise sp.CalledProcessError(proc.returncode, proc.args)
 
     # simplify paths
-    files_ = {}
+    files_ = co.OrderedDict()
     for no, file in files.items():
         if os.path.commonpath([
                     os.getcwd(),
@@ -214,34 +214,102 @@ def collect_dwarf_files(obj_path, *,
 
     return files
 
+# each dwarf entry can have attrs and children entries
+class DwarfEntry:
+    def __init__(self, level, off, tag, ats={}, children=[]):
+        self.level = level
+        self.off = off
+        self.tag = tag
+        self.ats = ats or {}
+        self.children = children or []
+
+    def get(self, k, d=None):
+        return self.ats.get(k, d)
+
+    def __getitem__(self, k):
+        return self.ats[k]
+
+    def __contains__(self, k):
+        return k in self.ats
+
+    def __repr__(self):
+        return '%s(%d, 0x%x, %r, %r)' % (
+                self.__class__.__name__,
+                self.level,
+                self.off,
+                self.tag,
+                self.ats)
+
+    @ft.cached_property
+    def name(self):
+        if 'DW_AT_name' in self:
+            name = self['DW_AT_name'].split(':')[-1].strip()
+            # prefix with struct/union/enum
+            if self.tag == 'DW_TAG_structure_type':
+                name = 'struct ' + name
+            elif self.tag == 'DW_TAG_union_type':
+                name = 'union ' + name
+            elif self.tag == 'DW_TAG_enumeration_type':
+                name = 'enum ' + name
+            return name
+        else:
+            return None
+
+# a collection of dwarf entries
+class DwarfInfo:
+    def __init__(self, entries):
+        self.entries = entries
+
+    def get(self, k, d=None):
+        # allow lookup by both offset and dwarf name
+        if not isinstance(k, str):
+            return self.entries.get(k, d)
+
+        else:
+            import difflib
+
+            # organize entries by name
+            if not hasattr(self, '_by_name'):
+                self._by_name = {}
+                for entry in self.entries.values():
+                    if entry.name is not None:
+                        self._by_name[entry.name] = entry
+
+            # exact match? avoid difflib if we can for speed
+            if k in self._by_name:
+                return self._by_name[k]
+            # find the best matching dwarf entry with difflib
+            #
+            # this can be different from the actual symbol because
+            # of optimization passes
+            else:
+                name, entry = max(
+                        self._by_name.items(),
+                        key=lambda entry: difflib.SequenceMatcher(
+                            None, entry[0], k, False).ratio(),
+                        default=(None, d))
+                return entry
+
+    def __getitem__(self, k):
+        v = self.get(k)
+        if v is None:
+            raise KeyError(k)
+        return v
+
+    def __contains__(self, k):
+        return self.get(k) is not None
+
+    def __len__(self):
+        return len(self.entries)
+
+    def __iter__(self):
+        return (v for k, v in self.entries.items())
+
 def collect_dwarf_info(obj_path, filter=None, *,
         objdump_path=OBJDUMP_PATH,
         **args):
     filter_, filter = filter, __builtins__.filter
 
-    # each dwarf entry can have attrs and children entries
-    class DwarfEntry:
-        def __init__(self, level, off, tag, ats={}, children=[]):
-            self.level = level
-            self.off = off
-            self.tag = tag
-            self.ats = ats or {}
-            self.children = children or []
-
-        def __getitem__(self, k):
-            return self.ats[k]
-
-        def __contains__(self, k):
-            return k in self.ats
-
-        def __repr__(self):
-            return '%s(%d, 0x%x, %r, %r)' % (
-                    self.__class__.__name__,
-                    self.level,
-                    self.off,
-                    self.tag,
-                    self.ats)
-
     info_pattern = re.compile(
             '^\s*(?:<(?P<level>[^>]*)>'
                     '\s*<(?P<off>[^>]*)>'
@@ -251,7 +319,7 @@ def collect_dwarf_info(obj_path, filter=None, *,
                     '\s*:(?P<v>.*))\s*$')
 
     # collect dwarf entries
-    entries = co.OrderedDict()
+    info = co.OrderedDict()
     entry = None
     levels = {}
     # note objdump-path may contain extra args
@@ -277,7 +345,7 @@ def collect_dwarf_info(obj_path, filter=None, *,
                 if (entry.level == 1 and (
                         # unless this entry is filtered
                         filter_ is None or entry.tag in filter_)):
-                    entries[entry.off] = entry
+                    info[entry.off] = entry
                 # store entry in parent
                 levels[entry.level] = entry
                 if entry.level-1 in levels:
@@ -290,7 +358,7 @@ def collect_dwarf_info(obj_path, filter=None, *,
     if proc.returncode != 0:
         raise sp.CalledProcessError(proc.returncode, proc.args)
 
-    return entries
+    return DwarfInfo(info)
 
 def collect_sizes(obj_path, *,
         nm_path=NM_PATH,
@@ -331,33 +399,21 @@ def collect(obj_paths, *,
         **args):
     results = []
     for obj_path in obj_paths:
-        # guess the source, if we have debug-info we'll replace this later
-        file = re.sub('(\.o)?$', '.c', obj_path, 1)
-
         # find sizes
         sizes = collect_sizes(obj_path, everything=everything, **args)
 
         # try to figure out the source file if we have debug-info
-        defs = {}
         try:
             files = collect_dwarf_files(obj_path, **args)
             info = collect_dwarf_info(obj_path,
                     filter={'DW_TAG_subprogram', 'DW_TAG_variable'},
                     **args)
 
-            for no, entry in info.items():
-                # skip funcs with no name or no file
-                if ('DW_AT_name' not in entry
-                        or 'DW_AT_decl_file' not in entry):
-                    continue
-                name_ = entry['DW_AT_name'].split(':')[-1].strip()
-                file_ = files.get(int(entry['DW_AT_decl_file']), '?')
-                defs[name_] = file_
-
         except sp.CalledProcessError:
             # do nothing on error, we don't need objdump to work, source
             # files may just be inaccurate
-            pass
+            files = {}
+            info = {}
 
         # map function sizes to debug symbols
         for func, size in sizes.items():
@@ -365,34 +421,28 @@ def collect(obj_paths, *,
             if not everything and func.startswith('__'):
                 continue
 
-            # find best matching debug symbol, this may be slightly different
+            # find best matching dwarf entry, this may be slightly different
             # due to optimizations
-            if defs:
-                # exact match? avoid difflib if we can for speed
-                if func in defs:
-                    file_ = defs[func]
-                else:
-                    _, file_ = max(
-                            defs.items(),
-                            key=lambda d: difflib.SequenceMatcher(None,
-                                d[0],
-                                func, False).ratio())
+            entry = info.get(func)
+
+            if entry is not None and 'DW_AT_decl_file' in entry:
+                file = files.get(int(entry['DW_AT_decl_file']), '?')
             else:
-                file_ = file
+                file = re.sub('(\.o)?$', '.c', obj_path, 1)
 
             # ignore filtered sources
             if sources is not None:
-                if not any(os.path.abspath(file_) == os.path.abspath(s)
+                if not any(os.path.abspath(file) == os.path.abspath(s)
                         for s in sources):
                     continue
             else:
                 # default to only cwd
                 if not everything and not os.path.commonpath([
                         os.getcwd(),
-                        os.path.abspath(file_)]) == os.getcwd():
+                        os.path.abspath(file)]) == os.getcwd():
                     continue
 
-            results.append(DataResult(file_, func, size))
+            results.append(CodeResult(file, func, size))
 
     return results
 
@@ -624,6 +674,9 @@ def table(Result, results, diff_results=None, *,
                                 types[k].ratio(
                                     getattr(r, k, None),
                                     getattr(diff_r, k, None)))))
+        # append any notes
+        if hasattr(r, 'notes'):
+            entry[-1][1].extend(r.notes)
         return entry
 
     # recursive entry helper, only used by some scripts
diff --git a/scripts/perf.py b/scripts/perf.py
index 4df12e14..f63c8d7f 100755
--- a/scripts/perf.py
+++ b/scripts/perf.py
@@ -155,12 +155,12 @@ class PerfResult(co.namedtuple('PerfResult', [
     __slots__ = ()
     def __new__(cls, file='', function='', line=0,
             cycles=0, bmisses=0, branches=0, cmisses=0, caches=0,
-            children=[]):
+            children=None):
         return super().__new__(cls, file, function, int(RInt(line)),
                 RInt(cycles),
                 RInt(bmisses), RInt(branches),
                 RInt(cmisses), RInt(caches),
-                children)
+                children if children is not None else [])
 
     def __add__(self, other):
         return PerfResult(self.file, self.function, self.line,
@@ -259,19 +259,76 @@ def multiprocessing_cache(f):
 
     return multiprocessing_cache
 
+class SymInfo:
+    def __init__(self, syms):
+        self.syms = syms
+
+    def get(self, k, d=None):
+        # str keys look up by symbol name, anything else by address
+        if isinstance(k, str):
+            # organize by symbol, note multiple symbols can share a name
+            if not hasattr(self, '_by_sym'):
+                by_sym = {}
+                for sym, addr, size in self.syms:
+                    if sym not in by_sym:
+                        by_sym[sym] = []
+                    if (addr, size) not in by_sym[sym]:
+                        by_sym[sym].append((addr, size))
+                self._by_sym = by_sym
+            return self._by_sym.get(k, d)
+
+        else:
+            import bisect
+
+            # organize by address, entries are (name, addr, size)
+            if not hasattr(self, '_by_addr'):
+                # sort and keep largest/first when duplicates
+                syms = self.syms.copy()
+                syms.sort(key=lambda x: (x[1], -x[2], x[0]))
+
+                by_addr = []
+                for name, addr, size in syms:
+                    if (len(by_addr) == 0
+                            or by_addr[-1][1] != addr):
+                        by_addr.append((name, addr, size))
+                self._by_addr = by_addr
+
+            # find the last sym starting at or before this address
+            i = bisect.bisect(self._by_addr, k,
+                    key=lambda x: x[1])
+            # check that we're actually in this sym's size
+            if i > 0 and k < self._by_addr[i-1][1]+self._by_addr[i-1][2]:
+                return self._by_addr[i-1][0]
+            else:
+                return d
+
+    def __getitem__(self, k):
+        v = self.get(k)
+        if v is None:
+            raise KeyError(k)
+        return v
+
+    def __contains__(self, k):
+        return self.get(k) is not None
+
+    def __len__(self):
+        return len(self.syms)
+
+    def __iter__(self):
+        return iter(self.syms)
+
 @multiprocessing_cache
-def collect_syms(obj_path, *,
-        objdump_path=None,
+def collect_syms(obj_path, global_only=False, *,
+        objdump_path=OBJDUMP_PATH,
         **args):
     symbol_pattern = re.compile(
             '^(?P<addr>[0-9a-fA-F]+)'
-                '.*'
+                ' (?P<scope>.).*'
                 '\s+(?P<size>[0-9a-fA-F]+)'
                 '\s+(?P<name>[^\s]+)\s*$')
 
-    # figure out symbol addresses
-    syms = {}
-    sym_at = []
+    # find symbol addresses and sizes
+    syms = []
     cmd = objdump_path + ['-t', obj_path]
     if args.get('verbose'):
         print(' '.join(shlex.quote(c) for c in cmd))
@@ -284,33 +341,102 @@ def collect_syms(obj_path, *,
         m = symbol_pattern.match(line)
         if m:
             name = m.group('name')
+            scope = m.group('scope')
             addr = int(m.group('addr'), 16)
             size = int(m.group('size'), 16)
+            # skip non-globals?
+            # l => local
+            # g => global
+            # u => unique global
+            #   => neither
+            # ! => local + global
+            if global_only and scope in 'l ':
+                continue
             # ignore zero-sized symbols
             if not size:
                 continue
             # note multiple symbols can share a name
-            if name not in syms:
-                syms[name] = set()
-            syms[name].add((addr, size))
-            sym_at.append((addr, name, size))
+            syms.append((name, addr, size))
     proc.wait()
     if proc.returncode != 0:
         raise sp.CalledProcessError(proc.returncode, proc.args)
 
-    # sort and keep largest/first when duplicates
-    sym_at.sort(key=lambda x: (x[0], -x[2], x[1]))
-    sym_at_ = []
-    for addr, name, size in sym_at:
-        if len(sym_at_) == 0 or sym_at_[-1][0] != addr:
-            sym_at_.append((addr, name, size))
-    sym_at = sym_at_
+    return SymInfo(syms)
 
-    return syms, sym_at
+class LineInfo:
+    def __init__(self, lines):
+        self.lines = lines
+
+    def get(self, k, d=None):
+        # tuple keys are (file, line) lookups, anything else is an address
+        if not isinstance(k, tuple):
+            import bisect
+
+            # organize by address, built lazily on first address lookup
+            if not hasattr(self, '_by_addr'):
+                # sort by addr and keep first file+line when duplicates
+                lines = self.lines.copy()
+                lines.sort(key=lambda x: (x[2], x[0], x[1]))
+
+                by_addr = []
+                for file, line, addr in lines:
+                    if (len(by_addr) == 0
+                            or by_addr[-1][2] != addr):
+                        by_addr.append((file, line, addr))
+                self._by_addr = by_addr
+
+            # find file+line of the last entry at or before this addr
+            i = bisect.bisect(self._by_addr, k,
+                    key=lambda x: x[2])
+            if i > 0:
+                return self._by_addr[i-1][0], self._by_addr[i-1][1]
+            else:
+                return d
+
+        else:
+            import bisect
+
+            # organize by file+line, built lazily on first tuple lookup
+            if not hasattr(self, '_by_line'):
+                # sort and keep first (lowest addr) when duplicates
+                lines = self.lines.copy()
+                lines.sort()
+
+                by_line = []
+                for file, line, addr in lines:
+                    if (len(by_line) == 0
+                            or by_line[-1][0] != file
+                            or by_line[-1][1] != line):
+                        by_line.append((file, line, addr))
+                self._by_line = by_line
+
+            # find addr of the last entry at or before this file+line
+            i = bisect.bisect(self._by_line, k,
+                    key=lambda x: (x[0], x[1]))
+            # make sure file at least matches!
+            if i > 0 and self._by_line[i-1][0] == k[0]:
+                return self._by_line[i-1][2]
+            else:
+                return d
+
+    def __getitem__(self, k):
+        v = self.get(k)
+        if v is None:
+            raise KeyError(k)
+        return v
+
+    def __contains__(self, k):
+        return self.get(k) is not None
+
+    def __len__(self):
+        return len(self.lines)
+
+    def __iter__(self):
+        return iter(self.lines)
 
 @multiprocessing_cache
 def collect_dwarf_lines(obj_path, *,
-        objdump_path=None,
+        objdump_path=OBJDUMP_PATH,
         **args):
     line_pattern = re.compile(
             '^\s*(?:'
@@ -334,9 +460,8 @@ def collect_dwarf_lines(obj_path, *,
     # decodedline seems to have issues with multiple dir/file
     # tables, which is why we need this
     lines = []
-    line_at = []
-    dirs = {}
-    files = {}
+    dirs = co.OrderedDict()
+    files = co.OrderedDict()
     op_file = 1
     op_line = 1
     op_addr = 0
@@ -377,7 +502,6 @@ def collect_dwarf_lines(obj_path, *,
                         or m.group('op_end')):
                     file = os.path.abspath(files.get(op_file, '?'))
                     lines.append((file, op_line, op_addr))
-                    line_at.append((op_addr, file, op_line))
 
                 if m.group('op_end'):
                     op_file = 1
@@ -387,23 +511,7 @@ def collect_dwarf_lines(obj_path, *,
     if proc.returncode != 0:
         raise sp.CalledProcessError(proc.returncode, proc.args)
 
-    # sort and keep first when duplicates
-    lines.sort()
-    lines_ = []
-    for file, line, addr in lines:
-        if len(lines_) == 0 or lines_[-1][0] != file or lines[-1][1] != line:
-            lines_.append((file, line, addr))
-    lines = lines_
-
-    # sort and keep first when duplicates
-    line_at.sort()
-    line_at_ = []
-    for addr, file, line in line_at:
-        if len(line_at_) == 0 or line_at_[-1][0] != addr:
-            line_at_.append((addr, file, line))
-    line_at = line_at_
-
-    return lines, line_at
+    return LineInfo(lines)
 
 
 def collect_decompressed(path, *,
@@ -502,8 +610,8 @@ def collect_decompressed(path, *,
                 addr_ = int(m.group('addr'), 16)
 
                 # get the syms/lines for the dso, this is cached
-                syms, sym_at = collect_syms(dso, **args)
-                lines, line_at = collect_dwarf_lines(dso, **args)
+                syms = collect_syms(dso, **args)
+                lines = collect_dwarf_lines(dso, **args)
 
                 # ASLR is tricky, we have symbols+offsets, but static symbols
                 # means we may have multiple options for each symbol.
@@ -541,9 +649,9 @@ def collect_decompressed(path, *,
                         file, line = cached
                     else:
                         # find file+line
-                        i = bisect.bisect(line_at, addr, key=lambda x: x[0])
-                        if i > 0:
-                            _, file, line = line_at[i-1]
+                        line_ = lines.get(addr)
+                        if line_ is not None:
+                            file, line = line_
                         else:
                             file, line = re.sub('(\.o)?$', '.c', dso, 1), 0
 
@@ -872,6 +980,9 @@ def table(Result, results, diff_results=None, *,
                                 types[k].ratio(
                                     getattr(r, k, None),
                                     getattr(diff_r, k, None)))))
+        # append any notes
+        if hasattr(r, 'notes'):
+            entry[-1][1].extend(r.notes)
         return entry
 
     # recursive entry helper, only used by some scripts
diff --git a/scripts/perfbd.py b/scripts/perfbd.py
index 751cda85..88cb941f 100755
--- a/scripts/perfbd.py
+++ b/scripts/perfbd.py
@@ -143,10 +143,10 @@ class PerfBdResult(co.namedtuple('PerfBdResult', [
     __slots__ = ()
     def __new__(cls, file='', function='', line=0,
             readed=0, proged=0, erased=0,
-            children=[]):
+            children=None):
         return super().__new__(cls, file, function, int(RInt(line)),
                 RInt(readed), RInt(proged), RInt(erased),
-                children)
+                children if children is not None else [])
 
     def __add__(self, other):
         return PerfBdResult(self.file, self.function, self.line,
@@ -166,18 +166,75 @@ def openio(path, mode='r', buffering=-1):
     else:
         return open(path, mode, buffering)
 
-def collect_syms(obj_path, *,
-        objdump_path=None,
+class SymInfo:
+    def __init__(self, syms):
+        self.syms = syms
+
+    def get(self, k, d=None):
+        # str keys look up by symbol name, anything else by address
+        if isinstance(k, str):
+            # organize by symbol, note multiple symbols can share a name
+            if not hasattr(self, '_by_sym'):
+                by_sym = {}
+                for sym, addr, size in self.syms:
+                    if sym not in by_sym:
+                        by_sym[sym] = []
+                    if (addr, size) not in by_sym[sym]:
+                        by_sym[sym].append((addr, size))
+                self._by_sym = by_sym
+            return self._by_sym.get(k, d)
+
+        else:
+            import bisect
+
+            # organize by address, entries are (name, addr, size)
+            if not hasattr(self, '_by_addr'):
+                # sort and keep largest/first when duplicates
+                syms = self.syms.copy()
+                syms.sort(key=lambda x: (x[1], -x[2], x[0]))
+
+                by_addr = []
+                for name, addr, size in syms:
+                    if (len(by_addr) == 0
+                            or by_addr[-1][1] != addr):
+                        by_addr.append((name, addr, size))
+                self._by_addr = by_addr
+
+            # find the last sym starting at or before this address
+            i = bisect.bisect(self._by_addr, k,
+                    key=lambda x: x[1])
+            # check that we're actually in this sym's size
+            if i > 0 and k < self._by_addr[i-1][1]+self._by_addr[i-1][2]:
+                return self._by_addr[i-1][0]
+            else:
+                return d
+
+    def __getitem__(self, k):
+        v = self.get(k)
+        if v is None:
+            raise KeyError(k)
+        return v
+
+    def __contains__(self, k):
+        return self.get(k) is not None
+
+    def __len__(self):
+        return len(self.syms)
+
+    def __iter__(self):
+        return iter(self.syms)
+
+def collect_syms(obj_path, global_only=False, *,
+        objdump_path=OBJDUMP_PATH,
         **args):
     symbol_pattern = re.compile(
             '^(?P<addr>[0-9a-fA-F]+)'
-                '.*'
+                ' (?P<scope>.).*'
                 '\s+(?P<size>[0-9a-fA-F]+)'
                 '\s+(?P<name>[^\s]+)\s*$')
 
-    # figure out symbol addresses
-    syms = {}
-    sym_at = []
+    # find symbol addresses and sizes
+    syms = []
     cmd = objdump_path + ['-t', obj_path]
     if args.get('verbose'):
         print(' '.join(shlex.quote(c) for c in cmd))
@@ -190,32 +247,101 @@ def collect_syms(obj_path, *,
         m = symbol_pattern.match(line)
         if m:
             name = m.group('name')
+            scope = m.group('scope')
             addr = int(m.group('addr'), 16)
             size = int(m.group('size'), 16)
+            # skip non-globals?
+            # l => local
+            # g => global
+            # u => unique global
+            #   => neither
+            # ! => local + global
+            if global_only and scope in 'l ':
+                continue
             # ignore zero-sized symbols
             if not size:
                 continue
             # note multiple symbols can share a name
-            if name not in syms:
-                syms[name] = set()
-            syms[name].add((addr, size))
-            sym_at.append((addr, name, size))
+            syms.append((name, addr, size))
     proc.wait()
     if proc.returncode != 0:
         raise sp.CalledProcessError(proc.returncode, proc.args)
 
-    # sort and keep largest/first when duplicates
-    sym_at.sort(key=lambda x: (x[0], -x[2], x[1]))
-    sym_at_ = []
-    for addr, name, size in sym_at:
-        if len(sym_at_) == 0 or sym_at_[-1][0] != addr:
-            sym_at_.append((addr, name, size))
-    sym_at = sym_at_
+    return SymInfo(syms)
 
-    return syms, sym_at
+class LineInfo:
+    def __init__(self, lines):
+        self.lines = lines
+
+    def get(self, k, d=None):
+        # tuple keys are (file, line) lookups, anything else is an address
+        if not isinstance(k, tuple):
+            import bisect
+
+            # organize by address, built lazily on first address lookup
+            if not hasattr(self, '_by_addr'):
+                # sort by addr and keep first file+line when duplicates
+                lines = self.lines.copy()
+                lines.sort(key=lambda x: (x[2], x[0], x[1]))
+
+                by_addr = []
+                for file, line, addr in lines:
+                    if (len(by_addr) == 0
+                            or by_addr[-1][2] != addr):
+                        by_addr.append((file, line, addr))
+                self._by_addr = by_addr
+
+            # find file+line of the last entry at or before this addr
+            i = bisect.bisect(self._by_addr, k,
+                    key=lambda x: x[2])
+            if i > 0:
+                return self._by_addr[i-1][0], self._by_addr[i-1][1]
+            else:
+                return d
+
+        else:
+            import bisect
+
+            # organize by file+line, built lazily on first tuple lookup
+            if not hasattr(self, '_by_line'):
+                # sort and keep first (lowest addr) when duplicates
+                lines = self.lines.copy()
+                lines.sort()
+
+                by_line = []
+                for file, line, addr in lines:
+                    if (len(by_line) == 0
+                            or by_line[-1][0] != file
+                            or by_line[-1][1] != line):
+                        by_line.append((file, line, addr))
+                self._by_line = by_line
+
+            # find addr of the last entry at or before this file+line
+            i = bisect.bisect(self._by_line, k,
+                    key=lambda x: (x[0], x[1]))
+            # make sure file at least matches!
+            if i > 0 and self._by_line[i-1][0] == k[0]:
+                return self._by_line[i-1][2]
+            else:
+                return d
+
+    def __getitem__(self, k):
+        v = self.get(k)
+        if v is None:
+            raise KeyError(k)
+        return v
+
+    def __contains__(self, k):
+        return self.get(k) is not None
+
+    def __len__(self):
+        return len(self.lines)
+
+    def __iter__(self):
+        return iter(self.lines)
 
 def collect_dwarf_lines(obj_path, *,
-        objdump_path=None,
+        objdump_path=OBJDUMP_PATH,
         **args):
     line_pattern = re.compile(
             '^\s*(?:'
@@ -239,9 +365,8 @@ def collect_dwarf_lines(obj_path, *,
     # decodedline seems to have issues with multiple dir/file
     # tables, which is why we need this
     lines = []
-    line_at = []
-    dirs = {}
-    files = {}
+    dirs = co.OrderedDict()
+    files = co.OrderedDict()
     op_file = 1
     op_line = 1
     op_addr = 0
@@ -282,7 +407,6 @@ def collect_dwarf_lines(obj_path, *,
                         or m.group('op_end')):
                     file = os.path.abspath(files.get(op_file, '?'))
                     lines.append((file, op_line, op_addr))
-                    line_at.append((op_addr, file, op_line))
 
                 if m.group('op_end'):
                     op_file = 1
@@ -292,26 +416,10 @@ def collect_dwarf_lines(obj_path, *,
     if proc.returncode != 0:
         raise sp.CalledProcessError(proc.returncode, proc.args)
 
-    # sort and keep first when duplicates
-    lines.sort()
-    lines_ = []
-    for file, line, addr in lines:
-        if len(lines_) == 0 or lines_[-1][0] != file or lines[-1][1] != line:
-            lines_.append((file, line, addr))
-    lines = lines_
-
-    # sort and keep first when duplicates
-    line_at.sort()
-    line_at_ = []
-    for addr, file, line in line_at:
-        if len(line_at_) == 0 or line_at_[-1][0] != addr:
-            line_at_.append((addr, file, line))
-    line_at = line_at_
-
-    return lines, line_at
+    return LineInfo(lines)
 
 
-def collect_job(path, start, stop, syms, sym_at, lines, line_at, *,
+def collect_job(path, start, stop, syms, lines, *,
         sources=None,
         everything=False,
         propagate=0,
@@ -465,10 +573,9 @@ def collect_job(path, start, stop, syms, sym_at, lines, line_at, *,
                     # the first stack frame, so we can use that as a point
                     # of reference
                     if last_delta is None:
-                        i = bisect.bisect(lines, (last_file, last_line),
-                                key=lambda x: (x[0], x[1]))
-                        if i > 0:
-                            last_delta = lines[i-1][2] - addr_
+                        addr__ = lines.get((last_file, last_line))
+                        if addr__ is not None:
+                            last_delta = addr__ - addr_
                         else:
                             # can't reverse ASLR, give up on backtrace
                             commit()
@@ -486,11 +593,8 @@ def collect_job(path, start, stop, syms, sym_at, lines, line_at, *,
                         file, sym, line = cached
                     else:
                         # find sym
-                        i = bisect.bisect(sym_at, addr, key=lambda x: x[0])
-                        # check that we're actually in the sym's size
-                        if i > 0 and addr < sym_at[i-1][0] + sym_at[i-1][2]:
-                            _, sym, _ = sym_at[i-1]
-                        else:
+                        sym = syms.get(addr)
+                        if sym is None:
                             sym = hex(addr)
 
                         # filter out internal/unknown functions
@@ -503,9 +607,9 @@ def collect_job(path, start, stop, syms, sym_at, lines, line_at, *,
                             continue
 
                         # find file+line
-                        i = bisect.bisect(line_at, addr, key=lambda x: x[0])
-                        if i > 0:
-                            _, file, line = line_at[i-1]
+                        line_ = lines.get(addr)
+                        if line_ is not None:
+                            file, line = line_
                         elif len(last_stack) == 0:
                             file, line = last_file, last_line
                         else:
@@ -568,8 +672,8 @@ def collect(obj_path, trace_paths, *,
         jobs = len(os.sched_getaffinity(0))
 
     # find sym/line info to reverse ASLR
-    syms, sym_at = collect_syms(obj_path, **args)
-    lines, line_at = collect_dwarf_lines(obj_path, **args)
+    syms = collect_syms(obj_path, **args)
+    lines = collect_dwarf_lines(obj_path, **args)
 
     if jobs is not None:
         # try to split up files so that even single files can be processed
@@ -596,8 +700,7 @@ def collect(obj_path, trace_paths, *,
             for results_ in p.imap_unordered(
                     starapply,
                     ((collect_job,
-                            (path, start, stop,
-                                syms, sym_at, lines, line_at),
+                            (path, start, stop, syms, lines),
                             args)
                         for path, ranges in zip(trace_paths, trace_ranges)
                         for start, stop in ranges)):
@@ -607,8 +710,7 @@ def collect(obj_path, trace_paths, *,
         results = []
         for path in trace_paths:
             results.extend(collect_job(
-                    path, None, None,
-                    syms, sym_at, lines, line_at,
+                    path, None, None, syms, lines,
                     **args))
 
     return results
@@ -841,6 +943,9 @@ def table(Result, results, diff_results=None, *,
                                 types[k].ratio(
                                     getattr(r, k, None),
                                     getattr(diff_r, k, None)))))
+        # append any notes
+        if hasattr(r, 'notes'):
+            entry[-1][1].extend(r.notes)
         return entry
 
     # recursive entry helper, only used by some scripts
diff --git a/scripts/stack.py b/scripts/stack.py
index fb4ef917..b9d65ed1 100755
--- a/scripts/stack.py
+++ b/scripts/stack.py
@@ -129,12 +129,11 @@ class StackResult(co.namedtuple('StackResult', [
     _types = {'frame': RInt, 'limit': RInt}
 
     __slots__ = ()
-    def __new__(cls, file='', function='',
-            frame=0, limit=0,
-            children=[]):
+    def __new__(cls, file='', function='', frame=0, limit=0,
+            children=None):
         return super().__new__(cls, file, function,
                 RInt(frame), RInt(limit),
-                children)
+                children if children is not None else [])
 
     def __add__(self, other):
         return StackResult(self.file, self.function,
@@ -527,6 +526,9 @@ def table(Result, results, diff_results=None, *,
                                 types[k].ratio(
                                     getattr(r, k, None),
                                     getattr(diff_r, k, None)))))
+        # append any notes
+        if hasattr(r, 'notes'):
+            entry[-1][1].extend(r.notes)
         return entry
 
     # recursive entry helper, only used by some scripts
diff --git a/scripts/structs.py b/scripts/structs.py
index d8f1f32f..b0e68f50 100755
--- a/scripts/structs.py
+++ b/scripts/structs.py
@@ -136,7 +136,8 @@ class StructResult(co.namedtuple('StructResult', [
     _types = {'size': RInt, 'align': RInt}
 
     __slots__ = ()
-    def __new__(cls, file='', struct='', size=0, align=0, children=None):
+    def __new__(cls, file='', struct='', size=0, align=0,
+            children=None):
         return super().__new__(cls, file, struct,
                 RInt(size), RInt(align),
                 children if children is not None else [])
@@ -161,28 +162,6 @@ def openio(path, mode='r', buffering=-1):
 def collect_dwarf_files(obj_path, *,
         objdump_path=OBJDUMP_PATH,
         **args):
-    class FileInfo:
-        def __init__(self, files):
-            self.files = files
-
-        def get(self, k, d=None):
-            return self.files.get(k, d)
-
-        def __getitem__(self, k):
-            v = self.get(k)
-            if v is None:
-                raise KeyError(k)
-            return v
-
-        def __contains__(self, k):
-            return self.get(k) is not None
-
-        def __len__(self):
-            return len(self.files)
-
-        def __iter__(self):
-            return (v for k, v in self.files.items())
-
     line_pattern = re.compile(
             '^\s*(?P<no>[0-9]+)'
                 '(?:\s+(?P<dir>[0-9]+))?'
@@ -223,7 +202,7 @@ def collect_dwarf_files(obj_path, *,
         raise sp.CalledProcessError(proc.returncode, proc.args)
 
     # simplify paths
-    files_ = {}
+    files_ = co.OrderedDict()
     for no, file in files.items():
         if os.path.commonpath([
                     os.getcwd(),
@@ -233,104 +212,104 @@ def collect_dwarf_files(obj_path, *,
             files_[no] = os.path.abspath(file)
     files = files_
 
-    return FileInfo(files)
+    return files
+
+# each dwarf entry can have attrs and children entries
+class DwarfEntry:
+    def __init__(self, level, off, tag, ats=None, children=None):
+        self.level = level
+        self.off = off
+        self.tag = tag
+        self.ats = ats or {}
+        self.children = children or []
+
+    def get(self, k, d=None):
+        return self.ats.get(k, d)
+
+    def __getitem__(self, k):
+        return self.ats[k]
+
+    def __contains__(self, k):
+        return k in self.ats
+
+    def __repr__(self):
+        return '%s(%d, 0x%x, %r, %r)' % (
+                self.__class__.__name__,
+                self.level,
+                self.off,
+                self.tag,
+                self.ats)
+
+    @ft.cached_property
+    def name(self):
+        if 'DW_AT_name' in self:
+            name = self['DW_AT_name'].split(':')[-1].strip()
+            # prefix with struct/union/enum
+            if self.tag == 'DW_TAG_structure_type':
+                name = 'struct ' + name
+            elif self.tag == 'DW_TAG_union_type':
+                name = 'union ' + name
+            elif self.tag == 'DW_TAG_enumeration_type':
+                name = 'enum ' + name
+            return name
+        else:
+            return None
+
+# a collection of dwarf entries, entries is a dict keyed by offset
+class DwarfInfo:
+    def __init__(self, entries):
+        self.entries = entries
+
+    def get(self, k, d=None):
+        # str keys look up by dwarf name, anything else by offset
+        if not isinstance(k, str):
+            return self.entries.get(k, d)
+
+        else:
+            import difflib
+
+            # organize entries by name
+            if not hasattr(self, '_by_name'):
+                self._by_name = {}
+                for entry in self.entries.values():
+                    if entry.name is not None:
+                        self._by_name[entry.name] = entry
+
+            # exact match? avoid difflib if we can for speed
+            if k in self._by_name:
+                return self._by_name[k]
+            # find the best matching dwarf entry with difflib
+            #
+            # this can be different from the actual symbol because
+            # of optimization passes, fall back to d if no named entries
+            else:
+                name, entry = max(
+                        self._by_name.items(),
+                        key=lambda entry: difflib.SequenceMatcher(
+                            None, entry[0], k, False).ratio(),
+                        default=(None, d))
+                return entry
+
+    def __getitem__(self, k):
+        v = self.get(k)
+        if v is None:
+            raise KeyError(k)
+        return v
+
+    def __contains__(self, k):
+        return self.get(k) is not None
+
+    def __len__(self):
+        return len(self.entries)
+
+    def __iter__(self):
+        return (v for k, v in self.entries.items())
 
 def collect_dwarf_info(obj_path, filter=None, *,
         objdump_path=OBJDUMP_PATH,
         **args):
     filter_, filter = filter, __builtins__.filter
 
-    # each dwarf entry can have attrs and children entries
-    class DwarfEntry:
-        def __init__(self, level, off, tag, ats={}, children=[]):
-            self.level = level
-            self.off = off
-            self.tag = tag
-            self.ats = ats or {}
-            self.children = children or []
-
-        def get(self, k, d=None):
-            return self.ats.get(k, d)
-
-        def __getitem__(self, k):
-            return self.ats[k]
-
-        def __contains__(self, k):
-            return k in self.ats
-
-        def __repr__(self):
-            return '%s(%d, 0x%x, %r, %r)' % (
-                    self.__class__.__name__,
-                    self.level,
-                    self.off,
-                    self.tag,
-                    self.ats)
-
-        @ft.cached_property
-        def name(self):
-            if 'DW_AT_name' in self:
-                name = self['DW_AT_name'].split(':')[-1].strip()
-                # prefix with struct/union
-                if self.tag == 'DW_TAG_structure_type':
-                    name = 'struct ' + name
-                elif self.tag == 'DW_TAG_union_type':
-                    name = 'union ' + name
-                elif self.tag == 'DW_TAG_enumeration_type':
-                    name = 'enum ' + name
-                return name
-            else:
-                return None
-
-    # a collection of dwarf entries
-    class DwarfInfo:
-        def __init__(self, entries):
-            self.entries = entries
-
-        def get(self, k, d=None):
-            # allow lookup by both offset and dwarf name
-            if not isinstance(k, str):
-                return self.entries.get(k, d)
-
-            else:
-                import difflib
-
-                # organize entries by name
-                if not hasattr(self, '_by_name'):
-                    self._by_name = {}
-                    for entry in self.entries.values():
-                        if entry.name is not None:
-                            self._by_name[entry.name] = entry
-
-                # exact match? avoid difflib if we can for speed
-                if k in self._by_name:
-                    return self._by_name[k]
-                # find the best matching dwarf entry with difflib
-                #
-                # this can be different from the actual symbol because
-                # of optimization passes
-                else:
-                    name, entry = max(
-                            self._by_name.items(),
-                            key=lambda entry: difflib.SequenceMatcher(
-                                None, entry[0], k, False).ratio(),
-                            default=(None, None))
-                    return entry
-
-        def __getitem__(self, k):
-            v = self.get(k)
-            if v is None:
-                raise KeyError(k)
-            return v
-
-        def __contains__(self, k):
-            return self.get(k) is not None
-
-        def __len__(self):
-            return len(self.entries)
-
-        def __iter__(self):
-            return (v for k, v in self.entries.items())
-
     info_pattern = re.compile(
             '^\s*(?:<(?P<level>[^>]*)>'
                     '\s*<(?P<off>[^>]*)>'
@@ -797,7 +776,8 @@ def table(Result, results, diff_results=None, *,
                     for r in results_}
         names_ = list(table_.keys())
 
-        # only sort the children layer if explicitly requested
+        # sort the children layer
+        names_.sort()
         if sort:
             for k, reverse in reversed(sort):
                 names_.sort(