Added recursive results to perf.py

This adds -P/--propagate and -Z/--depth to perf.py for showing recursive
results, making it easy to narrow down on where spikes in performance
come from.

This ended up being a bit different from stack.py's recursive results,
as we end up with different (diminishing) numbers as we descend.
This commit is contained in:
Christopher Haster
2022-10-08 20:49:44 -05:00
parent 490e1c4616
commit df283aeb48
8 changed files with 680 additions and 843 deletions

View File

@@ -28,6 +28,8 @@ import subprocess as sp
import tempfile
import zipfile
# TODO support non-zip perf results?
PERF_PATHS = ['*.perf']
PERF_TOOL = ['perf']
@@ -118,61 +120,31 @@ class Int(co.namedtuple('Int', 'x')):
# perf results
class PerfResult(co.namedtuple('PerfResult', [
'file', 'function', 'line',
'self_cycles',
'self_bmisses', 'self_branches',
'self_cmisses', 'self_caches',
'cycles',
'bmisses', 'branches',
'cmisses', 'caches',
'children', 'parents'])):
'cycles', 'bmisses', 'branches', 'cmisses', 'caches',
'children'])):
_by = ['file', 'function', 'line']
_fields = [
'self_cycles',
'self_bmisses', 'self_branches',
'self_cmisses', 'self_caches',
'cycles',
'bmisses', 'branches',
'cmisses', 'caches']
_fields = ['cycles', 'bmisses', 'branches', 'cmisses', 'caches']
_types = {
'self_cycles': Int,
'self_bmisses': Int, 'self_branches': Int,
'self_cmisses': Int, 'self_caches': Int,
'cycles': Int,
'bmisses': Int, 'branches': Int,
'cmisses': Int, 'caches': Int}
__slots__ = ()
def __new__(cls, file='', function='', line=0,
self_cycles=0,
self_bmisses=0, self_branches=0,
self_cmisses=0, self_caches=0,
cycles=0,
bmisses=0, branches=0,
cmisses=0, caches=0,
children=set(), parents=set()):
cycles=0, bmisses=0, branches=0, cmisses=0, caches=0,
children=[]):
return super().__new__(cls, file, function, int(Int(line)),
Int(self_cycles),
Int(self_bmisses), Int(self_branches),
Int(self_cmisses), Int(self_caches),
Int(cycles),
Int(bmisses), Int(branches),
Int(cmisses), Int(caches),
children, parents)
Int(cycles), Int(bmisses), Int(branches), Int(cmisses), Int(caches),
children)
def __add__(self, other):
return PerfResult(self.file, self.function, self.line,
self.self_cycles + other.self_cycles,
self.self_bmisses + other.self_bmisses,
self.self_branches + other.self_branches,
self.self_cmisses + other.self_cmisses,
self.self_caches + other.self_caches,
self.cycles + other.cycles,
self.bmisses + other.bmisses,
self.branches + other.branches,
self.cmisses + other.cmisses,
self.caches + other.caches,
self.children | other.children,
self.parents | other.parents)
self.children + other.children)
def openio(path, mode='r'):
@@ -245,7 +217,8 @@ def record(command, *,
def collect_decompressed(path, *,
perf_tool=PERF_TOOL,
everything=False,
depth=0,
propagate=0,
depth=1,
**args):
sample_pattern = re.compile(
'(?P<comm>\w+)'
@@ -278,10 +251,27 @@ def collect_decompressed(path, *,
close_fds=False)
last_filtered = False
last_has_frame = False
last_event = ''
last_period = 0
results = co.defaultdict(lambda: co.defaultdict(lambda: (0, 0)))
last_stack = []
results = {}
def commit():
# tail-recursively propagate measurements
for i in range(len(last_stack)):
results_ = results
for j in reversed(range(i+1)):
if i+1-j > depth:
break
# propagate
name = last_stack[j]
if name not in results_:
results_[name] = (co.defaultdict(lambda: 0), {})
results_[name][0][last_event] += last_period
# recurse
results_ = results_[name][1]
for line in proc.stdout:
# we need to process a lot of data, so wait to use regex as late
@@ -291,10 +281,12 @@ def collect_decompressed(path, *,
if not line.startswith('\t'):
m = sample_pattern.match(line)
if m:
if last_stack:
commit()
last_event = m.group('event')
last_filtered = last_event in events
last_period = int(m.group('period'), 0)
last_has_frame = False
last_stack = []
elif last_filtered:
m = frame_pattern.match(line)
if m:
@@ -305,20 +297,16 @@ def collect_decompressed(path, *,
or not m.group('sym')[:1].isalpha()):
continue
name = (
last_stack.append((
m.group('dso'),
m.group('sym'),
int(m.group('addr'), 16))
self, total = results[name][last_event]
if not last_has_frame:
results[name][last_event] = (
self + last_period,
total + last_period)
last_has_frame = True
else:
results[name][last_event] = (
self,
total + last_period)
int(m.group('addr'), 16)))
# stop propogating?
if propagate and len(last_stack) >= propagate:
last_filtered = False
if last_stack:
commit()
proc.wait()
if proc.returncode != 0:
@@ -328,14 +316,15 @@ def collect_decompressed(path, *,
sys.exit(-1)
# rearrange results into result type
results_ = []
for name, r in results.items():
results_.append(PerfResult(*name,
**{'self_'+events[e]: s for e, (s, _) in r.items()},
**{ events[e]: t for e, (_, t) in r.items()}))
results = results_
def to_results(results):
results_ = []
for name, (r, children) in results.items():
results_.append(PerfResult(*name,
**{events[k]: v for k, v in r.items()},
children=to_results(children)))
return results_
return results
return to_results(results)
def collect_job(path, i, **args):
# decompress into a temporary file, this is to work around
@@ -567,38 +556,46 @@ def collect(paths, *,
default=0)
# then try to map addrs -> file+line
for r in results_:
addr = r.line + delta
i = bisect.bisect(line_at, addr, key=lambda x: x[0])
if i > 0:
_, file, line = line_at[i-1]
else:
file, line = re.sub('(\.o)?$', '.c', r.file, 1), 0
#
# note we need to do this recursively
def remap(results):
results_ = []
for r in results:
addr = r.line + delta
i = bisect.bisect(line_at, addr, key=lambda x: x[0])
if i > 0:
_, file, line = line_at[i-1]
else:
file, line = re.sub('(\.o)?$', '.c', r.file, 1), 0
# ignore filtered sources
if sources is not None:
if not any(
os.path.abspath(file) == os.path.abspath(s)
for s in sources):
continue
else:
# default to only cwd
if not everything and not os.path.commonpath([
# ignore filtered sources
if sources is not None:
if not any(
os.path.abspath(file) == os.path.abspath(s)
for s in sources):
continue
else:
# default to only cwd
if not everything and not os.path.commonpath([
os.getcwd(),
os.path.abspath(file)]) == os.getcwd():
continue
# simplify path
if os.path.commonpath([
os.getcwd(),
os.path.abspath(file)]) == os.getcwd():
continue
file = os.path.relpath(file)
else:
file = os.path.abspath(file)
# simplify path
if os.path.commonpath([
os.getcwd(),
os.path.abspath(file)]) == os.getcwd():
file = os.path.relpath(file)
else:
file = os.path.abspath(file)
function, *_ = r.function.split('+', 1)
results_.append(r._replace(
file=file, function=function, line=line,
children=remap(r.children)))
return results_
function, *_ = r.function.split('+', 1)
results.append(PerfResult(file, function, line,
**{k: getattr(r, k) for k in PerfResult._fields}))
results.extend(remap(results_))
return results
@@ -636,6 +633,15 @@ def fold(Result, results, *,
for name, rs in folding.items():
folded.append(sum(rs[1:], start=rs[0]))
# fold recursively
folded_ = []
for r in folded:
folded_.append(r._replace(children=fold(
Result, r.children,
by=by,
defines=defines)))
folded = folded_
return folded
def table(Result, results, diff_results=None, *,
@@ -645,6 +651,7 @@ def table(Result, results, diff_results=None, *,
summary=False,
all=False,
percent=False,
depth=1,
**_):
all_, all = all, __builtins__.all
@@ -683,13 +690,12 @@ def table(Result, results, diff_results=None, *,
if getattr(table.get(n), k, None) is not None else (),
reverse=reverse ^ (not k or k in Result._fields))
# build up our lines
lines = []
# header
line = []
line.append('%s%s' % (
header = []
header.append('%s%s' % (
','.join(by),
' (%d added, %d removed)' % (
sum(1 for n in table if n not in diff_table),
@@ -698,129 +704,95 @@ def table(Result, results, diff_results=None, *,
if not summary else '')
if diff_results is None:
for k in fields:
line.append(k)
header.append(k)
elif percent:
for k in fields:
line.append(k)
header.append(k)
else:
for k in fields:
line.append('o'+k)
header.append('o'+k)
for k in fields:
line.append('n'+k)
header.append('n'+k)
for k in fields:
line.append('d'+k)
line.append('')
lines.append(line)
header.append('d'+k)
header.append('')
lines.append(header)
def table_entry(name, r, diff_r=None, ratios=[]):
entry = []
entry.append(name)
if diff_results is None:
for k in fields:
entry.append(getattr(r, k).table()
if getattr(r, k, None) is not None
else types[k].none)
elif percent:
for k in fields:
entry.append(getattr(r, k).diff_table()
if getattr(r, k, None) is not None
else types[k].diff_none)
else:
for k in fields:
entry.append(getattr(diff_r, k).diff_table()
if getattr(diff_r, k, None) is not None
else types[k].diff_none)
for k in fields:
entry.append(getattr(r, k).diff_table()
if getattr(r, k, None) is not None
else types[k].diff_none)
for k in fields:
entry.append(types[k].diff_diff(
getattr(r, k, None),
getattr(diff_r, k, None)))
if diff_results is None:
entry.append('')
elif percent:
entry.append(' (%s)' % ', '.join(
'+∞%' if t == +m.inf
else '-∞%' if t == -m.inf
else '%+.1f%%' % (100*t)
for t in ratios))
else:
entry.append(' (%s)' % ', '.join(
'+∞%' if t == +m.inf
else '-∞%' if t == -m.inf
else '%+.1f%%' % (100*t)
for t in ratios
if t)
if any(ratios) else '')
return entry
# entries
if not summary:
for name in names:
r = table.get(name)
if diff_results is not None:
if diff_results is None:
diff_r = None
ratios = None
else:
diff_r = diff_table.get(name)
ratios = [
types[k].ratio(
getattr(r, k, None),
getattr(diff_r, k, None))
for k in fields]
if not any(ratios) and not all_:
if not all_ and not any(ratios):
continue
line = []
line.append(name)
if diff_results is None:
for k in fields:
line.append(getattr(r, k).table()
if getattr(r, k, None) is not None
else types[k].none)
elif percent:
for k in fields:
line.append(getattr(r, k).diff_table()
if getattr(r, k, None) is not None
else types[k].diff_none)
else:
for k in fields:
line.append(getattr(diff_r, k).diff_table()
if getattr(diff_r, k, None) is not None
else types[k].diff_none)
for k in fields:
line.append(getattr(r, k).diff_table()
if getattr(r, k, None) is not None
else types[k].diff_none)
for k in fields:
line.append(types[k].diff_diff(
getattr(r, k, None),
getattr(diff_r, k, None)))
if diff_results is None:
line.append('')
elif percent:
line.append(' (%s)' % ', '.join(
'+∞%' if t == +m.inf
else '-∞%' if t == -m.inf
else '%+.1f%%' % (100*t)
for t in ratios))
else:
line.append(' (%s)' % ', '.join(
'+∞%' if t == +m.inf
else '-∞%' if t == -m.inf
else '%+.1f%%' % (100*t)
for t in ratios
if t)
if any(ratios) else '')
lines.append(line)
lines.append(table_entry(name, r, diff_r, ratios))
# total
r = next(iter(fold(Result, results, by=[])), None)
if diff_results is not None:
if diff_results is None:
diff_r = None
ratios = None
else:
diff_r = next(iter(fold(Result, diff_results, by=[])), None)
ratios = [
types[k].ratio(
getattr(r, k, None),
getattr(diff_r, k, None))
for k in fields]
line = []
line.append('TOTAL')
if diff_results is None:
for k in fields:
line.append(getattr(r, k).table()
if getattr(r, k, None) is not None
else types[k].none)
elif percent:
for k in fields:
line.append(getattr(r, k).diff_table()
if getattr(r, k, None) is not None
else types[k].diff_none)
else:
for k in fields:
line.append(getattr(diff_r, k).diff_table()
if getattr(diff_r, k, None) is not None
else types[k].diff_none)
for k in fields:
line.append(getattr(r, k).diff_table()
if getattr(r, k, None) is not None
else types[k].diff_none)
for k in fields:
line.append(types[k].diff_diff(
getattr(r, k, None),
getattr(diff_r, k, None)))
if diff_results is None:
line.append('')
elif percent:
line.append(' (%s)' % ', '.join(
'+∞%' if t == +m.inf
else '-∞%' if t == -m.inf
else '%+.1f%%' % (100*t)
for t in ratios))
else:
line.append(' (%s)' % ', '.join(
'+∞%' if t == +m.inf
else '-∞%' if t == -m.inf
else '%+.1f%%' % (100*t)
for t in ratios
if t)
if any(ratios) else '')
lines.append(line)
lines.append(table_entry('TOTAL', r, diff_r, ratios))
# find the best widths, note that column 0 contains the names and column -1
# the ratios, so those are handled a bit differently
@@ -830,13 +802,83 @@ def table(Result, results, diff_results=None, *,
it.chain([23], it.repeat(7)),
range(len(lines[0])-1))]
# print our table
for line in lines:
print('%-*s %s%s' % (
widths[0], line[0],
' '.join('%*s' % (w, x)
for w, x in zip(widths[1:], line[1:-1])),
line[-1]))
# adjust the name width based on the expected call depth, though
# note this doesn't really work with unbounded recursion
if not summary and not m.isinf(depth):
widths[0] += 4*(depth-1)
# print the tree recursively
print('%-*s %s%s' % (
widths[0], lines[0][0],
' '.join('%*s' % (w, x)
for w, x in zip(widths[1:], lines[0][1:-1])),
lines[0][-1]))
if not summary:
def recurse(results_, depth_, prefixes=('', '', '', '')):
# rebuild our tables at each layer
table_ = {
','.join(str(getattr(r, k) or '') for k in by): r
for r in results_}
names_ = list(table_.keys())
# sort again at each layer, keep in mind the numbers are
# changing as we descend
names_.sort()
if sort:
for k, reverse in reversed(sort):
names_.sort(key=lambda n: (getattr(table_[n], k),)
if getattr(table_.get(n), k, None) is not None else (),
reverse=reverse ^ (not k or k in Result._fields))
for i, name in enumerate(names_):
r = table_[name]
is_last = (i == len(names_)-1)
print('%s%-*s %s' % (
prefixes[0+is_last],
widths[0] - (
len(prefixes[0+is_last])
if not m.isinf(depth) else 0),
name,
' '.join('%*s' % (w, x)
for w, x in zip(
widths[1:],
table_entry(name, r)[1:]))))
# recurse?
if depth_ > 1:
recurse(
r.children,
depth_-1,
(prefixes[2+is_last] + "|-> ",
prefixes[2+is_last] + "'-> ",
prefixes[2+is_last] + "| ",
prefixes[2+is_last] + " "))
# we have enough going on with diffing to make the top layer
# a special case
for name, line in zip(names, lines[1:-1]):
print('%-*s %s%s' % (
widths[0], line[0],
' '.join('%*s' % (w, x)
for w, x in zip(widths[1:], line[1:-1])),
line[-1]))
if name in table and depth > 1:
recurse(
table[name].children,
depth-1,
("|-> ",
"'-> ",
"| ",
" "))
print('%-*s %s%s' % (
widths[0], lines[-1][0],
' '.join('%*s' % (w, x)
for w, x in zip(widths[1:], lines[-1][1:-1])),
lines[-1][-1]))
def annotate(Result, results, *,
@@ -855,11 +897,11 @@ def annotate(Result, results, *,
t0, t1 = min(t0, t1), max(t0, t1)
if not branches and not caches:
tk = 'self_cycles'
tk = 'cycles'
elif branches:
tk = 'self_bmisses'
tk = 'bmisses'
else:
tk = 'self_cmisses'
tk = 'cmisses'
# find max cycles
max_ = max(it.chain((float(getattr(r, tk)) for r in results), [1]))
@@ -913,23 +955,19 @@ def annotate(Result, results, *,
r = table.get(i+1)
if r is not None and (
float(r.self_cycles) > 0
float(r.cycles) > 0
if not branches and not caches
else float(r.self_bmisses) > 0
or float(r.self_branches) > 0
else float(r.bmisses) > 0 or float(r.branches) > 0
if branches
else float(r.self_cmisses) > 0
or float(r.self_caches) > 0):
else float(r.cmisses) > 0 or float(r.caches) > 0):
line = '%-*s // %s' % (
args['width'],
line,
'%s cycles' % r.self_cycles
'%s cycles' % r.cycles
if not branches and not caches
else '%s bmisses, %s branches' % (
r.self_bmisses, r.self_branches)
else '%s bmisses, %s branches' % (r.bmisses, r.branches)
if branches
else '%s cmisses, %s caches' % (
r.self_cmisses, r.self_caches))
else '%s cmisses, %s caches' % (r.cmisses, r.caches))
if args['color']:
if float(getattr(r, tk)) / max_ >= t1:
@@ -948,8 +986,6 @@ def report(perf_paths, *,
self=False,
branches=False,
caches=False,
tree=False,
depth=None,
**args):
# figure out what color should be
if args.get('color') == 'auto':
@@ -959,11 +995,8 @@ def report(perf_paths, *,
else:
args['color'] = False
# it doesn't really make sense to not have a depth with tree,
# so assume depth=inf if tree by default
if args.get('depth') is None:
args['depth'] = m.inf if tree else 1
elif args.get('depth') == 0:
# depth of 0 == m.inf
if args.get('depth') == 0:
args['depth'] = m.inf
# find sizes
@@ -1055,12 +1088,10 @@ def report(perf_paths, *,
table(PerfResult, results,
diff_results if args.get('diff') else None,
by=by if by is not None else ['function'],
fields=fields if fields is not None else [
'self_'+k if self else k
for k in (
['cycles'] if not branches and not caches
else ['bmisses', 'branches'] if branches
else ['cmisses', 'caches'])],
fields=fields if fields is not None
else ['cycles'] if not branches and not caches
else ['bmisses', 'branches'] if branches
else ['cmisses', 'caches'],
sort=sort,
**args)
@@ -1164,10 +1195,6 @@ if __name__ == "__main__":
'--everything',
action='store_true',
help="Include builtin and libc specific symbols.")
parser.add_argument(
'--self',
action='store_true',
help="Show samples before propagation up the call-chain.")
parser.add_argument(
'--branches',
action='store_true',
@@ -1176,6 +1203,18 @@ if __name__ == "__main__":
'--caches',
action='store_true',
help="Show cache accesses and cache misses.")
parser.add_argument(
'-P', '--propagate',
type=lambda x: int(x, 0),
help="Depth to propagate samples up the call-stack. 0 propagates up "
"to the entry point, 1 does no propagation. Defaults to 0.")
parser.add_argument(
'-Z', '--depth',
nargs='?',
type=lambda x: int(x, 0),
const=0,
help="Depth of function calls to show. 0 shows all calls but may not "
"terminate!")
parser.add_argument(
'-A', '--annotate',
action='store_true',