mirror of
https://github.com/littlefs-project/littlefs.git
synced 2025-12-06 15:42:43 +00:00
This was a bit broken when r was None. Which is unusual, but happens when rendering added/removed diff results.
1853 lines
63 KiB
Python
Executable File
1853 lines
63 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
#
|
|
# Script to aggregate and report Linux perf results.
|
|
#
|
|
# Example:
|
|
# ./scripts/perf.py --record -obench.perf ./runners/bench_runner
|
|
# ./scripts/perf.py bench.perf -j -Flfs.c -Flfs_util.c -Scycles
|
|
#
|
|
# Copyright (c) 2022, The littlefs authors.
|
|
# SPDX-License-Identifier: BSD-3-Clause
|
|
#
|
|
|
|
# prevent local imports
#
# when run as a script, sys.path[0] is normally the directory containing
# this script, dropping it keeps files that sit next to the script from
# shadowing the modules imported below
if __name__ == "__main__":
    __import__('sys').path.pop(0)
|
|
|
|
import bisect
|
|
import collections as co
|
|
import csv
|
|
import errno
|
|
import fcntl
|
|
import functools as ft
|
|
import io
|
|
import itertools as it
|
|
import math as mt
|
|
import multiprocessing as mp
|
|
import os
|
|
import re
|
|
import shlex
|
|
import shutil
|
|
import subprocess as sp
|
|
import sys
|
|
import tempfile
|
|
import zipfile
|
|
|
|
|
|
# TODO support non-zip perf results?
|
|
|
|
# default perf command, a list so it can carry extra arguments, the
# subcommand and flags are appended to it
PERF_PATH = ['perf']
# default comma-separated events, handed to perf record via -e
PERF_EVENTS = 'cycles,branch-misses,branches,cache-misses,cache-references'
# default sampling frequency, handed to perf record via -F unless an
# explicit period is requested
PERF_FREQ = 100
# default objdump command, a list so it can carry extra arguments
OBJDUMP_PATH = ['objdump']
# NOTE(review): not referenced in this part of the file, presumably the
# thresholds used when highlighting annotated output -- confirm
THRESHOLD = (0.5, 0.85)
|
|
|
|
|
|
# integer fields
|
|
class RInt(co.namedtuple('RInt', 'x')):
    # integer result field that may also hold +-infinity
    #
    # accepts ints, other RInts, anything int() understands, and strings
    # in any int(x, 0) base or one of +-∞/+-inf
    #
    # diff and ratio are called through the class with values that may be
    # None (missing results), so both treat a missing side as 0
    __slots__ = ()
    def __new__(cls, x=0):
        if isinstance(x, RInt):
            return x
        if isinstance(x, str):
            try:
                x = int(x, 0)
            except ValueError:
                # also accept +-∞ and +-inf
                if re.match(r'^\s*\+?\s*(?:∞|inf)\s*$', x):
                    x = mt.inf
                elif re.match(r'^\s*-\s*(?:∞|inf)\s*$', x):
                    x = -mt.inf
                else:
                    raise
        if not (isinstance(x, int) or mt.isinf(x)):
            x = int(x)
        return super().__new__(cls, x)

    def __repr__(self):
        return '%s(%r)' % (self.__class__.__name__, self.x)

    def __str__(self):
        if self.x == mt.inf:
            return '∞'
        elif self.x == -mt.inf:
            return '-∞'
        else:
            return str(self.x)

    def __bool__(self):
        return bool(self.x)

    def __int__(self):
        # infinities have no integer representation
        assert not mt.isinf(self.x)
        return self.x

    def __float__(self):
        return float(self.x)

    # placeholder rendered when a field is missing
    none = '%7s' % '-'
    def table(self):
        # render as a right-aligned table cell
        return '%7s' % (self,)

    def diff(self, other):
        # render new-old as a signed, right-aligned table cell
        new = self.x if self else 0
        old = other.x if other else 0
        if mt.isinf(new) and new == old:
            # inf-inf is nan, which %d can't format, two equal
            # infinities are unchanged, matching what ratio reports
            diff = 0
        else:
            diff = new - old
        if diff == +mt.inf:
            return '%7s' % '+∞'
        elif diff == -mt.inf:
            return '%7s' % '-∞'
        else:
            return '%+7d' % diff

    def ratio(self, other):
        # relative change from other (old) to self (new) as a float
        new = self.x if self else 0
        old = other.x if other else 0
        if mt.isinf(new) and mt.isinf(old):
            return 0.0
        elif mt.isinf(new):
            return +mt.inf
        elif mt.isinf(old):
            return -mt.inf
        elif not old and not new:
            return 0.0
        elif not old:
            # growing from nothing is an infinite increase
            return +mt.inf
        else:
            return (new-old) / old

    def __pos__(self):
        return self.__class__(+self.x)

    def __neg__(self):
        return self.__class__(-self.x)

    def __abs__(self):
        return self.__class__(abs(self.x))

    def __add__(self, other):
        return self.__class__(self.x + other.x)

    def __sub__(self, other):
        return self.__class__(self.x - other.x)

    def __mul__(self, other):
        return self.__class__(self.x * other.x)

    def __truediv__(self, other):
        # dividing by zero saturates to +-∞ instead of raising, note
        # the division itself is floor division
        if not other:
            if self >= self.__class__(0):
                return self.__class__(+mt.inf)
            else:
                return self.__class__(-mt.inf)
        return self.__class__(self.x // other.x)

    def __mod__(self, other):
        return self.__class__(self.x % other.x)
|
|
|
|
# perf results
|
|
class PerfResult(co.namedtuple('PerfResult', [
        'z', 'file', 'function', 'line',
        'cycles', 'bmisses', 'branches', 'cmisses', 'caches',
        'children'])):
    # one perf measurement, keyed by depth (z) and file/function/line,
    # with nested results for callees kept in children
    #
    # these class attributes describe the result to the generic
    # fold/table/csv code, note _fields here lists only the measured
    # counters
    _by = ['z', 'file', 'function', 'line']
    _fields = ['cycles', 'bmisses', 'branches', 'cmisses', 'caches']
    _sort = ['cycles', 'bmisses', 'cmisses', 'branches', 'caches']
    _types = {
        'cycles': RInt,
        'bmisses': RInt, 'branches': RInt,
        'cmisses': RInt, 'caches': RInt}
    _children = 'children'

    __slots__ = ()
    def __new__(cls, z=0, file='', function='', line=0,
            cycles=0, bmisses=0, branches=0, cmisses=0, caches=0,
            children=None):
        # line is kept as a plain int, the counters as RInts
        lineno = int(RInt(line))
        counters = [RInt(c)
                for c in (cycles, bmisses, branches, cmisses, caches)]
        # every result gets its own children list
        if children is None:
            children = []
        return super().__new__(cls, z, file, function, lineno,
                *counters,
                children)

    def __add__(self, other):
        # merging keeps our identity, sums the counters, and
        # concatenates the children
        summed = [getattr(self, k) + getattr(other, k)
                for k in ('cycles', 'bmisses', 'branches',
                    'cmisses', 'caches')]
        return PerfResult(self.z, self.file, self.function, self.line,
                *summed,
                self.children + other.children)
|
|
|
|
|
|
def openio(path, mode='r', buffering=-1):
    # open a path, where '-' stands for stdin/stdout
    #
    # anything other than '-' is just a normal open
    if path != '-':
        return open(path, mode, buffering)

    # reading modes map to stdin, everything else to stdout, the fd is
    # dup'd so closing the returned file leaves the real stream open
    stream = sys.stdin if 'r' in mode else sys.stdout
    return os.fdopen(os.dup(stream.fileno()), mode, buffering)
|
|
|
|
# run perf as a subprocess, storing measurements into a zip file
|
|
def record(command, *,
        output=None,
        perf_freq=PERF_FREQ,
        perf_period=None,
        perf_events=PERF_EVENTS,
        perf_path=PERF_PATH,
        **args):
    # run command under perf record and append the raw measurements to
    # the zip file at output as perf.<pid>, returns the command's exit
    # code
    #
    # perf writes into a temporary file first, as far as I can tell this
    # is strictly needed because perf's pipe-mode only works with stdout
    with tempfile.NamedTemporaryFile('rb') as measurements:
        # figure out our perf invocation, an explicit period takes
        # priority over the sampling frequency
        perf = perf_path + ['record']
        if perf_freq is not None and perf_period is None:
            perf.append('-F%s' % perf_freq)
        if perf_period is not None:
            perf.append('-c%s' % perf_period)
        perf += [
            '-B',
            '-g',
            '--all-user',
            '-e%s' % perf_events,
            '-o%s' % measurements.name]

        # run our command
        try:
            if args.get('verbose'):
                print(' '.join(shlex.quote(c) for c in perf + command))
            err = sp.call(perf + command, close_fds=False)

        except KeyboardInterrupt:
            # still keep whatever was measured before the interrupt
            err = errno.EOWNERDEAD

        # synchronize access, several recorders may share one output
        fd = os.open(output, os.O_RDWR | os.O_CREAT)
        fcntl.flock(fd, fcntl.LOCK_EX)

        # copy measurements into our zip file
        with os.fdopen(fd, 'r+b') as raw, \
                zipfile.ZipFile(raw, 'a',
                    compression=zipfile.ZIP_DEFLATED,
                    compresslevel=1) as archive, \
                archive.open('perf.%d' % os.getpid(), 'w') as entry:
            shutil.copyfileobj(measurements, entry)

    # forward the return code
    return err
|
|
|
|
|
|
# try to only process each dso once
|
|
#
|
|
# note this only caches with the non-keyword arguments
|
|
def multiprocessing_cache(f):
    # decorator caching f's results per process and, through a manager
    # dict, across processes
    #
    # the cache key is the positional arguments only, keyword arguments
    # are forwarded to f but never take part in the lookup
    local_cache = {}
    manager = mp.Manager()
    global_cache = manager.dict()
    lock = mp.Lock()

    def multiprocessing_cache(*args, **kwargs):
        # already seen in this process?
        try:
            return local_cache[args]
        except KeyError:
            pass

        # the lock is held across the call so other processes wait for
        # the result instead of computing it again
        with lock:
            if args in global_cache:
                # some other process did the work for us
                v = global_cache[args]
            else:
                # fall back to calling the function
                v = f(*args, **kwargs)
                global_cache[args] = v
            local_cache[args] = v
            return v

    return multiprocessing_cache
|
|
|
|
class Sym(co.namedtuple('Sym', [
        'name', 'global_', 'section', 'addr', 'size'])):
    # a symbol found in an object file, all five fields are required
    __slots__ = ()
    def __new__(cls, name, global_, section, addr, size):
        return super().__new__(cls, name, global_, section, addr, size)

    def __repr__(self):
        # same as the usual repr, but with addr/size shown in hex
        return '%s(%r, %r, %r, 0x%x, 0x%x)' % (
                (self.__class__.__name__,) + tuple(self))
|
|
|
|
class SymInfo:
|
|
def __init__(self, syms):
|
|
self.syms = syms
|
|
|
|
def get(self, k, d=None):
|
|
# allow lookup by both symbol and address
|
|
if isinstance(k, str):
|
|
# organize by symbol, note multiple symbols can share a name
|
|
if not hasattr(self, '_by_sym'):
|
|
by_sym = {}
|
|
for sym in self.syms:
|
|
if sym.name not in by_sym:
|
|
by_sym[sym.name] = []
|
|
if sym not in by_sym[sym.name]:
|
|
by_sym[sym.name].append(sym)
|
|
self._by_sym = by_sym
|
|
|
|
return self._by_sym.get(k, d)
|
|
|
|
else:
|
|
import bisect
|
|
|
|
# organize by address
|
|
if not hasattr(self, '_by_addr'):
|
|
# sort and keep largest/first when duplicates
|
|
syms = self.syms.copy()
|
|
syms.sort(key=lambda x: (x.addr, -x.size))
|
|
|
|
by_addr = []
|
|
for sym in syms:
|
|
if (len(by_addr) == 0
|
|
or by_addr[-1].addr != sym.addr):
|
|
by_addr.append(sym)
|
|
self._by_addr = by_addr
|
|
|
|
# find sym by range
|
|
i = bisect.bisect(self._by_addr, k,
|
|
key=lambda x: x.addr)
|
|
# check that we're actually in this sym's size
|
|
if i > 0 and k < self._by_addr[i-1].addr+self._by_addr[i-1].size:
|
|
return self._by_addr[i-1]
|
|
else:
|
|
return d
|
|
|
|
def __getitem__(self, k):
|
|
v = self.get(k)
|
|
if v is None:
|
|
raise KeyError(k)
|
|
return v
|
|
|
|
def __contains__(self, k):
|
|
return self.get(k) is not None
|
|
|
|
def __bool__(self):
|
|
return bool(self.syms)
|
|
|
|
def __len__(self):
|
|
return len(self.syms)
|
|
|
|
def __iter__(self):
|
|
return iter(self.syms)
|
|
|
|
def globals(self):
|
|
return SymInfo([sym for sym in self.syms
|
|
if sym.global_])
|
|
|
|
def section(self, section):
|
|
return SymInfo([sym for sym in self.syms
|
|
# note we accept prefixes
|
|
if s.startswith(section)])
|
|
|
|
@multiprocessing_cache
def collect_syms(obj_path, global_=False, sections=None, *,
        objdump_path=OBJDUMP_PATH,
        **args):
    # collect the symbols in obj_path by parsing objdump --syms
    #
    # global_ drops non-global symbols, sections is an optional list of
    # section-name prefixes to keep, zero-sized symbols are always
    # dropped
    #
    # returns a SymInfo, raises CalledProcessError if objdump fails
    #
    # raw strings, \s is not a valid escape in a normal string literal
    symbol_pattern = re.compile(
            r'^(?P<addr>[0-9a-fA-F]+)'
                r' (?P<scope>.).*'
                r'\s+(?P<section>[^\s]+)'
                r'\s+(?P<size>[0-9a-fA-F]+)'
                r'\s+(?P<name>[^\s]+)\s*$')

    # find symbol addresses and sizes
    syms = []
    cmd = objdump_path + ['--syms', obj_path]
    if args.get('verbose'):
        print(' '.join(shlex.quote(c) for c in cmd))
    proc = sp.Popen(cmd,
            stdout=sp.PIPE,
            universal_newlines=True,
            errors='replace',
            close_fds=False)
    for line in proc.stdout:
        m = symbol_pattern.match(line)
        if m:
            name = m.group('name')
            scope = m.group('scope')
            section = m.group('section')
            addr = int(m.group('addr'), 16)
            size = int(m.group('size'), 16)
            # skip non-globals?
            # l => local
            # g => global
            # u => unique global
            # => neither
            # ! => local + global
            global__ = scope not in 'l '
            if global_ and not global__:
                continue
            # filter by section? note we accept prefixes
            if (sections is not None
                    and not any(section.startswith(prefix)
                        for prefix in sections)):
                continue
            # skip zero sized symbols
            if not size:
                continue
            # note multiple symbols can share a name
            syms.append(Sym(name, global__, section, addr, size))
    proc.wait()
    if proc.returncode != 0:
        raise sp.CalledProcessError(proc.returncode, proc.args)

    return SymInfo(syms)
|
|
|
|
class Line(co.namedtuple('Line', ['file', 'line', 'addr'])):
    # a source location (file+line) and the address it maps to
    __slots__ = ()
    def __new__(cls, file, line, addr):
        return super().__new__(cls, file, line, addr)

    def __repr__(self):
        # same as the usual repr, but with addr shown in hex
        return '%s(%r, %r, 0x%x)' % (
                (self.__class__.__name__,) + tuple(self))
|
|
|
|
class LineInfo:
|
|
def __init__(self, lines):
|
|
self.lines = lines
|
|
|
|
def get(self, k, d=None):
|
|
# allow lookup by both address and file+line tuple
|
|
if not isinstance(k, tuple):
|
|
import bisect
|
|
|
|
# organize by address
|
|
if not hasattr(self, '_by_addr'):
|
|
# sort and keep first when duplicates
|
|
lines = self.lines.copy()
|
|
lines.sort(key=lambda x: (x.addr, x.file, x.line))
|
|
|
|
by_addr = []
|
|
for line in lines:
|
|
if (len(by_addr) == 0
|
|
or by_addr[-1].addr != line.addr):
|
|
by_addr.append(line)
|
|
self._by_addr = by_addr
|
|
|
|
# find file+line by addr
|
|
i = bisect.bisect(self._by_addr, k,
|
|
key=lambda x: x.addr)
|
|
if i > 0:
|
|
return self._by_addr[i-1]
|
|
else:
|
|
return d
|
|
|
|
else:
|
|
import bisect
|
|
|
|
# organize by file+line
|
|
if not hasattr(self, '_by_line'):
|
|
# sort and keep first when duplicates
|
|
lines = self.lines.copy()
|
|
lines.sort()
|
|
|
|
by_line = []
|
|
for line in lines:
|
|
if (len(by_line) == 0
|
|
or by_line[-1].file != line.file
|
|
or by_line[-1].line != line.line):
|
|
by_line.append(line)
|
|
self._by_line = by_line
|
|
|
|
# find addr by file+line tuple
|
|
i = bisect.bisect(self._by_line, k,
|
|
key=lambda x: (x.file, x.line))
|
|
# make sure file at least matches!
|
|
if i > 0 and self._by_line[i-1].file == k[0]:
|
|
return self._by_line[i-1]
|
|
else:
|
|
return d
|
|
|
|
def __getitem__(self, k):
|
|
v = self.get(k)
|
|
if v is None:
|
|
raise KeyError(k)
|
|
return v
|
|
|
|
def __contains__(self, k):
|
|
return self.get(k) is not None
|
|
|
|
def __bool__(self):
|
|
return bool(self.lines)
|
|
|
|
def __len__(self):
|
|
return len(self.lines)
|
|
|
|
def __iter__(self):
|
|
return iter(self.lines)
|
|
|
|
@multiprocessing_cache
def collect_dwarf_lines(obj_path, *,
        objdump_path=OBJDUMP_PATH,
        **args):
    # collect address <-> file+line mappings for obj_path by replaying
    # the output of objdump --dwarf=rawline
    #
    # returns a LineInfo, raises CalledProcessError if objdump fails
    #
    # raw strings, \s and \[ are not valid escapes in a normal string
    # literal
    line_pattern = re.compile(
            # matches dir/file table
            r'^\s*(?P<no>[0-9]+)'
                r'(?:\s+(?P<dir>[0-9]+))?'
                r'.*\s+(?P<path>[^\s]+)\s*$'
            # matches line opcodes
            r'|' r'^\s*\[[^\]]*\]' r'(?:'
                r'\s+(?P<op_special>Special)'
                r'|' r'\s+(?P<op_copy>Copy)'
                r'|' r'\s+(?P<op_end>End of Sequence)'
                r'|' r'\s+File.*?to.*?(?P<op_file>[0-9]+)'
                r'|' r'\s+Line.*?to.*?(?P<op_line>[0-9]+)'
                r'|' r'\s+(?:Address|PC)'
                    r'\s+.*?to.*?(?P<op_addr>[0xX0-9a-fA-F]+)'
                r'|' r'\s+[^\s]+' r')+\s*$',
            re.IGNORECASE)

    # state machine for dwarf line numbers, note that objdump's
    # decodedline seems to have issues with multiple dir/file
    # tables, which is why we need this
    lines = []
    dirs = co.OrderedDict()
    files = co.OrderedDict()
    op_file = 1
    op_line = 1
    op_addr = 0
    cmd = objdump_path + ['--dwarf=rawline', obj_path]
    if args.get('verbose'):
        print(' '.join(shlex.quote(c) for c in cmd))
    proc = sp.Popen(cmd,
            stdout=sp.PIPE,
            universal_newlines=True,
            errors='replace',
            close_fds=False)
    for line in proc.stdout:
        m = line_pattern.match(line)
        if m:
            if m.group('no') and not m.group('dir'):
                # found a directory entry
                dirs[int(m.group('no'))] = m.group('path')
            elif m.group('no'):
                # found a file entry, resolved against its directory
                # when we know it
                dir_no = int(m.group('dir'))
                if dir_no in dirs:
                    files[int(m.group('no'))] = os.path.join(
                            dirs[dir_no],
                            m.group('path'))
                else:
                    files[int(m.group('no'))] = m.group('path')
            else:
                # found a state machine update
                if m.group('op_file'):
                    op_file = int(m.group('op_file'), 0)
                if m.group('op_line'):
                    op_line = int(m.group('op_line'), 0)
                if m.group('op_addr'):
                    op_addr = int(m.group('op_addr'), 0)

                # these opcodes emit a row with the current state
                if (m.group('op_special')
                        or m.group('op_copy')
                        or m.group('op_end')):
                    file = os.path.abspath(files.get(op_file, '?'))
                    lines.append(Line(file, op_line, op_addr))

                # end of sequence resets the state machine
                if m.group('op_end'):
                    op_file = 1
                    op_line = 1
                    op_addr = 0
    proc.wait()
    if proc.returncode != 0:
        raise sp.CalledProcessError(proc.returncode, proc.args)

    return LineInfo(lines)
|
|
|
|
|
|
def collect_decompressed(path, *,
        perf_path=PERF_PATH,
        sources=None,
        everything=False,
        propagate=0,
        depth=1,
        **args):
    # parse one raw perf file with perf script and attribute each
    # sample's period to the (file, function, line) frames in its stack
    #
    # sources limits results to the given source files, otherwise only
    # files under the cwd are kept unless everything is set, propagate
    # bounds how many frames of a stack are considered (0 = all), depth
    # bounds how deep the resulting call trees nest
    #
    # returns a list of PerfResults, raises CalledProcessError if perf
    # fails
    #
    # raw strings, \s and \w are not valid escapes in a normal string
    # literal
    sample_pattern = re.compile(
            r'(?P<comm>\w+)'
                r'\s+(?P<pid>\w+)'
                r'\s+(?P<time>[\w.]+):'
                r'\s*(?P<period>\w+)'
                r'\s+(?P<event>[^:]+):')
    frame_pattern = re.compile(
            r'\s+(?P<addr>\w+)'
                r'\s+(?P<sym>[^\s\+]+)(?:\+(?P<off>\w+))?'
                r'\s+\((?P<dso>[^\)]+)\)')
    # perf event name => result field
    events = {
        'cycles': 'cycles',
        'branch-misses': 'bmisses',
        'branches': 'branches',
        'cache-misses': 'cmisses',
        'cache-references': 'caches'}

    # note perf_path may contain extra args
    cmd = perf_path + [
        'script',
        '-i%s' % path]
    if args.get('verbose'):
        print(' '.join(shlex.quote(c) for c in cmd))
    proc = sp.Popen(cmd,
            stdout=sp.PIPE,
            universal_newlines=True,
            errors='replace',
            close_fds=False)

    # state for the sample currently being parsed
    last_filtered = False
    last_event = ''
    last_period = 0
    last_stack = []
    # dso => candidate ASLR delta => accumulated error
    deltas = co.defaultdict(lambda: {})
    # dso => sym => (runtime addr, size) as last observed
    syms_ = co.defaultdict(lambda: {})
    # (dso, addr) => (file, line), or None if filtered out
    at_cache = {}
    # frame => (event => period, nested results of the same shape)
    results = {}

    def commit():
        # tail-recursively propagate measurements
        for i in range(len(last_stack)):
            results_ = results
            for j in reversed(range(i+1)):
                if i+1-j > depth:
                    break

                # propagate
                name = last_stack[j]
                if name not in results_:
                    results_[name] = (co.defaultdict(lambda: 0), {})
                results_[name][0][last_event] += last_period

                # recurse
                results_ = results_[name][1]

    for line in proc.stdout:
        # we need to process a lot of data, so wait to use regex as late
        # as possible
        if not line.startswith('\t'):
            # a non-frame line ends the previous sample
            if last_filtered:
                commit()
            last_filtered = False

            if line:
                m = sample_pattern.match(line)
                if m and m.group('event') in events:
                    last_filtered = True
                    last_event = m.group('event')
                    last_period = int(m.group('period'), 0)
                    last_stack = []

        elif last_filtered:
            m = frame_pattern.match(line)
            if m:
                # filter out internal/kernel functions
                if not everything and (
                        m.group('sym').startswith('__')
                            or m.group('sym').startswith('0')
                            or m.group('sym').startswith('-')
                            or m.group('sym').startswith('[')
                            or m.group('dso').startswith('/usr/lib')):
                    continue

                dso = m.group('dso')
                sym = m.group('sym')
                off = int(m.group('off'), 0) if m.group('off') else 0
                addr_ = int(m.group('addr'), 16)

                # get the syms/lines for the dso, this is cached
                syms = collect_syms(dso,
                        sections=['.text'],
                        **args)
                lines = collect_dwarf_lines(dso, **args)

                # ASLR is tricky, we have symbols+offsets, but static symbols
                # means we may have multiple options for each symbol.
                #
                # To try to solve this, we use previous seen symbols to build
                # confidence for the correct ASLR delta. This means we may
                # guess incorrectly for early symbols, but this will only affect
                # a few samples.
                if sym in syms:
                    sym_addr_ = addr_ - off

                    # track possible deltas?
                    for sym_ in syms[sym]:
                        delta = sym_.addr - sym_addr_
                        if delta not in deltas[dso]:
                            # seed a new candidate with its error against
                            # every symbol seen so far, note syms[s] holds
                            # Sym records, so compare against their addr
                            deltas[dso][delta] = sum(
                                    abs(a_+delta - known.addr)
                                        for s, (a_, _) in syms_[dso].items()
                                        for known in syms[s])
                    for delta in deltas[dso].keys():
                        deltas[dso][delta] += abs(sym_addr_+delta - sym_.addr)
                    syms_[dso][sym] = sym_addr_, sym_.size

                    # guess the best delta
                    delta, _ = min(deltas[dso].items(),
                            key=lambda x: (x[1], x[0]))
                    addr = addr_ + delta

                    # cached?
                    if (dso,addr) in at_cache:
                        cached = at_cache[(dso,addr)]
                        if cached is None:
                            # cache says to skip
                            continue
                        file_, lineno = cached
                    else:
                        # find file+line
                        line_ = lines.get(addr)
                        if line_ is not None:
                            file_, lineno = line_.file, line_.line
                        else:
                            file_, lineno = re.sub(
                                    r'(\.o)?$', '.c', dso, count=1), 0

                        # ignore filtered sources
                        if sources is not None:
                            if not any(
                                    os.path.abspath(file_) == os.path.abspath(s)
                                        for s in sources):
                                at_cache[(dso,addr)] = None
                                continue
                        else:
                            # default to only cwd
                            if not everything and not os.path.commonpath([
                                    os.getcwd(),
                                    os.path.abspath(file_)]) == os.getcwd():
                                at_cache[(dso,addr)] = None
                                continue

                        # simplify path
                        if os.path.commonpath([
                                os.getcwd(),
                                os.path.abspath(file_)]) == os.getcwd():
                            file_ = os.path.relpath(file_)
                        else:
                            file_ = os.path.abspath(file_)

                        at_cache[(dso,addr)] = file_, lineno
                else:
                    # unknown symbol, guess a source name from the dso
                    file_, lineno = re.sub(
                            r'(\.o)?$', '.c', dso, count=1), 0

                last_stack.append((file_, sym, lineno))

                # stop propagating?
                if propagate and len(last_stack) >= propagate:
                    commit()
                    last_filtered = False
    if last_filtered:
        commit()

    proc.wait()
    if proc.returncode != 0:
        raise sp.CalledProcessError(proc.returncode, proc.args)

    # rearrange results into result type
    def to_results(results, z):
        results_ = []
        for name, (r, children) in results.items():
            results_.append(PerfResult(z, *name,
                    **{events[k]: v for k, v in r.items()},
                    children=to_results(children, z+1)))
        return results_

    return to_results(results, 0)
|
|
|
|
def collect_job(path, i, **args):
    # collect the results for entry i of the zip file at path
    #
    # the entry is decompressed into a temporary file first, this is to
    # work around some limitations of perf
    with zipfile.ZipFile(path) as archive, \
            archive.open(i) as compressed, \
            tempfile.NamedTemporaryFile('wb') as decompressed:
        shutil.copyfileobj(compressed, decompressed)
        # make sure everything is on disk before perf reads the file
        decompressed.flush()

        return collect_decompressed(decompressed.name, **args)
|
|
|
|
def starapply(args):
    # call a function packed as a single (f, args, kwargs) tuple, which
    # lets pool maps that pass one argument carry a full call
    func, positional, keywords = args
    return func(*positional, **keywords)
|
|
|
|
def collect_perf(perf_paths, *,
        jobs=None,
        **args):
    # collect results from every entry of every .perf file given
    #
    # jobs=None processes entries one at a time in this process, jobs=0
    # uses one worker per available cpu, any other value is the number
    # of workers
    #
    # automatic job detection?
    if jobs == 0:
        jobs = len(os.sched_getaffinity(0))

    # each .perf file is actually a zip file containing perf files from
    # multiple runs
    records = []
    for path in perf_paths:
        with zipfile.ZipFile(path) as z:
            records += [(path, info) for info in z.infolist()]

    results = []
    if jobs is None:
        # no parallelism requested
        for path, info in records:
            results.extend(collect_job(path, info, **args))
        return results

    # we're dealing with a lot of data but also surprisingly
    # parallelizable
    with mp.Pool(jobs) as pool:
        tasks = ((collect_job, (path, info), args)
                for path, info in records)
        for partial in pool.imap_unordered(starapply, tasks):
            results.extend(partial)
    return results
|
|
|
|
|
|
# common folding/tabling/read/write code
|
|
|
|
class Rev(co.namedtuple('Rev', 'x')):
    # wraps a value so that it orders in reverse, useful for flipping
    # one component of a sort key
    __slots__ = ()
    # yes we need all of these because we're a namedtuple, otherwise
    # the tuple comparisons would win
    def __lt__(self, other):
        return other.x < self.x
    def __gt__(self, other):
        return other.x > self.x
    def __le__(self, other):
        return other.x <= self.x
    def __ge__(self, other):
        return other.x >= self.x
|
|
|
|
def fold(Result, results, *,
        by=None,
        defines=[],
        sort=None,
        depth=1,
        **_):
    # merge results that share the same by fields, returning a sorted
    # list
    #
    # defines is a list of (field, allowed values) filters, sort is a
    # list of (field, reverse) pairs, depth limits how far we recurse
    # into children, an unknown field prints an error and exits
    #
    # stop when depth hits zero
    if depth == 0:
        return []

    # organize by by
    if by is None:
        by = Result._by

    for k in it.chain(by or [], (k for k, _ in defines)):
        if k in Result._by or k in Result._fields:
            continue
        print("error: could not find field %r?" % k,
                file=sys.stderr)
        sys.exit(-1)

    # filter by matching defines
    if defines:
        results = [r for r in results
                if all(str(getattr(r, k)) in vs for k, vs in defines)]

    # organize results into conflicts
    folding = co.OrderedDict()
    for r in results:
        name = tuple(getattr(r, k) for k in by)
        folding.setdefault(name, []).append(r)

    # merge conflicts
    folded = [sum(rs[1:], start=rs[0]) for rs in folding.values()]

    def sort_key(r):
        # sort by explicit sort fields, an empty field name means the
        # result's default sort fields, result fields sort descending
        # unless reversed, None values are skipped
        explicit = []
        for k, reverse in (sort or []):
            values = tuple(
                    (getattr(r, k_),)
                        if getattr(r, k_) is not None
                        else ()
                    for k_ in ([k] if k else Result._sort))
            if reverse ^ (not k or k in Result._fields):
                values = Rev(values)
            explicit.append(values)
        # then sort by result
        return (tuple(explicit), r)

    # sort, note that python's sort is stable
    folded.sort(key=sort_key)

    # recurse if we have recursive results
    if hasattr(Result, '_children'):
        refolded = []
        for r in folded:
            # only filter defines at the top level!
            children = fold(
                    Result, getattr(r, Result._children),
                    by=by,
                    sort=sort,
                    depth=depth-1)
            refolded.append(r._replace(**{Result._children: children}))
        folded = refolded

    return folded
|
|
|
|
def hotify(Result, results, *,
        enumerates=None,
        depth=1,
        hot=None,
        **_):
    # replace each result's children with its "hot path", a flat list
    # made by repeatedly picking the hottest child and descending into
    # that child's children
    #
    # hot is a list of (field, reverse) pairs choosing what hottest
    # means, the result's default sort order is always used as the
    # final tie-breaker, enumerates is an optional list of fields to
    # overwrite with each entry's position in the hot path
    #
    # note! hotifying risks confusion if you don't enumerate/have a
    # z field, since it will allow folding across recursive boundaries

    # hotify only makes sense for recursive results
    assert hasattr(Result, '_children')

    results_ = []
    for r in results:
        hot_ = []
        def recurse(results_, depth_):
            nonlocal hot_
            if not results_:
                return

            # find the hottest result, note hot defaults to None, which
            # we can't iterate over
            r = min(results_, key=lambda r:
                    tuple((Rev
                            if reverse ^ (not k or k in Result._fields)
                            else lambda x: x)(
                                tuple((getattr(r, k_),)
                                        if getattr(r, k_) is not None
                                        else ()
                                    for k_ in ([k] if k else Result._sort)))
                        for k, reverse in it.chain(
                            hot or [],
                            [(None, False)])))

            hot_.append(r._replace(**(
                    # enumerate?
                    ({e: len(hot_) for e in enumerates}
                            if enumerates is not None
                            else {})
                        # hot entries are flat, so drop their children
                        | {Result._children: []})))

            # recurse?
            if depth_ > 1:
                recurse(getattr(r, Result._children),
                        depth_-1)

        recurse(getattr(r, Result._children), depth-1)
        results_.append(r._replace(**{Result._children: hot_}))

    return results_
|
|
|
|
def table(Result, results, diff_results=None, *,
        by=None,
        fields=None,
        sort=None,
        labels=None,
        depth=1,
        hot=None,
        diff=None,
        percent=None,
        all=False,
        compare=None,
        no_header=False,
        small_header=False,
        no_total=False,
        small_table=False,
        summary=False,
        **_):
    # print results as an aligned text table to stdout
    #
    # results must already be folded by the same by fields, diff_results
    # is an optional second set to diff (diff) or take ratios (percent)
    # against, compare is an optional list of by-values naming one entry
    # that every other entry is compared to, depth limits how deep
    # children are rendered
    #
    # all shadows the builtin we need below, so swap them
    import builtins
    all_, all = all, builtins.all

    if by is None:
        by = Result._by
    if fields is None:
        fields = Result._fields
    types = Result._types

    # organize by name
    table = {
            ','.join(str(getattr(r, k)
                        if getattr(r, k) is not None
                        else '')
                    for k in by): r
                for r in results}
    diff_table = {
            ','.join(str(getattr(r, k)
                        if getattr(r, k) is not None
                        else '')
                    for k in by): r
                for r in diff_results or []}

    # lost results? this only happens if we didn't fold by the same
    # by field, which is an error and risks confusing results
    assert len(table) == len(results)
    if diff_results is not None:
        assert len(diff_table) == len(diff_results)

    # find compare entry if there is one, note compare_r must always be
    # bound since the helpers below reference it
    compare_r = None
    if compare:
        compare_r = table.get(','.join(str(k) for k in compare))

    # build up our lines
    lines = []

    # header
    if not no_header:
        header = ['%s%s' % (
                    ','.join(labels if labels is not None else by),
                    ' (%d added, %d removed)' % (
                            sum(1 for n in table if n not in diff_table),
                            sum(1 for n in diff_table if n not in table))
                        if diff else '')
                if not small_header and not small_table and not summary
                else '']
        if not diff:
            for k in fields:
                header.append(k)
        else:
            # old, new, and diff columns
            for k in fields:
                header.append('o'+k)
            for k in fields:
                header.append('n'+k)
            for k in fields:
                header.append('d'+k)
        lines.append(header)

    # delete these to try to catch typos below, we need to rebuild
    # these tables at each recursive layer
    del table
    del diff_table

    # entry helper, note either r or diff_r may be None when rendering
    # added/removed diff results
    def table_entry(name, r, diff_r=None):
        # prepend name
        entry = [name]

        # normal entry? an empty compare means no comparison at all
        if ((not compare or r == compare_r)
                and not percent
                and not diff):
            for k in fields:
                entry.append(
                        (getattr(r, k).table(),
                                getattr(getattr(r, k), 'notes', lambda: [])())
                            if getattr(r, k, None) is not None
                            else types[k].none)
        # compare entry?
        elif not percent and not diff:
            for k in fields:
                entry.append(
                        (getattr(r, k).table()
                                if getattr(r, k, None) is not None
                                else types[k].none,
                            (lambda t: ['+∞%'] if t == +mt.inf
                                    else ['-∞%'] if t == -mt.inf
                                    else ['%+.1f%%' % (100*t)])(
                                types[k].ratio(
                                    getattr(r, k, None),
                                    getattr(compare_r, k, None)))))
        # percent entry?
        elif not diff:
            for k in fields:
                entry.append(
                        (getattr(r, k).table()
                                if getattr(r, k, None) is not None
                                else types[k].none,
                            (lambda t: ['+∞%'] if t == +mt.inf
                                    else ['-∞%'] if t == -mt.inf
                                    else ['%+.1f%%' % (100*t)])(
                                types[k].ratio(
                                    getattr(r, k, None),
                                    getattr(diff_r, k, None)))))
        # diff entry?
        else:
            for k in fields:
                entry.append(getattr(diff_r, k).table()
                        if getattr(diff_r, k, None) is not None
                        else types[k].none)
            for k in fields:
                entry.append(getattr(r, k).table()
                        if getattr(r, k, None) is not None
                        else types[k].none)
            for k in fields:
                entry.append(
                        (types[k].diff(
                                getattr(r, k, None),
                                getattr(diff_r, k, None)),
                            (lambda t: ['+∞%'] if t == +mt.inf
                                    else ['-∞%'] if t == -mt.inf
                                    else ['%+.1f%%' % (100*t)] if t
                                    else [])(
                                types[k].ratio(
                                    getattr(r, k, None),
                                    getattr(diff_r, k, None)))))

        # append any notes
        if hasattr(Result, '_notes') and r is not None:
            notes = sorted(getattr(r, Result._notes))
            if isinstance(entry[-1], tuple):
                entry[-1] = (entry[-1][0], entry[-1][1] + notes)
            else:
                entry[-1] = (entry[-1], notes)

        return entry

    # recursive entry helper, prefixes are the tree-drawing strings for
    # (entry, last entry, continuation, last continuation)
    def table_recurse(results_, diff_results_,
            depth_,
            prefixes=('', '', '', '')):
        # build the children table at each layer
        table_ = {
                ','.join(str(getattr(r, k)
                            if getattr(r, k) is not None
                            else '')
                        for k in by): r
                    for r in results_}
        diff_table_ = {
                ','.join(str(getattr(r, k)
                            if getattr(r, k) is not None
                            else '')
                        for k in by): r
                    for r in diff_results_ or []}
        # when diffing, hide unchanged entries unless all was requested
        names_ = [n
                for n in table_.keys() | diff_table_.keys()
                if diff_results is None
                    or all_
                    or any(
                        types[k].ratio(
                                getattr(table_.get(n), k, None),
                                getattr(diff_table_.get(n), k, None))
                            for k in fields)]

        # sort again, now with diff info, note that python's sort is stable
        names_.sort(key=lambda n: (
                # sort by explicit sort fields
                next(
                    tuple((Rev
                            if reverse ^ (not k or k in Result._fields)
                            else lambda x: x)(
                                tuple((getattr(r_, k_),)
                                        if getattr(r_, k_) is not None
                                        else ()
                                    for k_ in ([k] if k else Result._sort)))
                        for k, reverse in (sort or []))
                    for r_ in [table_.get(n), diff_table_.get(n)]
                    if r_ is not None),
                # sort by ratio if diffing
                Rev(tuple(types[k].ratio(
                            getattr(table_.get(n), k, None),
                            getattr(diff_table_.get(n), k, None))
                        for k in fields))
                    if diff or percent
                    else (),
                # move compare entry to the top, note this can be
                # overridden by explicitly sorting by fields
                (table_.get(n) != compare_r,
                        # sort by ratio if comparing
                        Rev(tuple(
                            types[k].ratio(
                                    getattr(table_.get(n), k, None),
                                    getattr(compare_r, k, None))
                                for k in fields)))
                    if compare
                    else (),
                # sort by result
                (table_[n],) if n in table_ else (),
                # and finally by name (diffs may be missing results)
                n))

        for i, name in enumerate(names_):
            # find comparable results
            r = table_.get(name)
            diff_r = diff_table_.get(name)

            # figure out a good label
            if labels is not None:
                label = next(
                        ','.join(str(getattr(r_, k)
                                    if getattr(r_, k) is not None
                                    else '')
                                for k in labels)
                            for r_ in [r, diff_r]
                            if r_ is not None)
            else:
                label = name

            # build line
            line = table_entry(label, r, diff_r)

            # add prefixes
            line = [x if isinstance(x, tuple) else (x, []) for x in line]
            line[0] = (prefixes[0+(i==len(names_)-1)] + line[0][0], line[0][1])
            lines.append(line)

            # recurse?
            if name in table_ and depth_ > 1:
                table_recurse(
                        getattr(r, Result._children),
                        getattr(diff_r, Result._children, None),
                        depth_-1,
                        (prefixes[2+(i==len(names_)-1)] + "|-> ",
                         prefixes[2+(i==len(names_)-1)] + "'-> ",
                         prefixes[2+(i==len(names_)-1)] + "| ",
                         prefixes[2+(i==len(names_)-1)] + " "))

    # build entries
    if not summary:
        table_recurse(results, diff_results, depth)

    # total
    if not no_total and not (small_table and not summary):
        r = next(iter(fold(Result, results, by=[])), None)
        if diff_results is None:
            diff_r = None
        else:
            diff_r = next(iter(fold(Result, diff_results, by=[])), None)
        lines.append(table_entry('TOTAL', r, diff_r))

    # homogenize, every cell becomes a (text, notes) tuple
    lines = [[x if isinstance(x, tuple) else (x, []) for x in line]
            for line in lines]

    # find the best widths, note that column 0 contains the names and is
    # handled a bit differently
    widths = co.defaultdict(lambda: 7, {0: 7})
    nwidths = co.defaultdict(lambda: 0)
    for line in lines:
        for i, x in enumerate(line):
            widths[i] = max(widths[i], ((len(x[0])+1+4-1)//4)*4-1)
            if i != len(line)-1:
                nwidths[i] = max(nwidths[i], 1+sum(2+len(n) for n in x[1]))

    # print our table
    for line in lines:
        print('%-*s %s' % (
                widths[0], line[0][0],
                ' '.join('%*s%-*s' % (
                        widths[i], x[0],
                        nwidths[i], ' (%s)' % ', '.join(x[1]) if x[1] else '')
                    for i, x in enumerate(line[1:], 1))))
|
|
|
|
def read_csv(path, Result, *,
        depth=1,
        **_):
    # Read results from a CSV or JSON file.
    #
    # path   - file to read, opened with openio
    # Result - result type, this provides _by and _fields, and optionally
    #          _children and _notes
    # depth  - how many levels of children to read, this only applies to
    #          JSON, the CSV branch never reads children
    #
    # Returns a list of Results. Rows/objects where none of the
    # Result._fields have a non-blank value are skipped, as are any
    # rows/objects the Result constructor rejects with a TypeError.
    #
    def is_present(v):
        # note JSON values may be null or numbers, not just strings, so we
        # can't blindly call v.strip() here
        return v is not None and bool(str(v).strip())

    with openio(path, 'r') as f:
        # csv or json? assume json starts with [
        is_json = (f.buffer.peek(1)[:1] == b'[')

        # read csv?
        if not is_json:
            results = []
            reader = csv.DictReader(f, restval='')
            for r in reader:
                if not any(k in r and is_present(r[k])
                        for k in Result._fields):
                    continue
                try:
                    # note this allows by/fields to overlap
                    results.append(Result(**(
                            {k: r[k] for k in Result._by
                                if k in r and is_present(r[k])}
                            | {k: r[k] for k in Result._fields
                                if k in r and is_present(r[k])})))
                except TypeError:
                    # best-effort, drop rows Result can't be built from
                    pass
            return results

        # read json?
        else:
            import json
            def unjsonify(results, depth_):
                results_ = []
                for r in results:
                    if not any(k in r and is_present(r[k])
                            for k in Result._fields):
                        continue
                    try:
                        # note this allows by/fields to overlap
                        results_.append(Result(**(
                                {k: r[k] for k in Result._by
                                    if k in r and r[k] is not None}
                                | {k: r[k] for k in Result._fields
                                    if k in r and r[k] is not None}
                                # only recurse into children while we have
                                # depth left
                                | ({Result._children: unjsonify(
                                        r[Result._children],
                                        depth_-1)}
                                    if hasattr(Result, '_children')
                                        and Result._children in r
                                        and r[Result._children] is not None
                                        and depth_ > 1
                                    else {})
                                # notes are stored as a list in json, but
                                # passed to Result as a set
                                | ({Result._notes: set(r[Result._notes])}
                                    if hasattr(Result, '_notes')
                                        and Result._notes in r
                                        and r[Result._notes] is not None
                                    else {}))))
                    except TypeError:
                        # best-effort, drop objects Result can't be built
                        # from
                        pass
                return results_
            return unjsonify(json.load(f), depth)
|
|
|
|
def write_csv(path, Result, results, *,
        json=False,
        by=None,
        fields=None,
        depth=1,
        **_):
    # Write results to a CSV or JSON file.
    #
    # path    - file to write, opened with openio
    # Result  - result type, this provides _by and _fields, and optionally
    #           _children and _notes
    # results - results to write
    # json    - write JSON instead of CSV
    # by      - by fields to write, defaults to Result._by
    # fields  - field fields to write, defaults to Result._fields
    # depth   - how many levels of children to write, this only applies
    #           to JSON
    #
    def by_keys():
        return Result._by if by is None else by

    def field_keys():
        return Result._fields if fields is None else fields

    def flatten(r):
        # build the by/field part of a row, skipping unset attributes,
        # note this allows by/fields to overlap, in which case the
        # stringified field value replaces the by value
        row = {}
        for k in by_keys():
            v = getattr(r, k)
            if v is not None:
                row[k] = v
        for k in field_keys():
            v = getattr(r, k)
            if v is not None:
                row[k] = str(v)
        return row

    with openio(path, 'w') as f:
        # write json?
        if json:
            import json as json_
            # the neat thing about json is we can include recursive results
            def jsonify(results, depth_):
                results_ = []
                for r in results:
                    obj = flatten(r)
                    # include children while we have depth left
                    if (hasattr(Result, '_children')
                            and getattr(r, Result._children)
                            and depth_ > 1):
                        obj[Result._children] = jsonify(
                                getattr(r, Result._children),
                                depth_-1)
                    # include notes, if there are any
                    if (hasattr(Result, '_notes')
                            and getattr(r, Result._notes)):
                        obj[Result._notes] = list(
                                getattr(r, Result._notes))
                    results_.append(obj)
                return results_
            json_.dump(jsonify(results, depth), f,
                    separators=(',', ':'))

        # write csv?
        else:
            # the header is by fields followed by field fields, dropping
            # duplicates but keeping the order
            header = list(dict.fromkeys(it.chain(by_keys(), field_keys())))
            writer = csv.DictWriter(f, header)
            writer.writeheader()
            for r in results:
                writer.writerow(flatten(r))
|
|
|
|
|
|
def annotate(Result, results, *,
        annotate=None,
        threshold=None,
        branches=False,
        caches=False,
        **args):
    # Print source files annotated with perf results.
    #
    # Result    - result type, passed to fold
    # results   - results to annotate, these are folded by file+line
    # annotate  - if truthy show every line of every file, otherwise only
    #             show spans of lines around lines that reach the lower
    #             threshold
    # threshold - (t0, t1) tuple, or a 1-tuple used for both, defaults to
    #             THRESHOLD. These are compared against each line's
    #             fraction of the max result.
    # branches  - annotate with bmisses/branches instead of cycles
    # caches    - annotate with cmisses/caches instead of cycles
    # args      - 'color', 'width', and 'context' are read from here as
    #             needed
    #
    # figure out the threshold
    if threshold is None:
        t0, t1 = THRESHOLD
    elif len(threshold) == 1:
        t0, t1 = threshold[0], threshold[0]
    else:
        t0, t1 = threshold
    t0, t1 = min(t0, t1), max(t0, t1)

    # figure out which field the thresholds apply to
    if not branches and not caches:
        tk = 'cycles'
    elif branches:
        tk = 'bmisses'
    else:
        tk = 'cmisses'

    # find max cycles, the extra 1 avoids dividing by zero
    max_ = max(it.chain((float(getattr(r, tk)) for r in results), [1]))

    # find the files to annotate, in the order they first show up
    paths = list(co.OrderedDict.fromkeys(r.file for r in results).keys())

    # flatten to line info, this doesn't depend on the file so we only
    # need to do this once
    results = fold(Result, results, by=['file', 'line'])

    for path in paths:
        table = {r.line: r for r in results if r.file == path}

        # calculate spans to show
        if not annotate:
            spans = []
            last = None
            func = None
            for line, r in sorted(table.items()):
                if float(getattr(r, tk)) / max_ >= t0:
                    # close enough to the previous span? extend it
                    if last is not None and line - last.stop <= args['context']:
                        last = range(
                                last.start,
                                line+1+args['context'])
                    else:
                        if last is not None:
                            spans.append((last, func))
                        last = range(
                                line-args['context'],
                                line+1+args['context'])
                        func = r.function
            if last is not None:
                spans.append((last, func))

        with open(path) as f:
            skipped = False
            for i, line in enumerate(f):
                # skip lines not in spans?
                if not annotate:
                    # note a new span always starts past the end of the
                    # previous one, so at most one span can match
                    span_funcs = [func_ for span, func_ in spans
                            if i+1 in span]
                    if not span_funcs:
                        skipped = True
                        continue

                if skipped:
                    skipped = False
                    print('%s@@ %s:%d: %s @@%s' % (
                            '\x1b[36m' if args['color'] else '',
                            path,
                            i+1,
                            # label the hunk with the function of the span
                            # we're entering, not the first span in the file
                            span_funcs[0],
                            '\x1b[m' if args['color'] else ''))

                # build line
                if line.endswith('\n'):
                    line = line[:-1]

                # only annotate lines that actually have results
                r = table.get(i+1)
                if r is not None and (
                        float(r.cycles) > 0
                            if not branches and not caches
                            else float(r.bmisses) > 0 or float(r.branches) > 0
                            if branches
                            else float(r.cmisses) > 0 or float(r.caches) > 0):
                    line = '%-*s // %s' % (
                            args['width'],
                            line,
                            '%s cycles' % r.cycles
                                if not branches and not caches
                                else '%s bmisses, %s branches' % (
                                    r.bmisses, r.branches)
                                if branches
                                else '%s cmisses, %s caches' % (
                                    r.cmisses, r.caches))

                    # bold red at or above the upper threshold, magenta at
                    # or above the lower threshold
                    if args['color']:
                        if float(getattr(r, tk)) / max_ >= t1:
                            line = '\x1b[1;31m%s\x1b[m' % line
                        elif float(getattr(r, tk)) / max_ >= t0:
                            line = '\x1b[35m%s\x1b[m' % line

                print(line)
|
|
|
|
|
|
def report(perf_paths, *,
        by=None,
        fields=None,
        defines=None,
        sort=None,
        branches=False,
        caches=False,
        depth=None,
        hot=None,
        **args):
    # Collect perf results and report them, either as a table, as
    # annotated sources, or as a CSV/JSON file.
    #
    # perf_paths - *.perf files to collect results from, ignored if
    #              args['use'] is provided
    # by         - by fields to group by, the default depends on the mode
    # fields     - field fields to show, the default depends on the mode
    # defines    - list of defines passed to fold, defaults to no defines
    # sort       - sort order passed to table
    # branches   - report bmisses/branches instead of cycles
    # caches     - report cmisses/caches instead of cycles
    # depth      - depth of children to report, 0 means no limit, defaults
    #              to 1, or no limit if hot is provided
    # hot        - if provided, only keep the hot path, see hotify
    # args       - any other options, these are forwarded to the
    #              collect/read/write/table/annotate functions
    #
    # Exits with 1 if there are no results to collect from.
    #
    # avoid sharing a mutable default between calls
    if defines is None:
        defines = []

    # figure out what color should be
    if args.get('color') == 'auto':
        args['color'] = sys.stdout.isatty()
    elif args.get('color') == 'always':
        args['color'] = True
    else:
        args['color'] = False

    # figure out what fields we're interested in
    labels = None
    if by is None:
        if (args.get('annotate')
                or args.get('threshold')):
            by = ['file', 'line']
        elif depth is not None or hot is not None:
            by = ['z', 'function']
            labels = ['function']
        else:
            by = ['function']

    if fields is None:
        if (args.get('annotate')
                or args.get('threshold')
                or args.get('output')
                or args.get('output_json')):
            fields = ['cycles', 'bmisses', 'branches', 'cmisses', 'caches']
        elif not branches and not caches:
            fields = ['cycles']
        elif branches:
            fields = ['bmisses', 'branches']
        else:
            fields = ['cmisses', 'caches']

    # figure out depth
    if depth is None:
        depth = mt.inf if hot else 1
    elif depth == 0:
        depth = mt.inf

    # find results
    if not args.get('use', None):
        # not enough info?
        if not perf_paths:
            print("error: no *.perf files?",
                    file=sys.stderr)
            sys.exit(1)

        # collect info
        results = collect_perf(perf_paths,
                depth=depth,
                **args)

    else:
        results = read_csv(args['use'], PerfResult,
                depth=depth,
                **args)

    # fold
    results = fold(PerfResult, results,
            by=by,
            defines=defines,
            depth=depth)

    # hotify?
    if hot:
        results = hotify(PerfResult, results,
                depth=depth,
                hot=hot)

    # find previous results?
    diff_results = None
    if args.get('diff') or args.get('percent'):
        try:
            diff_results = read_csv(
                    args.get('diff') or args.get('percent'),
                    PerfResult,
                    depth=depth,
                    **args)
        except FileNotFoundError:
            # no previous results is not an error, just diff against
            # nothing
            diff_results = []

        # fold
        diff_results = fold(PerfResult, diff_results,
                by=by,
                defines=defines,
                depth=depth)

        # hotify?
        if hot:
            diff_results = hotify(PerfResult, diff_results,
                    depth=depth,
                    hot=hot)

    # annotate sources
    if (args.get('annotate')
            or args.get('threshold')):
        annotate(PerfResult, results,
                branches=branches,
                caches=caches,
                **args)
    # write results to JSON
    elif args.get('output_json'):
        write_csv(args['output_json'], PerfResult, results, json=True,
                by=by,
                fields=fields,
                depth=depth,
                **args)
    # write results to CSV
    elif args.get('output'):
        write_csv(args['output'], PerfResult, results,
                by=by,
                fields=fields,
                depth=depth,
                **args)
    else:
        # print table
        table(PerfResult, results, diff_results,
                by=by,
                fields=fields,
                sort=sort,
                labels=labels,
                depth=depth,
                **args)
|
|
|
|
|
|
def main(**args):
    # --record runs a command under perf, anything else reports on
    # previously recorded results, both get the full set of options
    entry = record if args.get('record') else report
    return entry(**args)
|
|
|
|
|
|
# command-line entry point, this builds the argument parser, figures out if
# we're recording or reporting, and hands everything off to main
if __name__ == "__main__":
    import argparse
    import sys

    # bit of a hack, but parse_intermixed_args and REMAINDER are
    # incompatible, so we need to figure out what we want before running
    # argparse
    if '--record' in sys.argv:
        nargs = argparse.REMAINDER
    else:
        nargs = '*'

    # register a no-op conflict handler under the name 'ignore', this is
    # needed because some flags (-o/--output, --perf-path) are added both
    # to the main parser and to the record group below
    argparse.ArgumentParser._handle_conflict_ignore = lambda *_: None
    argparse._ArgumentGroup._handle_conflict_ignore = lambda *_: None
    parser = argparse.ArgumentParser(
            description="Aggregate and report Linux perf results.",
            allow_abbrev=False,
            conflict_handler='ignore')
    parser.add_argument(
            'perf_paths',
            nargs=nargs,
            help="Input *.perf files.")
    parser.add_argument(
            '-v', '--verbose',
            action='store_true',
            help="Output commands that run behind the scenes.")
    parser.add_argument(
            '-o', '--output',
            help="Specify CSV file to store results.")
    parser.add_argument(
            '-O', '--output-json',
            help="Specify JSON file to store results. This may contain "
                "recursive info.")
    parser.add_argument(
            '-u', '--use',
            help="Don't parse anything, use this CSV/JSON file.")
    parser.add_argument(
            '-d', '--diff',
            help="Specify CSV/JSON file to diff against.")
    parser.add_argument(
            '-p', '--percent',
            help="Specify CSV/JSON file to diff against, but only show "
                "percentage change, not a full diff.")
    parser.add_argument(
            '-c', '--compare',
            # comma-separated values => tuple of stripped strings
            type=lambda x: tuple(v.strip() for v in x.split(',')),
            help="Compare results to the row matching this by pattern.")
    parser.add_argument(
            '-a', '--all',
            action='store_true',
            help="Show all, not just the ones that changed.")
    parser.add_argument(
            '-b', '--by',
            action='append',
            choices=PerfResult._by,
            help="Group by this field.")
    parser.add_argument(
            '-f', '--field',
            dest='fields',
            action='append',
            choices=PerfResult._fields,
            help="Show this field.")
    parser.add_argument(
            '-D', '--define',
            dest='defines',
            action='append',
            # key=v1,v2,... => (key, {v1, v2, ...})
            type=lambda x: (
                lambda k, vs: (
                    k.strip(),
                    {v.strip() for v in vs.split(',')})
                )(*x.split('=', 1)),
            help="Only include results where this field is this value.")
    # -s and -S share one list, namespace.sort, of (field, reversed) tuples
    # so the relative order of the flags is preserved
    class AppendSort(argparse.Action):
        def __call__(self, parser, namespace, value, option):
            if namespace.sort is None:
                namespace.sort = []
            namespace.sort.append((value, option in {'-S', '--reverse-sort'}))
    parser.add_argument(
            '-s', '--sort',
            nargs='?',
            action=AppendSort,
            help="Sort by this field.")
    parser.add_argument(
            '-S', '--reverse-sort',
            nargs='?',
            action=AppendSort,
            help="Sort by this field, but backwards.")
    parser.add_argument(
            '-z', '--depth',
            nargs='?',
            # base 0 lets int accept prefixed literals such as 0x10
            type=lambda x: int(x, 0),
            const=0,
            help="Depth of function calls to show. 0 shows all calls unless "
                "we find a cycle. Defaults to 0.")
    parser.add_argument(
            '-g', '--propagate',
            type=lambda x: int(x, 0),
            help="Depth to propagate samples up the call-stack. 0 propagates "
                "up to the entry point, 1 does no propagation. Defaults to 0.")
    # same idea as AppendSort, -r and -R share namespace.hot as a list of
    # (field, reversed) tuples
    class AppendHot(argparse.Action):
        def __call__(self, parser, namespace, value, option):
            if namespace.hot is None:
                namespace.hot = []
            namespace.hot.append((value, option in {'-R', '--reverse-hot'}))
    parser.add_argument(
            '-r', '--hot',
            nargs='?',
            action=AppendHot,
            help="Show only the hot path for each function call. Can "
                "optionally provide fields like sort.")
    parser.add_argument(
            '-R', '--reverse-hot',
            nargs='?',
            action=AppendHot,
            help="Like -r/--hot, but backwards.")
    parser.add_argument(
            '--no-header',
            action='store_true',
            help="Don't show the header.")
    parser.add_argument(
            '--small-header',
            action='store_true',
            help="Don't show by field names.")
    parser.add_argument(
            '--no-total',
            action='store_true',
            help="Don't show the total.")
    parser.add_argument(
            '-Q', '--small-table',
            action='store_true',
            help="Equivalent to --small-header + --no-total.")
    parser.add_argument(
            '-Y', '--summary',
            action='store_true',
            help="Only show the total.")
    parser.add_argument(
            '-F', '--source',
            dest='sources',
            action='append',
            help="Only consider definitions in this file. Defaults to "
                "anything in the current directory.")
    parser.add_argument(
            '--everything',
            action='store_true',
            help="Include builtin and libc specific symbols.")
    parser.add_argument(
            '--branches',
            action='store_true',
            help="Show branches and branch misses.")
    parser.add_argument(
            '--caches',
            action='store_true',
            help="Show cache accesses and cache misses.")
    # NOTE(review): the help text below says "coverage info", but this
    # script reports perf results, this looks like it may be copied from
    # another script, confirm
    parser.add_argument(
            '-A', '--annotate',
            action='store_true',
            help="Show source files annotated with coverage info.")
    parser.add_argument(
            '-T', '--threshold',
            nargs='?',
            # comma-separated floats => tuple of floats
            type=lambda x: tuple(float(x) for x in x.split(',')),
            const=THRESHOLD,
            help="Show lines with samples above this threshold as a percent "
                "of all lines. Defaults to "
                "%s." % ','.join(str(t) for t in THRESHOLD))
    parser.add_argument(
            '-C', '--context',
            type=lambda x: int(x, 0),
            default=3,
            help="Show n additional lines of context. Defaults to 3.")
    parser.add_argument(
            '-W', '--width',
            type=lambda x: int(x, 0),
            default=80,
            help="Assume source is styled with this many columns. Defaults "
                "to 80.")
    parser.add_argument(
            '--color',
            choices=['never', 'always', 'auto'],
            default='auto',
            help="When to use terminal colors. Defaults to 'auto'.")
    parser.add_argument(
            '-j', '--jobs',
            nargs='?',
            type=lambda x: int(x, 0),
            const=0,
            help="Number of processes to use. 0 spawns one process per core.")
    parser.add_argument(
            '--perf-path',
            # split on whitespace so the path may carry extra flags
            type=lambda x: x.split(),
            help="Path to the perf executable, may include flags. "
                "Defaults to %r." % PERF_PATH)
    parser.add_argument(
            '--objdump-path',
            type=lambda x: x.split(),
            default=OBJDUMP_PATH,
            help="Path to the objdump executable, may include flags. "
                "Defaults to %r." % OBJDUMP_PATH)

    # record flags
    record_parser = parser.add_argument_group('record options')
    record_parser.add_argument(
            'command',
            nargs=nargs,
            help="Command to run.")
    record_parser.add_argument(
            '--record',
            action='store_true',
            help="Run a command and aggregate perf measurements.")
    record_parser.add_argument(
            '-o', '--output',
            help="Output file. Uses flock to synchronize. This is stored as a "
                "zip-file of multiple perf results.")
    record_parser.add_argument(
            '--perf-freq',
            help="perf sampling frequency. This is passed directly to perf. "
                "Defaults to %r." % PERF_FREQ)
    record_parser.add_argument(
            '--perf-period',
            help="perf sampling period. This is passed directly to perf.")
    record_parser.add_argument(
            '--perf-events',
            help="perf events to record. This is passed directly to perf. "
                "Defaults to %r." % PERF_EVENTS)
    record_parser.add_argument(
            '--perf-path',
            type=lambda x: x.split(),
            help="Path to the perf executable, may include flags. "
                "Defaults to %r." % PERF_PATH)

    # avoid intermixed/REMAINDER conflict, see above
    if nargs == argparse.REMAINDER:
        args = parser.parse_args()
    else:
        args = parser.parse_intermixed_args()

    # perf_paths/command overlap, so need to do some munging here
    args.command = args.perf_paths
    if args.record:
        if not args.command:
            print('error: no command specified?',
                    file=sys.stderr)
            sys.exit(-1)
        if not args.output:
            print('error: no output file specified?',
                    file=sys.stderr)
            sys.exit(-1)

    # only pass on options that were actually set, presumably so the
    # keyword defaults of the functions behind main apply to the rest
    sys.exit(main(**{k: v
            for k, v in vars(args).items()
            if v is not None}))
|