#!/usr/bin/env python3 # # Script to aggregate and report Linux perf results. # # Example: # ./scripts/perf.py --record -obench.perf ./runners/bench_runner # ./scripts/perf.py bench.perf -j -Flfs.c -Flfs_util.c -Scycles # # Copyright (c) 2022, The littlefs authors. # SPDX-License-Identifier: BSD-3-Clause # # prevent local imports if __name__ == "__main__": __import__('sys').path.pop(0) import bisect import collections as co import csv import errno import fcntl import fnmatch import functools as ft import io import itertools as it import math as mt import multiprocessing as mp import os import re import shlex import shutil import subprocess as sp import sys import tempfile import zipfile # TODO support non-zip perf results? PERF_PATH = ['perf'] PERF_EVENTS = 'cycles,branch-misses,branches,cache-misses,cache-references' PERF_FREQ = 100 OBJDUMP_PATH = ['objdump'] THRESHOLD = (0.5, 0.85) # integer fields class CsvInt(co.namedtuple('CsvInt', 'a')): __slots__ = () def __new__(cls, a=0): if isinstance(a, CsvInt): return a if isinstance(a, str): try: a = int(a, 0) except ValueError: # also accept +-∞ and +-inf if re.match('^\s*\+?\s*(?:∞|inf)\s*$', a): a = mt.inf elif re.match('^\s*-\s*(?:∞|inf)\s*$', a): a = -mt.inf else: raise if not (isinstance(a, int) or mt.isinf(a)): a = int(a) return super().__new__(cls, a) def __repr__(self): return '%s(%r)' % (self.__class__.__name__, self.a) def __str__(self): if self.a == mt.inf: return '∞' elif self.a == -mt.inf: return '-∞' else: return str(self.a) def __csv__(self): if self.a == mt.inf: return 'inf' elif self.a == -mt.inf: return '-inf' else: return repr(self.a) def __bool__(self): return bool(self.a) def __int__(self): assert not mt.isinf(self.a) return self.a def __float__(self): return float(self.a) none = '%7s' % '-' def table(self): return '%7s' % (self,) def diff(self, other): new = self.a if self else 0 old = other.a if other else 0 diff = new - old if diff == +mt.inf: return '%7s' % '+∞' elif diff == -mt.inf: return '%7s' % '-∞' else: return '%+7d' % diff def ratio(self, other): new = self.a if self else 0 old = other.a if other else 0 if mt.isinf(new) and mt.isinf(old): return 0.0 elif mt.isinf(new): return +mt.inf elif mt.isinf(old): return -mt.inf elif not old and not new: return 0.0 elif not old: return +mt.inf else: return (new-old) / old def __pos__(self): return self.__class__(+self.a) def __neg__(self): return self.__class__(-self.a) def __abs__(self): return self.__class__(abs(self.a)) def __add__(self, other): return self.__class__(self.a + other.a) def __sub__(self, other): return self.__class__(self.a - other.a) def __mul__(self, other): return self.__class__(self.a * other.a) def __truediv__(self, other): if not other: if self >= self.__class__(0): return self.__class__(+mt.inf) else: return self.__class__(-mt.inf) return self.__class__(self.a // other.a) def __mod__(self, other): return self.__class__(self.a % other.a) # perf results class PerfResult(co.namedtuple('PerfResult', [ 'z', 'file', 'function', 'line', 'cycles', 'bmisses', 'branches', 'cmisses', 'caches', 'children'])): _prefix = 'perf' _by = ['z', 'file', 'function', 'line'] _fields = ['cycles', 'bmisses', 'branches', 'cmisses', 'caches'] _sort = ['cycles', 'bmisses', 'cmisses', 'branches', 'caches'] _types = { 'cycles': CsvInt, 'bmisses': CsvInt, 'branches': CsvInt, 'cmisses': CsvInt, 'caches': CsvInt} _children = 'children' __slots__ = () def __new__(cls, z=0, file='', function='', line=0, cycles=0, bmisses=0, branches=0, cmisses=0, caches=0, children=None): return super().__new__(cls, z, file, function, int(CsvInt(line)), CsvInt(cycles), CsvInt(bmisses), CsvInt(branches), CsvInt(cmisses), CsvInt(caches), children if children is not None else []) def __add__(self, other): return PerfResult(self.z, self.file, self.function, self.line, self.cycles + other.cycles, self.bmisses + other.bmisses, self.branches + other.branches, self.cmisses + other.cmisses, self.caches + other.caches, self.children + other.children) # open with '-' for stdin/stdout def openio(path, mode='r', buffering=-1): import os if path == '-': if 'r' in mode: return os.fdopen(os.dup(sys.stdin.fileno()), mode, buffering) else: return os.fdopen(os.dup(sys.stdout.fileno()), mode, buffering) else: return open(path, mode, buffering) # run perf as a subprocess, storing measurements into a zip file def record(command, *, output=None, perf_freq=PERF_FREQ, perf_period=None, perf_events=PERF_EVENTS, perf_path=PERF_PATH, **args): # create a temporary file for perf to write to, as far as I can tell # this is strictly needed because perf's pipe-mode only works with stdout with tempfile.NamedTemporaryFile('rb') as f: # figure out our perf invocation perf = perf_path + list(filter(None, [ 'record', '-F%s' % perf_freq if perf_freq is not None and perf_period is None else None, '-c%s' % perf_period if perf_period is not None else None, '-B', '-g', '--all-user', '-e%s' % perf_events, '-o%s' % f.name])) # run our command try: if args.get('verbose'): print(' '.join(shlex.quote(c) for c in perf + command)) err = sp.call(perf + command, close_fds=False) except KeyboardInterrupt: err = errno.EOWNERDEAD # synchronize access z = os.open(output, os.O_RDWR | os.O_CREAT) fcntl.flock(z, fcntl.LOCK_EX) # copy measurements into our zip file with os.fdopen(z, 'r+b') as z: with zipfile.ZipFile(z, 'a', compression=zipfile.ZIP_DEFLATED, compresslevel=1) as z: with z.open('perf.%d' % os.getpid(), 'w') as g: shutil.copyfileobj(f, g) # forward the return code return err # try to only process each dso once # # note this only caches with the non-keyword arguments def multiprocessing_cache(f): local_cache = {} manager = mp.Manager() global_cache = manager.dict() lock = mp.Lock() def multiprocessing_cache(*args, **kwargs): # check local cache? if args in local_cache: return local_cache[args] # check global cache? with lock: if args in global_cache: v = global_cache[args] local_cache[args] = v return v # fall back to calling the function v = f(*args, **kwargs) global_cache[args] = v local_cache[args] = v return v return multiprocessing_cache class Sym(co.namedtuple('Sym', [ 'name', 'global_', 'section', 'addr', 'size'])): __slots__ = () def __new__(cls, name, global_, section, addr, size): return super().__new__(cls, name, global_, section, addr, size) def __repr__(self): return '%s(%r, %r, %r, 0x%x, 0x%x)' % ( self.__class__.__name__, self.name, self.global_, self.section, self.addr, self.size) class SymInfo: def __init__(self, syms): self.syms = syms def get(self, k, d=None): # allow lookup by both symbol and address if isinstance(k, str): # organize by symbol, note multiple symbols can share a name if not hasattr(self, '_by_sym'): by_sym = {} for sym in self.syms: if sym.name not in by_sym: by_sym[sym.name] = [] if sym not in by_sym[sym.name]: by_sym[sym.name].append(sym) self._by_sym = by_sym return self._by_sym.get(k, d) else: import bisect # organize by address if not hasattr(self, '_by_addr'): # sort and keep largest/first when duplicates syms = self.syms.copy() syms.sort(key=lambda x: (x.addr, -x.size)) by_addr = [] for sym in syms: if (len(by_addr) == 0 or by_addr[-1].addr != sym.addr): by_addr.append(sym) self._by_addr = by_addr # find sym by range i = bisect.bisect(self._by_addr, k, key=lambda x: x.addr) - 1 # check that we're actually in this sym's size if i > -1 and k < self._by_addr[i].addr+self._by_addr[i].size: return self._by_addr[i] else: return d def __getitem__(self, k): v = self.get(k) if v is None: raise KeyError(k) return v def __contains__(self, k): return self.get(k) is not None def __bool__(self): return bool(self.syms) def __len__(self): return len(self.syms) def __iter__(self): return iter(self.syms) def globals(self): return SymInfo([sym for sym in self.syms if sym.global_]) def section(self, section): return SymInfo([sym for sym in self.syms # note we accept prefixes if s.startswith(section)]) @multiprocessing_cache def collect_syms(obj_path, global_=False, sections=None, *, objdump_path=OBJDUMP_PATH, **args): symbol_pattern = re.compile( '^(?P[0-9a-fA-F]+)' ' (?P.).*' '\s+(?P
[^\s]+)' '\s+(?P[0-9a-fA-F]+)' '\s+(?P[^\s]+)\s*$') # find symbol addresses and sizes syms = [] cmd = objdump_path + ['--syms', obj_path] if args.get('verbose'): print(' '.join(shlex.quote(c) for c in cmd)) proc = sp.Popen(cmd, stdout=sp.PIPE, universal_newlines=True, errors='replace', close_fds=False) for line in proc.stdout: m = symbol_pattern.match(line) if m: name = m.group('name') scope = m.group('scope') section = m.group('section') addr = int(m.group('addr'), 16) size = int(m.group('size'), 16) # skip non-globals? # l => local # g => global # u => unique global # => neither # ! => local + global global__ = scope not in 'l ' if global_ and not global__: continue # filter by section? note we accept prefixes if (sections is not None and not any(section.startswith(prefix) for prefix in sections)): continue # skip zero sized symbols if not size: continue # note multiple symbols can share a name syms.append(Sym(name, global__, section, addr, size)) proc.wait() if proc.returncode != 0: raise sp.CalledProcessError(proc.returncode, proc.args) return SymInfo(syms) class Line(co.namedtuple('Line', ['file', 'line', 'addr'])): __slots__ = () def __new__(cls, file, line, addr): return super().__new__(cls, file, line, addr) def __repr__(self): return '%s(%r, %r, 0x%x)' % ( self.__class__.__name__, self.file, self.line, self.addr) class LineInfo: def __init__(self, lines): self.lines = lines def get(self, k, d=None): # allow lookup by both address and file+line tuple if not isinstance(k, tuple): import bisect # organize by address if not hasattr(self, '_by_addr'): # sort and keep first when duplicates lines = self.lines.copy() lines.sort(key=lambda x: (x.addr, x.file, x.line)) by_addr = [] for line in lines: if (len(by_addr) == 0 or by_addr[-1].addr != line.addr): by_addr.append(line) self._by_addr = by_addr # find file+line by addr i = bisect.bisect(self._by_addr, k, key=lambda x: x.addr) - 1 if i > -1: return self._by_addr[i] else: return d else: import bisect # organize by file+line if not hasattr(self, '_by_line'): # sort and keep first when duplicates lines = self.lines.copy() lines.sort() by_line = [] for line in lines: if (len(by_line) == 0 or by_line[-1].file != line.file or by_line[-1].line != line.line): by_line.append(line) self._by_line = by_line # find addr by file+line tuple i = bisect.bisect(self._by_line, k, key=lambda x: (x.file, x.line)) - 1 # make sure file at least matches! if i > -1 and self._by_line[i].file == k[0]: return self._by_line[i] else: return d def __getitem__(self, k): v = self.get(k) if v is None: raise KeyError(k) return v def __contains__(self, k): return self.get(k) is not None def __bool__(self): return bool(self.lines) def __len__(self): return len(self.lines) def __iter__(self): return iter(self.lines) @multiprocessing_cache def collect_dwarf_lines(obj_path, *, objdump_path=OBJDUMP_PATH, **args): line_pattern = re.compile( # matches dir/file table '^\s*(?P[0-9]+)' '(?:\s+(?P[0-9]+))?' '.*\s+(?P[^\s]+)\s*$' # matches line opcodes '|' '^\s*\[[^\]]*\]' '(?:' '\s+(?PSpecial)' '|' '\s+(?PCopy)' '|' '\s+(?PEnd of Sequence)' '|' '\s+File.*?to.*?(?P[0-9]+)' '|' '\s+Line.*?to.*?(?P[0-9]+)' '|' '\s+(?:Address|PC)' '\s+.*?to.*?(?P[0xX0-9a-fA-F]+)' '|' '\s+[^\s]+' ')+\s*$', re.IGNORECASE) # state machine for dwarf line numbers, note that objdump's # decodedline seems to have issues with multiple dir/file # tables, which is why we need this lines = [] dirs = co.OrderedDict() files = co.OrderedDict() op_file = 1 op_line = 1 op_addr = 0 cmd = objdump_path + ['--dwarf=rawline', obj_path] if args.get('verbose'): print(' '.join(shlex.quote(c) for c in cmd)) proc = sp.Popen(cmd, stdout=sp.PIPE, universal_newlines=True, errors='replace', close_fds=False) for line in proc.stdout: m = line_pattern.match(line) if m: if m.group('no') and not m.group('dir'): # found a directory entry dirs[int(m.group('no'))] = m.group('path') elif m.group('no'): # found a file entry dir = int(m.group('dir')) if dir in dirs: files[int(m.group('no'))] = os.path.join( dirs[dir], m.group('path')) else: files[int(m.group('no'))] = m.group('path') else: # found a state machine update if m.group('op_file'): op_file = int(m.group('op_file'), 0) if m.group('op_line'): op_line = int(m.group('op_line'), 0) if m.group('op_addr'): op_addr = int(m.group('op_addr'), 0) if (m.group('op_special') or m.group('op_copy') or m.group('op_end')): file = os.path.abspath(files.get(op_file, '?')) lines.append(Line(file, op_line, op_addr)) if m.group('op_end'): op_file = 1 op_line = 1 op_addr = 0 proc.wait() if proc.returncode != 0: raise sp.CalledProcessError(proc.returncode, proc.args) return LineInfo(lines) def collect_decompressed(path, *, perf_path=PERF_PATH, sources=None, everything=False, no_strip=False, propagate=0, depth=1, **args): sample_pattern = re.compile( '(?P\w+)' '\s+(?P\w+)' '\s+(?P