#!/usr/bin/env python3
#
# Script to aggregate and report Linux perf results.
#
# Example:
# ./scripts/perf.py -R -obench.perf ./runners/bench_runner
# ./scripts/perf.py bench.perf -j -Flfs.c -Flfs_util.c -Scycles
#
# Copyright (c) 2022, The littlefs authors.
# SPDX-License-Identifier: BSD-3-Clause
#

import bisect
import collections as co
import csv
import errno
import fcntl
import functools as ft
import itertools as it
import math as mt
import multiprocessing as mp
import os
import re
import shlex
import shutil
import subprocess as sp
import sys
import tempfile
import zipfile

# TODO support non-zip perf results?


PERF_PATH = ['perf']
PERF_EVENTS = 'cycles,branch-misses,branches,cache-misses,cache-references'
PERF_FREQ = 100
OBJDUMP_PATH = ['objdump']
THRESHOLD = (0.5, 0.85)


# integer fields
class RInt(co.namedtuple('RInt', 'x')):
    __slots__ = ()
    def __new__(cls, x=0):
        if isinstance(x, RInt):
            return x
        if isinstance(x, str):
            try:
                x = int(x, 0)
            except ValueError:
                # also accept +-∞ and +-inf
                if re.match('^\s*\+?\s*(?:∞|inf)\s*$', x):
                    x = mt.inf
                elif re.match('^\s*-\s*(?:∞|inf)\s*$', x):
                    x = -mt.inf
                else:
                    raise
        assert isinstance(x, int) or mt.isinf(x), x
        return super().__new__(cls, x)

    def __str__(self):
        if self.x == mt.inf:
            return '∞'
        elif self.x == -mt.inf:
            return '-∞'
        else:
            return str(self.x)

    def __int__(self):
        assert not mt.isinf(self.x)
        return self.x

    def __float__(self):
        return float(self.x)

    none = '%7s' % '-'
    def table(self):
        return '%7s' % (self,)

    def diff(self, other):
        new = self.x if self else 0
        old = other.x if other else 0
        diff = new - old
        if diff == +mt.inf:
            return '%7s' % '+∞'
        elif diff == -mt.inf:
            return '%7s' % '-∞'
        else:
            return '%+7d' % diff

    def ratio(self, other):
        new = self.x if self else 0
        old = other.x if other else 0
        if mt.isinf(new) and mt.isinf(old):
            return 0.0
        elif mt.isinf(new):
            return +mt.inf
        elif mt.isinf(old):
            return -mt.inf
        elif not old and not new:
            return 0.0
        elif not old:
            return +mt.inf
        else:
            return (new-old) / old

    def __add__(self, other):
        return self.__class__(self.x + other.x)

    def __sub__(self, other):
        return self.__class__(self.x - other.x)

    def __mul__(self, other):
        return self.__class__(self.x * other.x)


# perf results
class PerfResult(co.namedtuple('PerfResult', [
        'file', 'function', 'line',
        'cycles', 'bmisses', 'branches', 'cmisses', 'caches',
        'children'])):
    _by = ['file', 'function', 'line']
    _fields = ['cycles', 'bmisses', 'branches', 'cmisses', 'caches']
    _sort = ['cycles', 'bmisses', 'cmisses', 'branches', 'caches']
    _types = {
        'cycles': RInt,
        'bmisses': RInt, 'branches': RInt,
        'cmisses': RInt, 'caches': RInt}

    __slots__ = ()
    def __new__(cls, file='', function='', line=0,
            cycles=0, bmisses=0, branches=0, cmisses=0, caches=0,
            children=[]):
        return super().__new__(cls, file, function, int(RInt(line)),
            RInt(cycles), RInt(bmisses), RInt(branches),
            RInt(cmisses), RInt(caches),
            children)

    def __add__(self, other):
        return PerfResult(self.file, self.function, self.line,
            self.cycles + other.cycles,
            self.bmisses + other.bmisses,
            self.branches + other.branches,
            self.cmisses + other.cmisses,
            self.caches + other.caches,
            self.children + other.children)

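# As a rough illustration (not part of the script's interface), results that
# share the same file/function/line key are meant to fold together with '+',
# each RInt field summing independently; the file and function names below
# are made up for illustration only:
#
#   a = PerfResult('lfs.c', 'lfs_bd_read', 42, cycles=100, branches=10)
#   b = PerfResult('lfs.c', 'lfs_bd_read', 42, cycles=50, branches=5)
#   assert (a + b).cycles == RInt(150)
#   assert (a + b).branches == RInt(15)
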
def openio(path, mode='r', buffering=-1):
    # allow '-' for stdin/stdout
    if path == '-':
        if 'r' in mode:
            return os.fdopen(os.dup(sys.stdin.fileno()), mode, buffering)
        else:
            return os.fdopen(os.dup(sys.stdout.fileno()), mode, buffering)
    else:
        return open(path, mode, buffering)


# run perf as a subprocess, storing measurements into a zip file
def record(command, *,
        output=None,
        perf_freq=PERF_FREQ,
        perf_period=None,
        perf_events=PERF_EVENTS,
        perf_path=PERF_PATH,
        **args):
    # create a temporary file for perf to write to, as far as I can tell
    # this is strictly needed because perf's pipe-mode only works with stdout
    with tempfile.NamedTemporaryFile('rb') as f:
        # figure out our perf invocation
        perf = perf_path + list(filter(None, [
            'record',
            '-F%s' % perf_freq
                if perf_freq is not None and perf_period is None else None,
            '-c%s' % perf_period
                if perf_period is not None else None,
            '-B',
            '-g',
            '--all-user',
            '-e%s' % perf_events,
            '-o%s' % f.name]))

        # run our command
        try:
            if args.get('verbose'):
                print(' '.join(shlex.quote(c) for c in perf + command))
            err = sp.call(perf + command, close_fds=False)
        except KeyboardInterrupt:
            err = errno.EOWNERDEAD

        # synchronize access
        z = os.open(output, os.O_RDWR | os.O_CREAT)
        fcntl.flock(z, fcntl.LOCK_EX)

        # copy measurements into our zip file
        with os.fdopen(z, 'r+b') as z:
            with zipfile.ZipFile(z, 'a',
                    compression=zipfile.ZIP_DEFLATED,
                    compresslevel=1) as z:
                with z.open('perf.%d' % os.getpid(), 'w') as g:
                    shutil.copyfileobj(f, g)

    # forward the return code
    return err


# try to only process each dso once
#
# note this only caches with the non-keyword arguments
def multiprocessing_cache(f):
    local_cache = {}
    manager = mp.Manager()
    global_cache = manager.dict()
    lock = mp.Lock()

    def multiprocessing_cache(*args, **kwargs):
        # check local cache?
        if args in local_cache:
            return local_cache[args]
        # check global cache?
        with lock:
            if args in global_cache:
                v = global_cache[args]
                local_cache[args] = v
                return v
            # fall back to calling the function
            v = f(*args, **kwargs)
            global_cache[args] = v
            local_cache[args] = v
            return v

    return multiprocessing_cache

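# A sketch of the intended use of multiprocessing_cache, assuming the wrapped
# function is deterministic in its positional arguments (keyword arguments are
# ignored by the cache key); parse_dso here is hypothetical, not a function
# defined in this script:
#
#   @multiprocessing_cache
#   def parse_dso(dso_path, **args):
#       ...  # expensive objdump work
#
#   parse_dso('./lfs.o')  # computed once
#   parse_dso('./lfs.o')  # served from the local or shared cache
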
@multiprocessing_cache
def collect_syms_and_lines(obj_path, *,
        objdump_path=OBJDUMP_PATH,
        **args):
    symbol_pattern = re.compile(
        '^(?P<addr>[0-9a-fA-F]+)'
            '\s+.*'
            '\s+(?P<size>[0-9a-fA-F]+)'
            '\s+(?P<name>[^\s]+)\s*$')
    line_pattern = re.compile(
        '^\s+(?:'
            # matches dir/file table
            '(?P<no>[0-9]+)'
                '(?:\s+(?P<dir>[0-9]+))?'
                '\s+.*'
                '\s+(?P<path>[^\s]+)'
            # matches line opcodes
            '|' '\[[^\]]*\]\s+'
                '(?:'
                    '(?P<op_special>Special)'
                    '|' '(?P<op_copy>Copy)'
                    '|' '(?P<op_end>End of Sequence)'
                    '|' 'File .*?to (?:entry )?(?P<op_file>\d+)'
                    '|' 'Line .*?to (?P<op_line>[0-9]+)'
                    '|' '(?:Address|PC) .*?to (?P<op_addr>[0x0-9a-fA-F]+)'
                    '|' '.' ')*'
        ')$', re.IGNORECASE)

    # figure out symbol addresses and file+line ranges
    syms = {}
    sym_at = []
    cmd = objdump_path + ['-t', obj_path]
    if args.get('verbose'):
        print(' '.join(shlex.quote(c) for c in cmd))
    proc = sp.Popen(cmd,
        stdout=sp.PIPE,
        stderr=None if args.get('verbose') else sp.PIPE,
        universal_newlines=True,
        errors='replace',
        close_fds=False)
    for line in proc.stdout:
        m = symbol_pattern.match(line)
        if m:
            name = m.group('name')
            addr = int(m.group('addr'), 16)
            size = int(m.group('size'), 16)
            # ignore zero-sized symbols
            if not size:
                continue
            # note multiple symbols can share a name
            if name not in syms:
                syms[name] = set()
            syms[name].add((addr, size))
            sym_at.append((addr, name, size))
    proc.wait()
    if proc.returncode != 0:
        if not args.get('verbose'):
            for line in proc.stderr:
                sys.stderr.write(line)
        # assume no debug-info on failure
        pass

    # sort and keep largest/first when duplicates
    sym_at.sort(key=lambda x: (x[0], -x[2], x[1]))
    sym_at_ = []
    for addr, name, size in sym_at:
        if len(sym_at_) == 0 or sym_at_[-1][0] != addr:
            sym_at_.append((addr, name, size))
    sym_at = sym_at_

    # state machine for dwarf line numbers, note that objdump's
    # decodedline seems to have issues with multiple dir/file
    # tables, which is why we need this
    lines = []
    line_at = []
    dirs = {}
    files = {}
    op_file = 1
    op_line = 1
    op_addr = 0
    cmd = objdump_path + ['--dwarf=rawline', obj_path]
    if args.get('verbose'):
        print(' '.join(shlex.quote(c) for c in cmd))
    proc = sp.Popen(cmd,
        stdout=sp.PIPE,
        stderr=None if args.get('verbose') else sp.PIPE,
        universal_newlines=True,
        errors='replace',
        close_fds=False)
    for line in proc.stdout:
        m = line_pattern.match(line)
        if m:
            if m.group('no') and not m.group('dir'):
                # found a directory entry
                dirs[int(m.group('no'))] = m.group('path')
            elif m.group('no'):
                # found a file entry
                dir = int(m.group('dir'))
                if dir in dirs:
                    files[int(m.group('no'))] = os.path.join(
                        dirs[dir],
                        m.group('path'))
                else:
                    files[int(m.group('no'))] = m.group('path')
            else:
                # found a state machine update
                if m.group('op_file'):
                    op_file = int(m.group('op_file'), 0)
                if m.group('op_line'):
                    op_line = int(m.group('op_line'), 0)
                if m.group('op_addr'):
                    op_addr = int(m.group('op_addr'), 0)

                if (m.group('op_special')
                        or m.group('op_copy')
                        or m.group('op_end')):
                    file = os.path.abspath(files.get(op_file, '?'))
                    lines.append((file, op_line, op_addr))
                    line_at.append((op_addr, file, op_line))

                if m.group('op_end'):
                    op_file = 1
                    op_line = 1
                    op_addr = 0
    proc.wait()
    if proc.returncode != 0:
        if not args.get('verbose'):
            for line in proc.stderr:
                sys.stderr.write(line)
        # assume no debug-info on failure
        pass

    # sort and keep first when duplicates
    lines.sort()
    lines_ = []
    for file, line, addr in lines:
        if len(lines_) == 0 or lines_[-1][0] != file or lines_[-1][1] != line:
            lines_.append((file, line, addr))
    lines = lines_

    # sort and keep first when duplicates
    line_at.sort()
    line_at_ = []
    for addr, file, line in line_at:
        if len(line_at_) == 0 or line_at_[-1][0] != addr:
            line_at_.append((addr, file, line))
    line_at = line_at_

    return syms, sym_at, lines, line_at


def collect_decompressed(path, *,
        perf_path=PERF_PATH,
        sources=None,
        everything=False,
        propagate=0,
        depth=1,
        **args):
    sample_pattern = re.compile(
        '(?P<comm>\w+)'
        '\s+(?P<pid>\w+)'
        '\s+(?P