#!/usr/bin/env python3
#
# Script to aggregate and report Linux perf results.
#
# Example:
# ./scripts/perf.py -R -obench.perf ./runners/bench_runner
# ./scripts/perf.py bench.perf -Flfs.c -Flfs_util.c -Scycles
#
# Copyright (c) 2022, The littlefs authors.
# SPDX-License-Identifier: BSD-3-Clause
#

import bisect
import collections as co
import csv
import errno
import fcntl
import functools as ft
import glob
import itertools as it
import math as m
import multiprocessing as mp
import os
import re
import shlex
import shutil
import subprocess as sp
# openio() and record() use sys; importing it at module scope keeps them
# usable when this file is imported instead of being run as a script
import sys
import tempfile
import zipfile

# TODO support non-zip perf results?


PERF_PATHS = ['*.perf']
# command prefix used to invoke perf (default for record's perf_tool)
PERF_TOOL = ['perf']
# events handed to perf via -e (default for record's perf_events)
PERF_EVENTS = 'cycles,branch-misses,branches,cache-misses,cache-references'
# sampling frequency handed to perf via -F (default for record's perf_freq)
PERF_FREQ = 100
OBJDUMP_TOOL = ['objdump']
THRESHOLD = (0.5, 0.85)


# integer fields
#
# A one-element namedtuple wrapping either a Python int or +-infinity
# (stored as a float), with helpers for rendering 7-column table cells
# and for diffing against another Int.
class Int(co.namedtuple('Int', 'x')):
    __slots__ = ()

    # Accepts an existing Int (returned as-is), an int, +-m.inf, or a
    # string. Strings are parsed with int(x, 0), so prefixes such as 0x
    # are understood; failing that, "∞"/"inf" with an optional sign is
    # accepted. Any other string re-raises the ValueError from int().
    def __new__(cls, x=0):
        if isinstance(x, Int):
            return x
        if isinstance(x, str):
            try:
                x = int(x, 0)
            except ValueError:
                # also accept +-∞ and +-inf
                #
                # raw strings so \s and \+ reach the regex engine without
                # tripping Python's invalid-escape-sequence warning
                if re.match(r'^\s*\+?\s*(?:∞|inf)\s*$', x):
                    x = m.inf
                elif re.match(r'^\s*-\s*(?:∞|inf)\s*$', x):
                    x = -m.inf
                else:
                    raise
        assert isinstance(x, int) or m.isinf(x), x
        return super().__new__(cls, x)

    def __str__(self):
        if self.x == m.inf:
            return '∞'
        elif self.x == -m.inf:
            return '-∞'
        else:
            return str(self.x)

    # only finite values can be converted to a plain int
    def __int__(self):
        assert not m.isinf(self.x)
        return self.x

    def __float__(self):
        return float(self.x)

    # placeholder cell for a missing value
    none = '%7s' % '-'

    # render this value as a right-aligned 7-column cell
    def table(self):
        return '%7s' % (self,)

    diff_none = '%7s' % '-'
    diff_table = table

    # render the signed difference self - other as a 7-column cell, a
    # falsy other (e.g. None) counts as 0
    def diff_diff(self, other):
        new = self.x if self else 0
        old = other.x if other else 0
        diff = new - old
        if diff != diff:
            # ∞ - ∞ is NaN, which %d can't format; report it as no
            # change, the same way ratio() does
            diff = 0
        if diff == +m.inf:
            return '%7s' % '+∞'
        elif diff == -m.inf:
            return '%7s' % '-∞'
        else:
            return '%+7d' % diff

    # relative change (new-old)/old as a float, with the infinite and
    # zero cases pinned to fixed values so we never divide by zero
    def ratio(self, other):
        new = self.x if self else 0
        old = other.x if other else 0
        if m.isinf(new) and m.isinf(old):
            return 0.0
        elif m.isinf(new):
            return +m.inf
        elif m.isinf(old):
            return -m.inf
        elif not old and not new:
            return 0.0
        elif not old:
            return 1.0
        else:
            return (new-old) / old

    def __add__(self, other):
        return self.__class__(self.x + other.x)

    def __sub__(self, other):
        return self.__class__(self.x - other.x)

    def __mul__(self, other):
        return self.__class__(self.x * other.x)


# perf results
#
# One row of measurements, identified by file/function/line, carrying one
# Int per counter plus a list of child results.
class PerfResult(co.namedtuple('PerfResult', [
        'file', 'function', 'line',
        'cycles', 'bmisses', 'branches', 'cmisses', 'caches',
        'children'])):
    # NOTE(review): _by/_fields/_types are not read in this part of the
    # file, presumably the reporting code further down uses them as the
    # key columns, the measurement columns, and the per-column types.
    # Note _fields deliberately shadows namedtuple's own _fields.
    _by = ['file', 'function', 'line']
    _fields = ['cycles', 'bmisses', 'branches', 'cmisses', 'caches']
    _types = {
        'cycles': Int,
        'bmisses': Int,
        'branches': Int,
        'cmisses': Int,
        'caches': Int}

    __slots__ = ()

    # line and every counter are normalized through Int, so strings such
    # as '7' or '0x10' are accepted; children defaults to a fresh list per
    # instance (a literal [] default would be shared by all of them)
    def __new__(cls, file='', function='', line=0,
            cycles=0, bmisses=0, branches=0, cmisses=0, caches=0,
            children=None):
        return super().__new__(cls, file, function, int(Int(line)),
            Int(cycles), Int(bmisses), Int(branches), Int(cmisses),
            Int(caches),
            children if children is not None else [])

    # merge two results: counters are summed and children concatenated,
    # file/function/line are taken from the left operand
    def __add__(self, other):
        return PerfResult(self.file, self.function, self.line,
            self.cycles + other.cycles,
            self.bmisses + other.bmisses,
            self.branches + other.branches,
            self.cmisses + other.cmisses,
            self.caches + other.caches,
            self.children + other.children)


# open a path for reading/writing, where '-' means stdin (mode 'r') or
# stdout (any other mode)
#
# The standard stream's descriptor is dup'd first, so closing the returned
# file object does not close the real stdin/stdout.
def openio(path, mode='r'):
    if path != '-':
        return open(path, mode)

    if mode == 'r':
        return os.fdopen(os.dup(sys.stdin.fileno()), 'r')
    else:
        return os.fdopen(os.dup(sys.stdout.fileno()), 'w')


# run perf as a subprocess, storing measurements into a zip file
#
# command is the argv list of the program to profile, output the path of
# the zip archive to append to. If perf_period is given it is passed as -c
# and perf_freq is ignored, otherwise perf_freq is passed as -F. A truthy
# verbose in args echoes the perf command line before running it.
#
# Returns perf's exit status, or errno.EOWNERDEAD if interrupted from the
# keyboard. Exits the process with -1 if command or output is missing.
def record(command, *,
        output=None,
        perf_freq=PERF_FREQ,
        perf_period=None,
        perf_events=PERF_EVENTS,
        perf_tool=PERF_TOOL,
        **args):
    if not command:
        print('error: no command specified?')
        sys.exit(-1)

    if not output:
        print('error: no output file specified?')
        sys.exit(-1)

    # create a temporary file for perf to write to, as far as I can tell
    # this is strictly needed because perf's pipe-mode only works with stdout
    with tempfile.NamedTemporaryFile('rb') as f:
        # figure out our perf invocation
        perf = list(perf_tool) + ['record']
        if perf_period is not None:
            perf.append('-c%s' % perf_period)
        elif perf_freq is not None:
            perf.append('-F%s' % perf_freq)
        perf += [
            '-B',
            '-g',
            '--all-user',
            '-e%s' % perf_events,
            '-o%s' % f.name]

        # run our command
        try:
            if args.get('verbose'):
                print(' '.join(shlex.quote(c) for c in perf + command))
            err = sp.call(perf + command, close_fds=False)
        except KeyboardInterrupt:
            err = errno.EOWNERDEAD

        # synchronize access
        #
        # the fd is wrapped in a file object before locking so it gets
        # closed (and the lock dropped) even if flock or the copy fails
        with os.fdopen(os.open(output, os.O_RDWR | os.O_CREAT),
                'r+b') as archive:
            fcntl.flock(archive, fcntl.LOCK_EX)

            # copy measurements into our zip file, one entry per recording
            # process
            with zipfile.ZipFile(archive, 'a',
                    compression=zipfile.ZIP_DEFLATED,
                    compresslevel=1) as z:
                with z.open('perf.%d' % os.getpid(), 'w') as g:
                    shutil.copyfileobj(f, g)

    # forward the return code
    return err