Added perfbd.py and block device performance sampling in bench-runner

Based loosely on Linux's perf tool, perfbd.py uses trace output with
backtraces to aggregate and show the block device usage of all functions
in a program, propagating block device operation cost up the backtrace
for each operation.
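
The propagation itself is simple at heart; a minimal sketch of the idea
(hypothetical names, not perfbd.py's actual structure), where each
operation's cost is charged to every frame in its backtrace:

import collections as co

# charge each op's cost to every caller in its backtrace, so parent
# functions accumulate the cost of the block device operations they
# transitively trigger (a real tool would also dedupe recursive frames)
def propagate(samples):
    # samples: [(backtrace, cost)], backtrace outermost-first
    costs = co.defaultdict(int)
    for backtrace, cost in samples:
        for frame in backtrace:
            costs[frame] += cost
    return costs

print(propagate([
    (['main', 'lfs_file_write', 'lfs_bd_prog'], 512),
    (['main', 'lfs_file_read', 'lfs_bd_read'], 256),
]))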

This, combined with --trace-period and --trace-freq for
sampling/filtering trace events, allows the bench-runner to record the
general cost of block device operations with very little overhead.
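
The underlying sampling idea is just to record a subset of events; a
rough sketch (not the bench-runner's actual implementation) of
period-based filtering:

# only record every `period`th trace event, trading measurement
# resolution for much lower tracing overhead
def sample_by_period(events, period):
    for i, event in enumerate(events):
        if i % period == 0:
            yield event

# with a period of 100, roughly 1 in 100 events survive
assert len(list(sample_by_period(range(1000), 100))) == 10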

Adopted this as the default side effect of make bench, replacing the
cycle-based performance measurements, which are less important for
littlefs.
Christopher Haster
2022-10-13 11:09:26 -05:00
parent 29cbafeb67
commit 3a33c3795b
20 changed files with 2026 additions and 610 deletions


@@ -4,7 +4,7 @@
#
# Example:
# ./scripts/perf.py -R -obench.perf ./runners/bench_runner
# ./scripts/perf.py bench.perf -Flfs.c -Flfs_util.c -Scycles
# ./scripts/perf.py bench.perf -j -Flfs.c -Flfs_util.c -Scycles
#
# Copyright (c) 2022, The littlefs authors.
# SPDX-License-Identifier: BSD-3-Clause
@@ -16,7 +16,6 @@ import csv
import errno
import fcntl
import functools as ft
import glob
import itertools as it
import math as m
import multiprocessing as mp
@@ -31,7 +30,6 @@ import zipfile
# TODO support non-zip perf results?
PERF_PATHS = ['*.perf']
PERF_TOOL = ['perf']
PERF_EVENTS = 'cycles,branch-misses,branches,cache-misses,cache-references'
PERF_FREQ = 100
@@ -147,14 +145,14 @@ class PerfResult(co.namedtuple('PerfResult', [
self.children + other.children)
def openio(path, mode='r'):
def openio(path, mode='r', buffering=-1):
if path == '-':
if mode == 'r':
return os.fdopen(os.dup(sys.stdin.fileno()), 'r')
return os.fdopen(os.dup(sys.stdin.fileno()), mode, buffering)
else:
return os.fdopen(os.dup(sys.stdout.fileno()), 'w')
return os.fdopen(os.dup(sys.stdout.fileno()), mode, buffering)
else:
return open(path, mode)
return open(path, mode, buffering)
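The new buffering argument mostly matters for the '-' case; a small
usage sketch (assuming the openio above), where line buffering keeps
progress output from a long-running job flushed promptly:

# buffering=1 requests line buffering in text mode, so each
# newline-terminated write reaches the terminal immediately
with openio('-', 'w', 1) as f:
    f.write('benchmarking... 50%\n')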
# run perf as a subprocess, storing measurements into a zip file
def record(command, *,
@@ -164,14 +162,6 @@ def record(command, *,
perf_events=PERF_EVENTS,
perf_tool=PERF_TOOL,
**args):
if not command:
print('error: no command specified?')
sys.exit(-1)
if not output:
print('error: no output file specified?')
sys.exit(-1)
# create a temporary file for perf to write to, as far as I can tell
# this is strictly needed because perf's pipe-mode only works with stdout
with tempfile.NamedTemporaryFile('rb') as f:
@@ -214,8 +204,187 @@ def record(command, *,
return err
# try to only process each dso once
#
# note this only caches with the non-keyword arguments
def multiprocessing_cache(f):
local_cache = {}
manager = mp.Manager()
global_cache = manager.dict()
lock = mp.Lock()
def multiprocessing_cache(*args, **kwargs):
# check local cache?
if args in local_cache:
return local_cache[args]
# check global cache?
with lock:
if args in global_cache:
v = global_cache[args]
local_cache[args] = v
return v
# fall back to calling the function
v = f(*args, **kwargs)
global_cache[args] = v
local_cache[args] = v
return v
return multiprocessing_cache
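A usage sketch of the decorator above: the Manager-backed dict shares
results across pool workers while the plain dict skips the IPC round
trip on repeat hits. This relies on fork-style multiprocessing, as used
elsewhere in this script (slow_square is a stand-in for the real
dso-parsing work):

import multiprocessing as mp

@multiprocessing_cache
def slow_square(x):
    print('computing %d^2' % x)  # recomputed only on cache misses
    return x*x

# the decorated closure itself isn't picklable, so hand workers a
# plain module-level function that calls it, much like perf.py
# reaches the cached function through collect_job
def worker(x):
    return slow_square(x)

if __name__ == '__main__':
    with mp.Pool(2) as p:
        # repeated args hit the shared cache instead of recomputing,
        # though racing workers may still compute a value twice since
        # the lock isn't held during the call
        print(p.map(worker, [2, 3, 2, 3, 2]))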
@multiprocessing_cache
def collect_syms_and_lines(obj_path, *,
objdump_tool=None,
**args):
symbol_pattern = re.compile(
'^(?P<addr>[0-9a-fA-F]+)'
'\s+.*'
'\s+(?P<size>[0-9a-fA-F]+)'
'\s+(?P<name>[^\s]+)\s*$')
line_pattern = re.compile(
'^\s+(?:'
# matches dir/file table
'(?P<no>[0-9]+)'
'(?:\s+(?P<dir>[0-9]+))?'
'\s+.*'
'\s+(?P<path>[^\s]+)'
# matches line opcodes
'|' '\[[^\]]*\]\s+'
'(?:'
'(?P<op_special>Special)'
'|' '(?P<op_copy>Copy)'
'|' '(?P<op_end>End of Sequence)'
'|' 'File .*?to (?:entry )?(?P<op_file>\d+)'
'|' 'Line .*?to (?P<op_line>[0-9]+)'
'|' '(?:Address|PC) .*?to (?P<op_addr>[0x0-9a-fA-F]+)'
'|' '.' ')*'
')$', re.IGNORECASE)
# figure out symbol addresses and file+line ranges
syms = {}
sym_at = []
cmd = objdump_tool + ['-t', obj_path]
if args.get('verbose'):
print(' '.join(shlex.quote(c) for c in cmd))
proc = sp.Popen(cmd,
stdout=sp.PIPE,
stderr=sp.PIPE if not args.get('verbose') else None,
universal_newlines=True,
errors='replace',
close_fds=False)
for line in proc.stdout:
m = symbol_pattern.match(line)
if m:
name = m.group('name')
addr = int(m.group('addr'), 16)
size = int(m.group('size'), 16)
# ignore zero-sized symbols
if not size:
continue
# note multiple symbols can share a name
if name not in syms:
syms[name] = set()
syms[name].add((addr, size))
sym_at.append((addr, name, size))
proc.wait()
if proc.returncode != 0:
if not args.get('verbose'):
for line in proc.stderr:
sys.stdout.write(line)
# assume no debug-info on failure
pass
# sort and keep largest/first when duplicates
sym_at.sort(key=lambda x: (x[0], -x[2], x[1]))
sym_at_ = []
for addr, name, size in sym_at:
if len(sym_at_) == 0 or sym_at_[-1][0] != addr:
sym_at_.append((addr, name, size))
sym_at = sym_at_
# state machine for dwarf line numbers, note that objdump's
# decodedline seems to have issues with multiple dir/file
# tables, which is why we need this
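# a rough example of the state machine (hypothetical opcode text): a
# "Special opcode ... to 0x1060 ... Line ... to 7" both updates the
# op_addr/op_line registers and emits a (file, line, addr) row, while
# "End of Sequence" emits a final row and then resets the registers
# to their defaults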
lines = []
line_at = []
dirs = {}
files = {}
op_file = 1
op_line = 1
op_addr = 0
cmd = objdump_tool + ['--dwarf=rawline', obj_path]
if args.get('verbose'):
print(' '.join(shlex.quote(c) for c in cmd))
proc = sp.Popen(cmd,
stdout=sp.PIPE,
stderr=sp.PIPE if not args.get('verbose') else None,
universal_newlines=True,
errors='replace',
close_fds=False)
for line in proc.stdout:
m = line_pattern.match(line)
if m:
if m.group('no') and not m.group('dir'):
# found a directory entry
dirs[int(m.group('no'))] = m.group('path')
elif m.group('no'):
# found a file entry
dir = int(m.group('dir'))
if dir in dirs:
files[int(m.group('no'))] = os.path.join(
dirs[dir],
m.group('path'))
else:
files[int(m.group('no'))] = m.group('path')
else:
# found a state machine update
if m.group('op_file'):
op_file = int(m.group('op_file'), 0)
if m.group('op_line'):
op_line = int(m.group('op_line'), 0)
if m.group('op_addr'):
op_addr = int(m.group('op_addr'), 0)
if (m.group('op_special')
or m.group('op_copy')
or m.group('op_end')):
file = os.path.abspath(files.get(op_file, '?'))
lines.append((file, op_line, op_addr))
line_at.append((op_addr, file, op_line))
if m.group('op_end'):
op_file = 1
op_line = 1
op_addr = 0
proc.wait()
if proc.returncode != 0:
if not args.get('verbose'):
for line in proc.stderr:
sys.stdout.write(line)
# assume no debug-info on failure
pass
# sort and keep first when duplicates
lines.sort()
lines_ = []
for file, line, addr in lines:
if len(lines_) == 0 or lines_[-1][0] != file or lines_[-1][1] != line:
lines_.append((file, line, addr))
lines = lines_
# sort and keep first when duplicates
line_at.sort()
line_at_ = []
for addr, file, line in line_at:
if len(line_at_) == 0 or line_at_[-1][0] != addr:
line_at_.append((addr, file, line))
line_at = line_at_
return syms, sym_at, lines, line_at
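The returned line_at list is consumed later with bisect; a minimal
sketch of the addr -> file+line lookup on synthetic data (bisect's key
argument, as used in this script, needs Python >= 3.10):

import bisect

# line_at is sorted by address; the nearest entry at or below the
# lookup address wins
line_at = [(0x1000, 'lfs.c', 10), (0x1040, 'lfs.c', 42)]

def file_line(addr):
    i = bisect.bisect(line_at, addr, key=lambda x: x[0])
    return line_at[i-1][1:] if i > 0 else ('?', 0)

assert file_line(0x1044) == ('lfs.c', 42)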
def collect_decompressed(path, *,
perf_tool=PERF_TOOL,
sources=None,
everything=False,
propagate=0,
depth=1,
@@ -228,7 +397,7 @@ def collect_decompressed(path, *,
'\s+(?P<event>[^:]+):')
frame_pattern = re.compile(
'\s+(?P<addr>\w+)'
'\s+(?P<sym>[^\s]+)'
'\s+(?P<sym>[^\s\+]+)(?:\+(?P<off>\w+))?'
'\s+\((?P<dso>[^\)]+)\)')
events = {
'cycles': 'cycles',
@@ -254,6 +423,9 @@ def collect_decompressed(path, *,
last_event = ''
last_period = 0
last_stack = []
deltas = co.defaultdict(lambda: {})
syms_ = co.defaultdict(lambda: {})
at_cache = {}
results = {}
def commit():
@@ -276,36 +448,117 @@ def collect_decompressed(path, *,
for line in proc.stdout:
# we need to process a lot of data, so wait to use regex as late
# as possible
if not line:
continue
if not line.startswith('\t'):
m = sample_pattern.match(line)
if m:
if last_stack:
commit()
last_event = m.group('event')
last_filtered = last_event in events
last_period = int(m.group('period'), 0)
last_stack = []
if last_filtered:
commit()
last_filtered = False
if line:
m = sample_pattern.match(line)
if m and m.group('event') in events:
last_filtered = True
last_event = m.group('event')
last_period = int(m.group('period'), 0)
last_stack = []
elif last_filtered:
m = frame_pattern.match(line)
if m:
# filter out internal/kernel functions
if not everything and (
m.group('sym').startswith('__')
or m.group('dso').startswith('/usr/lib')
or not m.group('sym')[:1].isalpha()):
or m.group('sym').startswith('0')
or m.group('sym').startswith('-')
or m.group('sym').startswith('[')
or m.group('dso').startswith('/usr/lib')):
continue
last_stack.append((
m.group('dso'),
m.group('sym'),
int(m.group('addr'), 16)))
dso = m.group('dso')
sym = m.group('sym')
off = int(m.group('off'), 0) if m.group('off') else 0
addr_ = int(m.group('addr'), 16)
# get the syms/lines for the dso, this is cached
syms, sym_at, lines, line_at = collect_syms_and_lines(
dso,
**args)
# ASLR is tricky, we have symbols+offsets, but static symbols
# mean we may have multiple candidates for each symbol.
#
# To try to solve this, we use previously seen symbols to build
# confidence for the correct ASLR delta. This means we may
# guess incorrectly for early symbols, but this will only affect
# a few samples.
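# a worked example (hypothetical numbers): if lfs_bd_read sits at
# 0x1040 in the ELF but a frame reports addr 0x555555401044 with
# offset +0x4, the candidate delta is 0x1040 - 0x555555401040; once
# a few symbols agree on one delta, its accumulated error stays
# smallest and it wins the min() below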
if sym in syms:
sym_addr_ = addr_ - off
# track possible deltas?
for sym_addr, size in syms[sym]:
delta = sym_addr - sym_addr_
if delta not in deltas[dso]:
deltas[dso][delta] = sum(
abs(a_+delta - a)
for s, (a_, _) in syms_[dso].items()
for a, _ in syms[s])
for delta in deltas[dso].keys():
deltas[dso][delta] += abs(sym_addr_+delta - sym_addr)
syms_[dso][sym] = sym_addr_, size
# guess the best delta
delta, _ = min(deltas[dso].items(),
key=lambda x: (x[1], x[0]))
addr = addr_ + delta
# cached?
if (dso,addr) in at_cache:
cached = at_cache[(dso,addr)]
if cached is None:
# cache says to skip
continue
file, line = cached
else:
# find file+line
i = bisect.bisect(line_at, addr, key=lambda x: x[0])
if i > 0:
_, file, line = line_at[i-1]
else:
file, line = re.sub('(\.o)?$', '.c', dso, 1), 0
# ignore filtered sources
if sources is not None:
if not any(
os.path.abspath(file) == os.path.abspath(s)
for s in sources):
at_cache[(dso,addr)] = None
continue
else:
# default to only cwd
if not everything and not os.path.commonpath([
os.getcwd(),
os.path.abspath(file)]) == os.getcwd():
at_cache[(dso,addr)] = None
continue
# simplify path
if os.path.commonpath([
os.getcwd(),
os.path.abspath(file)]) == os.getcwd():
file = os.path.relpath(file)
else:
file = os.path.abspath(file)
at_cache[(dso,addr)] = file, line
else:
file, line = re.sub('(\.o)?$', '.c', dso, 1), 0
last_stack.append((file, sym, line))
# stop propagating?
if propagate and len(last_stack) >= propagate:
commit()
last_filtered = False
if last_stack:
if last_filtered:
commit()
proc.wait()
@@ -341,35 +594,15 @@ def starapply(args):
f, args, kwargs = args
return f(*args, **kwargs)
def collect(paths, *,
def collect(perf_paths, *,
jobs=None,
objdump_tool=None,
sources=None,
everything=False,
**args):
symbol_pattern = re.compile(
'^(?P<addr>[0-9a-fA-F]+)\s.*\s(?P<name>[^\s]+)\s*$')
line_pattern = re.compile(
'^\s+(?:'
# matches dir/file table
'(?P<no>[0-9]+)\s+'
'(?:(?P<dir>[0-9]+)\s+)?'
'.*\s+'
'(?P<path>[^\s]+)'
# matches line opcodes
'|' '\[[^\]]*\]\s+'
'(?:'
'(?P<op_special>Special)'
'|' '(?P<op_copy>Copy)'
'|' '(?P<op_end>End of Sequence)'
'|' 'File .*?to (?:entry )?(?P<op_file>\d+)'
'|' 'Line .*?to (?P<op_line>[0-9]+)'
'|' '(?:Address|PC) .*?to (?P<op_addr>[0x0-9a-fA-F]+)'
'|' '.' ')*'
')$', re.IGNORECASE)
# automatic job detection?
if jobs == 0:
jobs = len(os.sched_getaffinity(0))
records = []
for path in paths:
for path in perf_paths:
# each .perf file is actually a zip file containing perf files from
# multiple runs
with zipfile.ZipFile(path) as z:
@@ -377,225 +610,17 @@ def collect(paths, *,
# we're dealing with a lot of data but also surprisingly
# parallelizable
dsos = {}
results = []
with mp.Pool(jobs or len(os.sched_getaffinity(0))) as p:
for results_ in p.imap_unordered(
starapply,
((collect_job, (path, i), dict(
everything=everything,
**args))
for path, i in records)):
# organize by dso
results__ = {}
for r in results_:
if r.file not in results__:
results__[r.file] = []
results__[r.file].append(r)
results_ = results__
for dso, results_ in results_.items():
if dso not in dsos:
# find file+line ranges for dsos
#
# do this here so we only process each dso once
syms = {}
sym_at = []
cmd = objdump_tool + ['-t', dso]
if args.get('verbose'):
print(' '.join(shlex.quote(c) for c in cmd))
proc = sp.Popen(cmd,
stdout=sp.PIPE,
stderr=sp.PIPE if not args.get('verbose') else None,
universal_newlines=True,
errors='replace',
close_fds=False)
for line in proc.stdout:
m = symbol_pattern.match(line)
if m:
name = m.group('name')
addr = int(m.group('addr'), 16)
# note multiple symbols can share a name
if name not in syms:
syms[name] = set()
syms[name].add(addr)
sym_at.append((addr, name))
proc.wait()
if proc.returncode != 0:
if not args.get('verbose'):
for line in proc.stderr:
sys.stdout.write(line)
# assume no debug-info on failure
pass
# sort and keep first when duplicates
sym_at.sort()
sym_at_ = []
for addr, name in sym_at:
if len(sym_at_) == 0 or sym_at_[-1][0] != addr:
sym_at_.append((addr, name))
sym_at = sym_at_
# state machine for dwarf line numbers, note that objdump's
# decodedline seems to have issues with multiple dir/file
# tables, which is why we need this
line_at = []
dirs = {}
files = {}
op_file = 1
op_line = 1
op_addr = 0
cmd = objdump_tool + ['--dwarf=rawline', dso]
if args.get('verbose'):
print(' '.join(shlex.quote(c) for c in cmd))
proc = sp.Popen(cmd,
stdout=sp.PIPE,
stderr=sp.PIPE if not args.get('verbose') else None,
universal_newlines=True,
errors='replace',
close_fds=False)
for line in proc.stdout:
m = line_pattern.match(line)
if m:
if m.group('no') and not m.group('dir'):
# found a directory entry
dirs[int(m.group('no'))] = m.group('path')
elif m.group('no'):
# found a file entry
dir = int(m.group('dir'))
if dir in dirs:
files[int(m.group('no'))] = os.path.join(
dirs[dir],
m.group('path'))
else:
files[int(m.group('no'))] = m.group('path')
else:
# found a state machine update
if m.group('op_file'):
op_file = int(m.group('op_file'), 0)
if m.group('op_line'):
op_line = int(m.group('op_line'), 0)
if m.group('op_addr'):
op_addr = int(m.group('op_addr'), 0)
if (m.group('op_special')
or m.group('op_copy')
or m.group('op_end')):
line_at.append((
op_addr,
files.get(op_file, '?'),
op_line))
if m.group('op_end'):
op_file = 1
op_line = 1
op_addr = 0
proc.wait()
if proc.returncode != 0:
if not args.get('verbose'):
for line in proc.stderr:
sys.stdout.write(line)
# assume no debug-info on failure
pass
# sort and keep first when duplicates
#
# I think dwarf requires this to be sorted but just in case
line_at.sort()
line_at_ = []
for addr, file, line in line_at:
if len(line_at_) == 0 or line_at_[-1][0] != addr:
line_at_.append((addr, file, line))
line_at = line_at_
# discard lines outside of the range of the containing
# function, these are introduced by dwarf for inlined
# functions but don't map to elf-level symbols
sym_at_ = []
for addr, sym in sym_at:
i = bisect.bisect(line_at, addr, key=lambda x: x[0])
if i > 0:
_, file, line = line_at[i-1]
sym_at_.append((file, line, sym))
sym_at_.sort()
line_at_ = []
for addr, file, line in line_at:
# only keep if sym-at-addr and sym-at-line match
i = bisect.bisect(
sym_at, addr, key=lambda x: x[0])
j = bisect.bisect(
sym_at_, (file, line), key=lambda x: (x[0], x[1]))
if i > 0 and j > 0 and (
sym_at[i-1][1] == sym_at_[j-1][2]):
line_at_.append((addr, file, line))
line_at = line_at_
dsos[dso] = (syms, sym_at, line_at)
syms, _, line_at = dsos[dso]
# first try to reverse ASLR
def deltas(r, d):
if '+' in r.function:
sym, off = r.function.split('+', 1)
off = int(off, 0)
else:
sym, off = r.function, 0
addr = r.line - off + d
for addr_ in syms.get(sym, []):
yield addr_ - addr
delta = min(
it.chain.from_iterable(
deltas(r, 0) for r in results_),
key=lambda d: sum(it.chain.from_iterable(
deltas(r, d) for r in results_)),
default=0)
# then try to map addrs -> file+line
#
# note we need to do this recursively
def remap(results):
results_ = []
for r in results:
addr = r.line + delta
i = bisect.bisect(line_at, addr, key=lambda x: x[0])
if i > 0:
_, file, line = line_at[i-1]
else:
file, line = re.sub('(\.o)?$', '.c', r.file, 1), 0
# ignore filtered sources
if sources is not None:
if not any(
os.path.abspath(file) == os.path.abspath(s)
for s in sources):
continue
else:
# default to only cwd
if not everything and not os.path.commonpath([
os.getcwd(),
os.path.abspath(file)]) == os.getcwd():
continue
# simplify path
if os.path.commonpath([
os.getcwd(),
os.path.abspath(file)]) == os.getcwd():
file = os.path.relpath(file)
else:
file = os.path.abspath(file)
function, *_ = r.function.split('+', 1)
results_.append(r._replace(
file=file, function=function, line=line,
children=remap(r.children)))
return results_
results.extend(remap(results_))
if jobs is not None:
results = []
with mp.Pool(jobs) as p:
for results_ in p.imap_unordered(
starapply,
((collect_job, (path, i), args) for path, i in records)):
results.extend(results_)
else:
results = []
for path, i in records:
results.extend(collect_job(path, i, **args))
return results
@@ -640,7 +665,7 @@ def fold(Result, results, *,
Result, r.children,
by=by,
defines=defines)))
folded = folded_
folded = folded_
return folded
@@ -983,7 +1008,6 @@ def report(perf_paths, *,
fields=None,
defines=None,
sort=None,
self=False,
branches=False,
caches=False,
**args):
@@ -1001,20 +1025,7 @@ def report(perf_paths, *,
# find sizes
if not args.get('use', None):
# find .o files
paths = []
for path in perf_paths:
if os.path.isdir(path):
path = path + '/*.perf'
for path in glob.glob(path):
paths.append(path)
if not paths:
print("error: no .perf files found in %r?" % perf_paths)
sys.exit(-1)
results = collect(paths, **args)
results = collect(perf_paths, **args)
else:
results = []
with openio(args['use']) as f:
@@ -1124,8 +1135,7 @@ if __name__ == "__main__":
parser.add_argument(
'perf_paths',
nargs=nargs,
help="Description of where to find *.perf files. May be a directory "
"or a list of paths. Defaults to %r." % PERF_PATHS)
help="Input *.perf files.")
parser.add_argument(
'-v', '--verbose',
action='store_true',
@@ -1224,7 +1234,7 @@ if __name__ == "__main__":
nargs='?',
type=lambda x: tuple(float(x) for x in x.split(',')),
const=THRESHOLD,
help="Show lines wth samples above this threshold as a percent of "
help="Show lines with samples above this threshold as a percent of "
"all lines. Defaults to %s." % ','.join(str(t) for t in THRESHOLD))
parser.add_argument(
'-c', '--context',
@@ -1295,7 +1305,13 @@ if __name__ == "__main__":
# perf_paths/command overlap, so need to do some munging here
args.command = args.perf_paths
args.perf_paths = args.perf_paths or PERF_PATHS
if args.record:
if not args.command:
print('error: no command specified?')
sys.exit(-1)
if not args.output:
print('error: no output file specified?')
sys.exit(-1)
sys.exit(main(**{k: v
for k, v in vars(args).items()