forked from Imagelibrary/littlefs
Added perfbd.py and block device performance sampling in bench-runner
Based loosely on Linux's perf tool, perfbd.py uses trace output with backtraces to aggregate and show the block device usage of all functions in a program, propagating block device operation cost up the backtrace for each operation. Combined with --trace-period and --trace-freq for sampling/filtering trace events, this allows the bench-runner to record the general cost of block device operations with very little overhead. Adopted this as the default side effect of make bench, replacing cycle-based performance measurements, which are less important for littlefs.
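The propagation step is the core of the approach: each traced block device operation charges its cost to every function in its backtrace, so callers accumulate the cost of the operations their callees perform. A minimal sketch of that aggregation, assuming a hypothetical, already-parsed record format (TraceOp, aggregate, and the costs below are illustrative only; perfbd.py derives the real records from littlefs's trace output):

import collections as co

# a traced block device operation: its cost (e.g. bytes touched) and the
# backtrace that triggered it, innermost frame first; this record format is
# hypothetical, perfbd.py parses the real records out of trace output
TraceOp = co.namedtuple('TraceOp', ['op', 'cost', 'backtrace'])

def aggregate(ops):
    # self = cost of operations issued directly by a function,
    # total = self + cost propagated up from callees
    self_ = co.defaultdict(int)
    total = co.defaultdict(int)
    for op in ops:
        self_[op.backtrace[0]] += op.cost
        # charge each function in the backtrace once, so recursion
        # doesn't double-count the same operation
        for frame in set(op.backtrace):
            total[frame] += op.cost
    return self_, total

# example: a 512-byte prog and a 256-byte read, both reached from main
ops = [
    TraceOp('prog', 512, ['lfs_bd_prog', 'lfs_file_write', 'main']),
    TraceOp('read', 256, ['lfs_bd_read', 'lfs_file_read', 'main']),
]
self_, total = aggregate(ops)
assert self_['lfs_bd_prog'] == 512
assert total['main'] == 768

Sampling with --trace-period/--trace-freq only changes how many of these records are emitted, which is why the bench-runner can leave this on by default with little overhead.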
scripts/perf.py (610 changed lines)

--- a/scripts/perf.py
+++ b/scripts/perf.py
@@ -4,7 +4,7 @@
 #
 # Example:
 # ./scripts/perf.py -R -obench.perf ./runners/bench_runner
-# ./scripts/perf.py bench.perf -Flfs.c -Flfs_util.c -Scycles
+# ./scripts/perf.py bench.perf -j -Flfs.c -Flfs_util.c -Scycles
 #
 # Copyright (c) 2022, The littlefs authors.
 # SPDX-License-Identifier: BSD-3-Clause
@@ -16,7 +16,6 @@ import csv
 import errno
 import fcntl
 import functools as ft
-import glob
 import itertools as it
 import math as m
 import multiprocessing as mp
@@ -31,7 +30,6 @@ import zipfile
 # TODO support non-zip perf results?
 
 
-PERF_PATHS = ['*.perf']
 PERF_TOOL = ['perf']
 PERF_EVENTS = 'cycles,branch-misses,branches,cache-misses,cache-references'
 PERF_FREQ = 100
@@ -147,14 +145,14 @@ class PerfResult(co.namedtuple('PerfResult', [
             self.children + other.children)


-def openio(path, mode='r'):
+def openio(path, mode='r', buffering=-1):
     if path == '-':
         if mode == 'r':
-            return os.fdopen(os.dup(sys.stdin.fileno()), 'r')
+            return os.fdopen(os.dup(sys.stdin.fileno()), mode, buffering)
         else:
-            return os.fdopen(os.dup(sys.stdout.fileno()), 'w')
+            return os.fdopen(os.dup(sys.stdout.fileno()), mode, buffering)
     else:
-        return open(path, mode)
+        return open(path, mode, buffering)

 # run perf as a subprocess, storing measurements into a zip file
 def record(command, *,
@@ -164,14 +162,6 @@ def record(command, *,
         perf_events=PERF_EVENTS,
         perf_tool=PERF_TOOL,
         **args):
-    if not command:
-        print('error: no command specified?')
-        sys.exit(-1)
-
-    if not output:
-        print('error: no output file specified?')
-        sys.exit(-1)
-
     # create a temporary file for perf to write to, as far as I can tell
     # this is strictly needed because perf's pipe-mode only works with stdout
     with tempfile.NamedTemporaryFile('rb') as f:
@@ -214,8 +204,187 @@ def record(command, *,
     return err


+# try to only process each dso once
+#
+# note this only caches with the non-keyword arguments
+def multiprocessing_cache(f):
+    local_cache = {}
+    manager = mp.Manager()
+    global_cache = manager.dict()
+    lock = mp.Lock()
+
+    def multiprocessing_cache(*args, **kwargs):
+        # check local cache?
+        if args in local_cache:
+            return local_cache[args]
+        # check global cache?
+        with lock:
+            if args in global_cache:
+                v = global_cache[args]
+                local_cache[args] = v
+                return v
+        # fall back to calling the function
+        v = f(*args, **kwargs)
+        global_cache[args] = v
+        local_cache[args] = v
+        return v
+
+    return multiprocessing_cache
+
+@multiprocessing_cache
+def collect_syms_and_lines(obj_path, *,
+        objdump_tool=None,
+        **args):
+    symbol_pattern = re.compile(
+        '^(?P<addr>[0-9a-fA-F]+)'
+            '\s+.*'
+            '\s+(?P<size>[0-9a-fA-F]+)'
+            '\s+(?P<name>[^\s]+)\s*$')
+    line_pattern = re.compile(
+        '^\s+(?:'
+            # matches dir/file table
+            '(?P<no>[0-9]+)'
+                '(?:\s+(?P<dir>[0-9]+))?'
+                '\s+.*'
+                '\s+(?P<path>[^\s]+)'
+        # matches line opcodes
+        '|' '\[[^\]]*\]\s+'
+            '(?:'
+                '(?P<op_special>Special)'
+                '|' '(?P<op_copy>Copy)'
+                '|' '(?P<op_end>End of Sequence)'
+                '|' 'File .*?to (?:entry )?(?P<op_file>\d+)'
+                '|' 'Line .*?to (?P<op_line>[0-9]+)'
+                '|' '(?:Address|PC) .*?to (?P<op_addr>[0x0-9a-fA-F]+)'
+                '|' '.' ')*'
+        ')$', re.IGNORECASE)
+
+    # figure out symbol addresses and file+line ranges
+    syms = {}
+    sym_at = []
+    cmd = objdump_tool + ['-t', obj_path]
+    if args.get('verbose'):
+        print(' '.join(shlex.quote(c) for c in cmd))
+    proc = sp.Popen(cmd,
+        stdout=sp.PIPE,
+        stderr=sp.PIPE if not args.get('verbose') else None,
+        universal_newlines=True,
+        errors='replace',
+        close_fds=False)
+    for line in proc.stdout:
+        m = symbol_pattern.match(line)
+        if m:
+            name = m.group('name')
+            addr = int(m.group('addr'), 16)
+            size = int(m.group('size'), 16)
+            # ignore zero-sized symbols
+            if not size:
+                continue
+            # note multiple symbols can share a name
+            if name not in syms:
+                syms[name] = set()
+            syms[name].add((addr, size))
+            sym_at.append((addr, name, size))
+    proc.wait()
+    if proc.returncode != 0:
+        if not args.get('verbose'):
+            for line in proc.stderr:
+                sys.stdout.write(line)
+        # assume no debug-info on failure
+        pass
+
+    # sort and keep largest/first when duplicates
+    sym_at.sort(key=lambda x: (x[0], -x[2], x[1]))
+    sym_at_ = []
+    for addr, name, size in sym_at:
+        if len(sym_at_) == 0 or sym_at_[-1][0] != addr:
+            sym_at_.append((addr, name, size))
+    sym_at = sym_at_
+
+    # state machine for dwarf line numbers, note that objdump's
+    # decodedline seems to have issues with multiple dir/file
+    # tables, which is why we need this
+    lines = []
+    line_at = []
+    dirs = {}
+    files = {}
+    op_file = 1
+    op_line = 1
+    op_addr = 0
+    cmd = objdump_tool + ['--dwarf=rawline', obj_path]
+    if args.get('verbose'):
+        print(' '.join(shlex.quote(c) for c in cmd))
+    proc = sp.Popen(cmd,
+        stdout=sp.PIPE,
+        stderr=sp.PIPE if not args.get('verbose') else None,
+        universal_newlines=True,
+        errors='replace',
+        close_fds=False)
+    for line in proc.stdout:
+        m = line_pattern.match(line)
+        if m:
+            if m.group('no') and not m.group('dir'):
+                # found a directory entry
+                dirs[int(m.group('no'))] = m.group('path')
+            elif m.group('no'):
+                # found a file entry
+                dir = int(m.group('dir'))
+                if dir in dirs:
+                    files[int(m.group('no'))] = os.path.join(
+                        dirs[dir],
+                        m.group('path'))
+                else:
+                    files[int(m.group('no'))] = m.group('path')
+            else:
+                # found a state machine update
+                if m.group('op_file'):
+                    op_file = int(m.group('op_file'), 0)
+                if m.group('op_line'):
+                    op_line = int(m.group('op_line'), 0)
+                if m.group('op_addr'):
+                    op_addr = int(m.group('op_addr'), 0)
+
+                if (m.group('op_special')
+                        or m.group('op_copy')
+                        or m.group('op_end')):
+                    file = os.path.abspath(files.get(op_file, '?'))
+                    lines.append((file, op_line, op_addr))
+                    line_at.append((op_addr, file, op_line))
+
+                    if m.group('op_end'):
+                        op_file = 1
+                        op_line = 1
+                        op_addr = 0
+    proc.wait()
+    if proc.returncode != 0:
+        if not args.get('verbose'):
+            for line in proc.stderr:
+                sys.stdout.write(line)
+        # assume no debug-info on failure
+        pass
+
+    # sort and keep first when duplicates
+    lines.sort()
+    lines_ = []
+    for file, line, addr in lines:
+        if len(lines_) == 0 or lines_[-1][0] != file or lines_[-1][1] != line:
+            lines_.append((file, line, addr))
+    lines = lines_
+
+    # sort and keep first when duplicates
+    line_at.sort()
+    line_at_ = []
+    for addr, file, line in line_at:
+        if len(line_at_) == 0 or line_at_[-1][0] != addr:
+            line_at_.append((addr, file, line))
+    line_at = line_at_
+
+    return syms, sym_at, lines, line_at
+
+
 def collect_decompressed(path, *,
         perf_tool=PERF_TOOL,
         sources=None,
         everything=False,
         propagate=0,
+        depth=1,
@@ -228,7 +397,7 @@ def collect_decompressed(path, *,
         '\s+(?P<event>[^:]+):')
     frame_pattern = re.compile(
         '\s+(?P<addr>\w+)'
-        '\s+(?P<sym>[^\s]+)'
+        '\s+(?P<sym>[^\s\+]+)(?:\+(?P<off>\w+))?'
         '\s+\((?P<dso>[^\)]+)\)')
     events = {
         'cycles': 'cycles',
@@ -254,6 +423,9 @@ def collect_decompressed(path, *,
         last_event = ''
         last_period = 0
         last_stack = []
+        deltas = co.defaultdict(lambda: {})
+        syms_ = co.defaultdict(lambda: {})
+        at_cache = {}
         results = {}

         def commit():
@@ -276,36 +448,117 @@ def collect_decompressed(path, *,
         for line in proc.stdout:
             # we need to process a lot of data, so wait to use regex as late
             # as possible
             if not line:
                 continue
             if not line.startswith('\t'):
-                m = sample_pattern.match(line)
-                if m:
-                    if last_stack:
-                        commit()
-                    last_event = m.group('event')
-                    last_filtered = last_event in events
-                    last_period = int(m.group('period'), 0)
-                    last_stack = []
-
+                if last_filtered:
+                    commit()
+                    last_filtered = False
+
+                if line:
+                    m = sample_pattern.match(line)
+                    if m and m.group('event') in events:
+                        last_filtered = True
+                        last_event = m.group('event')
+                        last_period = int(m.group('period'), 0)
+                        last_stack = []
+
             elif last_filtered:
                 m = frame_pattern.match(line)
                 if m:
                     # filter out internal/kernel functions
                     if not everything and (
                             m.group('sym').startswith('__')
-                            or m.group('dso').startswith('/usr/lib')
-                            or not m.group('sym')[:1].isalpha()):
+                            or m.group('sym').startswith('0')
+                            or m.group('sym').startswith('-')
+                            or m.group('sym').startswith('[')
+                            or m.group('dso').startswith('/usr/lib')):
                         continue

-                    last_stack.append((
-                        m.group('dso'),
-                        m.group('sym'),
-                        int(m.group('addr'), 16)))
-
+                    dso = m.group('dso')
+                    sym = m.group('sym')
+                    off = int(m.group('off'), 0) if m.group('off') else 0
+                    addr_ = int(m.group('addr'), 16)
+
+                    # get the syms/lines for the dso, this is cached
+                    syms, sym_at, lines, line_at = collect_syms_and_lines(
+                        dso,
+                        **args)
+
+                    # ASLR is tricky, we have symbols+offsets, but static symbols
+                    # means we may have multiple options for each symbol.
+                    #
+                    # To try to solve this, we use previously seen symbols to build
+                    # confidence for the correct ASLR delta. This means we may
+                    # guess incorrectly for early symbols, but this will only affect
+                    # a few samples.
+                    if sym in syms:
+                        sym_addr_ = addr_ - off
+
+                        # track possible deltas?
+                        for sym_addr, size in syms[sym]:
+                            delta = sym_addr - sym_addr_
+                            if delta not in deltas[dso]:
+                                deltas[dso][delta] = sum(
+                                    abs(a_+delta - a)
+                                    for s, (a_, _) in syms_[dso].items()
+                                    for a, _ in syms[s])
+                        for delta in deltas[dso].keys():
+                            deltas[dso][delta] += abs(sym_addr_+delta - sym_addr)
+                        syms_[dso][sym] = sym_addr_, size
+
+                        # guess the best delta
+                        delta, _ = min(deltas[dso].items(),
+                            key=lambda x: (x[1], x[0]))
+                        addr = addr_ + delta
+
+                        # cached?
+                        if (dso,addr) in at_cache:
+                            cached = at_cache[(dso,addr)]
+                            if cached is None:
+                                # cache says to skip
+                                continue
+                            file, line = cached
+                        else:
+                            # find file+line
+                            i = bisect.bisect(line_at, addr, key=lambda x: x[0])
+                            if i > 0:
+                                _, file, line = line_at[i-1]
+                            else:
+                                file, line = re.sub('(\.o)?$', '.c', dso, 1), 0
+
+                            # ignore filtered sources
+                            if sources is not None:
+                                if not any(
+                                        os.path.abspath(file) == os.path.abspath(s)
+                                        for s in sources):
+                                    at_cache[(dso,addr)] = None
+                                    continue
+                            else:
+                                # default to only cwd
+                                if not everything and not os.path.commonpath([
+                                        os.getcwd(),
+                                        os.path.abspath(file)]) == os.getcwd():
+                                    at_cache[(dso,addr)] = None
+                                    continue
+
+                            # simplify path
+                            if os.path.commonpath([
+                                    os.getcwd(),
+                                    os.path.abspath(file)]) == os.getcwd():
+                                file = os.path.relpath(file)
+                            else:
+                                file = os.path.abspath(file)
+
+                            at_cache[(dso,addr)] = file, line
+                    else:
+                        file, line = re.sub('(\.o)?$', '.c', dso, 1), 0
+
+                    last_stack.append((file, sym, line))

+                    # stop propagating?
+                    if propagate and len(last_stack) >= propagate:
+                        commit()
+                        last_filtered = False
-        if last_stack:
+        if last_filtered:
             commit()

         proc.wait()
@@ -341,35 +594,15 @@ def starapply(args):
     f, args, kwargs = args
     return f(*args, **kwargs)

-def collect(paths, *,
+def collect(perf_paths, *,
         jobs=None,
-        objdump_tool=None,
-        sources=None,
-        everything=False,
         **args):
-    symbol_pattern = re.compile(
-        '^(?P<addr>[0-9a-fA-F]+)\s.*\s(?P<name>[^\s]+)\s*$')
-    line_pattern = re.compile(
-        '^\s+(?:'
-            # matches dir/file table
-            '(?P<no>[0-9]+)\s+'
-            '(?:(?P<dir>[0-9]+)\s+)?'
-            '.*\s+'
-            '(?P<path>[^\s]+)'
-        # matches line opcodes
-        '|' '\[[^\]]*\]\s+'
-            '(?:'
-                '(?P<op_special>Special)'
-                '|' '(?P<op_copy>Copy)'
-                '|' '(?P<op_end>End of Sequence)'
-                '|' 'File .*?to (?:entry )?(?P<op_file>\d+)'
-                '|' 'Line .*?to (?P<op_line>[0-9]+)'
-                '|' '(?:Address|PC) .*?to (?P<op_addr>[0x0-9a-fA-F]+)'
-                '|' '.' ')*'
-        ')$', re.IGNORECASE)
+    # automatic job detection?
+    if jobs == 0:
+        jobs = len(os.sched_getaffinity(0))

     records = []
-    for path in paths:
+    for path in perf_paths:
         # each .perf file is actually a zip file containing perf files from
         # multiple runs
         with zipfile.ZipFile(path) as z:
@@ -377,225 +610,17 @@ def collect(paths, *,

     # we're dealing with a lot of data but also surprisingly
     # parallelizable
-    dsos = {}
-    results = []
-    with mp.Pool(jobs or len(os.sched_getaffinity(0))) as p:
-        for results_ in p.imap_unordered(
-                starapply,
-                ((collect_job, (path, i), dict(
-                    everything=everything,
-                    **args))
-                    for path, i in records)):
-
-            # organize by dso
-            results__ = {}
-            for r in results_:
-                if r.file not in results__:
-                    results__[r.file] = []
-                results__[r.file].append(r)
-            results_ = results__
-
-            for dso, results_ in results_.items():
-                if dso not in dsos:
-                    # find file+line ranges for dsos
-                    #
-                    # do this here so we only process each dso once
-                    syms = {}
-                    sym_at = []
-                    cmd = objdump_tool + ['-t', dso]
-                    if args.get('verbose'):
-                        print(' '.join(shlex.quote(c) for c in cmd))
-                    proc = sp.Popen(cmd,
-                        stdout=sp.PIPE,
-                        stderr=sp.PIPE if not args.get('verbose') else None,
-                        universal_newlines=True,
-                        errors='replace',
-                        close_fds=False)
-                    for line in proc.stdout:
-                        m = symbol_pattern.match(line)
-                        if m:
-                            name = m.group('name')
-                            addr = int(m.group('addr'), 16)
-                            # note multiple symbols can share a name
-                            if name not in syms:
-                                syms[name] = set()
-                            syms[name].add(addr)
-                            sym_at.append((addr, name))
-                    proc.wait()
-                    if proc.returncode != 0:
-                        if not args.get('verbose'):
-                            for line in proc.stderr:
-                                sys.stdout.write(line)
-                        # assume no debug-info on failure
-                        pass
-
-                    # sort and keep first when duplicates
-                    sym_at.sort()
-                    sym_at_ = []
-                    for addr, name in sym_at:
-                        if len(sym_at_) == 0 or sym_at_[-1][0] != addr:
-                            sym_at_.append((addr, name))
-                    sym_at = sym_at_
-
-                    # state machine for dwarf line numbers, note that objdump's
-                    # decodedline seems to have issues with multiple dir/file
-                    # tables, which is why we need this
-                    line_at = []
-                    dirs = {}
-                    files = {}
-                    op_file = 1
-                    op_line = 1
-                    op_addr = 0
-                    cmd = objdump_tool + ['--dwarf=rawline', dso]
-                    if args.get('verbose'):
-                        print(' '.join(shlex.quote(c) for c in cmd))
-                    proc = sp.Popen(cmd,
-                        stdout=sp.PIPE,
-                        stderr=sp.PIPE if not args.get('verbose') else None,
-                        universal_newlines=True,
-                        errors='replace',
-                        close_fds=False)
-                    for line in proc.stdout:
-                        m = line_pattern.match(line)
-                        if m:
-                            if m.group('no') and not m.group('dir'):
-                                # found a directory entry
-                                dirs[int(m.group('no'))] = m.group('path')
-                            elif m.group('no'):
-                                # found a file entry
-                                dir = int(m.group('dir'))
-                                if dir in dirs:
-                                    files[int(m.group('no'))] = os.path.join(
-                                        dirs[dir],
-                                        m.group('path'))
-                                else:
-                                    files[int(m.group('no'))] = m.group('path')
-                            else:
-                                # found a state machine update
-                                if m.group('op_file'):
-                                    op_file = int(m.group('op_file'), 0)
-                                if m.group('op_line'):
-                                    op_line = int(m.group('op_line'), 0)
-                                if m.group('op_addr'):
-                                    op_addr = int(m.group('op_addr'), 0)
-
-                                if (m.group('op_special')
-                                        or m.group('op_copy')
-                                        or m.group('op_end')):
-                                    line_at.append((
-                                        op_addr,
-                                        files.get(op_file, '?'),
-                                        op_line))
-
-                                    if m.group('op_end'):
-                                        op_file = 1
-                                        op_line = 1
-                                        op_addr = 0
-                    proc.wait()
-                    if proc.returncode != 0:
-                        if not args.get('verbose'):
-                            for line in proc.stderr:
-                                sys.stdout.write(line)
-                        # assume no debug-info on failure
-                        pass
-
-                    # sort and keep first when duplicates
-                    #
-                    # I think dwarf requires this to be sorted but just in case
-                    line_at.sort()
-                    line_at_ = []
-                    for addr, file, line in line_at:
-                        if len(line_at_) == 0 or line_at_[-1][0] != addr:
-                            line_at_.append((addr, file, line))
-                    line_at = line_at_
-
-                    # discard lines outside of the range of the containing
-                    # function, these are introduced by dwarf for inlined
-                    # functions but don't map to elf-level symbols
-                    sym_at_ = []
-                    for addr, sym in sym_at:
-                        i = bisect.bisect(line_at, addr, key=lambda x: x[0])
-                        if i > 0:
-                            _, file, line = line_at[i-1]
-                            sym_at_.append((file, line, sym))
-                    sym_at_.sort()
-
-                    line_at_ = []
-                    for addr, file, line in line_at:
-                        # only keep if sym-at-addr and sym-at-line match
-                        i = bisect.bisect(
-                            sym_at, addr, key=lambda x: x[0])
-                        j = bisect.bisect(
-                            sym_at_, (file, line), key=lambda x: (x[0], x[1]))
-                        if i > 0 and j > 0 and (
-                                sym_at[i-1][1] == sym_at_[j-1][2]):
-                            line_at_.append((addr, file, line))
-                    line_at = line_at_
-
-                    dsos[dso] = (syms, sym_at, line_at)
-
-                syms, _, line_at = dsos[dso]
-
-                # first try to reverse ASLR
-                def deltas(r, d):
-                    if '+' in r.function:
-                        sym, off = r.function.split('+', 1)
-                        off = int(off, 0)
-                    else:
-                        sym, off = r.function, 0
-                    addr = r.line - off + d
-
-                    for addr_ in syms.get(sym, []):
-                        yield addr_ - addr
-
-                delta = min(
-                    it.chain.from_iterable(
-                        deltas(r, 0) for r in results_),
-                    key=lambda d: sum(it.chain.from_iterable(
-                        deltas(r, d) for r in results_)),
-                    default=0)
-
-                # then try to map addrs -> file+line
-                #
-                # note we need to do this recursively
-                def remap(results):
-                    results_ = []
-                    for r in results:
-                        addr = r.line + delta
-                        i = bisect.bisect(line_at, addr, key=lambda x: x[0])
-                        if i > 0:
-                            _, file, line = line_at[i-1]
-                        else:
-                            file, line = re.sub('(\.o)?$', '.c', r.file, 1), 0
-
-                        # ignore filtered sources
-                        if sources is not None:
-                            if not any(
-                                    os.path.abspath(file) == os.path.abspath(s)
-                                    for s in sources):
-                                continue
-                        else:
-                            # default to only cwd
-                            if not everything and not os.path.commonpath([
-                                    os.getcwd(),
-                                    os.path.abspath(file)]) == os.getcwd():
-                                continue
-
-                        # simplify path
-                        if os.path.commonpath([
-                                os.getcwd(),
-                                os.path.abspath(file)]) == os.getcwd():
-                            file = os.path.relpath(file)
-                        else:
-                            file = os.path.abspath(file)
-
-                        function, *_ = r.function.split('+', 1)
-                        results_.append(r._replace(
-                            file=file, function=function, line=line,
-                            children=remap(r.children)))
-                    return results_
-
-                results.extend(remap(results_))
+    if jobs is not None:
+        results = []
+        with mp.Pool(jobs) as p:
+            for results_ in p.imap_unordered(
+                    starapply,
+                    ((collect_job, (path, i), args) for path, i in records)):
+                results.extend(results_)
+    else:
+        results = []
+        for path, i in records:
+            results.extend(collect_job(path, i, **args))

     return results

@@ -640,7 +665,7 @@ def fold(Result, results, *,
                 Result, r.children,
                 by=by,
                 defines=defines)))
-            folded = folded_
+        folded = folded_

     return folded

@@ -983,7 +1008,6 @@ def report(perf_paths, *,
         fields=None,
         defines=None,
         sort=None,
-        self=False,
        branches=False,
        caches=False,
        **args):
@@ -1001,20 +1025,7 @@ def report(perf_paths, *,

     # find sizes
     if not args.get('use', None):
-        # find .o files
-        paths = []
-        for path in perf_paths:
-            if os.path.isdir(path):
-                path = path + '/*.perf'
-
-            for path in glob.glob(path):
-                paths.append(path)
-
-        if not paths:
-            print("error: no .perf files found in %r?" % perf_paths)
-            sys.exit(-1)
-
-        results = collect(paths, **args)
+        results = collect(perf_paths, **args)
     else:
         results = []
         with openio(args['use']) as f:
@@ -1124,8 +1135,7 @@ if __name__ == "__main__":
     parser.add_argument(
         'perf_paths',
         nargs=nargs,
-        help="Description of where to find *.perf files. May be a directory "
-            "or a list of paths. Defaults to %r." % PERF_PATHS)
+        help="Input *.perf files.")
     parser.add_argument(
         '-v', '--verbose',
         action='store_true',
@@ -1224,7 +1234,7 @@ if __name__ == "__main__":
         nargs='?',
         type=lambda x: tuple(float(x) for x in x.split(',')),
         const=THRESHOLD,
-        help="Show lines wth samples above this threshold as a percent of "
+        help="Show lines with samples above this threshold as a percent of "
             "all lines. Defaults to %s." % ','.join(str(t) for t in THRESHOLD))
     parser.add_argument(
         '-c', '--context',
@@ -1295,7 +1305,13 @@ if __name__ == "__main__":
     # perf_paths/command overlap, so need to do some munging here
     args.command = args.perf_paths
-    args.perf_paths = args.perf_paths or PERF_PATHS
+    if args.record:
+        if not args.command:
+            print('error: no command specified?')
+            sys.exit(-1)
+        if not args.output:
+            print('error: no output file specified?')
+            sys.exit(-1)

     sys.exit(main(**{k: v
         for k, v in vars(args).items()