scripts: Small refactor to adopt collect_thing pattern everywhere

- stack.py:collect -> collect + collect_cov
- perf.py:collect_syms_and_lines -> collect_syms + collect_dwarf_lines
- perfbd.py:collect_syms_and_lines -> collect_syms + collect_dwarf_lines

This will hopefully lead to both better readability and better code
reuse.

Note collect_dwarf_lines is a bit different from collect_dwarf_files
in code.py/data.py/etc, but given collect_dwarf_lines's extra
complexity, sharing it here is probably not worth it.
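
Roughly, the collect_thing pattern splits the per-input parsing out of
the top-level collect, which is then left to just iterate and
aggregate. A minimal sketch of the coverage side (simplified
signatures and error handling; the gcov invocation matches the diff
below, everything else is illustrative only):

    import json
    import subprocess as sp

    GCOV_PATH = ['gcov']

    def collect_cov(gcda_path, *, gcov_path=GCOV_PATH, **args):
        # collect raw coverage info for a single gcda file via gcov's
        # json output, note gcov_path may contain extra args
        cmd = gcov_path + ['-b', '-t', '--json-format', gcda_path]
        proc = sp.Popen(cmd, stdout=sp.PIPE, universal_newlines=True)
        cov = json.load(proc.stdout)
        proc.wait()
        if proc.returncode != 0:
            raise sp.CalledProcessError(proc.returncode, proc.args)
        return cov

    def collect(gcda_paths, **args):
        # the top-level collect now only iterates and aggregates,
        # delegating the per-file work to collect_cov
        results = []
        for gcda_path in gcda_paths:
            cov = collect_cov(gcda_path, **args)
            results.extend(cov.get('files', []))
        return results

The perf.py/perfbd.py side follows the same shape, with collect_syms
and collect_dwarf_lines each returning their own (things, things_at)
pair that callers combine as needed.
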
Christopher Haster
2024-11-29 01:22:35 -06:00
parent 26ba7bdebc
commit b58266c3b0
3 changed files with 92 additions and 78 deletions

stack.py

@@ -262,34 +262,41 @@ def openio(path, mode='r', buffering=-1):
else:
return open(path, mode, buffering)
def collect(gcda_paths, *,
def collect_cov(gcda_path, *,
gcov_path=GCOV_PATH,
**args):
# get coverage info through gcov's json output
# note, gcov-path may contain extra args
cmd = GCOV_PATH + ['-b', '-t', '--json-format', gcda_path]
if args.get('verbose'):
print(' '.join(shlex.quote(c) for c in cmd))
proc = sp.Popen(cmd,
stdout=sp.PIPE,
stderr=None if args.get('verbose') else sp.DEVNULL,
universal_newlines=True,
errors='replace',
close_fds=False)
cov = json.load(proc.stdout)
proc.wait()
if proc.returncode != 0:
if not args.get('verbose'):
for line in proc.stderr:
sys.stderr.write(line)
raise sp.CalledProcessError(proc.returncode, proc.args)
return cov
def collect(gcda_paths, *,
sources=None,
everything=False,
**args):
results = []
for path in gcda_paths:
# get coverage info through gcov's json output
# note, gcov-path may contain extra args
cmd = GCOV_PATH + ['-b', '-t', '--json-format', path]
if args.get('verbose'):
print(' '.join(shlex.quote(c) for c in cmd))
proc = sp.Popen(cmd,
stdout=sp.PIPE,
stderr=None if args.get('verbose') else sp.DEVNULL,
universal_newlines=True,
errors='replace',
close_fds=False)
data = json.load(proc.stdout)
proc.wait()
if proc.returncode != 0:
if not args.get('verbose'):
for line in proc.stderr:
sys.stderr.write(line)
sys.exit(-1)
for gcda_path in gcda_paths:
# find coverage info
cov = collect_cov(gcda_path, **args)
# collect line/branch coverage
for file in data['files']:
for file in cov['files']:
# ignore filtered sources
if sources is not None:
if not any(os.path.abspath(file['file']) == os.path.abspath(s)

perf.py

@@ -232,7 +232,7 @@ def record(command, *,
return err
# try to only process each dso onceS
# try to only process each dso once
#
# note this only caches with the non-keyword arguments
def multiprocessing_cache(f):
@@ -260,34 +260,16 @@ def multiprocessing_cache(f):
return multiprocessing_cache
@multiprocessing_cache
def collect_syms_and_lines(obj_path, *,
def collect_syms(obj_path, *,
objdump_path=None,
**args):
symbol_pattern = re.compile(
'^(?P<addr>[0-9a-fA-F]+)'
'\s+.*'
'.*'
'\s+(?P<size>[0-9a-fA-F]+)'
'\s+(?P<name>[^\s]+)\s*$')
line_pattern = re.compile(
'^\s+(?:'
# matches dir/file table
'(?P<no>[0-9]+)'
'(?:\s+(?P<dir>[0-9]+))?'
'\s+.*'
'\s+(?P<path>[^\s]+)'
# matches line opcodes
'|' '\[[^\]]*\]\s+' '(?:'
'(?P<op_special>Special)'
'|' '(?P<op_copy>Copy)'
'|' '(?P<op_end>End of Sequence)'
'|' 'File .*?to (?:entry )?(?P<op_file>\d+)'
'|' 'Line .*?to (?P<op_line>[0-9]+)'
'|' '(?:Address|PC) .*?to (?P<op_addr>[0x0-9a-fA-F]+)'
'|' '.'
')*'
')$', re.IGNORECASE)
# figure out symbol addresses and file+line ranges
# figure out symbol addresses
syms = {}
sym_at = []
cmd = objdump_path + ['-t', obj_path]
@@ -318,8 +300,7 @@ def collect_syms_and_lines(obj_path, *,
if not args.get('verbose'):
for line in proc.stderr:
sys.stderr.write(line)
# assume no debug-info on failure
pass
raise sp.CalledProcessError(proc.returncode, proc.args)
# sort and keep largest/first when duplicates
sym_at.sort(key=lambda x: (x[0], -x[2], x[1]))
@@ -329,6 +310,30 @@ def collect_syms_and_lines(obj_path, *,
sym_at_.append((addr, name, size))
sym_at = sym_at_
return syms, sym_at
@multiprocessing_cache
def collect_dwarf_lines(obj_path, *,
objdump_path=None,
**args):
line_pattern = re.compile(
'^\s*(?:'
# matches dir/file table
'(?P<no>[0-9]+)'
'(?:\s+(?P<dir>[0-9]+))?'
'.*\s+(?P<path>[^\s]+)'
# matches line opcodes
'|' '\[[^\]]*\]\s+' '(?:'
'(?P<op_special>Special)'
'|' '(?P<op_copy>Copy)'
'|' '(?P<op_end>End of Sequence)'
'|' 'File .*?to (?:entry )?(?P<op_file>\d+)'
'|' 'Line .*?to (?P<op_line>[0-9]+)'
'|' '(?:Address|PC) .*?to (?P<op_addr>[0x0-9a-fA-F]+)'
'|' '.'
')*'
')\s*$', re.IGNORECASE)
# state machine for dwarf line numbers, note that objdump's
# decodedline seems to have issues with multiple dir/file
# tables, which is why we need this
@@ -388,8 +393,7 @@ def collect_syms_and_lines(obj_path, *,
if not args.get('verbose'):
for line in proc.stderr:
sys.stderr.write(line)
# assume no debug-info on failure
pass
raise sp.CalledProcessError(proc.returncode, proc.args)
# sort and keep first when duplicates
lines.sort()
@@ -407,7 +411,7 @@ def collect_syms_and_lines(obj_path, *,
line_at_.append((addr, file, line))
line_at = line_at_
return syms, sym_at, lines, line_at
return lines, line_at
def collect_decompressed(path, *,
@@ -507,9 +511,8 @@ def collect_decompressed(path, *,
addr_ = int(m.group('addr'), 16)
# get the syms/lines for the dso, this is cached
syms, sym_at, lines, line_at = collect_syms_and_lines(
dso,
**args)
syms, sym_at = collect_syms(dso, **args)
lines, line_at = collect_dwarf_lines(dso, **args)
# ASLR is tricky, we have symbols+offsets, but static symbols
# means we may have multiple options for each symbol.
@@ -594,7 +597,7 @@ def collect_decompressed(path, *,
if not args.get('verbose'):
for line in proc.stderr:
sys.stderr.write(line)
sys.exit(-1)
raise sp.CalledProcessError(proc.returncode, proc.args)
# rearrange results into result type
def to_results(results):

perfbd.py

@@ -166,32 +166,14 @@ def openio(path, mode='r', buffering=-1):
else:
return open(path, mode, buffering)
def collect_syms_and_lines(obj_path, *,
def collect_syms(obj_path, *,
objdump_path=None,
**args):
symbol_pattern = re.compile(
'^(?P<addr>[0-9a-fA-F]+)'
'\s+.*'
'.*'
'\s+(?P<size>[0-9a-fA-F]+)'
'\s+(?P<name>[^\s]+)\s*$')
line_pattern = re.compile(
'^\s+(?:'
# matches dir/file table
'(?P<no>[0-9]+)'
'(?:\s+(?P<dir>[0-9]+))?'
'\s+.*'
'\s+(?P<path>[^\s]+)'
# matches line opcodes
'|' '\[[^\]]*\]\s+' '(?:'
'(?P<op_special>Special)'
'|' '(?P<op_copy>Copy)'
'|' '(?P<op_end>End of Sequence)'
'|' 'File .*?to (?:entry )?(?P<op_file>\d+)'
'|' 'Line .*?to (?P<op_line>[0-9]+)'
'|' '(?:Address|PC) .*?to (?P<op_addr>[0x0-9a-fA-F]+)'
'|' '.'
')*'
')$', re.IGNORECASE)
# figure out symbol addresses
syms = {}
@@ -224,8 +206,7 @@ def collect_syms_and_lines(obj_path, *,
if not args.get('verbose'):
for line in proc.stderr:
sys.stderr.write(line)
# assume no debug-info on failure
pass
raise sp.CalledProcessError(proc.returncode, proc.args)
# sort and keep largest/first when duplicates
sym_at.sort(key=lambda x: (x[0], -x[2], x[1]))
@@ -235,6 +216,29 @@ def collect_syms_and_lines(obj_path, *,
sym_at_.append((addr, name, size))
sym_at = sym_at_
return syms, sym_at
def collect_dwarf_lines(obj_path, *,
objdump_path=None,
**args):
line_pattern = re.compile(
'^\s*(?:'
# matches dir/file table
'(?P<no>[0-9]+)'
'(?:\s+(?P<dir>[0-9]+))?'
'.*\s+(?P<path>[^\s]+)'
# matches line opcodes
'|' '\[[^\]]*\]\s+' '(?:'
'(?P<op_special>Special)'
'|' '(?P<op_copy>Copy)'
'|' '(?P<op_end>End of Sequence)'
'|' 'File .*?to (?:entry )?(?P<op_file>\d+)'
'|' 'Line .*?to (?P<op_line>[0-9]+)'
'|' '(?:Address|PC) .*?to (?P<op_addr>[0x0-9a-fA-F]+)'
'|' '.'
')*'
')\s*$', re.IGNORECASE)
# state machine for dwarf line numbers, note that objdump's
# decodedline seems to have issues with multiple dir/file
# tables, which is why we need this
@@ -294,8 +298,7 @@ def collect_syms_and_lines(obj_path, *,
if not args.get('verbose'):
for line in proc.stderr:
sys.stderr.write(line)
# assume no debug-info on failure
pass
raise sp.CalledProcessError(proc.returncode, proc.args)
# sort and keep first when duplicates
lines.sort()
@@ -313,7 +316,7 @@ def collect_syms_and_lines(obj_path, *,
line_at_.append((addr, file, line))
line_at = line_at_
return syms, sym_at, lines, line_at
return lines, line_at
def collect_job(path, start, stop, syms, sym_at, lines, line_at, *,
@@ -573,7 +576,8 @@ def collect(obj_path, trace_paths, *,
jobs = len(os.sched_getaffinity(0))
# find sym/line info to reverse ASLR
syms, sym_at, lines, line_at = collect_syms_and_lines(obj_path, **args)
syms, sym_at = collect_syms(obj_path, **args)
lines, line_at = collect_dwarf_lines(obj_path, **args)
if jobs is not None:
# try to split up files so that even single files can be processed