scripts: Small refactor to adopt collect_thing pattern everywhere

- stack.py:collect -> collect + collect_cov
- perf.py:collect_syms_and_lines -> collect_syms + collect_dwarf_lines
- perfbd.py:collect_syms_and_lines -> collect_syms + collect_dwarf_lines

This should hopefully lead to both better readability and better code
reuse.

Note that collect_dwarf_lines is a bit different from collect_dwarf_files in
code.py/data.py/etc, but the extra complexity of collect_dwarf_lines is
probably not worth sharing here.
This commit is contained in:
Christopher Haster
2024-11-29 01:22:35 -06:00
parent 26ba7bdebc
commit b58266c3b0
3 changed files with 92 additions and 78 deletions

View File

@@ -166,32 +166,14 @@ def openio(path, mode='r', buffering=-1):
else:
return open(path, mode, buffering)
def collect_syms_and_lines(obj_path, *,
def collect_syms(obj_path, *,
objdump_path=None,
**args):
symbol_pattern = re.compile(
'^(?P<addr>[0-9a-fA-F]+)'
'\s+.*'
'.*'
'\s+(?P<size>[0-9a-fA-F]+)'
'\s+(?P<name>[^\s]+)\s*$')
line_pattern = re.compile(
'^\s+(?:'
# matches dir/file table
'(?P<no>[0-9]+)'
'(?:\s+(?P<dir>[0-9]+))?'
'\s+.*'
'\s+(?P<path>[^\s]+)'
# matches line opcodes
'|' '\[[^\]]*\]\s+' '(?:'
'(?P<op_special>Special)'
'|' '(?P<op_copy>Copy)'
'|' '(?P<op_end>End of Sequence)'
'|' 'File .*?to (?:entry )?(?P<op_file>\d+)'
'|' 'Line .*?to (?P<op_line>[0-9]+)'
'|' '(?:Address|PC) .*?to (?P<op_addr>[0x0-9a-fA-F]+)'
'|' '.'
')*'
')$', re.IGNORECASE)
# figure out symbol addresses
syms = {}
@@ -224,8 +206,7 @@ def collect_syms_and_lines(obj_path, *,
if not args.get('verbose'):
for line in proc.stderr:
sys.stderr.write(line)
# assume no debug-info on failure
pass
raise sp.CalledProcessError(proc.returncode, proc.args)
# sort and keep largest/first when duplicates
sym_at.sort(key=lambda x: (x[0], -x[2], x[1]))
@@ -235,6 +216,29 @@ def collect_syms_and_lines(obj_path, *,
sym_at_.append((addr, name, size))
sym_at = sym_at_
return syms, sym_at
def collect_dwarf_lines(obj_path, *,
objdump_path=None,
**args):
line_pattern = re.compile(
'^\s*(?:'
# matches dir/file table
'(?P<no>[0-9]+)'
'(?:\s+(?P<dir>[0-9]+))?'
'.*\s+(?P<path>[^\s]+)'
# matches line opcodes
'|' '\[[^\]]*\]\s+' '(?:'
'(?P<op_special>Special)'
'|' '(?P<op_copy>Copy)'
'|' '(?P<op_end>End of Sequence)'
'|' 'File .*?to (?:entry )?(?P<op_file>\d+)'
'|' 'Line .*?to (?P<op_line>[0-9]+)'
'|' '(?:Address|PC) .*?to (?P<op_addr>[0x0-9a-fA-F]+)'
'|' '.'
')*'
')\s*$', re.IGNORECASE)
# state machine for dwarf line numbers, note that objdump's
# decodedline seems to have issues with multiple dir/file
# tables, which is why we need this
@@ -294,8 +298,7 @@ def collect_syms_and_lines(obj_path, *,
if not args.get('verbose'):
for line in proc.stderr:
sys.stderr.write(line)
# assume no debug-info on failure
pass
raise sp.CalledProcessError(proc.returncode, proc.args)
# sort and keep first when duplicates
lines.sort()
@@ -313,7 +316,7 @@ def collect_syms_and_lines(obj_path, *,
line_at_.append((addr, file, line))
line_at = line_at_
return syms, sym_at, lines, line_at
return lines, line_at
def collect_job(path, start, stop, syms, sym_at, lines, line_at, *,
@@ -573,7 +576,8 @@ def collect(obj_path, trace_paths, *,
jobs = len(os.sched_getaffinity(0))
# find sym/line info to reverse ASLR
syms, sym_at, lines, line_at = collect_syms_and_lines(obj_path, **args)
syms, sym_at = collect_syms(obj_path, **args)
lines, line_at = collect_dwarf_lines(obj_path, **args)
if jobs is not None:
# try to split up files so that even single files can be processed