Reworked scripts to move field details into classes

These scripts can't easily share the common logic, but separating
field details from the print/merge/csv logic should make the common
part of these scripts much easier to create/modify going forward.

This also tweaked the behavior of summary.py slightly.
This commit is contained in:
Christopher Haster
2022-05-21 16:46:25 -05:00
parent 4a7e94fb15
commit 5b0a6d4747
8 changed files with 1102 additions and 898 deletions

View File

@@ -3,58 +3,202 @@
# Script to summarize the outputs of other scripts. Operates on CSV files.
#
import functools as ft
import collections as co
import os
import csv
import re
import functools as ft
import math as m
import os
import re
# displayable fields
#
# Each Field bundles everything the print/merge/csv logic needs to know
# about one measurement column:
#   name  - column name, also matched against -f/--fields
#   parse - pull this field's value out of a raw CSV row; may raise
#           KeyError/ValueError when the row lacks the column
#   acc   - accumulate a list of parsed values into one (sum, max, reduce)
#   key   - derive a sort key from an accumulated value
#   fmt   - printf-style format giving the column width
#   repr  - render an accumulated value for printing
#   null  - placeholder text when a result lacks this field
#   ratio - fractional old->new change used when diffing
Field = co.namedtuple('Field', 'name,parse,acc,key,fmt,repr,null,ratio')
FIELDS = [
# name, parse, acc, key, fmt, repr, null, ratio
Field('code',
lambda r: int(r['code_size']),
sum,
lambda r: r,
'%7s',
lambda r: r,
'-',
lambda old, new: (new-old)/old),
Field('data',
lambda r: int(r['data_size']),
sum,
lambda r: r,
'%7s',
lambda r: r,
'-',
lambda old, new: (new-old)/old),
Field('stack',
lambda r: float(r['stack_limit']),
max,
lambda r: r,
'%7s',
lambda r: '' if m.isinf(r) else int(r),
'-',
lambda old, new: (new-old)/old),
Field('structs',
lambda r: int(r['struct_size']),
sum,
lambda r: r,
'%8s',
lambda r: r,
'-',
lambda old, new: (new-old)/old),
Field('coverage',
lambda r: (int(r['coverage_hits']), int(r['coverage_count'])),
lambda rs: ft.reduce(lambda a, b: (a[0]+b[0], a[1]+b[1]), rs),
lambda r: r[0]/r[1],
'%19s',
lambda r: '%11s %7s' % ('%d/%d' % (r[0], r[1]), '%.1f%%' % (100*r[0]/r[1])),
'%11s %7s' % ('-', '-'),
lambda old, new: ((new[0]/new[1]) - (old[0]/old[1])))
]
# each result is a type generated by another script
RESULTS = []
FIELDS = 'code,data,stack,structs'

def result(cls):
    # class decorator: record a Result type in RESULTS so the merge/print
    # logic can discover it, then hand the class back unchanged
    RESULTS.append(cls)
    return cls
@result
class CodeResult(co.namedtuple('CodeResult', 'code_size')):
    """Code-size measurement; merged entries sum, diffs are fractional."""
    __slots__ = ()

    def __new__(cls, code_size=0):
        # normalize to int so CSV strings and ints both work
        return super().__new__(cls, int(code_size))

    def __add__(self, other):
        # merging duplicate entries sums their sizes
        return self.__class__(self.code_size + other.code_size)

    def __sub__(self, other):
        # fractional change old -> new; 1.0 when there is no old size
        old = 0 if other is None else other.code_size
        new = 0 if self is None else self.code_size
        if not old:
            return 1.0
        return (new - old) / old

    def __rsub__(self, other):
        # lets the diff logic compute None - result
        return type(self).__sub__(other, self)

    def key(self):
        # sort descending by size
        return -self.code_size

    _header = '%7s' % 'code'
    _nil = '%7s' % '-'

    def __str__(self):
        return '%7s' % (self.code_size,)
@result
class DataResult(co.namedtuple('DataResult', 'data_size')):
    """Data-size measurement; merged entries sum, diffs are fractional."""
    __slots__ = ()

    def __new__(cls, data_size=0):
        # normalize to int so CSV strings and ints both work
        return super().__new__(cls, int(data_size))

    def __add__(self, other):
        return self.__class__(self.data_size + other.data_size)

    def __sub__(self, other):
        # relative growth old -> new; defined as 1.0 when no old size exists
        old = other.data_size if other is not None else 0
        new = self.data_size if self is not None else 0
        return 1.0 if not old else (new - old) / old

    def __rsub__(self, other):
        # lets the diff logic compute None - result
        return type(self).__sub__(other, self)

    def key(self):
        # sort descending by size
        return -self.data_size

    _header = '%7s' % 'data'
    _nil = '%7s' % '-'

    def __str__(self):
        return '%7s' % (self.data_size,)
@result
class StackResult(co.namedtuple('StackResult', 'stack_limit')):
    """Stack-limit measurement; merging keeps the largest limit."""
    __slots__ = ()

    def __new__(cls, stack_limit=0):
        # stored as float so an unbounded limit can be represented as inf
        return super().__new__(cls, float(stack_limit))

    def __add__(self, other):
        # merging keeps the worst-case (maximum) limit, not the sum
        return self.__class__(max(self.stack_limit, other.stack_limit))

    def __sub__(self, other):
        # fractional change old -> new, with explicit inf/zero handling
        old = other.stack_limit if other is not None else 0
        new = self.stack_limit if self is not None else 0
        if m.isinf(new) and m.isinf(old):
            return 0.0
        if m.isinf(new):
            return float('inf')
        if m.isinf(old):
            return -float('inf')
        if not old and not new:
            return 0.0
        if not old:
            return 1.0
        return (new - old) / old

    def __rsub__(self, other):
        # lets the diff logic compute None - result
        return type(self).__sub__(other, self)

    def key(self):
        # sort descending by limit
        return -self.stack_limit

    _header = '%7s' % 'stack'
    _nil = '%7s' % '-'

    def __str__(self):
        # an infinite limit prints as blank
        limit = self.stack_limit
        return '%7s' % ('' if m.isinf(limit) else int(limit))
@result
class StructsResult(co.namedtuple('StructsResult', 'struct_size')):
    """Struct-size measurement; merged entries sum, diffs are fractional."""
    __slots__ = ()

    def __new__(cls, struct_size=0):
        # normalize to int so CSV strings and ints both work
        return super().__new__(cls, int(struct_size))

    def __add__(self, other):
        combined = self.struct_size + other.struct_size
        return self.__class__(combined)

    def __sub__(self, other):
        # relative growth old -> new; defined as 1.0 when no old size exists
        old = other.struct_size if other is not None else 0
        new = self.struct_size if self is not None else 0
        return 1.0 if not old else (new - old) / old

    def __rsub__(self, other):
        # lets the diff logic compute None - result
        return type(self).__sub__(other, self)

    def key(self):
        # sort descending by size
        return -self.struct_size

    _header = '%7s' % 'structs'
    _nil = '%7s' % '-'

    def __str__(self):
        return '%7s' % (self.struct_size,)
@result
class CoverageLineResult(co.namedtuple('CoverageLineResult',
        'coverage_line_hits,coverage_line_count')):
    """Line-coverage measurement kept as hits/count so merges stay exact.

    Fix: the underlying namedtuple was named 'CoverageResult', mismatching
    the class name (and colliding with CoverageBranchResult's typename),
    which made reprs misleading; it now matches like every other Result.
    """
    __slots__ = ()
    def __new__(cls, coverage_line_hits=0, coverage_line_count=0):
        # normalize to ints so CSV strings and ints both work
        return super().__new__(cls,
            int(coverage_line_hits),
            int(coverage_line_count))
    def __add__(self, other):
        # merging sums hits and counts independently (exact, unlike ratios)
        return self.__class__(
            self.coverage_line_hits + other.coverage_line_hits,
            self.coverage_line_count + other.coverage_line_count)
    def __sub__(self, other):
        # difference of coverage fractions; an empty measurement counts
        # as fully covered (1.0)
        old_hits = other.coverage_line_hits if other is not None else 0
        old_count = other.coverage_line_count if other is not None else 0
        new_hits = self.coverage_line_hits if self is not None else 0
        new_count = self.coverage_line_count if self is not None else 0
        return ((new_hits/new_count if new_count else 1.0)
            - (old_hits/old_count if old_count else 1.0))
    def __rsub__(self, other):
        # lets the diff logic compute None - result
        return self.__class__.__sub__(other, self)
    def key(self):
        # higher coverage fraction sorts first; empty measurements sort last
        return -(self.coverage_line_hits/self.coverage_line_count
            if self.coverage_line_count else -1)
    _header = '%19s' % 'coverage/line'
    _nil = '%11s %7s' % ('-', '-')
    def __str__(self):
        # "hits/count percent", or dashes when nothing was measured
        return '%11s %7s' % (
            '%d/%d' % (self.coverage_line_hits, self.coverage_line_count)
            if self.coverage_line_count else '-',
            '%.1f%%' % (100*self.coverage_line_hits/self.coverage_line_count)
            if self.coverage_line_count else '-')
@result
class CoverageBranchResult(co.namedtuple('CoverageBranchResult',
        'coverage_branch_hits,coverage_branch_count')):
    """Branch-coverage measurement kept as hits/count so merges stay exact.

    Fix: the underlying namedtuple was named 'CoverageResult', mismatching
    the class name (and colliding with CoverageLineResult's typename),
    which made reprs misleading; it now matches like every other Result.
    """
    __slots__ = ()
    def __new__(cls, coverage_branch_hits=0, coverage_branch_count=0):
        # normalize to ints so CSV strings and ints both work
        return super().__new__(cls,
            int(coverage_branch_hits),
            int(coverage_branch_count))
    def __add__(self, other):
        # merging sums hits and counts independently (exact, unlike ratios)
        return self.__class__(
            self.coverage_branch_hits + other.coverage_branch_hits,
            self.coverage_branch_count + other.coverage_branch_count)
    def __sub__(self, other):
        # difference of coverage fractions; an empty measurement counts
        # as fully covered (1.0)
        old_hits = other.coverage_branch_hits if other is not None else 0
        old_count = other.coverage_branch_count if other is not None else 0
        new_hits = self.coverage_branch_hits if self is not None else 0
        new_count = self.coverage_branch_count if self is not None else 0
        return ((new_hits/new_count if new_count else 1.0)
            - (old_hits/old_count if old_count else 1.0))
    def __rsub__(self, other):
        # lets the diff logic compute None - result
        return self.__class__.__sub__(other, self)
    def key(self):
        # higher coverage fraction sorts first; empty measurements sort last
        return -(self.coverage_branch_hits/self.coverage_branch_count
            if self.coverage_branch_count else -1)
    _header = '%19s' % 'coverage/branch'
    _nil = '%11s %7s' % ('-', '-')
    def __str__(self):
        # "hits/count percent", or dashes when nothing was measured
        return '%11s %7s' % (
            '%d/%d' % (self.coverage_branch_hits, self.coverage_branch_count)
            if self.coverage_branch_count else '-',
            '%.1f%%' % (100*self.coverage_branch_hits/self.coverage_branch_count)
            if self.coverage_branch_count else '-')
def openio(path, mode='r'):
@@ -76,178 +220,171 @@ def main(**args):
for result in r:
file = result.pop('file', '')
name = result.pop('name', '')
prev = results[(file, name)]
for field in FIELDS:
try:
r = field.parse(result)
if field.name in prev:
results[(file, name)][field.name] = field.acc(
[prev[field.name], r])
else:
results[(file, name)][field.name] = r
except (KeyError, ValueError):
pass
for Result in RESULTS:
if all(result.get(f) not in {None, ''}
for f in Result._fields):
results[(file, name)][Result.__name__] = (
results[(file, name)].get(
Result.__name__, Result())
+ Result(*(result[f]
for f in Result._fields)))
except FileNotFoundError:
pass
# find fields
if args.get('all_fields'):
fields = FIELDS
elif args.get('fields') is not None:
fields_dict = {field.name: field for field in FIELDS}
fields = [fields_dict[f] for f in args['fields']]
else:
fields = []
for field in FIELDS:
if any(field.name in result for result in results.values()):
fields.append(field)
# find total for every field
total = {}
for result in results.values():
for field in fields:
if field.name in result and field.name in total:
total[field.name] = field.acc(
[total[field.name], result[field.name]])
elif field.name in result:
total[field.name] = result[field.name]
# find previous results?
if args.get('diff'):
prev_results = co.defaultdict(lambda: {})
try:
with openio(args['diff']) as f:
r = csv.DictReader(f)
for result in r:
file = result.pop('file', '')
name = result.pop('name', '')
prev = prev_results[(file, name)]
for field in FIELDS:
try:
r = field.parse(result)
if field.name in prev:
prev_results[(file, name)][field.name] = field.acc(
[prev[field.name], r])
else:
prev_results[(file, name)][field.name] = r
except (KeyError, ValueError):
pass
except FileNotFoundError:
pass
for path in args.get('csv_paths', '-'):
try:
with openio(args['diff']) as f:
r = csv.DictReader(f)
for result in r:
file = result.pop('file', '')
name = result.pop('name', '')
for Result in RESULTS:
if all(result.get(f) not in {None, ''}
for f in Result._fields):
prev_results[(file, name)][Result.__name__] = (
prev_results[(file, name)].get(
Result.__name__, Result())
+ Result(*(result[f]
for f in Result._fields)))
except FileNotFoundError:
pass
prev_total = {}
for result in prev_results.values():
for field in fields:
if field.name in result and field.name in prev_total:
prev_total[field.name] = field.acc(
[prev_total[field.name], result[field.name]])
elif field.name in result:
prev_total[field.name] = result[field.name]
# filter our result types by results that are present
if 'all' in args['fields']:
filtered_results = RESULTS
else:
filtered_results = [
Result for Result in RESULTS
if (any(f.startswith(r)
for r in args['fields']
for f in Result._fields)
or any(Result._header.strip().startswith(r)
for r in args['fields']))]
# figure out a sort key
if args.get('sort'):
key_Result = next(
Result for Result in RESULTS
if (any(f.startswith(args['sort'])
for f in Result._fields)
or Result._header.strip().startswith(args['sort'])))
key = lambda result: result.get(key_Result.__name__, key_Result()).key()
reverse = False
elif args.get('reverse_sort'):
key_Result = next(
Result for Result in RESULTS
if (any(f.startswith(args['reverse_sort'])
for f in Result._fields)
or Result._header.strip().startswith(args['reverse_sort'])))
key = lambda result: result.get(key_Result.__name__, key_Result()).key()
reverse = True
else:
key = lambda _: None
reverse = False
# write merged results to CSV
if args.get('output'):
with openio(args['output'], 'w') as f:
w = csv.DictWriter(f, sum(
(Result._fields for Result in filtered_results),
('file', 'name')))
w.writeheader()
for (file, name), result in sorted(results.items()):
w.writerow(ft.reduce(dict.__or__,
(r._asdict() for r in result.values()),
{'file': file, 'name': name}))
# print results
def dedup_entries(results, by='name'):
def print_header(by):
if by == 'total':
entry = lambda k: 'TOTAL'
elif by == 'file':
entry = lambda k: k[0]
else:
entry = lambda k: k[1]
if not args.get('diff'):
print('%-36s %s' % (by,
' '.join(Result._header for Result in filtered_results)))
else:
old = {entry(k) for k in results.keys()}
new = {entry(k) for k in prev_results.keys()}
print('%-36s %s' % (
'%s (%d added, %d removed)' % (by,
sum(1 for k in new if k not in old),
sum(1 for k in old if k not in new))
if by else '',
' '.join('%s%-10s' % (Result._header, '')
for Result in filtered_results)))
def print_entries(by):
if by == 'total':
entry = lambda k: 'TOTAL'
elif by == 'file':
entry = lambda k: k[0]
else:
entry = lambda k: k[1]
entries = co.defaultdict(lambda: {})
for (file, func), result in results.items():
entry = (file if by == 'file' else func)
prev = entries[entry]
for field in fields:
if field.name in result and field.name in prev:
entries[entry][field.name] = field.acc(
[prev[field.name], result[field.name]])
elif field.name in result:
entries[entry][field.name] = result[field.name]
return entries
def sorted_entries(entries):
if args.get('sort') is not None:
field = {field.name: field for field in FIELDS}[args['sort']]
return sorted(entries, key=lambda x: (
-(field.key(x[1][field.name])) if field.name in x[1] else -1, x))
elif args.get('reverse_sort') is not None:
field = {field.name: field for field in FIELDS}[args['reverse_sort']]
return sorted(entries, key=lambda x: (
+(field.key(x[1][field.name])) if field.name in x[1] else -1, x))
else:
return sorted(entries)
def print_header(by=''):
if not args.get('diff'):
print('%-36s' % by, end='')
for field in fields:
print((' '+field.fmt) % field.name, end='')
print()
else:
print('%-36s' % by, end='')
for field in fields:
print((' '+field.fmt) % field.name, end='')
print(' %-9s' % '', end='')
print()
def print_entry(name, result):
print('%-36s' % name, end='')
for field in fields:
r = result.get(field.name)
if r is not None:
print((' '+field.fmt) % field.repr(r), end='')
else:
print((' '+field.fmt) % '-', end='')
print()
def print_diff_entry(name, old, new):
print('%-36s' % name, end='')
for field in fields:
n = new.get(field.name)
if n is not None:
print((' '+field.fmt) % field.repr(n), end='')
else:
print((' '+field.fmt) % '-', end='')
o = old.get(field.name)
ratio = (
0.0 if m.isinf(o or 0) and m.isinf(n or 0)
else +float('inf') if m.isinf(n or 0)
else -float('inf') if m.isinf(o or 0)
else 0.0 if not o and not n
else +1.0 if not o
else -1.0 if not n
else field.ratio(o, n))
print(' %-9s' % (
'' if not ratio
else '(+∞%)' if ratio > 0 and m.isinf(ratio)
else '(-∞%)' if ratio < 0 and m.isinf(ratio)
else '(%+.1f%%)' % (100*ratio)), end='')
print()
def print_entries(by='name'):
entries = dedup_entries(results, by=by)
for k, result in results.items():
entries[entry(k)] |= {
r.__class__.__name__: entries[entry(k)].get(
r.__class__.__name__, r.__class__()) + r
for r in result.values()}
if not args.get('diff'):
print_header(by=by)
for name, result in sorted_entries(entries.items()):
print_entry(name, result)
for name, result in sorted(entries.items(),
key=lambda p: (key(p[1]), p),
reverse=reverse):
print('%-36s %s' % (name, ' '.join(
str(result.get(Result.__name__, Result._nil))
for Result in filtered_results)))
else:
prev_entries = dedup_entries(prev_results, by=by)
print_header(by='%s (%d added, %d removed)' % (by,
sum(1 for name in entries if name not in prev_entries),
sum(1 for name in prev_entries if name not in entries)))
for name, result in sorted_entries(entries.items()):
if args.get('all') or result != prev_entries.get(name, {}):
print_diff_entry(name, prev_entries.get(name, {}), result)
prev_entries = co.defaultdict(lambda: {})
for k, result in prev_results.items():
prev_entries[entry(k)] |= {
r.__class__.__name__: prev_entries[entry(k)].get(
r.__class__.__name__, r.__class__()) + r
for r in result.values()}
def print_totals():
if not args.get('diff'):
print_entry('TOTAL', total)
else:
print_diff_entry('TOTAL', prev_total, total)
diff_entries = {
name: (prev_entries.get(name), entries.get(name))
for name in (entries.keys() | prev_entries.keys())}
if args.get('summary'):
print_header()
print_totals()
for name, (old, new) in sorted(diff_entries.items(),
key=lambda p: (key(p[1][1]), p)):
fields = []
changed = False
for Result in filtered_results:
o = old.get(Result.__name__) if old is not None else None
n = new.get(Result.__name__) if new is not None else None
ratio = n - o if n is not None or o is not None else 0
changed = changed or ratio
fields.append('%s%-10s' % (
n if n is not None else Result._nil,
'' if not ratio
else ' (+∞%)' if ratio > 0 and m.isinf(ratio)
else ' (-∞%)' if ratio < 0 and m.isinf(ratio)
else ' (%+.1f%%)' % (100*ratio)))
if changed or args.get('all'):
print('%-36s %s' % (name, ' '.join(fields)))
if args.get('quiet'):
pass
elif args.get('summary'):
print_header('')
print_entries('total')
elif args.get('files'):
print_entries(by='file')
print_totals()
print_header('file')
print_entries('file')
print_entries('total')
else:
print_entries(by='name')
print_totals()
print_header('name')
print_entries('name')
print_entries('total')
if __name__ == "__main__":
@@ -257,17 +394,21 @@ if __name__ == "__main__":
description="Summarize measurements")
parser.add_argument('csv_paths', nargs='*', default='-',
help="Description of where to find *.csv files. May be a directory \
or list of paths. *.csv files will be merged to show the total \
coverage.")
or list of paths.")
parser.add_argument('-q', '--quiet', action='store_true',
help="Don't show anything, useful with -o.")
parser.add_argument('-o', '--output',
help="Specify CSV file to store results.")
parser.add_argument('-d', '--diff',
help="Specify CSV file to diff against.")
parser.add_argument('-a', '--all', action='store_true',
help="Show all objects, not just the ones that changed.")
parser.add_argument('-e', '--all-fields', action='store_true',
help="Show all fields, even those with no results.")
parser.add_argument('-f', '--fields', type=lambda x: re.split('\s*,\s*', x),
parser.add_argument('-f', '--fields',
type=lambda x: set(re.split('\s*,\s*', x)),
default=FIELDS,
help="Comma separated list of fields to print, by default all fields \
that are found in the CSV files are printed.")
that are found in the CSV files are printed. \"all\" prints all \
fields this script knows. Defaults to %r." % FIELDS)
parser.add_argument('-s', '--sort',
help="Sort by this field.")
parser.add_argument('-S', '--reverse-sort',