Consistent handling of by/field arguments for plot.py and summary.py

Now both scripts also fall back to guessing which fields to use based on
which fields can be converted to integers. This is more fallible, and
doesn't work for tests/benchmarks, but in those cases explicit fields
can be used (which is what would be needed without guessing anyway).
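
As a rough sketch of the guessing behavior described above (not part of this commit's code), the snippet below keeps only the fields whose values parse as a numeric type in every row; the helper name and sample rows are made up for illustration:

import collections as co

def guess_fields(results, by=()):
    # candidate parsers, roughly analogous to IntField/FloatField
    parsers = [int, float]
    fields = co.OrderedDict()
    for r in results:
        for k, v in r.items():
            if k is None or k in by:
                continue
            ok = []
            for parse in fields.get(k, parsers):
                try:
                    parse(v)
                    ok.append(parse)
                except ValueError:
                    pass
            fields[k] = ok
    # keep only fields that still have at least one working parser
    return [k for k, v in fields.items() if v]

rows = [
    {'function': 'lfs_mount', 'code_size': '1234'},
    {'function': 'lfs_format', 'code_size': '5678'},
]
print(guess_fields(rows))  # ['code_size']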
Christopher Haster
2022-09-23 00:27:09 -05:00
parent 7591d9cf74
commit fb58148df2
3 changed files with 209 additions and 206 deletions

@@ -16,6 +16,7 @@ import collections as co
import csv
import functools as ft
import glob
import itertools as it
import math as m
import os
import re
@@ -23,31 +24,13 @@ import re
CSV_PATHS = ['*.csv']
# Defaults are common fields generated by other littlefs scripts
MERGES = {
'add': (
['code_size', 'data_size', 'stack_frame', 'struct_size',
'coverage_lines', 'coverage_branches',
'test_passed',
'bench_read', 'bench_prog', 'bench_erased'],
lambda xs: sum(xs[1:], start=xs[0])
),
'mul': (
[],
lambda xs: m.prod(xs[1:], start=xs[0])
),
'min': (
[],
min
),
'max': (
['stack_limit', 'coverage_hits'],
max
),
'avg': (
[],
lambda xs: sum(xs[1:], start=xs[0]) / len(xs)
),
# supported merge operations
OPS = {
'add': lambda xs: sum(xs[1:], start=xs[0]),
'mul': lambda xs: m.prod(xs[1:], start=xs[0]),
'min': min,
'max': max,
'avg': lambda xs: sum(xs[1:], start=xs[0]) / len(xs),
}
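
For reference, a hypothetical use of the merge operations above: each op reduces the list of values collected for one field across duplicate rows. Note the ops reduce from xs[0] rather than from 0, presumably so they also work on field types such as FracField that define + between instances but not with a plain integer:

import math as m

OPS = {
    'add': lambda xs: sum(xs[1:], start=xs[0]),
    'mul': lambda xs: m.prod(xs[1:], start=xs[0]),
    'min': min,
    'max': max,
    'avg': lambda xs: sum(xs[1:], start=xs[0]) / len(xs),
}

code_sizes = [100, 250, 50]
print(OPS['add'](code_sizes))  # 400
print(OPS['max'](code_sizes))  # 250
print(OPS['avg'](code_sizes))  # 133.33...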
@@ -273,112 +256,142 @@ class FracField(co.namedtuple('FracField', 'a,b')):
def __truediv__(self, n):
return FracField(self.a / n, self.b / n)
# available types
TYPES = [IntField, FloatField, FracField]
def homogenize(results, *,
by=None,
fields=None,
merges=None,
renames=None,
renames=[],
define={},
types=None,
**_):
results = results.copy()
# rename fields?
if renames is not None:
if renames:
for r in results:
# make a copy so renames can overlap
r_ = {}
for new_k, old_k in renames:
if old_k in r:
r_[new_k] = r[old_k]
r.update(r_)
# filter by matching defines
if define:
results_ = []
for r in results:
results_.append({renames.get(k, k): v for k, v in r.items()})
if all(k in r and r[k] in vs for k, vs in define):
results_.append(r)
results = results_
# find all fields
if not fields:
# if fields not specified, try to guess from data
if fields is None:
fields = co.OrderedDict()
for r in results:
# also remove None fields, these can get introduced by
# csv.DictReader when header and rows mismatch
fields.update((k, v) for k, v in r.items() if k is not None)
fields = list(fields.keys())
for k, v in r.items():
if by is not None and k in by:
continue
types_ = []
for type in fields.get(k, TYPES):
try:
type(v)
types_.append(type)
except ValueError:
pass
fields[k] = types_
fields = list(k for k,v in fields.items() if v)
# infer 'by' fields?
if by is None:
by = co.OrderedDict()
for r in results:
# also ignore None keys, these are introduced by csv.DictReader
# when header + row mismatch
by.update((k, True) for k in r.keys()
if k is not None
and k not in fields
and not any(k == old_k for _, old_k in renames))
by = list(by.keys())
# go ahead and clean up none values, these can have a few forms
results_ = []
for r in results:
results_.append({
k: r[k] for k in fields
if r.get(k) is not None and not(
k: r[k] for k in it.chain(by, fields)
if r.get(k) is not None and not (
isinstance(r[k], str)
and re.match('^\s*[+-]?\s*$', r[k]))})
results = results_
# find best type for all fields
def try_(x, type):
try:
type(x)
return True
except ValueError:
return False
if types is None:
def is_type(x, type):
try:
type(x)
return True
except ValueError:
return False
types = {}
for k in fields:
if merges is not None and merges.get(k):
for type in [IntField, FloatField, FracField]:
if all(k not in r or try_(r[k], type) for r in results_):
types[k] = type
break
else:
print("no type matches field %r?" % k)
sys.exit(-1)
for type in TYPES:
if all(k not in r or is_type(r[k], type) for r in results_):
types[k] = type
break
else:
print("no type matches field %r?" % k)
sys.exit(-1)
# homogenize types
for k in fields:
if k in types:
for r in results_:
if k in r:
r[k] = types[k](r[k])
for r in results:
for k in fields:
if k in r:
r[k] = types[k](r[k])
return fields, types, results_
return by, fields, types, results
def fold(results, *,
fields=None,
merges=None,
by=None,
by=[],
fields=[],
ops={},
**_):
folding = co.OrderedDict()
if by is None:
by = [k for k in fields if k not in merges]
for r in results:
name = tuple(r.get(k) for k in by)
name = tuple(r.get(k, '') for k in by)
if name not in folding:
folding[name] = {k: [] for k in fields if k in merges}
folding[name] = {k: [] for k in fields}
for k in fields:
# drop all fields fields without a type
if k in merges and k in r:
if k in r:
folding[name][k].append(r[k])
# merge fields, we need the count at this point for averages
folded = []
types = {}
for name, r in folding.items():
r_ = {}
for k, vs in r.items():
if vs:
_, merge = MERGES[merges[k]]
r_[k] = merge(vs)
# sum fields by default
op = OPS[ops.get(k, 'add')]
r_[k] = op(vs)
# drop all rows without any fields
# and drop all empty keys
# drop any rows without fields and any empty keys
if r_:
folded.append(dict(
{k: n for k, n in zip(by, name) if n},
{k: v for k, v in zip(by, name) if v},
**r_))
fields_ = by + [k for k in fields if k in merges]
return fields_, folded
return folded
def table(results, diff_results=None, *,
by=None,
fields=None,
types=None,
merges=None,
by=None,
ops=None,
sort=None,
reverse_sort=None,
summary=False,
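
To make the by/fields/ops split concrete, here is a hypothetical, simplified fold over made-up rows; it mirrors the fold above but is not the same code (a minimal OPS table is inlined so the snippet stands alone):

import collections as co

OPS = {'add': lambda xs: sum(xs[1:], start=xs[0])}

def fold(results, by=[], fields=[], ops={}):
    # group rows by their 'by' values, collecting each field's values
    folding = co.OrderedDict()
    for r in results:
        name = tuple(r.get(k, '') for k in by)
        folding.setdefault(name, {k: [] for k in fields})
        for k in fields:
            if k in r:
                folding[name][k].append(r[k])
    # merge each field's values, defaulting to the 'add' op
    folded = []
    for name, r in folding.items():
        r_ = {k: OPS[ops.get(k, 'add')](vs) for k, vs in r.items() if vs}
        if r_:
            folded.append(dict({k: v for k, v in zip(by, name) if v}, **r_))
    return folded

rows = [
    {'file': 'lfs.c', 'function': 'lfs_mount',  'code_size': 1234},
    {'file': 'lfs.c', 'function': 'lfs_format', 'code_size': 5678},
]
print(fold(rows, by=['file'], fields=['code_size']))
# [{'file': 'lfs.c', 'code_size': 6912}]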
@@ -387,29 +400,18 @@ def table(results, diff_results=None, *,
**_):
all_, all = all, __builtins__.all
# fold
if by is not None:
fields, results = fold(results, fields=fields, merges=merges, by=by)
if diff_results is not None:
_, diff_results = fold(diff_results,
fields=fields, merges=merges, by=by)
table = {
tuple(r.get(k,'') for k in fields if k not in merges): r
for r in results}
diff_table = {
tuple(r.get(k,'') for k in fields if k not in merges): r
for r in diff_results or []}
table = {tuple(r.get(k,'') for k in by): r for r in results}
diff_table = {tuple(r.get(k,'') for k in by): r for r in diff_results or []}
# sort, note that python's sort is stable
names = list(table.keys() | diff_table.keys())
names.sort()
if diff_results is not None:
names.sort(key=lambda n: [
names.sort(key=lambda n: tuple(
-types[k].ratio(
table.get(n,{}).get(k),
diff_table.get(n,{}).get(k))
for k in fields if k in merges])
for k in fields))
if sort:
names.sort(key=lambda n: tuple(
(table[n][k],) if k in table.get(n,{}) else ()
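
As a small illustration of the keying change above (with hypothetical rows): current and diff results are matched up by the tuple of their 'by' values, so a diff only ever compares rows from the same group:

by = ['file', 'function']
results      = [{'file': 'lfs.c', 'function': 'lfs_mount', 'code_size': 1300}]
diff_results = [{'file': 'lfs.c', 'function': 'lfs_mount', 'code_size': 1234}]

table      = {tuple(r.get(k, '') for k in by): r for r in results}
diff_table = {tuple(r.get(k, '') for k in by): r for r in diff_results}

name = ('lfs.c', 'lfs_mount')
print(table[name]['code_size'] - diff_table[name]['code_size'])  # 66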
@@ -423,7 +425,7 @@ def table(results, diff_results=None, *,
# print header
print('%-36s' % ('%s%s' % (
','.join(k for k in fields if k not in merges),
','.join(k for k in by),
' (%d added, %d removed)' % (
sum(1 for n in table if n not in diff_table),
sum(1 for n in diff_table if n not in table))
@@ -433,19 +435,19 @@ def table(results, diff_results=None, *,
if diff_results is None:
print(' %s' % (
' '.join(k.rjust(len(types[k].none))
for k in fields if k in merges)))
for k in fields)))
elif percent:
print(' %s' % (
' '.join(k.rjust(len(types[k].diff_none))
for k in fields if k in merges)))
for k in fields)))
else:
print(' %s %s %s' % (
' '.join(('o'+k).rjust(len(types[k].diff_none))
for k in fields if k in merges),
for k in fields),
' '.join(('n'+k).rjust(len(types[k].diff_none))
for k in fields if k in merges),
for k in fields),
' '.join(('d'+k).rjust(len(types[k].diff_none))
for k in fields if k in merges)))
for k in fields)))
# print entries
if not summary:
@@ -454,7 +456,7 @@ def table(results, diff_results=None, *,
if diff_results is not None:
diff_r = diff_table.get(name, {})
ratios = [types[k].ratio(r.get(k), diff_r.get(k))
for k in fields if k in merges]
for k in fields]
if not any(ratios) and not all_:
continue
@@ -463,12 +465,12 @@ def table(results, diff_results=None, *,
print(' %s' % (
' '.join(r[k].table()
if k in r else types[k].none
for k in fields if k in merges)))
for k in fields)))
elif percent:
print(' %s%s' % (
' '.join(r[k].diff_table()
if k in r else types[k].diff_none
for k in fields if k in merges),
for k in fields),
' (%s)' % ', '.join(
'+∞%' if t == float('+inf')
else '-∞%' if t == float('-inf')
@@ -478,13 +480,13 @@ def table(results, diff_results=None, *,
print(' %s %s %s%s' % (
' '.join(diff_r[k].diff_table()
if k in diff_r else types[k].diff_none
for k in fields if k in merges),
for k in fields),
' '.join(r[k].diff_table()
if k in r else types[k].diff_none
for k in fields if k in merges),
for k in fields),
' '.join(types[k].diff_diff(r.get(k), diff_r.get(k))
if k in r or k in diff_r else types[k].diff_none
for k in fields if k in merges),
for k in fields),
' (%s)' % ', '.join(
'+∞%' if t == float('+inf')
else '-∞%' if t == float('-inf')
@@ -494,26 +496,25 @@ def table(results, diff_results=None, *,
if any(ratios) else ''))
# print total
_, total = fold(results, fields=fields, merges=merges, by=[])
total = fold(results, by=[], fields=fields, ops=ops)
r = total[0] if total else {}
if diff_results is not None:
_, diff_total = fold(diff_results,
fields=fields, merges=merges, by=[])
diff_total = fold(diff_results, by=[], fields=fields, ops=ops)
diff_r = diff_total[0] if diff_total else {}
ratios = [types[k].ratio(r.get(k), diff_r.get(k))
for k in fields if k in merges]
for k in fields]
print('%-36s' % 'TOTAL', end='')
if diff_results is None:
print(' %s' % (
' '.join(r[k].table()
if k in r else types[k].none
for k in fields if k in merges)))
for k in fields)))
elif percent:
print(' %s%s' % (
' '.join(r[k].diff_table()
if k in r else types[k].diff_none
for k in fields if k in merges),
for k in fields),
' (%s)' % ', '.join(
'+∞%' if t == float('+inf')
else '-∞%' if t == float('-inf')
@@ -523,13 +524,13 @@ def table(results, diff_results=None, *,
print(' %s %s %s%s' % (
' '.join(diff_r[k].diff_table()
if k in diff_r else types[k].diff_none
for k in fields if k in merges),
for k in fields),
' '.join(r[k].diff_table()
if k in r else types[k].diff_none
for k in fields if k in merges),
for k in fields),
' '.join(types[k].diff_diff(r.get(k), diff_r.get(k))
if k in r or k in diff_r else types[k].diff_none
for k in fields if k in merges),
for k in fields),
' (%s)' % ', '.join(
'+∞%' if t == float('+inf')
else '-∞%' if t == float('-inf')
@@ -539,56 +540,35 @@ def table(results, diff_results=None, *,
if any(ratios) else ''))
def main(csv_paths, *, fields=None, by=None, **args):
# figure out what fields to use
renames = {}
def main(csv_paths, *,
by=None,
fields=None,
define=[],
**args):
# separate out renames
renames = [k.split('=', 1)
for k in it.chain(by or [], fields or [])
if '=' in k]
if by is not None:
by = [k.split('=', 1)[0] for k in by]
if fields is not None:
fields_ = []
for name in fields:
if '=' in name:
a, b = name.split('=', 1)
renames[b] = a
name = a
fields_.append(name)
fields = fields_
fields = [k.split('=', 1)[0] for k in fields]
if by is not None:
by_ = []
for name in by:
if '=' in name:
a, b = name.split('=', 1)
renames[b] = a
name = a
by_.append(name)
by = by_
# include 'by' fields in fields, it doesn't make sense to not
if fields is not None and by is not None:
fields[:0] = [k for k in by if k not in fields]
# use preconfigured merge operations unless any merge operation is
# explictly specified
merge_args = (args
if any(args.get(m) for m in MERGES.keys())
else {m: k for m, (k, _) in MERGES.items()})
merges = {}
for m in MERGES.keys():
for k in merge_args.get(m, []):
if k in merges:
print("conflicting merge type for field %r?" % k)
# figure out merge operations
ops = {}
for m in OPS.keys():
for k in args.get(m, []):
if k in ops:
print("conflicting op for field %r?" % k)
sys.exit(-1)
merges[k] = m
# allow renames to apply to merges
for m in MERGES.keys():
for k in merge_args.get(m, []):
if renames.get(k, k) not in merges:
merges[renames.get(k, k)] = m
# ignore merges that conflict with 'by' fields
if by is not None:
for k in by:
if k in merges:
del merges[k]
ops[k] = m
# rename ops?
if renames:
ops_ = {}
for new_k, old_k in renames:
if old_k in ops:
ops_[new_k] = ops[old_k]
ops.update(ops_)
# find CSV files
paths = []
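
A rough sketch of the new_name=old_name and merge-flag handling above, with hypothetical command-line values (the real script gets these from argparse):

import itertools as it

by = ['file', 'func=function']
fields = ['size=code_size']
args = {'max': ['stack_limit']}

# separate out renames, then strip them from by/fields
renames = [k.split('=', 1) for k in it.chain(by, fields) if '=' in k]
by = [k.split('=', 1)[0] for k in by]
fields = [k.split('=', 1)[0] for k in fields]
print(renames)  # [['func', 'function'], ['size', 'code_size']]
print(by)       # ['file', 'func']
print(fields)   # ['size']

# map merge flags like --max stack_limit onto per-field ops
ops = {}
for m in ('add', 'mul', 'min', 'max', 'avg'):
    for k in args.get(m, []):
        ops[k] = m
print(ops)  # {'stack_limit': 'max'}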
@@ -614,17 +594,17 @@ def main(csv_paths, *, fields=None, by=None, **args):
pass
# homogenize
fields, types, results = homogenize(results,
fields=fields, merges=merges, renames=renames)
by, fields, types, results = homogenize(results,
by=by, fields=fields, renames=renames, define=define)
# fold to remove duplicates
fields, results = fold(results,
fields=fields, merges=merges)
results = fold(results,
by=by, fields=fields, ops=ops)
# write results to CSV
if args.get('output'):
with openio(args['output'], 'w') as f:
writer = csv.DictWriter(f, fields)
writer = csv.DictWriter(f, by + fields)
writer.writeheader()
for r in results:
writer.writerow(r)
@@ -641,22 +621,22 @@ def main(csv_paths, *, fields=None, by=None, **args):
pass
# homogenize
_, _, diff_results = homogenize(diff_results,
fields=fields, merges=merges, renames=renames, types=types)
_, _, _, diff_results = homogenize(diff_results,
by=by, fields=fields, renames=renames, define=define, types=types)
# fold to remove duplicates
_, diff_results = fold(diff_results,
fields=fields, merges=merges)
diff_results = fold(diff_results,
by=by, fields=fields, ops=ops)
# print table
if not args.get('quiet'):
table(
results,
diff_results if args.get('diff') else None,
fields=fields,
types=types,
merges=merges,
by=by,
fields=fields,
ops=ops,
types=types,
**args)
@@ -689,36 +669,41 @@ if __name__ == "__main__":
'-p', '--percent',
action='store_true',
help="Only show percentage change, not a full diff.")
parser.add_argument(
'-f', '--fields',
type=lambda x: [x.strip() for x in x.split(',')],
help="Only show these fields. Can rename fields "
"with new_name=old_name.")
parser.add_argument(
'-b', '--by',
type=lambda x: [x.strip() for x in x.split(',')],
help="Group by these fields. Can rename fields "
"with new_name=old_name.")
help="Group by these fields. All other fields will be merged as "
"needed. Can rename fields with new_name=old_name.")
parser.add_argument(
'-f', '--fields',
type=lambda x: [x.strip() for x in x.split(',')],
help="Use these fields. Can rename fields with new_name=old_name.")
parser.add_argument(
'-D', '--define',
type=lambda x: (lambda k,v: (k, set(v.split(','))))(*x.split('=', 1)),
action='append',
help="Only include rows where this field is this value. May include "
"comma-separated options.")
parser.add_argument(
'--add',
type=lambda x: [x.strip() for x in x.split(',')],
help="Add these fields when merging.")
help="Add these fields (the default).")
parser.add_argument(
'--mul',
type=lambda x: [x.strip() for x in x.split(',')],
help="Multiply these fields when merging.")
help="Multiply these fields.")
parser.add_argument(
'--min',
type=lambda x: [x.strip() for x in x.split(',')],
help="Take the minimum of these fields when merging.")
help="Take the minimum of these fields.")
parser.add_argument(
'--max',
type=lambda x: [x.strip() for x in x.split(',')],
help="Take the maximum of these fields when merging.")
help="Take the maximum of these fields.")
parser.add_argument(
'--avg',
type=lambda x: [x.strip() for x in x.split(',')],
help="Average these fields when merging.")
help="Average these fields.")
parser.add_argument(
'-s', '--sort',
type=lambda x: [x.strip() for x in x.split(',')],