Files
littlefs/scripts/avg.py
2023-11-05 15:55:07 -06:00

313 lines
9.6 KiB
Python
Executable File

#!/usr/bin/env python3
#
# Compute averages/etc of benchmark measurements
#
import builtins
import collections as co
import csv
import itertools as it
import math as m
import os
import sys
def openio(path, mode='r', buffering=-1):
    """Open path like the builtin open(), treating '-' as stdin/stdout.

    For '-', stdin is chosen when mode contains 'r', otherwise stdout. The
    underlying descriptor is duplicated first, so closing the returned file
    object does not close the process-wide stream.
    """
    # ordinary paths go straight to open()
    if path != '-':
        return open(path, mode, buffering)

    # '-' selects one of the standard streams based on the mode
    stream = sys.stdin if 'r' in mode else sys.stdout
    return os.fdopen(os.dup(stream.fileno()), mode, buffering)
# parse different data representations
def dat(x):
    """Parse a textual measurement into an int or a float.

    If x contains a '/', only the part before the first '/' is parsed.
    Integers are tried first, with base auto-detection via int(x, 0), so
    prefixes such as 0x are accepted; then finite floats.

    Raises ValueError if x is neither, or if it parses to infinity or nan.
    """
    # allow the first part of an a/b fraction
    if '/' in x:
        x, _ = x.split('/', 1)

    # first try as int
    try:
        return int(x, 0)
    except ValueError:
        pass

    # then try as float
    try:
        v = float(x)
    except ValueError:
        v = None

    # just don't allow infinity or nan, this check must happen before
    # returning, otherwise it can never run
    if v is not None and not (m.isinf(v) or m.isnan(v)):
        return v

    # else give up
    raise ValueError("invalid dat %r" % x)
def collect(csv_paths, renames=[], defines=[]):
    """Read rows from a set of CSV files.

    csv_paths is an iterable of paths passed to openio. renames is a list
    of (new_name, old_name) pairs; for each pair, if old_name is present
    in a row its value is also stored under new_name. defines is a list of
    (field, allowed_values) pairs; a row is kept only if every field is
    present and its value is in allowed_values.

    Returns (fields, results), where fields is the list of column names in
    first-seen order across all files, and results is the list of row dicts
    that passed the defines filter. Files that do not exist are skipped.

    The default lists are never mutated here.
    """
    # collect results from CSV files
    fields = []
    results = []
    for path in csv_paths:
        try:
            with openio(path) as f:
                reader = csv.DictReader(f, restval='')
                # fieldnames is None when the file has no header row at
                # all (an empty file), treat that as no columns
                fields.extend(
                    k for k in (reader.fieldnames or [])
                    if k not in fields)
                for r in reader:
                    # apply any renames
                    if renames:
                        # make a copy so renames can overlap
                        r_ = {}
                        for new_k, old_k in renames:
                            if old_k in r:
                                r_[new_k] = r[old_k]
                        r.update(r_)

                    # filter by matching defines
                    if not all(k in r and r[k] in vs for k, vs in defines):
                        continue

                    results.append(r)
        except FileNotFoundError:
            # missing inputs are deliberately ignored
            pass

    return fields, results
def main(csv_paths, output, *,
        sum=False,
        prod=False,
        min=False,
        max=False,
        bnd=False,
        avg=False,
        stddev=False,
        gmean=False,
        gstddev=False,
        by=None,
        meas=None,
        seeds=None,
        fields=None,
        defines=[]):
    """Compute summary statistics over CSV measurements and write a CSV.

    Rows read from csv_paths are grouped by the values of the by fields.
    For each group, one output row is written per requested statistic
    (sum, prod, min, max, bnd, avg, stddev, gmean, gstddev), computed
    independently for every field in fields. bnd writes two rows, the min
    and then the max. If no statistic is requested, avg is used.

    by, seeds and fields are lists of (name, old_names) pairs, where
    old_names is a possibly-empty tuple of source columns to copy into
    name. If by or fields is None it is guessed as every column not claimed
    by the other options. At least one of the two must be given, otherwise
    an error is printed and the process exits with -1.

    If meas is given it is added to the grouping fields, and its value in
    each output row is suffixed with '+' and the statistic's name, or is
    just the statistic's name if no row in the group had that column.

    defines is a list of (field, allowed_values) pairs used to filter rows.
    Results are written to output, where '-' means stdout.
    """
    # the flag parameters shadow the builtins of the same name, so stash
    # the flags and rebind the names to the real functions
    #
    # note __builtins__ can't be used for this, it is a plain dict instead
    # of a module whenever this file is imported rather than run directly
    sum_, sum = sum, builtins.sum
    min_, min = min, builtins.min
    max_, max = max, builtins.max

    # default to averaging
    if (not sum_
            and not prod
            and not min_
            and not max_
            and not bnd
            and not avg
            and not stddev
            and not gmean
            and not gstddev):
        avg = True

    # separate out renames
    renames = list(it.chain.from_iterable(
        ((k, v) for v in vs)
        for k, vs in it.chain(by or [], seeds or [], fields or [])))
    if by is not None:
        by = [k for k, _ in by]
    if seeds is not None:
        seeds = [k for k, _ in seeds]
    if fields is not None:
        fields = [k for k, _ in fields]

    if by is None and fields is None:
        print("error: needs --by or --fields to figure out fields",
            file=sys.stderr)
        sys.exit(-1)

    # collect results from csv files
    fields_, results = collect(csv_paths, renames, defines)

    # if by not specified, guess it's anything not in
    # seeds/fields/renames/defines
    if by is None:
        by = [
            k for k in fields_
            if k not in (seeds or [])
                and k not in (fields or [])
                and not any(k == old_k for _, old_k in renames)
                and not any(k == k_ for k_, _ in defines)]

    # if fields not specified, guess it's anything not in
    # by/seeds/renames/defines
    if fields is None:
        fields = [
            k for k in fields_
            if k not in (by or [])
                and k not in (seeds or [])
                and not any(k == old_k for _, old_k in renames)
                and not any(k == k_ for k_, _ in defines)]

    # add meas to by if it isn't already present
    if meas is not None and meas not in by:
        by.append(meas)

    # convert fields to ints/floats, blank cells count as 0
    for r in results:
        for k in fields:
            if k in r and isinstance(r[k], str):
                r[k] = dat(r[k]) if r[k].strip() else 0

    # organize by 'by' values
    results_ = co.defaultdict(list)
    for r in results:
        key = tuple(r.get(k, '') for k in by)
        results_[key].append(r)
    results = results_

    # for each key calculate the avgs/etc
    avgs = []
    for key, rs in results.items():
        vs = {f: [] for f in fields}
        meas__ = None
        for r in rs:
            for f in fields:
                vs[f].append(r.get(f, 0))
            if meas is not None and meas in r:
                meas__ = r[meas]

        def append(meas_, f_):
            # emit one output row, applying f_ to each field's values
            avgs.append(
                dict(zip(by, key))
                | {f: f_(vs_) for f, vs_ in vs.items()}
                | ({} if meas is None
                    else {meas: meas_} if meas__ is None
                    else {meas: meas__+'+'+meas_}))

        if sum_: append('sum', lambda vs: sum(vs))
        if prod: append('prod', lambda vs: m.prod(vs))
        if min_: append('min', lambda vs: min(vs, default=0))
        if max_: append('max', lambda vs: max(vs, default=0))
        # bounds are two rows sharing the same label, min then max
        if bnd: append('bnd', lambda vs: min(vs, default=0))
        if bnd: append('bnd', lambda vs: max(vs, default=0))
        if avg: append('avg', lambda vs: sum(vs) / max(len(vs), 1))
        if stddev: append('stddev', lambda vs: (
            lambda avg: m.sqrt(
                sum((v - avg)**2 for v in vs) / max(len(vs), 1))
            )(sum(vs) / max(len(vs), 1)))
        if gmean: append('gmean', lambda vs:
            m.prod(float(v) for v in vs)**(1 / max(len(vs), 1)))
        if gstddev: append('gstddev', lambda vs: (
            lambda gmean: m.exp(m.sqrt(
                    sum(m.log(v/gmean)**2 for v in vs) / max(len(vs), 1)))
                if gmean else m.inf
            )(m.prod(float(v) for v in vs)**(1 / max(len(vs), 1))))

    # write results to CSV
    with openio(output, 'w') as f:
        writer = csv.DictWriter(f, by + fields)
        writer.writeheader()
        for r in avgs:
            writer.writerow(r)
# command-line entry point, parses the options and forwards them to main
if __name__ == "__main__":
    import argparse
    import sys

    def rename_arg(x):
        # parse "name" or "name=old_a,old_b" into (name, (old_names...)),
        # shared by --by, --seed and --field
        k, *vs = x.split('=', 1)
        return (
            k.strip(),
            tuple(v.strip() for v in vs[0].split(',')) if vs else ())

    parser = argparse.ArgumentParser(
        description="Compute averages/etc of benchmark measurements.",
        allow_abbrev=False)
    parser.add_argument(
        'csv_paths',
        nargs='*',
        help="Input *.csv files.")
    parser.add_argument(
        '-o', '--output',
        required=True,
        help="*.csv file to write the computed measurements to.")
    parser.add_argument(
        '--sum',
        action='store_true',
        help="Compute the sum.")
    parser.add_argument(
        '--prod',
        action='store_true',
        help="Compute the product.")
    parser.add_argument(
        '--min',
        action='store_true',
        help="Compute the min.")
    parser.add_argument(
        '--max',
        action='store_true',
        help="Compute the max.")
    parser.add_argument(
        '--bnd',
        action='store_true',
        help="Compute the bounds (min+max concatenated).")
    parser.add_argument(
        '--avg', '--mean',
        action='store_true',
        help="Compute the average (the default).")
    parser.add_argument(
        '--stddev',
        action='store_true',
        help="Compute the standard deviation.")
    parser.add_argument(
        '--gmean',
        action='store_true',
        help="Compute the geometric mean.")
    parser.add_argument(
        '--gstddev',
        action='store_true',
        help="Compute the geometric standard deviation.")
    parser.add_argument(
        '-b', '--by',
        action='append',
        type=rename_arg,
        help="Group by this field. Can rename fields with new_name=old_name.")
    parser.add_argument(
        '-m', '--meas',
        help="Optional name of measurement name field. If provided, the name "
            "will be suffixed with the computed statistic, e.g. +avg or "
            "+stddev.")
    parser.add_argument(
        '-s', '--seed',
        dest='seeds',
        action='append',
        type=rename_arg,
        help="Field to ignore when averaging. Can rename fields with "
            "new_name=old_name.")
    parser.add_argument(
        '-f', '--field',
        dest='fields',
        action='append',
        type=rename_arg,
        help="Field to average. Can rename fields with new_name=old_name.")
    parser.add_argument(
        '-D', '--define',
        dest='defines',
        action='append',
        # requires key=value, a missing '=' is rejected by argparse
        type=lambda x: (
            lambda k, vs: (
                k.strip(),
                {v.strip() for v in vs.split(',')})
            )(*x.split('=', 1)),
        help="Only include results where this field is this value. May include "
            "comma-separated options.")
    # unset options are dropped so main's own defaults apply
    sys.exit(main(**{k: v
        for k, v in vars(parser.parse_intermixed_args()).items()
        if v is not None}))