Files
littlefs/scripts/amor.py
Christopher Haster 2be3ff57c5 Moved post-bench amor/avg analysis out into amor.py and avg.py
1. Being able to inspect results before benchmarks complete was useful
   to track their status. It also allows some analysis even if a
   benchmark fails.

2. Moving these scripts out of bench.py allows them to be a bit more
   flexible, at the cost of CSV parsing/structuring overhead.

3. Writing benchmark measurements immediately avoids RAM buildup as we
   store intermediate measurements for each bench permutation. This may
   increase the IO bottleneck, but we end up writing the same number of
   lines either way, so the net effect is unclear.

I realize avg.py has quite a bit of overlap with summary.py, but I don't
want to entangle them further. summary.py is already trying to do too
much as is...
2023-11-04 13:16:50 -05:00

249 lines
7.5 KiB
Python
Executable File

#!/usr/bin/env python3
#
# Amortize benchmark measurements
#
import collections as co
import csv
import itertools as it
import math as m
import os
import sys
def openio(path, mode='r', buffering=-1):
    """Open path for io, with '-' meaning stdin/stdout.

    '-' maps to stdin for read modes and stdout otherwise. The underlying
    fd is duped so closing the returned file doesn't close the real
    stdin/stdout. Requires the module-level import of sys (previously sys
    was only imported under __main__, so '-' raised NameError on import).
    """
    # allow '-' for stdin/stdout
    if path == '-':
        if 'r' in mode:
            return os.fdopen(os.dup(sys.stdin.fileno()), mode, buffering)
        else:
            return os.fdopen(os.dup(sys.stdout.fileno()), mode, buffering)
    else:
        return open(path, mode, buffering)
# parse different data representations
def dat(x):
    """Parse a measurement string into an int or float.

    Accepts any int base understood by int(x, 0) (0x.., 0b.., etc),
    floats, and the numerator of an a/b fraction. Raises ValueError for
    anything else, including inf/nan.
    """
    # allow the first part of an a/b fraction
    if '/' in x:
        x, _ = x.split('/', 1)

    # first try as int
    try:
        return int(x, 0)
    except ValueError:
        pass

    # then try as float
    try:
        x = float(x)
        # just don't allow infinity or nan
        #
        # note we must check _before_ returning; the original returned
        # float(x) directly, which made this check unreachable and let
        # 'inf'/'nan' slip through
        if m.isinf(x) or m.isnan(x):
            raise ValueError("invalid dat %r" % x)
        return x
    except ValueError:
        pass

    # else give up
    raise ValueError("invalid dat %r" % x)
def collect(csv_paths, renames=[], defines=[]):
    """Gather rows from the given CSV files as a flat list of dicts.

    renames -- list of (new_name, old_name) pairs, each copies old_name's
               value into new_name
    defines -- list of (field, values) filters, rows must match all of them

    Missing files are silently skipped.
    """
    rows = []
    for path in csv_paths:
        try:
            with openio(path) as f:
                for row in csv.DictReader(f, restval=''):
                    # apply any renames; staged in a separate dict so
                    # overlapping renames all read pre-rename values
                    if renames:
                        renamed = {new_k: row[old_k]
                            for new_k, old_k in renames
                            if old_k in row}
                        row.update(renamed)
                    # keep only rows matching every define
                    if all(k in row and row[k] in vs for k, vs in defines):
                        rows.append(row)
        except FileNotFoundError:
            pass
    return rows
def main(csv_paths, output, *,
        amor=False,
        per=False,
        meas=None,
        iter=None,
        size=None,
        by=None,
        fields=None,
        defines=[]):
    """Amortize benchmark measurements.

    Reads measurements from csv_paths, groups them by the 'by' fields,
    orders each group by the 'iter' field, and writes running-average
    (amortized) and/or per-byte results to output.

    csv_paths -- input *.csv files, '-' for stdin
    output    -- output *.csv file, '-' for stdout
    amor      -- compute amortized (running average) results
    per       -- compute per-byte results, dividing by the running size
    meas      -- optional measurement-name field, suffixed with +amor/+per
    iter      -- name of the iteration field used for ordering
    size      -- optional size field, defaults to 1 per measurement
    by        -- (new_name, old_names) pairs of fields to group by
    fields    -- (new_name, old_names) pairs of fields to amortize
    defines   -- (field, values) filters applied to input rows
    """
    # default to amortizing, and per-byte results if a size field is given
    if not amor and not per:
        amor = True
        if size is not None:
            per = True

    # separate out renames
    renames = list(it.chain.from_iterable(
        ((k, v) for v in vs)
        for k, vs in it.chain(by or [], fields or [])))
    if by is not None:
        by = [k for k, _ in by]
    if fields is not None:
        fields = [k for k, _ in fields]

    # collect results from csv files
    results = collect(csv_paths, renames, defines)

    # if fields not specified, try to guess from data, a field counts
    # only if every non-empty value parses as a number
    if fields is None:
        fields = co.OrderedDict()
        for r in results:
            for k, v in r.items():
                if k not in (by or []) and k != iter and v.strip():
                    try:
                        dat(v)
                        fields[k] = True
                    except ValueError:
                        fields[k] = False
        fields = list(k for k, v in fields.items() if v)

    # if by not specified, guess it's anything not in iter/fields and not a
    # source of a rename
    if by is None:
        by = co.OrderedDict()
        for r in results:
            # also ignore None keys, these are introduced by csv.DictReader
            # when header + row mismatch
            by.update((k, True) for k in r.keys()
                if k is not None
                    and k != iter
                    and k not in fields
                    and not any(k == old_k for _, old_k in renames))
        by = list(by.keys())

    # convert iter/fields to ints/floats, empty values become 0
    #
    # note set() here, not {} -- {} is an empty dict and set | dict raises
    # a TypeError, which crashed the default path when no size was given
    for r in results:
        for k in {iter} | set(fields) | ({size} if size is not None else set()):
            if k in r:
                r[k] = dat(r[k]) if r[k].strip() else 0

    # organize by 'by' values
    results_ = co.defaultdict(list)
    for r in results:
        key = tuple(r.get(k, '') for k in by)
        results_[key].append(r)
    results = results_

    # for each key compute the amortized results
    amors = []
    for rs in results.values():
        # keep a running sum for each field
        sums = {f: 0 for f in fields}
        size_ = 0
        for j, (_, r) in enumerate(sorted(
                ((r.get(iter, 0), r) for r in rs),
                key=lambda p: p[0])):
            # update running sums, size defaults to 1 so per-byte
            # degrades to per-measurement when no size field is given
            for f in fields:
                sums[f] += r.get(f, 0)
            size_ += r.get(size, 1)

            # find amortized results
            if amor:
                amors.append(r
                    | {f: sums[f] / (j+1) for f in fields}
                    | ({} if meas is None
                        else {meas: r[meas]+'+amor'} if meas in r
                        else {meas: 'amor'}))
            # also find per-byte results
            if per:
                amors.append(r
                    | {f: r.get(f, 0) / size_ for f in fields}
                    | ({} if meas is None
                        else {meas: r[meas]+'+per'} if meas in r
                        else {meas: 'per'}))

    # write results to CSV, ignoring input columns not in the requested
    # output so explicit -b/-f don't raise on extra columns
    with openio(output, 'w') as f:
        writer = csv.DictWriter(f,
            by + ([meas] if meas not in by else []) + [iter] + fields,
            extrasaction='ignore')
        writer.writeheader()
        for r in amors:
            writer.writerow(r)
if __name__ == "__main__":
    import argparse
    import sys
    parser = argparse.ArgumentParser(
        description="Amortize benchmark measurements.",
        allow_abbrev=False)
    parser.add_argument(
        'csv_paths',
        nargs='*',
        help="Input *.csv files.")
    parser.add_argument(
        '-o', '--output',
        required=True,
        help="*.csv file to write amortized measurements to.")
    parser.add_argument(
        '--amor',
        action='store_true',
        help="Compute amortized results.")
    parser.add_argument(
        '--per',
        action='store_true',
        help="Compute per-byte results.")
    parser.add_argument(
        '-m', '--meas',
        help="Optional name of measurement name field. If provided, the name "
            "will be modified with +amor or +per.")
    parser.add_argument(
        '-i', '--iter',
        required=True,
        help="Name of iteration field.")
    parser.add_argument(
        '-n', '--size',
        help="Optional name of size field.")
    parser.add_argument(
        '-b', '--by',
        action='append',
        # parses "field" or "new_name=old_name,..." into a
        # (name, (old_names...)) pair
        type=lambda x: (
            lambda k, vs=None: (
                k.strip(),
                tuple(v.strip() for v in vs.split(','))
                    if vs is not None else ())
        )(*x.split('=', 1)),
        help="Group by this field. Can rename fields with new_name=old_name.")
    parser.add_argument(
        '-f', '--field',
        dest='fields',
        action='append',
        # parses "field" or "new_name=old_name,..." into a
        # (name, (old_names...)) pair
        type=lambda x: (
            lambda k, vs=None: (
                k.strip(),
                tuple(v.strip() for v in vs.split(','))
                    if vs is not None else ())
        )(*x.split('=', 1)),
        help="Field to amortize. Can rename fields with new_name=old_name.")
    parser.add_argument(
        '-D', '--define',
        dest='defines',
        action='append',
        # parses "field=value,value,..." into a (field, {values}) pair
        type=lambda x: (
            lambda k, vs: (
                k.strip(),
                {v.strip() for v in vs.split(',')})
        )(*x.split('=', 1)),
        help="Only include results where this field is this value. May include "
            "comma-separated options.")
    # only forward args that were actually provided, so main's defaults apply
    sys.exit(main(**{k: v
        for k, v in vars(parser.parse_intermixed_args()).items()
        if v is not None}))