Tweaked scripts that consume .csv files to filter defines early

With the quantity of data being output by bench.py now, filtering ASAP
while parsing CSV files is a valuable optimization. And thanks to how
CSV files are structured, we can even avoid ever loading the full
contents into RAM.

This does end up with us filtering for defines redundantly in a few
places, but this is well worth the saved overhead from early filtering.

Also tried to clean up the plot.py/plotmpl.py's data folding path,
though that may have been wasted effort.
This commit is contained in:
Christopher Haster
2023-11-03 14:30:22 -05:00
parent fb9277feac
commit 616b4e1c9e
10 changed files with 274 additions and 212 deletions

View File

@@ -317,18 +317,18 @@ def collect(obj_paths, *,
def fold(Result, results, *,
by=None,
defines=None,
defines=[],
**_):
if by is None:
by = Result._by
for k in it.chain(by or [], (k for k, _ in defines or [])):
for k in it.chain(by or [], (k for k, _ in defines)):
if k not in Result._by and k not in Result._fields:
print("error: could not find field %r?" % k)
sys.exit(-1)
# filter by matching defines
if defines is not None:
if defines:
results_ = []
for r in results:
if all(getattr(r, k) in vs for k, vs in defines):
@@ -524,7 +524,7 @@ def table(Result, results, diff_results=None, *,
def main(obj_paths, *,
by=None,
fields=None,
defines=None,
defines=[],
sort=None,
**args):
# find sizes
@@ -535,6 +535,10 @@ def main(obj_paths, *,
with openio(args['use']) as f:
reader = csv.DictReader(f, restval='')
for r in reader:
# filter by matching defines
if not all(k in r and r[k] in vs for k, vs in defines):
continue
if not any('code_'+k in r and r['code_'+k].strip()
for k in CodeResult._fields):
continue
@@ -582,6 +586,10 @@ def main(obj_paths, *,
with openio(args['diff']) as f:
reader = csv.DictReader(f, restval='')
for r in reader:
# filter by matching defines
if not all(k in r and r[k] in vs for k, vs in defines):
continue
if not any('code_'+k in r and r['code_'+k].strip()
for k in CodeResult._fields):
continue