Tweaked scripts that consume .csv files to filter defines early

With the quantity of data being output by bench.py now, filtering ASAP
while parsing CSV files is a valuable optimization. And thanks to how
CSV files are structured, we can even avoid ever loading the full
contents into RAM.

This does end up with us filtering for defines redundantly in a few
places, but this is well worth the saved overhead from early filtering.

Also tried to clean up the plot.py/plotmpl.py's data folding path,
though that may have been wasted effort.
This commit is contained in:
Christopher Haster
2023-11-03 14:30:22 -05:00
parent fb9277feac
commit 616b4e1c9e
10 changed files with 274 additions and 212 deletions

View File

@@ -317,18 +317,18 @@ def collect(obj_paths, *,
def fold(Result, results, *,
by=None,
defines=None,
defines=[],
**_):
if by is None:
by = Result._by
for k in it.chain(by or [], (k for k, _ in defines or [])):
for k in it.chain(by or [], (k for k, _ in defines)):
if k not in Result._by and k not in Result._fields:
print("error: could not find field %r?" % k)
sys.exit(-1)
# filter by matching defines
if defines is not None:
if defines:
results_ = []
for r in results:
if all(getattr(r, k) in vs for k, vs in defines):
@@ -524,7 +524,7 @@ def table(Result, results, diff_results=None, *,
def main(obj_paths, *,
by=None,
fields=None,
defines=None,
defines=[],
sort=None,
**args):
# find sizes
@@ -535,6 +535,10 @@ def main(obj_paths, *,
with openio(args['use']) as f:
reader = csv.DictReader(f, restval='')
for r in reader:
# filter by matching defines
if not all(k in r and r[k] in vs for k, vs in defines):
continue
if not any('code_'+k in r and r['code_'+k].strip()
for k in CodeResult._fields):
continue
@@ -582,6 +586,10 @@ def main(obj_paths, *,
with openio(args['diff']) as f:
reader = csv.DictReader(f, restval='')
for r in reader:
# filter by matching defines
if not all(k in r and r[k] in vs for k, vs in defines):
continue
if not any('code_'+k in r and r['code_'+k].strip()
for k in CodeResult._fields):
continue