Avoid O(n^2) folding in summary.py

Noticed weird slowness when summarizing test results by suite vs case.
Turns out the way we accumulate results by overloading Python's __add__
quickly leads to O(n^2) behavior as we repeatedly concatenate
increasingly large lists.

Instead of doing anything sane, I've added a second, immutable length to
each list such that we can opportunistically reuse/mutate/append lists
in __add__. The end result should be O(n) most of the time.

Observe:

               lines     bytes
  test.csv:   537749  64551874  (62MiB)

  ./scripts/summary.py test.csv -ftest_time -S

               before       after
  -bcase:    0m51.772s    0m9.302s  (-82.0%)
  -bsuite:  10m29.067s    0m9.357s  (-98.5%)
2024-05-13 02:00:03 -05:00
parent 4920cb092c
commit 7a7da9680a
1 changed file with 17 additions and 7 deletions
--- a/scripts/summary.py
+++ b/scripts/summary.py
@@ -350,22 +350,32 @@ def infer(fields_, results,
    def __new__(cls, **r):
        return cls.__mro__[1].__new__(cls,
            **{k: r.get(k, '') for k in by},
-            **{k: r[k] if k in r and isinstance(r[k], list)
-                else [types[k](r[k])] if k in r
-                else []
+            **{k: r[k] if k in r and isinstance(r[k], tuple)
+                else ([types[k](r[k])], 1) if k in r
+                else ([], 0)
                for k in fields})

    def __add__(self, other):
+        # reuse lists if possible
+        def extend(a, b):
+            if len(a[0]) == a[1]:
+                a[0].extend(b[0][:b[1]])
+                return (a[0], a[1] + b[1])
+            else:
+                return (a[0][:a[1]] + b[0][:b[1]], a[1] + b[1])
+
        return self.__class__(
            **{k: getattr(self, k) for k in by},
-            **{k: object.__getattribute__(self, k)
-                + object.__getattribute__(other, k)
+            **{k: extend(
+                    object.__getattribute__(self, k),
+                    object.__getattribute__(other, k))
                for k in fields})

    def __getattribute__(self, k):
        if k in fields:
-            if object.__getattribute__(self, k):
-                return ops.get(k, OPS['sum'])(object.__getattribute__(self, k))
+            v = object.__getattribute__(self, k)
+            if v[1]:
+                return ops.get(k, OPS['sum'])(v[0][:v[1]])
            else:
                return None
        return object.__getattribute__(self, k)