From 904c2eddd772e40cf0a196c260eb8acd3c770287 Mon Sep 17 00:00:00 2001
From: Christopher Haster
Date: Sat, 2 Nov 2024 23:02:41 -0500
Subject: [PATCH] scripts: Memoized stack.py's limit calculation

This is a pretty classic case for memoization. We don't really need to
recalculate every stack limit at every call site.

Cuts the runtime in half:

  before: 0.335s
  after:  0.139s (-58.5%)

---

Unfortunately functools.cache was not fit for purpose. It's stuck using
all parameters as the key, which breaks on the "seen" parameter we use
for cycle detection that otherwise has no impact on results.

Fortunately decorators aren't too difficult in Python, so I just rolled
my own (cache1).
---
 scripts/stack.py | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/scripts/stack.py b/scripts/stack.py
index fff659d6..7194a5c7 100755
--- a/scripts/stack.py
+++ b/scripts/stack.py
@@ -234,18 +234,29 @@ def collect(ci_paths, *,
             callgraph_[source] = (s_file, s_function, frame, targets)
         callgraph = callgraph_
 
+    # memoize via only the first argument
+    def cache1(f):
+        def f_(a, *args, **kwargs):
+            if a in f_.cache:
+                return f_.cache[a]
+            r = f(a, *args, **kwargs)
+            f_.cache[a] = r
+            return r
+        f_.cache = {}
+        return f_
+
     # find maximum stack size recursively, this requires also detecting cycles
     # (in case of recursion)
-    def find_limit(source, seen=None):
-        seen = seen or set()
+    @cache1
+    def find_limit(source, seen=set()):
         if source not in callgraph:
             return 0
         _, _, frame, targets = callgraph[source]
 
         limit = 0
         for target in targets:
+            # found a cycle?
             if target in seen:
-                # found a cycle
                 return m.inf
             limit_ = find_limit(target, seen | {target})
             limit = max(limit, limit_)