Added perf.py, a wrapper around Linux's perf tool for perf sampling

This provides 2 things:

1. perf integration with the bench/test runners - This is a bit tricky
   with perf as it doesn't have its own way to combine perf measurements
   across multiple processes. perf.py works around this by writing
   everything to a zip file, using flock to synchronize writers. As a
   plus, free compression! (Both the writing and parsing sides are
   sketched below, after this list.)

2. Parsing and presentation of perf results in a format consistent with
   the other CSV-based tools. This actually ran into a surprising number of
   issues:

   - We need to process raw events to get the information we want, and
     this ends up being a lot of data (~16MiB at 100Hz, uncompressed),
     so we parallelize the parsing of each decompressed perf file.

   - perf reports raw addresses post-ASLR. It does provide sym+off,
     which is very useful, but to find the source of static functions
     we need to reverse the ASLR by finding the delta that produces
     the best symbol<->addr matches (sketched below).

   - This isn't related to perf, but decoding DWARF line numbers is
     really complicated. You basically need to write a tiny VM (a toy
     version is sketched below).
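
A minimal sketch of the zip-file aggregation from point 1. The name
append_sample is hypothetical and the real perf.py likely differs;
this just shows the flock+zip idea:

    import fcntl
    import zipfile

    def append_sample(zip_path, member_name, perf_data):
        # serialize concurrent writers with an exclusive flock; the
        # zip format also gives us compression for free
        with open(zip_path, 'ab') as f:
            fcntl.flock(f, fcntl.LOCK_EX)
            try:
                with zipfile.ZipFile(zip_path, 'a',
                        zipfile.ZIP_DEFLATED) as z:
                    z.writestr(member_name, perf_data)
            finally:
                fcntl.flock(f, fcntl.LOCK_UN)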

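And a sketch of the parsing side from the first bullet, parallelized
across the zip's members. parse_one is a stand-in for the real
raw-event decoding, which is where the bulk of the work actually is:

    import multiprocessing
    import zipfile

    def parse_one(raw):
        # stand-in: fold raw sample lines into per-symbol counts
        counts = {}
        for line in raw.decode('utf-8', 'replace').splitlines():
            words = line.split()
            sym = words[0] if words else '?'
            counts[sym] = counts.get(sym, 0) + 1
        return counts

    def parse_all(zip_path):
        # parse each decompressed perf file in parallel (note this
        # needs an if __name__ == '__main__' guard on spawn-based
        # platforms)
        with zipfile.ZipFile(zip_path) as z:
            raws = [z.read(n) for n in z.namelist()]
        merged = {}
        with multiprocessing.Pool() as p:
            for counts in p.map(parse_one, raws):
                for sym, v in counts.items():
                    merged[sym] = merged.get(sym, 0) + v
        return merged
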
This also turns on perf measurement by default for the bench-runner, but at a
low frequency (100 Hz). This can be decreased or removed in the future
if it causes any slowdown.
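
For the ASLR bullet, a rough sketch of recovering the load delta,
assuming samples arrive as (runtime address, symbol, offset) tuples
and we have a static symbol table from nm (names hypothetical):

    import collections

    def find_aslr_delta(samples, symtab):
        # each sample that names a known symbol votes for a candidate
        # delta; the most common vote best reverses the ASLR
        votes = collections.Counter()
        for addr, sym, off in samples:
            if sym in symtab:
                votes[addr - off - symtab[sym]] += 1
        return votes.most_common(1)[0][0] if votes else 0

And for the DWARF bullet, a toy version of the line-number VM. Real
DWARF reads opcode_base/line_base/line_range from the .debug_line
header and encodes operands as LEB128; this sketch hardcodes typical
values, one-byte operands, and min_instruction_length=1 just to show
the shape:

    def exec_line_program(code,
            opcode_base=13, line_base=-5, line_range=14):
        # the line table is a bytecode program; executing it emits
        # (address, line) rows
        address, line, rows = 0, 1, []
        i = 0
        while i < len(code):
            op = code[i]; i += 1
            if op >= opcode_base:
                # special opcode: one byte advances address and line
                adj = op - opcode_base
                address += adj // line_range
                line += line_base + (adj % line_range)
                rows.append((address, line))
            elif op == 0x01:  # DW_LNS_copy
                rows.append((address, line))
            elif op == 0x02:  # DW_LNS_advance_pc (operand simplified)
                address += code[i]; i += 1
            elif op == 0x03:  # DW_LNS_advance_line (operand simplified)
                line += code[i]; i += 1
            else:
                break  # everything else omitted in this sketch
        return rows
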
Christopher Haster
2022-10-02 18:35:46 -05:00
parent ca66993812
commit 490e1c4616
15 changed files with 2104 additions and 283 deletions

.gitignore

@@ -9,6 +9,7 @@
*.a.c
*.gcno
*.gcda
*.perf
# Testing things
blocks/

Makefile

@@ -25,7 +25,9 @@ SIZE ?= size
CTAGS ?= ctags
NM ?= nm
OBJDUMP ?= objdump
LCOV ?= lcov
VALGRIND ?= valgrind
GDB ?= gdb
PERF ?= perf
SRC ?= $(filter-out $(wildcard *.*.c),$(wildcard *.c))
OBJ := $(SRC:%.c=$(BUILDDIR)%.o)
@@ -38,25 +40,31 @@ TESTS ?= $(wildcard tests/*.toml)
TEST_SRC ?= $(SRC) \
$(filter-out $(wildcard bd/*.*.c),$(wildcard bd/*.c)) \
runners/test_runner.c
TEST_TC := $(TESTS:%.toml=$(BUILDDIR)%.t.c) $(TEST_SRC:%.c=$(BUILDDIR)%.t.c)
TEST_RUNNER ?= $(BUILDDIR)runners/test_runner
TEST_TC := $(TESTS:%.toml=$(BUILDDIR)%.t.c) \
$(TEST_SRC:%.c=$(BUILDDIR)%.t.c)
TEST_TAC := $(TEST_TC:%.t.c=%.t.a.c)
TEST_OBJ := $(TEST_TAC:%.t.a.c=%.t.a.o)
TEST_DEP := $(TEST_TAC:%.t.a.c=%.t.a.d)
TEST_CI := $(TEST_TAC:%.t.a.c=%.t.a.ci)
TEST_GCNO := $(TEST_TAC:%.t.a.c=%.t.a.gcno)
TEST_GCDA := $(TEST_TAC:%.t.a.c=%.t.a.gcda)
TEST_PERF := $(TEST_RUNNER:%=%.perf)
BENCHES ?= $(wildcard benches/*.toml)
BENCH_SRC ?= $(SRC) \
$(filter-out $(wildcard bd/*.*.c),$(wildcard bd/*.c)) \
runners/bench_runner.c
BENCH_BC := $(BENCHES:%.toml=$(BUILDDIR)%.b.c) $(BENCH_SRC:%.c=$(BUILDDIR)%.b.c)
BENCH_RUNNER ?= $(BUILDDIR)runners/bench_runner
BENCH_BC := $(BENCHES:%.toml=$(BUILDDIR)%.b.c) \
$(BENCH_SRC:%.c=$(BUILDDIR)%.b.c)
BENCH_BAC := $(BENCH_BC:%.b.c=%.b.a.c)
BENCH_OBJ := $(BENCH_BAC:%.b.a.c=%.b.a.o)
BENCH_DEP := $(BENCH_BAC:%.b.a.c=%.b.a.d)
BENCH_CI := $(BENCH_BAC:%.b.a.c=%.b.a.ci)
BENCH_GCNO := $(BENCH_BAC:%.b.a.c=%.b.a.gcno)
BENCH_GCDA := $(BENCH_BAC:%.b.a.c=%.b.a.gcda)
BENCH_PERF := $(BENCH_RUNNER:%=%.perf)
ifdef DEBUG
override CFLAGS += -O0
@@ -71,18 +79,47 @@ override CFLAGS += -I.
override CFLAGS += -std=c99 -Wall -pedantic
override CFLAGS += -Wextra -Wshadow -Wjump-misses-init -Wundef
override CFLAGS += -ftrack-macro-expansion=0
ifdef YES_COVERAGE
override CFLAGS += --coverage
endif
ifdef YES_PERF
override CFLAGS += -fno-omit-frame-pointer
endif
override TESTFLAGS += -b
override BENCHFLAGS += -b
# forward -j flag
override TESTFLAGS += $(filter -j%,$(MAKEFLAGS))
override BENCHFLAGS += $(filter -j%,$(MAKEFLAGS))
ifdef VERBOSE
override CODEFLAGS += -v
override DATAFLAGS += -v
override STACKFLAGS += -v
override STRUCTFLAGS += -v
override COVERAGEFLAGS += -v
override PERFFLAGS += -v
endif
ifneq ($(NM),nm)
override CODEFLAGS += --nm-tool="$(NM)"
override DATAFLAGS += --nm-tool="$(NM)"
endif
ifneq ($(OBJDUMP),objdump)
override CODEFLAGS += --objdump-tool="$(OBJDUMP)"
override DATAFLAGS += --objdump-tool="$(OBJDUMP)"
override STRUCTFLAGS += --objdump-tool="$(OBJDUMP)"
override PERFFLAGS += --objdump-tool="$(OBJDUMP)"
endif
ifneq ($(PERF),perf)
override PERFFLAGS += --perf-tool="$(PERF)"
endif
override TESTFLAGS += -b
override BENCHFLAGS += -b
# forward -j flag
override TESTFLAGS += $(filter -j%,$(MAKEFLAGS))
override BENCHFLAGS += $(filter -j%,$(MAKEFLAGS))
ifdef YES_PERF
override TESTFLAGS += --perf=$(TEST_PERF)
endif
ifndef NO_PERF
override BENCHFLAGS += --perf=$(BENCH_PERF)
endif
ifdef VERBOSE
override TESTFLAGS += -v
override TESTCFLAGS += -v
override BENCHFLAGS += -v
@@ -92,19 +129,17 @@ ifdef EXEC
override TESTFLAGS += --exec="$(EXEC)"
override BENCHFLAGS += --exec="$(EXEC)"
endif
ifdef BUILDDIR
override CODEFLAGS += --build-dir="$(BUILDDIR:/=)"
override DATAFLAGS += --build-dir="$(BUILDDIR:/=)"
override STACKFLAGS += --build-dir="$(BUILDDIR:/=)"
override STRUCTFLAGS += --build-dir="$(BUILDDIR:/=)"
override COVERAGEFLAGS += --build-dir="$(BUILDDIR:/=)"
ifneq ($(GDB),gdb)
override TESTFLAGS += --gdb-tool="$(GDB)"
override BENCHFLAGS += --gdb-tool="$(GDB)"
endif
ifneq ($(NM),nm)
override CODEFLAGS += --nm-tool="$(NM)"
override DATAFLAGS += --nm-tool="$(NM)"
ifneq ($(VALGRIND),valgrind)
override TESTFLAGS += --valgrind-tool="$(VALGRIND)"
override BENCHFLAGS += --valgrind-tool="$(VALGRIND)"
endif
ifneq ($(OBJDUMP),objdump)
override STRUCTFLAGS += --objdump-tool="$(OBJDUMP)"
ifneq ($(PERF),perf)
override TESTFLAGS += --perf-tool="$(PERF)"
override BENCHFLAGS += --perf-tool="$(PERF)"
endif
@@ -124,28 +159,50 @@ tags:
$(CTAGS) --totals --c-types=+p $(shell find -H -name '*.h') $(SRC)
.PHONY: test-runner build-test
ifndef NO_COVERAGE
test-runner build-test: override CFLAGS+=--coverage
test-runner build-test: $(BUILDDIR)runners/test_runner
endif
ifdef YES_PERF
bench-runner build-bench: override CFLAGS+=-fno-omit-frame-pointer
endif
test-runner build-test: $(TEST_RUNNER)
ifndef NO_COVERAGE
rm -f $(TEST_GCDA)
endif
ifdef YES_PERF
rm -f $(TEST_PERF)
endif
.PHONY: test
test: test-runner
./scripts/test.py $(BUILDDIR)runners/test_runner $(TESTFLAGS)
./scripts/test.py $(TEST_RUNNER) $(TESTFLAGS)
.PHONY: test-list
test-list: test-runner
./scripts/test.py $(BUILDDIR)runners/test_runner $(TESTFLAGS) -l
./scripts/test.py $(TEST_RUNNER) $(TESTFLAGS) -l
.PHONY: bench-runner build-bench
bench-runner build-bench: $(BUILDDIR)runners/bench_runner
ifdef YES_COVERAGE
bench-runner build-bench: override CFLAGS+=--coverage
endif
ifndef NO_PERF
bench-runner build-bench: override CFLAGS+=-fno-omit-frame-pointer
endif
bench-runner build-bench: $(BENCH_RUNNER)
ifdef YES_COVERAGE
rm -f $(BENCH_GCDA)
endif
ifndef NO_PERF
rm -f $(BENCH_PERF)
endif
.PHONY: bench
bench: bench-runner
./scripts/bench.py $(BUILDDIR)runners/bench_runner $(BENCHFLAGS)
./scripts/bench.py $(BENCH_RUNNER) $(BENCHFLAGS)
.PHONY: bench-list
bench-list: bench-runner
./scripts/bench.py $(BUILDDIR)runners/bench_runner $(BENCHFLAGS) -l
./scripts/bench.py $(BENCH_RUNNER) $(BENCHFLAGS) -l
.PHONY: code
code: $(OBJ)
@@ -165,7 +222,17 @@ struct: $(OBJ)
.PHONY: coverage
coverage: $(GCDA)
./scripts/coverage.py $^ -slines -sbranches $(COVERAGEFLAGS)
$(strip ./scripts/coverage.py \
$^ $(patsubst %,-F%,$(SRC)) \
-slines -sbranches \
$(COVERAGEFLAGS))
.PHONY: perf
perf: $(BENCH_PERF)
$(strip ./scripts/perf.py \
$^ $(patsubst %,-F%,$(SRC)) \
-scycles \
$(PERFFLAGS))
.PHONY: summary sizes
summary sizes: $(BUILDDIR)lfs.csv
@@ -203,7 +270,10 @@ $(BUILDDIR)lfs.struct.csv: $(OBJ)
./scripts/struct_.py $^ -q $(CODEFLAGS) -o $@
$(BUILDDIR)lfs.coverage.csv: $(GCDA)
./scripts/coverage.py $^ -q $(COVERAGEFLAGS) -o $@
./scripts/coverage.py $^ $(patsubst %,-F%,$(SRC)) -q $(COVERAGEFLAGS) -o $@
$(BUILDDIR)lfs.perf.csv: $(BENCH_PERF)
./scripts/perf.py $^ $(patsubst %,-F%,$(SRC)) -q $(PERFFLAGS) -o $@
$(BUILDDIR)lfs.csv: \
$(BUILDDIR)lfs.code.csv \
@@ -255,13 +325,13 @@ clean:
$(BUILDDIR)lfs.data.csv \
$(BUILDDIR)lfs.stack.csv \
$(BUILDDIR)lfs.struct.csv \
$(BUILDDIR)lfs.coverage.csv)
rm -f $(BUILDDIR)runners/test_runner
rm -f $(BUILDDIR)runners/bench_runner
$(BUILDDIR)lfs.coverage.csv \
$(BUILDDIR)lfs.perf.csv)
rm -f $(OBJ)
rm -f $(DEP)
rm -f $(ASM)
rm -f $(CI)
rm -f $(TEST_RUNNER)
rm -f $(TEST_TC)
rm -f $(TEST_TAC)
rm -f $(TEST_OBJ)
@@ -269,6 +339,8 @@ clean:
rm -f $(TEST_CI)
rm -f $(TEST_GCNO)
rm -f $(TEST_GCDA)
rm -f $(TEST_PERF)
rm -f $(BENCH_RUNNER)
rm -f $(BENCH_BC)
rm -f $(BENCH_BAC)
rm -f $(BENCH_OBJ)
@@ -276,3 +348,4 @@ clean:
rm -f $(BENCH_CI)
rm -f $(BENCH_GCNO)
rm -f $(BENCH_GCDA)
rm -f $(BENCH_PERF)

scripts/bench.py

@@ -27,9 +27,13 @@ import time
import toml
RUNNER_PATH = 'runners/bench_runner'
RUNNER_PATH = './runners/bench_runner'
HEADER_PATH = 'runners/bench_runner.h'
GDB_TOOL = ['gdb']
VALGRIND_TOOL = ['valgrind']
PERF_SCRIPT = ['./scripts/perf.py']
def openio(path, mode='r', buffering=-1, nb=False):
if path == '-':
@@ -502,12 +506,25 @@ def find_runner(runner, **args):
# run under valgrind?
if args.get('valgrind'):
cmd[:0] = filter(None, [
'valgrind',
cmd[:0] = args['valgrind_tool'] + [
'--leak-check=full',
'--track-origins=yes',
'--error-exitcode=4',
'-q'])
'-q']
# run under perf?
if args.get('perf'):
cmd[:0] = args['perf_script'] + list(filter(None, [
'-R',
'--perf-freq=%s' % args['perf_freq']
if args.get('perf_freq') else None,
'--perf-period=%s' % args['perf_period']
if args.get('perf_period') else None,
'--perf-events=%s' % args['perf_events']
if args.get('perf_events') else None,
'--perf-tool=%s' % args['perf_tool']
if args.get('perf_tool') else None,
'-o%s' % args['perf']]))
# other context
if args.get('geometry'):
@@ -789,9 +806,9 @@ def run_stage(name, runner_, ids, output_, **args):
try:
line = mpty.readline()
except OSError as e:
if e.errno == errno.EIO:
break
if e.errno != errno.EIO:
raise
break
if not line:
break
last_stdout.append(line)
@@ -1126,24 +1143,24 @@ def run(runner, bench_ids=[], **args):
cmd = runner_ + [failure.id]
if args.get('gdb_main'):
cmd[:0] = ['gdb',
cmd[:0] = args['gdb_tool'] + [
'-ex', 'break main',
'-ex', 'run',
'--args']
elif args.get('gdb_case'):
path, lineno = find_path(runner_, failure.id, **args)
cmd[:0] = ['gdb',
cmd[:0] = args['gdb_tool'] + [
'-ex', 'break %s:%d' % (path, lineno),
'-ex', 'run',
'--args']
elif failure.assert_ is not None:
cmd[:0] = ['gdb',
cmd[:0] = args['gdb_tool'] + [
'-ex', 'run',
'-ex', 'frame function raise',
'-ex', 'up 2',
'--args']
else:
cmd[:0] = ['gdb',
cmd[:0] = args['gdb_tool'] + [
'-ex', 'run',
'--args']
@@ -1187,6 +1204,7 @@ if __name__ == "__main__":
argparse._ArgumentGroup._handle_conflict_ignore = lambda *_: None
parser = argparse.ArgumentParser(
description="Build and run benches.",
allow_abbrev=False,
conflict_handler='ignore')
parser.add_argument(
'-v', '--verbose',
@@ -1315,6 +1333,11 @@ if __name__ == "__main__":
action='store_true',
help="Drop into gdb on bench failure but stop at the beginning "
"of main.")
bench_parser.add_argument(
'--gdb-tool',
type=lambda x: x.split(),
default=GDB_TOOL,
help="Path to gdb tool to use. Defaults to %r." % GDB_TOOL)
bench_parser.add_argument(
'--exec',
type=lambda e: e.split(),
@@ -1324,6 +1347,37 @@ if __name__ == "__main__":
action='store_true',
help="Run under Valgrind to find memory errors. Implicitly sets "
"--isolate.")
bench_parser.add_argument(
'--valgrind-tool',
type=lambda x: x.split(),
default=VALGRIND_TOOL,
help="Path to Valgrind tool to use. Defaults to %r." % VALGRIND_TOOL)
bench_parser.add_argument(
'--perf',
help="Run under Linux's perf to sample performance counters, writing "
"samples to this file.")
bench_parser.add_argument(
'--perf-freq',
help="perf sampling frequency. This is passed directly to the perf "
"script.")
bench_parser.add_argument(
'--perf-period',
help="perf sampling period. This is passed directly to the perf "
"script.")
bench_parser.add_argument(
'--perf-events',
help="perf events to record. This is passed directly to the perf "
"script.")
bench_parser.add_argument(
'--perf-script',
type=lambda x: x.split(),
default=PERF_SCRIPT,
help="Path to the perf script to use. Defaults to %r." % PERF_SCRIPT)
bench_parser.add_argument(
'--perf-tool',
type=lambda x: x.split(),
help="Path to the perf tool to use. This is passed directly to the "
"perf script")
# compilation flags
comp_parser = parser.add_argument_group('compilation options')
@@ -1348,7 +1402,7 @@ if __name__ == "__main__":
'-o', '--output',
help="Output file.")
# runner + bench_ids overlaps bench_paths, so we need to do some munging here
# runner/bench_paths overlap, so need to do some munging here
args = parser.parse_intermixed_args()
args.bench_paths = [' '.join(args.runner or [])] + args.bench_ids
args.runner = args.runner or [RUNNER_PATH]

scripts/code.py

@@ -5,7 +5,7 @@
# by Linux's Bloat-O-Meter.
#
# Example:
# ./scripts/code.py lfs.o lfs_util.o -S
# ./scripts/code.py lfs.o lfs_util.o -Ssize
#
# Copyright (c) 2022, The littlefs authors.
# Copyright (c) 2020, Arm Limited. All rights reserved.
@@ -14,6 +14,7 @@
import collections as co
import csv
import difflib
import glob
import itertools as it
import math as m
@@ -25,7 +26,8 @@ import subprocess as sp
OBJ_PATHS = ['*.o']
NM_TOOL = ['nm']
TYPE = 'tTrRdD'
NM_TYPES = 'tTrRdD'
OBJDUMP_TOOL = ['objdump']
# integer fields
@@ -135,21 +137,32 @@ def openio(path, mode='r'):
def collect(paths, *,
nm_tool=NM_TOOL,
type=TYPE,
build_dir=None,
nm_types=NM_TYPES,
objdump_tool=OBJDUMP_TOOL,
sources=None,
everything=False,
**args):
results = []
pattern = re.compile(
size_pattern = re.compile(
'^(?P<size>[0-9a-fA-F]+)' +
' (?P<type>[%s])' % re.escape(type) +
' (?P<type>[%s])' % re.escape(nm_types) +
' (?P<func>.+?)$')
line_pattern = re.compile(
'^\s+(?P<no>[0-9]+)\s+'
'(?:(?P<dir>[0-9]+)\s+)?'
'.*\s+'
'(?P<path>[^\s]+)$')
info_pattern = re.compile(
'^(?:.*(?P<tag>DW_TAG_[a-z_]+).*'
'|^.*DW_AT_name.*:\s*(?P<name>[^:\s]+)\s*'
'|^.*DW_AT_decl_file.*:\s*(?P<file>[0-9]+)\s*)$')
results = []
for path in paths:
# map to source file
src_path = re.sub('\.o$', '.c', path)
if build_dir:
src_path = re.sub('%s/*' % re.escape(build_dir), '',
src_path)
# guess the source, if we have debug-info we'll replace this later
file = re.sub('(\.o)?$', '.c', path, 1)
# find symbol sizes
results_ = []
# note nm-tool may contain extra args
cmd = nm_tool + ['--size-sort', path]
if args.get('verbose'):
@@ -158,21 +171,18 @@ def collect(paths, *,
stdout=sp.PIPE,
stderr=sp.PIPE if not args.get('verbose') else None,
universal_newlines=True,
errors='replace')
errors='replace',
close_fds=False)
for line in proc.stdout:
m = pattern.match(line)
m = size_pattern.match(line)
if m:
func = m.group('func')
# discard internal functions
if not everything and func.startswith('__'):
continue
# discard .8449 suffixes created by optimizer
func = re.sub('\.[0-9]+', '', func)
results.append(CodeResult(
src_path, func,
results_.append(CodeResult(
file, func,
int(m.group('size'), 16)))
proc.wait()
if proc.returncode != 0:
if not args.get('verbose'):
@@ -180,6 +190,121 @@ def collect(paths, *,
sys.stdout.write(line)
sys.exit(-1)
# try to figure out the source file if we have debug-info
dirs = {}
files = {}
# note objdump-tool may contain extra args
cmd = objdump_tool + ['--dwarf=rawline', path]
if args.get('verbose'):
print(' '.join(shlex.quote(c) for c in cmd))
proc = sp.Popen(cmd,
stdout=sp.PIPE,
stderr=sp.PIPE if not args.get('verbose') else None,
universal_newlines=True,
errors='replace',
close_fds=False)
for line in proc.stdout:
# note that files contain references to dirs, which we
# dereference as soon as we see them as each file table follows a
# dir table
m = line_pattern.match(line)
if m:
if not m.group('dir'):
# found a directory entry
dirs[int(m.group('no'))] = m.group('path')
else:
# found a file entry
dir = int(m.group('dir'))
if dir in dirs:
files[int(m.group('no'))] = os.path.join(
dirs[dir],
m.group('path'))
else:
files[int(m.group('no'))] = m.group('path')
proc.wait()
if proc.returncode != 0:
if not args.get('verbose'):
for line in proc.stderr:
sys.stdout.write(line)
# do nothing on error, we don't need objdump to work, source files
# may just be inaccurate
pass
defs = {}
is_func = False
f_name = None
f_file = None
# note objdump-tool may contain extra args
cmd = objdump_tool + ['--dwarf=info', path]
if args.get('verbose'):
print(' '.join(shlex.quote(c) for c in cmd))
proc = sp.Popen(cmd,
stdout=sp.PIPE,
stderr=sp.PIPE if not args.get('verbose') else None,
universal_newlines=True,
errors='replace',
close_fds=False)
for line in proc.stdout:
# state machine here to find definitions
m = info_pattern.match(line)
if m:
if m.group('tag'):
if is_func:
defs[f_name] = files.get(f_file, '?')
is_func = (m.group('tag') == 'DW_TAG_subprogram')
elif m.group('name'):
f_name = m.group('name')
elif m.group('file'):
f_file = int(m.group('file'))
proc.wait()
if proc.returncode != 0:
if not args.get('verbose'):
for line in proc.stderr:
sys.stdout.write(line)
# do nothing on error, we don't need objdump to work, source files
# may just be inaccurate
pass
for r in results_:
# find best matching debug symbol, this may be slightly different
# due to optimizations
if defs:
# exact match? avoid difflib if we can for speed
if r.function in defs:
file = defs[r.function]
else:
_, file = max(
defs.items(),
key=lambda d: difflib.SequenceMatcher(None,
d[0],
r.function, False).ratio())
else:
file = r.file
# ignore filtered sources
if sources is not None:
if not any(
os.path.abspath(file) == os.path.abspath(s)
for s in sources):
continue
else:
# default to only cwd
if not everything and not os.path.commonpath([
os.getcwd(),
os.path.abspath(file)]) == os.getcwd():
continue
# simplify path
if os.path.commonpath([
os.getcwd(),
os.path.abspath(file)]) == os.getcwd():
file = os.path.relpath(file)
else:
file = os.path.abspath(file)
results.append(CodeResult(file, r.function, r.size))
return results
@@ -437,7 +562,7 @@ def main(obj_paths, *,
paths.append(path)
if not paths:
print("error: no .obj files found in %r?" % obj_paths)
print("error: no .o files found in %r?" % obj_paths)
sys.exit(-1)
results = collect(paths, **args)
@@ -469,13 +594,16 @@ def main(obj_paths, *,
# write results to CSV
if args.get('output'):
with openio(args['output'], 'w') as f:
writer = csv.DictWriter(f, CodeResult._by
writer = csv.DictWriter(f,
(by if by is not None else CodeResult._by)
+ ['code_'+k for k in CodeResult._fields])
writer.writeheader()
for r in results:
writer.writerow(
{k: getattr(r, k) for k in CodeResult._by}
| {'code_'+k: getattr(r, k) for k in CodeResult._fields})
{k: getattr(r, k)
for k in (by if by is not None else CodeResult._by)}
| {'code_'+k: getattr(r, k)
for k in CodeResult._fields})
# find previous results?
if args.get('diff'):
@@ -512,7 +640,8 @@ if __name__ == "__main__":
import argparse
import sys
parser = argparse.ArgumentParser(
description="Find code size at the function level.")
description="Find code size at the function level.",
allow_abbrev=False)
parser.add_argument(
'obj_paths',
nargs='*',
@@ -579,23 +708,30 @@ if __name__ == "__main__":
action='store_true',
help="Only show the total.")
parser.add_argument(
'-A', '--everything',
'-F', '--source',
dest='sources',
action='append',
help="Only consider definitions in this file. Defaults to anything "
"in the current directory.")
parser.add_argument(
'--everything',
action='store_true',
help="Include builtin and libc specific symbols.")
parser.add_argument(
'--type',
default=TYPE,
'--nm-types',
default=NM_TYPES,
help="Type of symbols to report, this uses the same single-character "
"type-names emitted by nm. Defaults to %r." % TYPE)
"type-names emitted by nm. Defaults to %r." % NM_TYPES)
parser.add_argument(
'--nm-tool',
type=lambda x: x.split(),
default=NM_TOOL,
help="Path to the nm tool to use. Defaults to %r." % NM_TOOL)
parser.add_argument(
'--build-dir',
help="Specify the relative build directory. Used to map object files "
"to the correct source files.")
'--objdump-tool',
type=lambda x: x.split(),
default=OBJDUMP_TOOL,
help="Path to the objdump tool to use. Defaults to %r." % OBJDUMP_TOOL)
sys.exit(main(**{k: v
for k, v in vars(parser.parse_intermixed_args()).items()
if v is not None}))

scripts/coverage.py

@@ -3,7 +3,9 @@
# Script to find coverage info after running tests.
#
# Example:
# ./scripts/coverage.py lfs.t.a.gcda lfs_util.t.a.gcda -s
# ./scripts/coverage.py \
# lfs.t.a.gcda lfs_util.t.a.gcda \
# -Flfs.c -Flfs_util.c -slines
#
# Copyright (c) 2022, The littlefs authors.
# Copyright (c) 2020, Arm Limited. All rights reserved.
@@ -209,19 +211,13 @@ def openio(path, mode='r'):
else:
return open(path, mode)
def collect(paths, *,
def collect(gcda_paths, *,
gcov_tool=GCOV_TOOL,
build_dir=None,
sources=None,
everything=False,
**args):
results = []
for path in paths:
# map to source file
src_path = re.sub('\.t\.a\.gcda$', '.c', path)
if build_dir:
src_path = re.sub('%s/*' % re.escape(build_dir), '',
src_path)
for path in gcda_paths:
# get coverage info through gcov's json output
# note, gcov-tool may contain extra args
cmd = GCOV_TOOL + ['-b', '-t', '--json-format', path]
@@ -231,7 +227,8 @@ def collect(paths, *,
stdout=sp.PIPE,
stderr=sp.PIPE if not args.get('verbose') else None,
universal_newlines=True,
errors='replace')
errors='replace',
close_fds=False)
data = json.load(proc.stdout)
proc.wait()
if proc.returncode != 0:
@@ -242,12 +239,30 @@ def collect(paths, *,
# collect line/branch coverage
for file in data['files']:
if file['file'] != src_path:
# ignore filtered sources
if sources is not None:
if not any(
os.path.abspath(file['file']) == os.path.abspath(s)
for s in sources):
continue
else:
# default to only cwd
if not everything and not os.path.commonpath([
os.getcwd(),
os.path.abspath(file['file'])]) == os.getcwd():
continue
# simplify path
if os.path.commonpath([
os.getcwd(),
os.path.abspath(file['file'])]) == os.getcwd():
file_name = os.path.relpath(file['file'])
else:
file_name = os.path.abspath(file['file'])
for func in file['functions']:
func_name = func.get('name', '(inlined)')
# discard internal function (this includes injected test cases)
# discard internal functions (this includes injected test cases)
if not everything:
if func_name.startswith('__'):
continue
@@ -255,7 +270,7 @@ def collect(paths, *,
# go ahead and add functions, later folding will merge this if
# there are other hits on this line
results.append(CoverageResult(
src_path, func_name, func['start_line'],
file_name, func_name, func['start_line'],
func['execution_count'], 0,
Frac(1 if func['execution_count'] > 0 else 0, 1),
0,
@@ -271,7 +286,7 @@ def collect(paths, *,
# go ahead and add lines, later folding will merge this if
# there are other hits on this line
results.append(CoverageResult(
src_path, func_name, line['line_number'],
file_name, func_name, line['line_number'],
0, line['count'],
0,
Frac(1 if line['count'] > 0 else 0, 1),
@@ -519,31 +534,25 @@ def table(Result, results, diff_results=None, *,
line[-1]))
def annotate(Result, results, paths, *,
def annotate(Result, results, *,
annotate=False,
lines=False,
branches=False,
build_dir=None,
**args):
# if neither branches/lines specified, color both
if annotate and not lines and not branches:
lines, branches = True, True
for path in paths:
# map to source file
src_path = re.sub('\.t\.a\.gcda$', '.c', path)
if build_dir:
src_path = re.sub('%s/*' % re.escape(build_dir), '',
src_path)
for path in co.OrderedDict.fromkeys(r.file for r in results).keys():
# flatten to line info
results = fold(Result, results, by=['file', 'line'])
table = {r.line: r for r in results if r.file == src_path}
table = {r.line: r for r in results if r.file == path}
# calculate spans to show
if not annotate:
spans = []
last = None
func = None
for line, r in sorted(table.items()):
if ((lines and int(r.hits) == 0)
or (branches and r.branches.a < r.branches.b)):
@@ -553,27 +562,29 @@ def annotate(Result, results, paths, *,
line+1+args['context'])
else:
if last is not None:
spans.append(last)
spans.append((last, func))
last = range(
line-args['context'],
line+1+args['context'])
func = r.function
if last is not None:
spans.append(last)
spans.append((last, func))
with open(src_path) as f:
with open(path) as f:
skipped = False
for i, line in enumerate(f):
# skip lines not in spans?
if not annotate and not any(i+1 in s for s in spans):
if not annotate and not any(i+1 in s for s, _ in spans):
skipped = True
continue
if skipped:
skipped = False
print('%s@@ %s:%d @@%s' % (
print('%s@@ %s:%d: %s @@%s' % (
'\x1b[36m' if args['color'] else '',
src_path,
path,
i+1,
next(iter(f for _, f in spans)),
'\x1b[m' if args['color'] else ''))
# build line
@@ -659,12 +670,14 @@ def main(gcda_paths, *,
# write results to CSV
if args.get('output'):
with openio(args['output'], 'w') as f:
writer = csv.DictWriter(f, CoverageResult._by
writer = csv.DictWriter(f,
(by if by is not None else CoverageResult._by)
+ ['coverage_'+k for k in CoverageResult._fields])
writer.writeheader()
for r in results:
writer.writerow(
{k: getattr(r, k) for k in CoverageResult._by}
{k: getattr(r, k)
for k in (by if by is not None else CoverageResult._by)}
| {'coverage_'+k: getattr(r, k)
for k in CoverageResult._fields})
@@ -698,8 +711,7 @@ def main(gcda_paths, *,
or args.get('lines')
or args.get('branches')):
# annotate sources
annotate(CoverageResult, results, paths,
**args)
annotate(CoverageResult, results, **args)
else:
# print table
table(CoverageResult, results,
@@ -724,7 +736,8 @@ if __name__ == "__main__":
import argparse
import sys
parser = argparse.ArgumentParser(
description="Find coverage info after running tests.")
description="Find coverage info after running tests.",
allow_abbrev=False)
parser.add_argument(
'gcda_paths',
nargs='*',
@@ -791,15 +804,21 @@ if __name__ == "__main__":
action='store_true',
help="Only show the total.")
parser.add_argument(
'-A', '--everything',
'-F', '--source',
dest='sources',
action='append',
help="Only consider definitions in this file. Defaults to anything "
"in the current directory.")
parser.add_argument(
'--everything',
action='store_true',
help="Include builtin and libc specific symbols.")
parser.add_argument(
'-H', '--hits',
'--hits',
action='store_true',
help="Show total hits instead of coverage.")
parser.add_argument(
'-l', '--annotate',
'-A', '--annotate',
action='store_true',
help="Show source files annotated with coverage info.")
parser.add_argument(
@@ -814,7 +833,7 @@ if __name__ == "__main__":
'-c', '--context',
type=lambda x: int(x, 0),
default=3,
help="Show a additional lines of context. Defaults to 3.")
help="Show n additional lines of context. Defaults to 3.")
parser.add_argument(
'-W', '--width',
type=lambda x: int(x, 0),
@@ -838,10 +857,6 @@ if __name__ == "__main__":
default=GCOV_TOOL,
type=lambda x: x.split(),
help="Path to the gcov tool to use. Defaults to %r." % GCOV_TOOL)
parser.add_argument(
'--build-dir',
help="Specify the relative build directory. Used to map object files "
"to the correct source files.")
sys.exit(main(**{k: v
for k, v in vars(parser.parse_intermixed_args()).items()
if v is not None}))

scripts/data.py

@@ -5,7 +5,7 @@
# by Linux's Bloat-O-Meter.
#
# Example:
# ./scripts/data.py lfs.o lfs_util.o -S
# ./scripts/data.py lfs.o lfs_util.o -Ssize
#
# Copyright (c) 2022, The littlefs authors.
# Copyright (c) 2020, Arm Limited. All rights reserved.
@@ -14,6 +14,7 @@
import collections as co
import csv
import difflib
import glob
import itertools as it
import math as m
@@ -25,7 +26,8 @@ import subprocess as sp
OBJ_PATHS = ['*.o']
NM_TOOL = ['nm']
TYPE = 'dDbB'
NM_TYPES = 'dDbB'
OBJDUMP_TOOL = ['objdump']
# integer fields
@@ -135,21 +137,32 @@ def openio(path, mode='r'):
def collect(paths, *,
nm_tool=NM_TOOL,
type=TYPE,
build_dir=None,
nm_types=NM_TYPES,
objdump_tool=OBJDUMP_TOOL,
sources=None,
everything=False,
**args):
results = []
pattern = re.compile(
size_pattern = re.compile(
'^(?P<size>[0-9a-fA-F]+)' +
' (?P<type>[%s])' % re.escape(type) +
' (?P<type>[%s])' % re.escape(nm_types) +
' (?P<func>.+?)$')
line_pattern = re.compile(
'^\s+(?P<no>[0-9]+)\s+'
'(?:(?P<dir>[0-9]+)\s+)?'
'.*\s+'
'(?P<path>[^\s]+)$')
info_pattern = re.compile(
'^(?:.*(?P<tag>DW_TAG_[a-z_]+).*'
'|^.*DW_AT_name.*:\s*(?P<name>[^:\s]+)\s*'
'|^.*DW_AT_decl_file.*:\s*(?P<file>[0-9]+)\s*)$')
results = []
for path in paths:
# map to source file
src_path = re.sub('\.o$', '.c', path)
if build_dir:
src_path = re.sub('%s/*' % re.escape(build_dir), '',
src_path)
# guess the source, if we have debug-info we'll replace this later
file = re.sub('(\.o)?$', '.c', path, 1)
# find symbol sizes
results_ = []
# note nm-tool may contain extra args
cmd = nm_tool + ['--size-sort', path]
if args.get('verbose'):
@@ -158,21 +171,18 @@ def collect(paths, *,
stdout=sp.PIPE,
stderr=sp.PIPE if not args.get('verbose') else None,
universal_newlines=True,
errors='replace')
errors='replace',
close_fds=False)
for line in proc.stdout:
m = pattern.match(line)
m = size_pattern.match(line)
if m:
func = m.group('func')
# discard internal functions
if not everything and func.startswith('__'):
continue
# discard .8449 suffixes created by optimizer
func = re.sub('\.[0-9]+', '', func)
results.append(DataResult(
src_path, func,
results_.append(DataResult(
file, func,
int(m.group('size'), 16)))
proc.wait()
if proc.returncode != 0:
if not args.get('verbose'):
@@ -180,6 +190,121 @@ def collect(paths, *,
sys.stdout.write(line)
sys.exit(-1)
# try to figure out the source file if we have debug-info
dirs = {}
files = {}
# note objdump-tool may contain extra args
cmd = objdump_tool + ['--dwarf=rawline', path]
if args.get('verbose'):
print(' '.join(shlex.quote(c) for c in cmd))
proc = sp.Popen(cmd,
stdout=sp.PIPE,
stderr=sp.PIPE if not args.get('verbose') else None,
universal_newlines=True,
errors='replace',
close_fds=False)
for line in proc.stdout:
# note that files contain references to dirs, which we
# dereference as soon as we see them as each file table follows a
# dir table
m = line_pattern.match(line)
if m:
if not m.group('dir'):
# found a directory entry
dirs[int(m.group('no'))] = m.group('path')
else:
# found a file entry
dir = int(m.group('dir'))
if dir in dirs:
files[int(m.group('no'))] = os.path.join(
dirs[dir],
m.group('path'))
else:
files[int(m.group('no'))] = m.group('path')
proc.wait()
if proc.returncode != 0:
if not args.get('verbose'):
for line in proc.stderr:
sys.stdout.write(line)
# do nothing on error, we don't need objdump to work, source files
# may just be inaccurate
pass
defs = {}
is_func = False
f_name = None
f_file = None
# note objdump-tool may contain extra args
cmd = objdump_tool + ['--dwarf=info', path]
if args.get('verbose'):
print(' '.join(shlex.quote(c) for c in cmd))
proc = sp.Popen(cmd,
stdout=sp.PIPE,
stderr=sp.PIPE if not args.get('verbose') else None,
universal_newlines=True,
errors='replace',
close_fds=False)
for line in proc.stdout:
# state machine here to find definitions
m = info_pattern.match(line)
if m:
if m.group('tag'):
if is_func:
defs[f_name] = files.get(f_file, '?')
is_func = (m.group('tag') == 'DW_TAG_subprogram')
elif m.group('name'):
f_name = m.group('name')
elif m.group('file'):
f_file = int(m.group('file'))
proc.wait()
if proc.returncode != 0:
if not args.get('verbose'):
for line in proc.stderr:
sys.stdout.write(line)
# do nothing on error, we don't need objdump to work, source files
# may just be inaccurate
pass
for r in results_:
# find best matching debug symbol, this may be slightly different
# due to optimizations
if defs:
# exact match? avoid difflib if we can for speed
if r.function in defs:
file = defs[r.function]
else:
_, file = max(
defs.items(),
key=lambda d: difflib.SequenceMatcher(None,
d[0],
r.function, False).ratio())
else:
file = r.file
# ignore filtered sources
if sources is not None:
if not any(
os.path.abspath(file) == os.path.abspath(s)
for s in sources):
continue
else:
# default to only cwd
if not everything and not os.path.commonpath([
os.getcwd(),
os.path.abspath(file)]) == os.getcwd():
continue
# simplify path
if os.path.commonpath([
os.getcwd(),
os.path.abspath(file)]) == os.getcwd():
file = os.path.relpath(file)
else:
file = os.path.abspath(file)
results.append(DataResult(file, r.function, r.size))
return results
@@ -437,7 +562,7 @@ def main(obj_paths, *,
paths.append(path)
if not paths:
print("error: no .obj files found in %r?" % obj_paths)
print("error: no .o files found in %r?" % obj_paths)
sys.exit(-1)
results = collect(paths, **args)
@@ -469,13 +594,16 @@ def main(obj_paths, *,
# write results to CSV
if args.get('output'):
with openio(args['output'], 'w') as f:
writer = csv.DictWriter(f, DataResult._by
writer = csv.DictWriter(f,
(by if by is not None else DataResult._by)
+ ['data_'+k for k in DataResult._fields])
writer.writeheader()
for r in results:
writer.writerow(
{k: getattr(r, k) for k in DataResult._by}
| {'data_'+k: getattr(r, k) for k in DataResult._fields})
{k: getattr(r, k)
for k in (by if by is not None else DataResult._by)}
| {'data_'+k: getattr(r, k)
for k in DataResult._fields})
# find previous results?
if args.get('diff'):
@@ -512,7 +640,8 @@ if __name__ == "__main__":
import argparse
import sys
parser = argparse.ArgumentParser(
description="Find data size at the function level.")
description="Find data size at the function level.",
allow_abbrev=False)
parser.add_argument(
'obj_paths',
nargs='*',
@@ -579,23 +708,30 @@ if __name__ == "__main__":
action='store_true',
help="Only show the total.")
parser.add_argument(
'-A', '--everything',
'-F', '--source',
dest='sources',
action='append',
help="Only consider definitions in this file. Defaults to anything "
"in the current directory.")
parser.add_argument(
'--everything',
action='store_true',
help="Include builtin and libc specific symbols.")
parser.add_argument(
'--type',
default=TYPE,
'--nm-types',
default=NM_TYPES,
help="Type of symbols to report, this uses the same single-character "
"type-names emitted by nm. Defaults to %r." % TYPE)
"type-names emitted by nm. Defaults to %r." % NM_TYPES)
parser.add_argument(
'--nm-tool',
type=lambda x: x.split(),
default=NM_TOOL,
help="Path to the nm tool to use. Defaults to %r." % NM_TOOL)
parser.add_argument(
'--build-dir',
help="Specify the relative build directory. Used to map object files "
"to the correct source files.")
'--objdump-tool',
type=lambda x: x.split(),
default=OBJDUMP_TOOL,
help="Path to the objdump tool to use. Defaults to %r." % OBJDUMP_TOOL)
sys.exit(main(**{k: v
for k, v in vars(parser.parse_intermixed_args()).items()
if v is not None}))

scripts/perf.py (new executable file, 1263 lines; diff suppressed because it is too large)
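
Since the diff is suppressed, here is a rough, hypothetical sketch of
the record half (the '-R' mode invoked from bench.py/test.py): wrap
the command with perf record, then fold the result into the shared
zip under flock as in the commit-message sketch. The real perf.py is
certainly more involved:

    import fcntl
    import os
    import subprocess
    import tempfile
    import zipfile

    def record(cmd, output, perf_tool=['perf'], freq=100):
        # sample the command with call graphs at the given frequency
        with tempfile.TemporaryDirectory() as tmp:
            data = os.path.join(tmp, 'perf.data')
            err = subprocess.call(
                perf_tool + ['record', '-F', str(freq), '-g',
                    '-o', data, '--'] + cmd)
            if os.path.exists(data):
                # append under an exclusive flock so concurrent
                # runners don't corrupt the zip
                with open(output, 'ab') as f:
                    fcntl.flock(f, fcntl.LOCK_EX)
                    with zipfile.ZipFile(output, 'a',
                            zipfile.ZIP_DEFLATED) as z:
                        z.write(data, 'perf.%d' % os.getpid())
        return err

In the runners this composes as roughly perf.py -R -o<file> <cmd>,
per the cmd[:0] munging visible in bench.py/test.py.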

scripts/plot.py

@@ -727,7 +727,8 @@ if __name__ == "__main__":
import sys
import argparse
parser = argparse.ArgumentParser(
description="Plot CSV files in terminal.")
description="Plot CSV files in terminal.",
allow_abbrev=False)
parser.add_argument(
'csv_paths',
nargs='*',

scripts/prettyasserts.py

@@ -424,7 +424,8 @@ if __name__ == "__main__":
import argparse
import sys
parser = argparse.ArgumentParser(
description="Preprocessor that makes asserts easier to debug.")
description="Preprocessor that makes asserts easier to debug.",
allow_abbrev=False)
parser.add_argument(
'input',
help="Input C file.")

scripts/stack.py

@@ -4,7 +4,7 @@
# report as infinite stack usage.
#
# Example:
# ./scripts/stack.py lfs.ci lfs_util.ci -S
# ./scripts/stack.py lfs.ci lfs_util.ci -Slimit
#
# Copyright (c) 2022, The littlefs authors.
# SPDX-License-Identifier: BSD-3-Clause
@@ -131,6 +131,7 @@ def openio(path, mode='r'):
return open(path, mode)
def collect(paths, *,
sources=None,
everything=False,
**args):
# parse the vcg format
@@ -181,8 +182,9 @@ def collect(paths, *,
if (not args.get('quiet')
and 'static' not in type
and 'bounded' not in type):
print("warning: found non-static stack for %s (%s)"
% (function, type, size))
print("warning: "
"found non-static stack for %s (%s, %s)" % (
function, type, size))
_, _, _, targets = callgraph[info['title']]
callgraph[info['title']] = (
file, function, int(size), targets)
@@ -193,11 +195,48 @@ def collect(paths, *,
else:
continue
if not everything:
for source, (s_file, s_function, _, _) in list(callgraph.items()):
callgraph_ = co.defaultdict(lambda: (None, None, 0, set()))
for source, (s_file, s_function, frame, targets) in callgraph.items():
# discard internal functions
if s_file.startswith('<') or s_file.startswith('/usr/include'):
del callgraph[source]
if not everything and s_function.startswith('__'):
continue
# ignore filtered sources
if sources is not None:
if not any(
os.path.abspath(s_file) == os.path.abspath(s)
for s in sources):
continue
else:
# default to only cwd
if not everything and not os.path.commonpath([
os.getcwd(),
os.path.abspath(s_file)]) == os.getcwd():
continue
# simplify path
if os.path.commonpath([
os.getcwd(),
os.path.abspath(s_file)]) == os.getcwd():
s_file = os.path.relpath(s_file)
else:
s_file = os.path.abspath(s_file)
callgraph_[source] = (s_file, s_function, frame, targets)
callgraph = callgraph_
if not everything:
callgraph_ = co.defaultdict(lambda: (None, None, 0, set()))
for source, (s_file, s_function, frame, targets) in callgraph.items():
# discard filtered sources
if sources is not None and not any(
os.path.abspath(s_file) == os.path.abspath(s)
for s in sources):
continue
# discard internal functions
if s_function.startswith('__'):
continue
callgraph_[source] = (s_file, s_function, frame, targets)
callgraph = callgraph_
# find maximum stack size recursively, this requires also detecting cycles
# (in case of recursion)
@@ -278,7 +317,7 @@ def table(Result, results, diff_results=None, *,
all=False,
percent=False,
tree=False,
depth=None,
depth=1,
**_):
all_, all = all, __builtins__.all
@@ -467,15 +506,8 @@ def table(Result, results, diff_results=None, *,
# adjust the name width based on the expected call depth, though
# note this doesn't really work with unbounded recursion
if not summary:
# it doesn't really make sense to not have a depth with tree,
# so assume depth=inf if tree by default
if depth is None:
depth = m.inf if tree else 0
elif depth == 0:
depth = m.inf
if not m.isinf(depth):
widths[0] += 4*depth
widths[0] += 4*(depth-1)
# print our table with optional call info
#
@@ -528,7 +560,7 @@ def table(Result, results, diff_results=None, *,
prefixes[2+is_last] + "'-> ",
prefixes[2+is_last] + "| ",
prefixes[2+is_last] + " "))
recurse(names, depth)
recurse(names, depth-1)
if not tree:
print('%-*s %s%s' % (
@@ -544,6 +576,13 @@ def main(ci_paths,
defines=None,
sort=None,
**args):
# it doesn't really make sense to not have a depth with tree,
# so assume depth=inf if tree by default
if args.get('depth') is None:
args['depth'] = m.inf if args['tree'] else 1
elif args.get('depth') == 0:
args['depth'] = m.inf
# find sizes
if not args.get('use', None):
# find .ci files
@@ -588,13 +627,16 @@ def main(ci_paths,
# write results to CSV
if args.get('output'):
with openio(args['output'], 'w') as f:
writer = csv.DictWriter(f, StackResult._by
writer = csv.DictWriter(f,
(by if by is not None else StackResult._by)
+ ['stack_'+k for k in StackResult._fields])
writer.writeheader()
for r in results:
writer.writerow(
{k: getattr(r, k) for k in StackResult._by}
| {'stack_'+k: getattr(r, k) for k in StackResult._fields})
{k: getattr(r, k)
for k in (by if by is not None else StackResult._by)}
| {'stack_'+k: getattr(r, k)
for k in StackResult._fields})
# find previous results?
if args.get('diff'):
@@ -636,7 +678,8 @@ if __name__ == "__main__":
import argparse
import sys
parser = argparse.ArgumentParser(
description="Find stack usage at the function level.")
description="Find stack usage at the function level.",
allow_abbrev=False)
parser.add_argument(
'ci_paths',
nargs='*',
@@ -703,7 +746,13 @@ if __name__ == "__main__":
action='store_true',
help="Only show the total.")
parser.add_argument(
'-A', '--everything',
'-F', '--source',
dest='sources',
action='append',
help="Only consider definitions in this file. Defaults to anything "
"in the current directory.")
parser.add_argument(
'--everything',
action='store_true',
help="Include builtin and libc specific symbols.")
parser.add_argument(
@@ -711,20 +760,16 @@ if __name__ == "__main__":
action='store_true',
help="Only show the function call tree.")
parser.add_argument(
'-L', '--depth',
'-Z', '--depth',
nargs='?',
type=lambda x: int(x, 0),
const=0,
help="Depth of function calls to show. 0 show all calls but may not "
help="Depth of function calls to show. 0 shows all calls but may not "
"terminate!")
parser.add_argument(
'-e', '--error-on-recursion',
action='store_true',
help="Error if any functions are recursive.")
parser.add_argument(
'--build-dir',
help="Specify the relative build directory. Used to map object files "
"to the correct source files.")
sys.exit(main(**{k: v
for k, v in vars(parser.parse_intermixed_args()).items()
if v is not None}))

scripts/struct_.py

@@ -3,7 +3,7 @@
# Script to find struct sizes.
#
# Example:
# ./scripts/struct_.py lfs.o lfs_util.o -S
# ./scripts/struct_.py lfs.o lfs_util.o -Ssize
#
# Copyright (c) 2022, The littlefs authors.
# SPDX-License-Identifier: BSD-3-Clause
@@ -11,6 +11,7 @@
import collections as co
import csv
import difflib
import glob
import itertools as it
import math as m
@@ -128,26 +129,28 @@ def openio(path, mode='r'):
else:
return open(path, mode)
def collect(paths, *,
def collect(obj_paths, *,
objdump_tool=OBJDUMP_TOOL,
build_dir=None,
sources=None,
everything=False,
internal=False,
**args):
decl_pattern = re.compile(
'^\s+(?P<no>[0-9]+)'
'\s+(?P<dir>[0-9]+)'
'\s+.*'
'\s+(?P<file>[^\s]+)$')
struct_pattern = re.compile(
'^(?:.*DW_TAG_(?P<tag>[a-z_]+).*'
line_pattern = re.compile(
'^\s+(?P<no>[0-9]+)\s+'
'(?:(?P<dir>[0-9]+)\s+)?'
'.*\s+'
'(?P<path>[^\s]+)$')
info_pattern = re.compile(
'^(?:.*(?P<tag>DW_TAG_[a-z_]+).*'
'|^.*DW_AT_name.*:\s*(?P<name>[^:\s]+)\s*'
'|^.*DW_AT_decl_file.*:\s*(?P<decl>[0-9]+)\s*'
'|^.*DW_AT_decl_file.*:\s*(?P<file>[0-9]+)\s*'
'|^.*DW_AT_byte_size.*:\s*(?P<size>[0-9]+)\s*)$')
results = []
for path in paths:
# find decl, we want to filter by structs in .h files
decls = {}
for path in obj_paths:
# find files, we want to filter by structs in .h files
dirs = {}
files = {}
# note objdump-tool may contain extra args
cmd = objdump_tool + ['--dwarf=rawline', path]
if args.get('verbose'):
@@ -156,12 +159,26 @@ def collect(paths, *,
stdout=sp.PIPE,
stderr=sp.PIPE if not args.get('verbose') else None,
universal_newlines=True,
errors='replace')
errors='replace',
close_fds=False)
for line in proc.stdout:
# find file numbers
m = decl_pattern.match(line)
# note that files contain references to dirs, which we
# dereference as soon as we see them as each file table follows a
# dir table
m = line_pattern.match(line)
if m:
decls[int(m.group('no'))] = m.group('file')
if not m.group('dir'):
# found a directory entry
dirs[int(m.group('no'))] = m.group('path')
else:
# found a file entry
dir = int(m.group('dir'))
if dir in dirs:
files[int(m.group('no'))] = os.path.join(
dirs[dir],
m.group('path'))
else:
files[int(m.group('no'))] = m.group('path')
proc.wait()
if proc.returncode != 0:
if not args.get('verbose'):
@@ -170,11 +187,11 @@ def collect(paths, *,
sys.exit(-1)
# collect structs as we parse dwarf info
found = False
name = None
decl = None
size = None
results_ = []
is_struct = False
s_name = None
s_file = None
s_size = None
# note objdump-tool may contain extra args
cmd = objdump_tool + ['--dwarf=info', path]
if args.get('verbose'):
@@ -183,38 +200,23 @@ def collect(paths, *,
stdout=sp.PIPE,
stderr=sp.PIPE if not args.get('verbose') else None,
universal_newlines=True,
errors='replace')
errors='replace',
close_fds=False)
for line in proc.stdout:
# state machine here to find structs
m = struct_pattern.match(line)
m = info_pattern.match(line)
if m:
if m.group('tag'):
if (name is not None
and decl is not None
and size is not None):
file = decls.get(decl, '?')
# map to source file
file = re.sub('\.o$', '.c', file)
if build_dir:
file = re.sub(
'%s/*' % re.escape(build_dir), '',
file)
# only include structs declared in header files in the
# current directory, ignore internal-only structs (
# these are represented in other measurements)
if everything or file.endswith('.h'):
results.append(StructResult(file, name, size))
found = (m.group('tag') == 'structure_type')
name = None
decl = None
size = None
elif found and m.group('name'):
name = m.group('name')
elif found and name and m.group('decl'):
decl = int(m.group('decl'))
elif found and name and m.group('size'):
size = int(m.group('size'))
if is_struct:
file = files.get(s_file, '?')
results_.append(StructResult(file, s_name, s_size))
is_struct = (m.group('tag') == 'DW_TAG_structure_type')
elif m.group('name'):
s_name = m.group('name')
elif m.group('file'):
s_file = int(m.group('file'))
elif m.group('size'):
s_size = int(m.group('size'))
proc.wait()
if proc.returncode != 0:
if not args.get('verbose'):
@@ -222,6 +224,34 @@ def collect(paths, *,
sys.stdout.write(line)
sys.exit(-1)
for r in results_:
# ignore filtered sources
if sources is not None:
if not any(
os.path.abspath(r.file) == os.path.abspath(s)
for s in sources):
continue
else:
# default to only cwd
if not everything and not os.path.commonpath([
os.getcwd(),
os.path.abspath(r.file)]) == os.getcwd():
continue
# limit to .h files unless --internal
if not internal and not r.file.endswith('.h'):
continue
# simplify path
if os.path.commonpath([
os.getcwd(),
os.path.abspath(r.file)]) == os.getcwd():
file = os.path.relpath(r.file)
else:
file = os.path.abspath(r.file)
results.append(StructResult(r.file, r.struct, r.size))
return results
@@ -479,7 +509,7 @@ def main(obj_paths, *,
paths.append(path)
if not paths:
print("error: no .obj files found in %r?" % obj_paths)
print("error: no .o files found in %r?" % obj_paths)
sys.exit(-1)
results = collect(paths, **args)
@@ -513,12 +543,14 @@ def main(obj_paths, *,
# write results to CSV
if args.get('output'):
with openio(args['output'], 'w') as f:
writer = csv.DictWriter(f, StructResult._by
writer = csv.DictWriter(f,
(by if by is not None else StructResult._by)
+ ['struct_'+k for k in StructResult._fields])
writer.writeheader()
for r in results:
writer.writerow(
{k: getattr(r, k) for k in StructResult._by}
{k: getattr(r, k)
for k in (by if by is not None else StructResult._by)}
| {'struct_'+k: getattr(r, k)
for k in StructResult._fields})
@@ -559,7 +591,8 @@ if __name__ == "__main__":
import argparse
import sys
parser = argparse.ArgumentParser(
description="Find struct sizes.")
description="Find struct sizes.",
allow_abbrev=False)
parser.add_argument(
'obj_paths',
nargs='*',
@@ -626,18 +659,24 @@ if __name__ == "__main__":
action='store_true',
help="Only show the total.")
parser.add_argument(
'-A', '--everything',
'-F', '--source',
dest='sources',
action='append',
help="Only consider definitions in this file. Defaults to anything "
"in the current directory.")
parser.add_argument(
'--everything',
action='store_true',
help="Include builtin and libc specific symbols.")
parser.add_argument(
'--internal',
action='store_true',
help="Also show structs in .c files.")
parser.add_argument(
'--objdump-tool',
type=lambda x: x.split(),
default=OBJDUMP_TOOL,
help="Path to the objdump tool to use. Defaults to %r." % OBJDUMP_TOOL)
parser.add_argument(
'--build-dir',
help="Specify the relative build directory. Used to map object files "
"to the correct source files.")
sys.exit(main(**{k: v
for k, v in vars(parser.parse_intermixed_args()).items()
if v is not None}))

scripts/summary.py

@@ -750,7 +750,8 @@ if __name__ == "__main__":
import argparse
import sys
parser = argparse.ArgumentParser(
description="Summarize measurements in CSV files.")
description="Summarize measurements in CSV files.",
allow_abbrev=False)
parser.add_argument(
'csv_paths',
nargs='*',

scripts/tailpipe.py

@@ -115,7 +115,8 @@ if __name__ == "__main__":
import sys
import argparse
parser = argparse.ArgumentParser(
description="Efficiently displays the last n lines of a file/pipe.")
description="Efficiently displays the last n lines of a file/pipe.",
allow_abbrev=False)
parser.add_argument(
'path',
nargs='?',

scripts/test.py

@@ -27,9 +27,13 @@ import time
import toml
RUNNER_PATH = 'runners/test_runner'
RUNNER_PATH = './runners/test_runner'
HEADER_PATH = 'runners/test_runner.h'
GDB_TOOL = ['gdb']
VALGRIND_TOOL = ['valgrind']
PERF_SCRIPT = ['./scripts/perf.py']
def openio(path, mode='r', buffering=-1, nb=False):
if path == '-':
@@ -516,12 +520,25 @@ def find_runner(runner, **args):
# run under valgrind?
if args.get('valgrind'):
cmd[:0] = filter(None, [
'valgrind',
cmd[:0] = args['valgrind_tool'] + [
'--leak-check=full',
'--track-origins=yes',
'--error-exitcode=4',
'-q'])
'-q']
# run under perf?
if args.get('perf'):
cmd[:0] = args['perf_script'] + list(filter(None, [
'-R',
'--perf-freq=%s' % args['perf_freq']
if args.get('perf_freq') else None,
'--perf-period=%s' % args['perf_period']
if args.get('perf_period') else None,
'--perf-events=%s' % args['perf_events']
if args.get('perf_events') else None,
'--perf-tool=%s' % args['perf_tool']
if args.get('perf_tool') else None,
'-o%s' % args['perf']]))
# other context
if args.get('geometry'):
@@ -799,9 +816,9 @@ def run_stage(name, runner_, ids, output_, **args):
try:
line = mpty.readline()
except OSError as e:
if e.errno == errno.EIO:
break
if e.errno != errno.EIO:
raise
break
if not line:
break
last_stdout.append(line)
@@ -1126,24 +1143,24 @@ def run(runner, test_ids=[], **args):
cmd = runner_ + [failure.id]
if args.get('gdb_main'):
cmd[:0] = ['gdb',
cmd[:0] = args['gdb_tool'] + [
'-ex', 'break main',
'-ex', 'run',
'--args']
elif args.get('gdb_case'):
path, lineno = find_path(runner_, failure.id, **args)
cmd[:0] = ['gdb',
cmd[:0] = args['gdb_tool'] + [
'-ex', 'break %s:%d' % (path, lineno),
'-ex', 'run',
'--args']
elif failure.assert_ is not None:
cmd[:0] = ['gdb',
cmd[:0] = args['gdb_tool'] + [
'-ex', 'run',
'-ex', 'frame function raise',
'-ex', 'up 2',
'--args']
else:
cmd[:0] = ['gdb',
cmd[:0] = args['gdb_tool'] + [
'-ex', 'run',
'--args']
@@ -1188,6 +1205,7 @@ if __name__ == "__main__":
argparse._ArgumentGroup._handle_conflict_ignore = lambda *_: None
parser = argparse.ArgumentParser(
description="Build and run tests.",
allow_abbrev=False,
conflict_handler='ignore')
parser.add_argument(
'-v', '--verbose',
@@ -1323,6 +1341,11 @@ if __name__ == "__main__":
action='store_true',
help="Drop into gdb on test failure but stop at the beginning "
"of main.")
test_parser.add_argument(
'--gdb-tool',
type=lambda x: x.split(),
default=GDB_TOOL,
help="Path to gdb tool to use. Defaults to %r." % GDB_TOOL)
test_parser.add_argument(
'--exec',
type=lambda e: e.split(),
@@ -1332,6 +1355,37 @@ if __name__ == "__main__":
action='store_true',
help="Run under Valgrind to find memory errors. Implicitly sets "
"--isolate.")
test_parser.add_argument(
'--valgrind-tool',
type=lambda x: x.split(),
default=VALGRIND_TOOL,
help="Path to Valgrind tool to use. Defaults to %r." % VALGRIND_TOOL)
test_parser.add_argument(
'--perf',
help="Run under Linux's perf to sample performance counters, writing "
"samples to this file.")
test_parser.add_argument(
'--perf-freq',
help="perf sampling frequency. This is passed directly to the perf "
"script.")
test_parser.add_argument(
'--perf-period',
help="perf sampling period. This is passed directly to the perf "
"script.")
test_parser.add_argument(
'--perf-events',
help="perf events to record. This is passed directly to the perf "
"script.")
test_parser.add_argument(
'--perf-script',
type=lambda x: x.split(),
default=PERF_SCRIPT,
help="Path to the perf script to use. Defaults to %r." % PERF_SCRIPT)
test_parser.add_argument(
'--perf-tool',
type=lambda x: x.split(),
help="Path to the perf tool to use. This is passed directly to the "
"perf script")
# compilation flags
comp_parser = parser.add_argument_group('compilation options')
@@ -1356,7 +1410,7 @@ if __name__ == "__main__":
'-o', '--output',
help="Output file.")
# runner + test_ids overlaps test_paths, so we need to do some munging here
# runner/test_paths overlap, so need to do some munging here
args = parser.parse_intermixed_args()
args.test_paths = [' '.join(args.runner or [])] + args.test_ids
args.runner = args.runner or [RUNNER_PATH]

scripts/tracebd.py

@@ -830,7 +830,8 @@ if __name__ == "__main__":
import argparse
parser = argparse.ArgumentParser(
description="Display operations on block devices based on "
"trace output.")
"trace output.",
allow_abbrev=False)
parser.add_argument(
'path',
nargs='?',