Added perfbd.py and block device performance sampling in bench-runner

Based loosely on Linux's perf tool, perfbd.py uses trace output with
backtraces to aggregate and show the block device usage of all functions
in a program, propagating block devices operation cost up the backtrace
for each operation.

This combined with --trace-period and --trace-freq for
sampling/filtering trace events allow the bench-runner to very
efficiently record the general cost of block device operations with very
little overhead.

Adopted this as the default side-effect of make bench, replacing
cycle-based performance measurements which are less important for
littlefs.
This commit is contained in:
Christopher Haster
2022-10-13 11:09:26 -05:00
parent 29cbafeb67
commit 3a33c3795b
20 changed files with 2026 additions and 610 deletions

View File

@@ -41,30 +41,32 @@ TEST_SRC ?= $(SRC) \
$(filter-out $(wildcard bd/*.*.c),$(wildcard bd/*.c)) \
runners/test_runner.c
TEST_RUNNER ?= $(BUILDDIR)runners/test_runner
TEST_TC := $(TESTS:%.toml=$(BUILDDIR)%.t.c) \
TEST_TC := $(TESTS:%.toml=$(BUILDDIR)%.t.c) \
$(TEST_SRC:%.c=$(BUILDDIR)%.t.c)
TEST_TAC := $(TEST_TC:%.t.c=%.t.a.c)
TEST_OBJ := $(TEST_TAC:%.t.a.c=%.t.a.o)
TEST_DEP := $(TEST_TAC:%.t.a.c=%.t.a.d)
TEST_CI := $(TEST_TAC:%.t.a.c=%.t.a.ci)
TEST_GCNO := $(TEST_TAC:%.t.a.c=%.t.a.gcno)
TEST_GCDA := $(TEST_TAC:%.t.a.c=%.t.a.gcda)
TEST_PERF := $(TEST_RUNNER:%=%.perf)
TEST_TAC := $(TEST_TC:%.t.c=%.t.a.c)
TEST_OBJ := $(TEST_TAC:%.t.a.c=%.t.a.o)
TEST_DEP := $(TEST_TAC:%.t.a.c=%.t.a.d)
TEST_CI := $(TEST_TAC:%.t.a.c=%.t.a.ci)
TEST_GCNO := $(TEST_TAC:%.t.a.c=%.t.a.gcno)
TEST_GCDA := $(TEST_TAC:%.t.a.c=%.t.a.gcda)
TEST_PERF := $(TEST_RUNNER:%=%.perf)
TEST_TRACE := $(TEST_RUNNER:%=%.trace)
BENCHES ?= $(wildcard benches/*.toml)
BENCH_SRC ?= $(SRC) \
$(filter-out $(wildcard bd/*.*.c),$(wildcard bd/*.c)) \
runners/bench_runner.c
BENCH_RUNNER ?= $(BUILDDIR)runners/bench_runner
BENCH_BC := $(BENCHES:%.toml=$(BUILDDIR)%.b.c) \
BENCH_BC := $(BENCHES:%.toml=$(BUILDDIR)%.b.c) \
$(BENCH_SRC:%.c=$(BUILDDIR)%.b.c)
BENCH_BAC := $(BENCH_BC:%.b.c=%.b.a.c)
BENCH_OBJ := $(BENCH_BAC:%.b.a.c=%.b.a.o)
BENCH_DEP := $(BENCH_BAC:%.b.a.c=%.b.a.d)
BENCH_CI := $(BENCH_BAC:%.b.a.c=%.b.a.ci)
BENCH_GCNO := $(BENCH_BAC:%.b.a.c=%.b.a.gcno)
BENCH_GCDA := $(BENCH_BAC:%.b.a.c=%.b.a.gcda)
BENCH_PERF := $(BENCH_RUNNER:%=%.perf)
BENCH_BAC := $(BENCH_BC:%.b.c=%.b.a.c)
BENCH_OBJ := $(BENCH_BAC:%.b.a.c=%.b.a.o)
BENCH_DEP := $(BENCH_BAC:%.b.a.c=%.b.a.d)
BENCH_CI := $(BENCH_BAC:%.b.a.c=%.b.a.ci)
BENCH_GCNO := $(BENCH_BAC:%.b.a.c=%.b.a.gcno)
BENCH_GCDA := $(BENCH_BAC:%.b.a.c=%.b.a.gcda)
BENCH_PERF := $(BENCH_RUNNER:%=%.perf)
BENCH_TRACE := $(BENCH_RUNNER:%=%.trace)
ifdef DEBUG
override CFLAGS += -O0
@@ -85,6 +87,9 @@ endif
ifdef YES_PERF
override CFLAGS += -fno-omit-frame-pointer
endif
ifdef YES_PERFBD
override CFLAGS += -fno-omit-frame-pointer
endif
ifdef VERBOSE
override CODEFLAGS += -v
@@ -93,7 +98,11 @@ override STACKFLAGS += -v
override STRUCTFLAGS += -v
override COVFLAGS += -v
override PERFFLAGS += -v
override PERFBDFLAGS += -v
endif
# forward -j flag
override PERFFLAGS += $(filter -j%,$(MAKEFLAGS))
override PERFBDFLAGS += $(filter -j%,$(MAKEFLAGS))
ifneq ($(NM),nm)
override CODEFLAGS += --nm-tool="$(NM)"
override DATAFLAGS += --nm-tool="$(NM)"
@@ -103,6 +112,7 @@ override CODEFLAGS += --objdump-tool="$(OBJDUMP)"
override DATAFLAGS += --objdump-tool="$(OBJDUMP)"
override STRUCTFLAGS += --objdump-tool="$(OBJDUMP)"
override PERFFLAGS += --objdump-tool="$(OBJDUMP)"
override PERFBDFLAGS += --objdump-tool="$(OBJDUMP)"
endif
ifneq ($(PERF),perf)
override PERFFLAGS += --perf-tool="$(PERF)"
@@ -114,10 +124,14 @@ override BENCHFLAGS += -b
override TESTFLAGS += $(filter -j%,$(MAKEFLAGS))
override BENCHFLAGS += $(filter -j%,$(MAKEFLAGS))
ifdef YES_PERF
override TESTFLAGS += --perf=$(TEST_PERF)
override TESTFLAGS += -p$(TEST_PERF)
override BENCHFLAGS += -p$(BENCH_PERF)
endif
ifndef NO_PERF
override BENCHFLAGS += --perf=$(BENCH_PERF)
ifdef YES_PERFBD
override TESTFLAGS += -t$(TEST_TRACE) --trace-backtrace --trace-freq=100
endif
ifndef NO_PERFBD
override BENCHFLAGS += -t$(BENCH_TRACE) --trace-backtrace --trace-freq=100
endif
ifdef VERBOSE
override TESTFLAGS += -v
@@ -165,6 +179,11 @@ endif
ifdef YES_PERF
bench-runner build-bench: override CFLAGS+=-fno-omit-frame-pointer
endif
ifdef YES_PERFBD
bench-runner build-bench: override CFLAGS+=-fno-omit-frame-pointer
endif
# note we remove some binary dependent files during compilation,
# otherwise it's way to easy to end up with outdated results
test-runner build-test: $(TEST_RUNNER)
ifndef NO_COV
rm -f $(TEST_GCDA)
@@ -172,6 +191,9 @@ endif
ifdef YES_PERF
rm -f $(TEST_PERF)
endif
ifdef YES_PERFBD
rm -f $(TEST_TRACE)
endif
.PHONY: test
test: test-runner
@@ -185,16 +207,24 @@ test-list: test-runner
ifdef YES_COV
bench-runner build-bench: override CFLAGS+=--coverage
endif
ifndef NO_PERF
ifdef YES_PERF
bench-runner build-bench: override CFLAGS+=-fno-omit-frame-pointer
endif
ifndef NO_PERFBD
bench-runner build-bench: override CFLAGS+=-fno-omit-frame-pointer
endif
# note we remove some binary dependent files during compilation,
# otherwise it's way to easy to end up with outdated results
bench-runner build-bench: $(BENCH_RUNNER)
ifdef YES_COV
rm -f $(BENCH_GCDA)
endif
ifndef NO_PERF
ifdef YES_PERF
rm -f $(BENCH_PERF)
endif
ifndef NO_PERFBD
rm -f $(BENCH_TRACE)
endif
.PHONY: bench
bench: bench-runner
@@ -234,6 +264,13 @@ perf: $(BENCH_PERF)
-Scycles \
$(PERFFLAGS))
.PHONY: perfbd
perfbd: $(BENCH_TRACE)
$(strip ./scripts/perfbd.py \
$(BENCH_RUNNER) $^ $(patsubst %,-F%,$(SRC)) \
-Serased -Sproged -Sreaded \
$(PERFBDFLAGS))
.PHONY: summary sizes
summary sizes: $(BUILDDIR)lfs.csv
$(strip ./scripts/summary.py -Y $^ \
@@ -275,6 +312,11 @@ $(BUILDDIR)lfs.cov.csv: $(GCDA)
$(BUILDDIR)lfs.perf.csv: $(BENCH_PERF)
./scripts/perf.py $^ $(patsubst %,-F%,$(SRC)) -q $(PERFFLAGS) -o $@
$(BUILDDIR)lfs.perfbd.csv: $(BENCH_TRACE)
$(strip ./scripts/perfbd.py \
$(BENCH_RUNNER) $^ $(patsubst %,-F%,$(SRC)) \
-q $(PERFBDFLAGS) -o $@)
$(BUILDDIR)lfs.csv: \
$(BUILDDIR)lfs.code.csv \
$(BUILDDIR)lfs.data.csv \
@@ -326,7 +368,8 @@ clean:
$(BUILDDIR)lfs.stack.csv \
$(BUILDDIR)lfs.struct.csv \
$(BUILDDIR)lfs.cov.csv \
$(BUILDDIR)lfs.perf.csv)
$(BUILDDIR)lfs.perf.csv \
$(BUILDDIR)lfs.perfbd.csv)
rm -f $(OBJ)
rm -f $(DEP)
rm -f $(ASM)
@@ -340,6 +383,7 @@ clean:
rm -f $(TEST_GCNO)
rm -f $(TEST_GCDA)
rm -f $(TEST_PERF)
rm -f $(TEST_TRACE)
rm -f $(BENCH_RUNNER)
rm -f $(BENCH_BC)
rm -f $(BENCH_BAC)
@@ -349,3 +393,4 @@ clean:
rm -f $(BENCH_GCNO)
rm -f $(BENCH_GCDA)
rm -f $(BENCH_PERF)
rm -f $(BENCH_TRACE)