From 27fc481ec2b5724925cc9dc32b9abc5625267a82 Mon Sep 17 00:00:00 2001 From: Christopher Haster Date: Fri, 17 Mar 2023 01:58:39 -0500 Subject: [PATCH] Generalized btree benchmarks, amortized benchmarks, plot.py/plotmpl.py tweaks These benchmarks are now more useful for seeing how these B-trees perform. In plot.py/plotmpl.py: - Added --legend as another alias for -l, --legend-right. - Allowed omitting datasets from the legend by using empty strings in --labels. - Do not sum multiple data points on the same x coordinate. Summing was a bad idea that risks invalid results going unnoticed. As a plus, multiple data points on the same x coordinate can be abused for a cheap representation of measurement error. --- benches/bench_btree.toml | 42 ++++++++++++++++++------ scripts/plot.py | 70 +++++++++++++++++++--------------------- scripts/plotmpl.py | 49 ++++++++++++++-------------- 3 files changed, 92 insertions(+), 69 deletions(-) diff --git a/benches/bench_btree.toml b/benches/bench_btree.toml index 5c3fb280..1eb2bb25 100644 --- a/benches/bench_btree.toml +++ b/benches/bench_btree.toml @@ -5,6 +5,11 @@ defines.LOOKAHEAD_SIZE = 'BLOCK_COUNT / 8' [cases.bench_btree_lookup] defines.N = [8, 16, 32, 64, 128, 256, 1024] +# 0 = in-order +# 1 = reversed-order +# 2 = random-order +defines.ORDER = [0, 1, 2] +defines.SEED = 42 in = 'lfs.c' code = ''' lfs_t lfs; @@ -17,17 +22,22 @@ code = ''' lfs.free.i = 0; lfs_alloc_ack(&lfs); + uint32_t prng = SEED; + // create a tree with N elements lfsr_btree_t btree = LFSR_BTREE_NULL; const char *alphas = "abcdefghijklmnopqrstuvwxyz"; for (lfs_size_t i = 0; i < N; i++) { - lfsr_btree_push(&lfs, &btree, i, LFSR_TAG_INLINED, 1, + lfs_off_t i_ + = (ORDER == 0) ? i + : (ORDER == 1) ? 
0 + : BENCH_PRNG(&prng) % (btree.weight+1); + lfsr_btree_push(&lfs, &btree, i_, LFSR_TAG_INLINED, 1, &alphas[i % 26], 1) => 0; } // bench lookup BENCH_START(); - uint32_t prng = 42; lfs_size_t i = BENCH_PRNG(&prng) % N; uint8_t buffer[4]; lfsr_tag_t tag_; @@ -40,12 +50,17 @@ code = ''' assert(tag_ == LFSR_TAG_INLINED); assert(id_ == i); assert(weight_ == 1); - assert(memcmp(buffer, &alphas[i % 26], 1) == 0); BENCH_STOP(); ''' -[cases.bench_btree_append] +[cases.bench_btree_commit] defines.N = [8, 16, 32, 64, 128, 256, 1024] +# 0 = in-order +# 1 = reversed-order +# 2 = random-order +defines.ORDER = [0, 1, 2] +defines.SEED = 42 +defines.AMORTIZED = false in = 'lfs.c' code = ''' lfs_t lfs; @@ -58,19 +73,28 @@ code = ''' lfs.free.i = 0; lfs_alloc_ack(&lfs); - uint32_t prng = 42; + uint32_t prng = SEED; // create a tree with N elements + if (AMORTIZED) { + BENCH_START(); + } lfsr_btree_t btree = LFSR_BTREE_NULL; const char *alphas = "abcdefghijklmnopqrstuvwxyz"; for (lfs_size_t i = 0; i < N; i++) { - lfsr_btree_push(&lfs, &btree, i, LFSR_TAG_INLINED, 1, + lfs_off_t i_ + = (ORDER == 0) ? i + : (ORDER == 1) ? 
0 + : BENCH_PRNG(&prng) % (btree.weight+1); + lfsr_btree_push(&lfs, &btree, i_, LFSR_TAG_INLINED, 1, &alphas[i % 26], 1) => 0; } // bench appending a new id - BENCH_START(); - lfs_size_t i = N; + if (!AMORTIZED) { + BENCH_START(); + } + lfs_size_t i = BENCH_PRNG(&prng) % N; lfsr_btree_push(&lfs, &btree, i, LFSR_TAG_INLINED, 1, &alphas[i % 26], 1) => 0; BENCH_STOP(); @@ -86,5 +110,5 @@ code = ''' assert(tag_ == LFSR_TAG_INLINED); assert(id_ == i); assert(weight_ == 1); - assert(memcmp(buffer, &alphas[i % 26], 1) == 0); ''' + diff --git a/scripts/plot.py b/scripts/plot.py index 4a34f415..391f25e5 100755 --- a/scripts/plot.py +++ b/scripts/plot.py @@ -468,7 +468,7 @@ def collect(csv_paths, renames=[]): def dataset(results, x=None, y=None, define=[]): # organize by 'by', x, and y - dataset = {} + dataset = [] i = 0 for r in results: # filter results by matching defines @@ -498,10 +498,7 @@ def dataset(results, x=None, y=None, define=[]): else: y_ = None - if y_ is not None: - dataset[x_] = y_ + dataset.get(x_, 0) - else: - dataset[x_] = y_ or dataset.get(x_, None) + dataset.append((x_, y_)) return dataset @@ -880,7 +877,7 @@ def main(csv_paths, *, if labels is not None: labels_ = labels else: - labels_ = [''] + labels_ = [None] # allow escape codes in labels/titles title = escape(title).splitlines() if title is not None else [] @@ -1018,6 +1015,8 @@ def main(csv_paths, *, if legend_right or legend_above or legend_below: legend_ = [] for i, k in enumerate(datasets_.keys()): + if datalabels_[k] is not None and not datalabels_[k]: + continue label = '%s%s' % ( '%s ' % datachars_[k] if chars is not None @@ -1028,7 +1027,7 @@ def main(csv_paths, *, or ','.join(k_ for k_ in k if k_)) if label: - legend_.append(label) + legend_.append((label, colors_[i % len(colors_)])) legend_width = max(legend_width, len(label)+1) # figure out our canvas size @@ -1064,7 +1063,7 @@ def main(csv_paths, *, legend_cols = len(legend_) while True: legend_widths = [ - max(len(l) for l in 
legend_[i::legend_cols]) + max(len(l) for l, _ in legend_[i::legend_cols]) for i in range(legend_cols)] if (legend_cols <= 1 or sum(legend_widths)+2*(legend_cols-1) @@ -1077,7 +1076,7 @@ def main(csv_paths, *, legend_cols = len(legend_) while True: legend_widths = [ - max(len(l) for l in legend_[i::legend_cols]) + max(len(l) for l, _ in legend_[i::legend_cols]) for i in range(legend_cols)] if (legend_cols <= 1 or sum(legend_widths)+2*(legend_cols-1) @@ -1151,27 +1150,27 @@ def main(csv_paths, *, # find actual xlim/ylim xlim_ = ( xlim_[0] if xlim_[0] is not None - else min(it.chain([0], (k - for r in subdatasets.values() - for k, v in r.items() - if v is not None))), + else min(it.chain([0], (x + for dataset in subdatasets.values() + for x, y in dataset + if y is not None))), xlim_[1] if xlim_[1] is not None - else max(it.chain([0], (k - for r in subdatasets.values() - for k, v in r.items() - if v is not None)))) + else max(it.chain([0], (x + for dataset in subdatasets.values() + for x, y in dataset + if y is not None)))) ylim_ = ( ylim_[0] if ylim_[0] is not None - else min(it.chain([0], (v - for r in subdatasets.values() - for _, v in r.items() - if v is not None))), + else min(it.chain([0], (y + for dataset in subdatasets.values() + for _, y in dataset + if y is not None))), ylim_[1] if ylim_[1] is not None - else max(it.chain([0], (v - for r in subdatasets.values() - for _, v in r.items() - if v is not None)))) + else max(it.chain([0], (y + for dataset in subdatasets.values() + for _, y in dataset + if y is not None)))) # find actual width/height subwidth = sum(widths[s.x:s.x+s.xspan]) - sum(s.xmargin) @@ -1190,7 +1189,7 @@ def main(csv_paths, *, for name, dataset in subdatasets.items(): plot.plot( - sorted((x,y) for x,y in dataset.items()), + sorted((x,y) for x,y in dataset), color=datacolors_[name], char=datachars_[name], line_char=dataline_chars_[name]) @@ -1230,11 +1229,9 @@ def main(csv_paths, *, // 2, 0), '', ' '.join('%s%s%s' % ( - '\x1b[%sm' % 
colors_[(i+j) % len(colors_)] - if color else '', - '%-*s' % (legend_widths[j], legend_[i+j]), - '\x1b[m' - if color else '') + '\x1b[%sm' % legend_[i+j][1] if color else '', + '%-*s' % (legend_widths[j], legend_[i+j][0]), + '\x1b[m' if color else '') for j in range(min(legend_cols, len(legend_)-i))))) for row in range(height_): @@ -1344,8 +1341,8 @@ def main(csv_paths, *, and row-ymargin[-1] < len(legend_)): j = row-ymargin[-1] f.write(' %s%s%s' % ( - '\x1b[%sm' % colors_[j % len(colors_)] if color else '', - legend_[j], + '\x1b[%sm' % legend_[j][1] if color else '', + legend_[j][0], '\x1b[m' if color else '')) f.writeln() @@ -1366,9 +1363,9 @@ def main(csv_paths, *, // 2, 0), '', ' '.join('%s%s%s' % ( - '\x1b[%sm' % colors_[(i+j) % len(colors_)] + '\x1b[%sm' % legend_[i+j][1] if color else '', - '%-*s' % (legend_widths[j], legend_[i+j]), + '%-*s' % (legend_widths[j], legend_[i+j][0]), '\x1b[m' if color else '') for j in range(min(legend_cols, len(legend_)-i))))) @@ -1552,7 +1549,8 @@ if __name__ == "__main__": '-t', '--title', help="Add a title.") parser.add_argument( - '-l', '--legend-right', + '-l', '--legend', '--legend-right', + dest='legend_right', action='store_true', help="Place a legend to the right.") parser.add_argument( diff --git a/scripts/plotmpl.py b/scripts/plotmpl.py index e909cf26..d991a611 100755 --- a/scripts/plotmpl.py +++ b/scripts/plotmpl.py @@ -214,7 +214,7 @@ def collect(csv_paths, renames=[]): def dataset(results, x=None, y=None, define=[]): # organize by 'by', x, and y - dataset = {} + dataset = [] i = 0 for r in results: # filter results by matching defines @@ -244,10 +244,7 @@ def dataset(results, x=None, y=None, define=[]): else: y_ = None - if y_ is not None: - dataset[x_] = y_ + dataset.get(x_, 0) - else: - dataset[x_] = y_ or dataset.get(x_, None) + dataset.append((x_, y_)) return dataset @@ -649,7 +646,7 @@ def main(csv_paths, output, *, if labels is not None: labels_ = labels else: - labels_ = [''] + labels_ = [None] if 
font_color is not None: font_color_ = font_color @@ -844,7 +841,7 @@ def main(csv_paths, output, *, # plot! ax = s.ax for name, dataset in subdatasets.items(): - dats = sorted((x,y) for x,y in dataset.items()) + dats = sorted((x,y) for x,y in dataset) ax.plot([x for x,_ in dats], [y for _,y in dats], dataformats_[name], color=datacolors_[name], @@ -860,26 +857,26 @@ def main(csv_paths, output, *, # axes limits ax.set_xlim( xlim_[0] if xlim_[0] is not None - else min(it.chain([0], (k - for r in subdatasets.values() - for k, v in r.items() - if v is not None))), + else min(it.chain([0], (x + for dataset in subdatasets.values() + for x, y in dataset + if y is not None))), xlim_[1] if xlim_[1] is not None - else max(it.chain([0], (k + else max(it.chain([0], (x for r in subdatasets.values() - for k, v in r.items() - if v is not None)))) + for x, y in dataset + if y is not None)))) ax.set_ylim( ylim_[0] if ylim_[0] is not None - else min(it.chain([0], (v - for r in subdatasets.values() - for _, v in r.items() - if v is not None))), + else min(it.chain([0], (y + for dataset in subdatasets.values() + for _, y in dataset + if y is not None))), ylim_[1] if ylim_[1] is not None - else max(it.chain([0], (v - for r in subdatasets.values() - for _, v in r.items() - if v is not None)))) + else max(it.chain([0], (y + for dataset in subdatasets.values() + for _, y in dataset + if y is not None)))) # axes ticks if x2_: ax.xaxis.set_major_formatter(lambda x, pos: @@ -960,7 +957,10 @@ def main(csv_paths, output, *, for name in datasets_.keys(): name_ = ','.join(k for k in name if k) if name_ in legend: - legend_.append((datalabels_[name] or name_, legend[name_])) + if datalabels_[name] is None: + legend_.append((name_, legend[name_])) + elif datalabels_[name]: + legend_.append((datalabels_[name], legend[name_])) legend = legend_ if legend_right: @@ -1226,7 +1226,8 @@ if __name__ == "__main__": '-t', '--title', help="Add a title.") parser.add_argument( - '-l', '--legend-right', + '-l', 
'--legend', '--legend-right', + dest='legend_right', action='store_true', help="Place a legend to the right.") parser.add_argument(