Tweaked scripts that consume .csv files to filter defines early

With the quantity of data being output by bench.py now, filtering ASAP
while parsing CSV files is a valuable optimization. And thanks to how
CSV files are structured, we can even avoid ever loading the full
contents into RAM.

This does end up with us filtering for defines redundantly in a few
places, but this is well worth the saved overhead from early filtering.

Also tried to clean up the plot.py/plotmpl.py's data folding path,
though that may have been wasted effort.
This commit is contained in:
Christopher Haster
2023-11-03 14:30:22 -05:00
parent fb9277feac
commit 616b4e1c9e
10 changed files with 274 additions and 212 deletions

View File

@@ -317,18 +317,18 @@ def collect(obj_paths, *,
def fold(Result, results, *, def fold(Result, results, *,
by=None, by=None,
defines=None, defines=[],
**_): **_):
if by is None: if by is None:
by = Result._by by = Result._by
for k in it.chain(by or [], (k for k, _ in defines or [])): for k in it.chain(by or [], (k for k, _ in defines)):
if k not in Result._by and k not in Result._fields: if k not in Result._by and k not in Result._fields:
print("error: could not find field %r?" % k) print("error: could not find field %r?" % k)
sys.exit(-1) sys.exit(-1)
# filter by matching defines # filter by matching defines
if defines is not None: if defines:
results_ = [] results_ = []
for r in results: for r in results:
if all(getattr(r, k) in vs for k, vs in defines): if all(getattr(r, k) in vs for k, vs in defines):
@@ -524,7 +524,7 @@ def table(Result, results, diff_results=None, *,
def main(obj_paths, *, def main(obj_paths, *,
by=None, by=None,
fields=None, fields=None,
defines=None, defines=[],
sort=None, sort=None,
**args): **args):
# find sizes # find sizes
@@ -535,6 +535,10 @@ def main(obj_paths, *,
with openio(args['use']) as f: with openio(args['use']) as f:
reader = csv.DictReader(f, restval='') reader = csv.DictReader(f, restval='')
for r in reader: for r in reader:
# filter by matching defines
if not all(k in r and r[k] in vs for k, vs in defines):
continue
if not any('code_'+k in r and r['code_'+k].strip() if not any('code_'+k in r and r['code_'+k].strip()
for k in CodeResult._fields): for k in CodeResult._fields):
continue continue
@@ -582,6 +586,10 @@ def main(obj_paths, *,
with openio(args['diff']) as f: with openio(args['diff']) as f:
reader = csv.DictReader(f, restval='') reader = csv.DictReader(f, restval='')
for r in reader: for r in reader:
# filter by matching defines
if not all(k in r and r[k] in vs for k, vs in defines):
continue
if not any('code_'+k in r and r['code_'+k].strip() if not any('code_'+k in r and r['code_'+k].strip()
for k in CodeResult._fields): for k in CodeResult._fields):
continue continue

View File

@@ -299,18 +299,18 @@ def collect(gcda_paths, *,
def fold(Result, results, *, def fold(Result, results, *,
by=None, by=None,
defines=None, defines=[],
**_): **_):
if by is None: if by is None:
by = Result._by by = Result._by
for k in it.chain(by or [], (k for k, _ in defines or [])): for k in it.chain(by or [], (k for k, _ in defines)):
if k not in Result._by and k not in Result._fields: if k not in Result._by and k not in Result._fields:
print("error: could not find field %r?" % k) print("error: could not find field %r?" % k)
sys.exit(-1) sys.exit(-1)
# filter by matching defines # filter by matching defines
if defines is not None: if defines:
results_ = [] results_ = []
for r in results: for r in results:
if all(getattr(r, k) in vs for k, vs in defines): if all(getattr(r, k) in vs for k, vs in defines):
@@ -581,7 +581,7 @@ def annotate(Result, results, *,
def main(gcda_paths, *, def main(gcda_paths, *,
by=None, by=None,
fields=None, fields=None,
defines=None, defines=[],
sort=None, sort=None,
hits=False, hits=False,
**args): **args):
@@ -601,6 +601,10 @@ def main(gcda_paths, *,
with openio(args['use']) as f: with openio(args['use']) as f:
reader = csv.DictReader(f, restval='') reader = csv.DictReader(f, restval='')
for r in reader: for r in reader:
# filter by matching defines
if not all(k in r and r[k] in vs for k, vs in defines):
continue
if not any('cov_'+k in r and r['cov_'+k].strip() if not any('cov_'+k in r and r['cov_'+k].strip()
for k in CovResult._fields): for k in CovResult._fields):
continue continue
@@ -650,6 +654,10 @@ def main(gcda_paths, *,
with openio(args['diff']) as f: with openio(args['diff']) as f:
reader = csv.DictReader(f, restval='') reader = csv.DictReader(f, restval='')
for r in reader: for r in reader:
# filter by matching defines
if not all(k in r and r[k] in vs for k, vs in defines):
continue
if not any('cov_'+k in r and r['cov_'+k].strip() if not any('cov_'+k in r and r['cov_'+k].strip()
for k in CovResult._fields): for k in CovResult._fields):
continue continue

View File

@@ -317,18 +317,18 @@ def collect(obj_paths, *,
def fold(Result, results, *, def fold(Result, results, *,
by=None, by=None,
defines=None, defines=[],
**_): **_):
if by is None: if by is None:
by = Result._by by = Result._by
for k in it.chain(by or [], (k for k, _ in defines or [])): for k in it.chain(by or [], (k for k, _ in defines)):
if k not in Result._by and k not in Result._fields: if k not in Result._by and k not in Result._fields:
print("error: could not find field %r?" % k) print("error: could not find field %r?" % k)
sys.exit(-1) sys.exit(-1)
# filter by matching defines # filter by matching defines
if defines is not None: if defines:
results_ = [] results_ = []
for r in results: for r in results:
if all(getattr(r, k) in vs for k, vs in defines): if all(getattr(r, k) in vs for k, vs in defines):
@@ -524,7 +524,7 @@ def table(Result, results, diff_results=None, *,
def main(obj_paths, *, def main(obj_paths, *,
by=None, by=None,
fields=None, fields=None,
defines=None, defines=[],
sort=None, sort=None,
**args): **args):
# find sizes # find sizes
@@ -535,6 +535,10 @@ def main(obj_paths, *,
with openio(args['use']) as f: with openio(args['use']) as f:
reader = csv.DictReader(f, restval='') reader = csv.DictReader(f, restval='')
for r in reader: for r in reader:
# filter by matching defines
if not all(k in r and r[k] in vs for k, vs in defines):
continue
try: try:
results.append(DataResult( results.append(DataResult(
**{k: r[k] for k in DataResult._by **{k: r[k] for k in DataResult._by
@@ -579,6 +583,10 @@ def main(obj_paths, *,
with openio(args['diff']) as f: with openio(args['diff']) as f:
reader = csv.DictReader(f, restval='') reader = csv.DictReader(f, restval='')
for r in reader: for r in reader:
# filter by matching defines
if not all(k in r and r[k] in vs for k, vs in defines):
continue
if not any('data_'+k in r and r['data_'+k].strip() if not any('data_'+k in r and r['data_'+k].strip()
for k in DataResult._fields): for k in DataResult._fields):
continue continue

View File

@@ -629,18 +629,18 @@ def collect(perf_paths, *,
def fold(Result, results, *, def fold(Result, results, *,
by=None, by=None,
defines=None, defines=[],
**_): **_):
if by is None: if by is None:
by = Result._by by = Result._by
for k in it.chain(by or [], (k for k, _ in defines or [])): for k in it.chain(by or [], (k for k, _ in defines)):
if k not in Result._by and k not in Result._fields: if k not in Result._by and k not in Result._fields:
print("error: could not find field %r?" % k) print("error: could not find field %r?" % k)
sys.exit(-1) sys.exit(-1)
# filter by matching defines # filter by matching defines
if defines is not None: if defines:
results_ = [] results_ = []
for r in results: for r in results:
if all(getattr(r, k) in vs for k, vs in defines): if all(getattr(r, k) in vs for k, vs in defines):
@@ -1037,7 +1037,7 @@ def annotate(Result, results, *,
def report(perf_paths, *, def report(perf_paths, *,
by=None, by=None,
fields=None, fields=None,
defines=None, defines=[],
sort=None, sort=None,
branches=False, branches=False,
caches=False, caches=False,
@@ -1062,6 +1062,10 @@ def report(perf_paths, *,
with openio(args['use']) as f: with openio(args['use']) as f:
reader = csv.DictReader(f, restval='') reader = csv.DictReader(f, restval='')
for r in reader: for r in reader:
# filter by matching defines
if not all(k in r and r[k] in vs for k, vs in defines):
continue
if not any('perf_'+k in r and r['perf_'+k].strip() if not any('perf_'+k in r and r['perf_'+k].strip()
for k in PerfResult._fields): for k in PerfResult._fields):
continue continue
@@ -1109,6 +1113,10 @@ def report(perf_paths, *,
with openio(args['diff']) as f: with openio(args['diff']) as f:
reader = csv.DictReader(f, restval='') reader = csv.DictReader(f, restval='')
for r in reader: for r in reader:
# filter by matching defines
if not all(k in r and r[k] in vs for k, vs in defines):
continue
if not any('perf_'+k in r and r['perf_'+k].strip() if not any('perf_'+k in r and r['perf_'+k].strip()
for k in PerfResult._fields): for k in PerfResult._fields):
continue continue

View File

@@ -595,18 +595,18 @@ def collect(obj_path, trace_paths, *,
def fold(Result, results, *, def fold(Result, results, *,
by=None, by=None,
defines=None, defines=[],
**_): **_):
if by is None: if by is None:
by = Result._by by = Result._by
for k in it.chain(by or [], (k for k, _ in defines or [])): for k in it.chain(by or [], (k for k, _ in defines)):
if k not in Result._by and k not in Result._fields: if k not in Result._by and k not in Result._fields:
print("error: could not find field %r?" % k) print("error: could not find field %r?" % k)
sys.exit(-1) sys.exit(-1)
# filter by matching defines # filter by matching defines
if defines is not None: if defines:
results_ = [] results_ = []
for r in results: for r in results:
if all(getattr(r, k) in vs for k, vs in defines): if all(getattr(r, k) in vs for k, vs in defines):
@@ -1019,7 +1019,7 @@ def annotate(Result, results, *,
def report(obj_path='', trace_paths=[], *, def report(obj_path='', trace_paths=[], *,
by=None, by=None,
fields=None, fields=None,
defines=None, defines=[],
sort=None, sort=None,
**args): **args):
# figure out what color should be # figure out what color should be
@@ -1042,6 +1042,10 @@ def report(obj_path='', trace_paths=[], *,
with openio(args['use']) as f: with openio(args['use']) as f:
reader = csv.DictReader(f, restval='') reader = csv.DictReader(f, restval='')
for r in reader: for r in reader:
# filter by matching defines
if not all(k in r and r[k] in vs for k, vs in defines):
continue
if not any('perfbd_'+k in r and r['perfbd_'+k].strip() if not any('perfbd_'+k in r and r['perfbd_'+k].strip()
for k in PerfBdResult._fields): for k in PerfBdResult._fields):
continue continue
@@ -1089,6 +1093,10 @@ def report(obj_path='', trace_paths=[], *,
with openio(args['diff']) as f: with openio(args['diff']) as f:
reader = csv.DictReader(f, restval='') reader = csv.DictReader(f, restval='')
for r in reader: for r in reader:
# filter by matching defines
if not all(k in r and r[k] in vs for k, vs in defines):
continue
if not any('perfbd_'+k in r and r['perfbd_'+k].strip() if not any('perfbd_'+k in r and r['perfbd_'+k].strip()
for k in PerfBdResult._fields): for k in PerfBdResult._fields):
continue continue

View File

@@ -443,7 +443,7 @@ class Plot:
return ''.join(row_) return ''.join(row_)
def collect(csv_paths, renames=[]): def collect(csv_paths, renames=[], defines=[]):
# collect results from CSV files # collect results from CSV files
results = [] results = []
for path in csv_paths: for path in csv_paths:
@@ -451,64 +451,33 @@ def collect(csv_paths, renames=[]):
with openio(path) as f: with openio(path) as f:
reader = csv.DictReader(f, restval='') reader = csv.DictReader(f, restval='')
for r in reader: for r in reader:
# apply any renames
if renames:
# make a copy so renames can overlap
r_ = {}
for new_k, old_k in renames:
if old_k in r:
r_[new_k] = r[old_k]
r.update(r_)
# filter by matching defines
if not all(k in r and r[k] in vs for k, vs in defines):
continue
results.append(r) results.append(r)
except FileNotFoundError: except FileNotFoundError:
pass pass
if renames:
for r in results:
# make a copy so renames can overlap
r_ = {}
for new_k, old_k in renames:
if old_k in r:
r_[new_k] = r[old_k]
r.update(r_)
return results return results
def dataset(results, x=None, y=None, define=[]): def fold(results, by=None, x=None, y=None, defines=[]):
# organize by 'by', x, and y # filter by matching defines
dataset = [] if defines:
i = 0 results_ = []
for r in results: for r in results:
# filter results by matching defines if all(k in r and r[k] in vs for k, vs in defines):
if not all(k in r and r[k] in vs for k, vs in define): results_.append(r)
continue results = results_
# find xs
if x is not None:
if x not in r:
continue
try:
x_ = dat(r[x])
except ValueError:
continue
else:
x_ = i
i += 1
# find ys
if y is not None:
if y not in r:
continue
try:
y_ = dat(r[y])
except ValueError:
continue
else:
y_ = None
dataset.append((x_, y_))
return dataset
def datasets(results, by=None, x=None, y=None, define=[]):
# filter results by matching defines
results_ = []
for r in results:
if all(k in r and r[k] in vs for k, vs in define):
results_.append(r)
results = results_
# if y not specified, try to guess from data # if y not specified, try to guess from data
if not y: if not y:
@@ -535,16 +504,46 @@ def datasets(results, by=None, x=None, y=None, define=[]):
for ks_ in (ks if by else [()]): for ks_ in (ks if by else [()]):
for x_ in (x if x else [None]): for x_ in (x if x else [None]):
for y_ in y: for y_ in y:
# organize by 'by', x, and y
dataset = []
i = 0
for r in results:
# filter by 'by'
if by and not all(
k in r and r[k] == v
for k, v in zip(by, ks_)):
continue
# find xs
if x_ is not None:
if x_ not in r:
continue
try:
x__ = dat(r[x_])
except ValueError:
continue
else:
# fallback to enumeration
x__ = i
i += 1
# find ys
if y_ is not None:
if y_ not in r:
continue
try:
y__ = dat(r[y_])
except ValueError:
continue
else:
y__ = None
dataset.append((x__, y__))
# hide x/y if there is only one field # hide x/y if there is only one field
k_x = x_ if len(x or []) > 1 else '' k_x = x_ if len(x or []) > 1 else ''
k_y = y_ if len(y or []) > 1 or (not ks_ and not k_x) else '' k_y = y_ if len(y or []) > 1 or (not ks_ and not k_x) else ''
datasets[ks_ + (k_x, k_y)] = dataset
datasets[ks_ + (k_x, k_y)] = dataset(
results,
x_,
y_,
[(by_, {k_}) for by_, k_ in zip(by, ks_)]
if by else [])
return datasets return datasets
@@ -898,6 +897,11 @@ def main(csv_paths, *,
all_by = (by or []) + subplots_get('by', **subplot, subplots=subplots) all_by = (by or []) + subplots_get('by', **subplot, subplots=subplots)
all_x = (x or []) + subplots_get('x', **subplot, subplots=subplots) all_x = (x or []) + subplots_get('x', **subplot, subplots=subplots)
all_y = (y or []) + subplots_get('y', **subplot, subplots=subplots) all_y = (y or []) + subplots_get('y', **subplot, subplots=subplots)
all_defines = co.defaultdict(lambda: set())
for k, vs in it.chain(define or [],
subplots_get('define', **subplot, subplots=subplots)):
all_defines[k] |= vs
all_defines = sorted(all_defines.items())
# separate out renames # separate out renames
renames = list(it.chain.from_iterable( renames = list(it.chain.from_iterable(
@@ -990,10 +994,10 @@ def main(csv_paths, *,
f.writeln = writeln f.writeln = writeln
# first collect results from CSV files # first collect results from CSV files
results = collect(csv_paths, renames) results = collect(csv_paths, renames, all_defines)
# then extract the requested datasets # then extract the requested datasets
datasets_ = datasets(results, all_by, all_x, all_y, define) datasets_ = fold(results, all_by, all_x, all_y)
# figure out colors/chars here so that subplot defines # figure out colors/chars here so that subplot defines
# don't change them later, that'd be bad # don't change them later, that'd be bad
@@ -1139,7 +1143,7 @@ def main(csv_paths, *,
# data can be constrained by subplot-specific defines, # data can be constrained by subplot-specific defines,
# so re-extract for each plot # so re-extract for each plot
subdatasets = datasets(results, all_by, all_x, all_y, define_) subdatasets = fold(results, all_by, all_x, all_y, define_)
# filter by subplot x/y # filter by subplot x/y
subdatasets = co.OrderedDict([(name, dataset) subdatasets = co.OrderedDict([(name, dataset)

View File

@@ -189,7 +189,7 @@ def dat(x):
# else give up # else give up
raise ValueError("invalid dat %r" % x) raise ValueError("invalid dat %r" % x)
def collect(csv_paths, renames=[]): def collect(csv_paths, renames=[], defines=[]):
# collect results from CSV files # collect results from CSV files
results = [] results = []
for path in csv_paths: for path in csv_paths:
@@ -197,64 +197,33 @@ def collect(csv_paths, renames=[]):
with openio(path) as f: with openio(path) as f:
reader = csv.DictReader(f, restval='') reader = csv.DictReader(f, restval='')
for r in reader: for r in reader:
# apply any renames
if renames:
# make a copy so renames can overlap
r_ = {}
for new_k, old_k in renames:
if old_k in r:
r_[new_k] = r[old_k]
r.update(r_)
# filter by matching defines
if not all(k in r and r[k] in vs for k, vs in defines):
continue
results.append(r) results.append(r)
except FileNotFoundError: except FileNotFoundError:
pass pass
if renames:
for r in results:
# make a copy so renames can overlap
r_ = {}
for new_k, old_k in renames:
if old_k in r:
r_[new_k] = r[old_k]
r.update(r_)
return results return results
def dataset(results, x=None, y=None, define=[]): def fold(results, by=None, x=None, y=None, defines=[]):
# organize by 'by', x, and y # filter by matching defines
dataset = [] if defines:
i = 0 results_ = []
for r in results: for r in results:
# filter results by matching defines if all(k in r and r[k] in vs for k, vs in defines):
if not all(k in r and r[k] in vs for k, vs in define): results_.append(r)
continue results = results_
# find xs
if x is not None:
if x not in r:
continue
try:
x_ = dat(r[x])
except ValueError:
continue
else:
x_ = i
i += 1
# find ys
if y is not None:
if y not in r:
continue
try:
y_ = dat(r[y])
except ValueError:
continue
else:
y_ = None
dataset.append((x_, y_))
return dataset
def datasets(results, by=None, x=None, y=None, define=[]):
# filter results by matching defines
results_ = []
for r in results:
if all(k in r and r[k] in vs for k, vs in define):
results_.append(r)
results = results_
# if y not specified, try to guess from data # if y not specified, try to guess from data
if not y: if not y:
@@ -281,16 +250,46 @@ def datasets(results, by=None, x=None, y=None, define=[]):
for ks_ in (ks if by else [()]): for ks_ in (ks if by else [()]):
for x_ in (x if x else [None]): for x_ in (x if x else [None]):
for y_ in y: for y_ in y:
# organize by 'by', x, and y
dataset = []
i = 0
for r in results:
# filter by 'by'
if by and not all(
k in r and r[k] == v
for k, v in zip(by, ks_)):
continue
# find xs
if x_ is not None:
if x_ not in r:
continue
try:
x__ = dat(r[x_])
except ValueError:
continue
else:
# fallback to enumeration
x__ = i
i += 1
# find ys
if y_ is not None:
if y_ not in r:
continue
try:
y__ = dat(r[y_])
except ValueError:
continue
else:
y__ = None
dataset.append((x__, y__))
# hide x/y if there is only one field # hide x/y if there is only one field
k_x = x_ if len(x or []) > 1 else '' k_x = x_ if len(x or []) > 1 else ''
k_y = y_ if len(y or []) > 1 or (not ks_ and not k_x) else '' k_y = y_ if len(y or []) > 1 or (not ks_ and not k_x) else ''
datasets[ks_ + (k_x, k_y)] = dataset
datasets[ks_ + (k_x, k_y)] = dataset(
results,
x_,
y_,
[(by_, {k_}) for by_, k_ in zip(by, ks_)]
if by else [])
return datasets return datasets
@@ -730,7 +729,7 @@ def main(csv_paths, output, *,
# become a mess... # become a mess...
subplots += subplot.pop('subplots', []) subplots += subplot.pop('subplots', [])
# allow any subplots to contribute to by/x/y # allow any subplots to contribute to by/x/y/defines
def subplots_get(k, *, subplots=[], **args): def subplots_get(k, *, subplots=[], **args):
v = args.get(k, []).copy() v = args.get(k, []).copy()
for _, subargs in subplots: for _, subargs in subplots:
@@ -740,6 +739,11 @@ def main(csv_paths, output, *,
all_by = (by or []) + subplots_get('by', **subplot, subplots=subplots) all_by = (by or []) + subplots_get('by', **subplot, subplots=subplots)
all_x = (x or []) + subplots_get('x', **subplot, subplots=subplots) all_x = (x or []) + subplots_get('x', **subplot, subplots=subplots)
all_y = (y or []) + subplots_get('y', **subplot, subplots=subplots) all_y = (y or []) + subplots_get('y', **subplot, subplots=subplots)
all_defines = co.defaultdict(lambda: set())
for k, vs in it.chain(define or [],
subplots_get('define', **subplot, subplots=subplots)):
all_defines[k] |= vs
all_defines = sorted(all_defines.items())
# separate out renames # separate out renames
renames = list(it.chain.from_iterable( renames = list(it.chain.from_iterable(
@@ -750,10 +754,10 @@ def main(csv_paths, output, *,
all_y = [k for k, _ in all_y] all_y = [k for k, _ in all_y]
# first collect results from CSV files # first collect results from CSV files
results = collect(csv_paths, renames) results = collect(csv_paths, renames, all_defines)
# then extract the requested datasets # then extract the requested datasets
datasets_ = datasets(results, all_by, all_x, all_y, define) datasets_ = fold(results, all_by, all_x, all_y)
# figure out formats/colors/labels here so that subplot defines # figure out formats/colors/labels here so that subplot defines
# don't change them later, that'd be bad # don't change them later, that'd be bad
@@ -830,7 +834,7 @@ def main(csv_paths, output, *,
# data can be constrained by subplot-specific defines, # data can be constrained by subplot-specific defines,
# so re-extract for each plot # so re-extract for each plot
subdatasets = datasets(results, all_by, all_x, all_y, define_) subdatasets = fold(results, all_by, all_x, all_y, define_)
# filter by subplot x/y # filter by subplot x/y
subdatasets = co.OrderedDict([(name, dataset) subdatasets = co.OrderedDict([(name, dataset)

View File

@@ -275,18 +275,18 @@ def collect(ci_paths, *,
def fold(Result, results, *, def fold(Result, results, *,
by=None, by=None,
defines=None, defines=[],
**_): **_):
if by is None: if by is None:
by = Result._by by = Result._by
for k in it.chain(by or [], (k for k, _ in defines or [])): for k in it.chain(by or [], (k for k, _ in defines)):
if k not in Result._by and k not in Result._fields: if k not in Result._by and k not in Result._fields:
print("error: could not find field %r?" % k) print("error: could not find field %r?" % k)
sys.exit(-1) sys.exit(-1)
# filter by matching defines # filter by matching defines
if defines is not None: if defines:
results_ = [] results_ = []
for r in results: for r in results:
if all(getattr(r, k) in vs for k, vs in defines): if all(getattr(r, k) in vs for k, vs in defines):
@@ -548,7 +548,7 @@ def table(Result, results, diff_results=None, *,
def main(ci_paths, def main(ci_paths,
by=None, by=None,
fields=None, fields=None,
defines=None, defines=[],
sort=None, sort=None,
**args): **args):
if args.get('depth') is None: if args.get('depth') is None:
@@ -564,6 +564,10 @@ def main(ci_paths,
with openio(args['use']) as f: with openio(args['use']) as f:
reader = csv.DictReader(f, restval='') reader = csv.DictReader(f, restval='')
for r in reader: for r in reader:
# filter by matching defines
if not all(k in r and r[k] in vs for k, vs in defines):
continue
if not any('stack_'+k in r and r['stack_'+k].strip() if not any('stack_'+k in r and r['stack_'+k].strip()
for k in StackResult._fields): for k in StackResult._fields):
continue continue
@@ -611,6 +615,10 @@ def main(ci_paths,
with openio(args['diff']) as f: with openio(args['diff']) as f:
reader = csv.DictReader(f, restval='') reader = csv.DictReader(f, restval='')
for r in reader: for r in reader:
# filter by matching defines
if not all(k in r and r[k] in vs for k, vs in defines):
continue
if not any('stack_'+k in r and r['stack_'+k].strip() if not any('stack_'+k in r and r['stack_'+k].strip()
for k in StackResult._fields): for k in StackResult._fields):
continue continue

View File

@@ -266,18 +266,18 @@ def collect(obj_paths, *,
def fold(Result, results, *, def fold(Result, results, *,
by=None, by=None,
defines=None, defines=[],
**_): **_):
if by is None: if by is None:
by = Result._by by = Result._by
for k in it.chain(by or [], (k for k, _ in defines or [])): for k in it.chain(by or [], (k for k, _ in defines)):
if k not in Result._by and k not in Result._fields: if k not in Result._by and k not in Result._fields:
print("error: could not find field %r?" % k) print("error: could not find field %r?" % k)
sys.exit(-1) sys.exit(-1)
# filter by matching defines # filter by matching defines
if defines is not None: if defines:
results_ = [] results_ = []
for r in results: for r in results:
if all(getattr(r, k) in vs for k, vs in defines): if all(getattr(r, k) in vs for k, vs in defines):
@@ -473,7 +473,7 @@ def table(Result, results, diff_results=None, *,
def main(obj_paths, *, def main(obj_paths, *,
by=None, by=None,
fields=None, fields=None,
defines=None, defines=[],
sort=None, sort=None,
**args): **args):
# find sizes # find sizes
@@ -484,6 +484,10 @@ def main(obj_paths, *,
with openio(args['use']) as f: with openio(args['use']) as f:
reader = csv.DictReader(f, restval='') reader = csv.DictReader(f, restval='')
for r in reader: for r in reader:
# filter by matching defines
if not all(k in r and r[k] in vs for k, vs in defines):
continue
if not any('struct_'+k in r and r['struct_'+k].strip() if not any('struct_'+k in r and r['struct_'+k].strip()
for k in StructResult._fields): for k in StructResult._fields):
continue continue
@@ -533,6 +537,10 @@ def main(obj_paths, *,
with openio(args['diff']) as f: with openio(args['diff']) as f:
reader = csv.DictReader(f, restval='') reader = csv.DictReader(f, restval='')
for r in reader: for r in reader:
# filter by matching defines
if not all(k in r and r[k] in vs for k, vs in defines):
continue
if not any('struct_'+k in r and r['struct_'+k].strip() if not any('struct_'+k in r and r['struct_'+k].strip()
for k in StructResult._fields): for k in StructResult._fields):
continue continue

View File

@@ -239,6 +239,43 @@ TYPES = co.OrderedDict([
]) ])
def openio(path, mode='r', buffering=-1):
# allow '-' for stdin/stdout
if path == '-':
if 'r' in mode:
return os.fdopen(os.dup(sys.stdin.fileno()), mode, buffering)
else:
return os.fdopen(os.dup(sys.stdout.fileno()), mode, buffering)
else:
return open(path, mode, buffering)
def collect(csv_paths, renames=[], defines=[]):
# collect results from CSV files
results = []
for path in csv_paths:
try:
with openio(path) as f:
reader = csv.DictReader(f, restval='')
for r in reader:
# apply any renames
if renames:
# make a copy so renames can overlap
r_ = {}
for new_k, old_k in renames:
if old_k in r:
r_[new_k] = r[old_k]
r.update(r_)
# filter by matching defines
if not all(k in r and r[k] in vs for k, vs in defines):
continue
results.append(r)
except FileNotFoundError:
pass
return results
def infer(results, *, def infer(results, *,
by=None, by=None,
fields=None, fields=None,
@@ -346,18 +383,18 @@ def infer(results, *,
def fold(Result, results, *, def fold(Result, results, *,
by=None, by=None,
defines=None, defines=[],
**_): **_):
if by is None: if by is None:
by = Result._by by = Result._by
for k in it.chain(by or [], (k for k, _ in defines or [])): for k in it.chain(by or [], (k for k, _ in defines)):
if k not in Result._by and k not in Result._fields: if k not in Result._by and k not in Result._fields:
print("error: could not find field %r?" % k) print("error: could not find field %r?" % k)
sys.exit(-1) sys.exit(-1)
# filter by matching defines # filter by matching defines
if defines is not None: if defines:
results_ = [] results_ = []
for r in results: for r in results:
if all(getattr(r, k) in vs for k, vs in defines): if all(getattr(r, k) in vs for k, vs in defines):
@@ -550,20 +587,10 @@ def table(Result, results, diff_results=None, *,
line[-1])) line[-1]))
def openio(path, mode='r', buffering=-1):
# allow '-' for stdin/stdout
if path == '-':
if 'r' in mode:
return os.fdopen(os.dup(sys.stdin.fileno()), mode, buffering)
else:
return os.fdopen(os.dup(sys.stdout.fileno()), mode, buffering)
else:
return open(path, mode, buffering)
def main(csv_paths, *, def main(csv_paths, *,
by=None, by=None,
fields=None, fields=None,
defines=None, defines=[],
sort=None, sort=None,
**args): **args):
# separate out renames # separate out renames
@@ -608,24 +635,7 @@ def main(csv_paths, *,
ops.update(ops_) ops.update(ops_)
# find CSV files # find CSV files
results = [] results = collect(csv_paths, renames=renames, defines=defines)
for path in csv_paths:
try:
with openio(path) as f:
reader = csv.DictReader(f, restval='')
for r in reader:
# rename fields?
if renames:
# make a copy so renames can overlap
r_ = {}
for new_k, old_k in renames:
if old_k in r:
r_[new_k] = r[old_k]
r.update(r_)
results.append(r)
except FileNotFoundError:
pass
# homogenize # homogenize
Result = infer(results, Result = infer(results,
@@ -672,31 +682,19 @@ def main(csv_paths, *,
# find previous results? # find previous results?
if args.get('diff'): if args.get('diff'):
diff_results = [] diff_results = collect([args['diff']], renames=renames, defines=defines)
try: diff_results_ = []
with openio(args['diff']) as f: for r in diff_results:
reader = csv.DictReader(f, restval='') if not any(k in r and r[k].strip()
for r in reader: for k in Result._fields):
# rename fields? continue
if renames: try:
# make a copy so renames can overlap diff_results_.append(Result(**{
r_ = {} k: r[k] for k in Result._by + Result._fields
for new_k, old_k in renames: if k in r and r[k].strip()}))
if old_k in r: except TypeError:
r_[new_k] = r[old_k] pass
r.update(r_) diff_results = diff_results_
if not any(k in r and r[k].strip()
for k in Result._fields):
continue
try:
diff_results.append(Result(**{
k: r[k] for k in Result._by + Result._fields
if k in r and r[k].strip()}))
except TypeError:
pass
except FileNotFoundError:
pass
# fold # fold
diff_results = fold(Result, diff_results, by=by, defines=defines) diff_results = fold(Result, diff_results, by=by, defines=defines)