Consistent handling of by/field arguments for plot.py and summary.py
Now both scripts also fall back to guessing which fields to use, based on which fields can be converted to integers. This is more fallible, and doesn't work for tests/benchmarks, but in those cases explicit fields can be given (which is what would be needed without guessing anyway).
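To make the fallback concrete, here is a minimal standalone sketch of the guessing heuristic, not the scripts' actual code: a column is kept only if every non-empty value in it parses as a number (int(v, 0) below is a simplified stand-in for the scripts' IntField/FloatField/FracField parsing, and the 'file'/'function' by-columns are just typical examples).

import csv

def guess_fields(rows, by=()):
    # keep a column only if every non-empty value in it parses as a number
    candidates = {}
    for r in rows:
        for k, v in r.items():
            if k is None or k in by:
                continue
            if not v or not v.strip():
                continue
            if candidates.get(k, True):
                try:
                    int(v, 0)   # stand-in for the scripts' numeric field parsers
                    candidates[k] = True
                except ValueError:
                    candidates[k] = False
    return [k for k, ok in candidates.items() if ok]

# hypothetical usage against the lfs.csv the Makefile generates
with open('lfs.csv') as f:
    rows = list(csv.DictReader(f))
print(guess_fields(rows, by=['file', 'function']))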
Makefile
@@ -170,10 +170,11 @@ coverage: $(GCDA)
 .PHONY: summary sizes
 summary sizes: $(BUILDDIR)lfs.csv
 	$(strip ./scripts/summary.py -Y $^ \
-		-f code=code_size,$\
+		-fcode=code_size,$\
 			data=data_size,$\
 			stack=stack_limit,$\
 			struct=struct_size \
+		--max=stack \
 		$(SUMMARYFLAGS))
 
 
scripts/plot.py
@@ -330,12 +330,13 @@ def collect(csv_paths, renames=[]):
 
     return results
 
-def dataset(results, x=None, y=None, defines={}):
+def dataset(results, x=None, y=None, define=[]):
     # organize by 'by', x, and y
     dataset = {}
-    for i, r in enumerate(results):
+    i = 0
+    for r in results:
         # filter results by matching defines
-        if not all(k in r and r[k] in vs for k, vs in defines.items()):
+        if not all(k in r and r[k] in vs for k, vs in define):
             continue
 
         # find xs
@@ -348,6 +349,7 @@ def dataset(results, x=None, y=None, defines={}):
                 continue
         else:
             x_ = i
+            i += 1
 
         # find ys
         if y is not None:
@@ -368,14 +370,29 @@ def dataset(results, x=None, y=None, defines={}):
 
     return dataset
 
-def datasets(results, by=None, x=None, y=None, defines={}):
+def datasets(results, by=None, x=None, y=None, define=[]):
     # filter results by matching defines
     results_ = []
     for r in results:
-        if all(k in r and r[k] in vs for k, vs in defines.items()):
+        if all(k in r and r[k] in vs for k, vs in define):
             results_.append(r)
     results = results_
 
+    # if y not specified, try to guess from data
+    if y is None:
+        y = co.OrderedDict()
+        for r in results:
+            for k, v in r.items():
+                if by is not None and k in by:
+                    continue
+                if y.get(k, True):
+                    try:
+                        dat(v)
+                        y[k] = True
+                    except ValueError:
+                        y[k] = False
+        y = list(k for k,v in y.items() if v)
+
     if by is not None:
         # find all 'by' values
         ks = set()
@@ -387,13 +404,17 @@ def datasets(results, by=None, x=None, y=None, defines={}):
     datasets = co.OrderedDict()
     for ks_ in (ks if by is not None else [()]):
         for x_ in (x if x is not None else [None]):
-            for y_ in (y if y is not None else [None]):
-                datasets[ks_ + (x_, y_)] = dataset(
+            for y_ in y:
+                # hide x/y if there is only one field
+                k_x = x_ if len(x or []) > 1 else ''
+                k_y = y_ if len(y or []) > 1 else ''
+
+                datasets[ks_ + (k_x, k_y)] = dataset(
                     results,
                     x_,
                     y_,
-                    {by_: {k_} for by_, k_ in zip(by, ks_)}
-                        if by is not None else {})
+                    [(by_, k_) for by_, k_ in zip(by, ks_)]
+                        if by is not None else [])
 
     return datasets
 
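A small worked example of the key hiding above, with a hypothetical 'by' value: when there is a single -y field (and no -x fields), k_x and k_y collapse to '', so the legend label that plot.py later builds by joining the non-empty key parts no longer repeats the lone field name.

# datasets() key for by=['function'], x=None, y=['stack_limit'] (one y field)
key = ('lfs_mount', '', '')   # k_x, k_y are '' since len(x or []) <= 1 and len(y) == 1
label = ','.join(k_ for k_ in key if k_)
assert label == 'lfs_mount'   # only the 'by' value shows up in the legend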
@@ -431,7 +452,7 @@ def main(csv_paths, *,
     if ylim is not None and len(ylim) == 1:
         ylim = (0, ylim[0])
 
-    # seperate out renames
+    # separate out renames
     renames = [k.split('=', 1)
         for k in it.chain(by or [], x or [], y or [])
         if '=' in k]
@@ -452,7 +473,7 @@ def main(csv_paths, *,
     results = collect(csv_paths, renames)
 
     # then extract the requested datasets
-    datasets_ = datasets(results, by, x, y, dict(define))
+    datasets_ = datasets(results, by, x, y, define)
 
     # what colors to use?
     if colors is not None:
@@ -483,10 +504,7 @@ def main(csv_paths, *,
                 else '%s ' % line_chars_[i % len(line_chars_)]
                 if line_chars is not None
                 else '',
-            ','.join(k_ for i, k_ in enumerate(k)
-                if k_
-                if not (i == len(k)-2 and len(x) == 1)
-                if not (i == len(k)-1 and len(y) == 1)))
+            ','.join(k_ for k_ in k if k_))
 
         if label:
             legend_.append(label)
@@ -685,7 +703,7 @@ if __name__ == "__main__":
         '-b', '--by',
         type=lambda x: [x.strip() for x in x.split(',')],
         help="Fields to render as separate plots. All other fields will be "
-            "summed. Can rename fields with new_name=old_name.")
+            "summed as needed. Can rename fields with new_name=old_name.")
     parser.add_argument(
         '-x',
         type=lambda x: [x.strip() for x in x.split(',')],
@@ -694,15 +712,14 @@ if __name__ == "__main__":
     parser.add_argument(
         '-y',
         type=lambda x: [x.strip() for x in x.split(',')],
-        required=True,
         help="Fields to use for the y-axis. Can rename fields with "
            "new_name=old_name.")
     parser.add_argument(
         '-D', '--define',
-        type=lambda x: (lambda k, v: (k, set(v.split(','))))(*x.split('=', 1)),
+        type=lambda x: (lambda k,v: (k, set(v.split(','))))(*x.split('=', 1)),
         action='append',
-        help="Only include rows where this field is this value (field=value). "
-            "May include comma-separated options.")
+        help="Only include rows where this field is this value. May include "
+            "comma-separated options.")
     parser.add_argument(
         '--color',
         choices=['never', 'always', 'auto'],
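For reference, a quick trace of the -D/--define parser above (the field names below are chosen arbitrarily): each -D FIELD=V1,V2 becomes a (field, {values}) tuple, action='append' collects them into the define list, and dataset()/datasets() filter rows with the same all(...) expression shown in the diff.

parse = lambda x: (lambda k,v: (k, set(v.split(','))))(*x.split('=', 1))

print(parse('BLOCK_SIZE=4096'))    # ('BLOCK_SIZE', {'4096'})
print(parse('CASE=read,write'))    # ('CASE', {'read', 'write'})

# two -D flags with action='append' yield a list of such tuples
define = [parse('BLOCK_SIZE=4096'), parse('CASE=read,write')]
row = {'BLOCK_SIZE': '4096', 'CASE': 'read', 'stack_limit': '1024'}
assert all(k in row and row[k] in vs for k, vs in define)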
scripts/summary.py
@@ -16,6 +16,7 @@ import collections as co
 import csv
 import functools as ft
 import glob
+import itertools as it
 import math as m
 import os
 import re
@@ -23,31 +24,13 @@ import re
 
 CSV_PATHS = ['*.csv']
 
-# Defaults are common fields generated by other littlefs scripts
-MERGES = {
-    'add': (
-        ['code_size', 'data_size', 'stack_frame', 'struct_size',
-            'coverage_lines', 'coverage_branches',
-            'test_passed',
-            'bench_read', 'bench_prog', 'bench_erased'],
-        lambda xs: sum(xs[1:], start=xs[0])
-    ),
-    'mul': (
-        [],
-        lambda xs: m.prod(xs[1:], start=xs[0])
-    ),
-    'min': (
-        [],
-        min
-    ),
-    'max': (
-        ['stack_limit', 'coverage_hits'],
-        max
-    ),
-    'avg': (
-        [],
-        lambda xs: sum(xs[1:], start=xs[0]) / len(xs)
-    ),
+# supported merge operations
+OPS = {
+    'add': lambda xs: sum(xs[1:], start=xs[0]),
+    'mul': lambda xs: m.prod(xs[1:], start=xs[0]),
+    'min': min,
+    'max': max,
+    'avg': lambda xs: sum(xs[1:], start=xs[0]) / len(xs),
 }
 
 
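A sanity check of the OPS table above with plain ints. The actual values being merged are IntField/FloatField/FracField instances, which is presumably why each reduction is seeded with xs[0] instead of a bare 0 or 1, so the result stays in the field's own type.

import math as m

OPS = {
    'add': lambda xs: sum(xs[1:], start=xs[0]),
    'mul': lambda xs: m.prod(xs[1:], start=xs[0]),
    'min': min,
    'max': max,
    'avg': lambda xs: sum(xs[1:], start=xs[0]) / len(xs),
}

assert OPS['add']([1, 2, 3]) == 6
assert OPS['mul']([2, 3, 4]) == 24
assert OPS['max']([112, 144, 96]) == 144
assert OPS['avg']([2, 4]) == 3.0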
@@ -273,112 +256,142 @@ class FracField(co.namedtuple('FracField', 'a,b')):
     def __truediv__(self, n):
         return FracField(self.a / n, self.b / n)
 
+# available types
+TYPES = [IntField, FloatField, FracField]
+
+
 def homogenize(results, *,
+        by=None,
         fields=None,
-        merges=None,
-        renames=None,
+        renames=[],
+        define={},
         types=None,
         **_):
+    results = results.copy()
+
     # rename fields?
-    if renames is not None:
+    if renames:
+        for r in results:
+            # make a copy so renames can overlap
+            r_ = {}
+            for new_k, old_k in renames:
+                if old_k in r:
+                    r_[new_k] = r[old_k]
+            r.update(r_)
+
+    # filter by matching defines
+    if define:
         results_ = []
         for r in results:
-            results_.append({renames.get(k, k): v for k, v in r.items()})
+            if all(k in r and r[k] in vs for k, vs in define):
+                results_.append(r)
         results = results_
 
-    # find all fields
-    if not fields:
+    # if fields not specified, try to guess from data
+    if fields is None:
         fields = co.OrderedDict()
         for r in results:
-            # also remove None fields, these can get introduced by
-            # csv.DictReader when header and rows mismatch
-            fields.update((k, v) for k, v in r.items() if k is not None)
-        fields = list(fields.keys())
+            for k, v in r.items():
+                if by is not None and k in by:
+                    continue
+                types_ = []
+                for type in fields.get(k, TYPES):
+                    try:
+                        type(v)
+                        types_.append(type)
+                    except ValueError:
+                        pass
+                fields[k] = types_
+        fields = list(k for k,v in fields.items() if v)
+
+    # infer 'by' fields?
+    if by is None:
+        by = co.OrderedDict()
+        for r in results:
+            # also ignore None keys, these are introduced by csv.DictReader
+            # when header + row mismatch
+            by.update((k, True) for k in r.keys()
+                if k is not None
+                and k not in fields
+                and not any(k == old_k for _, old_k in renames))
+        by = list(by.keys())
 
     # go ahead and clean up none values, these can have a few forms
     results_ = []
     for r in results:
         results_.append({
-            k: r[k] for k in fields
-            if r.get(k) is not None and not(
+            k: r[k] for k in it.chain(by, fields)
+            if r.get(k) is not None and not (
                 isinstance(r[k], str)
                 and re.match('^\s*[+-]?\s*$', r[k]))})
+    results = results_
 
     # find best type for all fields
-    def try_(x, type):
-        try:
-            type(x)
-            return True
-        except ValueError:
-            return False
-
     if types is None:
+        def is_type(x, type):
+            try:
+                type(x)
+                return True
+            except ValueError:
+                return False
+
         types = {}
         for k in fields:
-            if merges is not None and merges.get(k):
-                for type in [IntField, FloatField, FracField]:
-                    if all(k not in r or try_(r[k], type) for r in results_):
-                        types[k] = type
-                        break
-                else:
-                    print("no type matches field %r?" % k)
-                    sys.exit(-1)
+            for type in TYPES:
+                if all(k not in r or is_type(r[k], type) for r in results_):
+                    types[k] = type
+                    break
+            else:
+                print("no type matches field %r?" % k)
+                sys.exit(-1)
 
     # homogenize types
-    for k in fields:
-        if k in types:
-            for r in results_:
-                if k in r:
-                    r[k] = types[k](r[k])
+    for r in results:
+        for k in fields:
+            if k in r:
+                r[k] = types[k](r[k])
 
-    return fields, types, results_
+    return by, fields, types, results
 
 
 def fold(results, *,
-        fields=None,
-        merges=None,
-        by=None,
+        by=[],
+        fields=[],
+        ops={},
         **_):
     folding = co.OrderedDict()
-    if by is None:
-        by = [k for k in fields if k not in merges]
-
     for r in results:
-        name = tuple(r.get(k) for k in by)
+        name = tuple(r.get(k, '') for k in by)
         if name not in folding:
-            folding[name] = {k: [] for k in fields if k in merges}
+            folding[name] = {k: [] for k in fields}
         for k in fields:
-            # drop all fields fields without a type
-            if k in merges and k in r:
+            if k in r:
                 folding[name][k].append(r[k])
 
     # merge fields, we need the count at this point for averages
     folded = []
-    types = {}
     for name, r in folding.items():
         r_ = {}
         for k, vs in r.items():
             if vs:
-                _, merge = MERGES[merges[k]]
-                r_[k] = merge(vs)
+                # sum fields by default
+                op = OPS[ops.get(k, 'add')]
+                r_[k] = op(vs)
 
-        # drop all rows without any fields
-        # and drop all empty keys
+        # drop any rows without fields and any empty keys
        if r_:
            folded.append(dict(
-                {k: n for k, n in zip(by, name) if n},
+                {k: v for k, v in zip(by, name) if v},
                **r_))
 
-    fields_ = by + [k for k in fields if k in merges]
-    return fields_, folded
+    return folded
 
 
 def table(results, diff_results=None, *,
+        by=None,
         fields=None,
         types=None,
-        merges=None,
-        by=None,
+        ops=None,
         sort=None,
         reverse_sort=None,
         summary=False,
@@ -387,29 +400,18 @@ def table(results, diff_results=None, *,
         **_):
     all_, all = all, __builtins__.all
 
-    # fold
-    if by is not None:
-        fields, results = fold(results, fields=fields, merges=merges, by=by)
-        if diff_results is not None:
-            _, diff_results = fold(diff_results,
-                fields=fields, merges=merges, by=by)
-
-    table = {
-        tuple(r.get(k,'') for k in fields if k not in merges): r
-        for r in results}
-    diff_table = {
-        tuple(r.get(k,'') for k in fields if k not in merges): r
-        for r in diff_results or []}
+    table = {tuple(r.get(k,'') for k in by): r for r in results}
+    diff_table = {tuple(r.get(k,'') for k in by): r for r in diff_results or []}
 
     # sort, note that python's sort is stable
     names = list(table.keys() | diff_table.keys())
     names.sort()
     if diff_results is not None:
-        names.sort(key=lambda n: [
+        names.sort(key=lambda n: tuple(
             -types[k].ratio(
                 table.get(n,{}).get(k),
                 diff_table.get(n,{}).get(k))
-            for k in fields if k in merges])
+            for k in fields))
     if sort:
         names.sort(key=lambda n: tuple(
             (table[n][k],) if k in table.get(n,{}) else ()
@@ -423,7 +425,7 @@ def table(results, diff_results=None, *,
 
     # print header
     print('%-36s' % ('%s%s' % (
-        ','.join(k for k in fields if k not in merges),
+        ','.join(k for k in by),
         ' (%d added, %d removed)' % (
             sum(1 for n in table if n not in diff_table),
             sum(1 for n in diff_table if n not in table))
@@ -433,19 +435,19 @@ def table(results, diff_results=None, *,
     if diff_results is None:
         print(' %s' % (
             ' '.join(k.rjust(len(types[k].none))
-                for k in fields if k in merges)))
+                for k in fields)))
     elif percent:
         print(' %s' % (
             ' '.join(k.rjust(len(types[k].diff_none))
-                for k in fields if k in merges)))
+                for k in fields)))
     else:
         print(' %s %s %s' % (
             ' '.join(('o'+k).rjust(len(types[k].diff_none))
-                for k in fields if k in merges),
+                for k in fields),
             ' '.join(('n'+k).rjust(len(types[k].diff_none))
-                for k in fields if k in merges),
+                for k in fields),
             ' '.join(('d'+k).rjust(len(types[k].diff_none))
-                for k in fields if k in merges)))
+                for k in fields)))
 
     # print entries
     if not summary:
@@ -454,7 +456,7 @@ def table(results, diff_results=None, *,
             if diff_results is not None:
                 diff_r = diff_table.get(name, {})
                 ratios = [types[k].ratio(r.get(k), diff_r.get(k))
-                    for k in fields if k in merges]
+                    for k in fields]
                 if not any(ratios) and not all_:
                     continue
 
@@ -463,12 +465,12 @@ def table(results, diff_results=None, *,
                 print(' %s' % (
                     ' '.join(r[k].table()
                         if k in r else types[k].none
-                        for k in fields if k in merges)))
+                        for k in fields)))
             elif percent:
                 print(' %s%s' % (
                     ' '.join(r[k].diff_table()
                         if k in r else types[k].diff_none
-                        for k in fields if k in merges),
+                        for k in fields),
                     ' (%s)' % ', '.join(
                         '+∞%' if t == float('+inf')
                         else '-∞%' if t == float('-inf')
@@ -478,13 +480,13 @@ def table(results, diff_results=None, *,
                 print(' %s %s %s%s' % (
                     ' '.join(diff_r[k].diff_table()
                         if k in diff_r else types[k].diff_none
-                        for k in fields if k in merges),
+                        for k in fields),
                     ' '.join(r[k].diff_table()
                         if k in r else types[k].diff_none
-                        for k in fields if k in merges),
+                        for k in fields),
                     ' '.join(types[k].diff_diff(r.get(k), diff_r.get(k))
                         if k in r or k in diff_r else types[k].diff_none
-                        for k in fields if k in merges),
+                        for k in fields),
                     ' (%s)' % ', '.join(
                         '+∞%' if t == float('+inf')
                         else '-∞%' if t == float('-inf')
@@ -494,26 +496,25 @@ def table(results, diff_results=None, *,
                         if any(ratios) else ''))
 
     # print total
-    _, total = fold(results, fields=fields, merges=merges, by=[])
+    total = fold(results, by=[], fields=fields, ops=ops)
     r = total[0] if total else {}
     if diff_results is not None:
-        _, diff_total = fold(diff_results,
-            fields=fields, merges=merges, by=[])
+        diff_total = fold(diff_results, by=[], fields=fields, ops=ops)
         diff_r = diff_total[0] if diff_total else {}
         ratios = [types[k].ratio(r.get(k), diff_r.get(k))
-            for k in fields if k in merges]
+            for k in fields]
 
     print('%-36s' % 'TOTAL', end='')
     if diff_results is None:
         print(' %s' % (
             ' '.join(r[k].table()
                 if k in r else types[k].none
-                for k in fields if k in merges)))
+                for k in fields)))
     elif percent:
         print(' %s%s' % (
             ' '.join(r[k].diff_table()
                 if k in r else types[k].diff_none
-                for k in fields if k in merges),
+                for k in fields),
             ' (%s)' % ', '.join(
                 '+∞%' if t == float('+inf')
                 else '-∞%' if t == float('-inf')
@@ -523,13 +524,13 @@ def table(results, diff_results=None, *,
         print(' %s %s %s%s' % (
             ' '.join(diff_r[k].diff_table()
                 if k in diff_r else types[k].diff_none
-                for k in fields if k in merges),
+                for k in fields),
             ' '.join(r[k].diff_table()
                 if k in r else types[k].diff_none
-                for k in fields if k in merges),
+                for k in fields),
             ' '.join(types[k].diff_diff(r.get(k), diff_r.get(k))
                 if k in r or k in diff_r else types[k].diff_none
-                for k in fields if k in merges),
+                for k in fields),
             ' (%s)' % ', '.join(
                 '+∞%' if t == float('+inf')
                 else '-∞%' if t == float('-inf')
@@ -539,56 +540,35 @@ def table(results, diff_results=None, *,
             if any(ratios) else ''))
 
 
-def main(csv_paths, *, fields=None, by=None, **args):
-    # figure out what fields to use
-    renames = {}
+def main(csv_paths, *,
+        by=None,
+        fields=None,
+        define=[],
+        **args):
+    # separate out renames
+    renames = [k.split('=', 1)
+        for k in it.chain(by or [], fields or [])
+        if '=' in k]
+    if by is not None:
+        by = [k.split('=', 1)[0] for k in by]
     if fields is not None:
-        fields_ = []
-        for name in fields:
-            if '=' in name:
-                a, b = name.split('=', 1)
-                renames[b] = a
-                name = a
-            fields_.append(name)
-        fields = fields_
+        fields = [k.split('=', 1)[0] for k in fields]
 
-    if by is not None:
-        by_ = []
-        for name in by:
-            if '=' in name:
-                a, b = name.split('=', 1)
-                renames[b] = a
-                name = a
-            by_.append(name)
-        by = by_
-
-    # include 'by' fields in fields, it doesn't make sense to not
-    if fields is not None and by is not None:
-        fields[:0] = [k for k in by if k not in fields]
-
-    # use preconfigured merge operations unless any merge operation is
-    # explictly specified
-    merge_args = (args
-        if any(args.get(m) for m in MERGES.keys())
-        else {m: k for m, (k, _) in MERGES.items()})
-    merges = {}
-    for m in MERGES.keys():
-        for k in merge_args.get(m, []):
-            if k in merges:
-                print("conflicting merge type for field %r?" % k)
+    # figure out merge operations
+    ops = {}
+    for m in OPS.keys():
+        for k in args.get(m, []):
+            if k in ops:
+                print("conflicting op for field %r?" % k)
                 sys.exit(-1)
-            merges[k] = m
-    # allow renames to apply to merges
-    for m in MERGES.keys():
-        for k in merge_args.get(m, []):
-            if renames.get(k, k) not in merges:
-                merges[renames.get(k, k)] = m
-    # ignore merges that conflict with 'by' fields
-    if by is not None:
-        for k in by:
-            if k in merges:
-                del merges[k]
+            ops[k] = m
+    # rename ops?
+    if renames:
+        ops_ = {}
+        for new_k, old_k in renames:
+            if old_k in ops:
+                ops_[new_k] = ops[old_k]
+        ops.update(ops_)
 
     # find CSV files
     paths = []
@@ -614,17 +594,17 @@ def main(csv_paths, *, fields=None, by=None, **args):
             pass
 
     # homogenize
-    fields, types, results = homogenize(results,
-        fields=fields, merges=merges, renames=renames)
+    by, fields, types, results = homogenize(results,
+        by=by, fields=fields, renames=renames, define=define)
 
     # fold to remove duplicates
-    fields, results = fold(results,
-        fields=fields, merges=merges)
+    results = fold(results,
+        by=by, fields=fields, ops=ops)
 
     # write results to CSV
     if args.get('output'):
         with openio(args['output'], 'w') as f:
-            writer = csv.DictWriter(f, fields)
+            writer = csv.DictWriter(f, by + fields)
             writer.writeheader()
             for r in results:
                 writer.writerow(r)
@@ -641,22 +621,22 @@ def main(csv_paths, *, fields=None, by=None, **args):
                 pass
 
         # homogenize
-        _, _, diff_results = homogenize(diff_results,
-            fields=fields, merges=merges, renames=renames, types=types)
+        _, _, _, diff_results = homogenize(diff_results,
+            by=by, fields=fields, renames=renames, define=define, types=types)
 
         # fold to remove duplicates
-        _, diff_results = fold(diff_results,
-            fields=fields, merges=merges)
+        diff_results = fold(diff_results,
+            by=by, fields=fields, ops=ops)
 
     # print table
     if not args.get('quiet'):
         table(
            results,
            diff_results if args.get('diff') else None,
-            fields=fields,
-            types=types,
-            merges=merges,
            by=by,
+            fields=fields,
+            ops=ops,
+            types=types,
            **args)
 
 
@@ -689,36 +669,41 @@ if __name__ == "__main__":
         '-p', '--percent',
         action='store_true',
         help="Only show percentage change, not a full diff.")
-    parser.add_argument(
-        '-f', '--fields',
-        type=lambda x: [x.strip() for x in x.split(',')],
-        help="Only show these fields. Can rename fields "
-            "with new_name=old_name.")
     parser.add_argument(
         '-b', '--by',
         type=lambda x: [x.strip() for x in x.split(',')],
-        help="Group by these fields. Can rename fields "
-            "with new_name=old_name.")
+        help="Group by these fields. All other fields will be merged as "
+            "needed. Can rename fields with new_name=old_name.")
+    parser.add_argument(
+        '-f', '--fields',
+        type=lambda x: [x.strip() for x in x.split(',')],
+        help="Use these fields. Can rename fields with new_name=old_name.")
+    parser.add_argument(
+        '-D', '--define',
+        type=lambda x: (lambda k,v: (k, set(v.split(','))))(*x.split('=', 1)),
+        action='append',
+        help="Only include rows where this field is this value. May include "
+            "comma-separated options.")
     parser.add_argument(
         '--add',
         type=lambda x: [x.strip() for x in x.split(',')],
-        help="Add these fields when merging.")
+        help="Add these fields (the default).")
     parser.add_argument(
         '--mul',
         type=lambda x: [x.strip() for x in x.split(',')],
-        help="Multiply these fields when merging.")
+        help="Multiply these fields.")
     parser.add_argument(
        '--min',
        type=lambda x: [x.strip() for x in x.split(',')],
-        help="Take the minimum of these fields when merging.")
+        help="Take the minimum of these fields.")
     parser.add_argument(
        '--max',
        type=lambda x: [x.strip() for x in x.split(',')],
-        help="Take the maximum of these fields when merging.")
+        help="Take the maximum of these fields.")
     parser.add_argument(
        '--avg',
        type=lambda x: [x.strip() for x in x.split(',')],
-        help="Average these fields when merging.")
+        help="Average these fields.")
     parser.add_argument(
        '-s', '--sort',
        type=lambda x: [x.strip() for x in x.split(',')],
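To see how the new pieces fit together, here is a hedged, minimal re-implementation of the fold step with plain dicts and ints (this is not summary.py itself; the 'file'/'code'/'stack' names and values are illustrative, loosely following the Makefile's -fstack=stack_limit --max=stack invocation): rows sharing the same 'by' key are merged field by field, using 'add' unless another op was requested for that field.

import collections as co

OPS = {'add': lambda xs: sum(xs[1:], start=xs[0]), 'max': max}

def fold(results, by=[], fields=[], ops={}):
    folding = co.OrderedDict()
    for r in results:
        name = tuple(r.get(k, '') for k in by)
        folding.setdefault(name, {k: [] for k in fields})
        for k in fields:
            if k in r:
                folding[name][k].append(r[k])
    return [
        dict({k: v for k, v in zip(by, name) if v},
            **{k: OPS[ops.get(k, 'add')](vs) for k, vs in r.items() if vs})
        for name, r in folding.items()]

rows = [
    {'file': 'lfs.c',      'code': 1000, 'stack': 112},
    {'file': 'lfs.c',      'code': 2000, 'stack': 144},
    {'file': 'lfs_util.c', 'code': 500,  'stack': 64},
]
print(fold(rows, by=['file'], fields=['code', 'stack'], ops={'stack': 'max'}))
# [{'file': 'lfs.c', 'code': 3000, 'stack': 144}, {'file': 'lfs_util.c', 'code': 500, 'stack': 64}]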