mirror of
https://github.com/littlefs-project/littlefs.git
synced 2025-12-08 00:22:44 +00:00
This is a work-in-progress, but the general idea is to replace the
existing rename mechanic in csv.py with a full expr parser:
$ ./scripts/csv.py input.csv -ba=x -fb=y+z
I've been putting this off for a while, as it feels like too big a jump
in complexity for what was intended to be a simple script. But
complexity is a bit funny in programming. Even if a full parser is more
difficult to implement, if it's the right grammar for the job, the
resulting script should end up both easier to understand and easier to
extend.
The original intention was that any sufficiently complicated math could
be implemented in ad-hoc Python scripts that operate directly on the CSV
files, but CSV parsing in Python is annoying enough that this never
really worked well.
But I'm probably overselling the complexity. This is classic CS stuff:
1. build a syntax tree
2. map symbols to input fields
3. typecheck, fold, eval, etc
One neat thing is that in addition to providing type and eval
information, our exprs can also provide information on how to "fold" the
field after eval. This kicks in when merging multiple rows when grouping
by -b/--by, and for finding the TOTAL results.
This can be used to merge stack results correctly with max:
$ ./scripts/csv.py stack.csv \
-fframe='sum(frame)' -flimit='max(limit)'
Or can be used to find other interesting measurements:
$ ./scripts/csv.py stack.csv \
-favg='avg(frame)' -fstddev='stddev(frame)'
These changes also make the eval order of input/output fields much
stricter which is probably a good thing.
This should replace all of the somewhat hacky fake-expr flags in csv.py:
- --int => -fa='int(b)'
- --float => -fa='float(b)'
- --frac => -fa='frac(b)'
- --sum => -fa='sum(b)'
- --prod => -fa='prod(b)'
- --min => -fa='min(b)'
- --max => -fa='max(b)'
- --avg => -fa='avg(b)'
- --stddev => -fa='stddev(b)'
- --gmean => -fa='gmean(b)'
- --gstddev => -fa='gstddev(b)'
If you squint you might be able to see a pattern.
1325 lines
39 KiB
Python
Executable File
1325 lines
39 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
#
|
|
# Script to manipulate CSV files.
|
|
#
|
|
# Example:
|
|
# ./scripts/code.py lfs.o lfs_util.o -q -o lfs.code.csv
|
|
# ./scripts/data.py lfs.o lfs_util.o -q -o lfs.data.csv
|
|
# ./scripts/csv.py lfs.code.csv lfs.data.csv -q -o lfs.csv
|
|
# ./scripts/csv.py -Y lfs.csv -f code=code_size,data=data_size
|
|
#
|
|
# Copyright (c) 2022, The littlefs authors.
|
|
# SPDX-License-Identifier: BSD-3-Clause
|
|
#
|
|
|
|
# prevent local imports
|
|
__import__('sys').path.pop(0)
|
|
|
|
import collections as co
|
|
import csv
|
|
import functools as ft
|
|
import itertools as it
|
|
import math as mt
|
|
import os
|
|
import re
|
|
import sys
|
|
|
|
|
|
# various field types
|
|
|
|
# integer fields
|
|
class RInt(co.namedtuple('RInt', 'x')):
    # x is either a Python int or +-mt.inf
    __slots__ = ()

    # create an RInt from another RInt, a string, or a number
    #
    # strings are parsed with int(x, 0), so 0x/0o/0b prefixes work, and
    # +-∞/+-inf are accepted as infinities, unparsable strings raise
    # ValueError
    #
    # any other non-int value is converted via float and truncated toward
    # zero unless it is infinite, this is what makes int(...) exprs on
    # float/frac fields work
    def __new__(cls, x=0):
        if isinstance(x, RInt):
            return x
        if isinstance(x, str):
            try:
                x = int(x, 0)
            except ValueError:
                # also accept +-∞ and +-inf
                if re.match(r'^\s*\+?\s*(?:∞|inf)\s*$', x):
                    x = mt.inf
                elif re.match(r'^\s*-\s*(?:∞|inf)\s*$', x):
                    x = -mt.inf
                else:
                    raise
        if not isinstance(x, int):
            # coerce explicitly instead of asserting, asserts disappear
            # under python -O
            x = float(x)
            if not mt.isinf(x):
                x = int(x)
        return super().__new__(cls, x)

    def __str__(self):
        if self.x == mt.inf:
            return '∞'
        elif self.x == -mt.inf:
            return '-∞'
        else:
            return str(self.x)

    # note infinities have no int representation
    def __int__(self):
        assert not mt.isinf(self.x)
        return self.x

    def __float__(self):
        return float(self.x)

    # placeholder shown in tables when there is no value
    none = '%7s' % '-'

    # render as a right-aligned table cell
    def table(self):
        return '%7s' % (self,)

    # render the signed difference self-other as a table cell, either
    # side may be None, which counts as 0
    def diff(self, other):
        new = self.x if self else 0
        old = other.x if other else 0
        diff = new - old
        if diff == +mt.inf:
            return '%7s' % '+∞'
        elif diff == -mt.inf:
            return '%7s' % '-∞'
        else:
            return '%+7d' % diff

    # relative change (new-old)/old as a float, either side may be None,
    # which counts as 0
    def ratio(self, other):
        new = self.x if self else 0
        old = other.x if other else 0
        if mt.isinf(new) and mt.isinf(old):
            return 0.0
        elif mt.isinf(new):
            return +mt.inf
        elif mt.isinf(old):
            return -mt.inf
        elif not old and not new:
            return 0.0
        elif not old:
            return +mt.inf
        else:
            return (new-old) / old

    def __add__(self, other):
        return self.__class__(self.x + other.x)

    def __sub__(self, other):
        return self.__class__(self.x - other.x)

    def __mul__(self, other):
        return self.__class__(self.x * other.x)
|
|
|
|
# float fields
|
|
class RFloat(co.namedtuple('RFloat', 'x')):
    # x is always a Python float, possibly +-mt.inf
    __slots__ = ()

    # create an RFloat from another RFloat, a string, or a number
    #
    # strings are parsed with float(x), with +-∞/+-inf also accepted as
    # infinities, unparsable strings raise ValueError
    #
    # any other value is converted with float(x), this is what makes
    # float(...) exprs on int/frac fields work
    def __new__(cls, x=0.0):
        if isinstance(x, RFloat):
            return x
        if isinstance(x, str):
            try:
                x = float(x)
            except ValueError:
                # also accept +-∞ and +-inf
                if re.match(r'^\s*\+?\s*(?:∞|inf)\s*$', x):
                    x = mt.inf
                elif re.match(r'^\s*-\s*(?:∞|inf)\s*$', x):
                    x = -mt.inf
                else:
                    raise
        if not isinstance(x, float):
            # coerce explicitly instead of asserting, asserts disappear
            # under python -O
            x = float(x)
        return super().__new__(cls, x)

    def __str__(self):
        if self.x == mt.inf:
            return '∞'
        elif self.x == -mt.inf:
            return '-∞'
        else:
            return '%.1f' % self.x

    def __float__(self):
        return float(self.x)

    # table rendering is shared with RInt
    none = RInt.none
    table = RInt.table

    # render the signed difference self-other as a table cell, either
    # side may be None, which counts as 0
    def diff(self, other):
        new = self.x if self else 0
        old = other.x if other else 0
        diff = new - old
        if diff == +mt.inf:
            return '%7s' % '+∞'
        elif diff == -mt.inf:
            return '%7s' % '-∞'
        else:
            return '%+7.1f' % diff

    # ratio and arithmetic only touch .x, so RInt's implementations work
    # as is
    ratio = RInt.ratio
    __add__ = RInt.__add__
    __sub__ = RInt.__sub__
    __mul__ = RInt.__mul__
|
|
|
|
# fractional fields, a/b
|
|
class RFrac(co.namedtuple('RFrac', 'a,b')):
    # a and b are both stored as RInts
    __slots__ = ()

    # create an RFrac from another RFrac, an 'a/b' string, or explicit
    # a and b values, if only a is given b defaults to a
    def __new__(cls, a=0, b=None):
        if b is None:
            if isinstance(a, RFrac):
                return a
            if isinstance(a, str):
                # note this raises ValueError if there is no '/'
                a, b = a.split('/', 1)
        if b is None:
            b = a
        return super().__new__(cls, RInt(a), RInt(b))

    def __str__(self):
        return '%s/%s' % (self.a, self.b)

    # only the numerator takes part in float conversions
    def __float__(self):
        return float(self.a)

    # placeholder shown in tables when there is no value
    none = '%11s' % '-'

    # render as a right-aligned table cell
    def table(self):
        return '%11s' % (self,)

    # extra table note showing a/b as a percentage, an empty denominator
    # counts as 100%
    def notes(self):
        t = self.a.x/self.b.x if self.b.x else 1.0
        if t == +mt.inf:
            note = '∞%'
        elif t == -mt.inf:
            note = '-∞%'
        else:
            note = '%.1f%%' % (100*t)
        return [note]

    # render the per-component differences as a table cell, either side
    # may be None, which counts as 0/0
    def diff(self, other):
        zero = (RInt(0), RInt(0))
        new_a, new_b = self if self else zero
        old_a, old_b = other if other else zero
        da = new_a.diff(old_a).strip()
        db = new_b.diff(old_b).strip()
        return '%11s' % ('%s/%s' % (da, db))

    # difference between the two fractions as floats, either side may be
    # None, which counts as 0/0, an empty denominator counts as 1.0
    def ratio(self, other):
        zero = (RInt(0), RInt(0))
        new_a, new_b = self if self else zero
        old_a, old_b = other if other else zero
        new = new_a.x/new_b.x if new_b.x else 1.0
        old = old_a.x/old_b.x if old_b.x else 1.0
        return new - old

    def __add__(self, other):
        cls = self.__class__
        return cls(self.a + other.a, self.b + other.b)

    def __sub__(self, other):
        cls = self.__class__
        return cls(self.a - other.a, self.b - other.b)

    # NOTE(review): the denominators are added here, not multiplied,
    # confirm this is intended
    def __mul__(self, other):
        cls = self.__class__
        return cls(self.a * other.a, self.b + other.b)

    # comparisons cross-multiply so no division is needed, a fraction
    # with an empty denominator compares as 1/1
    def __eq__(self, other):
        one = (RInt(1), RInt(1))
        self_a, self_b = self if self.b.x else one
        other_a, other_b = other if other.b.x else one
        lhs = self_a * other_b
        rhs = other_a * self_b
        return lhs == rhs

    def __ne__(self, other):
        return not self.__eq__(other)

    def __lt__(self, other):
        one = (RInt(1), RInt(1))
        self_a, self_b = self if self.b.x else one
        other_a, other_b = other if other.b.x else one
        lhs = self_a * other_b
        rhs = other_a * self_b
        return lhs < rhs

    # the remaining comparisons are all derived from __lt__
    def __gt__(self, other):
        return self.__class__.__lt__(other, self)

    def __le__(self, other):
        return not self.__gt__(other)

    def __ge__(self, other):
        return not self.__lt__(other)
|
|
|
|
|
|
# various fold operations
|
|
class RSum:
    # fold by adding everything together, the first element seeds the sum
    # so no zero value of the field type is needed
    def __call__(self, xs):
        rest = xs[1:]
        first = xs[0]
        return sum(rest, start=first)
|
|
|
|
class RProd:
    # fold by multiplying everything together, the first element seeds
    # the product so no unit value of the field type is needed
    def __call__(self, xs):
        rest = xs[1:]
        first = xs[0]
        return mt.prod(rest, start=first)
|
|
|
|
class RMin:
    # fold by keeping the smallest result
    def __call__(self, xs):
        smallest = min(xs)
        return smallest
|
|
|
|
class RMax:
    # fold by keeping the largest result
    def __call__(self, xs):
        largest = max(xs)
        return largest
|
|
|
|
class RAvg:
    # fold into the arithmetic mean, always an RFloat
    def __call__(self, xs):
        total = sum(float(x) for x in xs)
        return RFloat(total / len(xs))
|
|
|
|
class RStddev:
    # fold into the population standard deviation, always an RFloat
    def __call__(self, xs):
        avg = sum(float(x) for x in xs) / len(xs)
        variance = sum((float(x) - avg)**2 for x in xs) / len(xs)
        return RFloat(mt.sqrt(variance))
|
|
|
|
class RGMean:
    # fold into the geometric mean, always an RFloat
    def __call__(self, xs):
        product = mt.prod(float(x) for x in xs)
        return RFloat(product**(1/len(xs)))
|
|
|
|
class RGStddev:
    # fold into the geometric standard deviation, always an RFloat, a
    # geometric mean of zero gives infinity
    def __call__(self, xs):
        gmean = mt.prod(float(x) for x in xs)**(1/len(xs))
        if gmean:
            log_variance = (
                    sum(mt.log(float(x)/gmean)**2 for x in xs) / len(xs))
            gstddev = mt.exp(mt.sqrt(log_variance))
        else:
            gstddev = mt.inf
        return RFloat(gstddev)
|
|
|
|
|
|
# a lazily-evaluated field expression
|
|
class RExpr:
    # The expr string is parsed into a tree of Expr nodes on construction,
    # fields/type/fold/eval then walk that tree.
    #
    # Any RExpr.Error raised while parsing or walking is reported on
    # stderr and terminates the script with exit code 3.

    # expr parsing/typechecking/etc errors
    class Error(Exception):
        def __init__(self, reason):
            # also hand reason to Exception so str(e) is useful
            super().__init__(reason)
            self.reason = reason

    # expr nodes
    class Expr:
        # positional args are stored as the attributes a, b, c, ...
        def __init__(self, *args):
            for k, v in zip('abcdefghijklmnopqrstuvwxyz', args):
                setattr(self, k, v)

        def __repr__(self):
            return '%s(%s)' % (
                    self.__class__.__name__,
                    ','.join(
                        repr(getattr(self, k))
                        for k in it.takewhile(
                            lambda k: hasattr(self, k),
                            'abcdefghijklmnopqrstuvwxyz')))

        # union of the fields referenced by all args
        def fields(self):
            return set(it.chain.from_iterable(
                    getattr(self, k).fields()
                    for k in it.takewhile(
                        lambda k: hasattr(self, k),
                        'abcdefghijklmnopqrstuvwxyz')))

        # by default type/fold/eval just pass through to the first arg
        def type(self, types={}):
            return self.a.type(types)

        def fold(self, types={}):
            return self.a.fold(types)

        def eval(self, fields={}):
            return self.a.eval(fields)

    # a reference to an input field, a is the field name
    class Field(Expr):
        def fields(self):
            return {self.a}

        def type(self, types={}):
            if self.a not in types:
                raise RExpr.Error("untyped field? %s" % self.a)
            return types[self.a]

        def fold(self, types={}):
            if self.a not in types:
                raise RExpr.Error("unfoldable field? %s" % self.a)
            return RSum, types[self.a]

        def eval(self, fields={}):
            if self.a not in fields:
                raise RExpr.Error("unknown field? %s" % self.a)
            return fields[self.a]

    # literals, a is the literal's value
    class StrLit(Expr):
        def fields(self):
            return set()

        def eval(self, fields={}):
            return self.a

    class IntLit(Expr):
        def fields(self):
            return set()

        def type(self, types={}):
            return RInt

        def fold(self, types={}):
            return RSum, RInt

        def eval(self, fields={}):
            return self.a

    class FloatLit(Expr):
        def fields(self):
            return set()

        def type(self, types={}):
            return RFloat

        def fold(self, types={}):
            return RSum, RFloat

        def eval(self, fields={}):
            return self.a

    # type conversions
    class Int(Expr):
        def type(self, types={}):
            return RInt

        def eval(self, fields={}):
            return RInt(self.a.eval(fields))

    class Float(Expr):
        def type(self, types={}):
            return RFloat

        def eval(self, fields={}):
            return RFloat(self.a.eval(fields))

    class Frac(Expr):
        def type(self, types={}):
            return RFrac

        def eval(self, fields={}):
            # frac(a) with a single arg is allowed, RFrac then uses a
            # for both halves
            if not hasattr(self, 'b'):
                return RFrac(self.a.eval(fields))
            return RFrac(self.a.eval(fields), self.b.eval(fields))

    # mergers, these only change how the field is folded
    class Sum(Expr):
        def fold(self, types={}):
            return RSum, self.a.type(types)

    class Prod(Expr):
        def fold(self, types={}):
            return RProd, self.a.type(types)

    class Min(Expr):
        def fold(self, types={}):
            return RMin, self.a.type(types)

    class Max(Expr):
        def fold(self, types={}):
            return RMax, self.a.type(types)

    class Avg(Expr):
        def fold(self, types={}):
            return RAvg, RFloat

    class Stddev(Expr):
        def fold(self, types={}):
            return RStddev, RFloat

    class GMean(Expr):
        def fold(self, types={}):
            return RGMean, RFloat

    class GStddev(Expr):
        def fold(self, types={}):
            return RGStddev, RFloat

    # NOTE(review): the function and operator nodes below define no
    # type/fold/eval of their own, so they currently inherit Expr's
    # pass-through to the first arg
    class Ratio(Expr):
        pass

    class Total(Expr):
        pass

    class Ceil(Expr):
        pass

    class Floor(Expr):
        pass

    class Log(Expr):
        pass

    class Pow(Expr):
        pass

    class Sqrt(Expr):
        pass

    funcs = {
        # types
        'int': Int,
        'float': Float,
        'frac': Frac,

        # functions
        'ratio': Ratio,
        'total': Total,
        'ceil': Ceil,
        'floor': Floor,
        'log': Log,
        'pow': Pow,
        'sqrt': Sqrt,

        # mergers
        'sum': Sum,
        'prod': Prod,
        'min': Min,
        'max': Max,
        'avg': Avg,
        'stddev': Stddev,
        'gmean': GMean,
        'gstddev': GStddev,
    }

    class Pos(Expr):
        pass

    class Neg(Expr):
        pass

    class Not(Expr):
        pass

    class Notnot(Expr):
        pass

    class Mul(Expr):
        pass

    class Div(Expr):
        pass

    class Mod(Expr):
        pass

    class Add(Expr):
        pass

    class Sub(Expr):
        pass

    class Shl(Expr):
        pass

    class Shr(Expr):
        pass

    class And(Expr):
        pass

    class Xor(Expr):
        pass

    class Or(Expr):
        pass

    class Lt(Expr):
        pass

    class Le(Expr):
        pass

    class Gt(Expr):
        pass

    class Ge(Expr):
        pass

    class Ne(Expr):
        pass

    class Eq(Expr):
        pass

    class Andand(Expr):
        pass

    class Oror(Expr):
        pass

    # a ? b : c, typed and folded like the b branch
    class Ife(Expr):
        def type(self, types={}):
            return self.b.type(types)

        def fold(self, types={}):
            return self.b.fold(types)

    # unary ops as (token, precedence of the operand, node)
    _unops = [
        ('+', 12, Pos),
        ('-', 12, Neg),
        ('~', 12, Not),
        ('!', 4, Notnot),
    ]

    # binary ops as (token, precedence, node)
    #
    # these are sorted longest-token-first so multi-char ops such as <=
    # and && are never mistaken for their single-char prefixes
    _binops = sorted([
        ('*', 11, Mul),
        ('/', 11, Div),
        ('%', 11, Mod),
        ('+', 10, Add),
        ('-', 10, Sub),
        ('<<', 9, Shl),
        ('>>', 9, Shr),
        ('&', 8, And),
        ('^', 7, Xor),
        ('|', 6, Or),
        ('<', 5, Lt),
        ('<=', 5, Le),
        ('>', 5, Gt),
        ('>=', 5, Ge),
        ('!=', 5, Ne),
        ('==', 5, Eq),
        ('&&', 3, Andand),
        ('||', 2, Oror),
    ], key=lambda op: len(op[0]), reverse=True)

    # run f, reporting any expr errors and exiting with code 3
    def _checked(self, f, *args):
        try:
            return f(*args)
        except RExpr.Error as e:
            print('error: in expr: %s' % self.expr,
                    file=sys.stderr)
            print('error: %s' % e.reason,
                    file=sys.stderr)
            sys.exit(3)

    # parse an expr
    def __init__(self, expr):
        self.expr = expr.strip()

        # parse a single term, returns the node and the unparsed tail
        def p_term(expr):
            # parens
            if expr.startswith('('):
                a, tail = p_expr(expr[1:].lstrip())
                if not tail.startswith(')'):
                    raise RExpr.Error("mismatched parens? %s" % tail)
                return a, tail[1:].lstrip()

            # field or function call
            m = re.match(r'[_a-zA-Z][_a-zA-Z0-9]*', expr)
            if m:
                name = m.group()
                tail = expr[m.end():].lstrip()
                if not tail.startswith('('):
                    return RExpr.Field(name), tail

                if name not in RExpr.funcs:
                    raise RExpr.Error("unknown function? %s" % name)
                tail = tail[1:].lstrip()
                args = []
                while True:
                    a, tail = p_expr(tail)
                    args.append(a)
                    if tail.startswith(','):
                        tail = tail[1:].lstrip()
                        continue
                    if not tail.startswith(')'):
                        raise RExpr.Error(
                                "mismatched parens? %s" % tail)
                    return RExpr.funcs[name](*args), tail[1:].lstrip()

            # string literal, note the contents are kept verbatim,
            # backslash escapes only prevent the quote from terminating
            m = re.match(
                    r'"(?:\\.|[^"\\])*"|\'(?:\\.|[^\'\\])*\'',
                    expr)
            if m:
                return (RExpr.StrLit(m.group()[1:-1]),
                        expr[m.end():].lstrip())

            # float literal
            m = re.match(
                    r'[+-]?(?:[0-9][_0-9]*\.[_0-9]*|\.[0-9][_0-9]*)'
                        r'(?:[eE][+-]?[0-9]+)?',
                    expr)
            if m:
                try:
                    a = RExpr.FloatLit(RFloat(m.group()))
                except ValueError:
                    raise RExpr.Error(
                            "invalid float literal? %s"
                                % m.group()) from None
                return a, expr[m.end():].lstrip()

            # int literal, note this must start with a digit (after any
            # sign) so that -a is parsed as negation, not as hex
            m = re.match(
                    r'[+-]?(?:0[bBoOxX][_0-9a-fA-F]+|[0-9][_0-9]*'
                        r'|∞|inf(?![_a-zA-Z0-9]))',
                    expr)
            if m:
                try:
                    a = RExpr.IntLit(RInt(m.group()))
                except ValueError:
                    raise RExpr.Error(
                            "invalid int literal? %s"
                                % m.group()) from None
                return a, expr[m.end():].lstrip()

            # unary ops
            for op, op_prec, node in RExpr._unops:
                if expr.startswith(op):
                    a, tail = p_expr(expr[len(op):].lstrip(), op_prec)
                    return node(a), tail

            raise RExpr.Error("unknown expr? %s" % expr)

        # parse the expression into a tree, only consuming binary ops
        # that bind tighter than prec
        def p_expr(expr, prec=0):
            a, tail = p_term(expr)

            while True:
                # ternary op, right-associative
                if tail.startswith('?'):
                    if prec > 1:
                        return a, tail
                    b, tail = p_expr(tail[1:].lstrip(), 1)
                    if not tail.startswith(':'):
                        raise RExpr.Error("Mismatched ?:? %s" % tail)
                    c, tail = p_expr(tail[1:].lstrip(), 1)
                    a = RExpr.Ife(a, b, c)
                    continue

                # binary ops, left-associative
                for op, op_prec, node in RExpr._binops:
                    if tail.startswith(op):
                        break
                else:
                    return a, tail
                if prec >= op_prec:
                    return a, tail
                b, tail = p_expr(tail[len(op):].lstrip(), op_prec)
                a = node(a, b)

        def parse():
            tree, tail = p_expr(self.expr)
            if tail:
                raise RExpr.Error("trailing expr? %s" % tail)
            return tree

        self.tree = self._checked(parse)

    # recursively find all fields
    def fields(self):
        return self._checked(self.tree.fields)

    # recursively find the type
    def type(self, types={}):
        return self._checked(self.tree.type, types)

    # recursively find the fold operation
    def fold(self, types={}):
        return self._checked(self.tree.fold, types)

    # recursively evaluate the expr
    def eval(self, fields={}):
        return self._checked(self.tree.eval, fields)
|
|
|
|
|
|
# open a file, with '-' as an alias for stdin/stdout
#
# the standard stream is dup'd first, so closing the returned file does
# not close the real stdin/stdout
def openio(path, mode='r', buffering=-1):
    if path != '-':
        return open(path, mode, buffering)

    # allow '-' for stdin/stdout
    stream = sys.stdin if 'r' in mode else sys.stdout
    return os.fdopen(os.dup(stream.fileno()), mode, buffering)
|
|
|
|
# collect results from CSV files
#
# csv_paths is an iterable of paths, '-' means stdin, missing files are
# silently skipped
#
# defines is a list of (field, values) pairs, only rows where every such
# field is present with one of the given values are kept
#
# returns (fields, results), where fields is the deduplicated list of
# column names in first-seen order and results is a list of row dicts
def collect(csv_paths, defines=[]):
    fields = []
    results = []
    for path in csv_paths:
        try:
            with openio(path) as f:
                reader = csv.DictReader(f, restval='')
                # note fieldnames is None if the file is empty
                fields.extend(
                        k for k in (reader.fieldnames or [])
                        if k not in fields)
                for r in reader:
                    # filter by matching defines
                    if not all(k in r and r[k] in vs for k, vs in defines):
                        continue

                    results.append(r)
        except FileNotFoundError:
            # best effort, missing files just contribute no results
            pass

    return fields, results
|
|
|
|
# infer by/fields and field types from the raw results, and build a
# Result class to hold typed, lazily-folded results
#
# fields_ is the list of all column names, results the list of row
# dicts, exprs a list of (field, RExpr) pairs, and defines a list of
# (field, values) pairs
#
# the returned class is a namedtuple over by + fields with the extra
# attributes _by, _fields, _sort, and _types
def infer(fields_, results,
        by=None,
        fields=None,
        exprs=[],
        defines=[]):
    # we only really care about the last expr for each field
    exprs = {k: expr for k, expr in exprs}

    # find all fields our exprs depend on
    fields__ = set(it.chain.from_iterable(
            expr.fields() for expr in exprs.values()))

    # if by not specified, guess it's anything not in fields/exprs/defines
    if by is None:
        by = [k for k in fields_
                if k not in (fields or [])
                    and k not in fields__
                    and not any(k == k_ for k_, _ in defines)]

    # if fields not specified, guess it's anything not in by/exprs/defines
    if fields is None:
        fields = [k for k in fields_
                if k not in (by or [])
                    and k not in fields__
                    and not any(k == k_ for k_, _ in defines)]

    # deduplicate by/fields
    by = list(co.OrderedDict.fromkeys(by).keys())
    fields = list(co.OrderedDict.fromkeys(fields).keys())

    # find best type for all fields used by field exprs
    fields__ = set(it.chain.from_iterable(
            exprs[k].fields() if k in exprs else {k}
            for k in fields))
    types = {}
    for k in fields__:
        # pick the first type that can parse every non-empty value
        for t in [RInt, RFloat, RFrac]:
            for r in results:
                if k in r and r[k].strip():
                    try:
                        t(r[k])
                    except ValueError:
                        break
            else:
                types[k] = t
                break
        else:
            print("error: no type matches field %r?" % k,
                    file=sys.stderr)
            sys.exit(-1)

    # typecheck field exprs, note these may reference input fields
    # with the same name
    #
    # the resulting type is not kept, the displayed type comes from the
    # fold below, this only reports errors early
    for k, expr in exprs.items():
        if k in fields:
            expr.type(types)

    # foldcheck field exprs, plain fields are summed as their own type
    folds = {k: (RSum, t) for k, t in types.items()}
    for k, expr in exprs.items():
        if k in fields:
            folds[k] = expr.fold(types)
    folds = {k: (f(), t) for k, (f, t) in folds.items()}

    # create result class
    def __new__(cls, **r):
        # evaluate types
        r_ = {k: types[k](v) if k in types else v
                for k, v in r.items()}
        # evaluate exprs
        r__ = r_.copy()
        for k, expr in exprs.items():
            r__[k] = expr.eval(r_)

        # return result, fields are stored as (values, count) pairs so
        # they can be folded lazily
        return cls.__mro__[1].__new__(cls,
                **{k: r__.get(k, '') for k in by},
                **{k: ([r__[k]], 1) if k in r__ else ([], 0)
                    for k in fields})

    def __add__(self, other):
        # reuse lists if possible
        def extend(a, b):
            if len(a[0]) == a[1]:
                a[0].extend(b[0][:b[1]])
                return (a[0], a[1] + b[1])
            else:
                return (a[0][:a[1]] + b[0][:b[1]], a[1] + b[1])

        # lazily fold results
        return self.__class__.__mro__[1].__new__(self.__class__,
                **{k: getattr(self, k) for k in by},
                **{k: extend(
                        object.__getattribute__(self, k),
                        object.__getattribute__(other, k))
                    for k in fields})

    def __getattribute__(self, k):
        # lazily fold results on demand, this avoids issues with fold
        # operations that depend on the number of results
        if k in fields:
            v = object.__getattribute__(self, k)
            if v[1]:
                return folds[k][0](v[0][:v[1]])
            else:
                return None
        return object.__getattribute__(self, k)

    return type('Result', (co.namedtuple('Result', by + fields),), {
        '__slots__': (),
        '__new__': __new__,
        '__add__': __add__,
        '__getattribute__': __getattribute__,
        '_by': by,
        '_fields': fields,
        '_sort': fields,
        '_types': {k: t for k, (_, t) in folds.items()},
    })
|
|
|
|
|
|
# merge results that share the same by-fields
#
# by defaults to Result._by, defines is a list of (field, values) pairs
# used to filter results before merging
#
# returns the merged results in first-seen order, exits with an error if
# by/defines name an unknown field
def fold(Result, results, by=None, defines=[]):
    if by is None:
        by = Result._by

    # every by/define key needs to be a known field
    for k in it.chain(by or [], (k for k, _ in defines)):
        if not (k in Result._by or k in Result._fields):
            print("error: could not find field %r?" % k,
                    file=sys.stderr)
            sys.exit(-1)

    # filter by matching defines
    if defines:
        results = [r for r in results
                if all(getattr(r, k) in vs for k, vs in defines)]

    # organize results into conflicts
    folding = co.OrderedDict()
    for r in results:
        name = tuple(getattr(r, k) for k in by)
        folding.setdefault(name, []).append(r)

    # merge conflicts
    return [sum(rs[1:], start=rs[0]) for rs in folding.values()]
|
|
|
|
# print results as a table, optionally diffed against diff_results
#
# by/fields default to Result._by/Result._fields, sort is a list of
# (field, reverse) pairs, summary only shows the TOTAL row, all shows
# unchanged entries when diffing, and percent only shows percentage
# changes when diffing
def table(Result, results, diff_results=None, *,
        by=None,
        fields=None,
        sort=None,
        summary=False,
        all=False,
        percent=False,
        **_):
    # the all parameter shadows the builtin, so rename it and restore
    # the builtin, note __builtins__ can't be used for this, it is a
    # dict, not a module, when this file is imported
    import builtins
    all_, all = all, builtins.all

    if by is None:
        by = Result._by
    if fields is None:
        fields = Result._fields
    types = Result._types

    # fold again
    results = fold(Result, results, by=by)
    if diff_results is not None:
        diff_results = fold(Result, diff_results, by=by)

    # organize by name
    table = {
        ','.join(str(getattr(r, k) or '') for k in by): r
        for r in results}
    diff_table = {
        ','.join(str(getattr(r, k) or '') for k in by): r
        for r in diff_results or []}
    # when diffing, only keep entries with a non-zero ratio unless all
    # was requested
    names = [name
            for name in table.keys() | diff_table.keys()
            if diff_results is None
                or all_
                or any(
                    types[k].ratio(
                        getattr(table.get(name), k, None),
                        getattr(diff_table.get(name), k, None))
                    for k in fields)]

    # sort again, now with diff info, note that python's sort is stable
    names.sort()
    if diff_results is not None:
        names.sort(
                key=lambda n: tuple(
                    types[k].ratio(
                        getattr(table.get(n), k, None),
                        getattr(diff_table.get(n), k, None))
                    for k in fields),
                reverse=True)
    if sort:
        for k, reverse in reversed(sort):
            names.sort(
                    key=lambda n: tuple(
                        (getattr(table[n], k),)
                        if getattr(table.get(n), k, None) is not None
                        else ()
                        for k in (
                            [k] if k else [
                                k for k in Result._sort
                                if k in fields])),
                    reverse=reverse ^ (not k or k in Result._fields))


    # build up our lines
    lines = []

    # header
    header = ['%s%s' % (
                ','.join(by),
                ' (%d added, %d removed)' % (
                    sum(1 for n in table if n not in diff_table),
                    sum(1 for n in diff_table if n not in table))
                    if diff_results is not None and not percent else '')
            if not summary else '']
    if diff_results is None:
        for k in fields:
            header.append(k)
    elif percent:
        for k in fields:
            header.append(k)
    else:
        # old, new, and diff columns
        for k in fields:
            header.append('o'+k)
        for k in fields:
            header.append('n'+k)
        for k in fields:
            header.append('d'+k)
    lines.append(header)

    # entry helper, cells are either plain strings or (string, notes)
    # tuples
    def table_entry(name, r, diff_r=None):
        entry = [name]
        if diff_results is None:
            for k in fields:
                entry.append(
                        (getattr(r, k).table(),
                            getattr(getattr(r, k), 'notes', lambda: [])())
                        if getattr(r, k, None) is not None
                        else types[k].none)
        elif percent:
            for k in fields:
                entry.append(
                        (getattr(r, k).table()
                            if getattr(r, k, None) is not None
                            else types[k].none,
                            (lambda t: ['+∞%'] if t == +mt.inf
                                else ['-∞%'] if t == -mt.inf
                                else ['%+.1f%%' % (100*t)])(
                                types[k].ratio(
                                    getattr(r, k, None),
                                    getattr(diff_r, k, None)))))
        else:
            for k in fields:
                entry.append(getattr(diff_r, k).table()
                        if getattr(diff_r, k, None) is not None
                        else types[k].none)
            for k in fields:
                entry.append(getattr(r, k).table()
                        if getattr(r, k, None) is not None
                        else types[k].none)
            for k in fields:
                entry.append(
                        (types[k].diff(
                            getattr(r, k, None),
                            getattr(diff_r, k, None)),
                            (lambda t: ['+∞%'] if t == +mt.inf
                                else ['-∞%'] if t == -mt.inf
                                else ['%+.1f%%' % (100*t)] if t
                                else [])(
                                types[k].ratio(
                                    getattr(r, k, None),
                                    getattr(diff_r, k, None)))))
        return entry

    # entries
    if not summary:
        for name in names:
            r = table.get(name)
            if diff_results is None:
                diff_r = None
            else:
                diff_r = diff_table.get(name)
            lines.append(table_entry(name, r, diff_r))

    # total
    r = next(iter(fold(Result, results, by=[])), None)
    if diff_results is None:
        diff_r = None
    else:
        diff_r = next(iter(fold(Result, diff_results, by=[])), None)
    lines.append(table_entry('TOTAL', r, diff_r))

    # homogenize
    lines = [
        [x if isinstance(x, tuple) else (x, []) for x in line]
        for line in lines]

    # find the best widths, note that column 0 contains the names and is
    # handled a bit differently
    widths = co.defaultdict(lambda: 7, {0: 23})
    notes = co.defaultdict(lambda: 0)
    for line in lines:
        for i, x in enumerate(line):
            widths[i] = max(widths[i], ((len(x[0])+1+4-1)//4)*4-1)
            notes[i] = max(notes[i], 1+2*len(x[1])+sum(len(n) for n in x[1]))

    # print our table
    for line in lines:
        print('%-*s %s' % (
            widths[0], line[0][0],
            ' '.join('%*s%-*s' % (
                    widths[i], x[0],
                    notes[i], ' (%s)' % ', '.join(x[1]) if x[1] else '')
                for i, x in enumerate(line[1:], 1))))
|
|
|
|
|
|
# script entry point
#
# by and fields are lists of (field, RExpr or None) pairs as produced by
# the -b/-f flags, defines is a list of (field, values) pairs, sort is a
# list of (field, reverse) pairs, any other flags arrive in args
def main(csv_paths, *,
        by=None,
        fields=None,
        defines=[],
        sort=None,
        **args):
    # separate out exprs
    exprs = [(k, v)
            for k, v in it.chain(by or [], fields or [])
            if v is not None]
    if by is not None:
        by = [k for k, _ in by]
    if fields is not None:
        fields = [k for k, _ in fields]

    if by is None and fields is None:
        print("error: needs --by or --fields to figure out fields",
                file=sys.stderr)
        sys.exit(-1)

    # use is just an alias
    if args.get('use'):
        csv_paths = csv_paths + [args['use']]

    # find CSV files
    fields_, results = collect(csv_paths, defines)

    # homogenize
    Result = infer(fields_, results,
            by=by,
            fields=fields,
            exprs=exprs,
            defines=defines)
    results_ = []
    for r in results:
        # empty values are treated as missing
        results_.append(Result(**{
                k: v for k, v in r.items() if v.strip()}))
    results = results_

    # fold
    results = fold(Result, results, by=by, defines=defines)

    # sort, note that python's sort is stable
    results.sort()
    if sort:
        for k, reverse in reversed(sort):
            results.sort(
                    key=lambda r: tuple(
                        (getattr(r, k),) if getattr(r, k) is not None else ()
                        for k in ([k] if k else Result._sort)),
                    reverse=reverse ^ (not k or k in Result._fields))

    # write results to CSV
    if args.get('output'):
        with openio(args['output'], 'w') as f:
            writer = csv.DictWriter(f, Result._by + Result._fields)
            writer.writeheader()
            for r in results:
                # note we need to go through getattr to resolve lazy fields
                writer.writerow({
                        k: getattr(r, k)
                        for k in Result._by + Result._fields})

    # find previous results?
    if args.get('diff'):
        # note collect only takes paths and defines
        _, diff_results = collect([args['diff']], defines)
        diff_results_ = []
        for r in diff_results:
            # skip rows without any of our fields
            if not any(k in r and r[k].strip()
                    for k in Result._fields):
                continue
            try:
                diff_results_.append(Result(**{
                        k: r[k] for k in Result._by + Result._fields
                        if k in r and r[k].strip()}))
            except TypeError:
                # best effort, rows that can't become Results are ignored
                pass
        diff_results = diff_results_

        # fold
        diff_results = fold(Result, diff_results, by=by, defines=defines)

    # print table
    if not args.get('quiet'):
        table(Result, results,
                diff_results if args.get('diff') else None,
                by=by,
                fields=fields,
                sort=sort,
                **args)
|
|
|
|
|
|
if __name__ == "__main__":
    import argparse
    import sys

    # parse field[=expr] into a (field, RExpr or None) pair, shared by
    # -b/--by and -f/--field
    field_expr = lambda x: (
            lambda k, v=None: (
                k.strip(),
                RExpr(v) if v is not None else None)
            )(*x.split('=', 1))

    # parse field=a,b,c into a (field, {values}) pair
    field_values = lambda x: (
            lambda k, vs: (
                k.strip(),
                {v.strip() for v in vs.split(',')})
            )(*x.split('=', 1))

    # -s and -S share the sort list so their relative order is kept, each
    # entry is a (field, reverse) pair
    class AppendSort(argparse.Action):
        def __call__(self, parser, namespace, value, option):
            if namespace.sort is None:
                namespace.sort = []
            namespace.sort.append((value, option == '-S'))

    parser = argparse.ArgumentParser(
            description="Script to manipulate CSV files.",
            allow_abbrev=False)
    parser.add_argument(
            'csv_paths',
            nargs='*',
            help="Input *.csv files.")
    parser.add_argument(
            '-q', '--quiet',
            action='store_true',
            help="Don't show anything, useful with -o.")
    parser.add_argument(
            '-o', '--output',
            help="Specify CSV file to store results.")
    parser.add_argument(
            '-u', '--use',
            help="Don't parse anything, use this CSV file.")
    parser.add_argument(
            '-d', '--diff',
            help="Specify CSV file to diff against.")
    parser.add_argument(
            '-a', '--all',
            action='store_true',
            help="Show all, not just the ones that changed.")
    parser.add_argument(
            '-p', '--percent',
            action='store_true',
            help="Only show percentage change, not a full diff.")
    parser.add_argument(
            '-b', '--by',
            action='append',
            type=field_expr,
            help="Group by this field. Can include an expression of the form "
                "field=expr.")
    parser.add_argument(
            '-f', '--field',
            dest='fields',
            action='append',
            type=field_expr,
            help="Show this field. Can include an expression of the form "
                "field=expr.")
    parser.add_argument(
            '-D', '--define',
            dest='defines',
            action='append',
            type=field_values,
            help="Only include results where this field is this value. May "
                "include comma-separated options.")
    parser.add_argument(
            '-s', '--sort',
            nargs='?',
            action=AppendSort,
            help="Sort by this field.")
    parser.add_argument(
            '-S', '--reverse-sort',
            nargs='?',
            action=AppendSort,
            help="Sort by this field, but backwards.")
    parser.add_argument(
            '-Y', '--summary',
            action='store_true',
            help="Only show the total.")

    # type/fold flags, these all just append field names
    for flags, help_ in [
            (['--int'], "Treat these fields as ints."),
            (['--float'], "Treat these fields as floats."),
            (['--frac'], "Treat these fields as fractions."),
            (['--sum'], "Add these fields (the default)."),
            (['--prod'], "Multiply these fields."),
            (['--min'], "Take the minimum of these fields."),
            (['--max'], "Take the maximum of these fields."),
            (['--avg', '--mean'], "Average these fields."),
            (['--stddev'],
                "Find the standard deviation of these fields."),
            (['--gmean'], "Find the geometric mean of these fields."),
            (['--gstddev'],
                "Find the geometric standard deviation of these fields.")]:
        parser.add_argument(
                *flags,
                action='append',
                help=help_)

    # only pass on the flags that were actually given
    parsed = vars(parser.parse_intermixed_args())
    kwargs = {k: v for k, v in parsed.items() if v is not None}
    sys.exit(main(**kwargs))
|