#!/usr/bin/env python3
#
# Script to summarize the outputs of other scripts. Operates on CSV files.
#

import collections as co
import csv
import functools as ft
import math as m
import os
import re
import sys

# each result is a type generated by another script
RESULTS = []
FIELDS = 'code,data,stack,structs'


def result(cls):
    """Class decorator that registers cls as a known result type."""
    RESULTS.append(cls)
    return cls


def _ratio(old, new):
    """Relative change from old to new for plain size measurements.

    Returns 0.0 when both values are zero (nothing changed), 1.0 when only
    the old value is zero (entry appeared), and (new-old)/old otherwise.
    """
    if not old and not new:
        return 0.0
    if not old:
        return 1.0
    return (new-old) / old


# Every result type below follows the same protocol, which main relies on:
# - __add__ merges two measurements of the same type
# - __sub__/__rsub__ return the relative change (a float) versus an older
#   measurement, either operand may be None if the entry is missing
# - key() returns a sort key, negated so larger values sort first
# - _header/_nil/__str__ provide fixed-width table columns

@result
class CodeResult(co.namedtuple('CodeResult', 'code_size')):
    """Code size of an entry, summed when merged."""
    __slots__ = ()
    def __new__(cls, code_size=0):
        return super().__new__(cls, int(code_size))

    def __add__(self, other):
        return self.__class__(self.code_size + other.code_size)

    def __sub__(self, other):
        old = other.code_size if other is not None else 0
        new = self.code_size if self is not None else 0
        return _ratio(old, new)

    def __rsub__(self, other):
        # reached for `None - result`, self plays the role of the old value
        return self.__class__.__sub__(other, self)

    def key(self):
        return -self.code_size

    _header = '%7s' % 'code'
    _nil    = '%7s' % '-'
    def __str__(self):
        return '%7s' % self.code_size

@result
class DataResult(co.namedtuple('DataResult', 'data_size')):
    """Data size of an entry, summed when merged."""
    __slots__ = ()
    def __new__(cls, data_size=0):
        return super().__new__(cls, int(data_size))

    def __add__(self, other):
        return self.__class__(self.data_size + other.data_size)

    def __sub__(self, other):
        old = other.data_size if other is not None else 0
        new = self.data_size if self is not None else 0
        return _ratio(old, new)

    def __rsub__(self, other):
        # reached for `None - result`, self plays the role of the old value
        return self.__class__.__sub__(other, self)

    def key(self):
        return -self.data_size

    _header = '%7s' % 'data'
    _nil    = '%7s' % '-'
    def __str__(self):
        return '%7s' % self.data_size

@result
class StackResult(co.namedtuple('StackResult', 'stack_limit')):
    """Stack limit of an entry, may be infinite, merged by taking the max."""
    __slots__ = ()
    def __new__(cls, stack_limit=0):
        # float so that 'inf' in the CSV parses as an unbounded stack
        return super().__new__(cls, float(stack_limit))

    def __add__(self, other):
        return self.__class__(max(self.stack_limit, other.stack_limit))

    def __sub__(self, other):
        old_limit = other.stack_limit if other is not None else 0
        new_limit = self.stack_limit if self is not None else 0
        return (0.0 if m.isinf(new_limit) and m.isinf(old_limit)
            else +float('inf') if m.isinf(new_limit)
            else -float('inf') if m.isinf(old_limit)
            else 0.0 if not old_limit and not new_limit
            else 1.0 if not old_limit
            else (new_limit-old_limit) / old_limit)

    def __rsub__(self, other):
        # reached for `None - result`, self plays the role of the old value
        return self.__class__.__sub__(other, self)

    def key(self):
        return -self.stack_limit

    _header = '%7s' % 'stack'
    _nil    = '%7s' % '-'
    def __str__(self):
        return '%7s' % (
            '∞' if m.isinf(self.stack_limit)
            else int(self.stack_limit))

@result
class StructsResult(co.namedtuple('StructsResult', 'struct_size')):
    """Struct size of an entry, summed when merged."""
    __slots__ = ()
    def __new__(cls, struct_size=0):
        return super().__new__(cls, int(struct_size))

    def __add__(self, other):
        return self.__class__(self.struct_size + other.struct_size)

    def __sub__(self, other):
        old = other.struct_size if other is not None else 0
        new = self.struct_size if self is not None else 0
        return _ratio(old, new)

    def __rsub__(self, other):
        # reached for `None - result`, self plays the role of the old value
        return self.__class__.__sub__(other, self)

    def key(self):
        return -self.struct_size

    _header = '%7s' % 'structs'
    _nil    = '%7s' % '-'
    def __str__(self):
        return '%7s' % self.struct_size

@result
class CoverageLineResult(co.namedtuple('CoverageResult',
        'coverage_line_hits,coverage_line_count')):
    """Line coverage as hits/count, both summed when merged."""
    __slots__ = ()
    def __new__(cls, coverage_line_hits=0, coverage_line_count=0):
        return super().__new__(cls,
            int(coverage_line_hits),
            int(coverage_line_count))

    def __add__(self, other):
        return self.__class__(
            self.coverage_line_hits + other.coverage_line_hits,
            self.coverage_line_count + other.coverage_line_count)

    def __sub__(self, other):
        old_hits = other.coverage_line_hits if other is not None else 0
        old_count = other.coverage_line_count if other is not None else 0
        new_hits = self.coverage_line_hits if self is not None else 0
        new_count = self.coverage_line_count if self is not None else 0
        # difference of coverage fractions, nothing to cover counts as 1.0
        return ((new_hits/new_count if new_count else 1.0)
            - (old_hits/old_count if old_count else 1.0))

    def __rsub__(self, other):
        # reached for `None - result`, self plays the role of the old value
        return self.__class__.__sub__(other, self)

    def key(self):
        return -(self.coverage_line_hits/self.coverage_line_count
            if self.coverage_line_count else -1)

    _header = '%19s' % 'coverage/line'
    _nil    = '%11s %7s' % ('-', '-')
    def __str__(self):
        return '%11s %7s' % (
            '%d/%d' % (self.coverage_line_hits, self.coverage_line_count)
                if self.coverage_line_count else '-',
            '%.1f%%' % (100*self.coverage_line_hits/self.coverage_line_count)
                if self.coverage_line_count else '-')

@result
class CoverageBranchResult(co.namedtuple('CoverageResult',
        'coverage_branch_hits,coverage_branch_count')):
    """Branch coverage as hits/count, both summed when merged."""
    __slots__ = ()
    def __new__(cls, coverage_branch_hits=0, coverage_branch_count=0):
        return super().__new__(cls,
            int(coverage_branch_hits),
            int(coverage_branch_count))

    def __add__(self, other):
        return self.__class__(
            self.coverage_branch_hits + other.coverage_branch_hits,
            self.coverage_branch_count + other.coverage_branch_count)

    def __sub__(self, other):
        old_hits = other.coverage_branch_hits if other is not None else 0
        old_count = other.coverage_branch_count if other is not None else 0
        new_hits = self.coverage_branch_hits if self is not None else 0
        new_count = self.coverage_branch_count if self is not None else 0
        # difference of coverage fractions, nothing to cover counts as 1.0
        return ((new_hits/new_count if new_count else 1.0)
            - (old_hits/old_count if old_count else 1.0))

    def __rsub__(self, other):
        # reached for `None - result`, self plays the role of the old value
        return self.__class__.__sub__(other, self)

    def key(self):
        return -(self.coverage_branch_hits/self.coverage_branch_count
            if self.coverage_branch_count else -1)

    _header = '%19s' % 'coverage/branch'
    _nil    = '%11s %7s' % ('-', '-')
    def __str__(self):
        return '%11s %7s' % (
            '%d/%d' % (self.coverage_branch_hits, self.coverage_branch_count)
                if self.coverage_branch_count else '-',
            '%.1f%%' % (100*self.coverage_branch_hits/self.coverage_branch_count)
                if self.coverage_branch_count else '-')


def openio(path, mode='r'):
    """Open path, treating '-' as stdin (read modes) or stdout (otherwise).

    The standard stream's descriptor is duplicated so that closing the
    returned file does not close sys.stdin/sys.stdout themselves.
    """
    if path == '-':
        if 'r' in mode:
            return os.fdopen(os.dup(sys.stdin.fileno()), 'r')
        else:
            return os.fdopen(os.dup(sys.stdout.fileno()), 'w')
    else:
        return open(path, mode)

def _collect(paths):
    """Read CSV files into {(file, name): {Result.__name__: Result}}.

    A row contributes to a result type only if every field of that type is
    present and non-empty. Rows sharing the same (file, name) are merged
    with the type's __add__. Missing files are skipped silently.
    """
    results = co.defaultdict(lambda: {})
    for path in paths:
        try:
            with openio(path) as f:
                for row in csv.DictReader(f):
                    file = row.pop('file', '')
                    name = row.pop('name', '')
                    for Result in RESULTS:
                        if all(row.get(field) not in {None, ''}
                                for field in Result._fields):
                            results[(file, name)][Result.__name__] = (
                                results[(file, name)].get(
                                    Result.__name__, Result())
                                + Result(*(row[field]
                                    for field in Result._fields)))
        except FileNotFoundError:
            # best effort, a missing input just contributes nothing
            pass
    return results

def main(**args):
    """Merge CSV results, optionally write them back out, and print a table.

    Recognized keyword arguments (all optional):
      csv_paths     iterable of CSV paths, '-' means stdin (default ['-'])
      diff          CSV path to compare against
      fields        set (or comma separated string) of field prefixes to
                    show, 'all' selects every known result type
      sort          sort by this field
      reverse_sort  sort by this field, reversed
      output        CSV path to write the merged results to
      all           in diff mode, also show unchanged entries
      quiet, summary, files
                    select what gets printed
    """
    # find results
    csv_paths = args.get('csv_paths', ['-'])
    if isinstance(csv_paths, str):
        # a bare string would otherwise be iterated character by character
        csv_paths = [csv_paths]
    results = _collect(csv_paths)

    # find previous results? the diff file is read exactly once, no matter
    # how many csv_paths were given
    if args.get('diff'):
        prev_results = _collect([args['diff']])
    else:
        prev_results = None

    # filter our result types by results that are present
    fields = args.get('fields', FIELDS)
    if isinstance(fields, str):
        fields = set(re.split(r'\s*,\s*', fields))
    if 'all' in fields:
        filtered_results = RESULTS
    else:
        filtered_results = [
            Result for Result in RESULTS
            if (any(f.startswith(r)
                    for r in fields
                    for f in Result._fields)
                or any(Result._header.strip().startswith(r)
                    for r in fields))]

    # figure out a sort key
    sort_field = args.get('sort') or args.get('reverse_sort')
    if sort_field:
        key_Result = next(
            Result for Result in RESULTS
            if (any(f.startswith(sort_field) for f in Result._fields)
                or Result._header.strip().startswith(sort_field)))
        key = lambda result: result.get(
            key_Result.__name__, key_Result()).key()
        # --sort takes precedence over --reverse-sort if both are given
        reverse = not args.get('sort')
    else:
        key = lambda _: None
        reverse = False

    # write merged results to CSV
    if args.get('output'):
        # only emit the selected result types, DictWriter raises on keys
        # that are not part of its header
        selected = {Result.__name__ for Result in filtered_results}
        with openio(args['output'], 'w') as f:
            w = csv.DictWriter(f, sum(
                (Result._fields for Result in filtered_results),
                ('file', 'name')))
            w.writeheader()
            for (file, name), result in sorted(results.items()):
                w.writerow(ft.reduce(dict.__or__,
                    (r._asdict() for k, r in result.items()
                        if k in selected),
                    {'file': file, 'name': name}))

    # print results
    def print_header(by):
        if by == 'total':
            entry = lambda k: 'TOTAL'
        elif by == 'file':
            entry = lambda k: k[0]
        else:
            entry = lambda k: k[1]

        if not args.get('diff'):
            print('%-36s %s' % (by,
                ' '.join(Result._header for Result in filtered_results)))
        else:
            old = {entry(k) for k in prev_results.keys()}
            new = {entry(k) for k in results.keys()}
            print('%-36s %s' % (
                '%s (%d added, %d removed)' % (by,
                        sum(1 for k in new if k not in old),
                        sum(1 for k in old if k not in new))
                    if by else '',
                ' '.join('%s%-10s' % (Result._header, '')
                    for Result in filtered_results)))

    def print_entries(by):
        if by == 'total':
            entry = lambda k: 'TOTAL'
        elif by == 'file':
            entry = lambda k: k[0]
        else:
            entry = lambda k: k[1]

        # merge results that map to the same table row
        entries = co.defaultdict(lambda: {})
        for k, result in results.items():
            entries[entry(k)] |= {
                r.__class__.__name__: entries[entry(k)].get(
                    r.__class__.__name__, r.__class__()) + r
                for r in result.values()}

        if not args.get('diff'):
            for name, result in sorted(entries.items(),
                    key=lambda p: (key(p[1]), p),
                    reverse=reverse):
                print('%-36s %s' % (name, ' '.join(
                    str(result.get(Result.__name__, Result._nil))
                    for Result in filtered_results)))
        else:
            prev_entries = co.defaultdict(lambda: {})
            for k, result in prev_results.items():
                prev_entries[entry(k)] |= {
                    r.__class__.__name__: prev_entries[entry(k)].get(
                        r.__class__.__name__, r.__class__()) + r
                    for r in result.values()}

            diff_entries = {
                name: (prev_entries.get(name), entries.get(name))
                for name in (entries.keys() | prev_entries.keys())}

            # removed entries have no new results, sort them as if empty
            for name, (old, new) in sorted(diff_entries.items(),
                    key=lambda p: (
                        key(p[1][1] if p[1][1] is not None else {}), p),
                    reverse=reverse):
                fields = []
                changed = False
                for Result in filtered_results:
                    o = old.get(Result.__name__) if old is not None else None
                    n = new.get(Result.__name__) if new is not None else None
                    # either side may be None, __sub__/__rsub__ handle that
                    ratio = n - o if n is not None or o is not None else 0
                    changed = changed or ratio
                    fields.append('%s%-10s' % (
                        n if n is not None else Result._nil,
                        '' if not ratio
                        else ' (+∞%)' if ratio > 0 and m.isinf(ratio)
                        else ' (-∞%)' if ratio < 0 and m.isinf(ratio)
                        else ' (%+.1f%%)' % (100*ratio)))
                if changed or args.get('all'):
                    print('%-36s %s' % (name, ' '.join(fields)))

    if args.get('quiet'):
        pass
    elif args.get('summary'):
        print_header('')
        print_entries('total')
    elif args.get('files'):
        print_header('file')
        print_entries('file')
        print_entries('total')
    else:
        print_header('name')
        print_entries('name')
        print_entries('total')


if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser(
        description="Summarize measurements")
    parser.add_argument('csv_paths', nargs='*', default='-',
        help="Description of where to find *.csv files. May be a directory \
            or list of paths.")
    parser.add_argument('-q', '--quiet', action='store_true',
        help="Don't show anything, useful with -o.")
    parser.add_argument('-o', '--output',
        help="Specify CSV file to store results.")
    parser.add_argument('-d', '--diff',
        help="Specify CSV file to diff against.")
    parser.add_argument('-a', '--all', action='store_true',
        help="Show all objects, not just the ones that changed.")
    parser.add_argument('-f', '--fields',
        type=lambda x: set(re.split(r'\s*,\s*', x)),
        default=FIELDS,
        help="Comma separated list of fields to print, by default all fields \
            that are found in the CSV files are printed. \"all\" prints all \
            fields this script knows. Defaults to %r." % FIELDS)
    parser.add_argument('-s', '--sort',
        help="Sort by this field.")
    parser.add_argument('-S', '--reverse-sort',
        help="Sort by this field, but backwards.")
    parser.add_argument('-F', '--files', action='store_true',
        help="Show file-level calls.")
    parser.add_argument('-Y', '--summary', action='store_true',
        help="Only show the totals.")
    sys.exit(main(**{k: v
        for k, v in vars(parser.parse_args()).items()
        if v is not None}))