diff options
author | Daniel Thompson <daniel.thompson@linaro.org> | 2021-04-21 11:38:40 +0100 |
---|---|---|
committer | Daniel Thompson <daniel.thompson@linaro.org> | 2021-04-21 11:38:40 +0100 |
commit | 937d93946f4dd24e20c77ed4c27fdb69db9d57fa (patch) | |
tree | 7454eca0b318d5c532b892de3d4691f3ed27e1b5 | |
parent | 864d5fe4d1c2cd6fe491656ab53610600cbf382e (diff) |
maventool: Initial CSV reprocessor
This is a (very heavily) forked and edited version of ldtstool that
has been updated to generate summaries from mavenlink time entries.
Signed-off-by: Daniel Thompson <daniel.thompson@linaro.org>
-rwxr-xr-x | bin/maventool | 477 |
1 files changed, 477 insertions, 0 deletions
#!/usr/bin/env python3

'''
maventool - CSV to JSON and JSON data extraction

Prerequisites (ubuntu:16.04):

 * sudo apt -y install python3 python3-pip
 * sudo apt -y install python3-iso8601 python3-keyring

'''

import argparse
import collections
import csv
import datetime
import json
import iso8601
import os
import re
import textwrap
import subprocess
import sys

import toys.collect as collect
import toys.config as config
import toys.date as date

import toys.chart as chart
import matplotlib.pyplot as plt

# If it's installed we'd rather use IPython for interaction...
try:
    import IPython
    interact = IPython.embed
except ImportError:
    # Narrowed from a bare except: anything other than a missing
    # IPython should not be silently swallowed here.
    import pdb
    interact = pdb.set_trace

#
# Wrapper classes
#

class TimeEntry(dict):
    '''A single mavenlink time entry.

    Behaves as a plain dict whose keys are the lower-cased CSV column
    headings.  A few numeric fields are pre-parsed to float and a
    synthesized 'category' field is added.
    '''

    @staticmethod
    def load(obj):
        '''Load a list of TimeEntry from CSV or JSON.

        obj may be falsy (read JSON from stdin), a filename (parsed as
        CSV when it ends with "csv", otherwise as JSON), or a file-like
        object containing JSON.
        '''
        if not obj:
            data = json.load(sys.stdin)
        elif isinstance(obj, str):
            with open(obj, 'r') as f:
                if obj.endswith('csv'):
                    # First row holds the headings; normalize to lower case
                    data = [ d for d in csv.reader(f) ]
                    headings = [ h.lower() for h in data[0] ]
                    data = [ dict(zip(headings, d)) for d in data[1:] ]
                else:
                    data = json.load(f)
        else:
            data = json.load(obj)

        return [ TimeEntry(te) for te in data ]

    def __init__(self, template):
        '''Massage the data to allow other tools to operate in as generically as possible.'''
        super().__init__(template)

        # Parse numeric fields
        for f in ('time in hours', 'rate', 'subtotal', 'time in minutes'):
            self[f] = float(self[f])

        # High level categorization: overhead-ish projects keep their
        # own name, everything else is split on whether it is billed
        # at an hourly rate.
        if self['project'] in ('Internal Projects [Internal]', 'Support & Solutions Engineering', 'Paid Time Off [PTO]'):
            self['category'] = self['project']
        else:
            self['category'] = 'Time and Materials Services' if self['rate'] else 'Fixed Price Services'
        #if self['project'] == 'Paid Time Off [PTO]':
        #    self['category'] = 'Overhead'
        #elif self['project'] == 'Support & Solutions Engineering':
        #    self['category'] = 'Member activities' \
        #            if self['task/deliverable'] not in ('Team Leadership') \
        #            else 'Overhead'
        #elif self['project'] == 'Internal Projects [Internal]':
        #    self['category'] = Research and Development

    def date(self):
        '''Return the entry's date as a (timezone-aware) datetime.'''
        return iso8601.parse_date(self['date'])

#
# Composable actions
#
# These typically match a specific sub-command but can also be used by other
# sub-commands in combination to achieve macro commands.
#

def do_chart(data, **args):
    '''Render the requested chart(s) to PNG files.

    Recognized keys in args: one output filename per chart type
    (effort_by_category/engineer/project/task) plus barchart, piechart
    and simplify (a percentage below which values are combined).
    Defaults to a bar chart when neither chart style is requested.
    '''
    args = collections.defaultdict(lambda : None, args)

    if not args['barchart'] and not args['piechart']:
        args['barchart'] = True

    def collate_by_week(w):
        # Collate onto the Friday (weekday 4) ending the week
        d = w.date()
        while d.weekday() != 4:
            d += datetime.timedelta(1)
        return d.strftime('%Y-%m-%d')
    collate_by_month = lambda w: w.date().strftime('%Y-%m')
    collate_by_category = lambda w: w['category']
    collate_by_engineer = lambda w: w['person']
    collate_by_project = lambda w: w['project']
    collate_by_task = lambda w: w['task/deliverable']
    # Convert hours to man/days assuming an 8-hour day
    count_effort = lambda w: w['time in hours'] / 8

    charts = {
        'effort_by_category': {
            'primary': collate_by_month,
            'secondary': collate_by_category,
            'count': count_effort,
            'title': 'Effort by week and work category',
            'xlabel': 'Date',
            'ylabel': 'Effort (man/days)'
        },
        'effort_by_engineer': {
            'primary': collate_by_week,
            'secondary': collate_by_engineer,
            'count': count_effort,
            'title': 'Effort by week and assigned engineer',
            'xlabel': 'Date',
            'ylabel': 'Effort (man/days)'
        },
        'effort_by_project': {
            'primary': collate_by_month,
            'secondary': collate_by_project,
            'count': count_effort,
            'title': 'Effort by month and project',
            'xlabel': 'Date',
            'ylabel': 'Effort (man/days)'
        },
        'effort_by_task': {
            'primary': collate_by_month,
            'secondary': collate_by_task,
            'count': count_effort,
            'title': 'Effort by month and task',
            'xlabel': 'Date',
            'ylabel': 'Effort (man/days)'
        },
    }

    for arg in charts:
        if args[arg]:
            # Renamed from 'config' to avoid shadowing the module-level
            # "import toys.config as config".
            cfg = charts[arg]
            cfg['pngfile'] = args[arg]
            simplify = args['simplify'] / 100 if args['simplify'] else None

            if args['barchart']:
                graph = collect.accumulate_2d(data,
                        cfg['primary'], cfg['secondary'], cfg['count'])
                if simplify:
                    collect.simplify_2d(graph, simplify)

                #chart.stacked_barchart(graph, cfg['pngfile'], title = cfg['title'],
                #                       xlabel = cfg['xlabel'], ylabel = cfg['ylabel'])
                chart.stacked_barchart(graph, cfg['pngfile'],
                        ylabel = cfg['ylabel'])
            else:
                graph = collect.accumulate(data, cfg['secondary'], cfg['count'])
                if simplify:
                    graph['Other'] = collect.simplify(graph, simplify)
                chart.piechart(graph, cfg['pngfile'], legend=False)

def do_collate(data, **args):
    '''Collapse entries sharing the same value of args['field'].

    Numeric (float) fields are accumulated; non-numeric fields that
    disagree are replaced with the literal string 'Merged'.  With
    args['percent'] set, every numeric field also gains a
    '<field> percent' companion column.
    '''
    args = collections.defaultdict(lambda : None, args)

    if args['field']:
        field = args['field']
        c = collect.collate(data, lambda k: k[field])

        # Flatten by accumulating numbers and discarding mismatched fields
        data = []
        for category in sorted(c):
            summary = c[category][0]
            for d in c[category][1:]:
                for k, v in d.items():
                    if isinstance(v, float):
                        summary[k] += v
                    elif summary[k] != v:
                        summary[k] = 'Merged'
            data.append(summary)

    if args['percent']:
        totals = collections.defaultdict(float)
        for d in data:
            for k, v in d.items():
                if isinstance(v, float):
                    totals[k] += v
        for d in data:
            for k, v in totals.items():
                # If v is zero then all elements of the list must be
                # zero (or someone is playing silly buggers with -ve
                # numbers) either way, we can do nothing in that case
                if v:
                    d[k+' percent'] = d[k] * (100 / v)

    return data

def do_count(data, **args):
    '''Print the number of entries, or the accumulated total of args['field'].

    args['format'] supplies an optional str.format template for the
    accumulated total (default '{:.1f}').
    '''
    args = collections.defaultdict(lambda : None, args)

    if args['field']:
        field = args['field']
        acc = 0
        for d in data:
            acc += d[field]
        fmt = args['format'] if args['format'] else '{:.1f}'
        print(fmt.format(acc))
    else:
        print(len(data))

def do_filter(data, **args):
    '''Filter (and optionally sort) the entries; returns the new list.

    Text filters (category/people/project/task) are comma separated
    substring lists: an entry survives when any substring matches.
    since/until bound the entry date inclusively; sort_by/reverse
    control an optional in-place sort.
    '''
    args = collections.defaultdict(lambda : None, args)

    def partial_match(haystack, needles):
        # True when any needle occurs as a substring of haystack
        for needle in needles:
            if needle in haystack:
                return True
        return False

    if args['category']:
        data = [ d for d in data if partial_match(d['category'], args['category'].split(',')) ]

    if args['people']:
        data = [ d for d in data if partial_match(d['person'], args['people'].split(',')) ]

    if args['project']:
        data = [ d for d in data if partial_match(d['project'], args['project'].split(',')) ]

    if args['task']:
        data = [ d for d in data if partial_match(d['task/deliverable'], args['task'].split(',')) ]

    if args['since']:
        since = date.smart_parse(args['since'])
        data = [ d for d in data if d.date() >= since ]

    if args['until']:
        until = date.smart_parse(args['until'])
        data = [ d for d in data if d.date() <= until ]

    if args['sort_by']:
        key = args['sort_by']
        data.sort(key=lambda d: d[key], reverse=not not args['reverse'])

    return data

def do_format(data, **args):
    '''Render each entry through the '{field}' template in args['template'].

    A field spelled '{name-N}' truncates a string value to its first N
    characters (or indexes a non-string value with N).  Fields missing
    from an entry are left unexpanded.  Returns the list of rendered
    lines.
    '''
    output = []

    for d in data:
        ln = args['template']
        for m in re.finditer('{([^}:]+)([^}]*)}', args['template']):
            field = m.group(1)
            fmt = m.group(2)

            try:
                if '-' in field:
                    (field, attr) = field.split('-', 1)
                    # Fixed: entries are plain dicts (TimeEntry has no
                    # .field() method - that was ldtstool residue and
                    # raised AttributeError), so index directly.
                    if isinstance(d[field], str):
                        val = d[field][0:int(attr)]
                    else:
                        val = d[field][attr]
                else:
                    val = d[field]
            except KeyError:
                continue

            ln = ln.replace(m.group(0), '{{{}}}'.format(fmt).format(val))
        output.append(ln)
    return output

def do_scale(data, **args):
    '''Store args['from'] * args['factor'] into the args['to'] field of every entry.'''
    for d in data:
        d[args['to']] = d[args['from']] * args['factor']
    return data
#
# Sub-command implementation
#
# These primarily rely on composable functions to do the actual work.
#

def do_chart_cmd(args):
    '''chart sub-command: filter the data then draw the requested charts.'''
    data = TimeEntry.load(args.fname)
    data = do_filter(data, **vars(args))
    do_chart(data, **vars(args))

def do_collate_cmd(args):
    '''collate sub-command: filter, collate, then dump JSON to stdout.'''
    data = TimeEntry.load(args.fname)
    data = do_filter(data, **vars(args))
    data = do_collate(data, **vars(args))
    json.dump(data, sys.stdout)

def do_count_cmd(args):
    '''count sub-command: filter then print a count (or field total).'''
    data = TimeEntry.load(args.fname)
    data = do_filter(data, **vars(args))
    do_count(data, **vars(args))

def do_csv_cmd(args):
    '''csv sub-command: convert a mavenlink CSV export to JSON on stdout.'''
    entries = TimeEntry.load(args.fname)
    json.dump(entries, sys.stdout)

def add_filter_args(subparser, parser=None):
    '''Register the shared filtering arguments.

    With parser=None a standalone 'filter' sub-command is created on
    subparser; otherwise the filter arguments are attached to the
    given sub-command parser so every command can pre-filter its data.
    '''
    if not parser:
        parser = subparser.add_parser('filter', help='Filter cards and worklogs')
        parser.add_argument('fname', nargs='?')
        parser.set_defaults(func=do_filter_cmd)

    parser.add_argument('--category', help='Comma separated list of categories')
    parser.add_argument('--people', help='Comma separated list of people')
    parser.add_argument('--project', help='Comma separated list of projects')
    parser.add_argument('--task', help='Comma separated list of tasks')
    parser.add_argument('--since', help='Only select entries dated later than SINCE')
    parser.add_argument('--sort-by', help='Provide a key to sort by')
    parser.add_argument('--reverse', action='store_true', help='Reverse sort order (requires --sort-by)')
    parser.add_argument('--until', help='Only select entries dated before UNTIL')

def do_filter_cmd(args):
    '''filter sub-command: filter then dump JSON to stdout.'''
    data = TimeEntry.load(args.fname)
    data = do_filter(data, **vars(args))
    json.dump(data, sys.stdout)

def do_format_cmd(args):
    '''format sub-command: filter then print one templated line per entry.'''
    data = TimeEntry.load(args.fname)
    data = do_filter(data, **vars(args))
    formatted = do_format(data, **vars(args))
    print('\n'.join(formatted))

def do_interact_cmd(args):
    '''Interact directly with the JSON data via a REPL (IPython or pdb).'''
    data = TimeEntry.load(args.fname)
    datum = data[-1]
    print('Locals include: data, datum\n')
    interact()

def do_scale_cmd(args):
    '''scale sub-command: re-scale a column then dump JSON to stdout.'''
    data = TimeEntry.load(args.fname)
    data = do_scale(data, **vars(args))
    json.dump(data, sys.stdout)

def do_selftest_cmd(args):
    '''Very simple built-in-self-test

    NOTE(review): stale ldtstool/glance residue - the command list and
    the 'glance' invocations below do not match this tool's
    sub-commands, and the parser registration for 'selftest' is
    commented out in main().  Kept verbatim pending a rewrite.
    '''
    cmds = (
        'chart --card-tracker=test.png --since 2017-01-01 --until 2017-03-31',
        'chart --effort-by-component=test.png',
        'chart --effort-by-engineer=test.png',
        'chart --effort-by-member=test.png',
        'chart --count-by-member=test.png',
        'count',
        'count --worklog',
        'filter --assignee daniel > /dev/null',
        'filter --worklog-since 2017-01-01 --worklog-until 2017-03-31 --no-worklog > /dev/null',
        'format > /dev/null',
        'monthly > /dev/null',
        'summary > /dev/null',
        'weekly > /dev/null',
        'worklog > /dev/null',
    )

    def run(cmd):
        # Echo then run the command, capturing combined stdout/stderr
        print(cmd)
        return subprocess.check_output(
            cmd,
            shell=True,
            stderr=subprocess.STDOUT
        ).decode('UTF-8')

    try:
        # Grab some data to run the tests on
        if not args.json:
            run('glance fetch --since 2018-01-01 > test.json')
            args.json = 'test.json'
        else:
            args.keep = True

        # Run the tests
        for cmd in cmds:
            print(run('cat {} | glance {}'.format(args.json, cmd)), end='')
    except subprocess.CalledProcessError as e:
        print(e.output.decode('UTF-8'))
        print('FAILED WITH ERRORCODE: {}'.format(e.returncode))
    finally:
        if not args.keep:
            run('rm -f test.json')
            run('rm -f test.png')

#
# main - argument parsing and dispatch to sub-commands
#

def main(argv):
    '''Parse argv and dispatch to the selected sub-command handler.'''
    parser = argparse.ArgumentParser()
    subparsers = parser.add_subparsers(dest='sub-command')
    subparsers.required = True # Can't be set using named arguments (yet)

    s = subparsers.add_parser('chart',
                              help='Draw charts from the data')
    s.add_argument('--effort-by-category', metavar='PNGFILE')
    s.add_argument('--effort-by-engineer', metavar='PNGFILE')
    s.add_argument('--effort-by-project', metavar='PNGFILE')
    s.add_argument('--effort-by-task', metavar='PNGFILE')
    s.add_argument('--barchart', action='store_true')
    s.add_argument('--piechart', action='store_true')
    s.add_argument('--simplify', type=float,
                   help="Combine values less than N percent")
    s.add_argument('fname', nargs='?')
    s.set_defaults(func=do_chart_cmd)
    add_filter_args(subparsers, s)

    s = subparsers.add_parser('collate', help='Collate the output into categories')
    s.add_argument('--field', help='Field to collate the input with')
    s.add_argument('--percent', action='store_true',
                   help='Calculate percentages for each numeric field')
    s.add_argument('fname', nargs='?')
    s.set_defaults(func=do_collate_cmd)
    add_filter_args(subparsers, s)

    s = subparsers.add_parser('count', help='Count the number of elements')
    s.add_argument('fname', nargs='?')
    s.add_argument('--field', help='Field to accumulate into the final count')
    s.add_argument('--format', help='Template to format the count (default: {:.1f})')
    s.set_defaults(func=do_count_cmd)
    add_filter_args(subparsers, s)

    s = subparsers.add_parser('csv', help='Convert a raw csv file from mavenlink into JSON format')
    s.add_argument("fname", nargs="?", help="File to process")
    s.set_defaults(func=do_csv_cmd)

    add_filter_args(subparsers)

    s = subparsers.add_parser('format', help='Summarize each card using a template')
    s.add_argument('--template', default='{date}: {time in hours} - {project} ({person})')
    s.add_argument('fname', nargs='?')
    s.set_defaults(func=do_format_cmd)
    add_filter_args(subparsers, s)

    s = subparsers.add_parser('interact', help='Interact with report data via REPL')
    s.add_argument('fname', nargs='?')
    s.set_defaults(func=do_interact_cmd)

    s = subparsers.add_parser('scale', help='Re-scale numeric columns')
    s.add_argument('--from', required=True, help='Column to scale from')
    s.add_argument('--to', required=True, help='Column to scale into')
    # NOTE(review): type=eval accepts expressions such as 1/8 but will
    # execute arbitrary code from the command line; consider
    # ast.literal_eval or float if that flexibility is not needed.
    s.add_argument('--factor', required=True, type=eval, help='Scaling factor')
    s.add_argument('fname', nargs='?')
    s.set_defaults(func=do_scale_cmd)

    #s = subparsers.add_parser('selftest',
    #                          help='Run some basic sanity tests')
    #s.add_argument('--keep', action='store_true',
    #               help='Do not delete test.json after test is completed')
    #s.add_argument('json', nargs='?')
    #s.set_defaults(func=do_selftest_cmd)

    #s = subparsers.add_parser('summary',
    #                          help='Generate a quick summary')
    #s.add_argument('--template',
    #               default='{created-10}: {key}: {summary} ({member})')
    #s.add_argument('json', nargs='?')
    #s.set_defaults(func=do_format)

    args = parser.parse_args(argv[1:])
    args.func(args)

if __name__ == '__main__':
    try:
        sys.exit(main(sys.argv))
    except KeyboardInterrupt:
        sys.exit(1)
    # Defensive only: normally unreachable because sys.exit() in the
    # try block raises SystemExit before control can reach here.
    sys.exit(127)