aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDaniel Thompson <daniel.thompson@linaro.org>2021-04-21 11:38:40 +0100
committerDaniel Thompson <daniel.thompson@linaro.org>2021-04-21 11:38:40 +0100
commit937d93946f4dd24e20c77ed4c27fdb69db9d57fa (patch)
tree7454eca0b318d5c532b892de3d4691f3ed27e1b5
parent864d5fe4d1c2cd6fe491656ab53610600cbf382e (diff)
maventool: Initial CSV reprocessor
This is a (very heavily) forked and edited version of ldtstool that has been updated to generate summaries from mavenlink time entries. Signed-off-by: Daniel Thompson <daniel.thompson@linaro.org>
-rwxr-xr-xbin/maventool477
1 files changed, 477 insertions, 0 deletions
diff --git a/bin/maventool b/bin/maventool
new file mode 100755
index 0000000..0d85adb
--- /dev/null
+++ b/bin/maventool
@@ -0,0 +1,477 @@
+#!/usr/bin/env python3
+
+'''
+maventool - CSV to JSON and JSON data extraction
+
+Prerequisites (ubuntu:16.04):
+
+ * sudo apt -y install python3 python3-pip
+ * sudo apt -y install python3-iso8601 python3-keyring
+
+'''
+
+import argparse
+import collections
+import csv
+import datetime
+import json
+import iso8601
+import os
+import re
+import textwrap
+import subprocess
+import sys
+
+import toys.collect as collect
+import toys.config as config
+import toys.date as date
+
+import toys.chart as chart
+import matplotlib.pyplot as plt
+
# If it's installed we'd rather use IPython for interaction...
try:
    import IPython
    interact = IPython.embed
except ImportError:
    # ... otherwise fall back to the standard debugger's REPL.  Catch only
    # ImportError: a bare except would also hide unrelated startup failures.
    import pdb
    interact = pdb.set_trace
+
+#
+# Wrapper classes
+#
+
class TimeEntry(dict):
    '''A single mavenlink time entry.

    Behaves as a plain dict but normalizes numeric fields and adds a
    derived 'category' key on construction.
    '''

    @staticmethod
    def load(obj):
        '''Load a list of TimeEntry from obj.

        obj may be falsy (read JSON from stdin), a filename (parsed as CSV
        when it ends in 'csv', otherwise as JSON) or a file-like object
        (parsed as JSON).
        '''
        if not obj:
            data = json.load(sys.stdin)
        elif isinstance(obj, str):
            with open(obj, 'r') as f:
                if obj.endswith('csv'):
                    rows = list(csv.reader(f))
                    # First row provides the (lower-cased) field names
                    headings = [ h.lower() for h in rows[0] ]
                    data = [ dict(zip(headings, row)) for row in rows[1:] ]
                else:
                    data = json.load(f)
        else:
            data = json.load(obj)

        return [ TimeEntry(te) for te in data ]

    def __init__(self, template):
        '''Massage the data to allow other tools to operate as generically as possible.'''
        super().__init__(template)

        # Parse numeric fields (CSV input provides everything as strings)
        for f in ('time in hours', 'rate', 'subtotal', 'time in minutes'):
            self[f] = float(self[f])

        # High level categorization: a few special projects keep their own
        # name, everything else is split by whether a billing rate applies.
        if self['project'] in ('Internal Projects [Internal]',
                               'Support & Solutions Engineering',
                               'Paid Time Off [PTO]'):
            self['category'] = self['project']
        else:
            self['category'] = ('Time and Materials Services' if self['rate']
                                else 'Fixed Price Services')

    def date(self):
        '''Return the entry's date as a datetime (parsed as ISO 8601).'''
        return iso8601.parse_date(self['date'])
+
+#
+# Composable actions
+#
+# These typically match a specific sub-command but can also be used by other
+# sub-commands in combination to achieve macro commands.
+#
+
def do_chart(data, **args):
    '''Draw the chart(s) requested in args.

    Each recognized chart name in args maps to a PNG output filename.
    Charts are stacked barcharts unless args['piechart'] is set, and
    values below args['simplify'] percent are folded into 'Other'.
    '''
    args = collections.defaultdict(lambda : None, args)

    # Default to barcharts when the user expressed no preference
    if not args['barchart'] and not args['piechart']:
        args['barchart'] = True

    def collate_by_week(w):
        # Label each entry with the Friday that ends its week
        d = w.date()
        while d.weekday() != 4:
            d += datetime.timedelta(1)
        return d.strftime('%Y-%m-%d')
    collate_by_month = lambda w: w.date().strftime('%Y-%m')
    collate_by_category = lambda w: w['category']
    collate_by_engineer = lambda w: w['person']
    collate_by_project = lambda w: w['project']
    collate_by_task = lambda w: w['task/deliverable']
    count_effort = lambda w: w['time in hours'] / 8  # 8-hour man/days

    charts = {
        'effort_by_category': {
            'primary': collate_by_month,
            'secondary': collate_by_category,
            'count': count_effort,
            'title': 'Effort by month and work category',
            'xlabel': 'Date',
            'ylabel': 'Effort (man/days)'
        },
        'effort_by_engineer': {
            'primary': collate_by_week,
            'secondary': collate_by_engineer,
            'count': count_effort,
            'title': 'Effort by week and assigned engineer',
            'xlabel': 'Date',
            'ylabel': 'Effort (man/days)'
        },
        'effort_by_project': {
            'primary': collate_by_month,
            'secondary': collate_by_project,
            'count': count_effort,
            'title': 'Effort by month and project',
            'xlabel': 'Date',
            'ylabel': 'Effort (man/days)'
        },
        'effort_by_task': {
            'primary': collate_by_month,
            'secondary': collate_by_task,
            'count': count_effort,
            'title': 'Effort by month and task',
            'xlabel': 'Date',
            'ylabel': 'Effort (man/days)'
        },
    }

    # Convert the simplification threshold from a percentage once, up front
    simplify = args['simplify'] / 100 if args['simplify'] else None

    for arg in charts:
        if not args[arg]:
            continue

        # NB: named 'cfg' rather than 'config' to avoid shadowing the
        # module-level toys.config import
        cfg = charts[arg]
        cfg['pngfile'] = args[arg]

        if args['barchart']:
            graph = collect.accumulate_2d(data,
                    cfg['primary'], cfg['secondary'], cfg['count'])
            if simplify:
                collect.simplify_2d(graph, simplify)

            chart.stacked_barchart(graph, cfg['pngfile'],
                                   ylabel = cfg['ylabel'])
        else:
            graph = collect.accumulate(data, cfg['secondary'], cfg['count'])
            if simplify:
                graph['Other'] = collect.simplify(graph, simplify)
            chart.piechart(graph, cfg['pngfile'], legend=False)
+
def do_collate(data, **args):
    '''Optionally merge entries sharing a field value and/or annotate each
    numeric field with its percentage of the overall total.

    Merging accumulates into the first entry of each group; non-numeric
    fields that differ between group members collapse to 'Merged'.
    '''
    args = collections.defaultdict(lambda : None, args)

    field = args['field']
    if field:
        groups = collect.collate(data, lambda entry: entry[field])

        # Flatten each group in sorted key order: numbers accumulate,
        # everything else must match or be discarded as 'Merged'.
        data = []
        for key in sorted(groups):
            merged, *rest = groups[key]
            for entry in rest:
                for name, value in entry.items():
                    if isinstance(value, float):
                        merged[name] += value
                    elif merged[name] != value:
                        merged[name] = 'Merged'
            data.append(merged)

    if args['percent']:
        totals = collections.defaultdict(float)
        for entry in data:
            for name, value in entry.items():
                if isinstance(value, float):
                    totals[name] += value
        for entry in data:
            for name, total in totals.items():
                # A zero total means every element must be zero (barring
                # someone playing silly buggers with -ve numbers) so in
                # that case there is nothing sensible we can report.
                if total:
                    entry[name + ' percent'] = entry[name] * (100 / total)

    return data
+
def do_count(data, **args):
    '''Print either the number of entries or, when args['field'] is set,
    the accumulated total of that (numeric) field.

    The total is rendered with args['format'] (default '{:.1f}').
    '''
    args = collections.defaultdict(lambda : None, args)

    field = args['field']
    if not field:
        print(len(data))
        return

    total = sum(entry[field] for entry in data)
    fmt = args['format'] or '{:.1f}'
    print(fmt.format(total))
+
def do_filter(data, **args):
    '''Filter (and optionally sort) the time entries.

    String filters are comma separated lists of needles; an entry is kept
    if any needle is a substring of the corresponding field.  Date filters
    keep entries within [since, until] inclusive.
    '''
    args = collections.defaultdict(lambda : None, args)

    def matches(haystack, needles):
        return any(needle in haystack for needle in needles)

    # Substring filters: (argument name, entry field) pairs
    for argname, field in (('category', 'category'),
                           ('people', 'person'),
                           ('project', 'project'),
                           ('task', 'task/deliverable')):
        if args[argname]:
            needles = args[argname].split(',')
            data = [ entry for entry in data if matches(entry[field], needles) ]

    if args['since']:
        cutoff = date.smart_parse(args['since'])
        data = [ entry for entry in data if entry.date() >= cutoff ]

    if args['until']:
        cutoff = date.smart_parse(args['until'])
        data = [ entry for entry in data if entry.date() <= cutoff ]

    if args['sort_by']:
        sort_field = args['sort_by']
        data.sort(key=lambda entry: entry[sort_field],
                  reverse=bool(args['reverse']))

    return data
+
def do_format(data, **args):
    '''Render each entry using the args['template'] format string.

    The template contains {field} markers, optionally with a format spec
    ({field:.1f}) and/or a '-' suffix: {field-10} truncates a string field
    to 10 characters, while a non-string field is indexed with the suffix.
    Markers whose field is missing from an entry are left untouched.
    Returns the list of formatted lines.
    '''
    output = []

    for d in data:
        ln = args['template']
        for m in re.finditer('{([^}:]+)([^}]*)}', args['template']):
            field = m.group(1)
            fmt = m.group(2)

            try:
                if '-' in field:
                    (field, attr) = field.split('-', 1)
                    # Fix: entries are dicts so use subscripting; the old
                    # d.field(...) call raised an (uncaught) AttributeError
                    val = d[field]
                    if isinstance(val, str):
                        val = val[0:int(attr)]
                    else:
                        val = val[attr]
                else:
                    val = d[field]
            except KeyError:
                continue

            ln = ln.replace(m.group(0), '{{{}}}'.format(fmt).format(val))
        output.append(ln)
    return output
+
def do_scale(data, **args):
    '''Derive args['to'] as args['from'] * args['factor'] for every entry
    and return the (mutated) data.'''
    src, dst, factor = args['from'], args['to'], args['factor']
    for entry in data:
        entry[dst] = entry[src] * factor
    return data
+
+#
+# Sub-command implementation
+#
+# These primarily rely on composable functions to do the actual work.
+#
+
def do_chart_cmd(args):
    '''Implement the 'chart' sub-command.'''
    entries = do_filter(TimeEntry.load(args.fname), **vars(args))
    do_chart(entries, **vars(args))
+
def do_collate_cmd(args):
    '''Implement the 'collate' sub-command: collated JSON on stdout.'''
    entries = do_filter(TimeEntry.load(args.fname), **vars(args))
    json.dump(do_collate(entries, **vars(args)), sys.stdout)
+
def do_count_cmd(args):
    '''Implement the 'count' sub-command.'''
    do_count(do_filter(TimeEntry.load(args.fname), **vars(args)),
             **vars(args))
+
def do_csv_cmd(args):
    '''Implement the 'csv' sub-command: parse the CSV input and emit it
    as JSON on stdout.'''
    json.dump(TimeEntry.load(args.fname), sys.stdout)
+
def add_filter_args(subparser, parser=None):
    '''Attach the shared filtering options to parser.

    When no parser is supplied a standalone 'filter' sub-command is
    created on subparser and the options are attached to that instead.
    '''
    if parser is None:
        parser = subparser.add_parser('filter', help='Filter cards and worklogs')
        parser.add_argument('fname', nargs='?')
        parser.set_defaults(func=do_filter_cmd)

    # (flag, add_argument keyword arguments) in presentation order
    options = (
        ('--category', {'help': 'Comma separated list of categories'}),
        ('--people', {'help': 'Comma separated list of people'}),
        ('--project', {'help': 'Comma separated list of projects'}),
        ('--task', {'help': 'Comma separated list of tasks'}),
        ('--since', {'help': 'Only select entries dated later than SINCE'}),
        ('--sort-by', {'help': 'Provide a key to sort by'}),
        ('--reverse', {'action': 'store_true',
                       'help': 'Reverse sort order (requires --sort-by)'}),
        ('--until', {'help': 'Only select entries dated before UNTIL'}),
    )
    for flag, kwargs in options:
        parser.add_argument(flag, **kwargs)
+
def do_filter_cmd(args):
    '''Implement the 'filter' sub-command: filtered JSON on stdout.'''
    entries = do_filter(TimeEntry.load(args.fname), **vars(args))
    json.dump(entries, sys.stdout)
+
def do_format_cmd(args):
    '''Implement the 'format' sub-command: one templated line per entry.'''
    entries = do_filter(TimeEntry.load(args.fname), **vars(args))
    print('\n'.join(do_format(entries, **vars(args))))
+
def do_interact_cmd(args):
    '''Drop into a REPL (IPython if available, pdb otherwise) with the
    loaded entries in scope for ad-hoc exploration.'''
    data = TimeEntry.load(args.fname)
    # Keep the last entry handy as a convenient single example to poke at
    datum = data[-1]
    print('Locals include: data, datum\n')
    interact()
+
def do_scale_cmd(args):
    '''Implement the 'scale' sub-command: rescaled JSON on stdout.'''
    entries = do_scale(TimeEntry.load(args.fname), **vars(args))
    json.dump(entries, sys.stdout)
+
def do_selftest_cmd(args):
    '''Very simple built-in-self-test'''
    # NOTE(review): this command list is inherited from the 'glance'/ldtstool
    # ancestor of this tool; most of these sub-commands and options (e.g.
    # --card-tracker, --worklog, monthly, summary, weekly, and the 'glance
    # fetch' below) do not exist in maventool, so this self test cannot
    # currently pass.  No sub-parser in main() registers this function, so
    # it is presently dead code pending a port — confirm before relying on it.
    cmds = (
        'chart --card-tracker=test.png --since 2017-01-01 --until 2017-03-31',
        'chart --effort-by-component=test.png',
        'chart --effort-by-engineer=test.png',
        'chart --effort-by-member=test.png',
        'chart --count-by-member=test.png',
        'count',
        'count --worklog',
        'filter --assignee daniel > /dev/null',
        'filter --worklog-since 2017-01-01 --worklog-until 2017-03-31 --no-worklog > /dev/null',
        'format > /dev/null',
        'monthly > /dev/null',
        'summary > /dev/null',
        'weekly > /dev/null',
        'worklog > /dev/null',
    )

    def run(cmd):
        # Echo the command, then run it via the shell capturing both stdout
        # and stderr so any failure output can be reported in one place
        print(cmd)
        return subprocess.check_output(
            cmd,
            shell=True,
            stderr=subprocess.STDOUT
        ).decode('UTF-8')

    try:
        # Grab some data to run the tests on
        if not args.json:
            run('glance fetch --since 2018-01-01 > test.json')
            args.json = 'test.json'
        else:
            # The user supplied their own data; don't delete it afterwards
            args.keep = True

        # Run the tests
        for cmd in cmds:
            print(run('cat {} | glance {}'.format(args.json, cmd)), end='')
    except subprocess.CalledProcessError as e:
        print(e.output.decode('UTF-8'))
        print('FAILED WITH ERRORCODE: {}'.format(e.returncode))
    finally:
        # Clean up the test artifacts unless asked (or implied) to keep them
        if not args.keep:
            run('rm -f test.json')
            run('rm -f test.png')
+
+#
+# main - argument parsing and dispatch to sub-commands
+#
+
def main(argv):
    '''Parse the command line in argv and dispatch to a sub-command handler.

    argv is the full argument vector; argv[0] (the program name) is ignored.
    '''
    parser = argparse.ArgumentParser()
    subparsers = parser.add_subparsers(dest='sub-command')
    subparsers.required = True # Can't be set using named arguments (yet)

    s = subparsers.add_parser('chart',
            help='Draw charts from the data')
    s.add_argument('--effort-by-category', metavar='PNGFILE')
    s.add_argument('--effort-by-engineer', metavar='PNGFILE')
    s.add_argument('--effort-by-project', metavar='PNGFILE')
    s.add_argument('--effort-by-task', metavar='PNGFILE')
    s.add_argument('--barchart', action='store_true')
    s.add_argument('--piechart', action='store_true')
    s.add_argument('--simplify', type=float,
                   help="Combine values less than N percent")
    s.add_argument('fname', nargs='?')
    s.set_defaults(func=do_chart_cmd)
    add_filter_args(subparsers, s)

    s = subparsers.add_parser('collate', help='Collate the output into categories')
    s.add_argument('--field', help='Field to collate the input with')
    s.add_argument('--percent', action='store_true',
                   help='Calculate percentages for each numeric field')
    s.add_argument('fname', nargs='?')
    s.set_defaults(func=do_collate_cmd)
    add_filter_args(subparsers, s)

    s = subparsers.add_parser('count', help='Count the number of elements')
    s.add_argument('fname', nargs='?')
    s.add_argument('--field', help='Field to accumulate into the final count')
    s.add_argument('--format', help='Template to format the count (default: {:.1f})')
    s.set_defaults(func=do_count_cmd)
    add_filter_args(subparsers, s)

    s = subparsers.add_parser('csv', help='Convert a raw csv file from mavenlink into JSON format')
    s.add_argument("fname", nargs="?", help="File to process")
    s.set_defaults(func=do_csv_cmd)

    # Registers the standalone 'filter' sub-command
    add_filter_args(subparsers)

    s = subparsers.add_parser('format', help='Summarize each entry using a template')
    s.add_argument('--template', default='{date}: {time in hours} - {project} ({person})')
    s.add_argument('fname', nargs='?')
    s.set_defaults(func=do_format_cmd)
    add_filter_args(subparsers, s)

    s = subparsers.add_parser('interact', help='Interact with report data via REPL')
    s.add_argument('fname', nargs='?')
    s.set_defaults(func=do_interact_cmd)

    s = subparsers.add_parser('scale', help='Re-scale numeric columns')
    s.add_argument('--from', required=True, help='Column to scale from')
    s.add_argument('--to', required=True, help='Column to scale into')
    # TODO(review): type=eval executes arbitrary code from the command line.
    # It conveniently accepts expressions such as 1/8 but a safer parser
    # (e.g. fractions.Fraction) would be preferable.
    s.add_argument('--factor', required=True, type=eval, help='Scaling factor')
    s.add_argument('fname', nargs='?')
    s.set_defaults(func=do_scale_cmd)

    # NB: no sub-parser is registered for do_selftest_cmd; it still needs
    # porting from this tool's ldtstool ancestor before it can be enabled.

    args = parser.parse_args(argv[1:])
    args.func(args)
+
if __name__ == '__main__':
    try:
        sys.exit(main(sys.argv))
    except KeyboardInterrupt:
        # Quit quietly, but with a failure code, on Ctrl-C.  (A trailing
        # sys.exit(127) here would be unreachable: both paths above exit.)
        sys.exit(1)
+