diff options
author | cpettet <rush@wikimedia.org> | 2015-05-28 13:12:21 -0500 |
---|---|---|
committer | cpettet <rush@wikimedia.org> | 2015-05-28 13:18:34 -0500 |
commit | 4c5b4eb59eea8cf30b941444b0162ff12f655913 (patch) | |
tree | 257272d66dd09fdac672f5a22a72c0d427001b8b | |
parent | 6a1ac6cdcd1f3859f893f77be64d7384c8d6aaba (diff) |
Script to dump public task information
Dumps to a json file for reporting and analytics that several
departments have requested. All security information is
derived from the security on the task, though project view policy
is also respected.
This is very specific as several columns in tables contain sensitive
or potentially sensitive information even on public tasks such as
mailKeys or hidden history.
Change-Id: I9e20d77a5642757c96f32f40914528b08104abbd
-rwxr-xr-x | public_task_dump.py | 41 | ||||
-rwxr-xr-x | wmfphablib/phabdb.py | 110 |
2 files changed, 151 insertions, 0 deletions
diff --git a/public_task_dump.py b/public_task_dump.py new file mode 100755 index 0000000..57b50fc --- /dev/null +++ b/public_task_dump.py @@ -0,0 +1,41 @@ +import os +import sys +import json +from wmfphablib import phabdb +from wmfphablib import util + +# Some transaction types are unsafe to reveal as they +# contain hidden information in their history and possible +# unsafe secrets we have dealt with in the UI context +transactions = ['projectcolumn', + 'priority', + 'status', + 'reassign'] + +data = {} +taskdata = {} +for task in phabdb.get_taskbypolicy(): + id = task[0] + taskdata[id] = {} + + taskdata[id]['transactions'] = {} + for t in transactions: + taskdata[id]['transactions'][t] = phabdb.get_transactionbytype(task[1], t) + + #('PHID-TASK-uegpsibvtzahh2n4efok', 21L, 'PHID-USER-7t36l5d3llsm5abqfx3u', 1426191381L, 0L, None) + # There are a few types of edge relationships, some of them we are not going to + # account for here as the current need is project based data. Thus if we see a relationship + # with a project and that project is public then include it. + edges = phabdb.get_edgebysrc(task[1]) + edge_allowed = [edge for edge in edges \ + if edge[2].startswith('PHID-PROJ') \ + and phabdb.get_projectpolicy(edge[2]) == 'public'] + taskdata[id]['edge'] = filter(bool, edge_allowed) + +data['task'] = taskdata +data['project'] = {} +data['project']['projects'] = phabdb.get_projectbypolicy(policy='public') +data['project']['columns'] = phabdb.get_projectcolumns() + +with open('phabricator_public.dump', 'w') as f: + f.write(json.dumps(data)) diff --git a/wmfphablib/phabdb.py b/wmfphablib/phabdb.py index 470e87f..94bbdc8 100755 --- a/wmfphablib/phabdb.py +++ b/wmfphablib/phabdb.py @@ -20,6 +20,116 @@ from config import bzmigrate_db from config import bzmigrate_user from config import bzmigrate_passwd +def get_projectcolumns(): + p = phdb(db='phabricator_project', + user=phuser_user, + passwd=phuser_passwd) + + _ = p.sql_x("SELECT id, \ + phid, \ + name, \ + status, \ + sequence, \ + projectPHID, \ + dateCreated, \ + dateModified, \ + properties \ + FROM project_column", + (), limit=None) + p.close() + return _ + + +def get_projectpolicy(projectPHID): + p = phdb(db='phabricator_project', + user=phuser_user, + passwd=phuser_passwd) + + _ = p.sql_x("SELECT viewPolicy \ + FROM project WHERE phid=%s", + (projectPHID), limit=None) + p.close() + if _ is not None and len(_[0]) > 0: + return _[0][0] + +def get_projectbypolicy(policy='public'): + p = phdb(db='phabricator_project', + user=phuser_user, + passwd=phuser_passwd) + + _ = p.sql_x("SELECT id, \ + name, \ + phid, \ + dateCreated, \ + dateModified, \ + icon, \ + color \ + FROM project WHERE viewPolicy=%s", + (policy), limit=None) + p.close() + return _ + +def get_edgebysrc(src): + p = phdb(db='phabricator_maniphest', + user=phuser_user, + passwd=phuser_passwd) + + _ = p.sql_x("SELECT src, \ + type, \ + dst, \ + dateCreated, \ + seq, \ + dataID \ + FROM edge WHERE src=%s", + (src), limit=None) + p.close() + return _ + +def get_transactionbytype(objectPHID, type): + p = phdb(db='phabricator_maniphest', + user=phuser_user, + passwd=phuser_passwd) + + _ = p.sql_x("SELECT id, \ + phid, \ + authorPHID, \ + objectPHID, \ + commentPHID, \ + commentVersion, \ + transactionType, \ + oldValue, \ + newValue, \ + metadata, \ + dateCreated, \ + dateModified \ + FROM maniphest_transaction WHERE objectPHID=%s AND transactionType=%s", + (objectPHID, type), limit=None) + p.close() + return _ + +def get_taskbypolicy(policy='public'): + p = phdb(db='phabricator_maniphest', + user=phuser_user, + passwd=phuser_passwd) + _ = p.sql_x("SELECT id, \ + phid, \ + authorPHID, \ + ownerPHID, \ + attached, \ + status, \ + priority, \ + title, \ + description, \ + dateCreated, \ + dateModified, \ + projectPHIDs, \ + subpriority \ + FROM maniphest_task WHERE viewPolicy=%s", + + (policy), limit=None) + p.close() + return _ + def get_user_relations_last_finish(dbcon): """ get last finish time for update script |