aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorcpettet <rush@wikimedia.org>2015-05-28 13:12:21 -0500
committercpettet <rush@wikimedia.org>2015-05-28 13:18:34 -0500
commit4c5b4eb59eea8cf30b941444b0162ff12f655913 (patch)
tree257272d66dd09fdac672f5a22a72c0d427001b8b
parent6a1ac6cdcd1f3859f893f77be64d7384c8d6aaba (diff)
Script to dump public task information
Dumps to a json file for reporting and analytics that several departments have requested. All security information is derived from the security on the task, though project view policy is also respected. This is very specific as several columns in tables contain sensitive or potentially sensitive information even on public tasks such as mailKeys or hidden history. Change-Id: I9e20d77a5642757c96f32f40914528b08104abbd
-rwxr-xr-xpublic_task_dump.py41
-rwxr-xr-xwmfphablib/phabdb.py110
2 files changed, 151 insertions, 0 deletions
diff --git a/public_task_dump.py b/public_task_dump.py
new file mode 100755
index 0000000..57b50fc
--- /dev/null
+++ b/public_task_dump.py
@@ -0,0 +1,41 @@
+import os
+import sys
+import json
+from wmfphablib import phabdb
+from wmfphablib import util
+
+# Some transaction types are unsafe to reveal as they
+# contain hidden information in their history and possible
+# unsafe secrets we have dealt with in the UI context
+transactions = ['projectcolumn',
+ 'priority',
+ 'status',
+ 'reassign']
+
+data = {}
+taskdata = {}
+for task in phabdb.get_taskbypolicy():
+ id = task[0]
+ taskdata[id] = {}
+
+ taskdata[id]['transactions'] = {}
+ for t in transactions:
+ taskdata[id]['transactions'][t] = phabdb.get_transactionbytype(task[1], t)
+
+ #('PHID-TASK-uegpsibvtzahh2n4efok', 21L, 'PHID-USER-7t36l5d3llsm5abqfx3u', 1426191381L, 0L, None)
+ # There are a few types of edge relationships, some of them we are not going to
+ # account for here as the current need is project based data. Thus if we see a relationship
+ # with a project and that project is public then include it.
+ edges = phabdb.get_edgebysrc(task[1])
+ edge_allowed = [edge for edge in edges \
+ if edge[2].startswith('PHID-PROJ') \
+ and phabdb.get_projectpolicy(edge[2]) == 'public']
+ taskdata[id]['edge'] = filter(bool, edge_allowed)
+
+data['task'] = taskdata
+data['project'] = {}
+data['project']['projects'] = phabdb.get_projectbypolicy(policy='public')
+data['project']['columns'] = phabdb.get_projectcolumns()
+
+with open('phabricator_public.dump', 'w') as f:
+ f.write(json.dumps(data))
diff --git a/wmfphablib/phabdb.py b/wmfphablib/phabdb.py
index 470e87f..94bbdc8 100755
--- a/wmfphablib/phabdb.py
+++ b/wmfphablib/phabdb.py
@@ -20,6 +20,116 @@ from config import bzmigrate_db
from config import bzmigrate_user
from config import bzmigrate_passwd
+def get_projectcolumns():
+ p = phdb(db='phabricator_project',
+ user=phuser_user,
+ passwd=phuser_passwd)
+
+ _ = p.sql_x("SELECT id, \
+ phid, \
+ name, \
+ status, \
+ sequence, \
+ projectPHID, \
+ dateCreated, \
+ dateModified, \
+ properties \
+ FROM project_column",
+ (), limit=None)
+ p.close()
+ return _
+
+
+def get_projectpolicy(projectPHID):
+ p = phdb(db='phabricator_project',
+ user=phuser_user,
+ passwd=phuser_passwd)
+
+ _ = p.sql_x("SELECT viewPolicy \
+ FROM project WHERE phid=%s",
+ (projectPHID), limit=None)
+ p.close()
+ if _ is not None and len(_[0]) > 0:
+ return _[0][0]
+
+def get_projectbypolicy(policy='public'):
+ p = phdb(db='phabricator_project',
+ user=phuser_user,
+ passwd=phuser_passwd)
+
+ _ = p.sql_x("SELECT id, \
+ name, \
+ phid, \
+ dateCreated, \
+ dateModified, \
+ icon, \
+ color \
+ FROM project WHERE viewPolicy=%s",
+ (policy), limit=None)
+ p.close()
+ return _
+
+def get_edgebysrc(src):
+ p = phdb(db='phabricator_maniphest',
+ user=phuser_user,
+ passwd=phuser_passwd)
+
+ _ = p.sql_x("SELECT src, \
+ type, \
+ dst, \
+ dateCreated, \
+ seq, \
+ dataID \
+ FROM edge WHERE src=%s",
+ (src), limit=None)
+ p.close()
+ return _
+
+def get_transactionbytype(objectPHID, type):
+ p = phdb(db='phabricator_maniphest',
+ user=phuser_user,
+ passwd=phuser_passwd)
+
+ _ = p.sql_x("SELECT id, \
+ phid, \
+ authorPHID, \
+ objectPHID, \
+ commentPHID, \
+ commentVersion, \
+ transactionType, \
+ oldValue, \
+ newValue, \
+ metadata, \
+ dateCreated, \
+ dateModified \
+ FROM maniphest_transaction WHERE objectPHID=%s AND transactionType=%s",
+ (objectPHID, type), limit=None)
+ p.close()
+ return _
+
+def get_taskbypolicy(policy='public'):
+ p = phdb(db='phabricator_maniphest',
+ user=phuser_user,
+ passwd=phuser_passwd)
+ _ = p.sql_x("SELECT id, \
+ phid, \
+ authorPHID, \
+ ownerPHID, \
+ attached, \
+ status, \
+ priority, \
+ title, \
+ description, \
+ dateCreated, \
+ dateModified, \
+ projectPHIDs, \
+ subpriority \
+ FROM maniphest_task WHERE viewPolicy=%s",
+
+ (policy), limit=None)
+ p.close()
+ return _
+
def get_user_relations_last_finish(dbcon):
""" get last finish time for update script