aboutsummaryrefslogtreecommitdiff
path: root/rt_create.py
diff options
context:
space:
mode:
authorcpettet <rush@wikimedia.org>2014-12-12 00:50:09 -0600
committercpettet <rush@wikimedia.org>2014-12-12 00:51:58 -0600
commit51d54f4a314f978aaf713a91e0df764aa5d58f90 (patch)
treecebe8fcd6cdd16fbad67bc74b5a89897b37c73fc /rt_create.py
parent9a30ffdaec03830966f408a4057da7ed276c6343 (diff)
rt lots of misc fixes
* Had to reconsider some unicode compromises * Add bz reference from RT * Try to sanely represent literal and quoted text * Add bugzilla reference with links * Comments missing metadata * Attachments with weird names / formats
Diffstat (limited to 'rt_create.py')
-rwxr-xr-xrt_create.py201
1 files changed, 150 insertions, 51 deletions
diff --git a/rt_create.py b/rt_create.py
index e3c5447..8a202cd 100755
--- a/rt_create.py
+++ b/rt_create.py
@@ -1,4 +1,5 @@
#!/usr/bin/env python
+#from __future__ import unicode_literals
import time
import json
import os
@@ -52,8 +53,8 @@ def create(rtid):
if current:
import_priority, rtinfo, com, created, modified = current[0]
else:
- elog('%s not present for migration' % (rtid,))
- return False
+ log('%s not present for migration' % (rtid,))
+ return 'missing'
if not rtinfo:
log("ignoring invalid data for issue %s" % (rtid,))
@@ -66,11 +67,49 @@ def create(rtid):
if get_ref(rtid):
log('reference ticket %s already exists' % (rtid,))
- #return True
+ return True
def remove_sig(content):
return re.split('--\s?\n', content)[0]
+ def uob(obj, encoding='utf-8'):
+ """ unicode or bust"""
+ if isinstance(obj, basestring):
+ if not isinstance(obj, unicode):
+ obj = unicode(obj, encoding)
+ return obj
+
+ def sanitize_text(line):
+ if line.strip() and not line.lstrip().startswith('>'):
+ # in remarkup having '--' on a new line seems to bold last
+ # line so signatures really cause issues
+ if all(map(lambda c: c in '-', line.strip())):
+ return '%%%{0}%%%'.format(line.strip())
+ elif line.strip() == '-------- Original Message --------':
+ return '%%%{0}%%%'.format(line.strip())
+ elif line.strip() == '---------- Forwarded message ----------':
+ return '%%%{0}%%%'.format(unicode(line.strip()))
+ elif line.strip().startswith('#'):
+ return uob('%%%') + uob(line.strip()) + uob('%%%')
+ else:
+ return uob(line).strip()
+ elif line.strip().startswith('>'):
+ quoted_content = line.lstrip('>').strip()
+ if not quoted_content.lstrip('>').strip():
+ return line.strip()
+ if all(map(lambda c: c in '-', quoted_content.lstrip('>').strip())):
+ return "> ~~"
+ else:
+ return uob(line.strip())
+ else:
+ vlog("ignoring content line %s" % (line,))
+ return None
+
+ viewpolicy = phabdb.get_project_phid('WMF-NDA')
+ if not viewpolicy:
+ elog("View policy group not present: %s" % (viewpolicy,))
+ return False
+
# Example:
# id: ticket/8175/attachments\n
# Attachments: 141490: (Unnamed) (multipart/mixed / 0b),
@@ -82,7 +121,6 @@ def create(rtid):
history = response.get(path="ticket/%s/history?format=l" % (rtid,))
-
rtinfo = json.loads(rtinfo)
comments = json.loads(com)
vlog(rtid)
@@ -99,7 +137,7 @@ def create(rtid):
body, attached = attachsplit[0], attachsplit[1]
else:
body, attached = c, '0'
- comment_dict[i]['text_body'] = body
+ comment_dict[i]['text_body'] = unicode(body)
comment_dict[i]['attached'] = attached
# Example:
@@ -184,30 +222,56 @@ def create(rtid):
# Private-l mailing list
# Private-l@lists.wikimedia.org
# https://lists.wikimedia.org/mailman/listinfo/private-l
+ if extract:
+ fdetails = extract.groups()
if not extract and v.startswith('Attached Message Part'):
continue
- elif not extract:
- raise Exception("no attachment extraction: %s %s (%s)" % (k, v, rtid))
- continue
- else:
- vlog(extract.groups())
- ainfo_ext[k] = extract.groups()
+ if not extract:
+ extract = re.match('(\S+)\s\((.*)\/(.*)\),.*', v)
+ if not extract:
+ elog("attachment CORRUPT or FAILED extraction: %s %s (%s)" % (k, v, rtid))
+ continue
+
+ fdetails = extract.group(1), '', extract.group(2), extract.group(3)
+
+ if not fdetails:
+ elog("attachment CORRUPT or FAILED extraction: %s %s (%s)" % (k, v, rtid))
+ continue
+ ainfo_ext[k] = fdetails
+ vlog(ainfo_ext[k])
+ # deb
+ # cgi
attachment_types = ['pdf',
'jpeg',
+ 'asc',
'tgz',
+ 'csr',
'jpg',
'png',
'xls',
- 'xlsx',
+ 'xls',
+ 'csv',
+ 'docx',
'gif',
'html',
'htm',
'txt',
+ 'diff',
'log',
'zip',
'rtf',
+ 'tmpl',
'vcf',
+ 'pub',
+ 'sql',
+ 'odt',
+ 'p7s',
+ 'iso',
+ 'ods',
+ 'conf',
+ 'doc',
+ 'xff',
'eml']
#Uploading attachment
@@ -216,11 +280,13 @@ def create(rtid):
uploaded = {}
for k, v in ainfo_ext.iteritems():
file_extension = v[1].lower()
+
# vendors have this weird habit of capitalizing extension names
# make sure we can handle the extension type otherwise
- if file_extension not in attachment_types:
- log("%s %s %s" % (rtid, v, file_extension))
- raise Exception('unknown extension: %s (%s)' % (v, rtid))
+ #if file_extension not in attachment_types:
+ # elog("Unknown Exception (%s) %s %s" % (rtid, v, file_extension))
+ # #raise Exception('unknown extension: %s (%s)' % (v, rtid))
+
full = "ticket/%s/attachments/%s/content" % (rtid, k)
vcontent = response.get(path=full, headers={'Content-Type': v[2], 'Content-Length': v[3] })
#PDF's don't react well to stripping header -- fine without it
@@ -230,12 +296,14 @@ def create(rtid):
vcontent = vcontent.readlines()
sanscontent = ''.join(vcontent[2:])
- #{u'mimeType': u'image/jpeg', u'authorPHID': u'PHID-USER-bn2kbod4i7geycrbicns',
- #u'phid': u'PHID-FILE-ioj2mrujudkrekhl5pkl', u'name': u'0jp9B09.jpg',
- #u'objectName': u'F25786', u'byteSize': u'120305',
- #u'uri': u'http://fabapitest.wmflabs.org/file/data/t7j2qp7l5z4ou5qpbx2u/PHID-FILE-ioj2mrujudkrekhl5pkl/0jp9B09.jpg',
- #u'dateCreated': u'1409345752', u'dateModified': u'1409345752', u'id': u'25786'}
- upload = phabm.upload_file("%s.%s" % (v[0], file_extension), sanscontent)
+ if file_extension:
+ fname = "%s.%s" % (v[0], file_extension)
+ else:
+ fname = v[0]
+
+ upload = phabm.upload_file(fname,
+ sanscontent,
+ viewpolicy)
uploaded[k] = upload
if rtinfo['Queue'] not in rtlib.enabled:
@@ -243,6 +311,13 @@ def create(rtid):
return True
ptags = []
+
+ # In a practical sense ops-requests seemed to get tagged
+ # with straight Operations group in Phab so we backfill
+ # this for consistency.
+ if rtinfo['Queue'] == 'ops-requests':
+ ptags.append('operations')
+
pname = rtlib.project_translate(rtinfo['Queue'])
ptags.append(pname)
@@ -267,8 +342,13 @@ def create(rtid):
# much like bugzilla comment 0 is the task description
header = comment_dict[comment_dict.keys()[0]]
del comment_dict[comment_dict.keys()[0]]
- dtext = '\n'.join([l.strip() for l in header['body']['content'][0].splitlines()])
- dtext = rtlib.shadow_emails(dtext)
+
+ dtext_san = []
+ dtext_list = header['body']['content'][0].splitlines()
+ for t in dtext_list:
+ dtext_san.append(sanitize_text(rtlib.shadow_emails(t)))
+ dtext = '\n'.join(filter(None, dtext_san))
+ #dtext = '\n'.join(filter(None, sanitize_text(rtlib.shadow_emails(dtext_list))))
full_description = "**Author:** `%s`\n\n**Description:**\n%s\n" % (rtinfo['Creator'].strip(),
dtext)
@@ -278,9 +358,18 @@ def create(rtid):
for at in hafound:
if at in upfiles:
header_attachments.append('{F%s}' % uploaded[at]['id'])
- if header_attachments:
+ if 'CF.{Bugzilla ticket}' in rtinfo or header_attachments:
full_description += '\n__________________________\n\n'
- full_description += '\n'.join(header_attachments)
+ if 'CF.{Bugzilla ticket}' in rtinfo and rtinfo['CF.{Bugzilla ticket}']:
+ obzurl = 'https://old-bugzilla.wikimedia.org/show_bug.cgi?id='
+ obz = "[[ %s%s | %s ]]" % (obzurl,
+ rtinfo['CF.{Bugzilla ticket}'],
+ rtinfo['CF.{Bugzilla ticket}'],)
+ bzref = int(rtinfo['CF.{Bugzilla ticket}'].strip())
+ newbzref = bzref + 2000
+ full_description += "Bugzilla Ticket: %s => %s\n" % (obz, '{T%s}' % (newbzref,))
+ if header_attachments:
+ full_description += '\n'.join(header_attachments)
vlog("Ticket Info: %s" % (full_description,))
ticket = phab.maniphest.createtask(title=rtinfo['Subject'],
@@ -288,8 +377,8 @@ def create(rtid):
projectPHIDs=phids,
ccPHIDs=[],
priority=rtinfo['xpriority'],
- auxiliary={"std:maniphest:external_reference":"rt%s" % (rtid,),
- "std:maniphest:security_topic":"%s" % ('none')})
+ auxiliary={"std:maniphest:external_reference":"rt%s" % (rtid,)})
+
# XXX: perms
botphid = phabdb.get_phid_by_username(config.phab_user)
phabdb.set_task_title_transaction(ticket['phid'],
@@ -298,13 +387,13 @@ def create(rtid):
'public')
phabdb.set_task_ctime(ticket['phid'], rtlib.str_to_epoch(rtinfo['Created']))
+ phabdb.set_task_policy(ticket['phid'], viewpolicy)
- vlog(str(ordered_comments))
+ #vlog(str(ordered_comments))
fmt_comments = {}
for comment, contents in comment_dict.iteritems():
fmt_comment = {}
dbody = contents['body']
- print dbody
if dbody['content'] is None and dbody['creator'] is None:
continue
elif dbody['content'] is None:
@@ -313,20 +402,15 @@ def create(rtid):
mailsan = rtlib.shadow_emails(dbody['content'][0])
content_literal = []
for c in mailsan.splitlines():
- if c.strip() and not c.lstrip().startswith('>'):
- # in remarkup having '--' on a new line seems to bold last
- # line so signatures really cause issues
- if c.strip() == '--':
- content_literal.append('%%%{0}%%%'.format(c.strip()))
- else:
- content_literal.append('{0}'.format(c.strip()))
- elif c.strip():
- content_literal.append(c.strip())
- else:
- vlog("ignoring content line %s" % (c,))
- content = '\n'.join(content_literal)
-
- if 'This transaction appears to have no content' in content:
+ content_literal.append(sanitize_text(c))
+ content = '\n'.join(filter(None, content_literal))
+
+ # In case of attachment but not much else
+ if not content and dbody['attached']:
+ content = True
+
+ void_content = 'This transaction appears to have no content'
+ if not content == True and void_content in content:
content = None
auto_actions = ['Outgoing email about a comment recorded by RT_System',
@@ -339,8 +423,13 @@ def create(rtid):
preamble = ''
cbody = ''
if content:
+ if dbody['creator'] is None:
+ dbody['creator'] = '//creator field not set in source//'
preamble += "`%s wrote:`\n\n" % (dbody['creator'].strip(),)
- cbody += "%s" % (content.strip() or 'no content',)
+
+ if content == True:
+ content = ''
+ cbody += "%s" % (content.strip() or '//no content//',)
if dbody['nvalue'] or dbody['ovalue']:
@@ -376,7 +465,16 @@ def create(rtid):
fmt_comment['xattached'] = cbody_attachments
phabm.task_comment(ticket['id'], preamble + cbody)
ctransaction = phabdb.last_comment(ticket['phid'])
- created = rtlib.str_to_epoch_comments(dbody['created'])
+
+ try:
+ created = rtlib.str_to_epoch_comments(dbody['created'])
+ except (ValueError, TypeError):
+ # A handful of issues seem to show NULL creation times
+ # for no reason: see 1953 for example of NULL
+ # 3001 for example of None
+ elog("Could not determine comment time for %s" % (rtid,))
+ dbody['created'] = rtlib.str_to_epoch(rtinfo['Created'])
+
phabdb.set_comment_time(ctransaction,
created)
fmt_comment['xctransaction'] = ctransaction
@@ -442,17 +540,18 @@ def main():
pmig.close()
#Serious business
- if 'failed' in sys.argv:
+ if 'failed' in sys.argv or '-r' in sys.argv:
for b in bugs:
- notice("Removing rtid %s" % (b,))
- log(util.remove_issue_by_bugid(b, bzlib.prepend))
+ util.notice("Removing rtid %s" % (b,))
+ log(util.remove_issue_by_bugid(b, rtlib.prepend))
from multiprocessing import Pool
pool = Pool(processes=int(config.bz_createmulti))
_ = pool.map(run_create, bugs)
- complete = len(filter(bool, _))
- failed = len(_) - complete
- print '%s completed %s, failed %s' % (sys.argv[0], complete, failed)
+ missing = len([i for i in _ if i == 'missing'])
+ complete = len(filter(bool, [i for i in _ if i not in ['missing']]))
+ failed = (len(_) - missing) - complete
+ print '%s completed %s, missing %s, failed %s' % (sys.argv[0], complete, missing, failed)
if __name__ == '__main__':
main()