diff options
author | cpettet <rush@wikimedia.org> | 2014-12-12 00:50:09 -0600 |
---|---|---|
committer | cpettet <rush@wikimedia.org> | 2014-12-12 00:51:58 -0600 |
commit | 51d54f4a314f978aaf713a91e0df764aa5d58f90 (patch) | |
tree | cebe8fcd6cdd16fbad67bc74b5a89897b37c73fc /rt_create.py | |
parent | 9a30ffdaec03830966f408a4057da7ed276c6343 (diff) |
rt lots of misc fixes
* Had to reconsider some unicode compromises
* Add bz reference from RT
* Try to sanely represent literal stuff and quoted text
* Add bugzilla reference with links
* Comments missing metadata
* Attachments with weird names / formats
Diffstat (limited to 'rt_create.py')
-rwxr-xr-x | rt_create.py | 201 |
1 files changed, 150 insertions, 51 deletions
diff --git a/rt_create.py b/rt_create.py index e3c5447..8a202cd 100755 --- a/rt_create.py +++ b/rt_create.py @@ -1,4 +1,5 @@ #!/usr/bin/env python +#from __future__ import unicode_literals import time import json import os @@ -52,8 +53,8 @@ def create(rtid): if current: import_priority, rtinfo, com, created, modified = current[0] else: - elog('%s not present for migration' % (rtid,)) - return False + log('%s not present for migration' % (rtid,)) + return 'missing' if not rtinfo: log("ignoring invalid data for issue %s" % (rtid,)) @@ -66,11 +67,49 @@ def create(rtid): if get_ref(rtid): log('reference ticket %s already exists' % (rtid,)) - #return True + return True def remove_sig(content): return re.split('--\s?\n', content)[0] + def uob(obj, encoding='utf-8'): + """ unicode or bust""" + if isinstance(obj, basestring): + if not isinstance(obj, unicode): + obj = unicode(obj, encoding) + return obj + + def sanitize_text(line): + if line.strip() and not line.lstrip().startswith('>'): + # in remarkup having '--' on a new line seems to bold last + # line so signatures really cause issues + if all(map(lambda c: c in '-', line.strip())): + return '%%%{0}%%%'.format(line.strip()) + elif line.strip() == '-------- Original Message --------': + return '%%%{0}%%%'.format(line.strip()) + elif line.strip() == '---------- Forwarded message ----------': + return '%%%{0}%%%'.format(unicode(line.strip())) + elif line.strip().startswith('#'): + return uob('%%%') + uob(line.strip()) + uob('%%%') + else: + return uob(line).strip() + elif line.strip().startswith('>'): + quoted_content = line.lstrip('>').strip() + if not quoted_content.lstrip('>').strip(): + return line.strip() + if all(map(lambda c: c in '-', quoted_content.lstrip('>').strip())): + return "> ~~" + else: + return uob(line.strip()) + else: + vlog("ignoring content line %s" % (line,)) + return None + + viewpolicy = phabdb.get_project_phid('WMF-NDA') + if not viewpolicy: + elog("View policy group not present: %s" % 
(viewpolicy,)) + return False + # Example: # id: ticket/8175/attachments\n # Attachments: 141490: (Unnamed) (multipart/mixed / 0b), @@ -82,7 +121,6 @@ def create(rtid): history = response.get(path="ticket/%s/history?format=l" % (rtid,)) - rtinfo = json.loads(rtinfo) comments = json.loads(com) vlog(rtid) @@ -99,7 +137,7 @@ def create(rtid): body, attached = attachsplit[0], attachsplit[1] else: body, attached = c, '0' - comment_dict[i]['text_body'] = body + comment_dict[i]['text_body'] = unicode(body) comment_dict[i]['attached'] = attached # Example: @@ -184,30 +222,56 @@ def create(rtid): # Private-l mailing list # Private-l@lists.wikimedia.org # https://lists.wikimedia.org/mailman/listinfo/private-l + if extract: + fdetails = extract.groups() if not extract and v.startswith('Attached Message Part'): continue - elif not extract: - raise Exception("no attachment extraction: %s %s (%s)" % (k, v, rtid)) - continue - else: - vlog(extract.groups()) - ainfo_ext[k] = extract.groups() + if not extract: + extract = re.match('(\S+)\s\((.*)\/(.*)\),.*', v) + if not extract: + elog("attachment CORRUPT or FAILED extraction: %s %s (%s)" % (k, v, rtid)) + continue + + fdetails = extract.group(1), '', extract.group(2), extract.group(3) + + if not fdetails: + elog("attachment CORRUPT or FAILED extraction: %s %s (%s)" % (k, v, rtid)) + continue + ainfo_ext[k] = fdetails + vlog(ainfo_ext[k]) + # deb + # cgi attachment_types = ['pdf', 'jpeg', + 'asc', 'tgz', + 'csr', 'jpg', 'png', 'xls', - 'xlsx', + 'xls', + 'csv', + 'docx', 'gif', 'html', 'htm', 'txt', + 'diff', 'log', 'zip', 'rtf', + 'tmpl', 'vcf', + 'pub', + 'sql', + 'odt', + 'p7s', + 'iso', + 'ods', + 'conf', + 'doc', + 'xff', 'eml'] #Uploading attachment @@ -216,11 +280,13 @@ def create(rtid): uploaded = {} for k, v in ainfo_ext.iteritems(): file_extension = v[1].lower() + # vendors have this weird habit of capitalizing extension names # make sure we can handle the extension type otherwise - if file_extension not in 
attachment_types: - log("%s %s %s" % (rtid, v, file_extension)) - raise Exception('unknown extension: %s (%s)' % (v, rtid)) + #if file_extension not in attachment_types: + # elog("Unknown Exception (%s) %s %s" % (rtid, v, file_extension)) + # #raise Exception('unknown extension: %s (%s)' % (v, rtid)) + full = "ticket/%s/attachments/%s/content" % (rtid, k) vcontent = response.get(path=full, headers={'Content-Type': v[2], 'Content-Length': v[3] }) #PDF's don't react well to stripping header -- fine without it @@ -230,12 +296,14 @@ def create(rtid): vcontent = vcontent.readlines() sanscontent = ''.join(vcontent[2:]) - #{u'mimeType': u'image/jpeg', u'authorPHID': u'PHID-USER-bn2kbod4i7geycrbicns', - #u'phid': u'PHID-FILE-ioj2mrujudkrekhl5pkl', u'name': u'0jp9B09.jpg', - #u'objectName': u'F25786', u'byteSize': u'120305', - #u'uri': u'http://fabapitest.wmflabs.org/file/data/t7j2qp7l5z4ou5qpbx2u/PHID-FILE-ioj2mrujudkrekhl5pkl/0jp9B09.jpg', - #u'dateCreated': u'1409345752', u'dateModified': u'1409345752', u'id': u'25786'} - upload = phabm.upload_file("%s.%s" % (v[0], file_extension), sanscontent) + if file_extension: + fname = "%s.%s" % (v[0], file_extension) + else: + fname = v[0] + + upload = phabm.upload_file(fname, + sanscontent, + viewpolicy) uploaded[k] = upload if rtinfo['Queue'] not in rtlib.enabled: @@ -243,6 +311,13 @@ def create(rtid): return True ptags = [] + + # In a practical sense ops-requets seemed to get tagged + # with straight Operations group in Phab so we backfill + # this for consistency. 
+ if rtinfo['Queue'] == 'ops-requests': + ptags.append('operations') + pname = rtlib.project_translate(rtinfo['Queue']) ptags.append(pname) @@ -267,8 +342,13 @@ def create(rtid): # much like bugzilla comment 0 is the task description header = comment_dict[comment_dict.keys()[0]] del comment_dict[comment_dict.keys()[0]] - dtext = '\n'.join([l.strip() for l in header['body']['content'][0].splitlines()]) - dtext = rtlib.shadow_emails(dtext) + + dtext_san = [] + dtext_list = header['body']['content'][0].splitlines() + for t in dtext_list: + dtext_san.append(sanitize_text(rtlib.shadow_emails(t))) + dtext = '\n'.join(filter(None, dtext_san)) + #dtext = '\n'.join(filter(None, sanitize_text(rtlib.shadow_emails(dtext_list)))) full_description = "**Author:** `%s`\n\n**Description:**\n%s\n" % (rtinfo['Creator'].strip(), dtext) @@ -278,9 +358,18 @@ def create(rtid): for at in hafound: if at in upfiles: header_attachments.append('{F%s}' % uploaded[at]['id']) - if header_attachments: + if 'CF.{Bugzilla ticket}' in rtinfo or header_attachments: full_description += '\n__________________________\n\n' - full_description += '\n'.join(header_attachments) + if 'CF.{Bugzilla ticket}' in rtinfo and rtinfo['CF.{Bugzilla ticket}']: + obzurl = 'https://old-bugzilla.wikimedia.org/show_bug.cgi?id=' + obz = "[[ %s%s | %s ]]" % (obzurl, + rtinfo['CF.{Bugzilla ticket}'], + rtinfo['CF.{Bugzilla ticket}'],) + bzref = int(rtinfo['CF.{Bugzilla ticket}'].strip()) + newbzref = bzref + 2000 + full_description += "Bugzilla Ticket: %s => %s\n" % (obz, '{T%s}' % (newbzref,)) + if header_attachments: + full_description += '\n'.join(header_attachments) vlog("Ticket Info: %s" % (full_description,)) ticket = phab.maniphest.createtask(title=rtinfo['Subject'], @@ -288,8 +377,8 @@ def create(rtid): projectPHIDs=phids, ccPHIDs=[], priority=rtinfo['xpriority'], - auxiliary={"std:maniphest:external_reference":"rt%s" % (rtid,), - "std:maniphest:security_topic":"%s" % ('none')}) + 
auxiliary={"std:maniphest:external_reference":"rt%s" % (rtid,)}) + # XXX: perms botphid = phabdb.get_phid_by_username(config.phab_user) phabdb.set_task_title_transaction(ticket['phid'], @@ -298,13 +387,13 @@ def create(rtid): 'public') phabdb.set_task_ctime(ticket['phid'], rtlib.str_to_epoch(rtinfo['Created'])) + phabdb.set_task_policy(ticket['phid'], viewpolicy) - vlog(str(ordered_comments)) + #vlog(str(ordered_comments)) fmt_comments = {} for comment, contents in comment_dict.iteritems(): fmt_comment = {} dbody = contents['body'] - print dbody if dbody['content'] is None and dbody['creator'] is None: continue elif dbody['content'] is None: @@ -313,20 +402,15 @@ def create(rtid): mailsan = rtlib.shadow_emails(dbody['content'][0]) content_literal = [] for c in mailsan.splitlines(): - if c.strip() and not c.lstrip().startswith('>'): - # in remarkup having '--' on a new line seems to bold last - # line so signatures really cause issues - if c.strip() == '--': - content_literal.append('%%%{0}%%%'.format(c.strip())) - else: - content_literal.append('{0}'.format(c.strip())) - elif c.strip(): - content_literal.append(c.strip()) - else: - vlog("ignoring content line %s" % (c,)) - content = '\n'.join(content_literal) - - if 'This transaction appears to have no content' in content: + content_literal.append(sanitize_text(c)) + content = '\n'.join(filter(None, content_literal)) + + # In case of attachment but not much else + if not content and dbody['attached']: + content = True + + void_content = 'This transaction appears to have no content' + if not content == True and void_content in content: content = None auto_actions = ['Outgoing email about a comment recorded by RT_System', @@ -339,8 +423,13 @@ def create(rtid): preamble = '' cbody = '' if content: + if dbody['creator'] is None: + dbody['creator'] = '//creator field not set in source//' preamble += "`%s wrote:`\n\n" % (dbody['creator'].strip(),) - cbody += "%s" % (content.strip() or 'no content',) + + if content == 
True: + content = '' + cbody += "%s" % (content.strip() or '//no content//',) if dbody['nvalue'] or dbody['ovalue']: @@ -376,7 +465,16 @@ def create(rtid): fmt_comment['xattached'] = cbody_attachments phabm.task_comment(ticket['id'], preamble + cbody) ctransaction = phabdb.last_comment(ticket['phid']) - created = rtlib.str_to_epoch_comments(dbody['created']) + + try: + created = rtlib.str_to_epoch_comments(dbody['created']) + except (ValueError, TypeError): + # A handful of issues seems to show NULL creation times + # for now reason: see 1953 for example of NULL + # 3001 for example of None + elog("Could not determine comment time for %s" % (rtid,)) + dbody['created'] = rtlib.str_to_epoch(rtinfo['Created']) + phabdb.set_comment_time(ctransaction, created) fmt_comment['xctransaction'] = ctransaction @@ -442,17 +540,18 @@ def main(): pmig.close() #Serious business - if 'failed' in sys.argv: + if 'failed' in sys.argv or '-r' in sys.argv: for b in bugs: - notice("Removing rtid %s" % (b,)) - log(util.remove_issue_by_bugid(b, bzlib.prepend)) + util.notice("Removing rtid %s" % (b,)) + log(util.remove_issue_by_bugid(b, rtlib.prepend)) from multiprocessing import Pool pool = Pool(processes=int(config.bz_createmulti)) _ = pool.map(run_create, bugs) - complete = len(filter(bool, _)) - failed = len(_) - complete - print '%s completed %s, failed %s' % (sys.argv[0], complete, failed) + missing = len([i for i in _ if i == 'missing']) + complete = len(filter(bool, [i for i in _ if i not in ['missing']])) + failed = (len(_) - missing) - complete + print '%s completed %s, missing %s, failed %s' % (sys.argv[0], complete, missing, failed) if __name__ == '__main__': main() |