diff options
author | cpettet <rush@wikimedia.org> | 2014-12-12 00:50:09 -0600 |
---|---|---|
committer | cpettet <rush@wikimedia.org> | 2014-12-12 00:51:58 -0600 |
commit | 51d54f4a314f978aaf713a91e0df764aa5d58f90 (patch) | |
tree | cebe8fcd6cdd16fbad67bc74b5a89897b37c73fc | |
parent | 9a30ffdaec03830966f408a4057da7ed276c6343 (diff) |
rt lots of misc fixes
* Had to reconsider some unicode compromises
* Add bz reference from RT
* Try to sanely represent literal stuff and quoted
* Add bugzilla reference with links
* Comments missing metadata
* Attachments with weird names / formats
-rwxr-xr-x | README | 6 | ||||
-rwxr-xr-x | rt_create.py | 201 | ||||
-rwxr-xr-x | rt_create_new.py | 485 | ||||
-rwxr-xr-x | rt_fetch_new.py | 162 | ||||
-rwxr-xr-x | wmfphablib/__init__.py | 1 | ||||
-rwxr-xr-x | wmfphablib/phabdb.py | 11 | ||||
-rwxr-xr-x | wmfphablib/rtlib.py | 4 | ||||
-rw-r--r-- | wmfphablib/util.py | 12 |
8 files changed, 819 insertions, 63 deletions
@@ -125,6 +125,10 @@ create table task_relations (id INT, priority INT, blocks TEXT(1000), modified I Issue examples: +Consistent Attachments: + +https://old-bugzilla.wikimedia.org/show_bug.cgi?id=72256 + BZ lots of attachments: https://bugzilla.wikimedia.org/show_bug.cgi?id=22881 has 11 @@ -139,3 +143,5 @@ https://bugzilla.wikimedia.org/show_bug.cgi?id=2 has 1 RT ISSUES: https://rt.wikimedia.org/Ticket/Display.html?id=8175 + +http://requesttracker.wikia.com/wiki/REST diff --git a/rt_create.py b/rt_create.py index e3c5447..8a202cd 100755 --- a/rt_create.py +++ b/rt_create.py @@ -1,4 +1,5 @@ #!/usr/bin/env python +#from __future__ import unicode_literals import time import json import os @@ -52,8 +53,8 @@ def create(rtid): if current: import_priority, rtinfo, com, created, modified = current[0] else: - elog('%s not present for migration' % (rtid,)) - return False + log('%s not present for migration' % (rtid,)) + return 'missing' if not rtinfo: log("ignoring invalid data for issue %s" % (rtid,)) @@ -66,11 +67,49 @@ def create(rtid): if get_ref(rtid): log('reference ticket %s already exists' % (rtid,)) - #return True + return True def remove_sig(content): return re.split('--\s?\n', content)[0] + def uob(obj, encoding='utf-8'): + """ unicode or bust""" + if isinstance(obj, basestring): + if not isinstance(obj, unicode): + obj = unicode(obj, encoding) + return obj + + def sanitize_text(line): + if line.strip() and not line.lstrip().startswith('>'): + # in remarkup having '--' on a new line seems to bold last + # line so signatures really cause issues + if all(map(lambda c: c in '-', line.strip())): + return '%%%{0}%%%'.format(line.strip()) + elif line.strip() == '-------- Original Message --------': + return '%%%{0}%%%'.format(line.strip()) + elif line.strip() == '---------- Forwarded message ----------': + return '%%%{0}%%%'.format(unicode(line.strip())) + elif line.strip().startswith('#'): + return uob('%%%') + uob(line.strip()) + uob('%%%') + else: + return 
uob(line).strip() + elif line.strip().startswith('>'): + quoted_content = line.lstrip('>').strip() + if not quoted_content.lstrip('>').strip(): + return line.strip() + if all(map(lambda c: c in '-', quoted_content.lstrip('>').strip())): + return "> ~~" + else: + return uob(line.strip()) + else: + vlog("ignoring content line %s" % (line,)) + return None + + viewpolicy = phabdb.get_project_phid('WMF-NDA') + if not viewpolicy: + elog("View policy group not present: %s" % (viewpolicy,)) + return False + # Example: # id: ticket/8175/attachments\n # Attachments: 141490: (Unnamed) (multipart/mixed / 0b), @@ -82,7 +121,6 @@ def create(rtid): history = response.get(path="ticket/%s/history?format=l" % (rtid,)) - rtinfo = json.loads(rtinfo) comments = json.loads(com) vlog(rtid) @@ -99,7 +137,7 @@ def create(rtid): body, attached = attachsplit[0], attachsplit[1] else: body, attached = c, '0' - comment_dict[i]['text_body'] = body + comment_dict[i]['text_body'] = unicode(body) comment_dict[i]['attached'] = attached # Example: @@ -184,30 +222,56 @@ def create(rtid): # Private-l mailing list # Private-l@lists.wikimedia.org # https://lists.wikimedia.org/mailman/listinfo/private-l + if extract: + fdetails = extract.groups() if not extract and v.startswith('Attached Message Part'): continue - elif not extract: - raise Exception("no attachment extraction: %s %s (%s)" % (k, v, rtid)) - continue - else: - vlog(extract.groups()) - ainfo_ext[k] = extract.groups() + if not extract: + extract = re.match('(\S+)\s\((.*)\/(.*)\),.*', v) + if not extract: + elog("attachment CORRUPT or FAILED extraction: %s %s (%s)" % (k, v, rtid)) + continue + + fdetails = extract.group(1), '', extract.group(2), extract.group(3) + + if not fdetails: + elog("attachment CORRUPT or FAILED extraction: %s %s (%s)" % (k, v, rtid)) + continue + ainfo_ext[k] = fdetails + vlog(ainfo_ext[k]) + # deb + # cgi attachment_types = ['pdf', 'jpeg', + 'asc', 'tgz', + 'csr', 'jpg', 'png', 'xls', - 'xlsx', + 'xls', + 'csv', + 
'docx', 'gif', 'html', 'htm', 'txt', + 'diff', 'log', 'zip', 'rtf', + 'tmpl', 'vcf', + 'pub', + 'sql', + 'odt', + 'p7s', + 'iso', + 'ods', + 'conf', + 'doc', + 'xff', 'eml'] #Uploading attachment @@ -216,11 +280,13 @@ def create(rtid): uploaded = {} for k, v in ainfo_ext.iteritems(): file_extension = v[1].lower() + # vendors have this weird habit of capitalizing extension names # make sure we can handle the extension type otherwise - if file_extension not in attachment_types: - log("%s %s %s" % (rtid, v, file_extension)) - raise Exception('unknown extension: %s (%s)' % (v, rtid)) + #if file_extension not in attachment_types: + # elog("Unknown Exception (%s) %s %s" % (rtid, v, file_extension)) + # #raise Exception('unknown extension: %s (%s)' % (v, rtid)) + full = "ticket/%s/attachments/%s/content" % (rtid, k) vcontent = response.get(path=full, headers={'Content-Type': v[2], 'Content-Length': v[3] }) #PDF's don't react well to stripping header -- fine without it @@ -230,12 +296,14 @@ def create(rtid): vcontent = vcontent.readlines() sanscontent = ''.join(vcontent[2:]) - #{u'mimeType': u'image/jpeg', u'authorPHID': u'PHID-USER-bn2kbod4i7geycrbicns', - #u'phid': u'PHID-FILE-ioj2mrujudkrekhl5pkl', u'name': u'0jp9B09.jpg', - #u'objectName': u'F25786', u'byteSize': u'120305', - #u'uri': u'http://fabapitest.wmflabs.org/file/data/t7j2qp7l5z4ou5qpbx2u/PHID-FILE-ioj2mrujudkrekhl5pkl/0jp9B09.jpg', - #u'dateCreated': u'1409345752', u'dateModified': u'1409345752', u'id': u'25786'} - upload = phabm.upload_file("%s.%s" % (v[0], file_extension), sanscontent) + if file_extension: + fname = "%s.%s" % (v[0], file_extension) + else: + fname = v[0] + + upload = phabm.upload_file(fname, + sanscontent, + viewpolicy) uploaded[k] = upload if rtinfo['Queue'] not in rtlib.enabled: @@ -243,6 +311,13 @@ def create(rtid): return True ptags = [] + + # In a practical sense ops-requets seemed to get tagged + # with straight Operations group in Phab so we backfill + # this for consistency. 
+ if rtinfo['Queue'] == 'ops-requests': + ptags.append('operations') + pname = rtlib.project_translate(rtinfo['Queue']) ptags.append(pname) @@ -267,8 +342,13 @@ def create(rtid): # much like bugzilla comment 0 is the task description header = comment_dict[comment_dict.keys()[0]] del comment_dict[comment_dict.keys()[0]] - dtext = '\n'.join([l.strip() for l in header['body']['content'][0].splitlines()]) - dtext = rtlib.shadow_emails(dtext) + + dtext_san = [] + dtext_list = header['body']['content'][0].splitlines() + for t in dtext_list: + dtext_san.append(sanitize_text(rtlib.shadow_emails(t))) + dtext = '\n'.join(filter(None, dtext_san)) + #dtext = '\n'.join(filter(None, sanitize_text(rtlib.shadow_emails(dtext_list)))) full_description = "**Author:** `%s`\n\n**Description:**\n%s\n" % (rtinfo['Creator'].strip(), dtext) @@ -278,9 +358,18 @@ def create(rtid): for at in hafound: if at in upfiles: header_attachments.append('{F%s}' % uploaded[at]['id']) - if header_attachments: + if 'CF.{Bugzilla ticket}' in rtinfo or header_attachments: full_description += '\n__________________________\n\n' - full_description += '\n'.join(header_attachments) + if 'CF.{Bugzilla ticket}' in rtinfo and rtinfo['CF.{Bugzilla ticket}']: + obzurl = 'https://old-bugzilla.wikimedia.org/show_bug.cgi?id=' + obz = "[[ %s%s | %s ]]" % (obzurl, + rtinfo['CF.{Bugzilla ticket}'], + rtinfo['CF.{Bugzilla ticket}'],) + bzref = int(rtinfo['CF.{Bugzilla ticket}'].strip()) + newbzref = bzref + 2000 + full_description += "Bugzilla Ticket: %s => %s\n" % (obz, '{T%s}' % (newbzref,)) + if header_attachments: + full_description += '\n'.join(header_attachments) vlog("Ticket Info: %s" % (full_description,)) ticket = phab.maniphest.createtask(title=rtinfo['Subject'], @@ -288,8 +377,8 @@ def create(rtid): projectPHIDs=phids, ccPHIDs=[], priority=rtinfo['xpriority'], - auxiliary={"std:maniphest:external_reference":"rt%s" % (rtid,), - "std:maniphest:security_topic":"%s" % ('none')}) + 
auxiliary={"std:maniphest:external_reference":"rt%s" % (rtid,)}) + # XXX: perms botphid = phabdb.get_phid_by_username(config.phab_user) phabdb.set_task_title_transaction(ticket['phid'], @@ -298,13 +387,13 @@ def create(rtid): 'public') phabdb.set_task_ctime(ticket['phid'], rtlib.str_to_epoch(rtinfo['Created'])) + phabdb.set_task_policy(ticket['phid'], viewpolicy) - vlog(str(ordered_comments)) + #vlog(str(ordered_comments)) fmt_comments = {} for comment, contents in comment_dict.iteritems(): fmt_comment = {} dbody = contents['body'] - print dbody if dbody['content'] is None and dbody['creator'] is None: continue elif dbody['content'] is None: @@ -313,20 +402,15 @@ def create(rtid): mailsan = rtlib.shadow_emails(dbody['content'][0]) content_literal = [] for c in mailsan.splitlines(): - if c.strip() and not c.lstrip().startswith('>'): - # in remarkup having '--' on a new line seems to bold last - # line so signatures really cause issues - if c.strip() == '--': - content_literal.append('%%%{0}%%%'.format(c.strip())) - else: - content_literal.append('{0}'.format(c.strip())) - elif c.strip(): - content_literal.append(c.strip()) - else: - vlog("ignoring content line %s" % (c,)) - content = '\n'.join(content_literal) - - if 'This transaction appears to have no content' in content: + content_literal.append(sanitize_text(c)) + content = '\n'.join(filter(None, content_literal)) + + # In case of attachment but not much else + if not content and dbody['attached']: + content = True + + void_content = 'This transaction appears to have no content' + if not content == True and void_content in content: content = None auto_actions = ['Outgoing email about a comment recorded by RT_System', @@ -339,8 +423,13 @@ def create(rtid): preamble = '' cbody = '' if content: + if dbody['creator'] is None: + dbody['creator'] = '//creator field not set in source//' preamble += "`%s wrote:`\n\n" % (dbody['creator'].strip(),) - cbody += "%s" % (content.strip() or 'no content',) + + if content == 
True: + content = '' + cbody += "%s" % (content.strip() or '//no content//',) if dbody['nvalue'] or dbody['ovalue']: @@ -376,7 +465,16 @@ def create(rtid): fmt_comment['xattached'] = cbody_attachments phabm.task_comment(ticket['id'], preamble + cbody) ctransaction = phabdb.last_comment(ticket['phid']) - created = rtlib.str_to_epoch_comments(dbody['created']) + + try: + created = rtlib.str_to_epoch_comments(dbody['created']) + except (ValueError, TypeError): + # A handful of issues seems to show NULL creation times + # for now reason: see 1953 for example of NULL + # 3001 for example of None + elog("Could not determine comment time for %s" % (rtid,)) + dbody['created'] = rtlib.str_to_epoch(rtinfo['Created']) + phabdb.set_comment_time(ctransaction, created) fmt_comment['xctransaction'] = ctransaction @@ -442,17 +540,18 @@ def main(): pmig.close() #Serious business - if 'failed' in sys.argv: + if 'failed' in sys.argv or '-r' in sys.argv: for b in bugs: - notice("Removing rtid %s" % (b,)) - log(util.remove_issue_by_bugid(b, bzlib.prepend)) + util.notice("Removing rtid %s" % (b,)) + log(util.remove_issue_by_bugid(b, rtlib.prepend)) from multiprocessing import Pool pool = Pool(processes=int(config.bz_createmulti)) _ = pool.map(run_create, bugs) - complete = len(filter(bool, _)) - failed = len(_) - complete - print '%s completed %s, failed %s' % (sys.argv[0], complete, failed) + missing = len([i for i in _ if i == 'missing']) + complete = len(filter(bool, [i for i in _ if i not in ['missing']])) + failed = (len(_) - missing) - complete + print '%s completed %s, missing %s, failed %s' % (sys.argv[0], complete, missing, failed) if __name__ == '__main__': main() diff --git a/rt_create_new.py b/rt_create_new.py new file mode 100755 index 0000000..32c53b4 --- /dev/null +++ b/rt_create_new.py @@ -0,0 +1,485 @@ +#!/usr/bin/env python +#from __future__ import unicode_literals +import time +import json +import os +import re +import sys +import getpass 
+sys.path.append('/home/rush/python-rtkit/') +from phabricator import Phabricator +from wmfphablib import Phab as phabmacros +from wmfphablib import errorlog as elog +from wmfphablib import return_bug_list +from wmfphablib import phdb +from wmfphablib import phabdb +from wmfphablib import mailinglist_phid +from wmfphablib import set_project_icon +from wmfphablib import log +from wmfphablib import util +from wmfphablib import rtlib +from wmfphablib import vlog +from wmfphablib import config +from wmfphablib import rtlib +from wmfphablib import datetime_to_epoch +from wmfphablib import epoch_to_datetime +from wmfphablib import now +from rtkit import resource +from rtkit import authenticators +from rtkit import errors +from wmfphablib import ipriority + + +def create(rtid): + + phab = Phabricator(config.phab_user, + config.phab_cert, + config.phab_host) + + phabm = phabmacros('', '', '') + phabm.con = phab + + pmig = phdb(db=config.rtmigrate_db) + + response = resource.RTResource(config.rt_url, + config.rt_login, + config.rt_passwd, + authenticators.CookieAuthenticator) + + current = pmig.sql_x("SELECT priority, header, \ + comments, created, modified \ + FROM rt_meta WHERE id = %s", + (rtid,)) + if current: + import_priority, rtinfo, com, created, modified = current[0] + else: + elog('%s not present for migration' % (rtid,)) + return False + + if not rtinfo: + log("ignoring invalid data for issue %s" % (rtid,)) + return False + + def get_ref(id): + refexists = phabdb.reference_ticket('%s%s' % (rtlib.prepend, id)) + if refexists: + return refexists + + if get_ref(rtid): + log('reference ticket %s already exists' % (rtid,)) + return True + + viewpolicy = phabdb.get_project_phid('WMF-NDA') + if not viewpolicy: + elog("View policy group not present: %s" % (viewpolicy,)) + return False + + def remove_sig(content): + return re.split('--\s?\n', content)[0] + + # Example: + # id: ticket/8175/attachments\n + # Attachments: 141490: (Unnamed) (multipart/mixed / 0b), + # 141491: 
(Unnamed) (text/html / 23b), + # 141492: 0jp9B09.jpg (image/jpeg / 117.4k), + attachments = response.get(path="ticket/%s/attachments/" % (rtid,)) + if not attachments: + raise Exception("no attachment response: %s" % (rtid)) + + history = response.get(path="ticket/%s/history?format=l" % (rtid,)) + + + rtinfo = json.loads(rtinfo) + comments = json.loads(com) + vlog(rtid) + vlog(rtinfo) + + comment_dict = {} + for i, c in enumerate(comments): + cwork = {} + comment_dict[i] = cwork + if not 'Attachments:' in c: + pass + attachsplit = c.split('Attachments:') + if len(attachsplit) > 1: + body, attached = attachsplit[0], attachsplit[1] + else: + body, attached = c, '0' + comment_dict[i]['text_body'] = body + comment_dict[i]['attached'] = attached + + # Example: + # Ticket: 8175\nTimeTaken: 0\n + # Type: + # Create\nField: + # Data: \nDescription: Ticket created by cpettet\n\n + # Content: test ticket description\n\n\n + # Creator: cpettet\nCreated: 2014-08-21 21:21:38\n\n'} + params = {'id': 'id:(.*)', + 'ticket': 'Ticket:(.*)', + 'timetaken': 'TimeTaken:(.*)', + 'content': 'Content:(.*)', + 'creator': 'Creator:(.*)', + 'description': 'Description:(.*)', + 'created': 'Created:(.*)', + 'ovalue': 'OldValue:(.*)', + 'nvalue': 'NewValue:(.*)'} + + for k, v in comment_dict.iteritems(): + text_body = v['text_body'] + comment_dict[k]['body'] = {} + for paramkey, regex in params.iteritems(): + value = re.search(regex, text_body) + if value: + comment_dict[k]['body'][paramkey] = value.group(1).strip() + else: + comment_dict[k]['body'][paramkey] = None + + if 'Content' in text_body: + content = text_body.split('Content:')[1] + content = content.split('Creator:') + comment_dict[k]['body']['content'] = content + + creator = comment_dict[k]['body']['creator'] + if creator and '@' in creator: + comment_dict[k]['body']['creator'] = rtlib.sanitize_email(creator) + + #15475: untitled (18.7k) + comment_attachments= re.findall('(\d+):\s', v['attached']) + 
comment_dict[k]['body']['attached'] = comment_attachments + + # due to the nature of the RT api sometimes whitespacing becomes + # a noise comment + if not any(comment_dict[comment_dict.keys()[0]]['body'].values()): + vlog('dropping %s comment' % (str(comment_dict[comment_dict.keys()[0]],))) + del comment_dict[0] + + #attachments into a dict + def attach_to_kv(attachments_output): + attached = re.split('Attachments:', attachments_output, 1)[1] + ainfo = {} + for at in attached.strip().splitlines(): + if not at: + continue + k, v = re.split(':', at, 1) + ainfo[k.strip()] = v.strip() + return ainfo + + ainfo = attach_to_kv(attachments) + #lots of junk attachments from emailing comments and ticket creation + ainfo_f = {} + for k, v in ainfo.iteritems(): + if '(Unnamed)' not in v: + ainfo_f[k] = v + + #taking attachment text and convert to tuple (name, content type, size) + ainfo_ext = {} + comments = re.split("\d+\/\d+\s+\(id\/.\d+\/total\)", history) + attachregex = '(.*)\.(\S{3,4})\s\((.*)\s\/\s(.*)\)' + for k, v in ainfo_f.iteritems(): + # Handle general attachment case: + # NO: 686318802.html (application/octet-stream / 19.5k), + # YES: Summary_686318802.pdf (application/unknown / 215.3k), + print attachregex + print v + extract = re.search(attachregex, v) + print extract + # due to goofy email handling of signature/x-header/meta info + # it seems they sometimes + # become malformed attachments. 
Such as when a response into + # rt was directed to a mailinglist + # Example: + # ->Attached Message Part (text/plain / 158b) + # + # Private-l mailing list + # Private-l@lists.wikimedia.org + # https://lists.wikimedia.org/mailman/listinfo/private-l + if extract: + print "YES" + vlog(extract.groups()) + ainfo_ext[k] = extract.groups() + elif not extract and v.startswith('Attached Message Part'): + continue + else: + elog("no attachment CORRUPT or FAILED extraction: %s %s (%s)" % (k, v, rtid)) + print ainfo_ext + + attachment_types = ['pdf', + 'jpeg', + 'tgz', + 'jpg', + 'png', + 'xls', + 'xlsx', + 'gif', + 'html', + 'htm', + 'txt', + 'log', + 'zip', + 'rtf', + 'vcf', + 'eml'] + + #Uploading attachment + dl = [] + #('Quote Summary_686318802', 'pdf', 'application/unknown', '215.3k') + uploaded = {} + for k, v in ainfo_ext.iteritems(): + file_extension = v[1].lower() + # vendors have this weird habit of capitalizing extension names + # make sure we can handle the extension type otherwise + if file_extension not in attachment_types: + log("%s %s %s" % (rtid, v, file_extension)) + raise Exception('unknown extension: %s (%s)' % (v, rtid)) + full = "ticket/%s/attachments/%s/content" % (rtid, k) + + vcontent = response.get(path=full, + headers={'Content-Type': v[2], 'Content-Length': v[3] }) + try: + #PDF's don't react well to stripping header -- fine without it + if file_extension.strip() == 'pdf': + sanscontent = str(''.join(vcontent.readlines())) + else: + log("%s.%s" % (v[0], file_extension)) + vcontent = str(vcontent.readlines()) + sanscontent = ''.join(vcontent[2:]) + upload = phabm.upload_file("%s.%s" % (v[0], file_extension), + sanscontent, + viewpolicy) + uploaded[k] = upload + + except Exception as e: + print e + #elog("Attachment CORRUPT in source: %s" % (v[0] + file_extension,)) + + return + if rtinfo['Queue'] not in rtlib.enabled: + log("%s not in an enabled queue" % (rtid,)) + return True + + ptags = [] + if rtinfo['Queue'] == 'ops-requests': + 
ptags.append('operations') + + pname = rtlib.project_translate(rtinfo['Queue']) + ptags.append(pname) + + phids = [] + for p in ptags: + phids.append(phabm.ensure_project(p)) + + rtinfo['xpriority'] = rtlib.priority_convert(rtinfo['Priority']) + rtinfo['xstatus'] = rtlib.status_convert(rtinfo['Status']) + + import collections + # {'ovalue': u'open', + # 'description': u"Status changed from 'open' to 'resolved' by robh", + # 'nvalue': None, 'creator': u'robh', 'attached': [], + # 'timetaken': u'0', 'created': u'2011-07-01 02:47:24', + # 'content': [u' This transaction appears to have no content\n', u' + # robh\nCreated: 2011-07-01 02:47:24\n'], + # 'ticket': u'1000', 'id': u'23192'} + ordered_comments = collections.OrderedDict(sorted(comment_dict.items())) + upfiles = uploaded.keys() + + # much like bugzilla comment 0 is the task description + header = comment_dict[comment_dict.keys()[0]] + del comment_dict[comment_dict.keys()[0]] + dtext = '\n'.join([l.strip() for l in header['body']['content'][0].splitlines()]) + dtext = rtlib.shadow_emails(dtext) + full_description = "**Author:** `%s`\n\n**Description:**\n%s\n" % (rtinfo['Creator'].strip(), + dtext) + + + hafound = header['body']['attached'] + header_attachments = [] + for at in hafound: + if at in upfiles: + header_attachments.append('{F%s}' % uploaded[at]['id']) + if header_attachments: + full_description += '\n__________________________\n\n' + full_description += '\n'.join(header_attachments) + + vlog("Ticket Info: %s" % (full_description,)) + ticket = phab.maniphest.createtask(title=rtinfo['Subject'], + description=full_description, + projectPHIDs=phids, + ccPHIDs=[], + priority=rtinfo['xpriority'], + auxiliary={"std:maniphest:external_reference":"rt%s" % (rtid,)}) + + # XXX: perms + botphid = phabdb.get_phid_by_username(config.phab_user) + phabdb.set_task_title_transaction(ticket['phid'], + botphid, + 'public', + 'public') + + phabdb.set_task_ctime(ticket['phid'], rtlib.str_to_epoch(rtinfo['Created'])) + 
phabdb.set_task_policy(ticket['phid'], viewpolicy) + + vlog(str(ordered_comments)) + fmt_comments = {} + for comment, contents in comment_dict.iteritems(): + fmt_comment = {} + dbody = contents['body'] + if dbody['content'] is None and dbody['creator'] is None: + continue + elif dbody['content'] is None: + content = 'no content found' + else: + mailsan = rtlib.shadow_emails(dbody['content'][0]) + content_literal = [] + for c in mailsan.splitlines(): + if c.strip() and not c.lstrip().startswith('>'): + # in remarkup having '--' on a new line seems to bold last + # line so signatures really cause issues + if c.strip() == '--': + content_literal.append('%%%{0}%%%'.format(c.strip())) + else: + content_literal.append(unicode('{0}'.format(c.strip()))) + elif c.strip(): + content_literal.append(c.strip()) + else: + vlog("ignoring content line %s" % (c,)) + content = '\n'.join(content_literal) + + if 'This transaction appears to have no content' in content: + content = None + + auto_actions = ['Outgoing email about a comment recorded by RT_System', + 'Outgoing email recorded by RT_System'] + + if dbody['description'] in auto_actions: + vlog("ignoring comment: %s/%s" % (dbody['description'], content)) + continue + + preamble = unicode('') + cbody = unicode('') + if content: + preamble += unicode("`%s wrote:`\n\n" % (dbody['creator'].strip(),)) + cbody += unicode(content).strip() or 'no content' + if dbody['nvalue'] or dbody['ovalue']: + value_update = '' + value_update_text = rtlib.shadow_emails(dbody['description']) + value_update_text = value_update_text.replace('fsck.com-rt', 'https') + relations = ['Reference by ticket', + 'Dependency by', + 'Reference to ticket', + 'Dependency on', + 'Merged into ticket', + 'Membership in'] + + states = ['open', 'resolved', 'new', 'stalled'] + if any(map(lambda x: x in dbody['description'], relations)): + value_update = value_update_text + elif re.search('tags\s\S+\sadded', dbody['description']): + value_update = "%s added tag %s" % 
(dbody['creator'], dbody['nvalue']) + elif re.search('Taken\sby\s\S+', dbody['description']): + value_update = "Issue taken by **%s**" % (dbody['creator'],) + else: + value_update = "//%s//" % (value_update_text,) + cbody += value_update + + afound = contents['body']['attached'] + cbody_attachments = [] + for a in afound: + if a in upfiles: + cbody_attachments.append('{F%s}' % uploaded[a]['id']) + if cbody_attachments: + cbody += '\n__________________________\n\n' + cbody += '\n'.join(cbody_attachments) + fmt_comment['xattached'] = cbody_attachments + + phabm.task_comment(ticket['id'], preamble + cbody) + ctransaction = phabdb.last_comment(ticket['phid']) + created = rtlib.str_to_epoch_comments(dbody['created']) + phabdb.set_comment_time(ctransaction, + created) + fmt_comment['xctransaction'] = ctransaction + fmt_comment['preamble'] = preamble + fmt_comment['content'] = cbody + fmt_comment['created'] = created + # XXX TRX both ways? + #fmt_comment['creator'] = dbody['creator']user_lookup(name) + fmt_comments[created] = fmt_comment + + if rtinfo['Status'].lower() != 'open': + log('setting %s to status %s' % (rtid, rtinfo['xstatus'].lower())) + phabdb.set_issue_status(ticket['phid'], rtinfo['xstatus'].lower()) + + log("Created task: T%s (%s)" % (ticket['id'], ticket['phid'])) + phabdb.set_task_mtime(ticket['phid'], rtlib.str_to_epoch(rtinfo['LastUpdated'])) + xcomments = json.dumps(fmt_comments) + pmig.sql_x("UPDATE rt_meta SET xcomments=%s WHERE id = %s", (xcomments, rtid)) + pmig.sql_x("UPDATE rt_meta SET priority=%s, modified=%s WHERE id = %s", + (ipriority['creation_success'], now(), rtid)) + pmig.close() + return True + + +def run_create(rtid, tries=1): + if tries == 0: + pmig = phabdb.phdb(db=config.rtmigrate_db) + import_priority = pmig.sql_x("SELECT priority \ + FROM rt_meta \ + WHERE id = %s", \ + (rtid,)) + if import_priority: + pmig.sql_x("UPDATE rt_meta \ + SET priority=%s, modified=%s \ + WHERE id = %s", + (ipriority['creation_failed'], + now(), + rtid)) 
+ else: + elog("%s does not seem to exist" % (rtid)) + elog('failed to create %s' % (rtid,)) + pmig.close() + return False + try: + return create(rtid) + except Exception as e: + import traceback + tries -= 1 + time.sleep(5) + traceback.print_exc(file=sys.stdout) + elog('failed to grab %s (%s)' % (rtid, e)) + return run_create(rtid, tries=tries) + +def main(): + + if not util.can_edit_ref: + elog('%s reference field not editable on this install' % (rtid,)) + sys.exit(1) + + if 'failed' in sys.argv: + priority = ipriority['creation_failed'] + elif 'success' in sys.argv: + priority = ipriority['creation_success'] + else: + priority = None + + vlog("Grabbing for priority: %s" % (priority,)) + pmig = phdb(db=config.rtmigrate_db) + bugs = return_bug_list(dbcon=pmig, + priority=priority, + table='rt_meta') + pmig.close() + + #Serious business + if 'failed' in sys.argv or '-r' in sys.argv: + for b in bugs: + util.notice("Removing rtid %s" % (b,)) + log(util.remove_issue_by_bugid(b, rtlib.prepend)) + + from multiprocessing import Pool + pool = Pool(processes=int(config.bz_createmulti)) + _ = pool.map(run_create, bugs) + complete = len(filter(bool, _)) + failed = len(_) - complete + print '%s completed %s, failed %s' % (sys.argv[0], complete, failed) + +if __name__ == '__main__': + main() diff --git a/rt_fetch_new.py b/rt_fetch_new.py new file mode 100755 index 0000000..6614b97 --- /dev/null +++ b/rt_fetch_new.py @@ -0,0 +1,162 @@ +#!/usr/bin/env python +import time +import os +import re +import sys +import getpass +import ConfigParser +import json +sys.path.append('/home/rush/python-rtkit/') +from wmfphablib import phabdb +from wmfphablib import rtlib +from wmfphablib import log +from wmfphablib import vlog +from wmfphablib import errorlog as elog +from wmfphablib import return_bug_list +from rtkit import resource +from rtkit import authenticators +from rtkit import errors +from wmfphablib import ipriority +from wmfphablib import now +from wmfphablib import config + + +def 
fetch(tid): + + response = resource.RTResource(config.rt_url, + config.rt_login, + config.rt_passwd, + authenticators.CookieAuthenticator) + + log("fetching issue %s" % (tid,)) + tinfo = response.get(path="ticket/%s" % (tid,)) + history = response.get(path="ticket/%s/history?format=l" % (tid,)) + links = response.get(path="ticket/%s/links/show" % (tid,)) + vlog(tinfo) + + if re.search('\#\sTicket\s\d+\sdoes\snot\sexist.$', tinfo.strip()): + log("Skipped as source missing for %s" % (tid,)) + return 'missing' + + # some private todo's and such + if 'You are not allowed to display' in tinfo: + log("Skipped as access denied for %s" % (tid,)) + return 'denied' + + #breaking detailed history into posts + #23/23 (id/114376/total) + comments = re.split("\d+\/\d+\s+\(id\/.\d+\/total\)", history) + comments = [c.rstrip('#').rstrip('--') for c in comments] + + # we get back freeform text and create a dict + dtinfo = {} + link_dict = rtlib.links_to_dict(links) + dtinfo['links'] = link_dict + for cv in tinfo.strip().splitlines(): + if not cv: + continue + cv_kv = re.split(':', cv, 1) + if len(cv_kv) > 1: + k = cv_kv[0] + v = cv_kv[1] + dtinfo[k.strip()] = v.strip() + + vlog("Enabled queues: %s" % (str(rtlib.enabled))) + if dtinfo['Queue'] not in rtlib.enabled: + log("Skipped as disabled queue for %s (%s)" % (str(tid), dtinfo['Queue'])) + return 'disabled' + + com = json.dumps(comments) + tinfo = json.dumps(dtinfo) + + pmig = phabdb.phdb(db=config.rtmigrate_db, + user=config.rtmigrate_user, + passwd=config.rtmigrate_passwd) + + + creation_priority = ipriority['fetch_success'] + current = pmig.sql_x("SELECT * from rt_meta where id = %s", tid) + if current: + update_values = (creation_priority, tinfo, com, now(), now()) + pmig.sql_x("UPDATE rt_meta SET priority=%s, \ + header=%s, \ + comments=%s, \ + modified=%s \ + WHERE id = %s", + update_values) + vlog('update: ' + str(update_values)) + + else: + insert_values = (tid, creation_priority, tinfo, com, now(), now()) + + 
pmig.sql_x("INSERT INTO rt_meta \ + (id, priority, header, comments, created, modified) \ + VALUES (%s, %s, %s, %s, %s, %s)", + insert_values) + pmig.close() + return True + +def run_fetch(tid, tries=1): + if tries == 0: + pmig = phabdb.phdb(db=config.rtmigrate_db, + user=config.rtmigrate_user, + passwd=config.rtmigrate_passwd) + insert_values = (tid, ipriority['fetch_failed'], '', '', now(), now()) + + pmig.sql_x("INSERT INTO rt_meta \ + (id, priority, header, comments, created, modified) \ + VALUES (%s, %s, %s, %s, %s, %s)", + insert_values) + pmig.close() + elog('failed to grab %s' % (tid,)) + return False + try: + return fetch(tid) + except Exception as e: + import traceback + tries -= 1 + time.sleep(5) + traceback.print_exc(file=sys.stdout) + elog('failed to grab %s (%s)' % (tid, e)) + return run_fetch(tid, tries=tries) + +def main(): + + pmig = phabdb.phdb(db=config.rtmigrate_db, + user=config.rtmigrate_user, + passwd=config.rtmigrate_passwd) + + if 'failed' in sys.argv: + priority = ipriority['fetch_failed'] + else: + priority = None + + bugs = return_bug_list(dbcon=pmig, + priority=priority, + table='rt_meta') + pmig.close() + + from multiprocessing import Pool + pool = Pool(processes=int(config.bz_fetchmulti)) + _ = pool.map(run_fetch, bugs) + vlog(_) + denied = len([i for i in _ if i == 'denied']) + disabled = len([i for i in _ if i == 'disabled']) + missing = len([i for i in _ if i == 'missing']) + complete = len(filter(bool, [i for i in _ if i not in ['denied', 'disabled', 'missing']])) + known_bad = denied + disabled + missing + failed = (len(_) - known_bad) - complete + print '-----------------------------\n \ + %s Total %s\n \ + known bad %s (denied %s, disabled %s, missing %s)\n\n \ + completed %s, failed %s' % (sys.argv[0], + len(bugs), + known_bad, + denied, + disabled, + missing, + complete, + failed) + +if __name__ == '__main__': + main() diff --git a/wmfphablib/__init__.py b/wmfphablib/__init__.py index a83f8d7..eed8ec9 100755 --- 
a/wmfphablib/__init__.py +++ b/wmfphablib/__init__.py @@ -29,6 +29,7 @@ def tflatten(t_of_tuples): ipriority = {'creation_failed': 6, 'creation_success': 7, 'fetch_failed': 5, + 'fetch_success': 4, 'na': 0, 'denied': 2, 'missing': 3, diff --git a/wmfphablib/phabdb.py b/wmfphablib/phabdb.py index e0bf645..33d00cf 100755 --- a/wmfphablib/phabdb.py +++ b/wmfphablib/phabdb.py @@ -73,16 +73,19 @@ def is_bz_security_issue(id): else: return False -def get_issues_by_priority(dbcon, priority): +def get_issues_by_priority(dbcon, priority, table): """ get failed creations :param dbcon: db connector :param priority: int + :param table: str :returns: list """ + print table + print priority _ = dbcon.sql_x("SELECT id \ - from bugzilla_meta \ - where priority=%s", - (priority,), + from %s \ + where priority=%s" % (table, priority), + (), limit=None) if _ is None: return diff --git a/wmfphablib/rtlib.py b/wmfphablib/rtlib.py index c4f3001..225ec7d 100755 --- a/wmfphablib/rtlib.py +++ b/wmfphablib/rtlib.py @@ -82,8 +82,10 @@ def priority_convert(priority): def status_convert(status): statuses = { 'resolved': 'resolved', 'new': 'open', + 'rejected': 'declined', 'open': 'open', - 'stalled': 'needsinfo'} + 'stalled': 'stalled', + 'deleted': 'invalid'} return statuses[status.lower()] def links_to_dict(link_text): diff --git a/wmfphablib/util.py b/wmfphablib/util.py index 9c4369b..0100650 100644 --- a/wmfphablib/util.py +++ b/wmfphablib/util.py @@ -145,20 +145,20 @@ def remove_issue_by_bugid(bugid, ref): out += str(phabdb.reference_ticket("%s%s" % (ref, bugid))) return out -def return_bug_list(dbcon=None, priority=None): +def return_bug_list(dbcon=None, priority=None, table='bugzilla_meta'): if sys.stdin.isatty(): bugs = sys.argv[1:] else: bugs = sys.stdin.read().strip('\n').strip().split() - #if 'failed' in ''.join(sys.argv): if priority: if dbcon == None: print "cant find dbcon for priority buglist" return [] - bugs = phabdb.get_issues_by_priority(dbcon, priority) - #bugs = 
phabdb.get_failed_creations(dbcon) + bugs = phabdb.get_issues_by_priority(dbcon, + priority, + table=table) elif '-' in bugs[0]: start, stop = bugs[0].split('-') @@ -175,12 +175,10 @@ def return_bug_list(dbcon=None, priority=None): if not isinstance(bugs, list): print "Bug list not built" - return + return [] #exclude known bad bugs = [b for b in bugs if b not in bzlib.missing] log("Bugs count: %d" % (len(bugs))) - if bugs is None: - return [] return bugs |