diff options
author | cpettet <rush@wikimedia.org> | 2014-12-12 00:50:09 -0600 |
---|---|---|
committer | cpettet <rush@wikimedia.org> | 2014-12-12 00:51:58 -0600 |
commit | 51d54f4a314f978aaf713a91e0df764aa5d58f90 (patch) | |
tree | cebe8fcd6cdd16fbad67bc74b5a89897b37c73fc | |
parent | 9a30ffdaec03830966f408a4057da7ed276c6343 (diff) |
rt lots of misc fixes
* Had to reconsider some unicode compromises
* Add bz reference from RT
* Try to sanely represent literal stuff and quoted
* Add bugzilla reference with links
* Comments missing metadata
* Attachments with weird names / formats
-rwxr-xr-x | README | 6 | ||||
-rwxr-xr-x | rt_create.py | 201 | ||||
-rwxr-xr-x | rt_create_new.py | 485 | ||||
-rwxr-xr-x | rt_fetch_new.py | 162 | ||||
-rwxr-xr-x | wmfphablib/__init__.py | 1 | ||||
-rwxr-xr-x | wmfphablib/phabdb.py | 11 | ||||
-rwxr-xr-x | wmfphablib/rtlib.py | 4 | ||||
-rw-r--r-- | wmfphablib/util.py | 12 |
8 files changed, 819 insertions, 63 deletions
@@ -125,6 +125,10 @@ create table task_relations (id INT, priority INT, blocks TEXT(1000), modified I Issue examples: +Consistent Attachments: + +https://old-bugzilla.wikimedia.org/show_bug.cgi?id=72256 + BZ lots of attachments: https://bugzilla.wikimedia.org/show_bug.cgi?id=22881 has 11 @@ -139,3 +143,5 @@ https://bugzilla.wikimedia.org/show_bug.cgi?id=2 has 1 RT ISSUES: https://rt.wikimedia.org/Ticket/Display.html?id=8175 + +http://requesttracker.wikia.com/wiki/REST diff --git a/rt_create.py b/rt_create.py index e3c5447..8a202cd 100755 --- a/rt_create.py +++ b/rt_create.py @@ -1,4 +1,5 @@ #!/usr/bin/env python +#from __future__ import unicode_literals import time import json import os @@ -52,8 +53,8 @@ def create(rtid): if current: import_priority, rtinfo, com, created, modified = current[0] else: - elog('%s not present for migration' % (rtid,)) - return False + log('%s not present for migration' % (rtid,)) + return 'missing' if not rtinfo: log("ignoring invalid data for issue %s" % (rtid,)) @@ -66,11 +67,49 @@ def create(rtid): if get_ref(rtid): log('reference ticket %s already exists' % (rtid,)) - #return True + return True def remove_sig(content): return re.split('--\s?\n', content)[0] + def uob(obj, encoding='utf-8'): + """ unicode or bust""" + if isinstance(obj, basestring): + if not isinstance(obj, unicode): + obj = unicode(obj, encoding) + return obj + + def sanitize_text(line): + if line.strip() and not line.lstrip().startswith('>'): + # in remarkup having '--' on a new line seems to bold last + # line so signatures really cause issues + if all(map(lambda c: c in '-', line.strip())): + return '%%%{0}%%%'.format(line.strip()) + elif line.strip() == '-------- Original Message --------': + return '%%%{0}%%%'.format(line.strip()) + elif line.strip() == '---------- Forwarded message ----------': + return '%%%{0}%%%'.format(unicode(line.strip())) + elif line.strip().startswith('#'): + return uob('%%%') + uob(line.strip()) + uob('%%%') + else: + return 
uob(line).strip() + elif line.strip().startswith('>'): + quoted_content = line.lstrip('>').strip() + if not quoted_content.lstrip('>').strip(): + return line.strip() + if all(map(lambda c: c in '-', quoted_content.lstrip('>').strip())): + return "> ~~" + else: + return uob(line.strip()) + else: + vlog("ignoring content line %s" % (line,)) + return None + + viewpolicy = phabdb.get_project_phid('WMF-NDA') + if not viewpolicy: + elog("View policy group not present: %s" % (viewpolicy,)) + return False + # Example: # id: ticket/8175/attachments\n # Attachments: 141490: (Unnamed) (multipart/mixed / 0b), @@ -82,7 +121,6 @@ def create(rtid): history = response.get(path="ticket/%s/history?format=l" % (rtid,)) - rtinfo = json.loads(rtinfo) comments = json.loads(com) vlog(rtid) @@ -99,7 +137,7 @@ def create(rtid): body, attached = attachsplit[0], attachsplit[1] else: body, attached = c, '0' - comment_dict[i]['text_body'] = body + comment_dict[i]['text_body'] = unicode(body) comment_dict[i]['attached'] = attached # Example: @@ -184,30 +222,56 @@ def create(rtid): # Private-l mailing list # Private-l@lists.wikimedia.org # https://lists.wikimedia.org/mailman/listinfo/private-l + if extract: + fdetails = extract.groups() if not extract and v.startswith('Attached Message Part'): continue - elif not extract: - raise Exception("no attachment extraction: %s %s (%s)" % (k, v, rtid)) - continue - else: - vlog(extract.groups()) - ainfo_ext[k] = extract.groups() + if not extract: + extract = re.match('(\S+)\s\((.*)\/(.*)\),.*', v) + if not extract: + elog("attachment CORRUPT or FAILED extraction: %s %s (%s)" % (k, v, rtid)) + continue + + fdetails = extract.group(1), '', extract.group(2), extract.group(3) + + if not fdetails: + elog("attachment CORRUPT or FAILED extraction: %s %s (%s)" % (k, v, rtid)) + continue + ainfo_ext[k] = fdetails + vlog(ainfo_ext[k]) + # deb + # cgi attachment_types = ['pdf', 'jpeg', + 'asc', 'tgz', + 'csr', 'jpg', 'png', 'xls', - 'xlsx', + 'xls', + 'csv', + 
'docx', 'gif', 'html', 'htm', 'txt', + 'diff', 'log', 'zip', 'rtf', + 'tmpl', 'vcf', + 'pub', + 'sql', + 'odt', + 'p7s', + 'iso', + 'ods', + 'conf', + 'doc', + 'xff', 'eml'] #Uploading attachment @@ -216,11 +280,13 @@ def create(rtid): uploaded = {} for k, v in ainfo_ext.iteritems(): file_extension = v[1].lower() + # vendors have this weird habit of capitalizing extension names # make sure we can handle the extension type otherwise - if file_extension not in attachment_types: - log("%s %s %s" % (rtid, v, file_extension)) - raise Exception('unknown extension: %s (%s)' % (v, rtid)) + #if file_extension not in attachment_types: + # elog("Unknown Exception (%s) %s %s" % (rtid, v, file_extension)) + # #raise Exception('unknown extension: %s (%s)' % (v, rtid)) + full = "ticket/%s/attachments/%s/content" % (rtid, k) vcontent = response.get(path=full, headers={'Content-Type': v[2], 'Content-Length': v[3] }) #PDF's don't react well to stripping header -- fine without it @@ -230,12 +296,14 @@ def create(rtid): vcontent = vcontent.readlines() sanscontent = ''.join(vcontent[2:]) - #{u'mimeType': u'image/jpeg', u'authorPHID': u'PHID-USER-bn2kbod4i7geycrbicns', - #u'phid': u'PHID-FILE-ioj2mrujudkrekhl5pkl', u'name': u'0jp9B09.jpg', - #u'objectName': u'F25786', u'byteSize': u'120305', - #u'uri': u'http://fabapitest.wmflabs.org/file/data/t7j2qp7l5z4ou5qpbx2u/PHID-FILE-ioj2mrujudkrekhl5pkl/0jp9B09.jpg', - #u'dateCreated': u'1409345752', u'dateModified': u'1409345752', u'id': u'25786'} - upload = phabm.upload_file("%s.%s" % (v[0], file_extension), sanscontent) + if file_extension: + fname = "%s.%s" % (v[0], file_extension) + else: + fname = v[0] + + upload = phabm.upload_file(fname, + sanscontent, + viewpolicy) uploaded[k] = upload if rtinfo['Queue'] not in rtlib.enabled: @@ -243,6 +311,13 @@ def create(rtid): return True ptags = [] + + # In a practical sense ops-requets seemed to get tagged + # with straight Operations group in Phab so we backfill + # this for consistency. 
+ if rtinfo['Queue'] == 'ops-requests': + ptags.append('operations') + pname = rtlib.project_translate(rtinfo['Queue']) ptags.append(pname) @@ -267,8 +342,13 @@ def create(rtid): # much like bugzilla comment 0 is the task description header = comment_dict[comment_dict.keys()[0]] del comment_dict[comment_dict.keys()[0]] - dtext = '\n'.join([l.strip() for l in header['body']['content'][0].splitlines()]) - dtext = rtlib.shadow_emails(dtext) + + dtext_san = [] + dtext_list = header['body']['content'][0].splitlines() + for t in dtext_list: + dtext_san.append(sanitize_text(rtlib.shadow_emails(t))) + dtext = '\n'.join(filter(None, dtext_san)) + #dtext = '\n'.join(filter(None, sanitize_text(rtlib.shadow_emails(dtext_list)))) full_description = "**Author:** `%s`\n\n**Description:**\n%s\n" % (rtinfo['Creator'].strip(), dtext) @@ -278,9 +358,18 @@ def create(rtid): for at in hafound: if at in upfiles: header_attachments.append('{F%s}' % uploaded[at]['id']) - if header_attachments: + if 'CF.{Bugzilla ticket}' in rtinfo or header_attachments: full_description += '\n__________________________\n\n' - full_description += '\n'.join(header_attachments) + if 'CF.{Bugzilla ticket}' in rtinfo and rtinfo['CF.{Bugzilla ticket}']: + obzurl = 'https://old-bugzilla.wikimedia.org/show_bug.cgi?id=' + obz = "[[ %s%s | %s ]]" % (obzurl, + rtinfo['CF.{Bugzilla ticket}'], + rtinfo['CF.{Bugzilla ticket}'],) + bzref = int(rtinfo['CF.{Bugzilla ticket}'].strip()) + newbzref = bzref + 2000 + full_description += "Bugzilla Ticket: %s => %s\n" % (obz, '{T%s}' % (newbzref,)) + if header_attachments: + full_description += '\n'.join(header_attachments) vlog("Ticket Info: %s" % (full_description,)) ticket = phab.maniphest.createtask(title=rtinfo['Subject'], @@ -288,8 +377,8 @@ def create(rtid): projectPHIDs=phids, ccPHIDs=[], priority=rtinfo['xpriority'], - auxiliary={"std:maniphest:external_reference":"rt%s" % (rtid,), - "std:maniphest:security_topic":"%s" % ('none')}) + 
auxiliary={"std:maniphest:external_reference":"rt%s" % (rtid,)}) + # XXX: perms botphid = phabdb.get_phid_by_username(config.phab_user) phabdb.set_task_title_transaction(ticket['phid'], @@ -298,13 +387,13 @@ def create(rtid): 'public') phabdb.set_task_ctime(ticket['phid'], rtlib.str_to_epoch(rtinfo['Created'])) + phabdb.set_task_policy(ticket['phid'], viewpolicy) - vlog(str(ordered_comments)) + #vlog(str(ordered_comments)) fmt_comments = {} for comment, contents in comment_dict.iteritems(): fmt_comment = {} dbody = contents['body'] - print dbody if dbody['content'] is None and dbody['creator'] is None: continue elif dbody['content'] is None: @@ -313,20 +402,15 @@ def create(rtid): mailsan = rtlib.shadow_emails(dbody['content'][0]) content_literal = [] for c in mailsan.splitlines(): - if c.strip() and not c.lstrip().startswith('>'): - # in remarkup having '--' on a new line seems to bold last - # line so signatures really cause issues - if c.strip() == '--': - content_literal.append('%%%{0}%%%'.format(c.strip())) - else: - content_literal.append('{0}'.format(c.strip())) - elif c.strip(): - content_literal.append(c.strip()) - else: - vlog("ignoring content line %s" % (c,)) - content = '\n'.join(content_literal) - - if 'This transaction appears to have no content' in content: + content_literal.append(sanitize_text(c)) + content = '\n'.join(filter(None, content_literal)) + + # In case of attachment but not much else + if not content and dbody['attached']: + content = True + + void_content = 'This transaction appears to have no content' + if not content == True and void_content in content: content = None auto_actions = ['Outgoing email about a comment recorded by RT_System', @@ -339,8 +423,13 @@ def create(rtid): preamble = '' cbody = '' if content: + if dbody['creator'] is None: + dbody['creator'] = '//creator field not set in source//' preamble += "`%s wrote:`\n\n" % (dbody['creator'].strip(),) - cbody += "%s" % (content.strip() or 'no content',) + + if content == 
True: + content = '' + cbody += "%s" % (content.strip() or '//no content//',) if dbody['nvalue'] or dbody['ovalue']: @@ -376,7 +465,16 @@ def create(rtid): fmt_comment['xattached'] = cbody_attachments phabm.task_comment(ticket['id'], preamble + cbody) ctransaction = phabdb.last_comment(ticket['phid']) - created = rtlib.str_to_epoch_comments(dbody['created']) + + try: + created = rtlib.str_to_epoch_comments(dbody['created']) + except (ValueError, TypeError): + # A handful of issues seems to show NULL creation times + # for now reason: see 1953 for example of NULL + # 3001 for example of None + elog("Could not determine comment time for %s" % (rtid,)) + dbody['created'] = rtlib.str_to_epoch(rtinfo['Created']) + phabdb.set_comment_time(ctransaction, created) fmt_comment['xctransaction'] = ctransaction @@ -442,17 +540,18 @@ def main(): pmig.close() #Serious business - if 'failed' in sys.argv: + if 'failed' in sys.argv or '-r' in sys.argv: for b in bugs: - notice("Removing rtid %s" % (b,)) - log(util.remove_issue_by_bugid(b, bzlib.prepend)) + util.notice("Removing rtid %s" % (b,)) + log(util.remove_issue_by_bugid(b, rtlib.prepend)) from multiprocessing import Pool pool = Pool(processes=int(config.bz_createmulti)) _ = pool.map(run_create, bugs) - complete = len(filter(bool, _)) - failed = len(_) - complete - print '%s completed %s, failed %s' % (sys.argv[0], complete, failed) + missing = len([i for i in _ if i == 'missing']) + complete = len(filter(bool, [i for i in _ if i not in ['missing']])) + failed = (len(_) - missing) - complete + print '%s completed %s, missing %s, failed %s' % (sys.argv[0], complete, missing, failed) if __name__ == '__main__': main() diff --git a/rt_create_new.py b/rt_create_new.py new file mode 100755 index 0000000..32c53b4 --- /dev/null +++ b/rt_create_new.py @@ -0,0 +1,485 @@ +#!/usr/bin/env python +#from __future__ import unicode_literals +import time +import json +import os +import re +import sys +import getpass 
+sys.path.append('/home/rush/python-rtkit/') +from phabricator import Phabricator +from wmfphablib import Phab as phabmacros +from wmfphablib import errorlog as elog +from wmfphablib import return_bug_list +from wmfphablib import phdb +from wmfphablib import phabdb +from wmfphablib import mailinglist_phid +from wmfphablib import set_project_icon +from wmfphablib import log +from wmfphablib import util +from wmfphablib import rtlib +from wmfphablib import vlog +from wmfphablib import config +from wmfphablib import rtlib +from wmfphablib import datetime_to_epoch +from wmfphablib import epoch_to_datetime +from wmfphablib import now +from rtkit import resource +from rtkit import authenticators +from rtkit import errors +from wmfphablib import ipriority + + +def create(rtid): + + phab = Phabricator(config.phab_user, + config.phab_cert, + config.phab_host) + + phabm = phabmacros('', '', '') + phabm.con = phab + + pmig = phdb(db=config.rtmigrate_db) + + response = resource.RTResource(config.rt_url, + config.rt_login, + config.rt_passwd, + authenticators.CookieAuthenticator) + + current = pmig.sql_x("SELECT priority, header, \ + comments, created, modified \ + FROM rt_meta WHERE id = %s", + (rtid,)) + if current: + import_priority, rtinfo, com, created, modified = current[0] + else: + elog('%s not present for migration' % (rtid,)) + return False + + if not rtinfo: + log("ignoring invalid data for issue %s" % (rtid,)) + return False + + def get_ref(id): + refexists = phabdb.reference_ticket('%s%s' % (rtlib.prepend, id)) + if refexists: + return refexists + + if get_ref(rtid): + log('reference ticket %s already exists' % (rtid,)) + return True + + viewpolicy = phabdb.get_project_phid('WMF-NDA') + if not viewpolicy: + elog("View policy group not present: %s" % (viewpolicy,)) + return False + + def remove_sig(content): + return re.split('--\s?\n', content)[0] + + # Example: + # id: ticket/8175/attachments\n + # Attachments: 141490: (Unnamed) (multipart/mixed / 0b), + # 141491: 
(Unnamed) (text/html / 23b), + # 141492: 0jp9B09.jpg (image/jpeg / 117.4k), + attachments = response.get(path="ticket/%s/attachments/" % (rtid,)) + if not attachments: + raise Exception("no attachment response: %s" % (rtid)) + + history = response.get(path="ticket/%s/history?format=l" % (rtid,)) + + + rtinfo = json.loads(rtinfo) + comments = json.loads(com) + vlog(rtid) + vlog(rtinfo) + + comment_dict = {} + for i, c in enumerate(comments): + cwork = {} + comment_dict[i] = cwork + if not 'Attachments:' in c: + pass + attachsplit = c.split('Attachments:') + if len(attachsplit) > 1: + body, attached = attachsplit[0], attachsplit[1] + else: + body, attached = c, '0' + comment_dict[i]['text_body'] = body + comment_dict[i]['attached'] = attached + + # Example: + # Ticket: 8175\nTimeTaken: 0\n + # Type: + # Create\nField: + # Data: \nDescription: Ticket created by cpettet\n\n + # Content: test ticket description\n\n\n + # Creator: cpettet\nCreated: 2014-08-21 21:21:38\n\n'} + params = {'id': 'id:(.*)', + 'ticket': 'Ticket:(.*)', + 'timetaken': 'TimeTaken:(.*)', + 'content': 'Content:(.*)', + 'creator': 'Creator:(.*)', + 'description': 'Description:(.*)', + 'created': 'Created:(.*)', + 'ovalue': 'OldValue:(.*)', + 'nvalue': 'NewValue:(.*)'} + + for k, v in comment_dict.iteritems(): + text_body = v['text_body'] + comment_dict[k]['body'] = {} + for paramkey, regex in params.iteritems(): + value = re.search(regex, text_body) + if value: + comment_dict[k]['body'][paramkey] = value.group(1).strip() + else: + comment_dict[k]['body'][paramkey] = None + + if 'Content' in text_body: + content = text_body.split('Content:')[1] + content = content.split('Creator:') + comment_dict[k]['body']['content'] = content + + creator = comment_dict[k]['body']['creator'] + if creator and '@' in creator: + comment_dict[k]['body']['creator'] = rtlib.sanitize_email(creator) + + #15475: untitled (18.7k) + comment_attachments= re.findall('(\d+):\s', v['attached']) + 
comment_dict[k]['body']['attached'] = comment_attachments + + # due to the nature of the RT api sometimes whitespacing becomes + # a noise comment + if not any(comment_dict[comment_dict.keys()[0]]['body'].values()): + vlog('dropping %s comment' % (str(comment_dict[comment_dict.keys()[0]],))) + del comment_dict[0] + + #attachments into a dict + def attach_to_kv(attachments_output): + attached = re.split('Attachments:', attachments_output, 1)[1] + ainfo = {} + for at in attached.strip().splitlines(): + if not at: + continue + k, v = re.split(':', at, 1) + ainfo[k.strip()] = v.strip() + return ainfo + + ainfo = attach_to_kv(attachments) + #lots of junk attachments from emailing comments and ticket creation + ainfo_f = {} + for k, v in ainfo.iteritems(): + if '(Unnamed)' not in v: + ainfo_f[k] = v + + #taking attachment text and convert to tuple (name, content type, size) + ainfo_ext = {} + comments = re.split("\d+\/\d+\s+\(id\/.\d+\/total\)", history) + attachregex = '(.*)\.(\S{3,4})\s\((.*)\s\/\s(.*)\)' + for k, v in ainfo_f.iteritems(): + # Handle general attachment case: + # NO: 686318802.html (application/octet-stream / 19.5k), + # YES: Summary_686318802.pdf (application/unknown / 215.3k), + print attachregex + print v + extract = re.search(attachregex, v) + print extract + # due to goofy email handling of signature/x-header/meta info + # it seems they sometimes + # become malformed attachments. 
Such as when a response into + # rt was directed to a mailinglist + # Example: + # ->Attached Message Part (text/plain / 158b) + # + # Private-l mailing list + # Private-l@lists.wikimedia.org + # https://lists.wikimedia.org/mailman/listinfo/private-l + if extract: + print "YES" + vlog(extract.groups()) + ainfo_ext[k] = extract.groups() + elif not extract and v.startswith('Attached Message Part'): + continue + else: + elog("no attachment CORRUPT or FAILED extraction: %s %s (%s)" % (k, v, rtid)) + print ainfo_ext + + attachment_types = ['pdf', + 'jpeg', + 'tgz', + 'jpg', + 'png', + 'xls', + 'xlsx', + 'gif', + 'html', + 'htm', + 'txt', + 'log', + 'zip', + 'rtf', + 'vcf', + 'eml'] + + #Uploading attachment + dl = [] + #('Quote Summary_686318802', 'pdf', 'application/unknown', '215.3k') + uploaded = {} + for k, v in ainfo_ext.iteritems(): + file_extension = v[1].lower() + # vendors have this weird habit of capitalizing extension names + # make sure we can handle the extension type otherwise + if file_extension not in attachment_types: + log("%s %s %s" % (rtid, v, file_extension)) + raise Exception('unknown extension: %s (%s)' % (v, rtid)) + full = "ticket/%s/attachments/%s/content" % (rtid, k) + + vcontent = response.get(path=full, + headers={'Content-Type': v[2], 'Content-Length': v[3] }) + try: + #PDF's don't react well to stripping header -- fine without it + if file_extension.strip() == 'pdf': + sanscontent = str(''.join(vcontent.readlines())) + else: + log("%s.%s" % (v[0], file_extension)) + vcontent = str(vcontent.readlines()) + sanscontent = ''.join(vcontent[2:]) + upload = phabm.upload_file("%s.%s" % (v[0], file_extension), + sanscontent, + viewpolicy) + uploaded[k] = upload + + except Exception as e: + print e + #elog("Attachment CORRUPT in source: %s" % (v[0] + file_extension,)) + + return + if rtinfo['Queue'] not in rtlib.enabled: + log("%s not in an enabled queue" % (rtid,)) + return True + + ptags = [] + if rtinfo['Queue'] == 'ops-requests': + 
ptags.append('operations') + + pname = rtlib.project_translate(rtinfo['Queue']) + ptags.append(pname) + + phids = [] + for p in ptags: + phids.append(phabm.ensure_project(p)) + + rtinfo['xpriority'] = rtlib.priority_convert(rtinfo['Priority']) + rtinfo['xstatus'] = rtlib.status_convert(rtinfo['Status']) + + import collections + # {'ovalue': u'open', + # 'description': u"Status changed from 'open' to 'resolved' by robh", + # 'nvalue': None, 'creator': u'robh', 'attached': [], + # 'timetaken': u'0', 'created': u'2011-07-01 02:47:24', + # 'content': [u' This transaction appears to have no content\n', u' + # robh\nCreated: 2011-07-01 02:47:24\n'], + # 'ticket': u'1000', 'id': u'23192'} + ordered_comments = collections.OrderedDict(sorted(comment_dict.items())) + upfiles = uploaded.keys() + + # much like bugzilla comment 0 is the task description + header = comment_dict[comment_dict.keys()[0]] + del comment_dict[comment_dict.keys()[0]] + dtext = '\n'.join([l.strip() for l in header['body']['content'][0].splitlines()]) + dtext = rtlib.shadow_emails(dtext) + full_description = "**Author:** `%s`\n\n**Description:**\n%s\n" % (rtinfo['Creator'].strip(), + dtext) + + + hafound = header['body']['attached'] + header_attachments = [] + for at in hafound: + if at in upfiles: + header_attachments.append('{F%s}' % uploaded[at]['id']) + if header_attachments: + full_description += '\n__________________________\n\n' + full_description += '\n'.join(header_attachments) + + vlog("Ticket Info: %s" % (full_description,)) + ticket = phab.maniphest.createtask(title=rtinfo['Subject'], + description=full_description, + projectPHIDs=phids, + ccPHIDs=[], + priority=rtinfo['xpriority'], + auxiliary={"std:maniphest:external_reference":"rt%s" % (rtid,)}) + + # XXX: perms + botphid = phabdb.get_phid_by_username(config.phab_user) + phabdb.set_task_title_transaction(ticket['phid'], + botphid, + 'public', + 'public') + + phabdb.set_task_ctime(ticket['phid'], rtlib.str_to_epoch(rtinfo['Created'])) + 
phabdb.set_task_policy(ticket['phid'], viewpolicy) + + vlog(str(ordered_comments)) + fmt_comments = {} + for comment, contents in comment_dict.iteritems(): + fmt_comment = {} + dbody = contents['body'] + if dbody['content'] is None and dbody['creator'] is None: + continue + elif dbody['content'] is None: + content = 'no content found' + else: + mailsan = rtlib.shadow_emails(dbody['content'][0]) + content_literal = [] + for c in mailsan.splitlines(): + if c.strip() and not c.lstrip().startswith('>'): + # in remarkup having '--' on a new line seems to bold last + # line so signatures really cause issues + if c.strip() == '--': + content_literal.append('%%%{0}%%%'.format(c.strip())) + else: + content_literal.append(unicode('{0}'.format(c.strip()))) + elif c.strip(): + content_literal.append(c.strip()) + else: + vlog("ignoring content line %s" % (c,)) + content = '\n'.join(content_literal) + + if 'This transaction appears to have no content' in content: + content = None + + auto_actions = ['Outgoing email about a comment recorded by RT_System', + 'Outgoing email recorded by RT_System'] + + if dbody['description'] in auto_actions: + vlog("ignoring comment: %s/%s" % (dbody['description'], content)) + continue + + preamble = unicode('') + cbody = unicode('') + if content: + preamble += unicode("`%s wrote:`\n\n" % (dbody['creator'].strip(),)) + cbody += unicode(content).strip() or 'no content' + if dbody['nvalue'] or dbody['ovalue']: + value_update = '' + value_update_text = rtlib.shadow_emails(dbody['description']) + value_update_text = value_update_text.replace('fsck.com-rt', 'https') + relations = ['Reference by ticket', + 'Dependency by', + 'Reference to ticket', + 'Dependency on', + 'Merged into ticket', + 'Membership in'] + + states = ['open', 'resolved', 'new', 'stalled'] + if any(map(lambda x: x in dbody['description'], relations)): + value_update = value_update_text + elif re.search('tags\s\S+\sadded', dbody['description']): + value_update = "%s added tag %s" % 
(dbody['creator'], dbody['nvalue']) + elif re.search('Taken\sby\s\S+', dbody['description']): + value_update = "Issue taken by **%s**" % (dbody['creator'],) + else: + value_update = "//%s//" % (value_update_text,) + cbody += value_update + + afound = contents['body']['attached'] + cbody_attachments = [] + for a in afound: + if a in upfiles: + cbody_attachments.append('{F%s}' % uploaded[a]['id']) + if cbody_attachments: + cbody += '\n__________________________\n\n' + cbody += '\n'.join(cbody_attachments) + fmt_comment['xattached'] = cbody_attachments + + phabm.task_comment(ticket['id'], preamble + cbody) + ctransaction = phabdb.last_comment(ticket['phid']) + created = rtlib.str_to_epoch_comments(dbody['created']) + phabdb.set_comment_time(ctransaction, + created) + fmt_comment['xctransaction'] = ctransaction + fmt_comment['preamble'] = preamble + fmt_comment['content'] = cbody + fmt_comment['created'] = created + # XXX TRX both ways? + #fmt_comment['creator'] = dbody['creator']user_lookup(name) + fmt_comments[created] = fmt_comment + + if rtinfo['Status'].lower() != 'open': + log('setting %s to status %s' % (rtid, rtinfo['xstatus'].lower())) + phabdb.set_issue_status(ticket['phid'], rtinfo['xstatus'].lower()) + + log("Created task: T%s (%s)" % (ticket['id'], ticket['phid'])) + phabdb.set_task_mtime(ticket['phid'], rtlib.str_to_epoch(rtinfo['LastUpdated'])) + xcomments = json.dumps(fmt_comments) + pmig.sql_x("UPDATE rt_meta SET xcomments=%s WHERE id = %s", (xcomments, rtid)) + pmig.sql_x("UPDATE rt_meta SET priority=%s, modified=%s WHERE id = %s", + (ipriority['creation_success'], now(), rtid)) + pmig.close() + return True + + +def run_create(rtid, tries=1): + if tries == 0: + pmig = phabdb.phdb(db=config.rtmigrate_db) + import_priority = pmig.sql_x("SELECT priority \ + FROM rt_meta \ + WHERE id = %s", \ + (rtid,)) + if import_priority: + pmig.sql_x("UPDATE rt_meta \ + SET priority=%s, modified=%s \ + WHERE id = %s", + (ipriority['creation_failed'], + now(), + rtid)) 
+ else: + elog("%s does not seem to exist" % (rtid)) + elog('failed to create %s' % (rtid,)) + pmig.close() + return False + try: + return create(rtid) + except Exception as e: + import traceback + tries -= 1 + time.sleep(5) + traceback.print_exc(file=sys.stdout) + elog('failed to grab %s (%s)' % (rtid, e)) + return run_create(rtid, tries=tries) + +def main(): + + if not util.can_edit_ref: + elog('%s reference field not editable on this install' % (rtid,)) + sys.exit(1) + + if 'failed' in sys.argv: + priority = ipriority['creation_failed'] + elif 'success' in sys.argv: + priority = ipriority['creation_success'] + else: + priority = None + + vlog("Grabbing for priority: %s" % (priority,)) + pmig = phdb(db=config.rtmigrate_db) + bugs = return_bug_list(dbcon=pmig, + priority=priority, + table='rt_meta') + pmig.close() + + #Serious business + if 'failed' in sys.argv or '-r' in sys.argv: + for b in bugs: + util.notice("Removing rtid %s" % (b,)) + log(util.remove_issue_by_bugid(b, rtlib.prepend)) + + from multiprocessing import Pool + pool = Pool(processes=int(config.bz_createmulti)) + _ = pool.map(run_create, bugs) + complete = len(filter(bool, _)) + failed = len(_) - complete + print '%s completed %s, failed %s' % (sys.argv[0], complete, failed) + +if __name__ == '__main__': + main() diff --git a/rt_fetch_new.py b/rt_fetch_new.py new file mode 100755 index 0000000..6614b97 --- /dev/null +++ b/rt_fetch_new.py @@ -0,0 +1,162 @@ +#!/usr/bin/env python +import time +import os +import re +import sys +import getpass +import ConfigParser +import json +sys.path.append('/home/rush/python-rtkit/') +from wmfphablib import phabdb +from wmfphablib import rtlib +from wmfphablib import log +from wmfphablib import vlog +from wmfphablib import errorlog as elog +from wmfphablib import return_bug_list +from rtkit import resource +from rtkit import authenticators +from rtkit import errors +from wmfphablib import ipriority +from wmfphablib import now +from wmfphablib import config + + +def 
fetch(tid): + + response = resource.RTResource(config.rt_url, + config.rt_login, + config.rt_passwd, + authenticators.CookieAuthenticator) + + log("fetching issue %s" % (tid,)) + tinfo = response.get(path="ticket/%s" % (tid,)) + history = response.get(path="ticket/%s/history?format=l" % (tid,)) + links = response.get(path="ticket/%s/links/show" % (tid,)) + vlog(tinfo) + + if re.search('\#\sTicket\s\d+\sdoes\snot\sexist.$', tinfo.strip()): + log("Skipped as source missing for %s" % (tid,)) + return 'missing' + + # some private todo's and such + if 'You are not allowed to display' in tinfo: + log("Skipped as access denied for %s" % (tid,)) + return 'denied' + + #breaking detailed history into posts + #23/23 (id/114376/total) + comments = re.split("\d+\/\d+\s+\(id\/.\d+\/total\)", history) + comments = [c.rstrip('#').rstrip('--') for c in comments] + + # we get back freeform text and create a dict + dtinfo = {} + link_dict = rtlib.links_to_dict(links) + dtinfo['links'] = link_dict + for cv in tinfo.strip().splitlines(): + if not cv: + continue + cv_kv = re.split(':', cv, 1) + if len(cv_kv) > 1: + k = cv_kv[0] + v = cv_kv[1] + dtinfo[k.strip()] = v.strip() + + vlog("Enabled queues: %s" % (str(rtlib.enabled))) + if dtinfo['Queue'] not in rtlib.enabled: + log("Skipped as disabled queue for %s (%s)" % (str(tid), dtinfo['Queue'])) + return 'disabled' + + com = json.dumps(comments) + tinfo = json.dumps(dtinfo) + + pmig = phabdb.phdb(db=config.rtmigrate_db, + user=config.rtmigrate_user, + passwd=config.rtmigrate_passwd) + + + creation_priority = ipriority['fetch_success'] + current = pmig.sql_x("SELECT * from rt_meta where id = %s", tid) + if current: + update_values = (creation_priority, tinfo, com, now(), now()) + pmig.sql_x("UPDATE rt_meta SET priority=%s, \ + header=%s, \ + comments=%s, \ + modified=%s \ + WHERE id = %s", + update_values) + vlog('update: ' + str(update_values)) + + else: + insert_values = (tid, creation_priority, tinfo, com, now(), now()) + + 
pmig.sql_x("INSERT INTO rt_meta \ + (id, priority, header, comments, created, modified) \ + VALUES (%s, %s, %s, %s, %s, %s)", + insert_values) + pmig.close() + return True + +def run_fetch(tid, tries=1): + if tries == 0: + pmig = phabdb.phdb(db=config.rtmigrate_db, + user=config.rtmigrate_user, + passwd=config.rtmigrate_passwd) + insert_values = (tid, ipriority['fetch_failed'], '', '', now(), now()) + + pmig.sql_x("INSERT INTO rt_meta \ + (id, priority, header, comments, created, modified) \ + VALUES (%s, %s, %s, %s, %s, %s)", + insert_values) + pmig.close() + elog('failed to grab %s' % (tid,)) + return False + try: + return fetch(tid) + except Exception as e: + import traceback + tries -= 1 + time.sleep(5) + traceback.print_exc(file=sys.stdout) + elog('failed to grab %s (%s)' % (tid, e)) + return run_fetch(tid, tries=tries) + +def main(): + + pmig = phabdb.phdb(db=config.rtmigrate_db, + user=config.rtmigrate_user, + passwd=config.rtmigrate_passwd) + + if 'failed' in sys.argv: + priority = ipriority['fetch_failed'] + else: + priority = None + + bugs = return_bug_list(dbcon=pmig, + priority=priority, + table='rt_meta') + pmig.close() + + from multiprocessing import Pool + pool = Pool(processes=int(config.bz_fetchmulti)) + _ = pool.map(run_fetch, bugs) + vlog(_) + denied = len([i for i in _ if i == 'denied']) + disabled = len([i for i in _ if i == 'disabled']) + missing = len([i for i in _ if i == 'missing']) + complete = len(filter(bool, [i for i in _ if i not in ['denied', 'disabled', 'missing']])) + known_bad = denied + disabled + missing + failed = (len(_) - known_bad) - complete + print '-----------------------------\n \ + %s Total %s\n \ + known bad %s (denied %s, disabled %s, missing %s)\n\n \ + completed %s, failed %s' % (sys.argv[0], + len(bugs), + known_bad, + denied, + disabled, + missing, + complete, + failed) + +if __name__ == '__main__': + main() diff --git a/wmfphablib/__init__.py b/wmfphablib/__init__.py index a83f8d7..eed8ec9 100755 --- 
a/wmfphablib/__init__.py +++ b/wmfphablib/__init__.py @@ -29,6 +29,7 @@ def tflatten(t_of_tuples): ipriority = {'creation_failed': 6, 'creation_success': 7, 'fetch_failed': 5, + 'fetch_success': 4, 'na': 0, 'denied': 2, 'missing': 3, diff --git a/wmfphablib/phabdb.py b/wmfphablib/phabdb.py index e0bf645..33d00cf 100755 --- a/wmfphablib/phabdb.py +++ b/wmfphablib/phabdb.py @@ -73,16 +73,19 @@ def is_bz_security_issue(id): else: return False -def get_issues_by_priority(dbcon, priority): +def get_issues_by_priority(dbcon, priority, table): """ get failed creations :param dbcon: db connector :param priority: int + :param table: str :returns: list """ + print table + print priority _ = dbcon.sql_x("SELECT id \ - from bugzilla_meta \ - where priority=%s", - (priority,), + from %s \ + where priority=%s" % (table, priority), + (), limit=None) if _ is None: return diff --git a/wmfphablib/rtlib.py b/wmfphablib/rtlib.py index c4f3001..225ec7d 100755 --- a/wmfphablib/rtlib.py +++ b/wmfphablib/rtlib.py @@ -82,8 +82,10 @@ def priority_convert(priority): def status_convert(status): statuses = { 'resolved': 'resolved', 'new': 'open', + 'rejected': 'declined', 'open': 'open', - 'stalled': 'needsinfo'} + 'stalled': 'stalled', + 'deleted': 'invalid'} return statuses[status.lower()] def links_to_dict(link_text): diff --git a/wmfphablib/util.py b/wmfphablib/util.py index 9c4369b..0100650 100644 --- a/wmfphablib/util.py +++ b/wmfphablib/util.py @@ -145,20 +145,20 @@ def remove_issue_by_bugid(bugid, ref): out += str(phabdb.reference_ticket("%s%s" % (ref, bugid))) return out -def return_bug_list(dbcon=None, priority=None): +def return_bug_list(dbcon=None, priority=None, table='bugzilla_meta'): if sys.stdin.isatty(): bugs = sys.argv[1:] else: bugs = sys.stdin.read().strip('\n').strip().split() - #if 'failed' in ''.join(sys.argv): if priority: if dbcon == None: print "cant find dbcon for priority buglist" return [] - bugs = phabdb.get_issues_by_priority(dbcon, priority) - #bugs = 
phabdb.get_failed_creations(dbcon) + bugs = phabdb.get_issues_by_priority(dbcon, + priority, + table=table) elif '-' in bugs[0]: start, stop = bugs[0].split('-') @@ -175,12 +175,10 @@ def return_bug_list(dbcon=None, priority=None): if not isinstance(bugs, list): print "Bug list not built" - return + return [] #exclude known bad bugs = [b for b in bugs if b not in bzlib.missing] log("Bugs count: %d" % (len(bugs))) - if bugs is None: - return [] return bugs |