#!/usr/bin/python import os, sys, shutil import MySQLdb import string import re import time from datetime import datetime from monitor import config from monitor import database def convert_time(time_str): if '-' in str: try: tup = time.strptime(str, "%Y-%m-%d %H:%M:%S") except: tup = time.strptime(str, "%Y-%m-%d-%H:%M") elif '/' in str: tup = time.strptime(str, "%m/%d/%Y") else: tup = time.strptime(str, "%m/%d/%Y") d_ret = datetime.fromtimestamp(time.mktime(tup)) return d_ret def open_rt_db(): try: rt_db = MySQLdb.connect(host=config.RT_DB_HOST, user=config.RT_DB_USER, passwd=config.RT_DB_PASSWORD, db=config.RT_DB_NAME) except Exception, err: print "Failed to connect to RT database: %s" %err return -1 return rt_db def fetch_from_db(db, sql): try: # create a 'cursor' (required by MySQLdb) c = db.cursor() c.execute(sql) except Exception, err: print "Could not execute RT query %s" %err return -1 # fetch all rows (list of lists) raw = c.fetchall() return raw def get_rt_tickets(date): print "open db connection" db = open_rt_db() if db == -1: return "" if date is None: date_select = "" else: #date_select = "AND tr.Created >= DATE_SUB(CURDATE(),INTERVAL %s DAY)" % date date_select = "AND tr.Created >= STR_TO_DATE('%s', '%%Y-%%m-%%d')" % date sql = """SELECT tk.id, tk.Queue, tr.Type, tr.Field, tr.OldValue, tr.NewValue, tr.Created, at.id, at.Subject, at.Content, us.Name FROM Tickets as tk, Transactions as tr, Users as us LEFT OUTER JOIN Attachments as at ON tr.id=at.TransactionId WHERE (tk.Queue=3 OR tk.Queue=22) AND tk.id=tr.ObjectId AND tk.id>0 AND us.id=tr.Creator %s""" % date_select #WHERE (tk.Queue=3 OR tk.Queue=22) AND tk.id=tr.ObjectId AND tk.id>12506 AND # WHERE (tk.Queue=22) AND tk.id=tr.ObjectId AND tk.id>40800 AND #WHERE (tk.Queue=22) AND tk.id=tr.ObjectId AND tk.id>39896 AND tk.id<42241 AND ## (oct15th2008) #WHERE (tk.Queue=22) AND tk.id=tr.ObjectId AND tk.id>40800 AND ## (1st3months) #WHERE (tk.Queue=3 OR tk.Queue=22) AND tk.id=tr.ObjectId AND tk.id>12506 # 12506 jan-1-2006 print sql print "run query" raw = fetch_from_db(db, sql) if raw == -1: return raw tickets = {} subject_map = {} def parse_ticket(x): ticket_id = int(x[0]) queue = int(x[1]) trtype = str(x[2]) field = x[3] oldvalue = x[4] newvalue = x[5] datecreated = x[6] # already a datetime object attachmentid = x[7] subject = x[8] content = x[9] creator = x[10] if ticket_id not in tickets: print "found new ticket_id", ticket_id tickets[ticket_id] = {'queue' : queue, 'transactions' : [] } if subject != "": subject_map[ticket_id] = subject elif ticket_id in subject_map: subject = subject_map[ticket_id] else: # subject == "" and no record in subject_map yet # should probably put on a queue to be processed later. print "no subject for %s" % ticket_id transaction = { 'type' : trtype, 'field' : field, 'oldvalue' : oldvalue, 'newvalue' : newvalue, 'datecreated' : datecreated, 'attachmentid' : attachmentid, 'creator' : creator, 'subject' : subject, 'content' : content, } tickets[ticket_id]['transactions'].append(transaction) print "sort data" list = map(parse_ticket, raw) db.close() return tickets # flow chart: # classify: # for each ticket # classify into category # remove from ticket set, add to classified-set # # add new search patterns, # re-run classify algorithm re_map = [ #('mom', {'pattern' : '.*pl_mom.*'}), #('technical-support', {'pattern' : '.*PlanetLab node.* down'}), #('technical-support', {'pattern' : 'Node .* was stopped by'}), # and opened #('technical-support', {'pattern' : 'bootcd|BootCD|bootCD|boot cd|boot CD|booting'}), #('technical-support', {'pattern' : '.* failed to authenticate'}), #('technical-support', {'pattern' : '.* fails to boot'}), #('technical-support', {'pattern' : '.* fail.* to boot'}), #('technical-support', {'pattern' : '.* failed to authenticate'}), #('technical-support', {'pattern' : 'curl (60)|.* CA certificates.*|peer certificate.*authenticated'}), #('technical-support', {'pattern' : '(usb|USB).*(key|Disk|stick|boot|help|problem|trouble)'}), #('complaint', {'pattern' : '.*omplaint|.*attack'}), #('complaint', {'pattern' : '.* stop .*'}), # and subject #('spam', {}),j #('user-support', {'pattern' : '(R|r)egistration|(R|r)egister'}), #('user-support', {'pattern' : 'password reset|reset password'}), ('user-support', {'pattern' : 'New PI account registration from'}), #('other', {}), ] def sort_tickets(tickets, re_map): ticket_count = len(tickets.keys()) marked_subject = 0 marked_content = 0 for ticket_id in sorted(tickets.keys()): for i,(name, pattern) in enumerate(re_map): if 'compile' not in pattern: pattern['compile'] = re.compile(pattern['pattern']) pat = pattern['compile'] for transaction in tickets[ticket_id]['transactions']: try: if transaction['subject'] and re.match(pat, transaction['subject']): print "ticket %s matches pattern %s: %s" % (ticket_id, pattern['pattern'], transaction['subject']) marked_subject += 1 break if transaction['content'] and re.match(pat, transaction['content']): print "ticket %s matches pattern %s: %s" % (ticket_id, pattern['pattern'], transaction['subject']) #if transaction['subject'] == "": # print transaction marked_content += 1 break except: import traceback print traceback.print_exc() print transaction print ticket_id print pattern sys.exit(1) print ticket_count print marked_subject print marked_content print ticket_count - marked_content - marked_content def main(): from optparse import OptionParser parser = OptionParser() parser.set_defaults(runsql=False,date=None) parser.add_option("", "--runsql", dest="runsql", action="store_true", help="Whether to collect data from the MySQL server before "+ "caching it, or to just use the previously collected data.") parser.add_option("", "--date", dest="date", help="Query all transactions after the given date.") (config, args) = parser.parse_args() if len(sys.argv) == 1: parser.print_help() sys.exit(1) if config.runsql: tickets = get_rt_tickets(config.date) database.dbDump("survey_tickets", tickets) else: print "loading" tickets = database.dbLoad("survey_tickets") if __name__ == '__main__': main()