From 8814d16dc60e8027c9a30963b47dd77b54efcdbf Mon Sep 17 00:00:00 2001 From: Stephen Soltesz Date: Fri, 12 Jun 2009 16:27:07 +0000 Subject: [PATCH] add scrappy statistics gathering scripts --- statistics/aggregate-nm.py | 103 +++++++++++++ statistics/aggregate.py | 39 +++++ statistics/aggregatehistory.py | 133 ++++++++++++++++ statistics/correspondence.py | 33 ++++ statistics/harvestrt.py | 46 ++++++ statistics/monitorstats.py | 80 ++++++++++ statistics/nodebad.py | 152 +++++++++++++++++++ statistics/nodecommon.py | 200 ++++++++++++++++++++++++ statistics/nodediff-graph-better.py | 138 +++++++++++++++++ statistics/nodediff-graph.py | 100 ++++++++++++ statistics/nodediff-length.py | 107 +++++++++++++ statistics/nodediff.py | 128 ++++++++++++++++ statistics/nodequeryold.py | 216 ++++++++++++++++++++++++++ statistics/parserpms.py | 25 +++ statistics/rtsurvey.py | 226 ++++++++++++++++++++++++++++ statistics/sliceavg.py | 45 ++++++ statistics/usedtickets.py | 21 +++ 17 files changed, 1792 insertions(+) create mode 100755 statistics/aggregate-nm.py create mode 100755 statistics/aggregate.py create mode 100755 statistics/aggregatehistory.py create mode 100644 statistics/correspondence.py create mode 100755 statistics/harvestrt.py create mode 100644 statistics/monitorstats.py create mode 100755 statistics/nodebad.py create mode 100644 statistics/nodecommon.py create mode 100755 statistics/nodediff-graph-better.py create mode 100644 statistics/nodediff-graph.py create mode 100755 statistics/nodediff-length.py create mode 100755 statistics/nodediff.py create mode 100755 statistics/nodequeryold.py create mode 100755 statistics/parserpms.py create mode 100755 statistics/rtsurvey.py create mode 100755 statistics/sliceavg.py create mode 100755 statistics/usedtickets.py diff --git a/statistics/aggregate-nm.py b/statistics/aggregate-nm.py new file mode 100755 index 0000000..70a8574 --- /dev/null +++ b/statistics/aggregate-nm.py @@ -0,0 +1,103 @@ +#!/usr/bin/python + +from monitor.wrapper import plc +api = plc.getAuthAPI() + +from monitor import database +import time +from datetime import datetime, timedelta +import calendar + +import sys +import time +from monitor.model import * + +from monitorstats import * + + +def main(): + from monitor import parser as parsermodule + + parser = parsermodule.getParser() + parser.set_defaults(node=None, aggname='aggregatenm', archivedir='archive-pdb', field='nm', value='Y', fromtime=None, load=False, state='BOOT') + parser.add_option("", "--node", dest="node", metavar="nodename.edu", + help="A single node name to add to the nodegroup") + parser.add_option("", "--archivedir", dest="archivedir", metavar="filename", + help="Pickle file aggregate output.") + parser.add_option("", "--aggname", dest="aggname", metavar="filename", + help="Pickle file aggregate output.") + parser.add_option("", "--field", dest="field", metavar="key", + help="Which record field to extract from all files.") + parser.add_option("", "--value", dest="value", metavar="val", + help="Which value to look for in field.") + parser.add_option("", "--state", dest="state", metavar="key", + help="Which boot state to accept.") + parser.add_option("", "--load", action="store_true", + help="load aggregatenm rather than recreate it.") + parser.add_option("", "--fromtime", dest="fromtime", metavar="YYYY-MM-DD", + help="Specify a starting date from which to begin the query.") + config = parsermodule.parse_args(parser) + + archive = get_archive(config.archivedir) + agg = {} + + if config.fromtime: + begin = 
config.fromtime + else: + begin = "2008-09-28" + + d = datetime_fromstr(begin) + tdelta = timedelta(1) + verbose = 1 + + if not config.load: + while True: + file = get_filefromglob(d, "production.findbad", config.archivedir) + print archive.path + fb = archive.load(file) + try: + print "nodes: ", len(fb['nodes']) + state_count=0 + for node in fb['nodes']: + fb_nodeinfo = fb['nodes'][node]['values'] + time = d.strftime("%Y-%m-%d") + + if type(fb_nodeinfo) == type([]): + continue + + if fb_nodeinfo['state'] != config.state: + continue + state_count += 1 + + if node not in agg: + agg[node] = { 'total' : 0, 'up' : 0} + + agg[node]['total'] += 1 + if fb_nodeinfo[config.field] == config.value: + agg[node]['up'] += 1 + print "%s nodes in state %s" % ( state_count, config.state ) + + del fb + verbose = 0 + except SystemExit: + sys.exit(1) + except KeyboardInterrupt: + sys.exit(1) + except: + import traceback; print traceback.print_exc() + print d.strftime("%Y-%m-%d"), "No record" + + d = d + tdelta + if d > datetime.now(): break + else: + agg = database.dbLoad(config.aggname) + + for node in agg: + if agg[node]['total'] > 0: + if agg[node]['up'] != agg[node]['total']: + print "%s %s" % (node, float(agg[node]['up']) / float(agg[node]['total'])) + + database.dbDump(config.aggname, agg) + +if __name__ == "__main__": + main() diff --git a/statistics/aggregate.py b/statistics/aggregate.py new file mode 100755 index 0000000..371a2b8 --- /dev/null +++ b/statistics/aggregate.py @@ -0,0 +1,39 @@ +#!/usr/bin/python + + +from monitor import database +import time +import sys + +actall = database.dbLoad("act_all_080825") +agg = database.dbLoad("aggregatehistory") + +for node in actall.keys(): + for record in actall[node]: + if 'date_created' in record: + t = record['date_created'] + elif 'time' in record: + t = record['time'] + else: + continue + + acttime = time.strftime("%Y-%m-%d", time.localtime(t)) + + if acttime > '2007-11-06': + if 'noop' in record['action']: + if node in agg: + for ntime,state in agg[node]: + if state == 'BOOT': + if ntime > acttime: + if type(record['action']) == type([]): + action = record['action'][0] + else: + action = record['action'] + print acttime, action, ntime, state, node + + #print time.strftime("%Y-%m-%d", time.localtime(t)), record['action'], node + +#for node in agg: +# for ntime,state in agg[node]: +# if state == 'BOOT': +# print ntime, state, node diff --git a/statistics/aggregatehistory.py b/statistics/aggregatehistory.py new file mode 100755 index 0000000..588d24c --- /dev/null +++ b/statistics/aggregatehistory.py @@ -0,0 +1,133 @@ +#!/usr/bin/python + +import plc +api = plc.getAuthAPI() + +import database +import reboot +import time +from datetime import datetime, timedelta +import calendar + +import sys +import time +from monitor.model import * +from nodecommon import * + +def get_filefromglob(d, str): + import os + import glob + # TODO: This is aweful. 
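+ # chdir into archive-pdb, take the first pickle matching
+ # "<YYYY-MM-DD>*.<str>.pkl", and return its basename without the ".pkl"
+ # extension so database.SPickle.load() can reload it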
+ path = "archive-pdb" + archive = database.SPickle(path) + glob_str = "%s*.%s.pkl" % (d.strftime("%Y-%m-%d"), str) + os.chdir(path) + #print glob_str + file = glob.glob(glob_str)[0] + #print "loading %s" % file + os.chdir("..") + return file[:-4] + #fb = archive.load(file[:-4]) + + +def fb_print_nodeinfo(fbnode, verbose, date=None): + if verbose: print " state | ssh | pcu | bootcd | category | kernel" + if 'checked' in fbnode: + print "%11.11s " % diff_time(fbnode['checked']), + else: + if date: print date, + else: print "Unknown", + + if fbnode['bootcd']: + fbnode['bootcd'] = fbnode['bootcd'].split()[-1] + else: + fbnode['bootcd'] = "unknown" + fbnode['state'] = color_boot_state(get_current_state(fbnode)) + if len(fbnode['kernel'].split()) >= 3: + fbnode['kernel'] = fbnode['kernel'].split()[2] + print " %(state)5s | %(ssh)5.5s | %(pcu)5.5s | %(bootcd)6.6s | %(category)8.8s | %(kernel)s" % fbnode + +def pcu_print_info(pcuinfo, hostname): + print " Checked: ", + if 'checked' in pcuinfo: + print "%11.11s " % diff_time(pcuinfo['checked']) + else: + print "Unknown" + + print "\t user | password | port | hostname " + print "\t %17s | %17s | %4s | %30s | %s" % \ + (pcuinfo['username'], pcuinfo['password'], + pcuinfo[hostname], reboot.pcu_name(pcuinfo), pcuinfo['model']) + + if 'portstatus' in pcuinfo and pcuinfo['portstatus'] != {}: + if pcuinfo['portstatus']['22'] == "open": + print "\t ssh -o PasswordAuthentication=yes -o PubkeyAuthentication=no %s@%s" % (pcuinfo['username'], reboot.pcu_name(pcuinfo)) + if pcuinfo['portstatus']['23'] == "open": + print "\t telnet %s" % (reboot.pcu_name(pcuinfo)) + if pcuinfo['portstatus']['80'] == "open" or \ + pcuinfo['portstatus']['443'] == "open": + print "\t http://%s" % (reboot.pcu_name(pcuinfo)) + if pcuinfo['portstatus']['443'] == "open": + print "\t racadm.py -r %s -u %s -p '%s'" % (pcuinfo['ip'], pcuinfo['username'], pcuinfo['password']) + print "\t cmdhttps/locfg.pl -s %s -f iloxml/Reset_Server.xml -u %s -p '%s' | grep MESSAGE" % \ + (reboot.pcu_name(pcuinfo), pcuinfo['username'], pcuinfo['password']) + +agg = {} + +def main(): + import parser as parsermodule + + parser = parsermodule.getParser() + parser.set_defaults(node=None, fields='state', fromtime=None) + parser.add_option("", "--node", dest="node", metavar="nodename.edu", + help="A single node name to add to the nodegroup") + parser.add_option("", "--fields", dest="fields", metavar="key", + help="Which record field to extract from all files.") + parser.add_option("", "--fromtime", dest="fromtime", metavar="YYYY-MM-DD", + help="Specify a starting date from which to begin the query.") + config = parsermodule.parse_args(parser) + + path = "archive-pdb" + archive = database.SPickle(path) + + if config.fromtime: + begin = config.fromtime + else: + begin = "2007-11-06" + + d = datetime_fromstr(begin) + tdelta = timedelta(1) + verbose = 1 + + while True: + try: + file = get_filefromglob(d, "production.findbad") + fb = archive.load(file) + for node in fb['nodes']: + fb_nodeinfo = fb['nodes'][node]['values'] + state = fb_nodeinfo['state'] + time = d.strftime("%Y-%m-%d") + if node not in agg: + agg[node] = [] + if len(agg[node]) == 0: + agg[node].append((time, state)) + else: + oldtime = agg[node][-1][0] + oldstate = agg[node][-1][1] + if oldstate != state: + agg[node].append((time, state)) + del fb + verbose = 0 + except KeyboardInterrupt: + sys.exit(1) + except: + #import traceback; print traceback.print_exc() + print d.strftime("%Y-%m-%d"), "No record" + + d = d + tdelta + if d > datetime.now(): 
break + + database.dbDump("aggregatehistory", agg) + +if __name__ == "__main__": + main() diff --git a/statistics/correspondence.py b/statistics/correspondence.py new file mode 100644 index 0000000..db9ad7b --- /dev/null +++ b/statistics/correspondence.py @@ -0,0 +1,33 @@ + + +def dt_mod_range(dt, range=(60*60*24*7)): + t_stamp = time.mktime(dt.timetuple()) + t_stamp -= (t_stamp % range) + dt_ret = datetime.datetime.fromtimestamp(t_stamp) + return dt_ret + +SUPPORT =3 +MONITOR =22 + +weekly_bin = {} +c = 0 +for ticket in tickets.keys(): + if tickets[ticket]['queue'] != MONITOR: continue + for t in tickets[ticket]['transactions']: + if t['type'] == 'Correspond': + #print t['datecreated'], t['field'], t['oldvalue'], t['type'], t['newvalue'], t['subject'] + k = dt_mod_range(t['datecreated']) + s_key = k.strftime("%Y-%m-%d") + if s_key not in weekly_bin: weekly_bin[s_key] = 0 + + weekly_bin[s_key] += 1 + + # c += 1 + #if c > 100 : break; + #break; + +dates = weekly_bin.keys() +dates.sort() +for t in dates: + print t, ",", weekly_bin[t] + diff --git a/statistics/harvestrt.py b/statistics/harvestrt.py new file mode 100755 index 0000000..f3940e0 --- /dev/null +++ b/statistics/harvestrt.py @@ -0,0 +1,46 @@ +#!/usr/bin/python + +import os +import time +from datetime import datetime, timedelta +import sys + +def popen(cmdstr): + f = os.popen(cmdstr) + ret = f.read() + return ret + +def datetime_fromstr(str): + if '-' in str: + try: + tup = time.strptime(str, "%Y-%m-%d") + except: + tup = time.strptime(str, "%Y-%m-%d-%H:%M") + elif '/' in str: + tup = time.strptime(str, "%m/%d/%Y") + else: + tup = time.strptime(str, "%m/%d/%Y") + ret = datetime.fromtimestamp(time.mktime(tup)) + return ret + + +def main(): + queue = sys.argv[1] + d1 = datetime_fromstr(sys.argv[2]) + iterations = int(sys.argv[3]) + i = 0 + while i < iterations: + d1_s = d1.strftime("%Y-%m-%d") + d2 = d1 + timedelta(30) + d2_s = d2.strftime("%Y-%m-%d") + query = "Queue='%s' and " % queue + query = query + "Told > '%s' and Told < '%s'" % (d1_s, d2_s) + cmd = """rt ls -t ticket "%s" | grep -v "No matching" | wc -l """ % query + print cmd + ret = popen(cmd) + print d1_s, ",", ret[:-1] + d1=d2 + i += 1 + +if __name__ == "__main__": + main() diff --git a/statistics/monitorstats.py b/statistics/monitorstats.py new file mode 100644 index 0000000..8fc24d5 --- /dev/null +++ b/statistics/monitorstats.py @@ -0,0 +1,80 @@ + +from monitor import database +from datetime import datetime, timedelta +import os +import glob +import time + +from monitor import config + +def datetime_fromstr(str): + if '-' in str: + try: + tup = time.strptime(str, "%Y-%m-%d") + except: + tup = time.strptime(str, "%Y-%m-%d-%H:%M") + elif '/' in str: + tup = time.strptime(str, "%m/%d/%Y") + else: + tup = time.strptime(str, "%m/%d/%Y") + ret = datetime.fromtimestamp(time.mktime(tup)) + return ret + +def get_filefromglob(d, str, path="archive-pdb", returnlist=False): + # TODO: This is aweful. 
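+ # remember the caller's cwd, glob for "<YYYY-MM-DD>*.<str>.pkl" under
+ # MONITOR_SCRIPT_ROOT/<path>, restore the cwd, and return either the first
+ # match or the sorted list of matches (".pkl" extension stripped)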
+ startpath = os.getcwd() + os.chdir(config.MONITOR_SCRIPT_ROOT + "/" + path) + + #archive = database.SPickle(path) + glob_str = "%s*.%s.pkl" % (d.strftime("%Y-%m-%d"), str) + fg_list = [ x[:-4] for x in glob.glob(glob_str) ] + + os.chdir(startpath) + + if returnlist: + return sorted(fg_list) + else: + return fg_list[0] + +def get_archive(path): + full_path = config.MONITOR_SCRIPT_ROOT + "/" + path + return database.SPickle(full_path) + +def print_graph(data, begin, end, xaxis, offset=500, window=100): + s1=[] + s2=[] + s3=[] + for row in data: + s1.append(row[0]) + s2.append(row[1]) + s3.append(row[2]) + + delta=offset + s1 = map(lambda x: x-delta, s1) + rlow= zip(s1,s3) + rhigh = zip(s1,s2) + diff_low = map(lambda x: x[0]-x[1], rlow) + diff_high = map(lambda x: x[0]+x[1], rhigh) + s1 = map(lambda x: str(x), s1) + diff_low = map(lambda x: str(x), diff_low) + diff_high = map(lambda x: str(x), diff_high) + print s1 + print diff_low + print diff_high + print "http://chart.apis.google.com/chart?cht=lc&chds=0,100&chxt=x,y&chxl=0:%s1:|500|550|600&chs=700x200&chm=F,aaaaaa,1,-1,2&chd=t1:%s" % (xaxis, ",".join(s1) + "|" + ",".join(diff_low) + "|" + ",".join(s1) + "|" + ",".join(s1) +"|" + ",".join(diff_high) ) + +def get_xaxis(list, width=16, wide=False): + # 3 for odd + # 4 for even + # 5 for wide odd + # 6 for wide even + list_len = len(list) + if list_len == 0: return "||" + + is_even = list_len % 2 == 0 + #if is_even: + # xaxis = "|" + list[0][:width] + "|" + list[-1][:width] + "|" + #else: + xaxis = "|" + list[0][:width] + "|" + list[list_len/2 + 1][:width] + "|" + list[-1][:width] + "|" + return xaxis + diff --git a/statistics/nodebad.py b/statistics/nodebad.py new file mode 100755 index 0000000..eec69be --- /dev/null +++ b/statistics/nodebad.py @@ -0,0 +1,152 @@ +#!/usr/bin/python + +import os +import sys +import string +import time + + +from monitor import database +from nodequeryold import verify,query_to_dict,node_select +from monitor.common import * + +from monitor.wrapper import plc +api = plc.getAuthAPI() +from monitor.model import * + +round = 1 +externalState = {'round': round, 'nodes': {}} +count = 0 + +def main(config): + global externalState + externalState = database.if_cached_else(1, config.dbname, lambda : externalState) + if config.increment: + # update global round number to force refreshes across all nodes + externalState['round'] += 1 + + #l_nodes = syncplcdb.create_plcdb() + l_plcnodes = database.dbLoad("l_plcnodes") + + l_nodes = get_nodeset(config) + #if config.node: + # l_nodes = [config.node] + ##else: + # l_nodes = [node['hostname'] for node in l_plcnodes] + + checkAndRecordState(l_nodes, l_plcnodes) + +def checkAndRecordState(l_nodes, l_plcnodes): + global externalState + global count + global_round = externalState['round'] + + for nodename in l_nodes: + if nodename not in externalState['nodes']: + externalState['nodes'][nodename] = {'round': 0, 'values': []} + + node_round = externalState['nodes'][nodename]['round'] + if node_round < global_round: + # do work + values = collectStatusAndState(nodename, l_plcnodes) + global_round = externalState['round'] + externalState['nodes'][nodename]['values'] = values + externalState['nodes'][nodename]['round'] = global_round + else: + count += 1 + + if count % 20 == 0: + database.dbDump(config.dbname, externalState) + + database.dbDump(config.dbname, externalState) + +fb = database.dbLoad('findbad') + +def getnodesup(nodelist): + up = 0 + for node in nodelist: + if node['hostname'] in fb['nodes'].keys(): + try: + if 
fb['nodes'][node['hostname']]['values']['state'] == "BOOT": + up = up + 1 + except: + pass + return up + +def get(fb, path): + indexes = path.split("/") + values = fb + for index in indexes: + if index in values: + values = values[index] + else: + return None + return values + +def collectStatusAndState(nodename, l_plcnodes): + global count + + d_node = None + for node in l_plcnodes: + if node['hostname'] == nodename: + d_node = node + break + if not d_node: + return None + + pf = PersistFlags(nodename, 1, db='node_persistflags') + + if not pf.checkattr('last_changed'): + pf.last_changed = time.time() + + pf.last_checked = time.time() + + if not pf.checkattr('status'): + pf.status = "unknown" + + state_path = "nodes/" + nodename + "/values/state" + bootstate_path = "nodes/" + nodename + "/values/plcnode/boot_state" + + if get(fb, state_path) == "BOOT": + if pf.status != "good": pf.last_changed = time.time() + pf.status = "good" + elif get(fb, state_path) == "DEBUG": + bs = get(fb, bootstate_path) + if pf.status != bs: pf.last_changed = time.time() + pf.status = bs + else: + if pf.status != "down": pf.last_changed = time.time() + pf.status = "down" + + count += 1 + print "%d %35s %s since(%s)" % (count, nodename, pf.status, diff_time(pf.last_changed)) + # updated by other modules + #pf.enabled = + #pf.suspended = + + pf.save() + + return True + +if __name__ == '__main__': + import parser as parsermodule + parser = parsermodule.getParser(['nodesets']) + parser.set_defaults(filename=None, node=None, nodeselect=False, nodegroup=None, + increment=False, dbname="nodebad", cachenodes=False) + + parser.add_option("", "--dbname", dest="dbname", metavar="FILE", + help="Specify the name of the database to which the information is saved") + parser.add_option("-i", "--increment", action="store_true", dest="increment", + help="Increment round number to force refresh or retry") + parser = parsermodule.getParser(['defaults'], parser) + config = parsermodule.parse_args(parser) + + try: + main(config) + except Exception, err: + import traceback + print traceback.print_exc() + print "Exception: %s" % err + print "Saving data... exitting." 
+ database.dbDump(config.dbname, externalState) + sys.exit(0) diff --git a/statistics/nodecommon.py b/statistics/nodecommon.py new file mode 100644 index 0000000..042f80f --- /dev/null +++ b/statistics/nodecommon.py @@ -0,0 +1,200 @@ + +import struct +import time +from monitor.util import file +from monitor.wrapper import plc +from datetime import datetime +from monitor import database +esc = struct.pack('i', 27) +RED = esc + "[1;31m" +GREEN = esc + "[1;32m" +YELLOW = esc + "[1;33m" +BLUE = esc + "[1;34m" +LIGHTBLUE = esc + "[1;36m" +NORMAL = esc + "[0;39m" + +def red(str): + return RED + str + NORMAL + +def yellow(str): + return YELLOW + str + NORMAL + +def green(str): + return GREEN + str + NORMAL + +def lightblue(str): + return LIGHTBLUE + str + NORMAL + +def blue(str): + return BLUE + str + NORMAL + +def get_current_state(fbnode): + if 'state' in fbnode: + state = fbnode['state'] + else: + state = "none" + l = state.lower() + if l == "debug": l = 'dbg ' + return l + +def color_pcu_state(fbnode): + import reboot + + if 'plcnode' in fbnode and 'pcu_ids' in fbnode['plcnode'] and len(fbnode['plcnode']['pcu_ids']) > 0 : + values = reboot.get_pcu_values(fbnode['plcnode']['pcu_ids'][0]) + if values == None: + return fbnode['pcu'] + else: + if 'pcu' not in fbnode: + return 'NOPCU' + else: + return fbnode['pcu'] + + if 'reboot' in values: + rb = values['reboot'] + if rb == 0 or rb == "0": + return fbnode['pcu'] + "OK " + #return fbnode['pcu'] + "OK " + #return green(fbnode['pcu']) + elif "NetDown" == rb or "Not_Run" == rb: + return fbnode['pcu'] + "DOWN" + #return yellow(fbnode['pcu']) + else: + return fbnode['pcu'] + "BAD " + #return red(fbnode['pcu']) + else: + #return red(fbnode['pcu']) + return fbnode['pcu'] + "BAD " + +def color_boot_state(l): + if l == "dbg": return yellow("debg") + elif l == "dbg ": return yellow("debg") + elif l == "diag": return lightblue(l) + elif l == "disable": return red("dsbl") + elif l == "down": return red(l) + elif l == "boot": return green(l) + elif l == "rins": return blue(l) + else: + return l + +def diff_time(timestamp, abstime=True): + import math + now = time.time() + if timestamp == None: + return "unknown" + if abstime: + diff = now - timestamp + else: + diff = timestamp + # return the number of seconds as a difference from current time. + t_str = "" + if diff < 60: # sec in min. 
+ t = diff / 1 + t_str = "%s sec ago" % int(math.ceil(t)) + elif diff < 60*60: # sec in hour + t = diff / (60) + t_str = "%s min ago" % int(math.ceil(t)) + elif diff < 60*60*24: # sec in day + t = diff / (60*60) + t_str = "%s hrs ago" % int(math.ceil(t)) + elif diff < 60*60*24*14: # sec in week + t = diff / (60*60*24) + t_str = "%s days ago" % int(math.ceil(t)) + elif diff <= 60*60*24*30: # approx sec in month + t = diff / (60*60*24*7) + t_str = "%s wks ago" % int(math.ceil(t)) + elif diff > 60*60*24*30: # approx sec in month + t = diff / (60*60*24*30) + t_str = "%s mnths ago" % int(t) + return t_str + +def getvalue(fb, path): + indexes = path.split("/") + values = fb + for index in indexes: + if index in values: + values = values[index] + else: + return None + return values + +def nodegroup_display(node, fb, conf=None): + from unified_model import PersistFlags + if node['hostname'] in fb['nodes']: + node['current'] = get_current_state(fb['nodes'][node['hostname']]['values']) + else: + node['current'] = 'none' + + if fb['nodes'][node['hostname']]['values'] == []: + return "" + + s = fb['nodes'][node['hostname']]['values']['kernel'].split() + if len(s) >=3: + node['kernel'] = s[2] + else: + node['kernel'] = fb['nodes'][node['hostname']]['values']['kernel'] + + if '2.6' not in node['kernel']: node['kernel'] = "" + if conf and not conf.nocolor: + node['boot_state'] = color_boot_state(node['boot_state']) + node['current'] = color_boot_state(node['current']) + #node['boot_state'] = node['boot_state'] + #node['current'] = node['current'] + node['pcu'] = fb['nodes'][node['hostname']]['values']['pcu'] + node['lastupdate'] = diff_time(node['last_contact']) + pf = PersistFlags(node['hostname'], 1, db='node_persistflags') + try: + node['lc'] = diff_time(pf.last_changed) + except: + node['lc'] = "err" + ut = fb['nodes'][node['hostname']]['values']['comonstats']['uptime'] + if ut != "null": + ut = diff_time(float(fb['nodes'][node['hostname']]['values']['comonstats']['uptime']), False) + node['uptime'] = ut + + return "%(hostname)-42s %(boot_state)8s %(current)5s %(pcu)6s %(key)10.10s... %(kernel)35.35s %(lastupdate)12s, %(lc)s, %(uptime)s" % node + +def datetime_fromstr(str): + if '-' in str: + try: + tup = time.strptime(str, "%Y-%m-%d") + except: + tup = time.strptime(str, "%Y-%m-%d-%H:%M") + elif '/' in str: + tup = time.strptime(str, "%m/%d/%Y") + else: + tup = time.strptime(str, "%m/%d/%Y") + ret = datetime.fromtimestamp(time.mktime(tup)) + return ret + +def get_nodeset(config): + """ + Given the config values passed in, return the set of hostnames that it + evaluates to. + """ + api = plc.getAuthAPI() + l_nodes = database.dbLoad("l_plcnodes") + + if config.nodelist: + f_nodes = util.file.getListFromFile(config.nodelist) + l_nodes = filter(lambda x: x['hostname'] in f_nodes, l_nodes) + elif config.node: + f_nodes = [config.node] + l_nodes = filter(lambda x: x['hostname'] in f_nodes, l_nodes) + elif config.nodegroup: + ng = api.GetNodeGroups({'name' : config.nodegroup}) + l_nodes = api.GetNodes(ng[0]['node_ids'], ['hostname']) + elif config.site: + site = api.GetSites(config.site) + l_nodes = api.GetNodes(site[0]['node_ids'], ['hostname']) + + l_nodes = [node['hostname'] for node in l_nodes] + + # perform this query after the above options, so that the filter above + # does not break. 
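+ # node_select() (from nodequeryold) matches each hostname's findbad record
+ # against a "key=regex" query string such as "state=BOOT&&kernel=2.6"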
+ if config.nodeselect: + fb = database.dbLoad("findbad") + l_nodes = node_select(config.nodeselect, fb['nodes'].keys(), fb) + + return l_nodes + diff --git a/statistics/nodediff-graph-better.py b/statistics/nodediff-graph-better.py new file mode 100755 index 0000000..68e14e1 --- /dev/null +++ b/statistics/nodediff-graph-better.py @@ -0,0 +1,138 @@ +#!/usr/bin/python + + +from monitor import config +from monitor.wrapper import plc +from monitor import parser as parsermodule +#from monitor.model import * +from monitorstats import * +from monitor import database + +import sys +import time +import calendar +from datetime import datetime, timedelta + +from nodequeryold import verify,query_to_dict,node_select + +api = plc.getAuthAPI() + +def nodes_from_time(arch, file, select=None): + fb = arch.load(file) + + nodelist = fb['nodes'].keys() + nodelist = node_select(select, nodelist, fb) + return nodelist + +def print_nodelist(nodelist, file): + for node in nodelist: + if file: + print >>file, node + else: + print node + + +def main(): + parser = parsermodule.getParser() + parser.set_defaults(archivedir='archive-pdb', begin=None, end=None, printnodes=False, select=None, select2=None) + parser.add_option("", "--archivedir", dest="archivedir", metavar="filename", + help="Pickle file aggregate output.") + parser.add_option("", "--select", dest="select", metavar="key", + help="Select .") + parser.add_option("", "--select2", dest="select2", metavar="key", + help="Select .") + parser.add_option("", "--print", dest="printnodes", action="store_true", + help="print the nodes that have come up or down.") + parser.add_option("", "--begin", dest="begin", metavar="YYYY-MM-DD", + help="Specify a starting date from which to begin the query.") + parser.add_option("", "--end", dest="end", metavar="YYYY-MM-DD", + help="Specify a ending date at which queries end.") + config = parsermodule.parse_args(parser) + archive = get_archive(config.archivedir) + + if not config.begin or not config.end: + print parsermodule.usage(parser) + sys.exit(1) + + tdelta = timedelta(1) + d_s1 = datetime_fromstr(config.begin) + d_s2 = datetime_fromstr(config.begin) + tdelta + d_end = datetime_fromstr(config.end) + + print d_s1 + print d_s2 + print d_end + + data = [] + while d_end > d_s2: + + try: + f_s1 = get_filefromglob(d_s1, "production.findbad", config.archivedir) + f_s2 = get_filefromglob(d_s2, "production.findbad", config.archivedir) + except: + timestr = d_s2.strftime("%Y-%m-%d") + print timestr, ",", 0, ",", 0 + d_s1 = d_s2 + d_s2 = d_s1 + tdelta + continue + + s1 = set(nodes_from_time(archive, f_s1, config.select)) + s2 = set(nodes_from_time(archive, f_s2, config.select)) + s3 = set(nodes_from_time(archive, f_s2, config.select2)) + + + timestr = d_s2.strftime("%Y-%m-%d") + print timestr, ",", len(s2),",", len(s3) + if not config.printnodes: + # f_up = open("up-%s" % timestr, 'w') + # f_down = open("dn-%s" % timestr, 'w') + f_up = None + f_down = None + pass + else: + print "%s nodes up" % len(s2-s1) + print "Nodes s2 minus s1: len(s2-s1) = %s" % len(s2-s1) + f_up = None + f_down = None + + #print_nodelist(s2-s1, f_up) + + if config.printnodes: + print "" + print "%s nodes down" % len(s1-s2) + print "Nodes s1 minus s2: len(s1-s2) = %s" % len(s1-s2) + + #print_nodelist(s1-s2, f_down) + if not config.printnodes: + if f_up: f_up.close() + if f_up: f_down.close() + + d_s1 = d_s2 + d_s2 = d_s1 + tdelta + + s1=[] + s2=[] + s3=[] + for row in data: + s1.append(row[0]) + s2.append(row[1]) + s3.append(row[2]) + + s1 = map(lambda x: 
x-500, s1) + rlow= zip(s1,s3) + rhigh = zip(s1,s2) + diff_low = map(lambda x: x[0]-x[1], rlow) + diff_high = map(lambda x: x[0]+x[1], rhigh) + s1 = map(lambda x: str(x), s1) + diff_low = map(lambda x: str(x), diff_low) + diff_high = map(lambda x: str(x), diff_high) + print s1 + print diff_low + print diff_high + print "http://chart.apis.google.com/chart?cht=lc&chds=40,100&chxt=x,y&chxl=0:|Oct|Nov|Dec|Jan|Feb|1:|540|580|600&chs=700x200&chm=F,aaaaaa,1,-1,2&chd=t1:%s" % ",".join(s1) + "|" + ",".join(diff_low) + "|" + ",".join(s1) + "|" + ",".join(s1) +"|" + ",".join(diff_high) + +# takes two arguments as dates, comparing the number of up nodes from one and +# the other. + +if __name__ == "__main__": + main() diff --git a/statistics/nodediff-graph.py b/statistics/nodediff-graph.py new file mode 100644 index 0000000..e5f9d39 --- /dev/null +++ b/statistics/nodediff-graph.py @@ -0,0 +1,100 @@ +#!/usr/bin/python + + +from monitor import config +from monitor.wrapper import plc +from monitor import parser as parsermodule +from monitor.model import * +from monitorstats import * +from monitor import database + +import sys +import time +import calendar +from datetime import datetime, timedelta + +from nodequeryold import verify,query_to_dict,node_select + +api = plc.getAuthAPI() + +def nodes_from_time(arch, file): + fb = arch.load(file) + + nodelist = fb['nodes'].keys() + nodelist = node_select(config.select, nodelist, fb) + return nodelist + + +def main(): + parser = parsermodule.getParser() + parser.set_defaults(archivedir='archive-pdb', begin=None, end=None, select=None) + parser.add_option("", "--archivedir", dest="archivedir", metavar="filename", + help="Pickle file aggregate output.") + parser.add_option("", "--select", dest="select", metavar="key", + help="Select .") + parser.add_option("", "--begin", dest="begin", metavar="YYYY-MM-DD", + help="Specify a starting date from which to begin the query.") + parser.add_option("", "--end", dest="end", metavar="YYYY-MM-DD", + help="Specify a ending date at which queries end.") + config = parsermodule.parse_args(parser) + archive = get_archive(config.archivedir) + + if not config.begin or not config.end: + print parsermodule.usage(parser) + sys.exit(1) + + tdelta = timedelta(1) + d_s1 = datetime_fromstr(config.begin) + d_s2 = datetime_fromstr(config.begin) + tdelta + d_end = datetime_fromstr(config.end) + + data = [] + while d_end > d_s2: + + f_s1 = get_filefromglob(d_s1, "production.findbad", config.archivedir) + f_s2 = get_filefromglob(d_s2, "production.findbad", config.archivedir) + + s1 = set(nodes_from_time(archive, f_s1)) + s2 = set(nodes_from_time(archive, f_s2)) + + print "[ %s, %s, %s ]," % ( len(s2), len(s2-s1), len(s1-s2) ) + data.append( [ len(s2), len(s2-s1), len(s1-s2)] ) + + #print "len s2 : ", len(s2) + #print "len s1 : ", len(s1) + #print "%s nodes up" % len(s2-s1) + #print "Nodes s2 minus s1: len(s2-s1) = %s" % len(s2-s1) + #for node in s2 - s1: print node + #print "" + #print "%s nodes down" % len(s1-s2) + #print "Nodes s1 minus s2: len(s1-s2) = %s" % len(s1-s2) + # for node in s1 - s2: print node + d_s1 = d_s2 + d_s2 = d_s1 + tdelta + + s1=[] + s2=[] + s3=[] + for row in data: + s1.append(row[0]) + s2.append(row[1]) + s3.append(row[2]) + + s1 = map(lambda x: x-500, s1) + rlow= zip(s1,s3) + rhigh = zip(s1,s2) + diff_low = map(lambda x: x[0]-x[1], rlow) + diff_high = map(lambda x: x[0]+x[1], rhigh) + s1 = map(lambda x: str(x), s1) + diff_low = map(lambda x: str(x), diff_low) + diff_high = map(lambda x: str(x), diff_high) + print s1 + 
print diff_low + print diff_high + print "http://chart.apis.google.com/chart?cht=lc&chds=40,100&chxt=x,y&chxl=0:|Oct|Nov|Dec|Jan|Feb|1:|540|580|600&chs=700x200&chm=F,aaaaaa,1,-1,2&chd=t1:%s" % ",".join(s1) + "|" + ",".join(diff_low) + "|" + ",".join(s1) + "|" + ",".join(s1) +"|" + ",".join(diff_high) + +# takes two arguments as dates, comparing the number of up nodes from one and +# the other. + +if __name__ == "__main__": + main() diff --git a/statistics/nodediff-length.py b/statistics/nodediff-length.py new file mode 100755 index 0000000..f2e468f --- /dev/null +++ b/statistics/nodediff-length.py @@ -0,0 +1,107 @@ +#!/usr/bin/python + + +from monitor import config +from monitor.wrapper import plc +from monitor import parser as parsermodule +from monitor.model import * +from monitorstats import * +from monitor import database + +import sys +import time +import calendar +from datetime import datetime, timedelta + +from nodequeryold import verify,query_to_dict,node_select + +api = plc.getAuthAPI() + +def nodes_from_time(arch, file): + fb = arch.load(file) + + nodelist = fb['nodes'].keys() + nodelist = node_select(config.select, nodelist, fb) + return nodelist + +def print_nodelist(nodelist, file): + for node in nodelist: + if file: + print >>file, node + else: + print node + + +def main(): + parser = parsermodule.getParser() + parser.set_defaults(archivedir='archive-pdb', begin=None, end=None, printnodes=False, select=None) + parser.add_option("", "--archivedir", dest="archivedir", metavar="filename", + help="Pickle file aggregate output.") + parser.add_option("", "--select", dest="select", metavar="key", + help="Select .") + parser.add_option("", "--print", dest="printnodes", action="store_true", + help="print the nodes that have come up or down.") + parser.add_option("", "--begin", dest="begin", metavar="YYYY-MM-DD", + help="Specify a starting date from which to begin the query.") + parser.add_option("", "--end", dest="end", metavar="YYYY-MM-DD", + help="Specify a ending date at which queries end.") + config = parsermodule.parse_args(parser) + archive = get_archive(config.archivedir) + + if not config.begin or not config.end: + print parsermodule.usage(parser) + sys.exit(1) + + tdelta = timedelta(1) + d_s1 = datetime_fromstr(config.begin) + d_s2 = datetime_fromstr(config.end) + d_end = d_s2 + + print d_s1 + print d_s2 + print d_end + + data = [] + while d_end >= d_s2: + + f_s1 = get_filefromglob(d_s1, "production.findbad", config.archivedir) + f_s2 = get_filefromglob(d_s2, "production.findbad", config.archivedir) + + s1 = set(nodes_from_time(archive, f_s1)) + s2 = set(nodes_from_time(archive, f_s2)) + + print "[ %s, %s, %s ]," % ( len(s2), len(s2-s1), len(s1-s2) ) + data.append( [ len(s2), len(s2-s1), len(s1-s2)] ) + + #print "len s2 : ", len(s2) + #print "len s1 : ", len(s1) + timestr = d_s2.strftime("%Y-%m-%d") + if not config.printnodes: + f_up = open("up-%s" % timestr, 'w') + f_down = open("dn-%s" % timestr, 'w') + else: + print "%s nodes up" % len(s2-s1) + print "Nodes s2 minus s1: len(s2-s1) = %s" % len(s2-s1) + f_up = None + f_down = None + + print_nodelist(s2-s1, f_up) + + if config.printnodes: + print "" + print "%s nodes down" % len(s1-s2) + print "Nodes s1 minus s2: len(s1-s2) = %s" % len(s1-s2) + + print_nodelist(s1-s2, f_down) + if not config.printnodes: + f_up.close() + f_down.close() + + d_s1 = d_s2 + d_s2 = d_s1 + tdelta + +# takes two arguments as dates, comparing the number of up nodes from one and +# the other. 
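+#
+# The up/down counts are plain set differences between the two findbad
+# snapshots: hostnames selected in the later snapshot but not the earlier one
+# came up, and the reverse went down. Restated with this script's own helpers
+# (a sketch only, not executed here):
+#
+#   s1 = set(nodes_from_time(archive, f_s1))   # --begin snapshot
+#   s2 = set(nodes_from_time(archive, f_s2))   # --end snapshot
+#   came_up, went_down = s2 - s1, s1 - s2
+#   print "%d up, %d down" % (len(came_up), len(went_down))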
+ +if __name__ == "__main__": + main() diff --git a/statistics/nodediff.py b/statistics/nodediff.py new file mode 100755 index 0000000..7e6674d --- /dev/null +++ b/statistics/nodediff.py @@ -0,0 +1,128 @@ +#!/usr/bin/python + + +from monitor import config +from monitor.wrapper import plc +from monitor import parser as parsermodule +from monitor.model import * +from monitorstats import * +from monitor import database + +import sys +import time +import calendar +from datetime import datetime, timedelta + +from nodequeryold import verify,query_to_dict,node_select + +api = plc.getAuthAPI() + +def nodes_from_time(arch, file): + fb = arch.load(file) + + nodelist = fb['nodes'].keys() + nodelist = node_select(config.select, nodelist, fb) + del fb + return nodelist + +def print_nodelist(nodelist, file): + for node in nodelist: + if file: + print >>file, node + else: + print node + +def main(): + parser = parsermodule.getParser() + parser.set_defaults(archivedir='archive-pdb', begin=None, end=None, + sequential=False, printnodes=False, select=None) + + parser.add_option("", "--archivedir", dest="archivedir", metavar="filename", + help="Pickle file aggregate output.") + parser.add_option("", "--select", dest="select", metavar="key", + help="Select .") + parser.add_option("", "--sequential", dest="sequential", action="store_true", + help="Compare EVERY timestep between begin and end .") + parser.add_option("", "--print", dest="printnodes", action="store_true", + help="print the nodes that have come up or down.") + parser.add_option("", "--begin", dest="begin", metavar="YYYY-MM-DD", + help="Specify a starting date from which to begin the query.") + parser.add_option("", "--end", dest="end", metavar="YYYY-MM-DD", + help="Specify a ending date at which queries end.") + + config = parsermodule.parse_args(parser) + archive = get_archive(config.archivedir) + + if not config.begin or not config.end: + print parsermodule.usage(parser) + sys.exit(1) + + tdelta = timedelta(1) + d_s1 = datetime_fromstr(config.begin) + d_s2 = datetime_fromstr(config.begin) + tdelta + d_end = datetime_fromstr(config.end) + + print d_s1 + print d_s2 + print d_end + + file_list = [] + # then the iterations are day-based. + while d_end > d_s2: + f_s1 = get_filefromglob(d_s1, "production.findbad", config.archivedir, True) + if not config.sequential: + file_list.append(f_s1) + else: + file_list += f_s1 + + d_s1 = d_s2 + d_s2 = d_s1 + tdelta + + print file_list + file_list = file_list[4:] + + xaxis = get_xaxis(file_list) + + data = [] + f_s1 = None + f_s2 = None + i = 0 + for file in file_list: + + i+=1 + f_s2 = file + if f_s1 is None: + f_s1 = f_s2 + continue + + s1 = set(nodes_from_time(archive, f_s1)) + s2 = set(nodes_from_time(archive, f_s2)) + + print f_s1 + print "[ %s, %s, %s ]," % ( len(s2), len(s2-s1), len(s1-s2) ) + data.append( [ len(s2), len(s2-s1), len(s1-s2)] ) + + print "%s nodes up" % len(s2-s1) + print "Nodes s2 minus s1: len(s2-s1) = %s" % len(s2-s1) + f_up = None + f_down = None + + if config.printnodes: + print_nodelist(s2-s1, f_up) + + print "" + print "%s nodes down" % len(s1-s2) + print "Nodes s1 minus s2: len(s1-s2) = %s" % len(s1-s2) + + if config.printnodes: + print_nodelist(s1-s2, f_down) + + f_s1 = f_s2 + f_s2 = None + + print_graph(data, config.begin, config.end, xaxis) +# takes two arguments as dates, comparing the number of up nodes from one and +# the other. 
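+#
+# print_graph() (from monitorstats) turns the per-day [total, up, down] rows
+# into a Google Chart line-chart URL: the totals are shifted down by 'offset'
+# (500 by default), a low band subtracts the down counts, a high band adds the
+# up counts, and the five series are comma-joined and glued together with "|"
+# inside the chart's "chd=t1:" parameter. Roughly, with made-up sample numbers:
+#
+#   total, up, down = [550, 560], [5, 12], [3, 1]
+#   base = [t - 500 for t in total]
+#   low  = [b - d for b, d in zip(base, down)]
+#   high = [b + u for b, u in zip(base, up)]
+#   chd  = "|".join(",".join(map(str, s)) for s in (base, low, base, base, high))
+#   url  = "http://chart.apis.google.com/chart?cht=lc&chds=0,100&chd=t1:" + chd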
+ +if __name__ == "__main__": + main() diff --git a/statistics/nodequeryold.py b/statistics/nodequeryold.py new file mode 100755 index 0000000..a2aba4b --- /dev/null +++ b/statistics/nodequeryold.py @@ -0,0 +1,216 @@ +#!/usr/bin/python + +import sys +from nodecommon import * +import glob +import os +from monitor.util import file + +import time +import re + +#fb = {} +fb = None +fbpcu = None + +class NoKeyException(Exception): pass + +def fb_print_nodeinfo(fbnode, hostname, fields=None): + fbnode['hostname'] = hostname + fbnode['checked'] = diff_time(fbnode['checked']) + if fbnode['bootcd']: + fbnode['bootcd'] = fbnode['bootcd'].split()[-1] + else: + fbnode['bootcd'] = "unknown" + fbnode['pcu'] = color_pcu_state(fbnode) + + if not fields: + if 'ERROR' in fbnode['category']: + fbnode['kernel'] = "" + else: + fbnode['kernel'] = fbnode['kernel'].split()[2] + fbnode['boot_state'] = fbnode['plcnode']['boot_state'] + + try: + if len(fbnode['nodegroups']) > 0: + fbnode['category'] = fbnode['nodegroups'][0] + except: + #print "ERROR!!!!!!!!!!!!!!!!!!!!!" + pass + + print "%(hostname)-45s | %(checked)11.11s | %(boot_state)5.5s| %(state)8.8s | %(ssh)5.5s | %(pcu)6.6s | %(bootcd)6.6s | %(category)8.8s | %(kernel)s" % fbnode + else: + format = "" + for f in fields: + format += "%%(%s)s " % f + print format % fbnode + +def get(fb, path): + indexes = path.split("/") + values = fb + for index in indexes: + if index in values: + values = values[index] + else: + raise NoKeyException(index) + return values + +def verifyType(constraints, data): + """ + constraints is a list of key, value pairs. + # [ {... : ...}==AND , ... , ... , ] == OR + """ + con_or_true = False + for con in constraints: + #print "con: %s" % con + if len(con.keys()) == 0: + con_and_true = False + else: + con_and_true = True + + for key in con.keys(): + #print "looking at key: %s" % key + if data is None: + con_and_true = False + break + + try: + get(data,key) + o = con[key] + if o.name() == "Match": + if get(data,key) is not None: + value_re = re.compile(o.value) + con_and_true = con_and_true & (value_re.search(get(data,key)) is not None) + else: + con_and_true = False + elif o.name() == "ListMatch": + if get(data,key) is not None: + match = False + for listitem in get(data,key): + value_re = re.compile(o.value) + if value_re.search(listitem) is not None: + match = True + break + con_and_true = con_and_true & match + else: + con_and_true = False + elif o.name() == "Is": + con_and_true = con_and_true & (get(data,key) == o.value) + elif o.name() == "FilledIn": + con_and_true = con_and_true & (len(get(data,key)) > 0) + elif o.name() == "PortOpen": + if get(data,key) is not None: + v = get(data,key) + con_and_true = con_and_true & (v[str(o.value)] == "open") + else: + con_and_true = False + else: + value_re = re.compile(o.value) + con_and_true = con_and_true & (value_re.search(get(data,key)) is not None) + + except NoKeyException, key: + print "missing key %s" % key, + pass + #print "missing key %s" % key + #con_and_true = False + + con_or_true = con_or_true | con_and_true + + return con_or_true + +def verify(constraints, data): + """ + constraints is a list of key, value pairs. + # [ {... : ...}==AND , ... , ... 
, ] == OR + """ + con_or_true = False + for con in constraints: + #print "con: %s" % con + if len(con.keys()) == 0: + con_and_true = False + else: + con_and_true = True + + for key in con.keys(): + #print "looking at key: %s" % key + if key in data: + value_re = re.compile(con[key]) + if type([]) == type(data[key]): + local_or_true = False + for val in data[key]: + local_or_true = local_or_true | (value_re.search(val) is not None) + con_and_true = con_and_true & local_or_true + else: + con_and_true = con_and_true & (value_re.search(data[key]) is not None) + elif key not in data: + print "missing key %s" % key, + pass + #print "missing key %s" % key + #con_and_true = False + + con_or_true = con_or_true | con_and_true + + return con_or_true + +def query_to_dict(query): + + ad = [] + + or_queries = query.split('||') + for or_query in or_queries: + and_queries = or_query.split('&&') + + d = {} + + for and_query in and_queries: + (key, value) = and_query.split('=') + d[key] = value + + ad.append(d) + + return ad + +def pcu_in(fbdata): + if 'plcnode' in fbdata: + if 'pcu_ids' in fbdata['plcnode']: + if len(fbdata['plcnode']['pcu_ids']) > 0: + return True + return False + +def node_select(str_query, nodelist=None, fbdb=None): + global fb + + hostnames = [] + if str_query is None: return hostnames + + #print str_query + dict_query = query_to_dict(str_query) + #print dict_query + + if fbdb is not None: + fb = fbdb + + for node in fb['nodes'].keys(): + if nodelist is not None: + if node not in nodelist: continue + + fb_nodeinfo = fb['nodes'][node]['values'] + + if fb_nodeinfo == []: + #print node, "has lost values" + continue + #sys.exit(1) + #fb_nodeinfo['pcu'] = color_pcu_state(fb_nodeinfo) + fb_nodeinfo['hostname'] = node + if 'plcnode' in fb_nodeinfo: + fb_nodeinfo.update(fb_nodeinfo['plcnode']) + + if verify(dict_query, fb_nodeinfo): + #print node #fb_nodeinfo + hostnames.append(node) + else: + #print "NO MATCH", node + pass + + return hostnames + diff --git a/statistics/parserpms.py b/statistics/parserpms.py new file mode 100755 index 0000000..6ca34b2 --- /dev/null +++ b/statistics/parserpms.py @@ -0,0 +1,25 @@ +#!/usr/bin/python + +import sys +import os +import md5 + +def list_to_md5(strlist): + digest = md5.new() + for f in strlist: + digest.update(f) + + return digest.hexdigest() + +while True: + line = sys.stdin.readline() + if not line: + break + line = line.strip() + fields = line.split() + host = fields[1] + rpms = fields[2:] + rpms.sort() + if len(rpms) != 0: + sum = list_to_md5(rpms) + print sum, host diff --git a/statistics/rtsurvey.py b/statistics/rtsurvey.py new file mode 100755 index 0000000..2f2babd --- /dev/null +++ b/statistics/rtsurvey.py @@ -0,0 +1,226 @@ +#!/usr/bin/python + +import os, sys, shutil +import MySQLdb +import string + +import re + +import time +from datetime import datetime + +from monitor import config +from monitor import database + +def convert_time(time_str): + if '-' in str: + try: + tup = time.strptime(str, "%Y-%m-%d %H:%M:%S") + except: + tup = time.strptime(str, "%Y-%m-%d-%H:%M") + elif '/' in str: + tup = time.strptime(str, "%m/%d/%Y") + else: + tup = time.strptime(str, "%m/%d/%Y") + d_ret = datetime.fromtimestamp(time.mktime(tup)) + return d_ret + +def open_rt_db(): + + try: + rt_db = MySQLdb.connect(host=config.RT_DB_HOST, + user=config.RT_DB_USER, + passwd=config.RT_DB_PASSWORD, + db=config.RT_DB_NAME) + except Exception, err: + print "Failed to connect to RT database: %s" %err + return -1 + + return rt_db + +def fetch_from_db(db, sql): + try: + # 
create a 'cursor' (required by MySQLdb) + c = db.cursor() + c.execute(sql) + except Exception, err: + print "Could not execute RT query %s" %err + return -1 + + # fetch all rows (list of lists) + raw = c.fetchall() + return raw + + +def get_rt_tickets(): + print "open db connection" + db = open_rt_db() + if db == -1: + return "" + + sql = """SELECT tk.id, tk.Queue, tr.Type, tr.Field, tr.OldValue, tr.NewValue, + tr.Created, at.id, at.Subject, at.Content + FROM Tickets as tk, Transactions as tr + LEFT OUTER JOIN Attachments as at ON tr.id=at.TransactionId + WHERE (tk.Queue=3 OR tk.Queue=22) AND tk.id=tr.ObjectId AND tk.id>10000 """ + + print "run query" + raw = fetch_from_db(db, sql) + if raw == -1: + return raw + + tickets = {} + subject_map = {} + def parse_ticket(x): + ticket_id = int(x[0]) + queue = int(x[1]) + trtype = str(x[2]) + field = x[3] + oldvalue = x[4] + newvalue = x[5] + datecreated = x[6] # already a datetime object + attachmentid = x[7] + subject = x[8] + content = x[9] + + if ticket_id not in tickets: + print "found new ticket_id", ticket_id + tickets[ticket_id] = {'queue' : queue, + 'transactions' : [] } + + if subject != "": + subject_map[ticket_id] = subject + elif ticket_id in subject_map: + subject = subject_map[ticket_id] + else: + # subject == "" and no record in subject_map yet + # should probably put on a queue to be processed later. + print "no subject for %s" % ticket_id + + transaction = { + 'type' : trtype, + 'field' : field, + 'oldvalue' : oldvalue, + 'newvalue' : newvalue, + 'datecreated' : datecreated, + 'attachmentid' : attachmentid, + 'subject' : subject, + 'content' : content, + } + tickets[ticket_id]['transactions'].append(transaction) + + + print "sort data" + list = map(parse_ticket, raw) + + # map(lambda x: { "email":str(x[4]), "lastupdated":str(x[5]), "owner":str(x[7]), }, raw) + + db.close() + + + return tickets + + +# flow chart: +# classify: +# for each ticket +# classify into category +# remove from ticket set, add to classified-set +# +# add new search patterns, +# re-run classify algorithm + +re_map = [ + #('mom', {'pattern' : '.*pl_mom.*'}), + #('technical-support', {'pattern' : '.*PlanetLab node.* down'}), + #('technical-support', {'pattern' : 'Node .* was stopped by'}), # and opened + #('technical-support', {'pattern' : 'bootcd|BootCD|bootCD|boot cd|boot CD|booting'}), + #('technical-support', {'pattern' : '.* failed to authenticate'}), + #('technical-support', {'pattern' : '.* fails to boot'}), + #('technical-support', {'pattern' : '.* fail.* to boot'}), + #('technical-support', {'pattern' : '.* failed to authenticate'}), + #('technical-support', {'pattern' : 'curl (60)|.* CA certificates.*|peer certificate.*authenticated'}), + #('technical-support', {'pattern' : '(usb|USB).*(key|Disk|stick|boot|help|problem|trouble)'}), + #('complaint', {'pattern' : '.*omplaint|.*attack'}), + #('complaint', {'pattern' : '.* stop .*'}), # and subject + #('spam', {}),j + #('user-support', {'pattern' : '(R|r)egistration|(R|r)egister'}), + #('user-support', {'pattern' : 'password reset|reset password'}), + ('user-support', {'pattern' : 'New PI account registration from'}), + #('other', {}), +] + +def sort_tickets(tickets, re_map): + + ticket_count = len(tickets.keys()) + marked_subject = 0 + marked_content = 0 + for ticket_id in sorted(tickets.keys()): + for i,(name, pattern) in enumerate(re_map): + if 'compile' not in pattern: + pattern['compile'] = re.compile(pattern['pattern']) + pat = pattern['compile'] + for transaction in 
tickets[ticket_id]['transactions']: + + try: + if transaction['subject'] and re.match(pat, transaction['subject']): + print "ticket %s matches pattern %s: %s" % (ticket_id, + pattern['pattern'], transaction['subject']) + marked_subject += 1 + break + if transaction['content'] and re.match(pat, transaction['content']): + print "ticket %s matches pattern %s: %s" % (ticket_id, + pattern['pattern'], transaction['subject']) + #if transaction['subject'] == "": + # print transaction + marked_content += 1 + break + except: + import traceback + print traceback.print_exc() + print transaction + print ticket_id + print pattern + sys.exit(1) + + print ticket_count + print marked_subject + print marked_content + print ticket_count - marked_content - marked_content + +def main(): + from optparse import OptionParser + parser = OptionParser() + + parser.set_defaults(runsql=False,) + + parser.add_option("", "--runsql", dest="runsql", action="store_true", + help="Whether to collect data from the MySQL server before "+ + "caching it, or to just use the previously collected data.") + + (config, args) = parser.parse_args() + if len(sys.argv) == 1: + parser.print_help() + sys.exit(1) + + for i,(name, pattern) in enumerate(re_map): + print i, name + + if config.runsql: + tickets = get_rt_tickets() + database.dbDump("survey_tickets", tickets) + else: + print "loading" + tickets = database.dbLoad("survey_tickets") + print tickets[42171]['transactions'][0] + + sort_tickets(tickets, re_map) + + # for each ticket id + # scan for known keywords and sort into classes + # record assigned class + + # review all tickets that remain + +if __name__ == '__main__': + main() diff --git a/statistics/sliceavg.py b/statistics/sliceavg.py new file mode 100755 index 0000000..739814f --- /dev/null +++ b/statistics/sliceavg.py @@ -0,0 +1,45 @@ +#!/usr/bin/python + +import os +import sys + +from monitor.wrapper import plc + +api = plc.cacheapi +api.AuthCheck() + +# for each site, find total number of assigned slivers +# find median, high, low, average + +site_list = [] + +for site in api.GetSites({'peer_id': None}): + sl = api.GetSlices(site['slice_ids']) + sliver_cnt = 0 + for slice in sl: + sliver_cnt += len(slice['node_ids']) + val = (site['login_base'], sliver_cnt) + site_list.append(val) + #print val + +print "------------------------------------------" +site_list.sort(lambda x,y: cmp(y[1], x[1])) +totals = 0 +use_count = 0 +for i in site_list: + if i[1] != 0: + print "%10s %s" % i + use_count += 1 + totals += i[1] + +site_avg = totals/len(site_list) + +print "high: %s %s" % site_list[0] +print "low: %s %s" % site_list[-1] +print "median: %s %s" % site_list[len(site_list)/2] +print "used median: %s %s" % site_list[use_count/2] +print "all avg: %s" % site_avg +print "used avg: %s" % (totals/use_count) +print "totals: %s" % totals +print "use_count: %s" % use_count +print "site_count: %s" % len(site_list) diff --git a/statistics/usedtickets.py b/statistics/usedtickets.py new file mode 100755 index 0000000..b3ab662 --- /dev/null +++ b/statistics/usedtickets.py @@ -0,0 +1,21 @@ +#!/usr/bin/python + +from monitor.model import * + +sql = database.dbLoad("idTickets") +for db in ["monitor", "pcu", "bootcd", "hardware", "unknown", + "suspect", "baddns", "nodenet", "nodeid"]: + db = "%s_persistmessages" % db + #print db + try: + pm = database.dbLoad(db) + except: + continue + for host in pm.keys(): + m = pm[host] + id = str(m.ticket_id) + if m.ticket_id > 0: + if id in sql: + print "%s %6s %s" % (m.ticket_id, sql[id]['status'], host) + 
else: + print "%s closed %s" % ( m.ticket_id, host) -- 2.43.0
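
rtsurvey.py classifies RT tickets by walking an ordered list of (category, pattern) regexes and marking a ticket on the first transaction whose subject or content matches (the script itself uses re.match). The sketch below restates that loop against a small in-memory ticket dict; the ticket ids, subjects, and the second pattern are made up for illustration, and re.search is used instead of re.match for simplicity.

    import re

    re_map = [
        ('user-support', {'pattern': 'New PI account registration from'}),
        ('technical-support', {'pattern': 'fail.* to boot|BootCD'}),
    ]

    tickets = {
        101: {'queue': 3,  'transactions': [{'subject': 'New PI account registration from example.edu', 'content': ''}]},
        102: {'queue': 22, 'transactions': [{'subject': 'planetlab1 fails to boot', 'content': 'BootCD is several years old'}]},
    }

    def classify(tickets, re_map):
        # first matching (category, pattern) wins; unmatched tickets are skipped
        classes = {}
        for ticket_id in sorted(tickets.keys()):
            for name, pattern in re_map:
                pat = re.compile(pattern['pattern'])
                for t in tickets[ticket_id]['transactions']:
                    if pat.search(t['subject'] or '') or pat.search(t['content'] or ''):
                        classes[ticket_id] = name
                        break
                if ticket_id in classes:
                    break
        return classes

    print classify(tickets, re_map)   # {101: 'user-support', 102: 'technical-support'}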