#!/usr/bin/python from monitor import database from monitor import config import string import sys import time categories = {} ssherror = False fb = {} def sec2days(sec): if sec == "null": sec = -(60*60*24) sec = int(sec) return sec/(60*60*24) def array_to_priority_map(array): """ Create a mapping where each entry of array is given a priority equal to its position in the array. This is useful for subsequent use in the cmpMap() function.""" map = {} count = 0 for i in array: map[i] = count count += 1 return map def cmpValMap(v1, v2, map): if v1 in map and v2 in map and map[v1] < map[v2]: return 1 elif v1 in map and v2 in map and map[v1] > map[v2]: return -1 elif v1 in map and v2 in map: return 0 else: raise Exception("No index %s or %s in map" % (v1, v2)) def cmpMap(l1, l2, index, map): if index in l1 and index in l2: if map[l1[index]] < map[l2[index]]: return -1 elif map[l1[index]] > map[l2[index]]: return 1 else: return 0 else: return 0 def cmpLoginBase(l1, l2): #print "'" + l1['loginbase'] + "'" + " < " + "'" + l2['loginbase'] + "'" + "
" if l1['loginbase'] == l2['loginbase']: return 0 elif l1['loginbase'] < l2['loginbase']: return -1 elif l1['loginbase'] > l2['loginbase']: return 1 else: return 0 def cmpState(l1, l2): map = array_to_priority_map([ 'BOOT', 'DEBUG', 'DOWN' ]) return cmpMap(l1,l2,'state', map) def cmpCategoryVal(v1, v2): map = array_to_priority_map([ None, 'ALPHA', 'PROD', 'OLDPROD', 'OLDBOOTCD', 'UNKNOWN', 'FORCED', 'ERROR', ]) return cmpValMap(v1,v2,map) def cmpCategory(l1, l2): map = array_to_priority_map([ 'ALPHA', 'PROD', 'OLDPROD', 'OLDBOOTCD', 'UNKNOWN', 'ERROR', ]) return cmpMap(l1,l2,'category', map) def cmpPCU(l1, l2): """ Either PCU or NOPCU""" map = array_to_priority_map([ 'PCU', 'NOPCU', 'UNKNOWN']) return cmpMap(l1, l2, 'pcu', map) def cmpSSH(l1, l2): """ Either SSH or NOSSH """ map = array_to_priority_map([ 'SSH', 'NOSSH']) return cmpMap(l1, l2, 'ssh', map) def cmpDNS(l1,l2): """ Compare DNS states """ map = array_to_priority_map([ 'OK', 'NOHOSTNAME', 'NOENTRY', 'MISMATCH']) return cmpMap(l1, l2, 'dnsmatch', map) def cmpPing(l1,l2): """ Either PING or NOPING """ map = array_to_priority_map([ 'PING', 'NOPING']) return cmpMap(l1, l2, 'ping', map) def cmpUname(l1, l2): # Extract the kernel version from kernel -a string l_k1 = l1['kernel'].split() if len(l_k1) > 2: k1 = l_k1[2] else: return 1 l_k2 = l2['kernel'].split() if len(l_k2) > 2: k2 = l_k2[2] else: return -1 return cmp(k1, k2) def cmpDays(l1, l2): if l1['comonstats'][config.comon] == "null": l1['comonstats'][config.comon] = -1 if l2['comonstats'][config.comon] == "null": l2['comonstats'][config.comon] = -1 if int(l1['comonstats'][config.comon]) > int(l2['comonstats'][config.comon]): return -1 elif int(l1['comonstats'][config.comon]) < int(l2['comonstats'][config.comon]): return 1 else: return 0 def ssh_error_to_str(str): ssh_error = "" if "Connection timed out" in str: ssh_error = "Timeout" elif "Connection closed by remote host" in str: ssh_error = "Closed by remote host" elif "Connection refused" in str: ssh_error = "Connection refused" elif "Temporary failure in name resolution" in str: ssh_error = "Could not resolve name" elif "Name or service not known" in str: ssh_error = "Name not known" elif "Too many authentication failures" in str: ssh_error = "Disconnect: root auth failure" elif "Network is unreachable" in str: ssh_error = "Network is unreachable" elif "Connection reset by peer" in str: ssh_error = "Connection reset by peer" elif "WARNING" in str: ssh_error = "WARNING ssh key updated" else: ssh_error = str return ssh_error def pcu_state(pcu_id): global fb if 'nodes' in fb and "id_%s" % pcu_id in fb['nodes'] \ and 'values' in fb['nodes']["id_%s" % pcu_id]: rec = fb['nodes']["id_%s" % pcu_id]['values'] if 'reboot' in rec: rb = rec['reboot'] if rb == 0 or rb == "0": return 0 elif "NetDown" == rb or "Not_Run" == rb: return 1 else: return -1 else: return -1 else: return -1 def fields_to_html(fields, vals): global categories global ssherror pcu_colorMap = { -1 : 'indianred', 0 : 'darkseagreen', 1 : 'gold', } colorMap = { 'PING' : 'darkseagreen', 'NOPING': 'darksalmon', 'SSH': 'darkseagreen', 'NOSSH': 'indianred', 'PCU': 'darkseagreen', 'NOPCU': 'lightgrey', 'OLDBOOTCD': 'crimson', 'DOWN': 'indianred', 'ALPHA': 'gold', 'ERROR': 'crimson', 'PROD': 'darkseagreen', 'DEBUG': 'darksalmon', 'DEBUG': 'darksalmon', 'BOOT': 'lightgreen'} r_str = "" f_prev = "" f_2prev = "" #print 'inside--------------' for f in fields: f = f.strip() #print f if f in ['DOWN', 'BOOT', 'DEBUG']: #key = "%s-%s-%s" % (f,f_prev,f_2prev) key = "%s-%s" % (f,f_prev) if key not in categories: categories[key] = 1 else: categories[key] += 1 #print "
%s

" % f if f in colorMap: bgcolor="bgcolor='%s'" % colorMap[f] else: bgcolor="" if f == 'NOSSH': if ssherror: if 'ssherror' in vals: str_ssh_error = ssh_error_to_str(vals['ssherror']) else: str_ssh_error = "NO SSHERROR in VALS" if str_ssh_error != "Timeout": r_str += """%s
%s""" % \ (bgcolor,f,str_ssh_error) else: r_str += "%s" % (bgcolor, f) else: r_str += "%s" % (bgcolor, f) elif f == 'PCU': if len(vals['plcnode']['pcu_ids']) > 0: #print "pcu_id: %s
" % vals['plcnode']['pcu_ids'][0] #print "state: %s
" % pcu_state(vals['plcnode']['pcu_ids'][0]) #print "color: %s
" % pcu_colorMap[pcu_state(vals['plcnode']['pcu_ids'][0])] bgcolor = "bgcolor='%s'" % pcu_colorMap[pcu_state(vals['plcnode']['pcu_ids'][0])] url = "PCU" % vals['plcnode']['pcu_ids'][0] r_str += "%s" % (bgcolor, url) else: r_str += "%s" % (bgcolor, f) f_2prev = f_prev f_prev = f return r_str def my_diff_time(timestamp): now = time.time() if timestamp == None: return "not yet contacted" diff = now - timestamp # return the number of seconds as a difference from current time. t_str = "" if diff < 60: # sec in min. t = diff t_str = "%s sec ago" % t elif diff < 60*60: # sec in hour t = diff // (60) t_str = "%s min ago" % int(t) elif diff < 60*60*24: # sec in day t = diff // (60*60) t_str = "%s hours ago" % int(t) elif diff < 60*60*24*7: # sec in week t = diff // (60*60*24) t_str = "%s days ago" % int(t) elif diff < 60*60*24*30: # approx sec in month t = diff // (60*60*24*7) t_str = "%s weeks ago" % int(t) elif diff > 60*60*24*30 and diff < 60*60*24*30*2: # approx sec in month month = int( diff // (60*60*24*30) ) weeks = (diff - (month * (60*60*24*30))) // (60*60*24*7) if weeks == 0: t_str = "%s month ago" % int(month) elif weeks == 4: t_str = "2 months ago" else: t_str = "%s month and %s weeks ago" % ( int(month) , int(weeks) ) elif diff >= 60*60*24*30*2: month = diff // (60*60*24*30) t_str = "%s months ago" % int(month) return t_str def main(sitefilter, catfilter, statefilter, comonfilter, nodeonlyfilter): global fb import os import datetime if nodeonlyfilter == None: print "\n" try: mtime = os.stat("/var/lib/monitor-server/production.findbad.pkl")[-2] print "Last Updated: %s GMT" % datetime.datetime.fromtimestamp(mtime) except: pass db = database.dbLoad(config.dbname) fb = database.dbLoad("findbadpcus") ## Field widths used for printing maxFieldLengths = { 'nodename' : -45, 'ping' : 6, 'ssh' : 6, 'pcu' : 7, 'category' : 9, 'state' : 5, 'kernel' : 10.65, 'comonstats' : 5, 'last_contact' : 10.65, 'plcsite' : 12, 'bootcd' : 10.65} ## create format string based on config.fields fields = {} format = "" format_fields = [] for f in config.fields.split(','): fields[f] = "%%(%s)s" % f #print f #if f in maxFieldLengths: # fields[f] = "%%(%s)%ds" % (f, maxFieldLengths[f]) #else: # fields[f] = "%%(%s)%ds" % (f, 10) format_fields.append(fields[f]) #print fields for f in config.fields.split(','): format += fields[f] + " " #print format d_n = db['nodes'] l_nodes = d_n.keys() # category by site #bysite = {} #for nodename in l_nodes: # if 'plcsite' in d_n[nodename]['values'] and \ # 'login_base' in d_n[nodename]['values']['plcsite']: # loginbase = d_n[nodename]['values']['plcsite']['login_base'] # if loginbase not in bysite: # bysite[loginbase] = [] # d_n[nodename]['values']['nodename'] = nodename # bysite[loginbase].append(d_n[nodename]['values']) # d2 was an array of [{node}, {}, ...] # the bysite is a loginbase dict of [{node}, {node}] d2 = [] import re if sitefilter != None: sf = re.compile(sitefilter) else: sf = None for nodename in l_nodes: vals=d_n[nodename]['values'] v = {} v.update(vals) v['nodename'] = nodename if 'plcsite' in vals and \ 'status' in vals['plcsite'] and \ vals['plcsite']['status'] == "SUCCESS": url = "%s" % ( vals['plcsite']['login_base'], vals['plcsite']['login_base']) site_string = "%s %2s nodes :: %2s of %4s slices" % ( \ url, vals['plcsite']['num_nodes'], vals['plcsite']['num_slices'], vals['plcsite']['max_slices']) loginbase = d_n[nodename]['values']['plcsite']['login_base'] else: #print "ERROR: ", nodename, vals, "
" site_string = "UNKNOWN" loginbase = "" v['site_string'] = site_string v['loginbase'] = loginbase if (sitefilter != None and sf.match(loginbase) != None) or sitefilter == None: d2.append(v) if sitefilter != None: config.cmpcategory = True else: config.cmploginbase = True if config.cmploginbase: d2.sort(cmp=cmpLoginBase) elif config.cmpping: d2.sort(cmp=cmpPing) elif config.cmpdns: d2.sort(cmp=cmpDNS) elif config.cmpssh: d2.sort(cmp=cmpSSH) elif config.cmpcategory: d2.sort(cmp=cmpCategory) elif config.cmpstate: d2.sort(cmp=cmpState) elif config.cmpdays: d2.sort(cmp=cmpDays) elif config.cmpkernel: d2.sort(cmp=cmpUname) else: d2.sort(cmp=cmpCategory) if catfilter != None: cf = re.compile(catfilter) else: cf = None if statefilter != None: stf = re.compile(statefilter) else: stf = None if comonfilter != None: cmf = re.compile(comonfilter) else: cmf = None output_str = "" #l_loginbase = bysite.keys() #l_loginbase.sort() if nodeonlyfilter == None: output_str += "" prev_sitestring = "" for row in d2: vals = row #added by guto about last contact information if (catfilter != None and cf.match(vals['category']) == None): continue if (statefilter != None and stf.match(vals['state']) == None): continue if (comonfilter != None and comonfilter in vals['comonstats'] and vals['comonstats'][comonfilter] != 'null'): continue if nodeonlyfilter != None: output_str += vals['nodename'] continue site_string = row['site_string'] if site_string != prev_sitestring: output_str += "" else: output_str += "" prev_sitestring = site_string # convert uname values into a single kernel version string if 'kernel' in vals: kernel = vals['kernel'].split() if len(kernel) > 0: if kernel[0] == "Linux": vals['kernel'] = kernel[2] else: vals['ssherror'] = vals['kernel'] vals['kernel'] = "" else: vals['ssherror'] = "" vals['kernel'] = "" # continue if 'model' in vals or 'protocol' in vals or 'portstatus' in vals: #vals['model'] = string.replace(vals['model']," ", " ") #vals['protocol'] = vals['protocol'].replace(" ", " ") if vals['model'] == None: vals['model'] = " " vals['model'] = string.replace(vals['model']," ", "_") vals['protocol'] = vals['protocol'].replace(" ", "_") ps = "" ports = vals['portstatus'] lports = ports.keys() lports.sort() for port in lports: t = ports[port] if t != "closed": ps += "%s: %s
" % (port, ports[port]) if ps == "": ps = "All_closed" vals['portstatus'] = ps if 'reboot' in vals: vals['reboot'] = "%s" % vals['reboot'] vals['reboot'] = vals['reboot'].replace(" ", "_") if 'nodename' in vals: url = "%s" % (config.MONITOR_HOSTNAME, vals['nodename'], vals['nodename']) vals['nodename'] = url if 'plcnode' in vals: if vals['plcnode']['status'] == "GN_FAILED": vals['last_contact'] = "UNKNOWN" else: vals['last_contact'] = my_diff_time(vals['plcnode']['last_contact']) try: str_fields = [] count = 0 for f in format_fields: str_fields.append(f % vals) count += 1 except: print >>sys.stderr, vals s = fields_to_html(str_fields, vals) output_str += s output_str += "\n" if nodeonlyfilter == None: output_str += "
" output_str += site_string output_str += "
 
" keys = categories.keys() keys.sort() print "" for cat in keys: print "" print "" % cat print "" % categories[cat] print "" if nodeonlyfilter == None: print "
Total %s%s
" print output_str if nodeonlyfilter == None: print "\n" if __name__ == '__main__': import cgi import cgitb; cgitb.enable() import sys form = cgi.FieldStorage() myfilter = None if form.has_key('site'): myfilter = form.getvalue("site") else: myfilter = None if form.has_key('category'): mycategory = form.getvalue("category") else: mycategory = None if form.has_key('state'): mystate = form.getvalue("state") else: mystate = None if form.has_key('comon'): mycomon = form.getvalue("comon") else: mycomon = None if form.has_key('nodeonly'): mynodeonly = form.getvalue("nodeonly") else: mynodeonly = None config.cmpdays=False config.comon="sshstatus" config.fields="nodename,ping,ssh,pcu,category,state,last_contact,kernel,bootcd" config.dbname="findbad" config.cmpping=False config.cmpdns=False config.cmploginbase=False config.cmpssh=False config.cmpcategory=False print "Content-Type: text/html\r\n" if len(sys.argv) > 1: if sys.argv[1] == "ssherror": ssherror = True main(myfilter, mycategory, mystate, mycomon,mynodeonly)