2 from monitor import database
3 from monitor import config
4 from monitor import general_functions
19 def array_to_priority_map(array):
20 """ Create a mapping where each entry of array is given a priority equal
21 to its position in the array. This is useful for subsequent use in the
30 def cmpValMap(v1, v2, map):
31 if v1 in map and v2 in map and map[v1] < map[v2]:
33 elif v1 in map and v2 in map and map[v1] > map[v2]:
35 elif v1 in map and v2 in map:
38 raise Exception("No index %s or %s in map" % (v1, v2))
40 def cmpMap(l1, l2, index, map):
41 if index in l1 and index in l2:
42 if map[l1[index]] < map[l2[index]]:
44 elif map[l1[index]] > map[l2[index]]:
51 def cmpLoginBase(l1, l2):
52 #print "'" + l1['loginbase'] + "'" + " < " + "'" + l2['loginbase'] + "'" + "<BR>"
53 if l1['loginbase'] == l2['loginbase']:
55 elif l1['loginbase'] < l2['loginbase']:
57 elif l1['loginbase'] > l2['loginbase']:
63 map = array_to_priority_map([ 'BOOT', 'DEBUG', 'DOWN' ])
64 return cmpMap(l1,l2,'state', map)
def cmpCategoryVal(v1, v2):
    """ Compare two bare category values.

    Each value is ranked by its position in the ordering listed below
    (array_to_priority_map turns the list into a value -> position
    mapping); the actual comparison is delegated to cmpValMap.
    """
    category_order = [
        None,
        'ALPHA',
        'PROD',
        'OLDPROD',
        'OLDBOOTCD',
        'UNKNOWN',
        'FORCED',
        'ERROR',
    ]
    return cmpValMap(v1, v2, array_to_priority_map(category_order))
def cmpCategory(l1, l2):
    """ Compare two entries by their 'category' value.

    The categories are ranked by their position in the ordering listed
    below (array_to_priority_map turns the list into a value -> position
    mapping); the actual comparison is delegated to cmpMap, which looks
    up the 'category' key in l1 and l2.
    """
    # NOTE(review): cmpCategoryVal's ordering additionally lists None and
    # 'FORCED'; neither is ranked here -- confirm that is intentional.
    category_order = [
        'ALPHA',
        'PROD',
        'OLDPROD',
        'OLDBOOTCD',
        'UNKNOWN',
        'ERROR',
    ]
    return cmpMap(l1, l2, 'category', array_to_priority_map(category_order))
75 """ Either PCU or NOPCU"""
76 map = array_to_priority_map([ 'PCU', 'NOPCU', 'UNKNOWN'])
77 return cmpMap(l1, l2, 'pcu', map)
80 """ Either SSH or NOSSH """
81 map = array_to_priority_map([ 'SSH', 'NOSSH'])
82 return cmpMap(l1, l2, 'ssh', map)
85 """ Compare DNS states """
86 map = array_to_priority_map([ 'OK', 'NOHOSTNAME', 'NOENTRY', 'MISMATCH'])
87 return cmpMap(l1, l2, 'dnsmatch', map)
90 """ Either PING or NOPING """
91 map = array_to_priority_map([ 'PING', 'NOPING'])
92 return cmpMap(l1, l2, 'ping', map)
95 # Extract the kernel version from the `uname -a` string
96 l_k1 = l1['kernel'].split()
102 l_k2 = l2['kernel'].split()
111 if l1['comonstats'][config.comon] == "null":
112 l1['comonstats'][config.comon] = -1
113 if l2['comonstats'][config.comon] == "null":
114 l2['comonstats'][config.comon] = -1
116 if int(l1['comonstats'][config.comon]) > int(l2['comonstats'][config.comon]):
118 elif int(l1['comonstats'][config.comon]) < int(l2['comonstats'][config.comon]):
123 def ssh_error_to_str(str):
125 if "Connection timed out" in str:
126 ssh_error = "Timeout"
127 elif "Connection closed by remote host" in str:
128 ssh_error = "Closed by remote host"
129 elif "Connection refused" in str:
130 ssh_error = "Connection refused"
131 elif "Temporary failure in name resolution" in str:
132 ssh_error = "Could not resolve name"
133 elif "Name or service not known" in str:
134 ssh_error = "Name not known"
135 elif "Too many authentication failures" in str:
136 ssh_error = "Disconnect: root auth failure"
137 elif "Network is unreachable" in str:
138 ssh_error = "Network is unreachable"
139 elif "Connection reset by peer" in str:
140 ssh_error = "Connection reset by peer"
141 elif "WARNING" in str:
142 ssh_error = "WARNING ssh key updated"
148 def pcu_state(pcu_id):
151 if 'nodes' in fb and "id_%s" % pcu_id in fb['nodes'] \
152 and 'values' in fb['nodes']["id_%s" % pcu_id]:
153 rec = fb['nodes']["id_%s" % pcu_id]['values']
156 if rb == 0 or rb == "0":
158 elif "NetDown" == rb or "Not_Run" == rb:
167 def fields_to_html(fields, vals):
170 pcu_colorMap = { -1 : 'indianred',
174 colorMap = { 'PING' : 'darkseagreen',
175 'NOPING': 'darksalmon',
176 'SSH': 'darkseagreen',
177 'NOSSH': 'indianred',
178 'PCU': 'darkseagreen',
179 'NOPCU': 'lightgrey',
180 'OLDBOOTCD': 'crimson',
184 'PROD': 'darkseagreen',
185 'DEBUG': 'darksalmon',
186 'DEBUG': 'darksalmon',
187 'BOOT': 'lightgreen'}
191 #print 'inside--------------'
196 if f in ['DOWN', 'BOOT', 'DEBUG']:
197 #key = "%s-%s-%s" % (f,f_prev,f_2prev)
198 key = "%s-%s" % (f,f_prev)
199 if key not in categories:
204 #print "<pre>%s</pre><br>" % f
207 bgcolor="bgcolor='%s'" % colorMap[f]
213 if 'ssherror' in vals:
214 str_ssh_error = ssh_error_to_str(vals['ssherror'])
216 str_ssh_error = "NO SSHERROR in VALS"
217 if str_ssh_error != "Timeout":
218 r_str += """<td nowrap %s>%s<br><b><font size="-2">%s</font></b></td>""" % \
219 (bgcolor,f,str_ssh_error)
221 r_str += "<td %s>%s</td>" % (bgcolor, f)
223 r_str += "<td %s>%s</td>" % (bgcolor, f)
225 if len(vals['plcnode']['pcu_ids']) > 0:
226 #print "pcu_id: %s<br>" % vals['plcnode']['pcu_ids'][0]
227 #print "state: %s<br>" % pcu_state(vals['plcnode']['pcu_ids'][0])
228 #print "color: %s<br>" % pcu_colorMap[pcu_state(vals['plcnode']['pcu_ids'][0])]
229 bgcolor = "bgcolor='%s'" % pcu_colorMap[pcu_state(vals['plcnode']['pcu_ids'][0])]
230 url = "<a href='/cgi-bin/monitor/printbadpcus.php?id=%s'>PCU</a>" % vals['plcnode']['pcu_ids'][0]
231 r_str += "<td nowrap %s>%s</td>" % (bgcolor, url)
233 r_str += "<td nowrap %s>%s</td>" % (bgcolor, f)
239 def my_diff_time(timestamp):
241 if timestamp == None:
242 return "not yet contacted"
243 diff = now - timestamp
244 # return the number of seconds as a difference from current time.
246 if diff < 60: # sec in min.
248 t_str = "%s sec ago" % t
249 elif diff < 60*60: # sec in hour
251 t_str = "%s min ago" % int(t)
252 elif diff < 60*60*24: # sec in day
254 t_str = "%s hours ago" % int(t)
255 elif diff < 60*60*24*7: # sec in week
256 t = diff // (60*60*24)
257 t_str = "%s days ago" % int(t)
258 elif diff < 60*60*24*30: # approx sec in month
259 t = diff // (60*60*24*7)
260 t_str = "%s weeks ago" % int(t)
261 elif diff > 60*60*24*30 and diff < 60*60*24*30*2: # approx sec in month
262 month = int( diff // (60*60*24*30) )
263 weeks = (diff - (month * (60*60*24*30))) // (60*60*24*7)
265 t_str = "%s month ago" % int(month)
267 t_str = "2 months ago"
269 t_str = "%s month and %s weeks ago" % ( int(month) , int(weeks) )
270 elif diff >= 60*60*24*30*2:
271 month = diff // (60*60*24*30)
272 t_str = "%s months ago" % int(month)
276 def main(sitefilter, catfilter, statefilter, comonfilter, nodeonlyfilter):
280 if nodeonlyfilter == None:
281 print "<html><body>\n"
284 mtime = os.stat("/var/lib/monitor-server/production.findbad.pkl")[-2]
285 print "Last Updated: %s GMT" % datetime.datetime.fromtimestamp(mtime)
290 db = database.dbLoad(config.dbname)
291 fb = database.dbLoad("findbadpcus")
293 ## Field widths used for printing
294 maxFieldLengths = { 'nodename' : -45,
302 'last_contact' : 10.65,
305 ## create format string based on config.fields
309 for f in config.fields.split(','):
310 fields[f] = "%%(%s)s" % f
312 #if f in maxFieldLengths:
313 # fields[f] = "%%(%s)%ds" % (f, maxFieldLengths[f])
315 # fields[f] = "%%(%s)%ds" % (f, 10)
317 format_fields.append(fields[f])
319 for f in config.fields.split(','):
320 format += fields[f] + " "
328 #for nodename in l_nodes:
329 # if 'plcsite' in d_n[nodename]['values'] and \
330 # 'login_base' in d_n[nodename]['values']['plcsite']:
331 # loginbase = d_n[nodename]['values']['plcsite']['login_base']
332 # if loginbase not in bysite:
333 # bysite[loginbase] = []
334 # d_n[nodename]['values']['nodename'] = nodename
335 # bysite[loginbase].append(d_n[nodename]['values'])
337 # d2 was an array of [{node}, {}, ...]
338 # the bysite is a loginbase dict of [{node}, {node}]
341 if sitefilter != None:
342 sf = re.compile(sitefilter)
345 for nodename in l_nodes:
346 vals=d_n[nodename]['values']
349 v['nodename'] = nodename
350 if 'plcsite' in vals and \
351 'status' in vals['plcsite'] and \
352 vals['plcsite']['status'] == "SUCCESS":
354 url = "<a href='printbadnodes.py?site=%s'>%s</a>" % ( vals['plcsite']['login_base'],
355 vals['plcsite']['login_base'])
357 site_string = "%s %2s nodes :: %2s of %4s slices" % ( \
359 vals['plcsite']['num_nodes'],
360 vals['plcsite']['num_slices'],
361 vals['plcsite']['max_slices'])
362 loginbase = d_n[nodename]['values']['plcsite']['login_base']
364 #print "ERROR: ", nodename, vals, "<br>"
365 site_string = "<b>UNKNOWN</b>"
368 v['site_string'] = site_string
369 v['loginbase'] = loginbase
370 if (sitefilter != None and sf.match(loginbase) != None) or sitefilter == None:
374 if sitefilter != None:
375 config.cmpcategory = True
377 config.cmploginbase = True
380 if config.cmploginbase:
381 d2.sort(cmp=cmpLoginBase)
388 elif config.cmpcategory:
389 d2.sort(cmp=cmpCategory)
390 elif config.cmpstate:
391 d2.sort(cmp=cmpState)
394 elif config.cmpkernel:
395 d2.sort(cmp=cmpUname)
397 d2.sort(cmp=cmpCategory)
400 if catfilter != None: cf = re.compile(catfilter)
403 if statefilter != None: stf = re.compile(statefilter)
406 if comonfilter != None: cmf = re.compile(comonfilter)
411 #l_loginbase = bysite.keys()
413 if nodeonlyfilter == None:
414 output_str += "<table width=80% border=1>"
421 #added by guto about last contact information
422 if (catfilter != None and cf.match(vals['category']) == None):
425 if (statefilter != None and stf.match(vals['state']) == None):
428 if (comonfilter != None and comonfilter in vals['comonstats'] and vals['comonstats'][comonfilter] != 'null'):
431 if nodeonlyfilter != None:
432 output_str += vals['nodename']
435 site_string = row['site_string']
436 if site_string != prev_sitestring:
437 output_str += "<tr><td bgcolor=lightblue nowrap>"
438 output_str += site_string
439 output_str += "</td>"
441 output_str += "<tr><td> </td>"
443 prev_sitestring = site_string
446 # convert uname values into a single kernel version string
448 kernel = vals['kernel'].split()
450 if kernel[0] == "Linux":
451 vals['kernel'] = kernel[2]
453 vals['ssherror'] = vals['kernel']
456 vals['ssherror'] = ""
459 if 'model' in vals or 'protocol' in vals or 'portstatus' in vals:
460 #vals['model'] = string.replace(vals['model']," ", " ")
461 #vals['protocol'] = vals['protocol'].replace(" ", " ")
462 if vals['model'] == None:
464 vals['model'] = string.replace(vals['model']," ", "_")
465 vals['protocol'] = vals['protocol'].replace(" ", "_")
467 ports = vals['portstatus']
468 lports = ports.keys()
473 ps += "%s: %s<br>" % (port, ports[port])
477 vals['portstatus'] = ps
480 vals['reboot'] = "%s" % vals['reboot']
481 vals['reboot'] = vals['reboot'].replace(" ", "_")
483 if 'nodename' in vals:
484 url = "<a href='https://%s/db/nodes/index.php?nodepattern=%s'>%s</a>" % (config.MONITOR_HOSTNAME, vals['nodename'], vals['nodename'])
485 vals['nodename'] = url
487 if 'plcnode' in vals:
488 if vals['plcnode']['status'] == "GN_FAILED":
489 vals['last_contact'] = "UNKNOWN"
491 vals['last_contact'] = my_diff_time(vals['plcnode']['last_contact'])
496 for f in format_fields:
497 str_fields.append(f % vals)
500 print >>sys.stderr, vals
502 s = fields_to_html(str_fields, vals)
505 output_str += "\n</tr>"
507 if nodeonlyfilter == None:
508 output_str += "</table>"
509 keys = categories.keys()
514 print "<th nowrap align=left>Total %s</th>" % cat
515 print "<td align=left>%s</td>" % categories[cat]
517 if nodeonlyfilter == None:
521 if nodeonlyfilter == None:
522 print "</body></html>\n"
526 if __name__ == '__main__':
532 form = cgi.FieldStorage()
535 if form.has_key('site'):
536 myfilter = form.getvalue("site")
540 if form.has_key('category'):
541 mycategory = form.getvalue("category")
545 if form.has_key('state'):
546 mystate = form.getvalue("state")
550 if form.has_key('comon'):
551 mycomon = form.getvalue("comon")
555 if form.has_key('nodeonly'):
556 mynodeonly = form.getvalue("nodeonly")
561 config.comon="sshstatus"
562 config.fields="nodename,ping,ssh,pcu,category,state,last_contact,kernel,bootcd"
563 config.dbname="findbad"
566 config.cmploginbase=False
568 config.cmpcategory=False
570 print "Content-Type: text/html\r\n"
571 if len(sys.argv) > 1:
572 if sys.argv[1] == "ssherror":
574 main(myfilter, mycategory, mystate, mycomon,mynodeonly)