2 from monitor import database
3 from monitor import config
18 def array_to_priority_map(array):
19 """ Create a mapping where each entry of array is given a priority equal
20 to its position in the array. This is useful for subsequent use in the
29 def cmpValMap(v1, v2, map):
30 if v1 in map and v2 in map and map[v1] < map[v2]:
32 elif v1 in map and v2 in map and map[v1] > map[v2]:
34 elif v1 in map and v2 in map:
37 raise Exception("No index %s or %s in map" % (v1, v2))
39 def cmpMap(l1, l2, index, map):
40 if index in l1 and index in l2:
41 if map[l1[index]] < map[l2[index]]:
43 elif map[l1[index]] > map[l2[index]]:
50 def cmpLoginBase(l1, l2):
51 #print "'" + l1['loginbase'] + "'" + " < " + "'" + l2['loginbase'] + "'" + "<BR>"
52 if l1['loginbase'] == l2['loginbase']:
54 elif l1['loginbase'] < l2['loginbase']:
56 elif l1['loginbase'] > l2['loginbase']:
62 map = array_to_priority_map([ 'BOOT', 'DEBUG', 'DOWN' ])
63 return cmpMap(l1,l2,'state', map)
def cmpCategoryVal(v1, v2):
    """Compare two bare category values by their rank in the category order.

    The rank of a value is its position in the list below (None ranks
    first, 'ERROR' last), as assigned by array_to_priority_map.  The
    comparison itself is delegated to cmpValMap, whose result is returned
    unchanged.
    """
    category_order = [
        None, 'ALPHA', 'PROD', 'OLDPROD', 'OLDBOOTCD',
        'UNKNOWN', 'FORCED', 'ERROR',
    ]
    priorities = array_to_priority_map(category_order)
    return cmpValMap(v1, v2, priorities)
def cmpCategory(l1, l2):
    """Compare two node records by the priority of their 'category' field.

    Intended as a ``cmp`` function for list.sort().  Each record is a dict;
    the value stored under 'category' is ranked by its position in the
    list below and the comparison is delegated to cmpMap, whose result is
    returned unchanged.
    """
    # Use the same category list as cmpCategoryVal.  cmpMap indexes the
    # priority map directly, so a record whose category is None or
    # 'FORCED' would otherwise raise KeyError in the middle of a sort.
    # The relative order of the previously listed categories is unchanged.
    priorities = array_to_priority_map([
        None, 'ALPHA', 'PROD', 'OLDPROD', 'OLDBOOTCD',
        'UNKNOWN', 'FORCED', 'ERROR',
    ])
    return cmpMap(l1, l2, 'category', priorities)
74 """ Either PCU or NOPCU"""
75 map = array_to_priority_map([ 'PCU', 'NOPCU', 'UNKNOWN'])
76 return cmpMap(l1, l2, 'pcu', map)
79 """ Either SSH or NOSSH """
80 map = array_to_priority_map([ 'SSH', 'NOSSH'])
81 return cmpMap(l1, l2, 'ssh', map)
84 """ Compare DNS states """
85 map = array_to_priority_map([ 'OK', 'NOHOSTNAME', 'NOENTRY', 'MISMATCH'])
86 return cmpMap(l1, l2, 'dnsmatch', map)
89 """ Either PING or NOPING """
90 map = array_to_priority_map([ 'PING', 'NOPING'])
91 return cmpMap(l1, l2, 'ping', map)
94 # Extract the kernel version from the `uname -a` string
95 l_k1 = l1['kernel'].split()
101 l_k2 = l2['kernel'].split()
110 if l1['comonstats'][config.comon] == "null":
111 l1['comonstats'][config.comon] = -1
112 if l2['comonstats'][config.comon] == "null":
113 l2['comonstats'][config.comon] = -1
115 if int(l1['comonstats'][config.comon]) > int(l2['comonstats'][config.comon]):
117 elif int(l1['comonstats'][config.comon]) < int(l2['comonstats'][config.comon]):
122 def ssh_error_to_str(str):
124 if "Connection timed out" in str:
125 ssh_error = "Timeout"
126 elif "Connection closed by remote host" in str:
127 ssh_error = "Closed by remote host"
128 elif "Connection refused" in str:
129 ssh_error = "Connection refused"
130 elif "Temporary failure in name resolution" in str:
131 ssh_error = "Could not resolve name"
132 elif "Name or service not known" in str:
133 ssh_error = "Name not known"
134 elif "Too many authentication failures" in str:
135 ssh_error = "Disconnect: root auth failure"
136 elif "Network is unreachable" in str:
137 ssh_error = "Network is unreachable"
138 elif "Connection reset by peer" in str:
139 ssh_error = "Connection reset by peer"
140 elif "WARNING" in str:
141 ssh_error = "WARNING ssh key updated"
147 def pcu_state(pcu_id):
150 if 'nodes' in fb and "id_%s" % pcu_id in fb['nodes'] \
151 and 'values' in fb['nodes']["id_%s" % pcu_id]:
152 rec = fb['nodes']["id_%s" % pcu_id]['values']
155 if rb == 0 or rb == "0":
157 elif "NetDown" == rb or "Not_Run" == rb:
166 def fields_to_html(fields, vals):
169 pcu_colorMap = { -1 : 'indianred',
173 colorMap = { 'PING' : 'darkseagreen',
174 'NOPING': 'darksalmon',
175 'SSH': 'darkseagreen',
176 'NOSSH': 'indianred',
177 'PCU': 'darkseagreen',
178 'NOPCU': 'lightgrey',
179 'OLDBOOTCD': 'crimson',
183 'PROD': 'darkseagreen',
184 'DEBUG': 'darksalmon',
185 'DEBUG': 'darksalmon',
186 'BOOT': 'lightgreen'}
190 #print 'inside--------------'
195 if f in ['DOWN', 'BOOT', 'DEBUG']:
196 #key = "%s-%s-%s" % (f,f_prev,f_2prev)
197 key = "%s-%s" % (f,f_prev)
198 if key not in categories:
203 #print "<pre>%s</pre><br>" % f
206 bgcolor="bgcolor='%s'" % colorMap[f]
212 if 'ssherror' in vals:
213 str_ssh_error = ssh_error_to_str(vals['ssherror'])
215 str_ssh_error = "NO SSHERROR in VALS"
216 if str_ssh_error != "Timeout":
217 r_str += """<td nowrap %s>%s<br><b><font size="-2">%s</font></b></td>""" % \
218 (bgcolor,f,str_ssh_error)
220 r_str += "<td %s>%s</td>" % (bgcolor, f)
222 r_str += "<td %s>%s</td>" % (bgcolor, f)
224 if len(vals['plcnode']['pcu_ids']) > 0:
225 #print "pcu_id: %s<br>" % vals['plcnode']['pcu_ids'][0]
226 #print "state: %s<br>" % pcu_state(vals['plcnode']['pcu_ids'][0])
227 #print "color: %s<br>" % pcu_colorMap[pcu_state(vals['plcnode']['pcu_ids'][0])]
228 bgcolor = "bgcolor='%s'" % pcu_colorMap[pcu_state(vals['plcnode']['pcu_ids'][0])]
229 url = "<a href='/cgi-bin/monitor/printbadpcus.php?id=%s'>PCU</a>" % vals['plcnode']['pcu_ids'][0]
230 r_str += "<td nowrap %s>%s</td>" % (bgcolor, url)
232 r_str += "<td nowrap %s>%s</td>" % (bgcolor, f)
238 def my_diff_time(timestamp):
240 if timestamp == None:
241 return "not yet contacted"
242 diff = now - timestamp
243 # return the number of seconds as a difference from current time.
245 if diff < 60: # sec in min.
247 t_str = "%s sec ago" % t
248 elif diff < 60*60: # sec in hour
250 t_str = "%s min ago" % int(t)
251 elif diff < 60*60*24: # sec in day
253 t_str = "%s hours ago" % int(t)
254 elif diff < 60*60*24*7: # sec in week
255 t = diff // (60*60*24)
256 t_str = "%s days ago" % int(t)
257 elif diff < 60*60*24*30: # approx sec in month
258 t = diff // (60*60*24*7)
259 t_str = "%s weeks ago" % int(t)
260 elif diff > 60*60*24*30 and diff < 60*60*24*30*2: # approx sec in month
261 month = int( diff // (60*60*24*30) )
262 weeks = (diff - (month * (60*60*24*30))) // (60*60*24*7)
264 t_str = "%s month ago" % int(month)
266 t_str = "2 months ago"
268 t_str = "%s month and %s weeks ago" % ( int(month) , int(weeks) )
269 elif diff >= 60*60*24*30*2:
270 month = diff // (60*60*24*30)
271 t_str = "%s months ago" % int(month)
275 def main(sitefilter, catfilter, statefilter, comonfilter, nodeonlyfilter):
279 if nodeonlyfilter == None:
280 print "<html><body>\n"
283 mtime = os.stat("/var/lib/monitor-server/production.findbad.pkl")[-2]
284 print "Last Updated: %s GMT" % datetime.datetime.fromtimestamp(mtime)
289 db = database.dbLoad(config.dbname)
290 fb = database.dbLoad("findbadpcus")
292 ## Field widths used for printing
293 maxFieldLengths = { 'nodename' : -45,
301 'last_contact' : 10.65,
304 ## create format string based on config.fields
308 for f in config.fields.split(','):
309 fields[f] = "%%(%s)s" % f
311 #if f in maxFieldLengths:
312 # fields[f] = "%%(%s)%ds" % (f, maxFieldLengths[f])
314 # fields[f] = "%%(%s)%ds" % (f, 10)
316 format_fields.append(fields[f])
318 for f in config.fields.split(','):
319 format += fields[f] + " "
327 #for nodename in l_nodes:
328 # if 'plcsite' in d_n[nodename]['values'] and \
329 # 'login_base' in d_n[nodename]['values']['plcsite']:
330 # loginbase = d_n[nodename]['values']['plcsite']['login_base']
331 # if loginbase not in bysite:
332 # bysite[loginbase] = []
333 # d_n[nodename]['values']['nodename'] = nodename
334 # bysite[loginbase].append(d_n[nodename]['values'])
336 # d2 was an array of [{node}, {}, ...]
337 # the bysite is a loginbase dict of [{node}, {node}]
340 if sitefilter != None:
341 sf = re.compile(sitefilter)
344 for nodename in l_nodes:
345 vals=d_n[nodename]['values']
348 v['nodename'] = nodename
349 if 'plcsite' in vals and \
350 'status' in vals['plcsite'] and \
351 vals['plcsite']['status'] == "SUCCESS":
353 url = "<a href='printbadnodes.py?site=%s'>%s</a>" % ( vals['plcsite']['login_base'],
354 vals['plcsite']['login_base'])
356 site_string = "%s %2s nodes :: %2s of %4s slices" % ( \
358 vals['plcsite']['num_nodes'],
359 vals['plcsite']['num_slices'],
360 vals['plcsite']['max_slices'])
361 loginbase = d_n[nodename]['values']['plcsite']['login_base']
363 #print "ERROR: ", nodename, vals, "<br>"
364 site_string = "<b>UNKNOWN</b>"
367 v['site_string'] = site_string
368 v['loginbase'] = loginbase
369 if (sitefilter != None and sf.match(loginbase) != None) or sitefilter == None:
373 if sitefilter != None:
374 config.cmpcategory = True
376 config.cmploginbase = True
379 if config.cmploginbase:
380 d2.sort(cmp=cmpLoginBase)
387 elif config.cmpcategory:
388 d2.sort(cmp=cmpCategory)
389 elif config.cmpstate:
390 d2.sort(cmp=cmpState)
393 elif config.cmpkernel:
394 d2.sort(cmp=cmpUname)
396 d2.sort(cmp=cmpCategory)
399 if catfilter != None: cf = re.compile(catfilter)
402 if statefilter != None: stf = re.compile(statefilter)
405 if comonfilter != None: cmf = re.compile(comonfilter)
410 #l_loginbase = bysite.keys()
412 if nodeonlyfilter == None:
413 output_str += "<table width=80% border=1>"
420 #added by guto about last contact information
421 if (catfilter != None and cf.match(vals['category']) == None):
424 if (statefilter != None and stf.match(vals['state']) == None):
427 if (comonfilter != None and comonfilter in vals['comonstats'] and vals['comonstats'][comonfilter] != 'null'):
430 if nodeonlyfilter != None:
431 output_str += vals['nodename']
434 site_string = row['site_string']
435 if site_string != prev_sitestring:
436 output_str += "<tr><td bgcolor=lightblue nowrap>"
437 output_str += site_string
438 output_str += "</td>"
440 output_str += "<tr><td> </td>"
442 prev_sitestring = site_string
445 # convert uname values into a single kernel version string
447 kernel = vals['kernel'].split()
449 if kernel[0] == "Linux":
450 vals['kernel'] = kernel[2]
452 vals['ssherror'] = vals['kernel']
455 vals['ssherror'] = ""
458 if 'model' in vals or 'protocol' in vals or 'portstatus' in vals:
459 #vals['model'] = string.replace(vals['model']," ", " ")
460 #vals['protocol'] = vals['protocol'].replace(" ", " ")
461 if vals['model'] == None:
463 vals['model'] = string.replace(vals['model']," ", "_")
464 vals['protocol'] = vals['protocol'].replace(" ", "_")
466 ports = vals['portstatus']
467 lports = ports.keys()
472 ps += "%s: %s<br>" % (port, ports[port])
476 vals['portstatus'] = ps
479 vals['reboot'] = "%s" % vals['reboot']
480 vals['reboot'] = vals['reboot'].replace(" ", "_")
482 if 'nodename' in vals:
483 url = "<a href='https://%s/db/nodes/index.php?nodepattern=%s'>%s</a>" % (config.MONITOR_HOSTNAME, vals['nodename'], vals['nodename'])
484 vals['nodename'] = url
486 if 'plcnode' in vals:
487 if vals['plcnode']['status'] == "GN_FAILED":
488 vals['last_contact'] = "UNKNOWN"
490 vals['last_contact'] = my_diff_time(vals['plcnode']['last_contact'])
495 for f in format_fields:
496 str_fields.append(f % vals)
499 print >>sys.stderr, vals
501 s = fields_to_html(str_fields, vals)
504 output_str += "\n</tr>"
506 if nodeonlyfilter == None:
507 output_str += "</table>"
508 keys = categories.keys()
513 print "<th nowrap align=left>Total %s</th>" % cat
514 print "<td align=left>%s</td>" % categories[cat]
516 if nodeonlyfilter == None:
520 if nodeonlyfilter == None:
521 print "</body></html>\n"
525 if __name__ == '__main__':
531 form = cgi.FieldStorage()
534 if form.has_key('site'):
535 myfilter = form.getvalue("site")
539 if form.has_key('category'):
540 mycategory = form.getvalue("category")
544 if form.has_key('state'):
545 mystate = form.getvalue("state")
549 if form.has_key('comon'):
550 mycomon = form.getvalue("comon")
554 if form.has_key('nodeonly'):
555 mynodeonly = form.getvalue("nodeonly")
560 config.comon="sshstatus"
561 config.fields="nodename,ping,ssh,pcu,category,state,last_contact,kernel,bootcd"
562 config.dbname="findbad"
565 config.cmploginbase=False
567 config.cmpcategory=False
569 print "Content-Type: text/html\r\n"
570 if len(sys.argv) > 1:
571 if sys.argv[1] == "ssherror":
573 main(myfilter, mycategory, mystate, mycomon,mynodeonly)