24a6dc4239c58f9951a7e9266e4e8fa9adc899e5
[monitor.git] / www / printbadnodes.py
1 #!/usr/bin/python
2 from monitor import database
3 from monitor import config
4 import string
5 import sys
6
# Running tally keyed by "<state>-<previous field>"; incremented by
# fields_to_html() and printed as a summary table by main().
categories = {}
# When True, fields_to_html() annotates NOSSH cells with the ssh error text.
# Switched on by the script entry point when argv[1] == "ssherror".
ssherror = False
# Contents of the "findbadpcus" database; loaded by main(), read by pcu_state().
fb = {}
10
def sec2days(sec):
    """Convert a duration in seconds to days.

    The literal string "null" is treated as minus one day.  Any other
    value is passed through int() before being divided by the number of
    seconds in a day.
    """
    seconds_per_day = 60 * 60 * 24
    if sec == "null":
        sec = -seconds_per_day
    return int(sec) / seconds_per_day
16
def array_to_priority_map(array):
    """Map every entry of array to its position in array.

    The resulting dict is meant to be handed to cmpMap() / cmpValMap() as
    a priority table.  If an entry occurs more than once, its last
    position wins.
    """
    return dict((entry, position) for position, entry in enumerate(array))
27
def cmpValMap(v1, v2, map):
    """Compare two bare values by their priority in map.

    Note the orientation: returns 1 when v1 has the LOWER priority number,
    -1 when it has the higher one, and 0 when both are equal.  Raises
    Exception when either value is missing from map.
    """
    if v1 not in map or v2 not in map:
        raise Exception("No index %s or %s in map" % (v1, v2))

    rank1, rank2 = map[v1], map[v2]
    if rank1 < rank2:
        return 1
    if rank1 > rank2:
        return -1
    return 0
37
def cmpMap(l1, l2, index, map):
    """cmp-style comparison of two records on the field named by index.

    Each record's l[index] value is looked up in the priority table map
    and the priorities are compared: -1 if l1 ranks first, 1 if l2 ranks
    first, 0 if they tie.  Records are treated as equal when either one
    lacks the field.  A field value absent from map raises KeyError.
    """
    if index not in l1 or index not in l2:
        return 0

    left = map[l1[index]]
    right = map[l2[index]]
    if left < right:
        return -1
    if left > right:
        return 1
    return 0
48
def cmpLoginBase(l1, l2):
    """cmp-style comparison of two node records by their 'loginbase' value.

    Returns -1, 0 or 1 for ascending order.
    """
    first = l1['loginbase']
    second = l2['loginbase']
    # True/False subtract as 1/0, giving -1, 0 or 1.
    return (first > second) - (first < second)
59
def cmpState(l1, l2):
    """Order node records by 'state': BOOT first, then DEBUG, then DOWN."""
    state_priority = array_to_priority_map(['BOOT', 'DEBUG', 'DOWN'])
    return cmpMap(l1, l2, 'state', state_priority)
63
def cmpCategoryVal(v1, v2):
    """Compare two bare category values (not records) via cmpValMap().

    Categories are ranked in the order listed below; the sign of the
    result follows cmpValMap().
    """
    category_order = [
        None,
        'ALPHA',
        'PROD',
        'OLDPROD',
        'OLDBOOTCD',
        'UNKNOWN',
        'FORCED',
        'ERROR',
    ]
    return cmpValMap(v1, v2, array_to_priority_map(category_order))
67
def cmpCategory(l1, l2):
    """Order node records by their 'category' field.

    Uses the same category list as cmpCategoryVal().  The previous list
    omitted None and 'FORCED', so a record carrying either value raised
    KeyError inside cmpMap() and aborted the whole sort.  The relative
    order of the categories that were already listed is unchanged.
    """
    category_order = [
        None,
        'ALPHA',
        'PROD',
        'OLDPROD',
        'OLDBOOTCD',
        'UNKNOWN',
        'FORCED',
        'ERROR',
    ]
    return cmpMap(l1, l2, 'category', array_to_priority_map(category_order))
71
def cmpPCU(l1, l2):
    """Order node records by 'pcu': PCU first, then NOPCU, then UNKNOWN."""
    pcu_priority = array_to_priority_map(['PCU', 'NOPCU', 'UNKNOWN'])
    return cmpMap(l1, l2, 'pcu', pcu_priority)
76
def cmpSSH(l1, l2):
    """Order node records by 'ssh': SSH before NOSSH."""
    ssh_priority = array_to_priority_map(['SSH', 'NOSSH'])
    return cmpMap(l1, l2, 'ssh', ssh_priority)
81
def cmpDNS(l1, l2):
    """Order node records by 'dnsmatch': OK, NOHOSTNAME, NOENTRY, MISMATCH."""
    dns_priority = array_to_priority_map(
        ['OK', 'NOHOSTNAME', 'NOENTRY', 'MISMATCH'])
    return cmpMap(l1, l2, 'dnsmatch', dns_priority)
86         
def cmpPing(l1, l2):
    """Order node records by 'ping': PING before NOPING."""
    ping_priority = array_to_priority_map(['PING', 'NOPING'])
    return cmpMap(l1, l2, 'ping', ping_priority)
91
def cmpUname(l1, l2):
    """cmp-style comparison of two node records by kernel version.

    The version is the third whitespace-separated token of the record's
    'kernel' string (uname -a style output).  Versions are compared as
    plain strings.  A record with no 'kernel' entry, or with fewer than
    three tokens, has no version and sorts after every record that has
    one; two such records compare equal.

    The original returned 1 in both directions when neither record had a
    version, which is not a valid comparator, and raised KeyError when
    'kernel' was absent.
    """
    def _kernel_version(node):
        # None signals "no usable version string".
        tokens = (node.get('kernel') or "").split()
        if len(tokens) > 2:
            return tokens[2]
        return None

    k1 = _kernel_version(l1)
    k2 = _kernel_version(l2)

    if k1 is None and k2 is None:
        return 0
    if k1 is None:
        return 1
    if k2 is None:
        return -1
    # Equivalent to cmp(k1, k2) without relying on the Python 2 builtin.
    return (k1 > k2) - (k1 < k2)
107
def cmpDays(l1, l2):
    """cmp-style comparison on the comonstats entry named by config.comon.

    Larger values sort first (descending order).  The string "null" is
    treated as -1; every other value is converted with int().

    The records are no longer modified.  The original overwrote "null"
    with -1 inside the caller's comonstats dict, a side effect that
    outlived the sort and changed what later "!= 'null'" checks on the
    same data would see.
    """
    def _stat(node):
        value = node['comonstats'][config.comon]
        if value == "null":
            return -1
        return int(value)

    d1 = _stat(l1)
    d2 = _stat(l2)
    if d1 > d2:
        return -1
    if d1 < d2:
        return 1
    return 0
120
def ssh_error_to_str(str):
    """Reduce raw ssh error output to a short label.

    The first known substring found in str (checked in the order below)
    selects the label.  Text containing none of them is returned as-is.
    """
    known_errors = (
        ("Connection timed out", "Timeout"),
        ("Connection closed by remote host", "Closed by remote host"),
        ("Connection refused", "Connection refused"),
        ("Temporary failure in name resolution", "Could not resolve name"),
        ("Name or service not known", "Name not known"),
        ("Too many authentication failures", "Disconnect: root auth failure"),
        ("Network is unreachable", "Network is unreachable"),
        ("Connection reset by peer", "Connection reset by peer"),
        ("WARNING", "WARNING ssh key updated"),
    )
    for needle, label in known_errors:
        if needle in str:
            return label
    return str
145
def pcu_state(pcu_id):
    """Classify the 'reboot' value recorded for a PCU in the global fb.

    Looks up fb['nodes']["id_<pcu_id>"]['values']['reboot'] and returns:
       0  when the value is 0 or "0",
       1  when it is "NetDown" or "Not_Run",
      -1  for any other value, or when any level of the lookup is missing.
    """
    global fb

    key = "id_%s" % pcu_id
    if 'nodes' not in fb:
        return -1
    nodes = fb['nodes']
    if key not in nodes or 'values' not in nodes[key]:
        return -1

    rec = nodes[key]['values']
    if 'reboot' not in rec:
        return -1

    rb = rec['reboot']
    if rb == 0 or rb == "0":
        return 0
    if rb == "NetDown" or rb == "Not_Run":
        return 1
    return -1
164
def fields_to_html(fields, vals):
    """Render one table row's cells as a string of <td> elements.

    fields -- already-formatted field strings for the row, in column
              order; each is stripped before use.
    vals   -- the node record the fields came from; 'ssherror' and
              vals['plcnode']['pcu_ids'] are consulted for the NOSSH and
              PCU cells.

    Side effect: every DOWN / BOOT / DEBUG field increments the global
    categories tally under the key "<field>-<previous field>".

    Every field produces exactly one cell.  The original emitted nothing
    for a 'PCU' field whose record had no pcu_ids, shifting the remaining
    columns of the row, and raised KeyError when 'plcnode' was missing.
    Such a field is now rendered as an unlinked cell in the colour used
    for an unknown PCU state.
    """
    global categories
    global ssherror

    # Colours keyed by the return value of pcu_state().
    pcu_colorMap = {-1: 'indianred',
                    0: 'darkseagreen',
                    1: 'gold'}

    colorMap = {'PING': 'darkseagreen',
                'NOPING': 'darksalmon',
                'SSH': 'darkseagreen',
                'NOSSH': 'indianred',
                'PCU': 'darkseagreen',
                'NOPCU': 'lightgrey',
                'OLDBOOTCD': 'crimson',
                'DOWN': 'indianred',
                'ALPHA': 'gold',
                'ERROR': 'crimson',
                'PROD': 'darkseagreen',
                'DEBUG': 'darksalmon',
                'BOOT': 'lightgreen'}

    cells = []
    f_prev = ""
    for f in fields:
        f = f.strip()

        if f in ('DOWN', 'BOOT', 'DEBUG'):
            key = "%s-%s" % (f, f_prev)
            categories[key] = categories.get(key, 0) + 1

        if f in colorMap:
            bgcolor = "bgcolor='%s'" % colorMap[f]
        else:
            bgcolor = ""

        if f == 'NOSSH':
            # "Timeout" is the common case and is shown without detail.
            str_ssh_error = "Timeout"
            if ssherror:
                if 'ssherror' in vals:
                    str_ssh_error = ssh_error_to_str(vals['ssherror'])
                else:
                    str_ssh_error = "NO SSHERROR in VALS"
            if str_ssh_error != "Timeout":
                cells.append(
                    """<td nowrap %s>%s<br><b><font size="-2">%s</font></b></td>"""
                    % (bgcolor, f, str_ssh_error))
            else:
                cells.append("<td %s>%s</td>" % (bgcolor, f))
        elif f == 'PCU':
            plcnode = vals.get('plcnode') or {}
            pcu_ids = plcnode.get('pcu_ids') or []
            if len(pcu_ids) > 0:
                # Colour by the PCU's recorded state and link to its page.
                bgcolor = "bgcolor='%s'" % pcu_colorMap[pcu_state(pcu_ids[0])]
                url = "<a href='/cgi-bin/monitor/printbadpcus.php?id=%s'>PCU</a>" % pcu_ids[0]
                cells.append("<td nowrap %s>%s</td>" % (bgcolor, url))
            else:
                # No PCU id to look up or link to: keep the column in place.
                bgcolor = "bgcolor='%s'" % pcu_colorMap[-1]
                cells.append("<td nowrap %s>%s</td>" % (bgcolor, f))
        else:
            cells.append("<td nowrap %s>%s</td>" % (bgcolor, f))

        f_prev = f

    return "".join(cells)
236
237
238
239 def main(sitefilter, catfilter, statefilter, comonfilter, nodeonlyfilter):
240         global fb
241         import os
242         import datetime
243         if nodeonlyfilter == None:
244                 print "<html><body>\n"
245
246                 try:
247                         mtime = os.stat("/var/lib/monitor-server/production.findbad.pkl")[-2]
248                         print "Last Updated: %s GMT" % datetime.datetime.fromtimestamp(mtime)
249                 except:
250                         pass
251
252
253         db = database.dbLoad(config.dbname)
254         fb = database.dbLoad("findbadpcus")
255
256         ## Field widths used for printing
257         maxFieldLengths = { 'nodename' : -45,
258                                                 'ping' : 6, 
259                                                 'ssh' : 6, 
260                                                 'pcu' : 7, 
261                                                 'category' : 9, 
262                                                 'state' : 5, 
263                                                 'kernel' : 10.65, 
264                                                 'comonstats' : 5, 
265                                                 'plcsite' : 12,
266                                                 'bootcd' : 10.65}
267         ## create format string based on config.fields
268         fields = {}
269         format = ""
270         format_fields = []
271         for f in config.fields.split(','):
272                 fields[f] = "%%(%s)s" % f
273                 #if f in maxFieldLengths:
274                 #       fields[f] = "%%(%s)%ds" % (f, maxFieldLengths[f])
275                 #else:
276                 #       fields[f] = "%%(%s)%ds" % (f, 10)
277
278                 format_fields.append(fields[f])
279         #print fields
280         for f in config.fields.split(','):
281                 format += fields[f] + " "
282         #print format
283
284         d_n = db['nodes']
285         l_nodes = d_n.keys()
286
287         # category by site
288         #bysite = {}
289         #for nodename in l_nodes:
290         #       if 'plcsite' in d_n[nodename]['values'] and \
291         #       'login_base' in d_n[nodename]['values']['plcsite']:
292         #               loginbase = d_n[nodename]['values']['plcsite']['login_base']
293         #               if loginbase not in bysite:
294         #                       bysite[loginbase] = []
295         #               d_n[nodename]['values']['nodename'] = nodename
296         #               bysite[loginbase].append(d_n[nodename]['values'])
297
298         # d2 was an array of [{node}, {}, ...]
299         # the bysite is a loginbase dict of [{node}, {node}]
300         d2 = []
301         import re
302         if sitefilter != None:
303                 sf = re.compile(sitefilter)
304         else:
305                 sf = None
306         for nodename in l_nodes: 
307                 vals=d_n[nodename]['values'] 
308                 v = {}
309                 v.update(vals)
310                 v['nodename'] = nodename 
311                 if  'plcsite' in vals and  \
312                         'status' in vals['plcsite'] and  \
313                         vals['plcsite']['status'] == "SUCCESS":
314
315                         url = "<a href='printbadnodes.py?site=%s'>%s</a>" % ( vals['plcsite']['login_base'],
316                                                                                                                          vals['plcsite']['login_base'])
317
318                         site_string = "%s %2s nodes :: %2s of %4s slices" % ( \
319                                                                                                                 url,
320                                                                                                                 vals['plcsite']['num_nodes'], 
321                                                                                                                 vals['plcsite']['num_slices'], 
322                                                                                                                 vals['plcsite']['max_slices'])
323                         loginbase = d_n[nodename]['values']['plcsite']['login_base']
324                 else:
325                         #print "ERROR: ", nodename, vals, "<br>"
326                         site_string = "<b>UNKNOWN</b>"
327                         loginbase = ""
328
329                 v['site_string'] = site_string
330                 v['loginbase'] = loginbase
331                 if (sitefilter != None and sf.match(loginbase) != None) or sitefilter == None:
332                         d2.append(v)
333                         
334
335         if sitefilter != None:
336                 config.cmpcategory = True
337         else:
338                 config.cmploginbase = True
339                 
340
341         if config.cmploginbase:
342                 d2.sort(cmp=cmpLoginBase)
343         elif config.cmpping:
344                 d2.sort(cmp=cmpPing)
345         elif config.cmpdns:
346                 d2.sort(cmp=cmpDNS)
347         elif config.cmpssh:
348                 d2.sort(cmp=cmpSSH)
349         elif config.cmpcategory:
350                 d2.sort(cmp=cmpCategory)
351         elif config.cmpstate:
352                 d2.sort(cmp=cmpState)
353         elif config.cmpdays:
354                 d2.sort(cmp=cmpDays)
355         elif config.cmpkernel:
356                 d2.sort(cmp=cmpUname)
357         else:
358                 d2.sort(cmp=cmpCategory)
359         
360
361         if catfilter != None:   cf = re.compile(catfilter)
362         else:                                   cf = None
363
364         if statefilter != None: stf = re.compile(statefilter)
365         else:                                   stf = None
366
367         if comonfilter != None: cmf = re.compile(comonfilter)
368         else:                                   cmf = None
369
370
371         output_str = ""
372         #l_loginbase = bysite.keys()
373         #l_loginbase.sort()
374         if nodeonlyfilter == None:
375                 output_str += "<table width=80% border=1>"
376
377         prev_sitestring = ""
378         for row in d2:
379
380                 vals = row
381
382                 if (catfilter != None and cf.match(vals['category']) == None):
383                         continue
384
385                 if (statefilter != None and stf.match(vals['state']) == None):
386                         continue
387
388                 if (comonfilter != None and comonfilter in vals['comonstats'] and vals['comonstats'][comonfilter] != 'null'):
389                         continue
390
391                 if nodeonlyfilter != None:
392                         output_str += vals['nodename']
393                         continue
394
395                 site_string = row['site_string']
396                 if site_string != prev_sitestring:
397                         output_str += "<tr><td bgcolor=lightblue nowrap>" 
398                         output_str += site_string
399                         output_str += "</td>"
400                 else:
401                         output_str += "<tr><td>&nbsp;</td>"
402
403                 prev_sitestring = site_string
404
405                         
406                 # convert uname values into a single kernel version string
407                 if 'kernel' in vals:
408                         kernel = vals['kernel'].split()
409                         if len(kernel) > 0:
410                                 if kernel[0] == "Linux":
411                                         vals['kernel'] = kernel[2]
412                                 else:
413                                         vals['ssherror'] = vals['kernel']
414                                         vals['kernel'] = ""
415                 else:
416                         vals['ssherror'] = ""
417                         vals['kernel'] = ""
418 #                       continue
419                 if 'model' in vals or 'protocol' in vals or 'portstatus' in vals:
420                         #vals['model'] = string.replace(vals['model']," ", "&nbsp;")
421                         #vals['protocol'] = vals['protocol'].replace(" ", "&nbsp;")
422                         if vals['model'] == None:
423                                 vals['model'] = " "
424                         vals['model'] = string.replace(vals['model']," ", "_")
425                         vals['protocol'] = vals['protocol'].replace(" ", "_")
426                         ps = ""
427                         ports = vals['portstatus']
428                         lports = ports.keys()
429                         lports.sort()
430                         for port in lports:
431                                 t = ports[port]
432                                 if t != "closed":
433                                         ps += "%s:&nbsp;%s<br>" % (port, ports[port])
434                         if ps == "":
435                                 ps = "All_closed"
436                                 
437                         vals['portstatus'] = ps
438
439                 if 'reboot' in vals:
440                         vals['reboot'] = "%s" % vals['reboot']
441                         vals['reboot'] = vals['reboot'].replace(" ", "_")
442
443                 if 'nodename' in vals:
444                         url = "<a href='https://%s/db/nodes/index.php?nodepattern=%s'>%s</a>" % (config.MONITOR_HOSTNAME, vals['nodename'], vals['nodename'])
445                         vals['nodename'] = url
446
447                 try:
448                         str_fields = []
449                         count = 0
450                         for f in format_fields:
451                                 str_fields.append(f % vals)
452                                 count += 1
453                 except:
454                         print >>sys.stderr, vals
455
456                 s = fields_to_html(str_fields, vals)
457                 output_str += s
458                         
459                 output_str += "\n</tr>"
460
461         if nodeonlyfilter == None:
462                 output_str += "</table>"
463         keys = categories.keys()
464         keys.sort()
465         print "<table>"
466         for cat in keys:
467                 print "<tr>"
468                 print "<th nowrap align=left>Total %s</th>" % cat
469                 print "<td align=left>%s</td>" % categories[cat]
470                 print "</tr>"
471         if nodeonlyfilter == None:
472                 print "</table>"
473
474         print output_str
475         if nodeonlyfilter == None:
476                 print "</body></html>\n"
477
478
479
480 if __name__ == '__main__':
481         import cgi
482         import cgitb; 
483         cgitb.enable()
484         import sys
485
486         form = cgi.FieldStorage()
487         myfilter = None
488
489         if form.has_key('site'):
490                 myfilter = form.getvalue("site")
491         else:
492                 myfilter = None
493
494         if form.has_key('category'):
495                 mycategory = form.getvalue("category")
496         else:
497                 mycategory = None
498
499         if form.has_key('state'):
500                 mystate = form.getvalue("state")
501         else:
502                 mystate = None
503
504         if form.has_key('comon'):
505                 mycomon = form.getvalue("comon")
506         else:
507                 mycomon = None
508
509         if form.has_key('nodeonly'):
510                 mynodeonly = form.getvalue("nodeonly")
511         else:
512                 mynodeonly = None
513
514         config.cmpdays=False
515         config.comon="sshstatus"
516         config.fields="nodename,ping,ssh,pcu,category,state,kernel,bootcd"
517         config.dbname="findbad"
518         config.cmpping=False 
519         config.cmpdns=False
520         config.cmploginbase=False
521         config.cmpssh=False 
522         config.cmpcategory=False
523
524         print "Content-Type: text/html\r\n"
525         if len(sys.argv) > 1:
526                 if sys.argv[1] == "ssherror":
527                         ssherror = True
528         main(myfilter, mycategory, mystate, mycomon,mynodeonly)