Moved some files around and merged from 1.0 branch:
[monitor.git] / www / printbadnodes.py
1 #!/usr/bin/python
2 from monitor import database
3 from monitor import config
4 import string
5 import sys
6 import time
7
# Tally of "<state>-<previous field>" keys, incremented by fields_to_html()
# and printed by main() as the "Total ..." summary table.
categories = {}
# When True, fields_to_html() adds the ssh error text to NOSSH cells.
# Switched on by the "ssherror" command-line argument.
ssherror = False
# Contents of the "findbadpcus" database: loaded by main(), read by pcu_state().
fb = {}
11
def sec2days(sec):
        """Convert a number of seconds into whole days.

        sec may be an int or a numeric string.  The literal string "null"
        is treated as minus one day, so it yields -1.

        Returns the number of days as an integer, rounded towards negative
        infinity.
        """
        seconds_per_day = 60 * 60 * 24
        if sec == "null":
                sec = -seconds_per_day
        # Explicit floor division: plain "/" only yields whole days on
        # interpreters where int / int is integer division.
        return int(sec) // seconds_per_day
17
def array_to_priority_map(array):
        """ Build a dict that maps every entry of array to its position in
        the array, i.e. its priority.  When an entry occurs more than once
        the last position wins.  The result is meant to be handed to the
        cmpMap() function."""
        return dict((entry, position) for position, entry in enumerate(array))
28
def cmpValMap(v1, v2, map):
        """ Compare two values by their priority in map.

        Returns 1 when v1 has the smaller priority number, -1 when it has
        the larger one and 0 otherwise.  Note that this sign is the opposite
        of the one cmpMap() uses.

        Raises Exception when either value is not a key of map."""
        if v1 not in map or v2 not in map:
                raise Exception("No index %s or %s in map" % (v1, v2))

        rank1, rank2 = map[v1], map[v2]
        if rank1 < rank2:
                return 1
        if rank1 > rank2:
                return -1
        return 0
38
def cmpMap(l1, l2, index, map):
        """ Compare two records by the priority of their value under key index.

        map gives the priority of every possible value (see
        array_to_priority_map()); a smaller priority sorts first.  Records
        that lack the key altogether compare equal.

        Returns -1, 0 or 1.  A value that is not in map raises KeyError."""
        if index not in l1 or index not in l2:
                return 0

        rank1 = map[l1[index]]
        rank2 = map[l2[index]]
        if rank1 < rank2:
                return -1
        if rank1 > rank2:
                return 1
        return 0
49
def cmpLoginBase(l1, l2):
        """ Order two node records by their 'loginbase' value, ascending.

        Returns -1, 0 or 1."""
        left = l1['loginbase']
        right = l2['loginbase']
        if left == right:
                return 0
        if left < right:
                return -1
        if left > right:
                return 1
        # Only reached when the values are neither equal nor ordered.
        return 0
60
def cmpState(l1, l2):
        """ Order two node records by 'state': BOOT, then DEBUG, then DOWN """
        state_order = ['BOOT', 'DEBUG', 'DOWN']
        return cmpMap(l1, l2, 'state', array_to_priority_map(state_order))
64
def cmpCategoryVal(v1, v2):
        """ Compare two bare category values (not records) with cmpValMap().

        The priority list runs from None through ALPHA, PROD, OLDPROD,
        OLDBOOTCD, UNKNOWN and FORCED to ERROR. """
        category_order = [
                None,
                'ALPHA',
                'PROD',
                'OLDPROD',
                'OLDBOOTCD',
                'UNKNOWN',
                'FORCED',
                'ERROR',
        ]
        return cmpValMap(v1, v2, array_to_priority_map(category_order))
68
def cmpCategory(l1, l2):
        """ Order two node records by their 'category' value.

        The priority list is the same one cmpCategoryVal() uses.  It
        includes None and 'FORCED' so that a record carrying either of those
        categories is ordered instead of raising KeyError inside cmpMap().
        The relative order of the categories that were already listed
        (ALPHA, PROD, OLDPROD, OLDBOOTCD, UNKNOWN, ERROR) is unchanged.

        Returns -1, 0 or 1; records without a 'category' key compare equal. """
        category_order = [
                None,
                'ALPHA',
                'PROD',
                'OLDPROD',
                'OLDBOOTCD',
                'UNKNOWN',
                'FORCED',
                'ERROR',
        ]
        return cmpMap(l1, l2, 'category', array_to_priority_map(category_order))
72
def cmpPCU(l1, l2):
        """ Order two node records by 'pcu': PCU, then NOPCU, then UNKNOWN """
        pcu_order = ['PCU', 'NOPCU', 'UNKNOWN']
        return cmpMap(l1, l2, 'pcu', array_to_priority_map(pcu_order))
77
def cmpSSH(l1, l2):
        """ Order two node records by 'ssh': SSH first, then NOSSH """
        ssh_order = ['SSH', 'NOSSH']
        return cmpMap(l1, l2, 'ssh', array_to_priority_map(ssh_order))
82
def cmpDNS(l1,l2):
        """ Order two node records by 'dnsmatch':
        OK, NOHOSTNAME, NOENTRY, then MISMATCH """
        dns_order = ['OK', 'NOHOSTNAME', 'NOENTRY', 'MISMATCH']
        return cmpMap(l1, l2, 'dnsmatch', array_to_priority_map(dns_order))
87         
def cmpPing(l1,l2):
        """ Order two node records by 'ping': PING first, then NOPING """
        ping_order = ['PING', 'NOPING']
        return cmpMap(l1, l2, 'ping', array_to_priority_map(ping_order))
92
def cmpUname(l1, l2):
        """ Order two node records by the kernel version in their 'kernel' string.

        The version is the third whitespace-separated token of the 'kernel'
        value.  Versions are compared as plain strings.  A record whose
        'kernel' value is missing, empty or has fewer than three tokens
        sorts after every record that has a version; two such records
        compare equal, which keeps the comparison symmetric.

        Returns -1, 0 or 1."""
        def _version(record):
                # None means "no usable version in this record".
                tokens = (record.get('kernel') or "").split()
                if len(tokens) > 2:
                        return tokens[2]
                return None

        k1 = _version(l1)
        k2 = _version(l2)

        if k1 is None and k2 is None:
                return 0
        if k1 is None:
                return 1
        if k2 is None:
                return -1

        # Same result as cmp(k1, k2), without relying on that builtin.
        return (k1 > k2) - (k1 < k2)
108
def cmpDays(l1, l2):
        """ Order two node records by the comonstats entry named by
        config.comon, largest value first.

        A stored value of "null" is replaced by -1 in the record itself
        before comparing, so the records passed in may be modified.

        Returns -1 when l1 holds the larger number, 1 when l2 does, else 0."""
        for record in (l1, l2):
                if record['comonstats'][config.comon] == "null":
                        record['comonstats'][config.comon] = -1

        days1 = int(l1['comonstats'][config.comon])
        days2 = int(l2['comonstats'][config.comon])
        if days1 > days2:
                return -1
        if days1 < days2:
                return 1
        return 0
121
def ssh_error_to_str(str):
        """ Turn raw ssh error output into a short label.

        The known messages are tried in the order listed below and the first
        one found in str decides the label.  When none is found, str is
        returned unchanged."""
        known_errors = (
                ("Connection timed out", "Timeout"),
                ("Connection closed by remote host", "Closed by remote host"),
                ("Connection refused", "Connection refused"),
                ("Temporary failure in name resolution", "Could not resolve name"),
                ("Name or service not known", "Name not known"),
                ("Too many authentication failures", "Disconnect: root auth failure"),
                ("Network is unreachable", "Network is unreachable"),
                ("Connection reset by peer", "Connection reset by peer"),
                ("WARNING", "WARNING ssh key updated"),
        )
        for fragment, label in known_errors:
                if fragment in str:
                        return label
        return str
146
def pcu_state(pcu_id):
        """ Look up the reboot status of a PCU in the global fb data.

        The record is read from fb['nodes']['id_<pcu_id>']['values'].

        Returns 0 when its 'reboot' value is 0 or "0", 1 when it is
        "NetDown" or "Not_Run", and -1 for any other value or when the
        record (or any level leading to it) is missing."""
        global fb

        if 'nodes' not in fb:
                return -1

        node_key = "id_%s" % pcu_id
        if node_key not in fb['nodes'] or 'values' not in fb['nodes'][node_key]:
                return -1

        rec = fb['nodes'][node_key]['values']
        if 'reboot' not in rec:
                return -1

        rb = rec['reboot']
        if rb == 0 or rb == "0":
                return 0
        if rb in ("NetDown", "Not_Run"):
                return 1
        return -1
165
def fields_to_html(fields, vals):
        """ Render the already formatted column values of one node as <td> cells.

        fields -- list of strings, one per column; each is stripped first.
        vals   -- the node record.  Only 'ssherror' (for NOSSH cells) and
                  vals['plcnode']['pcu_ids'] (for PCU cells) are read.

        Side effect: every DOWN, BOOT or DEBUG field increments the global
        categories counter under the key "<field>-<previous field>".

        Every field produces exactly one cell.  A PCU field whose record
        has no pcu ids (or no 'plcnode' entry) is rendered as a plain cell
        rather than being dropped, so the columns of the row stay aligned.

        Returns the concatenated cells as one string. """
        global categories
        global ssherror
        pcu_colorMap = { -1 : 'indianred',
                          0 : 'darkseagreen',
                          1 : 'gold', }

        colorMap = { 'PING'  : 'darkseagreen',
                     'NOPING': 'darksalmon',
                     'SSH': 'darkseagreen',
                     'NOSSH': 'indianred',
                     'PCU': 'darkseagreen',
                     'NOPCU': 'lightgrey',
                     'OLDBOOTCD': 'crimson',
                     'DOWN': 'indianred',
                     'ALPHA': 'gold',
                     'ERROR': 'crimson',
                     'PROD': 'darkseagreen',
                     'DEBUG': 'darksalmon',
                     'BOOT': 'lightgreen'}
        cells = []
        f_prev = ""
        for f in fields:
                f = f.strip()

                # Count each node state together with the column before it.
                if f in ('DOWN', 'BOOT', 'DEBUG'):
                        key = "%s-%s" % (f, f_prev)
                        categories[key] = categories.get(key, 0) + 1

                if f in colorMap:
                        bgcolor = "bgcolor='%s'" % colorMap[f]
                else:
                        bgcolor = ""

                pcu_ids = []
                if f == 'PCU':
                        pcu_ids = vals.get('plcnode', {}).get('pcu_ids') or []

                if f == 'NOSSH' and ssherror:
                        if 'ssherror' in vals:
                                str_ssh_error = ssh_error_to_str(vals['ssherror'])
                        else:
                                str_ssh_error = "NO SSHERROR in VALS"
                        # NOTE(review): the error text is inserted into the
                        # page without HTML escaping.
                        if str_ssh_error != "Timeout":
                                cell = """<td nowrap %s>%s<br><b><font size="-2">%s</font></b></td>""" % \
                                                        (bgcolor, f, str_ssh_error)
                        else:
                                # Timeouts are not worth spelling out.
                                cell = "<td %s>%s</td>" % (bgcolor, f)
                elif f == 'NOSSH':
                        cell = "<td %s>%s</td>" % (bgcolor, f)
                elif f == 'PCU' and len(pcu_ids) > 0:
                        # Colour by the state of the first PCU and link to it.
                        bgcolor = "bgcolor='%s'" % pcu_colorMap[pcu_state(pcu_ids[0])]
                        url = "<a href='/cgi-bin/monitor/printbadpcus.php?id=%s'>PCU</a>" % pcu_ids[0]
                        cell = "<td nowrap %s>%s</td>" % (bgcolor, url)
                else:
                        cell = "<td nowrap %s>%s</td>" % (bgcolor, f)

                cells.append(cell)
                f_prev = f

        return "".join(cells)
237
def my_diff_time(timestamp, now=None):
        """ Describe how long ago timestamp was, in words.

        timestamp -- seconds since the epoch, or None.
        now       -- reference time in seconds since the epoch; defaults to
                     the current time (time.time()).

        Returns "not yet contacted" for a None timestamp, otherwise a string
        such as "12 sec ago", "3 hours ago" or "1 month and 2 weeks ago".
        A month is counted as 30 days.  All numbers are whole units. """
        if timestamp is None:
                return "not yet contacted"
        if now is None:
                now = time.time()

        minute = 60
        hour = 60 * minute
        day = 24 * hour
        week = 7 * day
        month = 30 * day        # approximation

        diff = now - timestamp
        if diff < minute:
                return "%s sec ago" % int(diff)
        if diff < hour:
                return "%s min ago" % int(diff // minute)
        if diff < day:
                return "%s hours ago" % int(diff // hour)
        if diff < week:
                return "%s days ago" % int(diff // day)
        if diff < month:
                return "%s weeks ago" % int(diff // week)
        if diff < 2 * month:
                # Between one and two months (exactly 30 days included):
                # report the month plus the whole weeks beyond it.
                months = int(diff // month)
                weeks = int((diff - months * month) // week)
                if weeks == 0:
                        return "%s month ago" % months
                if weeks == 4:
                        return "2 months ago"
                return "%s month and %s weeks ago" % (months, weeks)
        return "%s months ago" % int(diff // month)
273
274
275 def main(sitefilter, catfilter, statefilter, comonfilter, nodeonlyfilter):
276         global fb
277         import os
278         import datetime
279         if nodeonlyfilter == None:
280                 print "<html><body>\n"
281
282                 try:
283                         mtime = os.stat("/var/lib/monitor-server/production.findbad.pkl")[-2]
284                         print "Last Updated: %s GMT" % datetime.datetime.fromtimestamp(mtime)
285                 except:
286                         pass
287
288
289         db = database.dbLoad(config.dbname)
290         fb = database.dbLoad("findbadpcus")
291
292         ## Field widths used for printing
293         maxFieldLengths = { 'nodename' : -45,
294                                                 'ping' : 6, 
295                                                 'ssh' : 6, 
296                                                 'pcu' : 7, 
297                                                 'category' : 9, 
298                                                 'state' : 5, 
299                                                 'kernel' : 10.65, 
300                                                 'comonstats' : 5, 
301                                                 'last_contact' : 10.65,
302                                                 'plcsite' : 12,
303                                                 'bootcd' : 10.65}
304         ## create format string based on config.fields
305         fields = {}
306         format = ""
307         format_fields = []
308         for f in config.fields.split(','):
309                 fields[f] = "%%(%s)s" % f
310                 #print f
311                 #if f in maxFieldLengths:
312                 #       fields[f] = "%%(%s)%ds" % (f, maxFieldLengths[f])
313                 #else:
314                 #       fields[f] = "%%(%s)%ds" % (f, 10)
315
316                 format_fields.append(fields[f])
317         #print fields
318         for f in config.fields.split(','):
319                 format += fields[f] + " "
320         #print format
321
322         d_n = db['nodes']
323         l_nodes = d_n.keys()
324
325         # category by site
326         #bysite = {}
327         #for nodename in l_nodes:
328         #       if 'plcsite' in d_n[nodename]['values'] and \
329         #       'login_base' in d_n[nodename]['values']['plcsite']:
330         #               loginbase = d_n[nodename]['values']['plcsite']['login_base']
331         #               if loginbase not in bysite:
332         #                       bysite[loginbase] = []
333         #               d_n[nodename]['values']['nodename'] = nodename
334         #               bysite[loginbase].append(d_n[nodename]['values'])
335
336         # d2 was an array of [{node}, {}, ...]
337         # the bysite is a loginbase dict of [{node}, {node}]
338         d2 = []
339         import re
340         if sitefilter != None:
341                 sf = re.compile(sitefilter)
342         else:
343                 sf = None
344         for nodename in l_nodes: 
345                 vals=d_n[nodename]['values'] 
346                 v = {}
347                 v.update(vals)
348                 v['nodename'] = nodename 
349                 if  'plcsite' in vals and  \
350                         'status' in vals['plcsite'] and  \
351                         vals['plcsite']['status'] == "SUCCESS":
352
353                         url = "<a href='printbadnodes.py?site=%s'>%s</a>" % ( vals['plcsite']['login_base'],
354                                                                                                                          vals['plcsite']['login_base'])
355
356                         site_string = "%s %2s nodes :: %2s of %4s slices" % ( \
357                                                                                                                 url,
358                                                                                                                 vals['plcsite']['num_nodes'], 
359                                                                                                                 vals['plcsite']['num_slices'], 
360                                                                                                                 vals['plcsite']['max_slices'])
361                         loginbase = d_n[nodename]['values']['plcsite']['login_base']
362                 else:
363                         #print "ERROR: ", nodename, vals, "<br>"
364                         site_string = "<b>UNKNOWN</b>"
365                         loginbase = ""
366
367                 v['site_string'] = site_string
368                 v['loginbase'] = loginbase
369                 if (sitefilter != None and sf.match(loginbase) != None) or sitefilter == None:
370                         d2.append(v)
371                         
372
373         if sitefilter != None:
374                 config.cmpcategory = True
375         else:
376                 config.cmploginbase = True
377                 
378
379         if config.cmploginbase:
380                 d2.sort(cmp=cmpLoginBase)
381         elif config.cmpping:
382                 d2.sort(cmp=cmpPing)
383         elif config.cmpdns:
384                 d2.sort(cmp=cmpDNS)
385         elif config.cmpssh:
386                 d2.sort(cmp=cmpSSH)
387         elif config.cmpcategory:
388                 d2.sort(cmp=cmpCategory)
389         elif config.cmpstate:
390                 d2.sort(cmp=cmpState)
391         elif config.cmpdays:
392                 d2.sort(cmp=cmpDays)
393         elif config.cmpkernel:
394                 d2.sort(cmp=cmpUname)
395         else:
396                 d2.sort(cmp=cmpCategory)
397         
398
399         if catfilter != None:   cf = re.compile(catfilter)
400         else:                                   cf = None
401
402         if statefilter != None: stf = re.compile(statefilter)
403         else:                                   stf = None
404
405         if comonfilter != None: cmf = re.compile(comonfilter)
406         else:                                   cmf = None
407
408
409         output_str = ""
410         #l_loginbase = bysite.keys()
411         #l_loginbase.sort()
412         if nodeonlyfilter == None:
413                 output_str += "<table width=80% border=1>"
414
415         prev_sitestring = ""
416         for row in d2:
417
418                 vals = row
419
420                 #added by guto about last contact information
421                 if (catfilter != None and cf.match(vals['category']) == None):
422                         continue
423
424                 if (statefilter != None and stf.match(vals['state']) == None):
425                         continue
426
427                 if (comonfilter != None and comonfilter in vals['comonstats'] and vals['comonstats'][comonfilter] != 'null'):
428                         continue
429
430                 if nodeonlyfilter != None:
431                         output_str += vals['nodename']
432                         continue
433
434                 site_string = row['site_string']
435                 if site_string != prev_sitestring:
436                         output_str += "<tr><td bgcolor=lightblue nowrap>" 
437                         output_str += site_string
438                         output_str += "</td>"
439                 else:
440                         output_str += "<tr><td>&nbsp;</td>"
441
442                 prev_sitestring = site_string
443
444                         
445                 # convert uname values into a single kernel version string
446                 if 'kernel' in vals:
447                         kernel = vals['kernel'].split()
448                         if len(kernel) > 0:
449                                 if kernel[0] == "Linux":
450                                         vals['kernel'] = kernel[2]
451                                 else:
452                                         vals['ssherror'] = vals['kernel']
453                                         vals['kernel'] = ""
454                 else:
455                         vals['ssherror'] = ""
456                         vals['kernel'] = ""
457 #                       continue
458                 if 'model' in vals or 'protocol' in vals or 'portstatus' in vals:
459                         #vals['model'] = string.replace(vals['model']," ", "&nbsp;")
460                         #vals['protocol'] = vals['protocol'].replace(" ", "&nbsp;")
461                         if vals['model'] == None:
462                                 vals['model'] = " "
463                         vals['model'] = string.replace(vals['model']," ", "_")
464                         vals['protocol'] = vals['protocol'].replace(" ", "_")
465                         ps = ""
466                         ports = vals['portstatus']
467                         lports = ports.keys()
468                         lports.sort()
469                         for port in lports:
470                                 t = ports[port]
471                                 if t != "closed":
472                                         ps += "%s:&nbsp;%s<br>" % (port, ports[port])
473                         if ps == "":
474                                 ps = "All_closed"
475                                 
476                         vals['portstatus'] = ps
477
478                 if 'reboot' in vals:
479                         vals['reboot'] = "%s" % vals['reboot']
480                         vals['reboot'] = vals['reboot'].replace(" ", "_")
481
482                 if 'nodename' in vals:
483                         url = "<a href='https://%s/db/nodes/index.php?nodepattern=%s'>%s</a>" % (config.MONITOR_HOSTNAME, vals['nodename'], vals['nodename'])
484                         vals['nodename'] = url
485
486                 if 'plcnode' in vals:
487                         if vals['plcnode']['status'] == "GN_FAILED":
488                                 vals['last_contact'] = "UNKNOWN"
489                         else:
490                                 vals['last_contact'] = my_diff_time(vals['plcnode']['last_contact'])
491
492                 try:
493                         str_fields = []
494                         count = 0
495                         for f in format_fields:
496                                 str_fields.append(f % vals)
497                                 count += 1
498                 except:
499                         print >>sys.stderr, vals
500
501                 s = fields_to_html(str_fields, vals)
502                 output_str += s
503                         
504                 output_str += "\n</tr>"
505
506         if nodeonlyfilter == None:
507                 output_str += "</table>"
508         keys = categories.keys()
509         keys.sort()
510         print "<table>"
511         for cat in keys:
512                 print "<tr>"
513                 print "<th nowrap align=left>Total %s</th>" % cat
514                 print "<td align=left>%s</td>" % categories[cat]
515                 print "</tr>"
516         if nodeonlyfilter == None:
517                 print "</table>"
518
519         print output_str
520         if nodeonlyfilter == None:
521                 print "</body></html>\n"
522
523
524
525 if __name__ == '__main__':
526         import cgi
527         import cgitb; 
528         cgitb.enable()
529         import sys
530
531         form = cgi.FieldStorage()
532         myfilter = None
533
534         if form.has_key('site'):
535                 myfilter = form.getvalue("site")
536         else:
537                 myfilter = None
538
539         if form.has_key('category'):
540                 mycategory = form.getvalue("category")
541         else:
542                 mycategory = None
543
544         if form.has_key('state'):
545                 mystate = form.getvalue("state")
546         else:
547                 mystate = None
548
549         if form.has_key('comon'):
550                 mycomon = form.getvalue("comon")
551         else:
552                 mycomon = None
553
554         if form.has_key('nodeonly'):
555                 mynodeonly = form.getvalue("nodeonly")
556         else:
557                 mynodeonly = None
558
559         config.cmpdays=False
560         config.comon="sshstatus"
561         config.fields="nodename,ping,ssh,pcu,category,state,last_contact,kernel,bootcd"
562         config.dbname="findbad"
563         config.cmpping=False 
564         config.cmpdns=False
565         config.cmploginbase=False
566         config.cmpssh=False 
567         config.cmpcategory=False
568
569         print "Content-Type: text/html\r\n"
570         if len(sys.argv) > 1:
571                 if sys.argv[1] == "ssherror":
572                         ssherror = True
573         main(myfilter, mycategory, mystate, mycomon,mynodeonly)