bcd3f23d2734e5ccf7fba5ee8430fb40509f8996
[monitor.git] / www / printbadnodes.py
1 #!/usr/bin/python
2 from monitor import database
3 from monitor import config
4 from monitor import general_functions
5 import string
6 import sys
7 import time
8
# Running tally of node states, keyed by "<STATE>-<previous field>" (for
# example "BOOT-PROD"); incremented by fields_to_html() and printed as the
# totals table by main().
categories = {}
# When True, fields_to_html() annotates NOSSH cells with the ssh error text.
# Switched on in the __main__ section when the first argument is "ssherror".
ssherror = False
# Contents of the "findbadpcus" database; loaded by main(), read by pcu_state().
fb = {}
12
def sec2days(sec):
    """Convert a duration in seconds to a whole number of days.

    sec may be a number or a numeric string.  The literal string "null"
    is treated as minus one day, so the result for it is -1.

    Floor division is spelled out explicitly so the result is always an
    integer day count, independent of the interpreter's rules for "/".
    """
    seconds_per_day = 60 * 60 * 24
    if sec == "null":
        sec = -seconds_per_day
    return int(sec) // seconds_per_day
18
def array_to_priority_map(array):
    """Map every entry of array to its position in the array.

    The resulting dict serves as the priority table expected by cmpMap()
    and cmpValMap().  If an entry occurs more than once, its last position
    is the one recorded.
    """
    return dict((entry, position) for position, entry in enumerate(array))
29
def cmpValMap(v1, v2, map):
    """Compare two values by their priorities in map.

    Note the direction: the result is 1 when v1 has the *lower* priority
    number, -1 when it has the higher one, and 0 when both are equal --
    the opposite sign of cmpMap().

    Raises Exception if either value is not a key of map.
    """
    if v1 not in map or v2 not in map:
        raise Exception("No index %s or %s in map" % (v1, v2))

    first, second = map[v1], map[v2]
    if first < second:
        return 1
    if first > second:
        return -1
    return 0
39
def cmpMap(l1, l2, index, map):
    """Compare two records by the priority of their value under index.

    l1 and l2 are dict-like records; map translates the value stored at
    index into a sortable priority.  Returns -1, 0 or 1 in ascending
    priority order.  Records are considered equal (0) when either one
    lacks index.  A value that is not a key of map raises KeyError.
    """
    if index not in l1 or index not in l2:
        return 0

    left = map[l1[index]]
    right = map[l2[index]]
    if left < right:
        return -1
    if left > right:
        return 1
    return 0
50
def cmpLoginBase(l1, l2):
    """Compare two node records by their 'loginbase' value, ascending.

    Returns -1, 0 or 1.  Both records must contain 'loginbase'.
    """
    left = l1['loginbase']
    right = l2['loginbase']
    if left < right:
        return -1
    if left > right:
        return 1
    return 0
61
def cmpState(l1, l2):
    """Order two node records by 'state': BOOT, then DEBUG, then DOWN."""
    state_order = ['BOOT', 'DEBUG', 'DOWN']
    return cmpMap(l1, l2, 'state', array_to_priority_map(state_order))
65
def cmpCategoryVal(v1, v2):
    """Compare two bare category values using cmpValMap().

    Because cmpValMap() is used, the result is 1 when v1 comes earlier in
    the list below than v2, and -1 when it comes later.
    """
    category_order = [None, 'ALPHA', 'PROD', 'OLDPROD', 'OLDBOOTCD',
                      'UNKNOWN', 'FORCED', 'ERROR']
    return cmpValMap(v1, v2, array_to_priority_map(category_order))
69
def cmpCategory(l1, l2):
    """Order two node records by 'category', ALPHA first and ERROR last."""
    # NOTE(review): unlike cmpCategoryVal(), this list has no None or
    # 'FORCED' entry, so a record carrying either value makes cmpMap()
    # raise KeyError -- confirm whether such records can reach this sort.
    category_order = ['ALPHA', 'PROD', 'OLDPROD', 'OLDBOOTCD',
                      'UNKNOWN', 'ERROR']
    return cmpMap(l1, l2, 'category', array_to_priority_map(category_order))
73
def cmpPCU(l1, l2):
    """Order two node records by 'pcu': PCU, then NOPCU, then UNKNOWN."""
    pcu_order = ['PCU', 'NOPCU', 'UNKNOWN']
    return cmpMap(l1, l2, 'pcu', array_to_priority_map(pcu_order))
78
def cmpSSH(l1, l2):
    """Order two node records by 'ssh': SSH before NOSSH."""
    ssh_order = ['SSH', 'NOSSH']
    return cmpMap(l1, l2, 'ssh', array_to_priority_map(ssh_order))
83
def cmpDNS(l1, l2):
    """Order two node records by 'dnsmatch': OK, NOHOSTNAME, NOENTRY, MISMATCH."""
    dns_order = ['OK', 'NOHOSTNAME', 'NOENTRY', 'MISMATCH']
    return cmpMap(l1, l2, 'dnsmatch', array_to_priority_map(dns_order))
88         
def cmpPing(l1, l2):
    """Order two node records by 'ping': PING before NOPING."""
    ping_order = ['PING', 'NOPING']
    return cmpMap(l1, l2, 'ping', array_to_priority_map(ping_order))
93
def cmpUname(l1, l2):
    """Compare two node records by the kernel release in their 'kernel' string.

    The release is taken to be the third whitespace-separated token of the
    'kernel' value (as in "Linux <host> <release> ...").  Releases are
    compared as plain strings, not as version numbers.

    A record with no usable release (missing or empty 'kernel', or fewer
    than three tokens) sorts after any record that has one.  Two such
    records compare equal, which keeps the comparator consistent when it
    is used for sorting.

    Returns -1, 0 or 1.
    """
    def release_of(node):
        # Third token of the uname string, or None when it is not there.
        tokens = (node.get('kernel') or "").split()
        if len(tokens) > 2:
            return tokens[2]
        return None

    k1 = release_of(l1)
    k2 = release_of(l2)

    if k1 is None and k2 is None:
        return 0
    if k1 is None:
        return 1
    if k2 is None:
        return -1

    # Same result as the cmp() builtin, without depending on it.
    return (k1 > k2) - (k1 < k2)
109
def cmpDays(l1, l2):
    """Order two node records by their config.comon statistic, largest first.

    The statistic is read from record['comonstats'][config.comon] and
    converted with int().  Returns -1 when l1's value is larger, 1 when it
    is smaller, and 0 when both are equal.

    Side effect: a value of "null" is overwritten in the record with -1
    before the comparison takes place.
    """
    stat = config.comon

    # Normalise missing readings in place, l1 first and then l2.
    for record in (l1, l2):
        if record['comonstats'][stat] == "null":
            record['comonstats'][stat] = -1

    left = int(l1['comonstats'][stat])
    right = int(l2['comonstats'][stat])
    if left > right:
        return -1
    if left < right:
        return 1
    return 0
122
def ssh_error_to_str(str):
    """Condense raw ssh error output into a short label.

    The first known phrase found in str decides the label; the phrases are
    tried in the order listed below.  Text that contains none of them is
    returned unchanged.
    """
    known_errors = (
        ("Connection timed out", "Timeout"),
        ("Connection closed by remote host", "Closed by remote host"),
        ("Connection refused", "Connection refused"),
        ("Temporary failure in name resolution", "Could not resolve name"),
        ("Name or service not known", "Name not known"),
        ("Too many authentication failures", "Disconnect: root auth failure"),
        ("Network is unreachable", "Network is unreachable"),
        ("Connection reset by peer", "Connection reset by peer"),
        ("WARNING", "WARNING ssh key updated"),
    )
    for phrase, label in known_errors:
        if phrase in str:
            return label
    return str
147
def pcu_state(pcu_id):
    """Classify the recorded reboot status of a PCU from the global fb data.

    Looks up fb['nodes']["id_<pcu_id>"]['values']['reboot'] and returns:
       0  when the value is 0 or "0",
       1  when the value is "NetDown" or "Not_Run",
      -1  for any other value, or when any part of the lookup path is
          missing.
    """
    global fb

    if 'nodes' not in fb:
        return -1

    node_key = "id_%s" % pcu_id
    nodes = fb['nodes']
    if node_key not in nodes or 'values' not in nodes[node_key]:
        return -1

    rec = nodes[node_key]['values']
    if 'reboot' not in rec:
        return -1

    rb = rec['reboot']
    if rb == 0 or rb == "0":
        return 0
    if rb == "NetDown" or rb == "Not_Run":
        return 1
    return -1
166
def fields_to_html(fields, vals):
    """Render the formatted field strings of one node as HTML <td> cells.

    fields -- sequence of already formatted field strings, one per column;
              each is stripped of surrounding whitespace before use.
    vals   -- the node record the fields were produced from.  'ssherror'
              and 'plcnode' (with its 'pcu_ids' list) are consulted when
              present.

    Returns the concatenated <td> elements, exactly one per field.

    Side effect: every DOWN, BOOT or DEBUG field bumps the global
    categories counter under the key "<field>-<previous field>".

    Field text is inserted into the markup as-is (callers pass ready-made
    HTML such as links), so nothing is escaped here.
    """
    global categories
    global ssherror
    pcu_colorMap = {-1: 'indianred',
                    0: 'darkseagreen',
                    1: 'gold'}

    colorMap = {'PING': 'darkseagreen',
                'NOPING': 'darksalmon',
                'SSH': 'darkseagreen',
                'NOSSH': 'indianred',
                'PCU': 'darkseagreen',
                'NOPCU': 'lightgrey',
                'OLDBOOTCD': 'crimson',
                'DOWN': 'indianred',
                'ALPHA': 'gold',
                'ERROR': 'crimson',
                'PROD': 'darkseagreen',
                'DEBUG': 'darksalmon',
                'BOOT': 'lightgreen'}
    cells = []
    f_prev = ""
    for f in fields:
        f = f.strip()

        if f in ('DOWN', 'BOOT', 'DEBUG'):
            key = "%s-%s" % (f, f_prev)
            categories[key] = categories.get(key, 0) + 1

        if f in colorMap:
            bgcolor = "bgcolor='%s'" % colorMap[f]
        else:
            bgcolor = ""

        # Only a PCU field needs the node's PCU ids; tolerate records that
        # have no 'plcnode' entry or no ids at all.
        pcu_ids = []
        if f == 'PCU':
            plcnode = vals.get('plcnode') or {}
            pcu_ids = plcnode.get('pcu_ids') or []

        if f == 'NOSSH':
            detail = None
            if ssherror:
                if 'ssherror' in vals:
                    detail = ssh_error_to_str(vals['ssherror'])
                else:
                    detail = "NO SSHERROR in VALS"
            # A plain timeout is the common case and gets no annotation.
            if detail is not None and detail != "Timeout":
                cells.append("""<td nowrap %s>%s<br><b><font size="-2">%s</font></b></td>""" %
                             (bgcolor, f, detail))
            else:
                cells.append("<td %s>%s</td>" % (bgcolor, f))
        elif f == 'PCU' and len(pcu_ids) > 0:
            # Colour by the PCU's last known reboot state and link to its page.
            bgcolor = "bgcolor='%s'" % pcu_colorMap[pcu_state(pcu_ids[0])]
            url = "<a href='/cgi-bin/monitor/printbadpcus.php?id=%s'>PCU</a>" % pcu_ids[0]
            cells.append("<td nowrap %s>%s</td>" % (bgcolor, url))
        else:
            # Also reached for a PCU field without ids: it still needs a
            # cell, otherwise the remaining columns shift left.
            cells.append("<td nowrap %s>%s</td>" % (bgcolor, f))
        f_prev = f

    return "".join(cells)
238
def my_diff_time(timestamp, now=None):
    """Describe how long ago timestamp was, as a short English phrase.

    timestamp -- seconds since the epoch, or None when there is no
                 timestamp, in which case "not yet contacted" is returned.
    now       -- reference time in seconds since the epoch; defaults to
                 the current time.

    The phrase uses the coarsest fitting unit: seconds, minutes, hours,
    days, weeks, "1 month [and N weeks]", or whole months.  A month is
    approximated as 30 days, and every count is rounded down.
    """
    if timestamp is None:
        return "not yet contacted"
    if now is None:
        now = time.time()
    diff = now - timestamp

    minute = 60
    hour = 60 * minute
    day = 24 * hour
    week = 7 * day
    month = 30 * day  # approximation

    if diff < minute:
        return "%s sec ago" % int(diff)
    if diff < hour:
        return "%s min ago" % int(diff // minute)
    if diff < day:
        return "%s hours ago" % int(diff // hour)
    if diff < week:
        return "%s days ago" % int(diff // day)
    if diff < month:
        return "%s weeks ago" % int(diff // week)
    if diff < 2 * month:
        # Between one and two months: report the leftover weeks as well.
        # The lower bound is inclusive so that exactly 30 days is covered.
        months = int(diff // month)
        weeks = int((diff - months * month) // week)
        if weeks == 0:
            return "%s month ago" % months
        if weeks == 4:
            return "2 months ago"
        return "%s month and %s weeks ago" % (months, weeks)
    return "%s months ago" % int(diff // month)
274
275
276 def main(sitefilter, catfilter, statefilter, comonfilter, nodeonlyfilter):
277         global fb
278         import os
279         import datetime
280         if nodeonlyfilter == None:
281                 print "<html><body>\n"
282
283                 try:
284                         mtime = os.stat("/var/lib/monitor-server/production.findbad.pkl")[-2]
285                         print "Last Updated: %s GMT" % datetime.datetime.fromtimestamp(mtime)
286                 except:
287                         pass
288
289
290         db = database.dbLoad(config.dbname)
291         fb = database.dbLoad("findbadpcus")
292
293         ## Field widths used for printing
294         maxFieldLengths = { 'nodename' : -45,
295                                                 'ping' : 6, 
296                                                 'ssh' : 6, 
297                                                 'pcu' : 7, 
298                                                 'category' : 9, 
299                                                 'state' : 5, 
300                                                 'kernel' : 10.65, 
301                                                 'comonstats' : 5, 
302                                                 'last_contact' : 10.65,
303                                                 'plcsite' : 12,
304                                                 'bootcd' : 10.65}
305         ## create format string based on config.fields
306         fields = {}
307         format = ""
308         format_fields = []
309         for f in config.fields.split(','):
310                 fields[f] = "%%(%s)s" % f
311                 #print f
312                 #if f in maxFieldLengths:
313                 #       fields[f] = "%%(%s)%ds" % (f, maxFieldLengths[f])
314                 #else:
315                 #       fields[f] = "%%(%s)%ds" % (f, 10)
316
317                 format_fields.append(fields[f])
318         #print fields
319         for f in config.fields.split(','):
320                 format += fields[f] + " "
321         #print format
322
323         d_n = db['nodes']
324         l_nodes = d_n.keys()
325
326         # category by site
327         #bysite = {}
328         #for nodename in l_nodes:
329         #       if 'plcsite' in d_n[nodename]['values'] and \
330         #       'login_base' in d_n[nodename]['values']['plcsite']:
331         #               loginbase = d_n[nodename]['values']['plcsite']['login_base']
332         #               if loginbase not in bysite:
333         #                       bysite[loginbase] = []
334         #               d_n[nodename]['values']['nodename'] = nodename
335         #               bysite[loginbase].append(d_n[nodename]['values'])
336
337         # d2 was an array of [{node}, {}, ...]
338         # the bysite is a loginbase dict of [{node}, {node}]
339         d2 = []
340         import re
341         if sitefilter != None:
342                 sf = re.compile(sitefilter)
343         else:
344                 sf = None
345         for nodename in l_nodes: 
346                 vals=d_n[nodename]['values'] 
347                 v = {}
348                 v.update(vals)
349                 v['nodename'] = nodename 
350                 if  'plcsite' in vals and  \
351                         'status' in vals['plcsite'] and  \
352                         vals['plcsite']['status'] == "SUCCESS":
353
354                         url = "<a href='printbadnodes.py?site=%s'>%s</a>" % ( vals['plcsite']['login_base'],
355                                                                                                                          vals['plcsite']['login_base'])
356
357                         site_string = "%s %2s nodes :: %2s of %4s slices" % ( \
358                                                                                                                 url,
359                                                                                                                 vals['plcsite']['num_nodes'], 
360                                                                                                                 vals['plcsite']['num_slices'], 
361                                                                                                                 vals['plcsite']['max_slices'])
362                         loginbase = d_n[nodename]['values']['plcsite']['login_base']
363                 else:
364                         #print "ERROR: ", nodename, vals, "<br>"
365                         site_string = "<b>UNKNOWN</b>"
366                         loginbase = ""
367
368                 v['site_string'] = site_string
369                 v['loginbase'] = loginbase
370                 if (sitefilter != None and sf.match(loginbase) != None) or sitefilter == None:
371                         d2.append(v)
372                         
373
374         if sitefilter != None:
375                 config.cmpcategory = True
376         else:
377                 config.cmploginbase = True
378                 
379
380         if config.cmploginbase:
381                 d2.sort(cmp=cmpLoginBase)
382         elif config.cmpping:
383                 d2.sort(cmp=cmpPing)
384         elif config.cmpdns:
385                 d2.sort(cmp=cmpDNS)
386         elif config.cmpssh:
387                 d2.sort(cmp=cmpSSH)
388         elif config.cmpcategory:
389                 d2.sort(cmp=cmpCategory)
390         elif config.cmpstate:
391                 d2.sort(cmp=cmpState)
392         elif config.cmpdays:
393                 d2.sort(cmp=cmpDays)
394         elif config.cmpkernel:
395                 d2.sort(cmp=cmpUname)
396         else:
397                 d2.sort(cmp=cmpCategory)
398         
399
400         if catfilter != None:   cf = re.compile(catfilter)
401         else:                                   cf = None
402
403         if statefilter != None: stf = re.compile(statefilter)
404         else:                                   stf = None
405
406         if comonfilter != None: cmf = re.compile(comonfilter)
407         else:                                   cmf = None
408
409
410         output_str = ""
411         #l_loginbase = bysite.keys()
412         #l_loginbase.sort()
413         if nodeonlyfilter == None:
414                 output_str += "<table width=80% border=1>"
415
416         prev_sitestring = ""
417         for row in d2:
418
419                 vals = row
420
421                 #added by guto about last contact information
422                 if (catfilter != None and cf.match(vals['category']) == None):
423                         continue
424
425                 if (statefilter != None and stf.match(vals['state']) == None):
426                         continue
427
428                 if (comonfilter != None and comonfilter in vals['comonstats'] and vals['comonstats'][comonfilter] != 'null'):
429                         continue
430
431                 if nodeonlyfilter != None:
432                         output_str += vals['nodename']
433                         continue
434
435                 site_string = row['site_string']
436                 if site_string != prev_sitestring:
437                         output_str += "<tr><td bgcolor=lightblue nowrap>" 
438                         output_str += site_string
439                         output_str += "</td>"
440                 else:
441                         output_str += "<tr><td>&nbsp;</td>"
442
443                 prev_sitestring = site_string
444
445                         
446                 # convert uname values into a single kernel version string
447                 if 'kernel' in vals:
448                         kernel = vals['kernel'].split()
449                         if len(kernel) > 0:
450                                 if kernel[0] == "Linux":
451                                         vals['kernel'] = kernel[2]
452                                 else:
453                                         vals['ssherror'] = vals['kernel']
454                                         vals['kernel'] = ""
455                 else:
456                         vals['ssherror'] = ""
457                         vals['kernel'] = ""
458 #                       continue
459                 if 'model' in vals or 'protocol' in vals or 'portstatus' in vals:
460                         #vals['model'] = string.replace(vals['model']," ", "&nbsp;")
461                         #vals['protocol'] = vals['protocol'].replace(" ", "&nbsp;")
462                         if vals['model'] == None:
463                                 vals['model'] = " "
464                         vals['model'] = string.replace(vals['model']," ", "_")
465                         vals['protocol'] = vals['protocol'].replace(" ", "_")
466                         ps = ""
467                         ports = vals['portstatus']
468                         lports = ports.keys()
469                         lports.sort()
470                         for port in lports:
471                                 t = ports[port]
472                                 if t != "closed":
473                                         ps += "%s:&nbsp;%s<br>" % (port, ports[port])
474                         if ps == "":
475                                 ps = "All_closed"
476                                 
477                         vals['portstatus'] = ps
478
479                 if 'reboot' in vals:
480                         vals['reboot'] = "%s" % vals['reboot']
481                         vals['reboot'] = vals['reboot'].replace(" ", "_")
482
483                 if 'nodename' in vals:
484                         url = "<a href='https://%s/db/nodes/index.php?nodepattern=%s'>%s</a>" % (config.MONITOR_HOSTNAME, vals['nodename'], vals['nodename'])
485                         vals['nodename'] = url
486
487                 if 'plcnode' in vals:
488                         if vals['plcnode']['status'] == "GN_FAILED":
489                                 vals['last_contact'] = "UNKNOWN"
490                         else:
491                                 vals['last_contact'] = my_diff_time(vals['plcnode']['last_contact'])
492
493                 try:
494                         str_fields = []
495                         count = 0
496                         for f in format_fields:
497                                 str_fields.append(f % vals)
498                                 count += 1
499                 except:
500                         print >>sys.stderr, vals
501
502                 s = fields_to_html(str_fields, vals)
503                 output_str += s
504                         
505                 output_str += "\n</tr>"
506
507         if nodeonlyfilter == None:
508                 output_str += "</table>"
509         keys = categories.keys()
510         keys.sort()
511         print "<table>"
512         for cat in keys:
513                 print "<tr>"
514                 print "<th nowrap align=left>Total %s</th>" % cat
515                 print "<td align=left>%s</td>" % categories[cat]
516                 print "</tr>"
517         if nodeonlyfilter == None:
518                 print "</table>"
519
520         print output_str
521         if nodeonlyfilter == None:
522                 print "</body></html>\n"
523
524
525
526 if __name__ == '__main__':
527         import cgi
528         import cgitb; 
529         cgitb.enable()
530         import sys
531
532         form = cgi.FieldStorage()
533         myfilter = None
534
535         if form.has_key('site'):
536                 myfilter = form.getvalue("site")
537         else:
538                 myfilter = None
539
540         if form.has_key('category'):
541                 mycategory = form.getvalue("category")
542         else:
543                 mycategory = None
544
545         if form.has_key('state'):
546                 mystate = form.getvalue("state")
547         else:
548                 mystate = None
549
550         if form.has_key('comon'):
551                 mycomon = form.getvalue("comon")
552         else:
553                 mycomon = None
554
555         if form.has_key('nodeonly'):
556                 mynodeonly = form.getvalue("nodeonly")
557         else:
558                 mynodeonly = None
559
560         config.cmpdays=False
561         config.comon="sshstatus"
562         config.fields="nodename,ping,ssh,pcu,category,state,last_contact,kernel,bootcd"
563         config.dbname="findbad"
564         config.cmpping=False 
565         config.cmpdns=False
566         config.cmploginbase=False
567         config.cmpssh=False 
568         config.cmpcategory=False
569
570         print "Content-Type: text/html\r\n"
571         if len(sys.argv) > 1:
572                 if sys.argv[1] == "ssherror":
573                         ssherror = True
574         main(myfilter, mycategory, mystate, mycomon,mynodeonly)