catch an exception from DRAC logins when the connection is closed early.
[monitor.git] / sitebad.py
1 #!/usr/bin/python
2
3 import os
4 import sys
5 import string
6 import time
7 from datetime import datetime,timedelta
8
9 from monitor import database
10 from monitor import parser as parsermodule
11 from monitor import config
12 from monitor.database.info.model import HistorySiteRecord, FindbadNodeRecord, session
13 from monitor.wrapper import plc, plccache
14 from monitor.const import MINUP
15
16 from monitor.common import *
17 from nodequery import verify,query_to_dict,node_select
18 from monitor.model import *
19
# Handle returned by plc.getAuthAPI(), created once at import time.
# It is not referenced anywhere else in this file.
api = plc.getAuthAPI()


def main():
    """Run the site check with the module-level ``config`` object."""
    main2(config)
23
def main2(config):
    """Pick the sites to examine and record their current state.

    config -- object carrying the parsed command-line options.  Two optional
              attributes are consulted:
                site     -- a single login_base to check
                sitelist -- comma-separated login_base values
              When neither is set, every site in plccache.l_sites is checked.

    The selection is handed to checkAndRecordState() together with the
    cached PLC site records.
    """
    l_plcsites = plccache.l_sites

    # The --site/--sitelist options are only registered by this script's
    # __main__ section, so a config object coming from another entry point
    # may not define them at all; treat a missing attribute as "not given".
    single_site = getattr(config, 'site', None)
    site_csv = getattr(config, 'sitelist', None)

    if single_site:
        l_sites = [single_site]
    elif site_csv:
        # Tolerate "a, b,,c": strip blanks and drop empty entries, which
        # could never match a login_base anyway.
        l_sites = [name.strip() for name in site_csv.split(',')
                   if name.strip()]
    else:
        l_sites = [plcsite['login_base'] for plcsite in l_plcsites]

    checkAndRecordState(l_sites, l_plcsites)
38
def getnewsite(nodelist):
    """Return True if no node in *nodelist* has a recorded last_contact.

    For each node the most recently checked FindbadNodeRecord is fetched;
    the site is considered "not new" as soon as one record has a
    ``plc_node_stats['last_contact']`` value that is not None.

    nodelist -- iterable of node dicts; only the 'hostname' key is read.

    A failing lookup is reported with a traceback and that node is skipped,
    so a single bad record does not abort the run.
    """
    for node in nodelist:
        try:
            noderec = (FindbadNodeRecord.query
                       .filter(FindbadNodeRecord.hostname == node['hostname'])
                       .order_by(FindbadNodeRecord.date_checked.desc())
                       .first())
            if noderec is not None and \
                    noderec.plc_node_stats['last_contact'] is not None:
                # One such node settles the answer; no need to query the
                # database for the remaining nodes.
                return False
        except Exception:
            # Not a bare except: KeyboardInterrupt/SystemExit must propagate.
            import traceback
            traceback.print_exc()
    return True
51
def getnodesup(nodelist):
    """Count the nodes in *nodelist* whose latest record is in "BOOT" state.

    For each node the most recently checked FindbadNodeRecord is fetched and
    counted when its ``observed_status`` equals "BOOT".

    nodelist -- iterable of node dicts; only the 'hostname' key is read.

    A failing lookup is reported with a traceback and that node is simply
    not counted, so a single bad record does not abort the run.
    """
    up = 0
    for node in nodelist:
        try:
            noderec = (FindbadNodeRecord.query
                       .filter(FindbadNodeRecord.hostname == node['hostname'])
                       .order_by(FindbadNodeRecord.date_checked.desc())
                       .first())
            if noderec is not None and noderec.observed_status == "BOOT":
                up += 1
        except Exception:
            # Not a bare except: KeyboardInterrupt/SystemExit must propagate.
            import traceback
            traceback.print_exc()
    return up
65
def checkAndRecordState(l_sites, l_plcsites):
    """Refresh the HistorySiteRecord of every requested site.

    l_sites    -- iterable of login_base strings to process.
    l_plcsites -- iterable of PLC site dicts; the keys read here are
                  'login_base', 'max_slices', 'slice_ids' and 'enabled'.

    A site is processed only if it appears in l_plcsites and in
    plccache.plcdb_lb2hn; all others are skipped silently.  For each
    processed site the slice/node counters are updated, the status is set
    to "good" when at least MINUP nodes are up (otherwise "down"), and
    last_changed is touched only when the status actually flips.

    Prints one summary line per processed site followed by the total
    number of HistorySiteRecord rows.  Always returns True.
    """
    lb2hn = plccache.plcdb_lb2hn

    # Index the PLC records once instead of rescanning the whole list for
    # every requested site.  setdefault keeps the first record for a
    # login_base, matching the original first-match linear search.
    site_by_loginbase = {}
    for plcsite in l_plcsites:
        site_by_loginbase.setdefault(plcsite['login_base'], plcsite)

    count = 0
    for sitename in l_sites:
        d_site = site_by_loginbase.get(sitename)
        if not d_site or sitename not in lb2hn:
            continue

        site_nodes = lb2hn[sitename]
        # One timestamp per site so last_checked and last_changed agree
        # when the status changes during this pass.
        now = datetime.now()

        pf = HistorySiteRecord.findby_or_create(loginbase=sitename)
        pf.last_checked = now
        pf.slices_total = d_site['max_slices']
        pf.slices_used = len(d_site['slice_ids'])
        pf.nodes_total = len(site_nodes)
        pf.nodes_up = getnodesup(site_nodes)
        pf.new = getnewsite(site_nodes)
        pf.enabled = d_site['enabled']

        if pf.nodes_up >= MINUP:
            new_status = "good"
        else:
            new_status = "down"
        if pf.status != new_status:
            pf.last_changed = now
        pf.status = new_status

        count += 1
        print("%d %15s slices(%2s) nodes(%2s) up(%2s) %s" % (
            count, sitename, pf.slices_used,
            pf.nodes_total, pf.nodes_up, pf.status))
        pf.flush()

    print(HistorySiteRecord.query.count())
    session.flush()

    return True
105
if __name__ == '__main__':
    # Script entry point: register the site-selection options, parse the
    # command line and run the check.
    import traceback

    from monitor import parser as parsermodule

    parser = parsermodule.getParser()
    parser.set_defaults(filename=None, node=None, site=None,
                        nodeselect=False, nodegroup=None, cachenodes=False)

    parser.add_option("", "--site", dest="site", metavar="login_base",
                      help="Provide a single site to operate on")
    parser.add_option("", "--sitelist", dest="sitelist",
                      help="Provide a list of sites separated by ','")

    config = parsermodule.parse_args(parser)

    try:
        main2(config)
    except Exception:
        # sys.exc_info() instead of "except Exception, err" so the syntax
        # is accepted by every Python version.
        err = sys.exc_info()[1]
        # print_exc() writes the traceback itself and returns None, so it
        # must not be wrapped in print (that emitted a stray "None").
        traceback.print_exc()
        print("Exception: %s" % err)
        # NOTE(review): the exit status stays 0 even though the run failed;
        # kept as-is because callers may rely on it -- confirm whether a
        # non-zero status would be more appropriate.
        sys.exit(0)