clearer names for actions, and infer actions better
[monitor.git] / commands / sitebad.py
1 #!/usr/bin/python
2
3 import os
4 import sys
5 import string
6 import time
7 from datetime import datetime,timedelta
8
9 from monitor import database
10 from monitor import parser as parsermodule
11 from monitor import config
12 from monitor.database.info.model import *
13 from monitor.wrapper import plc, plccache
14 from monitor.const import MINUP
15
16 from monitor.common import *
17 from monitor.query import verify,query_to_dict,node_select
18 from monitor.model import *
19
# Module-level PLC API handle, obtained once at import time.
# NOTE(review): presumably an authenticated connection; nothing in this file
# references `api` directly, so it may exist for importers -- confirm before removing.
api = plc.getAuthAPI()


def main():
        """Run the site check with the module-level ``config`` object.

        ``config`` is whatever the name is bound to at call time: the imported
        ``monitor.config`` module, or the parsed options when this file is
        executed as a script.
        """
        main2(config)
23
def main2(config):
        """Select the sites to examine and record their state.

        The selection is taken from ``config`` in priority order:

        1. ``config.site``     -- a single login_base
        2. ``config.node``     -- a hostname, mapped to its site via
                                  ``plccache.plcdb_hn2lb``
        3. ``config.sitelist`` -- comma-separated login_bases
        4. otherwise every site known to ``plccache.l_sites``

        ``config.checkpcu`` is forwarded to checkAndRecordState().

        Raises KeyError if ``config.node`` names a host that is not present in
        ``plccache.plcdb_hn2lb``.
        """
        l_plcsites = plccache.l_sites

        # Not every option read here is declared by this file's own option
        # parser (``--node`` is not), so look them up defensively instead of
        # failing with AttributeError.
        single_site = getattr(config, 'site', None)
        single_node = getattr(config, 'node', None)
        sitelist = getattr(config, 'sitelist', None)
        checkpcu = getattr(config, 'checkpcu', False)

        if single_site:
                l_sites = [single_site]
        elif single_node:
                l_sites = [plccache.plcdb_hn2lb[single_node]]
        elif sitelist:
                # Tolerate "a, b,,c": surrounding whitespace and empty entries
                # would otherwise produce names that match no site.
                l_sites = [name.strip() for name in sitelist.split(',') if name.strip()]
        else:
                l_sites = [plcsite['login_base'] for plcsite in l_plcsites]

        checkAndRecordState(l_sites, l_plcsites, checkpcu)
40
def getnodesup(nodelist, checkpcu):
        """Count the nodes in ``nodelist`` that are not considered down.

        A node is counted when its node-history status is anything other than
        'down', or when it has a blacklist record that has not expired.
        NOTE: a blacklisted node is assumed to be fine; since we are told to
        ignore it, no policy action should be taken because of it.

        When ``checkpcu`` is true, only nodes whose history record reports a
        PCU (``haspcu``) are considered at all, and for those the PCU history
        status must also be other than 'down' (unless the node is blacklisted).

        nodelist -- iterable of dicts, each providing a 'hostname' key
        checkpcu -- whether PCU state takes part in the decision

        Returns the number of nodes counted as up.

        An error while examining a single node is reported via
        email_exception() and its traceback is printed; that node is not
        counted and processing continues with the next one.
        """
        import traceback

        up = 0
        for node in nodelist:
                try:
                        hostname = node['hostname']
                        nodehist = HistoryNodeRecord.findby_or_create(hostname=hostname)
                        nodebl = BlacklistRecord.get_by(hostname=hostname)

                        if checkpcu and not nodehist.haspcu:
                                # todo: don't count
                                # NOTE(review): this also skips blacklisted nodes
                                # that have no PCU -- confirm that is intended.
                                continue

                        if checkpcu:
                                # The latest findbad record supplies the pcu id,
                                # which keys the PCU history record.
                                noderec = FindbadNodeRecord.get_latest_by(hostname=hostname)
                                pcuhist = HistoryPCURecord.findby_or_create(plc_pcuid=noderec.plc_pcuid)
                                alive = (nodehist is not None and nodehist.status != 'down' and
                                         pcuhist is not None and pcuhist.status != 'down')
                        else:
                                alive = nodehist is not None and nodehist.status != 'down'

                        # The blacklist is consulted only when the node is not
                        # already counted as alive.
                        if alive or (nodebl is not None and not nodebl.expired()):
                                up += 1
                except Exception:
                        # Best effort per node, but let KeyboardInterrupt and
                        # SystemExit propagate instead of swallowing them.
                        email_exception(node['hostname'])
                        # print_exc() writes the traceback itself and returns None.
                        traceback.print_exc()
        return up
76
def check_site_state(rec, sitehist):
        """Move ``sitehist.status`` along its state track.

        * A site flagged ``new`` whose status is not yet one of
          'new'/'online'/'good' is set to 'new', marked as penalised and
          time-stamped.
        * With at least MINUP nodes up, the site becomes 'online' shortly
          after the change and 'good' once the change is older than the 0.5
          threshold.
        * Otherwise, unless the site is new, it becomes 'offline' and later
          'down' by the same timing rule.  New sites below MINUP are left
          untouched.

        The 0.5 threshold is passed straight to changed_lessthan() /
        changed_greaterthan(); presumably it is expressed in days -- confirm
        against those helpers.

        rec      -- accepted for the caller's benefit; not used here
        sitehist -- site history record, modified in place
        """
        if sitehist.new and sitehist.status not in ('new', 'online', 'good'):
                sitehist.status = 'new'
                # because new sites are disabled by default, i.e. have a penalty.
                sitehist.penalty_applied = True
                sitehist.last_changed = datetime.now()

        enough_nodes = sitehist.nodes_up >= MINUP

        if enough_nodes:
                # Entering the healthy track restarts the clock.
                if sitehist.status not in ('online', 'good'):
                        sitehist.last_changed = datetime.now()

                if changed_lessthan(sitehist.last_changed, 0.5) and sitehist.status != 'online':
                        print("changed status from %s to online" % sitehist.status)
                        sitehist.status = 'online'

                if changed_greaterthan(sitehist.last_changed, 0.5) and sitehist.status != 'good':
                        print("changed status from %s to good" % sitehist.status)
                        sitehist.status = 'good'
                return

        if sitehist.new:
                # Too few nodes, but the site is still new: no transition.
                return

        # Entering the unhealthy track restarts the clock.
        if sitehist.status not in ('offline', 'down'):
                sitehist.last_changed = datetime.now()

        if changed_lessthan(sitehist.last_changed, 0.5) and sitehist.status != 'offline':
                print("changed status from %s to offline" % sitehist.status)
                sitehist.status = 'offline'

        if changed_greaterthan(sitehist.last_changed, 0.5) and sitehist.status != 'down':
                print("changed status from %s to down" % sitehist.status)
                sitehist.status = 'down'
109
def checkAndRecordState(l_sites, l_plcsites, checkpcu):
        """Refresh and persist the history record of each requested site.

        For every login_base in ``l_sites`` that has both a PLC site record in
        ``l_plcsites`` and an entry in ``plccache.plcdb_lb2hn``, the matching
        HistorySiteRecord is found or created, updated with slice and node
        counts, ticket status (when a message id is set) and the number of
        nodes that are up, run through check_site_state(), printed as a
        one-line summary and flushed.  Other names are silently skipped.

        l_sites    -- iterable of login_base strings to process
        l_plcsites -- iterable of PLC site dicts ('login_base', 'site_id',
                      'max_slices', 'slice_ids', 'date_created', 'enabled')
        checkpcu   -- forwarded to getnodesup()

        Returns True.
        """
        lb2hn = plccache.plcdb_lb2hn

        # Index the PLC site records once instead of scanning the whole list
        # for every requested site.  setdefault() keeps the first record for a
        # login_base, matching a first-match linear scan.
        plcsite_by_lb = {}
        for plcsite in l_plcsites:
                plcsite_by_lb.setdefault(plcsite['login_base'], plcsite)

        count = 0
        for sitename in l_sites:
                d_site = plcsite_by_lb.get(sitename)
                if not d_site or sitename not in lb2hn:
                        continue

                sitehist = HistorySiteRecord.findby_or_create(
                                loginbase=sitename,
                                if_new_set={'status': 'unknown',
                                            'last_changed': datetime.now(),
                                            'message_id': 0,
                                            'penalty_level': 0})
                sitehist.last_checked = datetime.now()

                sitehist.plc_siteid = d_site['site_id']
                sitehist.slices_total = d_site['max_slices']
                sitehist.slices_used = len(d_site['slice_ids'])
                sitehist.nodes_total = len(lb2hn[sitename])
                if sitehist.message_id != 0:
                        # NOTE(review): `mailer` is not imported by name in this
                        # file; presumably it arrives through one of the star
                        # imports -- confirm.
                        rtstatus = mailer.getTicketStatus(sitehist.message_id)
                        sitehist.message_status = rtstatus['Status']
                        sitehist.message_queue = rtstatus['Queue']
                        sitehist.message_created = datetime.fromtimestamp(rtstatus['Created'])

                sitehist.nodes_up = getnodesup(lb2hn[sitename], checkpcu)
                # created < 30 days ago
                sitehist.new = changed_lessthan(datetime.fromtimestamp(d_site['date_created']), 30)
                sitehist.enabled = d_site['enabled']

                check_site_state(d_site, sitehist)

                count += 1
                print("%d %15s slices(%2s) nodes(%2s) notdown(%2s) %s" % (
                        count, sitename, sitehist.slices_used,
                        sitehist.nodes_total, sitehist.nodes_up, sitehist.status))
                sitehist.flush()

        print(HistorySiteRecord.query.count())
        session.flush()

        return True
155
if __name__ == '__main__':
        # Script entry point: build the option parser, parse the command line
        # and run the site check, reporting any failure with a traceback.
        import traceback
        from monitor import parser as parsermodule

        parser = parsermodule.getParser()
        parser.set_defaults(checkpcu=False)

        parser.add_option("", "--site", dest="site", metavar="login_base", 
                                                help="Provide a single site to operate on")
        parser.add_option("", "--sitelist", dest="sitelist", 
                                                help="Provide a list of sites separated by ','")
        parser.add_option("", "--checkpcu", dest="checkpcu", action="store_true",
                                                help="whether to include PCUs in the site status")

        # NOTE: this rebinds the module-level name `config` (imported above
        # from `monitor`) to the parsed options.
        config = parsermodule.parse_args(parser)

        try:
                main2(config)
        except Exception:
                # Fetch the exception this way so the handler does not depend on
                # version-specific `except ... , err` / `as err` syntax.
                err = sys.exc_info()[1]
                # print_exc() writes the traceback itself and returns None, so
                # its result must not be printed.
                traceback.print_exc()
                print("Exception: %s" % err)
                # Signal the failure to the caller with a non-zero exit status.
                sys.exit(1)