merge from 2.0 branch
[monitor.git] / sitebad.py
1 #!/usr/bin/python
2
3 import os
4 import sys
5 import string
6 import time
7 from datetime import datetime,timedelta
8
9 from monitor import database
10 from monitor import parser as parsermodule
11 from monitor import config
12 from monitor.database.info.model import HistorySiteRecord, HistoryNodeRecord, session, BlacklistRecord
13 from monitor.wrapper import plc, plccache
14 from monitor.const import MINUP
15
16 from monitor.common import *
17 from nodequery import verify,query_to_dict,node_select
18 from monitor.model import *
19
# Handle returned by plc.getAuthAPI(), created once at import time.
# Nothing in the visible part of this file reads it.
api = plc.getAuthAPI()


def main():
    """Run the site check with the imported ``monitor.config`` module as configuration."""
    main2(config)
23
def main2(config):
    """Choose which sites to examine from ``config`` and record their state.

    Selection precedence:
      1. ``config.site``     -- a single login base;
      2. ``config.node``     -- a hostname, translated to its site through
                                ``plccache.plcdb_hn2lb``;
      3. ``config.sitelist`` -- comma-separated login bases;
      4. otherwise every site listed in ``plccache.l_sites``.

    The chosen login bases are handed to ``checkAndRecordState`` together
    with the full PLC site list.
    """
    l_plcsites = plccache.l_sites

    if config.site:
        l_sites = [config.site]
    elif config.node:
        l_sites = [plccache.plcdb_hn2lb[config.node]]
    elif config.sitelist:
        # Trim whitespace and drop empty entries so that input such as
        # "a, b," still names sites 'a' and 'b'; untrimmed names would
        # silently match nothing further down.
        l_sites = [name.strip() for name in config.sitelist.split(',')
                   if name.strip()]
    else:
        l_sites = [site['login_base'] for site in l_plcsites]

    checkAndRecordState(l_sites, l_plcsites)
40
def getnodesup(nodelist):
    """Count the nodes in ``nodelist`` that are treated as up.

    A node is counted when its HistoryNodeRecord status is anything other
    than 'down', or when it has a BlacklistRecord that has not expired.

    NOTE: a blacklisted node is assumed to be fine; since we are told to
          ignore it, no policy actions should be taken for it.
    NOTE: adding a condition on nodehist.haspcu would include PCUs in the
          calculation.

    Args:
        nodelist: iterable of mappings that each provide a 'hostname' key.

    Returns:
        The number of counted nodes (0 for an empty ``nodelist``).

    A failure while examining one node is reported through
    ``email_exception`` and a printed traceback; that node is not counted
    and the remaining nodes are still examined.
    """
    import traceback

    up = 0
    for node in nodelist:
        try:
            hostname = node['hostname']
            nodehist = HistoryNodeRecord.findby_or_create(hostname=hostname)
            nodebl = BlacklistRecord.get_by(hostname=hostname)
            if (nodehist is not None and nodehist.status != 'down') or \
                    (nodebl is not None and not nodebl.expired()):
                up += 1
        except Exception:
            # Catch Exception rather than everything so that Ctrl-C and
            # sys.exit() are not swallowed by this per-node handler.
            email_exception(node['hostname'])
            # print_exc() writes the traceback itself and returns None, so
            # its result must not be printed.
            traceback.print_exc()
    return up
59
def check_site_state(rec, sitehist):
    """Move ``sitehist.status`` forward based on how many nodes are up.

    ``rec`` is accepted but not read here.

    With at least MINUP nodes up the site becomes 'online' and, once that
    has lasted longer than 0.5 (days, judging by how the caller uses the
    same helper), 'good'.  Otherwise a site that is not new becomes
    'offline' and later 'down' on the same schedule.  ``last_changed`` is
    reset whenever the site crosses from one pair of states to the other.
    """
    up_states = ('online', 'good')
    down_states = ('offline', 'down')

    # A new site that is in none of the acceptable states is (re)marked new.
    if sitehist.new and sitehist.status not in ('new', 'online', 'good'):
        sitehist.status = 'new'
        # New sites are disabled by default, i.e. they already carry a penalty.
        sitehist.penalty_applied = True
        sitehist.last_changed = datetime.now()

    if sitehist.nodes_up >= MINUP:
        # Entering the up side: restart the clock.
        if sitehist.status not in up_states:
            sitehist.last_changed = datetime.now()

        if changed_lessthan(sitehist.last_changed, 0.5) and sitehist.status != 'online':
            print("changed status from %s to online" % sitehist.status)
            sitehist.status = 'online'

        if changed_greaterthan(sitehist.last_changed, 0.5) and sitehist.status != 'good':
            print("changed status from %s to good" % sitehist.status)
            sitehist.status = 'good'
        return

    # Too few nodes up: new sites are left alone.
    if sitehist.new:
        return

    # Entering the down side: restart the clock.
    if sitehist.status not in down_states:
        sitehist.last_changed = datetime.now()

    if changed_lessthan(sitehist.last_changed, 0.5) and sitehist.status != 'offline':
        print("changed status from %s to offline" % sitehist.status)
        sitehist.status = 'offline'

    if changed_greaterthan(sitehist.last_changed, 0.5) and sitehist.status != 'down':
        print("changed status from %s to down" % sitehist.status)
        sitehist.status = 'down'
92
def checkAndRecordState(l_sites, l_plcsites):
    """Refresh and persist the HistorySiteRecord of every requested site.

    Args:
        l_sites: login bases of the sites to process.
        l_plcsites: site dicts; the keys read here are 'login_base',
            'max_slices', 'slice_ids', 'date_created' and 'enabled'.

    A requested site is skipped silently when it does not appear in
    ``l_plcsites`` or has no entry in ``plccache.plcdb_lb2hn``.

    Returns:
        True, always.
    """
    lb2hn = plccache.plcdb_lb2hn

    # Index the PLC sites once instead of scanning the list for every
    # requested site.  setdefault keeps the first entry for a login base,
    # matching the original first-match-then-break search.
    sites_by_loginbase = {}
    for site in l_plcsites:
        sites_by_loginbase.setdefault(site['login_base'], site)

    count = 0
    for sitename in l_sites:
        d_site = sites_by_loginbase.get(sitename)
        if not d_site:
            continue
        if sitename not in lb2hn:
            continue

        sitehist = HistorySiteRecord.findby_or_create(
            loginbase=sitename,
            if_new_set={'status': 'unknown',
                        'last_changed': datetime.now(),
                        'message_id': 0,
                        'penalty_level': 0})
        sitehist.last_checked = datetime.now()

        sitehist.slices_total = d_site['max_slices']
        sitehist.slices_used = len(d_site['slice_ids'])
        sitehist.nodes_total = len(lb2hn[sitename])
        if sitehist.message_id != 0:
            # NOTE(review): `mailer` is not imported by name in this file;
            # presumably it arrives through one of the star imports -- confirm.
            rtstatus = mailer.getTicketStatus(sitehist.message_id)
            sitehist.message_status = rtstatus['Status']
            sitehist.message_queue = rtstatus['Queue']
            sitehist.message_created = datetime.fromtimestamp(rtstatus['Created'])

        sitehist.nodes_up = getnodesup(lb2hn[sitename])
        # Created less than 30 days ago.
        sitehist.new = changed_lessthan(datetime.fromtimestamp(d_site['date_created']), 30)
        sitehist.enabled = d_site['enabled']

        check_site_state(d_site, sitehist)

        count += 1
        print("%d %15s slices(%2s) nodes(%2s) notdown(%2s) %s" % (
            count, sitename, sitehist.slices_used,
            sitehist.nodes_total, sitehist.nodes_up, sitehist.status))
        sitehist.flush()

    print(HistorySiteRecord.query.count())
    session.flush()

    return True
137
# Command-line entry point: build the option parser, add the site selection
# options, and run the check with the parsed configuration.
if __name__ == '__main__':
    import traceback
    from monitor import parser as parsermodule

    parser = parsermodule.getParser()
    parser.set_defaults(filename=None, node=None, site=None,
                        nodeselect=False, nodegroup=None, cachenodes=False)

    parser.add_option("", "--site", dest="site", metavar="login_base",
                      help="Provide a single site to operate on")
    parser.add_option("", "--sitelist", dest="sitelist",
                      help="Provide a list of sites separated by ','")

    config = parsermodule.parse_args(parser)

    try:
        main2(config)
    except Exception:
        # sys.exc_info() is used instead of "except ..., err" / "as err" so
        # the handler parses on every Python version.
        err = sys.exc_info()[1]
        # print_exc() emits the traceback itself and returns None; printing
        # its return value only added a stray "None" line.
        traceback.print_exc()
        print("Exception: %s" % err)
        # NOTE(review): the exit status stays 0 even though the run failed,
        # as in the original; confirm whether callers rely on that before
        # changing it to a non-zero status.
        sys.exit(0)