AM nagios/plc2nagios.py
[monitor.git] / sitebad.py
1 #!/usr/bin/python
2
3 import os
4 import sys
5 import string
6 import time
7
8
9 import soltesz
10 import comon
11 import threadpool
12 import syncplcdb
13 from nodequery import verify,query_to_dict,node_select
14
15 import plc
16 import auth
17 api = plc.PLC(auth.auth, auth.plc)
18 from unified_model import *
19 from monitor_policy import MINUP
20
# Module-level bookkeeping shared by the functions below.
#   round         - initial value of the global round counter
#                   (NOTE: this name shadows the builtin round() in this module)
#   externalState - state that gets dumped to the database: the global
#                   'round' plus one record per site under 'sites'
#   count         - running tally updated by checkAndRecordState() and
#                   collectStatusAndState(); also drives the periodic dump
round = 1
count = 0
externalState = dict(round=round, sites={})
24
def main(config):
        """Load the saved state, pick the sites to check, and check them.

        config -- parsed options object; this function reads its 'dbname',
                  'increment' and 'site' attributes.

        Rebinds the module-level externalState to whatever
        soltesz.if_cached_else() hands back (presumably the previously saved
        database named config.dbname, with the in-memory default as the
        fallback -- confirm against soltesz).
        """
        global externalState
        externalState = soltesz.if_cached_else(1, config.dbname, lambda: externalState)

        # Bumping the global round number forces every site to be refreshed.
        if config.increment:
                externalState['round'] += 1

        # The result of create_plcdb() is not used here; the call itself is kept.
        syncplcdb.create_plcdb()
        l_plcsites = soltesz.dbLoad("l_plcsites")

        # A single site given on the command line overrides the full site list.
        l_sites = [config.site] if config.site else [site['login_base'] for site in l_plcsites]

        checkAndRecordState(l_sites, l_plcsites)
41
def checkAndRecordState(l_sites, l_plcsites):
        """Bring every site in l_sites up to the current global round.

        l_sites    -- login_base names of the sites to process.
        l_plcsites -- site records, passed through to collectStatusAndState().

        A site whose stored round is behind the global round is re-collected
        and stamped with the global round; a site that is already current only
        bumps the module-level count.  externalState is dumped whenever count
        is a multiple of 20 and once more after the loop.

        Relies on the module-level name 'config' (for config.dbname), which
        this file binds only in its __main__ block.
        """
        global count

        target_round = externalState['round']

        for login_base in l_sites:
                # First sighting of a site: start it at round 0 so it gets collected.
                record = externalState['sites'].setdefault(
                        login_base, {'round': 0, 'values': []})

                if record['round'] >= target_round:
                        # Already handled in this round.
                        count += 1
                else:
                        outcome = collectStatusAndState(login_base, l_plcsites)
                        # Re-read the global round after collecting, then stamp the site.
                        target_round = externalState['round']
                        externalState['sites'][login_base]['values'] = outcome
                        externalState['sites'][login_base]['round'] = target_round

                # Periodic checkpoint of the state.
                if count % 20 == 0:
                        soltesz.dbDump(config.dbname, externalState)

        # Final save once all sites have been visited.
        soltesz.dbDump(config.dbname, externalState)
65
# Databases loaded once, when this module is first executed/imported.
#   fb    - the 'findbad' database; getnodesup() looks hosts up in fb['nodes']
#           and reads each entry's ['values']['state']
#   lb2hn - the 'plcdb_lb2hn' database; indexed by site login_base to obtain
#           that site's list of node records
fb = soltesz.dbLoad("findbad")
lb2hn = soltesz.dbLoad("plcdb_lb2hn")
68
def getnodesup(nodelist):
        """Count the nodes in nodelist that are recorded in the "BOOT" state.

        nodelist -- iterable of node records; each must have a 'hostname' key
                    (a missing 'hostname' raises KeyError, as before).

        Returns the number of nodes whose entry in the module-level
        fb['nodes'] has ['values']['state'] == "BOOT".  Hosts that are absent
        from fb['nodes'], or whose entry has no usable 'values'/'state', are
        counted as not up.
        """
        known_nodes = fb['nodes']
        up = 0
        for node in nodelist:
                hostname = node['hostname']
                # Direct dict membership; no need to build a key list per node.
                if hostname not in known_nodes:
                        continue
                try:
                        state = known_nodes[hostname]['values']['state']
                except (KeyError, TypeError):
                        # Incomplete record ('values' or 'state' missing, or
                        # 'values' not a mapping): treat as not up.  Only these
                        # lookup failures are ignored, so KeyboardInterrupt and
                        # genuine bugs are no longer swallowed.
                        continue
                if state == "BOOT":
                        up += 1
        return up
79
def collectStatusAndState(sitename, l_plcsites):
        """Recompute and persist the up/down status of one site.

        sitename   -- login_base of the site to evaluate.
        l_plcsites -- site records; each is matched on its 'login_base' key and
                      the matching record's 'slice_ids' is read.

        Returns None when no (truthy) record in l_plcsites matches sitename,
        and True otherwise.  When the site also appears in the module-level
        lb2hn, its PersistFlags record is refreshed (node/slice counts, status,
        timestamps), one summary line is printed, the module-level count is
        incremented, and the record is saved.
        """
        global count

        # First record whose login_base matches, or None.
        d_site = next((rec for rec in l_plcsites if rec['login_base'] == sitename), None)
        if not d_site:
                return None

        # Sites without a node list have nothing to record.
        if sitename not in lb2hn:
                return True

        pf = PersistFlags(sitename, 1, db='site_persistflags')

        # A brand-new record has no change timestamp yet.
        if not pf.checkattr('last_changed'):
                pf.last_changed = time.time()

        pf.last_checked = time.time()
        pf.nodes_total = len(lb2hn[sitename])
        pf.slices_used = len(d_site['slice_ids'])
        pf.nodes_up = getnodesup(lb2hn[sitename])
        if not pf.checkattr('status'):
                pf.status = "unknown"

        # "good" needs at least MINUP nodes up; note the time of any transition.
        new_status = "good" if pf.nodes_up >= MINUP else "down"
        if pf.status != new_status:
                pf.last_changed = time.time()
        pf.status = new_status

        count += 1
        print("%d %15s slices(%2s) nodes(%2s) up(%2s) %s" % (
                count, sitename, pf.slices_used,
                pf.nodes_total, pf.nodes_up, pf.status))

        # pf.enabled and pf.suspended are updated by other modules, not here.
        pf.save()

        return True
121
if __name__ == '__main__':
        # Script entry point: parse the command line, run main(), and on any
        # error save whatever state has been collected before exiting.
        import traceback
        from config import config
        from optparse import OptionParser

        parser = OptionParser()
        parser.set_defaults(filename=None, node=None, site=None, nodeselect=False, nodegroup=None,
                            increment=False, dbname="sitebad", cachenodes=False)
        parser.add_option("", "--site", dest="site", metavar="login_base",
                          help="Provide a single site to operate on")
        parser.add_option("", "--sitelist", dest="sitelist", metavar="file.list",
                          help="Provide a list of files to operate on")

        parser.add_option("", "--dbname", dest="dbname", metavar="FILE",
                          help="Specify the name of the database to which the information is saved")
        parser.add_option("-i", "--increment", action="store_true", dest="increment",
                          help="Increment round number to force refresh or retry")

        # Rebinds the module-level name 'config' from the imported callable to
        # the parsed options object; checkAndRecordState() reads config.dbname
        # through this global.
        config = config(parser)
        config.parse_args()

        try:
                main(config)
        except Exception as err:
                # print_exc() writes the traceback itself and returns None, so
                # it must not be wrapped in print (that emitted a stray "None").
                traceback.print_exc()
                print("Exception: %s" % err)
                print("Saving data... exitting.")
                soltesz.dbDump(config.dbname, externalState)
                # NOTE(review): the exit status stays 0 even though the run
                # failed, to preserve existing behavior for callers; consider
                # a non-zero status once callers have been checked.
                sys.exit(0)