AM nagios/plc2nagios.py
[monitor.git] / pcubad.py
1 #!/usr/bin/python
2
3 import os
4 import sys
5 import string
6 import time
7
8 from reboot import pcu_name
9
10 import soltesz
11 import comon
12 import threadpool
13 import syncplcdb
14 from nodequery import verify,query_to_dict,node_select
15
16 import plc
17 import auth
18 api = plc.PLC(auth.auth, auth.plc)
19 from unified_model import *
20 from monitor_policy import MINUP
21
# Initial round number used to seed externalState below.
# NOTE(review): this name shadows the builtin round() for the rest of the module.
22 round = 1
# In-memory default for the persisted state: a global 'round' counter plus a
# 'nodes' dict that checkAndRecordState() fills with one entry per pcu id.
# main() rebinds this name to whatever soltesz.if_cached_else() returns.
23 externalState = {'round': round, 'nodes': {}}
# Running counter bumped by checkAndRecordState() (for pcus it skips) and by
# collectStatusAndState() (for pcus it processes); checkAndRecordState() dumps
# the state to disk whenever this is a multiple of 20.
24 count = 0
25
def main(config):
        """Select the PCUs to examine and record their current status.

        config is the parsed option object; this function reads its dbname,
        increment and pcu attributes.  When config.pcu is set, only the PCU
        whose 'hostname' or 'ip' equals it is processed, and the script exits
        with status 1 if no such PCU is known.  Otherwise every PCU returned
        by the PCU list lookup is processed.
        """
        global externalState
        # presumably returns the cached copy stored under config.dbname and
        # falls back to the in-memory default otherwise -- confirm in soltesz
        externalState = soltesz.if_cached_else(1, config.dbname, lambda : externalState)
        if config.increment:
                # update global round number to force refreshes across all pcus
                externalState['round'] += 1

        l_plcpcus = soltesz.if_cached_else_refresh(1, 1, "pculist", lambda : plc.GetPCUs())

        # The same name must be used for initialisation, assignment and the
        # "not found" test; otherwise a successful match is never noticed.
        l_pcus = None
        if config.pcu:
                for pcu in l_plcpcus:
                        if pcu['hostname'] == config.pcu or pcu['ip'] == config.pcu:
                                l_pcus = [pcu['pcu_id']]
                if not l_pcus:
                        print("ERROR: could not find pcu %s" % config.pcu)
                        sys.exit(1)
        else:
                l_pcus = [pcu['pcu_id'] for pcu in l_plcpcus]

        checkAndRecordState(l_pcus, l_plcpcus)
47
def checkAndRecordState(l_pcus, l_plcpcus):
        """Bring every listed pcu up to the current round and persist the state.

        l_pcus is the list of pcu ids to visit; l_plcpcus is the list of PCU
        records handed through to collectStatusAndState().  A pcu whose stored
        round is behind the global round is re-collected and stamped with the
        global round; a pcu that is already current only bumps the module
        counter.  The state is dumped whenever the counter is a multiple of 20
        and once more after the loop.

        NOTE: the dump target comes from the module-level `config`, which is
        only bound when this file runs as a script.
        """
        global externalState
        global count
        current_round = externalState['round']

        for pcu_id in l_pcus:
                # first sighting of this pcu: start it at round 0 with no values
                record = externalState['nodes'].setdefault(pcu_id, {'round': 0, 'values': []})

                if record['round'] >= current_round:
                        # already handled in this round
                        count += 1
                else:
                        # do work
                        result = collectStatusAndState(pcu_id, l_plcpcus)
                        current_round = externalState['round']
                        record = externalState['nodes'][pcu_id]
                        record['values'] = result
                        record['round'] = current_round

                if not count % 20:
                        soltesz.dbDump(config.dbname, externalState)

        soltesz.dbDump(config.dbname, externalState)
71
# Loaded once at import time.  collectStatusAndState() reads fbpcu through
# get() using the path "nodes/id_<pcu id>/values/reboot".
72 fbpcu = soltesz.dbLoad('findbadpcus')
# Not referenced anywhere else in the visible part of this file.
# NOTE(review): presumably a hostname-to-loginbase map, judging by the name -- confirm.
73 hn2lb = soltesz.dbLoad("plcdb_hn2lb")
74
def get(fb, path):
        """Return the value found by walking the nested container fb along path.

        path is a "/"-separated sequence of keys, applied one after another
        starting at fb.  Returns the value reached, or None as soon as a key is
        missing.  None is also returned when an intermediate value cannot be
        descended into (for example None, a number or a string), instead of
        letting a TypeError escape.
        """
        values = fb
        for index in path.split("/"):
                try:
                        # membership is tested first so that containers which
                        # create entries on lookup are never modified
                        if index not in values:
                                return None
                        values = values[index]
                except (TypeError, LookupError):
                        # values is not a container keyed by strings
                        return None
        return values
84
def collectStatusAndState(pcuname, l_plcpcus):
        """Update and save the persistent status flags of a single pcu.

        pcuname is compared against the 'pcu_id' of each record in l_plcpcus;
        when nothing matches, None is returned and nothing is stored.
        Otherwise the 'reboot' value recorded for this pcu in fbpcu is mapped
        to one of "good", "netdown", "badconfig" or "error", last_changed is
        refreshed if that differs from the stored status, one summary line is
        printed, the flags are saved and True is returned.
        """
        global count

        # find the record for this pcu; give up if there is none
        for d_pcu in l_plcpcus:
                if d_pcu['pcu_id'] == pcuname:
                        break
        else:
                return None

        pf = PersistFlags(pcuname, 1, db='pcu_persistflags')

        # fill in defaults for attributes that have never been stored
        if not pf.checkattr('last_changed'):
                pf.last_changed = time.time()

        pf.last_checked = time.time()

        if not pf.checkattr('valid'):
                pf.valid = "unknown"
                pf.last_valid = 0

        if not pf.checkattr('status'):
                pf.status = "unknown"

        values_prefix = "nodes/id_" + str(pcuname) + "/values/"
        state_path     = values_prefix + "reboot"
        # NOTE: computed but not read anywhere in this function
        bootstate_path = values_prefix + "plcpcu/boot_state"

        current_state = get(fbpcu, state_path)

        # recorded reboot value -> status label; anything else is an error
        known_states = (
                (0, "good"),
                ('NetDown', "netdown"),
                ('Not_Run', "badconfig"),
        )
        for recorded, label in known_states:
                if current_state == recorded:
                        new_status = label
                        break
        else:
                new_status = "error"

        if pf.status != new_status:
                pf.last_changed = time.time()
        pf.status = new_status

        count += 1
        print("%d %35s %s since(%s)" % (count, pcu_name(d_pcu), pf.status, diff_time(pf.last_changed)))
        # updated by other modules
        #pf.enabled = 
        #pf.suspended = 

        pf.save()

        return True
136
# Script entry point: parse the command line, run main(), and make sure the
# collected state is written out even when main() fails.
if __name__ == '__main__':
        from config import config
        from optparse import OptionParser
        parser = OptionParser()
        parser.set_defaults(filename=None, pcu=None, pcuselect=False, pcugroup=None, 
                                                increment=False, dbname="pcubad", cachepcus=False)
        parser.add_option("", "--pcu", dest="pcu", metavar="hostname", 
                                                help="Provide a single pcu to operate on")
        parser.add_option("", "--pculist", dest="pculist", metavar="file.list", 
                                                help="Provide a list of files to operate on")

        parser.add_option("", "--dbname", dest="dbname", metavar="FILE", 
                                                help="Specify the name of the database to which the information is saved")
        parser.add_option("-i", "--increment", action="store_true", dest="increment", 
                                                help="Increment round number to force refresh or retry")
        # The module-level name must stay `config`: checkAndRecordState() reads
        # config.dbname from it.
        config = config(parser)
        config.parse_args()

        try:
                main(config)
        except Exception:
                import traceback
                # fetched this way so the handler parses on every Python version
                err = sys.exc_info()[1]
                # print_exc() writes the traceback itself and returns None, so
                # wrapping it in print only adds a stray "None" line
                traceback.print_exc()
                print("Exception: %s" % err)
                print("Saving data... exitting.")
                soltesz.dbDump(config.dbname, externalState)
                # NOTE(review): exits with status 0 although an error occurred;
                # left unchanged in case callers depend on it
                sys.exit(0)