# allow dir to act as a module.
# [monitor.git] / findbadpcu.py
1 #!/usr/bin/python
2
3 import os
4 import sys
5 import string
6 import time
7 import socket
8 import sets
9 import signal
10 import traceback
11 from datetime import datetime,timedelta
12 import threadpool
13 import threading
14
15 import monitor
16 from monitor import config
17 from monitor.database.info.model import FindbadPCURecordSync, FindbadPCURecord, session
18 from monitor import database
19 from monitor import util 
20 from monitor.wrapper import plc, plccache
21 from nodequery import pcu_select
22 from monitor.common import nmap_port_status
23 from monitor.scanapi import *
24
25 plc_lock = threading.Lock()
26 global_round = 1
27 errorState = {}
28 count = 0
29
30 # this will be called when an exception occurs within a thread
31 def handle_exception(request, result):
32         print "Exception occured in request %s" % request.requestID
33         for i in result:
34                 print "Result: %s" % i
35
36 def checkPCUs(l_pcus, cohash):
37         global global_round
38         global count
39
40         tp = threadpool.ThreadPool(10)
41         scanpcu = ScanPCU(global_round)
42
43         # CREATE all the work requests
44         for pcuname in l_pcus:
45                 pcu_id = int(pcuname)
46                 fbnodesync = FindbadPCURecordSync.findby_or_create(plc_pcuid=pcu_id, if_new_set={'round' : 0})
47                 fbnodesync.flush()
48
49                 node_round   = fbnodesync.round
50                 if node_round < global_round or config.force:
51                         # recreate node stats when refreshed
52                         #print "%s" % nodename
53                         req = threadpool.WorkRequest(scanpcu.collectInternal, [int(pcuname), cohash], {}, 
54                                                                                  None, scanpcu.record, handle_exception)
55                         tp.putRequest(req)
56                 else:
57                         # We just skip it, since it's "up to date"
58                         count += 1
59                         print "%d %s %s" % (count, pcu_id, node_round)
60
61         # WAIT while all the work requests are processed.
62         begin = time.time()
63         while 1:
64                 try:
65                         time.sleep(1)
66                         tp.poll()
67                         # if more than two hours
68                         if time.time() - begin > (60*60*1):
69                                 print "findbadpcus.py has run out of time!!!!!!"
70                                 os._exit(1)
71                 except KeyboardInterrupt:
72                         print "Interrupted!"
73                         break
74                 except threadpool.NoResultsPending:
75                         print "All results collected."
76                         break
77
78         print FindbadPCURecordSync.query.count()
79         print FindbadPCURecord.query.count()
80         session.flush()
81
82
83 def main():
84         global global_round
85
86         l_pcus = plccache.l_pcus
87         cohash = {}
88
89         fbsync = FindbadPCURecordSync.findby_or_create(plc_pcuid=0, 
90                                                                                         if_new_set={'round' : global_round})
91
92         global_round = fbsync.round
93         api = plc.getAuthAPI()
94
95         if config.site is not None:
96                 site = api.GetSites(config.site)
97                 l_nodes = api.GetNodes(site[0]['node_ids'], ['pcu_ids'])
98                 pcus = []
99                 for node in l_nodes:
100                         pcus += node['pcu_ids']
101                 # clear out dups.
102                 l_pcus = [pcu for pcu in sets.Set(pcus)]
103         elif config.sitelist:
104                 site_list = config.sitelist.split(',')
105
106                 sites = api.GetSites(site_list)
107                 node_ids = []
108                 for s in sites:
109                         node_ids += s['node_ids']
110
111                 l_nodes = api.GetNodes(node_ids, ['pcu_ids'])
112                 pcus = []
113                 for node in l_nodes:
114                         pcus += node['pcu_ids']
115                 # clear out dups.
116                 l_pcus = [pcu for pcu in sets.Set(pcus)]
117
118         elif config.pcuselect is not None:
119                 n, pcus = pcu_select(config.pcuselect)
120                 print pcus
121                 # clear out dups.
122                 l_pcus = [pcu for pcu in sets.Set(pcus)]
123
124         elif config.nodelist == None and config.pcuid == None:
125                 print "Calling API GetPCUs() : cachecalls(%s)" % config.cachecalls
126                 l_pcus  = [pcu['pcu_id'] for pcu in l_pcus]
127         elif config.nodelist is not None:
128                 l_pcus = util.file.getListFromFile(config.nodelist)
129                 l_pcus = [int(pcu) for pcu in l_pcus]
130         elif config.pcuid is not None:
131                 l_pcus = [ config.pcuid ] 
132                 l_pcus = [int(pcu) for pcu in l_pcus]
133
134         if config.increment:
135                 # update global round number to force refreshes across all nodes
136                 global_round += 1
137
138         checkPCUs(l_pcus, cohash)
139
140         if config.increment:
141                 # update global round number to force refreshes across all nodes
142                 fbsync.round = global_round
143                 fbsync.flush()
144                 session.flush()
145
146         return 0
147
148
149 print "main"
150 if __name__ == '__main__':
151         import logging
152         logger = logging.getLogger("monitor")
153         logger.setLevel(logging.DEBUG)
154         fh = logging.FileHandler("monitor.log", mode = 'a')
155         fh.setLevel(logging.DEBUG)
156         formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
157         fh.setFormatter(formatter)
158         logger.addHandler(fh)
159         from monitor import parser as parsermodule
160         parser = parsermodule.getParser()
161         parser.set_defaults(nodelist=None, 
162                                                 increment=False, 
163                                                 pcuid=None,
164                                                 pcuselect=None,
165                                                 site=None,
166                                                 sitelist=None,
167                                                 dbname="findbadpcus", 
168                                                 cachenodes=False,
169                                                 cachecalls=True,
170                                                 force=False,
171                                                 )
172         parser.add_option("-f", "--nodelist", dest="nodelist", metavar="FILE", 
173                                                 help="Provide the input file for the node list")
174         parser.add_option("", "--site", dest="site", metavar="FILE", 
175                                                 help="Get all pcus associated with the given site's nodes")
176         parser.add_option("", "--sitelist", dest="sitelist", metavar="FILE", 
177                                                 help="Get all pcus associated with the given site's nodes")
178         parser.add_option("", "--pcuselect", dest="pcuselect", metavar="FILE", 
179                                                 help="Query string to apply to the findbad pcus")
180         parser.add_option("", "--pcuid", dest="pcuid", metavar="id", 
181                                                 help="Provide the id for a single pcu")
182
183         parser.add_option("", "--cachenodes", action="store_true",
184                                                 help="Cache node lookup from PLC")
185         parser.add_option("", "--dbname", dest="dbname", metavar="FILE", 
186                                                 help="Specify the name of the database to which the information is saved")
187         parser.add_option("", "--nocachecalls", action="store_false", dest="cachecalls",
188                                                 help="Refresh the cached values")
189         parser.add_option("-i", "--increment", action="store_true", dest="increment", 
190                                                 help="Increment round number to force refresh or retry")
191         parser.add_option("", "--force", action="store_true", dest="force", 
192                                                 help="Force probe without incrementing global 'round'.")
193         parser = parsermodule.getParser(['defaults'], parser)
194         config = parsermodule.parse_args(parser)
195         if hasattr(config, 'cachecalls') and not config.cachecalls:
196                 # NOTE: if explicilty asked, refresh cached values.
197                 print "Reloading PLCCache"
198                 plccache.init()
199         try:
200                 # NOTE: evidently, there is a bizarre interaction between iLO and ssh
201                 # when LANG is set... Do not know why.  Unsetting LANG, fixes the problem.
202                 if 'LANG' in os.environ:
203                         del os.environ['LANG']
204                 main()
205                 time.sleep(1)
206         except Exception, err:
207                 traceback.print_exc()
208                 from monitor.common import email_exception
209                 email_exception()
210                 print "Exception: %s" % err
211                 print "Saving data... exitting."
212                 sys.exit(0)