ab4f5ff81077165eb618943d9a0e5625eaae0a79
[monitor.git] / findbadpcu.py
1 #!/usr/bin/python
2
3 import os
4 import sys
5 import string
6 import time
7 import socket
8 import sets
9 import signal
10 import traceback
11 from datetime import datetime,timedelta
12 import threadpool
13 import threading
14
15 import monitor
16 from monitor import config
17 from monitor.database.info.model import FindbadPCURecord, session
18 from monitor import database
19 from monitor import util 
20 from monitor.wrapper import plc, plccache
21 from nodequery import pcu_select
22 from monitor.common import nmap_port_status
23 from monitor.scanapi import *
24
# Serializes PLC API access across scanner threads (used by the scan helpers).
plc_lock = threading.Lock()
# Current scan round; bumped by --increment so every PCU is re-probed.
global_round = 1
# Per-PCU error memo shared with the scan machinery — not referenced in this file.
errorState = {}
# Running count of PCUs skipped as "up to date" (printed in checkPCUs).
count = 0

30 # this will be called when an exception occurs within a thread
31 def handle_exception(request, result):
32         print "Exception occured in request %s" % request.requestID
33         for i in result:
34                 print "Result: %s" % i
35
def checkPCUs(l_pcus, cohash):
        """Probe every PCU id in l_pcus using a 10-worker thread pool.

        Each PCU is scanned via ScanPCU.collectInternal; successful results
        are persisted through scanpcu.record, exceptions are reported by
        handle_exception.  The whole process is hard-aborted (os._exit) if
        the scan runs longer than one hour.
        """
        global global_round
        global count

        tp = threadpool.ThreadPool(10)
        scanpcu = ScanPCU(global_round)

        # CREATE all the work requests
        for pcuname in l_pcus:
                pcu_id = int(pcuname)
                #fbnodesync = FindbadPCURecordSync.findby_or_create(plc_pcuid=pcu_id, if_new_set={'round' : 0})
                #fbnodesync.flush()

                #node_round   = fbnodesync.round
                # NOTE(review): with the sync-record lookup commented out,
                # node_round is always global_round - 1, so the condition below
                # is always true and the "up to date" skip branch is dead code.
                node_round   = global_round - 1
                if node_round < global_round or config.force:
                        # recreate node stats when refreshed
                        #print "%s" % nodename
                        req = threadpool.WorkRequest(scanpcu.collectInternal, [int(pcuname), cohash], {}, 
                                                                                 None, scanpcu.record, handle_exception)
                        tp.putRequest(req)
                else:
                        # We just skip it, since it's "up to date"
                        count += 1
                        print "%d %s %s" % (count, pcu_id, node_round)

        # WAIT while all the work requests are processed.
        begin = time.time()
        while 1:
                try:
                        time.sleep(1)
                        tp.poll()
                        # hard deadline: abort the whole process after one hour
                        # (60*60*1 seconds; an earlier comment wrongly said two hours)
                        if time.time() - begin > (60*60*1):
                                print "findbadpcus.py has run out of time!!!!!!"
                                os._exit(1)
                except KeyboardInterrupt:
                        print "Interrupted!"
                        break
                except threadpool.NoResultsPending:
                        # raised by tp.poll() once every queued request has finished
                        print "All results collected."
                        break

        #print FindbadPCURecordSync.query.count()
        print FindbadPCURecord.query.count()
        session.flush()

83
84 def main():
85         global global_round
86
87         l_pcus = plccache.l_pcus
88         cohash = {}
89
90         #fbsync = FindbadPCURecordSync.findby_or_create(plc_pcuid=0, 
91                                                                                         #if_new_set={'round' : global_round})
92
93         #global_round = fbsync.round
94         api = plc.getAuthAPI()
95
96         if config.site is not None:
97                 site = plccache.GetSitesByName([config.site])
98                 l_nodes = plccache.GetNodesByIds(site[0]['node_ids'])
99                 pcus = []
100                 for node in l_nodes:
101                         pcus += node['pcu_ids']
102                 # clear out dups.
103                 l_pcus = [pcu for pcu in sets.Set(pcus)]
104
105         elif config.node is not None:
106                 l_nodes = plcacche.GetNodeByName(config.node)
107                 pcus = []
108                 for node in l_nodes:
109                         pcus += node['pcu_ids']
110                 # clear out dups.
111                 l_pcus = [pcu for pcu in sets.Set(pcus)]
112
113         elif config.sitelist:
114                 site_list = config.sitelist.split(',')
115
116                 sites = plccache.GetSitesByName(site_list)
117                 node_ids = []
118                 for s in sites:
119                         node_ids += s['node_ids']
120
121                 l_nodes = plccache.GetNodeByIds(node_ids)
122                 pcus = []
123                 for node in l_nodes:
124                         pcus += node['pcu_ids']
125                 # clear out dups.
126                 l_pcus = [pcu for pcu in sets.Set(pcus)]
127
128         elif config.pcuselect is not None:
129                 n, pcus = pcu_select(config.pcuselect)
130                 print pcus
131                 # clear out dups.
132                 l_pcus = [pcu for pcu in sets.Set(pcus)]
133
134         elif config.nodelist == None and config.pcuid == None:
135                 print "Calling API GetPCUs() : cachecalls(%s)" % config.cachecalls
136                 l_pcus  = [pcu['pcu_id'] for pcu in l_pcus]
137         elif config.nodelist is not None:
138                 l_pcus = util.file.getListFromFile(config.nodelist)
139                 l_pcus = [int(pcu) for pcu in l_pcus]
140         elif config.pcuid is not None:
141                 l_pcus = [ config.pcuid ] 
142                 l_pcus = [int(pcu) for pcu in l_pcus]
143
144         if config.increment:
145                 # update global round number to force refreshes across all nodes
146                 global_round += 1
147
148         checkPCUs(l_pcus, cohash)
149
150         if config.increment:
151                 # update global round number to force refreshes across all nodes
152                 #fbsync.round = global_round
153                 #fbsync.flush()
154                 session.flush()
155
156         return 0
157
158
# NOTE(review): stray debug print — runs even on import of this module.
print "main"
if __name__ == '__main__':
        # Append-mode debug logging for the whole "monitor" package.
        import logging
        logger = logging.getLogger("monitor")
        logger.setLevel(logging.DEBUG)
        fh = logging.FileHandler("monitor.log", mode = 'a')
        fh.setLevel(logging.DEBUG)
        formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
        fh.setFormatter(formatter)
        logger.addHandler(fh)
        # Command-line options; defaults must be set before add_option calls.
        from monitor import parser as parsermodule
        parser = parsermodule.getParser()
        parser.set_defaults(nodelist=None, 
                                                increment=False, 
                                                pcuid=None,
                                                pcuselect=None,
                                                site=None,
                                                node=None,
                                                sitelist=None,
                                                dbname="findbadpcus", 
                                                cachenodes=False,
                                                cachecalls=True,
                                                force=False,
                                                )
        parser.add_option("-f", "--nodelist", dest="nodelist", metavar="FILE", 
                                                help="Provide the input file for the node list")
        parser.add_option("", "--node", dest="node", metavar="FILE", 
                                                help="Get all pcus associated with the given node")
        parser.add_option("", "--site", dest="site", metavar="FILE", 
                                                help="Get all pcus associated with the given site's nodes")
        parser.add_option("", "--sitelist", dest="sitelist", metavar="FILE", 
                                                help="Get all pcus associated with the given site's nodes")
        parser.add_option("", "--pcuselect", dest="pcuselect", metavar="FILE", 
                                                help="Query string to apply to the findbad pcus")
        parser.add_option("", "--pcuid", dest="pcuid", metavar="id", 
                                                help="Provide the id for a single pcu")

        parser.add_option("", "--cachenodes", action="store_true",
                                                help="Cache node lookup from PLC")
        parser.add_option("", "--dbname", dest="dbname", metavar="FILE", 
                                                help="Specify the name of the database to which the information is saved")
        parser.add_option("", "--nocachecalls", action="store_false", dest="cachecalls",
                                                help="Refresh the cached values")
        parser.add_option("-i", "--increment", action="store_true", dest="increment", 
                                                help="Increment round number to force refresh or retry")
        parser.add_option("", "--force", action="store_true", dest="force", 
                                                help="Force probe without incrementing global 'round'.")
        parser = parsermodule.getParser(['defaults'], parser)
        # Module-level 'config' read by main() and checkPCUs().
        config = parsermodule.parse_args(parser)
        if hasattr(config, 'cachecalls') and not config.cachecalls:
                # NOTE: if explicilty asked, refresh cached values.
                print "Reloading PLCCache"
                plccache.init()
        try:
                # NOTE: evidently, there is a bizarre interaction between iLO and ssh
                # when LANG is set... Do not know why.  Unsetting LANG, fixes the problem.
                if 'LANG' in os.environ:
                        del os.environ['LANG']
                main()
                time.sleep(1)
        except Exception, err:
                # Best-effort reporting: mail the traceback, then exit cleanly
                # (status 0 so wrappers don't treat a partial run as fatal).
                traceback.print_exc()
                from monitor.common import email_exception
                email_exception()
                print "Exception: %s" % err
                print "Saving data... exitting."
                sys.exit(0)