#!/usr/bin/python
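# findbadpcu.py -- scan the PCUs (power control units) registered with PLC in
# parallel and record the results as FindbadPCURecord entries.  Which PCUs are
# scanned is decided by the command-line options parsed in the __main__ block.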

import os
import sys
import string
import time
import socket
import sets
import signal
import traceback
from datetime import datetime, timedelta
import threadpool
import threading

import monitor
from monitor import config
from monitor.database.info.model import FindbadPCURecord, session
from monitor import database
from monitor import util
from monitor.wrapper import plc, plccache
from nodequery import pcu_select
from monitor.common import nmap_port_status
from monitor.scanapi import *

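# Shared module state: global_round is the current scan round and count tracks
# PCUs skipped as already up to date; plc_lock and errorState are presumably
# used by the scanning code pulled in via monitor.scanapi.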
plc_lock = threading.Lock()
global_round = 1
errorState = {}
count = 0

# this will be called when an exception occurs within a thread
def handle_exception(request, result):
    print "Exception occurred in request %s" % request.requestID
    for i in result:
        print "Result: %s" % i

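# Queue one WorkRequest per PCU id on a 10-thread pool; each request runs
# ScanPCU.collectInternal and hands its result to ScanPCU.record.  The polling
# loop below waits until all results are collected, the user interrupts, or
# the one-hour deadline expires.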
def checkPCUs(l_pcus, cohash):
    global global_round
    global count

    tp = threadpool.ThreadPool(10)
    scanpcu = ScanPCU(global_round)

    # CREATE all the work requests
    for pcuname in l_pcus:
        pcu_id = int(pcuname)
        #fbnodesync = FindbadPCURecordSync.findby_or_create(plc_pcuid=pcu_id, if_new_set={'round' : 0})
        #fbnodesync.flush()

        #node_round = fbnodesync.round
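        # With the FindbadPCURecordSync bookkeeping above disabled, node_round
        # is always global_round - 1, so every PCU in l_pcus gets scanned.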
        node_round = global_round - 1
        if node_round < global_round or config.force:
            # recreate node stats when refreshed
            #print "%s" % nodename
            req = threadpool.WorkRequest(scanpcu.collectInternal, [int(pcuname), cohash], {},
                                         None, scanpcu.record, handle_exception)
            tp.putRequest(req)
        else:
            # We just skip it, since it's "up to date"
            count += 1
            print "%d %s %s" % (count, pcu_id, node_round)

    # WAIT while all the work requests are processed.
    begin = time.time()
    while 1:
        try:
            time.sleep(1)
            tp.poll()
            # bail out if the scan has been running for more than one hour
            if time.time() - begin > (60*60*1):
                print "findbadpcus.py has run out of time!!!!!!"
                os._exit(1)
        except KeyboardInterrupt:
            print "Interrupted!"
            break
        except threadpool.NoResultsPending:
            print "All results collected."
            break

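    # Report the total number of PCU records and flush any pending ones to the
    # database.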
    #print FindbadPCURecordSync.query.count()
    print FindbadPCURecord.query.count()
    session.flush()


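# Build the list of PCU ids to scan from the command-line options (--site,
# --node, --sitelist, --pcuselect, --nodelist, or --pcuid; otherwise every PCU
# in the PLC cache), optionally bump the global round, and run checkPCUs().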
def main():
    global global_round

    l_pcus = plccache.l_pcus
    cohash = {}

    #fbsync = FindbadPCURecordSync.findby_or_create(plc_pcuid=0,
    #                                               if_new_set={'round' : global_round})

    #global_round = fbsync.round
    api = plc.getAuthAPI()

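    # Narrow l_pcus according to whichever selection option was given.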
    if config.site is not None:
        site = plccache.GetSitesByName([config.site])
        l_nodes = plccache.GetNodesByIds(site[0]['node_ids'])
        pcus = []
        for node in l_nodes:
            pcus += node['pcu_ids']
        # clear out dups.
        l_pcus = [pcu for pcu in sets.Set(pcus)]

    elif config.node is not None:
        node = plccache.GetNodeByName(config.node)
        print node
        pcus = node['pcu_ids']
        # clear out dups.
        l_pcus = [pcu for pcu in sets.Set(pcus)]

    elif config.sitelist:
        site_list = config.sitelist.split(',')

        sites = plccache.GetSitesByName(site_list)
        node_ids = []
        for s in sites:
            node_ids += s['node_ids']

        l_nodes = plccache.GetNodesByIds(node_ids)
        pcus = []
        for node in l_nodes:
            pcus += node['pcu_ids']
        # clear out dups.
        l_pcus = [pcu for pcu in sets.Set(pcus)]

    elif config.pcuselect is not None:
        n, pcus = pcu_select(config.pcuselect)
        print pcus
        # clear out dups.
        l_pcus = [pcu for pcu in sets.Set(pcus)]

    elif config.nodelist is None and config.pcuid is None:
        print "Calling API GetPCUs() : cachecalls(%s)" % config.cachecalls
        l_pcus = [pcu['pcu_id'] for pcu in l_pcus]
    elif config.nodelist is not None:
        l_pcus = util.file.getListFromFile(config.nodelist)
        l_pcus = [int(pcu) for pcu in l_pcus]
    elif config.pcuid is not None:
        l_pcus = [config.pcuid]
        l_pcus = [int(pcu) for pcu in l_pcus]

    if config.increment:
        # update global round number to force refreshes across all nodes
        global_round += 1

    checkPCUs(l_pcus, cohash)

    if config.increment:
        # update global round number to force refreshes across all nodes
        #fbsync.round = global_round
        #fbsync.flush()
        session.flush()

    return 0


print "main"
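# Entry point: set up file logging to monitor.log, parse the command-line
# options, work around an iLO/ssh interaction by unsetting LANG, then run
# main().  Any uncaught exception is printed and emailed via email_exception().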
if __name__ == '__main__':
    import logging
    logger = logging.getLogger("monitor")
    logger.setLevel(logging.DEBUG)
    fh = logging.FileHandler("monitor.log", mode='a')
    fh.setLevel(logging.DEBUG)
    formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
    fh.setFormatter(formatter)
    logger.addHandler(fh)
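    # Build the option parser; the getParser(['defaults'], parser) call below
    # presumably layers the shared monitor defaults on top of these options.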
    from monitor import parser as parsermodule
    parser = parsermodule.getParser()
    parser.set_defaults(nodelist=None,
                        increment=False,
                        pcuid=None,
                        pcuselect=None,
                        site=None,
                        node=None,
                        sitelist=None,
                        dbname="findbadpcus",
                        cachenodes=False,
                        cachecalls=True,
                        force=False,
                        )
    parser.add_option("-f", "--nodelist", dest="nodelist", metavar="FILE",
                      help="Provide the input file for the node list")
    parser.add_option("", "--node", dest="node", metavar="NODE",
                      help="Get all pcus associated with the given node")
    parser.add_option("", "--site", dest="site", metavar="SITE",
                      help="Get all pcus associated with the given site's nodes")
    parser.add_option("", "--sitelist", dest="sitelist", metavar="SITES",
                      help="Get all pcus associated with the nodes of the given comma-separated sites")
    parser.add_option("", "--pcuselect", dest="pcuselect", metavar="QUERY",
                      help="Query string to apply to the findbad pcus")
    parser.add_option("", "--pcuid", dest="pcuid", metavar="ID",
                      help="Provide the id for a single pcu")

    parser.add_option("", "--cachenodes", action="store_true",
                      help="Cache node lookup from PLC")
    parser.add_option("", "--dbname", dest="dbname", metavar="NAME",
                      help="Specify the name of the database to which the information is saved")
    parser.add_option("", "--nocachecalls", action="store_false", dest="cachecalls",
                      help="Refresh the cached values")
    parser.add_option("-i", "--increment", action="store_true", dest="increment",
                      help="Increment round number to force refresh or retry")
    parser.add_option("", "--force", action="store_true", dest="force",
                      help="Force probe without incrementing global 'round'.")
    parser = parsermodule.getParser(['defaults'], parser)
    config = parsermodule.parse_args(parser)
    if hasattr(config, 'cachecalls') and not config.cachecalls:
        # NOTE: if explicitly asked, refresh cached values.
        print "Reloading PLCCache"
        plccache.init()
    try:
        # NOTE: evidently, there is a bizarre interaction between iLO and ssh
        # when LANG is set... We do not know why; unsetting LANG fixes the problem.
        if 'LANG' in os.environ:
            del os.environ['LANG']
        main()
        time.sleep(1)
    except Exception, err:
        traceback.print_exc()
        from monitor.common import email_exception
        email_exception()
        print "Exception: %s" % err
        print "Saving data... exiting."
        sys.exit(0)