added sitelist option for find* scripts.
[monitor.git] / findbadpcu.py
#!/usr/bin/python
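"""findbadpcu.py -- scan the PCUs (power control units) known to PLC using a
thread pool, and record the results in the findbad PCU tables.

Example invocations (site names and the pcu id are illustrative):
    ./findbadpcu.py --site princeton
    ./findbadpcu.py --sitelist princeton,mit
    ./findbadpcu.py --pcuid 42 --force
"""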

import os
import sys
import string
import time
import socket
import sets
import signal
import traceback
from datetime import datetime, timedelta
import threadpool
import threading

import monitor
from pcucontrol import reboot
from monitor import config
from monitor.database.info.model import FindbadPCURecordSync, FindbadPCURecord, session
from monitor import database
from monitor import util
from monitor.wrapper import plc, plccache
from nodequery import pcu_select
from monitor.common import nmap_port_status
from monitor.scanapi import *

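# A global "round" counter gates re-scans: a PCU whose recorded round has
# caught up with the current global round is considered up to date and is
# skipped unless --force is given.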
plc_lock = threading.Lock()
global_round = 1
errorState = {}
count = 0

# this will be called when an exception occurs within a thread
def handle_exception(request, result):
        print "Exception occurred in request %s" % request.requestID
        for i in result:
                print "Result: %s" % i

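# checkPCUs drives the scan: it queues one WorkRequest per PCU on a ten-thread
# pool, with scanpcu.record as the result callback and handle_exception as the
# error callback, then polls until the pool raises NoResultsPending.  A hard
# exit guards against hung PCU probes after an hour.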
def checkPCUs(l_pcus, cohash):
        global global_round
        global count

        tp = threadpool.ThreadPool(10)
        scanpcu = ScanPCU(global_round)

        # CREATE all the work requests
        for pcuname in l_pcus:
                pcu_id = int(pcuname)
                fbnodesync = FindbadPCURecordSync.findby_or_create(plc_pcuid=pcu_id, if_new_set={'round': 0})
                fbnodesync.flush()

                node_round = fbnodesync.round
                if node_round < global_round or config.force:
                        # recreate node stats when refreshed
                        req = threadpool.WorkRequest(scanpcu.collectInternal, [pcu_id, cohash], {},
                                                     None, scanpcu.record, handle_exception)
                        tp.putRequest(req)
                else:
                        # We just skip it, since it's "up to date"
                        count += 1
                        print "%d %s %s" % (count, pcu_id, node_round)

        # WAIT while all the work requests are processed.
        begin = time.time()
        while 1:
                try:
                        time.sleep(1)
                        tp.poll()
                        # if more than an hour has passed, give up
                        if time.time() - begin > (60*60*1):
                                print "findbadpcu.py has run out of time!!!!!!"
                                os._exit(1)
                except KeyboardInterrupt:
                        print "Interrupted!"
                        break
                except threadpool.NoResultsPending:
                        print "All results collected."
                        break

        print FindbadPCURecordSync.query.count()
        print FindbadPCURecord.query.count()
        session.flush()
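        # Illustrative follow-up query (uses only fields referenced above):
        #   for rec in FindbadPCURecordSync.query.all():
        #       print rec.plc_pcuid, rec.round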


def main():
        global global_round

        l_pcus = plccache.l_pcus
        cohash = {}

        fbsync = FindbadPCURecordSync.findby_or_create(plc_pcuid=0,
                                                       if_new_set={'round': global_round})

        global_round = fbsync.round
        api = plc.getAuthAPI()

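        # Narrow the PCU list according to the command-line selection, in order:
        # --site, --sitelist, --pcuselect; with no selection, fall back to the
        # full cached PCU list; --nodelist and --pcuid are handled last.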
        if config.site is not None:
                site = api.GetSites(config.site)
                l_nodes = api.GetNodes(site[0]['node_ids'], ['pcu_ids'])
                pcus = []
                for node in l_nodes:
                        pcus += node['pcu_ids']
                # clear out dups.
                l_pcus = [pcu for pcu in sets.Set(pcus)]
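        # NOTE: --site uses only the first matching site (site[0]); --sitelist
        # below aggregates node_ids across all listed sites.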
        elif config.sitelist:
                site_list = config.sitelist.split(',')

                sites = api.GetSites(site_list)
                node_ids = []
                for s in sites:
                        node_ids += s['node_ids']

                l_nodes = api.GetNodes(node_ids, ['pcu_ids'])
                pcus = []
                for node in l_nodes:
                        pcus += node['pcu_ids']
                # clear out dups.
                l_pcus = [pcu for pcu in sets.Set(pcus)]

        elif config.pcuselect is not None:
                n, pcus = pcu_select(config.pcuselect)
                print pcus
                # clear out dups.
                l_pcus = [pcu for pcu in sets.Set(pcus)]

        elif config.nodelist is None and config.pcuid is None:
                print "Calling API GetPCUs() : cachecalls(%s)" % config.cachecalls
                l_pcus = [pcu['pcu_id'] for pcu in l_pcus]
        elif config.nodelist is not None:
                l_pcus = util.file.getListFromFile(config.nodelist)
                l_pcus = [int(pcu) for pcu in l_pcus]
        elif config.pcuid is not None:
                l_pcus = [config.pcuid]
                l_pcus = [int(pcu) for pcu in l_pcus]

        if config.increment:
                # update global round number to force refreshes across all nodes
                global_round += 1

        checkPCUs(l_pcus, cohash)

        if config.increment:
                # persist the incremented round so subsequent runs see the refresh
                fbsync.round = global_round
                fbsync.flush()
                session.flush()

        return 0


print "main"
if __name__ == '__main__':
        import logging
        logger = logging.getLogger("monitor")
        logger.setLevel(logging.DEBUG)
        fh = logging.FileHandler("monitor.log", mode='a')
        fh.setLevel(logging.DEBUG)
        formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
        fh.setFormatter(formatter)
        logger.addHandler(fh)
        from monitor import parser as parsermodule
        parser = parsermodule.getParser()
        parser.set_defaults(nodelist=None,
                            increment=False,
                            pcuid=None,
                            pcuselect=None,
                            site=None,
                            sitelist=None,
                            dbname="findbadpcus",
                            cachenodes=False,
                            cachecalls=True,
                            force=False,
                            )
        parser.add_option("-f", "--nodelist", dest="nodelist", metavar="FILE",
                          help="Provide the input file for the node list")
        parser.add_option("", "--site", dest="site", metavar="SITE",
                          help="Get all pcus associated with the given site's nodes")
        parser.add_option("", "--sitelist", dest="sitelist", metavar="LIST",
                          help="Get all pcus for the nodes of the given comma-separated list of sites")
        parser.add_option("", "--pcuselect", dest="pcuselect", metavar="QUERY",
                          help="Query string to apply to the findbad pcus")
        parser.add_option("", "--pcuid", dest="pcuid", metavar="ID",
                          help="Provide the id for a single pcu")

        parser.add_option("", "--cachenodes", action="store_true",
                          help="Cache node lookup from PLC")
        parser.add_option("", "--dbname", dest="dbname", metavar="FILE",
                          help="Specify the name of the database to which the information is saved")
        parser.add_option("", "--nocachecalls", action="store_false", dest="cachecalls",
                          help="Refresh the cached values")
        parser.add_option("-i", "--increment", action="store_true", dest="increment",
                          help="Increment round number to force refresh or retry")
        parser.add_option("", "--force", action="store_true", dest="force",
                          help="Force probe without incrementing global 'round'.")
        parser = parsermodule.getParser(['defaults'], parser)
        config = parsermodule.parse_args(parser)
        if hasattr(config, 'cachecalls') and not config.cachecalls:
                # NOTE: if explicitly asked, refresh cached values.
                print "Reloading PLCCache"
                plccache.init()
        try:
                # NOTE: evidently, there is a bizarre interaction between iLO and ssh
                # when LANG is set... We do not know why; unsetting LANG fixes the problem.
                if 'LANG' in os.environ:
                        del os.environ['LANG']
                main()
                time.sleep(1)
        except Exception, err:
                traceback.print_exc()
                print "Exception: %s" % err
                print "Saving data... exiting."
                sys.exit(0)