Modified findbad and findbadpcu to use scanapi. These two files still need to be combined.
[monitor.git] / findbadpcu.py
1 #!/usr/bin/python
2
3 import os
4 import sys
5 import string
6 import time
7 import socket
8 import sets
9 import signal
10 import traceback
11 from datetime import datetime,timedelta
12 import threadpool
13 import threading
14
15 import monitor
16 from pcucontrol  import reboot
17 from monitor import config
18 from monitor.database.info.model import FindbadPCURecordSync, FindbadPCURecord, session
19 from monitor import database
20 from monitor import util 
21 from monitor.wrapper import plc, plccache
22 from nodequery import pcu_select
23 from nodecommon import nmap_port_status
24 from monitor.scanapi import *
25
# Lock created at import time; not referenced anywhere else in the visible code of this file.
plc_lock = threading.Lock()
# Current scan round. main() replaces it with the value stored in the
# FindbadPCURecordSync record for plc_pcuid=0 and may increment it.
global_round = 1
# Not referenced anywhere else in the visible code of this file.
errorState = {}
# Running number of PCUs that checkPCUs() skipped because they were up to date.
count = 0
30
31 # this will be called when an exception occurs within a thread
def handle_exception(request, result):
    """Report a failure that happened inside a worker thread.

    checkPCUs() passes this function as the last argument of every
    threadpool.WorkRequest it creates.

    request -- the failed work request; only its `requestID` is read.
    result  -- an iterable; each element is printed on its own line.
    """
    print("Exception occured in request %s" % request.requestID)
    for item in result:
        print("Result: %s" % item)
36
def checkPCUs(l_pcus, cohash):
    """Scan every PCU in `l_pcus` whose stored round is behind the global one.

    l_pcus -- iterable of PCU ids (anything accepted by int()).
    cohash -- passed through unchanged to ScanPCU.collectInternal.

    For each PCU a FindbadPCURecordSync row is looked up or created (new rows
    start at round 0).  If the row's round is lower than `global_round`, or
    `config.force` is set, a work request calling scanpcu.collectInternal is
    queued on a 10-thread pool, with scanpcu.record and handle_exception
    supplied as its callbacks.  Otherwise the PCU is skipped and the
    module-level `count` is incremented.

    The function then polls the pool once per second until no results are
    pending or the user interrupts.  If the wait exceeds the time limit the
    whole process is terminated with os._exit(1).
    """
    global global_round
    global count

    pool = threadpool.ThreadPool(10)
    scanner = ScanPCU(global_round)

    # Queue one work request per PCU that still needs scanning.
    for pcuname in l_pcus:
        pcu_id = int(pcuname)
        sync_rec = FindbadPCURecordSync.findby_or_create(
            plc_pcuid=pcu_id, if_new_set={'round' : 0})
        sync_rec.flush()

        node_round = sync_rec.round
        if not (node_round < global_round or config.force):
            # Already scanned in this round: just report it and move on.
            count += 1
            print("%d %s %s" % (count, pcu_id, node_round))
            continue

        # Stale (or forced): hand the scan to the pool.
        work = threadpool.WorkRequest(scanner.collectInternal,
                                      [pcu_id, cohash], {},
                                      None, scanner.record, handle_exception)
        pool.putRequest(work)

    # Wait for the queued requests to finish.
    time_limit = 60*60*1    # seconds, i.e. one hour
    started = time.time()
    while True:
        try:
            time.sleep(1)
            pool.poll()
            if time.time() - started > time_limit:
                print("findbadpcus.py has run out of time!!!!!!")
                # Immediate exit; the summary and flush below are not reached.
                os._exit(1)
        except KeyboardInterrupt:
            print("Interrupted!")
            break
        except threadpool.NoResultsPending:
            print("All results collected.")
            break

    # Summarise how many records of each kind exist, then flush the session.
    print(FindbadPCURecordSync.query.count())
    print(FindbadPCURecord.query.count())
    session.flush()
82
83
def main():
    """Choose the PCUs to scan, scan them, and optionally advance the round.

    The PCU list is taken from the first matching source:
      1. config.site      -- pcu_ids of every node of that site (via the PLC API)
      2. config.pcuselect -- second element returned by pcu_select()
      3. neither config.nodelist nor config.pcuid given -- every entry of
         plccache.l_pcus
      4. config.nodelist  -- ids read from that file
      5. config.pcuid     -- that single id

    The global round is loaded from the FindbadPCURecordSync record with
    plc_pcuid=0 (created with the current `global_round` if it is missing).
    When config.increment is set the round is bumped before scanning and the
    new value is stored after checkPCUs() returns.

    Returns 0.
    """
    global global_round

    l_pcus = plccache.l_pcus
    cohash = {}

    fbsync = FindbadPCURecordSync.findby_or_create(
        plc_pcuid=0, if_new_set={'round' : global_round})
    global_round = fbsync.round

    if config.site is not None:
        api = plc.getAuthAPI()
        site = api.GetSites(config.site)
        l_nodes = api.GetNodes(site[0]['node_ids'], ['pcu_ids'])
        pcus = []
        for node in l_nodes:
            pcus += node['pcu_ids']
        # clear out dups.
        l_pcus = list(sets.Set(pcus))
    elif config.pcuselect is not None:
        # Only the second element of the pair is needed here.
        _, pcus = pcu_select(config.pcuselect)
        print(pcus)
        # clear out dups.
        l_pcus = list(sets.Set(pcus))
    elif config.nodelist is None and config.pcuid is None:
        print("Calling API GetPCUs() : cachecalls(%s)" % config.cachecalls)
        l_pcus = [pcu['pcu_id'] for pcu in l_pcus]
    elif config.nodelist is not None:
        l_pcus = [int(pcu) for pcu in util.file.getListFromFile(config.nodelist)]
    elif config.pcuid is not None:
        l_pcus = [int(config.pcuid)]

    if config.increment:
        # update global round number to force refreshes across all nodes
        global_round += 1

    checkPCUs(l_pcus, cohash)

    if config.increment:
        # persist the new round number now that the scan has been run
        fbsync.round = global_round
        fbsync.flush()
        session.flush()

    return 0
133
134
print("main")
if __name__ == '__main__':
    import logging

    # Append DEBUG-and-above records of the "monitor" logger to monitor.log.
    formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
    fh = logging.FileHandler("monitor.log", mode = 'a')
    fh.setLevel(logging.DEBUG)
    fh.setFormatter(formatter)
    logger = logging.getLogger("monitor")
    logger.setLevel(logging.DEBUG)
    logger.addHandler(fh)

    # Build the command-line parser on top of the project's base parser.
    from monitor import parser as parsermodule
    parser = parsermodule.getParser()
    parser.set_defaults(nodelist=None,
                        increment=False,
                        pcuid=None,
                        pcuselect=None,
                        site=None,
                        dbname="findbadpcus",
                        cachenodes=False,
                        cachecalls=True,
                        force=False,
                        )

    # Options that choose which PCUs are scanned.
    parser.add_option("-f", "--nodelist", dest="nodelist", metavar="FILE",
                      help="Provide the input file for the node list")
    parser.add_option("", "--site", dest="site", metavar="FILE",
                      help="Get all pcus associated with the given site's nodes")
    parser.add_option("", "--pcuselect", dest="pcuselect", metavar="FILE",
                      help="Query string to apply to the findbad pcus")
    parser.add_option("", "--pcuid", dest="pcuid", metavar="id",
                      help="Provide the id for a single pcu")

    # Options for caching, storage and refresh behaviour.
    parser.add_option("", "--cachenodes", action="store_true",
                      help="Cache node lookup from PLC")
    parser.add_option("", "--dbname", dest="dbname", metavar="FILE",
                      help="Specify the name of the database to which the information is saved")
    parser.add_option("", "--nocachecalls", action="store_false", dest="cachecalls",
                      help="Refresh the cached values")
    parser.add_option("-i", "--increment", action="store_true", dest="increment",
                      help="Increment round number to force refresh or retry")
    parser.add_option("", "--force", action="store_true", dest="force",
                      help="Force probe without incrementing global 'round'.")

    parser = parsermodule.getParser(['defaults'], parser)
    # Rebinds the module-level `config` that main() and checkPCUs() read.
    config = parsermodule.parse_args(parser)

    if hasattr(config, 'cachecalls') and not config.cachecalls:
        # --nocachecalls was given: rebuild the PLC cache before scanning.
        print("Reloading PLCCache")
        plccache.init()

    try:
        # NOTE: the original author observed an unexplained interaction
        # between iLO and ssh when LANG is set; removing LANG avoids it.
        if 'LANG' in os.environ:
            del os.environ['LANG']
        main()
        time.sleep(1)
    except Exception:
        err = sys.exc_info()[1]
        traceback.print_exc()
        print("Exception: %s" % err)
        print("Saving data... exitting.")
        # The process still exits with status 0 after a failure.
        sys.exit(0)