11 from datetime import datetime,timedelta
16 from monitor import config
17 from monitor.database.info.model import FindbadPCURecord, session
18 from monitor import database
19 from monitor import util
20 from monitor.wrapper import plc, plccache
21 from nodequery import pcu_select
22 from monitor.common import nmap_port_status
23 from monitor.scanapi import *
# Module-level lock serializing access to the PLC API across worker threads.
# NOTE(review): `threading` must be imported on a line not visible in this
# excerpt -- confirm against the full file.
plc_lock = threading.Lock()
# this will be called when an exception occurs within a thread
def handle_exception(request, result):
    # Threadpool exception callback: logs which WorkRequest failed and the
    # exception payload it produced.
    print "Exception occured in request %s" % request.requestID
    # NOTE(review): `i` is bound by a line missing from this excerpt
    # (presumably `for i in result:` iterating the exc_info tuple); do not
    # "fix" this reference without consulting the full file.
    print "Result: %s" % i
def checkPCUs(l_pcus, cohash):
    # Probe every PCU id in `l_pcus` with a pool of 10 worker threads,
    # recording results via ScanPCU/FindbadPCURecord.  `cohash` is passed
    # through to ScanPCU.collectInternal (its semantics are not visible in
    # this excerpt).
    # NOTE(review): this excerpt is fragmentary -- the lines establishing
    # `global_round`, `begin`, `count`, `pcu_id`, the `try:` that pairs with
    # the `except` clauses below, and the pool-wait loop are missing.
    tp = threadpool.ThreadPool(10)
    scanpcu = ScanPCU(global_round)

    # CREATE all the work requests
    for pcuname in l_pcus:
        #fbnodesync = FindbadPCURecordSync.findby_or_create(plc_pcuid=pcu_id, if_new_set={'round' : 0})
        #node_round = fbnodesync.round
        # With the per-PCU sync record disabled above, force node_round one
        # behind the global round so every PCU is (re)scanned each pass.
        node_round = global_round - 1
        if node_round < global_round or config.force:
            # recreate node stats when refreshed
            #print "%s" % nodename
            req = threadpool.WorkRequest(scanpcu.collectInternal, [int(pcuname), cohash], {},
                None, scanpcu.record, handle_exception)
        # We just skip it, since it's "up to date"
        print "%d %s %s" % (count, pcu_id, node_round)

    # WAIT while all the work requests are processed.
    # Abort if more than one hour has elapsed (60*60*1 seconds).
    # NOTE(review): the original comment here said "two hours", which
    # contradicts the one-hour threshold actually checked below.
    if time.time() - begin > (60*60*1):
        print "findbadpcus.py has run out of time!!!!!!"
    # NOTE(review): the matching `try:` for these handlers is on a line
    # missing from this excerpt.
    except KeyboardInterrupt:
    except threadpool.NoResultsPending:
        print "All results collected."

    #print FindbadPCURecordSync.query.count()
    print FindbadPCURecord.query.count()
# --- Interior of main() (the enclosing `def main(...)` line is outside this
# excerpt).  Builds `l_pcus`, the list of PCU ids to probe, from the
# mutually-exclusive command-line options, then runs checkPCUs(). ---

# Default: every PCU known to the local PLC cache.
l_pcus = plccache.l_pcus
#fbsync = FindbadPCURecordSync.findby_or_create(plc_pcuid=0,
#if_new_set={'round' : global_round})
#global_round = fbsync.round
api = plc.getAuthAPI()

if config.site is not None:
    # All PCUs attached to the nodes of a single named site.
    # NOTE(review): the `pcus = []` initializer and the `for node in
    # l_nodes:` loop header are on lines missing from this excerpt.
    site = plccache.GetSitesByName([config.site])
    l_nodes = plccache.GetNodesByIds(site[0]['node_ids'])
    pcus += node['pcu_ids']
    # sets.Set is the deprecated Python 2 `sets` module; used here only to
    # de-duplicate PCU ids.
    l_pcus = [pcu for pcu in sets.Set(pcus)]

elif config.node is not None:
    # PCUs attached to one named node.
    node = plccache.GetNodeByName(config.node)
    pcus = node['pcu_ids']
    l_pcus = [pcu for pcu in sets.Set(pcus)]

elif config.sitelist:
    # PCUs attached to the nodes of a comma-separated list of sites.
    # NOTE(review): the `node_ids = []` / `pcus = []` initializers and the
    # `for s in sites:` / `for node in l_nodes:` loop headers are missing
    # from this excerpt.
    site_list = config.sitelist.split(',')
    sites = plccache.GetSitesByName(site_list)
    node_ids += s['node_ids']
    # NOTE(review): `GetNodeByIds` here vs `GetNodesByIds` in the
    # config.site branch above -- verify which spelling plccache actually
    # exports; one of the two is likely a latent AttributeError.
    l_nodes = plccache.GetNodeByIds(node_ids)
    pcus += node['pcu_ids']
    l_pcus = [pcu for pcu in sets.Set(pcus)]

elif config.pcuselect is not None:
    # PCUs matching a findbad query string (nodes `n` are ignored here).
    n, pcus = pcu_select(config.pcuselect)
    l_pcus = [pcu for pcu in sets.Set(pcus)]

elif config.nodelist == None and config.pcuid == None:
    # No filtering options given: keep the cached full PCU list, reduced to
    # bare pcu_id integers.
    print "Calling API GetPCUs() : cachecalls(%s)" % config.cachecalls
    l_pcus = [pcu['pcu_id'] for pcu in l_pcus]
elif config.nodelist is not None:
    # PCU ids read one-per-line from a file.
    l_pcus = util.file.getListFromFile(config.nodelist)
    l_pcus = [int(pcu) for pcu in l_pcus]
elif config.pcuid is not None:
    # A single PCU id given directly on the command line.
    l_pcus = [ config.pcuid ]
    l_pcus = [int(pcu) for pcu in l_pcus]

# update global round number to force refreshes across all nodes
checkPCUs(l_pcus, cohash)

# update global round number to force refreshes across all nodes
#fbsync.round = global_round
if __name__ == '__main__':
    # Configure a DEBUG-level file logger ("monitor.log", append mode) for
    # the monitor subsystem.
    logger = logging.getLogger("monitor")
    logger.setLevel(logging.DEBUG)
    fh = logging.FileHandler("monitor.log", mode = 'a')
    fh.setLevel(logging.DEBUG)
    formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
    fh.setFormatter(formatter)
    logger.addHandler(fh)

    # Build the command-line option parser.
    from monitor import parser as parsermodule
    parser = parsermodule.getParser()
    # NOTE(review): additional keyword defaults between `nodelist=None` and
    # `dbname=...`, and the closing of this call, are on lines missing from
    # this excerpt.
    parser.set_defaults(nodelist=None,
        dbname="findbadpcus",
    parser.add_option("-f", "--nodelist", dest="nodelist", metavar="FILE",
        help="Provide the input file for the node list")
    parser.add_option("", "--node", dest="node", metavar="FILE",
        help="Get all pcus associated with the given node")
    parser.add_option("", "--site", dest="site", metavar="FILE",
        help="Get all pcus associated with the given site's nodes")
    parser.add_option("", "--sitelist", dest="sitelist", metavar="FILE",
        help="Get all pcus associated with the given site's nodes")
    parser.add_option("", "--pcuselect", dest="pcuselect", metavar="FILE",
        help="Query string to apply to the findbad pcus")
    parser.add_option("", "--pcuid", dest="pcuid", metavar="id",
        help="Provide the id for a single pcu")
    parser.add_option("", "--cachenodes", action="store_true",
        help="Cache node lookup from PLC")
    parser.add_option("", "--dbname", dest="dbname", metavar="FILE",
        help="Specify the name of the database to which the information is saved")
    parser.add_option("", "--nocachecalls", action="store_false", dest="cachecalls",
        help="Refresh the cached values")
    parser.add_option("-i", "--increment", action="store_true", dest="increment",
        help="Increment round number to force refresh or retry")
    parser.add_option("", "--force", action="store_true", dest="force",
        help="Force probe without incrementing global 'round'.")
    parser = parsermodule.getParser(['defaults'], parser)
    config = parsermodule.parse_args(parser)

    if hasattr(config, 'cachecalls') and not config.cachecalls:
        # NOTE: if explicitly asked, refresh cached values.
        # NOTE(review): the actual cache-reload call following this print is
        # on a line missing from this excerpt.
        print "Reloading PLCCache"

    # NOTE: evidently, there is a bizarre interaction between iLO and ssh
    # when LANG is set... Do not know why. Unsetting LANG, fixes the problem.
    if 'LANG' in os.environ:
        del os.environ['LANG']

    # NOTE(review): the `try:` that pairs with this handler (wrapping the
    # main() invocation) is on a line missing from this excerpt, as is the
    # email_exception(...) call after the import below.
    except Exception, err:
        traceback.print_exc()
        from monitor.common import email_exception
        print "Exception: %s" % err
        print "Saving data... exitting."