findbadpcu.py

   1 #!/usr/bin/python
   2
   3 import os
   4 import sys
   5 import string
   6 import time
   7 import socket
   8 import sets
   9 import signal
  10 import traceback
  11 from datetime import datetime,timedelta
  12 import threadpool
  13 import threading
  14
  15 import monitor
  16 from monitor import config
  17 from monitor.database.info.model import FindbadPCURecord, session
  18 from monitor import database
  19 from monitor import util
  20 from monitor.wrapper import plc, plccache
  21 from nodequery import pcu_select
  22 from monitor.common import nmap_port_status
  23 from monitor.scanapi import *
  24
  25 plc_lock = threading.Lock()
  26 global_round = 1
  27 errorState = {}
  28 count = 0
  29
  30 # this will be called when an exception occurs within a thread
  31 def handle_exception(request, result):
  32         print "Exception occured in request %s" % request.requestID
  33         for i in result:
  34                 print "Result: %s" % i
  35
  36 def checkPCUs(l_pcus, cohash):
  37         global global_round
  38         global count
  39
  40         tp = threadpool.ThreadPool(10)
  41         scanpcu = ScanPCU(global_round)
  42
  43         # CREATE all the work requests
  44         for pcuname in l_pcus:
  45                 pcu_id = int(pcuname)
  46                 #fbnodesync = FindbadPCURecordSync.findby_or_create(plc_pcuid=pcu_id, if_new_set={'round' : 0})
  47                 #fbnodesync.flush()
  48
  49                 #node_round   = fbnodesync.round
  50                 node_round   = global_round - 1
  51                 if node_round < global_round or config.force:
  52                         # recreate node stats when refreshed
  53                         #print "%s" % nodename
  54                         req = threadpool.WorkRequest(scanpcu.collectInternal, [int(pcuname), cohash], {},
  55                                                                                  None, scanpcu.record, handle_exception)
  56                         tp.putRequest(req)
  57                 else:
  58                         # We just skip it, since it's "up to date"
  59                         count += 1
  60                         print "%d %s %s" % (count, pcu_id, node_round)
  61
  62         # WAIT while all the work requests are processed.
  63         begin = time.time()
  64         while 1:
  65                 try:
  66                         time.sleep(1)
  67                         tp.poll()
  68                         # if more than two hours
  69                         if time.time() - begin > (60*60*1):
  70                                 print "findbadpcus.py has run out of time!!!!!!"
  71                                 os._exit(1)
  72                 except KeyboardInterrupt:
  73                         print "Interrupted!"
  74                         break
  75                 except threadpool.NoResultsPending:
  76                         print "All results collected."
  77                         break
  78
  79         #print FindbadPCURecordSync.query.count()
  80         print FindbadPCURecord.query.count()
  81         session.flush()
  82
  83
  84 def main():
  85         global global_round
  86
  87         l_pcus = plccache.l_pcus
  88         cohash = {}
  89
  90         #fbsync = FindbadPCURecordSync.findby_or_create(plc_pcuid=0,
  91                                                                                         #if_new_set={'round' : global_round})
  92
  93         #global_round = fbsync.round
  94         api = plc.getAuthAPI()
  95
  96         if config.site is not None:
  97                 site = api.GetSites(config.site)
  98                 l_nodes = api.GetNodes(site[0]['node_ids'], ['pcu_ids'])
  99                 pcus = []
 100                 for node in l_nodes:
 101                         pcus += node['pcu_ids']
 102                 # clear out dups.
 103                 l_pcus = [pcu for pcu in sets.Set(pcus)]
 104
 105         elif config.node is not None:
 106                 l_nodes = api.GetNodes(config.node, ['pcu_ids'])
 107                 pcus = []
 108                 for node in l_nodes:
 109                         pcus += node['pcu_ids']
 110                 # clear out dups.
 111                 l_pcus = [pcu for pcu in sets.Set(pcus)]
 112
 113         elif config.sitelist:
 114                 site_list = config.sitelist.split(',')
 115
 116                 sites = api.GetSites(site_list)
 117                 node_ids = []
 118                 for s in sites:
 119                         node_ids += s['node_ids']
 120
 121                 l_nodes = api.GetNodes(node_ids, ['pcu_ids'])
 122                 pcus = []
 123                 for node in l_nodes:
 124                         pcus += node['pcu_ids']
 125                 # clear out dups.
 126                 l_pcus = [pcu for pcu in sets.Set(pcus)]
 127
 128         elif config.pcuselect is not None:
 129                 n, pcus = pcu_select(config.pcuselect)
 130                 print pcus
 131                 # clear out dups.
 132                 l_pcus = [pcu for pcu in sets.Set(pcus)]
 133
 134         elif config.nodelist == None and config.pcuid == None:
 135                 print "Calling API GetPCUs() : cachecalls(%s)" % config.cachecalls
 136                 l_pcus  = [pcu['pcu_id'] for pcu in l_pcus]
 137         elif config.nodelist is not None:
 138                 l_pcus = util.file.getListFromFile(config.nodelist)
 139                 l_pcus = [int(pcu) for pcu in l_pcus]
 140         elif config.pcuid is not None:
 141                 l_pcus = [ config.pcuid ]
 142                 l_pcus = [int(pcu) for pcu in l_pcus]
 143
 144         if config.increment:
 145                 # update global round number to force refreshes across all nodes
 146                 global_round += 1
 147
 148         checkPCUs(l_pcus, cohash)
 149
 150         if config.increment:
 151                 # update global round number to force refreshes across all nodes
 152                 #fbsync.round = global_round
 153                 #fbsync.flush()
 154                 session.flush()
 155
 156         return 0
 157
 158
 159 print "main"
 160 if __name__ == '__main__':
 161         import logging
 162         logger = logging.getLogger("monitor")
 163         logger.setLevel(logging.DEBUG)
 164         fh = logging.FileHandler("monitor.log", mode = 'a')
 165         fh.setLevel(logging.DEBUG)
 166         formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
 167         fh.setFormatter(formatter)
 168         logger.addHandler(fh)
 169         from monitor import parser as parsermodule
 170         parser = parsermodule.getParser()
 171         parser.set_defaults(nodelist=None,
 172                                                 increment=False,
 173                                                 pcuid=None,
 174                                                 pcuselect=None,
 175                                                 site=None,
 176                                                 node=None,
 177                                                 sitelist=None,
 178                                                 dbname="findbadpcus",
 179                                                 cachenodes=False,
 180                                                 cachecalls=True,
 181                                                 force=False,
 182                                                 )
 183         parser.add_option("-f", "--nodelist", dest="nodelist", metavar="FILE",
 184                                                 help="Provide the input file for the node list")
 185         parser.add_option("", "--node", dest="node", metavar="FILE",
 186                                                 help="Get all pcus associated with the given node")
 187         parser.add_option("", "--site", dest="site", metavar="FILE",
 188                                                 help="Get all pcus associated with the given site's nodes")
 189         parser.add_option("", "--sitelist", dest="sitelist", metavar="FILE",
 190                                                 help="Get all pcus associated with the given site's nodes")
 191         parser.add_option("", "--pcuselect", dest="pcuselect", metavar="FILE",
 192                                                 help="Query string to apply to the findbad pcus")
 193         parser.add_option("", "--pcuid", dest="pcuid", metavar="id",
 194                                                 help="Provide the id for a single pcu")
 195
 196         parser.add_option("", "--cachenodes", action="store_true",
 197                                                 help="Cache node lookup from PLC")
 198         parser.add_option("", "--dbname", dest="dbname", metavar="FILE",
 199                                                 help="Specify the name of the database to which the information is saved")
 200         parser.add_option("", "--nocachecalls", action="store_false", dest="cachecalls",
 201                                                 help="Refresh the cached values")
 202         parser.add_option("-i", "--increment", action="store_true", dest="increment",
 203                                                 help="Increment round number to force refresh or retry")
 204         parser.add_option("", "--force", action="store_true", dest="force",
 205                                                 help="Force probe without incrementing global 'round'.")
 206         parser = parsermodule.getParser(['defaults'], parser)
 207         config = parsermodule.parse_args(parser)
 208         if hasattr(config, 'cachecalls') and not config.cachecalls:
 209                 # NOTE: if explicilty asked, refresh cached values.
 210                 print "Reloading PLCCache"
 211                 plccache.init()
 212         try:
 213                 # NOTE: evidently, there is a bizarre interaction between iLO and ssh
 214                 # when LANG is set... Do not know why.  Unsetting LANG, fixes the problem.
 215                 if 'LANG' in os.environ:
 216                         del os.environ['LANG']
 217                 main()
 218                 time.sleep(1)
 219         except Exception, err:
 220                 traceback.print_exc()
 221                 from monitor.common import email_exception
 222                 email_exception()
 223                 print "Exception: %s" % err
 224                 print "Saving data... exitting."
 225                 sys.exit(0)