added sitelist option for find* scripts.
[monitor.git] / findbadpcu.py
#!/usr/bin/python
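"""findbadpcu.py -- scan the PCUs (power control units) known to PLC using a
thread pool, and record the results in the findbad PCU tables.

Example invocations (site names and the pcu id are illustrative):
    ./findbadpcu.py --site princeton
    ./findbadpcu.py --sitelist princeton,mit
    ./findbadpcu.py --pcuid 42 --force
"""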

import os
import sys
import string
import time
import socket
import sets
import signal
import traceback
from datetime import datetime, timedelta
import threadpool
import threading

import monitor
from pcucontrol import reboot
from monitor import config
from monitor.database.info.model import FindbadPCURecordSync, FindbadPCURecord, session
from monitor import database
from monitor import util
from monitor.wrapper import plc, plccache
from nodequery import pcu_select
from monitor.common import nmap_port_status
from monitor.scanapi import *

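# A global "round" counter gates re-scans: a PCU whose recorded round has
# caught up with the current global round is considered up to date and is
# skipped unless --force is given.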
plc_lock = threading.Lock()
global_round = 1
errorState = {}
count = 0

# this will be called when an exception occurs within a thread
def handle_exception(request, result):
        print "Exception occurred in request %s" % request.requestID
        for i in result:
                print "Result: %s" % i

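# checkPCUs drives the scan: it queues one WorkRequest per PCU on a ten-thread
# pool, with scanpcu.record as the result callback and handle_exception as the
# error callback, then polls until the pool raises NoResultsPending.  A hard
# exit guards against hung PCU probes after an hour.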
def checkPCUs(l_pcus, cohash):
        global global_round
        global count

        tp = threadpool.ThreadPool(10)
        scanpcu = ScanPCU(global_round)

        # CREATE all the work requests
        for pcuname in l_pcus:
                pcu_id = int(pcuname)
                fbnodesync = FindbadPCURecordSync.findby_or_create(plc_pcuid=pcu_id, if_new_set={'round': 0})
                fbnodesync.flush()

                node_round = fbnodesync.round
                if node_round < global_round or config.force:
                        # recreate node stats when refreshed
                        req = threadpool.WorkRequest(scanpcu.collectInternal, [pcu_id, cohash], {},
                                                     None, scanpcu.record, handle_exception)
                        tp.putRequest(req)
                else:
                        # We just skip it, since it's "up to date"
                        count += 1
                        print "%d %s %s" % (count, pcu_id, node_round)

        # WAIT while all the work requests are processed.
        begin = time.time()
        while 1:
                try:
                        time.sleep(1)
                        tp.poll()
                        # if more than an hour has passed, give up
                        if time.time() - begin > (60*60*1):
                                print "findbadpcu.py has run out of time!!!!!!"
                                os._exit(1)
                except KeyboardInterrupt:
                        print "Interrupted!"
                        break
                except threadpool.NoResultsPending:
                        print "All results collected."
                        break

        print FindbadPCURecordSync.query.count()
        print FindbadPCURecord.query.count()
        session.flush()
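        # Illustrative follow-up query (uses only fields referenced above):
        #   for rec in FindbadPCURecordSync.query.all():
        #       print rec.plc_pcuid, rec.round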


def main():
        global global_round

        l_pcus = plccache.l_pcus
        cohash = {}

        fbsync = FindbadPCURecordSync.findby_or_create(plc_pcuid=0,
                                                       if_new_set={'round': global_round})

        global_round = fbsync.round
        api = plc.getAuthAPI()

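        # Narrow the PCU list according to the command-line selection, in order:
        # --site, --sitelist, --pcuselect; with no selection, fall back to the
        # full cached PCU list; --nodelist and --pcuid are handled last.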
        if config.site is not None:
                site = api.GetSites(config.site)
                l_nodes = api.GetNodes(site[0]['node_ids'], ['pcu_ids'])
                pcus = []
                for node in l_nodes:
                        pcus += node['pcu_ids']
                # clear out dups.
                l_pcus = [pcu for pcu in sets.Set(pcus)]
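        # NOTE: --site uses only the first matching site (site[0]); --sitelist
        # below aggregates node_ids across all listed sites.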
        elif config.sitelist:
                site_list = config.sitelist.split(',')

                sites = api.GetSites(site_list)
                node_ids = []
                for s in sites:
                        node_ids += s['node_ids']

                l_nodes = api.GetNodes(node_ids, ['pcu_ids'])
                pcus = []
                for node in l_nodes:
                        pcus += node['pcu_ids']
                # clear out dups.
                l_pcus = [pcu for pcu in sets.Set(pcus)]

        elif config.pcuselect is not None:
                n, pcus = pcu_select(config.pcuselect)
                print pcus
                # clear out dups.
                l_pcus = [pcu for pcu in sets.Set(pcus)]

        elif config.nodelist is None and config.pcuid is None:
                print "Calling API GetPCUs() : cachecalls(%s)" % config.cachecalls
                l_pcus = [pcu['pcu_id'] for pcu in l_pcus]
        elif config.nodelist is not None:
                l_pcus = util.file.getListFromFile(config.nodelist)
                l_pcus = [int(pcu) for pcu in l_pcus]
        elif config.pcuid is not None:
                l_pcus = [config.pcuid]
                l_pcus = [int(pcu) for pcu in l_pcus]

        if config.increment:
                # update global round number to force refreshes across all nodes
                global_round += 1

        checkPCUs(l_pcus, cohash)

        if config.increment:
                # persist the incremented round so subsequent runs see the refresh
                fbsync.round = global_round
                fbsync.flush()
                session.flush()

        return 0


print "main"
if __name__ == '__main__':
        import logging
        logger = logging.getLogger("monitor")
        logger.setLevel(logging.DEBUG)
        fh = logging.FileHandler("monitor.log", mode='a')
        fh.setLevel(logging.DEBUG)
        formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
        fh.setFormatter(formatter)
        logger.addHandler(fh)
        from monitor import parser as parsermodule
        parser = parsermodule.getParser()
        parser.set_defaults(nodelist=None,
                            increment=False,
                            pcuid=None,
                            pcuselect=None,
                            site=None,
                            sitelist=None,
                            dbname="findbadpcus",
                            cachenodes=False,
                            cachecalls=True,
                            force=False,
                            )
        parser.add_option("-f", "--nodelist", dest="nodelist", metavar="FILE",
                          help="Provide the input file for the node list")
        parser.add_option("", "--site", dest="site", metavar="SITE",
                          help="Get all pcus associated with the given site's nodes")
        parser.add_option("", "--sitelist", dest="sitelist", metavar="LIST",
                          help="Get all pcus for the nodes of the given comma-separated list of sites")
        parser.add_option("", "--pcuselect", dest="pcuselect", metavar="QUERY",
                          help="Query string to apply to the findbad pcus")
        parser.add_option("", "--pcuid", dest="pcuid", metavar="ID",
                          help="Provide the id for a single pcu")

        parser.add_option("", "--cachenodes", action="store_true",
                          help="Cache node lookup from PLC")
        parser.add_option("", "--dbname", dest="dbname", metavar="FILE",
                          help="Specify the name of the database to which the information is saved")
        parser.add_option("", "--nocachecalls", action="store_false", dest="cachecalls",
                          help="Refresh the cached values")
        parser.add_option("-i", "--increment", action="store_true", dest="increment",
                          help="Increment round number to force refresh or retry")
        parser.add_option("", "--force", action="store_true", dest="force",
                          help="Force probe without incrementing global 'round'.")
        parser = parsermodule.getParser(['defaults'], parser)
        config = parsermodule.parse_args(parser)
        if hasattr(config, 'cachecalls') and not config.cachecalls:
                # NOTE: if explicitly asked, refresh cached values.
                print "Reloading PLCCache"
                plccache.init()
        try:
                # NOTE: evidently, there is a bizarre interaction between iLO and ssh
                # when LANG is set... We do not know why; unsetting LANG fixes the problem.
                if 'LANG' in os.environ:
                        del os.environ['LANG']
                main()
                time.sleep(1)
        except Exception, err:
                traceback.print_exc()
                print "Exception: %s" % err
                print "Saving data... exiting."
                sys.exit(0)