X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=findbadpcu.py;h=114c48bc49bc6a9ef355a1a7a701b60cf14749e2;hb=c9b0045bba8ab66adf5036f9cac7f37f476b9a69;hp=5f542358e63adcd1165e8ab43a12e4ce11730bfd;hpb=6496f5b4a0220e4055fee76c97f92293f9559117;p=monitor.git diff --git a/findbadpcu.py b/findbadpcu.py index 5f54235..114c48b 100755 --- a/findbadpcu.py +++ b/findbadpcu.py @@ -5,10 +5,14 @@ import sys import string import time import socket +import util.file +import plc +import sets import signal import traceback +from nodequery import pcu_select #old_handler = signal.getsignal(signal.SIGCHLD) @@ -81,6 +85,8 @@ def get_pcu(pcuname): l_pcu = i except: traceback.print_exc() + from nodecommon import email_exception + email_exception() l_pcu = None plc_lock.release() @@ -99,6 +105,8 @@ def get_nodes(node_ids): l_node.append(n) except: traceback.print_exc() + from nodecommon import email_exception + email_exception() l_node = None plc_lock.release() @@ -156,6 +164,8 @@ def get_plc_site_values(site_id): break except: traceback.print_exc() + from nodecommon import email_exception + email_exception() values = None plc_lock.release() @@ -194,6 +204,8 @@ def collectPingAndSSH(pcuname, cohash): except: b_except = True traceback.print_exc() + from nodecommon import email_exception + email_exception() continue_probe = False if b_except or not continue_probe: return (None, None, None) @@ -326,7 +338,7 @@ def checkAndRecordState(l_pcus, cohash): global count global_round = externalState['round'] - tp = threadpool.ThreadPool(20) + tp = threadpool.ThreadPool(10) # CREATE all the work requests for pcuname in l_pcus: @@ -380,11 +392,25 @@ def main(): # update global round number to force refreshes across all nodes externalState['round'] += 1 - if config.filename == None and config.pcuid == None: + if config.site is not None: + api = plc.getAuthAPI() + site = api.GetSites(config.site) + l_nodes = api.GetNodes(site[0]['node_ids'], ['pcu_ids']) + pcus = [] + for node in l_nodes: + pcus += node['pcu_ids'] + # clear out dups. + l_pcus = [pcu for pcu in sets.Set(pcus)] + elif config.pcuselect is not None: + n, pcus = pcu_select(config.pcuselect) + # clear out dups. + l_pcus = [pcu for pcu in sets.Set(pcus)] + + elif config.nodelist == None and config.pcuid == None: print "Calling API GetPCUs() : refresh(%s)" % config.refresh l_pcus = [pcu['pcu_id'] for pcu in l_pcus] - elif config.filename is not None: - l_pcus = config.getListFromFile(config.filename) + elif config.nodelist is not None: + l_pcus = util.file.getListFromFile(config.nodelist) l_pcus = [int(pcu) for pcu in l_pcus] elif config.pcuid is not None: l_pcus = [ config.pcuid ] @@ -404,20 +430,26 @@ if __name__ == '__main__': formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s') fh.setFormatter(formatter) logger.addHandler(fh) - from config import config - from optparse import OptionParser - parser = OptionParser() - parser.set_defaults(filename=None, + import parser as parsermodule + parser = parsermodule.getParser() + parser.set_defaults(nodelist=None, increment=False, pcuid=None, + pcuselect=None, + site=None, dbname="findbadpcus", cachenodes=False, refresh=False, ) - parser.add_option("-f", "--nodelist", dest="filename", metavar="FILE", + parser.add_option("-f", "--nodelist", dest="nodelist", metavar="FILE", help="Provide the input file for the node list") + parser.add_option("", "--site", dest="site", metavar="FILE", + help="Get all pcus associated with the given site's nodes") + parser.add_option("", "--pcuselect", dest="pcuselect", metavar="FILE", + help="Query string to apply to the findbad pcus") parser.add_option("", "--pcuid", dest="pcuid", metavar="id", help="Provide the id for a single pcu") + parser.add_option("", "--cachenodes", action="store_true", help="Cache node lookup from PLC") parser.add_option("", "--dbname", dest="dbname", metavar="FILE", @@ -426,8 +458,8 @@ if __name__ == '__main__': help="Refresh the cached values") parser.add_option("-i", "--increment", action="store_true", dest="increment", help="Increment round number to force refresh or retry") - config = config(parser) - config.parse_args() + parser = parsermodule.getParser(['defaults'], parser) + config = parsermodule.parse_args(parser) try: # NOTE: evidently, there is a bizarre interaction between iLO and ssh # when LANG is set... Do not know why. Unsetting LANG, fixes the problem. @@ -437,6 +469,8 @@ if __name__ == '__main__': time.sleep(1) except Exception, err: traceback.print_exc() + from nodecommon import email_exception + email_exception() print "Exception: %s" % err print "Saving data... exitting." database.dbDump(config.dbname, externalState)