X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=findbadpcu.py;h=114c48bc49bc6a9ef355a1a7a701b60cf14749e2;hb=refs%2Fheads%2F1.0;hp=e3d160ddb3455b2828dc08a9081ffc4945fca426;hpb=6d46ab9b534b60675a3dcb11fcb664589a3691f8;p=monitor.git diff --git a/findbadpcu.py b/findbadpcu.py index e3d160d..114c48b 100755 --- a/findbadpcu.py +++ b/findbadpcu.py @@ -12,6 +12,7 @@ import sets import signal import traceback +from nodequery import pcu_select #old_handler = signal.getsignal(signal.SIGCHLD) @@ -84,6 +85,8 @@ def get_pcu(pcuname): l_pcu = i except: traceback.print_exc() + from nodecommon import email_exception + email_exception() l_pcu = None plc_lock.release() @@ -102,6 +105,8 @@ def get_nodes(node_ids): l_node.append(n) except: traceback.print_exc() + from nodecommon import email_exception + email_exception() l_node = None plc_lock.release() @@ -159,6 +164,8 @@ def get_plc_site_values(site_id): break except: traceback.print_exc() + from nodecommon import email_exception + email_exception() values = None plc_lock.release() @@ -197,6 +204,8 @@ def collectPingAndSSH(pcuname, cohash): except: b_except = True traceback.print_exc() + from nodecommon import email_exception + email_exception() continue_probe = False if b_except or not continue_probe: return (None, None, None) @@ -329,7 +338,7 @@ def checkAndRecordState(l_pcus, cohash): global count global_round = externalState['round'] - tp = threadpool.ThreadPool(20) + tp = threadpool.ThreadPool(10) # CREATE all the work requests for pcuname in l_pcus: @@ -390,6 +399,11 @@ def main(): pcus = [] for node in l_nodes: pcus += node['pcu_ids'] + # clear out dups. + l_pcus = [pcu for pcu in sets.Set(pcus)] + elif config.pcuselect is not None: + n, pcus = pcu_select(config.pcuselect) + # clear out dups. l_pcus = [pcu for pcu in sets.Set(pcus)] elif config.nodelist == None and config.pcuid == None: @@ -421,6 +435,7 @@ if __name__ == '__main__': parser.set_defaults(nodelist=None, increment=False, pcuid=None, + pcuselect=None, site=None, dbname="findbadpcus", cachenodes=False, @@ -430,6 +445,8 @@ if __name__ == '__main__': help="Provide the input file for the node list") parser.add_option("", "--site", dest="site", metavar="FILE", help="Get all pcus associated with the given site's nodes") + parser.add_option("", "--pcuselect", dest="pcuselect", metavar="FILE", + help="Query string to apply to the findbad pcus") parser.add_option("", "--pcuid", dest="pcuid", metavar="id", help="Provide the id for a single pcu") @@ -452,6 +469,8 @@ if __name__ == '__main__': time.sleep(1) except Exception, err: traceback.print_exc() + from nodecommon import email_exception + email_exception() print "Exception: %s" % err print "Saving data... exitting." database.dbDump(config.dbname, externalState)