X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=findbadpcu.py;h=0d06d1e0290788d1930aa7e410596e9da8beabc4;hb=d8c4f261680cbc9cb2708cf12d97202716120dc7;hp=1af600ced70d33a949d8896906d265a8380ab7f3;hpb=90b2e8e7cb145cb1f6b3780867617084441b6ca9;p=monitor.git diff --git a/findbadpcu.py b/findbadpcu.py index 1af600c..0d06d1e 100755 --- a/findbadpcu.py +++ b/findbadpcu.py @@ -13,31 +13,20 @@ import threadpool import threading import monitor -from monitor.pcu import reboot +from pcucontrol import reboot from monitor import config -from monitor.database import FindbadPCURecordSync, FindbadPCURecord +from monitor.database.info.model import FindbadPCURecordSync, FindbadPCURecord, session +from monitor import database from monitor import util from monitor.wrapper import plc, plccache from nodequery import pcu_select +from nodecommon import nmap_port_status plc_lock = threading.Lock() global_round = 1 errorState = {} count = 0 -def nmap_portstatus(status): - ps = {} - l_nmap = status.split() - ports = l_nmap[4:] - - continue_probe = False - for port in ports: - results = port.split('/') - ps[results[0]] = results[1] - if results[1] == "open": - continue_probe = True - return (ps, continue_probe) - def get_pcu(pcuname): plc_lock.acquire() try: @@ -175,34 +164,44 @@ def collectPingAndSSH(pcuname, cohash): if b_except or not continue_probe: return (None, None, None) - + #### RUN NMAP ############################### + if continue_probe: + nmap = util.command.CMD() + print "nmap -oG - -P0 -p22,23,80,443,5869,9100,16992 %s | grep Host:" % reboot.pcu_name(values['plc_pcu_stats']) + (oval,eval) = nmap.run_noexcept("nmap -oG - -P0 -p22,23,80,443,5869,9100,16992 %s | grep Host:" % reboot.pcu_name(values['plc_pcu_stats'])) + # NOTE: an empty / error value for oval, will still work. + (values['port_status'], continue_probe) = nmap_port_status(oval) + else: + values['port_status'] = None + #### COMPLETE ENTRY ####################### - values['complete_entry'] = [] + values['entry_complete'] = [] #if values['protocol'] is None or values['protocol'] is "": - # values['complete_entry'] += ["protocol"] + # values['entry_complete'] += ["protocol"] if values['plc_pcu_stats']['model'] is None or values['plc_pcu_stats']['model'] is "": - values['complete_entry'] += ["model"] + values['entry_complete'] += ["model"] # Cannot continue due to this condition continue_probe = False if values['plc_pcu_stats']['password'] is None or values['plc_pcu_stats']['password'] is "": - values['complete_entry'] += ["password"] + values['entry_complete'] += ["password"] # Cannot continue due to this condition continue_probe = False - if len(values['complete_entry']) > 0: + if len(values['entry_complete']) > 0: continue_probe = False if values['plc_pcu_stats']['hostname'] is None or values['plc_pcu_stats']['hostname'] is "": - values['complete_entry'] += ["hostname"] + values['entry_complete'] += ["hostname"] if values['plc_pcu_stats']['ip'] is None or values['plc_pcu_stats']['ip'] is "": - values['complete_entry'] += ["ip"] + values['entry_complete'] += ["ip"] # If there are no nodes associated with this PCU, then we cannot continue. if len(values['plc_pcu_stats']['node_ids']) == 0: continue_probe = False - values['complete_entry'] += ['NoNodeIds'] + values['entry_complete'] += ['nodeids'] + #### DNS and IP MATCH ####################### if values['plc_pcu_stats']['hostname'] is not None and values['plc_pcu_stats']['hostname'] is not "" and \ @@ -211,37 +210,29 @@ def collectPingAndSSH(pcuname, cohash): try: ipaddr = socket.gethostbyname(values['plc_pcu_stats']['hostname']) if ipaddr == values['plc_pcu_stats']['ip']: - values['dnsmatch'] = "DNS-OK" + values['dns_status'] = "DNS-OK" else: - values['dnsmatch'] = "DNS-MISMATCH" + values['dns_status'] = "DNS-MISMATCH" continue_probe = False except Exception, err: - values['dnsmatch'] = "DNS-NOENTRY" + values['dns_status'] = "DNS-NOENTRY" values['plc_pcu_stats']['hostname'] = values['plc_pcu_stats']['ip'] #print err else: if values['plc_pcu_stats']['ip'] is not None and values['plc_pcu_stats']['ip'] is not "": - values['dnsmatch'] = "NOHOSTNAME" + values['dns_status'] = "NOHOSTNAME" values['plc_pcu_stats']['hostname'] = values['plc_pcu_stats']['ip'] else: - values['dnsmatch'] = "NO-DNS-OR-IP" + values['dns_status'] = "NO-DNS-OR-IP" values['plc_pcu_stats']['hostname'] = "No_entry_in_DB" continue_probe = False - #### RUN NMAP ############################### - if continue_probe: - nmap = util.command.CMD() - (oval,eval) = nmap.run_noexcept("nmap -oG - -P0 -p22,23,80,443,5869,9100,16992 %s | grep Host:" % reboot.pcu_name(values['plc_pcu_stats'])) - # NOTE: an empty / error value for oval, will still work. - (values['portstatus'], continue_probe) = nmap_portstatus(oval) - else: - values['portstatus'] = None - ###### DRY RUN ############################ if 'node_ids' in values['plc_pcu_stats'] and len(values['plc_pcu_stats']['node_ids']) > 0: - rb_ret = reboot.reboot_test(values['plc_pcu_stats']['nodenames'][0], values, continue_probe, 1, True) + rb_ret = reboot.reboot_test_new(values['plc_pcu_stats']['nodenames'][0], + values, 1, True) else: rb_ret = "Not_Run" # No nodes to test" @@ -254,6 +245,7 @@ def collectPingAndSSH(pcuname, cohash): print "____________________________________" errors['traceback'] = traceback.format_exc() print errors['traceback'] + values['reboot'] = errors['traceback'] values['date_checked'] = time.time() return (pcuname, values, errors) @@ -266,23 +258,28 @@ def recordPingAndSSH(request, result): if values is not None: pcu_id = int(nodename) - fbsync = FindbadPCURecordSync.findby_or_create(plc_pcuid=0, - if_new_set={'round': global_round}) - global_round = fbsync.round + #fbsync = FindbadPCURecordSync.findby_or_create(plc_pcuid=0, + # if_new_set={'round': global_round}) + #global_round = fbsync.round fbnodesync = FindbadPCURecordSync.findby_or_create(plc_pcuid=pcu_id, if_new_set={'round' : global_round}) fbrec = FindbadPCURecord( date_checked=datetime.fromtimestamp(values['date_checked']), - record=fbsync.round, + round=global_round, plc_pcuid=pcu_id, plc_pcu_stats=values['plc_pcu_stats'], - dns_status=values['dnsmatch'], - port_status=values['portstatus'], - entry_complete=" ".join(values['complete_entry']), + dns_status=values['dns_status'], + port_status=values['port_status'], + entry_complete=" ".join(values['entry_complete']), reboot_trial_status="%s" % values['reboot'], ) fbnodesync.round = global_round + + fbnodesync.flush() + #fbsync.flush() + fbrec.flush() + count += 1 print "%d %s %s" % (count, nodename, values) @@ -308,9 +305,10 @@ def checkAndRecordState(l_pcus, cohash): for pcuname in l_pcus: pcu_id = int(pcuname) fbnodesync = FindbadPCURecordSync.findby_or_create(plc_pcuid=pcu_id, if_new_set={'round' : 0}) + fbnodesync.flush() node_round = fbnodesync.round - if node_round < global_round: + if node_round < global_round or config.force: # recreate node stats when refreshed #print "%s" % nodename req = threadpool.WorkRequest(collectPingAndSSH, [pcuname, cohash], {}, @@ -340,6 +338,7 @@ def checkAndRecordState(l_pcus, cohash): print FindbadPCURecordSync.query.count() print FindbadPCURecord.query.count() + session.flush() def main(): @@ -353,10 +352,6 @@ def main(): global_round = fbsync.round - if config.increment: - # update global round number to force refreshes across all nodes - global_round += 1 - fbsync.round = global_round if config.site is not None: api = plc.getAuthAPI() @@ -369,11 +364,12 @@ def main(): l_pcus = [pcu for pcu in sets.Set(pcus)] elif config.pcuselect is not None: n, pcus = pcu_select(config.pcuselect) + print pcus # clear out dups. l_pcus = [pcu for pcu in sets.Set(pcus)] elif config.nodelist == None and config.pcuid == None: - print "Calling API GetPCUs() : refresh(%s)" % config.refresh + print "Calling API GetPCUs() : cachecalls(%s)" % config.cachecalls l_pcus = [pcu['pcu_id'] for pcu in l_pcus] elif config.nodelist is not None: l_pcus = util.file.getListFromFile(config.nodelist) @@ -382,11 +378,22 @@ def main(): l_pcus = [ config.pcuid ] l_pcus = [int(pcu) for pcu in l_pcus] + if config.increment: + # update global round number to force refreshes across all nodes + global_round += 1 + checkAndRecordState(l_pcus, cohash) + if config.increment: + # update global round number to force refreshes across all nodes + fbsync.round = global_round + fbsync.flush() + session.flush() + return 0 +print "main" if __name__ == '__main__': import logging logger = logging.getLogger("monitor") @@ -405,7 +412,8 @@ if __name__ == '__main__': site=None, dbname="findbadpcus", cachenodes=False, - refresh=False, + cachecalls=True, + force=False, ) parser.add_option("-f", "--nodelist", dest="nodelist", metavar="FILE", help="Provide the input file for the node list") @@ -420,12 +428,18 @@ if __name__ == '__main__': help="Cache node lookup from PLC") parser.add_option("", "--dbname", dest="dbname", metavar="FILE", help="Specify the name of the database to which the information is saved") - parser.add_option("", "--refresh", action="store_true", dest="refresh", + parser.add_option("", "--nocachecalls", action="store_false", dest="cachecalls", help="Refresh the cached values") parser.add_option("-i", "--increment", action="store_true", dest="increment", help="Increment round number to force refresh or retry") + parser.add_option("", "--force", action="store_true", dest="force", + help="Force probe without incrementing global 'round'.") parser = parsermodule.getParser(['defaults'], parser) config = parsermodule.parse_args(parser) + if hasattr(config, 'cachecalls') and not config.cachecalls: + # NOTE: if explicilty asked, refresh cached values. + print "Reloading PLCCache" + plccache.init() try: # NOTE: evidently, there is a bizarre interaction between iLO and ssh # when LANG is set... Do not know why. Unsetting LANG, fixes the problem.