unify the model by which probes are made to collect information about nodes or
[monitor.git] / findbadpcu.py
index 1af600c..0d06d1e 100755
--- a/findbadpcu.py
+++ b/findbadpcu.py
@@ -13,31 +13,20 @@ import threadpool
 import threading
 
 import monitor
-from monitor.pcu import reboot
+from pcucontrol  import reboot
 from monitor import config
-from monitor.database import FindbadPCURecordSync, FindbadPCURecord
+from monitor.database.info.model import FindbadPCURecordSync, FindbadPCURecord, session
+from monitor import database
 from monitor import util 
 from monitor.wrapper import plc, plccache
 from nodequery import pcu_select
+from nodecommon import nmap_port_status
 
 plc_lock = threading.Lock()
 global_round = 1
 errorState = {}
 count = 0
 
-def nmap_portstatus(status):
-       ps = {}
-       l_nmap = status.split()
-       ports = l_nmap[4:]
-
-       continue_probe = False
-       for port in ports:
-               results = port.split('/')
-               ps[results[0]] = results[1]
-               if results[1] == "open":
-                       continue_probe = True
-       return (ps, continue_probe)
-
 def get_pcu(pcuname):
        plc_lock.acquire()
        try:
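
The local nmap_portstatus() helper deleted above is replaced by nmap_port_status() imported from nodecommon. Its implementation there is not shown in this diff; a minimal sketch, reconstructed from the deleted lines and nmap's grepable (-oG) output, looks roughly like this:

    # Sketch only -- reconstructed from the deleted nmap_portstatus() above;
    # the real nodecommon.nmap_port_status() may differ.
    def nmap_port_status(status):
        """Parse an nmap -oG 'Host:' line into ({port: state}, continue_probe)."""
        # Example input (whitespace-separated fields):
        #   Host: 128.112.139.115 (pcu.example.org) Ports: 22/open/tcp//ssh///, 443/closed/tcp//https///
        ps = {}
        ports = status.split()[4:]        # fields after "Host: <ip> (<name>) Ports:"
        continue_probe = False
        for port in ports:
            results = port.split('/')     # "22/open/tcp//ssh///," -> ['22', 'open', ...]
            ps[results[0]] = results[1]
            if results[1] == "open":
                continue_probe = True     # at least one open port: keep probing this PCU
        return (ps, continue_probe)

An empty or error string from the nmap run parses to ({}, False), which is why the "empty / error value for oval" note in the hunk below still holds.
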
@@ -175,34 +164,44 @@ def collectPingAndSSH(pcuname, cohash):
 
                if b_except or not continue_probe: return (None, None, None)
 
-
+               #### RUN NMAP ###############################
+               if continue_probe:
+                       nmap = util.command.CMD()
+                       print "nmap -oG - -P0 -p22,23,80,443,5869,9100,16992 %s | grep Host:" % reboot.pcu_name(values['plc_pcu_stats'])
+                       (oval,eval) = nmap.run_noexcept("nmap -oG - -P0 -p22,23,80,443,5869,9100,16992 %s | grep Host:" % reboot.pcu_name(values['plc_pcu_stats']))
+                       # NOTE: an empty / error value for oval, will still work.
+                       (values['port_status'], continue_probe) = nmap_port_status(oval)
+               else:
+                       values['port_status'] = None
+                       
                #### COMPLETE ENTRY   #######################
 
-               values['complete_entry'] = []
+               values['entry_complete'] = []
                #if values['protocol'] is None or values['protocol'] is "":
-               #       values['complete_entry'] += ["protocol"]
+               #       values['entry_complete'] += ["protocol"]
                if values['plc_pcu_stats']['model'] is None or values['plc_pcu_stats']['model'] is "":
-                       values['complete_entry'] += ["model"]
+                       values['entry_complete'] += ["model"]
                        # Cannot continue due to this condition
                        continue_probe = False
 
                if values['plc_pcu_stats']['password'] is None or values['plc_pcu_stats']['password'] is "":
-                       values['complete_entry'] += ["password"]
+                       values['entry_complete'] += ["password"]
                        # Cannot continue due to this condition
                        continue_probe = False
 
-               if len(values['complete_entry']) > 0:
+               if len(values['entry_complete']) > 0:
                        continue_probe = False
 
                if values['plc_pcu_stats']['hostname'] is None or values['plc_pcu_stats']['hostname'] is "":
-                       values['complete_entry'] += ["hostname"]
+                       values['entry_complete'] += ["hostname"]
                if values['plc_pcu_stats']['ip'] is None or values['plc_pcu_stats']['ip'] is "":
-                       values['complete_entry'] += ["ip"]
+                       values['entry_complete'] += ["ip"]
 
                # If there are no nodes associated with this PCU, then we cannot continue.
                if len(values['plc_pcu_stats']['node_ids']) == 0:
                        continue_probe = False
-                       values['complete_entry'] += ['NoNodeIds']
+                       values['entry_complete'] += ['nodeids']
+
 
                #### DNS and IP MATCH #######################
                if values['plc_pcu_stats']['hostname'] is not None and values['plc_pcu_stats']['hostname'] is not "" and \
@@ -211,37 +210,29 @@ def collectPingAndSSH(pcuname, cohash):
                        try:
                                ipaddr = socket.gethostbyname(values['plc_pcu_stats']['hostname'])
                                if ipaddr == values['plc_pcu_stats']['ip']:
-                                       values['dnsmatch'] = "DNS-OK"
+                                       values['dns_status'] = "DNS-OK"
                                else:
-                                       values['dnsmatch'] = "DNS-MISMATCH"
+                                       values['dns_status'] = "DNS-MISMATCH"
                                        continue_probe = False
 
                        except Exception, err:
-                               values['dnsmatch'] = "DNS-NOENTRY"
+                               values['dns_status'] = "DNS-NOENTRY"
                                values['plc_pcu_stats']['hostname'] = values['plc_pcu_stats']['ip']
                                #print err
                else:
                        if values['plc_pcu_stats']['ip'] is not None and values['plc_pcu_stats']['ip'] is not "":
-                               values['dnsmatch'] = "NOHOSTNAME"
+                               values['dns_status'] = "NOHOSTNAME"
                                values['plc_pcu_stats']['hostname'] = values['plc_pcu_stats']['ip']
                        else:
-                               values['dnsmatch'] = "NO-DNS-OR-IP"
+                               values['dns_status'] = "NO-DNS-OR-IP"
                                values['plc_pcu_stats']['hostname'] = "No_entry_in_DB"
                                continue_probe = False
 
-               #### RUN NMAP ###############################
-               if continue_probe:
-                       nmap = util.command.CMD()
-                       (oval,eval) = nmap.run_noexcept("nmap -oG - -P0 -p22,23,80,443,5869,9100,16992 %s | grep Host:" % reboot.pcu_name(values['plc_pcu_stats']))
-                       # NOTE: an empty / error value for oval, will still work.
-                       (values['portstatus'], continue_probe) = nmap_portstatus(oval)
-               else:
-                       values['portstatus'] = None
-                       
 
                ######  DRY RUN  ############################
                if 'node_ids' in values['plc_pcu_stats'] and len(values['plc_pcu_stats']['node_ids']) > 0:
-                       rb_ret = reboot.reboot_test(values['plc_pcu_stats']['nodenames'][0], values, continue_probe, 1, True)
+                       rb_ret = reboot.reboot_test_new(values['plc_pcu_stats']['nodenames'][0], 
+                                                                                       values, 1, True)
                else:
                        rb_ret = "Not_Run" # No nodes to test"
 
@@ -254,6 +245,7 @@ def collectPingAndSSH(pcuname, cohash):
                print "____________________________________"
                errors['traceback'] = traceback.format_exc()
                print errors['traceback']
+               values['reboot'] = errors['traceback']
 
        values['date_checked'] = time.time()
        return (pcuname, values, errors)
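
The DNS/IP check in this hunk is unchanged in substance; only the result key is renamed from dnsmatch to dns_status. Condensed into a standalone helper (an illustrative sketch, not code from the repository; the side effects of falling back to the ip and clearing continue_probe are omitted):

    # Sketch of the dns_status decision above; the guard on the PLC hostname/ip
    # fields is abbreviated here.
    import socket

    def derive_dns_status(hostname, ip):
        if hostname:
            try:
                if socket.gethostbyname(hostname) == ip:
                    return "DNS-OK"
                return "DNS-MISMATCH"       # forward lookup disagrees with the PLC ip field
            except socket.error:
                return "DNS-NOENTRY"        # no forward record; the probe falls back to the ip
        elif ip:
            return "NOHOSTNAME"             # no hostname in the DB; probe by ip
        else:
            return "NO-DNS-OR-IP"           # nothing to contact; the probe cannot continue
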
@@ -266,23 +258,28 @@ def recordPingAndSSH(request, result):
 
        if values is not None:
                pcu_id = int(nodename)
-               fbsync = FindbadPCURecordSync.findby_or_create(plc_pcuid=0, 
-                                                                                       if_new_set={'round': global_round})
-               global_round = fbsync.round
+               #fbsync = FindbadPCURecordSync.findby_or_create(plc_pcuid=0, 
+               #                                                                       if_new_set={'round': global_round})
+               #global_round = fbsync.round
                fbnodesync = FindbadPCURecordSync.findby_or_create(plc_pcuid=pcu_id, 
                                                                                        if_new_set={'round' : global_round})
 
                fbrec = FindbadPCURecord(
                                        date_checked=datetime.fromtimestamp(values['date_checked']),
-                                       record=fbsync.round,
+                                       round=global_round,
                                        plc_pcuid=pcu_id,
                                        plc_pcu_stats=values['plc_pcu_stats'],
-                                       dns_status=values['dnsmatch'],
-                                       port_status=values['portstatus'],
-                                       entry_complete=" ".join(values['complete_entry']),
+                                       dns_status=values['dns_status'],
+                                       port_status=values['port_status'],
+                                       entry_complete=" ".join(values['entry_complete']),
                                        reboot_trial_status="%s" % values['reboot'],
                                )
                fbnodesync.round = global_round
+
+               fbnodesync.flush()
+               #fbsync.flush()
+               fbrec.flush()
+
                count += 1
                print "%d %s %s" % (count, nodename, values)
 
@@ -308,9 +305,10 @@ def checkAndRecordState(l_pcus, cohash):
        for pcuname in l_pcus:
                pcu_id = int(pcuname)
                fbnodesync = FindbadPCURecordSync.findby_or_create(plc_pcuid=pcu_id, if_new_set={'round' : 0})
+               fbnodesync.flush()
 
                node_round   = fbnodesync.round
-               if node_round < global_round:
+               if node_round < global_round or config.force:
                        # recreate node stats when refreshed
                        #print "%s" % nodename
                        req = threadpool.WorkRequest(collectPingAndSSH, [pcuname, cohash], {}, 
@@ -340,6 +338,7 @@ def checkAndRecordState(l_pcus, cohash):
 
        print FindbadPCURecordSync.query.count()
        print FindbadPCURecord.query.count()
+       session.flush()
 
 
 def main():
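
The flush() calls added in recordPingAndSSH() and checkAndRecordState() above write each per-PCU record out as soon as it is created or updated, and the session.flush() at the end of checkAndRecordState() pushes anything still pending. A rough sketch of the pattern, using the names from the hunks above (the flush API is assumed from this codebase's model layer, not documented here):

    # Sketch of the per-record persistence pattern; not a verbatim excerpt.
    fbnodesync = FindbadPCURecordSync.findby_or_create(plc_pcuid=pcu_id,
                                                       if_new_set={'round': global_round})
    fbnodesync.round = global_round
    fbnodesync.flush()   # push this sync row to the database now
    fbrec.flush()        # likewise for the FindbadPCURecord built from values
    session.flush()      # end of the run: push anything still pending
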
@@ -353,10 +352,6 @@ def main():
 
        global_round = fbsync.round
 
-       if config.increment:
-               # update global round number to force refreshes across all nodes
-               global_round += 1
-               fbsync.round = global_round
 
        if config.site is not None:
                api = plc.getAuthAPI()
@@ -369,11 +364,12 @@ def main():
                l_pcus = [pcu for pcu in sets.Set(pcus)]
        elif config.pcuselect is not None:
                n, pcus = pcu_select(config.pcuselect)
+               print pcus
                # clear out dups.
                l_pcus = [pcu for pcu in sets.Set(pcus)]
 
        elif config.nodelist == None and config.pcuid == None:
-               print "Calling API GetPCUs() : refresh(%s)" % config.refresh
+               print "Calling API GetPCUs() : cachecalls(%s)" % config.cachecalls
                l_pcus  = [pcu['pcu_id'] for pcu in l_pcus]
        elif config.nodelist is not None:
                l_pcus = util.file.getListFromFile(config.nodelist)
@@ -382,11 +378,22 @@ def main():
                l_pcus = [ config.pcuid ] 
                l_pcus = [int(pcu) for pcu in l_pcus]
 
+       if config.increment:
+               # update global round number to force refreshes across all nodes
+               global_round += 1
+
        checkAndRecordState(l_pcus, cohash)
 
+       if config.increment:
+               # update global round number to force refreshes across all nodes
+               fbsync.round = global_round
+               fbsync.flush()
+               session.flush()
+
        return 0
 
 
+print "main"
 if __name__ == '__main__':
        import logging
        logger = logging.getLogger("monitor")
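
Together with the earlier checkAndRecordState() change, the round handling after this patch is: --increment bumps the in-memory global_round just before probing, a PCU is re-probed when its recorded round lags the global round (or unconditionally with --force), and the new round number is written back to the global sync record only after checkAndRecordState() returns. A condensed sketch, with names as in the diff:

    # Condensed sketch of the post-patch round protocol; not a verbatim excerpt.
    if config.increment:
        global_round += 1                    # request a refresh of every PCU this run

    for pcuname in l_pcus:                   # inside checkAndRecordState()
        fbnodesync = FindbadPCURecordSync.findby_or_create(plc_pcuid=int(pcuname),
                                                           if_new_set={'round': 0})
        if fbnodesync.round < global_round or config.force:
            pass                             # queue collectPingAndSSH() for this PCU

    if config.increment:
        fbsync.round = global_round          # persist the new round only after the probes ran
        fbsync.flush()
        session.flush()
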
@@ -405,7 +412,8 @@ if __name__ == '__main__':
                                                site=None,
                                                dbname="findbadpcus", 
                                                cachenodes=False,
-                                               refresh=False,
+                                               cachecalls=True,
+                                               force=False,
                                                )
        parser.add_option("-f", "--nodelist", dest="nodelist", metavar="FILE", 
                                                help="Provide the input file for the node list")
@@ -420,12 +428,18 @@ if __name__ == '__main__':
                                                help="Cache node lookup from PLC")
        parser.add_option("", "--dbname", dest="dbname", metavar="FILE", 
                                                help="Specify the name of the database to which the information is saved")
-       parser.add_option("", "--refresh", action="store_true", dest="refresh",
+       parser.add_option("", "--nocachecalls", action="store_false", dest="cachecalls",
                                                help="Refresh the cached values")
        parser.add_option("-i", "--increment", action="store_true", dest="increment", 
                                                help="Increment round number to force refresh or retry")
+       parser.add_option("", "--force", action="store_true", dest="force", 
+                                               help="Force probe without incrementing global 'round'.")
        parser = parsermodule.getParser(['defaults'], parser)
        config = parsermodule.parse_args(parser)
+       if hasattr(config, 'cachecalls') and not config.cachecalls:
+               # NOTE: if explicitly asked, refresh cached values.
+               print "Reloading PLCCache"
+               plccache.init()
        try:
                # NOTE: evidently, there is a bizarre interaction between iLO and ssh
                # when LANG is set... Do not know why.  Unsetting LANG, fixes the problem.