unify the model by which probes are made to collect information about nodes or
[monitor.git] / findbadpcu.py
index 1af600c..0d06d1e 100755
--- a/findbadpcu.py
+++ b/findbadpcu.py
@@ -13,31 +13,20 @@ import threadpool
 import threading
 
 import monitor
-from monitor.pcu import reboot
+from pcucontrol  import reboot
 from monitor import config
-from monitor.database import FindbadPCURecordSync, FindbadPCURecord
+from monitor.database.info.model import FindbadPCURecordSync, FindbadPCURecord, session
+from monitor import database
 from monitor import util 
 from monitor.wrapper import plc, plccache
 from nodequery import pcu_select
+from nodecommon import nmap_port_status
 
 plc_lock = threading.Lock()
 global_round = 1
 errorState = {}
 count = 0
 
-def nmap_portstatus(status):
-       ps = {}
-       l_nmap = status.split()
-       ports = l_nmap[4:]
-
-       continue_probe = False
-       for port in ports:
-               results = port.split('/')
-               ps[results[0]] = results[1]
-               if results[1] == "open":
-                       continue_probe = True
-       return (ps, continue_probe)
-
 def get_pcu(pcuname):
        plc_lock.acquire()
        try:
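
The local nmap_portstatus() helper deleted above is replaced by nmap_port_status() imported from nodecommon. Its implementation there is not shown in this diff; a minimal sketch, reconstructed from the deleted lines and nmap's grepable (-oG) output, looks roughly like this:

    # Sketch only -- reconstructed from the deleted nmap_portstatus() above;
    # the real nodecommon.nmap_port_status() may differ.
    def nmap_port_status(status):
        """Parse an nmap -oG 'Host:' line into ({port: state}, continue_probe)."""
        # Example input (whitespace-separated fields):
        #   Host: 128.112.139.115 (pcu.example.org) Ports: 22/open/tcp//ssh///, 443/closed/tcp//https///
        ps = {}
        ports = status.split()[4:]        # fields after "Host: <ip> (<name>) Ports:"
        continue_probe = False
        for port in ports:
            results = port.split('/')     # "22/open/tcp//ssh///," -> ['22', 'open', ...]
            ps[results[0]] = results[1]
            if results[1] == "open":
                continue_probe = True     # at least one open port: keep probing this PCU
        return (ps, continue_probe)

An empty or error string from the nmap run parses to ({}, False), which is why the "empty / error value for oval" note in the hunk below still holds.
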
@@ -175,34 +164,44 @@ def collectPingAndSSH(pcuname, cohash):
 
                if b_except or not continue_probe: return (None, None, None)
 
-
+               #### RUN NMAP ###############################
+               if continue_probe:
+                       nmap = util.command.CMD()
+                       print "nmap -oG - -P0 -p22,23,80,443,5869,9100,16992 %s | grep Host:" % reboot.pcu_name(values['plc_pcu_stats'])
+                       (oval,eval) = nmap.run_noexcept("nmap -oG - -P0 -p22,23,80,443,5869,9100,16992 %s | grep Host:" % reboot.pcu_name(values['plc_pcu_stats']))
+                       # NOTE: an empty / error value for oval, will still work.
+                       (values['port_status'], continue_probe) = nmap_port_status(oval)
+               else:
+                       values['port_status'] = None
+                       
                #### COMPLETE ENTRY   #######################
 
-               values['complete_entry'] = []
+               values['entry_complete'] = []
                #if values['protocol'] is None or values['protocol'] is "":
-               #       values['complete_entry'] += ["protocol"]
+               #       values['entry_complete'] += ["protocol"]
                if values['plc_pcu_stats']['model'] is None or values['plc_pcu_stats']['model'] is "":
-                       values['complete_entry'] += ["model"]
+                       values['entry_complete'] += ["model"]
                        # Cannot continue due to this condition
                        continue_probe = False
 
                if values['plc_pcu_stats']['password'] is None or values['plc_pcu_stats']['password'] is "":
-                       values['complete_entry'] += ["password"]
+                       values['entry_complete'] += ["password"]
                        # Cannot continue due to this condition
                        continue_probe = False
 
-               if len(values['complete_entry']) > 0:
+               if len(values['entry_complete']) > 0:
                        continue_probe = False
 
                if values['plc_pcu_stats']['hostname'] is None or values['plc_pcu_stats']['hostname'] is "":
-                       values['complete_entry'] += ["hostname"]
+                       values['entry_complete'] += ["hostname"]
                if values['plc_pcu_stats']['ip'] is None or values['plc_pcu_stats']['ip'] is "":
-                       values['complete_entry'] += ["ip"]
+                       values['entry_complete'] += ["ip"]
 
                # If there are no nodes associated with this PCU, then we cannot continue.
                if len(values['plc_pcu_stats']['node_ids']) == 0:
                        continue_probe = False
-                       values['complete_entry'] += ['NoNodeIds']
+                       values['entry_complete'] += ['nodeids']
+
 
                #### DNS and IP MATCH #######################
                if values['plc_pcu_stats']['hostname'] is not None and values['plc_pcu_stats']['hostname'] is not "" and \
@@ -211,37 +210,29 @@ def collectPingAndSSH(pcuname, cohash):
                        try:
                                ipaddr = socket.gethostbyname(values['plc_pcu_stats']['hostname'])
                                if ipaddr == values['plc_pcu_stats']['ip']:
-                                       values['dnsmatch'] = "DNS-OK"
+                                       values['dns_status'] = "DNS-OK"
                                else:
-                                       values['dnsmatch'] = "DNS-MISMATCH"
+                                       values['dns_status'] = "DNS-MISMATCH"
                                        continue_probe = False
 
                        except Exception, err:
-                               values['dnsmatch'] = "DNS-NOENTRY"
+                               values['dns_status'] = "DNS-NOENTRY"
                                values['plc_pcu_stats']['hostname'] = values['plc_pcu_stats']['ip']
                                #print err
                else:
                        if values['plc_pcu_stats']['ip'] is not None and values['plc_pcu_stats']['ip'] is not "":
-                               values['dnsmatch'] = "NOHOSTNAME"
+                               values['dns_status'] = "NOHOSTNAME"
                                values['plc_pcu_stats']['hostname'] = values['plc_pcu_stats']['ip']
                        else:
-                               values['dnsmatch'] = "NO-DNS-OR-IP"
+                               values['dns_status'] = "NO-DNS-OR-IP"
                                values['plc_pcu_stats']['hostname'] = "No_entry_in_DB"
                                continue_probe = False
 
-               #### RUN NMAP ###############################
-               if continue_probe:
-                       nmap = util.command.CMD()
-                       (oval,eval) = nmap.run_noexcept("nmap -oG - -P0 -p22,23,80,443,5869,9100,16992 %s | grep Host:" % reboot.pcu_name(values['plc_pcu_stats']))
-                       # NOTE: an empty / error value for oval, will still work.
-                       (values['portstatus'], continue_probe) = nmap_portstatus(oval)
-               else:
-                       values['portstatus'] = None
-                       
 
                ######  DRY RUN  ############################
                if 'node_ids' in values['plc_pcu_stats'] and len(values['plc_pcu_stats']['node_ids']) > 0:
-                       rb_ret = reboot.reboot_test(values['plc_pcu_stats']['nodenames'][0], values, continue_probe, 1, True)
+                       rb_ret = reboot.reboot_test_new(values['plc_pcu_stats']['nodenames'][0], 
+                                                                                       values, 1, True)
                else:
                        rb_ret = "Not_Run" # No nodes to test"
 
@@ -254,6 +245,7 @@ def collectPingAndSSH(pcuname, cohash):
                print "____________________________________"
                errors['traceback'] = traceback.format_exc()
                print errors['traceback']
+               values['reboot'] = errors['traceback']
 
        values['date_checked'] = time.time()
        return (pcuname, values, errors)
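
The DNS/IP check in this hunk is unchanged in substance; only the result key is renamed from dnsmatch to dns_status. Condensed into a standalone helper (an illustrative sketch, not code from the repository; the side effects of falling back to the ip and clearing continue_probe are omitted):

    # Sketch of the dns_status decision above; the guard on the PLC hostname/ip
    # fields is abbreviated here.
    import socket

    def derive_dns_status(hostname, ip):
        if hostname:
            try:
                if socket.gethostbyname(hostname) == ip:
                    return "DNS-OK"
                return "DNS-MISMATCH"       # forward lookup disagrees with the PLC ip field
            except socket.error:
                return "DNS-NOENTRY"        # no forward record; the probe falls back to the ip
        elif ip:
            return "NOHOSTNAME"             # no hostname in the DB; probe by ip
        else:
            return "NO-DNS-OR-IP"           # nothing to contact; the probe cannot continue
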
@@ -266,23 +258,28 @@ def recordPingAndSSH(request, result):
 
        if values is not None:
                pcu_id = int(nodename)
-               fbsync = FindbadPCURecordSync.findby_or_create(plc_pcuid=0, 
-                                                                                       if_new_set={'round': global_round})
-               global_round = fbsync.round
+               #fbsync = FindbadPCURecordSync.findby_or_create(plc_pcuid=0, 
+               #                                                                       if_new_set={'round': global_round})
+               #global_round = fbsync.round
                fbnodesync = FindbadPCURecordSync.findby_or_create(plc_pcuid=pcu_id, 
                                                                                        if_new_set={'round' : global_round})
 
                fbrec = FindbadPCURecord(
                                        date_checked=datetime.fromtimestamp(values['date_checked']),
-                                       record=fbsync.round,
+                                       round=global_round,
                                        plc_pcuid=pcu_id,
                                        plc_pcu_stats=values['plc_pcu_stats'],
-                                       dns_status=values['dnsmatch'],
-                                       port_status=values['portstatus'],
-                                       entry_complete=" ".join(values['complete_entry']),
+                                       dns_status=values['dns_status'],
+                                       port_status=values['port_status'],
+                                       entry_complete=" ".join(values['entry_complete']),
                                        reboot_trial_status="%s" % values['reboot'],
                                )
                fbnodesync.round = global_round
+
+               fbnodesync.flush()
+               #fbsync.flush()
+               fbrec.flush()
+
                count += 1
                print "%d %s %s" % (count, nodename, values)
 
@@ -308,9 +305,10 @@ def checkAndRecordState(l_pcus, cohash):
        for pcuname in l_pcus:
                pcu_id = int(pcuname)
                fbnodesync = FindbadPCURecordSync.findby_or_create(plc_pcuid=pcu_id, if_new_set={'round' : 0})
+               fbnodesync.flush()
 
                node_round   = fbnodesync.round
-               if node_round < global_round:
+               if node_round < global_round or config.force:
                        # recreate node stats when refreshed
                        #print "%s" % nodename
                        req = threadpool.WorkRequest(collectPingAndSSH, [pcuname, cohash], {}, 
@@ -340,6 +338,7 @@ def checkAndRecordState(l_pcus, cohash):
 
        print FindbadPCURecordSync.query.count()
        print FindbadPCURecord.query.count()
+       session.flush()
 
 
 def main():
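
The flush() calls added in recordPingAndSSH() and checkAndRecordState() above write each per-PCU record out as soon as it is created or updated, and the session.flush() at the end of checkAndRecordState() pushes anything still pending. A rough sketch of the pattern, using the names from the hunks above (the flush API is assumed from this codebase's model layer, not documented here):

    # Sketch of the per-record persistence pattern; not a verbatim excerpt.
    fbnodesync = FindbadPCURecordSync.findby_or_create(plc_pcuid=pcu_id,
                                                       if_new_set={'round': global_round})
    fbnodesync.round = global_round
    fbnodesync.flush()   # push this sync row to the database now
    fbrec.flush()        # likewise for the FindbadPCURecord built from values
    session.flush()      # end of the run: push anything still pending
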
@@ -353,10 +352,6 @@ def main():
 
        global_round = fbsync.round
 
-       if config.increment:
-               # update global round number to force refreshes across all nodes
-               global_round += 1
-               fbsync.round = global_round
 
        if config.site is not None:
                api = plc.getAuthAPI()
@@ -369,11 +364,12 @@ def main():
                l_pcus = [pcu for pcu in sets.Set(pcus)]
        elif config.pcuselect is not None:
                n, pcus = pcu_select(config.pcuselect)
+               print pcus
                # clear out dups.
                l_pcus = [pcu for pcu in sets.Set(pcus)]
 
        elif config.nodelist == None and config.pcuid == None:
-               print "Calling API GetPCUs() : refresh(%s)" % config.refresh
+               print "Calling API GetPCUs() : cachecalls(%s)" % config.cachecalls
                l_pcus  = [pcu['pcu_id'] for pcu in l_pcus]
        elif config.nodelist is not None:
                l_pcus = util.file.getListFromFile(config.nodelist)
@@ -382,11 +378,22 @@ def main():
                l_pcus = [ config.pcuid ] 
                l_pcus = [int(pcu) for pcu in l_pcus]
 
+       if config.increment:
+               # update global round number to force refreshes across all nodes
+               global_round += 1
+
        checkAndRecordState(l_pcus, cohash)
 
+       if config.increment:
+               # update global round number to force refreshes across all nodes
+               fbsync.round = global_round
+               fbsync.flush()
+               session.flush()
+
        return 0
 
 
+print "main"
 if __name__ == '__main__':
        import logging
        logger = logging.getLogger("monitor")
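
Together with the earlier checkAndRecordState() change, the round handling after this patch is: --increment bumps the in-memory global_round just before probing, a PCU is re-probed when its recorded round lags the global round (or unconditionally with --force), and the new round number is written back to the global sync record only after checkAndRecordState() returns. A condensed sketch, with names as in the diff:

    # Condensed sketch of the post-patch round protocol; not a verbatim excerpt.
    if config.increment:
        global_round += 1                    # request a refresh of every PCU this run

    for pcuname in l_pcus:                   # inside checkAndRecordState()
        fbnodesync = FindbadPCURecordSync.findby_or_create(plc_pcuid=int(pcuname),
                                                           if_new_set={'round': 0})
        if fbnodesync.round < global_round or config.force:
            pass                             # queue collectPingAndSSH() for this PCU

    if config.increment:
        fbsync.round = global_round          # persist the new round only after the probes ran
        fbsync.flush()
        session.flush()
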
@@ -405,7 +412,8 @@ if __name__ == '__main__':
                                                site=None,
                                                dbname="findbadpcus", 
                                                cachenodes=False,
-                                               refresh=False,
+                                               cachecalls=True,
+                                               force=False,
                                                )
        parser.add_option("-f", "--nodelist", dest="nodelist", metavar="FILE", 
                                                help="Provide the input file for the node list")
@@ -420,12 +428,18 @@ if __name__ == '__main__':
                                                help="Cache node lookup from PLC")
        parser.add_option("", "--dbname", dest="dbname", metavar="FILE", 
                                                help="Specify the name of the database to which the information is saved")
-       parser.add_option("", "--refresh", action="store_true", dest="refresh",
+       parser.add_option("", "--nocachecalls", action="store_false", dest="cachecalls",
                                                help="Refresh the cached values")
        parser.add_option("-i", "--increment", action="store_true", dest="increment", 
                                                help="Increment round number to force refresh or retry")
+       parser.add_option("", "--force", action="store_true", dest="force", 
+                                               help="Force probe without incrementing global 'round'.")
        parser = parsermodule.getParser(['defaults'], parser)
        config = parsermodule.parse_args(parser)
+       if hasattr(config, 'cachecalls') and not config.cachecalls:
+               # NOTE: if explicitly asked, refresh cached values.
+               print "Reloading PLCCache"
+               plccache.init()
        try:
                # NOTE: evidently, there is a bizarre interaction between iLO and ssh
                # when LANG is set... Do not know why.  Unsetting LANG, fixes the problem.