updated the model for actions, site history

[monitor.git] / findbadpcu.py
diff --git a/findbadpcu.py b/findbadpcu.py

index 468107d..d00d7f7 100755 (executable)
--- a/findbadpcu.py
+++ b/findbadpcu.py
@@ -13,303 +13,32 @@ import threadpool
  import threading
  
  import monitor
-from pcucontrol  import reboot
  from monitor import config
  from monitor.database.info.model import FindbadPCURecordSync, FindbadPCURecord, session
  from monitor import database
  from monitor import util 
  from monitor.wrapper import plc, plccache
  from nodequery import pcu_select
+from monitor.common import nmap_port_status
+from monitor.scanapi import *
  
  plc_lock = threading.Lock()
  global_round = 1
  errorState = {}
  count = 0
  
-def nmap_port_status(status):
-       ps = {}
-       l_nmap = status.split()
-       ports = l_nmap[4:]
-
-       continue_probe = False
-       for port in ports:
-               results = port.split('/')
-               ps[results[0]] = results[1]
-               if results[1] == "open":
-                       continue_probe = True
-       return (ps, continue_probe)
-
-def get_pcu(pcuname):
-       plc_lock.acquire()
-       try:
-               #print "GetPCU from PLC %s" % pcuname
-               l_pcu  = plc.GetPCUs({'pcu_id' : pcuname})
-               #print l_pcu
-               if len(l_pcu) > 0:
-                       l_pcu = l_pcu[0]
-       except:
-               try:
-                       #print "GetPCU from file %s" % pcuname
-                       l_pcus = plccache.l_pcus
-                       for i in l_pcus:
-                               if i['pcu_id'] == pcuname:
-                                       l_pcu = i
-               except:
-                       traceback.print_exc()
-                       l_pcu = None
-
-       plc_lock.release()
-       return l_pcu
-
-def get_nodes(node_ids):
-       plc_lock.acquire()
-       l_node = []
-       try:
-               l_node = plc.getNodes(node_ids, ['hostname', 'last_contact', 'node_id', 'ports'])
-       except:
-               try:
-                       plc_nodes = plccache.l_plcnodes
-                       for n in plc_nodes:
-                               if n['node_id'] in node_ids:
-                                       l_node.append(n)
-               except:
-                       traceback.print_exc()
-                       l_node = None
-
-       plc_lock.release()
-       if l_node == []:
-               l_node = None
-       return l_node
-       
-
-def get_plc_pcu_values(pcuname):
-       """
-               Try to contact PLC to get the PCU info.
-               If that fails, try a backup copy from the last run.
-               If that fails, return None
-       """
-       values = {}
-
-       l_pcu = get_pcu(pcuname)
-       
-       if l_pcu is not None:
-               site_id = l_pcu['site_id']
-               node_ids = l_pcu['node_ids']
-               l_node = get_nodes(node_ids) 
-                               
-               if l_node is not None:
-                       for node in l_node:
-                               values[node['hostname']] = node['ports'][0]
-
-                       values['nodenames'] = [node['hostname'] for node in l_node]
-
-                       # NOTE: this is for a dry run later. It doesn't matter which node.
-                       values['node_id'] = l_node[0]['node_id']
-
-               values.update(l_pcu)
-       else:
-               values = None
-       
-       return values
-
-def get_plc_site_values(site_id):
-       ### GET PLC SITE ######################
-       plc_lock.acquire()
-       values = {}
-       d_site = None
-
-       try:
-               d_site = plc.getSites({'site_id': site_id}, ['max_slices', 'slice_ids', 'node_ids', 'login_base'])
-               if len(d_site) > 0:
-                       d_site = d_site[0]
-       except:
-               try:
-                       plc_sites = plccache.l_plcsites
-                       for site in plc_sites:
-                               if site['site_id'] == site_id:
-                                       d_site = site
-                                       break
-               except:
-                       traceback.print_exc()
-                       values = None
-
-       plc_lock.release()
-
-       if d_site is not None:
-               max_slices = d_site['max_slices']
-               num_slices = len(d_site['slice_ids'])
-               num_nodes = len(d_site['node_ids'])
-               loginbase = d_site['login_base']
-               values['plcsite'] = {'num_nodes' : num_nodes, 
-                                                       'max_slices' : max_slices, 
-                                                       'num_slices' : num_slices,
-                                                       'login_base' : loginbase,
-                                                       'status'     : 'SUCCESS'}
-       else:
-               values = None
-
-
-       return values
-
-
-def collectPingAndSSH(pcuname, cohash):
-
-       continue_probe = True
-       errors = None
-       values = {'reboot' : 'novalue'}
-       ### GET PCU ######################
-       try:
-               b_except = False
-               try:
-                       v = get_plc_pcu_values(pcuname)
-                       if v['hostname'] is not None: v['hostname'] = v['hostname'].strip()
-                       if v['ip'] is not None: v['ip'] = v['ip'].strip()
-
-                       if v is not None:
-                               values['plc_pcu_stats'] = v
-                       else:
-                               continue_probe = False
-               except:
-                       b_except = True
-                       traceback.print_exc()
-                       continue_probe = False
-
-               if b_except or not continue_probe: return (None, None, None)
-
-
-               #### COMPLETE ENTRY   #######################
-
-               values['entry_complete'] = []
-               #if values['protocol'] is None or values['protocol'] is "":
-               #       values['entry_complete'] += ["protocol"]
-               if values['plc_pcu_stats']['model'] is None or values['plc_pcu_stats']['model'] is "":
-                       values['entry_complete'] += ["model"]
-                       # Cannot continue due to this condition
-                       continue_probe = False
-
-               if values['plc_pcu_stats']['password'] is None or values['plc_pcu_stats']['password'] is "":
-                       values['entry_complete'] += ["password"]
-                       # Cannot continue due to this condition
-                       continue_probe = False
-
-               if len(values['entry_complete']) > 0:
-                       continue_probe = False
-
-               if values['plc_pcu_stats']['hostname'] is None or values['plc_pcu_stats']['hostname'] is "":
-                       values['entry_complete'] += ["hostname"]
-               if values['plc_pcu_stats']['ip'] is None or values['plc_pcu_stats']['ip'] is "":
-                       values['entry_complete'] += ["ip"]
-
-               # If there are no nodes associated with this PCU, then we cannot continue.
-               if len(values['plc_pcu_stats']['node_ids']) == 0:
-                       continue_probe = False
-                       values['entry_complete'] += ['NoNodeIds']
-
-               #### DNS and IP MATCH #######################
-               if values['plc_pcu_stats']['hostname'] is not None and values['plc_pcu_stats']['hostname'] is not "" and \
-                  values['plc_pcu_stats']['ip'] is not None and values['plc_pcu_stats']['ip'] is not "":
-                       #print "Calling socket.gethostbyname(%s)" % values['hostname']
-                       try:
-                               ipaddr = socket.gethostbyname(values['plc_pcu_stats']['hostname'])
-                               if ipaddr == values['plc_pcu_stats']['ip']:
-                                       values['dns_status'] = "DNS-OK"
-                               else:
-                                       values['dns_status'] = "DNS-MISMATCH"
-                                       continue_probe = False
-
-                       except Exception, err:
-                               values['dns_status'] = "DNS-NOENTRY"
-                               values['plc_pcu_stats']['hostname'] = values['plc_pcu_stats']['ip']
-                               #print err
-               else:
-                       if values['plc_pcu_stats']['ip'] is not None and values['plc_pcu_stats']['ip'] is not "":
-                               values['dns_status'] = "NOHOSTNAME"
-                               values['plc_pcu_stats']['hostname'] = values['plc_pcu_stats']['ip']
-                       else:
-                               values['dns_status'] = "NO-DNS-OR-IP"
-                               values['plc_pcu_stats']['hostname'] = "No_entry_in_DB"
-                               continue_probe = False
-
-               #### RUN NMAP ###############################
-               if continue_probe:
-                       nmap = util.command.CMD()
-                       (oval,eval) = nmap.run_noexcept("nmap -oG - -P0 -p22,23,80,443,5869,9100,16992 %s | grep Host:" % reboot.pcu_name(values['plc_pcu_stats']))
-                       # NOTE: an empty / error value for oval, will still work.
-                       (values['port_status'], continue_probe) = nmap_port_status(oval)
-               else:
-                       values['port_status'] = None
-                       
-
-               ######  DRY RUN  ############################
-               if 'node_ids' in values['plc_pcu_stats'] and len(values['plc_pcu_stats']['node_ids']) > 0:
-                       rb_ret = reboot.reboot_test_new(values['plc_pcu_stats']['nodenames'][0], values, continue_probe, 1, True)
-               else:
-                       rb_ret = "Not_Run" # No nodes to test"
-
-               values['reboot'] = rb_ret
-
-       except:
-               print "____________________________________"
-               print values
-               errors = values
-               print "____________________________________"
-               errors['traceback'] = traceback.format_exc()
-               print errors['traceback']
-               values['reboot'] = errors['traceback']
-
-       values['date_checked'] = time.time()
-       return (pcuname, values, errors)
-
-def recordPingAndSSH(request, result):
-       global errorState
-       global count
-       global global_round
-       (nodename, values, errors) = result
-
-       if values is not None:
-               pcu_id = int(nodename)
-               fbsync = FindbadPCURecordSync.findby_or_create(plc_pcuid=0, 
-                                                                                       if_new_set={'round': global_round})
-               global_round = fbsync.round
-               fbnodesync = FindbadPCURecordSync.findby_or_create(plc_pcuid=pcu_id, 
-                                                                                       if_new_set={'round' : global_round})
-
-               fbrec = FindbadPCURecord(
-                                       date_checked=datetime.fromtimestamp(values['date_checked']),
-                                       round=fbsync.round,
-                                       plc_pcuid=pcu_id,
-                                       plc_pcu_stats=values['plc_pcu_stats'],
-                                       dns_status=values['dns_status'],
-                                       port_status=values['port_status'],
-                                       entry_complete=" ".join(values['entry_complete']),
-                                       reboot_trial_status="%s" % values['reboot'],
-                               )
-               fbnodesync.round = global_round
-
-               fbnodesync.flush()
-               fbsync.flush()
-               fbrec.flush()
-
-               count += 1
-               print "%d %s %s" % (count, nodename, values)
-
-       if errors is not None:
-               pcu_id = "id_%s" % nodename
-               errorState[pcu_id] = errors
-               database.dbDump("findbadpcu_errors", errorState)
-
  # this will be called when an exception occurs within a thread
  def handle_exception(request, result):
         print "Exception occured in request %s" % request.requestID
         for i in result:
                 print "Result: %s" % i
  
-
-def checkAndRecordState(l_pcus, cohash):
+def checkPCUs(l_pcus, cohash):
         global global_round
         global count
  
         tp = threadpool.ThreadPool(10)
+       scanpcu = ScanPCU(global_round)
  
         # CREATE all the work requests
         for pcuname in l_pcus:
@@ -321,8 +50,8 @@ def checkAndRecordState(l_pcus, cohash):
                 if node_round < global_round or config.force:
                         # recreate node stats when refreshed
                         #print "%s" % nodename
-                       req = threadpool.WorkRequest(collectPingAndSSH, [pcuname, cohash], {}, 
-                                                                                None, recordPingAndSSH, handle_exception)
+                       req = threadpool.WorkRequest(scanpcu.collectInternal, [int(pcuname), cohash], {}, 
+                                                                                None, scanpcu.record, handle_exception)
                         tp.putRequest(req)
                 else:
                         # We just skip it, since it's "up to date"
@@ -354,17 +83,16 @@ def checkAndRecordState(l_pcus, cohash):
  def main():
         global global_round
  
-       #  monitor.database.if_cached_else_refresh(1, config.refresh, "pculist", lambda : plc.GetPCUs())
         l_pcus = plccache.l_pcus
         cohash = {}
  
-       fbsync = FindbadPCURecordSync.findby_or_create(plc_pcuid=0, if_new_set={'round' : global_round})
+       fbsync = FindbadPCURecordSync.findby_or_create(plc_pcuid=0, 
+                                                                                       if_new_set={'round' : global_round})
  
         global_round = fbsync.round
-
+       api = plc.getAuthAPI()
  
         if config.site is not None:
-               api = plc.getAuthAPI()
                 site = api.GetSites(config.site)
                 l_nodes = api.GetNodes(site[0]['node_ids'], ['pcu_ids'])
                 pcus = []
@@ -372,6 +100,21 @@ def main():
                         pcus += node['pcu_ids']
                 # clear out dups.
                 l_pcus = [pcu for pcu in sets.Set(pcus)]
+       elif config.sitelist:
+               site_list = config.sitelist.split(',')
+
+               sites = api.GetSites(site_list)
+               node_ids = []
+               for s in sites:
+                       node_ids += s['node_ids']
+
+               l_nodes = api.GetNodes(node_ids, ['pcu_ids'])
+               pcus = []
+               for node in l_nodes:
+                       pcus += node['pcu_ids']
+               # clear out dups.
+               l_pcus = [pcu for pcu in sets.Set(pcus)]
+
         elif config.pcuselect is not None:
                 n, pcus = pcu_select(config.pcuselect)
                 print pcus
@@ -379,7 +122,7 @@ def main():
                 l_pcus = [pcu for pcu in sets.Set(pcus)]
  
         elif config.nodelist == None and config.pcuid == None:
-               print "Calling API GetPCUs() : refresh(%s)" % config.refresh
+               print "Calling API GetPCUs() : cachecalls(%s)" % config.cachecalls
                 l_pcus  = [pcu['pcu_id'] for pcu in l_pcus]
         elif config.nodelist is not None:
                 l_pcus = util.file.getListFromFile(config.nodelist)
@@ -391,10 +134,14 @@ def main():
         if config.increment:
                 # update global round number to force refreshes across all nodes
                 global_round += 1
-               fbsync.round = global_round
-       fbsync.flush()
  
-       checkAndRecordState(l_pcus, cohash)
+       checkPCUs(l_pcus, cohash)
+
+       if config.increment:
+               # update global round number to force refreshes across all nodes
+               fbsync.round = global_round
+               fbsync.flush()
+               session.flush()
  
         return 0
  
@@ -416,6 +163,7 @@ if __name__ == '__main__':
                                                 pcuid=None,
                                                 pcuselect=None,
                                                 site=None,
+                                               sitelist=None,
                                                 dbname="findbadpcus", 
                                                 cachenodes=False,
                                                 cachecalls=True,
@@ -425,6 +173,8 @@ if __name__ == '__main__':
                                                 help="Provide the input file for the node list")
         parser.add_option("", "--site", dest="site", metavar="FILE", 
                                                 help="Get all pcus associated with the given site's nodes")
+       parser.add_option("", "--sitelist", dest="sitelist", metavar="FILE", 
+                                               help="Get all pcus associated with the given site's nodes")
         parser.add_option("", "--pcuselect", dest="pcuselect", metavar="FILE", 
                                                 help="Query string to apply to the findbad pcus")
         parser.add_option("", "--pcuid", dest="pcuid", metavar="id", 
@@ -455,6 +205,8 @@ if __name__ == '__main__':
                 time.sleep(1)
         except Exception, err:
                 traceback.print_exc()
+               from monitor.common import email_exception
+               email_exception()
                 print "Exception: %s" % err
                 print "Saving data... exitting."
                 sys.exit(0)