Various bugfixes to the main Observer loop
[plstackapi.git] / planetstack / openstack_observer / event_loop.py
index 90f99ad..51be5d3 100644 (file)
@@ -2,15 +2,20 @@ import os
 import imp
 import inspect
 import time
+import sys
 import traceback
 import commands
 import threading
 import json
+import pdb
+import pprint
+
 
 from datetime import datetime
 from collections import defaultdict
 from core.models import *
 from django.db.models import F, Q
+from django.db import connection
 #from openstack.manager import OpenStackManager
 from openstack.driver import OpenStackDriver
 from util.logger import Logger, logging, logger
@@ -20,9 +25,21 @@ from observer.steps import *
 from syncstep import SyncStep
 from toposort import toposort
 from observer.error_mapper import *
+from openstack_observer.openstacksyncstep import OpenStackSyncStep
+
 
 debug_mode = False
 
+class bcolors:  # ANSI escape sequences wrapped around the console messages printed by this module
+    HEADER = '\033[95m'
+    OKBLUE = '\033[94m'  # prefix of the "Executing step" message
+    OKGREEN = '\033[92m'  # prefix of the "Step ... succeeded" message
+    WARNING = '\033[93m'
+    FAIL = '\033[91m'  # prefix of the "skipped" / "failed" messages
+    ENDC = '\033[0m'  # reset sequence appended after every coloured message
+    BOLD = '\033[1m'
+    UNDERLINE = '\033[4m'
+
 logger = Logger(level=logging.INFO)
 
 class StepNotReady(Exception):
@@ -31,11 +48,27 @@ class StepNotReady(Exception):
 class NoOpDriver:
        def __init__(self):
                 self.enabled = True
+                self.dependency_graph = None  # no dependency graph is attached at construction time
+
+STEP_STATUS_WORKING=1  # initial status of a step; dependents wait on its condition while it is set
+STEP_STATUS_OK=2  # step finished without error (also set when the step was not run)
+STEP_STATUS_KO=3  # step failed, or was skipped because one of its dependencies failed
+
+def invert_graph(g):
+       """Return g with edges reversed: each value v0 maps to the keys k listing it."""
+       ig = {}
+       for k,v in g.items():
+               for v0 in v:
+                       # Fix: the old fallback `ig=[k]` rebound ig to a list and
+                       # lost the map; create the per-node list on first use.
+                       ig.setdefault(v0, []).append(k)
+       return ig
 
 class PlanetStackObserver:
-       #sync_steps = [SyncNetworks,SyncNetworkSlivers,SyncSites,SyncSitePrivileges,SyncSlices,SyncSliceMemberships,SyncSlivers,SyncSliverIps,SyncExternalRoutes,SyncUsers,SyncRoles,SyncNodes,SyncImages,GarbageCollector]
+       #sync_steps = [SyncNetworks,SyncNetworkSlivers,SyncSites,SyncSitePrivilege,SyncSlices,SyncSliceMemberships,SyncSlivers,SyncSliverIps,SyncExternalRoutes,SyncUsers,SyncRoles,SyncNodes,SyncImages,GarbageCollector]
        sync_steps = []
 
+       
        def __init__(self):
                # The Condition object that gets signalled by Feefie events
                self.step_lookup = {}
@@ -78,7 +111,7 @@ class PlanetStackObserver:
                                        # provides field (this eliminates the abstract base classes
                                        # since they don't have a provides)
 
-                                       if inspect.isclass(c) and issubclass(c, SyncStep) and hasattr(c,"provides") and (c not in self.sync_steps):
+                                       if inspect.isclass(c) and (issubclass(c, SyncStep) or issubclass(c,OpenStackSyncStep)) and hasattr(c,"provides") and (c not in self.sync_steps):
                                                self.sync_steps.append(c)
                logger.info('loaded sync steps: %s' % ",".join([x.__name__ for x in self.sync_steps]))
                # print 'loaded sync steps: %s' % ",".join([x.__name__ for x in self.sync_steps])
@@ -89,6 +122,12 @@ class PlanetStackObserver:
                try:
                        # This contains dependencies between records, not sync steps
                        self.model_dependency_graph = json.loads(open(dep_path).read())
+                       for lst in self.model_dependency_graph.values():
+                               for k in lst:
+                                       try:
+                                               deps = self.model_dependency_graph[k]
+                                       except:
+                                               self.model_dependency_graph[k] = []
                except Exception,e:
                        raise e
 
@@ -111,11 +150,13 @@ class PlanetStackObserver:
                                except KeyError:
                                        provides_dict[m.__name__]=[s.__name__]
 
-                               
                step_graph = {}
                for k,v in self.model_dependency_graph.iteritems():
                        try:
                                for source in provides_dict[k]:
+                                       if (not v):
+                                               step_graph[source] = []
+               
                                        for m in v:
                                                try:
                                                        for dest in provides_dict[m]:
@@ -155,10 +196,17 @@ class PlanetStackObserver:
                                        pass
                                        # no dependencies, pass
 
-               dependency_graph = step_graph
+               self.dependency_graph = step_graph
+               self.deletion_dependency_graph = invert_graph(step_graph)
+
+               pp = pprint.PrettyPrinter(indent=4)
+               pp.pprint(step_graph)
+               self.ordered_steps = toposort(self.dependency_graph, map(lambda s:s.__name__,self.sync_steps))
+               #self.ordered_steps = ['SyncRoles', 'SyncControllerSites', 'SyncControllerSitePrivileges','SyncImages', 'SyncControllerImages','SyncControllerUsers','SyncControllerUserSitePrivileges','SyncControllerSlices', 'SyncControllerSlicePrivileges', 'SyncControllerUserSlicePrivileges', 'SyncControllerNetworks','SyncSlivers']
+               #self.ordered_steps = ['SyncControllerSites']
 
-               self.ordered_steps = toposort(dependency_graph, map(lambda s:s.__name__,self.sync_steps))
                print "Order of steps=",self.ordered_steps
+
                self.load_run_times()
                
 
@@ -205,7 +253,6 @@ class PlanetStackObserver:
                                self.last_deletion_run_times[e]=0
 
 
-
        def save_run_times(self):
                run_times = json.dumps(self.last_run_times)
                open('/tmp/observer_run_times','w').write(run_times)
@@ -221,16 +268,157 @@ class PlanetStackObserver:
                        if (failed_step in step.dependencies):
                                raise StepNotReady
 
+       def sync(self, S, deletion):  # Run the sync step named S once; deletion selects the deletion pass. Used as a thread target by run().
+            try:
+               step = self.step_lookup[S]  # step class registered under the name S
+               start_time=time.time()
+
+                logger.info("Starting to work on step %s" % step.__name__)
+               
+               dependency_graph = self.dependency_graph if not deletion else self.deletion_dependency_graph  # NOTE(review): never read afterwards; the lookup below always uses self.dependency_graph -- confirm intended
+
+               # Wait for step dependencies to be met
+               try:
+                       deps = self.dependency_graph[S]
+                       has_deps = True
+               except KeyError:
+                       has_deps = False  # S has no entry in the graph, so there is nothing to wait for
+
+               go = True
+
+                failed_dep = None
+               if (has_deps):
+                       for d in deps:
+                                if d==step.__name__:
+                                    logger.info("   step %s self-wait skipped" % step.__name__)
+                                   go = True
+                                    continue
+
+                               cond = self.step_conditions[d]
+                               cond.acquire()
+                               if (self.step_status[d] is STEP_STATUS_WORKING):  # NOTE(review): identity test on an int constant; the branch below uses == -- confirm
+                                        logger.info("  step %s wait on dep %s" % (step.__name__, d))
+                                       cond.wait()  # NOTE(review): the status of d is not re-read after waking, so go keeps its previous value -- confirm
+                               elif self.step_status[d] == STEP_STATUS_OK:
+                                       go = True
+                               else:
+                                       go = False
+                                       failed_dep = d
+                               cond.release()
+                               if (not go):
+                                       break  # first failed dependency is enough to skip this step
+               else:
+                       go = True
+
+               if (not go):
+                        print bcolors.FAIL + "Step %r skipped on %r" % (step,failed_dep) + bcolors.ENDC
+                        # SMBAKER: sync_step was not defined here, so I changed
+                        #    this from 'sync_step' to 'step'. Verify.
+                       self.failed_steps.append(step)
+                       my_status = STEP_STATUS_KO
+               else:
+                       sync_step = step(driver=self.driver,error_map=self.error_mapper)  # instantiate the step class
+                       sync_step.__name__ = step.__name__
+                       sync_step.dependencies = []
+                       try:
+                               mlist = sync_step.provides
+
+                               for m in mlist:
+                                       sync_step.dependencies.extend(self.model_dependency_graph[m.__name__])
+                       except KeyError:
+                               pass  # a provided model without a graph entry ends the collection early
+                       sync_step.debug_mode = debug_mode
+
+                       should_run = False
+                       try:
+                               # Various checks that decide whether
+                               # this step runs or not
+                               self.check_class_dependency(sync_step, self.failed_steps) # dont run Slices if Sites failed
+                               self.check_schedule(sync_step, deletion) # dont run sync_network_routes if time since last run < 1 hour
+                               should_run = True
+                       except StepNotReady:
+                               logger.info('Step not ready: %s'%sync_step.__name__)
+                               self.failed_steps.append(sync_step)
+                               my_status = STEP_STATUS_KO
+                       except Exception,e:
+                               logger.error('%r' % e)
+                               logger.log_exc("sync step failed: %r. Deletion: %r"%(sync_step,deletion))
+                               self.failed_steps.append(sync_step)
+                               my_status = STEP_STATUS_KO
+
+                       if (should_run):
+                               try:
+                                       duration=time.time() - start_time  # NOTE(review): taken before the step is called, so it excludes the step's own execution time -- confirm
+
+                                       logger.info('Executing step %s' % sync_step.__name__)
+
+                                       print bcolors.OKBLUE + "Executing step %s" % sync_step.__name__ + bcolors.ENDC
+                                       failed_objects = sync_step(failed=list(self.failed_step_objects), deletion=deletion)  # the step instance is called to do the actual work
+
+                                       self.check_duration(sync_step, duration)
+
+                                       if failed_objects:
+                                               self.failed_step_objects.update(failed_objects)
+
+                                        logger.info("Step %r succeeded" % step)
+                                        print bcolors.OKGREEN + "Step %r succeeded" % step + bcolors.ENDC
+                                       my_status = STEP_STATUS_OK
+                                       self.update_run_time(sync_step,deletion)
+                               except Exception,e:
+                                       print bcolors.FAIL + "Model step %r failed" % (step) + bcolors.ENDC
+                                       logger.error('Model step %r failed. This seems like a misconfiguration or bug: %r. This error will not be relayed to the user!' % (step, e))
+                                       logger.log_exc(e)
+                                       self.failed_steps.append(S)  # NOTE(review): appends the step name, while the other branches append step / sync_step objects -- confirm
+                                       my_status = STEP_STATUS_KO
+                       else:
+                                logger.info("Step %r succeeded due to non-run" % step)
+                               my_status = STEP_STATUS_OK  # NOTE(review): overwrites the STEP_STATUS_KO assigned in the except handlers above -- confirm intended
+
+               try:
+                       my_cond = self.step_conditions[S]
+                       my_cond.acquire()
+                       self.step_status[S]=my_status  # publish the outcome, then wake every thread waiting on this step
+                       my_cond.notify_all()
+                       my_cond.release()
+               except KeyError,e:
+                       logger.info('Step %r is a leaf' % step)  # no condition exists for S: run() creates them only for steps listed as a dependency
+                       pass
+            finally:
+                connection.close()  # always close the django.db connection, whether or not the step raised
+
        def run(self):
                if not self.driver.enabled:
                        return
+
                if (self.driver_kind=="openstack") and (not self.driver.has_openstack):
                        return
 
                while True:
                        try:
                                error_map_file = getattr(Config(), "error_map_path", "/opt/planetstack/error_map.txt")
-                               error_mapper = ErrorMapper(error_map_file)
+                               self.error_mapper = ErrorMapper(error_map_file)
+
+                               # Set of whole steps that failed
+                               self.failed_steps = []
+
+                               # Set of individual objects within steps that failed
+                               self.failed_step_objects = set()
+
+                               # Set up conditions and step status
+                               # This is needed for steps to run in parallel
+                               # while obeying dependencies.
+
+                               providers = set()
+                               for v in self.dependency_graph.values():
+                                       if (v):
+                                               providers.update(v)
+
+                               self.step_conditions = {}
+                               self.step_status = {}
+                               for p in list(providers):
+                                       self.step_conditions[p] = threading.Condition()
+                                       self.step_status[p] = STEP_STATUS_WORKING
+
 
                                logger.info('Waiting for event')
                                tBeforeWait = time.time()
@@ -239,69 +427,26 @@ class PlanetStackObserver:
 
                                # Two passes. One for sync, the other for deletion.
                                for deletion in [False,True]:
+                                       threads = []
                                        logger.info('Deletion=%r...'%deletion)
-                                       # Set of whole steps that failed
-                                       failed_steps = []
+                                       schedule = self.ordered_steps if not deletion else reversed(self.ordered_steps)
 
-                                       # Set of individual objects within steps that failed
-                                       failed_step_objects = set()
+                                       for S in schedule:
+                                               thread = threading.Thread(target=self.sync, args=(S, deletion))
 
-                                       ordered_steps = self.ordered_steps if not deletion else reversed(self.ordered_steps)
+                                               logger.info('Deletion=%r...'%deletion)
+                                               threads.append(thread)
 
-                                       for S in ordered_steps:
-                                               step = self.step_lookup[S]
-                                               start_time=time.time()
-                                               
-                                               sync_step = step(driver=self.driver,error_map=error_mapper)
-                                               sync_step.__name__ = step.__name__
-                                               sync_step.dependencies = []
-                                               try:
-                                                       mlist = sync_step.provides
-                                                       
-                                                       for m in mlist:
-                                                               sync_step.dependencies.extend(self.model_dependency_graph[m.__name__])
-                                               except KeyError:
-                                                       pass
-                                               sync_step.debug_mode = debug_mode
+                                       # Start threads 
+                                       for t in threads:
+                                               t.start()
+
+                                       # Wait for all threads to finish before continuing with the run loop
+                                       for t in threads:
+                                               t.join()
 
-                                               should_run = False
-                                               try:
-                                                       # Various checks that decide whether
-                                                       # this step runs or not
-                                                       self.check_class_dependency(sync_step, failed_steps) # dont run Slices if Sites failed
-                                                       self.check_schedule(sync_step, deletion) # dont run sync_network_routes if time since last run < 1 hour
-                                                       should_run = True
-                                               except StepNotReady:
-                                                       logging.info('Step not ready: %s'%sync_step.__name__)
-                                                       failed_steps.append(sync_step)
-                                               except Exception,e:
-                                                       logging.error('%r',e)
-                                                       logger.log_exc("sync step failed: %r. Deletion: %r"%(sync_step,deletion))
-                                                       failed_steps.append(sync_step)
-
-                                               if (should_run):
-                                                       try:
-                                                               duration=time.time() - start_time
-
-                                                               logger.info('Executing step %s' % sync_step.__name__)
-
-                                                               # ********* This is the actual sync step
-                                                               #import pdb
-                                                               #pdb.set_trace()
-                                                               failed_objects = sync_step(failed=list(failed_step_objects), deletion=deletion)
-
-
-                                                               self.check_duration(sync_step, duration)
-                                                               if failed_objects:
-                                                                       failed_step_objects.update(failed_objects)
-
-                                                               self.update_run_time(sync_step,deletion)
-                                                       except Exception,e:
-                                                               logging.error('Model step failed. This seems like a misconfiguration or bug: %r. This error will not be relayed to the user!',e)
-                                                               logger.log_exc(e)
-                                                               failed_steps.append(S)
                                self.save_run_times()
                        except Exception, e:
-                               logging.error('Core error. This seems like a misconfiguration or bug: %r. This error will not be relayed to the user!',e)
+                               logger.error('Core error. This seems like a misconfiguration or bug: %r. This error will not be relayed to the user!' % e)
                                logger.log_exc("Exception in observer run loop")
                                traceback.print_exc()