Merge branch 'master' of ssh://git.planet-lab.org/git/plstackapi
[plstackapi.git] / planetstack / openstack / observer.py
index 6fcb3b4..73bb114 100644 (file)
@@ -1,11 +1,15 @@
 import time
 import traceback
+import commands
+import threading
+
 from datetime import datetime
 from collections import defaultdict
 from core.models import *
 from django.db.models import F, Q
 from openstack.manager import OpenStackManager
-from util.logger import Logger, logging
+from util.logger import Logger, logging, logger
+#from timeout import timeout
 
 
 logger = Logger(logfile='observer.log', level=logging.INFO)
@@ -14,21 +18,98 @@ class OpenStackObserver:
     
     def __init__(self):
         self.manager = OpenStackManager()
+        # Condition notified by wake_up() (on Feefie events) and waited on by wait_for_event()
+        self.event_cond = threading.Condition()
+
+    def wait_for_event(self, timeout):
+        """Block until event_cond is notified or `timeout` seconds pass; the lock is always released."""
+        with self.event_cond:
+            self.event_cond.wait(timeout)
+        
+    def wake_up(self):
+        logger.info('Wake up routine called. Event cond %r'%self.event_cond)
+        # "with" releases the condition's lock even if notify() raises
+        with self.event_cond:
+            self.event_cond.notify()
 
     def run(self):
         if not self.manager.enabled or not self.manager.has_openstack:
             return
         while True:
             try:
+                logger.info('Observer run loop')
                 #self.sync_roles()
-                self.sync_tenants()
-                self.sync_users()
-                self.sync_user_tenant_roles()
-                self.sync_slivers()
-                self.sync_sliver_ips()
-                time.sleep(7)
+
+                logger.info('Calling sync tenants')
+                try:
+                    self.sync_tenants()
+                except:
+                    logger.log_exc("Exception in sync_tenants")
+                    traceback.print_exc()
+
+                logger.info('Calling sync users')
+                try:
+                    self.sync_users()
+                except:
+                    logger.log_exc("Exception in sync_users")
+                    traceback.print_exc()
+
+                logger.info('Calling sync tenant roles')
+                try:
+                    self.sync_user_tenant_roles()
+                except:
+                    logger.log_exc("Exception in sync_users")
+                    traceback.print_exc()
+
+                logger.info('Calling sync slivers')
+                try:
+                    self.sync_slivers()
+                except:
+                    logger.log_exc("Exception in sync slivers")
+                    traceback.print_exc()
+
+                logger.info('Calling sync sliver ips')
+                try:
+                    self.sync_sliver_ips()
+                except:
+                    logger.log_exc("Exception in sync_sliver_ips")
+                    traceback.print_exc()
+
+                logger.info('Calling sync networks')
+                try:
+                    self.sync_networks()
+                except:
+                    logger.log_exc("Exception in sync_networks")
+                    traceback.print_exc()
+
+                logger.info('Calling sync network slivers')
+                try:
+                    self.sync_network_slivers()
+                except:
+                    logger.log_exc("Exception in sync_network_slivers")
+                    traceback.print_exc()
+
+                logger.info('Calling sync external routes')
+                try:
+                    self.sync_external_routes()
+                except:
+                     logger.log_exc("Exception in sync_external_routes")
+                     traceback.print_exc()
+
+                logger.info('Waiting for event')
+                tBeforeWait = time.time()
+                self.wait_for_event(timeout=300)
+
+                # Enforce 5 minutes between wakeups
+                tSleep = 300 - (time.time() - tBeforeWait)
+                if tSleep > 0:
+                    logger.info('Sleeping for %d seconds' % tSleep)
+                    time.sleep(tSleep)
+
+                logger.info('Observer woken up')
             except:
-                traceback.print_exc() 
+                logger.log_exc("Exception in observer run loop")
+                traceback.print_exc()
 
     def sync_roles(self):
         """
@@ -231,7 +312,7 @@ class OpenStackObserver:
                     # update manager context
                     self.manager.init_caller(sliver.creator, sliver.slice.name)
                     self.manager.save_sliver(sliver)
-                    logger.info("saved sliver: %s %s" % (sliver))
+                    logger.info("saved sliver: %s" % (sliver))
                 except:
                     logger.log_exc("save sliver failed: %s" % sliver) 
 
@@ -243,7 +324,7 @@ class OpenStackObserver:
             sliver_dict[sliver.instance_id] = sliver
 
         # delete sliver that don't have a sliver record
-        ctx = self.manager.driver.shell.nova_db.ctx 
+        ctx = self.manager.driver.shell.nova_db.ctx
         instances = self.manager.driver.shell.nova_db.instance_get_all(ctx)
         for instance in instances:
             if instance.uuid not in sliver_dict:
@@ -263,7 +344,7 @@ class OpenStackObserver:
         for sliver in slivers:
             # update connection
             self.manager.init_admin(tenant=sliver.slice.name)
-            servers = self.manager.client.nova.servers.findall(id=sliver.instance_id)
+            servers = self.manager.driver.shell.nova.servers.findall(id=sliver.instance_id)
             if not servers:
                 continue
             server = servers[0]
@@ -273,3 +354,106 @@ class OpenStackObserver:
             sliver.ip = ips[0]['addr']
             sliver.save()
             logger.info("saved sliver ip: %s %s" % (sliver, ips[0]))
+
+    def sync_external_routes(self):
+        """Add the external routes reported by the driver to each quantum subnet."""
+        external_routes = self.manager.driver.get_external_routes()
+        for quantum_subnet in self.manager.driver.shell.quantum.list_subnets()['subnets']:
+            try:
+                self.manager.driver.add_external_route(quantum_subnet, external_routes)
+            except:
+                logger.log_exc("failed to add external route for subnet %s" % quantum_subnet)
+
+    def sync_network_slivers(self):
+        networkSlivers = NetworkSliver.objects.all()
+        networkSlivers_by_id = {}  # NOTE(review): filled below but never read in this method
+        networkSlivers_by_port = {}
+        for networkSliver in networkSlivers:
+            networkSlivers_by_id[networkSliver.id] = networkSliver
+            networkSlivers_by_port[networkSliver.port_id] = networkSliver
+
+        networks = Network.objects.all()
+        networks_by_id = {}
+        for network in networks:
+            networks_by_id[network.network_id] = network
+
+        slivers = Sliver.objects.all()
+        slivers_by_instance_id = {}
+        for sliver in slivers:
+            slivers_by_instance_id[sliver.instance_id] = sliver
+
+        ports = self.manager.driver.shell.quantum.list_ports()["ports"]
+        for port in ports:
+            if port["id"] in networkSlivers_by_port:
+                # a NetworkSliver with this port_id already exists
+                print "already accounted for port", port["id"]
+                continue
+
+            if port["device_owner"] != "compute:nova":
+                # we only want the ports that connect to instances
+                continue
+
+            network = networks_by_id.get(port['network_id'], None)
+            if not network:
+                # no Network record has this port's network_id; skip the port silently
+                continue
+
+            sliver = slivers_by_instance_id.get(port['device_id'], None)
+            if not sliver:
+                print "no sliver for port", port["id"], "device_id", port['device_id']
+                continue
+
+            if network.template.sharedNetworkId is not None:
+                # If it's a shared network template, then more than one network
+                # object maps to the quantum network. Pick the network of this
+                # template whose owner is the sliver's slice.
+                networks = network.template.network_set.all()
+                network = None
+                for candidate_network in networks:
+                    if (candidate_network.owner == sliver.slice):
+                        print "found network", candidate_network
+                        network = candidate_network  # no break: the last match wins
+
+                if not network:
+                    print "failed to find the correct network for a shared template for port", port["id"], "network", port["network_id"]
+                    continue
+
+            if not port["fixed_ips"]:
+                print "port", port["id"], "has no fixed_ips"
+                continue
+
+            # record the port as a new NetworkSliver, using its first fixed IP
+
+            ns = NetworkSliver(network=network,
+                               sliver=sliver,
+                               ip=port["fixed_ips"][0]["ip_address"],
+                               port_id=port["id"])
+            ns.save()
+
+    def sync_networks(self):
+        """
+        Save all networks where enacted < updated or enacted == None. Removing networks that
+        no longer exist in the openstack db is not implemented yet (see the TODO below).
+        """
+        # get all networks that need to be synced (enacted < updated or enacted is None)
+        pending_networks = Network.objects.filter(Q(enacted__lt=F('updated')) | Q(enacted=None))
+        for network in pending_networks:
+            if network.owner and network.owner.creator:  # others are skipped without logging
+                try:
+                    # update manager context
+                    self.manager.init_caller(network.owner.creator, network.owner.name)
+                    self.manager.save_network(network)
+                    logger.info("saved network: %s" % (network))
+                except:
+                    logger.log_exc("save network failed: %s" % network)
+
+        # get all networks where enacted != null. We can assume these networks
+        # have previously been synced and need to be checked for deletion.
+        networks = Network.objects.filter(enacted__isnull=False)
+        network_dict = {}  # keyed by network_id; not read yet (deletion pass is a TODO)
+        for network in networks:
+            network_dict[network.network_id] = network
+
+        # TODO: delete Network objects if quantum network doesn't exist
+        #       (need to write self.manager.driver.shell.quantum_db)
+