omf plugin: launch the trigger script without communicating with it, so nodemanager no longer hangs
[nodemanager.git] / plugins / omf_resctl.py
index a0f14ab..7b9b1eb 100644 (file)
@@ -13,7 +13,8 @@ import subprocess
 import tools
 import logger
 
-priority = 50
+# we need this to run after sliverauth
+priority = 150
 
 def start():
     pass
@@ -22,12 +23,16 @@ def start():
 # hard-wire this for now
 # once the variables are expanded, this is expected to go into
 config_ple_template="""---
-# Example:
-# _slicename_ = nicta_ruby
-# _hostname_ = planetlab1.research.nicta.com.au
-# _xmpp_server_ = xmpp.planet-lab.eu
-:uid: _slicename_@_hostname_
+# we extract expires time here, even in a comment so that the
+# trigger script gets called whenever this changes
+# expires: _expires_
+
+# these are not actual OMF parameters, they are only used by the trigger script
+:hostname: _hostname_
+:slicename: _slicename_
+
+# OMF configuration
+:uid: _slicename_%_hostname_
 :uri: xmpp://_slicename_-_hostname_-<%= "#{Process.pid}" %>:_slicename_-_hostname_-<%= "#{Process.pid}" %>@_xmpp_server_
 :environment: production
 :debug: false
@@ -84,11 +89,13 @@ def GetSlivers(data, conf = None, plc = None):
         # skip non OMF-friendly slices
         if not is_omf_friendly (sliver): continue
         slicename=sliver['name']
+        expires=str(sliver['expires'])
         yaml_template = config_ple_template
         yaml_contents = yaml_template\
             .replace('_xmpp_server_',xmpp_server)\
             .replace('_slicename_',slicename)\
-            .replace('_hostname_',hostname)
+            .replace('_hostname_',hostname)\
+            .replace('_expires_',expires)
         yaml_full_path="/vservers/%s/%s"%(slicename,yaml_slice_path)
         yaml_full_dir=os.path.dirname(yaml_full_path)
         if not os.path.isdir(yaml_full_dir):
@@ -106,17 +113,20 @@ def GetSlivers(data, conf = None, plc = None):
             try:
                 fetch_trigger_script_if_missing (slicename)
                 # the trigger script actually needs to be run in the slice context of course
-                slice_command = ["sudo -i %s"%omf_rc_trigger_script]
+                # in addition there is a requirement to pretend we run as a login shell
+                # hence sudo -i
+                slice_command = [ "sudo", "-i",  omf_rc_trigger_script ]
                 to_run = tools.command_in_slice (slicename, slice_command)
-                logger.log("command_in_slice: %s"%to_run)
-                sp=subprocess.Popen(to_run, stdout=subprocess.PIPE,stderr=subprocess.STDOUT)
-                (out,err)=sp.communicate()
-                retcod=sp.returncode
-                # we don't wait for that, try to display a retcod for info purpose only
-                # might be None if that config script lasts or hangs whatever
-                logger.log("omf_resctl: %s: called OMF rc control script (imm. retcod=%r)"%(slicename,retcod))
-                logger.log("omf_resctl: got stdout\n%s"%out)
-                logger.log("omf_resctl: got stderr\n%s"%err)
+                log_filename = "/vservers/%s/var/log/%s.log"%(slicename,omf_rc_trigger_script)
+                logger.log("omf_resctl: starting %s"%to_run)
+                logger.log("redirected into %s"%log_filename)
+                logger.log("*not* waiting for completion..")
+                with open(log_filename,"a") as log_file:
+                    subprocess.Popen(to_run, stdout=log_file,stderr=subprocess.STDOUT)
+                # a first version called communicate() on that subprocess instance,
+                # but that tended to create deadlocks in some cases,
+                # causing nodemanager to stall...
+                # we only lose the child's return code, no big deal
             except:
                 import traceback
                 traceback.print_exc()