require gnupg1 on f>=31; sense the system to use gpg1 when installed
[nodemanager.git] / plugins / omf_resctl.py
index 0498a42..d893687 100644 (file)
@@ -13,21 +13,26 @@ import subprocess
 import tools
 import logger
 
-priority = 50
+# we need this to run after sliverauth
+priority = 150
 
 def start():
     pass
 
-### the new template for v6
+### the new template for OMF v6
 # hard-wire this for now
 # once the variables are expanded, this is expected to go into
 config_ple_template="""---
-# Example:
-# _slicename_ = nicta_ruby
-# _hostname_ = planetlab1.research.nicta.com.au
-# _xmpp_server_ = xmpp.planet-lab.eu
-:uid: _slicename_@_hostname_
+# we extract expires time here, even in a comment so that the
+# trigger script gets called whenever this changes
+# expires: _expires_
+
+# these are not actual OMF parameters, they are only used by the trigger script
+:hostname: _hostname_
+:slicename: _slicename_
+
+# OMF configuration
+:uid: _slicename_%_hostname_
 :uri: xmpp://_slicename_-_hostname_-<%= "#{Process.pid}" %>:_slicename_-_hostname_-<%= "#{Process.pid}" %>@_xmpp_server_
 :environment: production
 :debug: false
@@ -41,44 +46,28 @@ config_ple_template="""---
 # the path where the config is expected from within the sliver
 yaml_slice_path="/etc/omf_rc/config.yml"
 # the path for the script that we call when a change occurs
-omf_rc_trigger_script="/some/path/to/the/change/script"
-
-### this returns the kind of virtualization on the node
-# either 'vs' or 'lxc'
-# also caches it in /etc/planetlab/virt for next calls
-# could be promoted to core nm if need be
-virt_stamp="/etc/planetlab/virt"
-def get_node_virt ():
-    try:
-        return file(virt_stamp).read().strip()
-    except:
-        pass
-    logger.log("Computing virt..")
-    vs_retcod=subprocess.call ([ 'vserver', '--help' ])
-    if vs_retcod == 0:
-        virt='vs'
-    else:
-        virt='lxc'
-    with file(virt_stamp,"w") as f:
-        f.write(virt)
-    return virt
-
-def command_in_slice (slicename, argv):
-    # with vserver this can be done using vserver .. exec <trigger-script>
-    # with lxc this is less clear as we are still discussing how lxcsu should behave
-    # ideally we'd need to run lxcsu .. <trigger-script>
-    virt=get_node_virt()
-    if virt=='vs':
-        return [ 'vserver', slicename, 'exec', ] + argv
-    elif virt=='lxc':
-        return [ 'lxcsu', slicename, ] + argv
-    logger.log("command_in_slice: WARNING: could not find a valid virt")
-    return argv
+# given that we're now responsible for fetching this one, I have to
+# decide on an actual path - not just a name to search for in PATH
+omf_rc_trigger_script="/usr/bin/plc_trigger_omf_rc"
+omf_rc_trigger_log="/var/log/plc_trigger_omf_rc.log"
+
+# hopefully temporary: when trigger script is missing, fetch it at the url here
+omf_rc_trigger_url="http://git.mytestbed.net/?p=omf.git;a=blob_plain;f=omf_rc/bin/plc_trigger_omf_rc;hb=HEAD"
+def fetch_trigger_script_if_missing (slicename):
+    full_path="/vservers/%s/%s"%(slicename, omf_rc_trigger_script)
+    if not os.path.isfile (full_path):
+        retcod=subprocess.call (['curl', '--silent', '-o', full_path, omf_rc_trigger_url])
+        if retcod!=0:
+            logger.log("Could not fetch %s"%omf_rc_trigger_url)
+        else:
+            subprocess.call(['chmod', '+x', full_path])
+            logger.log("omf_resctl: fetched %s"%(full_path))
+            logger.log("omf_resctl: from %s"%(omf_rc_trigger_url))
 
 def GetSlivers(data, conf = None, plc = None):
     logger.log("omf_resctl.GetSlivers")
     if 'accounts' not in data:
-        logger.log_missing_data("omf_resctl.GetSlivers",'accounts')
+        logger.log_missing_data("omf_resctl.GetSlivers", 'accounts')
         return
 
     try:
@@ -101,19 +90,21 @@ def GetSlivers(data, conf = None, plc = None):
         # skip non OMF-friendly slices
         if not is_omf_friendly (sliver): continue
         slicename=sliver['name']
+        expires=str(sliver['expires'])
         yaml_template = config_ple_template
         yaml_contents = yaml_template\
-            .replace('_xmpp_server_',xmpp_server)\
-            .replace('_slicename_',slicename)\
-            .replace('_hostname_',hostname)
-        yaml_full_path="/vservers/%s/%s"%(slicename,yaml_slice_path)
+            .replace('_xmpp_server_', xmpp_server)\
+            .replace('_slicename_', slicename)\
+            .replace('_hostname_', hostname)\
+            .replace('_expires_', expires)
+        yaml_full_path="/vservers/%s/%s"%(slicename, yaml_slice_path)
         yaml_full_dir=os.path.dirname(yaml_full_path)
         if not os.path.isdir(yaml_full_dir):
             try: os.makedirs(yaml_full_dir)
             except OSError: pass
 
-        config_changes=tools.replace_file_with_string(yaml_full_path,yaml_contents)
-        logger.log("yaml_contents length=%d, config_changes=%r"%(len(yaml_contents),config_changes))
+        config_changes=tools.replace_file_with_string(yaml_full_path, yaml_contents)
+        logger.log("yaml_contents length=%d, config_changes=%r"%(len(yaml_contents), config_changes))
         # would make sense to also check for changes to authorized_keys 
         # would require saving a copy of that some place for comparison
         # xxx todo
@@ -121,19 +112,22 @@ def GetSlivers(data, conf = None, plc = None):
         if config_changes or keys_changes:
             # instead of restarting the service we call a companion script
             try:
+                fetch_trigger_script_if_missing (slicename)
                 # the trigger script actually needs to be run in the slice context of course
-                # xxx we might need to use
-                # slice_command=['bash','-l','-c',omf_rc_trigger_script] 
-                slice_command = [omf_rc_trigger_script]
-                to_run = command_in_slice (slicename, slice_command)
-                sp=subprocess.Popen(to_run, stdout=subprocess.PIPE,stderr=subprocess.STDOUT)
-                (out,err)=sp.communicate()
-                retcod=sp.returncode
-                # we don't wait for that, try to display a retcod for info purpose only
-                # might be None if that config script lasts or hangs whatever
-                logger.log("omf_resctl: %s: called OMF rc control script (imm. retcod=%r)"%(slicename,retcod))
-                logger.log("omf_resctl: got stdout\n%s"%out)
-                logger.log("omf_resctl: got stderr\n%s"%err)
+                # in addition there is a requirement to pretend we run as a login shell
+                # hence sudo -i
+                slice_command = [ "sudo", "-i",  omf_rc_trigger_script ]
+                to_run = tools.command_in_slice (slicename, slice_command)
+                log_filename = "/vservers/%s/%s"%(slicename, omf_rc_trigger_log)
+                logger.log("omf_resctl: starting %s"%to_run)
+                logger.log("redirected into %s"%log_filename)
+                logger.log("*not* waiting for completion..")
+                with open(log_filename, "a") as log_file:
+                    subprocess.Popen(to_run, stdout=log_file, stderr=subprocess.STDOUT)
+                # a first version tried to 'communicate' on that subprocess instance
+                # but that tended to create deadlocks in some cases
+                # causing nodemanager to stall...
+                # we're only losing the child's retcod, no big deal
             except:
                 import traceback
                 traceback.print_exc()