added a cleanProcessesAfter attribute to the linux::Node class, which defaults to...
[nepi.git] / src / nepi / resources / linux / node.py
index fce5e6d..b0d63c9 100644 (file)
@@ -3,9 +3,8 @@
 #    Copyright (C) 2013 INRIA
 #
 #    This program is free software: you can redistribute it and/or modify
-#    it under the terms of the GNU General Public License as published by
-#    the Free Software Foundation, either version 3 of the License, or
-#    (at your option) any later version.
+#    it under the terms of the GNU General Public License version 2 as
+#    published by the Free Software Foundation;
 #
 #    This program is distributed in the hope that it will be useful,
 #    but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -19,7 +18,7 @@
 
 from nepi.execution.attribute import Attribute, Flags, Types
 from nepi.execution.resource import ResourceManager, clsinit_copy, \
-        ResourceState, reschedule_delay
+        ResourceState
 from nepi.resources.linux import rpmfuncs, debfuncs 
 from nepi.util import sshfuncs, execfuncs
 from nepi.util.sshfuncs import ProcStatus
@@ -50,12 +49,12 @@ class OSType:
     """
     Supported flavors of Linux OS
     """
-    FEDORA_8 = "f8"
-    FEDORA_12 = "f12"
-    FEDORA_14 = "f14"
-    FEDORA = "fedora"
-    UBUNTU = "ubuntu"
-    DEBIAN = "debian"
+    DEBIAN = 1 
+    UBUNTU = 1 << 1 
+    FEDORA = 1 << 2
+    FEDORA_8 = 1 << 3 | FEDORA 
+    FEDORA_12 = 1 << 4 | FEDORA 
+    FEDORA_14 = 1 << 5 | FEDORA 
 
 @clsinit_copy
 class LinuxNode(ResourceManager):
@@ -141,10 +140,10 @@ class LinuxNode(ResourceManager):
                     source compilation, file download, etc)
 
     """
-    _rtype = "LinuxNode"
+    _rtype = "linux::Node"
     _help = "Controls Linux host machines ( either localhost or a host " \
             "that can be accessed using a SSH key)"
-    _backend_type = "linux"
+    _platform = "linux"
 
     @classmethod
     def _register_attributes(cls):
@@ -184,6 +183,13 @@ class LinuxNode(ResourceManager):
                 default = False,
                 flags = Flags.Design)
         
+        clean_processes_after = Attribute("cleanProcessesAfter", 
+                """Kill all running processes after starting experiment
+This might be dangerous when using user root""",
+                type = Types.Bool,
+                default = True,
+                flags = Flags.Design)
+        
         tear_down = Attribute("tearDown", "Bash script to be executed before " + \
                 "releasing the resource",
                 flags = Flags.Design)
@@ -194,6 +200,10 @@ class LinuxNode(ResourceManager):
         gateway = Attribute("gateway", "Hostname of the gateway machine",
                 flags = Flags.Design)
 
+        ip = Attribute("ip", "Linux host public IP address. "
+                   "Must not be modified by the user unless hostname is 'localhost'",
+                    flags = Flags.Design)
+
         cls._register_attribute(hostname)
         cls._register_attribute(username)
         cls._register_attribute(port)
@@ -203,9 +213,11 @@ class LinuxNode(ResourceManager):
         cls._register_attribute(clean_home)
         cls._register_attribute(clean_experiment)
         cls._register_attribute(clean_processes)
+        cls._register_attribute(clean_processes_after)
         cls._register_attribute(tear_down)
         cls._register_attribute(gateway_user)
         cls._register_attribute(gateway)
+        cls._register_attribute(ip)
 
     def __init__(self, ec, guid):
         super(LinuxNode, self).__init__(ec, guid)
@@ -285,18 +297,18 @@ class LinuxNode(ResourceManager):
 
         out = self.get_os()
 
-        if out.find("Fedora release 8") == 0:
-            self._os = OSType.FEDORA_8
-        elif out.find("Fedora release 12") == 0:
-            self._os = OSType.FEDORA_12
-        elif out.find("Fedora release 14") == 0:
-            self._os = OSType.FEDORA_14
-        elif out.find("Fedora release") == 0:
-            self._os = OSType.FEDORA
-        elif out.find("Debian") == 0: 
+        if out.find("Debian") == 0: 
             self._os = OSType.DEBIAN
         elif out.find("Ubuntu") ==0:
             self._os = OSType.UBUNTU
+        elif out.find("Fedora release") == 0:
+            self._os = OSType.FEDORA
+            if out.find("Fedora release 8") == 0:
+                self._os = OSType.FEDORA_8
+            elif out.find("Fedora release 12") == 0:
+                self._os = OSType.FEDORA_12
+            elif out.find("Fedora release 14") == 0:
+                self._os = OSType.FEDORA_14
         else:
             msg = "Unsupported OS"
             self.error(msg, out)
@@ -323,12 +335,11 @@ class LinuxNode(ResourceManager):
 
     @property
     def use_deb(self):
-        return self.os in [OSType.DEBIAN, OSType.UBUNTU]
+        return (self.os & (OSType.DEBIAN|OSType.UBUNTU))
 
     @property
     def use_rpm(self):
-        return self.os in [OSType.FEDORA_12, OSType.FEDORA_14, OSType.FEDORA_8,
-                OSType.FEDORA]
+        return (self.os & OSType.FEDORA)
 
     @property
     def localhost(self):
@@ -361,6 +372,16 @@ class LinuxNode(ResourceManager):
 
         self.mkdir(paths)
 
+        # Get Public IP address if possible
+        if not self.get("ip"):
+            try:
+                ip = sshfuncs.gethostbyname(self.get("hostname"))
+                self.set("ip", ip)
+            except:
+                if self.get("gateway") is None:
+                    msg = "Local DNS can not resolve hostname %s" % self.get("hostname") 
+                    self.error(msg)
+
         super(LinuxNode, self).do_provision()
 
     def do_deploy(self):
@@ -375,7 +396,7 @@ class LinuxNode(ResourceManager):
         ifaces = self.get_connected(LinuxInterface.get_rtype())
         for iface in ifaces:
             if iface.state < ResourceState.READY:
-                self.ec.schedule(reschedule_delay, self.deploy)
+                self.ec.schedule(self.reschedule_delay, self.deploy)
                 return 
 
         super(LinuxNode, self).do_deploy()
@@ -386,14 +407,15 @@ class LinuxNode(ResourceManager):
             # Node needs to wait until all associated RMs are released
             # before it can be released
             if rm.state != ResourceState.RELEASED:
-                self.ec.schedule(reschedule_delay, self.release)
+                self.ec.schedule(self.reschedule_delay, self.release)
                 return 
 
         tear_down = self.get("tearDown")
         if tear_down:
             self.execute(tear_down)
 
-        self.clean_processes()
+        if self.get("cleanProcessesAfter"):
+            self.clean_processes()
 
         super(LinuxNode, self).do_release()
 
@@ -409,6 +431,7 @@ class LinuxNode(ResourceManager):
         
         if self.get("username") != 'root':
             cmd = ("sudo -S killall tcpdump || /bin/true ; " +
+                "sudo -S kill -9 $(ps aux | grep '[.]nepi' | awk '{print $2}') || /bin/true ; " +
                 "sudo -S killall -u %s || /bin/true ; " % self.get("username"))
         else:
             if self.state >= ResourceState.READY:
@@ -425,14 +448,14 @@ class LinuxNode(ResourceManager):
                     kill_pids = ' '.join(dict(kill_pids).keys())
 
                     cmd = ("killall tcpdump || /bin/true ; " +
-                        "kill $(ps aux | grep '[n]epi' | awk '{print $2}') || /bin/true ; " +
+                        "kill $(ps aux | grep '[.]nepi' | awk '{print $2}') || /bin/true ; " +
                         "kill %s || /bin/true ; " % kill_pids)
                 else:
                     cmd = ("killall tcpdump || /bin/true ; " +
-                        "kill $(ps aux | grep '[n]epi' | awk '{print $2}') || /bin/true ; ")
+                        "kill $(ps aux | grep '[.]nepi' | awk '{print $2}') || /bin/true ; ")
             else:
                 cmd = ("killall tcpdump || /bin/true ; " +
-                    "kill $(ps aux | grep '[n]epi' | awk '{print $2}') || /bin/true ; ")
+                    "kill $(ps aux | grep '[.]nepi' | awk '{print $2}') || /bin/true ; ")
 
         (out, err), proc = self.execute(cmd, retry = 1, with_lock = True)
 
@@ -538,7 +561,8 @@ class LinuxNode(ResourceManager):
             stdout = 'stdout', 
             stderr = 'stderr', 
             sudo = False,
-            tty = False):
+            tty = False,
+            strict_host_checking = False):
         
         self.debug("Running command '%s'" % command)
         
@@ -569,7 +593,8 @@ class LinuxNode(ResourceManager):
                     agent = True,
                     identity = self.get("identity"),
                     server_key = self.get("serverKey"),
-                    tty = tty
+                    tty = tty,
+                    strict_host_checking = strict_host_checking
                     )
 
         return (out, err), proc
@@ -588,7 +613,8 @@ class LinuxNode(ResourceManager):
                     gw = self.get("gateway"),
                     agent = True,
                     identity = self.get("identity"),
-                    server_key = self.get("serverKey")
+                    server_key = self.get("serverKey"),
+                    strict_host_checking = False
                     )
         
         return pidtuple
@@ -607,7 +633,8 @@ class LinuxNode(ResourceManager):
                         gw = self.get("gateway"),
                         agent = True,
                         identity = self.get("identity"),
-                        server_key = self.get("serverKey")
+                        server_key = self.get("serverKey"),
+                        strict_host_checking = False
                         )
            
         return status
@@ -632,7 +659,8 @@ class LinuxNode(ResourceManager):
                         agent = True,
                         sudo = sudo,
                         identity = self.get("identity"),
-                        server_key = self.get("serverKey")
+                        server_key = self.get("serverKey"),
+                        strict_host_checking = False
                         )
 
         return (out, err), proc
@@ -818,17 +846,18 @@ class LinuxNode(ResourceManager):
         return self.execute(cmd, with_lock = True)
         
     def run_and_wait(self, command, home, 
-            shfile = "cmd.sh",
-            env = None,
-            overwrite = True,
-            pidfile = "pidfile", 
-            ecodefile = "exitcode", 
-            stdin = None, 
-            stdout = "stdout", 
-            stderr = "stderr", 
-            sudo = False,
-            tty = False,
-            raise_on_error = True):
+            shfile="cmd.sh",
+            env=None,
+            overwrite=True,
+            wait_run=True,
+            pidfile="pidfile", 
+            ecodefile="exitcode", 
+            stdin=None, 
+            stdout="stdout", 
+            stderr="stderr", 
+            sudo=False,
+            tty=False,
+            raise_on_error=True):
         """
         Uploads the 'command' to a bash script in the host.
         Then runs the script detached in background in the host, and
@@ -867,25 +896,26 @@ class LinuxNode(ResourceManager):
                 pidfile = pidfile, 
                 raise_on_error = raise_on_error)
 
-        # wait until command finishes to execute
-        self.wait_run(pid, ppid)
-      
-        (eout, err), proc = self.check_errors(home,
-            ecodefile = ecodefile,
-            stderr = stderr)
+        if wait_run:
+            # wait until the command finishes executing
+            self.wait_run(pid, ppid)
+          
+            (eout, err), proc = self.check_errors(home,
+                ecodefile = ecodefile,
+                stderr = stderr)
 
-        # Out is what was written in the stderr file
-        if err:
-            msg = " Failed to run command '%s' " % command
-            self.error(msg, eout, err)
+            # Out is what was written in the stderr file
+            if err:
+                msg = " Failed to run command '%s' " % command
+                self.error(msg, eout, err)
 
-            if raise_on_error:
-                raise RuntimeError, msg
+                if raise_on_error:
+                    raise RuntimeError, msg
 
         (out, oerr), proc = self.check_output(home, stdout)
         
         return (out, err), proc
-
+        
     def exitcode(self, home, ecodefile = "exitcode"):
         """
         Get the exit code of an application.
@@ -909,10 +939,10 @@ class LinuxNode(ResourceManager):
         return ExitCode.ERROR
 
     def upload_command(self, command, 
-            shfile = "cmd.sh",
-            ecodefile = "exitcode",
-            overwrite = True,
-            env = None):
+            shfile="cmd.sh",
+            ecodefile="exitcode",
+            overwrite=True,
+            env=None):
         """ Saves the command as a bash script file in the remote host, and
         forces to save the exit code of the command execution to the ecodefile
         """
@@ -932,9 +962,9 @@ class LinuxNode(ResourceManager):
         # Add environ to command
         command = environ + command
 
-        return self.upload(command, shfile, text = True, overwrite = overwrite)
+        return self.upload(command, shfile, text=True, overwrite=overwrite)
 
-    def format_environment(self, env, inline = False):
+    def format_environment(self, env, inline=False):
         """ Formats the environment variables for a command to be executed
         either as an inline command
         (i.e. export PYTHONPATH=src/..; export LALAL= ..;python script.py) or