# Author: Alina Quereilhac <alina.quereilhac@inria.fr>
from nepi.execution.attribute import Attribute, Flags
-from nepi.execution.resource import ResourceManager, clsinit, ResourceState
+from nepi.execution.resource import ResourceManager, clsinit, ResourceState, \
+ reschedule_delay
from nepi.resources.linux import rpmfuncs, debfuncs
from nepi.util import sshfuncs, execfuncs
from nepi.util.sshfuncs import ProcStatus
# TODO: Unify delays!!
# TODO: Validate outcome of uploads!!
-reschedule_delay = "0.5s"
class ExitCode:
"""
ERROR = -3
OK = 0
class OSType:
    """
    Identifiers for the Linux distributions a node may run.

    The values are the short strings recorded as the node's detected OS
    and compared against when selecting a package manager.
    """
    # Debian-based identifiers
    DEBIAN = "debian"
    UBUNTU = "ubuntu"

    # Fedora identifiers: the generic one first, then specific releases
    FEDORA = "fedora"
    FEDORA_8 = "f8"
    FEDORA_12 = "f12"
    FEDORA_14 = "f14"
+
@clsinit
class LinuxNode(ResourceManager):
+ """
+ .. class:: Class Args :
+
+ :param ec: The Experiment controller
+ :type ec: ExperimentController
+ :param guid: guid of the RM
+ :type guid: int
+
+ .. note::
+
+ There are different ways in which commands can be executed using the
+ LinuxNode interface (i.e. 'execute' - blocking and non blocking, 'run',
+ 'run_and_wait').
+
+ Brief explanation:
+
+ * 'execute' (blocking mode) :
+
+ HOW IT WORKS: 'execute' forks a process and runs the
+ command, synchronously, attached to the terminal, in
+ foreground.
+ The execute method will block until the command returns
+ the result on 'out', 'err' (so until it finishes executing).
+
+ USAGE: short-lived commands that must be executed attached
+ to a terminal and in foreground, for which it IS necessary
+ to block until the command has finished (e.g. if you want
+ to run 'ls' or 'cat').
+
+ * 'execute' (NON blocking mode - blocking = False) :
+
+ HOW IT WORKS: Same as before, except that execute method
+ will return immediately (even if command still running).
+
+ USAGE: long-lived commands that must be executed attached
+ to a terminal and in foreground, but for which it is not
+ necessary to block until the command has finished. (e.g.
+ start an application using X11 forwarding)
+
+ * 'run' :
+
+ HOW IT WORKS: Connects to the host ( using SSH if remote)
+ and launches the command in background, detached from any
+ terminal (daemonized), and returns. The command continues to
+ run remotely, but since it is detached from the terminal,
+ its pipes (stdin, stdout, stderr) can't be redirected to the
+ console (as normal non detached processes would), and so they
+ are explicitly redirected to files. The pidfile is created as
+ part of the process of launching the command. The pidfile
+ holds the pid and ppid of the process forked in background,
+ so later on it is possible to check whether the command is still
+ running.
+
+ USAGE: long-lived commands that can run detached in background,
+ for which it is NOT necessary to block (wait) until the command
+ has finished. (e.g. start an application that is not using X11
+ forwarding. It can run detached and remotely in background)
+
+ * 'run_and_wait' :
+
+ HOW IT WORKS: Similar to 'run' except that it 'blocks' until
+ the command has finished execution. It also checks whether
+ errors occurred during runtime by reading the exitcode file,
+ which contains the exit code of the command that was run
+ (checking stderr only is not always reliable since many
+ commands throw debugging info to stderr and the only way to
+ automatically know whether an error really happened is to
+ check the process exit code).
+
+ Another difference with respect to 'run', is that instead
+ of directly executing the command as a bash command line,
+ it uploads the command to a bash script and runs the script.
+ This makes it possible to use the bash script to debug errors, since
+ it remains at the remote host and can be run manually to
+ reproduce the error.
+
+ USAGE: medium-lived commands that can run detached in
+ background, for which it IS necessary to block (wait) until
+ the command has finished. (e.g. Package installation,
+ source compilation, file download, etc)
+
+ """
_rtype = "LinuxNode"
@classmethod
self.error(msg, out, err)
raise RuntimeError, "%s - %s - %s" %( msg, out, err )
- if out.find("Fedora release 12") == 0:
- self._os = "f12"
+ if out.find("Fedora release 8") == 0:
+ self._os = OSType.FEDORA_8
+ elif out.find("Fedora release 12") == 0:
+ self._os = OSType.FEDORA_12
elif out.find("Fedora release 14") == 0:
- self._os = "f14"
+ self._os = OSType.FEDORA_14
elif out.find("Debian") == 0:
- self._os = "debian"
+ self._os = OSType.DEBIAN
elif out.find("Ubuntu") ==0:
- self._os = "ubuntu"
+ self._os = OSType.UBUNTU
else:
msg = "Unsupported OS"
self.error(msg, out)
return self._os
@property
def use_deb(self):
    """True when the node's OS is OSType.DEBIAN or OSType.UBUNTU."""
    deb_flavors = (OSType.DEBIAN, OSType.UBUNTU)
    return self.os in deb_flavors
+
@property
def use_rpm(self):
    """True when the node's OS is one of the Fedora OSType identifiers."""
    rpm_flavors = (
        OSType.FEDORA_12,
        OSType.FEDORA_14,
        OSType.FEDORA_8,
        OSType.FEDORA,
    )
    return self.os in rpm_flavors
+
@property
def localhost(self):
    """
    Tell whether the node's "hostname" attribute designates the local
    machine.

    :returns: True if the hostname is 'localhost' or one of the
        recognized loopback address literals, False otherwise.
    :rtype: bool
    """
    # '127.0.0.1' is the standard IPv4 loopback address and was missing
    # from the list. '127.0.0.7' is retained so that any hostname that
    # matched before still matches.
    return self.get("hostname") in ['localhost', '127.0.0.1', '127.0.0.7',
            '::1']
def deploy(self):
if self.state == ResourceState.NEW:
try:
- self.discover()
- self.provision()
+ self.discover()
+ self.provision()
except:
self._state = ResourceState.FAILED
raise
if not self.localhost:
# Build destination as <user>@<server>:<path>
dst = "%s@%s:%s" % (self.get("username"), self.get("hostname"), dst)
-
result = self.copy(src, dst)
# clean up temp file
def install_packages(self, packages, home):
command = ""
- if self.os in ["f12", "f14"]:
+ if self.use_rpm:
command = rpmfuncs.install_packages_command(self.os, packages)
- elif self.os in ["debian", "ubuntu"]:
+ elif self.use_deb:
command = debfuncs.install_packages_command(self.os, packages)
else:
msg = "Error installing packages ( OS not known ) "
def remove_packages(self, packages, home):
command = ""
- if self.os in ["f12", "f14"]:
+ if self.use_rpm:
command = rpmfuncs.remove_packages_command(self.os, packages)
- elif self.os in ["debian", "ubuntu"]:
+ elif self.use_deb:
command = debfuncs.remove_packages_command(self.os, packages)
else:
msg = "Error removing packages ( OS not known ) "
def run_and_wait(self, command, home,
shfile = "cmd.sh",
+ env = None,
pidfile = "pidfile",
ecodefile = "exitcode",
stdin = None,
sudo = False,
tty = False,
raise_on_error = False):
- """
- runs a command in background on the remote host, busy-waiting
- until the command finishes execution.
- This is more robust than doing a simple synchronized 'execute',
- since in the remote host the command can continue to run detached
- even if network disconnections occur
"""
- self.upload_command(command, home, shfile, ecodefile)
+ Uploads the 'command' to a bash script in the host.
+ Then runs the script detached in background in the host, and
+ busy-waits until the script finishes executing.
+ """
+ self.upload_command(command, home,
+ shfile = shfile,
+ ecodefile = ecodefile,
+ env = env)
command = "bash ./%s" % shfile
# run command in background in remote host
tty = tty)
# check no errors occurred
- if proc.poll() and err:
+ if proc.poll():
msg = " Failed to run command '%s' " % command
self.error(msg, out, err)
if raise_on_error:
# wait until command finishes to execute
self.wait_run(pid, ppid)
- (out, err), proc = self.check_errors(home, ecodefile, stderr)
+ (out, err), proc = self.check_errors(home,
+ ecodefile = ecodefile,
+ stdout = stdout,
+ stderr= stderr)
# 'err' is what was written in the stderr file
- if out or err:
+ if err:
msg = " Failed to run command '%s' " % command
self.error(msg, out, err)
shfile = "cmd.sh",
ecodefile = "exitcode",
env = None):
+ """ Saves the command as a bash script file in the remote host, and
+ forces to save the exit code of the command execution to the ecodefile
+ """
- command = "{ ( %(command)s ) ; } ; echo $? > %(ecodefile)s " % {
+ if not (command.strip().endswith(";") or command.strip().endswith("&")):
+ command += ";"
+
+ # The exit code of the command will be stored in ecodefile
+ command = " { %(command)s } ; echo $? > %(ecodefile)s ;" % {
'command': command,
'ecodefile': ecodefile,
}
# Export environment
- environ = ""
- if env:
- for var in env.split(" "):
- environ += 'export %s\n' % var
+ environ = self.format_environment(env)
+ # Add environ to command
command = environ + command
dst = os.path.join(home, shfile)
return self.upload(command, dst, text = True)
def format_environment(self, env, inline = False):
    """Format environment variables as 'export' statements.

    The statements are produced either for an inline command
    (i.e. export PYTHONPATH=src/..; export LALAL= ..;python script.py) or
    for a bash script (i.e. export PYTHONPATH=src/.. \\n export LALA=.. \\n)

    :param env: Whitespace separated VAR=value definitions. May be None
        or empty.
    :type env: str
    :param inline: If True the statements are separated by ';',
        otherwise by a newline.
    :type inline: bool

    :returns: One ' export VAR=value' statement per definition, each
        followed by the separator, or "" when there is nothing to export.
    :rtype: str
    """
    # split() without arguments splits on runs of whitespace and drops
    # empty fields, so no separate whitespace normalization is needed
    variables = env.split() if env else []

    # A whitespace-only 'env' must not produce a bare ' export '
    # statement
    if not variables:
        return ""

    sep = ";" if inline else "\n"
    return sep.join([" export %s" % var for var in variables]) + sep
+
def check_errors(self, home,
ecodefile = "exitcode",
+ stdout = "stdout",
stderr = "stderr"):
"""
Checks whether errors occurred while running a command.
It first checks the exit code for the command, and only if the
exit code is an error one it returns the error output.
+
"""
- out = err = ""
proc = None
+ err = ""
+ # retrieve standard output from the file
+ (out, oerr), oproc = self.check_output(home, stdout)
- # get Exit code
+ # get exit code saved in the 'exitcode' file
ecode = self.exitcode(home, ecodefile)
if ecode in [ ExitCode.CORRUPTFILE, ExitCode.ERROR ]:
elif ecode > 0 or ecode == ExitCode.FILENOTFOUND:
# The process returned an error code or didn't exist.
# Check standard error.
- (out, err), proc = self.check_output(home, stderr)
+ (err, eerr), proc = self.check_output(home, stderr)
+
+ # If the stderr file was not found, assume nothing bad happened,
+ # and just ignore the error.
+ # (cat returns 1 for error "No such file or directory")
+ if ecode == ExitCode.FILENOTFOUND and proc.poll() == 1:
+ err = ""
- # If the stderr file was not found, assume nothing happened.
- # We just ignore the error.
- if ecode == ExitCode.FILENOTFOUND and proc.poll() == 1: # cat - No such file or directory
- err = ""
-
return (out, err), proc
def wait_pid(self, home, pidfile = "pidfile", raise_on_error = False):
connect_timeout = 30,
strict_host_checking = False,
persistent = True,
+ blocking = True,
with_lock = False
):
""" Notice that this invocation will block until the
err_on_timeout = err_on_timeout,
connect_timeout = connect_timeout,
persistent = persistent,
+ blocking = blocking,
strict_host_checking = strict_host_checking
)
else:
retry = retry,
err_on_timeout = err_on_timeout,
connect_timeout = connect_timeout,
- persistent = persistent
+ persistent = persistent,
+ blocking = blocking,
+ strict_host_checking = strict_host_checking
)
return (out, err), proc