#
# Author: Alina Quereilhac <alina.quereilhac@inria.fr>
-from nepi.execution.attribute import Attribute, Flags
-from nepi.execution.resource import ResourceManager, clsinit, ResourceState, \
- reschedule_delay
+from nepi.execution.attribute import Attribute, Flags, Types
+from nepi.execution.resource import ResourceManager, clsinit_copy, \
+ ResourceState, reschedule_delay
from nepi.resources.linux import rpmfuncs, debfuncs
from nepi.util import sshfuncs, execfuncs
from nepi.util.sshfuncs import ProcStatus
UBUNTU = "ubuntu"
DEBIAN = "debian"
-@clsinit
+@clsinit_copy
class LinuxNode(ResourceManager):
"""
.. class:: Class Args :
"""
_rtype = "LinuxNode"
+ _help = "Controls Linux host machines ( either localhost or a host " \
+ "that can be accessed using a SSH key)"
+ _backend_type = "linux"
@classmethod
def _register_attributes(cls):
hostname = Attribute("hostname", "Hostname of the machine",
- flags = Flags.ExecReadOnly)
+ flags = Flags.Design)
username = Attribute("username", "Local account username",
flags = Flags.Credential)
- port = Attribute("port", "SSH port", flags = Flags.ExecReadOnly)
+ port = Attribute("port", "SSH port", flags = Flags.Design)
home = Attribute("home",
"Experiment home directory to store all experiment related files",
- flags = Flags.ExecReadOnly)
+ flags = Flags.Design)
identity = Attribute("identity", "SSH identity file",
flags = Flags.Credential)
server_key = Attribute("serverKey", "Server public key",
- flags = Flags.ExecReadOnly)
+ flags = Flags.Design)
clean_home = Attribute("cleanHome", "Remove all nepi files and directories "
" from node home folder before starting experiment",
- flags = Flags.ExecReadOnly)
+ type = Types.Bool,
+ default = False,
+ flags = Flags.Design)
clean_experiment = Attribute("cleanExperiment", "Remove all files and directories "
" from a previous same experiment, before the new experiment starts",
- flags = Flags.ExecReadOnly)
+ type = Types.Bool,
+ default = False,
+ flags = Flags.Design)
clean_processes = Attribute("cleanProcesses",
"Kill all running processes before starting experiment",
- flags = Flags.ExecReadOnly)
+ type = Types.Bool,
+ default = False,
+ flags = Flags.Design)
tear_down = Attribute("tearDown", "Bash script to be executed before " + \
"releasing the resource",
- flags = Flags.ExecReadOnly)
+ flags = Flags.Design)
+
+ gateway_user = Attribute("gatewayUser", "Gateway account username",
+ flags = Flags.Design)
+
+ gateway = Attribute("gateway", "Hostname of the gateway machine",
+ flags = Flags.Design)
cls._register_attribute(hostname)
cls._register_attribute(username)
cls._register_attribute(clean_experiment)
cls._register_attribute(clean_processes)
cls._register_attribute(tear_down)
+ cls._register_attribute(gateway_user)
+ cls._register_attribute(gateway)
def __init__(self, ec, guid):
super(LinuxNode, self).__init__(ec, guid)
self._os = None
# home directory at Linux host
self._home_dir = ""
-
+
# lock to prevent concurrent applications on the same node,
# to execute commands at the same time. There are potential
# concurrency issues when using SSH to a same host from
if self._os:
return self._os
- if (not self.get("hostname") or not self.get("username")):
+ if self.get("hostname") not in ["localhost", "127.0.0.1"] and \
+ not self.get("username"):
msg = "Can't resolve OS, insufficient data "
self.error(msg)
raise RuntimeError, msg
self._os = OSType.FEDORA_12
elif out.find("Fedora release 14") == 0:
self._os = OSType.FEDORA_14
+ elif out.find("Fedora release") == 0:
+ self._os = OSType.FEDORA
elif out.find("Debian") == 0:
self._os = OSType.DEBIAN
elif out.find("Ubuntu") ==0:
# To work arround this, repeat the operation N times or
# until the result is not empty string
out = ""
- retrydelay = 1.0
- for i in xrange(10):
- try:
- (out, err), proc = self.execute("cat /etc/issue",
- retry = 5,
- with_lock = True,
- blocking = True)
-
- if out.strip() != "":
- return out
- except:
- trace = traceback.format_exc()
- msg = "Error detecting OS: %s " % trace
- self.error(msg, out, err)
- return False
-
- time.sleep(min(30.0, retrydelay))
- retrydelay *= 1.5
-
+ try:
+ (out, err), proc = self.execute("cat /etc/issue",
+ with_lock = True,
+ blocking = True)
+ except:
+ trace = traceback.format_exc()
+ msg = "Error detecting OS: %s " % trace
+ self.error(msg, out, err)
+
+ return out
@property
def use_deb(self):
def localhost(self):
    """ Tell whether this node is the local machine.

    Returns True when the 'hostname' attribute is one of the usual
    loopback names ('localhost', '127.0.0.1' or '::1'), False otherwise.
    """
    # '127.0.0.1' is the canonical IPv4 loopback address. The former
    # '127.0.0.7' entry was a typo: a node configured with '127.0.0.1'
    # was not recognised as local and was handled as a remote host.
    return self.get("hostname") in ['localhost', '127.0.0.1', '::1']
- def provision(self):
+ def do_provision(self):
# check if host is alive
if not self.is_alive():
- self.fail()
-
msg = "Deploy failed. Unresponsive node %s" % self.get("hostname")
self.error(msg)
raise RuntimeError, msg
# Create experiment node home directory
self.mkdir(self.node_home)
- super(LinuxNode, self).provision()
+ super(LinuxNode, self).do_provision()
- def deploy(self):
+ def do_deploy(self):
if self.state == ResourceState.NEW:
- try:
- self.discover()
- self.provision()
- except:
- self.fail()
- raise
+ self.info("Deploying node")
+ self.do_discover()
+ self.do_provision()
# Node needs to wait until all associated interfaces are
# ready before it can finalize deployment
from nepi.resources.linux.interface import LinuxInterface
- ifaces = self.get_connected(LinuxInterface.rtype())
+ ifaces = self.get_connected(LinuxInterface.get_rtype())
for iface in ifaces:
if iface.state < ResourceState.READY:
self.ec.schedule(reschedule_delay, self.deploy)
return
- super(LinuxNode, self).deploy()
+ super(LinuxNode, self).do_deploy()
- def release(self):
- # Node needs to wait until all associated RMs are released
- # to be released
+ def do_release(self):
rms = self.get_connected()
for rm in rms:
- if rm.state < ResourceState.STOPPED:
+ # Node needs to wait until all associated RMs are released
+ # before it can be released
+ if rm.state != ResourceState.RELEASED:
self.ec.schedule(reschedule_delay, self.release)
return
self.clean_processes()
- super(LinuxNode, self).release()
+ super(LinuxNode, self).do_release()
def valid_connection(self, guid):
    """ Tell whether a connection with 'guid' is acceptable for this node.

    No validation is implemented yet: the argument is ignored and True
    is always returned.

    guid    presumably the guid of the resource to be connected to this
            node -- TODO confirm against the callers
    """
    # TODO: Validate!
    return True
def clean_processes(self):
    """ Kill left-over processes on the host.

    Nothing is executed when the 'hostname' attribute designates the
    local machine.

    For a non-root account, tcpdump, every process whose 'ps aux' line
    matches 'nepi' and every process owned by the account are killed
    through sudo.

    For the root account, tcpdump and the 'nepi' processes are killed.
    Once the node state is READY or beyond, the (pid, command) pairs
    reported by 'ps' on the host that are not present in the dictionary
    pickled in /tmp/save.proc are killed as well.
    """
    self.info("Cleaning up processes")

    # Never run the kill commands against the local machine.
    # "127.0.0.1" is the canonical IPv4 loopback address (the previous
    # "127.0.0.2" entry was a typo) and "::1" is its IPv6 counterpart.
    if self.get("hostname") in ("localhost", "127.0.0.1", "::1"):
        return

    if self.get("username") != 'root':
        cmd = ("sudo -S killall tcpdump || /bin/true ; " +
            "sudo -S kill $(ps aux | grep '[n]epi' | awk '{print $2}') || /bin/true ; " +
            "sudo -S killall -u %s || /bin/true ; " % self.get("username"))
    elif self.state >= ResourceState.READY:
        import pickle

        # NOTE(review): pickle must only be fed trusted data, and
        # /tmp/save.proc lives in a world-writable directory. Presumably
        # it is a {pid: command} snapshot written elsewhere in this
        # module -- TODO confirm who writes it.
        with open("/tmp/save.proc", "rb") as snapshot:
            saved_pids = pickle.load(snapshot)

        ps_aux = "ps aux |awk '{print $2,$11}'"
        (out, err), proc = self.execute(ps_aux)

        current_pids = dict()
        for line in out.strip().split("\n"):
            parts = line.strip().split(" ")
            # Skip blank or truncated lines (e.g. empty SSH output)
            # instead of failing with an IndexError
            if len(parts) < 2:
                continue
            current_pids[parts[0]] = parts[1]

        # Keep only the (pid, command) pairs absent from the snapshot
        kill_pids = set(current_pids.items()) - set(saved_pids.items())
        kill_pids = ' '.join(dict(kill_pids).keys())

        cmd = ("killall tcpdump || /bin/true ; " +
            "kill $(ps aux | grep '[n]epi' | awk '{print $2}') || /bin/true ; " +
            "kill %s || /bin/true ; " % kill_pids)
    else:
        cmd = ("killall tcpdump || /bin/true ; " +
            "kill $(ps aux | grep '[n]epi' | awk '{print $2}') || /bin/true ; ")

    (out, err), proc = self.execute(cmd, retry = 1, with_lock = True)
def clean_home(self):
""" Cleans all NEPI related folders in the Linux host
"""
def execute(self, command,
sudo = False,
- stdin = None,
env = None,
tty = False,
forward_x11 = False,
- timeout = None,
retry = 3,
- err_on_timeout = True,
connect_timeout = 30,
strict_host_checking = False,
persistent = True,
if self.localhost:
(out, err), proc = execfuncs.lexec(command,
- user = user,
+ user = self.get("username"), # still problem with localhost
sudo = sudo,
- stdin = stdin,
env = env)
else:
if with_lock:
+ # If the execute command is blocking, we don't want to keep
+ # the node lock. This lock is used to avoid race conditions
+ # when creating the ControlMaster sockets. A more elegant
+ # solution is needed.
with self._node_lock:
(out, err), proc = sshfuncs.rexec(
command,
host = self.get("hostname"),
user = self.get("username"),
port = self.get("port"),
+ gwuser = self.get("gatewayUser"),
+ gw = self.get("gateway"),
agent = True,
sudo = sudo,
- stdin = stdin,
identity = self.get("identity"),
server_key = self.get("serverKey"),
env = env,
tty = tty,
forward_x11 = forward_x11,
- timeout = timeout,
retry = retry,
- err_on_timeout = err_on_timeout,
connect_timeout = connect_timeout,
persistent = persistent,
blocking = blocking,
host = self.get("hostname"),
user = self.get("username"),
port = self.get("port"),
+ gwuser = self.get("gatewayUser"),
+ gw = self.get("gateway"),
agent = True,
sudo = sudo,
- stdin = stdin,
identity = self.get("identity"),
server_key = self.get("serverKey"),
env = env,
tty = tty,
forward_x11 = forward_x11,
- timeout = timeout,
retry = retry,
- err_on_timeout = err_on_timeout,
connect_timeout = connect_timeout,
persistent = persistent,
blocking = blocking,
self.debug("Running command '%s'" % command)
if self.localhost:
- (out, err), proc = execfuncs.lspawn(command, pidfile,
- stdout = stdout,
- stderr = stderr,
- stdin = stdin,
+ (out, err), proc = execfuncs.lspawn(command, pidfile,
home = home,
create_home = create_home,
- sudo = sudo,
- user = user)
+ stdin = stdin or '/dev/null',
+ stdout = stdout or '/dev/null',
+ stderr = stderr or '/dev/null',
+ sudo = sudo)
else:
with self._node_lock:
(out, err), proc = sshfuncs.rspawn(
pidfile = pidfile,
home = home,
create_home = create_home,
- stdin = stdin if stdin is not None else '/dev/null',
- stdout = stdout if stdout else '/dev/null',
- stderr = stderr if stderr else '/dev/null',
+ stdin = stdin or '/dev/null',
+ stdout = stdout or '/dev/null',
+ stderr = stderr or '/dev/null',
sudo = sudo,
host = self.get("hostname"),
user = self.get("username"),
port = self.get("port"),
+ gwuser = self.get("gatewayUser"),
+ gw = self.get("gateway"),
agent = True,
identity = self.get("identity"),
server_key = self.get("serverKey"),
host = self.get("hostname"),
user = self.get("username"),
port = self.get("port"),
+ gwuser = self.get("gatewayUser"),
+ gw = self.get("gateway"),
agent = True,
identity = self.get("identity"),
server_key = self.get("serverKey")
host = self.get("hostname"),
user = self.get("username"),
port = self.get("port"),
+ gwuser = self.get("gatewayUser"),
+ gw = self.get("gateway"),
agent = True,
identity = self.get("identity"),
server_key = self.get("serverKey")
host = self.get("hostname"),
user = self.get("username"),
port = self.get("port"),
+ gwuser = self.get("gatewayUser"),
+ gw = self.get("gateway"),
agent = True,
sudo = sudo,
identity = self.get("identity"),
def copy(self, src, dst):
if self.localhost:
- (out, err), proc = execfuncs.lcopy(source, dest,
- recursive = True,
- strict_host_checking = False)
+ (out, err), proc = execfuncs.lcopy(src, dst,
+ recursive = True)
else:
with self._node_lock:
(out, err), proc = sshfuncs.rcopy(
src, dst,
port = self.get("port"),
+ gwuser = self.get("gatewayUser"),
+ gw = self.get("gateway"),
identity = self.get("identity"),
server_key = self.get("serverKey"),
recursive = True,
return (out, err), proc
-
- def upload(self, src, dst, text = False, overwrite = True):
+ def upload(self, src, dst, text = False, overwrite = True,
+ raise_on_error = True):
""" Copy content to destination
- src content to copy. Can be a local file, directory or a list of files
+ src string with the content to copy. Can be:
+ - plain text
+ - a string with the path to a local file
+ - a string with a semi-colon separated list of local files
+ - a string with a local directory
- dst destination path on the remote host (remote is always self.host)
+ dst string with destination path on the remote host (remote is
+ always self.host)
- text src is text input, it must be stored into a temp file before uploading
+ text src is text input, it must be stored into a temp file before
+ uploading
"""
# If source is a string input
f = None
src = f.name
# If dst files should not be overwritten, check that the files do not
- # exits already
+ # exist already
+ if isinstance(src, str):
+ src = map(str.strip, src.split(";"))
+
if overwrite == False:
src = self.filter_existing_files(src, dst)
if not src:
- return ("", ""), None
+ return ("", ""), None
if not self.localhost:
# Build destination as <user>@<server>:<path>
dst = "%s@%s:%s" % (self.get("username"), self.get("hostname"), dst)
- result = self.copy(src, dst)
+ ((out, err), proc) = self.copy(src, dst)
# clean up temp file
if f:
os.remove(f.name)
- return result
+ if err:
+ msg = " Failed to upload files - src: %s dst: %s" % (";".join(src), dst)
+ self.error(msg, out, err)
+
+ if raise_on_error:
+ raise RuntimeError, msg
+
+ return ((out, err), proc)
def download(self, src, dst, raise_on_error = True):
    """ Copy files from the host to a local destination

    src     string with the path to copy from the host

    dst     string with the local destination path

    raise_on_error  when True, a RuntimeError is raised if the copy
                    produced error output

    Returns the ((out, err), proc) tuple given back by self.copy
    """
    if not self.localhost:
        # Build source as <user>@<server>:<path>
        src = "%s@%s:%s" % (self.get("username"), self.get("hostname"), src)

    ((out, err), proc) = self.copy(src, dst)

    if err:
        # src is a plain string here; joining it with ";" would insert
        # a semicolon between every character of the path. Only join
        # when a list of sources was really given.
        if isinstance(src, (list, tuple)):
            sources = ";".join(src)
        else:
            sources = src

        msg = " Failed to download files - src: %s dst: %s" % (sources, dst)
        self.error(msg, out, err)

        if raise_on_error:
            raise RuntimeError(msg)

    return ((out, err), proc)
def install_packages_command(self, packages):
command = ""
return command
- def install_packages(self, packages, home, run_home = None):
+ def install_packages(self, packages, home, run_home = None,
+ raise_on_error = True):
""" Install packages in the Linux host.
'home' is the directory to upload the package installation script.
stdout = "instpkg_stdout",
stderr = "instpkg_stderr",
overwrite = False,
- raise_on_error = True)
+ raise_on_error = raise_on_error)
return (out, err), proc
- def remove_packages(self, packages, home, run_home = None):
+ def remove_packages(self, packages, home, run_home = None,
+ raise_on_error = True):
""" Uninstall packages from the Linux host.
'home' is the directory to upload the package un-installation script.
stdout = "rmpkg_stdout",
stderr = "rmpkg_stderr",
overwrite = False,
- raise_on_error = True)
+ raise_on_error = raise_on_error)
return (out, err), proc
stderr = "stderr",
sudo = False,
tty = False,
- raise_on_error = False):
+ raise_on_error = True):
"""
Uploads the 'command' to a bash script in the host.
Then runs the script detached in background in the host, and
pid = ppid = None
delay = 1.0
- for i in xrange(4):
+ for i in xrange(2):
pidtuple = self.getpid(home = home, pidfile = pidfile)
if pidtuple:
return True
out = err = ""
+ msg = "Unresponsive host. Wrong answer. "
+
# The underlying SSH layer will sometimes return an empty
# output (even if the command was executed without errors).
# To work arround this, repeat the operation N times or
# until the result is not empty string
- retrydelay = 1.0
- for i in xrange(10):
- try:
- (out, err), proc = self.execute("echo 'ALIVE'",
- retry = 5,
- blocking = True,
- with_lock = True)
-
- if out.find("ALIVE") > -1:
- return True
- except:
- trace = traceback.format_exc()
- msg = "Unresponsive host. Error reaching host: %s " % trace
- self.error(msg, out, err)
- return False
+ try:
+ (out, err), proc = self.execute("echo 'ALIVE'",
+ blocking = True,
+ with_lock = True)
+
+ if out.find("ALIVE") > -1:
+ return True
+ except:
+ trace = traceback.format_exc()
+ msg = "Unresponsive host. Error reaching host: %s " % trace
- time.sleep(min(30.0, retrydelay))
- retrydelay *= 1.5
-
- if out.find("ALIVE") > -1:
- return True
- else:
- msg = "Unresponsive host. Wrong answer. "
- self.error(msg, out, err)
- return False
+ self.error(msg, out, err)
+ return False
def find_home(self):
""" Retrieves host home directory
# output (even if the command was executed without errors).
# To work arround this, repeat the operation N times or
# until the result is not empty string
- retrydelay = 1.0
- for i in xrange(10):
- try:
- (out, err), proc = self.execute("echo ${HOME}",
- retry = 5,
- blocking = True,
- with_lock = True)
-
- if out.strip() != "":
- self._home_dir = out.strip()
- break
- except:
- trace = traceback.format_exc()
- msg = "Impossible to retrieve HOME directory" % trace
- self.error(msg, out, err)
- return False
-
- time.sleep(min(30.0, retrydelay))
- retrydelay *= 1.5
+ msg = "Impossible to retrieve HOME directory"
+ try:
+ (out, err), proc = self.execute("echo ${HOME}",
+ blocking = True,
+ with_lock = True)
+
+ if out.strip() != "":
+ self._home_dir = out.strip()
+ except:
+ trace = traceback.format_exc()
+ msg = "Impossible to retrieve HOME directory %s" % trace
if not self._home_dir:
- msg = "Impossible to retrieve HOME directory"
- self.error(msg, out, err)
+ self.error(msg)
raise RuntimeError, msg
def filter_existing_files(self, src, dst):
""" Removes files that already exist in the Linux host from src list
"""
# construct a dictionary with { dst: src }
- dests = dict(map(lambda x: ( os.path.join(dst, os.path.basename(x) ), x ),
- src.strip().split(" ") ) ) if src.strip().find(" ") != -1 else dict({dst: src})
+ dests = dict(map(
+ lambda s: (os.path.join(dst, os.path.basename(s)), s ), s)) \
+ if len(src) > 1 else dict({dst: src[0]})
command = []
for d in dests.keys():
del dests[d]
if not dests:
- return ""
+ return []
- return " ".join(dests.values())
+ return dests.values()