2 # NEPI, a framework to manage network experiments
3 # Copyright (C) 2013 INRIA
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License version 2 as
7 # published by the Free Software Foundation;
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
14 # You should have received a copy of the GNU General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
17 # Author: Alina Quereilhac <alina.quereilhac@inria.fr>
19 from nepi.execution.attribute import Attribute, Flags, Types
20 from nepi.execution.resource import (ResourceManager, clsinit_copy,
22 from nepi.resources.linux import rpmfuncs, debfuncs
23 from nepi.util import sshfuncs, execfuncs
24 from nepi.util.sshfuncs import ProcStatus
38 # TODO: Unify delays!!
39 # TODO: Validate outcome of uploads!!
43 Error codes that the rexitcode function can return if unable to
44 check the exit code of a spawned process
53 Supported flavors of Linux OS
58 FEDORA_8 = 1 << 3 | FEDORA
59 FEDORA_12 = 1 << 4 | FEDORA
60 FEDORA_14 = 1 << 5 | FEDORA
63 class LinuxNode(ResourceManager):
65 .. class:: Class Args :
67 :param ec: The Experiment controller
68 :type ec: ExperimentController
69 :param guid: guid of the RM
74 There are different ways in which commands can be executed using the
75 LinuxNode interface (i.e. 'execute' - blocking and non blocking, 'run',
80 * 'execute' (blocking mode) :
82 HOW IT WORKS: 'execute', forks a process and run the
83 command, synchronously, attached to the terminal, in
85 The execute method will block until the command returns
86 the result on 'out', 'err' (so until it finishes executing).
88 USAGE: short-lived commands that must be executed attached
89 to a terminal and in foreground, for which it IS necessary
90 to block until the command has finished (e.g. if you want
91 to run 'ls' or 'cat').
93 * 'execute' (NON blocking mode - blocking = False) :
95 HOW IT WORKS: Same as before, except that execute method
96 will return immediately (even if command still running).
98 USAGE: long-lived commands that must be executed attached
99 to a terminal and in foreground, but for which it is not
100 necessary to block until the command has finished. (e.g.
101 start an application using X11 forwarding)
105 HOW IT WORKS: Connects to the host ( using SSH if remote)
106 and launches the command in background, detached from any
107 terminal (daemonized), and returns. The command continues to
108 run remotely, but since it is detached from the terminal,
109 its pipes (stdin, stdout, stderr) can't be redirected to the
110 console (as normal non detached processes would), and so they
111 are explicitly redirected to files. The pidfile is created as
112 part of the process of launching the command. The pidfile
113 holds the pid and ppid of the process forked in background,
114 so later on it is possible to check whether the command is still
117 USAGE: long-lived commands that can run detached in background,
118 for which it is NOT necessary to block (wait) until the command
119 has finished. (e.g. start an application that is not using X11
120 forwarding. It can run detached and remotely in background)
124 HOW IT WORKS: Similar to 'run' except that it 'blocks' until
125 the command has finished execution. It also checks whether
126 errors occurred during runtime by reading the exitcode file,
127 which contains the exit code of the command that was run
128 (checking stderr only is not always reliable since many
129 commands throw debugging info to stderr and the only way to
130 automatically know whether an error really happened is to
131 check the process exit code).
133 Another difference with respect to 'run', is that instead
134 of directly executing the command as a bash command line,
135 it uploads the command to a bash script and runs the script.
136 This allows to use the bash script to debug errors, since
137 it remains at the remote host and can be run manually to
140 USAGE: medium-lived commands that can run detached in
141 background, for which it IS necessary to block (wait) until
142 the command has finished. (e.g. Package installation,
143 source compilation, file download, etc)
146 _rtype = "linux::Node"
147 _help = "Controls Linux host machines ( either localhost or a host " \
148 "that can be accessed using a SSH key)"
152 def _register_attributes(cls):
153 cls._register_attribute(
154 Attribute("hostname",
155 "Hostname of the machine",
156 flags = Flags.Design))
157 cls._register_attribute(
158 Attribute("username",
159 "Local account username",
160 flags = Flags.Credential))
161 cls._register_attribute(
164 flags = Flags.Design))
165 cls._register_attribute(
167 "Experiment home directory to store all experiment related files",
168 flags = Flags.Design))
169 cls._register_attribute(
170 Attribute("identity",
172 flags = Flags.Credential))
173 cls._register_attribute(
174 Attribute("serverKey",
176 flags = Flags.Design))
177 cls._register_attribute(
178 Attribute("cleanHome",
179 "Remove all nepi files and directories "
180 " from node home folder before starting experiment",
183 flags = Flags.Design))
184 cls._register_attribute(
185 Attribute("cleanExperiment",
186 "Remove all files and directories "
187 " from a previous same experiment, before the new experiment starts",
190 flags = Flags.Design))
191 cls._register_attribute(
192 Attribute("cleanProcesses",
193 "Kill all running processes before starting experiment",
196 flags = Flags.Design))
197 cls._register_attribute(
198 Attribute("cleanProcessesAfter",
199 "Kill all running processes after starting experiment"
200 "NOTE: This might be dangerous when using user root",
203 flags = Flags.Design))
204 cls._register_attribute(
205 Attribute("tearDown",
206 "Bash script to be executed before releasing the resource",
207 flags = Flags.Design))
208 cls._register_attribute(
209 Attribute("gatewayUser",
210 "Gateway account username",
211 flags = Flags.Design))
212 cls._register_attribute(
214 "Hostname of the gateway machine",
215 flags = Flags.Design))
216 cls._register_attribute(
218 "Linux host public IP address. "
219 "Must not be modified by the user unless hostname is 'localhost'",
220 flags = Flags.Design))
222 def __init__(self, ec, guid):
223 super(LinuxNode, self).__init__(ec, guid)
225 # home directory at Linux host
228 # lock to prevent concurrent applications on the same node,
229 # to execute commands at the same time. There are potential
230 # concurrency issues when using SSH to a same host from
231 # multiple threads. There are also possible operational
232 # issues, e.g. an application querying the existence
233 # of a file or folder prior to its creation, and another
234 # application creating the same file or folder in between.
235 self._node_lock = threading.Lock()
237 def log_message(self, msg):
238 return " guid {} - host {} - {} "\
239 .format(self.guid, self.get("hostname"), msg)
243 home = self.get("home") or ""
244 if not home.startswith("/"):
245 home = os.path.join(self._home_dir, home)
250 return os.path.join(self.home_dir, ".nepi")
254 return os.path.join(self.nepi_home, "nepi-usr")
258 return os.path.join(self.usr_dir, "lib")
262 return os.path.join(self.usr_dir, "bin")
266 return os.path.join(self.usr_dir, "src")
270 return os.path.join(self.usr_dir, "share")
274 return os.path.join(self.nepi_home, "nepi-exp")
278 return os.path.join(self.exp_dir, self.ec.exp_id)
282 return os.path.join(self.exp_home, "node-{}".format(self.guid))
286 return os.path.join(self.node_home, self.ec.run_id)
293 if not self.localhost and not self.get("username"):
294 msg = "Can't resolve OS, insufficient data "
296 raise RuntimeError(msg)
300 if out.find("Debian") == 0:
301 self._os = OSType.DEBIAN
302 elif out.find("Ubuntu") ==0:
303 self._os = OSType.UBUNTU
304 elif out.find("Fedora release") == 0:
305 self._os = OSType.FEDORA
306 if out.find("Fedora release 8") == 0:
307 self._os = OSType.FEDORA_8
308 elif out.find("Fedora release 12") == 0:
309 self._os = OSType.FEDORA_12
310 elif out.find("Fedora release 14") == 0:
311 self._os = OSType.FEDORA_14
313 msg = "Unsupported OS"
315 raise RuntimeError("{} - {} ".format(msg, out))
320 # The underlying SSH layer will sometimes return an empty
321 # output (even if the command was executed without errors).
322 # To work arround this, repeat the operation N times or
323 # until the result is not empty string
326 (out, err), proc = self.execute("cat /etc/issue",
330 trace = traceback.format_exc()
331 msg = "Error detecting OS: {} ".format(trace)
332 self.error(msg, out, err)
338 return (self.os & (OSType.DEBIAN|OSType.UBUNTU))
342 return (self.os & OSType.FEDORA)
346 return self.get("hostname") in ['localhost', '127.0.0.1', '::1']
348 def do_provision(self):
349 # check if host is alive
350 if not self.is_alive():
351 trace = traceback.format_exc()
352 msg = "Deploy failed. Unresponsive node {} -- traceback {}".format(self.get("hostname"), trace)
354 raise RuntimeError(msg)
358 if self.get("cleanProcesses"):
359 self.clean_processes()
361 if self.get("cleanHome"):
364 if self.get("cleanExperiment"):
365 self.clean_experiment()
367 # Create shared directory structure and node home directory
368 paths = [self.lib_dir,
376 # Get Public IP address if possible
377 if not self.get("ip"):
379 ip = sshfuncs.gethostbyname(self.get("hostname"))
382 if self.get("gateway") is None:
383 msg = "Local DNS can not resolve hostname {}".format(self.get("hostname"))
386 super(LinuxNode, self).do_provision()
389 if self.state == ResourceState.NEW:
390 self.info("Deploying node")
394 # Node needs to wait until all associated interfaces are
395 # ready before it can finalize deployment
396 from nepi.resources.linux.interface import LinuxInterface
397 ifaces = self.get_connected(LinuxInterface.get_rtype())
399 if iface.state < ResourceState.READY:
400 self.ec.schedule(self.reschedule_delay, self.deploy)
403 super(LinuxNode, self).do_deploy()
405 def do_release(self):
406 rms = self.get_connected()
408 # Node needs to wait until all associated RMs are released
409 # before it can be released
410 if rm.state != ResourceState.RELEASED:
411 self.ec.schedule(self.reschedule_delay, self.release)
414 tear_down = self.get("tearDown")
416 self.execute(tear_down)
418 if self.get("cleanProcessesAfter"):
419 self.clean_processes()
421 super(LinuxNode, self).do_release()
423 def valid_connection(self, guid):
427 def clean_processes(self):
428 self.info("Cleaning up processes")
433 if self.get("username") != 'root':
434 cmd = ("sudo -S killall tcpdump || /bin/true ; " +
435 "sudo -S kill -9 $(ps aux | grep '[.]nepi' | awk '{print $2}') || /bin/true ; " +
436 "sudo -S killall -u {} || /bin/true ; ".format(self.get("username")))
438 if self.state >= ResourceState.READY:
439 ########################
440 #Collect all process (must change for a more intelligent way)
443 avoid_pids = "ps axjf | awk '{print $1,$2}'"
444 (out, err), proc = self.execute(avoid_pids)
446 for line in out.strip().split("\n"):
447 parts = line.strip().split(" ")
448 ppid.append(parts[0])
449 pids.append(parts[1])
451 #Collect all process below ssh -D
454 sshs = "ps aux | grep 'sshd' | awk '{print $2,$12}'"
455 (out, err), proc = self.execute(sshs)
457 for line in out.strip().split("\n"):
458 parts = line.strip().split(" ")
459 if parts[1].startswith('root@pts'):
460 ssh_pids.append(parts[0])
461 elif parts[1] == "-D":
462 tree_owner = parts[0]
466 #Search for the child process of the pid's collected at the first block.
467 for process in ssh_pids:
468 temp = self.search_for_child(process, pids, ppid)
469 avoid_kill = list(set(temp))
471 if len(avoid_kill) > 0:
472 avoid_kill.append(tree_owner)
473 ########################
476 with open("/tmp/save.proc", "rb") as pickle_file:
477 pids = pickle.load(pickle_file)
479 ps_aux = "ps aux | awk '{print $2,$11}'"
480 (out, err), proc = self.execute(ps_aux)
482 for line in out.strip().split("\n"):
483 parts = line.strip().split(" ")
484 pids_temp[parts[0]] = parts[1]
485 # creates the difference between the machine pids freezed (pickle) and the actual
486 # adding the avoided pids filtered above (avoid_kill) to allow users keep process
487 # alive when using besides ssh connections
488 kill_pids = set(pids_temp.items()) - set(pids.items())
489 # py2/py3 : keep it simple
490 kill_pids = ' '.join(kill_pids)
492 # removing pids from beside connections and its process
493 kill_pids = kill_pids.split(' ')
494 kill_pids = list(set(kill_pids) - set(avoid_kill))
495 kill_pids = ' '.join(kill_pids)
497 cmd = ("killall tcpdump || /bin/true ; " +
498 "kill $(ps aux | grep '[.]nepi' | awk '{print $2}') || /bin/true ; " +
499 "kill {} || /bin/true ; ".format(kill_pids))
501 cmd = ("killall tcpdump || /bin/true ; " +
502 "kill $(ps aux | grep '[.]nepi' | awk '{print $2}') || /bin/true ; ")
504 cmd = ("killall tcpdump || /bin/true ; " +
505 "kill $(ps aux | grep '[.]nepi' | awk '{print $2}') || /bin/true ; ")
507 (out, err), proc = self.execute(cmd, retry = 1, with_lock = True)
509 def search_for_child(self, pid, pids, ppid, family=[]):
510 """ Recursive function to search for child. List A contains the pids and list B the parents (ppid)
513 for key, value in enumerate(ppid):
516 self.search_for_child(child, pids, ppid)
519 def clean_home(self):
520 """ Cleans all NEPI related folders in the Linux host
522 self.info("Cleaning up home")
524 cmd = "cd {} ; find . -maxdepth 1 -name \.nepi -execdir rm -rf {{}} + "\
525 .format(self.home_dir)
527 return self.execute(cmd, with_lock = True)
529 def clean_experiment(self):
530 """ Cleans all experiment related files in the Linux host.
531 It preserves NEPI files and folders that have a multi experiment
534 self.info("Cleaning up experiment files")
536 cmd = "cd {} ; find . -maxdepth 1 -name '{}' -execdir rm -rf {{}} + "\
537 .format(self.exp_dir, self.ec.exp_id)
539 return self.execute(cmd, with_lock = True)
541 def execute(self, command,
547 connect_timeout = 30,
548 strict_host_checking = False,
553 """ Notice that this invocation will block until the
554 execution finishes. If this is not the desired behavior,
555 use 'run' instead."""
558 (out, err), proc = execfuncs.lexec(
560 user = self.get("username"), # still problem with localhost
565 # If the execute command is blocking, we don't want to keep
566 # the node lock. This lock is used to avoid race conditions
567 # when creating the ControlMaster sockets. A more elegant
568 # solution is needed.
569 with self._node_lock:
570 (out, err), proc = sshfuncs.rexec(
572 host = self.get("hostname"),
573 user = self.get("username"),
574 port = self.get("port"),
575 gwuser = self.get("gatewayUser"),
576 gw = self.get("gateway"),
579 identity = self.get("identity"),
580 server_key = self.get("serverKey"),
583 forward_x11 = forward_x11,
585 connect_timeout = connect_timeout,
586 persistent = persistent,
588 strict_host_checking = strict_host_checking
591 (out, err), proc = sshfuncs.rexec(
593 host = self.get("hostname"),
594 user = self.get("username"),
595 port = self.get("port"),
596 gwuser = self.get("gatewayUser"),
597 gw = self.get("gateway"),
600 identity = self.get("identity"),
601 server_key = self.get("serverKey"),
604 forward_x11 = forward_x11,
606 connect_timeout = connect_timeout,
607 persistent = persistent,
609 strict_host_checking = strict_host_checking
612 return (out, err), proc
614 def run(self, command, home,
622 strict_host_checking = False):
624 self.debug("Running command '{}'".format(command))
627 (out, err), proc = execfuncs.lspawn(
630 create_home = create_home,
631 stdin = stdin or '/dev/null',
632 stdout = stdout or '/dev/null',
633 stderr = stderr or '/dev/null',
636 with self._node_lock:
637 (out, err), proc = sshfuncs.rspawn(
641 create_home = create_home,
642 stdin = stdin or '/dev/null',
643 stdout = stdout or '/dev/null',
644 stderr = stderr or '/dev/null',
646 host = self.get("hostname"),
647 user = self.get("username"),
648 port = self.get("port"),
649 gwuser = self.get("gatewayUser"),
650 gw = self.get("gateway"),
652 identity = self.get("identity"),
653 server_key = self.get("serverKey"),
655 strict_host_checking = strict_host_checking
658 return (out, err), proc
660 def getpid(self, home, pidfile = "pidfile"):
662 pidtuple = execfuncs.lgetpid(os.path.join(home, pidfile))
664 with self._node_lock:
665 pidtuple = sshfuncs.rgetpid(
666 os.path.join(home, pidfile),
667 host = self.get("hostname"),
668 user = self.get("username"),
669 port = self.get("port"),
670 gwuser = self.get("gatewayUser"),
671 gw = self.get("gateway"),
673 identity = self.get("identity"),
674 server_key = self.get("serverKey"),
675 strict_host_checking = False
680 def status(self, pid, ppid):
682 status = execfuncs.lstatus(pid, ppid)
684 with self._node_lock:
685 status = sshfuncs.rstatus(
687 host = self.get("hostname"),
688 user = self.get("username"),
689 port = self.get("port"),
690 gwuser = self.get("gatewayUser"),
691 gw = self.get("gateway"),
693 identity = self.get("identity"),
694 server_key = self.get("serverKey"),
695 strict_host_checking = False
700 def kill(self, pid, ppid, sudo = False):
703 status = self.status(pid, ppid)
705 if status == sshfuncs.ProcStatus.RUNNING:
707 (out, err), proc = execfuncs.lkill(pid, ppid, sudo)
709 with self._node_lock:
710 (out, err), proc = sshfuncs.rkill(
712 host = self.get("hostname"),
713 user = self.get("username"),
714 port = self.get("port"),
715 gwuser = self.get("gatewayUser"),
716 gw = self.get("gateway"),
719 identity = self.get("identity"),
720 server_key = self.get("serverKey"),
721 strict_host_checking = False
724 return (out, err), proc
726 def copy(self, src, dst):
728 (out, err), proc = execfuncs.lcopy(
732 with self._node_lock:
733 (out, err), proc = sshfuncs.rcopy(
735 port = self.get("port"),
736 gwuser = self.get("gatewayUser"),
737 gw = self.get("gateway"),
738 identity = self.get("identity"),
739 server_key = self.get("serverKey"),
741 strict_host_checking = False)
743 return (out, err), proc
745 def upload(self, src, dst, text = False, overwrite = True,
746 raise_on_error = True, executable = False):
747 """ Copy content to destination
749 src string with the content to copy. Can be:
751 - a string with the path to a local file
752 - a string with a semi-colon separeted list of local files
753 - a string with a local directory
755 dst string with destination path on the remote host (remote is
758 when src is text input, it gets stored into a temp file before
759 uploading; in this case, and if executable is True, said temp file
760 is made executable, and thus uploaded file will be too
762 # If source is a string input
764 if text and not os.path.isfile(src):
765 # src is text input that should be uploaded as file
766 # create a temporal file with the content to upload
767 # in python3 we need to open in binary mode if str is bytes
768 mode = 'w' if isinstance(src, str) else 'wb'
769 f = tempfile.NamedTemporaryFile(mode=mode, delete=False)
773 # do something like chmod u+x
774 mode = os.stat(f.name).st_mode
776 os.chmod(f.name, mode)
780 # If dst files should not be overwritten, check that the files do not
782 if isinstance(src, str):
783 src = [s.strip() for s in src.split(";")]
785 if overwrite == False:
786 src = self.filter_existing_files(src, dst)
788 return ("", ""), None
790 if not self.localhost:
791 # Build destination as <user>@<server>:<path>
792 dst = "{}@{}:{}".format(self.get("username"), self.get("hostname"), dst)
794 ((out, err), proc) = self.copy(src, dst)
801 msg = " Failed to upload files - src: {} dst: {}".format(";".join(src), dst)
802 self.error(msg, out, err)
804 msg = "{} out: {} err: {}".format(msg, out, err)
806 raise RuntimeError(msg)
808 return ((out, err), proc)
810 def download(self, src, dst, raise_on_error = True):
811 if not self.localhost:
812 # Build destination as <user>@<server>:<path>
813 src = "{}@{}:{}".format(self.get("username"), self.get("hostname"), src)
815 ((out, err), proc) = self.copy(src, dst)
818 msg = " Failed to download files - src: {} dst: {}".format(";".join(src), dst)
819 self.error(msg, out, err)
822 raise RuntimeError(msg)
824 return ((out, err), proc)
826 def install_packages_command(self, packages):
829 command = rpmfuncs.install_packages_command(self.os, packages)
831 command = debfuncs.install_packages_command(self.os, packages)
833 msg = "Error installing packages ( OS not known ) "
834 self.error(msg, self.os)
835 raise RuntimeError(msg)
839 def install_packages(self, packages, home,
841 raise_on_error = True):
842 """ Install packages in the Linux host.
844 'home' is the directory to upload the package installation script.
845 'run_home' is the directory from where to execute the script.
847 command = self.install_packages_command(packages)
849 run_home = run_home or home
851 (out, err), proc = self.run_and_wait(command, run_home,
852 shfile = os.path.join(home, "instpkg.sh"),
853 pidfile = "instpkg_pidfile",
854 ecodefile = "instpkg_exitcode",
855 stdout = "instpkg_stdout",
856 stderr = "instpkg_stderr",
858 raise_on_error = raise_on_error)
860 return (out, err), proc
862 def remove_packages(self, packages, home, run_home = None,
863 raise_on_error = True):
864 """ Uninstall packages from the Linux host.
866 'home' is the directory to upload the package un-installation script.
867 'run_home' is the directory from where to execute the script.
870 command = rpmfuncs.remove_packages_command(self.os, packages)
872 command = debfuncs.remove_packages_command(self.os, packages)
874 msg = "Error removing packages ( OS not known ) "
876 raise RuntimeError(msg)
878 run_home = run_home or home
880 (out, err), proc = self.run_and_wait(command, run_home,
881 shfile = os.path.join(home, "rmpkg.sh"),
882 pidfile = "rmpkg_pidfile",
883 ecodefile = "rmpkg_exitcode",
884 stdout = "rmpkg_stdout",
885 stderr = "rmpkg_stderr",
887 raise_on_error = raise_on_error)
889 return (out, err), proc
891 def mkdir(self, paths, clean = False):
892 """ Paths is either a single remote directory path to create,
893 or a list of directories to create.
898 if isinstance(paths, str):
901 cmd = " ; ".join(["mkdir -p {}".format(path) for path in paths])
903 return self.execute(cmd, with_lock = True)
905 def rmdir(self, paths):
906 """ Paths is either a single remote directory path to delete,
907 or a list of directories to delete.
910 if isinstance(paths, str):
913 cmd = " ; ".join(["rm -rf {}".format(path) for path in paths])
915 return self.execute(cmd, with_lock = True)
917 def run_and_wait(self, command, home,
923 ecodefile="exitcode",
929 raise_on_error=True):
931 Uploads the 'command' to a bash script in the host.
932 Then runs the script detached in background in the host, and
933 busy-waites until the script finishes executing.
936 if not shfile.startswith("/"):
937 shfile = os.path.join(home, shfile)
939 self.upload_command(command,
941 ecodefile = ecodefile,
943 overwrite = overwrite)
945 command = "bash {}".format(shfile)
946 # run command in background in remote host
947 (out, err), proc = self.run(command, home,
955 # check no errors occurred
957 msg = " Failed to run command '{}' ".format(command)
958 self.error(msg, out, err)
960 raise RuntimeError(msg)
962 # Wait for pid file to be generated
963 pid, ppid = self.wait_pid(
966 raise_on_error = raise_on_error)
969 # wait until command finishes to execute
970 self.wait_run(pid, ppid)
972 (eout, err), proc = self.check_errors(home,
973 ecodefile = ecodefile,
976 # Out is what was written in the stderr file
978 msg = " Failed to run command '{}' ".format(command)
979 self.error(msg, eout, err)
982 raise RuntimeError(msg)
984 (out, oerr), proc = self.check_output(home, stdout)
986 return (out, err), proc
988 def exitcode(self, home, ecodefile = "exitcode"):
990 Get the exit code of an application.
991 Returns an integer value with the exit code
993 (out, err), proc = self.check_output(home, ecodefile)
995 # Succeeded to open file, return exit code in the file
998 return int(out.strip())
1000 # Error in the content of the file!
1001 return ExitCode.CORRUPTFILE
1003 # No such file or directory
1004 if proc.returncode == 1:
1005 return ExitCode.FILENOTFOUND
1007 # Other error from 'cat'
1008 return ExitCode.ERROR
1010 def upload_command(self, command,
1012 ecodefile="exitcode",
1015 """ Saves the command as a bash script file in the remote host, and
1016 forces to save the exit code of the command execution to the ecodefile
1019 if not (command.strip().endswith(";") or command.strip().endswith("&")):
1022 # The exit code of the command will be stored in ecodefile
1023 command = " {{ {command} }} ; echo $? > {ecodefile} ;"\
1024 .format(command=command, ecodefile=ecodefile)
1026 # Export environment
1027 environ = self.format_environment(env)
1029 # Add environ to command
1030 command = environ + command
1032 return self.upload(command, shfile, text=True, overwrite=overwrite)
1034 def format_environment(self, env, inline=False):
1035 """ Formats the environment variables for a command to be executed
1036 either as an inline command
1037 (i.e. export PYTHONPATH=src/..; export LALAL= ..;python script.py) or
1038 as a bash script (i.e. export PYTHONPATH=src/.. \n export LALA=.. \n)
1040 if not env: return ""
1042 # Remove extra white spaces
1043 env = re.sub(r'\s+', ' ', env.strip())
1045 sep = ";" if inline else "\n"
1046 return sep.join([" export {}".format(e) for e in env.split(" ")]) + sep
1048 def check_errors(self, home,
1049 ecodefile = "exitcode",
1051 """ Checks whether errors occurred while running a command.
1052 It first checks the exit code for the command, and only if the
1053 exit code is an error one it returns the error output.
1059 # get exit code saved in the 'exitcode' file
1060 ecode = self.exitcode(home, ecodefile)
1062 if ecode in [ ExitCode.CORRUPTFILE, ExitCode.ERROR ]:
1063 err = "Error retrieving exit code status from file {}/{}".format(home, ecodefile)
1064 elif ecode > 0 or ecode == ExitCode.FILENOTFOUND:
1065 # The process returned an error code or didn't exist.
1066 # Check standard error.
1067 (err, eerr), proc = self.check_output(home, stderr)
1069 # If the stderr file was not found, assume nothing bad happened,
1070 # and just ignore the error.
1071 # (cat returns 1 for error "No such file or directory")
1072 if ecode == ExitCode.FILENOTFOUND and proc.poll() == 1:
1075 return ("", err), proc
1077 def wait_pid(self, home, pidfile = "pidfile", raise_on_error = False):
1078 """ Waits until the pid file for the command is generated,
1079 and returns the pid and ppid of the process """
1084 pidtuple = self.getpid(home = home, pidfile = pidfile)
1087 pid, ppid = pidtuple
1093 msg = " Failed to get pid for pidfile {}/{} ".format(home, pidfile )
1097 raise RuntimeError(msg)
1101 def wait_run(self, pid, ppid, trial = 0):
1102 """ wait for a remote process to finish execution """
1106 status = self.status(pid, ppid)
1108 if status is ProcStatus.FINISHED:
1110 elif status is not ProcStatus.RUNNING:
1113 # If it takes more than 20 seconds to start, then
1114 # asume something went wrong
1118 # The app is running, just wait...
1121 def check_output(self, home, filename):
1122 """ Retrives content of file """
1123 (out, err), proc = self.execute(
1124 "cat {}".format(os.path.join(home, filename)), retry = 1, with_lock = True)
1125 return (out, err), proc
1128 """ Checks if host is responsive
1134 msg = "Unresponsive host. Wrong answer. "
1136 # The underlying SSH layer will sometimes return an empty
1137 # output (even if the command was executed without errors).
1138 # To work arround this, repeat the operation N times or
1139 # until the result is not empty string
1141 (out, err), proc = self.execute("echo 'ALIVE'",
1145 if out.find("ALIVE") > -1:
1148 trace = traceback.format_exc()
1149 msg = "Unresponsive host. Error reaching host: {} ".format(trace)
1151 self.error(msg, out, err)
1154 def find_home(self):
1156 Retrieves host home directory
1158 # The underlying SSH layer will sometimes return an empty
1159 # output (even if the command was executed without errors).
1160 # To work arround this, repeat the operation N times or
1161 # until the result is not empty string
1162 msg = "Impossible to retrieve HOME directory"
1164 (out, err), proc = self.execute("echo ${HOME}",
1168 if out.strip() != "":
1169 self._home_dir = out.strip()
1171 trace = traceback.format_exc()
1172 msg = "Impossible to retrieve HOME directory {}".format(trace)
1174 if not self._home_dir:
1176 raise RuntimeError(msg)
1178 def filter_existing_files(self, src, dst):
1179 """ Removes files that already exist in the Linux host from src list
1181 # construct a dictionary with { dst: src }
1182 dests = { os.path.join(dst, os.path.basename(s)) : s for s in src } \
1183 if len(src) > 1 else {dst: src[0]}
1187 command.append(" [ -f {dst} ] && echo '{dst}' ".format(dst=d) )
1189 command = ";".join(command)
1191 (out, err), proc = self.execute(command, retry = 1, with_lock = True)
1193 # avoid RuntimeError that would result from
1194 # changing loop subject during iteration
1195 keys = list(dests.keys())
1197 if out.find(d) > -1:
1203 retcod = dests.values()
1204 if PY3: retcod = list(retcod)