X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=src%2Fnepi%2Fresources%2Flinux%2Fapplication.py;h=1848e9a1c36638c4a12c0de70c86440cc65dbbc9;hb=99d8b2a4431d8fafd0385e189375106d46f1abd9;hp=8b975acb735ed97154542470f729387eafa58eee;hpb=dadecc92b7cd8fb8732b53f6fbbc7bd1519c4ff6;p=nepi.git diff --git a/src/nepi/resources/linux/application.py b/src/nepi/resources/linux/application.py index 8b975acb..1848e9a1 100644 --- a/src/nepi/resources/linux/application.py +++ b/src/nepi/resources/linux/application.py @@ -19,8 +19,8 @@ from nepi.execution.attribute import Attribute, Flags, Types from nepi.execution.trace import Trace, TraceAttr -from nepi.execution.resource import ResourceManager, clsinit, ResourceState, \ - reschedule_delay +from nepi.execution.resource import ResourceManager, clsinit_copy, \ + ResourceState, reschedule_delay, failtrap from nepi.resources.linux.node import LinuxNode from nepi.util.sshfuncs import ProcStatus from nepi.util.timefuncs import tnow, tdiffsec @@ -29,8 +29,9 @@ import os import subprocess # TODO: Resolve wildcards in commands!! +# TODO: When a failure occurs during deployment, scp and ssh processes are left running behind!! -@clsinit +@clsinit_copy class LinuxApplication(ResourceManager): """ .. class:: Class Args : @@ -81,6 +82,8 @@ class LinuxApplication(ResourceManager): """ _rtype = "LinuxApplication" + _help = "Runs an application on a Linux host with a BASH command " + _backend_type = "linux" @classmethod def _register_attributes(cls): @@ -266,7 +269,8 @@ class LinuxApplication(ResourceManager): out = int(out.strip()) return out - + + @failtrap def provision(self): # create run dir for application self.node.mkdir(self.run_home) @@ -297,8 +301,9 @@ class LinuxApplication(ResourceManager): # Since provisioning takes a long time, before # each step we check that the EC is still for step in steps: - if self.ec.finished: - raise RuntimeError, "EC finished" + if self.ec.abort: + self.debug("Interrupting provisioning. EC says 'ABORT") + return ret = step() if ret: @@ -446,7 +451,7 @@ class LinuxApplication(ResourceManager): depends = self.get("depends") if depends: self.info("Installing dependencies %s" % depends) - self.node.install_packages(depends, self.app_home, self.run_home) + return self.node.install_packages_command(depends) def build(self): build = self.get("build") @@ -466,6 +471,7 @@ class LinuxApplication(ResourceManager): # replace application specific paths in the command return self.replace_paths(install) + @failtrap def deploy(self): # Wait until node is associated and deployed node = self.node @@ -473,17 +479,14 @@ class LinuxApplication(ResourceManager): self.debug("---- RESCHEDULING DEPLOY ---- node state %s " % self.node.state ) self.ec.schedule(reschedule_delay, self.deploy) else: - try: - command = self.get("command") or "" - self.info("Deploying command '%s' " % command) - self.discover() - self.provision() - except: - self.fail() - raise + command = self.get("command") or "" + self.info("Deploying command '%s' " % command) + self.discover() + self.provision() super(LinuxApplication, self).deploy() - + + @failtrap def start(self): command = self.get("command") @@ -492,9 +495,8 @@ class LinuxApplication(ResourceManager): if not command: # If no command was given (i.e. Application was used for dependency # installation), then the application is directly marked as FINISHED - self._state = ResourceState.FINISHED + self.set_finished() else: - if self.in_foreground: self._run_in_foreground() else: @@ -506,6 +508,7 @@ class LinuxApplication(ResourceManager): command = self.get("command") sudo = self.get("sudo") or False x11 = self.get("forwardX11") + env = self.get("env") # For a command being executed in foreground, if there is stdin, # it is expected to be text string not a file or pipe @@ -518,7 +521,7 @@ class LinuxApplication(ResourceManager): # to be able to kill the process from the stop method. # We also set blocking = False, since we don't want the # thread to block until the execution finishes. - (out, err), self._proc = self.execute_command(self, command, + (out, err), self._proc = self.execute_command(command, env = env, sudo = sudo, stdin = stdin, @@ -526,7 +529,6 @@ class LinuxApplication(ResourceManager): blocking = False) if self._proc.poll(): - self.fail() self.error(msg, out, err) raise RuntimeError, msg @@ -556,7 +558,6 @@ class LinuxApplication(ResourceManager): msg = " Failed to start command '%s' " % command if proc.poll(): - self.fail() self.error(msg, out, err) raise RuntimeError, msg @@ -573,11 +574,11 @@ class LinuxApplication(ResourceManager): # Out is what was written in the stderr file if err: - self.fail() msg = " Failed to start command '%s' " % command self.error(msg, out, err) raise RuntimeError, msg - + + @failtrap def stop(self): """ Stops application execution """ @@ -585,46 +586,44 @@ class LinuxApplication(ResourceManager): if self.state == ResourceState.STARTED: - stopped = True - - self.info("Stopping command '%s'" % command) + self.info("Stopping command '%s' " % command) # If the command is running in foreground (it was launched using # the node 'execute' method), then we use the handler to the Popen # process to kill it. Else we send a kill signal using the pid and ppid # retrieved after running the command with the node 'run' method - if self._proc: self._proc.kill() else: # Only try to kill the process if the pid and ppid # were retrieved if self.pid and self.ppid: - (out, err), proc = self.node.kill(self.pid, self.ppid, sudo = - self._sudo_kill) + (out, err), proc = self.node.kill(self.pid, self.ppid, + sudo = self._sudo_kill) - if out or err: - # check if execution errors occurred + # TODO: check if execution errors occurred + if proc.poll() or err: msg = " Failed to STOP command '%s' " % self.get("command") self.error(msg, out, err) - self.fail() - stopped = False - - if stopped: - super(LinuxApplication, self).stop() + + super(LinuxApplication, self).stop() def release(self): self.info("Releasing resource") - tear_down = self.get("tearDown") - if tear_down: - self.node.execute(tear_down) + try: + tear_down = self.get("tearDown") + if tear_down: + self.node.execute(tear_down) - self.stop() + self.stop() + except: + import traceback + err = traceback.format_exc() + self.error(err) - if self.state == ResourceState.STOPPED: - super(LinuxApplication, self).release() - + super(LinuxApplication, self).release() + @property def state(self): """ Returns the state of the application @@ -644,31 +643,33 @@ class LinuxApplication(ResourceManager): err = self._proc.stderr.read() self.error(msg, out, err) self.fail() - elif retcode == 0: - self._state = ResourceState.FINISHED + elif retcode == 0: + self.finish() else: # We need to query the status of the command we launched in - # background. In oredr to avoid overwhelming the remote host and + # background. In order to avoid overwhelming the remote host and # the local processor with too many ssh queries, the state is only # requested every 'state_check_delay' seconds. state_check_delay = 0.5 if tdiffsec(tnow(), self._last_state_check) > state_check_delay: - # check if execution errors occurred - (out, err), proc = self.node.check_errors(self.run_home) - - if err: - msg = " Failed to execute command '%s'" % self.get("command") - self.error(msg, out, err) - self.fail() - - elif self.pid and self.ppid: - # No execution errors occurred. Make sure the background - # process with the recorded pid is still running. + if self.pid and self.ppid: + # Make sure the process is still running in background status = self.node.status(self.pid, self.ppid) if status == ProcStatus.FINISHED: - self._state = ResourceState.FINISHED + # If the program finished, check if execution + # errors occurred + (out, err), proc = self.node.check_errors( + self.run_home) + + if err: + msg = "Failed to execute command '%s'" % \ + self.get("command") + self.error(msg, out, err) + self.fail() + else: + self.finish() self._last_state_check = tnow()