X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=src%2Fneco%2Fresources%2Flinux%2Fnode.py;h=38c38651770bc0ab4105fffea6ab374cd589daae;hb=94f4bad47f6b00e8d1e9ad9c55bfbbd6894329fc;hp=39107cb541defa74be33cd0a9e75f8a3032a0a91;hpb=d32dba78910bd348b9bbeb0e8242d31bfd39c0a7;p=nepi.git diff --git a/src/neco/resources/linux/node.py b/src/neco/resources/linux/node.py index 39107cb5..38c38651 100644 --- a/src/neco/resources/linux/node.py +++ b/src/neco/resources/linux/node.py @@ -15,6 +15,7 @@ import threading # TODO: Verify files and dirs exists already # TODO: Blacklist nodes! # TODO: Unify delays!! +# TODO: Validate outcome of uploads!! reschedule_delay = "0.5s" @@ -130,8 +131,9 @@ class LinuxNode(ResourceManager): def provision(self, filters = None): if not self.is_alive(): self._state = ResourceState.FAILED - self.error("Deploy failed. Unresponsive node") - return + msg = "Deploy failed. Unresponsive node %s" % self.get("hostname") + self.error(msg) + raise RuntimeError, msg if self.get("cleanProcesses"): self.clean_processes() @@ -411,29 +413,27 @@ class LinuxNode(ResourceManager): out = err = "" try: - (out, err), proc = self.execute("echo 'ALIVE'", with_lock = True) + (out, err), proc = self.execute("echo 'ALIVE'", retry = 5, + with_lock = True) except: import traceback trace = traceback.format_exc() msg = "Unresponsive host " - self.warn(msg, out, trace) + self.error(msg, out, trace) return False if out.strip().startswith('ALIVE'): return True else: msg = "Unresponsive host " - self.warn(msg, out, err) + self.error(msg, out, err) return False - # TODO! - #if self.check_bad_host(out,err): - # self.blacklist() - def copy(self, src, dst): if self.localhost: (out, err), proc = execfuncs.lcopy(source, dest, - recursive = True) + recursive = True, + strict_host_checking = False) else: with self._lock: (out, err), proc = sshfuncs.rcopy( @@ -441,7 +441,8 @@ class LinuxNode(ResourceManager): port = self.get("port"), identity = self.get("identity"), server_key = self.get("serverKey"), - recursive = True) + recursive = True, + strict_host_checking = False) return (out, err), proc @@ -455,6 +456,7 @@ class LinuxNode(ResourceManager): retry = 3, err_on_timeout = True, connect_timeout = 30, + strict_host_checking = False, persistent = True, with_lock = False ): @@ -488,7 +490,8 @@ class LinuxNode(ResourceManager): retry = retry, err_on_timeout = err_on_timeout, connect_timeout = connect_timeout, - persistent = persistent + persistent = persistent, + strict_host_checking = strict_host_checking ) else: (out, err), proc = sshfuncs.rexec(