From f474019d633e83dbe4554be579e56f27d863f307 Mon Sep 17 00:00:00 2001 From: Claudio-Daniel Freire Date: Sat, 1 Oct 2011 10:24:02 +0200 Subject: [PATCH] - Detect SSH misconfigurations in PL nodes - Retry slaves when possible --- src/nepi/testbeds/planetlab/application.py | 20 +++++++++++++------- src/nepi/testbeds/planetlab/node.py | 2 +- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/src/nepi/testbeds/planetlab/application.py b/src/nepi/testbeds/planetlab/application.py index 1552cbb0..d2f0351a 100644 --- a/src/nepi/testbeds/planetlab/application.py +++ b/src/nepi/testbeds/planetlab/application.py @@ -292,7 +292,7 @@ class Dependency(object): waitmaster = ( "{ " "echo 'Checking master reachability' ; " - "if ping -c 3 %(master_host)s ; then " + "if ping -c 3 %(master_host)s && (. ./.ssh-agent.sh > /dev/null ; ssh -o UserKnownHostsFile=%(hostkey)s %(sshopts)s %(master)s echo MASTER SAYS HI ) ; then " "echo 'Master node reachable' ; " "else " "echo 'MASTER NODE UNREACHABLE' && " @@ -393,7 +393,7 @@ class Dependency(object): self._logger.info("Deploying %s at %s", self, self.node.hostname) - def _do_wait_build(self): + def _do_wait_build(self, trial=0): pid = self._build_pid ppid = self._build_ppid @@ -463,11 +463,17 @@ class Dependency(object): if self.check_bad_host(buildlog, err): self.node.blacklist() - - raise RuntimeError, "Failed to set up application %s: "\ - "build failed, got wrong token from pid %s/%s "\ - "(expected %r, got %r), see buildlog at %s:\n%s" % ( - self.home_path, pid, ppid, self._master_token, slave_token, self.node.hostname, buildlog) + elif self._master and trial < 3 and 'BAD TOKEN' in buildlog or 'BAD TOKEN' in err: + # bad sync with master, may try again + # but first wait for master + self._master.async_setup_wait() + self._launch_build() + self._do_wait_build(trial+1) + else: + raise RuntimeError, "Failed to set up application %s: "\ + "build failed, got wrong token from pid %s/%s "\ + "(expected %r, got %r), see buildlog at %s:\n%s" % ( + self.home_path, pid, ppid, self._master_token, slave_token, self.node.hostname, buildlog) self._logger.info("Built %s at %s", self, self.node.hostname) diff --git a/src/nepi/testbeds/planetlab/node.py b/src/nepi/testbeds/planetlab/node.py index 61c73e8c..e6a20b5c 100644 --- a/src/nepi/testbeds/planetlab/node.py +++ b/src/nepi/testbeds/planetlab/node.py @@ -328,7 +328,7 @@ class Node(object): except AttributeError: return - for key, value in __orig_attrs.iteritems(): + for key, value in orig_attrs.iteritems(): setattr(self, key, value) del self.__orig_attrs -- 2.43.0