- if local.capture:
- for spin in xrange(30):
- if self.status() != rspawn.RUNNING:
- break
-
- (out,err),proc = server.popen_ssh_command(
- "cd %(home)s ; grep -c Connected capture" % dict(
- home = server.shell_escape(self.home_path)),
- host = local.node.hostname,
- port = None,
- user = local.node.slicename,
- agent = None,
- ident_key = local.node.ident_path,
- server_key = local.node.server_key
- )
-
- if proc.wait():
- break
-
- if out.strip() != '0':
- break
-
- time.sleep(1.0)
+ retrytime = 2.0
+ for spin in xrange(30):
+ if self.status() != rspawn.RUNNING:
+ self._logger.warn("FAILED TO CONNECT! %s", self)
+ break
+
+ # Connected?
+ (out,err),proc = server.eintr_retry(server.popen_ssh_command)(
+ "cd %(home)s ; grep -c Connected capture" % dict(
+ home = server.shell_escape(self.home_path)),
+ host = local.node.hostname,
+ port = None,
+ user = local.node.slicename,
+ agent = None,
+ ident_key = local.node.ident_path,
+ server_key = local.node.server_key,
+ timeout = 60,
+ err_on_timeout = False
+ )
+ proc.wait()
+
+ if out.strip() == '1':
+ break
+
+ # At least listening?
+ (out,err),proc = server.eintr_retry(server.popen_ssh_command)(
+ "cd %(home)s ; grep -c Listening capture" % dict(
+ home = server.shell_escape(self.home_path)),
+ host = local.node.hostname,
+ port = None,
+ user = local.node.slicename,
+ agent = None,
+ ident_key = local.node.ident_path,
+ server_key = local.node.server_key,
+ timeout = 60,
+ err_on_timeout = False
+ )
+ proc.wait()
+
+ if out.strip() == '1':
+ self._started_listening = True
+
+ time.sleep(min(30.0, retrytime))
+ retrytime *= 1.1
+ else:
+ (out,err),proc = server.eintr_retry(server.popen_ssh_command)(
+ "cat %(home)s/capture" % dict(
+ home = server.shell_escape(self.home_path)),
+ host = local.node.hostname,
+ port = None,
+ user = local.node.slicename,
+ agent = None,
+ ident_key = local.node.ident_path,
+ server_key = local.node.server_key,
+ timeout = 60,
+ retry = 3,
+ err_on_timeout = False
+ )
+ proc.wait()
+
+ raise RuntimeError, "FAILED TO CONNECT %s: %s%s" % (self,out,err)