import traceback
import config
+class ExceptionDoubleSSHError(Exception): pass
+
import signal
class Sopen(subprocess.Popen):
def kill(self, signal = signal.SIGTERM):
def get_bootmanager_log(self):
download(self.c, "/tmp/bm.log", "log/bm.%s.log.gz" % self.node)
- os.system("zcat log/bm.%s.log.gz > log/bm.%s.log" % (self.node, self.node))
+ #os.system("zcat log/bm.%s.log.gz > log/bm.%s.log" % (self.node, self.node))
+ os.system("cp log/bm.%s.log.gz log/bm.%s.log" % (self.node, self.node))
log = open("log/bm.%s.log" % self.node, 'r')
return log
if ret != 0:
print "\tFAILED TWICE"
#sys.exit(1)
- raise Exception("Failed twice trying to login with updated ssh host key")
+ raise ExceptionDoubleSSHError("Failed twice trying to login with updated ssh host key")
t1 = time.time()
# KILL any already running servers.
session = PlanetLabSession(node, False, True)
else:
session = PlanetLabSession(node, config.nosetup, config.verbose)
+ except ExceptionDoubleSSHError, e:
+ msg = "ERROR setting up session for %s" % hostname
+ print msg
+ return False
except Exception, e:
msg = "ERROR setting up session for %s" % hostname
print msg
try:
time.sleep(session.timeout*4)
conn = session.get_connection(config)
+ except EOFError:
+ # Failed twice. There is no real need to report this; it is just in a
+ # weird state.
+ return False
except:
print traceback.print_exc()
from nodecommon import email_exception
- email_exception()
+ email_exception(node)
return False
if forced_action == "reboot":
try:
# TODO: make the sleep back off (increasing delay) before stopping.
- time.sleep(4)
+ time.sleep(8)
ret = s.recv(count, socket.MSG_DONTWAIT)
except socket.error, e:
if e[0] == errno.EAGAIN:
- raise Exception(e[1])
+ #raise Exception(e[1])
+ raise ExceptionNotFound(e[1])
else:
# TODO: handle other socket errors specifically.
raise Exception(e)
if e[0] == errno.ECONNREFUSED:
# cannot connect to remote host
raise Exception(e[1])
+ elif e[0] == errno.ETIMEDOUT:
+ raise ExceptionTimeout(e[1])
else:
# TODO: what other conditions are there?
raise Exception(e)