reduce false exceptions that should be ignored or handled better in the code.
author Stephen Soltesz <soltesz@cs.princeton.edu>
Thu, 30 Apr 2009 21:10:11 +0000 (21:10 +0000)
committer Stephen Soltesz <soltesz@cs.princeton.edu>
Thu, 30 Apr 2009 21:10:11 +0000 (21:10 +0000)
bootman.py
reboot.py

index c26335f..f8f6d48 100755 (executable)
@@ -26,6 +26,8 @@ from nodeconfig import network_config_to_str
 import traceback
 import config
 
+class ExceptionDoubleSSHError(Exception): pass
+
 import signal
 class Sopen(subprocess.Popen):
        def kill(self, signal = signal.SIGTERM):
@@ -65,7 +67,8 @@ class NodeConnection:
 
        def get_bootmanager_log(self):
                download(self.c, "/tmp/bm.log", "log/bm.%s.log.gz" % self.node)
-               os.system("zcat log/bm.%s.log.gz > log/bm.%s.log" % (self.node, self.node))
+               #os.system("zcat log/bm.%s.log.gz > log/bm.%s.log" % (self.node, self.node))
+               os.system("cp log/bm.%s.log.gz log/bm.%s.log" % (self.node, self.node))
                log = open("log/bm.%s.log" % self.node, 'r')
                return log
 
@@ -232,7 +235,7 @@ class PlanetLabSession:
                        if ret != 0:
                                print "\tFAILED TWICE"
                                #sys.exit(1)
-                               raise Exception("Failed twice trying to login with updated ssh host key")
+                               raise ExceptionDoubleSSHError("Failed twice trying to login with updated ssh host key")
 
                t1 = time.time()
                # KILL any already running servers.
@@ -348,6 +351,10 @@ def reboot(hostname, config=None, forced_action=None):
                        session = PlanetLabSession(node, False, True)
                else:
                        session = PlanetLabSession(node, config.nosetup, config.verbose)
+       except ExceptionDoubleSSHError, e:
+               msg = "ERROR setting up session for %s" % hostname
+               print msg
+               return False
        except Exception, e:
                msg = "ERROR setting up session for %s" % hostname
                print msg
@@ -365,10 +372,14 @@ def reboot(hostname, config=None, forced_action=None):
                try:
                        time.sleep(session.timeout*4)
                        conn = session.get_connection(config)
+               except EOFError:
+                       # failed twice... no need to report this really, it's just in a
+                       # weird state...
+                       return False
                except:
                        print traceback.print_exc()
                        from nodecommon import email_exception
-                       email_exception()
+                       email_exception(node)
                        return False
 
        if forced_action == "reboot":
index 34b9b00..ba641c4 100755 (executable)
--- a/reboot.py
+++ b/reboot.py
@@ -296,11 +296,12 @@ class IPAL(PCUControl):
 
                try:
                        # TODO: make sleep backoff, before stopping.
-                       time.sleep(4)
+                       time.sleep(8)
                        ret = s.recv(count, socket.MSG_DONTWAIT)
                except socket.error, e:
                        if e[0] == errno.EAGAIN:
-                               raise Exception(e[1])
+                               #raise Exception(e[1])
+                               raise ExceptionNotFound(e[1])
                        else:
                                # TODO: not other exceptions.
                                raise Exception(e)
@@ -321,6 +322,8 @@ class IPAL(PCUControl):
                        if e[0] == errno.ECONNREFUSED:
                                # cannot connect to remote host
                                raise Exception(e[1])
+                       elif e[0] == errno.ETIMEDOUT:
+                               raise ExceptionTimeout(e[1])
                        else:
                                # TODO: what other conditions are there?
                                raise Exception(e)