changes for 3.0
[monitor.git] / reboot.py
index 503ca63..ba641c4 100755 (executable)
--- a/reboot.py
+++ b/reboot.py
@@ -275,6 +275,10 @@ class PCUControl(Transport,PCUModel,PCURecord):
                        import traceback
                        traceback.print_exc()
                        return "EOF connection reset" + str(err)
+               except:
+                       from nodecommon import email_exception
+                       email_exception()
+                       raise Exception('unknown')
                
 class IPAL(PCUControl):
        """ 
@@ -292,11 +296,12 @@ class IPAL(PCUControl):
 
                try:
                        # TODO: make sleep backoff, before stopping.
-                       time.sleep(4)
+                       time.sleep(8)
                        ret = s.recv(count, socket.MSG_DONTWAIT)
                except socket.error, e:
                        if e[0] == errno.EAGAIN:
-                               raise Exception(e[1])
+                               #raise Exception(e[1])
+                               raise ExceptionNotFound(e[1])
                        else:
                                # TODO: not other exceptions.
                                raise Exception(e)
@@ -317,6 +322,8 @@ class IPAL(PCUControl):
                        if e[0] == errno.ECONNREFUSED:
                                # cannot connect to remote host
                                raise Exception(e[1])
+                       elif e[0] == errno.ETIMEDOUT:
+                               raise ExceptionTimeout(e[1])
                        else:
                                # TODO: what other conditions are there?
                                raise Exception(e)
@@ -328,7 +335,7 @@ class IPAL(PCUControl):
                print "Current status is '%s'" % ret
 
                if ret == '':
-                       raise Exception("Status returned 'another session already open' %s : %s" % (node_port, ret))
+                       raise Exception("Status returned 'another session already open' on %s %s : %s" % (self.host, node_port, ret))
                        
                                
                if node_port < len(ret):
@@ -339,10 +346,12 @@ class IPAL(PCUControl):
                        elif status == '0':
                                # down
                                power_on = False
+                       elif status == '6':
+                               raise ExceptionPort("IPAL reported 'Cable Error' on %s socket %s : %s" % (self.host, node_port, ret))
                        else:
-                               raise Exception("Unknown status for PCU socket %s : %s" % (node_port, ret))
+                               raise Exception("Unknown status for PCU %s socket %s : %s" % (self.host, node_port, ret))
                else:
-                       raise Exception("Mismatch between configured port and PCU status: %s %s" % (node_port, ret))
+                       raise Exception("Mismatch between configured port and PCU %s status: %s %s" % (self.host, node_port, ret))
                        
 
                if not dryrun:
@@ -367,10 +376,12 @@ class IPAL(PCUControl):
                                elif status == '0':
                                        # down
                                        power_on = False
+                               elif status == '6':
+                                       raise ExceptionPort("IPAL reported 'Cable Error' on %s socket %s : %s" % (self.host, node_port, ret))
                                else:
-                                       raise Exception("Unknown status for PCU socket %s : %s" % (node_port, ret))
+                                       raise Exception("Unknown status for PCU %s socket %s : %s" % (self.host, node_port, ret))
                        else:
-                               raise Exception("Mismatch between configured port and PCU status: %s %s" % (node_port, ret))
+                               raise Exception("Mismatch between configured port and PCU %s status: %s %s" % (self.host, node_port, ret))
 
                        if power_on:
                                return 0
@@ -666,6 +677,13 @@ class BayTechAU(PCUControl):
 
 class BayTechGeorgeTown(PCUControl):
        def run(self, node_port, dryrun):
+               # Open and immediately close a throwaway connection before the real
+               # one.  This PCU always behaves oddly on its first connection, which
+               # can raise an exception; even when it does not, opening a second
+               # connection right away is harmless.
+               self.open(self.host, self.username, None, "Enter user name:")
+               self.close()
+               time.sleep(1)
                self.open(self.host, self.username, None, "Enter user name:")
                self.sendPassword(self.password, "Enter Password:")
 
@@ -919,6 +937,8 @@ class ePowerSwitchGood(PCUControl):
                                if self.verbose: print f.read()
                        except:
                                import traceback; traceback.print_exc()
+                               from nodecommon import email_exception
+                               email_exception()
 
                                # fetch url one more time on cmd.html, econtrol.html or whatever.
                                # pass
@@ -1211,14 +1231,14 @@ def reboot_policy(nodename, continue_probe, dryrun):
 
        pcu = plc.getpcu(nodename)
        if not pcu:
-               logger.debug("no pcu for %s" % hostname)
-               print "no pcu for %s" % hostname
+               logger.debug("no pcu for %s" % nodename)
+               print "no pcu for %s" % nodename
                return False # "%s has no pcu" % nodename
 
        values = get_pcu_values(pcu['pcu_id'])
        if values == None:
-               logger.debug("No values for pcu probe %s" % hostname)
-               print "No values for pcu probe %s" % hostname
+               logger.debug("No values for pcu probe %s" % nodename)
+               print "No values for pcu probe %s" % nodename
                return False #"no info for pcu_id %s" % pcu['pcu_id']
        
        # Try the PCU first
@@ -1314,8 +1334,12 @@ def reboot_test(nodename, values, continue_probe, verbose, dryrun):
                        # TODO: I don't think DRACRacAdm will throw an exception for the
                        # default method to catch...
                        try:
-                               drac = DRACRacAdm(values, verbose, ['443', '5869'])
-                               rb_ret = drac.reboot(0, dryrun)
+                               if values['pcu_id'] in [1402]:
+                                       drac = DRAC(values, verbose, ['22'])
+                                       rb_ret = drac.reboot(0, dryrun)
+                               else:
+                                       drac = DRACRacAdm(values, verbose, ['443', '5869'])
+                                       rb_ret = drac.reboot(0, dryrun)
                        except:
                                drac = DRAC(values, verbose, ['22'])
                                rb_ret = drac.reboot(0, dryrun)
@@ -1393,6 +1417,8 @@ def main():
                                print "failed"
        except Exception, err:
                import traceback; traceback.print_exc()
+               from nodecommon import email_exception
+               email_exception()
                print err
 
 if __name__ == '__main__':