X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=reboot.py;h=ba641c4724c7cea4f6670f2ef883a42949a1ef49;hb=refs%2Fheads%2F1.0;hp=503ca6311a503f5102e6ca7bebf1a02f5733b452;hpb=ff1455177461f5adf513543fd2a85289f359ff0e;p=monitor.git diff --git a/reboot.py b/reboot.py index 503ca63..ba641c4 100755 --- a/reboot.py +++ b/reboot.py @@ -275,6 +275,10 @@ class PCUControl(Transport,PCUModel,PCURecord): import traceback traceback.print_exc() return "EOF connection reset" + str(err) + except: + from nodecommon import email_exception + email_exception() + raise Exception('unknown') class IPAL(PCUControl): """ @@ -292,11 +296,12 @@ class IPAL(PCUControl): try: # TODO: make sleep backoff, before stopping. - time.sleep(4) + time.sleep(8) ret = s.recv(count, socket.MSG_DONTWAIT) except socket.error, e: if e[0] == errno.EAGAIN: - raise Exception(e[1]) + #raise Exception(e[1]) + raise ExceptionNotFound(e[1]) else: # TODO: not other exceptions. raise Exception(e) @@ -317,6 +322,8 @@ class IPAL(PCUControl): if e[0] == errno.ECONNREFUSED: # cannot connect to remote host raise Exception(e[1]) + elif e[0] == errno.ETIMEDOUT: + raise ExceptionTimeout(e[1]) else: # TODO: what other conditions are there? raise Exception(e) @@ -328,7 +335,7 @@ class IPAL(PCUControl): print "Current status is '%s'" % ret if ret == '': - raise Exception("Status returned 'another session already open' %s : %s" % (node_port, ret)) + raise Exception("Status returned 'another session already open' on %s %s : %s" % (self.host, node_port, ret)) if node_port < len(ret): @@ -339,10 +346,12 @@ class IPAL(PCUControl): elif status == '0': # down power_on = False + elif status == '6': + raise ExceptionPort("IPAL reported 'Cable Error' on %s socket %s : %s" % (self.host, node_port, ret)) else: - raise Exception("Unknown status for PCU socket %s : %s" % (node_port, ret)) + raise Exception("Unknown status for PCU %s socket %s : %s" % (self.host, node_port, ret)) else: - raise Exception("Mismatch between configured port and PCU status: %s %s" % (node_port, ret)) + raise Exception("Mismatch between configured port and PCU %s status: %s %s" % (self.host, node_port, ret)) if not dryrun: @@ -367,10 +376,12 @@ class IPAL(PCUControl): elif status == '0': # down power_on = False + elif status == '6': + raise ExceptionPort("IPAL reported 'Cable Error' on %s socket %s : %s" % (self.host, node_port, ret)) else: - raise Exception("Unknown status for PCU socket %s : %s" % (node_port, ret)) + raise Exception("Unknown status for PCU %s socket %s : %s" % (self.host, node_port, ret)) else: - raise Exception("Mismatch between configured port and PCU status: %s %s" % (node_port, ret)) + raise Exception("Mismatch between configured port and PCU %s status: %s %s" % (self.host, node_port, ret)) if power_on: return 0 @@ -666,6 +677,13 @@ class BayTechAU(PCUControl): class BayTechGeorgeTown(PCUControl): def run(self, node_port, dryrun): + # this initial open/close is to prevent things from raising an + # exception. the pcu always is weird during the first connection, and + # even if it's not, what does it matter to open a second connection + # right away? + self.open(self.host, self.username, None, "Enter user name:") + self.close() + time.sleep(1) self.open(self.host, self.username, None, "Enter user name:") self.sendPassword(self.password, "Enter Password:") @@ -919,6 +937,8 @@ class ePowerSwitchGood(PCUControl): if self.verbose: print f.read() except: import traceback; traceback.print_exc() + from nodecommon import email_exception + email_exception() # fetch url one more time on cmd.html, econtrol.html or whatever. # pass @@ -1211,14 +1231,14 @@ def reboot_policy(nodename, continue_probe, dryrun): pcu = plc.getpcu(nodename) if not pcu: - logger.debug("no pcu for %s" % hostname) - print "no pcu for %s" % hostname + logger.debug("no pcu for %s" % nodename) + print "no pcu for %s" % nodename return False # "%s has no pcu" % nodename values = get_pcu_values(pcu['pcu_id']) if values == None: - logger.debug("No values for pcu probe %s" % hostname) - print "No values for pcu probe %s" % hostname + logger.debug("No values for pcu probe %s" % nodename) + print "No values for pcu probe %s" % nodename return False #"no info for pcu_id %s" % pcu['pcu_id'] # Try the PCU first @@ -1314,8 +1334,12 @@ def reboot_test(nodename, values, continue_probe, verbose, dryrun): # TODO: I don't think DRACRacAdm will throw an exception for the # default method to catch... try: - drac = DRACRacAdm(values, verbose, ['443', '5869']) - rb_ret = drac.reboot(0, dryrun) + if values['pcu_id'] in [1402]: + drac = DRAC(values, verbose, ['22']) + rb_ret = drac.reboot(0, dryrun) + else: + drac = DRACRacAdm(values, verbose, ['443', '5869']) + rb_ret = drac.reboot(0, dryrun) except: drac = DRAC(values, verbose, ['22']) rb_ret = drac.reboot(0, dryrun) @@ -1393,6 +1417,8 @@ def main(): print "failed" except Exception, err: import traceback; traceback.print_exc() + from nodecommon import email_exception + email_exception() print err if __name__ == '__main__':