X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=reboot.py;h=ba641c4724c7cea4f6670f2ef883a42949a1ef49;hb=refs%2Fheads%2F1.0;hp=7c6bea3a7a0aef0e26ed58ea7e912649bfa84552;hpb=c3f62accd9509164351f4895e655412d07f9a42b;p=monitor.git diff --git a/reboot.py b/reboot.py index 7c6bea3..ba641c4 100755 --- a/reboot.py +++ b/reboot.py @@ -11,14 +11,13 @@ import urllib2 import urllib import threading, popen2 import array, struct -#from socket import * -import socket import plc import base64 from subprocess import PIPE, Popen import ssh.pxssh as pxssh import ssh.pexpect as pexpect import socket +import moncommands # Use our versions of telnetlib and pyssh sys.path.insert(0, os.path.dirname(sys.argv[0])) @@ -276,6 +275,10 @@ class PCUControl(Transport,PCUModel,PCURecord): import traceback traceback.print_exc() return "EOF connection reset" + str(err) + except: + from nodecommon import email_exception + email_exception() + raise Exception('unknown') class IPAL(PCUControl): """ @@ -293,11 +296,12 @@ class IPAL(PCUControl): try: # TODO: make sleep backoff, before stopping. - time.sleep(4) + time.sleep(8) ret = s.recv(count, socket.MSG_DONTWAIT) except socket.error, e: if e[0] == errno.EAGAIN: - return Exception(e[1]) + #raise Exception(e[1]) + raise ExceptionNotFound(e[1]) else: # TODO: not other exceptions. raise Exception(e) @@ -317,7 +321,9 @@ class IPAL(PCUControl): s.close() if e[0] == errno.ECONNREFUSED: # cannot connect to remote host - return Exception(e[1]) + raise Exception(e[1]) + elif e[0] == errno.ETIMEDOUT: + raise ExceptionTimeout(e[1]) else: # TODO: what other conditions are there? raise Exception(e) @@ -327,6 +333,10 @@ class IPAL(PCUControl): s.send(self.format_msg("", 'O')) ret = self.recv_noblock(s, 8) print "Current status is '%s'" % ret + + if ret == '': + raise Exception("Status returned 'another session already open' on %s %s : %s" % (self.host, node_port, ret)) + if node_port < len(ret): status = ret[node_port] @@ -336,21 +346,23 @@ class IPAL(PCUControl): elif status == '0': # down power_on = False + elif status == '6': + raise ExceptionPort("IPAL reported 'Cable Error' on %s socket %s : %s" % (self.host, node_port, ret)) else: - raise Exception("Unknown status for PCU socket %s : %s" % (node_port, ret)) + raise Exception("Unknown status for PCU %s socket %s : %s" % (self.host, node_port, ret)) else: - raise Exception("Mismatch between configured port and PCU status: %s %s" % (node_port, ret)) + raise Exception("Mismatch between configured port and PCU %s status: %s %s" % (self.host, node_port, ret)) if not dryrun: - print "Pulsing %s" % node_port if power_on: + print "Pulsing %s" % node_port s.send(self.format_msg("%s" % node_port, 'P')) else: - # NOTE: turn power on before pulsing the port. - print "power was off, so turning on then pulsing..." + # NOTE: turn power on ; do not pulse the port. + print "Power was off, so turning on ..." s.send(self.format_msg("%s" % node_port, 'E')) - s.send(self.format_msg("%s" % node_port, 'P')) + #s.send(self.format_msg("%s" % node_port, 'P')) print "Receiving response." ret = self.recv_noblock(s, 8) @@ -364,10 +376,12 @@ class IPAL(PCUControl): elif status == '0': # down power_on = False + elif status == '6': + raise ExceptionPort("IPAL reported 'Cable Error' on %s socket %s : %s" % (self.host, node_port, ret)) else: - raise Exception("Unknown status for PCU socket %s : %s" % (node_port, ret)) + raise Exception("Unknown status for PCU %s socket %s : %s" % (self.host, node_port, ret)) else: - raise Exception("Mismatch between configured port and PCU status: %s %s" % (node_port, ret)) + raise Exception("Mismatch between configured port and PCU %s status: %s %s" % (self.host, node_port, ret)) if power_on: return 0 @@ -555,10 +569,10 @@ class APC(PCUControl): class IntelAMT(PCUControl): def run(self, node_port, dryrun): - import soltesz - cmd = soltesz.CMD() - cmd_str = "IntelAMTSDK/Samples/RemoteControl/remoteControl" + cmd = moncommands.CMD() + #[cmd_str = "IntelAMTSDK/Samples/RemoteControl/remoteControl" + cmd_str = "cmdamt/remoteControl" if dryrun: # NOTE: -p checks the power state of the host. @@ -620,9 +634,8 @@ class HPiLO(PCUControl): class HPiLOHttps(PCUControl): def run(self, node_port, dryrun): - import soltesz - locfg = soltesz.CMD() + locfg = moncommands.CMD() cmd = "cmdhttps/locfg.pl -s %s -f %s -u %s -p '%s' | grep 'MESSAGE' | grep -v 'No error'" % ( self.host, "iloxml/Get_Network.xml", self.username, self.password) @@ -633,7 +646,7 @@ class HPiLOHttps(PCUControl): return sout.strip() if not dryrun: - locfg = soltesz.CMD() + locfg = moncommands.CMD() cmd = "cmdhttps/locfg.pl -s %s -f %s -u %s -p '%s' | grep 'MESSAGE' | grep -v 'No error'" % ( self.host, "iloxml/Reset_Server.xml", self.username, self.password) @@ -664,6 +677,13 @@ class BayTechAU(PCUControl): class BayTechGeorgeTown(PCUControl): def run(self, node_port, dryrun): + # this initial open/close is to prevent things from raising an + # exception. the pcu always is weird during the first connection, and + # even if it's not, what does it matter to open a second connection + # right away? + self.open(self.host, self.username, None, "Enter user name:") + self.close() + time.sleep(1) self.open(self.host, self.username, None, "Enter user name:") self.sendPassword(self.password, "Enter Password:") @@ -702,6 +722,7 @@ class BayTechCtrlCUnibe(PCUControl): # Control Outlets (5 ,1).........5 try: + #index = s.expect("Enter Request") index = s.expect(["Enter Request :"]) if index == 0: @@ -716,7 +737,8 @@ class BayTechCtrlCUnibe(PCUControl): print "Reboot %d" % node_port s.send("Reboot %d\r\n" % node_port) - index = s.expect(["(Y/N)?"]) + time.sleep(5) + index = s.expect(["\(Y/N\)\?", "Port in use", "DS-RPC>"]) if index == 0: if dryrun: print "sending N" @@ -724,16 +746,21 @@ class BayTechCtrlCUnibe(PCUControl): else: print "sending Y" s.send("Y\r\n") + elif index == 1: + raise ExceptionPrompt("PCU Reported 'Port in use.'") + elif index == 2: + raise ExceptionSequence("Issued command 'Reboot' failed.") - #index = s.expect(["DS-RPC>"]) + time.sleep(5) + index = s.expect(["DS-RPC>"]) #print "got prompt back" s.close() except pexpect.EOF: - raise ExceptionPrompt("EOF before 'Enter Request' Prompt") + raise ExceptionPrompt("EOF before expected Prompt") except pexpect.TIMEOUT: - raise ExceptionPrompt("Timeout before 'Enter Request' Prompt") + raise ExceptionPrompt("Timeout before expected Prompt") return 0 @@ -753,37 +780,54 @@ class BayTechCtrlC(PCUControl): # Otherwise, the login succeeded. # Send a ctrl-c to the remote process. - print "sending ctrl-c" + print "SENDING ctrl-c" s.send(chr(3)) # Control Outlets (5 ,1).........5 try: + print "EXPECTING: ", "Enter Request :" index = s.expect(["Enter Request :"]) if index == 0: - print "5" + print "SENDING: 5" s.send("5\r\n") - index = s.expect(["DS-RPC>", "Enter user name:"]) + print "EXPECTING: ", "DS-RPC>" + index = s.expect(["DS-RPC>", "Enter user name:", "Port in use."]) if index == 1: print "sending username" s.send(self.username + "\r\n") index = s.expect(["DS-RPC>"]) + elif index == 2: + raise ExceptionPrompt("PCU Reported 'Port in use.'") if index == 0: - print "Reboot %d" % node_port + print "SENDING: Reboot %d" % node_port s.send("Reboot %d\r\n" % node_port) - index = s.expect(["(Y/N)?"]) + print "SLEEPING: 5" + time.sleep(5) + print "EXPECTING: ", "Y/N?" + index = s.expect(["\(Y/N\)\?", "Port in use", "DS-RPC>"]) if index == 0: if dryrun: print "sending N" s.send("N\r\n") else: - print "sending Y" + print "SENDING: Y" s.send("Y\r\n") - + elif index == 1: + raise ExceptionPrompt("PCU Reported 'Port in use.'") + elif index == 2: + raise ExceptionSequence("Issued command 'Reboot' failed.") + + # NOTE: for some reason, the script times out with the + # following line. In manual tests, it works correctly, but + # with automated tests, evidently it fails. + print "SLEEPING: 5" + time.sleep(5) + #print "TOTAL--", s.allstr, "--EOT" index = s.expect(["DS-RPC>"]) - #print "got prompt back" + print "got prompt back" s.close() @@ -810,6 +854,7 @@ class BayTech(PCUControl): # even after login... print "msg: %s" % msg self.transport.write(self.username + "\r\n") + time.sleep(5) self.ifThenSend("DS-RPC>", "Reboot %d" % node_port) # Reboot Outlet N (Y/N)? @@ -817,6 +862,7 @@ class BayTech(PCUControl): self.ifThenSend("(Y/N)?", "N") else: self.ifThenSend("(Y/N)?", "Y") + time.sleep(5) self.ifThenSend("DS-RPC>", "") self.close() @@ -891,6 +937,8 @@ class ePowerSwitchGood(PCUControl): if self.verbose: print f.read() except: import traceback; traceback.print_exc() + from nodecommon import email_exception + email_exception() # fetch url one more time on cmd.html, econtrol.html or whatever. # pass @@ -900,6 +948,20 @@ class ePowerSwitchGood(PCUControl): self.close() return 0 +class CustomPCU(PCUControl): + def run(self, node_port, dryrun): + url = "https://www-itec.uni-klu.ac.at/plab-pcu/index.php" + + if not dryrun: + # Turn host off, then on + formstr = "plab%s=off" % node_port + os.system("curl --user %s:%s --form '%s' --insecure %s" % (self.username, self.password, formstr, url)) + time.sleep(5) + formstr = "plab%s=on" % node_port + os.system("curl --user %s:%s --form '%s' --insecure %s" % (self.username, self.password, formstr, url)) + else: + os.system("curl --user %s:%s --insecure %s" % (self.username, self.password, url)) + class ePowerSwitchOld(PCUControl): def run(self, node_port, dryrun): @@ -1144,12 +1206,16 @@ def pcu_name(pcu): else: return None -import soltesz -fb =soltesz.dbLoad("findbadpcus") +#import database +from monitor import database +fb = None def get_pcu_values(pcu_id): - # TODO: obviously, this shouldn't be loaded each time... - + global fb + if fb == None: + # this shouldn't be loaded each time... + fb = database.dbLoad("findbadpcus") + try: values = fb['nodes']["id_%s" % pcu_id]['values'] except: @@ -1165,14 +1231,14 @@ def reboot_policy(nodename, continue_probe, dryrun): pcu = plc.getpcu(nodename) if not pcu: - logger.debug("no pcu for %s" % hostname) - print "no pcu for %s" % hostname + logger.debug("no pcu for %s" % nodename) + print "no pcu for %s" % nodename return False # "%s has no pcu" % nodename values = get_pcu_values(pcu['pcu_id']) if values == None: - logger.debug("No values for pcu probe %s" % hostname) - print "No values for pcu probe %s" % hostname + logger.debug("No values for pcu probe %s" % nodename) + print "No values for pcu probe %s" % nodename return False #"no info for pcu_id %s" % pcu['pcu_id'] # Try the PCU first @@ -1192,16 +1258,17 @@ def reboot_test(nodename, values, continue_probe, verbose, dryrun): try: # DataProbe iPal (many sites) - if continue_probe and values['model'].find("Dataprobe IP-41x/IP-81x") >= 0: + if continue_probe and values['model'].find("IP-41x_IP-81x") >= 0: ipal = IPAL(values, verbose, ['23', '80', '9100']) rb_ret = ipal.reboot(values[nodename], dryrun) # APC Masterswitch (Berkeley) - elif continue_probe and values['model'].find("APC AP79xx/Masterswitch") >= 0: + elif continue_probe and ( values['model'].find("AP79xx") >= 0 or \ + values['model'].find("Masterswitch") >= 0 ): print values # TODO: make a more robust version of APC - if values['pcu_id'] in [1163,1055,1111,1231,1113,1127,1128,1148]: + if values['pcu_id'] in [1102,1163,1055,1111,1231,1113,1127,1128,1148]: apc = APCEurope(values, verbose, ['22', '23']) rb_ret = apc.reboot(values[nodename], dryrun) @@ -1209,11 +1276,11 @@ def reboot_test(nodename, values, continue_probe, verbose, dryrun): apc = APCBrazil(values, verbose, ['22', '23']) rb_ret = apc.reboot(values[nodename], dryrun) - elif values['pcu_id'] in [1221,1225]: + elif values['pcu_id'] in [1221,1225,1220]: apc = APCBerlin(values, verbose, ['22', '23']) rb_ret = apc.reboot(values[nodename], dryrun) - elif values['pcu_id'] in [1173,1221,1220]: + elif values['pcu_id'] in [1173,1240,47]: apc = APCFolsom(values, verbose, ['22', '23']) rb_ret = apc.reboot(values[nodename], dryrun) @@ -1222,8 +1289,8 @@ def reboot_test(nodename, values, continue_probe, verbose, dryrun): rb_ret = apc.reboot(values[nodename], dryrun) # BayTech DS4-RPC - elif continue_probe and values['model'].find("Baytech DS4-RPC") >= 0: - if values['pcu_id'] in [1237,1052,1209,1002,1008,1041,1013,1022]: + elif continue_probe and values['model'].find("DS4-RPC") >= 0: + if values['pcu_id'] in [1056,1237,1052,1209,1002,1008,1041,1013,1022]: # These require a 'ctrl-c' to be sent... baytech = BayTechCtrlC(values, verbose, ['22', '23']) rb_ret = baytech.reboot(values[nodename], dryrun) @@ -1251,7 +1318,7 @@ def reboot_test(nodename, values, continue_probe, verbose, dryrun): rb_ret = baytech.reboot(values[nodename], dryrun) # iLO - elif continue_probe and values['model'].find("HP iLO") >= 0: + elif continue_probe and values['model'].find("ilo") >= 0: try: hpilo = HPiLO(values, verbose, ['22']) rb_ret = hpilo.reboot(0, dryrun) @@ -1263,12 +1330,16 @@ def reboot_test(nodename, values, continue_probe, verbose, dryrun): rb_ret = hpilo.reboot(0, dryrun) # DRAC ssh - elif continue_probe and values['model'].find("Dell RAC") >= 0: + elif continue_probe and values['model'].find("DRAC") >= 0: # TODO: I don't think DRACRacAdm will throw an exception for the # default method to catch... try: - drac = DRACRacAdm(values, verbose, ['443', '5869']) - rb_ret = drac.reboot(0, dryrun) + if values['pcu_id'] in [1402]: + drac = DRAC(values, verbose, ['22']) + rb_ret = drac.reboot(0, dryrun) + else: + drac = DRACRacAdm(values, verbose, ['443', '5869']) + rb_ret = drac.reboot(0, dryrun) except: drac = DRAC(values, verbose, ['22']) rb_ret = drac.reboot(0, dryrun) @@ -1277,15 +1348,12 @@ def reboot_test(nodename, values, continue_probe, verbose, dryrun): wti = WTIIPS4(values, verbose, ['23']) rb_ret = wti.reboot(values[nodename], dryrun) - elif continue_probe and values['model'].find("Intel AMT") >= 0: + elif continue_probe and values['model'].find("AMT") >= 0: amt = IntelAMT(values, verbose, ['16992']) rb_ret = amt.reboot(values[nodename], dryrun) # BlackBox PSExxx-xx (e.g. PSE505-FR) - elif continue_probe and \ - (values['model'].find("BlackBox PS5xx") >= 0 or - values['model'].find("ePowerSwitch 1/4/8x") >=0 ): - + elif continue_probe and values['model'].find("ePowerSwitch") >=0: # TODO: allow a different port than http 80. if values['pcu_id'] in [1089, 1071, 1046, 1035, 1118]: eps = ePowerSwitchGood(values, verbose, ['80']) @@ -1297,6 +1365,9 @@ def reboot_test(nodename, values, continue_probe, verbose, dryrun): eps = ePowerSwitchGood(values, verbose, ['80']) rb_ret = eps.reboot(values[nodename], dryrun) + elif continue_probe and values['pcu_id'] in [1122]: + custom = CustomPCU(values, verbose, ['80', '443']) + custom.reboot(values[nodename], dryrun) elif continue_probe: rb_ret = "Unsupported_PCU" @@ -1346,6 +1417,8 @@ def main(): print "failed" except Exception, err: import traceback; traceback.print_exc() + from nodecommon import email_exception + email_exception() print err if __name__ == '__main__':