X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=reboot.py;h=ba641c4724c7cea4f6670f2ef883a42949a1ef49;hb=refs%2Fheads%2F1.0;hp=4cccdf0a3499dec0702a5c6f5d9db5d242c10976;hpb=d0652340b89d51c6115edb13d5c7c72b34dea66f;p=monitor.git diff --git a/reboot.py b/reboot.py index 4cccdf0..ba641c4 100755 --- a/reboot.py +++ b/reboot.py @@ -11,14 +11,13 @@ import urllib2 import urllib import threading, popen2 import array, struct -#from socket import * -import socket import plc import base64 from subprocess import PIPE, Popen import ssh.pxssh as pxssh import ssh.pexpect as pexpect import socket +import moncommands # Use our versions of telnetlib and pyssh sys.path.insert(0, os.path.dirname(sys.argv[0])) @@ -276,6 +275,10 @@ class PCUControl(Transport,PCUModel,PCURecord): import traceback traceback.print_exc() return "EOF connection reset" + str(err) + except: + from nodecommon import email_exception + email_exception() + raise Exception('unknown') class IPAL(PCUControl): """ @@ -293,11 +296,12 @@ class IPAL(PCUControl): try: # TODO: make sleep backoff, before stopping. - time.sleep(4) + time.sleep(8) ret = s.recv(count, socket.MSG_DONTWAIT) except socket.error, e: if e[0] == errno.EAGAIN: - raise Exception(e[1]) + #raise Exception(e[1]) + raise ExceptionNotFound(e[1]) else: # TODO: not other exceptions. raise Exception(e) @@ -318,6 +322,8 @@ class IPAL(PCUControl): if e[0] == errno.ECONNREFUSED: # cannot connect to remote host raise Exception(e[1]) + elif e[0] == errno.ETIMEDOUT: + raise ExceptionTimeout(e[1]) else: # TODO: what other conditions are there? raise Exception(e) @@ -329,7 +335,7 @@ class IPAL(PCUControl): print "Current status is '%s'" % ret if ret == '': - raise Exception("Status returned 'another session already open' %s : %s" % (node_port, ret)) + raise Exception("Status returned 'another session already open' on %s %s : %s" % (self.host, node_port, ret)) if node_port < len(ret): @@ -340,10 +346,12 @@ class IPAL(PCUControl): elif status == '0': # down power_on = False + elif status == '6': + raise ExceptionPort("IPAL reported 'Cable Error' on %s socket %s : %s" % (self.host, node_port, ret)) else: - raise Exception("Unknown status for PCU socket %s : %s" % (node_port, ret)) + raise Exception("Unknown status for PCU %s socket %s : %s" % (self.host, node_port, ret)) else: - raise Exception("Mismatch between configured port and PCU status: %s %s" % (node_port, ret)) + raise Exception("Mismatch between configured port and PCU %s status: %s %s" % (self.host, node_port, ret)) if not dryrun: @@ -368,10 +376,12 @@ class IPAL(PCUControl): elif status == '0': # down power_on = False + elif status == '6': + raise ExceptionPort("IPAL reported 'Cable Error' on %s socket %s : %s" % (self.host, node_port, ret)) else: - raise Exception("Unknown status for PCU socket %s : %s" % (node_port, ret)) + raise Exception("Unknown status for PCU %s socket %s : %s" % (self.host, node_port, ret)) else: - raise Exception("Mismatch between configured port and PCU status: %s %s" % (node_port, ret)) + raise Exception("Mismatch between configured port and PCU %s status: %s %s" % (self.host, node_port, ret)) if power_on: return 0 @@ -559,10 +569,10 @@ class APC(PCUControl): class IntelAMT(PCUControl): def run(self, node_port, dryrun): - import soltesz - cmd = soltesz.CMD() - cmd_str = "IntelAMTSDK/Samples/RemoteControl/remoteControl" + cmd = moncommands.CMD() + #[cmd_str = "IntelAMTSDK/Samples/RemoteControl/remoteControl" + cmd_str = "cmdamt/remoteControl" if dryrun: # NOTE: -p checks the power state of the host. @@ -624,9 +634,8 @@ class HPiLO(PCUControl): class HPiLOHttps(PCUControl): def run(self, node_port, dryrun): - import soltesz - locfg = soltesz.CMD() + locfg = moncommands.CMD() cmd = "cmdhttps/locfg.pl -s %s -f %s -u %s -p '%s' | grep 'MESSAGE' | grep -v 'No error'" % ( self.host, "iloxml/Get_Network.xml", self.username, self.password) @@ -637,7 +646,7 @@ class HPiLOHttps(PCUControl): return sout.strip() if not dryrun: - locfg = soltesz.CMD() + locfg = moncommands.CMD() cmd = "cmdhttps/locfg.pl -s %s -f %s -u %s -p '%s' | grep 'MESSAGE' | grep -v 'No error'" % ( self.host, "iloxml/Reset_Server.xml", self.username, self.password) @@ -668,6 +677,13 @@ class BayTechAU(PCUControl): class BayTechGeorgeTown(PCUControl): def run(self, node_port, dryrun): + # this initial open/close is to prevent things from raising an + # exception. the pcu always is weird during the first connection, and + # even if it's not, what does it matter to open a second connection + # right away? + self.open(self.host, self.username, None, "Enter user name:") + self.close() + time.sleep(1) self.open(self.host, self.username, None, "Enter user name:") self.sendPassword(self.password, "Enter Password:") @@ -706,6 +722,7 @@ class BayTechCtrlCUnibe(PCUControl): # Control Outlets (5 ,1).........5 try: + #index = s.expect("Enter Request") index = s.expect(["Enter Request :"]) if index == 0: @@ -720,7 +737,8 @@ class BayTechCtrlCUnibe(PCUControl): print "Reboot %d" % node_port s.send("Reboot %d\r\n" % node_port) - index = s.expect(["(Y/N)?"]) + time.sleep(5) + index = s.expect(["\(Y/N\)\?", "Port in use", "DS-RPC>"]) if index == 0: if dryrun: print "sending N" @@ -728,16 +746,21 @@ class BayTechCtrlCUnibe(PCUControl): else: print "sending Y" s.send("Y\r\n") + elif index == 1: + raise ExceptionPrompt("PCU Reported 'Port in use.'") + elif index == 2: + raise ExceptionSequence("Issued command 'Reboot' failed.") - #index = s.expect(["DS-RPC>"]) + time.sleep(5) + index = s.expect(["DS-RPC>"]) #print "got prompt back" s.close() except pexpect.EOF: - raise ExceptionPrompt("EOF before 'Enter Request' Prompt") + raise ExceptionPrompt("EOF before expected Prompt") except pexpect.TIMEOUT: - raise ExceptionPrompt("Timeout before 'Enter Request' Prompt") + raise ExceptionPrompt("Timeout before expected Prompt") return 0 @@ -757,37 +780,54 @@ class BayTechCtrlC(PCUControl): # Otherwise, the login succeeded. # Send a ctrl-c to the remote process. - print "sending ctrl-c" + print "SENDING ctrl-c" s.send(chr(3)) # Control Outlets (5 ,1).........5 try: + print "EXPECTING: ", "Enter Request :" index = s.expect(["Enter Request :"]) if index == 0: - print "5" + print "SENDING: 5" s.send("5\r\n") - index = s.expect(["DS-RPC>", "Enter user name:"]) + print "EXPECTING: ", "DS-RPC>" + index = s.expect(["DS-RPC>", "Enter user name:", "Port in use."]) if index == 1: print "sending username" s.send(self.username + "\r\n") index = s.expect(["DS-RPC>"]) + elif index == 2: + raise ExceptionPrompt("PCU Reported 'Port in use.'") if index == 0: - print "Reboot %d" % node_port + print "SENDING: Reboot %d" % node_port s.send("Reboot %d\r\n" % node_port) - index = s.expect(["(Y/N)?"]) + print "SLEEPING: 5" + time.sleep(5) + print "EXPECTING: ", "Y/N?" + index = s.expect(["\(Y/N\)\?", "Port in use", "DS-RPC>"]) if index == 0: if dryrun: print "sending N" s.send("N\r\n") else: - print "sending Y" + print "SENDING: Y" s.send("Y\r\n") - + elif index == 1: + raise ExceptionPrompt("PCU Reported 'Port in use.'") + elif index == 2: + raise ExceptionSequence("Issued command 'Reboot' failed.") + + # NOTE: for some reason, the script times out with the + # following line. In manual tests, it works correctly, but + # with automated tests, evidently it fails. + print "SLEEPING: 5" + time.sleep(5) + #print "TOTAL--", s.allstr, "--EOT" index = s.expect(["DS-RPC>"]) - #print "got prompt back" + print "got prompt back" s.close() @@ -814,6 +854,7 @@ class BayTech(PCUControl): # even after login... print "msg: %s" % msg self.transport.write(self.username + "\r\n") + time.sleep(5) self.ifThenSend("DS-RPC>", "Reboot %d" % node_port) # Reboot Outlet N (Y/N)? @@ -821,6 +862,7 @@ class BayTech(PCUControl): self.ifThenSend("(Y/N)?", "N") else: self.ifThenSend("(Y/N)?", "Y") + time.sleep(5) self.ifThenSend("DS-RPC>", "") self.close() @@ -895,6 +937,8 @@ class ePowerSwitchGood(PCUControl): if self.verbose: print f.read() except: import traceback; traceback.print_exc() + from nodecommon import email_exception + email_exception() # fetch url one more time on cmd.html, econtrol.html or whatever. # pass @@ -904,6 +948,20 @@ class ePowerSwitchGood(PCUControl): self.close() return 0 +class CustomPCU(PCUControl): + def run(self, node_port, dryrun): + url = "https://www-itec.uni-klu.ac.at/plab-pcu/index.php" + + if not dryrun: + # Turn host off, then on + formstr = "plab%s=off" % node_port + os.system("curl --user %s:%s --form '%s' --insecure %s" % (self.username, self.password, formstr, url)) + time.sleep(5) + formstr = "plab%s=on" % node_port + os.system("curl --user %s:%s --form '%s' --insecure %s" % (self.username, self.password, formstr, url)) + else: + os.system("curl --user %s:%s --insecure %s" % (self.username, self.password, url)) + class ePowerSwitchOld(PCUControl): def run(self, node_port, dryrun): @@ -1148,12 +1206,16 @@ def pcu_name(pcu): else: return None -import soltesz -fb =soltesz.dbLoad("findbadpcus") +#import database +from monitor import database +fb = None def get_pcu_values(pcu_id): - # TODO: obviously, this shouldn't be loaded each time... - + global fb + if fb == None: + # this shouldn't be loaded each time... + fb = database.dbLoad("findbadpcus") + try: values = fb['nodes']["id_%s" % pcu_id]['values'] except: @@ -1169,14 +1231,14 @@ def reboot_policy(nodename, continue_probe, dryrun): pcu = plc.getpcu(nodename) if not pcu: - logger.debug("no pcu for %s" % hostname) - print "no pcu for %s" % hostname + logger.debug("no pcu for %s" % nodename) + print "no pcu for %s" % nodename return False # "%s has no pcu" % nodename values = get_pcu_values(pcu['pcu_id']) if values == None: - logger.debug("No values for pcu probe %s" % hostname) - print "No values for pcu probe %s" % hostname + logger.debug("No values for pcu probe %s" % nodename) + print "No values for pcu probe %s" % nodename return False #"no info for pcu_id %s" % pcu['pcu_id'] # Try the PCU first @@ -1206,7 +1268,7 @@ def reboot_test(nodename, values, continue_probe, verbose, dryrun): print values # TODO: make a more robust version of APC - if values['pcu_id'] in [1163,1055,1111,1231,1113,1127,1128,1148]: + if values['pcu_id'] in [1102,1163,1055,1111,1231,1113,1127,1128,1148]: apc = APCEurope(values, verbose, ['22', '23']) rb_ret = apc.reboot(values[nodename], dryrun) @@ -1214,11 +1276,11 @@ def reboot_test(nodename, values, continue_probe, verbose, dryrun): apc = APCBrazil(values, verbose, ['22', '23']) rb_ret = apc.reboot(values[nodename], dryrun) - elif values['pcu_id'] in [1221,1225]: + elif values['pcu_id'] in [1221,1225,1220]: apc = APCBerlin(values, verbose, ['22', '23']) rb_ret = apc.reboot(values[nodename], dryrun) - elif values['pcu_id'] in [1173,1221,1220]: + elif values['pcu_id'] in [1173,1240,47]: apc = APCFolsom(values, verbose, ['22', '23']) rb_ret = apc.reboot(values[nodename], dryrun) @@ -1228,7 +1290,7 @@ def reboot_test(nodename, values, continue_probe, verbose, dryrun): # BayTech DS4-RPC elif continue_probe and values['model'].find("DS4-RPC") >= 0: - if values['pcu_id'] in [1237,1052,1209,1002,1008,1041,1013,1022]: + if values['pcu_id'] in [1056,1237,1052,1209,1002,1008,1041,1013,1022]: # These require a 'ctrl-c' to be sent... baytech = BayTechCtrlC(values, verbose, ['22', '23']) rb_ret = baytech.reboot(values[nodename], dryrun) @@ -1272,8 +1334,12 @@ def reboot_test(nodename, values, continue_probe, verbose, dryrun): # TODO: I don't think DRACRacAdm will throw an exception for the # default method to catch... try: - drac = DRACRacAdm(values, verbose, ['443', '5869']) - rb_ret = drac.reboot(0, dryrun) + if values['pcu_id'] in [1402]: + drac = DRAC(values, verbose, ['22']) + rb_ret = drac.reboot(0, dryrun) + else: + drac = DRACRacAdm(values, verbose, ['443', '5869']) + rb_ret = drac.reboot(0, dryrun) except: drac = DRAC(values, verbose, ['22']) rb_ret = drac.reboot(0, dryrun) @@ -1299,6 +1365,9 @@ def reboot_test(nodename, values, continue_probe, verbose, dryrun): eps = ePowerSwitchGood(values, verbose, ['80']) rb_ret = eps.reboot(values[nodename], dryrun) + elif continue_probe and values['pcu_id'] in [1122]: + custom = CustomPCU(values, verbose, ['80', '443']) + custom.reboot(values[nodename], dryrun) elif continue_probe: rb_ret = "Unsupported_PCU" @@ -1348,6 +1417,8 @@ def main(): print "failed" except Exception, err: import traceback; traceback.print_exc() + from nodecommon import email_exception + email_exception() print err if __name__ == '__main__':