X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=reboot.py;h=337b0b46f359fc173bf4283ced02445510b49873;hb=944d143a6528c4157b71f51ed480aec806cbaa06;hp=495d366d990eae3bd9615f67db293834f291c216;hpb=f38420ff4bc9fb114db5f62b01cc8990dc0a0af2;p=monitor.git diff --git a/reboot.py b/reboot.py index 495d366..337b0b4 100755 --- a/reboot.py +++ b/reboot.py @@ -8,16 +8,16 @@ import os, sys import xml, xmlrpclib import errno, time, traceback import urllib2 +import urllib import threading, popen2 import array, struct -#from socket import * -import socket import plc import base64 from subprocess import PIPE, Popen import ssh.pxssh as pxssh import ssh.pexpect as pexpect import socket +import moncommands # Use our versions of telnetlib and pyssh sys.path.insert(0, os.path.dirname(sys.argv[0])) @@ -115,6 +115,7 @@ class Transport: TELNET = 1 SSH = 2 HTTP = 3 + IPAL = 4 TELNET_TIMEOUT = 60 @@ -123,10 +124,6 @@ class Transport: self.verbose = verbose self.transport = None -# def __del__(self): -# if self.transport: -# self.close() - def open(self, host, username=None, password=None, prompt="User Name"): transport = None @@ -235,8 +232,11 @@ class PCUControl(Transport,PCUModel,PCURecord): elif '443' in supported_ports and self.portstatus['443'] == "open": type = Transport.HTTP elif '5869' in supported_ports and self.portstatus['5869'] == "open": - # For DRAC cards. not sure how much it's used in the - # protocol.. but racadm opens this port. + # For DRAC cards. Racadm opens this port. + type = Transport.HTTP + elif '9100' in supported_ports and self.portstatus['9100'] == "open": + type = Transport.IPAL + elif '16992' in supported_ports and self.portstatus['16992'] == "open": type = Transport.HTTP else: raise ExceptionPort("Unsupported Port: No transport from open ports") @@ -275,38 +275,133 @@ class PCUControl(Transport,PCUModel,PCURecord): import traceback traceback.print_exc() return "EOF connection reset" + str(err) - #except Exception, err: - # if self.verbose: - # logger.debug("reboot: Exception") - # logger.debug(err) - # if self.transport: - # self.transport.close() - # import traceback - # traceback.print_exc() - # return "generic exception; unknown problem." - class IPAL(PCUControl): + """ + This now uses a proprietary format for communicating with the PCU. I + prefer it to Telnet, and Web access, since it's much lighter weight + and, more importantly, IT WORKS!! HHAHHHAHAHAHAHAHA! + """ + + def format_msg(self, data, cmd): + esc = chr(int('1b',16)) + return "%c%s%c%s%c" % (esc, self.password, esc, data, cmd) # esc, 'q', chr(4)) + + def recv_noblock(self, s, count): + import errno + + try: + # TODO: make sleep backoff, before stopping. + time.sleep(4) + ret = s.recv(count, socket.MSG_DONTWAIT) + except socket.error, e: + if e[0] == errno.EAGAIN: + raise Exception(e[1]) + else: + # TODO: not other exceptions. + raise Exception(e) + return ret + def run(self, node_port, dryrun): - self.open(self.host) + import errno - # XXX Some iPals require you to hit Enter a few times first - self.ifThenSend("Password >", "\r\n\r\n", ExceptionNotFound) + power_on = False - # Login - self.ifThenSend("Password >", self.password, ExceptionPassword) - self.transport.write("\r\n\r\n") + print "open socket" + s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + try: + print "connect" + s.connect((self.host, 9100)) + except socket.error, e: + s.close() + if e[0] == errno.ECONNREFUSED: + # cannot connect to remote host + raise Exception(e[1]) + else: + # TODO: what other conditions are there? + raise Exception(e) + + # get current status + print "Checking status" + s.send(self.format_msg("", 'O')) + ret = self.recv_noblock(s, 8) + print "Current status is '%s'" % ret + + if ret == '': + raise Exception("Status returned 'another session already open' %s : %s" % (node_port, ret)) + + + if node_port < len(ret): + status = ret[node_port] + if status == '1': + # up + power_on = True + elif status == '0': + # down + power_on = False + else: + raise Exception("Unknown status for PCU socket %s : %s" % (node_port, ret)) + else: + raise Exception("Mismatch between configured port and PCU status: %s %s" % (node_port, ret)) + - if not dryrun: # P# - Pulse relay - self.ifThenSend("Enter >", - "P%d" % node_port, - ExceptionNotFound) - # Get the next prompt - self.ifElse("Enter >", ExceptionTimeout) + if not dryrun: + if power_on: + print "Pulsing %s" % node_port + s.send(self.format_msg("%s" % node_port, 'P')) + else: + # NOTE: turn power on ; do not pulse the port. + print "Power was off, so turning on ..." + s.send(self.format_msg("%s" % node_port, 'E')) + #s.send(self.format_msg("%s" % node_port, 'P')) + + print "Receiving response." + ret = self.recv_noblock(s, 8) + print "Current status is '%s'" % ret + + if node_port < len(ret): + status = ret[node_port] + if status == '1': + # up + power_on = True + elif status == '0': + # down + power_on = False + else: + raise Exception("Unknown status for PCU socket %s : %s" % (node_port, ret)) + else: + raise Exception("Mismatch between configured port and PCU status: %s %s" % (node_port, ret)) - self.close() + if power_on: + return 0 + else: + return "Failed Power On" + + s.close() return 0 +# TELNET version of protocol... +# #self.open(self.host) +# ## XXX Some iPals require you to hit Enter a few times first +# #self.ifThenSend("Password >", "\r\n\r\n", ExceptionNotFound) +# # Login +# self.ifThenSend("Password >", self.password, ExceptionPassword) +# self.transport.write("\r\n\r\n") +# if not dryrun: # P# - Pulse relay +# print "node_port %s" % node_port +# self.ifThenSend("Enter >", +# "P7", # % node_port, +# ExceptionNotFound) +# print "send newlines" +# self.transport.write("\r\n\r\n") +# print "after new lines" +# # Get the next prompt +# print "wait for enter" +# self.ifElse("Enter >", ExceptionTimeout) +# print "closing " +# self.close() +# return 0 + class APCEurope(PCUControl): def run(self, node_port, dryrun): self.open(self.host, self.username) @@ -461,6 +556,23 @@ class APC(PCUControl): else: return ret +class IntelAMT(PCUControl): + def run(self, node_port, dryrun): + + cmd = moncommands.CMD() + #[cmd_str = "IntelAMTSDK/Samples/RemoteControl/remoteControl" + cmd_str = "cmdamt/remoteControl" + + if dryrun: + # NOTE: -p checks the power state of the host. + # TODO: parse the output to find out if it's ok or not. + cmd_str += " -p http://%s:16992/RemoteControlService -user admin -pass '%s' " % (self.host, self.password ) + else: + cmd_str += " -A http://%s:16992/RemoteControlService -user admin -pass '%s' " % (self.host, self.password ) + + print cmd_str + return cmd.system(cmd_str, self.TELNET_TIMEOUT) + class DRACRacAdm(PCUControl): def run(self, node_port, dryrun): @@ -511,9 +623,8 @@ class HPiLO(PCUControl): class HPiLOHttps(PCUControl): def run(self, node_port, dryrun): - import soltesz - locfg = soltesz.CMD() + locfg = moncommands.CMD() cmd = "cmdhttps/locfg.pl -s %s -f %s -u %s -p '%s' | grep 'MESSAGE' | grep -v 'No error'" % ( self.host, "iloxml/Get_Network.xml", self.username, self.password) @@ -524,7 +635,7 @@ class HPiLOHttps(PCUControl): return sout.strip() if not dryrun: - locfg = soltesz.CMD() + locfg = moncommands.CMD() cmd = "cmdhttps/locfg.pl -s %s -f %s -u %s -p '%s' | grep 'MESSAGE' | grep -v 'No error'" % ( self.host, "iloxml/Reset_Server.xml", self.username, self.password) @@ -593,7 +704,9 @@ class BayTechCtrlCUnibe(PCUControl): # Control Outlets (5 ,1).........5 try: - index = s.expect(["Enter Request :"]) + print s + print "Enter Request" in s.before + index = s.expect("Enter Request") if index == 0: print "3" @@ -673,7 +786,10 @@ class BayTechCtrlC(PCUControl): print "sending Y" s.send("Y\r\n") - index = s.expect(["DS-RPC>"]) + # NOTE: for some reason, the script times out with the + # following line. In manual tests, it works correctly, but + # with automated tests, evidently it fails. + #index = s.expect(["DS-RPC>"]) #print "got prompt back" s.close() @@ -681,7 +797,7 @@ class BayTechCtrlC(PCUControl): except pexpect.EOF: raise ExceptionPrompt("EOF before 'Enter Request' Prompt") except pexpect.TIMEOUT: - raise ExceptionPrompt("Timeout before 'Enter Request' Prompt") + raise ExceptionPrompt("Timeout before Prompt") return 0 @@ -770,19 +886,41 @@ class ePowerSwitchGood(PCUControl): # failing here means the User/passwd is wrong (hopefully) raise ExceptionPassword("Incorrect username/password") - # TODO: after verifying that the user/password is correct, we should - # actually reboot the given node. - + # NOTE: after verifying that the user/password is correct, + # actually reboot the given node. if not dryrun: - # add data to handler, - # fetch url one more time on cmd.html, econtrol.html or whatever. - pass + try: + data = urllib.urlencode({'P%d' % node_port : "r"}) + req = urllib2.Request(self.url + "cmd.html") + req.add_header("Authorization", authheader) + # add data to handler, + f = urllib2.urlopen(req, data) + if self.verbose: print f.read() + except: + import traceback; traceback.print_exc() - if self.verbose: print f.read() + # fetch url one more time on cmd.html, econtrol.html or whatever. + # pass + else: + if self.verbose: print f.read() self.close() return 0 +class CustomPCU(PCUControl): + def run(self, node_port, dryrun): + url = "https://www-itec.uni-klu.ac.at/plab-pcu/index.php" + + if not dryrun: + # Turn host off, then on + formstr = "plab%s=off" % node_port + os.system("curl --user %s:%s --form '%s' --insecure %s" % (self.username, self.password, formstr, url)) + time.sleep(5) + formstr = "plab%s=on" % node_port + os.system("curl --user %s:%s --form '%s' --insecure %s" % (self.username, self.password, formstr, url)) + else: + os.system("curl --user %s:%s --insecure %s" % (self.username, self.password, url)) + class ePowerSwitchOld(PCUControl): def run(self, node_port, dryrun): @@ -1027,12 +1165,16 @@ def pcu_name(pcu): else: return None -import soltesz -fb =soltesz.dbLoad("findbadpcus") +#import database +from monitor import database +fb = None def get_pcu_values(pcu_id): - # TODO: obviously, this shouldn't be loaded each time... - + global fb + if fb == None: + # this shouldn't be loaded each time... + fb = database.dbLoad("findbadpcus") + try: values = fb['nodes']["id_%s" % pcu_id]['values'] except: @@ -1045,7 +1187,6 @@ def reboot(nodename): def reboot_policy(nodename, continue_probe, dryrun): global verbose - print "this is a test of reboot_policy()" pcu = plc.getpcu(nodename) if not pcu: @@ -1062,7 +1203,6 @@ def reboot_policy(nodename, continue_probe, dryrun): # Try the PCU first logger.debug("Trying PCU %s %s" % (pcu['hostname'], pcu['model'])) - print "reboot_test" ret = reboot_test(nodename, values, continue_probe, verbose, dryrun) if ret != 0: @@ -1077,12 +1217,13 @@ def reboot_test(nodename, values, continue_probe, verbose, dryrun): try: # DataProbe iPal (many sites) - if continue_probe and values['model'].find("Dataprobe IP-41x/IP-81x") >= 0: - ipal = IPAL(values, verbose, ['23']) + if continue_probe and values['model'].find("IP-41x_IP-81x") >= 0: + ipal = IPAL(values, verbose, ['23', '80', '9100']) rb_ret = ipal.reboot(values[nodename], dryrun) # APC Masterswitch (Berkeley) - elif continue_probe and values['model'].find("APC AP79xx/Masterswitch") >= 0: + elif continue_probe and ( values['model'].find("AP79xx") >= 0 or \ + values['model'].find("Masterswitch") >= 0 ): print values # TODO: make a more robust version of APC @@ -1107,7 +1248,7 @@ def reboot_test(nodename, values, continue_probe, verbose, dryrun): rb_ret = apc.reboot(values[nodename], dryrun) # BayTech DS4-RPC - elif continue_probe and values['model'].find("Baytech DS4-RPC") >= 0: + elif continue_probe and values['model'].find("DS4-RPC") >= 0: if values['pcu_id'] in [1237,1052,1209,1002,1008,1041,1013,1022]: # These require a 'ctrl-c' to be sent... baytech = BayTechCtrlC(values, verbose, ['22', '23']) @@ -1136,7 +1277,7 @@ def reboot_test(nodename, values, continue_probe, verbose, dryrun): rb_ret = baytech.reboot(values[nodename], dryrun) # iLO - elif continue_probe and values['model'].find("HP iLO") >= 0: + elif continue_probe and values['model'].find("ilo") >= 0: try: hpilo = HPiLO(values, verbose, ['22']) rb_ret = hpilo.reboot(0, dryrun) @@ -1148,7 +1289,7 @@ def reboot_test(nodename, values, continue_probe, verbose, dryrun): rb_ret = hpilo.reboot(0, dryrun) # DRAC ssh - elif continue_probe and values['model'].find("Dell RAC") >= 0: + elif continue_probe and values['model'].find("DRAC") >= 0: # TODO: I don't think DRACRacAdm will throw an exception for the # default method to catch... try: @@ -1162,20 +1303,26 @@ def reboot_test(nodename, values, continue_probe, verbose, dryrun): wti = WTIIPS4(values, verbose, ['23']) rb_ret = wti.reboot(values[nodename], dryrun) - # BlackBox PSExxx-xx (e.g. PSE505-FR) - elif continue_probe and \ - (values['model'].find("BlackBox PS5xx") >= 0 or - values['model'].find("ePowerSwitch 1/4/8x") >=0 ): + elif continue_probe and values['model'].find("AMT") >= 0: + amt = IntelAMT(values, verbose, ['16992']) + rb_ret = amt.reboot(values[nodename], dryrun) + # BlackBox PSExxx-xx (e.g. PSE505-FR) + elif continue_probe and values['model'].find("ePowerSwitch") >=0: # TODO: allow a different port than http 80. if values['pcu_id'] in [1089, 1071, 1046, 1035, 1118]: eps = ePowerSwitchGood(values, verbose, ['80']) elif values['pcu_id'] in [1003]: + # OLD EPOWER + print "OLD EPOWER" eps = ePowerSwitch(values, verbose, ['80']) else: eps = ePowerSwitchGood(values, verbose, ['80']) rb_ret = eps.reboot(values[nodename], dryrun) + elif continue_probe and values['pcu_id'] in [1122]: + custom = CustomPCU(values, verbose, ['80', '443']) + custom.reboot(values[nodename], dryrun) elif continue_probe: rb_ret = "Unsupported_PCU"