changes for 3.0
[monitor.git] / reboot.py
index 6a3b9bb..ba641c4 100755 (executable)
--- a/reboot.py
+++ b/reboot.py
@@ -8,16 +8,16 @@ import os, sys
 import xml, xmlrpclib
 import errno, time, traceback
 import urllib2
+import urllib
 import threading, popen2
 import array, struct
-#from socket import *
-import socket
 import plc
 import base64
 from subprocess import PIPE, Popen
 import ssh.pxssh as pxssh
 import ssh.pexpect as pexpect
 import socket
+import moncommands 
 
 # Use our versions of telnetlib and pyssh
 sys.path.insert(0, os.path.dirname(sys.argv[0]))
@@ -275,6 +275,10 @@ class PCUControl(Transport,PCUModel,PCURecord):
                        import traceback
                        traceback.print_exc()
                        return "EOF connection reset" + str(err)
+               except:
+                       from nodecommon import email_exception
+                       email_exception()
+                       raise Exception('unknown')
                
 class IPAL(PCUControl):
        """ 
@@ -292,11 +296,12 @@ class IPAL(PCUControl):
 
                try:
                        # TODO: make sleep backoff, before stopping.
-                       time.sleep(4)
+                       time.sleep(8)
                        ret = s.recv(count, socket.MSG_DONTWAIT)
                except socket.error, e:
                        if e[0] == errno.EAGAIN:
-                               return Exception(e[1])
+                               #raise Exception(e[1])
+                               raise ExceptionNotFound(e[1])
                        else:
                                # TODO: not other exceptions.
                                raise Exception(e)
@@ -316,7 +321,9 @@ class IPAL(PCUControl):
                        s.close()
                        if e[0] == errno.ECONNREFUSED:
                                # cannot connect to remote host
-                               return Exception(e[1])
+                               raise Exception(e[1])
+                       elif e[0] == errno.ETIMEDOUT:
+                               raise ExceptionTimeout(e[1])
                        else:
                                # TODO: what other conditions are there?
                                raise Exception(e)
@@ -326,6 +333,10 @@ class IPAL(PCUControl):
                s.send(self.format_msg("", 'O'))
                ret = self.recv_noblock(s, 8)
                print "Current status is '%s'" % ret
+
+               if ret == '':
+                       raise Exception("Status returned 'another session already open' on %s %s : %s" % (self.host, node_port, ret))
+                       
                                
                if node_port < len(ret):
                        status = ret[node_port]
@@ -335,21 +346,23 @@ class IPAL(PCUControl):
                        elif status == '0':
                                # down
                                power_on = False
+                       elif status == '6':
+                               raise ExceptionPort("IPAL reported 'Cable Error' on %s socket %s : %s" % (self.host, node_port, ret))
                        else:
-                               raise Exception("Unknown status for PCU socket %s : %s" % (node_port, ret))
+                               raise Exception("Unknown status for PCU %s socket %s : %s" % (self.host, node_port, ret))
                else:
-                       raise Exception("Mismatch between configured port and PCU status: %s %s" % (node_port, ret))
+                       raise Exception("Mismatch between configured port and PCU %s status: %s %s" % (self.host, node_port, ret))
                        
 
                if not dryrun:
-                       print "Pulsing %s" % node_port
                        if power_on:
+                               print "Pulsing %s" % node_port
                                s.send(self.format_msg("%s" % node_port, 'P'))
                        else:
-                               # NOTE: turn power on before pulsing the port.
-                               print "power was off, so turning on then pulsing..."
+                               # NOTE: turn power on ; do not pulse the port.
+                               print "Power was off, so turning on ..."
                                s.send(self.format_msg("%s" % node_port, 'E'))
-                               s.send(self.format_msg("%s" % node_port, 'P'))
+                               #s.send(self.format_msg("%s" % node_port, 'P'))
 
                        print "Receiving response."
                        ret = self.recv_noblock(s, 8)
@@ -363,10 +376,12 @@ class IPAL(PCUControl):
                                elif status == '0':
                                        # down
                                        power_on = False
+                               elif status == '6':
+                                       raise ExceptionPort("IPAL reported 'Cable Error' on %s socket %s : %s" % (self.host, node_port, ret))
                                else:
-                                       raise Exception("Unknown status for PCU socket %s : %s" % (node_port, ret))
+                                       raise Exception("Unknown status for PCU %s socket %s : %s" % (self.host, node_port, ret))
                        else:
-                               raise Exception("Mismatch between configured port and PCU status: %s %s" % (node_port, ret))
+                               raise Exception("Mismatch between configured port and PCU %s status: %s %s" % (self.host, node_port, ret))
 
                        if power_on:
                                return 0
@@ -554,10 +569,10 @@ class APC(PCUControl):
 
 class IntelAMT(PCUControl):
        def run(self, node_port, dryrun):
-               import soltesz
 
-               cmd = soltesz.CMD()
-               cmd_str = "IntelAMTSDK/Samples/RemoteControl/remoteControl"
+               cmd = moncommands.CMD()
+               # old path: cmd_str = "IntelAMTSDK/Samples/RemoteControl/remoteControl"
+               cmd_str = "cmdamt/remoteControl"
 
                if dryrun:
                        # NOTE: -p checks the power state of the host.
@@ -619,9 +634,8 @@ class HPiLO(PCUControl):
                
 class HPiLOHttps(PCUControl):
        def run(self, node_port, dryrun):
-               import soltesz
 
-               locfg = soltesz.CMD()
+               locfg = moncommands.CMD()
                cmd = "cmdhttps/locfg.pl -s %s -f %s -u %s -p '%s' | grep 'MESSAGE' | grep -v 'No error'" % (
                                        self.host, "iloxml/Get_Network.xml", 
                                        self.username, self.password)
@@ -632,7 +646,7 @@ class HPiLOHttps(PCUControl):
                        return sout.strip()
 
                if not dryrun:
-                       locfg = soltesz.CMD()
+                       locfg = moncommands.CMD()
                        cmd = "cmdhttps/locfg.pl -s %s -f %s -u %s -p '%s' | grep 'MESSAGE' | grep -v 'No error'" % (
                                                self.host, "iloxml/Reset_Server.xml", 
                                                self.username, self.password)
@@ -663,6 +677,13 @@ class BayTechAU(PCUControl):
 
 class BayTechGeorgeTown(PCUControl):
        def run(self, node_port, dryrun):
+               # NOTE: this throwaway open/close keeps the real session below
+               # from raising an exception: the PCU always behaves oddly on the
+               # first connection.  Even when it does not, opening a second
+               # connection right away costs nothing.
+               self.open(self.host, self.username, None, "Enter user name:")
+               self.close()
+               time.sleep(1)
                self.open(self.host, self.username, None, "Enter user name:")
                self.sendPassword(self.password, "Enter Password:")
 
@@ -701,6 +722,7 @@ class BayTechCtrlCUnibe(PCUControl):
 
                # Control Outlets  (5 ,1).........5
                try:
+                       #index = s.expect("Enter Request")
                        index = s.expect(["Enter Request :"])
 
                        if index == 0:
@@ -715,7 +737,8 @@ class BayTechCtrlCUnibe(PCUControl):
                                        print "Reboot %d" % node_port
                                        s.send("Reboot %d\r\n" % node_port)
 
-                                       index = s.expect(["(Y/N)?"])
+                                       time.sleep(5)
+                                       index = s.expect(["\(Y/N\)\?", "Port in use", "DS-RPC>"])
                                        if index == 0:
                                                if dryrun:
                                                        print "sending N"
@@ -723,16 +746,21 @@ class BayTechCtrlCUnibe(PCUControl):
                                                else:
                                                        print "sending Y"
                                                        s.send("Y\r\n")
+                                       elif index == 1:
+                                               raise ExceptionPrompt("PCU Reported 'Port in use.'")
+                                       elif index == 2:
+                                               raise ExceptionSequence("Issued command 'Reboot' failed.")
 
-                               #index = s.expect(["DS-RPC>"])
+                               time.sleep(5)
+                               index = s.expect(["DS-RPC>"])
                                #print "got prompt back"
 
                        s.close()
 
                except pexpect.EOF:
-                       raise ExceptionPrompt("EOF before 'Enter Request' Prompt")
+                       raise ExceptionPrompt("EOF before expected Prompt")
                except pexpect.TIMEOUT:
-                       raise ExceptionPrompt("Timeout before 'Enter Request' Prompt")
+                       raise ExceptionPrompt("Timeout before expected Prompt")
 
                return 0
 
@@ -752,37 +780,54 @@ class BayTechCtrlC(PCUControl):
                # Otherwise, the login succeeded.
 
                # Send a ctrl-c to the remote process.
-               print "sending ctrl-c"
+               print "SENDING ctrl-c"
                s.send(chr(3))
 
                # Control Outlets  (5 ,1).........5
                try:
+                       print "EXPECTING: ", "Enter Request :"
                        index = s.expect(["Enter Request :"])
 
                        if index == 0:
-                               print "5"
+                               print "SENDING: 5"
                                s.send("5\r\n")
-                               index = s.expect(["DS-RPC>", "Enter user name:"])
+                               print "EXPECTING: ", "DS-RPC>"
+                               index = s.expect(["DS-RPC>", "Enter user name:", "Port in use."])
                                if index == 1:
                                        print "sending username"
                                        s.send(self.username + "\r\n")
                                        index = s.expect(["DS-RPC>"])
+                               elif index == 2:
+                                       raise ExceptionPrompt("PCU Reported 'Port in use.'")
 
                                if index == 0:
-                                       print "Reboot %d" % node_port
+                                       print "SENDING: Reboot %d" % node_port
                                        s.send("Reboot %d\r\n" % node_port)
 
-                                       index = s.expect(["(Y/N)?"])
+                                       print "SLEEPING: 5"
+                                       time.sleep(5)
+                                       print "EXPECTING: ", "Y/N?"
+                                       index = s.expect(["\(Y/N\)\?", "Port in use", "DS-RPC>"])
                                        if index == 0:
                                                if dryrun:
                                                        print "sending N"
                                                        s.send("N\r\n")
                                                else:
-                                                       print "sending Y"
+                                                       print "SENDING: Y"
                                                        s.send("Y\r\n")
-
+                                       elif index == 1:
+                                               raise ExceptionPrompt("PCU Reported 'Port in use.'")
+                                       elif index == 2:
+                                               raise ExceptionSequence("Issued command 'Reboot' failed.")
+
+                               # NOTE: for some reason, the script times out at the
+                               # expect for the "DS-RPC>" prompt below.  It works when
+                               # run by hand, but evidently fails in automated runs.
+                               print "SLEEPING: 5"
+                               time.sleep(5)
+                               #print "TOTAL--", s.allstr, "--EOT"
                                index = s.expect(["DS-RPC>"])
-                               #print "got prompt back"
+                               print "got prompt back"
 
                        s.close()
 
@@ -809,6 +854,7 @@ class BayTech(PCUControl):
                        # even after login...
                        print "msg: %s" % msg
                        self.transport.write(self.username + "\r\n")
+                       time.sleep(5)
                        self.ifThenSend("DS-RPC>", "Reboot %d" % node_port)
 
                # Reboot Outlet  N        (Y/N)?
@@ -816,6 +862,7 @@ class BayTech(PCUControl):
                        self.ifThenSend("(Y/N)?", "N")
                else:
                        self.ifThenSend("(Y/N)?", "Y")
+               time.sleep(5)
                self.ifThenSend("DS-RPC>", "")
 
                self.close()
@@ -878,19 +925,43 @@ class ePowerSwitchGood(PCUControl):
                        # failing here means the User/passwd is wrong (hopefully)
                        raise ExceptionPassword("Incorrect username/password")
 
-               # TODO: after verifying that the user/password is correct, we should
-               # actually reboot the given node.
-
+               # NOTE: after verifying that the user/password is correct, 
+               #               actually reboot the given node.
                if not dryrun:
-                       # add data to handler,
-                       # fetch url one more time on cmd.html, econtrol.html or whatever.
-                       pass
+                       try:
+                               data = urllib.urlencode({'P%d' % node_port : "r"})
+                               req = urllib2.Request(self.url + "cmd.html")
+                               req.add_header("Authorization", authheader)
+                               # add data to handler,
+                               f = urllib2.urlopen(req, data)
+                               if self.verbose: print f.read()
+                       except:
+                               import traceback; traceback.print_exc()
+                               from nodecommon import email_exception
+                               email_exception()
 
-               if self.verbose: print f.read()
+                               # fetch url one more time on cmd.html, econtrol.html or whatever.
+                               # pass
+               else:
+                       if self.verbose: print f.read()
 
                self.close()
                return 0
 
+class CustomPCU(PCUControl):
+       def run(self, node_port, dryrun):
+               """Power-cycle outlet plab<node_port> via the site's web form (curl); dryrun only fetches the page.
+               Returns None; curl's exit status is ignored."""
+               url = "https://www-itec.uni-klu.ac.at/plab-pcu/index.php"
+               # argv list rather than a shell string: credentials are never parsed by a shell.
+               curl = ["curl", "--user", "%s:%s" % (self.username, self.password), "--insecure"]
+               if dryrun:
+                       Popen(curl + [url]).wait()
+               else:
+                       Popen(curl + ["--form", "plab%s=off" % node_port, url]).wait()
+                       time.sleep(5)  # pause between turning the host off and back on
+                       Popen(curl + ["--form", "plab%s=on" % node_port, url]).wait()
+
 
 class ePowerSwitchOld(PCUControl):
        def run(self, node_port, dryrun):
@@ -1135,12 +1206,16 @@ def pcu_name(pcu):
        else:
                return None
 
-import soltesz
-fb =soltesz.dbLoad("findbadpcus")
+#import database
+from monitor import database
+fb = None
 
 def get_pcu_values(pcu_id):
-       # TODO: obviously, this shouldn't be loaded each time...
-
+       global fb
+       if fb == None:
+               # load the database lazily, once, on first use (not on every call)
+               fb = database.dbLoad("findbadpcus")
+               
        try:
                values = fb['nodes']["id_%s" % pcu_id]['values']
        except:
@@ -1156,14 +1231,14 @@ def reboot_policy(nodename, continue_probe, dryrun):
 
        pcu = plc.getpcu(nodename)
        if not pcu:
-               logger.debug("no pcu for %s" % hostname)
-               print "no pcu for %s" % hostname
+               logger.debug("no pcu for %s" % nodename)
+               print "no pcu for %s" % nodename
                return False # "%s has no pcu" % nodename
 
        values = get_pcu_values(pcu['pcu_id'])
        if values == None:
-               logger.debug("No values for pcu probe %s" % hostname)
-               print "No values for pcu probe %s" % hostname
+               logger.debug("No values for pcu probe %s" % nodename)
+               print "No values for pcu probe %s" % nodename
                return False #"no info for pcu_id %s" % pcu['pcu_id']
        
        # Try the PCU first
@@ -1183,16 +1258,17 @@ def reboot_test(nodename, values, continue_probe, verbose, dryrun):
 
        try:
                # DataProbe iPal (many sites)
-               if  continue_probe and values['model'].find("Dataprobe IP-41x/IP-81x") >= 0:
+               if  continue_probe and values['model'].find("IP-41x_IP-81x") >= 0:
                        ipal = IPAL(values, verbose, ['23', '80', '9100'])
                        rb_ret = ipal.reboot(values[nodename], dryrun)
                                
                # APC Masterswitch (Berkeley)
-               elif continue_probe and values['model'].find("APC AP79xx/Masterswitch") >= 0:
+               elif continue_probe and ( values['model'].find("AP79xx") >= 0 or \
+                                                                 values['model'].find("Masterswitch") >= 0 ):
                        print values
 
                        # TODO: make a more robust version of APC
-                       if values['pcu_id'] in [1163,1055,1111,1231,1113,1127,1128,1148]:
+                       if values['pcu_id'] in [1102,1163,1055,1111,1231,1113,1127,1128,1148]:
                                apc = APCEurope(values, verbose, ['22', '23'])
                                rb_ret = apc.reboot(values[nodename], dryrun)
 
@@ -1200,11 +1276,11 @@ def reboot_test(nodename, values, continue_probe, verbose, dryrun):
                                apc = APCBrazil(values, verbose, ['22', '23'])
                                rb_ret = apc.reboot(values[nodename], dryrun)
 
-                       elif values['pcu_id'] in [1221,1225]:
+                       elif values['pcu_id'] in [1221,1225,1220]:
                                apc = APCBerlin(values, verbose, ['22', '23'])
                                rb_ret = apc.reboot(values[nodename], dryrun)
 
-                       elif values['pcu_id'] in [1173,1221,1220]:
+                       elif values['pcu_id'] in [1173,1240,47]:
                                apc = APCFolsom(values, verbose, ['22', '23'])
                                rb_ret = apc.reboot(values[nodename], dryrun)
 
@@ -1213,8 +1289,8 @@ def reboot_test(nodename, values, continue_probe, verbose, dryrun):
                                rb_ret = apc.reboot(values[nodename], dryrun)
 
                # BayTech DS4-RPC
-               elif continue_probe and values['model'].find("Baytech DS4-RPC") >= 0:
-                       if values['pcu_id'] in [1237,1052,1209,1002,1008,1041,1013,1022]:
+               elif continue_probe and values['model'].find("DS4-RPC") >= 0:
+                       if values['pcu_id'] in [1056,1237,1052,1209,1002,1008,1041,1013,1022]:
                                # These  require a 'ctrl-c' to be sent... 
                                baytech = BayTechCtrlC(values, verbose, ['22', '23'])
                                rb_ret = baytech.reboot(values[nodename], dryrun)
@@ -1242,7 +1318,7 @@ def reboot_test(nodename, values, continue_probe, verbose, dryrun):
                                rb_ret = baytech.reboot(values[nodename], dryrun)
 
                # iLO
-               elif continue_probe and values['model'].find("HP iLO") >= 0:
+               elif continue_probe and values['model'].find("ilo") >= 0:
                        try:
                                hpilo = HPiLO(values, verbose, ['22'])
                                rb_ret = hpilo.reboot(0, dryrun)
@@ -1254,12 +1330,16 @@ def reboot_test(nodename, values, continue_probe, verbose, dryrun):
                                rb_ret = hpilo.reboot(0, dryrun)
 
                # DRAC ssh
-               elif continue_probe and values['model'].find("Dell RAC") >= 0:
+               elif continue_probe and values['model'].find("DRAC") >= 0:
                        # TODO: I don't think DRACRacAdm will throw an exception for the
                        # default method to catch...
                        try:
-                               drac = DRACRacAdm(values, verbose, ['443', '5869'])
-                               rb_ret = drac.reboot(0, dryrun)
+                               if values['pcu_id'] in [1402]:
+                                       drac = DRAC(values, verbose, ['22'])
+                                       rb_ret = drac.reboot(0, dryrun)
+                               else:
+                                       drac = DRACRacAdm(values, verbose, ['443', '5869'])
+                                       rb_ret = drac.reboot(0, dryrun)
                        except:
                                drac = DRAC(values, verbose, ['22'])
                                rb_ret = drac.reboot(0, dryrun)
@@ -1268,24 +1348,26 @@ def reboot_test(nodename, values, continue_probe, verbose, dryrun):
                                wti = WTIIPS4(values, verbose, ['23'])
                                rb_ret = wti.reboot(values[nodename], dryrun)
 
-               elif continue_probe and values['model'].find("Intel AMT") >= 0:
+               elif continue_probe and values['model'].find("AMT") >= 0:
                                amt = IntelAMT(values, verbose, ['16992'])
                                rb_ret = amt.reboot(values[nodename], dryrun)
 
                # BlackBox PSExxx-xx (e.g. PSE505-FR)
-               elif continue_probe and \
-                       (values['model'].find("BlackBox PS5xx") >= 0 or
-                        values['model'].find("ePowerSwitch 1/4/8x") >=0 ):
-
+               elif continue_probe and values['model'].find("ePowerSwitch") >=0:
                        # TODO: allow a different port than http 80.
                        if values['pcu_id'] in [1089, 1071, 1046, 1035, 1118]:
                                eps = ePowerSwitchGood(values, verbose, ['80'])
                        elif values['pcu_id'] in [1003]:
+                               # OLD EPOWER
+                               print "OLD EPOWER"
                                eps = ePowerSwitch(values, verbose, ['80'])
                        else:
                                eps = ePowerSwitchGood(values, verbose, ['80'])
 
                        rb_ret = eps.reboot(values[nodename], dryrun)
+               elif continue_probe and values['pcu_id'] in [1122]:
+                       custom = CustomPCU(values, verbose, ['80', '443'])
+                       custom.reboot(values[nodename], dryrun)
 
                elif continue_probe:
                        rb_ret = "Unsupported_PCU"
@@ -1335,6 +1417,8 @@ def main():
                                print "failed"
        except Exception, err:
                import traceback; traceback.print_exc()
+               from nodecommon import email_exception
+               email_exception()
                print err
 
 if __name__ == '__main__':