changes for 3.0
[monitor.git] / reboot.py
index c41bac8..ba641c4 100755 (executable)
--- a/reboot.py
+++ b/reboot.py
@@ -11,14 +11,13 @@ import urllib2
 import urllib
 import threading, popen2
 import array, struct
 import urllib
 import threading, popen2
 import array, struct
-#from socket import *
-import socket
 import plc
 import base64
 from subprocess import PIPE, Popen
 import ssh.pxssh as pxssh
 import ssh.pexpect as pexpect
 import socket
 import plc
 import base64
 from subprocess import PIPE, Popen
 import ssh.pxssh as pxssh
 import ssh.pexpect as pexpect
 import socket
+import moncommands 
 
 # Use our versions of telnetlib and pyssh
 sys.path.insert(0, os.path.dirname(sys.argv[0]))
 
 # Use our versions of telnetlib and pyssh
 sys.path.insert(0, os.path.dirname(sys.argv[0]))
@@ -276,6 +275,10 @@ class PCUControl(Transport,PCUModel,PCURecord):
                        import traceback
                        traceback.print_exc()
                        return "EOF connection reset" + str(err)
                        import traceback
                        traceback.print_exc()
                        return "EOF connection reset" + str(err)
+               except:
+                       from nodecommon import email_exception
+                       email_exception()
+                       raise Exception('unknown')
                
 class IPAL(PCUControl):
        """ 
                
 class IPAL(PCUControl):
        """ 
@@ -293,11 +296,12 @@ class IPAL(PCUControl):
 
                try:
                        # TODO: make sleep backoff, before stopping.
 
                try:
                        # TODO: make sleep backoff, before stopping.
-                       time.sleep(4)
+                       time.sleep(8)
                        ret = s.recv(count, socket.MSG_DONTWAIT)
                except socket.error, e:
                        if e[0] == errno.EAGAIN:
                        ret = s.recv(count, socket.MSG_DONTWAIT)
                except socket.error, e:
                        if e[0] == errno.EAGAIN:
-                               raise Exception(e[1])
+                               #raise Exception(e[1])
+                               raise ExceptionNotFound(e[1])
                        else:
                                # TODO: not other exceptions.
                                raise Exception(e)
                        else:
                                # TODO: not other exceptions.
                                raise Exception(e)
@@ -318,6 +322,8 @@ class IPAL(PCUControl):
                        if e[0] == errno.ECONNREFUSED:
                                # cannot connect to remote host
                                raise Exception(e[1])
                        if e[0] == errno.ECONNREFUSED:
                                # cannot connect to remote host
                                raise Exception(e[1])
+                       elif e[0] == errno.ETIMEDOUT:
+                               raise ExceptionTimeout(e[1])
                        else:
                                # TODO: what other conditions are there?
                                raise Exception(e)
                        else:
                                # TODO: what other conditions are there?
                                raise Exception(e)
@@ -329,7 +335,7 @@ class IPAL(PCUControl):
                print "Current status is '%s'" % ret
 
                if ret == '':
                print "Current status is '%s'" % ret
 
                if ret == '':
-                       raise Exception("Status returned 'another session already open' %s : %s" % (node_port, ret))
+                       raise Exception("Status returned 'another session already open' on %s %s : %s" % (self.host, node_port, ret))
                        
                                
                if node_port < len(ret):
                        
                                
                if node_port < len(ret):
@@ -340,10 +346,12 @@ class IPAL(PCUControl):
                        elif status == '0':
                                # down
                                power_on = False
                        elif status == '0':
                                # down
                                power_on = False
+                       elif status == '6':
+                               raise ExceptionPort("IPAL reported 'Cable Error' on %s socket %s : %s" % (self.host, node_port, ret))
                        else:
                        else:
-                               raise Exception("Unknown status for PCU socket %s : %s" % (node_port, ret))
+                               raise Exception("Unknown status for PCU %s socket %s : %s" % (self.host, node_port, ret))
                else:
                else:
-                       raise Exception("Mismatch between configured port and PCU status: %s %s" % (node_port, ret))
+                       raise Exception("Mismatch between configured port and PCU %s status: %s %s" % (self.host, node_port, ret))
                        
 
                if not dryrun:
                        
 
                if not dryrun:
@@ -368,10 +376,12 @@ class IPAL(PCUControl):
                                elif status == '0':
                                        # down
                                        power_on = False
                                elif status == '0':
                                        # down
                                        power_on = False
+                               elif status == '6':
+                                       raise ExceptionPort("IPAL reported 'Cable Error' on %s socket %s : %s" % (self.host, node_port, ret))
                                else:
                                else:
-                                       raise Exception("Unknown status for PCU socket %s : %s" % (node_port, ret))
+                                       raise Exception("Unknown status for PCU %s socket %s : %s" % (self.host, node_port, ret))
                        else:
                        else:
-                               raise Exception("Mismatch between configured port and PCU status: %s %s" % (node_port, ret))
+                               raise Exception("Mismatch between configured port and PCU %s status: %s %s" % (self.host, node_port, ret))
 
                        if power_on:
                                return 0
 
                        if power_on:
                                return 0
@@ -559,9 +569,8 @@ class APC(PCUControl):
 
 class IntelAMT(PCUControl):
        def run(self, node_port, dryrun):
 
 class IntelAMT(PCUControl):
        def run(self, node_port, dryrun):
-               import soltesz
 
 
-               cmd = soltesz.CMD()
+               cmd = moncommands.CMD()
                #[cmd_str = "IntelAMTSDK/Samples/RemoteControl/remoteControl"
                cmd_str = "cmdamt/remoteControl"
 
                #[cmd_str = "IntelAMTSDK/Samples/RemoteControl/remoteControl"
                cmd_str = "cmdamt/remoteControl"
 
@@ -625,9 +634,8 @@ class HPiLO(PCUControl):
                
 class HPiLOHttps(PCUControl):
        def run(self, node_port, dryrun):
                
 class HPiLOHttps(PCUControl):
        def run(self, node_port, dryrun):
-               import soltesz
 
 
-               locfg = soltesz.CMD()
+               locfg = moncommands.CMD()
                cmd = "cmdhttps/locfg.pl -s %s -f %s -u %s -p '%s' | grep 'MESSAGE' | grep -v 'No error'" % (
                                        self.host, "iloxml/Get_Network.xml", 
                                        self.username, self.password)
                cmd = "cmdhttps/locfg.pl -s %s -f %s -u %s -p '%s' | grep 'MESSAGE' | grep -v 'No error'" % (
                                        self.host, "iloxml/Get_Network.xml", 
                                        self.username, self.password)
@@ -638,7 +646,7 @@ class HPiLOHttps(PCUControl):
                        return sout.strip()
 
                if not dryrun:
                        return sout.strip()
 
                if not dryrun:
-                       locfg = soltesz.CMD()
+                       locfg = moncommands.CMD()
                        cmd = "cmdhttps/locfg.pl -s %s -f %s -u %s -p '%s' | grep 'MESSAGE' | grep -v 'No error'" % (
                                                self.host, "iloxml/Reset_Server.xml", 
                                                self.username, self.password)
                        cmd = "cmdhttps/locfg.pl -s %s -f %s -u %s -p '%s' | grep 'MESSAGE' | grep -v 'No error'" % (
                                                self.host, "iloxml/Reset_Server.xml", 
                                                self.username, self.password)
@@ -669,6 +677,13 @@ class BayTechAU(PCUControl):
 
 class BayTechGeorgeTown(PCUControl):
        def run(self, node_port, dryrun):
 
 class BayTechGeorgeTown(PCUControl):
        def run(self, node_port, dryrun):
+               # This initial open/close is a workaround: the PCU tends to
+               # misbehave on the first connection, which would otherwise raise
+               # an exception.  Even when it behaves, opening a second
+               # connection right away is harmless.
+               self.open(self.host, self.username, None, "Enter user name:")
+               self.close()
+               time.sleep(1)
                self.open(self.host, self.username, None, "Enter user name:")
                self.sendPassword(self.password, "Enter Password:")
 
                self.open(self.host, self.username, None, "Enter user name:")
                self.sendPassword(self.password, "Enter Password:")
 
@@ -707,6 +722,7 @@ class BayTechCtrlCUnibe(PCUControl):
 
                # Control Outlets  (5 ,1).........5
                try:
 
                # Control Outlets  (5 ,1).........5
                try:
+                       #index = s.expect("Enter Request")
                        index = s.expect(["Enter Request :"])
 
                        if index == 0:
                        index = s.expect(["Enter Request :"])
 
                        if index == 0:
@@ -721,7 +737,8 @@ class BayTechCtrlCUnibe(PCUControl):
                                        print "Reboot %d" % node_port
                                        s.send("Reboot %d\r\n" % node_port)
 
                                        print "Reboot %d" % node_port
                                        s.send("Reboot %d\r\n" % node_port)
 
-                                       index = s.expect(["(Y/N)?"])
+                                       time.sleep(5)
+                                       index = s.expect(["\(Y/N\)\?", "Port in use", "DS-RPC>"])
                                        if index == 0:
                                                if dryrun:
                                                        print "sending N"
                                        if index == 0:
                                                if dryrun:
                                                        print "sending N"
@@ -729,16 +746,21 @@ class BayTechCtrlCUnibe(PCUControl):
                                                else:
                                                        print "sending Y"
                                                        s.send("Y\r\n")
                                                else:
                                                        print "sending Y"
                                                        s.send("Y\r\n")
+                                       elif index == 1:
+                                               raise ExceptionPrompt("PCU Reported 'Port in use.'")
+                                       elif index == 2:
+                                               raise ExceptionSequence("Issued command 'Reboot' failed.")
 
 
-                               #index = s.expect(["DS-RPC>"])
+                               time.sleep(5)
+                               index = s.expect(["DS-RPC>"])
                                #print "got prompt back"
 
                        s.close()
 
                except pexpect.EOF:
                                #print "got prompt back"
 
                        s.close()
 
                except pexpect.EOF:
-                       raise ExceptionPrompt("EOF before 'Enter Request' Prompt")
+                       raise ExceptionPrompt("EOF before expected Prompt")
                except pexpect.TIMEOUT:
                except pexpect.TIMEOUT:
-                       raise ExceptionPrompt("Timeout before 'Enter Request' Prompt")
+                       raise ExceptionPrompt("Timeout before expected Prompt")
 
                return 0
 
 
                return 0
 
@@ -758,37 +780,54 @@ class BayTechCtrlC(PCUControl):
                # Otherwise, the login succeeded.
 
                # Send a ctrl-c to the remote process.
                # Otherwise, the login succeeded.
 
                # Send a ctrl-c to the remote process.
-               print "sending ctrl-c"
+               print "SENDING ctrl-c"
                s.send(chr(3))
 
                # Control Outlets  (5 ,1).........5
                try:
                s.send(chr(3))
 
                # Control Outlets  (5 ,1).........5
                try:
+                       print "EXPECTING: ", "Enter Request :"
                        index = s.expect(["Enter Request :"])
 
                        if index == 0:
                        index = s.expect(["Enter Request :"])
 
                        if index == 0:
-                               print "5"
+                               print "SENDING: 5"
                                s.send("5\r\n")
                                s.send("5\r\n")
-                               index = s.expect(["DS-RPC>", "Enter user name:"])
+                               print "EXPECTING: ", "DS-RPC>"
+                               index = s.expect(["DS-RPC>", "Enter user name:", "Port in use."])
                                if index == 1:
                                        print "sending username"
                                        s.send(self.username + "\r\n")
                                        index = s.expect(["DS-RPC>"])
                                if index == 1:
                                        print "sending username"
                                        s.send(self.username + "\r\n")
                                        index = s.expect(["DS-RPC>"])
+                               elif index == 2:
+                                       raise ExceptionPrompt("PCU Reported 'Port in use.'")
 
                                if index == 0:
 
                                if index == 0:
-                                       print "Reboot %d" % node_port
+                                       print "SENDING: Reboot %d" % node_port
                                        s.send("Reboot %d\r\n" % node_port)
 
                                        s.send("Reboot %d\r\n" % node_port)
 
-                                       index = s.expect(["(Y/N)?"])
+                                       print "SLEEPING: 5"
+                                       time.sleep(5)
+                                       print "EXPECTING: ", "Y/N?"
+                                       index = s.expect(["\(Y/N\)\?", "Port in use", "DS-RPC>"])
                                        if index == 0:
                                                if dryrun:
                                                        print "sending N"
                                                        s.send("N\r\n")
                                                else:
                                        if index == 0:
                                                if dryrun:
                                                        print "sending N"
                                                        s.send("N\r\n")
                                                else:
-                                                       print "sending Y"
+                                                       print "SENDING: Y"
                                                        s.send("Y\r\n")
                                                        s.send("Y\r\n")
-
+                                       elif index == 1:
+                                               raise ExceptionPrompt("PCU Reported 'Port in use.'")
+                                       elif index == 2:
+                                               raise ExceptionSequence("Issued command 'Reboot' failed.")
+
+                               # NOTE: for some reason, the script times out on the
+                               # expect() call below.  It works correctly in manual tests,
+                               # but evidently fails in automated runs.
+                               print "SLEEPING: 5"
+                               time.sleep(5)
+                               #print "TOTAL--", s.allstr, "--EOT"
                                index = s.expect(["DS-RPC>"])
                                index = s.expect(["DS-RPC>"])
-                               #print "got prompt back"
+                               print "got prompt back"
 
                        s.close()
 
 
                        s.close()
 
@@ -815,6 +854,7 @@ class BayTech(PCUControl):
                        # even after login...
                        print "msg: %s" % msg
                        self.transport.write(self.username + "\r\n")
                        # even after login...
                        print "msg: %s" % msg
                        self.transport.write(self.username + "\r\n")
+                       time.sleep(5)
                        self.ifThenSend("DS-RPC>", "Reboot %d" % node_port)
 
                # Reboot Outlet  N        (Y/N)?
                        self.ifThenSend("DS-RPC>", "Reboot %d" % node_port)
 
                # Reboot Outlet  N        (Y/N)?
@@ -822,6 +862,7 @@ class BayTech(PCUControl):
                        self.ifThenSend("(Y/N)?", "N")
                else:
                        self.ifThenSend("(Y/N)?", "Y")
                        self.ifThenSend("(Y/N)?", "N")
                else:
                        self.ifThenSend("(Y/N)?", "Y")
+               time.sleep(5)
                self.ifThenSend("DS-RPC>", "")
 
                self.close()
                self.ifThenSend("DS-RPC>", "")
 
                self.close()
@@ -896,6 +937,8 @@ class ePowerSwitchGood(PCUControl):
                                if self.verbose: print f.read()
                        except:
                                import traceback; traceback.print_exc()
                                if self.verbose: print f.read()
                        except:
                                import traceback; traceback.print_exc()
+                               from nodecommon import email_exception
+                               email_exception()
 
                                # fetch url one more time on cmd.html, econtrol.html or whatever.
                                # pass
 
                                # fetch url one more time on cmd.html, econtrol.html or whatever.
                                # pass
@@ -1163,12 +1206,16 @@ def pcu_name(pcu):
        else:
                return None
 
        else:
                return None
 
-import soltesz
-fb =soltesz.dbLoad("findbadpcus")
+#import database
+from monitor import database
+fb = None
 
 def get_pcu_values(pcu_id):
 
 def get_pcu_values(pcu_id):
-       # TODO: obviously, this shouldn't be loaded each time...
-
+       global fb
+       if fb == None:
+               # load once on first use and cache in the global fb, not on every call
+               fb = database.dbLoad("findbadpcus")
+               
        try:
                values = fb['nodes']["id_%s" % pcu_id]['values']
        except:
        try:
                values = fb['nodes']["id_%s" % pcu_id]['values']
        except:
@@ -1184,14 +1231,14 @@ def reboot_policy(nodename, continue_probe, dryrun):
 
        pcu = plc.getpcu(nodename)
        if not pcu:
 
        pcu = plc.getpcu(nodename)
        if not pcu:
-               logger.debug("no pcu for %s" % hostname)
-               print "no pcu for %s" % hostname
+               logger.debug("no pcu for %s" % nodename)
+               print "no pcu for %s" % nodename
                return False # "%s has no pcu" % nodename
 
        values = get_pcu_values(pcu['pcu_id'])
        if values == None:
                return False # "%s has no pcu" % nodename
 
        values = get_pcu_values(pcu['pcu_id'])
        if values == None:
-               logger.debug("No values for pcu probe %s" % hostname)
-               print "No values for pcu probe %s" % hostname
+               logger.debug("No values for pcu probe %s" % nodename)
+               print "No values for pcu probe %s" % nodename
                return False #"no info for pcu_id %s" % pcu['pcu_id']
        
        # Try the PCU first
                return False #"no info for pcu_id %s" % pcu['pcu_id']
        
        # Try the PCU first
@@ -1221,7 +1268,7 @@ def reboot_test(nodename, values, continue_probe, verbose, dryrun):
                        print values
 
                        # TODO: make a more robust version of APC
                        print values
 
                        # TODO: make a more robust version of APC
-                       if values['pcu_id'] in [1163,1055,1111,1231,1113,1127,1128,1148]:
+                       if values['pcu_id'] in [1102,1163,1055,1111,1231,1113,1127,1128,1148]:
                                apc = APCEurope(values, verbose, ['22', '23'])
                                rb_ret = apc.reboot(values[nodename], dryrun)
 
                                apc = APCEurope(values, verbose, ['22', '23'])
                                rb_ret = apc.reboot(values[nodename], dryrun)
 
@@ -1229,11 +1276,11 @@ def reboot_test(nodename, values, continue_probe, verbose, dryrun):
                                apc = APCBrazil(values, verbose, ['22', '23'])
                                rb_ret = apc.reboot(values[nodename], dryrun)
 
                                apc = APCBrazil(values, verbose, ['22', '23'])
                                rb_ret = apc.reboot(values[nodename], dryrun)
 
-                       elif values['pcu_id'] in [1221,1225]:
+                       elif values['pcu_id'] in [1221,1225,1220]:
                                apc = APCBerlin(values, verbose, ['22', '23'])
                                rb_ret = apc.reboot(values[nodename], dryrun)
 
                                apc = APCBerlin(values, verbose, ['22', '23'])
                                rb_ret = apc.reboot(values[nodename], dryrun)
 
-                       elif values['pcu_id'] in [1173,1221,1220]:
+                       elif values['pcu_id'] in [1173,1240,47]:
                                apc = APCFolsom(values, verbose, ['22', '23'])
                                rb_ret = apc.reboot(values[nodename], dryrun)
 
                                apc = APCFolsom(values, verbose, ['22', '23'])
                                rb_ret = apc.reboot(values[nodename], dryrun)
 
@@ -1243,7 +1290,7 @@ def reboot_test(nodename, values, continue_probe, verbose, dryrun):
 
                # BayTech DS4-RPC
                elif continue_probe and values['model'].find("DS4-RPC") >= 0:
 
                # BayTech DS4-RPC
                elif continue_probe and values['model'].find("DS4-RPC") >= 0:
-                       if values['pcu_id'] in [1237,1052,1209,1002,1008,1041,1013,1022]:
+                       if values['pcu_id'] in [1056,1237,1052,1209,1002,1008,1041,1013,1022]:
                                # These  require a 'ctrl-c' to be sent... 
                                baytech = BayTechCtrlC(values, verbose, ['22', '23'])
                                rb_ret = baytech.reboot(values[nodename], dryrun)
                                # These  require a 'ctrl-c' to be sent... 
                                baytech = BayTechCtrlC(values, verbose, ['22', '23'])
                                rb_ret = baytech.reboot(values[nodename], dryrun)
@@ -1287,8 +1334,12 @@ def reboot_test(nodename, values, continue_probe, verbose, dryrun):
                        # TODO: I don't think DRACRacAdm will throw an exception for the
                        # default method to catch...
                        try:
                        # TODO: I don't think DRACRacAdm will throw an exception for the
                        # default method to catch...
                        try:
-                               drac = DRACRacAdm(values, verbose, ['443', '5869'])
-                               rb_ret = drac.reboot(0, dryrun)
+                               if values['pcu_id'] in [1402]:
+                                       drac = DRAC(values, verbose, ['22'])
+                                       rb_ret = drac.reboot(0, dryrun)
+                               else:
+                                       drac = DRACRacAdm(values, verbose, ['443', '5869'])
+                                       rb_ret = drac.reboot(0, dryrun)
                        except:
                                drac = DRAC(values, verbose, ['22'])
                                rb_ret = drac.reboot(0, dryrun)
                        except:
                                drac = DRAC(values, verbose, ['22'])
                                rb_ret = drac.reboot(0, dryrun)
@@ -1366,6 +1417,8 @@ def main():
                                print "failed"
        except Exception, err:
                import traceback; traceback.print_exc()
                                print "failed"
        except Exception, err:
                import traceback; traceback.print_exc()
+               from nodecommon import email_exception
+               email_exception()
                print err
 
 if __name__ == '__main__':
                print err
 
 if __name__ == '__main__':