-added commands to get and set the ticket status so this can be done automatically...
[monitor.git] / reboot.py
index 18cabff..41d30c1 100755 (executable)
--- a/reboot.py
+++ b/reboot.py
@@ -15,8 +15,9 @@ import socket
 import plc
 import base64
 from subprocess import PIPE, Popen
-
-plc_lock = threading.Lock()
+import ssh.pxssh as pxssh
+import ssh.pexpect as pexpect
+import socket
 
 # Use our versions of telnetlib and pyssh
 sys.path.insert(0, os.path.dirname(sys.argv[0]))
@@ -25,7 +26,7 @@ sys.path.insert(0, os.path.dirname(sys.argv[0]) + "/pyssh")
 import pyssh
 
 # Timeouts in seconds
-TELNET_TIMEOUT = 30
+TELNET_TIMEOUT = 45
 
 # Event class ID from pcu events
 #NODE_POWER_CONTROL = 3
@@ -223,6 +224,7 @@ class PCUControl(Transport,PCUModel,PCURecord):
        def __init__(self, plc_pcu_record, verbose, supported_ports=[]):
                PCUModel.__init__(self, plc_pcu_record)
                PCURecord.__init__(self, plc_pcu_record)
+               type = None
                if self.portstatus:
                        if '22' in supported_ports and self.portstatus['22'] == "open":
                                type = Transport.SSH
@@ -303,84 +305,59 @@ class IPAL(PCUControl):
                self.close()
                return 0
 
-def ipal_reboot(ip, password, port, dryrun):
-       global verbose
-       global plc_lock
-       telnet = None
-
-       try:
-               #plc_lock.acquire()
-               #print "lock acquired"
-
-               #try:
-                       #telnet = telnetlib.Telnet(ip) # , timeout=TELNET_TIMEOUT)
-               telnet = telnetlib.Telnet(ip, timeout=TELNET_TIMEOUT)
-               #except:
-               #       import traceback
-               #       traceback.print_exc()
+class APCEurope(PCUControl):
+       def run(self, node_port, dryrun):  # answers the "> " prompts in sequence to reboot outlet node_port; returns 0
+               self.open(self.host, self.username)
+               self.sendPassword(self.password)
 
+               self.ifThenSend("\r\n> ", "1", ExceptionPassword)  # NOTE(review): third argument looks like the exception type raised when the prompt is missing -- confirm in ifThenSend
+               self.ifThenSend("\r\n> ", "2")
+               self.ifThenSend("\r\n> ", str(node_port))
+               # Menu entry 3 is "Immediate Reboot" on this model.
+               self.ifThenSend("\r\n> ", "3")
 
-               telnet.set_debuglevel(verbose)
+               if not dryrun:  # real run: confirm the reboot with YES
+                       self.ifThenSend("Enter 'YES' to continue or <ENTER> to cancel", 
+                                                       "YES\r\n",
+                                                       ExceptionSequence)
+               else:  # dry run: reply with an empty string instead of YES
+                       self.ifThenSend("Enter 'YES' to continue or <ENTER> to cancel", 
+                                                       "", ExceptionSequence)
+               self.ifThenSend("Press <ENTER> to continue...", "", ExceptionSequence)
 
-               # XXX Some iPals require you to hit Enter a few times first
-               telnet_answer(telnet, "Password >", "\r\n\r\n")
+               self.close()
+               return 0
 
-               # Login
-               telnet_answer(telnet, "Password >", password)
+class APCBrazil(PCUControl):
+       def run(self, node_port, dryrun):  # answers the "> " prompts in sequence to reboot outlet node_port; returns 0
+               self.open(self.host, self.username)
+               self.sendPassword(self.password)
 
-               # XXX Some iPals require you to hit Enter a few times first
-               telnet.write("\r\n\r\n")
+               self.ifThenSend("\r\n> ", "1", ExceptionPassword)  # NOTE(review): third argument looks like the exception type raised when the prompt is missing -- confirm in ifThenSend
+               self.ifThenSend("\r\n> ", str(node_port))
+               # Menu entry 4 is "Immediate Reboot" on this model; the real run confirms with YES below.
+               self.ifThenSend("\r\n> ", "4")
 
-               # P# - Pulse relay
                if not dryrun:
-                       telnet_answer(telnet, "Enter >", "P%d" % port)
-
-               telnet.read_until("Enter >", TELNET_TIMEOUT)
-
-               # Close
-               telnet.close()
+                       self.ifThenSend("Enter 'YES' to continue or <ENTER> to cancel", 
+                                                       "YES\r\n",
+                                                       ExceptionSequence)
+               else:  # dry run: reply with an empty string instead of YES
+                       self.ifThenSend("Enter 'YES' to continue or <ENTER> to cancel", 
+                                                       "", ExceptionSequence)
+               self.ifThenSend("Press <ENTER> to continue...", "", ExceptionSequence)
 
-               #print "lock released"
-               #plc_lock.release()
+               self.close()
                return 0
 
-       except EOFError, err:
-               if verbose:
-                       logger.debug("ipal_reboot: EOF")
-                       logger.debug(err)
-               telnet.close()
-               import traceback
-               traceback.print_exc()
-               #print "lock released"
-               #plc_lock.release()
-               return errno.ECONNRESET
-       except socket.error, err:
-               logger.debug("ipal_reboot: Socket Error")
-               logger.debug(err)
-               import traceback
-               traceback.print_exc()
-
-               return errno.ETIMEDOUT
-               
-       except Exception, err:
-               if verbose:
-                       logger.debug("ipal_reboot: Exception")
-                       logger.debug(err)
-               if telnet:
-                       telnet.close()
-               import traceback
-               traceback.print_exc()
-               #print "lock released"
-               #plc_lock.release()
-               return  "ipal error"
-
-class APCEurope(PCUControl):
+class APCBerlin(PCUControl):
        def run(self, node_port, dryrun):
                self.open(self.host, self.username)
                self.sendPassword(self.password)
 
                self.ifThenSend("\r\n> ", "1", ExceptionPassword)
                self.ifThenSend("\r\n> ", "2")
+               self.ifThenSend("\r\n> ", "1")
                self.ifThenSend("\r\n> ", str(node_port))
                # 3- Immediate Reboot             
                self.ifThenSend("\r\n> ", "3")
@@ -536,7 +513,7 @@ class HPiLOHttps(PCUControl):
                                        self.host, "iloxml/Get_Network.xml", 
                                        self.username, self.password)
                p_ilo  = Popen(cmd, stdout=PIPE, shell=True)
-               cmd2 = "/bin/grep 'MESSAGE' | /bin/grep -v 'No error'"
+               cmd2 = "grep 'MESSAGE' | grep -v 'No error'"
                p_grep = Popen(cmd2, stdin=p_ilo.stdout, stdout=PIPE, stderr=PIPE, shell=True)
                sout, serr = p_grep.communicate()
 
@@ -552,12 +529,17 @@ class HPiLOHttps(PCUControl):
                                        self.username, self.password)
                        print cmd
                        p_ilo = Popen(cmd, stdin=PIPE, stdout=PIPE, shell=True)
-                       cmd2 = "/bin/grep 'MESSAGE' | /bin/grep -v 'No error'"
-                       print cmd2
-                       p_grep = Popen(cmd2, stdin=p_ilo.stdout, stdout=PIPE, stderr=PIPE, shell=True)
+                       cmd2 = "grep 'MESSAGE' | grep -v 'No error'"
+                       p_grep = Popen(cmd2, stdin=p_ilo.stdout, stdout=PIPE, stderr=PIPE)
                        sout, serr = p_grep.communicate()
-                       p_ilo.wait()
-                       p_grep.wait()
+                       try: p_ilo.wait()
+                       except: 
+                               print "p_ilo wait failed."
+                               pass
+                       try: p_grep.wait()
+                       except: 
+                               print "p_grep wait failed."
+                               pass
 
                        if sout.strip() != "":
                                print "sout: %s" % sout.strip()
@@ -565,6 +547,24 @@ class HPiLOHttps(PCUControl):
 
                return 0
 
+class BayTechAU(PCUControl):
+       def run(self, node_port, dryrun):  # returns 0 once the whole dialogue has completed
+               self.open(self.host, self.username, None, "Enter user name:")
+               self.sendPassword(self.password, "Enter Password:")
+               # At the RPC3-NC> prompt, request a reboot of the given outlet number.
+               #self.ifThenSend("RPC-16>", "Status")
+               self.ifThenSend("RPC3-NC>", "Reboot %d" % node_port)
+
+               # The unit then asks for confirmation: Reboot Outlet  N        (Y/N)?
+               if dryrun:
+                       self.ifThenSend("(Y/N)?", "N")
+               else:
+                       self.ifThenSend("(Y/N)?", "Y")
+               self.ifThenSend("RPC3-NC>", "")  # presumably waits for the prompt to return before closing -- confirm in ifThenSend
+
+               self.close()
+               return 0
+
 class BayTechGeorgeTown(PCUControl):
        def run(self, node_port, dryrun):
                self.open(self.host, self.username, None, "Enter user name:")
@@ -584,7 +584,7 @@ class BayTechGeorgeTown(PCUControl):
                self.close()
                return 0
 
-class BayTechCtrlC(PCUControl):
+class BayTechCtrlCUnibe(PCUControl):
        """
                For some reason, these units let you log in fine, but they hang
                indefinitely, unless you send a Ctrl-C after the password.  No idea
@@ -592,38 +592,109 @@ class BayTechCtrlC(PCUControl):
        """
        def run(self, node_port, dryrun):
                print "BayTechCtrlC %s" % self.host
-               self.open(self.host, self.username)
-               self.sendPassword(self.password)
 
-               #self.transport.write('\ 3')
-               self.transport.write("\r\n")
-               self.transport.write(pyssh.CTRL_C)
-               #self.transport.write(chr(3))
-               #self.transport.write(chr(24))
-               #self.transport.write(chr(26))
-               #self.transport.write('\18')
-               # Control Outlets  (5 ,1).........5
-               self.ifThenSend("Enter Request :", "5")
+               ssh_options="-o StrictHostKeyChecking=no -o PasswordAuthentication=yes -o PubkeyAuthentication=no"
+               s = pxssh.pxssh()
+               if not s.login(self.host, self.username, self.password, ssh_options):
+                       raise ExceptionPassword("Invalid Password")
+               # Otherwise, the login succeeded.
 
-               # Reboot N
+               # Send a ctrl-c to the remote process.
+               print "sending ctrl-c"
+               s.send(chr(3))
+
+               # Control Outlets  (5 ,1).........5
                try:
-                       self.ifThenSend("DS-RPC>", "Reboot %d" % node_port)
-               except ExceptionNotFound, msg:
-                       # one machine is configured to ask for a username,
-                       # even after login...
-                       print "msg: %s" % msg
-                       self.transport.write(self.username + "\r\n")
-                       self.ifThenSend("DS-RPC>", "Reboot %d" % node_port)
-                       
+                       index = s.expect(["Enter Request :"])
+
+                       if index == 0:
+                               print "3"
+                               s.send("3\r\n")
+                               index = s.expect(["DS-RPC>", "Enter user name:"])
+                               if index == 1:
+                                       s.send(self.username + "\r\n")
+                                       index = s.expect(["DS-RPC>"])
+
+                               if index == 0:
+                                       print "Reboot %d" % node_port
+                                       s.send("Reboot %d\r\n" % node_port)
+
+                                       index = s.expect(["(Y/N)?"])
+                                       if index == 0:
+                                               if dryrun:
+                                                       print "sending N"
+                                                       s.send("N\r\n")
+                                               else:
+                                                       print "sending Y"
+                                                       s.send("Y\r\n")
+
+                               #index = s.expect(["DS-RPC>"])
+                               #print "got prompt back"
+
+                       s.close()
+
+               except pexpect.EOF:
+                       raise ExceptionPrompt("EOF before 'Enter Request' Prompt")
+               except pexpect.TIMEOUT:
+                       raise ExceptionPrompt("Timeout before 'Enter Request' Prompt")
 
-               # Reboot Outlet  N        (Y/N)?
-               if dryrun:
-                       self.ifThenSend("(Y/N)?", "N")
-               else:
-                       self.ifThenSend("(Y/N)?", "Y")
-               self.ifThenSend("DS-RPC>", "")
+               return 0
+
+class BayTechCtrlC(PCUControl):
+       """
+               For some reason, these units let you log in fine, but they hang
+               indefinitely, unless you send a Ctrl-C after the password.  No idea
+               why.
+       """
+       def run(self, node_port, dryrun):  # returns 0; raises ExceptionPassword / ExceptionPrompt on failure
+               print "BayTechCtrlC %s" % self.host
+
+               ssh_options="-o StrictHostKeyChecking=no -o PasswordAuthentication=yes -o PubkeyAuthentication=no"
+               s = pxssh.pxssh()
+               if not s.login(self.host, self.username, self.password, ssh_options):
+                       raise ExceptionPassword("Invalid Password")
+               # Otherwise, the login succeeded.
+
+               # Send a ctrl-c to the remote process.
+               print "sending ctrl-c"
+               s.send(chr(3))
+
+               # Control Outlets  (5 ,1).........5
+               try:
+                       index = s.expect(["Enter Request :"])
+
+                       if index == 0:
+                               print "5"
+                               s.send("5\r\n")
+                               index = s.expect(["DS-RPC>", "Enter user name:"])
+                               if index == 1:
+                                       print "sending username"
+                                       s.send(self.username + "\r\n")
+                                       index = s.expect(["DS-RPC>"])
+
+                               if index == 0:
+                                       print "Reboot %d" % node_port
+                                       s.send("Reboot %d\r\n" % node_port)
+                                       # expect() patterns are regexes: escape so we wait for the literal "(Y/N)?" prompt.
+                                       index = s.expect([r"\(Y/N\)\?"])
+                                       if index == 0:
+                                               if dryrun:
+                                                       print "sending N"
+                                                       s.send("N\r\n")
+                                               else:
+                                                       print "sending Y"
+                                                       s.send("Y\r\n")
+
+                               #index = s.expect(["DS-RPC>"])
+                               #print "got prompt back"
+
+                       s.close()
+
+               except pexpect.EOF:
+                       raise ExceptionPrompt("EOF before 'Enter Request' Prompt")
+               except pexpect.TIMEOUT:
+                       raise ExceptionPrompt("Timeout before 'Enter Request' Prompt")
+
-               self.close()
                return 0
 
 class BayTech(PCUControl):
@@ -636,14 +707,13 @@ class BayTech(PCUControl):
 
                # Reboot N
                try:
-                       self.ifThenSend("DS-RPC>", "Reboot %d" % node_port)
+                       self.ifThenSend("DS-RPC>", "Reboot %d" % node_port, ExceptionNotFound)
                except ExceptionNotFound, msg:
                        # one machine is configured to ask for a username,
                        # even after login...
                        print "msg: %s" % msg
                        self.transport.write(self.username + "\r\n")
                        self.ifThenSend("DS-RPC>", "Reboot %d" % node_port)
-                       
 
                # Reboot Outlet  N        (Y/N)?
                if dryrun:
@@ -655,6 +725,22 @@ class BayTech(PCUControl):
                self.close()
                return 0
 
+class WTIIPS4(PCUControl):
+       def run(self, node_port, dryrun):  # issues /Boot for outlet node_port; returns 0
+               self.open(self.host)
+               self.sendPassword(self.password, "Enter Password:")
+               # Request the boot; the unit answers with a "Sure? (Y/N): " confirmation.
+               self.ifThenSend("IPS> ", "/Boot %s" % node_port)
+               if dryrun:
+                       self.ifThenSend("Sure? (Y/N): ", "N")
+               else:
+                       self.ifThenSend("Sure? (Y/N): ", "Y")
+               # Dry runs must decline (N); only a real run confirms (Y), as in the BayTech classes.
+               self.ifThenSend("IPS> ", "")
+
+               self.close()
+               return 0
+
 class ePowerSwitchGood(PCUControl):
        # NOTE:
        #               The old code used Python's HTTPPasswordMgrWithDefaultRealm()
@@ -920,9 +1006,10 @@ def runcmd(command, args, username, password, timeout = None):
                                out += "; output follows:\n" + data
                        raise Exception, out
 
-def racadm_reboot(ip, username, password, port, dryrun):
+def racadm_reboot(host, username, password, port, dryrun):
        global verbose
 
+       ip = socket.gethostbyname(host)
        try:
                cmd = "/usr/sbin/racadm"
                os.stat(cmd)
@@ -975,9 +1062,6 @@ def check_open_port(values, port_list):
                                ret = True
        
        return ret
-
-def reboot(nodename):
-       return reboot_policy(nodename, True, False)
        
 def reboot_policy(nodename, continue_probe, dryrun):
        global verbose
@@ -994,9 +1078,9 @@ def reboot_policy(nodename, continue_probe, dryrun):
        logger.debug("Trying PCU %s %s" % (pcu['hostname'], pcu['model']))
 
        ret = reboot_test(nodename, values, continue_probe, verbose, dryrun)
-       print ret
 
        if ret != 0:
+               print ret
                return False
        else:
                return True
@@ -1018,7 +1102,15 @@ def reboot_test(nodename, values, continue_probe, verbose, dryrun):
                                apc = APCEurope(values, verbose, ['22', '23'])
                                rb_ret = apc.reboot(values[nodename], dryrun)
 
-                       elif values['pcu_id'] in [1173,1221,1220,1225]:
+                       elif values['pcu_id'] in [1110,86]:
+                               apc = APCBrazil(values, verbose, ['22', '23'])
+                               rb_ret = apc.reboot(values[nodename], dryrun)
+
+                       elif values['pcu_id'] in [1221,1225]:
+                               apc = APCBerlin(values, verbose, ['22', '23'])
+                               rb_ret = apc.reboot(values[nodename], dryrun)
+
+                       elif values['pcu_id'] in [1173,1221,1220]:
                                apc = APCFolsom(values, verbose, ['22', '23'])
                                rb_ret = apc.reboot(values[nodename], dryrun)
 
@@ -1028,11 +1120,20 @@ def reboot_test(nodename, values, continue_probe, verbose, dryrun):
 
                # BayTech DS4-RPC
                elif continue_probe and values['model'].find("Baytech DS4-RPC") >= 0:
-                       if values['pcu_id'] in [1041,1209,1025,1052,1057]:
+                       if values['pcu_id'] in [1237,1052,1209,1002,1008,1041,1013,1022]:
                                # These  require a 'ctrl-c' to be sent... 
                                baytech = BayTechCtrlC(values, verbose, ['22', '23'])
                                rb_ret = baytech.reboot(values[nodename], dryrun)
 
+                       elif values['pcu_id'] in [93]:
+                               baytech = BayTechAU(values, verbose, ['22', '23'])
+                               rb_ret = baytech.reboot(values[nodename], dryrun)
+
+                       elif values['pcu_id'] in [1057]:
+                               # These  require a 'ctrl-c' to be sent... 
+                               baytech = BayTechCtrlCUnibe(values, verbose, ['22', '23'])
+                               rb_ret = baytech.reboot(values[nodename], dryrun)
+
                        elif values['pcu_id'] in [1012]:
                                # This pcu sometimes doesn't present the 'Username' prompt,
                                # unless you immediately try again...
@@ -1069,6 +1170,10 @@ def reboot_test(nodename, values, continue_probe, verbose, dryrun):
                                drac = DRAC(values, verbose, ['22'])
                                rb_ret = drac.reboot(0, dryrun)
 
+               elif continue_probe and values['model'].find("WTI IPS-4") >= 0:
+                               wti = WTIIPS4(values, verbose, ['23'])
+                               rb_ret = wti.reboot(values[nodename], dryrun)
+
                # BlackBox PSExxx-xx (e.g. PSE505-FR)
                elif continue_probe and \
                        (values['model'].find("BlackBox PS5xx") >= 0 or
@@ -1108,48 +1213,6 @@ def reboot_test(nodename, values, continue_probe, verbose, dryrun):
        #                                                                 pcu[nodename],
        #                                                                 dryrun)
 
-# Returns true if rebooted via PCU
-def reboot_old(nodename, dryrun):
-       pcu = plc.getpcu(nodename)
-       if not pcu:
-               plc.nodePOD(nodename)
-               return False
-       # Try the PCU first
-       logger.debug("Trying PCU %s %s" % (pcu['hostname'], pcu['model']))
-
-       # APC Masterswitch (Berkeley)
-       if pcu['model'] == "APC Masterswitch":
-               err = apc_reboot(pcu['ip'], pcu['username'],pcu['password'], 
-                               pcu[nodename], pcu['protocol'], dryrun)
-
-       # DataProbe iPal (many sites)
-       elif pcu['protocol'] == "telnet" and pcu['model'].find("IP-4") >= 0:
-               err = ipal_reboot(pcu['ip'],pcu['password'], pcu[nodename], dryrun)
-
-       # BayTech DS4-RPC
-       elif pcu['protocol'] == "ssh" and \
-       (pcu['model'].find("Baytech") >= 0 or pcu['model'].find("DS4") >= 0):
-               err = baytech_reboot(pcu['ip'], pcu['username'],pcu['password'], pcu[nodename], dryrun)
-
-       # BlackBox PSExxx-xx (e.g. PSE505-FR)
-       elif pcu['protocol'] == "http" and (pcu['model'] == "bbpse"):
-               err = bbpse_reboot(pcu['ip'], pcu['username'], pcu['password'], pcu[nodename],80, dryrun)
-
-       # x10toggle
-       elif pcu['protocol'] == "ssh" and (pcu['model'] == "x10toggle"):
-               err = x10toggle_reboot(pcu['ip'], pcu['username'],pcu['password'], pcu[nodename], dryrun)
-
-       # 
-       elif pcu['protocol'] == "racadm" and (pcu['model'] == "RAC"):
-               err = racadm_reboot(pcu['ip'], pcu['username'],pcu['password'], pcu_[nodename], dryrun)
-
-       # Unknown or unsupported
-       else:
-               err = errno.EPROTONOSUPPORT
-               return False
-       return True 
-
-
 def main():
        logger.setLevel(logging.DEBUG)
        ch = logging.StreamHandler()
@@ -1159,8 +1222,12 @@ def main():
        logger.addHandler(ch)
 
        try:
-               reboot("planetlab2.cs.uchicago.edu")
-               reboot("alice.cs.princeton.edu")
+               for node in sys.argv[1:]:
+                       print "Rebooting %s" % node
+                       if reboot_policy(node, True, False):
+                               print "success"
+                       else:
+                               print "failed"
        except Exception, err:
                print err