commit of tools I use, but which are not documented or guaranteed to work for anyone else
[monitor.git] / reboot.py
index f8d772f..495d366 100755 (executable)
--- a/reboot.py
+++ b/reboot.py
@@ -17,8 +17,7 @@ import base64
 from subprocess import PIPE, Popen
 import ssh.pxssh as pxssh
 import ssh.pexpect as pexpect
-
-plc_lock = threading.Lock()
+import socket
 
 # Use our versions of telnetlib and pyssh
 sys.path.insert(0, os.path.dirname(sys.argv[0]))
@@ -241,6 +240,8 @@ class PCUControl(Transport,PCUModel,PCURecord):
                                type = Transport.HTTP
                        else:
                                raise ExceptionPort("Unsupported Port: No transport from open ports")
+               else:
+                       raise Exception("No Portstatus: No transport because no open ports")
                Transport.__init__(self, type, verbose)
 
        def run(self, node_port, dryrun):
@@ -306,77 +307,6 @@ class IPAL(PCUControl):
                self.close()
                return 0
 
-def ipal_reboot(ip, password, port, dryrun):
-       global verbose
-       global plc_lock
-       telnet = None
-
-       try:
-               #plc_lock.acquire()
-               #print "lock acquired"
-
-               #try:
-                       #telnet = telnetlib.Telnet(ip) # , timeout=TELNET_TIMEOUT)
-               telnet = telnetlib.Telnet(ip, timeout=TELNET_TIMEOUT)
-               #except:
-               #       import traceback
-               #       traceback.print_exc()
-
-
-               telnet.set_debuglevel(verbose)
-
-               # XXX Some iPals require you to hit Enter a few times first
-               telnet_answer(telnet, "Password >", "\r\n\r\n")
-
-               # Login
-               telnet_answer(telnet, "Password >", password)
-
-               # XXX Some iPals require you to hit Enter a few times first
-               telnet.write("\r\n\r\n")
-
-               # P# - Pulse relay
-               if not dryrun:
-                       telnet_answer(telnet, "Enter >", "P%d" % port)
-
-               telnet.read_until("Enter >", TELNET_TIMEOUT)
-
-               # Close
-               telnet.close()
-
-               #print "lock released"
-               #plc_lock.release()
-               return 0
-
-       except EOFError, err:
-               if verbose:
-                       logger.debug("ipal_reboot: EOF")
-                       logger.debug(err)
-               telnet.close()
-               import traceback
-               traceback.print_exc()
-               #print "lock released"
-               #plc_lock.release()
-               return errno.ECONNRESET
-       except socket.error, err:
-               logger.debug("ipal_reboot: Socket Error")
-               logger.debug(err)
-               import traceback
-               traceback.print_exc()
-
-               return errno.ETIMEDOUT
-               
-       except Exception, err:
-               if verbose:
-                       logger.debug("ipal_reboot: Exception")
-                       logger.debug(err)
-               if telnet:
-                       telnet.close()
-               import traceback
-               traceback.print_exc()
-               #print "lock released"
-               #plc_lock.release()
-               return  "ipal error"
-
 class APCEurope(PCUControl):
        def run(self, node_port, dryrun):
                self.open(self.host, self.username)
@@ -474,6 +404,7 @@ class APCFolsom(PCUControl):
 
 class APCMaster(PCUControl):
        def run(self, node_port, dryrun):
+               print "Rebooting %s" % self.host
                self.open(self.host, self.username)
                self.sendPassword(self.password)
 
@@ -580,36 +511,28 @@ class HPiLO(PCUControl):
                
 class HPiLOHttps(PCUControl):
        def run(self, node_port, dryrun):
+               import soltesz  # project-local module; only CMD() is used from it in this method
 
-               cmd = "cmdhttps/locfg.pl -s %s -f %s -u %s -p %s" % (
+               locfg = soltesz.CMD()
+               cmd = "cmdhttps/locfg.pl -s %s -f %s -u %s -p '%s' | grep 'MESSAGE' | grep -v 'No error'" % (  # NOTE(review): password is wrapped in single quotes, presumably for a shell (the command contains pipes); a password containing ' would break this quoting
                                        self.host, "iloxml/Get_Network.xml", 
                                        self.username, self.password)
-               p_ilo  = Popen(cmd, stdout=PIPE, shell=True)
-               cmd2 = "grep 'MESSAGE' | grep -v 'No error'"
-               p_grep = Popen(cmd2, stdin=p_ilo.stdout, stdout=PIPE, stderr=PIPE, shell=True)
-               sout, serr = p_grep.communicate()
+               sout, serr = locfg.run_noexcept(cmd)  # presumably returns (stdout, stderr) without raising - confirm against soltesz.CMD
 
-               p_ilo.wait()
-               p_grep.wait()
                if sout.strip() != "":
                        print "sout: %s" % sout.strip()
                        return sout.strip()
 
                if not dryrun:
-                       cmd = "cmdhttps/locfg.pl -s %s -f %s -u %s -p %s" % (
-                                       self.host, "iloxml/Reset_Server.xml", 
-                                       self.username, self.password)
-                       p_ilo = Popen(cmd, stdin=PIPE, stdout=PIPE, shell=True)
-                       cmd2 = "grep 'MESSAGE' | grep -v 'No error'"
-                       p_grep = Popen(cmd2, stdin=p_ilo.stdout, stdout=PIPE, stderr=PIPE)
-                       sout, serr = p_grep.communicate()
-                       p_ilo.wait()
-                       p_grep.wait()
+                       locfg = soltesz.CMD()
+                       cmd = "cmdhttps/locfg.pl -s %s -f %s -u %s -p '%s' | grep 'MESSAGE' | grep -v 'No error'" % (  # same pipeline as above, but sending the Reset_Server.xml request
+                                               self.host, "iloxml/Reset_Server.xml", 
+                                               self.username, self.password)
+                       sout, serr = locfg.run_noexcept(cmd)
 
                        if sout.strip() != "":
                                print "sout: %s" % sout.strip()
-                               return sout.strip()
-
+                               #return sout.strip()  # NOTE(review): with this return disabled, messages from the reset are only printed and run() still returns 0
                return 0
 
 class BayTechAU(PCUControl):
@@ -750,7 +673,7 @@ class BayTechCtrlC(PCUControl):
                                                        print "sending Y"
                                                        s.send("Y\r\n")
 
-                               #index = s.expect(["DS-RPC>"])
+                               index = s.expect(["DS-RPC>"])
                                #print "got prompt back"
 
                        s.close()
@@ -1071,9 +994,10 @@ def runcmd(command, args, username, password, timeout = None):
                                out += "; output follows:\n" + data
                        raise Exception, out
 
-def racadm_reboot(ip, username, password, port, dryrun):
+def racadm_reboot(host, username, password, port, dryrun):
        global verbose
 
+       ip = socket.gethostbyname(host)
        try:
                cmd = "/usr/sbin/racadm"
                os.stat(cmd)
@@ -1103,10 +1027,11 @@ def pcu_name(pcu):
        else:
                return None
 
+import soltesz
+fb =soltesz.dbLoad("findbadpcus")
+
 def get_pcu_values(pcu_id):
        # TODO: obviously, this shouldn't be loaded each time...
-       import soltesz
-       fb =soltesz.dbLoad("findbadpcus")
 
        try:
                values = fb['nodes']["id_%s" % pcu_id]['values']
@@ -1115,37 +1040,36 @@ def get_pcu_values(pcu_id):
 
        return values
 
-def check_open_port(values, port_list):
-       ret = False
-
-       if 'portstatus' in values:
-               for port in port_list:
-                       if      port in values['portstatus'] and \
-                               values['portstatus'][port] == "open":
-
-                               ret = True
-       
-       return ret
+def reboot(nodename):  # shorthand for reboot_policy() with continue_probe=True and dryrun=False
+       return reboot_policy(nodename, True, False)
        
 def reboot_policy(nodename, continue_probe, dryrun):
        global verbose
+       print "this is a test of reboot_policy()"
 
        pcu = plc.getpcu(nodename)
        if not pcu:
+               logger.debug("no pcu for %s" % hostname)
+               print "no pcu for %s" % hostname
                return False # "%s has no pcu" % nodename
 
        values = get_pcu_values(pcu['pcu_id'])
        if values == None:
+               logger.debug("No values for pcu probe %s" % hostname)
+               print "No values for pcu probe %s" % hostname
                return False #"no info for pcu_id %s" % pcu['pcu_id']
        
        # Try the PCU first
        logger.debug("Trying PCU %s %s" % (pcu['hostname'], pcu['model']))
 
+       print "reboot_test"
        ret = reboot_test(nodename, values, continue_probe, verbose, dryrun)
 
        if ret != 0:
+               print ret
                return False
        else:
+               print "return true"
                return True
 
 def reboot_test(nodename, values, continue_probe, verbose, dryrun):
@@ -1159,6 +1083,7 @@ def reboot_test(nodename, values, continue_probe, verbose, dryrun):
                                
                # APC Masterswitch (Berkeley)
                elif continue_probe and values['model'].find("APC AP79xx/Masterswitch") >= 0:
+                       print values
 
                        # TODO: make a more robust version of APC
                        if values['pcu_id'] in [1163,1055,1111,1231,1113,1127,1128,1148]:
@@ -1169,11 +1094,11 @@ def reboot_test(nodename, values, continue_probe, verbose, dryrun):
                                apc = APCBrazil(values, verbose, ['22', '23'])
                                rb_ret = apc.reboot(values[nodename], dryrun)
 
-                       elif values['pcu_id'] in [1221]:
+                       elif values['pcu_id'] in [1221,1225]:
                                apc = APCBerlin(values, verbose, ['22', '23'])
                                rb_ret = apc.reboot(values[nodename], dryrun)
 
-                       elif values['pcu_id'] in [1173,1221,1220,1225]:
+                       elif values['pcu_id'] in [1173,1221,1220]:
                                apc = APCFolsom(values, verbose, ['22', '23'])
                                rb_ret = apc.reboot(values[nodename], dryrun)
 
@@ -1183,7 +1108,7 @@ def reboot_test(nodename, values, continue_probe, verbose, dryrun):
 
                # BayTech DS4-RPC
                elif continue_probe and values['model'].find("Baytech DS4-RPC") >= 0:
-                       if values['pcu_id'] in [1052,1209,1002,1008,1041,1013,1022]:
+                       if values['pcu_id'] in [1237,1052,1209,1002,1008,1041,1013,1022]:
                                # These  require a 'ctrl-c' to be sent... 
                                baytech = BayTechCtrlC(values, verbose, ['22', '23'])
                                rb_ret = baytech.reboot(values[nodename], dryrun)
@@ -1276,48 +1201,6 @@ def reboot_test(nodename, values, continue_probe, verbose, dryrun):
        #                                                                 pcu[nodename],
        #                                                                 dryrun)
 
-# Returns true if rebooted via PCU
-def reboot_old(nodename, dryrun):
-       pcu = plc.getpcu(nodename)
-       if not pcu:
-               plc.nodePOD(nodename)
-               return False
-       # Try the PCU first
-       logger.debug("Trying PCU %s %s" % (pcu['hostname'], pcu['model']))
-
-       # APC Masterswitch (Berkeley)
-       if pcu['model'] == "APC Masterswitch":
-               err = apc_reboot(pcu['ip'], pcu['username'],pcu['password'], 
-                               pcu[nodename], pcu['protocol'], dryrun)
-
-       # DataProbe iPal (many sites)
-       elif pcu['protocol'] == "telnet" and pcu['model'].find("IP-4") >= 0:
-               err = ipal_reboot(pcu['ip'],pcu['password'], pcu[nodename], dryrun)
-
-       # BayTech DS4-RPC
-       elif pcu['protocol'] == "ssh" and \
-       (pcu['model'].find("Baytech") >= 0 or pcu['model'].find("DS4") >= 0):
-               err = baytech_reboot(pcu['ip'], pcu['username'],pcu['password'], pcu[nodename], dryrun)
-
-       # BlackBox PSExxx-xx (e.g. PSE505-FR)
-       elif pcu['protocol'] == "http" and (pcu['model'] == "bbpse"):
-               err = bbpse_reboot(pcu['ip'], pcu['username'], pcu['password'], pcu[nodename],80, dryrun)
-
-       # x10toggle
-       elif pcu['protocol'] == "ssh" and (pcu['model'] == "x10toggle"):
-               err = x10toggle_reboot(pcu['ip'], pcu['username'],pcu['password'], pcu[nodename], dryrun)
-
-       # 
-       elif pcu['protocol'] == "racadm" and (pcu['model'] == "RAC"):
-               err = racadm_reboot(pcu['ip'], pcu['username'],pcu['password'], pcu_[nodename], dryrun)
-
-       # Unknown or unsupported
-       else:
-               err = errno.EPROTONOSUPPORT
-               return False
-       return True 
-
-
 def main():
        logger.setLevel(logging.DEBUG)
        ch = logging.StreamHandler()
@@ -1327,9 +1210,21 @@ def main():
        logger.addHandler(ch)
 
        try:
-               print "Rebooting %s" % sys.argv[1]
-               reboot_policy(sys.argv[1], True, False)
+               if "test" in sys.argv:
+                       dryrun = True
+               else:
+                       dryrun = False
+
+               for node in sys.argv[1:]:
+                       if node == "test": continue
+
+                       print "Rebooting %s" % node
+                       if reboot_policy(node, True, dryrun):
+                               print "success"
+                       else:
+                               print "failed"
        except Exception, err:
+               import traceback; traceback.print_exc()
                print err
 
 if __name__ == '__main__':