3 # Reboot specified nodes
9 import errno, time, traceback
11 import threading, popen2
17 from subprocess import PIPE, Popen
19 plc_lock = threading.Lock()
21 # Use our versions of telnetlib and pyssh
22 sys.path.insert(0, os.path.dirname(sys.argv[0]))
24 sys.path.insert(0, os.path.dirname(sys.argv[0]) + "/pyssh")
30 # Event class ID from pcu events
31 #NODE_POWER_CONTROL = 3
34 #MONITOR_USER_ID = 11142
37 logger = logging.getLogger("monitor")
class ExceptionNoTransport(Exception):
    """Raised when data is to be sent but no transport object is open."""


class ExceptionNotFound(Exception):
    """Raised when an expected string is missing from the output read."""


class ExceptionPassword(Exception):
    """Used for failures at a password prompt or for rejected credentials."""


class ExceptionTimeout(Exception):
    """Used when a follow-up prompt does not arrive within the timeout."""


class ExceptionPrompt(Exception):
    """Default error class for a prompt that was expected but not seen."""


class ExceptionSequence(Exception):
    """Used when a step of a scripted menu/confirmation sequence fails."""


class ExceptionReset(Exception):
    """Reset-related failure.

    NOTE(review): not raised anywhere in the visible code -- confirm usage.
    """


class ExceptionPort(Exception):
    """Raised when none of the supported ports offers a usable transport."""


class ExceptionUsername(Exception):
    """Used when the username prompt is not presented on login."""
def telnet_answer(telnet, expected, buffer):
    """Wait for a prompt on *telnet*, then reply with *buffer* plus CRLF.

    telnet   -- object providing read_until(expected, timeout) and write(data)
    expected -- prompt text to wait for
    buffer   -- text written in reply once the prompt has been seen

    Raises ExceptionNotFound when *expected* is absent from the output
    returned by read_until() under TELNET_TIMEOUT.
    """
    output = telnet.read_until(expected, TELNET_TIMEOUT)
    # logger.debug(output)
    if output.find(expected) == -1:
        # Call-style raise is valid on both Python 2 and Python 3, unlike
        # the original ``raise ExceptionNotFound, "..."`` form.
        raise ExceptionNotFound("'%s' not found" % expected)
    telnet.write(buffer + "\r\n")
63 # PCU has model, host, preferred-port, user, passwd,
# This is an object derived directly from the PLCAPI DB fields
67 def __init__(self, plc_pcu_dict):
68 for field in ['username', 'password', 'site_id',
71 'node_ids', 'ports', ]:
72 if field in plc_pcu_dict:
73 self.__setattr__(field, plc_pcu_dict[field])
75 raise Exception("No such field %s in PCU object" % field)
# These are the convenience functions built around the PCU object.
79 def __init__(self, plc_pcu_dict):
80 PCU.__init__(self, plc_pcu_dict)
81 self.host = self.pcu_name()
def pcu_name(self):
    """Return the name used to reach this PCU: its hostname, else its IP.

    A field is usable when it is neither None nor the empty string.
    """
    # Compare by value.  The original ``is not ""`` tested object identity,
    # so an empty string that is a different object (e.g. a unicode u"")
    # was wrongly accepted as a valid name.
    if self.hostname is not None and self.hostname != "":
        return self.hostname
    if self.ip is not None and self.ip != "":
        return self.ip
    # NOTE(review): the fall-through branch is not visible in this excerpt;
    # returning None is assumed -- confirm against the full file.
    return None
def nodeidToPort(self, node_id):
    """Return the PCU port associated with *node_id*.

    self.node_ids is searched for the first entry equal to *node_id*.
    Raises Exception when *node_id* is not listed.
    """
    try:
        index = self.node_ids.index(node_id)
    except ValueError:
        # %s instead of %d: a non-integer ID must still produce this
        # message rather than a TypeError from the string formatting.
        raise Exception("No such Node ID: %s" % node_id)
    # NOTE(review): the returned expression is not visible in this excerpt;
    # indexing the parallel self.ports list is assumed -- confirm.
    return self.ports[index]
99 # This class captures the observed pcu records from FindBadPCUs.py
101 def __init__(self, pcu_record_dict):
102 for field in ['nodenames', 'portstatus',
105 if field in pcu_record_dict:
106 if field == "reboot":
107 self.__setattr__("reboot_str", pcu_record_dict[field])
109 self.__setattr__(field, pcu_record_dict[field])
111 raise Exception("No such field %s in pcu record dict" % field)
120 def __init__(self, type, verbose):
122 self.verbose = verbose
123 self.transport = None
129 def open(self, host, username=None, password=None, prompt="User Name"):
132 if self.type == self.TELNET:
133 transport = telnetlib.Telnet(host, timeout=self.TELNET_TIMEOUT)
134 transport.set_debuglevel(self.verbose)
135 if username is not None:
136 self.transport = transport
137 self.ifThenSend(prompt, username, ExceptionUsername)
139 elif self.type == self.SSH:
140 if username is not None:
141 transport = pyssh.Ssh(username, host)
142 transport.set_debuglevel(self.verbose)
144 # TODO: have an ssh set_debuglevel() also...
146 raise Exception("Username cannot be None for ssh transport.")
147 elif self.type == self.HTTP:
148 self.url = "http://%s:%d/" % (host,80)
149 uri = "%s:%d" % (host,80)
152 authinfo = urllib2.HTTPPasswordMgrWithDefaultRealm()
153 authinfo.add_password (None, uri, username, password)
154 authhandler = urllib2.HTTPBasicAuthHandler( authinfo )
156 transport = urllib2.build_opener(authhandler)
159 raise Exception("Unknown transport type: %s" % self.type)
161 self.transport = transport
165 if self.type == self.TELNET:
166 self.transport.close()
167 elif self.type == self.SSH:
168 self.transport.close()
169 elif self.type == self.HTTP:
172 raise Exception("Unknown transport type %s" % self.type)
173 self.transport = None
175 def sendHTTP(self, resource, data):
177 print "POSTing '%s' to %s" % (data,self.url + resource)
180 f = self.transport.open(self.url + resource ,data)
185 except urllib2.URLError,err:
186 logger.info('Could not open http connection', err)
187 return "http transport error"
191 def sendPassword(self, password, prompt=None):
192 if self.type == self.TELNET:
194 self.ifThenSend("Password", password, ExceptionPassword)
196 self.ifThenSend(prompt, password, ExceptionPassword)
197 elif self.type == self.SSH:
198 self.ifThenSend("password:", password, ExceptionPassword)
199 elif self.type == self.HTTP:
202 raise Exception("Unknown transport type: %s" % self.type)
def ifThenSend(self, expected, buffer, ErrorClass=ExceptionPrompt):
    """Wait for *expected* on the open transport, then send *buffer* + CRLF.

    expected   -- prompt text to wait for
    buffer     -- reply written once the prompt has been seen
    ErrorClass -- exception type raised when the prompt is not in the
                  output returned by read_until()

    Raises ExceptionNoTransport when no transport object is set.
    """
    if self.transport is None:
        raise ExceptionNoTransport("transport object is type None")

    output = self.transport.read_until(expected, self.TELNET_TIMEOUT)
    if output.find(expected) == -1:
        # Call-style raise is valid on both Python 2 and Python 3, unlike
        # the original ``raise ErrorClass, "..."`` form.
        raise ErrorClass("'%s' not found" % expected)
    self.transport.write(buffer + "\r\n")
215 def ifElse(self, expected, ErrorClass):
217 self.transport.read_until(expected, self.TELNET_TIMEOUT)
219 raise ErrorClass("Could not find '%s' within timeout" % expected)
222 class PCUControl(Transport,PCUModel,PCURecord):
def __init__(self, plc_pcu_record, verbose, supported_ports=()):
    """Initialise the PCU record and pick a transport from its open ports.

    plc_pcu_record  -- dict handed to both PCUModel and PCURecord
    verbose         -- verbosity flag forwarded to Transport
    supported_ports -- port strings (e.g. '22') this PCU driver can use

    The first supported port whose self.portstatus entry is "open" decides
    the transport, checked in the fixed order 22, 23, 80, 443, 5869.
    Raises ExceptionPort when no supported port is open.
    """
    PCUModel.__init__(self, plc_pcu_record)
    PCURecord.__init__(self, plc_pcu_record)

    port_transports = (
        ('22', Transport.SSH),
        ('23', Transport.TELNET),
        ('80', Transport.HTTP),
        ('443', Transport.HTTP),
        # For DRAC cards. not sure how much it's used in the
        # protocol.. but racadm opens this port.
        ('5869', Transport.HTTP),
    )
    for port, transport_type in port_transports:
        # .get(): a port absent from portstatus counts as "not open"
        # instead of aborting the selection with a KeyError.
        if port in supported_ports and self.portstatus.get(port) == "open":
            break
    else:
        raise ExceptionPort("Unsupported Port: No transport from open ports")
    Transport.__init__(self, transport_type, verbose)
243 def run(self, node_port, dryrun):
244 """ This function is to be defined by the specific PCU instance. """
247 def reboot(self, node_port, dryrun):
249 return self.run(node_port, dryrun)
250 except ExceptionNotFound, err:
251 return "error: " + str(err)
252 except ExceptionPassword, err:
253 return "password exception: " + str(err)
254 except ExceptionTimeout, err:
255 return "timeout exception: " + str(err)
256 except ExceptionUsername, err:
257 return "exception: no username prompt: " + str(err)
258 except ExceptionSequence, err:
259 return "sequence error: " + str(err)
260 except ExceptionPrompt, err:
261 return "prompt exception: " + str(err)
262 except ExceptionPort, err:
263 return "no ports exception: " + str(err)
264 except socket.error, err:
265 return "socket error: timeout: " + str(err)
266 except EOFError, err:
268 logger.debug("reboot: EOF")
270 self.transport.close()
272 traceback.print_exc()
273 return "EOF connection reset" + str(err)
274 #except Exception, err:
276 # logger.debug("reboot: Exception")
279 # self.transport.close()
281 # traceback.print_exc()
282 # return "generic exception; unknown problem."
285 class IPAL(PCUControl):
286 def run(self, node_port, dryrun):
289 # XXX Some iPals require you to hit Enter a few times first
290 self.ifThenSend("Password >", "\r\n\r\n", ExceptionNotFound)
293 self.ifThenSend("Password >", self.password, ExceptionPassword)
294 self.transport.write("\r\n\r\n")
296 if not dryrun: # P# - Pulse relay
297 self.ifThenSend("Enter >",
300 # Get the next prompt
301 self.ifElse("Enter >", ExceptionTimeout)
306 def ipal_reboot(ip, password, port, dryrun):
313 #print "lock acquired"
316 #telnet = telnetlib.Telnet(ip) # , timeout=TELNET_TIMEOUT)
317 telnet = telnetlib.Telnet(ip, timeout=TELNET_TIMEOUT)
320 # traceback.print_exc()
323 telnet.set_debuglevel(verbose)
325 # XXX Some iPals require you to hit Enter a few times first
326 telnet_answer(telnet, "Password >", "\r\n\r\n")
329 telnet_answer(telnet, "Password >", password)
331 # XXX Some iPals require you to hit Enter a few times first
332 telnet.write("\r\n\r\n")
336 telnet_answer(telnet, "Enter >", "P%d" % port)
338 telnet.read_until("Enter >", TELNET_TIMEOUT)
343 #print "lock released"
347 except EOFError, err:
349 logger.debug("ipal_reboot: EOF")
353 traceback.print_exc()
354 #print "lock released"
356 return errno.ECONNRESET
357 except socket.error, err:
358 logger.debug("ipal_reboot: Socket Error")
361 traceback.print_exc()
363 return errno.ETIMEDOUT
365 except Exception, err:
367 logger.debug("ipal_reboot: Exception")
372 traceback.print_exc()
373 #print "lock released"
377 class APCEurope(PCUControl):
378 def run(self, node_port, dryrun):
379 self.open(self.host, self.username)
380 self.sendPassword(self.password)
382 self.ifThenSend("\r\n> ", "1", ExceptionPassword)
383 self.ifThenSend("\r\n> ", "2")
384 self.ifThenSend("\r\n> ", str(node_port))
385 # 3- Immediate Reboot
386 self.ifThenSend("\r\n> ", "3")
389 self.ifThenSend("Enter 'YES' to continue or <ENTER> to cancel",
393 self.ifThenSend("Enter 'YES' to continue or <ENTER> to cancel",
394 "", ExceptionSequence)
395 self.ifThenSend("Press <ENTER> to continue...", "", ExceptionSequence)
400 class APCFolsom(PCUControl):
401 def run(self, node_port, dryrun):
402 self.open(self.host, self.username)
403 self.sendPassword(self.password)
405 self.ifThenSend("\r\n> ", "1", ExceptionPassword)
406 self.ifThenSend("\r\n> ", "2")
407 self.ifThenSend("\r\n> ", "1")
408 self.ifThenSend("\r\n> ", str(node_port))
409 self.ifThenSend("\r\n> ", "1")
411 # 3- Immediate Reboot
412 self.ifThenSend("\r\n> ", "3")
415 self.ifThenSend("Enter 'YES' to continue or <ENTER> to cancel",
419 self.ifThenSend("Enter 'YES' to continue or <ENTER> to cancel",
420 "", ExceptionSequence)
421 self.ifThenSend("Press <ENTER> to continue...", "", ExceptionSequence)
426 class APCMaster(PCUControl):
427 def run(self, node_port, dryrun):
428 self.open(self.host, self.username)
429 self.sendPassword(self.password)
432 self.ifThenSend("\r\n> ", "1", ExceptionPassword)
433 # 3- Outlet Control/Config
434 self.ifThenSend("\r\n> ", "3")
436 self.ifThenSend("\r\n> ", str(node_port))
438 self.ifThenSend("\r\n> ", "1")
439 # 3- Immediate Reboot
440 self.ifThenSend("\r\n> ", "3")
443 self.ifThenSend("Enter 'YES' to continue or <ENTER> to cancel",
447 self.ifThenSend("Enter 'YES' to continue or <ENTER> to cancel",
448 "", ExceptionSequence)
449 self.ifThenSend("Press <ENTER> to continue...", "", ExceptionSequence)
454 class APC(PCUControl):
455 def __init__(self, plc_pcu_record, verbose):
456 PCUControl.__init__(self, plc_pcu_record, verbose)
458 self.master = APCMaster(plc_pcu_record, verbose)
459 self.folsom = APCFolsom(plc_pcu_record, verbose)
460 self.europe = APCEurope(plc_pcu_record, verbose)
462 def run(self, node_port, dryrun):
466 for pcu in [self.master, self.europe, self.folsom]:
469 print "-*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*"
472 time.sleep(sleep_time)
473 ret = pcu.reboot(node_port, dryrun)
474 except ExceptionSequence, err:
480 return "Unknown reboot sequence for APC PCU"
484 class DRACRacAdm(PCUControl):
485 def run(self, node_port, dryrun):
487 print "trying racadm_reboot..."
488 racadm_reboot(self.host, self.username, self.password, node_port, dryrun)
492 class DRAC(PCUControl):
493 def run(self, node_port, dryrun):
494 self.open(self.host, self.username)
495 self.sendPassword(self.password)
497 print "logging in..."
498 self.transport.write("\r\n")
501 self.ifThenSend("[%s]#" % self.username, "getsysinfo")
504 self.ifThenSend("[%s]#" % self.username, "serveraction powercycle")
506 self.ifThenSend("[%s]#" % self.username, "exit")
511 class HPiLO(PCUControl):
512 def run(self, node_port, dryrun):
513 self.open(self.host, self.username)
514 self.sendPassword(self.password)
517 self.ifThenSend("</>hpiLO->", "cd system1")
519 # Reboot Outlet N (Y/N)?
521 self.ifThenSend("</system1>hpiLO->", "POWER")
524 self.ifThenSend("</system1>hpiLO->", "reset")
526 self.ifThenSend("</system1>hpiLO->", "exit")
532 class HPiLOHttps(PCUControl):
533 def run(self, node_port, dryrun):
535 cmd = "cmdhttps/locfg.pl -s %s -f %s -u %s -p %s" % (
536 self.host, "iloxml/Get_Network.xml",
537 self.username, self.password)
538 p_ilo = Popen(cmd, stdout=PIPE, shell=True)
539 cmd2 = "grep 'MESSAGE' | grep -v 'No error'"
540 p_grep = Popen(cmd2, stdin=p_ilo.stdout, stdout=PIPE, stderr=PIPE, shell=True)
541 sout, serr = p_grep.communicate()
545 if sout.strip() != "":
546 print "sout: %s" % sout.strip()
550 cmd = "cmdhttps/locfg.pl -s %s -f %s -u %s -p %s" % (
551 self.host, "iloxml/Reset_Server.xml",
552 self.username, self.password)
553 p_ilo = Popen(cmd, stdin=PIPE, stdout=PIPE, shell=True)
554 cmd2 = "grep 'MESSAGE' | grep -v 'No error'"
555 p_grep = Popen(cmd2, stdin=p_ilo.stdout, stdout=PIPE, stderr=PIPE)
556 sout, serr = p_grep.communicate()
560 if sout.strip() != "":
561 print "sout: %s" % sout.strip()
566 class BayTechGeorgeTown(PCUControl):
567 def run(self, node_port, dryrun):
568 self.open(self.host, self.username, None, "Enter user name:")
569 self.sendPassword(self.password, "Enter Password:")
571 #self.ifThenSend("RPC-16>", "Status")
573 self.ifThenSend("RPC-16>", "Reboot %d" % node_port)
575 # Reboot Outlet N (Y/N)?
577 self.ifThenSend("(Y/N)?", "N")
579 self.ifThenSend("(Y/N)?", "Y")
580 self.ifThenSend("RPC-16>", "")
585 class BayTechCtrlC(PCUControl):
587 For some reason, these units let you log in fine, but they hang
588 indefinitely, unless you send a Ctrl-C after the password. No idea
591 def run(self, node_port, dryrun):
592 print "BayTechCtrlC %s" % self.host
593 self.open(self.host, self.username)
594 self.sendPassword(self.password)
596 #self.transport.write('
\ 3')
597 self.transport.write("\r\n")
598 self.transport.write(pyssh.CTRL_C)
599 #self.transport.write(chr(3))
600 #self.transport.write(chr(24))
601 #self.transport.write(chr(26))
602 #self.transport.write('
\18')
603 # Control Outlets (5 ,1).........5
604 self.ifThenSend("Enter Request :", "5")
608 self.ifThenSend("DS-RPC>", "Reboot %d" % node_port)
609 except ExceptionNotFound, msg:
610 # one machine is configured to ask for a username,
611 # even after login...
612 print "msg: %s" % msg
613 self.transport.write(self.username + "\r\n")
614 self.ifThenSend("DS-RPC>", "Reboot %d" % node_port)
617 # Reboot Outlet N (Y/N)?
619 self.ifThenSend("(Y/N)?", "N")
621 self.ifThenSend("(Y/N)?", "Y")
622 self.ifThenSend("DS-RPC>", "")
627 class BayTech(PCUControl):
628 def run(self, node_port, dryrun):
629 self.open(self.host, self.username)
630 self.sendPassword(self.password)
632 # Control Outlets (5 ,1).........5
633 self.ifThenSend("Enter Request :", "5")
637 self.ifThenSend("DS-RPC>", "Reboot %d" % node_port)
638 except ExceptionNotFound, msg:
639 # one machine is configured to ask for a username,
640 # even after login...
641 print "msg: %s" % msg
642 self.transport.write(self.username + "\r\n")
643 self.ifThenSend("DS-RPC>", "Reboot %d" % node_port)
646 # Reboot Outlet N (Y/N)?
648 self.ifThenSend("(Y/N)?", "N")
650 self.ifThenSend("(Y/N)?", "Y")
651 self.ifThenSend("DS-RPC>", "")
656 class ePowerSwitchGood(PCUControl):
658 # The old code used Python's HTTPPasswordMgrWithDefaultRealm()
659 # For some reason this both doesn't work and in some cases, actually
660 # hangs the PCU. Definitely not what we want.
662 # The code below is much simpler. Just letting things fail first,
663 # and then, trying again with authentication string in the header.
665 def run(self, node_port, dryrun):
666 self.transport = None
667 self.url = "http://%s:%d/" % (self.host,80)
668 uri = "%s:%d" % (self.host,80)
670 req = urllib2.Request(self.url)
672 handle = urllib2.urlopen(req)
674 # NOTE: this is expected to fail initially
681 return "ERROR: not protected by HTTP authentication"
683 if not hasattr(e, 'code') or e.code != 401:
684 return "ERROR: failed for: %s" % str(e)
686 base64data = base64.encodestring("%s:%s" % (self.username, self.password))[:-1]
687 # NOTE: assuming basic realm authentication.
688 authheader = "Basic %s" % base64data
689 req.add_header("Authorization", authheader)
692 f = urllib2.urlopen(req)
694 # failing here means the User/passwd is wrong (hopefully)
695 raise ExceptionPassword("Incorrect username/password")
697 # TODO: after verifying that the user/password is correct, we should
698 # actually reboot the given node.
701 # add data to handler,
702 # fetch url one more time on cmd.html, econtrol.html or whatever.
705 if self.verbose: print f.read()
711 class ePowerSwitchOld(PCUControl):
712 def run(self, node_port, dryrun):
713 self.url = "http://%s:%d/" % (self.host,80)
714 uri = "%s:%d" % (self.host,80)
717 authinfo = urllib2.HTTPPasswordMgrWithDefaultRealm()
718 authinfo.add_password (None, uri, self.username, self.password)
719 authhandler = urllib2.HTTPBasicAuthHandler( authinfo )
721 # NOTE: it doesn't seem to matter whether this authinfo is here or not.
722 transport = urllib2.build_opener(authinfo)
723 f = transport.open(self.url)
724 if self.verbose: print f.read()
727 transport = urllib2.build_opener(authhandler)
728 f = transport.open(self.url + "cmd.html", "P%d=r" % node_port)
729 if self.verbose: print f.read()
734 class ePowerSwitch(PCUControl):
735 def run(self, node_port, dryrun):
736 self.url = "http://%s:%d/" % (self.host,80)
737 uri = "%s:%d" % (self.host,80)
739 # TODO: I'm still not sure what the deal is here.
740 # two independent calls appear to need to be made before the
741 # reboot will succeed. It doesn't seem to be possible to do
742 # this with a single call. I have no idea why.
745 authinfo = urllib2.HTTPPasswordMgrWithDefaultRealm()
746 authinfo.add_password (None, uri, self.username, self.password)
747 authhandler = urllib2.HTTPBasicAuthHandler( authinfo )
749 # NOTE: it doesn't seem to matter whether this authinfo is here or not.
750 transport = urllib2.build_opener()
751 f = transport.open(self.url + "elogin.html", "pwd=%s" % self.password)
752 if self.verbose: print f.read()
755 transport = urllib2.build_opener(authhandler)
756 f = transport.open(self.url + "econtrol.html", "P%d=r" % node_port)
757 if self.verbose: print f.read()
759 # data= "P%d=r" % node_port
760 #self.open(self.host, self.username, self.password)
761 #self.sendHTTP("elogin.html", "pwd=%s" % self.password)
762 #self.sendHTTP("econtrol.html", data)
763 #self.sendHTTP("cmd.html", data)
769 ### rebooting european BlackBox PSE boxes
770 # Thierry Parmentelat - May 11 2005
771 # tested on 4-ports models known as PSE505-FR
772 # uses http to POST a data 'P<port>=r'
773 # relies on basic authentication within http1.0
774 # first curl-based script was
775 # curl --http1.0 --basic --user <username>:<password> --data P<port>=r \
776 # http://<hostname>:<http_port>/cmd.html && echo OK
778 def bbpse_reboot (pcu_ip,username,password,port_in_pcu,http_port, dryrun):
782 url = "http://%s:%d/cmd.html" % (pcu_ip,http_port)
783 data= "P%d=r" % port_in_pcu
785 logger.debug("POSTing '%s' on %s" % (data,url))
787 authinfo = urllib2.HTTPPasswordMgrWithDefaultRealm()
788 uri = "%s:%d" % (pcu_ip,http_port)
789 authinfo.add_password (None, uri, username, password)
790 authhandler = urllib2.HTTPBasicAuthHandler( authinfo )
792 opener = urllib2.build_opener(authhandler)
793 urllib2.install_opener(opener)
799 f = urllib2.urlopen(url,data)
806 except urllib2.URLError,err:
807 logger.info('Could not open http connection', err)
810 ### rebooting x10toggle based systems addressed by port
811 # Marc E. Fiuczynski - May 31 2005
812 # tested on 4-ports models known as PSE505-FR
813 # uses ssh and password to login to an account
814 # that will cause the system to be powercycled.
816 def x10toggle_reboot(ip, username, password, port, dryrun):
821 ssh = pyssh.Ssh(username, ip)
825 telnet_answer(ssh, "password:", password)
829 telnet_answer(ssh, "x10toggle>", "A%d" % port)
837 except Exception, err:
844 return errno.ETIMEDOUT
846 ### rebooting Dell systems via RAC card
847 # Marc E. Fiuczynski - June 01 2005
848 # tested with David Lowenthal's itchy/scratchy nodes at UGA
851 def runcmd(command, args, username, password, timeout = None):
854 result_ready = threading.Condition()
858 result_ready.acquire()
862 result_ready.notify()
863 result_ready.release()
865 def do_command(command, username, password):
868 # Popen4 is a popen-type class that combines stdout and stderr
869 p = popen2.Popen4(command)
871 # read all output data
872 p.tochild.write("%s\n" % username)
873 p.tochild.write("%s\n" % password)
875 data = p.fromchild.read()
878 # might get interrupted by a signal in poll() or waitpid()
881 set_result((retval, data))
884 if ex.errno == errno.EINTR:
887 except Exception, ex:
891 command = " ".join([command] + args)
893 worker = threading.Thread(target = do_command, args = (command, username, password, ))
894 worker.setDaemon(True)
895 result_ready.acquire()
897 result_ready.wait(timeout)
900 raise Exception, "command timed-out: '%s'" % command
902 result_ready.release()
905 if isinstance(result, Exception):
908 (retval, data) = result
909 if os.WIFEXITED(retval) and os.WEXITSTATUS(retval) == 0:
912 out = "system command ('%s') " % command
913 if os.WIFEXITED(retval):
914 out += "failed, rc = %d" % os.WEXITSTATUS(retval)
916 out += "killed by signal %d" % os.WTERMSIG(retval)
918 out += "; output follows:\n" + data
921 def racadm_reboot(ip, username, password, port, dryrun):
925 cmd = "/usr/sbin/racadm"
928 output = runcmd(cmd, ["-r %s -i serveraction powercycle" % ip],
931 output = runcmd(cmd, ["-r %s -i getsysinfo" % ip],
934 print "RUNCMD: %s" % output
939 except Exception, err:
940 logger.debug("runcmd raised exception %s" % err)
def pcu_name(pcu):
    """Return the PCU's hostname when set, otherwise its IP address.

    pcu -- dict with 'hostname' and 'ip' keys

    A value is usable when it is neither None nor the empty string.
    """
    # Compare by value.  The original ``is not ""`` tested object identity,
    # so an empty string that is a different object (e.g. a unicode u"")
    # was wrongly accepted as a valid name.
    if pcu['hostname'] is not None and pcu['hostname'] != "":
        return pcu['hostname']
    if pcu['ip'] is not None and pcu['ip'] != "":
        return pcu['ip']
    # NOTE(review): the fall-through branch is not visible in this excerpt;
    # returning None is assumed -- confirm against the full file.
    return None
953 def get_pcu_values(pcu_id):
954 # TODO: obviously, this shouldn't be loaded each time...
956 fb =soltesz.dbLoad("findbadpcus")
959 values = fb['nodes']["id_%s" % pcu_id]['values']
def check_open_port(values, port_list):
    """Return True when at least one port in *port_list* is reported open.

    values    -- mapping that may carry a 'portstatus' dict (port -> status)
    port_list -- ports to look for

    A port counts as open when its status is exactly "open".  A missing
    'portstatus' entry yields False.
    """
    # NOTE(review): the result-handling lines are not visible in this
    # excerpt; a boolean "any listed port is open" result is assumed.
    if 'portstatus' not in values:
        return False
    portstatus = values['portstatus']
    # any() stops at the first open port instead of scanning the whole list.
    return any(port in portstatus and portstatus[port] == "open"
               for port in port_list)
977 def reboot_policy(nodename, continue_probe, dryrun):
980 pcu = plc.getpcu(nodename)
982 return False # "%s has no pcu" % nodename
984 values = get_pcu_values(pcu['pcu_id'])
986 return False #"no info for pcu_id %s" % pcu['pcu_id']
989 logger.debug("Trying PCU %s %s" % (pcu['hostname'], pcu['model']))
991 ret = reboot_test(nodename, values, continue_probe, verbose, dryrun)
998 def reboot_test(nodename, values, continue_probe, verbose, dryrun):
1002 # DataProbe iPal (many sites)
1003 if continue_probe and values['model'].find("Dataprobe IP-41x/IP-81x") >= 0:
1004 ipal = IPAL(values, verbose, ['23'])
1005 rb_ret = ipal.reboot(values[nodename], dryrun)
1007 # APC Masterswitch (Berkeley)
1008 elif continue_probe and values['model'].find("APC AP79xx/Masterswitch") >= 0:
1010 # TODO: make a more robust version of APC
1011 if values['pcu_id'] in [1163,1055,1111,1231,1113,1127,1128,1148]:
1012 apc = APCEurope(values, verbose, ['22', '23'])
1013 rb_ret = apc.reboot(values[nodename], dryrun)
1015 elif values['pcu_id'] in [1173,1221,1220,1225]:
1016 apc = APCFolsom(values, verbose, ['22', '23'])
1017 rb_ret = apc.reboot(values[nodename], dryrun)
1020 apc = APCMaster(values, verbose, ['22', '23'])
1021 rb_ret = apc.reboot(values[nodename], dryrun)
1024 elif continue_probe and values['model'].find("Baytech DS4-RPC") >= 0:
1025 if values['pcu_id'] in [1041,1209,1025,1052,1057]:
1026 # These require a 'ctrl-c' to be sent...
1027 baytech = BayTechCtrlC(values, verbose, ['22', '23'])
1028 rb_ret = baytech.reboot(values[nodename], dryrun)
1030 elif values['pcu_id'] in [1012]:
1031 # This pcu sometimes doesn't present the 'Username' prompt,
1032 # unless you immediately try again...
1034 baytech = BayTechGeorgeTown(values, verbose, ['22', '23'])
1035 rb_ret = baytech.reboot(values[nodename], dryrun)
1037 baytech = BayTechGeorgeTown(values, verbose, ['22', '23'])
1038 rb_ret = baytech.reboot(values[nodename], dryrun)
1040 baytech = BayTech(values, verbose, ['22', '23'])
1041 rb_ret = baytech.reboot(values[nodename], dryrun)
1044 elif continue_probe and values['model'].find("HP iLO") >= 0:
1045 hpilo = HPiLO(values, verbose, ['22'])
1046 rb_ret = hpilo.reboot(0, dryrun)
1048 hpilo = HPiLOHttps(values, verbose, ['443'])
1049 rb_ret = hpilo.reboot(0, dryrun)
1052 elif continue_probe and values['model'].find("Dell RAC") >= 0:
1053 # TODO: I don't think DRACRacAdm will throw an exception for the
1054 # default method to catch...
1056 drac = DRACRacAdm(values, verbose, ['443', '5869'])
1057 rb_ret = drac.reboot(0, dryrun)
1059 drac = DRAC(values, verbose, ['22'])
1060 rb_ret = drac.reboot(0, dryrun)
1062 # BlackBox PSExxx-xx (e.g. PSE505-FR)
1063 elif continue_probe and \
1064 (values['model'].find("BlackBox PS5xx") >= 0 or
1065 values['model'].find("ePowerSwitch 1/4/8x") >=0 ):
1067 # TODO: allow a different port than http 80.
1068 if values['pcu_id'] in [1089, 1071, 1046, 1035, 1118]:
1069 eps = ePowerSwitchGood(values, verbose, ['80'])
1070 elif values['pcu_id'] in [1003]:
1071 eps = ePowerSwitch(values, verbose, ['80'])
1073 eps = ePowerSwitchGood(values, verbose, ['80'])
1075 rb_ret = eps.reboot(values[nodename], dryrun)
1077 elif continue_probe:
1078 rb_ret = "Unsupported_PCU"
1080 elif continue_probe == False:
1081 if 'portstatus' in values:
1088 except ExceptionPort, err:
1093 #elif continue_probe and values['protocol'] == "racadm" and \
1094 # values['model'] == "RAC":
1095 # rb_ret = racadm_reboot(pcu_name(values),
1096 # values['username'],
1097 # values['password'],
1101 # Returns true if rebooted via PCU
1102 def reboot_old(nodename, dryrun):
1103 pcu = plc.getpcu(nodename)
1105 plc.nodePOD(nodename)
1108 logger.debug("Trying PCU %s %s" % (pcu['hostname'], pcu['model']))
1110 # APC Masterswitch (Berkeley)
1111 if pcu['model'] == "APC Masterswitch":
1112 err = apc_reboot(pcu['ip'], pcu['username'],pcu['password'],
1113 pcu[nodename], pcu['protocol'], dryrun)
1115 # DataProbe iPal (many sites)
1116 elif pcu['protocol'] == "telnet" and pcu['model'].find("IP-4") >= 0:
1117 err = ipal_reboot(pcu['ip'],pcu['password'], pcu[nodename], dryrun)
1120 elif pcu['protocol'] == "ssh" and \
1121 (pcu['model'].find("Baytech") >= 0 or pcu['model'].find("DS4") >= 0):
1122 err = baytech_reboot(pcu['ip'], pcu['username'],pcu['password'], pcu[nodename], dryrun)
1124 # BlackBox PSExxx-xx (e.g. PSE505-FR)
1125 elif pcu['protocol'] == "http" and (pcu['model'] == "bbpse"):
1126 err = bbpse_reboot(pcu['ip'], pcu['username'], pcu['password'], pcu[nodename],80, dryrun)
1129 elif pcu['protocol'] == "ssh" and (pcu['model'] == "x10toggle"):
1130 err = x10toggle_reboot(pcu['ip'], pcu['username'],pcu['password'], pcu[nodename], dryrun)
1133 elif pcu['protocol'] == "racadm" and (pcu['model'] == "RAC"):
1134 err = racadm_reboot(pcu['ip'], pcu['username'],pcu['password'], pcu_[nodename], dryrun)
1136 # Unknown or unsupported
1138 err = errno.EPROTONOSUPPORT
1144 logger.setLevel(logging.DEBUG)
1145 ch = logging.StreamHandler()
1146 ch.setLevel(logging.DEBUG)
1147 formatter = logging.Formatter('LOGGER - %(message)s')
1148 ch.setFormatter(formatter)
1149 logger.addHandler(ch)
1152 reboot("planetlab2.cs.uchicago.edu")
1153 reboot("alice.cs.princeton.edu")
1154 except Exception, err:
1157 if __name__ == '__main__':
1159 logger = logging.getLogger("monitor")