3 # Reboot specified nodes
9 import errno, time, traceback
11 import threading, popen2
17 from subprocess import PIPE, Popen
19 plc_lock = threading.Lock()
21 # Use our versions of telnetlib and pyssh
22 sys.path.insert(0, os.path.dirname(sys.argv[0]))
24 sys.path.insert(0, os.path.dirname(sys.argv[0]) + "/pyssh")
30 # Event class ID from pcu events
31 #NODE_POWER_CONTROL = 3
34 #MONITOR_USER_ID = 11142
37 logger = logging.getLogger("monitor")
# Error types raised by the PCU transports and drivers below.  Each is a
# plain Exception subclass; the class alone tells PCUControl.reboot() which
# failure message to report.

class ExceptionNoTransport(Exception):
    """A send was attempted while no transport object was open."""


class ExceptionNotFound(Exception):
    """The expected text was not present in what the device sent back."""


class ExceptionPassword(Exception):
    """The password step of a login did not go as expected."""


class ExceptionTimeout(Exception):
    """An expected prompt was not seen within the timeout."""


class ExceptionPrompt(Exception):
    """Default error for ifThenSend() when the awaited prompt is missing."""


class ExceptionSequence(Exception):
    """A confirmation step in a device's menu sequence failed."""


class ExceptionReset(Exception):
    """Reset failure (not raised anywhere in the code visible here)."""


class ExceptionPort(Exception):
    """None of the supported ports of the PCU is open."""


class ExceptionUsername(Exception):
    """The username prompt was not presented by the device."""
def telnet_answer(telnet, expected, buffer):
    """Wait for `expected` on `telnet`, then reply with `buffer` + CRLF.

    `telnet` only needs read_until(text, timeout) and write(text).

    Raises ExceptionNotFound when the text read back does not contain
    `expected`.
    """
    reply = telnet.read_until(expected, TELNET_TIMEOUT)
    # logger.debug(reply)
    if expected not in reply:
        raise ExceptionNotFound("'%s' not found" % expected)
    telnet.write(buffer + "\r\n")
63 # PCU has model, host, preferred-port, user, passwd,
65 # This is an object derived directly from the PLCAPI DB fields
67 def __init__(self, plc_pcu_dict):
68 for field in ['username', 'password', 'site_id',
71 'node_ids', 'ports', ]:
72 if field in plc_pcu_dict:
73 self.__setattr__(field, plc_pcu_dict[field])
75 raise Exception("No such field %s in PCU object" % field)
77 # These are the convenience functions built around the PCU object.
79 def __init__(self, plc_pcu_dict):
80 PCU.__init__(self, plc_pcu_dict)
81 self.host = self.pcu_name()
def pcu_name(self):
    """Return the name used to reach this PCU: hostname first, then IP.

    Returns None when neither attribute holds a non-empty value.

    NOTE(review): the `def` line and the return statements were not
    visible in the reviewed excerpt; they are restored from the two
    conditions and from the `self.host = self.pcu_name()` call site.
    Confirm against the original.
    """
    # Compare by value.  `x is not ""` tests object identity and only works
    # while the interpreter happens to share one empty-string object.
    if self.hostname is not None and self.hostname != "":
        return self.hostname
    if self.ip is not None and self.ip != "":
        return self.ip
    return None
def nodeidToPort(self, node_id):
    """Return the entry of self.ports at the position of node_id in self.node_ids.

    Raises Exception("No such Node ID: ...") when node_id is not listed.

    NOTE(review): the return line was not visible in the reviewed excerpt;
    pairing node_ids[i] with ports[i] is inferred from the method name and
    the index loop. Confirm against the original.
    """
    try:
        position = self.node_ids.index(node_id)
    except ValueError:
        # %s rather than %d: a non-integer id must still produce this
        # error instead of a TypeError from the format operator.
        raise Exception("No such Node ID: %s" % (node_id,))
    return self.ports[position]
99 # This class captures the observed pcu records from FindBadPCUs.py
101 def __init__(self, pcu_record_dict):
102 for field in ['nodenames', 'portstatus',
105 if field in pcu_record_dict:
106 if field == "reboot":
107 self.__setattr__("reboot_str", pcu_record_dict[field])
109 self.__setattr__(field, pcu_record_dict[field])
111 raise Exception("No such field %s in pcu record dict" % field)
120 def __init__(self, type, verbose):
122 self.verbose = verbose
123 self.transport = None
129 def open(self, host, username=None, password=None, prompt="User Name"):
132 if self.type == self.TELNET:
133 transport = telnetlib.Telnet(host, timeout=self.TELNET_TIMEOUT)
134 transport.set_debuglevel(self.verbose)
135 if username is not None:
136 self.transport = transport
137 self.ifThenSend(prompt, username, ExceptionUsername)
139 elif self.type == self.SSH:
140 if username is not None:
141 transport = pyssh.Ssh(username, host)
142 transport.set_debuglevel(self.verbose)
144 # TODO: have an ssh set_debuglevel() also...
146 raise Exception("Username cannot be None for ssh transport.")
147 elif self.type == self.HTTP:
148 self.url = "http://%s:%d/" % (host,80)
149 uri = "%s:%d" % (host,80)
152 authinfo = urllib2.HTTPPasswordMgrWithDefaultRealm()
153 authinfo.add_password (None, uri, username, password)
154 authhandler = urllib2.HTTPBasicAuthHandler( authinfo )
156 transport = urllib2.build_opener(authhandler)
159 raise Exception("Unknown transport type: %s" % self.type)
161 self.transport = transport
165 if self.type == self.TELNET:
166 self.transport.close()
167 elif self.type == self.SSH:
168 self.transport.close()
169 elif self.type == self.HTTP:
172 raise Exception("Unknown transport type %s" % self.type)
173 self.transport = None
def sendHTTP(self, resource, data):
    """POST `data` to self.url + resource through the HTTP transport.

    Returns the string "http transport error" when urllib2 reports a
    URLError.

    NOTE(review): the verbose guards, the read of the response body and the
    success return value (0) were not visible in the reviewed excerpt.
    Confirm against the original.
    """
    target = self.url + resource
    if self.verbose:
        print("POSTing '%s' to %s" % (data, target))

    try:
        response = self.transport.open(target, data)
        body = response.read()
    except urllib2.URLError as err:
        # The message needs a placeholder for `err`; without one the logging
        # module fails to format the record and the error is never logged.
        logger.info('Could not open http connection: %s', err)
        return "http transport error"

    if self.verbose:
        print(body)
    return 0
def sendPassword(self, password, prompt=None):
    """Answer the device's password prompt according to the transport type.

    TELNET waits for `prompt` (or "Password" when no prompt is given), SSH
    waits for "password:", and HTTP sends nothing.  A missing prompt
    surfaces as ExceptionPassword from ifThenSend(); any other transport
    type raises Exception.
    """
    transport_type = self.type
    if transport_type == self.TELNET:
        awaited = "Password" if prompt is None else prompt
        self.ifThenSend(awaited, password, ExceptionPassword)
        return
    if transport_type == self.SSH:
        self.ifThenSend("password:", password, ExceptionPassword)
        return
    if transport_type == self.HTTP:
        # Nothing to send here for HTTP.
        return
    raise Exception("Unknown transport type: %s" % self.type)
def ifThenSend(self, expected, buffer, ErrorClass=ExceptionPrompt):
    """Wait for `expected` on the open transport, then send `buffer` + CRLF.

    Raises ExceptionNoTransport when no transport is open, and ErrorClass
    when the text read within self.TELNET_TIMEOUT does not contain
    `expected`.
    """
    # Identity test: None is a singleton, and `!= None` would defer to any
    # custom comparison the transport object defines.
    if self.transport is None:
        raise ExceptionNoTransport("transport object is type None")

    output = self.transport.read_until(expected, self.TELNET_TIMEOUT)
    if expected not in output:
        raise ErrorClass("'%s' not found" % expected)
    self.transport.write(buffer + "\r\n")
215 def ifElse(self, expected, ErrorClass):
217 self.transport.read_until(expected, self.TELNET_TIMEOUT)
219 raise ErrorClass("Could not find '%s' within timeout" % expected)
222 class PCUControl(Transport,PCUModel,PCURecord):
def __init__(self, plc_pcu_record, verbose, supported_ports=None):
    """Initialise the PCU model/record and choose a transport.

    The transport is taken from the first port, in the fixed order below,
    that is listed in `supported_ports` and whose entry in self.portstatus
    equals "open".

    Raises ExceptionPort when no such port exists, which is always the case
    when `supported_ports` is omitted or empty.
    """
    PCUModel.__init__(self, plc_pcu_record)
    PCURecord.__init__(self, plc_pcu_record)

    # None stands in for "no ports" so that no list object is shared
    # between calls through a mutable default argument.
    if supported_ports is None:
        supported_ports = []

    # Probe order matters: earlier entries win when several ports are open.
    port_transports = (
        # NOTE(review): SSH for port 22 is inferred from the port number;
        # the assignment was not visible in the reviewed excerpt.
        ('22', Transport.SSH),
        ('23', Transport.TELNET),
        ('80', Transport.HTTP),
        ('443', Transport.HTTP),
        # For DRAC cards. not sure how much it's used in the
        # protocol.. but racadm opens this port.
        ('5869', Transport.HTTP),
    )
    for port, transport_type in port_transports:
        if port in supported_ports and self.portstatus[port] == "open":
            break
    else:
        raise ExceptionPort("Unsupported Port: No transport from open ports")

    Transport.__init__(self, transport_type, verbose)
243 def run(self, node_port, dryrun):
244 """ This function is to be defined by the specific PCU instance. """
247 def reboot(self, node_port, dryrun):
249 return self.run(node_port, dryrun)
250 except ExceptionNotFound, err:
251 return "error: " + str(err)
252 except ExceptionPassword, err:
253 return "password exception: " + str(err)
254 except ExceptionTimeout, err:
255 return "timeout exception: " + str(err)
256 except ExceptionUsername, err:
257 return "exception: no username prompt: " + str(err)
258 except ExceptionSequence, err:
259 return "sequence error: " + str(err)
260 except ExceptionPrompt, err:
261 return "prompt exception: " + str(err)
262 except ExceptionPort, err:
263 return "no ports exception: " + str(err)
264 except socket.error, err:
265 return "socket error: timeout: " + str(err)
266 except EOFError, err:
268 logger.debug("reboot: EOF")
270 self.transport.close()
272 traceback.print_exc()
273 return "EOF connection reset" + str(err)
274 #except Exception, err:
276 # logger.debug("reboot: Exception")
279 # self.transport.close()
281 # traceback.print_exc()
282 # return "generic exception; unknown problem."
285 class IPAL(PCUControl):
286 def run(self, node_port, dryrun):
289 # XXX Some iPals require you to hit Enter a few times first
290 self.ifThenSend("Password >", "\r\n\r\n", ExceptionNotFound)
293 self.ifThenSend("Password >", self.password, ExceptionPassword)
294 self.transport.write("\r\n\r\n")
296 if not dryrun: # P# - Pulse relay
297 self.ifThenSend("Enter >",
300 # Get the next prompt
301 self.ifElse("Enter >", ExceptionTimeout)
306 def ipal_reboot(ip, password, port, dryrun):
313 #print "lock acquired"
316 #telnet = telnetlib.Telnet(ip) # , timeout=TELNET_TIMEOUT)
317 telnet = telnetlib.Telnet(ip, timeout=TELNET_TIMEOUT)
320 # traceback.print_exc()
323 telnet.set_debuglevel(verbose)
325 # XXX Some iPals require you to hit Enter a few times first
326 telnet_answer(telnet, "Password >", "\r\n\r\n")
329 telnet_answer(telnet, "Password >", password)
331 # XXX Some iPals require you to hit Enter a few times first
332 telnet.write("\r\n\r\n")
336 telnet_answer(telnet, "Enter >", "P%d" % port)
338 telnet.read_until("Enter >", TELNET_TIMEOUT)
343 #print "lock released"
347 except EOFError, err:
349 logger.debug("ipal_reboot: EOF")
353 traceback.print_exc()
354 #print "lock released"
356 return errno.ECONNRESET
357 except socket.error, err:
358 logger.debug("ipal_reboot: Socket Error")
361 traceback.print_exc()
363 return errno.ETIMEDOUT
365 except Exception, err:
367 logger.debug("ipal_reboot: Exception")
372 traceback.print_exc()
373 #print "lock released"
class _APCMenuControl(PCUControl):
    """Shared login-and-menu walk for the APC variants below.

    Subclasses only set `menu_path`: the selections sent, in order, at
    successive "\\r\\n> " prompts.  A None entry stands for the outlet number
    (node_port).

    NOTE(review): the `if not dryrun` guard, the "YES\\r\\n" confirmation
    text, and the closing `self.close()` / `return 0` were not visible in
    the reviewed excerpt; they are restored from the paired confirmation
    calls. Confirm against the original.
    """

    menu_path = ()

    MENU_PROMPT = "\r\n> "
    CONFIRM_PROMPT = "Enter 'YES' to continue or <ENTER> to cancel"
    CONTINUE_PROMPT = "Press <ENTER> to continue..."

    def run(self, node_port, dryrun):
        """Log in, walk menu_path to the outlet, and confirm unless dryrun."""
        self.open(self.host, self.username)
        self.sendPassword(self.password)

        for position, step in enumerate(self.menu_path):
            choice = str(node_port) if step is None else step
            if position == 0:
                # A missing first menu prompt is reported as a password
                # failure.
                self.ifThenSend(self.MENU_PROMPT, choice, ExceptionPassword)
            else:
                self.ifThenSend(self.MENU_PROMPT, choice)

        # An empty answer sends only <ENTER>, which the prompt text
        # describes as cancelling.
        answer = "" if dryrun else "YES\r\n"
        self.ifThenSend(self.CONFIRM_PROMPT, answer, ExceptionSequence)
        self.ifThenSend(self.CONTINUE_PROMPT, "", ExceptionSequence)

        self.close()
        return 0


class APCEurope(_APCMenuControl):
    # 1, 2, <outlet>, 3- Immediate Reboot
    menu_path = ("1", "2", None, "3")


class APCBrazil(_APCMenuControl):
    # 1, <outlet>, 4- Immediate Reboot
    menu_path = ("1", None, "4")


class APCBerlin(_APCMenuControl):
    # 1, 2, 1, <outlet>, 3- Immediate Reboot
    menu_path = ("1", "2", "1", None, "3")


class APCFolsom(_APCMenuControl):
    # 1, 2, 1, <outlet>, 1, 3- Immediate Reboot
    menu_path = ("1", "2", "1", None, "1", "3")


class APCMaster(_APCMenuControl):
    # 1, 3- Outlet Control/Config, <outlet>, 1, 3- Immediate Reboot
    menu_path = ("1", "3", None, "1", "3")
500 class APC(PCUControl):
501 def __init__(self, plc_pcu_record, verbose):
502 PCUControl.__init__(self, plc_pcu_record, verbose)
504 self.master = APCMaster(plc_pcu_record, verbose)
505 self.folsom = APCFolsom(plc_pcu_record, verbose)
506 self.europe = APCEurope(plc_pcu_record, verbose)
508 def run(self, node_port, dryrun):
512 for pcu in [self.master, self.europe, self.folsom]:
515 print "-*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*"
518 time.sleep(sleep_time)
519 ret = pcu.reboot(node_port, dryrun)
520 except ExceptionSequence, err:
526 return "Unknown reboot sequence for APC PCU"
530 class DRACRacAdm(PCUControl):
531 def run(self, node_port, dryrun):
533 print "trying racadm_reboot..."
534 racadm_reboot(self.host, self.username, self.password, node_port, dryrun)
538 class DRAC(PCUControl):
539 def run(self, node_port, dryrun):
540 self.open(self.host, self.username)
541 self.sendPassword(self.password)
543 print "logging in..."
544 self.transport.write("\r\n")
547 self.ifThenSend("[%s]#" % self.username, "getsysinfo")
550 self.ifThenSend("[%s]#" % self.username, "serveraction powercycle")
552 self.ifThenSend("[%s]#" % self.username, "exit")
557 class HPiLO(PCUControl):
558 def run(self, node_port, dryrun):
559 self.open(self.host, self.username)
560 self.sendPassword(self.password)
563 self.ifThenSend("</>hpiLO->", "cd system1")
565 # Reboot Outlet N (Y/N)?
567 self.ifThenSend("</system1>hpiLO->", "POWER")
570 self.ifThenSend("</system1>hpiLO->", "reset")
572 self.ifThenSend("</system1>hpiLO->", "exit")
class HPiLOHttps(PCUControl):
    """Drive an HP iLO card through the bundled cmdhttps/locfg.pl script."""

    def run(self, node_port, dryrun):
        """Probe the iLO with Get_Network.xml, then reset unless dryrun.

        `node_port` is not used.

        NOTE(review): the `if not dryrun` guard and the return statements
        were not visible in the reviewed excerpt.  This version returns the
        filtered error text when the probe reports a problem and 0
        otherwise. Confirm against the original.
        """
        problems = self._locfg("iloxml/Get_Network.xml")
        if problems != "":
            print("sout: %s" % problems)
            return problems

        if not dryrun:
            problems = self._locfg("iloxml/Reset_Server.xml")
            if problems != "":
                print("sout: %s" % problems)

        return 0

    def _locfg(self, xml_file):
        """Run locfg.pl with xml_file and return its non-"No error" MESSAGE lines."""
        # Argument list, no shell: the password is never parsed by /bin/sh.
        # The second grep pipeline was started without shell=True, so the
        # whole pipeline string was taken as a program name and could not
        # run; filtering here removes the need for grep altogether.
        argv = ["cmdhttps/locfg.pl",
                "-s", self.host,
                "-f", xml_file,
                "-u", self.username,
                "-p", self.password]
        proc = Popen(argv, stdout=PIPE)
        # communicate() also waits for the child, so it is reaped.
        output, _ = proc.communicate()

        # Same filter as: grep 'MESSAGE' | grep -v 'No error'
        flagged = [line for line in output.splitlines()
                   if "MESSAGE" in line and "No error" not in line]
        return "\n".join(flagged).strip()
612 class BayTechGeorgeTown(PCUControl):
613 def run(self, node_port, dryrun):
614 self.open(self.host, self.username, None, "Enter user name:")
615 self.sendPassword(self.password, "Enter Password:")
617 #self.ifThenSend("RPC-16>", "Status")
619 self.ifThenSend("RPC-16>", "Reboot %d" % node_port)
621 # Reboot Outlet N (Y/N)?
623 self.ifThenSend("(Y/N)?", "N")
625 self.ifThenSend("(Y/N)?", "Y")
626 self.ifThenSend("RPC-16>", "")
631 class BayTechCtrlC(PCUControl):
633 For some reason, these units let you log in fine, but they hang
634 indefinitely, unless you send a Ctrl-C after the password. No idea
637 def run(self, node_port, dryrun):
638 print "BayTechCtrlC %s" % self.host
639 self.open(self.host, self.username)
640 self.sendPassword(self.password)
642 #self.transport.write('
\ 3')
643 self.transport.write("\r\n")
644 self.transport.write(pyssh.CTRL_C)
645 #self.transport.write(chr(3))
646 #self.transport.write(chr(24))
647 #self.transport.write(chr(26))
648 #self.transport.write('
\18')
649 # Control Outlets (5 ,1).........5
650 self.ifThenSend("Enter Request :", "5")
654 self.ifThenSend("DS-RPC>", "Reboot %d" % node_port)
655 except ExceptionNotFound, msg:
656 # one machine is configured to ask for a username,
657 # even after login...
658 print "msg: %s" % msg
659 self.transport.write(self.username + "\r\n")
660 self.ifThenSend("DS-RPC>", "Reboot %d" % node_port)
663 # Reboot Outlet N (Y/N)?
665 self.ifThenSend("(Y/N)?", "N")
667 self.ifThenSend("(Y/N)?", "Y")
668 self.ifThenSend("DS-RPC>", "")
673 class BayTech(PCUControl):
674 def run(self, node_port, dryrun):
675 self.open(self.host, self.username)
676 self.sendPassword(self.password)
678 # Control Outlets (5 ,1).........5
679 self.ifThenSend("Enter Request :", "5")
683 self.ifThenSend("DS-RPC>", "Reboot %d" % node_port)
684 except ExceptionNotFound, msg:
685 # one machine is configured to ask for a username,
686 # even after login...
687 print "msg: %s" % msg
688 self.transport.write(self.username + "\r\n")
689 self.ifThenSend("DS-RPC>", "Reboot %d" % node_port)
692 # Reboot Outlet N (Y/N)?
694 self.ifThenSend("(Y/N)?", "N")
696 self.ifThenSend("(Y/N)?", "Y")
697 self.ifThenSend("DS-RPC>", "")
702 class ePowerSwitchGood(PCUControl):
704 # The old code used Python's HTTPPasswordMgrWithDefaultRealm()
705 # For some reason this both doesn't work and in some cases, actually
706 # hangs the PCU. Definitely not what we want.
708 # The code below is much simpler. Just letting things fail first,
709 # and then, trying again with authentication string in the header.
711 def run(self, node_port, dryrun):
712 self.transport = None
713 self.url = "http://%s:%d/" % (self.host,80)
714 uri = "%s:%d" % (self.host,80)
716 req = urllib2.Request(self.url)
718 handle = urllib2.urlopen(req)
720 # NOTE: this is expected to fail initially
727 return "ERROR: not protected by HTTP authentication"
729 if not hasattr(e, 'code') or e.code != 401:
730 return "ERROR: failed for: %s" % str(e)
732 base64data = base64.encodestring("%s:%s" % (self.username, self.password))[:-1]
733 # NOTE: assuming basic realm authentication.
734 authheader = "Basic %s" % base64data
735 req.add_header("Authorization", authheader)
738 f = urllib2.urlopen(req)
740 # failing here means the User/passwd is wrong (hopefully)
741 raise ExceptionPassword("Incorrect username/password")
743 # TODO: after verifying that the user/password is correct, we should
744 # actually reboot the given node.
747 # add data to handler,
748 # fetch url one more time on cmd.html, econtrol.html or whatever.
751 if self.verbose: print f.read()
757 class ePowerSwitchOld(PCUControl):
758 def run(self, node_port, dryrun):
759 self.url = "http://%s:%d/" % (self.host,80)
760 uri = "%s:%d" % (self.host,80)
763 authinfo = urllib2.HTTPPasswordMgrWithDefaultRealm()
764 authinfo.add_password (None, uri, self.username, self.password)
765 authhandler = urllib2.HTTPBasicAuthHandler( authinfo )
767 # NOTE: it doesn't seem to matter whether this authinfo is here or not.
768 transport = urllib2.build_opener(authinfo)
769 f = transport.open(self.url)
770 if self.verbose: print f.read()
773 transport = urllib2.build_opener(authhandler)
774 f = transport.open(self.url + "cmd.html", "P%d=r" % node_port)
775 if self.verbose: print f.read()
780 class ePowerSwitch(PCUControl):
781 def run(self, node_port, dryrun):
782 self.url = "http://%s:%d/" % (self.host,80)
783 uri = "%s:%d" % (self.host,80)
785 # TODO: I'm still not sure what the deal is here.
786 # two independent calls appear to need to be made before the
787 # reboot will succeed. It doesn't seem to be possible to do
788 # this with a single call. I have no idea why.
791 authinfo = urllib2.HTTPPasswordMgrWithDefaultRealm()
792 authinfo.add_password (None, uri, self.username, self.password)
793 authhandler = urllib2.HTTPBasicAuthHandler( authinfo )
795 # NOTE: it doesn't seem to matter whether this authinfo is here or not.
796 transport = urllib2.build_opener()
797 f = transport.open(self.url + "elogin.html", "pwd=%s" % self.password)
798 if self.verbose: print f.read()
801 transport = urllib2.build_opener(authhandler)
802 f = transport.open(self.url + "econtrol.html", "P%d=r" % node_port)
803 if self.verbose: print f.read()
805 # data= "P%d=r" % node_port
806 #self.open(self.host, self.username, self.password)
807 #self.sendHTTP("elogin.html", "pwd=%s" % self.password)
808 #self.sendHTTP("econtrol.html", data)
809 #self.sendHTTP("cmd.html", data)
815 ### rebooting european BlackBox PSE boxes
816 # Thierry Parmentelat - May 11 2005
817 # tested on 4-ports models known as PSE505-FR
818 # uses http to POST a data 'P<port>=r'
819 # relies on basic authentication within http1.0
820 # first curl-based script was
821 # curl --http1.0 --basic --user <username>:<password> --data P<port>=r \
822 # http://<hostname>:<http_port>/cmd.html && echo OK
def bbpse_reboot(pcu_ip, username, password, port_in_pcu, http_port, dryrun):
    """POST 'P<port>=r' to http://<pcu_ip>:<http_port>/cmd.html with basic auth.

    A urllib2 opener carrying the credentials is built and installed
    globally, as before, before the request is made.

    NOTE(review): the `verbose` guards, the dryrun early exit and the
    return codes (0 on success or dryrun, -1 on URLError) were not visible
    in the reviewed excerpt. Confirm against the original.
    """
    url = "http://%s:%d/cmd.html" % (pcu_ip, http_port)
    data = "P%d=r" % port_in_pcu
    if verbose:
        logger.debug("POSTing '%s' on %s" % (data, url))

    authinfo = urllib2.HTTPPasswordMgrWithDefaultRealm()
    uri = "%s:%d" % (pcu_ip, http_port)
    authinfo.add_password(None, uri, username, password)
    authhandler = urllib2.HTTPBasicAuthHandler(authinfo)

    opener = urllib2.build_opener(authhandler)
    urllib2.install_opener(opener)

    if dryrun:
        return 0

    try:
        response = urllib2.urlopen(url, data)
        body = response.read()
    except urllib2.URLError as err:
        # The message needs a placeholder for `err`; without one the logging
        # module fails to format the record and the error is never logged.
        logger.info('Could not open http connection: %s', err)
        return -1

    if verbose:
        logger.debug(body)
    return 0
856 ### rebooting x10toggle based systems addressed by port
857 # Marc E. Fiuczynski - May 31 2005
858 # tested on 4-ports models known as PSE505-FR
859 # uses ssh and password to login to an account
860 # that will cause the system to be powercycled.
862 def x10toggle_reboot(ip, username, password, port, dryrun):
867 ssh = pyssh.Ssh(username, ip)
871 telnet_answer(ssh, "password:", password)
875 telnet_answer(ssh, "x10toggle>", "A%d" % port)
883 except Exception, err:
890 return errno.ETIMEDOUT
892 ### rebooting Dell systems via RAC card
893 # Marc E. Fiuczynski - June 01 2005
894 # tested with David Lowenthal's itchy/scratchy nodes at UGA
897 def runcmd(command, args, username, password, timeout = None):
900 result_ready = threading.Condition()
904 result_ready.acquire()
908 result_ready.notify()
909 result_ready.release()
911 def do_command(command, username, password):
914 # Popen4 is a popen-type class that combines stdout and stderr
915 p = popen2.Popen4(command)
917 # read all output data
918 p.tochild.write("%s\n" % username)
919 p.tochild.write("%s\n" % password)
921 data = p.fromchild.read()
924 # might get interrupted by a signal in poll() or waitpid()
927 set_result((retval, data))
930 if ex.errno == errno.EINTR:
933 except Exception, ex:
937 command = " ".join([command] + args)
939 worker = threading.Thread(target = do_command, args = (command, username, password, ))
940 worker.setDaemon(True)
941 result_ready.acquire()
943 result_ready.wait(timeout)
946 raise Exception, "command timed-out: '%s'" % command
948 result_ready.release()
951 if isinstance(result, Exception):
954 (retval, data) = result
955 if os.WIFEXITED(retval) and os.WEXITSTATUS(retval) == 0:
958 out = "system command ('%s') " % command
959 if os.WIFEXITED(retval):
960 out += "failed, rc = %d" % os.WEXITSTATUS(retval)
962 out += "killed by signal %d" % os.WTERMSIG(retval)
964 out += "; output follows:\n" + data
967 def racadm_reboot(ip, username, password, port, dryrun):
971 cmd = "/usr/sbin/racadm"
974 output = runcmd(cmd, ["-r %s -i serveraction powercycle" % ip],
977 output = runcmd(cmd, ["-r %s -i getsysinfo" % ip],
980 print "RUNCMD: %s" % output
985 except Exception, err:
986 logger.debug("runcmd raised exception %s" % err)
def pcu_name(pcu):
    """Return pcu['hostname'] if non-empty, else pcu['ip'] if non-empty, else None.

    NOTE(review): the `def` line and the ip/None returns were not visible in
    the reviewed excerpt; the name is taken from the commented-out
    `pcu_name(values)` call further down the file. Confirm against the
    original.
    """
    # Compare by value.  `x is not ""` tests object identity and only works
    # while the interpreter happens to share one empty-string object.
    if pcu['hostname'] is not None and pcu['hostname'] != "":
        return pcu['hostname']
    if pcu['ip'] is not None and pcu['ip'] != "":
        return pcu['ip']
    return None
999 def get_pcu_values(pcu_id):
1000 # TODO: obviously, this shouldn't be loaded each time...
1002 fb =soltesz.dbLoad("findbadpcus")
1005 values = fb['nodes']["id_%s" % pcu_id]['values']
1011 def check_open_port(values, port_list):
1014 if 'portstatus' in values:
1015 for port in port_list:
1016 if port in values['portstatus'] and \
1017 values['portstatus'][port] == "open":
1023 def reboot_policy(nodename, continue_probe, dryrun):
1026 pcu = plc.getpcu(nodename)
1028 return False # "%s has no pcu" % nodename
1030 values = get_pcu_values(pcu['pcu_id'])
1032 return False #"no info for pcu_id %s" % pcu['pcu_id']
1035 logger.debug("Trying PCU %s %s" % (pcu['hostname'], pcu['model']))
1037 ret = reboot_test(nodename, values, continue_probe, verbose, dryrun)
1044 def reboot_test(nodename, values, continue_probe, verbose, dryrun):
1048 # DataProbe iPal (many sites)
1049 if continue_probe and values['model'].find("Dataprobe IP-41x/IP-81x") >= 0:
1050 ipal = IPAL(values, verbose, ['23'])
1051 rb_ret = ipal.reboot(values[nodename], dryrun)
1053 # APC Masterswitch (Berkeley)
1054 elif continue_probe and values['model'].find("APC AP79xx/Masterswitch") >= 0:
1056 # TODO: make a more robust version of APC
1057 if values['pcu_id'] in [1163,1055,1111,1231,1113,1127,1128,1148]:
1058 apc = APCEurope(values, verbose, ['22', '23'])
1059 rb_ret = apc.reboot(values[nodename], dryrun)
1061 elif values['pcu_id'] in [1110,86]:
1062 apc = APCBrazil(values, verbose, ['22', '23'])
1063 rb_ret = apc.reboot(values[nodename], dryrun)
1065 elif values['pcu_id'] in [1221]:
1066 apc = APCBerlin(values, verbose, ['22', '23'])
1067 rb_ret = apc.reboot(values[nodename], dryrun)
1069 elif values['pcu_id'] in [1173,1221,1220,1225]:
1070 apc = APCFolsom(values, verbose, ['22', '23'])
1071 rb_ret = apc.reboot(values[nodename], dryrun)
1074 apc = APCMaster(values, verbose, ['22', '23'])
1075 rb_ret = apc.reboot(values[nodename], dryrun)
1078 elif continue_probe and values['model'].find("Baytech DS4-RPC") >= 0:
1079 if values['pcu_id'] in [1041,1209,1025,1052,1057]:
1080 # These require a 'ctrl-c' to be sent...
1081 baytech = BayTechCtrlC(values, verbose, ['22', '23'])
1082 rb_ret = baytech.reboot(values[nodename], dryrun)
1084 elif values['pcu_id'] in [1012]:
1085 # This pcu sometimes doesn't present the 'Username' prompt,
1086 # unless you immediately try again...
1088 baytech = BayTechGeorgeTown(values, verbose, ['22', '23'])
1089 rb_ret = baytech.reboot(values[nodename], dryrun)
1091 baytech = BayTechGeorgeTown(values, verbose, ['22', '23'])
1092 rb_ret = baytech.reboot(values[nodename], dryrun)
1094 baytech = BayTech(values, verbose, ['22', '23'])
1095 rb_ret = baytech.reboot(values[nodename], dryrun)
1098 elif continue_probe and values['model'].find("HP iLO") >= 0:
1100 hpilo = HPiLO(values, verbose, ['22'])
1101 rb_ret = hpilo.reboot(0, dryrun)
1103 hpilo = HPiLOHttps(values, verbose, ['443'])
1104 rb_ret = hpilo.reboot(0, dryrun)
1106 hpilo = HPiLOHttps(values, verbose, ['443'])
1107 rb_ret = hpilo.reboot(0, dryrun)
1110 elif continue_probe and values['model'].find("Dell RAC") >= 0:
1111 # TODO: I don't think DRACRacAdm will throw an exception for the
1112 # default method to catch...
1114 drac = DRACRacAdm(values, verbose, ['443', '5869'])
1115 rb_ret = drac.reboot(0, dryrun)
1117 drac = DRAC(values, verbose, ['22'])
1118 rb_ret = drac.reboot(0, dryrun)
1120 # BlackBox PSExxx-xx (e.g. PSE505-FR)
1121 elif continue_probe and \
1122 (values['model'].find("BlackBox PS5xx") >= 0 or
1123 values['model'].find("ePowerSwitch 1/4/8x") >=0 ):
1125 # TODO: allow a different port than http 80.
1126 if values['pcu_id'] in [1089, 1071, 1046, 1035, 1118]:
1127 eps = ePowerSwitchGood(values, verbose, ['80'])
1128 elif values['pcu_id'] in [1003]:
1129 eps = ePowerSwitch(values, verbose, ['80'])
1131 eps = ePowerSwitchGood(values, verbose, ['80'])
1133 rb_ret = eps.reboot(values[nodename], dryrun)
1135 elif continue_probe:
1136 rb_ret = "Unsupported_PCU"
1138 elif continue_probe == False:
1139 if 'portstatus' in values:
1146 except ExceptionPort, err:
1151 #elif continue_probe and values['protocol'] == "racadm" and \
1152 # values['model'] == "RAC":
1153 # rb_ret = racadm_reboot(pcu_name(values),
1154 # values['username'],
1155 # values['password'],
# Returns true if rebooted via PCU
def reboot_old(nodename, dryrun):
    """Legacy dispatcher: reboot `nodename` through whatever PCU it has.

    The PCU record comes from plc.getpcu(); its 'model' and 'protocol'
    fields select the per-device reboot function, and pcu[nodename] is
    passed as the port.

    NOTE(review): the no-PCU guard around plc.nodePOD() and the return
    statements were not visible in the reviewed excerpt.  This version
    returns False when there is no PCU or the PCU type is unsupported, and
    True once a device function has been called; the device's own status
    code in `err` is not inspected. Confirm against the original.
    """
    pcu = plc.getpcu(nodename)
    if not pcu:
        plc.nodePOD(nodename)
        return False

    logger.debug("Trying PCU %s %s" % (pcu['hostname'], pcu['model']))

    model = pcu['model']
    protocol = pcu['protocol']
    ip = pcu['ip']
    port = pcu[nodename]

    # APC Masterswitch (Berkeley)
    if model == "APC Masterswitch":
        err = apc_reboot(ip, pcu['username'], pcu['password'],
                         port, protocol, dryrun)

    # DataProbe iPal (many sites)
    elif protocol == "telnet" and model.find("IP-4") >= 0:
        err = ipal_reboot(ip, pcu['password'], port, dryrun)

    # Baytech / DS4 units over ssh
    elif protocol == "ssh" and \
            (model.find("Baytech") >= 0 or model.find("DS4") >= 0):
        err = baytech_reboot(ip, pcu['username'], pcu['password'], port, dryrun)

    # BlackBox PSExxx-xx (e.g. PSE505-FR)
    elif protocol == "http" and model == "bbpse":
        err = bbpse_reboot(ip, pcu['username'], pcu['password'], port, 80, dryrun)

    # x10toggle accounts over ssh
    elif protocol == "ssh" and model == "x10toggle":
        err = x10toggle_reboot(ip, pcu['username'], pcu['password'], port, dryrun)

    # Dell RAC cards via racadm
    elif protocol == "racadm" and model == "RAC":
        # Was `pcu_[nodename]`: `pcu_` is not defined anywhere, so this
        # branch raised NameError instead of rebooting.
        err = racadm_reboot(ip, pcu['username'], pcu['password'], port, dryrun)

    # Unknown or unsupported
    else:
        err = errno.EPROTONOSUPPORT
        return False

    return True
1202 logger.setLevel(logging.DEBUG)
1203 ch = logging.StreamHandler()
1204 ch.setLevel(logging.DEBUG)
1205 formatter = logging.Formatter('LOGGER - %(message)s')
1206 ch.setFormatter(formatter)
1207 logger.addHandler(ch)
1210 reboot("planetlab2.cs.uchicago.edu")
1211 reboot("alice.cs.princeton.edu")
1212 except Exception, err:
1215 if __name__ == '__main__':
1217 logger = logging.getLogger("monitor")