3 # Reboot specified nodes
9 import errno, time, traceback
12 import threading, popen2
14 from monitor.wrapper import plc
16 from subprocess import PIPE, Popen
17 import pcucontrol.transports.ssh.pxssh as pxssh
18 import pcucontrol.transports.ssh.pexpect as pexpect
20 from monitor.util import command
22 # Use our versions of telnetlib and pyssh
23 sys.path.insert(0, os.path.dirname(sys.argv[0]))
24 import pcucontrol.transports.telnetlib as telnetlib
25 sys.path.insert(0, os.path.dirname(sys.argv[0]) + "/pyssh")
26 import pcucontrol.transports.pyssh as pyssh
27 from monitor import config
32 # Event class ID from pcu events
33 #NODE_POWER_CONTROL = 3
36 #MONITOR_USER_ID = 11142
39 logger = logging.getLogger("monitor")
class ExceptionNoTransport(Exception):
    """Raised by ifThenSend when no transport object has been opened."""


class ExceptionNotFound(Exception):
    """Raised when an expected string does not appear in the PCU's output."""


class ExceptionPassword(Exception):
    """Raised when a password prompt is not seen or the login is rejected."""


class ExceptionTimeout(Exception):
    """Raised when an expected string is not found within the timeout."""


class ExceptionPrompt(Exception):
    """Raised when an expected prompt is missing; default error of ifThenSend."""


class ExceptionSequence(Exception):
    """Raised when a step of a PCU's reboot command sequence fails."""


class ExceptionReset(Exception):
    """Reset-related error; not raised anywhere in the code visible here."""


class ExceptionPort(Exception):
    """Raised when none of the PCU's open ports maps to a supported transport."""


class ExceptionUsername(Exception):
    """Raised when the username prompt is not seen while opening a transport."""
def telnet_answer(telnet, expected, buffer):
    """Wait for `expected` on a connection and reply with `buffer`.

    Calls `telnet.read_until(expected, TELNET_TIMEOUT)`; if the returned
    output contains `expected`, writes `buffer` followed by CRLF.

    Parameters:
        telnet   -- object providing read_until() and write(); this file
                    also passes a pyssh.Ssh object here.
        expected -- text that must appear in the output before replying.
        buffer   -- reply to send (CRLF is appended).

    Raises:
        ExceptionNotFound -- `expected` was not in the output that was read.
    """
    output = telnet.read_until(expected, TELNET_TIMEOUT)
    if expected not in output:
        # Call-form raise works on both Python 2 and Python 3.
        raise ExceptionNotFound("'%s' not found" % expected)
    telnet.write(buffer + "\r\n")
65 # PCU has model, host, preferred-port, user, passwd,
67 # This is an object derived directly from the PLCAPI DB fields
69 def __init__(self, plc_pcu_dict):
70 for field in ['username', 'password', 'site_id',
73 'node_ids', 'ports', ]:
74 if field in plc_pcu_dict:
75 self.__setattr__(field, plc_pcu_dict[field])
77 raise Exception("No such field %s in PCU object" % field)
79 # These are the convenience functions built around the PCU object.
81 def __init__(self, plc_pcu_dict):
82 PCU.__init__(self, plc_pcu_dict)
83 self.host = self.pcu_name()
86 if self.hostname is not None and self.hostname is not "":
88 elif self.ip is not None and self.ip is not "":
93 def nodeidToPort(self, node_id):
94 if node_id in self.node_ids:
95 for i in range(0, len(self.node_ids)):
96 if node_id == self.node_ids[i]:
99 raise Exception("No such Node ID: %d" % node_id)
101 # This class captures the observed pcu records from FindBadPCUs.py
103 def __init__(self, pcu_record_dict):
104 for field in ['nodenames', 'portstatus',
107 if field in pcu_record_dict:
108 if field == "reboot":
109 self.__setattr__("reboot_str", pcu_record_dict[field])
111 self.__setattr__(field, pcu_record_dict[field])
113 raise Exception("No such field %s in pcu record dict" % field)
123 def __init__(self, type, verbose):
125 self.verbose = verbose
126 self.transport = None
128 def open(self, host, username=None, password=None, prompt="User Name"):
131 if self.type == self.TELNET:
132 transport = telnetlib.Telnet(host, timeout=self.TELNET_TIMEOUT)
133 transport.set_debuglevel(self.verbose)
134 if username is not None:
135 self.transport = transport
136 self.ifThenSend(prompt, username, ExceptionUsername)
138 elif self.type == self.SSH:
139 if username is not None:
140 transport = pyssh.Ssh(username, host)
141 transport.set_debuglevel(self.verbose)
143 # TODO: have an ssh set_debuglevel() also...
145 raise Exception("Username cannot be None for ssh transport.")
146 elif self.type == self.HTTP:
147 self.url = "http://%s:%d/" % (host,80)
148 uri = "%s:%d" % (host,80)
151 authinfo = urllib2.HTTPPasswordMgrWithDefaultRealm()
152 authinfo.add_password (None, uri, username, password)
153 authhandler = urllib2.HTTPBasicAuthHandler( authinfo )
155 transport = urllib2.build_opener(authhandler)
158 raise Exception("Unknown transport type: %s" % self.type)
160 self.transport = transport
164 if self.type == self.TELNET:
165 self.transport.close()
166 elif self.type == self.SSH:
167 self.transport.close()
168 elif self.type == self.HTTP:
171 raise Exception("Unknown transport type %s" % self.type)
172 self.transport = None
174 def sendHTTP(self, resource, data):
176 print "POSTing '%s' to %s" % (data,self.url + resource)
179 f = self.transport.open(self.url + resource ,data)
184 except urllib2.URLError,err:
185 logger.info('Could not open http connection', err)
186 return "http transport error"
190 def sendPassword(self, password, prompt=None):
191 if self.type == self.TELNET:
193 self.ifThenSend("Password", password, ExceptionPassword)
195 self.ifThenSend(prompt, password, ExceptionPassword)
196 elif self.type == self.SSH:
197 self.ifThenSend("password:", password, ExceptionPassword)
198 elif self.type == self.HTTP:
201 raise Exception("Unknown transport type: %s" % self.type)
def ifThenSend(self, expected, buffer, ErrorClass=ExceptionPrompt):
    """Wait for `expected` on the open transport, then send `buffer`.

    Reads with `self.transport.read_until(expected, self.TELNET_TIMEOUT)`
    and, when the output contains `expected`, writes `buffer` plus CRLF.

    Parameters:
        expected   -- text that must appear before anything is sent.
        buffer     -- reply to send (CRLF is appended).
        ErrorClass -- exception type raised when `expected` is missing;
                      defaults to ExceptionPrompt.

    Raises:
        ExceptionNoTransport -- self.transport is None.
        ErrorClass           -- `expected` was not in the output read.
    """
    if self.transport is None:
        raise ExceptionNoTransport("transport object is type None")

    output = self.transport.read_until(expected, self.TELNET_TIMEOUT)
    if expected not in output:
        # Show what the device actually sent to help diagnose the mismatch.
        print("OUTPUT: --%s--" % output)
        raise ErrorClass("'%s' not found" % expected)

    self.transport.write(buffer + "\r\n")
215 def ifElse(self, expected, ErrorClass):
217 self.transport.read_until(expected, self.TELNET_TIMEOUT)
219 raise ErrorClass("Could not find '%s' within timeout" % expected)
222 class PCUControl(Transport,PCUModel,PCURecord):
226 def __init__(self, plc_pcu_record, verbose, supported_ports=[]):
227 PCUModel.__init__(self, plc_pcu_record)
228 PCURecord.__init__(self, plc_pcu_record)
231 if '22' in supported_ports and self.portstatus['22'] == "open":
233 elif '23' in supported_ports and self.portstatus['23'] == "open":
234 type = Transport.TELNET
235 elif '80' in supported_ports and self.portstatus['80'] == "open":
236 type = Transport.HTTP
237 elif '443' in supported_ports and self.portstatus['443'] == "open":
238 type = Transport.HTTP
239 elif '5869' in supported_ports and self.portstatus['5869'] == "open":
240 # For DRAC cards. Racadm opens this port.
241 type = Transport.HTTP
242 elif '9100' in supported_ports and self.portstatus['9100'] == "open":
243 type = Transport.IPAL
244 elif '16992' in supported_ports and self.portstatus['16992'] == "open":
245 type = Transport.HTTP
247 raise ExceptionPort("Unsupported Port: No transport from open ports")
249 raise Exception("No Portstatus: No transport because no open ports")
250 Transport.__init__(self, type, verbose)
252 def run(self, node_port, dryrun):
253 """ This function is to be defined by the specific PCU instance. """
256 def reboot(self, node_port, dryrun):
258 return self.run(node_port, dryrun)
259 except ExceptionNotFound, err:
260 return "error: " + str(err)
261 except ExceptionPassword, err:
262 return "password exception: " + str(err)
263 except ExceptionTimeout, err:
264 return "timeout exception: " + str(err)
265 except ExceptionUsername, err:
266 return "exception: no username prompt: " + str(err)
267 except ExceptionSequence, err:
268 return "sequence error: " + str(err)
269 except ExceptionPrompt, err:
270 return "prompt exception: " + str(err)
271 except ExceptionPort, err:
272 return "no ports exception: " + str(err)
273 except socket.error, err:
274 return "socket error: timeout: " + str(err)
275 except EOFError, err:
277 logger.debug("reboot: EOF")
279 self.transport.close()
281 traceback.print_exc()
282 return "EOF connection reset" + str(err)
284 class IPMI(PCUControl):
286 supported_ports = [80,443,623]
288 # TODO: get exit codes to determine success or failure...
289 def run(self, node_port, dryrun):
292 cmd = "ipmitool -I lanplus -H %s -U %s -P '%s' power cycle"
293 p = os.popen(cmd % ( self.host, self.username, self.password) )
295 print "RESULT: ", result
297 cmd = "ipmitool -I lanplus -H %s -U %s -P '%s' user list"
298 p = os.popen(cmd % ( self.host, self.username, self.password) )
300 print "RESULT: ", result
302 if "Error" in result:
307 class IPAL(PCUControl):
309 This now uses a proprietary format for communicating with the PCU. I
310 prefer it to Telnet, and Web access, since it's much lighter weight
311 and, more importantly, IT WORKS!! HHAHHHAHAHAHAHAHA!
313 supported_ports = [23,80,9100]
def format_msg(self, data, cmd):
    """Build one command message for the PCU.

    The message is: ESC, self.password, ESC, `data`, then `cmd`.
    `cmd` is rendered with %c, so it must be a single character (or an
    integer character code). Callers in this file pass 'O', 'P' and 'E'.
    """
    return "\x1b%s\x1b%s%c" % (self.password, data, cmd)
319 def recv_noblock(self, s, count):
323 # TODO: make sleep backoff, before stopping.
325 ret = s.recv(count, socket.MSG_DONTWAIT)
326 except socket.error, e:
327 if e[0] == errno.EAGAIN:
328 raise Exception(e[1])
330 # TODO: not other exceptions.
def run(self, node_port, dryrun):
    """Dispatch to the handler that matches this object's transport type.

    Transport.IPAL goes to run_ipal, Transport.TELNET goes to run_telnet;
    any other type raises Exception. The handler's result is returned.
    """
    if self.type == Transport.IPAL:
        return self.run_ipal(node_port, dryrun)

    if self.type == Transport.TELNET:
        return self.run_telnet(node_port, dryrun)

    raise Exception("Unimplemented Transport for IPAL")
342 def run_telnet(self, node_port, dryrun):
343 # TELNET version of protocol...
345 ## XXX Some iPals require you to hit Enter a few times first
346 self.ifThenSend("Password >", "\r\n\r\n", ExceptionNotFound)
348 self.ifThenSend("Password >", self.password, ExceptionPassword)
349 self.transport.write("\r\n\r\n")
350 if not dryrun: # P# - Pulse relay
351 print "node_port %s" % node_port
352 self.ifThenSend("Enter >",
355 print "send newlines"
356 self.transport.write("\r\n\r\n")
357 print "after new lines"
358 # Get the next prompt
359 print "wait for enter"
360 self.ifElse("Enter >", ExceptionTimeout)
365 def run_ipal(self, node_port, dryrun):
371 s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
374 s.connect((self.host, 9100))
375 except socket.error, e:
377 if e[0] == errno.ECONNREFUSED:
378 # cannot connect to remote host
379 raise Exception(e[1])
381 # TODO: what other conditions are there?
385 print "Checking status"
386 s.send(self.format_msg("", 'O'))
387 ret = self.recv_noblock(s, 8)
388 print "Current status is '%s'" % ret
391 raise Exception("Status returned 'another session already open' %s : %s" % (node_port, ret))
393 if node_port < len(ret):
394 status = ret[node_port]
402 raise Exception("Unknown status for PCU socket %s : %s" % (node_port, ret))
404 raise Exception("Mismatch between configured port and PCU status: %s %s" % (node_port, ret))
409 print "Pulsing %s" % node_port
410 s.send(self.format_msg("%s" % node_port, 'P'))
412 # NOTE: turn power on ; do not pulse the port.
413 print "Power was off, so turning on ..."
414 s.send(self.format_msg("%s" % node_port, 'E'))
415 #s.send(self.format_msg("%s" % node_port, 'P'))
417 print "Receiving response."
418 ret = self.recv_noblock(s, 8)
419 print "Current status is '%s'" % ret
421 if node_port < len(ret):
422 status = ret[node_port]
430 raise Exception("Unknown status for PCU socket %s : %s" % (node_port, ret))
432 raise Exception("Mismatch between configured port and PCU status: %s %s" % (node_port, ret))
437 return "Failed Power On"
443 class APCEurope(PCUControl):
444 def run(self, node_port, dryrun):
445 self.open(self.host, self.username)
446 self.sendPassword(self.password)
448 self.ifThenSend("\r\n> ", "1", ExceptionPassword)
449 self.ifThenSend("\r\n> ", "2")
450 self.ifThenSend("\r\n> ", str(node_port))
451 # 3- Immediate Reboot
452 self.ifThenSend("\r\n> ", "3")
455 self.ifThenSend("Enter 'YES' to continue or <ENTER> to cancel",
459 self.ifThenSend("Enter 'YES' to continue or <ENTER> to cancel",
460 "", ExceptionSequence)
461 self.ifThenSend("Press <ENTER> to continue...", "", ExceptionSequence)
466 class APCBrazil(PCUControl):
467 def run(self, node_port, dryrun):
468 self.open(self.host, self.username)
469 self.sendPassword(self.password)
471 self.ifThenSend("\r\n> ", "1", ExceptionPassword)
472 self.ifThenSend("\r\n> ", str(node_port))
473 # 4- Immediate Reboot
474 self.ifThenSend("\r\n> ", "4")
477 self.ifThenSend("Enter 'YES' to continue or <ENTER> to cancel",
481 self.ifThenSend("Enter 'YES' to continue or <ENTER> to cancel",
482 "", ExceptionSequence)
483 self.ifThenSend("Press <ENTER> to continue...", "", ExceptionSequence)
488 class APCBerlin(PCUControl):
489 def run(self, node_port, dryrun):
490 self.open(self.host, self.username)
491 self.sendPassword(self.password)
493 self.ifThenSend("\r\n> ", "1", ExceptionPassword)
494 self.ifThenSend("\r\n> ", "2")
495 self.ifThenSend("\r\n> ", "1")
496 self.ifThenSend("\r\n> ", str(node_port))
497 # 3- Immediate Reboot
498 self.ifThenSend("\r\n> ", "3")
501 self.ifThenSend("Enter 'YES' to continue or <ENTER> to cancel",
505 self.ifThenSend("Enter 'YES' to continue or <ENTER> to cancel",
506 "", ExceptionSequence)
507 self.ifThenSend("Press <ENTER> to continue...", "", ExceptionSequence)
512 class APCFolsom(PCUControl):
513 def run(self, node_port, dryrun):
514 self.open(self.host, self.username)
515 self.sendPassword(self.password)
517 self.ifThenSend("\r\n> ", "1", ExceptionPassword)
518 self.ifThenSend("\r\n> ", "2")
519 self.ifThenSend("\r\n> ", "1")
520 self.ifThenSend("\r\n> ", str(node_port))
521 self.ifThenSend("\r\n> ", "1")
523 # 3- Immediate Reboot
524 self.ifThenSend("\r\n> ", "3")
527 self.ifThenSend("Enter 'YES' to continue or <ENTER> to cancel",
531 self.ifThenSend("Enter 'YES' to continue or <ENTER> to cancel",
532 "", ExceptionSequence)
533 self.ifThenSend("Press <ENTER> to continue...", "", ExceptionSequence)
538 class APCMaster(PCUControl):
539 supported_ports = [22,23]
540 def run(self, node_port, dryrun):
541 print "Rebooting %s" % self.host
542 self.open(self.host, self.username)
543 self.sendPassword(self.password)
546 self.ifThenSend("\r\n> ", "1", ExceptionPassword)
547 # 3- Outlet Control/Config
548 self.ifThenSend("\r\n> ", "3")
550 self.ifThenSend("\r\n> ", str(node_port))
552 self.ifThenSend("\r\n> ", "1")
553 # 3- Immediate Reboot
554 self.ifThenSend("\r\n> ", "3")
557 self.ifThenSend("Enter 'YES' to continue or <ENTER> to cancel",
561 self.ifThenSend("Enter 'YES' to continue or <ENTER> to cancel",
562 "", ExceptionSequence)
563 self.ifThenSend("Press <ENTER> to continue...", "", ExceptionSequence)
568 class APC(PCUControl):
def __init__(self, plc_pcu_record, verbose):
    """Initialise the base PCUControl and one helper object per APC variant.

    After the base initialiser runs, APCMaster, APCFolsom and APCEurope
    instances are built (in that order) from the same record and stored
    as self.master, self.folsom and self.europe.
    """
    PCUControl.__init__(self, plc_pcu_record, verbose)

    variants = (
        ("master", APCMaster),
        ("folsom", APCFolsom),
        ("europe", APCEurope),
    )
    for attribute, variant_class in variants:
        setattr(self, attribute, variant_class(plc_pcu_record, verbose))
576 def run(self, node_port, dryrun):
580 for pcu in [self.master, self.europe, self.folsom]:
583 print "-*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*"
586 time.sleep(sleep_time)
587 ret = pcu.reboot(node_port, dryrun)
588 except ExceptionSequence, err:
594 return "Unknown reboot sequence for APC PCU"
598 class IntelAMT(PCUControl):
599 supported_ports = [16992]
601 def run(self, node_port, dryrun):
604 # TODO: need to make this path universal; not relative to pwd.
605 cmd_str = config.MONITOR_SCRIPT_ROOT + "/pcucontrol/models/intelamt/remoteControl"
608 # NOTE: -p checks the power state of the host.
609 # TODO: parse the output to find out if it's ok or not.
610 cmd_str += " -p http://%s:16992/RemoteControlService -user admin -pass '%s' " % (self.host, self.password )
612 cmd_str += " -A http://%s:16992/RemoteControlService -user admin -pass '%s' " % (self.host, self.password )
615 return cmd.system(cmd_str, self.TELNET_TIMEOUT)
617 class DRACRacAdm(PCUControl):
618 def run(self, node_port, dryrun):
620 print "trying racadm_reboot..."
621 racadm_reboot(self.host, self.username, self.password, node_port, dryrun)
625 class DRAC(PCUControl):
626 supported_ports = [22,443,5869]
627 def run(self, node_port, dryrun):
628 self.open(self.host, self.username)
629 self.sendPassword(self.password)
631 print "logging in..."
632 self.transport.write("\r\n")
635 self.ifThenSend("[%s]#" % self.username, "getsysinfo")
638 self.ifThenSend("[%s]#" % self.username, "serveraction powercycle")
640 self.ifThenSend("[%s]#" % self.username, "exit")
645 class HPiLO(PCUControl):
646 supported_ports = [22,443]
647 def run(self, node_port, dryrun):
648 self.open(self.host, self.username)
649 self.sendPassword(self.password)
652 self.ifThenSend("</>hpiLO->", "cd system1")
654 # Reboot Outlet N (Y/N)?
656 self.ifThenSend("</system1>hpiLO->", "POWER")
659 self.ifThenSend("</system1>hpiLO->", "reset")
661 self.ifThenSend("</system1>hpiLO->", "exit")
667 class HPiLOHttps(PCUControl):
668 supported_ports = [22,443]
669 def run(self, node_port, dryrun):
671 locfg = command.CMD()
673 cmd_str = config.MONITOR_SCRIPT_ROOT + "/pcucontrol/models/hpilo/"
675 cmd = cmd_str + "locfg.pl -s %s -f %s -u %s -p '%s' | grep 'MESSAGE' | grep -v 'No error'" % (
676 self.host, cmd_str+"iloxml/Get_Network.xml",
677 self.username, self.password)
678 sout, serr = locfg.run_noexcept(cmd)
680 if sout.strip() != "" or serr.strip() != "":
681 print "sout: %s" % sout.strip()
682 return sout.strip() + serr.strip()
685 locfg = command.CMD()
686 cmd = cmd_str + "locfg.pl -s %s -f %s -u %s -p '%s' | grep 'MESSAGE' | grep -v 'No error'" % (
687 self.host, cmd_str+"iloxml/Reset_Server.xml",
688 self.username, self.password)
689 sout, serr = locfg.run_noexcept(cmd)
691 if sout.strip() != "":
692 print "sout: %s" % sout.strip()
697 class BayTechAU(PCUControl):
698 def run(self, node_port, dryrun):
699 self.open(self.host, self.username, None, "Enter user name:")
700 self.sendPassword(self.password, "Enter Password:")
702 #self.ifThenSend("RPC-16>", "Status")
703 self.ifThenSend("RPC3-NC>", "Reboot %d" % node_port)
705 # Reboot Outlet N (Y/N)?
707 self.ifThenSend("(Y/N)?", "N")
709 self.ifThenSend("(Y/N)?", "Y")
710 self.ifThenSend("RPC3-NC>", "")
715 class BayTechGeorgeTown(PCUControl):
716 def run(self, node_port, dryrun):
717 self.open(self.host, self.username, None, "Enter user name:")
718 self.sendPassword(self.password, "Enter Password:")
720 #self.ifThenSend("RPC-16>", "Status")
722 self.ifThenSend("RPC-16>", "Reboot %d" % node_port)
724 # Reboot Outlet N (Y/N)?
726 self.ifThenSend("(Y/N)?", "N")
728 self.ifThenSend("(Y/N)?", "Y")
729 self.ifThenSend("RPC-16>", "")
734 class BayTechCtrlCUnibe(PCUControl):
736 For some reason, these units let you log in fine, but they hang
737 indefinitely, unless you send a Ctrl-C after the password. No idea
740 def run(self, node_port, dryrun):
741 print "BayTechCtrlC %s" % self.host
743 ssh_options="-o StrictHostKeyChecking=no -o PasswordAuthentication=yes -o PubkeyAuthentication=no"
745 if not s.login(self.host, self.username, self.password, ssh_options):
746 raise ExceptionPassword("Invalid Password")
747 # Otherwise, the login succeeded.
749 # Send a ctrl-c to the remote process.
750 print "sending ctrl-c"
753 # Control Outlets (5 ,1).........5
755 #index = s.expect("Enter Request")
756 index = s.expect(["Enter Request :"])
761 index = s.expect(["DS-RPC>", "Enter user name:"])
763 s.send(self.username + "\r\n")
764 index = s.expect(["DS-RPC>"])
767 print "Reboot %d" % node_port
768 s.send("Reboot %d\r\n" % node_port)
771 index = s.expect(["\(Y/N\)\?", "Port in use", "DS-RPC>"])
780 raise ExceptionPrompt("PCU Reported 'Port in use.'")
782 raise ExceptionSequence("Issued command 'Reboot' failed.")
785 index = s.expect(["DS-RPC>"])
786 #print "got prompt back"
791 raise ExceptionPrompt("EOF before expected Prompt")
792 except pexpect.TIMEOUT:
793 raise ExceptionPrompt("Timeout before expected Prompt")
797 class BayTechCtrlC(PCUControl):
799 For some reason, these units let you log in fine, but they hang
800 indefinitely, unless you send a Ctrl-C after the password. No idea
803 def run(self, node_port, dryrun):
804 print "BayTechCtrlC %s" % self.host
806 ssh_options="-o StrictHostKeyChecking=no -o PasswordAuthentication=yes -o PubkeyAuthentication=no"
808 if not s.login(self.host, self.username, self.password, ssh_options):
809 raise ExceptionPassword("Invalid Password")
810 # Otherwise, the login succeeded.
812 # Send a ctrl-c to the remote process.
813 print "SENDING ctrl-c"
816 # Control Outlets (5 ,1).........5
818 print "EXPECTING: ", "Enter Request :"
819 index = s.expect(["Enter Request :"])
824 print "EXPECTING: ", "DS-RPC>"
825 index = s.expect(["DS-RPC>", "Enter user name:", "Port in use."])
827 print "sending username"
828 s.send(self.username + "\r\n")
829 index = s.expect(["DS-RPC>"])
831 raise ExceptionPrompt("PCU Reported 'Port in use.'")
834 print "SENDING: Reboot %d" % node_port
835 s.send("Reboot %d\r\n" % node_port)
839 print "EXPECTING: ", "Y/N?"
840 index = s.expect(["\(Y/N\)\?", "Port in use", "DS-RPC>"])
849 raise ExceptionPrompt("PCU Reported 'Port in use.'")
851 raise ExceptionSequence("Issued command 'Reboot' failed.")
853 # NOTE: for some reason, the script times out with the
854 # following line. In manual tests, it works correctly, but
855 # with automated tests, evidently it fails.
858 #print "TOTAL--", s.allstr, "--EOT"
859 index = s.expect(["DS-RPC>"])
860 print "got prompt back"
865 raise ExceptionPrompt("EOF before 'Enter Request' Prompt")
866 except pexpect.TIMEOUT:
867 raise ExceptionPrompt("Timeout before Prompt")
871 class BayTech(PCUControl):
872 supported_ports = [22,23]
873 def run(self, node_port, dryrun):
874 self.open(self.host, self.username)
875 self.sendPassword(self.password)
877 # Control Outlets (5 ,1).........5
878 self.ifThenSend("Enter Request :", "5")
882 self.ifThenSend("DS-RPC>", "Reboot %d" % node_port, ExceptionNotFound)
883 except ExceptionNotFound, msg:
884 # one machine is configured to ask for a username,
885 # even after login...
886 print "msg: %s" % msg
887 self.transport.write(self.username + "\r\n")
889 self.ifThenSend("DS-RPC>", "Reboot %d" % node_port)
891 # Reboot Outlet N (Y/N)?
893 self.ifThenSend("(Y/N)?", "N")
895 self.ifThenSend("(Y/N)?", "Y")
897 self.ifThenSend("DS-RPC>", "")
902 class WTIIPS4(PCUControl):
903 supported_ports = [23]
904 def run(self, node_port, dryrun):
906 self.sendPassword(self.password, "Enter Password:")
908 self.ifThenSend("IPS> ", "/Boot %s" % node_port)
910 self.ifThenSend("Sure? (Y/N): ", "N")
912 self.ifThenSend("Sure? (Y/N): ", "Y")
914 self.ifThenSend("IPS> ", "")
919 class ePowerSwitchGood(PCUControl):
921 # The old code used Python's HTTPPasswordMgrWithDefaultRealm()
922 # For some reason this both doesn't work and in some cases, actually
923 # hangs the PCU. Definitely not what we want.
925 # The code below is much simpler. Just letting things fail first,
926 # and then, trying again with authentication string in the header.
928 def run(self, node_port, dryrun):
929 self.transport = None
930 self.url = "http://%s:%d/" % (self.host,80)
931 uri = "%s:%d" % (self.host,80)
933 req = urllib2.Request(self.url)
935 handle = urllib2.urlopen(req)
937 # NOTE: this is expected to fail initially
944 return "ERROR: not protected by HTTP authentication"
946 if not hasattr(e, 'code') or e.code != 401:
947 return "ERROR: failed for: %s" % str(e)
949 base64data = base64.encodestring("%s:%s" % (self.username, self.password))[:-1]
950 # NOTE: assuming basic realm authentication.
951 authheader = "Basic %s" % base64data
952 req.add_header("Authorization", authheader)
955 f = urllib2.urlopen(req)
957 # failing here means the User/passwd is wrong (hopefully)
958 raise ExceptionPassword("Incorrect username/password")
960 # NOTE: after verifying that the user/password is correct,
961 # actually reboot the given node.
964 data = urllib.urlencode({'P%d' % node_port : "r"})
965 req = urllib2.Request(self.url + "cmd.html")
966 req.add_header("Authorization", authheader)
967 # add data to handler,
968 f = urllib2.urlopen(req, data)
969 if self.verbose: print f.read()
971 import traceback; traceback.print_exc()
973 # fetch url one more time on cmd.html, econtrol.html or whatever.
976 if self.verbose: print f.read()
981 class CustomPCU(PCUControl):
982 def run(self, node_port, dryrun):
983 url = "https://www-itec.uni-klu.ac.at/plab-pcu/index.php"
986 # Turn host off, then on
987 formstr = "plab%s=off" % node_port
988 os.system("curl --user %s:%s --form '%s' --insecure %s" % (self.username, self.password, formstr, url))
990 formstr = "plab%s=on" % node_port
991 os.system("curl --user %s:%s --form '%s' --insecure %s" % (self.username, self.password, formstr, url))
993 os.system("curl --user %s:%s --insecure %s" % (self.username, self.password, url))
996 class ePowerSwitchOld(PCUControl):
997 def run(self, node_port, dryrun):
998 self.url = "http://%s:%d/" % (self.host,80)
999 uri = "%s:%d" % (self.host,80)
1002 authinfo = urllib2.HTTPPasswordMgrWithDefaultRealm()
1003 authinfo.add_password (None, uri, self.username, self.password)
1004 authhandler = urllib2.HTTPBasicAuthHandler( authinfo )
1006 # NOTE: it doesn't seem to matter whether this authinfo is here or not.
1007 transport = urllib2.build_opener(authinfo)
1008 f = transport.open(self.url)
1009 if self.verbose: print f.read()
1012 transport = urllib2.build_opener(authhandler)
1013 f = transport.open(self.url + "cmd.html", "P%d=r" % node_port)
1014 if self.verbose: print f.read()
1019 class ePowerSwitch(PCUControl):
1020 supported_ports = [80]
1021 def run(self, node_port, dryrun):
1022 self.url = "http://%s:%d/" % (self.host,80)
1023 uri = "%s:%d" % (self.host,80)
1025 # TODO: I'm still not sure what the deal is here.
1026 # two independent calls appear to need to be made before the
1027 # reboot will succeed. It doesn't seem to be possible to do
1028 # this with a single call. I have no idea why.
1031 authinfo = urllib2.HTTPPasswordMgrWithDefaultRealm()
1032 authinfo.add_password (None, uri, self.username, self.password)
1033 authhandler = urllib2.HTTPBasicAuthHandler( authinfo )
1035 # NOTE: it doesn't seem to matter whether this authinfo is here or not.
1036 transport = urllib2.build_opener()
1037 f = transport.open(self.url + "elogin.html", "pwd=%s" % self.password)
1038 if self.verbose: print f.read()
1041 transport = urllib2.build_opener(authhandler)
1042 f = transport.open(self.url + "econtrol.html", "P%d=r" % node_port)
1043 if self.verbose: print f.read()
1045 # data= "P%d=r" % node_port
1046 #self.open(self.host, self.username, self.password)
1047 #self.sendHTTP("elogin.html", "pwd=%s" % self.password)
1048 #self.sendHTTP("econtrol.html", data)
1049 #self.sendHTTP("cmd.html", data)
1054 class ManualPCU(PCUControl):
1055 supported_ports = [22,23,80,443,9100,16992]
1057 def run(self, node_port, dryrun):
1059 # TODO: send email message to monitor admin requesting manual
1060 # intervention. This should always be an option for ridiculous,
1065 ### rebooting european BlackBox PSE boxes
1066 # Thierry Parmentelat - May 11 2005
1067 # tested on 4-ports models known as PSE505-FR
1068 # uses http to POST a data 'P<port>=r'
1069 # relies on basic authentication within http1.0
1070 # first curl-based script was
1071 # curl --http1.0 --basic --user <username>:<password> --data P<port>=r \
1072 # http://<hostname>:<http_port>/cmd.html && echo OK
1077 class BlackBoxPSMaverick(PCUControl):
1078 supported_ports = [80]
1080 def run(self, node_port, dryrun):
1082 # send reboot signal.
1083 cmd = "curl -s --data 'P%s=r' --anyauth --user '%s:%s' http://%s/config/home_f.html" % ( node_port, self.username, self.password, self.host)
1085 # else, just try to log in
1086 cmd = "curl -s --anyauth --user '%s:%s' http://%s/config/home_f.html" % ( self.username, self.password, self.host)
1090 print "RESULT: ", result
1092 if len(result.split()) > 3:
1097 def bbpse_reboot (pcu_ip,username,password,port_in_pcu,http_port, dryrun):
1101 url = "http://%s:%d/cmd.html" % (pcu_ip,http_port)
1102 data= "P%d=r" % port_in_pcu
1104 logger.debug("POSTing '%s' on %s" % (data,url))
1106 authinfo = urllib2.HTTPPasswordMgrWithDefaultRealm()
1107 uri = "%s:%d" % (pcu_ip,http_port)
1108 authinfo.add_password (None, uri, username, password)
1109 authhandler = urllib2.HTTPBasicAuthHandler( authinfo )
1111 opener = urllib2.build_opener(authhandler)
1112 urllib2.install_opener(opener)
1118 f = urllib2.urlopen(url,data)
1125 except urllib2.URLError,err:
1126 logger.info('Could not open http connection', err)
1127 return "bbpse error"
1129 ### rebooting x10toggle based systems addressed by port
1130 # Marc E. Fiuczynski - May 31 2005
1131 # tested on 4-ports models known as PSE505-FR
1132 # uses ssh and password to login to an account
1133 # that will cause the system to be powercycled.
1135 def x10toggle_reboot(ip, username, password, port, dryrun):
1140 ssh = pyssh.Ssh(username, ip)
1144 telnet_answer(ssh, "password:", password)
1148 telnet_answer(ssh, "x10toggle>", "A%d" % port)
1151 output = ssh.close()
1153 logger.debug(output)
1156 except Exception, err:
1160 output = ssh.close()
1162 logger.debug(output)
1163 return errno.ETIMEDOUT
1165 ### rebooting Dell systems via RAC card
1166 # Marc E. Fiuczynski - June 01 2005
1167 # tested with David Lowenthal's itchy/scratchy nodes at UGA
1170 def runcmd(command, args, username, password, timeout = None):
1173 result_ready = threading.Condition()
1177 result_ready.acquire()
1181 result_ready.notify()
1182 result_ready.release()
1184 def do_command(command, username, password):
1187 # Popen4 is a popen-type class that combines stdout and stderr
1188 p = popen2.Popen4(command)
1190 # read all output data
1191 p.tochild.write("%s\n" % username)
1192 p.tochild.write("%s\n" % password)
1194 data = p.fromchild.read()
1197 # might get interrupted by a signal in poll() or waitpid()
1200 set_result((retval, data))
1203 if ex.errno == errno.EINTR:
1206 except Exception, ex:
1210 command = " ".join([command] + args)
1212 worker = threading.Thread(target = do_command, args = (command, username, password, ))
1213 worker.setDaemon(True)
1214 result_ready.acquire()
1216 result_ready.wait(timeout)
1218 if result == [None]:
1219 raise Exception, "command timed-out: '%s'" % command
1221 result_ready.release()
1224 if isinstance(result, Exception):
1227 (retval, data) = result
1228 if os.WIFEXITED(retval) and os.WEXITSTATUS(retval) == 0:
1231 out = "system command ('%s') " % command
1232 if os.WIFEXITED(retval):
1233 out += "failed, rc = %d" % os.WEXITSTATUS(retval)
1235 out += "killed by signal %d" % os.WTERMSIG(retval)
1237 out += "; output follows:\n" + data
1238 raise Exception, out
1240 def racadm_reboot(host, username, password, port, dryrun):
1243 ip = socket.gethostbyname(host)
1245 cmd = "/usr/sbin/racadm"
1248 output = runcmd(cmd, ["-r %s -i serveraction powercycle" % ip],
1251 output = runcmd(cmd, ["-r %s -i getsysinfo" % ip],
1254 print "RUNCMD: %s" % output
1256 logger.debug(output)
1259 except Exception, err:
1260 logger.debug("runcmd raised exception %s" % err)
1266 if pcu['hostname'] is not None and pcu['hostname'] is not "":
1267 return pcu['hostname']
1268 elif pcu['ip'] is not None and pcu['ip'] is not "":
1274 from monitor import database
1277 def get_pcu_values(pcu_id):
1280 # this shouldn't be loaded each time...
1281 fb = database.dbLoad("findbadpcus")
1284 values = fb['nodes']["id_%s" % pcu_id]['values']
def reboot(nodename):
    """Reboot `nodename` through reboot_policy and return its result.

    Calls reboot_policy with continue_probe=True and dryrun=False.
    """
    return reboot_policy(nodename, continue_probe=True, dryrun=False)
1293 def reboot_policy(nodename, continue_probe, dryrun):
1296 pcu = plc.getpcu(nodename)
1298 logger.debug("no pcu for %s" % hostname)
1299 print "no pcu for %s" % hostname
1300 return False # "%s has no pcu" % nodename
1302 values = get_pcu_values(pcu['pcu_id'])
1304 logger.debug("No values for pcu probe %s" % hostname)
1305 print "No values for pcu probe %s" % hostname
1306 return False #"no info for pcu_id %s" % pcu['pcu_id']
1309 logger.debug("Trying PCU %s %s" % (pcu['hostname'], pcu['model']))
1311 ret = reboot_test(nodename, values, continue_probe, verbose, dryrun)
class Unknown(PCUControl):
    """PCUControl subclass that only declares its supported ports."""

    supported_ports = [
        22,
        23,
        80,
        443,
        5869,
        9100,
        16992,
    ]
1323 def model_to_object(modelname):
1324 if "AMT" in modelname:
1326 elif "DS4-RPC" in modelname:
1328 elif "ilo2" in modelname:
1330 elif "IP-41x" in modelname:
1332 elif "AP79xx" in modelname or "Masterswitch" in modelname:
1334 elif "DRAC" in modelname:
1336 elif "WTI" in modelname:
1338 elif "ePowerSwitch" in modelname:
1340 elif "ipmi" in modelname:
1342 elif "bbsemaverick" in modelname:
1343 return BlackBoxPSMaverick
1347 def reboot_test(nodename, values, continue_probe, verbose, dryrun):
1349 if 'plc_pcu_stats' in values:
1350 values.update(values['plc_pcu_stats'])
1353 # DataProbe iPal (many sites)
1354 if continue_probe and values['model'].find("IP-41x_IP-81x") >= 0:
1355 ipal = IPAL(values, verbose, ['23', '80', '9100'])
1356 rb_ret = ipal.reboot(values[nodename], dryrun)
1358 # APC Masterswitch (Berkeley)
1359 elif continue_probe and ( values['model'].find("AP79xx") >= 0 or \
1360 values['model'].find("Masterswitch") >= 0 ):
1363 # TODO: make a more robust version of APC
1364 if values['pcu_id'] in [1102,1163,1055,1111,1231,1113,1127,1128,1148]:
1365 apc = APCEurope(values, verbose, ['22', '23'])
1366 rb_ret = apc.reboot(values[nodename], dryrun)
1368 elif values['pcu_id'] in [1110,86]:
1369 apc = APCBrazil(values, verbose, ['22', '23'])
1370 rb_ret = apc.reboot(values[nodename], dryrun)
1372 elif values['pcu_id'] in [1221,1225,1220,1192]:
1373 apc = APCBerlin(values, verbose, ['22', '23'])
1374 rb_ret = apc.reboot(values[nodename], dryrun)
1376 elif values['pcu_id'] in [1173,1240,47,1363,1405,1401,1372,1371]:
1377 apc = APCFolsom(values, verbose, ['22', '23'])
1378 rb_ret = apc.reboot(values[nodename], dryrun)
1381 apc = APCMaster(values, verbose, ['22', '23'])
1382 rb_ret = apc.reboot(values[nodename], dryrun)
1385 elif continue_probe and values['model'].find("DS4-RPC") >= 0:
1386 if values['pcu_id'] in [1056,1237,1052,1209,1002,1008,1041,1013,1022]:
1387 # These require a 'ctrl-c' to be sent...
1388 baytech = BayTechCtrlC(values, verbose, ['22', '23'])
1389 rb_ret = baytech.reboot(values[nodename], dryrun)
1391 elif values['pcu_id'] in [93]:
1392 baytech = BayTechAU(values, verbose, ['22', '23'])
1393 rb_ret = baytech.reboot(values[nodename], dryrun)
1395 elif values['pcu_id'] in [1057]:
1396 # These require a 'ctrl-c' to be sent...
1397 baytech = BayTechCtrlCUnibe(values, verbose, ['22', '23'])
1398 rb_ret = baytech.reboot(values[nodename], dryrun)
1400 elif values['pcu_id'] in [1012]:
1401 # This pcu sometimes doesn't present the 'Username' prompt,
1402 # unless you immediately try again...
1404 baytech = BayTechGeorgeTown(values, verbose, ['22', '23'])
1405 rb_ret = baytech.reboot(values[nodename], dryrun)
1407 baytech = BayTechGeorgeTown(values, verbose, ['22', '23'])
1408 rb_ret = baytech.reboot(values[nodename], dryrun)
1410 baytech = BayTech(values, verbose, ['22', '23'])
1411 rb_ret = baytech.reboot(values[nodename], dryrun)
1414 elif continue_probe and values['model'].find("ilo") >= 0:
1416 hpilo = HPiLO(values, verbose, ['22'])
1417 rb_ret = hpilo.reboot(0, dryrun)
1419 hpilo = HPiLOHttps(values, verbose, ['443'])
1420 rb_ret = hpilo.reboot(0, dryrun)
1422 hpilo = HPiLOHttps(values, verbose, ['443'])
1423 rb_ret = hpilo.reboot(0, dryrun)
1426 elif continue_probe and values['model'].find("DRAC") >= 0:
1427 # TODO: I don't think DRACRacAdm will throw an exception for the
1428 # default method to catch...
1430 drac = DRACRacAdm(values, verbose, ['443', '5869'])
1431 rb_ret = drac.reboot(0, dryrun)
1433 drac = DRAC(values, verbose, ['22'])
1434 rb_ret = drac.reboot(0, dryrun)
1436 elif continue_probe and values['model'].find("WTI IPS-4") >= 0:
1437 wti = WTIIPS4(values, verbose, ['23'])
1438 rb_ret = wti.reboot(values[nodename], dryrun)
1440 elif continue_probe and values['model'].find("AMT") >= 0:
1441 amt = IntelAMT(values, verbose, ['16992'])
1442 rb_ret = amt.reboot(values[nodename], dryrun)
1444 elif continue_probe and values['model'].find("bbsemaverick") >=0:
1445 print "TRYING BlackBoxPSMaverick"
1446 bbe = BlackBoxPSMaverick(values, verbose, ['80'])
1447 rb_ret = bbe.reboot(values[nodename], dryrun)
1449 elif continue_probe and values['model'].find("ipmi") >=0:
1452 ipmi = IPMI(values, verbose, ['80', '443', '623'])
1453 rb_ret = ipmi.reboot(values[nodename], dryrun)
1455 elif continue_probe and values['model'].find("ePowerSwitch") >=0:
1456 # TODO: allow a different port than http 80.
1457 if values['pcu_id'] in [1089, 1071, 1046, 1035, 1118]:
1458 eps = ePowerSwitchGood(values, verbose, ['80'])
1459 elif values['pcu_id'] in [1003]:
1462 eps = ePowerSwitch(values, verbose, ['80'])
1464 eps = ePowerSwitchGood(values, verbose, ['80'])
1466 rb_ret = eps.reboot(values[nodename], dryrun)
1467 elif continue_probe and values['pcu_id'] in [1122]:
1468 custom = CustomPCU(values, verbose, ['80', '443'])
1469 custom.reboot(values[nodename], dryrun)
1471 elif continue_probe:
1472 rb_ret = "Unsupported_PCU"
1474 elif continue_probe == False:
1475 if 'portstatus' in values:
1482 except ExceptionPort, err:
1487 #elif continue_probe and values['protocol'] == "racadm" and \
1488 # values['model'] == "RAC":
1489 # rb_ret = racadm_reboot(pcu_name(values),
1490 # values['username'],
1491 # values['password'],
1496 logger.setLevel(logging.DEBUG)
1497 ch = logging.StreamHandler()
1498 ch.setLevel(logging.DEBUG)
1499 formatter = logging.Formatter('LOGGER - %(message)s')
1500 ch.setFormatter(formatter)
1501 logger.addHandler(ch)
1504 if "test" in sys.argv:
1509 for node in sys.argv[1:]:
1510 if node == "test": continue
1512 print "Rebooting %s" % node
1513 if reboot_policy(node, True, dryrun):
1517 except Exception, err:
1518 import traceback; traceback.print_exc()
1521 if __name__ == '__main__':
1523 logger = logging.getLogger("monitor")