3 # Reboot specified nodes
9 import errno, time, traceback
12 import threading, popen2
14 from monitor.wrapper import plc
16 from subprocess import PIPE, Popen
17 import pcucontrol.transports.ssh.pxssh as pxssh
18 import pcucontrol.transports.ssh.pexpect as pexpect
20 from monitor.util import command
22 # Use our versions of telnetlib and pyssh
23 sys.path.insert(0, os.path.dirname(sys.argv[0]))
24 import pcucontrol.transports.telnetlib as telnetlib
25 sys.path.insert(0, os.path.dirname(sys.argv[0]) + "/pyssh")
26 import pcucontrol.transports.pyssh as pyssh
27 from monitor import config
29 from monitor.database.info.model import FindbadPCURecord
34 # Event class ID from pcu events
35 #NODE_POWER_CONTROL = 3
38 #MONITOR_USER_ID = 11142
41 logger = logging.getLogger("monitor")
class ExceptionNoTransport(Exception):
    """No transport object is open, or none is implemented for the device."""


class ExceptionNotFound(Exception):
    """An expected string was not found in the output read from the device."""


class ExceptionPassword(Exception):
    """The password prompt was not seen, or the login was rejected."""


class ExceptionTimeout(Exception):
    """An expected prompt did not arrive within the timeout."""


class ExceptionPrompt(Exception):
    """An expected prompt was not seen (default error for ifThenSend)."""


class ExceptionSequence(Exception):
    """A step of a device's command sequence failed."""


class ExceptionReset(Exception):
    """Reset-related failure; not raised by the code visible alongside it."""


class ExceptionPort(Exception):
    """No usable transport could be chosen from the PCU's open ports."""


class ExceptionUsername(Exception):
    """The username prompt was not seen."""
def telnet_answer(telnet, expected, buffer):
    """Wait for ``expected`` on ``telnet`` and answer it with ``buffer``.

    Args:
        telnet: connection object offering ``read_until(text, timeout)`` and
            ``write(text)`` (callers in this file pass telnet and ssh objects).
        expected: text that must appear in the data read from ``telnet``.
        buffer: reply to send; a CRLF is appended before writing.

    Raises:
        ExceptionNotFound: if ``expected`` is absent from what was read.
    """
    # TELNET_TIMEOUT is a module-level constant defined elsewhere in this file.
    output = telnet.read_until(expected, TELNET_TIMEOUT)
    if expected not in output:
        # Call-style raise is valid on Python 2 and 3; the old
        # ``raise Cls, "msg"`` statement form is Python-2-only.
        raise ExceptionNotFound("'%s' not found" % expected)
    telnet.write(buffer + "\r\n")
67 # PCU has model, host, preferred-port, user, passwd,
69 # This is an object derived directly from the PLCAPI DB fields
71 def __init__(self, plc_pcu_dict):
72 for field in ['username', 'password', 'site_id',
75 'node_ids', 'ports', ]:
76 if field in plc_pcu_dict:
77 self.__setattr__(field, plc_pcu_dict[field])
79 raise Exception("No such field %s in PCU object" % field)
81 # These are the convenience functions built around the PCU object.
83 def __init__(self, plc_pcu_dict):
84 PCU.__init__(self, plc_pcu_dict)
85 self.host = self.pcu_name()
88 if self.hostname is not None and self.hostname is not "":
90 elif self.ip is not None and self.ip is not "":
95 def nodeidToPort(self, node_id):
96 if node_id in self.node_ids:
97 for i in range(0, len(self.node_ids)):
98 if node_id == self.node_ids[i]:
101 raise Exception("No such Node ID: %d" % node_id)
103 # This class captures the observed pcu records from FindBadPCUs.py
105 def __init__(self, pcu_record_dict):
106 for field in ['port_status',
109 if field in pcu_record_dict:
110 if field == "reboot":
111 self.__setattr__("reboot_str", pcu_record_dict[field])
113 self.__setattr__(field, pcu_record_dict[field])
115 raise Exception("No such field %s in pcu record dict" % field)
127 def __init__(self, type, verbose):
129 self.verbose = verbose
130 self.transport = None
132 def open(self, host, username=None, password=None, prompt="User Name"):
135 if self.type == self.TELNET:
136 transport = telnetlib.Telnet(host, timeout=self.TELNET_TIMEOUT)
137 transport.set_debuglevel(self.verbose)
138 if username is not None:
139 self.transport = transport
140 self.ifThenSend(prompt, username, ExceptionUsername)
142 elif self.type == self.SSH:
143 if username is not None:
144 transport = pyssh.Ssh(username, host)
145 transport.set_debuglevel(self.verbose)
147 # TODO: have an ssh set_debuglevel() also...
149 raise Exception("Username cannot be None for ssh transport.")
150 elif self.type == self.HTTP:
151 self.url = "http://%s:%d/" % (host,80)
152 uri = "%s:%d" % (host,80)
155 authinfo = urllib2.HTTPPasswordMgrWithDefaultRealm()
156 authinfo.add_password (None, uri, username, password)
157 authhandler = urllib2.HTTPBasicAuthHandler( authinfo )
159 transport = urllib2.build_opener(authhandler)
162 raise Exception("Unknown transport type: %s" % self.type)
164 self.transport = transport
168 if self.type == self.TELNET:
169 self.transport.close()
170 elif self.type == self.SSH:
171 self.transport.close()
172 elif self.type == self.HTTP:
175 raise Exception("Unknown transport type %s" % self.type)
176 self.transport = None
178 def sendHTTP(self, resource, data):
180 print "POSTing '%s' to %s" % (data,self.url + resource)
183 f = self.transport.open(self.url + resource ,data)
188 except urllib2.URLError,err:
189 logger.info('Could not open http connection', err)
190 return "http transport error"
194 def sendPassword(self, password, prompt=None):
195 if self.type == self.TELNET:
197 self.ifThenSend("Password", password, ExceptionPassword)
199 self.ifThenSend(prompt, password, ExceptionPassword)
200 elif self.type == self.SSH:
201 self.ifThenSend("password:", password, ExceptionPassword)
202 elif self.type == self.HTTP:
205 raise Exception("Unknown transport type: %s" % self.type)
def ifThenSend(self, expected, buffer, ErrorClass=ExceptionPrompt):
    """Wait for ``expected`` on the open transport, then send ``buffer``.

    Args:
        expected: text to wait for in the transport's output.
        buffer: text written once ``expected`` was seen; CRLF is appended.
        ErrorClass: exception type raised when ``expected`` is missing.

    Raises:
        ExceptionNoTransport: if ``self.transport`` is None (nothing opened).
        ErrorClass: if ``expected`` is not in the output that was read.
    """
    # Identity test instead of ``!= None``: it cannot be fooled by a
    # transport object that overrides comparison operators.
    if self.transport is None:
        raise ExceptionNoTransport("transport object is type None")

    output = self.transport.read_until(expected, self.TELNET_TIMEOUT)
    if expected not in output:
        # Show what was actually received to make prompt mismatches debuggable.
        print("OUTPUT: --%s--" % output)
        # Call-style raise works on Python 2 and 3 alike.
        raise ErrorClass("'%s' not found" % expected)
    self.transport.write(buffer + "\r\n")
219 def ifElse(self, expected, ErrorClass):
221 self.transport.read_until(expected, self.TELNET_TIMEOUT)
223 raise ErrorClass("Could not find '%s' within timeout" % expected)
226 class PCUControl(Transport,PCUModel,PCURecord):
230 def __init__(self, plc_pcu_record, verbose, supported_ports=[]):
231 PCUModel.__init__(self, plc_pcu_record)
232 PCURecord.__init__(self, plc_pcu_record)
235 # NOTE: prefer racadm port over ssh
236 if '5869' in supported_ports and self.port_status['5869'] == "open":
237 type = Transport.DRAC# DRAC cards user this port.
238 elif '22' in supported_ports and self.port_status['22'] == "open":
240 elif '23' in supported_ports and self.port_status['23'] == "open":
241 type = Transport.TELNET
242 # NOTE: prefer https over http
243 elif '443' in supported_ports and self.port_status['443'] == "open":
244 type = Transport.HTTPS
245 elif '80' in supported_ports and self.port_status['80'] == "open":
246 type = Transport.HTTP
247 elif '9100' in supported_ports and self.port_status['9100'] == "open":
248 type = Transport.IPAL
249 elif '16992' in supported_ports and self.port_status['16992'] == "open":
250 type = Transport.HTTP
252 raise ExceptionPort("Unsupported Port: No transport from open ports")
254 raise ExceptionPort("No Portstatus: No transport because no open ports")
255 Transport.__init__(self, type, verbose)
257 def run(self, node_port, dryrun):
258 """ This function is to be defined by the specific PCU instance. """
261 def reboot(self, node_port, dryrun):
263 return self.run(node_port, dryrun)
264 except ExceptionNotFound, err:
265 return "error: " + str(err)
266 except ExceptionPassword, err:
267 return "Password exception: " + str(err)
268 except ExceptionTimeout, err:
269 return "Timeout exception: " + str(err)
270 except ExceptionUsername, err:
271 return "No username prompt: " + str(err)
272 except ExceptionSequence, err:
273 return "Sequence error: " + str(err)
274 except ExceptionPrompt, err:
275 return "Prompt exception: " + str(err)
276 except ExceptionNoTransport, err:
277 return "No Transport: " + str(err)
278 except ExceptionPort, err:
279 return "No ports exception: " + str(err)
280 except socket.error, err:
281 return "socket error: timeout: " + str(err)
282 except urllib2.HTTPError, err:
283 return "HTTPError: " + str(err)
284 except urllib2.URLError, err:
285 return "URLError: " + str(err)
286 except EOFError, err:
288 logger.debug("reboot: EOF")
290 self.transport.close()
292 traceback.print_exc()
293 return "EOF connection reset" + str(err)
295 class IPMI(PCUControl):
297 supported_ports = [80,443,623]
299 # TODO: get exit codes to determine success or failure...
300 def run(self, node_port, dryrun):
303 cmd = "ipmitool -I lanplus -H %s -U %s -P '%s' power cycle"
304 p = os.popen(cmd % ( self.host, self.username, self.password) )
306 print "RESULT: ", result
308 cmd = "ipmitool -I lanplus -H %s -U %s -P '%s' user list"
309 p = os.popen(cmd % ( self.host, self.username, self.password) )
311 print "RESULT: ", result
313 if "Error" in result:
318 class IPAL(PCUControl):
320 This now uses a proprietary format for communicating with the PCU. I
321 prefer it to Telnet, and Web access, since it's much lighter weight
322 and, more importantly, IT WORKS!! HHAHHHAHAHAHAHAHA!
324 supported_ports = [23,80,9100]
def format_msg(self, data, cmd):
    """Build one request: ESC, password, ESC, ``data``, then ``cmd``.

    ``cmd`` is rendered with ``%c``, so it has to be a single character
    (or an integer character code).
    """
    escape = chr(0x1B)
    fields = (escape, self.password, escape, data, cmd)
    return "%c%s%c%s%c" % fields
330 def recv_noblock(self, s, count):
334 # TODO: make sleep backoff, before stopping.
336 ret = s.recv(count, socket.MSG_DONTWAIT)
337 except socket.error, e:
338 if e[0] == errno.EAGAIN:
339 raise Exception(e[1])
341 # TODO: not other exceptions.
345 def run(self, node_port, dryrun):
346 if self.type == Transport.IPAL:
347 ret = self.run_ipal(node_port, dryrun)
349 ret2 = self.run_telnet(node_port, dryrun)
354 elif self.type == Transport.TELNET:
355 return self.run_telnet(node_port, dryrun)
357 raise ExceptionNoTransport("Unimplemented Transport for IPAL")
359 def run_telnet(self, node_port, dryrun):
360 # TELNET version of protocol...
362 ## XXX Some iPals require you to hit Enter a few times first
363 self.ifThenSend("Password >", "\r\n\r\n", ExceptionNotFound)
365 self.ifThenSend("Password >", self.password, ExceptionPassword)
366 self.transport.write("\r\n\r\n")
367 if not dryrun: # P# - Pulse relay
368 print "node_port %s" % node_port
369 self.ifThenSend("Enter >",
372 print "send newlines"
373 self.transport.write("\r\n\r\n")
374 print "after new lines"
375 # Get the next prompt
376 print "wait for enter"
377 self.ifElse("Enter >", ExceptionTimeout)
382 def run_ipal(self, node_port, dryrun):
388 s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
391 s.connect((self.host, 9100))
392 except socket.error, e:
394 if e[0] == errno.ECONNREFUSED:
395 # cannot connect to remote host
396 raise Exception(e[1])
398 # TODO: what other conditions are there?
402 print "Checking status"
403 s.send(self.format_msg("", 'O'))
404 ret = self.recv_noblock(s, 8)
405 print "Current status is '%s'" % ret
408 raise Exception("Status returned 'another session already open' %s : %s" % (node_port, ret))
410 if node_port < len(ret):
411 status = ret[node_port]
419 raise Exception("Unknown status for PCU socket %s : %s" % (node_port, ret))
421 raise Exception("Mismatch between configured port and PCU status: %s %s" % (node_port, ret))
426 print "Pulsing %s" % node_port
427 s.send(self.format_msg("%s" % node_port, 'P'))
429 # NOTE: turn power on ; do not pulse the port.
430 print "Power was off, so turning on ..."
431 s.send(self.format_msg("%s" % node_port, 'E'))
432 #s.send(self.format_msg("%s" % node_port, 'P'))
434 print "Receiving response."
435 ret = self.recv_noblock(s, 8)
436 print "Current status is '%s'" % ret
438 if node_port < len(ret):
439 status = ret[node_port]
447 raise Exception("Unknown status for PCU socket %s : %s" % (node_port, ret))
449 raise Exception("Mismatch between configured port and PCU status: %s %s" % (node_port, ret))
454 return "Failed Power On"
459 class APCControl(PCUControl):
460 supported_ports = [22,23,80,443]
def run(self, node_port, dryrun):
    """Route the reboot to the HTTP(S) or the telnet/ssh implementation.

    The choice depends only on ``self.type``; the selected method's result
    is returned unchanged.
    """
    print("RUNNING!!!!!!!!!!!!")
    if self.type not in (Transport.HTTPS, Transport.HTTP):
        print("APC via telnet/ssh....")
        return self.run_telnet_or_ssh(node_port, dryrun)
    print("APC via http....")
    return self.run_http_or_https(node_port, dryrun)
472 def run_telnet_or_ssh(self, node_port, dryrun):
473 self.open(self.host, self.username)
474 self.sendPassword(self.password)
477 for val in self.reboot_sequence:
479 self.ifThenSend("\r\n> ", val, ExceptionPassword)
482 self.ifThenSend("\r\n> ", val)
485 self.ifThenSend("Enter 'YES' to continue or <ENTER> to cancel",
489 self.ifThenSend("Enter 'YES' to continue or <ENTER> to cancel",
490 "", ExceptionSequence)
491 self.ifThenSend("Press <ENTER> to continue...", "", ExceptionSequence)
496 def run_http_or_https(self, node_port, dryrun):
498 # send reboot signal.
499 # TODO: send a ManualPCU() reboot request for this PCU.
500 # NOTE: this model defies automation because, the port numbering
501 # and the form numbers are not consistent across models. There is
502 # not direct mapping from port# to form#.
503 return "Manual Reboot Required"
506 # TODO: also send message for https, since that doesn't work this way...
507 if self.type == Transport.HTTPS:
508 cmd = self.get_https_cmd()
509 elif self.type == Transport.HTTP:
510 cmd = self.get_http_cmd()
512 raise ExceptionNoTransport("Unsupported transport for http command")
514 cmd = cmd % ( self.username, self.password, self.host)
515 print "CMD: %s" % cmd
519 if len(result.split('\n')) > 2:
523 # NOTE: an error has occurred, so no need to log out.
524 print "RESULT: ", result
527 def get_https_cmd(self):
528 version = self.get_version()
529 print "VERSION: %s" % version
530 if "AP96" in version:
531 cmd = "curl -s --insecure --user '%s:%s' https://%s/outlets.htm " + \
532 " | grep -E '^[^<]+' " + \
533 " | grep -v 'Protected Object' "
535 # NOTE: no other case known right now...
536 cmd = "curl -s --insecure --user '%s:%s' https://%s/outlets.htm " + \
537 " | grep -E '^[^<]+' " + \
538 " | grep -v 'Protected Object' "
542 def get_http_cmd(self):
543 version = self.get_version()
544 print "VERSION: %s" % version
545 if "AP7900" in version:
546 cmd = "curl -s --anyauth --user '%s:%s' http://%s/rPDUout.htm | grep -E '^[^<]+'"
547 elif "AP7920" in version:
548 cmd = "curl -s --anyauth --user '%s:%s' http://%s/ms3out.htm | grep -E '^[^<]+' "
551 print "USING DEFAULT"
552 cmd = "curl -s --anyauth --user '%s:%s' http://%s/ms3out.htm | grep -E '^[^<]+' "
556 def get_version(self):
557 # NOTE: this command returns and formats all data.
558 #cmd = """curl -s --anyauth --user '%s:%s' http://%s/about.htm """ +
559 # """ | sed -e "s/<[^>]*>//g" -e "s/ //g" -e "/^$/d" """ +
560 # """ | awk '{line=$0 ; if ( ! /:/ && length(pline) > 0 ) \
561 # { print pline, line } else { pline=line} }' """ +
562 # """ | grep Model """
564 # NOTE: we may need to return software version, no model version to
565 # know which file to request on the server.
567 if self.type == Transport.HTTP:
568 cmd = """curl -s --anyauth --user '%s:%s' http://%s/about.htm """ + \
569 """ | sed -e "s/<[^>]*>//g" -e "s/ //g" -e "/^$/d" """ + \
570 """ | grep -E "AP[[:digit:]]+" """
571 #""" | grep -E "v[[:digit:]].*" """
572 elif self.type == Transport.HTTPS:
573 cmd = """curl -s --insecure --user '%s:%s' https://%s/about.htm """ + \
574 """ | sed -e "s/<[^>]*>//g" -e "s/ //g" -e "/^$/d" """ + \
575 """ | grep -E "AP[[:digit:]]+" """
576 #""" | grep -E "v[[:digit:]].*" """
578 raise ExceptionNoTransport("Unsupported transport to get version")
580 cmd = cmd % ( self.username, self.password, self.host)
583 return result.strip()
586 # NOTE: log out again, to allow other users to access the machine.
587 if self.type == Transport.HTTP:
588 cmd = """curl -s --anyauth --user '%s:%s' http://%s/logout.htm """ + \
589 """ | grep -E '^[^<]+' """
590 elif self.type == Transport.HTTPS:
591 cmd = """curl -s --insecure --user '%s:%s' http://%s/logout.htm """ + \
592 """ | grep -E '^[^<]+' """
594 raise ExceptionNoTransport("Unsupported transport to logout")
596 cmd = cmd % ( self.username, self.password, self.host)
class APCControl12p3(APCControl):
    """APC unit whose reboot entries are "1", "2", <port>, "3"."""

    def run(self, node_port, dryrun):
        """Store this model's reboot sequence, then defer to the parent run."""
        outlet = str(node_port)
        # Entries are sent one by one by APCControl.run_telnet_or_ssh.
        self.reboot_sequence = ["1", "2", outlet, "3"]
        parent = super(APCControl12p3, self)
        return parent.run(node_port, dryrun)
class APCControl1p4(APCControl):
    """APC unit whose reboot entries are "1", <port>, "4"."""

    def run(self, node_port, dryrun):
        """Store this model's reboot sequence, then defer to the parent run."""
        outlet = str(node_port)
        # Entries are sent one by one by APCControl.run_telnet_or_ssh.
        self.reboot_sequence = ["1", outlet, "4"]
        parent = super(APCControl1p4, self)
        return parent.run(node_port, dryrun)
class APCControl121p3(APCControl):
    """APC unit whose reboot entries are "1", "2", "1", <port>, "3"."""

    def run(self, node_port, dryrun):
        """Store this model's reboot sequence, then defer to the parent run."""
        outlet = str(node_port)
        # Entries are sent one by one by APCControl.run_telnet_or_ssh.
        self.reboot_sequence = ["1", "2", "1", outlet, "3"]
        parent = super(APCControl121p3, self)
        return parent.run(node_port, dryrun)
class APCControl121p1(APCControl):
    """APC unit whose reboot entries are "1", "2", "1", <port>, "1", "3"."""

    def run(self, node_port, dryrun):
        """Store this model's reboot sequence, then defer to the parent run."""
        outlet = str(node_port)
        # Entries are sent one by one by APCControl.run_telnet_or_ssh.
        self.reboot_sequence = ["1", "2", "1", outlet, "1", "3"]
        parent = super(APCControl121p1, self)
        return parent.run(node_port, dryrun)
class APCControl13p13(APCControl):
    """APC unit whose reboot entries are "1", "3", <port>, "1", "3"."""

    def run(self, node_port, dryrun):
        """Store this model's reboot sequence, then defer to the parent run."""
        outlet = str(node_port)
        # Entries are sent one by one by APCControl.run_telnet_or_ssh.
        self.reboot_sequence = ["1", "3", outlet, "1", "3"]
        parent = super(APCControl13p13, self)
        return parent.run(node_port, dryrun)
626 class IntelAMT(PCUControl):
627 supported_ports = [16992]
629 def run(self, node_port, dryrun):
632 # TODO: need to make this path universal; not relative to pwd.
633 cmd_str = config.MONITOR_SCRIPT_ROOT + "/pcucontrol/models/intelamt/remoteControl"
636 # NOTE: -p checks the power state of the host.
637 # TODO: parse the output to find out if it's ok or not.
638 cmd_str += " -p http://%s:16992/RemoteControlService -user admin -pass '%s' " % (self.host, self.password )
640 cmd_str += " -A http://%s:16992/RemoteControlService -user admin -pass '%s' " % (self.host, self.password )
643 return cmd.system(cmd_str, self.TELNET_TIMEOUT)
645 class DRAC(PCUControl):
646 supported_ports = [22,443,5869]
def run(self, node_port, dryrun):
    """Reboot via racadm or ssh, depending on the selected transport.

    Raises:
        ExceptionNoTransport: if the transport is neither DRAC nor SSH.
    """
    transport_type = self.type
    if transport_type == Transport.DRAC:
        print("trying racadm_reboot...")
        return racadm_reboot(self.host, self.username, self.password,
                             node_port, dryrun)
    if transport_type == Transport.SSH:
        return self.run_ssh(node_port, dryrun)
    raise ExceptionNoTransport("No implementation for open ports")
656 def run_ssh(self, node_port, dryrun):
657 ssh_options="-o StrictHostKeyChecking=no "+\
658 "-o PasswordAuthentication=yes "+\
659 "-o PubkeyAuthentication=no"
661 if not s.login(self.host, self.username, self.password, ssh_options,
662 original_prompts="Dell", login_timeout=TELNET_TIMEOUT):
663 raise ExceptionPassword("Invalid Password")
665 print "logging in..."
669 #index = s.expect(["DRAC 5", "[%s]#" % self.username ])
670 # NOTE: be careful to escape any characters used by 're.compile'
671 index = s.expect(["\$", "\[%s\]#" % self.username ])
672 print "INDEX:", index
675 s.send("racadm getsysinfo")
680 s.send("racadm serveraction powercycle")
682 s.send("serveraction powercycle")
687 raise ExceptionPrompt("EOF before expected Prompt")
688 except pexpect.TIMEOUT:
690 raise ExceptionPrompt("Timeout before expected Prompt")
696 class DRACDefault(PCUControl):
697 supported_ports = [22,443,5869]
698 def run(self, node_port, dryrun):
699 self.open(self.host, self.username)
700 self.sendPassword(self.password)
702 print "logging in..."
703 self.transport.write("\r\n")
706 self.ifThenSend("[%s]#" % self.username, "getsysinfo")
709 self.ifThenSend("[%s]#" % self.username, "serveraction powercycle")
711 self.ifThenSend("[%s]#" % self.username, "exit")
716 class HPiLO(PCUControl):
717 supported_ports = [22,443]
def run(self, node_port, dryrun):
    """Reboot via ssh or the HTTPS helper, depending on the transport.

    Both HTTP and HTTPS transports are handled by ``run_https``.

    Raises:
        ExceptionNoTransport: for any other transport type.
    """
    kind = self.type
    if kind == Transport.SSH:
        return self.run_ssh(node_port, dryrun)
    if kind in (Transport.HTTP, Transport.HTTPS):
        return self.run_https(node_port, dryrun)
    raise ExceptionNoTransport("Unimplemented Transport for HPiLO %s" % self.type)
726 def run_ssh(self, node_port, dryrun):
728 self.open(self.host, self.username)
729 self.sendPassword(self.password)
732 self.ifThenSend("</>hpiLO->", "cd system1")
734 # Reboot Outlet N (Y/N)?
736 self.ifThenSend("</system1>hpiLO->", "POWER")
739 self.ifThenSend("</system1>hpiLO->", "reset")
741 self.ifThenSend("</system1>hpiLO->", "exit")
746 def run_https(self, node_port, dryrun):
748 locfg = command.CMD()
750 cmd_str = config.MONITOR_SCRIPT_ROOT + "/pcucontrol/models/hpilo/"
752 cmd = cmd_str + "locfg.pl -s %s -f %s -u %s -p '%s' | grep 'MESSAGE' | grep -v 'No error'" % (
753 self.host, cmd_str+"iloxml/Get_Network.xml",
754 self.username, self.password)
755 sout, serr = locfg.run_noexcept(cmd)
757 if sout.strip() != "" or serr.strip() != "":
758 print "sout: %s" % sout.strip()
759 return sout.strip() + serr.strip()
762 locfg = command.CMD()
763 cmd = cmd_str + "locfg.pl -s %s -f %s -u %s -p '%s' | grep 'MESSAGE' | grep -v 'No error'" % (
764 self.host, cmd_str+"iloxml/Reset_Server.xml",
765 self.username, self.password)
766 sout, serr = locfg.run_noexcept(cmd)
768 if sout.strip() != "":
769 print "sout: %s" % sout.strip()
774 class BayTechRPC3NC(PCUControl):
775 def run(self, node_port, dryrun):
776 self.open(self.host, self.username, None, "Enter user name:")
777 self.sendPassword(self.password, "Enter Password:")
779 #self.ifThenSend("RPC-16>", "Status")
780 self.ifThenSend("RPC3-NC>", "Reboot %d" % node_port)
782 # Reboot Outlet N (Y/N)?
784 self.ifThenSend("(Y/N)?", "N")
786 self.ifThenSend("(Y/N)?", "Y")
787 self.ifThenSend("RPC3-NC>", "")
792 class BayTechRPC16(PCUControl):
793 def run(self, node_port, dryrun):
794 self.open(self.host, self.username, None, "Enter user name:")
795 self.sendPassword(self.password, "Enter Password:")
797 #self.ifThenSend("RPC-16>", "Status")
799 self.ifThenSend("RPC-16>", "Reboot %d" % node_port)
801 # Reboot Outlet N (Y/N)?
803 self.ifThenSend("(Y/N)?", "N")
805 self.ifThenSend("(Y/N)?", "Y")
806 self.ifThenSend("RPC-16>", "")
811 class BayTechCtrlCUnibe(PCUControl):
813 For some reason, these units let you log in fine, but they hang
814 indefinitely, unless you send a Ctrl-C after the password. No idea
817 def run(self, node_port, dryrun):
818 print "BayTechCtrlC %s" % self.host
820 ssh_options="-o StrictHostKeyChecking=no -o PasswordAuthentication=yes -o PubkeyAuthentication=no"
822 if not s.login(self.host, self.username, self.password, ssh_options):
823 raise ExceptionPassword("Invalid Password")
824 # Otherwise, the login succeeded.
826 # Send a ctrl-c to the remote process.
827 print "sending ctrl-c"
830 # Control Outlets (5 ,1).........5
832 #index = s.expect("Enter Request")
833 index = s.expect(["Enter Request :"])
838 index = s.expect(["DS-RPC>", "Enter user name:"])
840 s.send(self.username + "\r\n")
841 index = s.expect(["DS-RPC>"])
844 print "Reboot %d" % node_port
846 s.send("Reboot %d\r\n" % node_port)
849 index = s.expect(["\(Y/N\)\?", "Port in use", "DS-RPC>"])
858 raise ExceptionPrompt("PCU Reported 'Port in use.'")
860 raise ExceptionSequence("Issued command 'Reboot' failed.")
863 index = s.expect(["DS-RPC>"])
864 #print "got prompt back"
869 raise ExceptionPrompt("EOF before expected Prompt")
870 except pexpect.TIMEOUT:
871 raise ExceptionPrompt("Timeout before expected Prompt")
875 class BayTechCtrlC(PCUControl):
877 For some reason, these units let you log in fine, but they hang
878 indefinitely, unless you send a Ctrl-C after the password. No idea
881 def run(self, node_port, dryrun):
882 print "BayTechCtrlC %s" % self.host
884 ssh_options="-o StrictHostKeyChecking=no -o PasswordAuthentication=yes -o PubkeyAuthentication=no"
886 if not s.login(self.host, self.username, self.password, ssh_options):
887 raise ExceptionPassword("Invalid Password")
888 # Otherwise, the login succeeded.
890 # Send a ctrl-c to the remote process.
891 print "SENDING ctrl-c"
894 # Control Outlets (5 ,1).........5
896 print "EXPECTING: ", "Enter Request :"
897 index = s.expect(["Enter Request :"])
902 print "EXPECTING: ", "DS-RPC>"
903 index = s.expect(["DS-RPC>", "Enter user name:", "Port in use."])
905 print "sending username"
906 s.send(self.username + "\r\n")
907 index = s.expect(["DS-RPC>"])
909 raise ExceptionPrompt("PCU Reported 'Port in use.'")
912 print "SENDING: Reboot %d" % node_port
913 s.send("Reboot %d\r\n" % node_port)
917 print "EXPECTING: ", "Y/N?"
918 index = s.expect(["\(Y/N\)\?", "Port in use", "DS-RPC>"])
927 raise ExceptionPrompt("PCU Reported 'Port in use.'")
929 raise ExceptionSequence("Issued command 'Reboot' failed.")
931 # NOTE: for some reason, the script times out with the
932 # following line. In manual tests, it works correctly, but
933 # with automated tests, evidently it fails.
936 #print "TOTAL--", s.allstr, "--EOT"
937 index = s.expect(["DS-RPC>"])
938 print "got prompt back"
943 raise ExceptionPrompt("EOF before 'Enter Request' Prompt")
944 except pexpect.TIMEOUT:
945 raise ExceptionPrompt("Timeout before Prompt")
949 class BayTech(PCUControl):
950 supported_ports = [22,23]
951 def run(self, node_port, dryrun):
952 self.open(self.host, self.username)
953 self.sendPassword(self.password)
955 # Control Outlets (5 ,1).........5
956 self.ifThenSend("Enter Request :", "5")
960 self.ifThenSend("DS-RPC>", "Reboot %d" % node_port, ExceptionNotFound)
961 except ExceptionNotFound, msg:
962 # one machine is configured to ask for a username,
963 # even after login...
964 print "msg: %s" % msg
965 self.transport.write(self.username + "\r\n")
967 self.ifThenSend("DS-RPC>", "Reboot %d" % node_port)
969 # Reboot Outlet N (Y/N)?
971 self.ifThenSend("(Y/N)?", "N")
973 self.ifThenSend("(Y/N)?", "Y")
975 self.ifThenSend("DS-RPC>", "")
980 class WTIIPS4(PCUControl):
981 supported_ports = [23]
982 def run(self, node_port, dryrun):
984 self.sendPassword(self.password, "Enter Password:")
986 self.ifThenSend("IPS> ", "/Boot %s" % node_port)
988 self.ifThenSend("Sure? (Y/N): ", "N")
990 self.ifThenSend("Sure? (Y/N): ", "Y")
992 self.ifThenSend("IPS> ", "")
997 class ePowerSwitchNew(PCUControl):
999 # The old code used Python's HTTPPasswordMgrWithDefaultRealm()
1000 # For some reason this both doesn't work and in some cases, actually
1001 # hangs the PCU. Definitely not what we want.
1003 # The code below is much simpler. Just letting things fail first,
1004 # and then, trying again with authentication string in the header.
1006 def run(self, node_port, dryrun):
1007 self.transport = None
1008 self.url = "http://%s:%d/" % (self.host,80)
1009 uri = "%s:%d" % (self.host,80)
1011 req = urllib2.Request(self.url)
1013 handle = urllib2.urlopen(req)
1015 # NOTE: this is expected to fail initially
1022 return "ERROR: not protected by HTTP authentication"
1024 if not hasattr(e, 'code') or e.code != 401:
1025 return "ERROR: failed for: %s" % str(e)
1027 base64data = base64.encodestring("%s:%s" % (self.username, self.password))[:-1]
1028 # NOTE: assuming basic realm authentication.
1029 authheader = "Basic %s" % base64data
1030 req.add_header("Authorization", authheader)
1033 f = urllib2.urlopen(req)
1035 # failing here means the User/passwd is wrong (hopefully)
1036 raise ExceptionPassword("Incorrect username/password")
1038 # NOTE: after verifying that the user/password is correct,
1039 # actually reboot the given node.
1042 data = urllib.urlencode({'P%d' % node_port : "r"})
1043 req = urllib2.Request(self.url + "cmd.html")
1044 req.add_header("Authorization", authheader)
1045 # add data to handler,
1046 f = urllib2.urlopen(req, data)
1047 if self.verbose: print f.read()
1049 import traceback; traceback.print_exc()
1051 # fetch url one more time on cmd.html, econtrol.html or whatever.
1054 if self.verbose: print f.read()
1059 class CustomPCU(PCUControl):
1060 def run(self, node_port, dryrun):
1061 url = "https://www-itec.uni-klu.ac.at/plab-pcu/index.php"
1064 # Turn host off, then on
1065 formstr = "plab%s=off" % node_port
1066 os.system("curl --user %s:%s --form '%s' --insecure %s" % (self.username, self.password, formstr, url))
1068 formstr = "plab%s=on" % node_port
1069 os.system("curl --user %s:%s --form '%s' --insecure %s" % (self.username, self.password, formstr, url))
1071 os.system("curl --user %s:%s --insecure %s" % (self.username, self.password, url))
1074 class ePowerSwitchOld(PCUControl):
1075 def run(self, node_port, dryrun):
1076 self.url = "http://%s:%d/" % (self.host,80)
1077 uri = "%s:%d" % (self.host,80)
1080 authinfo = urllib2.HTTPPasswordMgrWithDefaultRealm()
1081 authinfo.add_password (None, uri, self.username, self.password)
1082 authhandler = urllib2.HTTPBasicAuthHandler( authinfo )
1084 # NOTE: it doesn't seem to matter whether this authinfo is here or not.
1085 transport = urllib2.build_opener(authinfo)
1086 f = transport.open(self.url)
1087 if self.verbose: print f.read()
1090 transport = urllib2.build_opener(authhandler)
1091 f = transport.open(self.url + "cmd.html", "P%d=r" % node_port)
1092 if self.verbose: print f.read()
1097 class ePowerSwitchOld(PCUControl):
1098 supported_ports = [80]
1099 def run(self, node_port, dryrun):
1100 self.url = "http://%s:%d/" % (self.host,80)
1101 uri = "%s:%d" % (self.host,80)
1103 # TODO: I'm still not sure what the deal is here.
1104 # two independent calls appear to need to be made before the
1105 # reboot will succeed. It doesn't seem to be possible to do
1106 # this with a single call. I have no idea why.
1109 authinfo = urllib2.HTTPPasswordMgrWithDefaultRealm()
1110 authinfo.add_password (None, uri, self.username, self.password)
1111 authhandler = urllib2.HTTPBasicAuthHandler( authinfo )
1113 # NOTE: it doesn't seem to matter whether this authinfo is here or not.
1114 transport = urllib2.build_opener()
1115 f = transport.open(self.url + "elogin.html", "pwd=%s" % self.password)
1116 if self.verbose: print f.read()
1119 transport = urllib2.build_opener(authhandler)
1120 f = transport.open(self.url + "econtrol.html", "P%d=r" % node_port)
1121 if self.verbose: print f.read()
1123 # data= "P%d=r" % node_port
1124 #self.open(self.host, self.username, self.password)
1125 #self.sendHTTP("elogin.html", "pwd=%s" % self.password)
1126 #self.sendHTTP("econtrol.html", data)
1127 #self.sendHTTP("cmd.html", data)
1132 class ManualPCU(PCUControl):
1133 supported_ports = [22,23,80,443]
1135 def run(self, node_port, dryrun):
1137 # TODO: send email message to monitor admin requesting manual
1138 # intervention. This should always be an option for ridiculous,
1143 class PM211MIP(ManualPCU):
1144 supported_ports = [80,443]
1146 ### rebooting european BlackBox PSE boxes
1147 # Thierry Parmentelat - May 11 2005
1148 # tested on 4-ports models known as PSE505-FR
1149 # uses http to POST a data 'P<port>=r'
1150 # relies on basic authentication within http1.0
1151 # first curl-based script was
1152 # curl --http1.0 --basic --user <username>:<password> --data P<port>=r \
1153 # http://<hostname>:<http_port>/cmd.html && echo OK
1158 class BlackBoxPSMaverick(PCUControl):
1159 supported_ports = [80]
1161 def run(self, node_port, dryrun):
1163 # send reboot signal.
1164 cmd = "curl -s --data 'P%s=r' --anyauth --user '%s:%s' http://%s/config/home_f.html" % ( node_port, self.username, self.password, self.host)
1166 # else, just try to log in
1167 cmd = "curl -s --anyauth --user '%s:%s' http://%s/config/home_f.html" % ( self.username, self.password, self.host)
1171 print "RESULT: ", result
1173 if len(result.split()) > 3:
1178 def bbpse_reboot (pcu_ip,username,password,port_in_pcu,http_port, dryrun):
1182 url = "http://%s:%d/cmd.html" % (pcu_ip,http_port)
1183 data= "P%d=r" % port_in_pcu
1185 logger.debug("POSTing '%s' on %s" % (data,url))
1187 authinfo = urllib2.HTTPPasswordMgrWithDefaultRealm()
1188 uri = "%s:%d" % (pcu_ip,http_port)
1189 authinfo.add_password (None, uri, username, password)
1190 authhandler = urllib2.HTTPBasicAuthHandler( authinfo )
1192 opener = urllib2.build_opener(authhandler)
1193 urllib2.install_opener(opener)
1199 f = urllib2.urlopen(url,data)
1206 except urllib2.URLError,err:
1207 logger.info('Could not open http connection', err)
1208 return "bbpse error"
1210 ### rebooting x10toggle based systems addressed by port
1211 # Marc E. Fiuczynski - May 31 2005
1212 # tested on 4-ports models known as PSE505-FR
1213 # uses ssh and password to login to an account
1214 # that will cause the system to be powercycled.
1216 def x10toggle_reboot(ip, username, password, port, dryrun):
1221 ssh = pyssh.Ssh(username, ip)
1225 telnet_answer(ssh, "password:", password)
1229 telnet_answer(ssh, "x10toggle>", "A%d" % port)
1232 output = ssh.close()
1234 logger.debug(output)
1237 except Exception, err:
1241 output = ssh.close()
1243 logger.debug(output)
1244 return errno.ETIMEDOUT
1246 ### rebooting Dell systems via RAC card
1247 # Marc E. Fiuczynski - June 01 2005
1248 # tested with David Lowenthal's itchy/scratchy nodes at UGA
# Run an external command in a worker thread, feed it a username and a
# password on stdin, and wait up to `timeout` seconds for its result.
#
# command  -- executable to run
# args     -- list of argument strings; joined to `command` with spaces
# username -- first line written to the child's stdin
# password -- second line written to the child's stdin
# timeout  -- seconds to wait on the condition; None waits indefinitely
#
# Raises Exception when no result arrived in time, and Exception with the
# captured output when the child exited non-zero or was killed by a signal.
# NOTE(review): this listing omits lines of the function (the nested
# result-setting helper's header, the thread start, the success return and
# several try/except headers), so the notes below cover only the visible
# statements.
1251 def runcmd(command, args, username, password, timeout = None):
# Condition used to hand the worker's result back to the calling thread.
1254 result_ready = threading.Condition()
# Visible part of the result-setting helper: take the lock, wake the
# waiter, release the lock.
1258 result_ready.acquire()
1262 result_ready.notify()
1263 result_ready.release()
# Worker body: runs the command and publishes (wait status, output).
1265 def do_command(command, username, password):
1268 # Popen4 is a popen-type class that combines stdout and stderr
1269 p = popen2.Popen4(command)
1271 # read all output data
# The credentials go to the child on stdin, one per line, rather than on
# the command line.
1272 p.tochild.write("%s\n" % username)
1273 p.tochild.write("%s\n" % password)
# NOTE(review): no close of p.tochild is visible before this blocking read;
# a child that waits for EOF on stdin would hang here -- confirm against
# the elided lines.
1275 data = p.fromchild.read()
1278 # might get interrupted by a signal in poll() or waitpid()
1281 set_result((retval, data))
# EINTR is singled out from other OS errors; presumably the wait is
# retried in that case -- TODO confirm against the elided loop.
1284 if ex.errno == errno.EINTR:
1287 except Exception, ex:
# From here on `command` is the full command line as one string.
1291 command = " ".join([command] + args)
1293 worker = threading.Thread(target = do_command, args = (command, username, password, ))
# Daemon thread: it will not keep the interpreter alive if the command
# outlives the timeout.
1294 worker.setDaemon(True)
1295 result_ready.acquire()
1297 result_ready.wait(timeout)
# `result` still holding its initial [None] value means the worker never
# published anything before the wait returned.
1299 if result == [None]:
# NOTE(review): this raise precedes the visible release() below; unless the
# elided lines wrap it in try/finally, the condition's lock stays held when
# the timeout exception propagates.
1300 raise Exception, "command timed-out: '%s'" % command
1302 result_ready.release()
# The worker may publish an exception object instead of a result tuple.
1305 if isinstance(result, Exception):
# retval is a wait()-style status word, decoded with the os.W* helpers.
1308 (retval, data) = result
1309 if os.WIFEXITED(retval) and os.WEXITSTATUS(retval) == 0:
# Failure path: build a message that includes the exit code or the killing
# signal followed by the captured output.
1312 out = "system command ('%s') " % command
1313 if os.WIFEXITED(retval):
1314 out += "failed, rc = %d" % os.WEXITSTATUS(retval)
1316 out += "killed by signal %d" % os.WTERMSIG(retval)
1318 out += "; output follows:\n" + data
1319 raise Exception, out
# Drive a Dell RAC card through the /usr/sbin/racadm command-line tool.
#
# host               -- PCU host name; resolved to an IP address first
# username, password -- forwarded to runcmd(), which writes them to
#                       racadm's stdin
# port               -- not referenced in the visible lines
# dryrun             -- presumably selects the harmless "getsysinfo" query
#                       instead of "serveraction powercycle" -- TODO confirm
#                       against the elided branch
#
# The return values are not visible in this listing.
1321 def racadm_reboot(host, username, password, port, dryrun):
# gethostbyname() raises socket.gaierror when the name cannot be resolved.
1324 ip = socket.gethostbyname(host)
1326 cmd = "/usr/sbin/racadm"
# The whole option string is passed as a single list element; runcmd()
# joins the list with spaces, so it ends up as ordinary separate options.
1329 output = runcmd(cmd, ["-r %s -i serveraction powercycle" % ip],
1332 output = runcmd(cmd, ["-r %s -i getsysinfo" % ip],
1335 print "RUNCMD: %s" % output
1337 logger.debug(output)
# Any failure from runcmd (timeout, non-zero exit) is logged at debug level.
1340 except Exception, err:
1341 logger.debug("runcmd raised exception %s" % err)
1347 if pcu['hostname'] is not None and pcu['hostname'] is not "":
1348 return pcu['hostname']
1349 elif pcu['ip'] is not None and pcu['ip'] is not "":
# Load the most recent FindbadPCURecord for a PCU and convert it to a dict.
#
# pcu_id -- PLC PCU identifier used as the plc_pcuid filter
#
# The return statement and any error handling are not visible in this
# listing.
1355 def get_pcu_values(pcu_id):
1356 print "pcuid: %s" % pcu_id
1358 pcurec = FindbadPCURecord.get_latest_by(plc_pcuid=pcu_id).first()
# NOTE(review): if first() yields None when no record matches, this call
# raises AttributeError; presumably an elided try/except turns that into a
# "no values" result for the callers -- TODO confirm.
1360 values = pcurec.to_dict()
# Convenience entry point: reboot `nodename` for real.
# Delegates to reboot_policy() with continue_probe=True and dryrun=False and
# returns whatever that call returns.
1368 def reboot(nodename):
1369 return reboot_policy(nodename, True, False)
# Look up the PCU that controls `nodename` and run reboot_test_new() on it.
#
# Returns False when the node has no PCU or when no probe values exist for
# it.  The trailing comments on those returns show the message strings that
# are not being returned.  The final return is not visible in this listing.
# NOTE(review): `verbose` and `dryrun` are neither parameters nor assigned
# in the visible lines; they are presumably module globals or set in elided
# lines -- TODO confirm, otherwise the reboot_test_new() call raises
# NameError.
1371 def reboot_str(nodename):
# Assigned but not used by any visible statement of this function.
1373 continue_probe = True
1376 pcu = plc.getpcu(nodename)
1378 logger.debug("no pcu for %s" % nodename)
1379 print "no pcu for %s" % nodename
1380 return False # "%s has no pcu" % nodename
1382 values = get_pcu_values(pcu['pcu_id'])
1384 logger.debug("No values for pcu probe %s" % nodename)
1385 print "No values for pcu probe %s" % nodename
1386 return False #"no info for pcu_id %s" % pcu['pcu_id']
1389 logger.debug("Trying PCU %s %s" % (pcu['hostname'], pcu['model']))
1391 ret = reboot_test_new(nodename, values, verbose, dryrun)
# Look up the PCU that controls `nodename` and run reboot_test_new() on it.
#
# nodename       -- hostname of the node to reboot
# continue_probe -- accepted, but not forwarded to reboot_test_new() in the
#                   visible lines
# dryrun         -- forwarded to reboot_test_new()
#
# Returns False when the node has no PCU or when no probe values exist for
# it.  The final return is not visible in this listing.
# NOTE(review): `verbose` is neither a parameter nor assigned in the visible
# lines; it is presumably a module global or declared in an elided line --
# TODO confirm, otherwise the reboot_test_new() call raises NameError.
1394 def reboot_policy(nodename, continue_probe, dryrun):
1397 pcu = plc.getpcu(nodename)
1399 logger.debug("no pcu for %s" % nodename)
1400 print "no pcu for %s" % nodename
1401 return False # "%s has no pcu" % nodename
1403 values = get_pcu_values(pcu['pcu_id'])
1405 logger.debug("No values for pcu probe %s" % nodename)
1406 print "No values for pcu probe %s" % nodename
1407 return False #"no info for pcu_id %s" % pcu['pcu_id']
1410 logger.debug("Trying PCU %s %s" % (pcu['hostname'], pcu['model']))
1412 ret = reboot_test_new(nodename, values, verbose, dryrun)
# PCUControl subclass that adds nothing but a list of ports.
# Presumably the placeholder used for PCU models that no specific driver
# recognises -- TODO confirm where it is returned.
1421 class Unknown(PCUControl):
1422 supported_ports = [22,23,80,443,5869,9100,16992]
# Map a PCU model string to the control class that handles it.
#
# Matching is by case-sensitive substring (`in`), and the elif chain means
# the first matching test wins, so the order of the tests matters for model
# names that contain more than one of these substrings.
# NOTE(review): most of the return statements are omitted from this
# listing; only the ePowerSwitch and BlackBoxPSMaverick results are visible.
1424 def model_to_object(modelname):
# None is handled before any substring test, which would otherwise raise
# TypeError.
1425 if modelname is None:
1427 if "AMT" in modelname:
1429 elif "BayTech" in modelname:
1431 elif "HPiLO" in modelname:
1433 elif "IPAL" in modelname:
1435 elif "APC" in modelname:
1437 elif "DRAC" in modelname:
1439 elif "WTI" in modelname:
1441 elif "ePowerSwitch" in modelname:
1442 return ePowerSwitchNew
1443 elif "IPMI" in modelname:
1445 elif "BlackBoxPSMaverick" in modelname:
1446 return BlackBoxPSMaverick
1447 elif "PM211MIP" in modelname:
1449 elif "ManualPCU" in modelname:
# Fallback for a model that matched none of the tests above.
1452 print "UNKNOWN model %s"%modelname
# Instantiate the control class named by values['model'] and ask it to
# reboot the port recorded for `nodename`.
#
# nodename -- key into `values` whose entry is passed to reboot(); presumably
#             the node's hostname mapped to its PCU port -- TODO confirm
# values   -- dict of PCU probe values; modified in place (see below)
# verbose  -- forwarded to the control class constructor
# dryrun   -- forwarded to reboot()
#
# The return statements and the try header are not visible in this listing.
1455 def reboot_test_new(nodename, values, verbose, dryrun):
# Flatten the nested PLC stats into the top level of the caller's dict.
1457 if 'plc_pcu_stats' in values:
1458 values.update(values['plc_pcu_stats'])
1461 modelname = values['model']
# NOTE(review): the class is resolved by eval() on a string built from
# values['model'].  If that field comes from stored or remote data, this
# executes whatever expression it contains; an explicit name-to-class lookup
# (this file defines model_to_object) would avoid evaluating data as code.
# The local name `object` also shadows the builtin inside this function.
1463 object = eval('%s(values, verbose, ["22", "23", "80", "443", "9100", "16992", "5869"])' % modelname)
1464 rb_ret = object.reboot(values[nodename], dryrun)
1467 # TODO: how to handle the weird, georgetown pcus, the drac faults, and ilo faults
1468 except ExceptionPort, err:
# Choose a control class from values['model'] (substring tests) and from
# hard-coded pcu_id lists, build it with (values, verbose, list of port
# strings) and call its reboot().
#
# nodename       -- key into `values`; values[nodename] is passed as the port
#                   in most branches
# values         -- dict of PCU probe values; modified in place (see below)
# continue_probe -- every model branch is guarded by it; when it is False only
#                   the final 'port_status' branch applies
# verbose        -- forwarded to each control class constructor
# dryrun         -- forwarded to each reboot() call
#
# NOTE(review): this listing omits lines of the function (the try header,
# the else/except lines between alternative constructions, and the return
# statements).  Where one branch shows several constructions in a row they
# are presumably successive fallbacks -- TODO confirm.
1474 def reboot_test(nodename, values, continue_probe, verbose, dryrun):
# Flatten the nested PLC stats into the top level of the caller's dict.
1476 if 'plc_pcu_stats' in values:
1477 values.update(values['plc_pcu_stats'])
1480 # DataProbe iPal (many sites)
1481 if continue_probe and values['model'].find("IP-41x_IP-81x") >= 0:
1482 ipal = IPAL(values, verbose, ['23', '80', '9100'])
1483 rb_ret = ipal.reboot(values[nodename], dryrun)
1485 # APC Masterswitch (Berkeley)
1486 elif continue_probe and ( values['model'].find("AP79xx") >= 0 or \
1487 values['model'].find("Masterswitch") >= 0 ):
# The APC variant is picked per individual PCU through hard-coded pcu_id
# lists.
1490 # TODO: make a more robust version of APC
1491 if values['pcu_id'] in [1102,1163,1055,1111,1231,1113,1127,1128,1148]:
1492 apc = APCControl12p3(values, verbose, ['22', '23'])
1493 rb_ret = apc.reboot(values[nodename], dryrun)
1495 elif values['pcu_id'] in [1110,86]:
1496 apc = APCControl1p4(values, verbose, ['22', '23'])
1497 rb_ret = apc.reboot(values[nodename], dryrun)
1499 elif values['pcu_id'] in [1221,1225,1220,1192]:
1500 apc = APCControl121p3(values, verbose, ['22', '23'])
1501 rb_ret = apc.reboot(values[nodename], dryrun)
1503 elif values['pcu_id'] in [1173,1240,47,1363,1405,1401,1372,1371]:
1504 apc = APCControl121p1(values, verbose, ['22', '23'])
1505 rb_ret = apc.reboot(values[nodename], dryrun)
1508 apc = APCControl13p13(values, verbose, ['22', '23'])
1509 rb_ret = apc.reboot(values[nodename], dryrun)
# BayTech units, again special-cased by pcu_id.
1512 elif continue_probe and values['model'].find("DS4-RPC") >= 0:
1513 if values['pcu_id'] in [1056,1237,1052,1209,1002,1008,1041,1013,1022]:
1514 # These require a 'ctrl-c' to be sent...
1515 baytech = BayTechCtrlC(values, verbose, ['22', '23'])
1516 rb_ret = baytech.reboot(values[nodename], dryrun)
1518 elif values['pcu_id'] in [93]:
1519 baytech = BayTechRPC3NC(values, verbose, ['22', '23'])
1520 rb_ret = baytech.reboot(values[nodename], dryrun)
1522 elif values['pcu_id'] in [1057]:
1523 # These require a 'ctrl-c' to be sent...
1524 baytech = BayTechCtrlCUnibe(values, verbose, ['22', '23'])
1525 rb_ret = baytech.reboot(values[nodename], dryrun)
1527 elif values['pcu_id'] in [1012]:
1528 # This pcu sometimes doesn't present the 'Username' prompt,
1529 # unless you immediately try again...
1531 baytech = BayTechRPC16(values, verbose, ['22', '23'])
1532 rb_ret = baytech.reboot(values[nodename], dryrun)
1534 baytech = BayTechRPC16(values, verbose, ['22', '23'])
1535 rb_ret = baytech.reboot(values[nodename], dryrun)
1537 baytech = BayTech(values, verbose, ['22', '23'])
1538 rb_ret = baytech.reboot(values[nodename], dryrun)
# HP iLO: reboot() is called with port 0 rather than values[nodename].
1541 elif continue_probe and values['model'].find("ilo") >= 0:
1543 hpilo = HPiLO(values, verbose, ['22'])
1544 rb_ret = hpilo.reboot(0, dryrun)
1546 hpilo = HPiLOHttps(values, verbose, ['443'])
1547 rb_ret = hpilo.reboot(0, dryrun)
1549 hpilo = HPiLOHttps(values, verbose, ['443'])
1550 rb_ret = hpilo.reboot(0, dryrun)
# Dell DRAC: also called with port 0.
1553 elif continue_probe and values['model'].find("DRAC") >= 0:
1554 # TODO: I don't think DRAC will throw an exception for the
1555 # default method to catch...
1557 drac = DRAC(values, verbose, ['443', '5869'])
1558 rb_ret = drac.reboot(0, dryrun)
1560 drac = DRACDefault(values, verbose, ['22'])
1561 rb_ret = drac.reboot(0, dryrun)
1563 elif continue_probe and values['model'].find("WTI IPS-4") >= 0:
1564 wti = WTIIPS4(values, verbose, ['23'])
1565 rb_ret = wti.reboot(values[nodename], dryrun)
1567 elif continue_probe and values['model'].find("AMT") >= 0:
1568 amt = IntelAMT(values, verbose, ['16992'])
1569 rb_ret = amt.reboot(values[nodename], dryrun)
1571 elif continue_probe and values['model'].find("bbsemaverick") >=0:
1572 print "TRYING BlackBoxPSMaverick"
1573 bbe = BlackBoxPSMaverick(values, verbose, ['80'])
1574 rb_ret = bbe.reboot(values[nodename], dryrun)
1576 elif continue_probe and values['model'].find("ipmi") >=0:
1579 ipmi = IPMI(values, verbose, ['80', '443', '623'])
1580 rb_ret = ipmi.reboot(values[nodename], dryrun)
1582 elif continue_probe and values['model'].find("ePowerSwitch") >=0:
1583 # TODO: allow a different port than http 80.
1584 if values['pcu_id'] in [1089, 1071, 1046, 1035, 1118]:
1585 eps = ePowerSwitchNew(values, verbose, ['80'])
1586 elif values['pcu_id'] in [1003]:
1589 eps = ePowerSwitchOld(values, verbose, ['80'])
1591 eps = ePowerSwitchNew(values, verbose, ['80'])
1593 rb_ret = eps.reboot(values[nodename], dryrun)
# This branch keys on pcu_id alone, not on the model string.
1594 elif continue_probe and values['pcu_id'] in [1122]:
1595 custom = CustomPCU(values, verbose, ['80', '443'])
# NOTE(review): unlike every other branch, the result of reboot() is not
# stored in rb_ret here, so rb_ret keeps whatever value it had before (or is
# unbound) when this branch runs -- likely an oversight.
1596 custom.reboot(values[nodename], dryrun)
# Probing was requested but no branch recognised the model.
1598 elif continue_probe:
1599 rb_ret = "Unsupported_PCU"
# Probing disabled: only the stored 'port_status' data is consulted.
1601 elif continue_probe == False:
1602 if 'port_status' in values:
1609 except ExceptionPort, err:
# Disabled racadm dispatch, kept as commented-out code.
1614 #elif continue_probe and values['protocol'] == "racadm" and \
1615 # values['model'] == "RAC":
1616 # rb_ret = racadm_reboot(pcu_name(values),
1617 # values['username'],
1618 # values['password'],
1623 logger.setLevel(logging.DEBUG)
1624 ch = logging.StreamHandler()
1625 ch.setLevel(logging.DEBUG)
1626 formatter = logging.Formatter('LOGGER - %(message)s')
1627 ch.setFormatter(formatter)
1628 logger.addHandler(ch)
1631 if "test" in sys.argv:
1636 for node in sys.argv[1:]:
1637 if node == "test": continue
1639 print "Rebooting %s" % node
1640 if reboot_policy(node, True, dryrun):
1644 except Exception, err:
1645 import traceback; traceback.print_exc()
1648 if __name__ == '__main__':
1649 logger = logging.getLogger("monitor")