3 # Reboot specified nodes
9 import errno, time, traceback
12 import threading, popen2
14 from monitor.wrapper import plc
16 from subprocess import PIPE, Popen
17 import pcucontrol.transports.ssh.pxssh as pxssh
18 import pcucontrol.transports.ssh.pexpect as pexpect
20 from monitor.util import command
22 # Use our versions of telnetlib and pyssh
23 sys.path.insert(0, os.path.dirname(sys.argv[0]))
24 import pcucontrol.transports.telnetlib as telnetlib
25 sys.path.insert(0, os.path.dirname(sys.argv[0]) + "/pyssh")
26 import pcucontrol.transports.pyssh as pyssh
31 # Event class ID from pcu events
32 #NODE_POWER_CONTROL = 3
35 #MONITOR_USER_ID = 11142
38 logger = logging.getLogger("monitor")
class ExceptionNoTransport(Exception):
    """Raised by ifThenSend when no transport object has been set."""
class ExceptionNotFound(Exception):
    """Raised when the expected text is not found in the output read back."""
class ExceptionPassword(Exception):
    """Raised when a password prompt is missing or the password is rejected."""
class ExceptionTimeout(Exception):
    """Error class handed to ifElse for text not seen within the timeout."""
class ExceptionPrompt(Exception):
    """Default error class of ifThenSend; also raised for prompt failures
    such as 'Port in use', EOF, or a timeout before the expected prompt."""
class ExceptionSequence(Exception):
    """Raised when a step of a PCU's reboot command sequence fails."""
class ExceptionReset(Exception):
    """PCU error type; presumably for reset failures.

    NOTE(review): no code in the visible part of this file raises or
    catches it -- confirm whether it is still needed.
    """
class ExceptionPort(Exception):
    """Raised when none of a PCU's open ports maps to a supported transport."""
class ExceptionUsername(Exception):
    """Raised when the username prompt is not seen while opening a transport."""
52 def telnet_answer(telnet, expected, buffer):
55 output = telnet.read_until(expected, TELNET_TIMEOUT)
57 # logger.debug(output)
58 if output.find(expected) == -1:
59 raise ExceptionNotFound, "'%s' not found" % expected
61 telnet.write(buffer + "\r\n")
64 # PCU has model, host, preferred-port, user, passwd,
66 # This is an object derived directly from the PLCAPI DB fields
68 def __init__(self, plc_pcu_dict):
69 for field in ['username', 'password', 'site_id',
72 'node_ids', 'ports', ]:
73 if field in plc_pcu_dict:
74 self.__setattr__(field, plc_pcu_dict[field])
76 raise Exception("No such field %s in PCU object" % field)
78 # These are the convenience functions built around the PCU object.
80 def __init__(self, plc_pcu_dict):
81 PCU.__init__(self, plc_pcu_dict)
82 self.host = self.pcu_name()
85 if self.hostname is not None and self.hostname is not "":
87 elif self.ip is not None and self.ip is not "":
92 def nodeidToPort(self, node_id):
93 if node_id in self.node_ids:
94 for i in range(0, len(self.node_ids)):
95 if node_id == self.node_ids[i]:
98 raise Exception("No such Node ID: %d" % node_id)
100 # This class captures the observed pcu records from FindBadPCUs.py
102 def __init__(self, pcu_record_dict):
103 for field in ['nodenames', 'portstatus',
106 if field in pcu_record_dict:
107 if field == "reboot":
108 self.__setattr__("reboot_str", pcu_record_dict[field])
110 self.__setattr__(field, pcu_record_dict[field])
112 raise Exception("No such field %s in pcu record dict" % field)
122 def __init__(self, type, verbose):
124 self.verbose = verbose
125 self.transport = None
127 def open(self, host, username=None, password=None, prompt="User Name"):
130 if self.type == self.TELNET:
131 transport = telnetlib.Telnet(host, timeout=self.TELNET_TIMEOUT)
132 transport.set_debuglevel(self.verbose)
133 if username is not None:
134 self.transport = transport
135 self.ifThenSend(prompt, username, ExceptionUsername)
137 elif self.type == self.SSH:
138 if username is not None:
139 transport = pyssh.Ssh(username, host)
140 transport.set_debuglevel(self.verbose)
142 # TODO: have an ssh set_debuglevel() also...
144 raise Exception("Username cannot be None for ssh transport.")
145 elif self.type == self.HTTP:
146 self.url = "http://%s:%d/" % (host,80)
147 uri = "%s:%d" % (host,80)
150 authinfo = urllib2.HTTPPasswordMgrWithDefaultRealm()
151 authinfo.add_password (None, uri, username, password)
152 authhandler = urllib2.HTTPBasicAuthHandler( authinfo )
154 transport = urllib2.build_opener(authhandler)
157 raise Exception("Unknown transport type: %s" % self.type)
159 self.transport = transport
163 if self.type == self.TELNET:
164 self.transport.close()
165 elif self.type == self.SSH:
166 self.transport.close()
167 elif self.type == self.HTTP:
170 raise Exception("Unknown transport type %s" % self.type)
171 self.transport = None
173 def sendHTTP(self, resource, data):
175 print "POSTing '%s' to %s" % (data,self.url + resource)
178 f = self.transport.open(self.url + resource ,data)
183 except urllib2.URLError,err:
184 logger.info('Could not open http connection', err)
185 return "http transport error"
189 def sendPassword(self, password, prompt=None):
190 if self.type == self.TELNET:
192 self.ifThenSend("Password", password, ExceptionPassword)
194 self.ifThenSend(prompt, password, ExceptionPassword)
195 elif self.type == self.SSH:
196 self.ifThenSend("password:", password, ExceptionPassword)
197 elif self.type == self.HTTP:
200 raise Exception("Unknown transport type: %s" % self.type)
202 def ifThenSend(self, expected, buffer, ErrorClass=ExceptionPrompt):
204 if self.transport != None:
205 output = self.transport.read_until(expected, self.TELNET_TIMEOUT)
206 if output.find(expected) == -1:
207 print "OUTPUT: --%s--" % output
208 raise ErrorClass, "'%s' not found" % expected
210 self.transport.write(buffer + "\r\n")
212 raise ExceptionNoTransport("transport object is type None")
214 def ifElse(self, expected, ErrorClass):
216 self.transport.read_until(expected, self.TELNET_TIMEOUT)
218 raise ErrorClass("Could not find '%s' within timeout" % expected)
221 class PCUControl(Transport,PCUModel,PCURecord):
225 def __init__(self, plc_pcu_record, verbose, supported_ports=[]):
226 PCUModel.__init__(self, plc_pcu_record)
227 PCURecord.__init__(self, plc_pcu_record)
230 if '22' in supported_ports and self.portstatus['22'] == "open":
232 elif '23' in supported_ports and self.portstatus['23'] == "open":
233 type = Transport.TELNET
234 elif '80' in supported_ports and self.portstatus['80'] == "open":
235 type = Transport.HTTP
236 elif '443' in supported_ports and self.portstatus['443'] == "open":
237 type = Transport.HTTP
238 elif '5869' in supported_ports and self.portstatus['5869'] == "open":
239 # For DRAC cards. Racadm opens this port.
240 type = Transport.HTTP
241 elif '9100' in supported_ports and self.portstatus['9100'] == "open":
242 type = Transport.IPAL
243 elif '16992' in supported_ports and self.portstatus['16992'] == "open":
244 type = Transport.HTTP
246 raise ExceptionPort("Unsupported Port: No transport from open ports")
248 raise Exception("No Portstatus: No transport because no open ports")
249 Transport.__init__(self, type, verbose)
251 def run(self, node_port, dryrun):
252 """ This function is to be defined by the specific PCU instance. """
255 def reboot(self, node_port, dryrun):
257 return self.run(node_port, dryrun)
258 except ExceptionNotFound, err:
259 return "error: " + str(err)
260 except ExceptionPassword, err:
261 return "password exception: " + str(err)
262 except ExceptionTimeout, err:
263 return "timeout exception: " + str(err)
264 except ExceptionUsername, err:
265 return "exception: no username prompt: " + str(err)
266 except ExceptionSequence, err:
267 return "sequence error: " + str(err)
268 except ExceptionPrompt, err:
269 return "prompt exception: " + str(err)
270 except ExceptionPort, err:
271 return "no ports exception: " + str(err)
272 except socket.error, err:
273 return "socket error: timeout: " + str(err)
274 except EOFError, err:
276 logger.debug("reboot: EOF")
278 self.transport.close()
280 traceback.print_exc()
281 return "EOF connection reset" + str(err)
283 class IPMI(PCUControl):
285 supported_ports = [80,443,623]
287 # TODO: get exit codes to determine success or failure...
288 def run(self, node_port, dryrun):
291 cmd = "ipmitool -I lanplus -H %s -U %s -P '%s' power cycle"
292 p = os.popen(cmd % ( self.host, self.username, self.password) )
294 print "RESULT: ", result
296 cmd = "ipmitool -I lanplus -H %s -U %s -P '%s' user list"
297 p = os.popen(cmd % ( self.host, self.username, self.password) )
299 print "RESULT: ", result
301 if "Error" in result:
306 class IPAL(PCUControl):
308 This now uses a proprietary format for communicating with the PCU. I
309 prefer it to Telnet, and Web access, since it's much lighter weight
310 and, more importantly, IT WORKS!! HHAHHHAHAHAHAHAHA!
312 supported_ports = [23,80,9100]
def format_msg(self, data, cmd):
    """Build one request string: ESC, password, ESC, data, then cmd.

    `cmd` is rendered with %c; the callers visible in this file pass the
    single characters 'O', 'P' and 'E'.
    """
    # "\x1b" is the ESC character, the same value as chr(int('1b', 16)).
    fields = ("\x1b", self.password, "\x1b", data, cmd)
    return "%c%s%c%s%c" % fields
318 def recv_noblock(self, s, count):
322 # TODO: make sleep backoff, before stopping.
324 ret = s.recv(count, socket.MSG_DONTWAIT)
325 except socket.error, e:
326 if e[0] == errno.EAGAIN:
327 raise Exception(e[1])
329 # TODO: not other exceptions.
333 def run(self, node_port, dryrun):
334 if self.type == Transport.IPAL:
335 return self.run_ipal(node_port, dryrun)
336 elif self.type == Transport.TELNET:
337 return self.run_telnet(node_port, dryrun)
339 raise Exception("Unimplemented Transport for IPAL")
341 def run_telnet(self, node_port, dryrun):
342 # TELNET version of protocol...
344 ## XXX Some iPals require you to hit Enter a few times first
345 self.ifThenSend("Password >", "\r\n\r\n", ExceptionNotFound)
347 self.ifThenSend("Password >", self.password, ExceptionPassword)
348 self.transport.write("\r\n\r\n")
349 if not dryrun: # P# - Pulse relay
350 print "node_port %s" % node_port
351 self.ifThenSend("Enter >",
354 print "send newlines"
355 self.transport.write("\r\n\r\n")
356 print "after new lines"
357 # Get the next prompt
358 print "wait for enter"
359 self.ifElse("Enter >", ExceptionTimeout)
364 def run_ipal(self, node_port, dryrun):
370 s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
373 s.connect((self.host, 9100))
374 except socket.error, e:
376 if e[0] == errno.ECONNREFUSED:
377 # cannot connect to remote host
378 raise Exception(e[1])
380 # TODO: what other conditions are there?
384 print "Checking status"
385 s.send(self.format_msg("", 'O'))
386 ret = self.recv_noblock(s, 8)
387 print "Current status is '%s'" % ret
390 raise Exception("Status returned 'another session already open' %s : %s" % (node_port, ret))
392 if node_port < len(ret):
393 status = ret[node_port]
401 raise Exception("Unknown status for PCU socket %s : %s" % (node_port, ret))
403 raise Exception("Mismatch between configured port and PCU status: %s %s" % (node_port, ret))
408 print "Pulsing %s" % node_port
409 s.send(self.format_msg("%s" % node_port, 'P'))
411 # NOTE: turn power on ; do not pulse the port.
412 print "Power was off, so turning on ..."
413 s.send(self.format_msg("%s" % node_port, 'E'))
414 #s.send(self.format_msg("%s" % node_port, 'P'))
416 print "Receiving response."
417 ret = self.recv_noblock(s, 8)
418 print "Current status is '%s'" % ret
420 if node_port < len(ret):
421 status = ret[node_port]
429 raise Exception("Unknown status for PCU socket %s : %s" % (node_port, ret))
431 raise Exception("Mismatch between configured port and PCU status: %s %s" % (node_port, ret))
436 return "Failed Power On"
442 class APCEurope(PCUControl):
443 def run(self, node_port, dryrun):
444 self.open(self.host, self.username)
445 self.sendPassword(self.password)
447 self.ifThenSend("\r\n> ", "1", ExceptionPassword)
448 self.ifThenSend("\r\n> ", "2")
449 self.ifThenSend("\r\n> ", str(node_port))
450 # 3- Immediate Reboot
451 self.ifThenSend("\r\n> ", "3")
454 self.ifThenSend("Enter 'YES' to continue or <ENTER> to cancel",
458 self.ifThenSend("Enter 'YES' to continue or <ENTER> to cancel",
459 "", ExceptionSequence)
460 self.ifThenSend("Press <ENTER> to continue...", "", ExceptionSequence)
465 class APCBrazil(PCUControl):
466 def run(self, node_port, dryrun):
467 self.open(self.host, self.username)
468 self.sendPassword(self.password)
470 self.ifThenSend("\r\n> ", "1", ExceptionPassword)
471 self.ifThenSend("\r\n> ", str(node_port))
472 # 4- Immediate Reboot
473 self.ifThenSend("\r\n> ", "4")
476 self.ifThenSend("Enter 'YES' to continue or <ENTER> to cancel",
480 self.ifThenSend("Enter 'YES' to continue or <ENTER> to cancel",
481 "", ExceptionSequence)
482 self.ifThenSend("Press <ENTER> to continue...", "", ExceptionSequence)
487 class APCBerlin(PCUControl):
488 def run(self, node_port, dryrun):
489 self.open(self.host, self.username)
490 self.sendPassword(self.password)
492 self.ifThenSend("\r\n> ", "1", ExceptionPassword)
493 self.ifThenSend("\r\n> ", "2")
494 self.ifThenSend("\r\n> ", "1")
495 self.ifThenSend("\r\n> ", str(node_port))
496 # 3- Immediate Reboot
497 self.ifThenSend("\r\n> ", "3")
500 self.ifThenSend("Enter 'YES' to continue or <ENTER> to cancel",
504 self.ifThenSend("Enter 'YES' to continue or <ENTER> to cancel",
505 "", ExceptionSequence)
506 self.ifThenSend("Press <ENTER> to continue...", "", ExceptionSequence)
511 class APCFolsom(PCUControl):
512 def run(self, node_port, dryrun):
513 self.open(self.host, self.username)
514 self.sendPassword(self.password)
516 self.ifThenSend("\r\n> ", "1", ExceptionPassword)
517 self.ifThenSend("\r\n> ", "2")
518 self.ifThenSend("\r\n> ", "1")
519 self.ifThenSend("\r\n> ", str(node_port))
520 self.ifThenSend("\r\n> ", "1")
522 # 3- Immediate Reboot
523 self.ifThenSend("\r\n> ", "3")
526 self.ifThenSend("Enter 'YES' to continue or <ENTER> to cancel",
530 self.ifThenSend("Enter 'YES' to continue or <ENTER> to cancel",
531 "", ExceptionSequence)
532 self.ifThenSend("Press <ENTER> to continue...", "", ExceptionSequence)
537 class APCMaster(PCUControl):
538 supported_ports = [22,23]
539 def run(self, node_port, dryrun):
540 print "Rebooting %s" % self.host
541 self.open(self.host, self.username)
542 self.sendPassword(self.password)
545 self.ifThenSend("\r\n> ", "1", ExceptionPassword)
546 # 3- Outlet Control/Config
547 self.ifThenSend("\r\n> ", "3")
549 self.ifThenSend("\r\n> ", str(node_port))
551 self.ifThenSend("\r\n> ", "1")
552 # 3- Immediate Reboot
553 self.ifThenSend("\r\n> ", "3")
556 self.ifThenSend("Enter 'YES' to continue or <ENTER> to cancel",
560 self.ifThenSend("Enter 'YES' to continue or <ENTER> to cancel",
561 "", ExceptionSequence)
562 self.ifThenSend("Press <ENTER> to continue...", "", ExceptionSequence)
567 class APC(PCUControl):
568 def __init__(self, plc_pcu_record, verbose):
569 PCUControl.__init__(self, plc_pcu_record, verbose)
571 self.master = APCMaster(plc_pcu_record, verbose)
572 self.folsom = APCFolsom(plc_pcu_record, verbose)
573 self.europe = APCEurope(plc_pcu_record, verbose)
575 def run(self, node_port, dryrun):
579 for pcu in [self.master, self.europe, self.folsom]:
582 print "-*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*"
585 time.sleep(sleep_time)
586 ret = pcu.reboot(node_port, dryrun)
587 except ExceptionSequence, err:
593 return "Unknown reboot sequence for APC PCU"
597 class IntelAMT(PCUControl):
598 supported_ports = [16992]
600 def run(self, node_port, dryrun):
603 # TODO: need to make this path universal; not relative to pwd.
604 cmd_str = "pcucontrol/models/intelamt/remoteControl"
607 # NOTE: -p checks the power state of the host.
608 # TODO: parse the output to find out if it's ok or not.
609 cmd_str += " -p http://%s:16992/RemoteControlService -user admin -pass '%s' " % (self.host, self.password )
611 cmd_str += " -A http://%s:16992/RemoteControlService -user admin -pass '%s' " % (self.host, self.password )
614 return cmd.system(cmd_str, self.TELNET_TIMEOUT)
616 class DRACRacAdm(PCUControl):
617 def run(self, node_port, dryrun):
619 print "trying racadm_reboot..."
620 racadm_reboot(self.host, self.username, self.password, node_port, dryrun)
624 class DRAC(PCUControl):
625 supported_ports = [22,443,5869]
626 def run(self, node_port, dryrun):
627 self.open(self.host, self.username)
628 self.sendPassword(self.password)
630 print "logging in..."
631 self.transport.write("\r\n")
634 self.ifThenSend("[%s]#" % self.username, "getsysinfo")
637 self.ifThenSend("[%s]#" % self.username, "serveraction powercycle")
639 self.ifThenSend("[%s]#" % self.username, "exit")
644 class HPiLO(PCUControl):
645 supported_ports = [22,443]
646 def run(self, node_port, dryrun):
647 self.open(self.host, self.username)
648 self.sendPassword(self.password)
651 self.ifThenSend("</>hpiLO->", "cd system1")
653 # Reboot Outlet N (Y/N)?
655 self.ifThenSend("</system1>hpiLO->", "POWER")
658 self.ifThenSend("</system1>hpiLO->", "reset")
660 self.ifThenSend("</system1>hpiLO->", "exit")
666 class HPiLOHttps(PCUControl):
667 supported_ports = [22,443]
668 def run(self, node_port, dryrun):
670 locfg = command.CMD()
671 cmd = "cmdhttps/locfg.pl -s %s -f %s -u %s -p '%s' | grep 'MESSAGE' | grep -v 'No error'" % (
672 self.host, "iloxml/Get_Network.xml",
673 self.username, self.password)
674 sout, serr = locfg.run_noexcept(cmd)
676 if sout.strip() != "":
677 print "sout: %s" % sout.strip()
681 locfg = command.CMD()
682 cmd = "cmdhttps/locfg.pl -s %s -f %s -u %s -p '%s' | grep 'MESSAGE' | grep -v 'No error'" % (
683 self.host, "iloxml/Reset_Server.xml",
684 self.username, self.password)
685 sout, serr = locfg.run_noexcept(cmd)
687 if sout.strip() != "":
688 print "sout: %s" % sout.strip()
692 class BayTechAU(PCUControl):
693 def run(self, node_port, dryrun):
694 self.open(self.host, self.username, None, "Enter user name:")
695 self.sendPassword(self.password, "Enter Password:")
697 #self.ifThenSend("RPC-16>", "Status")
698 self.ifThenSend("RPC3-NC>", "Reboot %d" % node_port)
700 # Reboot Outlet N (Y/N)?
702 self.ifThenSend("(Y/N)?", "N")
704 self.ifThenSend("(Y/N)?", "Y")
705 self.ifThenSend("RPC3-NC>", "")
710 class BayTechGeorgeTown(PCUControl):
711 def run(self, node_port, dryrun):
712 self.open(self.host, self.username, None, "Enter user name:")
713 self.sendPassword(self.password, "Enter Password:")
715 #self.ifThenSend("RPC-16>", "Status")
717 self.ifThenSend("RPC-16>", "Reboot %d" % node_port)
719 # Reboot Outlet N (Y/N)?
721 self.ifThenSend("(Y/N)?", "N")
723 self.ifThenSend("(Y/N)?", "Y")
724 self.ifThenSend("RPC-16>", "")
729 class BayTechCtrlCUnibe(PCUControl):
731 For some reason, these units let you log in fine, but they hang
732 indefinitely, unless you send a Ctrl-C after the password. No idea
735 def run(self, node_port, dryrun):
736 print "BayTechCtrlC %s" % self.host
738 ssh_options="-o StrictHostKeyChecking=no -o PasswordAuthentication=yes -o PubkeyAuthentication=no"
740 if not s.login(self.host, self.username, self.password, ssh_options):
741 raise ExceptionPassword("Invalid Password")
742 # Otherwise, the login succeeded.
744 # Send a ctrl-c to the remote process.
745 print "sending ctrl-c"
748 # Control Outlets (5 ,1).........5
750 #index = s.expect("Enter Request")
751 index = s.expect(["Enter Request :"])
756 index = s.expect(["DS-RPC>", "Enter user name:"])
758 s.send(self.username + "\r\n")
759 index = s.expect(["DS-RPC>"])
762 print "Reboot %d" % node_port
763 s.send("Reboot %d\r\n" % node_port)
766 index = s.expect(["\(Y/N\)\?", "Port in use", "DS-RPC>"])
775 raise ExceptionPrompt("PCU Reported 'Port in use.'")
777 raise ExceptionSequence("Issued command 'Reboot' failed.")
780 index = s.expect(["DS-RPC>"])
781 #print "got prompt back"
786 raise ExceptionPrompt("EOF before expected Prompt")
787 except pexpect.TIMEOUT:
788 raise ExceptionPrompt("Timeout before expected Prompt")
792 class BayTechCtrlC(PCUControl):
794 For some reason, these units let you log in fine, but they hang
795 indefinitely, unless you send a Ctrl-C after the password. No idea
798 def run(self, node_port, dryrun):
799 print "BayTechCtrlC %s" % self.host
801 ssh_options="-o StrictHostKeyChecking=no -o PasswordAuthentication=yes -o PubkeyAuthentication=no"
803 if not s.login(self.host, self.username, self.password, ssh_options):
804 raise ExceptionPassword("Invalid Password")
805 # Otherwise, the login succeeded.
807 # Send a ctrl-c to the remote process.
808 print "SENDING ctrl-c"
811 # Control Outlets (5 ,1).........5
813 print "EXPECTING: ", "Enter Request :"
814 index = s.expect(["Enter Request :"])
819 print "EXPECTING: ", "DS-RPC>"
820 index = s.expect(["DS-RPC>", "Enter user name:", "Port in use."])
822 print "sending username"
823 s.send(self.username + "\r\n")
824 index = s.expect(["DS-RPC>"])
826 raise ExceptionPrompt("PCU Reported 'Port in use.'")
829 print "SENDING: Reboot %d" % node_port
830 s.send("Reboot %d\r\n" % node_port)
834 print "EXPECTING: ", "Y/N?"
835 index = s.expect(["\(Y/N\)\?", "Port in use", "DS-RPC>"])
844 raise ExceptionPrompt("PCU Reported 'Port in use.'")
846 raise ExceptionSequence("Issued command 'Reboot' failed.")
848 # NOTE: for some reason, the script times out with the
849 # following line. In manual tests, it works correctly, but
850 # with automated tests, evidently it fails.
853 #print "TOTAL--", s.allstr, "--EOT"
854 index = s.expect(["DS-RPC>"])
855 print "got prompt back"
860 raise ExceptionPrompt("EOF before 'Enter Request' Prompt")
861 except pexpect.TIMEOUT:
862 raise ExceptionPrompt("Timeout before Prompt")
866 class BayTech(PCUControl):
867 supported_ports = [22,23]
868 def run(self, node_port, dryrun):
869 self.open(self.host, self.username)
870 self.sendPassword(self.password)
872 # Control Outlets (5 ,1).........5
873 self.ifThenSend("Enter Request :", "5")
877 self.ifThenSend("DS-RPC>", "Reboot %d" % node_port, ExceptionNotFound)
878 except ExceptionNotFound, msg:
879 # one machine is configured to ask for a username,
880 # even after login...
881 print "msg: %s" % msg
882 self.transport.write(self.username + "\r\n")
884 self.ifThenSend("DS-RPC>", "Reboot %d" % node_port)
886 # Reboot Outlet N (Y/N)?
888 self.ifThenSend("(Y/N)?", "N")
890 self.ifThenSend("(Y/N)?", "Y")
892 self.ifThenSend("DS-RPC>", "")
897 class WTIIPS4(PCUControl):
898 supported_ports = [23]
899 def run(self, node_port, dryrun):
901 self.sendPassword(self.password, "Enter Password:")
903 self.ifThenSend("IPS> ", "/Boot %s" % node_port)
905 self.ifThenSend("Sure? (Y/N): ", "N")
907 self.ifThenSend("Sure? (Y/N): ", "Y")
909 self.ifThenSend("IPS> ", "")
914 class ePowerSwitchGood(PCUControl):
916 # The old code used Python's HTTPPasswordMgrWithDefaultRealm()
917 # For some reason this both doesn't work and in some cases, actually
918 # hangs the PCU. Definitely not what we want.
920 # The code below is much simpler. Just letting things fail first,
921 # and then, trying again with authentication string in the header.
923 def run(self, node_port, dryrun):
924 self.transport = None
925 self.url = "http://%s:%d/" % (self.host,80)
926 uri = "%s:%d" % (self.host,80)
928 req = urllib2.Request(self.url)
930 handle = urllib2.urlopen(req)
932 # NOTE: this is expected to fail initially
939 return "ERROR: not protected by HTTP authentication"
941 if not hasattr(e, 'code') or e.code != 401:
942 return "ERROR: failed for: %s" % str(e)
944 base64data = base64.encodestring("%s:%s" % (self.username, self.password))[:-1]
945 # NOTE: assuming basic realm authentication.
946 authheader = "Basic %s" % base64data
947 req.add_header("Authorization", authheader)
950 f = urllib2.urlopen(req)
952 # failing here means the User/passwd is wrong (hopefully)
953 raise ExceptionPassword("Incorrect username/password")
955 # NOTE: after verifying that the user/password is correct,
956 # actually reboot the given node.
959 data = urllib.urlencode({'P%d' % node_port : "r"})
960 req = urllib2.Request(self.url + "cmd.html")
961 req.add_header("Authorization", authheader)
962 # add data to handler,
963 f = urllib2.urlopen(req, data)
964 if self.verbose: print f.read()
966 import traceback; traceback.print_exc()
968 # fetch url one more time on cmd.html, econtrol.html or whatever.
971 if self.verbose: print f.read()
976 class CustomPCU(PCUControl):
977 def run(self, node_port, dryrun):
978 url = "https://www-itec.uni-klu.ac.at/plab-pcu/index.php"
981 # Turn host off, then on
982 formstr = "plab%s=off" % node_port
983 os.system("curl --user %s:%s --form '%s' --insecure %s" % (self.username, self.password, formstr, url))
985 formstr = "plab%s=on" % node_port
986 os.system("curl --user %s:%s --form '%s' --insecure %s" % (self.username, self.password, formstr, url))
988 os.system("curl --user %s:%s --insecure %s" % (self.username, self.password, url))
991 class ePowerSwitchOld(PCUControl):
992 def run(self, node_port, dryrun):
993 self.url = "http://%s:%d/" % (self.host,80)
994 uri = "%s:%d" % (self.host,80)
997 authinfo = urllib2.HTTPPasswordMgrWithDefaultRealm()
998 authinfo.add_password (None, uri, self.username, self.password)
999 authhandler = urllib2.HTTPBasicAuthHandler( authinfo )
1001 # NOTE: it doesn't seem to matter whether this authinfo is here or not.
1002 transport = urllib2.build_opener(authinfo)
1003 f = transport.open(self.url)
1004 if self.verbose: print f.read()
1007 transport = urllib2.build_opener(authhandler)
1008 f = transport.open(self.url + "cmd.html", "P%d=r" % node_port)
1009 if self.verbose: print f.read()
1014 class ePowerSwitch(PCUControl):
1015 supported_ports = [80]
1016 def run(self, node_port, dryrun):
1017 self.url = "http://%s:%d/" % (self.host,80)
1018 uri = "%s:%d" % (self.host,80)
1020 # TODO: I'm still not sure what the deal is here.
1021 # two independent calls appear to need to be made before the
1022 # reboot will succeed. It doesn't seem to be possible to do
1023 # this with a single call. I have no idea why.
1026 authinfo = urllib2.HTTPPasswordMgrWithDefaultRealm()
1027 authinfo.add_password (None, uri, self.username, self.password)
1028 authhandler = urllib2.HTTPBasicAuthHandler( authinfo )
1030 # NOTE: it doesn't seem to matter whether this authinfo is here or not.
1031 transport = urllib2.build_opener()
1032 f = transport.open(self.url + "elogin.html", "pwd=%s" % self.password)
1033 if self.verbose: print f.read()
1036 transport = urllib2.build_opener(authhandler)
1037 f = transport.open(self.url + "econtrol.html", "P%d=r" % node_port)
1038 if self.verbose: print f.read()
1040 # data= "P%d=r" % node_port
1041 #self.open(self.host, self.username, self.password)
1042 #self.sendHTTP("elogin.html", "pwd=%s" % self.password)
1043 #self.sendHTTP("econtrol.html", data)
1044 #self.sendHTTP("cmd.html", data)
1049 class ManualPCU(PCUControl):
1050 supported_ports = [22,23,80,443,9100,16992]
1052 def run(self, node_port, dryrun):
1054 # TODO: send email message to monitor admin requesting manual
1055 # intervention. This should always be an option for ridiculous,
1059 ### rebooting european BlackBox PSE boxes
1060 # Thierry Parmentelat - May 11 2005
1061 # tested on 4-ports models known as PSE505-FR
1062 # uses http to POST the data 'P<port>=r'
1063 # relies on basic authentication within http1.0
1064 # first curl-based script was
1065 # curl --http1.0 --basic --user <username>:<password> --data P<port>=r \
1066 # http://<hostname>:<http_port>/cmd.html && echo OK
1071 class BlackBoxPSMaverick(PCUControl):
1072 supported_ports = [80]
1074 def run(self, node_port, dryrun):
1076 # send reboot signal.
1077 cmd = "curl -s --data 'P%s=r' --anyauth --user '%s:%s' http://%s/config/home_f.html" % ( node_port, self.username, self.password, self.host)
1079 # else, just try to log in
1080 cmd = "curl -s --anyauth --user '%s:%s' http://%s/config/home_f.html" % ( self.username, self.password, self.host)
1084 print "RESULT: ", result
1086 if len(result.split()) > 3:
1091 def bbpse_reboot (pcu_ip,username,password,port_in_pcu,http_port, dryrun):
1095 url = "http://%s:%d/cmd.html" % (pcu_ip,http_port)
1096 data= "P%d=r" % port_in_pcu
1098 logger.debug("POSTing '%s' on %s" % (data,url))
1100 authinfo = urllib2.HTTPPasswordMgrWithDefaultRealm()
1101 uri = "%s:%d" % (pcu_ip,http_port)
1102 authinfo.add_password (None, uri, username, password)
1103 authhandler = urllib2.HTTPBasicAuthHandler( authinfo )
1105 opener = urllib2.build_opener(authhandler)
1106 urllib2.install_opener(opener)
1112 f = urllib2.urlopen(url,data)
1119 except urllib2.URLError,err:
1120 logger.info('Could not open http connection', err)
1121 return "bbpse error"
1123 ### rebooting x10toggle based systems addressed by port
1124 # Marc E. Fiuczynski - May 31 2005
1125 # tested on 4-ports models known as PSE505-FR
1126 # uses ssh and password to login to an account
1127 # that will cause the system to be powercycled.
1129 def x10toggle_reboot(ip, username, password, port, dryrun):
1134 ssh = pyssh.Ssh(username, ip)
1138 telnet_answer(ssh, "password:", password)
1142 telnet_answer(ssh, "x10toggle>", "A%d" % port)
1145 output = ssh.close()
1147 logger.debug(output)
1150 except Exception, err:
1154 output = ssh.close()
1156 logger.debug(output)
1157 return errno.ETIMEDOUT
1159 ### rebooting Dell systems via RAC card
1160 # Marc E. Fiuczynski - June 01 2005
1161 # tested with David Lowenthal's itchy/scratchy nodes at UGA
1164 def runcmd(command, args, username, password, timeout = None):
1167 result_ready = threading.Condition()
1171 result_ready.acquire()
1175 result_ready.notify()
1176 result_ready.release()
1178 def do_command(command, username, password):
1181 # Popen4 is a popen-type class that combines stdout and stderr
1182 p = popen2.Popen4(command)
1184 # read all output data
1185 p.tochild.write("%s\n" % username)
1186 p.tochild.write("%s\n" % password)
1188 data = p.fromchild.read()
1191 # might get interrupted by a signal in poll() or waitpid()
1194 set_result((retval, data))
1197 if ex.errno == errno.EINTR:
1200 except Exception, ex:
1204 command = " ".join([command] + args)
1206 worker = threading.Thread(target = do_command, args = (command, username, password, ))
1207 worker.setDaemon(True)
1208 result_ready.acquire()
1210 result_ready.wait(timeout)
1212 if result == [None]:
1213 raise Exception, "command timed-out: '%s'" % command
1215 result_ready.release()
1218 if isinstance(result, Exception):
1221 (retval, data) = result
1222 if os.WIFEXITED(retval) and os.WEXITSTATUS(retval) == 0:
1225 out = "system command ('%s') " % command
1226 if os.WIFEXITED(retval):
1227 out += "failed, rc = %d" % os.WEXITSTATUS(retval)
1229 out += "killed by signal %d" % os.WTERMSIG(retval)
1231 out += "; output follows:\n" + data
1232 raise Exception, out
1234 def racadm_reboot(host, username, password, port, dryrun):
1237 ip = socket.gethostbyname(host)
1239 cmd = "/usr/sbin/racadm"
1242 output = runcmd(cmd, ["-r %s -i serveraction powercycle" % ip],
1245 output = runcmd(cmd, ["-r %s -i getsysinfo" % ip],
1248 print "RUNCMD: %s" % output
1250 logger.debug(output)
1253 except Exception, err:
1254 logger.debug("runcmd raised exception %s" % err)
1260 if pcu['hostname'] is not None and pcu['hostname'] is not "":
1261 return pcu['hostname']
1262 elif pcu['ip'] is not None and pcu['ip'] is not "":
1268 from monitor import database
1271 def get_pcu_values(pcu_id):
1274 # this shouldn't be loaded each time...
1275 fb = database.dbLoad("findbadpcus")
1278 values = fb['nodes']["id_%s" % pcu_id]['values']
def reboot(nodename):
    """Reboot `nodename` through reboot_policy, probing on and dryrun off.

    Returns whatever reboot_policy returns for these arguments.
    """
    continue_probe = True
    dryrun = False
    return reboot_policy(nodename, continue_probe, dryrun)
1287 def reboot_policy(nodename, continue_probe, dryrun):
1290 pcu = plc.getpcu(nodename)
1292 logger.debug("no pcu for %s" % hostname)
1293 print "no pcu for %s" % hostname
1294 return False # "%s has no pcu" % nodename
1296 values = get_pcu_values(pcu['pcu_id'])
1298 logger.debug("No values for pcu probe %s" % hostname)
1299 print "No values for pcu probe %s" % hostname
1300 return False #"no info for pcu_id %s" % pcu['pcu_id']
1303 logger.debug("Trying PCU %s %s" % (pcu['hostname'], pcu['model']))
1305 ret = reboot_test(nodename, values, continue_probe, verbose, dryrun)
1314 class Unknown(PCUControl):
1315 supported_ports = [22,23,80,443,5869,9100,16992]
1317 def model_to_object(modelname):
1318 if "AMT" in modelname:
1320 elif "DS4-RPC" in modelname:
1322 elif "ilo2" in modelname:
1324 elif "IP-41x" in modelname:
1326 elif "AP79xx" in modelname or "Masterswitch" in modelname:
1328 elif "DRAC" in modelname:
1330 elif "WTI" in modelname:
1332 elif "ePowerSwitch" in modelname:
1334 elif "ipmi" in modelname:
1336 elif "bbsemaverick" in modelname:
1337 return BlackBoxPSMaverick
# Pick a PCU controller class from values['model'] (and, for several models,
# from hard-coded values['pcu_id'] lists), then call its reboot() method.
#   nodename:       used as a key into `values` (values[nodename]) to obtain
#                   the first argument passed to reboot() -- presumably the
#                   PCU port the node is attached to; confirm.
#   values:         dict of PCU probe values; reads 'model', 'pcu_id' and
#                   optionally 'plc_pcu_stats' / 'portstatus'.
#   continue_probe: every model branch requires it to be true; a separate
#                   branch handles continue_probe == False.
#   verbose:        passed as the second constructor argument of each class.
#   dryrun:         passed as the second argument of each reboot() call.
# The third constructor argument is a list of port numbers given as strings.
# NOTE(review): this excerpt elides lines throughout the function (try/except
# scaffolding, some conditions and the final return are not shown), so the
# comments below describe only the statements that are visible.
1341 def reboot_test(nodename, values, continue_probe, verbose, dryrun):
# Merge the nested 'plc_pcu_stats' entries into `values` itself. This mutates
# the dict passed in by the caller.
1343 if 'plc_pcu_stats' in values:
1344 values.update(values['plc_pcu_stats'])
1347 # DataProbe iPal (many sites)
1348 if continue_probe and values['model'].find("IP-41x_IP-81x") >= 0:
1349 ipal = IPAL(values, verbose, ['23', '80', '9100'])
1350 rb_ret = ipal.reboot(values[nodename], dryrun)
1352 # APC Masterswitch (Berkeley)
1353 elif continue_probe and ( values['model'].find("AP79xx") >= 0 or \
1354 values['model'].find("Masterswitch") >= 0 ):
# The APC variant is selected by hard-coded pcu_id lists; ids not listed in
# any of them end up with APCMaster below.
1357 # TODO: make a more robust version of APC
1358 if values['pcu_id'] in [1102,1163,1055,1111,1231,1113,1127,1128,1148]:
1359 apc = APCEurope(values, verbose, ['22', '23'])
1360 rb_ret = apc.reboot(values[nodename], dryrun)
1362 elif values['pcu_id'] in [1110,86]:
1363 apc = APCBrazil(values, verbose, ['22', '23'])
1364 rb_ret = apc.reboot(values[nodename], dryrun)
1366 elif values['pcu_id'] in [1221,1225,1220,1192]:
1367 apc = APCBerlin(values, verbose, ['22', '23'])
1368 rb_ret = apc.reboot(values[nodename], dryrun)
1370 elif values['pcu_id'] in [1173,1240,47,1363,1405,1401,1372,1371]:
1371 apc = APCFolsom(values, verbose, ['22', '23'])
1372 rb_ret = apc.reboot(values[nodename], dryrun)
1375 apc = APCMaster(values, verbose, ['22', '23'])
1376 rb_ret = apc.reboot(values[nodename], dryrun)
# BayTech DS4-RPC: the variant is again selected by hard-coded pcu_id lists.
1379 elif continue_probe and values['model'].find("DS4-RPC") >= 0:
1380 if values['pcu_id'] in [1056,1237,1052,1209,1002,1008,1041,1013,1022]:
1381 # These require a 'ctrl-c' to be sent...
1382 baytech = BayTechCtrlC(values, verbose, ['22', '23'])
1383 rb_ret = baytech.reboot(values[nodename], dryrun)
1385 elif values['pcu_id'] in [93]:
1386 baytech = BayTechAU(values, verbose, ['22', '23'])
1387 rb_ret = baytech.reboot(values[nodename], dryrun)
1389 elif values['pcu_id'] in [1057]:
1390 # These require a 'ctrl-c' to be sent...
1391 baytech = BayTechCtrlCUnibe(values, verbose, ['22', '23'])
1392 rb_ret = baytech.reboot(values[nodename], dryrun)
1394 elif values['pcu_id'] in [1012]:
1395 # This pcu sometimes doesn't present the 'Username' prompt,
1396 # unless you immediately try again...
# The same construct-and-reboot pair appears twice below; the elided lines
# between them (1397, 1400) presumably form the retry described above --
# confirm against the full file.
1398 baytech = BayTechGeorgeTown(values, verbose, ['22', '23'])
1399 rb_ret = baytech.reboot(values[nodename], dryrun)
1401 baytech = BayTechGeorgeTown(values, verbose, ['22', '23'])
1402 rb_ret = baytech.reboot(values[nodename], dryrun)
1404 baytech = BayTech(values, verbose, ['22', '23'])
1405 rb_ret = baytech.reboot(values[nodename], dryrun)
# HP iLO: reboot() is called with 0 instead of values[nodename].
# NOTE(review): the statements choosing between the three attempts below are
# on elided lines (1409, 1412, 1415); the last two attempts are identical.
1408 elif continue_probe and values['model'].find("ilo") >= 0:
1410 hpilo = HPiLO(values, verbose, ['22'])
1411 rb_ret = hpilo.reboot(0, dryrun)
1413 hpilo = HPiLOHttps(values, verbose, ['443'])
1414 rb_ret = hpilo.reboot(0, dryrun)
1416 hpilo = HPiLOHttps(values, verbose, ['443'])
1417 rb_ret = hpilo.reboot(0, dryrun)
# Dell DRAC: reboot() is likewise called with 0. How the two attempts are
# sequenced is on elided lines (1423, 1426).
1420 elif continue_probe and values['model'].find("DRAC") >= 0:
1421 # TODO: I don't think DRACRacAdm will throw an exception for the
1422 # default method to catch...
1424 drac = DRACRacAdm(values, verbose, ['443', '5869'])
1425 rb_ret = drac.reboot(0, dryrun)
1427 drac = DRAC(values, verbose, ['22'])
1428 rb_ret = drac.reboot(0, dryrun)
1430 elif continue_probe and values['model'].find("WTI IPS-4") >= 0:
1431 wti = WTIIPS4(values, verbose, ['23'])
1432 rb_ret = wti.reboot(values[nodename], dryrun)
1434 elif continue_probe and values['model'].find("AMT") >= 0:
1435 amt = IntelAMT(values, verbose, ['16992'])
1436 rb_ret = amt.reboot(values[nodename], dryrun)
1438 elif continue_probe and values['model'].find("bbsemaverick") >=0:
1439 print "TRYING BlackBoxPSMaverick"
1440 bbe = BlackBoxPSMaverick(values, verbose, ['80'])
1441 rb_ret = bbe.reboot(values[nodename], dryrun)
1443 elif continue_probe and values['model'].find("ipmi") >=0:
1446 ipmi = IPMI(values, verbose, ['80', '443', '623'])
1447 rb_ret = ipmi.reboot(values[nodename], dryrun)
# ePowerSwitch: only the controller object differs per pcu_id; the reboot()
# call on line 1460 is shared by all three choices.
1449 elif continue_probe and values['model'].find("ePowerSwitch") >=0:
1450 # TODO: allow a different port than http 80.
1451 if values['pcu_id'] in [1089, 1071, 1046, 1035, 1118]:
1452 eps = ePowerSwitchGood(values, verbose, ['80'])
1453 elif values['pcu_id'] in [1003]:
1456 eps = ePowerSwitch(values, verbose, ['80'])
1458 eps = ePowerSwitchGood(values, verbose, ['80'])
1460 rb_ret = eps.reboot(values[nodename], dryrun)
# One PCU handled by id regardless of its model string.
1461 elif continue_probe and values['pcu_id'] in [1122]:
1462 custom = CustomPCU(values, verbose, ['80', '443'])
# NOTE(review): unlike every other branch, the result of reboot() is not
# assigned to rb_ret here, so rb_ret appears to stay unbound on this path
# (unless an elided line sets it) -- likely a bug; confirm.
1463 custom.reboot(values[nodename], dryrun)
# continue_probe is true but no model/pcu_id matched: the result is a string.
1465 elif continue_probe:
1466 rb_ret = "Unsupported_PCU"
# continue_probe is False: the body of this branch (lines 1470-1475) is
# elided; only the 'portstatus' membership test and an ExceptionPort handler
# are visible.
1468 elif continue_probe == False:
1469 if 'portstatus' in values:
1476 except ExceptionPort, err:
# The remaining lines are commented-out code for a former racadm branch.
1481 #elif continue_probe and values['protocol'] == "racadm" and \
1482 # values['model'] == "RAC":
1483 # rb_ret = racadm_reboot(pcu_name(values),
1484 # values['username'],
1485 # values['password'],
1490 logger.setLevel(logging.DEBUG)
1491 ch = logging.StreamHandler()
1492 ch.setLevel(logging.DEBUG)
1493 formatter = logging.Formatter('LOGGER - %(message)s')
1494 ch.setFormatter(formatter)
1495 logger.addHandler(ch)
1498 if "test" in sys.argv:
1503 for node in sys.argv[1:]:
1504 if node == "test": continue
1506 print "Rebooting %s" % node
1507 if reboot_policy(node, True, dryrun):
1511 except Exception, err:
1512 import traceback; traceback.print_exc()
1515 if __name__ == '__main__':
1517 logger = logging.getLogger("monitor")