3 # Reboot specified nodes
9 import errno, time, traceback
12 import threading, popen2
14 from monitor.wrapper import plc
16 from subprocess import PIPE, Popen
17 import pcucontrol.transports.ssh.pxssh as pxssh
18 import pcucontrol.transports.ssh.pexpect as pexpect
20 from monitor.util import command
22 # Use our versions of telnetlib and pyssh
23 sys.path.insert(0, os.path.dirname(sys.argv[0]))
24 import pcucontrol.transports.telnetlib as telnetlib
25 sys.path.insert(0, os.path.dirname(sys.argv[0]) + "/pyssh")
26 import pcucontrol.transports.pyssh as pyssh
31 # Event class ID from pcu events
32 #NODE_POWER_CONTROL = 3
35 #MONITOR_USER_ID = 11142
38 logger = logging.getLogger("monitor")
class PCUError(Exception):
    """Common base class for the PCU-control errors defined in this module.

    Every specific exception below derives from it, so a caller can catch
    all PCU-control failures with a single ``except PCUError`` clause while
    each subclass stays individually catchable (and is still an Exception).
    """


class ExceptionNoTransport(PCUError):
    """Raised when a send is attempted while the transport object is None."""


class ExceptionNotFound(PCUError):
    """Raised when the expected text is not found in the device output."""


class ExceptionPassword(PCUError):
    """Raised when the password prompt/login step of a PCU session fails."""


class ExceptionTimeout(PCUError):
    """Raised when an expected prompt does not show up within the timeout."""


class ExceptionPrompt(PCUError):
    """Raised when a generic expected prompt is not seen (default error)."""


class ExceptionSequence(PCUError):
    """Raised when a step of the device's command sequence fails."""


class ExceptionReset(PCUError):
    """Reset-related PCU error (not raised in the code visible here)."""


class ExceptionPort(PCUError):
    """Raised when no supported transport exists among the open ports."""


class ExceptionUsername(PCUError):
    """Raised when the username prompt of a PCU session is not seen."""
def telnet_answer(telnet, expected, buffer):
    """Wait for *expected* on a session, then answer it with *buffer*.

    telnet   -- session object providing read_until(text, timeout) and
                write(data)
    expected -- text that must appear in the data returned by read_until
    buffer   -- reply to send; a trailing "\\r\\n" is appended

    Raises ExceptionNotFound when *expected* is not in the data read within
    TELNET_TIMEOUT; nothing is written in that case.
    """
    output = telnet.read_until(expected, TELNET_TIMEOUT)
    if expected not in output:
        # Call-style raise: same exception as before, valid in Python 2 and 3.
        raise ExceptionNotFound("'%s' not found" % expected)
    telnet.write(buffer + "\r\n")
64 # PCU has model, host, preferred-port, user, passwd,
66 # This is an object derived directly from the PLCAPI DB fields
68 def __init__(self, plc_pcu_dict):
69 for field in ['username', 'password', 'site_id',
72 'node_ids', 'ports', ]:
73 if field in plc_pcu_dict:
74 self.__setattr__(field, plc_pcu_dict[field])
76 raise Exception("No such field %s in PCU object" % field)
78 # These are the convenience functions built around the PCU object.
80 def __init__(self, plc_pcu_dict):
81 PCU.__init__(self, plc_pcu_dict)
82 self.host = self.pcu_name()
85 if self.hostname is not None and self.hostname is not "":
87 elif self.ip is not None and self.ip is not "":
92 def nodeidToPort(self, node_id):
93 if node_id in self.node_ids:
94 for i in range(0, len(self.node_ids)):
95 if node_id == self.node_ids[i]:
98 raise Exception("No such Node ID: %d" % node_id)
100 # This class captures the observed pcu records from FindBadPCUs.py
102 def __init__(self, pcu_record_dict):
103 for field in ['nodenames', 'portstatus',
106 if field in pcu_record_dict:
107 if field == "reboot":
108 self.__setattr__("reboot_str", pcu_record_dict[field])
110 self.__setattr__(field, pcu_record_dict[field])
112 raise Exception("No such field %s in pcu record dict" % field)
122 def __init__(self, type, verbose):
124 self.verbose = verbose
125 self.transport = None
127 def open(self, host, username=None, password=None, prompt="User Name"):
130 if self.type == self.TELNET:
131 transport = telnetlib.Telnet(host, timeout=self.TELNET_TIMEOUT)
132 transport.set_debuglevel(self.verbose)
133 if username is not None:
134 self.transport = transport
135 self.ifThenSend(prompt, username, ExceptionUsername)
137 elif self.type == self.SSH:
138 if username is not None:
139 transport = pyssh.Ssh(username, host)
140 transport.set_debuglevel(self.verbose)
142 # TODO: have an ssh set_debuglevel() also...
144 raise Exception("Username cannot be None for ssh transport.")
145 elif self.type == self.HTTP:
146 self.url = "http://%s:%d/" % (host,80)
147 uri = "%s:%d" % (host,80)
150 authinfo = urllib2.HTTPPasswordMgrWithDefaultRealm()
151 authinfo.add_password (None, uri, username, password)
152 authhandler = urllib2.HTTPBasicAuthHandler( authinfo )
154 transport = urllib2.build_opener(authhandler)
157 raise Exception("Unknown transport type: %s" % self.type)
159 self.transport = transport
163 if self.type == self.TELNET:
164 self.transport.close()
165 elif self.type == self.SSH:
166 self.transport.close()
167 elif self.type == self.HTTP:
170 raise Exception("Unknown transport type %s" % self.type)
171 self.transport = None
# sendHTTP: send `data` to the URL `self.url + resource` through the opener
# held in self.transport, returning the string "http transport error" when
# urllib2 raises URLError.
# NOTE(review): only some lines of this method are visible in this listing
# (the `try:` line, the handling of the response object `f` and the success
# return are not shown); the comments here describe the visible lines only.
173 def sendHTTP(self, resource, data):
# Progress message for the request that is about to be made.
175 print "POSTing '%s' to %s" % (data,self.url + resource)
# Passing `data` to the opener sends it as the request body.
178 f = self.transport.open(self.url + resource ,data)
183 except urllib2.URLError,err:
# NOTE(review): `err` is passed as a %-format argument but the message has no
# placeholder, so logging cannot format this record and the error text is
# lost; presumably 'Could not open http connection: %s' was intended - confirm.
184 logger.info('Could not open http connection', err)
185 return "http transport error"
189 def sendPassword(self, password, prompt=None):
190 if self.type == self.TELNET:
192 self.ifThenSend("Password", password, ExceptionPassword)
194 self.ifThenSend(prompt, password, ExceptionPassword)
195 elif self.type == self.SSH:
196 self.ifThenSend("password:", password, ExceptionPassword)
197 elif self.type == self.HTTP:
200 raise Exception("Unknown transport type: %s" % self.type)
def ifThenSend(self, expected, buffer, ErrorClass=ExceptionPrompt):
    """Wait for *expected* on the open transport, then send *buffer*.

    expected   -- text that must appear in the data returned by
                  self.transport.read_until within self.TELNET_TIMEOUT
    buffer     -- reply to write; a trailing "\\r\\n" is appended
    ErrorClass -- exception type raised when *expected* is not seen
                  (defaults to ExceptionPrompt)

    Raises ExceptionNoTransport when self.transport is None, and
    ErrorClass when the expected text is missing from the output (the raw
    output is printed first to help diagnose the device's actual prompt).
    """
    # Identity test against None instead of `!= None`.
    if self.transport is None:
        raise ExceptionNoTransport("transport object is type None")

    output = self.transport.read_until(expected, self.TELNET_TIMEOUT)
    if expected not in output:
        print("OUTPUT: --%s--" % output)
        # Call-style raise: same exception as before, valid in Python 2 and 3.
        raise ErrorClass("'%s' not found" % expected)
    self.transport.write(buffer + "\r\n")
214 def ifElse(self, expected, ErrorClass):
216 self.transport.read_until(expected, self.TELNET_TIMEOUT)
218 raise ErrorClass("Could not find '%s' within timeout" % expected)
221 class PCUControl(Transport,PCUModel,PCURecord):
225 def __init__(self, plc_pcu_record, verbose, supported_ports=[]):
226 PCUModel.__init__(self, plc_pcu_record)
227 PCURecord.__init__(self, plc_pcu_record)
230 if '22' in supported_ports and self.portstatus['22'] == "open":
232 elif '23' in supported_ports and self.portstatus['23'] == "open":
233 type = Transport.TELNET
234 elif '80' in supported_ports and self.portstatus['80'] == "open":
235 type = Transport.HTTP
236 elif '443' in supported_ports and self.portstatus['443'] == "open":
237 type = Transport.HTTP
238 elif '5869' in supported_ports and self.portstatus['5869'] == "open":
239 # For DRAC cards. Racadm opens this port.
240 type = Transport.HTTP
241 elif '9100' in supported_ports and self.portstatus['9100'] == "open":
242 type = Transport.IPAL
243 elif '16992' in supported_ports and self.portstatus['16992'] == "open":
244 type = Transport.HTTP
246 raise ExceptionPort("Unsupported Port: No transport from open ports")
248 raise Exception("No Portstatus: No transport because no open ports")
249 Transport.__init__(self, type, verbose)
251 def run(self, node_port, dryrun):
252 """ This function is to be defined by the specific PCU instance. """
255 def reboot(self, node_port, dryrun):
257 return self.run(node_port, dryrun)
258 except ExceptionNotFound, err:
259 return "error: " + str(err)
260 except ExceptionPassword, err:
261 return "password exception: " + str(err)
262 except ExceptionTimeout, err:
263 return "timeout exception: " + str(err)
264 except ExceptionUsername, err:
265 return "exception: no username prompt: " + str(err)
266 except ExceptionSequence, err:
267 return "sequence error: " + str(err)
268 except ExceptionPrompt, err:
269 return "prompt exception: " + str(err)
270 except ExceptionPort, err:
271 return "no ports exception: " + str(err)
272 except socket.error, err:
273 return "socket error: timeout: " + str(err)
274 except EOFError, err:
276 logger.debug("reboot: EOF")
278 self.transport.close()
280 traceback.print_exc()
281 return "EOF connection reset" + str(err)
283 class IPMI(PCUControl):
285 supported_ports = [80,443,623]
287 # TODO: get exit codes to determine success or failure...
288 def run(self, node_port, dryrun):
291 cmd = "ipmitool -I lanplus -H %s -U %s -P '%s' power cycle"
292 p = os.popen(cmd % ( self.host, self.username, self.password) )
294 print "RESULT: ", result
296 cmd = "ipmitool -I lanplus -H %s -U %s -P '%s' user list"
297 p = os.popen(cmd % ( self.host, self.username, self.password) )
299 print "RESULT: ", result
301 if "Error" in result:
306 class IPAL(PCUControl):
308 This now uses a proprietary format for communicating with the PCU. I
309 prefer it to Telnet, and Web access, since it's much lighter weight
310 and, more importantly, IT WORKS!! HHAHHHAHAHAHAHAHA!
312 supported_ports = [23,80,9100]
def format_msg(self, data, cmd):
    """Build one command frame for the iPal socket protocol.

    The frame is ESC + password + ESC + data + cmd, where ESC is the ASCII
    escape character (0x1b) and the password is read from self.password.

    data -- payload placed before the command (formatted with %s)
    cmd  -- command code, formatted with %c (a single character)

    Returns the assembled frame as a string.
    """
    # ESC as a literal instead of parsing the hex string '1b' on every call.
    esc = "\x1b"
    return "%c%s%c%s%c" % (esc, self.password, esc, data, cmd)
318 def recv_noblock(self, s, count):
322 # TODO: make sleep backoff, before stopping.
324 ret = s.recv(count, socket.MSG_DONTWAIT)
325 except socket.error, e:
326 if e[0] == errno.EAGAIN:
327 raise Exception(e[1])
329 # TODO: not other exceptions.
def run(self, node_port, dryrun):
    """Dispatch the reboot to the handler matching this object's transport.

    Transport.IPAL is served by run_ipal and Transport.TELNET by
    run_telnet; the handler's result is returned unchanged. Any other
    transport type raises Exception.
    """
    if self.type == Transport.IPAL:
        return self.run_ipal(node_port, dryrun)

    if self.type == Transport.TELNET:
        return self.run_telnet(node_port, dryrun)

    # Neither of the two transports handled above.
    raise Exception("Unimplemented Transport for IPAL")
341 def run_telnet(self, node_port, dryrun):
342 # TELNET version of protocol...
344 ## XXX Some iPals require you to hit Enter a few times first
345 self.ifThenSend("Password >", "\r\n\r\n", ExceptionNotFound)
347 self.ifThenSend("Password >", self.password, ExceptionPassword)
348 self.transport.write("\r\n\r\n")
349 if not dryrun: # P# - Pulse relay
350 print "node_port %s" % node_port
351 self.ifThenSend("Enter >",
354 print "send newlines"
355 self.transport.write("\r\n\r\n")
356 print "after new lines"
357 # Get the next prompt
358 print "wait for enter"
359 self.ifElse("Enter >", ExceptionTimeout)
364 def run_ipal(self, node_port, dryrun):
370 s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
373 s.connect((self.host, 9100))
374 except socket.error, e:
376 if e[0] == errno.ECONNREFUSED:
377 # cannot connect to remote host
378 raise Exception(e[1])
380 # TODO: what other conditions are there?
384 print "Checking status"
385 s.send(self.format_msg("", 'O'))
386 ret = self.recv_noblock(s, 8)
387 print "Current status is '%s'" % ret
390 raise Exception("Status returned 'another session already open' %s : %s" % (node_port, ret))
392 if node_port < len(ret):
393 status = ret[node_port]
401 raise Exception("Unknown status for PCU socket %s : %s" % (node_port, ret))
403 raise Exception("Mismatch between configured port and PCU status: %s %s" % (node_port, ret))
408 print "Pulsing %s" % node_port
409 s.send(self.format_msg("%s" % node_port, 'P'))
411 # NOTE: turn power on ; do not pulse the port.
412 print "Power was off, so turning on ..."
413 s.send(self.format_msg("%s" % node_port, 'E'))
414 #s.send(self.format_msg("%s" % node_port, 'P'))
416 print "Receiving response."
417 ret = self.recv_noblock(s, 8)
418 print "Current status is '%s'" % ret
420 if node_port < len(ret):
421 status = ret[node_port]
429 raise Exception("Unknown status for PCU socket %s : %s" % (node_port, ret))
431 raise Exception("Mismatch between configured port and PCU status: %s %s" % (node_port, ret))
436 return "Failed Power On"
442 class APCEurope(PCUControl):
443 def run(self, node_port, dryrun):
444 self.open(self.host, self.username)
445 self.sendPassword(self.password)
447 self.ifThenSend("\r\n> ", "1", ExceptionPassword)
448 self.ifThenSend("\r\n> ", "2")
449 self.ifThenSend("\r\n> ", str(node_port))
450 # 3- Immediate Reboot
451 self.ifThenSend("\r\n> ", "3")
454 self.ifThenSend("Enter 'YES' to continue or <ENTER> to cancel",
458 self.ifThenSend("Enter 'YES' to continue or <ENTER> to cancel",
459 "", ExceptionSequence)
460 self.ifThenSend("Press <ENTER> to continue...", "", ExceptionSequence)
465 class APCBrazil(PCUControl):
466 def run(self, node_port, dryrun):
467 self.open(self.host, self.username)
468 self.sendPassword(self.password)
470 self.ifThenSend("\r\n> ", "1", ExceptionPassword)
471 self.ifThenSend("\r\n> ", str(node_port))
472 # 4- Immediate Reboot
473 self.ifThenSend("\r\n> ", "4")
476 self.ifThenSend("Enter 'YES' to continue or <ENTER> to cancel",
480 self.ifThenSend("Enter 'YES' to continue or <ENTER> to cancel",
481 "", ExceptionSequence)
482 self.ifThenSend("Press <ENTER> to continue...", "", ExceptionSequence)
487 class APCBerlin(PCUControl):
488 def run(self, node_port, dryrun):
489 self.open(self.host, self.username)
490 self.sendPassword(self.password)
492 self.ifThenSend("\r\n> ", "1", ExceptionPassword)
493 self.ifThenSend("\r\n> ", "2")
494 self.ifThenSend("\r\n> ", "1")
495 self.ifThenSend("\r\n> ", str(node_port))
496 # 3- Immediate Reboot
497 self.ifThenSend("\r\n> ", "3")
500 self.ifThenSend("Enter 'YES' to continue or <ENTER> to cancel",
504 self.ifThenSend("Enter 'YES' to continue or <ENTER> to cancel",
505 "", ExceptionSequence)
506 self.ifThenSend("Press <ENTER> to continue...", "", ExceptionSequence)
511 class APCFolsom(PCUControl):
512 def run(self, node_port, dryrun):
513 self.open(self.host, self.username)
514 self.sendPassword(self.password)
516 self.ifThenSend("\r\n> ", "1", ExceptionPassword)
517 self.ifThenSend("\r\n> ", "2")
518 self.ifThenSend("\r\n> ", "1")
519 self.ifThenSend("\r\n> ", str(node_port))
520 self.ifThenSend("\r\n> ", "1")
522 # 3- Immediate Reboot
523 self.ifThenSend("\r\n> ", "3")
526 self.ifThenSend("Enter 'YES' to continue or <ENTER> to cancel",
530 self.ifThenSend("Enter 'YES' to continue or <ENTER> to cancel",
531 "", ExceptionSequence)
532 self.ifThenSend("Press <ENTER> to continue...", "", ExceptionSequence)
537 class APCMaster(PCUControl):
538 supported_ports = [22,23]
539 def run(self, node_port, dryrun):
540 print "Rebooting %s" % self.host
541 self.open(self.host, self.username)
542 self.sendPassword(self.password)
545 self.ifThenSend("\r\n> ", "1", ExceptionPassword)
546 # 3- Outlet Control/Config
547 self.ifThenSend("\r\n> ", "3")
549 self.ifThenSend("\r\n> ", str(node_port))
551 self.ifThenSend("\r\n> ", "1")
552 # 3- Immediate Reboot
553 self.ifThenSend("\r\n> ", "3")
556 self.ifThenSend("Enter 'YES' to continue or <ENTER> to cancel",
560 self.ifThenSend("Enter 'YES' to continue or <ENTER> to cancel",
561 "", ExceptionSequence)
562 self.ifThenSend("Press <ENTER> to continue...", "", ExceptionSequence)
567 class APC(PCUControl):
568 def __init__(self, plc_pcu_record, verbose):
569 PCUControl.__init__(self, plc_pcu_record, verbose)
571 self.master = APCMaster(plc_pcu_record, verbose)
572 self.folsom = APCFolsom(plc_pcu_record, verbose)
573 self.europe = APCEurope(plc_pcu_record, verbose)
575 def run(self, node_port, dryrun):
579 for pcu in [self.master, self.europe, self.folsom]:
582 print "-*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*"
585 time.sleep(sleep_time)
586 ret = pcu.reboot(node_port, dryrun)
587 except ExceptionSequence, err:
593 return "Unknown reboot sequence for APC PCU"
597 class IntelAMT(PCUControl):
598 supported_ports = [16992]
600 def run(self, node_port, dryrun):
603 # TODO: need to make this path universal; not relative to pwd.
604 cmd_str = "pcucontrol/models/intelamt/remoteControl"
607 # NOTE: -p checks the power state of the host.
608 # TODO: parse the output to find out if it's ok or not.
609 cmd_str += " -p http://%s:16992/RemoteControlService -user admin -pass '%s' " % (self.host, self.password )
611 cmd_str += " -A http://%s:16992/RemoteControlService -user admin -pass '%s' " % (self.host, self.password )
614 return cmd.system(cmd_str, self.TELNET_TIMEOUT)
616 class DRACRacAdm(PCUControl):
617 def run(self, node_port, dryrun):
619 print "trying racadm_reboot..."
620 racadm_reboot(self.host, self.username, self.password, node_port, dryrun)
624 class DRAC(PCUControl):
625 supported_ports = [22,443,5869]
626 def run(self, node_port, dryrun):
627 self.open(self.host, self.username)
628 self.sendPassword(self.password)
630 print "logging in..."
631 self.transport.write("\r\n")
634 self.ifThenSend("[%s]#" % self.username, "getsysinfo")
637 self.ifThenSend("[%s]#" % self.username, "serveraction powercycle")
639 self.ifThenSend("[%s]#" % self.username, "exit")
644 class HPiLO(PCUControl):
645 supported_ports = [22,443]
646 def run(self, node_port, dryrun):
647 self.open(self.host, self.username)
648 self.sendPassword(self.password)
651 self.ifThenSend("</>hpiLO->", "cd system1")
653 # Reboot Outlet N (Y/N)?
655 self.ifThenSend("</system1>hpiLO->", "POWER")
658 self.ifThenSend("</system1>hpiLO->", "reset")
660 self.ifThenSend("</system1>hpiLO->", "exit")
666 class HPiLOHttps(PCUControl):
667 supported_ports = [22,443]
668 def run(self, node_port, dryrun):
670 locfg = command.CMD()
671 cmd = "cmdhttps/locfg.pl -s %s -f %s -u %s -p '%s' | grep 'MESSAGE' | grep -v 'No error'" % (
672 self.host, "iloxml/Get_Network.xml",
673 self.username, self.password)
674 sout, serr = locfg.run_noexcept(cmd)
676 if sout.strip() != "":
677 print "sout: %s" % sout.strip()
681 locfg = command.CMD()
682 cmd = "cmdhttps/locfg.pl -s %s -f %s -u %s -p '%s' | grep 'MESSAGE' | grep -v 'No error'" % (
683 self.host, "iloxml/Reset_Server.xml",
684 self.username, self.password)
685 sout, serr = locfg.run_noexcept(cmd)
687 if sout.strip() != "":
688 print "sout: %s" % sout.strip()
692 class BayTechAU(PCUControl):
693 def run(self, node_port, dryrun):
694 self.open(self.host, self.username, None, "Enter user name:")
695 self.sendPassword(self.password, "Enter Password:")
697 #self.ifThenSend("RPC-16>", "Status")
698 self.ifThenSend("RPC3-NC>", "Reboot %d" % node_port)
700 # Reboot Outlet N (Y/N)?
702 self.ifThenSend("(Y/N)?", "N")
704 self.ifThenSend("(Y/N)?", "Y")
705 self.ifThenSend("RPC3-NC>", "")
710 class BayTechGeorgeTown(PCUControl):
711 def run(self, node_port, dryrun):
712 self.open(self.host, self.username, None, "Enter user name:")
713 self.sendPassword(self.password, "Enter Password:")
715 #self.ifThenSend("RPC-16>", "Status")
717 self.ifThenSend("RPC-16>", "Reboot %d" % node_port)
719 # Reboot Outlet N (Y/N)?
721 self.ifThenSend("(Y/N)?", "N")
723 self.ifThenSend("(Y/N)?", "Y")
724 self.ifThenSend("RPC-16>", "")
729 class BayTechCtrlCUnibe(PCUControl):
731 For some reason, these units let you log in fine, but they hang
732 indefinitely, unless you send a Ctrl-C after the password. No idea
735 def run(self, node_port, dryrun):
736 print "BayTechCtrlC %s" % self.host
738 ssh_options="-o StrictHostKeyChecking=no -o PasswordAuthentication=yes -o PubkeyAuthentication=no"
740 if not s.login(self.host, self.username, self.password, ssh_options):
741 raise ExceptionPassword("Invalid Password")
742 # Otherwise, the login succeeded.
744 # Send a ctrl-c to the remote process.
745 print "sending ctrl-c"
748 # Control Outlets (5 ,1).........5
750 #index = s.expect("Enter Request")
751 index = s.expect(["Enter Request :"])
756 index = s.expect(["DS-RPC>", "Enter user name:"])
758 s.send(self.username + "\r\n")
759 index = s.expect(["DS-RPC>"])
762 print "Reboot %d" % node_port
763 s.send("Reboot %d\r\n" % node_port)
766 index = s.expect(["\(Y/N\)\?", "Port in use", "DS-RPC>"])
775 raise ExceptionPrompt("PCU Reported 'Port in use.'")
777 raise ExceptionSequence("Issued command 'Reboot' failed.")
780 index = s.expect(["DS-RPC>"])
781 #print "got prompt back"
786 raise ExceptionPrompt("EOF before expected Prompt")
787 except pexpect.TIMEOUT:
788 raise ExceptionPrompt("Timeout before expected Prompt")
792 class BayTechCtrlC(PCUControl):
794 For some reason, these units let you log in fine, but they hang
795 indefinitely, unless you send a Ctrl-C after the password. No idea
798 def run(self, node_port, dryrun):
799 print "BayTechCtrlC %s" % self.host
801 ssh_options="-o StrictHostKeyChecking=no -o PasswordAuthentication=yes -o PubkeyAuthentication=no"
803 if not s.login(self.host, self.username, self.password, ssh_options):
804 raise ExceptionPassword("Invalid Password")
805 # Otherwise, the login succeeded.
807 # Send a ctrl-c to the remote process.
808 print "SENDING ctrl-c"
811 # Control Outlets (5 ,1).........5
813 print "EXPECTING: ", "Enter Request :"
814 index = s.expect(["Enter Request :"])
819 print "EXPECTING: ", "DS-RPC>"
820 index = s.expect(["DS-RPC>", "Enter user name:", "Port in use."])
822 print "sending username"
823 s.send(self.username + "\r\n")
824 index = s.expect(["DS-RPC>"])
826 raise ExceptionPrompt("PCU Reported 'Port in use.'")
829 print "SENDING: Reboot %d" % node_port
830 s.send("Reboot %d\r\n" % node_port)
834 print "EXPECTING: ", "Y/N?"
835 index = s.expect(["\(Y/N\)\?", "Port in use", "DS-RPC>"])
844 raise ExceptionPrompt("PCU Reported 'Port in use.'")
846 raise ExceptionSequence("Issued command 'Reboot' failed.")
848 # NOTE: for some reason, the script times out with the
849 # following line. In manual tests, it works correctly, but
850 # with automated tests, evidently it fails.
853 #print "TOTAL--", s.allstr, "--EOT"
854 index = s.expect(["DS-RPC>"])
855 print "got prompt back"
860 raise ExceptionPrompt("EOF before 'Enter Request' Prompt")
861 except pexpect.TIMEOUT:
862 raise ExceptionPrompt("Timeout before Prompt")
866 class BayTech(PCUControl):
867 supported_ports = [22,23]
868 def run(self, node_port, dryrun):
869 self.open(self.host, self.username)
870 self.sendPassword(self.password)
872 # Control Outlets (5 ,1).........5
873 self.ifThenSend("Enter Request :", "5")
877 self.ifThenSend("DS-RPC>", "Reboot %d" % node_port, ExceptionNotFound)
878 except ExceptionNotFound, msg:
879 # one machine is configured to ask for a username,
880 # even after login...
881 print "msg: %s" % msg
882 self.transport.write(self.username + "\r\n")
884 self.ifThenSend("DS-RPC>", "Reboot %d" % node_port)
886 # Reboot Outlet N (Y/N)?
888 self.ifThenSend("(Y/N)?", "N")
890 self.ifThenSend("(Y/N)?", "Y")
892 self.ifThenSend("DS-RPC>", "")
897 class WTIIPS4(PCUControl):
898 supported_ports = [23]
899 def run(self, node_port, dryrun):
901 self.sendPassword(self.password, "Enter Password:")
903 self.ifThenSend("IPS> ", "/Boot %s" % node_port)
905 self.ifThenSend("Sure? (Y/N): ", "N")
907 self.ifThenSend("Sure? (Y/N): ", "Y")
909 self.ifThenSend("IPS> ", "")
914 class ePowerSwitchGood(PCUControl):
916 # The old code used Python's HTTPPasswordMgrWithDefaultRealm()
917 # For some reason this both doesn't work and in some cases, actually
918 # hangs the PCU. Definitely not what we want.
920 # The code below is much simpler. Just letting things fail first,
921 # and then, trying again with authentication string in the header.
923 def run(self, node_port, dryrun):
924 self.transport = None
925 self.url = "http://%s:%d/" % (self.host,80)
926 uri = "%s:%d" % (self.host,80)
928 req = urllib2.Request(self.url)
930 handle = urllib2.urlopen(req)
932 # NOTE: this is expected to fail initially
939 return "ERROR: not protected by HTTP authentication"
941 if not hasattr(e, 'code') or e.code != 401:
942 return "ERROR: failed for: %s" % str(e)
944 base64data = base64.encodestring("%s:%s" % (self.username, self.password))[:-1]
945 # NOTE: assuming basic realm authentication.
946 authheader = "Basic %s" % base64data
947 req.add_header("Authorization", authheader)
950 f = urllib2.urlopen(req)
952 # failing here means the User/passwd is wrong (hopefully)
953 raise ExceptionPassword("Incorrect username/password")
955 # NOTE: after verifying that the user/password is correct,
956 # actually reboot the given node.
959 data = urllib.urlencode({'P%d' % node_port : "r"})
960 req = urllib2.Request(self.url + "cmd.html")
961 req.add_header("Authorization", authheader)
962 # add data to handler,
963 f = urllib2.urlopen(req, data)
964 if self.verbose: print f.read()
966 import traceback; traceback.print_exc()
968 # fetch url one more time on cmd.html, econtrol.html or whatever.
971 if self.verbose: print f.read()
976 class CustomPCU(PCUControl):
977 def run(self, node_port, dryrun):
978 url = "https://www-itec.uni-klu.ac.at/plab-pcu/index.php"
981 # Turn host off, then on
982 formstr = "plab%s=off" % node_port
983 os.system("curl --user %s:%s --form '%s' --insecure %s" % (self.username, self.password, formstr, url))
985 formstr = "plab%s=on" % node_port
986 os.system("curl --user %s:%s --form '%s' --insecure %s" % (self.username, self.password, formstr, url))
988 os.system("curl --user %s:%s --insecure %s" % (self.username, self.password, url))
991 class ePowerSwitchOld(PCUControl):
992 def run(self, node_port, dryrun):
993 self.url = "http://%s:%d/" % (self.host,80)
994 uri = "%s:%d" % (self.host,80)
997 authinfo = urllib2.HTTPPasswordMgrWithDefaultRealm()
998 authinfo.add_password (None, uri, self.username, self.password)
999 authhandler = urllib2.HTTPBasicAuthHandler( authinfo )
1001 # NOTE: it doesn't seem to matter whether this authinfo is here or not.
1002 transport = urllib2.build_opener(authinfo)
1003 f = transport.open(self.url)
1004 if self.verbose: print f.read()
1007 transport = urllib2.build_opener(authhandler)
1008 f = transport.open(self.url + "cmd.html", "P%d=r" % node_port)
1009 if self.verbose: print f.read()
1014 class ePowerSwitch(PCUControl):
1015 supported_ports = [80]
1016 def run(self, node_port, dryrun):
1017 self.url = "http://%s:%d/" % (self.host,80)
1018 uri = "%s:%d" % (self.host,80)
1020 # TODO: I'm still not sure what the deal is here.
1021 # two independent calls appear to need to be made before the
1022 # reboot will succeed. It doesn't seem to be possible to do
1023 # this with a single call. I have no idea why.
1026 authinfo = urllib2.HTTPPasswordMgrWithDefaultRealm()
1027 authinfo.add_password (None, uri, self.username, self.password)
1028 authhandler = urllib2.HTTPBasicAuthHandler( authinfo )
1030 # NOTE: it doesn't seem to matter whether this authinfo is here or not.
1031 transport = urllib2.build_opener()
1032 f = transport.open(self.url + "elogin.html", "pwd=%s" % self.password)
1033 if self.verbose: print f.read()
1036 transport = urllib2.build_opener(authhandler)
1037 f = transport.open(self.url + "econtrol.html", "P%d=r" % node_port)
1038 if self.verbose: print f.read()
1040 # data= "P%d=r" % node_port
1041 #self.open(self.host, self.username, self.password)
1042 #self.sendHTTP("elogin.html", "pwd=%s" % self.password)
1043 #self.sendHTTP("econtrol.html", data)
1044 #self.sendHTTP("cmd.html", data)
1049 class ManualPCU(PCUControl):
1050 supported_ports = [22,23,80,443,9100,16992]
1052 def run(self, node_port, dryrun):
1054 # TODO: send email message to monitor admin requesting manual
1055 # intervention. This should always be an option for ridiculous,
1060 ### rebooting european BlackBox PSE boxes
1061 # Thierry Parmentelat - May 11 2005
1062 # tested on 4-ports models known as PSE505-FR
1063 # uses http to POST a data 'P<port>=r'
1064 # relies on basic authentication within http1.0
1065 # first curl-based script was
1066 # curl --http1.0 --basic --user <username>:<password> --data P<port>=r \
1067 # http://<hostname>:<http_port>/cmd.html && echo OK
1072 class BlackBoxPSMaverick(PCUControl):
1073 supported_ports = [80]
1075 def run(self, node_port, dryrun):
1077 # send reboot signal.
1078 cmd = "curl -s --data 'P%s=r' --anyauth --user '%s:%s' http://%s/config/home_f.html" % ( node_port, self.username, self.password, self.host)
1080 # else, just try to log in
1081 cmd = "curl -s --anyauth --user '%s:%s' http://%s/config/home_f.html" % ( self.username, self.password, self.host)
1085 print "RESULT: ", result
1087 if len(result.split()) > 3:
# bbpse_reboot: send the data 'P<port>=r' to http://<pcu_ip>:<http_port>/cmd.html
# using HTTP basic authentication, returning the string "bbpse error" when
# urllib2 raises URLError.
# NOTE(review): only some lines of this function are visible in this listing
# (the `try:` line, any use of `dryrun`, the handling of the response `f` and
# the success return are not shown); comments cover the visible lines only.
1092 def bbpse_reboot (pcu_ip,username,password,port_in_pcu,http_port, dryrun):
# Target URL and the form payload selecting the outlet.
1096 url = "http://%s:%d/cmd.html" % (pcu_ip,http_port)
1097 data= "P%d=r" % port_in_pcu
1099 logger.debug("POSTing '%s' on %s" % (data,url))
# Register the credentials for host:port under the default (None) realm.
1101 authinfo = urllib2.HTTPPasswordMgrWithDefaultRealm()
1102 uri = "%s:%d" % (pcu_ip,http_port)
1103 authinfo.add_password (None, uri, username, password)
1104 authhandler = urllib2.HTTPBasicAuthHandler( authinfo )
# install_opener makes this opener the default for every later
# urllib2.urlopen call in the process, not only the one below.
1106 opener = urllib2.build_opener(authhandler)
1107 urllib2.install_opener(opener)
1113 f = urllib2.urlopen(url,data)
1120 except urllib2.URLError,err:
# NOTE(review): `err` is passed as a %-format argument but the message has no
# placeholder, so logging cannot format this record and the error text is
# lost; presumably 'Could not open http connection: %s' was intended - confirm.
1121 logger.info('Could not open http connection', err)
1122 return "bbpse error"
1124 ### rebooting x10toggle based systems addressed by port
1125 # Marc E. Fiuczynski - May 31 2005
1126 # tested on 4-ports models known as PSE505-FR
1127 # uses ssh and password to login to an account
1128 # that will cause the system to be powercycled.
1130 def x10toggle_reboot(ip, username, password, port, dryrun):
1135 ssh = pyssh.Ssh(username, ip)
1139 telnet_answer(ssh, "password:", password)
1143 telnet_answer(ssh, "x10toggle>", "A%d" % port)
1146 output = ssh.close()
1148 logger.debug(output)
1151 except Exception, err:
1155 output = ssh.close()
1157 logger.debug(output)
1158 return errno.ETIMEDOUT
1160 ### rebooting Dell systems via RAC card
1161 # Marc E. Fiuczynski - June 01 2005
1162 # tested with David Lowenthal's itchy/scratchy nodes at UGA
1165 def runcmd(command, args, username, password, timeout = None):
1168 result_ready = threading.Condition()
1172 result_ready.acquire()
1176 result_ready.notify()
1177 result_ready.release()
1179 def do_command(command, username, password):
1182 # Popen4 is a popen-type class that combines stdout and stderr
1183 p = popen2.Popen4(command)
1185 # read all output data
1186 p.tochild.write("%s\n" % username)
1187 p.tochild.write("%s\n" % password)
1189 data = p.fromchild.read()
1192 # might get interrupted by a signal in poll() or waitpid()
1195 set_result((retval, data))
1198 if ex.errno == errno.EINTR:
1201 except Exception, ex:
1205 command = " ".join([command] + args)
1207 worker = threading.Thread(target = do_command, args = (command, username, password, ))
1208 worker.setDaemon(True)
1209 result_ready.acquire()
1211 result_ready.wait(timeout)
1213 if result == [None]:
1214 raise Exception, "command timed-out: '%s'" % command
1216 result_ready.release()
1219 if isinstance(result, Exception):
1222 (retval, data) = result
1223 if os.WIFEXITED(retval) and os.WEXITSTATUS(retval) == 0:
1226 out = "system command ('%s') " % command
1227 if os.WIFEXITED(retval):
1228 out += "failed, rc = %d" % os.WEXITSTATUS(retval)
1230 out += "killed by signal %d" % os.WTERMSIG(retval)
1232 out += "; output follows:\n" + data
1233 raise Exception, out
1235 def racadm_reboot(host, username, password, port, dryrun):
1238 ip = socket.gethostbyname(host)
1240 cmd = "/usr/sbin/racadm"
1243 output = runcmd(cmd, ["-r %s -i serveraction powercycle" % ip],
1246 output = runcmd(cmd, ["-r %s -i getsysinfo" % ip],
1249 print "RUNCMD: %s" % output
1251 logger.debug(output)
1254 except Exception, err:
1255 logger.debug("runcmd raised exception %s" % err)
1261 if pcu['hostname'] is not None and pcu['hostname'] is not "":
1262 return pcu['hostname']
1263 elif pcu['ip'] is not None and pcu['ip'] is not "":
1269 from monitor import database
1272 def get_pcu_values(pcu_id):
1275 # this shouldn't be loaded each time...
1276 fb = database.dbLoad("findbadpcus")
1279 values = fb['nodes']["id_%s" % pcu_id]['values']
def reboot(nodename):
    """Reboot *nodename* with the default policy.

    Thin wrapper around reboot_policy with probing enabled
    (continue_probe=True) and dry-run disabled (dryrun=False); returns
    whatever reboot_policy returns.
    """
    return reboot_policy(nodename, continue_probe=True, dryrun=False)
# reboot_policy: look up the PCU record for `nodename` (plc.getpcu), load the
# stored probe values for that PCU (get_pcu_values) and pass them to
# reboot_test together with `continue_probe` and `dryrun`.
# The two visible early exits return False after logging and printing a message.
# NOTE(review): only some lines of this function are visible in this listing
# (the conditions guarding the two early returns and everything after the
# reboot_test call are not shown); comments cover the visible lines only.
1288 def reboot_policy(nodename, continue_probe, dryrun):
1291 pcu = plc.getpcu(nodename)
# NOTE(review): `hostname` is neither a parameter nor assigned in the visible
# code; unless a module-level `hostname` exists this raises NameError on the
# failure paths. The parameter `nodename` is probably what was meant - confirm.
1293 logger.debug("no pcu for %s" % hostname)
1294 print "no pcu for %s" % hostname
1295 return False # "%s has no pcu" % nodename
# Probe values are keyed by the PCU's id.
1297 values = get_pcu_values(pcu['pcu_id'])
# NOTE(review): same undefined `hostname` as above.
1299 logger.debug("No values for pcu probe %s" % hostname)
1300 print "No values for pcu probe %s" % hostname
1301 return False #"no info for pcu_id %s" % pcu['pcu_id']
1304 logger.debug("Trying PCU %s %s" % (pcu['hostname'], pcu['model']))
# `verbose` is not a parameter or local here - presumably a module-level
# setting; confirm.
1306 ret = reboot_test(nodename, values, continue_probe, verbose, dryrun)
class Unknown(PCUControl):
    """PCUControl subclass that only declares a port list.

    It adds no behaviour of its own: everything except supported_ports is
    inherited from PCUControl.
    """

    supported_ports = [
        22, 23, 80, 443, 5869, 9100, 16992,
    ]
1318 def model_to_object(modelname):
1319 if "AMT" in modelname:
1321 elif "DS4-RPC" in modelname:
1323 elif "ilo2" in modelname:
1325 elif "IP-41x" in modelname:
1327 elif "AP79xx" in modelname or "Masterswitch" in modelname:
1329 elif "DRAC" in modelname:
1331 elif "WTI" in modelname:
1333 elif "ePowerSwitch" in modelname:
1335 elif "ipmi" in modelname:
1337 elif "bbsemaverick" in modelname:
1338 return BlackBoxPSMaverick
# Pick a PCU controller class from the probe values and ask it to reboot a
# node.
#
# nodename:       key looked up in `values`; values[nodename] is what most
#                 branches pass to reboot() (presumably the PCU outlet/port
#                 the node is attached to -- confirm against the probe data).
# values:         dict of probe results for the PCU; 'model' and 'pcu_id'
#                 drive the dispatch. It is MODIFIED IN PLACE (see below).
# continue_probe: when true, the model-specific reboot branches are eligible;
#                 the final branch handles `continue_probe == False`.
# verbose:        handed to every controller constructor.
# dryrun:         handed to every controller's reboot() call.
#
# Each controller is built as Class(values, verbose, [ports...]); the third
# argument is a list of port numbers given as strings (presumably the ports
# the controller is allowed to use -- confirm in PCUControl).
# Per-site special cases are selected by hard-coded pcu_id lists.
# The try/except/else lines that tie together repeated attempts, and the
# lines that return the result, are not shown in this excerpt.
1342 def reboot_test(nodename, values, continue_probe, verbose, dryrun):
# Merge the nested 'plc_pcu_stats' entry, when present, into `values` itself.
# This mutates the caller's dict.
1344 if 'plc_pcu_stats' in values:
1345 values.update(values['plc_pcu_stats'])
1348 # DataProbe iPal (many sites)
1349 if continue_probe and values['model'].find("IP-41x_IP-81x") >= 0:
1350 ipal = IPAL(values, verbose, ['23', '80', '9100'])
1351 rb_ret = ipal.reboot(values[nodename], dryrun)
1353 # APC Masterswitch (Berkeley)
1354 elif continue_probe and ( values['model'].find("AP79xx") >= 0 or \
1355 values['model'].find("Masterswitch") >= 0 ):
1358 # TODO: make a more robust version of APC
# Site-specific APC variants, chosen by pcu_id; APCMaster is the remaining
# case (its selecting line is not shown in this excerpt).
1359 if values['pcu_id'] in [1102,1163,1055,1111,1231,1113,1127,1128,1148]:
1360 apc = APCEurope(values, verbose, ['22', '23'])
1361 rb_ret = apc.reboot(values[nodename], dryrun)
1363 elif values['pcu_id'] in [1110,86]:
1364 apc = APCBrazil(values, verbose, ['22', '23'])
1365 rb_ret = apc.reboot(values[nodename], dryrun)
1367 elif values['pcu_id'] in [1221,1225,1220,1192]:
1368 apc = APCBerlin(values, verbose, ['22', '23'])
1369 rb_ret = apc.reboot(values[nodename], dryrun)
1371 elif values['pcu_id'] in [1173,1240,47,1363,1405,1401,1372,1371]:
1372 apc = APCFolsom(values, verbose, ['22', '23'])
1373 rb_ret = apc.reboot(values[nodename], dryrun)
1376 apc = APCMaster(values, verbose, ['22', '23'])
1377 rb_ret = apc.reboot(values[nodename], dryrun)
# BayTech DS4-RPC family, again with per-pcu_id variants.
1380 elif continue_probe and values['model'].find("DS4-RPC") >= 0:
1381 if values['pcu_id'] in [1056,1237,1052,1209,1002,1008,1041,1013,1022]:
1382 # These require a 'ctrl-c' to be sent...
1383 baytech = BayTechCtrlC(values, verbose, ['22', '23'])
1384 rb_ret = baytech.reboot(values[nodename], dryrun)
1386 elif values['pcu_id'] in [93]:
1387 baytech = BayTechAU(values, verbose, ['22', '23'])
1388 rb_ret = baytech.reboot(values[nodename], dryrun)
1390 elif values['pcu_id'] in [1057]:
1391 # These require a 'ctrl-c' to be sent...
1392 baytech = BayTechCtrlCUnibe(values, verbose, ['22', '23'])
1393 rb_ret = baytech.reboot(values[nodename], dryrun)
1395 elif values['pcu_id'] in [1012]:
1396 # This pcu sometimes doesn't present the 'Username' prompt,
1397 # unless you immediately try again...
# The same attempt appears twice below to implement that retry; the lines
# that decide when the second attempt runs are not shown in this excerpt.
1399 baytech = BayTechGeorgeTown(values, verbose, ['22', '23'])
1400 rb_ret = baytech.reboot(values[nodename], dryrun)
1402 baytech = BayTechGeorgeTown(values, verbose, ['22', '23'])
1403 rb_ret = baytech.reboot(values[nodename], dryrun)
1405 baytech = BayTech(values, verbose, ['22', '23'])
1406 rb_ret = baytech.reboot(values[nodename], dryrun)
# HP iLO: one attempt with HPiLO on port 22 and two with HPiLOHttps on port
# 443 are listed; the control flow between them is not shown in this excerpt.
# The iLO and DRAC branches pass 0 to reboot() instead of values[nodename].
1409 elif continue_probe and values['model'].find("ilo") >= 0:
1411 hpilo = HPiLO(values, verbose, ['22'])
1412 rb_ret = hpilo.reboot(0, dryrun)
1414 hpilo = HPiLOHttps(values, verbose, ['443'])
1415 rb_ret = hpilo.reboot(0, dryrun)
1417 hpilo = HPiLOHttps(values, verbose, ['443'])
1418 rb_ret = hpilo.reboot(0, dryrun)
1421 elif continue_probe and values['model'].find("DRAC") >= 0:
1422 # TODO: I don't think DRACRacAdm will throw an exception for the
1423 # default method to catch...
1425 drac = DRACRacAdm(values, verbose, ['443', '5869'])
1426 rb_ret = drac.reboot(0, dryrun)
1428 drac = DRAC(values, verbose, ['22'])
1429 rb_ret = drac.reboot(0, dryrun)
1431 elif continue_probe and values['model'].find("WTI IPS-4") >= 0:
1432 wti = WTIIPS4(values, verbose, ['23'])
1433 rb_ret = wti.reboot(values[nodename], dryrun)
1435 elif continue_probe and values['model'].find("AMT") >= 0:
1436 amt = IntelAMT(values, verbose, ['16992'])
1437 rb_ret = amt.reboot(values[nodename], dryrun)
1439 elif continue_probe and values['model'].find("bbsemaverick") >=0:
1440 print "TRYING BlackBoxPSMaverick"
1441 bbe = BlackBoxPSMaverick(values, verbose, ['80'])
1442 rb_ret = bbe.reboot(values[nodename], dryrun)
1444 elif continue_probe and values['model'].find("ipmi") >=0:
1447 ipmi = IPMI(values, verbose, ['80', '443', '623'])
1448 rb_ret = ipmi.reboot(values[nodename], dryrun)
# ePowerSwitch: only the controller class differs per pcu_id; the reboot call
# after the selection is shared by all three cases.
1450 elif continue_probe and values['model'].find("ePowerSwitch") >=0:
1451 # TODO: allow a different port than http 80.
1452 if values['pcu_id'] in [1089, 1071, 1046, 1035, 1118]:
1453 eps = ePowerSwitchGood(values, verbose, ['80'])
1454 elif values['pcu_id'] in [1003]:
1457 eps = ePowerSwitch(values, verbose, ['80'])
1459 eps = ePowerSwitchGood(values, verbose, ['80'])
1461 rb_ret = eps.reboot(values[nodename], dryrun)
# One-off controller selected purely by pcu_id, regardless of model string.
# NOTE(review): unlike every other branch, the result of reboot() is not
# stored in rb_ret here; if rb_ret is read after this chain it would be
# unset on this path -- confirm and assign if so.
1462 elif continue_probe and values['pcu_id'] in [1122]:
1463 custom = CustomPCU(values, verbose, ['80', '443'])
1464 custom.reboot(values[nodename], dryrun)
# Probing was requested but no model or pcu_id matched.
1466 elif continue_probe:
1467 rb_ret = "Unsupported_PCU"
# Reached only when continue_probe is falsy. A falsy value that does not
# compare equal to False (e.g. None) matches no branch in this chain at all.
1469 elif continue_probe == False:
1470 if 'portstatus' in values:
1477 except ExceptionPort, err:
1482 #elif continue_probe and values['protocol'] == "racadm" and \
1483 # values['model'] == "RAC":
1484 # rb_ret = racadm_reboot(pcu_name(values),
1485 # values['username'],
1486 # values['password'],
# Command-line driver statements (the enclosing definition's header line is
# not shown in this excerpt).
# Configure the module's logger to emit everything from DEBUG level upwards
# through a StreamHandler, with each message prefixed by "LOGGER - ".
1491 logger.setLevel(logging.DEBUG)
1492 ch = logging.StreamHandler()
1493 ch.setLevel(logging.DEBUG)
1494 formatter = logging.Formatter('LOGGER - %(message)s')
1495 ch.setFormatter(formatter)
1496 logger.addHandler(ch)
# The literal argument "test" acts as a flag rather than a node name; what it
# switches on is not shown here (presumably it sets `dryrun` -- confirm).
1499 if "test" in sys.argv:
# Every remaining command-line argument is treated as a node name and run
# through reboot_policy() with probing enabled.
1504 for node in sys.argv[1:]:
1505 if node == "test": continue
1507 print "Rebooting %s" % node
1508 if reboot_policy(node, True, dryrun):
# Any exception is reported by printing its traceback rather than propagating.
1512 except Exception, err:
1513 import traceback; traceback.print_exc()
1516 if __name__ == '__main__':
1518 logger = logging.getLogger("monitor")