3 # Reboot specified nodes
9 import errno, time, traceback
12 import threading, popen2
16 from subprocess import PIPE, Popen
17 import ssh.pxssh as pxssh
18 import ssh.pexpect as pexpect
22 # Use our versions of telnetlib and pyssh
23 sys.path.insert(0, os.path.dirname(sys.argv[0]))
25 sys.path.insert(0, os.path.dirname(sys.argv[0]) + "/pyssh")
31 # Event class ID from pcu events
32 #NODE_POWER_CONTROL = 3
35 #MONITOR_USER_ID = 11142
38 logger = logging.getLogger("monitor")
class ExceptionNoTransport(Exception):
    """Raised when an operation needs a transport but none is set."""


class ExceptionNotFound(Exception):
    """Raised when an expected string or reply was not received."""


class ExceptionPassword(Exception):
    """Raised when the password prompt is missing or the login is rejected."""


class ExceptionTimeout(Exception):
    """Raised when a connection attempt or an expected prompt times out."""


class ExceptionPrompt(Exception):
    """Raised when an expected prompt does not appear."""


class ExceptionSequence(Exception):
    """Raised when a PCU command sequence does not proceed as expected."""


class ExceptionReset(Exception):
    """Reset-related failure (presumably; confirm against the callers)."""


class ExceptionPort(Exception):
    """Raised when no usable port is available or the PCU reports a port error."""


class ExceptionUsername(Exception):
    """Raised when the username prompt is not seen."""
def telnet_answer(telnet, expected, buffer):
    """Wait for `expected` on `telnet`, then send `buffer` followed by CRLF.

    Reads with telnet.read_until() bounded by the module-level
    TELNET_TIMEOUT.

    Raises:
        ExceptionNotFound: if `expected` is not part of what was read.
    """
    reply = telnet.read_until(expected, TELNET_TIMEOUT)
    # logger.debug(reply)
    if reply.find(expected) < 0:
        raise ExceptionNotFound("'%s' not found" % expected)
    telnet.write(buffer + "\r\n")
64 # PCU has model, host, preferred-port, user, passwd,
66 # This is an object derived directly from the PLCAPI DB fields
68 def __init__(self, plc_pcu_dict):
69 for field in ['username', 'password', 'site_id',
72 'node_ids', 'ports', ]:
73 if field in plc_pcu_dict:
74 self.__setattr__(field, plc_pcu_dict[field])
76 raise Exception("No such field %s in PCU object" % field)
78 # These are the convenience functions built around the PCU object.
def __init__(self, plc_pcu_dict):
    """Initialise the PCU fields from `plc_pcu_dict`, then set self.host.

    self.host is whatever self.pcu_name() returns once the base-class
    initialiser has run.
    """
    PCU.__init__(self, plc_pcu_dict)
    # pcu_name() is called only after the base initialiser has populated
    # the instance, preserving the original statement order.
    resolved_name = self.pcu_name()
    self.host = resolved_name
85 if self.hostname is not None and self.hostname is not "":
87 elif self.ip is not None and self.ip is not "":
92 def nodeidToPort(self, node_id):
93 if node_id in self.node_ids:
94 for i in range(0, len(self.node_ids)):
95 if node_id == self.node_ids[i]:
98 raise Exception("No such Node ID: %d" % node_id)
100 # This class captures the observed pcu records from FindBadPCUs.py
102 def __init__(self, pcu_record_dict):
103 for field in ['nodenames', 'portstatus',
106 if field in pcu_record_dict:
107 if field == "reboot":
108 self.__setattr__("reboot_str", pcu_record_dict[field])
110 self.__setattr__(field, pcu_record_dict[field])
112 raise Exception("No such field %s in pcu record dict" % field)
122 def __init__(self, type, verbose):
124 self.verbose = verbose
125 self.transport = None
127 def open(self, host, username=None, password=None, prompt="User Name"):
130 if self.type == self.TELNET:
131 transport = telnetlib.Telnet(host, timeout=self.TELNET_TIMEOUT)
132 transport.set_debuglevel(self.verbose)
133 if username is not None:
134 self.transport = transport
135 self.ifThenSend(prompt, username, ExceptionUsername)
137 elif self.type == self.SSH:
138 if username is not None:
139 transport = pyssh.Ssh(username, host)
140 transport.set_debuglevel(self.verbose)
142 # TODO: have an ssh set_debuglevel() also...
144 raise Exception("Username cannot be None for ssh transport.")
145 elif self.type == self.HTTP:
146 self.url = "http://%s:%d/" % (host,80)
147 uri = "%s:%d" % (host,80)
150 authinfo = urllib2.HTTPPasswordMgrWithDefaultRealm()
151 authinfo.add_password (None, uri, username, password)
152 authhandler = urllib2.HTTPBasicAuthHandler( authinfo )
154 transport = urllib2.build_opener(authhandler)
157 raise Exception("Unknown transport type: %s" % self.type)
159 self.transport = transport
163 if self.type == self.TELNET:
164 self.transport.close()
165 elif self.type == self.SSH:
166 self.transport.close()
167 elif self.type == self.HTTP:
170 raise Exception("Unknown transport type %s" % self.type)
171 self.transport = None
173 def sendHTTP(self, resource, data):
175 print "POSTing '%s' to %s" % (data,self.url + resource)
178 f = self.transport.open(self.url + resource ,data)
183 except urllib2.URLError,err:
184 logger.info('Could not open http connection', err)
185 return "http transport error"
189 def sendPassword(self, password, prompt=None):
190 if self.type == self.TELNET:
192 self.ifThenSend("Password", password, ExceptionPassword)
194 self.ifThenSend(prompt, password, ExceptionPassword)
195 elif self.type == self.SSH:
196 self.ifThenSend("password:", password, ExceptionPassword)
197 elif self.type == self.HTTP:
200 raise Exception("Unknown transport type: %s" % self.type)
def ifThenSend(self, expected, buffer, ErrorClass=ExceptionPrompt):
    """Wait for `expected` on the current transport, then send `buffer` + CRLF.

    Args:
        expected: text that must appear in the data read from the transport.
        buffer: text to write (a trailing "\\r\\n" is appended) once
            `expected` has been seen.
        ErrorClass: exception type raised when `expected` is not found.

    Raises:
        ExceptionNoTransport: if self.transport is None.
        ErrorClass: if `expected` is not in the data returned by
            read_until() within self.TELNET_TIMEOUT.
    """
    # Identity check: comparing to None with != can be overridden by the
    # transport object's own comparison methods.
    if self.transport is None:
        raise ExceptionNoTransport("transport object is type None")

    output = self.transport.read_until(expected, self.TELNET_TIMEOUT)
    # assumes read_until() returns a string — TODO confirm for non-telnet transports
    if expected not in output:
        raise ErrorClass("'%s' not found" % expected)
    self.transport.write(buffer + "\r\n")
def ifElse(self, expected, ErrorClass):
    """Read from the transport until `expected`; raise ErrorClass on failure.

    Args:
        expected: text to wait for, bounded by self.TELNET_TIMEOUT.
        ErrorClass: exception type raised if the read itself fails.

    Raises:
        ErrorClass: if read_until() raises any ordinary exception.
    """
    try:
        self.transport.read_until(expected, self.TELNET_TIMEOUT)
    except Exception:
        # Only ordinary errors are translated; KeyboardInterrupt and
        # SystemExit propagate unchanged.
        raise ErrorClass("Could not find '%s' within timeout" % expected)
220 class PCUControl(Transport,PCUModel,PCURecord):
221 def __init__(self, plc_pcu_record, verbose, supported_ports=[]):
222 PCUModel.__init__(self, plc_pcu_record)
223 PCURecord.__init__(self, plc_pcu_record)
226 if '22' in supported_ports and self.portstatus['22'] == "open":
228 elif '23' in supported_ports and self.portstatus['23'] == "open":
229 type = Transport.TELNET
230 elif '80' in supported_ports and self.portstatus['80'] == "open":
231 type = Transport.HTTP
232 elif '443' in supported_ports and self.portstatus['443'] == "open":
233 type = Transport.HTTP
234 elif '5869' in supported_ports and self.portstatus['5869'] == "open":
235 # For DRAC cards. Racadm opens this port.
236 type = Transport.HTTP
237 elif '9100' in supported_ports and self.portstatus['9100'] == "open":
238 type = Transport.IPAL
239 elif '16992' in supported_ports and self.portstatus['16992'] == "open":
240 type = Transport.HTTP
242 raise ExceptionPort("Unsupported Port: No transport from open ports")
244 raise Exception("No Portstatus: No transport because no open ports")
245 Transport.__init__(self, type, verbose)
247 def run(self, node_port, dryrun):
248 """ This function is to be defined by the specific PCU instance. """
251 def reboot(self, node_port, dryrun):
253 return self.run(node_port, dryrun)
254 except ExceptionNotFound, err:
255 return "error: " + str(err)
256 except ExceptionPassword, err:
257 return "password exception: " + str(err)
258 except ExceptionTimeout, err:
259 return "timeout exception: " + str(err)
260 except ExceptionUsername, err:
261 return "exception: no username prompt: " + str(err)
262 except ExceptionSequence, err:
263 return "sequence error: " + str(err)
264 except ExceptionPrompt, err:
265 return "prompt exception: " + str(err)
266 except ExceptionPort, err:
267 return "no ports exception: " + str(err)
268 except socket.error, err:
269 return "socket error: timeout: " + str(err)
270 except EOFError, err:
272 logger.debug("reboot: EOF")
274 self.transport.close()
276 traceback.print_exc()
277 return "EOF connection reset" + str(err)
279 from nodecommon import email_exception
281 raise Exception('unknown')
283 class IPAL(PCUControl):
285 This now uses a proprietary format for communicating with the PCU. I
286 prefer it to Telnet, and Web access, since it's much lighter weight
287 and, more importantly, IT WORKS!! HHAHHHAHAHAHAHAHA!
def format_msg(self, data, cmd):
    """Build a command frame: ESC, password, ESC, `data`, then `cmd`.

    Args:
        data: payload text placed after the second ESC (may be empty).
        cmd: the command character; it is formatted with %c, so it must
            be a single character.

    Returns:
        The assembled frame as a string.
    """
    esc = "\x1b"  # ASCII escape, written as a literal instead of parsed from hex
    return "%c%s%c%s%c" % (esc, self.password, esc, data, cmd)
294 def recv_noblock(self, s, count):
298 # TODO: make sleep backoff, before stopping.
300 ret = s.recv(count, socket.MSG_DONTWAIT)
301 except socket.error, e:
302 if e[0] == errno.EAGAIN:
303 #raise Exception(e[1])
304 raise ExceptionNotFound(e[1])
306 # TODO: not other exceptions.
310 def run(self, node_port, dryrun):
316 s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
319 s.connect((self.host, 9100))
320 except socket.error, e:
322 if e[0] == errno.ECONNREFUSED:
323 # cannot connect to remote host
324 raise Exception(e[1])
325 elif e[0] == errno.ETIMEDOUT:
326 raise ExceptionTimeout(e[1])
328 # TODO: what other conditions are there?
332 print "Checking status"
333 s.send(self.format_msg("", 'O'))
334 ret = self.recv_noblock(s, 8)
335 print "Current status is '%s'" % ret
338 raise Exception("Status returned 'another session already open' on %s %s : %s" % (self.host, node_port, ret))
341 if node_port < len(ret):
342 status = ret[node_port]
350 raise ExceptionPort("IPAL reported 'Cable Error' on %s socket %s : %s" % (self.host, node_port, ret))
352 raise Exception("Unknown status for PCU %s socket %s : %s" % (self.host, node_port, ret))
354 raise Exception("Mismatch between configured port and PCU %s status: %s %s" % (self.host, node_port, ret))
359 print "Pulsing %s" % node_port
360 s.send(self.format_msg("%s" % node_port, 'P'))
362 # NOTE: turn power on ; do not pulse the port.
363 print "Power was off, so turning on ..."
364 s.send(self.format_msg("%s" % node_port, 'E'))
365 #s.send(self.format_msg("%s" % node_port, 'P'))
367 print "Receiving response."
368 ret = self.recv_noblock(s, 8)
369 print "Current status is '%s'" % ret
371 if node_port < len(ret):
372 status = ret[node_port]
380 raise ExceptionPort("IPAL reported 'Cable Error' on %s socket %s : %s" % (self.host, node_port, ret))
382 raise Exception("Unknown status for PCU %s socket %s : %s" % (self.host, node_port, ret))
384 raise Exception("Mismatch between configured port and PCU %s status: %s %s" % (self.host, node_port, ret))
389 return "Failed Power On"
394 # TELNET version of protocol...
395 # #self.open(self.host)
396 # ## XXX Some iPals require you to hit Enter a few times first
397 # #self.ifThenSend("Password >", "\r\n\r\n", ExceptionNotFound)
399 # self.ifThenSend("Password >", self.password, ExceptionPassword)
400 # self.transport.write("\r\n\r\n")
401 # if not dryrun: # P# - Pulse relay
402 # print "node_port %s" % node_port
403 # self.ifThenSend("Enter >",
404 # "P7", # % node_port,
406 # print "send newlines"
407 # self.transport.write("\r\n\r\n")
408 # print "after new lines"
409 # # Get the next prompt
410 # print "wait for enter"
411 # self.ifElse("Enter >", ExceptionTimeout)
416 class APCEurope(PCUControl):
417 def run(self, node_port, dryrun):
418 self.open(self.host, self.username)
419 self.sendPassword(self.password)
421 self.ifThenSend("\r\n> ", "1", ExceptionPassword)
422 self.ifThenSend("\r\n> ", "2")
423 self.ifThenSend("\r\n> ", str(node_port))
424 # 3- Immediate Reboot
425 self.ifThenSend("\r\n> ", "3")
428 self.ifThenSend("Enter 'YES' to continue or <ENTER> to cancel",
432 self.ifThenSend("Enter 'YES' to continue or <ENTER> to cancel",
433 "", ExceptionSequence)
434 self.ifThenSend("Press <ENTER> to continue...", "", ExceptionSequence)
439 class APCBrazil(PCUControl):
440 def run(self, node_port, dryrun):
441 self.open(self.host, self.username)
442 self.sendPassword(self.password)
444 self.ifThenSend("\r\n> ", "1", ExceptionPassword)
445 self.ifThenSend("\r\n> ", str(node_port))
446 # 4- Immediate Reboot
447 self.ifThenSend("\r\n> ", "4")
450 self.ifThenSend("Enter 'YES' to continue or <ENTER> to cancel",
454 self.ifThenSend("Enter 'YES' to continue or <ENTER> to cancel",
455 "", ExceptionSequence)
456 self.ifThenSend("Press <ENTER> to continue...", "", ExceptionSequence)
461 class APCBerlin(PCUControl):
462 def run(self, node_port, dryrun):
463 self.open(self.host, self.username)
464 self.sendPassword(self.password)
466 self.ifThenSend("\r\n> ", "1", ExceptionPassword)
467 self.ifThenSend("\r\n> ", "2")
468 self.ifThenSend("\r\n> ", "1")
469 self.ifThenSend("\r\n> ", str(node_port))
470 # 3- Immediate Reboot
471 self.ifThenSend("\r\n> ", "3")
474 self.ifThenSend("Enter 'YES' to continue or <ENTER> to cancel",
478 self.ifThenSend("Enter 'YES' to continue or <ENTER> to cancel",
479 "", ExceptionSequence)
480 self.ifThenSend("Press <ENTER> to continue...", "", ExceptionSequence)
485 class APCFolsom(PCUControl):
486 def run(self, node_port, dryrun):
487 self.open(self.host, self.username)
488 self.sendPassword(self.password)
490 self.ifThenSend("\r\n> ", "1", ExceptionPassword)
491 self.ifThenSend("\r\n> ", "2")
492 self.ifThenSend("\r\n> ", "1")
493 self.ifThenSend("\r\n> ", str(node_port))
494 self.ifThenSend("\r\n> ", "1")
496 # 3- Immediate Reboot
497 self.ifThenSend("\r\n> ", "3")
500 self.ifThenSend("Enter 'YES' to continue or <ENTER> to cancel",
504 self.ifThenSend("Enter 'YES' to continue or <ENTER> to cancel",
505 "", ExceptionSequence)
506 self.ifThenSend("Press <ENTER> to continue...", "", ExceptionSequence)
511 class APCMaster(PCUControl):
512 def run(self, node_port, dryrun):
513 print "Rebooting %s" % self.host
514 self.open(self.host, self.username)
515 self.sendPassword(self.password)
518 self.ifThenSend("\r\n> ", "1", ExceptionPassword)
519 # 3- Outlet Control/Config
520 self.ifThenSend("\r\n> ", "3")
522 self.ifThenSend("\r\n> ", str(node_port))
524 self.ifThenSend("\r\n> ", "1")
525 # 3- Immediate Reboot
526 self.ifThenSend("\r\n> ", "3")
529 self.ifThenSend("Enter 'YES' to continue or <ENTER> to cancel",
533 self.ifThenSend("Enter 'YES' to continue or <ENTER> to cancel",
534 "", ExceptionSequence)
535 self.ifThenSend("Press <ENTER> to continue...", "", ExceptionSequence)
540 class APC(PCUControl):
541 def __init__(self, plc_pcu_record, verbose):
542 PCUControl.__init__(self, plc_pcu_record, verbose)
544 self.master = APCMaster(plc_pcu_record, verbose)
545 self.folsom = APCFolsom(plc_pcu_record, verbose)
546 self.europe = APCEurope(plc_pcu_record, verbose)
548 def run(self, node_port, dryrun):
552 for pcu in [self.master, self.europe, self.folsom]:
555 print "-*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*"
558 time.sleep(sleep_time)
559 ret = pcu.reboot(node_port, dryrun)
560 except ExceptionSequence, err:
566 return "Unknown reboot sequence for APC PCU"
570 class IntelAMT(PCUControl):
571 def run(self, node_port, dryrun):
573 cmd = moncommands.CMD()
574 #[cmd_str = "IntelAMTSDK/Samples/RemoteControl/remoteControl"
575 cmd_str = "cmdamt/remoteControl"
578 # NOTE: -p checks the power state of the host.
579 # TODO: parse the output to find out if it's ok or not.
580 cmd_str += " -p http://%s:16992/RemoteControlService -user admin -pass '%s' " % (self.host, self.password )
582 cmd_str += " -A http://%s:16992/RemoteControlService -user admin -pass '%s' " % (self.host, self.password )
585 return cmd.system(cmd_str, self.TELNET_TIMEOUT)
587 class DRACRacAdm(PCUControl):
588 def run(self, node_port, dryrun):
590 print "trying racadm_reboot..."
591 racadm_reboot(self.host, self.username, self.password, node_port, dryrun)
595 class DRAC(PCUControl):
596 def run(self, node_port, dryrun):
597 self.open(self.host, self.username)
598 self.sendPassword(self.password)
600 print "logging in..."
601 self.transport.write("\r\n")
604 self.ifThenSend("[%s]#" % self.username, "getsysinfo")
607 self.ifThenSend("[%s]#" % self.username, "serveraction powercycle")
609 self.ifThenSend("[%s]#" % self.username, "exit")
614 class HPiLO(PCUControl):
615 def run(self, node_port, dryrun):
616 self.open(self.host, self.username)
617 self.sendPassword(self.password)
620 self.ifThenSend("</>hpiLO->", "cd system1")
622 # Reboot Outlet N (Y/N)?
624 self.ifThenSend("</system1>hpiLO->", "POWER")
627 self.ifThenSend("</system1>hpiLO->", "reset")
629 self.ifThenSend("</system1>hpiLO->", "exit")
635 class HPiLOHttps(PCUControl):
636 def run(self, node_port, dryrun):
638 locfg = moncommands.CMD()
639 cmd = "cmdhttps/locfg.pl -s %s -f %s -u %s -p '%s' | grep 'MESSAGE' | grep -v 'No error'" % (
640 self.host, "iloxml/Get_Network.xml",
641 self.username, self.password)
642 sout, serr = locfg.run_noexcept(cmd)
644 if sout.strip() != "":
645 print "sout: %s" % sout.strip()
649 locfg = moncommands.CMD()
650 cmd = "cmdhttps/locfg.pl -s %s -f %s -u %s -p '%s' | grep 'MESSAGE' | grep -v 'No error'" % (
651 self.host, "iloxml/Reset_Server.xml",
652 self.username, self.password)
653 sout, serr = locfg.run_noexcept(cmd)
655 if sout.strip() != "":
656 print "sout: %s" % sout.strip()
660 class BayTechAU(PCUControl):
661 def run(self, node_port, dryrun):
662 self.open(self.host, self.username, None, "Enter user name:")
663 self.sendPassword(self.password, "Enter Password:")
665 #self.ifThenSend("RPC-16>", "Status")
666 self.ifThenSend("RPC3-NC>", "Reboot %d" % node_port)
668 # Reboot Outlet N (Y/N)?
670 self.ifThenSend("(Y/N)?", "N")
672 self.ifThenSend("(Y/N)?", "Y")
673 self.ifThenSend("RPC3-NC>", "")
678 class BayTechGeorgeTown(PCUControl):
679 def run(self, node_port, dryrun):
680 # this initial open/close is to prevent things from raising an
681 # exception. the pcu always is weird during the first connection, and
682 # even if it's not, what does it matter to open a second connection
684 self.open(self.host, self.username, None, "Enter user name:")
687 self.open(self.host, self.username, None, "Enter user name:")
688 self.sendPassword(self.password, "Enter Password:")
690 #self.ifThenSend("RPC-16>", "Status")
692 self.ifThenSend("RPC-16>", "Reboot %d" % node_port)
694 # Reboot Outlet N (Y/N)?
696 self.ifThenSend("(Y/N)?", "N")
698 self.ifThenSend("(Y/N)?", "Y")
699 self.ifThenSend("RPC-16>", "")
704 class BayTechCtrlCUnibe(PCUControl):
706 For some reason, these units let you log in fine, but they hang
707 indefinitely, unless you send a Ctrl-C after the password. No idea
710 def run(self, node_port, dryrun):
711 print "BayTechCtrlC %s" % self.host
713 ssh_options="-o StrictHostKeyChecking=no -o PasswordAuthentication=yes -o PubkeyAuthentication=no"
715 if not s.login(self.host, self.username, self.password, ssh_options):
716 raise ExceptionPassword("Invalid Password")
717 # Otherwise, the login succeeded.
719 # Send a ctrl-c to the remote process.
720 print "sending ctrl-c"
723 # Control Outlets (5 ,1).........5
725 #index = s.expect("Enter Request")
726 index = s.expect(["Enter Request :"])
731 index = s.expect(["DS-RPC>", "Enter user name:"])
733 s.send(self.username + "\r\n")
734 index = s.expect(["DS-RPC>"])
737 print "Reboot %d" % node_port
738 s.send("Reboot %d\r\n" % node_port)
741 index = s.expect(["\(Y/N\)\?", "Port in use", "DS-RPC>"])
750 raise ExceptionPrompt("PCU Reported 'Port in use.'")
752 raise ExceptionSequence("Issued command 'Reboot' failed.")
755 index = s.expect(["DS-RPC>"])
756 #print "got prompt back"
761 raise ExceptionPrompt("EOF before expected Prompt")
762 except pexpect.TIMEOUT:
763 raise ExceptionPrompt("Timeout before expected Prompt")
767 class BayTechCtrlC(PCUControl):
769 For some reason, these units let you log in fine, but they hang
770 indefinitely, unless you send a Ctrl-C after the password. No idea
773 def run(self, node_port, dryrun):
774 print "BayTechCtrlC %s" % self.host
776 ssh_options="-o StrictHostKeyChecking=no -o PasswordAuthentication=yes -o PubkeyAuthentication=no"
778 if not s.login(self.host, self.username, self.password, ssh_options):
779 raise ExceptionPassword("Invalid Password")
780 # Otherwise, the login succeeded.
782 # Send a ctrl-c to the remote process.
783 print "SENDING ctrl-c"
786 # Control Outlets (5 ,1).........5
788 print "EXPECTING: ", "Enter Request :"
789 index = s.expect(["Enter Request :"])
794 print "EXPECTING: ", "DS-RPC>"
795 index = s.expect(["DS-RPC>", "Enter user name:", "Port in use."])
797 print "sending username"
798 s.send(self.username + "\r\n")
799 index = s.expect(["DS-RPC>"])
801 raise ExceptionPrompt("PCU Reported 'Port in use.'")
804 print "SENDING: Reboot %d" % node_port
805 s.send("Reboot %d\r\n" % node_port)
809 print "EXPECTING: ", "Y/N?"
810 index = s.expect(["\(Y/N\)\?", "Port in use", "DS-RPC>"])
819 raise ExceptionPrompt("PCU Reported 'Port in use.'")
821 raise ExceptionSequence("Issued command 'Reboot' failed.")
823 # NOTE: for some reason, the script times out with the
824 # following line. In manual tests, it works correctly, but
825 # with automated tests, evidently it fails.
828 #print "TOTAL--", s.allstr, "--EOT"
829 index = s.expect(["DS-RPC>"])
830 print "got prompt back"
835 raise ExceptionPrompt("EOF before 'Enter Request' Prompt")
836 except pexpect.TIMEOUT:
837 raise ExceptionPrompt("Timeout before Prompt")
841 class BayTech(PCUControl):
842 def run(self, node_port, dryrun):
843 self.open(self.host, self.username)
844 self.sendPassword(self.password)
846 # Control Outlets (5 ,1).........5
847 self.ifThenSend("Enter Request :", "5")
851 self.ifThenSend("DS-RPC>", "Reboot %d" % node_port, ExceptionNotFound)
852 except ExceptionNotFound, msg:
853 # one machine is configured to ask for a username,
854 # even after login...
855 print "msg: %s" % msg
856 self.transport.write(self.username + "\r\n")
858 self.ifThenSend("DS-RPC>", "Reboot %d" % node_port)
860 # Reboot Outlet N (Y/N)?
862 self.ifThenSend("(Y/N)?", "N")
864 self.ifThenSend("(Y/N)?", "Y")
866 self.ifThenSend("DS-RPC>", "")
871 class WTIIPS4(PCUControl):
872 def run(self, node_port, dryrun):
874 self.sendPassword(self.password, "Enter Password:")
876 self.ifThenSend("IPS> ", "/Boot %s" % node_port)
878 self.ifThenSend("Sure? (Y/N): ", "N")
880 self.ifThenSend("Sure? (Y/N): ", "Y")
882 self.ifThenSend("IPS> ", "")
887 class ePowerSwitchGood(PCUControl):
889 # The old code used Python's HTTPPasswordMgrWithDefaultRealm()
890 # For some reason this both doesn't work and in some cases, actually
891 # hangs the PCU. Definitely not what we want.
893 # The code below is much simpler. Just letting things fail first,
894 # and then, trying again with authentication string in the header.
896 def run(self, node_port, dryrun):
897 self.transport = None
898 self.url = "http://%s:%d/" % (self.host,80)
899 uri = "%s:%d" % (self.host,80)
901 req = urllib2.Request(self.url)
903 handle = urllib2.urlopen(req)
905 # NOTE: this is expected to fail initially
912 return "ERROR: not protected by HTTP authentication"
914 if not hasattr(e, 'code') or e.code != 401:
915 return "ERROR: failed for: %s" % str(e)
917 base64data = base64.encodestring("%s:%s" % (self.username, self.password))[:-1]
918 # NOTE: assuming basic realm authentication.
919 authheader = "Basic %s" % base64data
920 req.add_header("Authorization", authheader)
923 f = urllib2.urlopen(req)
925 # failing here means the User/passwd is wrong (hopefully)
926 raise ExceptionPassword("Incorrect username/password")
928 # NOTE: after verifying that the user/password is correct,
929 # actually reboot the given node.
932 data = urllib.urlencode({'P%d' % node_port : "r"})
933 req = urllib2.Request(self.url + "cmd.html")
934 req.add_header("Authorization", authheader)
935 # add data to handler,
936 f = urllib2.urlopen(req, data)
937 if self.verbose: print f.read()
939 import traceback; traceback.print_exc()
940 from nodecommon import email_exception
943 # fetch url one more time on cmd.html, econtrol.html or whatever.
946 if self.verbose: print f.read()
951 class CustomPCU(PCUControl):
952 def run(self, node_port, dryrun):
953 url = "https://www-itec.uni-klu.ac.at/plab-pcu/index.php"
956 # Turn host off, then on
957 formstr = "plab%s=off" % node_port
958 os.system("curl --user %s:%s --form '%s' --insecure %s" % (self.username, self.password, formstr, url))
960 formstr = "plab%s=on" % node_port
961 os.system("curl --user %s:%s --form '%s' --insecure %s" % (self.username, self.password, formstr, url))
963 os.system("curl --user %s:%s --insecure %s" % (self.username, self.password, url))
966 class ePowerSwitchOld(PCUControl):
967 def run(self, node_port, dryrun):
968 self.url = "http://%s:%d/" % (self.host,80)
969 uri = "%s:%d" % (self.host,80)
972 authinfo = urllib2.HTTPPasswordMgrWithDefaultRealm()
973 authinfo.add_password (None, uri, self.username, self.password)
974 authhandler = urllib2.HTTPBasicAuthHandler( authinfo )
976 # NOTE: it doesn't seem to matter whether this authinfo is here or not.
977 transport = urllib2.build_opener(authinfo)
978 f = transport.open(self.url)
979 if self.verbose: print f.read()
982 transport = urllib2.build_opener(authhandler)
983 f = transport.open(self.url + "cmd.html", "P%d=r" % node_port)
984 if self.verbose: print f.read()
989 class ePowerSwitch(PCUControl):
990 def run(self, node_port, dryrun):
991 self.url = "http://%s:%d/" % (self.host,80)
992 uri = "%s:%d" % (self.host,80)
994 # TODO: I'm still not sure what the deal is here.
995 # two independent calls appear to need to be made before the
996 # reboot will succeed. It doesn't seem to be possible to do
997 # this with a single call. I have no idea why.
1000 authinfo = urllib2.HTTPPasswordMgrWithDefaultRealm()
1001 authinfo.add_password (None, uri, self.username, self.password)
1002 authhandler = urllib2.HTTPBasicAuthHandler( authinfo )
1004 # NOTE: it doesn't seem to matter whether this authinfo is here or not.
1005 transport = urllib2.build_opener()
1006 f = transport.open(self.url + "elogin.html", "pwd=%s" % self.password)
1007 if self.verbose: print f.read()
1010 transport = urllib2.build_opener(authhandler)
1011 f = transport.open(self.url + "econtrol.html", "P%d=r" % node_port)
1012 if self.verbose: print f.read()
1014 # data= "P%d=r" % node_port
1015 #self.open(self.host, self.username, self.password)
1016 #self.sendHTTP("elogin.html", "pwd=%s" % self.password)
1017 #self.sendHTTP("econtrol.html", data)
1018 #self.sendHTTP("cmd.html", data)
1024 ### rebooting european BlackBox PSE boxes
1025 # Thierry Parmentelat - May 11 2005
1026 # tested on 4-ports models known as PSE505-FR
1027 # uses http to POST a data 'P<port>=r'
1028 # relies on basic authentication within http1.0
1029 # first curl-based script was
1030 # curl --http1.0 --basic --user <username>:<password> --data P<port>=r \
1031 # http://<hostname>:<http_port>/cmd.html && echo OK
1033 def bbpse_reboot (pcu_ip,username,password,port_in_pcu,http_port, dryrun):
1037 url = "http://%s:%d/cmd.html" % (pcu_ip,http_port)
1038 data= "P%d=r" % port_in_pcu
1040 logger.debug("POSTing '%s' on %s" % (data,url))
1042 authinfo = urllib2.HTTPPasswordMgrWithDefaultRealm()
1043 uri = "%s:%d" % (pcu_ip,http_port)
1044 authinfo.add_password (None, uri, username, password)
1045 authhandler = urllib2.HTTPBasicAuthHandler( authinfo )
1047 opener = urllib2.build_opener(authhandler)
1048 urllib2.install_opener(opener)
1054 f = urllib2.urlopen(url,data)
1061 except urllib2.URLError,err:
1062 logger.info('Could not open http connection', err)
1063 return "bbpse error"
1065 ### rebooting x10toggle based systems addressed by port
1066 # Marc E. Fiuczynski - May 31 2005
1067 # tested on 4-ports models known as PSE505-FR
1068 # uses ssh and password to login to an account
1069 # that will cause the system to be powercycled.
1071 def x10toggle_reboot(ip, username, password, port, dryrun):
1076 ssh = pyssh.Ssh(username, ip)
1080 telnet_answer(ssh, "password:", password)
1084 telnet_answer(ssh, "x10toggle>", "A%d" % port)
1087 output = ssh.close()
1089 logger.debug(output)
1092 except Exception, err:
1096 output = ssh.close()
1098 logger.debug(output)
1099 return errno.ETIMEDOUT
1101 ### rebooting Dell systems via RAC card
1102 # Marc E. Fiuczynski - June 01 2005
1103 # tested with David Lowenthal's itchy/scratchy nodes at UGA
1106 def runcmd(command, args, username, password, timeout = None):
1109 result_ready = threading.Condition()
1113 result_ready.acquire()
1117 result_ready.notify()
1118 result_ready.release()
1120 def do_command(command, username, password):
1123 # Popen4 is a popen-type class that combines stdout and stderr
1124 p = popen2.Popen4(command)
1126 # read all output data
1127 p.tochild.write("%s\n" % username)
1128 p.tochild.write("%s\n" % password)
1130 data = p.fromchild.read()
1133 # might get interrupted by a signal in poll() or waitpid()
1136 set_result((retval, data))
1139 if ex.errno == errno.EINTR:
1142 except Exception, ex:
1146 command = " ".join([command] + args)
1148 worker = threading.Thread(target = do_command, args = (command, username, password, ))
1149 worker.setDaemon(True)
1150 result_ready.acquire()
1152 result_ready.wait(timeout)
1154 if result == [None]:
1155 raise Exception, "command timed-out: '%s'" % command
1157 result_ready.release()
1160 if isinstance(result, Exception):
1163 (retval, data) = result
1164 if os.WIFEXITED(retval) and os.WEXITSTATUS(retval) == 0:
1167 out = "system command ('%s') " % command
1168 if os.WIFEXITED(retval):
1169 out += "failed, rc = %d" % os.WEXITSTATUS(retval)
1171 out += "killed by signal %d" % os.WTERMSIG(retval)
1173 out += "; output follows:\n" + data
1174 raise Exception, out
1176 def racadm_reboot(host, username, password, port, dryrun):
1179 ip = socket.gethostbyname(host)
1181 cmd = "/usr/sbin/racadm"
1184 output = runcmd(cmd, ["-r %s -i serveraction powercycle" % ip],
1187 output = runcmd(cmd, ["-r %s -i getsysinfo" % ip],
1190 print "RUNCMD: %s" % output
1192 logger.debug(output)
1195 except Exception, err:
1196 logger.debug("runcmd raised exception %s" % err)
1202 if pcu['hostname'] is not None and pcu['hostname'] is not "":
1203 return pcu['hostname']
1204 elif pcu['ip'] is not None and pcu['ip'] is not "":
1210 from monitor import database
1213 def get_pcu_values(pcu_id):
1216 # this shouldn't be loaded each time...
1217 fb = database.dbLoad("findbadpcus")
1220 values = fb['nodes']["id_%s" % pcu_id]['values']
def reboot(nodename):
    """Reboot `nodename` for real: probing enabled, dry-run disabled.

    Returns whatever reboot_policy() returns for these settings.
    """
    return reboot_policy(nodename, continue_probe=True, dryrun=False)
1229 def reboot_policy(nodename, continue_probe, dryrun):
1232 pcu = plc.getpcu(nodename)
1234 logger.debug("no pcu for %s" % nodename)
1235 print "no pcu for %s" % nodename
1236 return False # "%s has no pcu" % nodename
1238 values = get_pcu_values(pcu['pcu_id'])
1240 logger.debug("No values for pcu probe %s" % nodename)
1241 print "No values for pcu probe %s" % nodename
1242 return False #"no info for pcu_id %s" % pcu['pcu_id']
1245 logger.debug("Trying PCU %s %s" % (pcu['hostname'], pcu['model']))
1247 ret = reboot_test(nodename, values, continue_probe, verbose, dryrun)
1256 def reboot_test(nodename, values, continue_probe, verbose, dryrun):
1260 # DataProbe iPal (many sites)
1261 if continue_probe and values['model'].find("IP-41x_IP-81x") >= 0:
1262 ipal = IPAL(values, verbose, ['23', '80', '9100'])
1263 rb_ret = ipal.reboot(values[nodename], dryrun)
1265 # APC Masterswitch (Berkeley)
1266 elif continue_probe and ( values['model'].find("AP79xx") >= 0 or \
1267 values['model'].find("Masterswitch") >= 0 ):
1270 # TODO: make a more robust version of APC
1271 if values['pcu_id'] in [1102,1163,1055,1111,1231,1113,1127,1128,1148]:
1272 apc = APCEurope(values, verbose, ['22', '23'])
1273 rb_ret = apc.reboot(values[nodename], dryrun)
1275 elif values['pcu_id'] in [1110,86]:
1276 apc = APCBrazil(values, verbose, ['22', '23'])
1277 rb_ret = apc.reboot(values[nodename], dryrun)
1279 elif values['pcu_id'] in [1221,1225,1220]:
1280 apc = APCBerlin(values, verbose, ['22', '23'])
1281 rb_ret = apc.reboot(values[nodename], dryrun)
1283 elif values['pcu_id'] in [1173,1240,47]:
1284 apc = APCFolsom(values, verbose, ['22', '23'])
1285 rb_ret = apc.reboot(values[nodename], dryrun)
1288 apc = APCMaster(values, verbose, ['22', '23'])
1289 rb_ret = apc.reboot(values[nodename], dryrun)
1292 elif continue_probe and values['model'].find("DS4-RPC") >= 0:
1293 if values['pcu_id'] in [1056,1237,1052,1209,1002,1008,1041,1013,1022]:
1294 # These require a 'ctrl-c' to be sent...
1295 baytech = BayTechCtrlC(values, verbose, ['22', '23'])
1296 rb_ret = baytech.reboot(values[nodename], dryrun)
1298 elif values['pcu_id'] in [93]:
1299 baytech = BayTechAU(values, verbose, ['22', '23'])
1300 rb_ret = baytech.reboot(values[nodename], dryrun)
1302 elif values['pcu_id'] in [1057]:
1303 # These require a 'ctrl-c' to be sent...
1304 baytech = BayTechCtrlCUnibe(values, verbose, ['22', '23'])
1305 rb_ret = baytech.reboot(values[nodename], dryrun)
1307 elif values['pcu_id'] in [1012]:
1308 # This pcu sometimes doesn't present the 'Username' prompt,
1309 # unless you immediately try again...
1311 baytech = BayTechGeorgeTown(values, verbose, ['22', '23'])
1312 rb_ret = baytech.reboot(values[nodename], dryrun)
1314 baytech = BayTechGeorgeTown(values, verbose, ['22', '23'])
1315 rb_ret = baytech.reboot(values[nodename], dryrun)
1317 baytech = BayTech(values, verbose, ['22', '23'])
1318 rb_ret = baytech.reboot(values[nodename], dryrun)
1321 elif continue_probe and values['model'].find("ilo") >= 0:
1323 hpilo = HPiLO(values, verbose, ['22'])
1324 rb_ret = hpilo.reboot(0, dryrun)
1326 hpilo = HPiLOHttps(values, verbose, ['443'])
1327 rb_ret = hpilo.reboot(0, dryrun)
1329 hpilo = HPiLOHttps(values, verbose, ['443'])
1330 rb_ret = hpilo.reboot(0, dryrun)
1333 elif continue_probe and values['model'].find("DRAC") >= 0:
1334 # TODO: I don't think DRACRacAdm will throw an exception for the
1335 # default method to catch...
1337 if values['pcu_id'] in [1402]:
1338 drac = DRAC(values, verbose, ['22'])
1339 rb_ret = drac.reboot(0, dryrun)
1341 drac = DRACRacAdm(values, verbose, ['443', '5869'])
1342 rb_ret = drac.reboot(0, dryrun)
1344 drac = DRAC(values, verbose, ['22'])
1345 rb_ret = drac.reboot(0, dryrun)
1347 elif continue_probe and values['model'].find("WTI IPS-4") >= 0:
1348 wti = WTIIPS4(values, verbose, ['23'])
1349 rb_ret = wti.reboot(values[nodename], dryrun)
1351 elif continue_probe and values['model'].find("AMT") >= 0:
1352 amt = IntelAMT(values, verbose, ['16992'])
1353 rb_ret = amt.reboot(values[nodename], dryrun)
1355 # BlackBox PSExxx-xx (e.g. PSE505-FR)
1356 elif continue_probe and values['model'].find("ePowerSwitch") >=0:
1357 # TODO: allow a different port than http 80.
1358 if values['pcu_id'] in [1089, 1071, 1046, 1035, 1118]:
1359 eps = ePowerSwitchGood(values, verbose, ['80'])
1360 elif values['pcu_id'] in [1003]:
1363 eps = ePowerSwitch(values, verbose, ['80'])
1365 eps = ePowerSwitchGood(values, verbose, ['80'])
1367 rb_ret = eps.reboot(values[nodename], dryrun)
1368 elif continue_probe and values['pcu_id'] in [1122]:
1369 custom = CustomPCU(values, verbose, ['80', '443'])
1370 custom.reboot(values[nodename], dryrun)
1372 elif continue_probe:
1373 rb_ret = "Unsupported_PCU"
1375 elif continue_probe == False:
1376 if 'portstatus' in values:
1383 except ExceptionPort, err:
1388 #elif continue_probe and values['protocol'] == "racadm" and \
1389 # values['model'] == "RAC":
1390 # rb_ret = racadm_reboot(pcu_name(values),
1391 # values['username'],
1392 # values['password'],
1397 logger.setLevel(logging.DEBUG)
1398 ch = logging.StreamHandler()
1399 ch.setLevel(logging.DEBUG)
1400 formatter = logging.Formatter('LOGGER - %(message)s')
1401 ch.setFormatter(formatter)
1402 logger.addHandler(ch)
1405 if "test" in sys.argv:
1410 for node in sys.argv[1:]:
1411 if node == "test": continue
1413 print "Rebooting %s" % node
1414 if reboot_policy(node, True, dryrun):
1418 except Exception, err:
1419 import traceback; traceback.print_exc()
1420 from nodecommon import email_exception
1424 if __name__ == '__main__':
1426 logger = logging.getLogger("monitor")