3 # Reboot specified nodes
9 import errno, time, traceback
12 import threading, popen2
14 from monitor.wrapper import plc
16 from subprocess import PIPE, Popen
17 import ssh.pxssh as pxssh
18 import ssh.pexpect as pexpect
20 from monitor.util import command
22 # Use our versions of telnetlib and pyssh
23 sys.path.insert(0, os.path.dirname(sys.argv[0]))
25 sys.path.insert(0, os.path.dirname(sys.argv[0]) + "/pyssh")
31 # Event class ID from pcu events
32 #NODE_POWER_CONTROL = 3
35 #MONITOR_USER_ID = 11142
38 logger = logging.getLogger("monitor")
class ExceptionNoTransport(Exception):
    """Raised when a send is attempted while no transport object is set."""


class ExceptionNotFound(Exception):
    """Raised when expected text is not found in the data read back."""


class ExceptionPassword(Exception):
    """Raised when the password step of a login fails."""


class ExceptionTimeout(Exception):
    """Reported by PCUControl.reboot() as a "timeout exception"."""


class ExceptionPrompt(Exception):
    """Raised when an expected prompt does not appear."""


class ExceptionSequence(Exception):
    """Raised when a PCU command sequence does not proceed as expected."""


class ExceptionReset(Exception):
    """Reset-related failure (NOTE(review): no raise site visible here)."""


class ExceptionPort(Exception):
    """Raised when none of the open PCU ports maps to a usable transport."""


class ExceptionUsername(Exception):
    """Raised when the username prompt does not appear."""
def telnet_answer(telnet, expected, buffer):
    """Wait for a prompt on ``telnet`` and answer it.

    Calls ``telnet.read_until(expected, TELNET_TIMEOUT)``; when the text
    read contains ``expected``, writes ``buffer`` followed by CRLF.

    Raises ExceptionNotFound when ``expected`` is absent from the text read.
    """
    received = telnet.read_until(expected, TELNET_TIMEOUT)
    if expected not in received:
        raise ExceptionNotFound("'%s' not found" % expected)
    telnet.write(buffer + "\r\n")
64 # PCU has model, host, preferred-port, user, passwd,
66 # This is an object derived directly from the PLCAPI DB fields
68 def __init__(self, plc_pcu_dict):
69 for field in ['username', 'password', 'site_id',
72 'node_ids', 'ports', ]:
73 if field in plc_pcu_dict:
74 self.__setattr__(field, plc_pcu_dict[field])
76 raise Exception("No such field %s in PCU object" % field)
78 # These are the convenience functions built around the PCU object.
80 def __init__(self, plc_pcu_dict):
81 PCU.__init__(self, plc_pcu_dict)
82 self.host = self.pcu_name()
85 if self.hostname is not None and self.hostname is not "":
87 elif self.ip is not None and self.ip is not "":
def nodeidToPort(self, node_id):
    """Return the PCU port paired with ``node_id``.

    ``self.node_ids`` and ``self.ports`` are used as parallel sequences:
    the port returned sits at the index where ``node_id`` first occurs in
    ``self.node_ids``.

    Raises Exception when ``node_id`` is not present in ``self.node_ids``.
    """
    try:
        # One scan replaces the membership test plus manual index loop.
        position = self.node_ids.index(node_id)
    except ValueError:
        # %s rather than %d: a non-integer id must still produce this
        # message instead of a TypeError from the format operator.
        raise Exception("No such Node ID: %s" % node_id)
    return self.ports[position]
100 # This class captures the observed pcu records from FindBadPCUs.py
102 def __init__(self, pcu_record_dict):
103 for field in ['nodenames', 'portstatus',
106 if field in pcu_record_dict:
107 if field == "reboot":
108 self.__setattr__("reboot_str", pcu_record_dict[field])
110 self.__setattr__(field, pcu_record_dict[field])
112 raise Exception("No such field %s in pcu record dict" % field)
122 def __init__(self, type, verbose):
124 self.verbose = verbose
125 self.transport = None
127 def open(self, host, username=None, password=None, prompt="User Name"):
130 if self.type == self.TELNET:
131 transport = telnetlib.Telnet(host, timeout=self.TELNET_TIMEOUT)
132 transport.set_debuglevel(self.verbose)
133 if username is not None:
134 self.transport = transport
135 self.ifThenSend(prompt, username, ExceptionUsername)
137 elif self.type == self.SSH:
138 if username is not None:
139 transport = pyssh.Ssh(username, host)
140 transport.set_debuglevel(self.verbose)
142 # TODO: have an ssh set_debuglevel() also...
144 raise Exception("Username cannot be None for ssh transport.")
145 elif self.type == self.HTTP:
146 self.url = "http://%s:%d/" % (host,80)
147 uri = "%s:%d" % (host,80)
150 authinfo = urllib2.HTTPPasswordMgrWithDefaultRealm()
151 authinfo.add_password (None, uri, username, password)
152 authhandler = urllib2.HTTPBasicAuthHandler( authinfo )
154 transport = urllib2.build_opener(authhandler)
157 raise Exception("Unknown transport type: %s" % self.type)
159 self.transport = transport
163 if self.type == self.TELNET:
164 self.transport.close()
165 elif self.type == self.SSH:
166 self.transport.close()
167 elif self.type == self.HTTP:
170 raise Exception("Unknown transport type %s" % self.type)
171 self.transport = None
173 def sendHTTP(self, resource, data):
175 print "POSTing '%s' to %s" % (data,self.url + resource)
178 f = self.transport.open(self.url + resource ,data)
183 except urllib2.URLError,err:
184 logger.info('Could not open http connection', err)
185 return "http transport error"
189 def sendPassword(self, password, prompt=None):
190 if self.type == self.TELNET:
192 self.ifThenSend("Password", password, ExceptionPassword)
194 self.ifThenSend(prompt, password, ExceptionPassword)
195 elif self.type == self.SSH:
196 self.ifThenSend("password:", password, ExceptionPassword)
197 elif self.type == self.HTTP:
200 raise Exception("Unknown transport type: %s" % self.type)
def ifThenSend(self, expected, buffer, ErrorClass=ExceptionPrompt):
    """Wait for ``expected`` on the current transport, then send ``buffer``.

    Calls ``self.transport.read_until(expected, self.TELNET_TIMEOUT)`` and,
    when the text read contains ``expected``, writes ``buffer`` followed
    by CRLF.

    Raises:
        ExceptionNoTransport: ``self.transport`` is None.
        ErrorClass: ``expected`` is absent from the text read.
    """
    # Identity test: a transport object with custom comparison operators
    # must not be mistaken for "no transport".
    if self.transport is None:
        raise ExceptionNoTransport("transport object is type None")

    received = self.transport.read_until(expected, self.TELNET_TIMEOUT)
    if expected not in received:
        raise ErrorClass("'%s' not found" % expected)
    self.transport.write(buffer + "\r\n")
213 def ifElse(self, expected, ErrorClass):
215 self.transport.read_until(expected, self.TELNET_TIMEOUT)
217 raise ErrorClass("Could not find '%s' within timeout" % expected)
220 class PCUControl(Transport,PCUModel,PCURecord):
221 def __init__(self, plc_pcu_record, verbose, supported_ports=[]):
222 PCUModel.__init__(self, plc_pcu_record)
223 PCURecord.__init__(self, plc_pcu_record)
226 if '22' in supported_ports and self.portstatus['22'] == "open":
228 elif '23' in supported_ports and self.portstatus['23'] == "open":
229 type = Transport.TELNET
230 elif '80' in supported_ports and self.portstatus['80'] == "open":
231 type = Transport.HTTP
232 elif '443' in supported_ports and self.portstatus['443'] == "open":
233 type = Transport.HTTP
234 elif '5869' in supported_ports and self.portstatus['5869'] == "open":
235 # For DRAC cards. Racadm opens this port.
236 type = Transport.HTTP
237 elif '9100' in supported_ports and self.portstatus['9100'] == "open":
238 type = Transport.IPAL
239 elif '16992' in supported_ports and self.portstatus['16992'] == "open":
240 type = Transport.HTTP
242 raise ExceptionPort("Unsupported Port: No transport from open ports")
244 raise Exception("No Portstatus: No transport because no open ports")
245 Transport.__init__(self, type, verbose)
247 def run(self, node_port, dryrun):
248 """ This function is to be defined by the specific PCU instance. """
251 def reboot(self, node_port, dryrun):
253 return self.run(node_port, dryrun)
254 except ExceptionNotFound, err:
255 return "error: " + str(err)
256 except ExceptionPassword, err:
257 return "password exception: " + str(err)
258 except ExceptionTimeout, err:
259 return "timeout exception: " + str(err)
260 except ExceptionUsername, err:
261 return "exception: no username prompt: " + str(err)
262 except ExceptionSequence, err:
263 return "sequence error: " + str(err)
264 except ExceptionPrompt, err:
265 return "prompt exception: " + str(err)
266 except ExceptionPort, err:
267 return "no ports exception: " + str(err)
268 except socket.error, err:
269 return "socket error: timeout: " + str(err)
270 except EOFError, err:
272 logger.debug("reboot: EOF")
274 self.transport.close()
276 traceback.print_exc()
277 return "EOF connection reset" + str(err)
279 class IPAL(PCUControl):
281 This now uses a proprietary format for communicating with the PCU. I
282 prefer it to Telnet, and Web access, since it's much lighter weight
283 and, more importantly, IT WORKS!! HHAHHHAHAHAHAHAHA!
def format_msg(self, data, cmd):
    """Build one request string for the PCU.

    Layout: ESC, ``self.password``, ESC, ``data``, ``cmd``.  ``cmd`` is
    rendered with ``%c``, so it must be a single character or an integer
    character code.
    """
    escape = chr(0x1b)
    fields = (escape, self.password, escape, data, cmd)
    return "%c%s%c%s%c" % fields
290 def recv_noblock(self, s, count):
294 # TODO: make sleep backoff, before stopping.
296 ret = s.recv(count, socket.MSG_DONTWAIT)
297 except socket.error, e:
298 if e[0] == errno.EAGAIN:
299 raise Exception(e[1])
301 # TODO: not other exceptions.
305 def run(self, node_port, dryrun):
311 s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
314 s.connect((self.host, 9100))
315 except socket.error, e:
317 if e[0] == errno.ECONNREFUSED:
318 # cannot connect to remote host
319 raise Exception(e[1])
321 # TODO: what other conditions are there?
325 print "Checking status"
326 s.send(self.format_msg("", 'O'))
327 ret = self.recv_noblock(s, 8)
328 print "Current status is '%s'" % ret
331 raise Exception("Status returned 'another session already open' %s : %s" % (node_port, ret))
334 if node_port < len(ret):
335 status = ret[node_port]
343 raise Exception("Unknown status for PCU socket %s : %s" % (node_port, ret))
345 raise Exception("Mismatch between configured port and PCU status: %s %s" % (node_port, ret))
350 print "Pulsing %s" % node_port
351 s.send(self.format_msg("%s" % node_port, 'P'))
353 # NOTE: turn power on ; do not pulse the port.
354 print "Power was off, so turning on ..."
355 s.send(self.format_msg("%s" % node_port, 'E'))
356 #s.send(self.format_msg("%s" % node_port, 'P'))
358 print "Receiving response."
359 ret = self.recv_noblock(s, 8)
360 print "Current status is '%s'" % ret
362 if node_port < len(ret):
363 status = ret[node_port]
371 raise Exception("Unknown status for PCU socket %s : %s" % (node_port, ret))
373 raise Exception("Mismatch between configured port and PCU status: %s %s" % (node_port, ret))
378 return "Failed Power On"
383 # TELNET version of protocol...
384 # #self.open(self.host)
385 # ## XXX Some iPals require you to hit Enter a few times first
386 # #self.ifThenSend("Password >", "\r\n\r\n", ExceptionNotFound)
388 # self.ifThenSend("Password >", self.password, ExceptionPassword)
389 # self.transport.write("\r\n\r\n")
390 # if not dryrun: # P# - Pulse relay
391 # print "node_port %s" % node_port
392 # self.ifThenSend("Enter >",
393 # "P7", # % node_port,
395 # print "send newlines"
396 # self.transport.write("\r\n\r\n")
397 # print "after new lines"
398 # # Get the next prompt
399 # print "wait for enter"
400 # self.ifElse("Enter >", ExceptionTimeout)
405 class APCEurope(PCUControl):
406 def run(self, node_port, dryrun):
407 self.open(self.host, self.username)
408 self.sendPassword(self.password)
410 self.ifThenSend("\r\n> ", "1", ExceptionPassword)
411 self.ifThenSend("\r\n> ", "2")
412 self.ifThenSend("\r\n> ", str(node_port))
413 # 3- Immediate Reboot
414 self.ifThenSend("\r\n> ", "3")
417 self.ifThenSend("Enter 'YES' to continue or <ENTER> to cancel",
421 self.ifThenSend("Enter 'YES' to continue or <ENTER> to cancel",
422 "", ExceptionSequence)
423 self.ifThenSend("Press <ENTER> to continue...", "", ExceptionSequence)
428 class APCBrazil(PCUControl):
429 def run(self, node_port, dryrun):
430 self.open(self.host, self.username)
431 self.sendPassword(self.password)
433 self.ifThenSend("\r\n> ", "1", ExceptionPassword)
434 self.ifThenSend("\r\n> ", str(node_port))
435 # 4- Immediate Reboot
436 self.ifThenSend("\r\n> ", "4")
439 self.ifThenSend("Enter 'YES' to continue or <ENTER> to cancel",
443 self.ifThenSend("Enter 'YES' to continue or <ENTER> to cancel",
444 "", ExceptionSequence)
445 self.ifThenSend("Press <ENTER> to continue...", "", ExceptionSequence)
450 class APCBerlin(PCUControl):
451 def run(self, node_port, dryrun):
452 self.open(self.host, self.username)
453 self.sendPassword(self.password)
455 self.ifThenSend("\r\n> ", "1", ExceptionPassword)
456 self.ifThenSend("\r\n> ", "2")
457 self.ifThenSend("\r\n> ", "1")
458 self.ifThenSend("\r\n> ", str(node_port))
459 # 3- Immediate Reboot
460 self.ifThenSend("\r\n> ", "3")
463 self.ifThenSend("Enter 'YES' to continue or <ENTER> to cancel",
467 self.ifThenSend("Enter 'YES' to continue or <ENTER> to cancel",
468 "", ExceptionSequence)
469 self.ifThenSend("Press <ENTER> to continue...", "", ExceptionSequence)
474 class APCFolsom(PCUControl):
475 def run(self, node_port, dryrun):
476 self.open(self.host, self.username)
477 self.sendPassword(self.password)
479 self.ifThenSend("\r\n> ", "1", ExceptionPassword)
480 self.ifThenSend("\r\n> ", "2")
481 self.ifThenSend("\r\n> ", "1")
482 self.ifThenSend("\r\n> ", str(node_port))
483 self.ifThenSend("\r\n> ", "1")
485 # 3- Immediate Reboot
486 self.ifThenSend("\r\n> ", "3")
489 self.ifThenSend("Enter 'YES' to continue or <ENTER> to cancel",
493 self.ifThenSend("Enter 'YES' to continue or <ENTER> to cancel",
494 "", ExceptionSequence)
495 self.ifThenSend("Press <ENTER> to continue...", "", ExceptionSequence)
500 class APCMaster(PCUControl):
501 def run(self, node_port, dryrun):
502 print "Rebooting %s" % self.host
503 self.open(self.host, self.username)
504 self.sendPassword(self.password)
507 self.ifThenSend("\r\n> ", "1", ExceptionPassword)
508 # 3- Outlet Control/Config
509 self.ifThenSend("\r\n> ", "3")
511 self.ifThenSend("\r\n> ", str(node_port))
513 self.ifThenSend("\r\n> ", "1")
514 # 3- Immediate Reboot
515 self.ifThenSend("\r\n> ", "3")
518 self.ifThenSend("Enter 'YES' to continue or <ENTER> to cancel",
522 self.ifThenSend("Enter 'YES' to continue or <ENTER> to cancel",
523 "", ExceptionSequence)
524 self.ifThenSend("Press <ENTER> to continue...", "", ExceptionSequence)
529 class APC(PCUControl):
530 def __init__(self, plc_pcu_record, verbose):
531 PCUControl.__init__(self, plc_pcu_record, verbose)
533 self.master = APCMaster(plc_pcu_record, verbose)
534 self.folsom = APCFolsom(plc_pcu_record, verbose)
535 self.europe = APCEurope(plc_pcu_record, verbose)
537 def run(self, node_port, dryrun):
541 for pcu in [self.master, self.europe, self.folsom]:
544 print "-*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*"
547 time.sleep(sleep_time)
548 ret = pcu.reboot(node_port, dryrun)
549 except ExceptionSequence, err:
555 return "Unknown reboot sequence for APC PCU"
559 class IntelAMT(PCUControl):
560 def run(self, node_port, dryrun):
563 #[cmd_str = "IntelAMTSDK/Samples/RemoteControl/remoteControl"
564 cmd_str = "cmdamt/remoteControl"
567 # NOTE: -p checks the power state of the host.
568 # TODO: parse the output to find out if it's ok or not.
569 cmd_str += " -p http://%s:16992/RemoteControlService -user admin -pass '%s' " % (self.host, self.password )
571 cmd_str += " -A http://%s:16992/RemoteControlService -user admin -pass '%s' " % (self.host, self.password )
574 return cmd.system(cmd_str, self.TELNET_TIMEOUT)
576 class DRACRacAdm(PCUControl):
577 def run(self, node_port, dryrun):
579 print "trying racadm_reboot..."
580 racadm_reboot(self.host, self.username, self.password, node_port, dryrun)
584 class DRAC(PCUControl):
585 def run(self, node_port, dryrun):
586 self.open(self.host, self.username)
587 self.sendPassword(self.password)
589 print "logging in..."
590 self.transport.write("\r\n")
593 self.ifThenSend("[%s]#" % self.username, "getsysinfo")
596 self.ifThenSend("[%s]#" % self.username, "serveraction powercycle")
598 self.ifThenSend("[%s]#" % self.username, "exit")
603 class HPiLO(PCUControl):
604 def run(self, node_port, dryrun):
605 self.open(self.host, self.username)
606 self.sendPassword(self.password)
609 self.ifThenSend("</>hpiLO->", "cd system1")
611 # Reboot Outlet N (Y/N)?
613 self.ifThenSend("</system1>hpiLO->", "POWER")
616 self.ifThenSend("</system1>hpiLO->", "reset")
618 self.ifThenSend("</system1>hpiLO->", "exit")
624 class HPiLOHttps(PCUControl):
625 def run(self, node_port, dryrun):
627 locfg = command.CMD()
628 cmd = "cmdhttps/locfg.pl -s %s -f %s -u %s -p '%s' | grep 'MESSAGE' | grep -v 'No error'" % (
629 self.host, "iloxml/Get_Network.xml",
630 self.username, self.password)
631 sout, serr = locfg.run_noexcept(cmd)
633 if sout.strip() != "":
634 print "sout: %s" % sout.strip()
638 locfg = command.CMD()
639 cmd = "cmdhttps/locfg.pl -s %s -f %s -u %s -p '%s' | grep 'MESSAGE' | grep -v 'No error'" % (
640 self.host, "iloxml/Reset_Server.xml",
641 self.username, self.password)
642 sout, serr = locfg.run_noexcept(cmd)
644 if sout.strip() != "":
645 print "sout: %s" % sout.strip()
649 class BayTechAU(PCUControl):
650 def run(self, node_port, dryrun):
651 self.open(self.host, self.username, None, "Enter user name:")
652 self.sendPassword(self.password, "Enter Password:")
654 #self.ifThenSend("RPC-16>", "Status")
655 self.ifThenSend("RPC3-NC>", "Reboot %d" % node_port)
657 # Reboot Outlet N (Y/N)?
659 self.ifThenSend("(Y/N)?", "N")
661 self.ifThenSend("(Y/N)?", "Y")
662 self.ifThenSend("RPC3-NC>", "")
667 class BayTechGeorgeTown(PCUControl):
668 def run(self, node_port, dryrun):
669 self.open(self.host, self.username, None, "Enter user name:")
670 self.sendPassword(self.password, "Enter Password:")
672 #self.ifThenSend("RPC-16>", "Status")
674 self.ifThenSend("RPC-16>", "Reboot %d" % node_port)
676 # Reboot Outlet N (Y/N)?
678 self.ifThenSend("(Y/N)?", "N")
680 self.ifThenSend("(Y/N)?", "Y")
681 self.ifThenSend("RPC-16>", "")
686 class BayTechCtrlCUnibe(PCUControl):
688 For some reason, these units let you log in fine, but they hang
689 indefinitely, unless you send a Ctrl-C after the password. No idea
692 def run(self, node_port, dryrun):
693 print "BayTechCtrlC %s" % self.host
695 ssh_options="-o StrictHostKeyChecking=no -o PasswordAuthentication=yes -o PubkeyAuthentication=no"
697 if not s.login(self.host, self.username, self.password, ssh_options):
698 raise ExceptionPassword("Invalid Password")
699 # Otherwise, the login succeeded.
701 # Send a ctrl-c to the remote process.
702 print "sending ctrl-c"
705 # Control Outlets (5 ,1).........5
707 #index = s.expect("Enter Request")
708 index = s.expect(["Enter Request :"])
713 index = s.expect(["DS-RPC>", "Enter user name:"])
715 s.send(self.username + "\r\n")
716 index = s.expect(["DS-RPC>"])
719 print "Reboot %d" % node_port
720 s.send("Reboot %d\r\n" % node_port)
723 index = s.expect(["\(Y/N\)\?", "Port in use", "DS-RPC>"])
732 raise ExceptionPrompt("PCU Reported 'Port in use.'")
734 raise ExceptionSequence("Issued command 'Reboot' failed.")
737 index = s.expect(["DS-RPC>"])
738 #print "got prompt back"
743 raise ExceptionPrompt("EOF before expected Prompt")
744 except pexpect.TIMEOUT:
745 raise ExceptionPrompt("Timeout before expected Prompt")
749 class BayTechCtrlC(PCUControl):
751 For some reason, these units let you log in fine, but they hang
752 indefinitely, unless you send a Ctrl-C after the password. No idea
755 def run(self, node_port, dryrun):
756 print "BayTechCtrlC %s" % self.host
758 ssh_options="-o StrictHostKeyChecking=no -o PasswordAuthentication=yes -o PubkeyAuthentication=no"
760 if not s.login(self.host, self.username, self.password, ssh_options):
761 raise ExceptionPassword("Invalid Password")
762 # Otherwise, the login succeeded.
764 # Send a ctrl-c to the remote process.
765 print "SENDING ctrl-c"
768 # Control Outlets (5 ,1).........5
770 print "EXPECTING: ", "Enter Request :"
771 index = s.expect(["Enter Request :"])
776 print "EXPECTING: ", "DS-RPC>"
777 index = s.expect(["DS-RPC>", "Enter user name:", "Port in use."])
779 print "sending username"
780 s.send(self.username + "\r\n")
781 index = s.expect(["DS-RPC>"])
783 raise ExceptionPrompt("PCU Reported 'Port in use.'")
786 print "SENDING: Reboot %d" % node_port
787 s.send("Reboot %d\r\n" % node_port)
791 print "EXPECTING: ", "Y/N?"
792 index = s.expect(["\(Y/N\)\?", "Port in use", "DS-RPC>"])
801 raise ExceptionPrompt("PCU Reported 'Port in use.'")
803 raise ExceptionSequence("Issued command 'Reboot' failed.")
805 # NOTE: for some reason, the script times out with the
806 # following line. In manual tests, it works correctly, but
807 # with automated tests, evidently it fails.
810 #print "TOTAL--", s.allstr, "--EOT"
811 index = s.expect(["DS-RPC>"])
812 print "got prompt back"
817 raise ExceptionPrompt("EOF before 'Enter Request' Prompt")
818 except pexpect.TIMEOUT:
819 raise ExceptionPrompt("Timeout before Prompt")
823 class BayTech(PCUControl):
824 def run(self, node_port, dryrun):
825 self.open(self.host, self.username)
826 self.sendPassword(self.password)
828 # Control Outlets (5 ,1).........5
829 self.ifThenSend("Enter Request :", "5")
833 self.ifThenSend("DS-RPC>", "Reboot %d" % node_port, ExceptionNotFound)
834 except ExceptionNotFound, msg:
835 # one machine is configured to ask for a username,
836 # even after login...
837 print "msg: %s" % msg
838 self.transport.write(self.username + "\r\n")
840 self.ifThenSend("DS-RPC>", "Reboot %d" % node_port)
842 # Reboot Outlet N (Y/N)?
844 self.ifThenSend("(Y/N)?", "N")
846 self.ifThenSend("(Y/N)?", "Y")
848 self.ifThenSend("DS-RPC>", "")
853 class WTIIPS4(PCUControl):
854 def run(self, node_port, dryrun):
856 self.sendPassword(self.password, "Enter Password:")
858 self.ifThenSend("IPS> ", "/Boot %s" % node_port)
860 self.ifThenSend("Sure? (Y/N): ", "N")
862 self.ifThenSend("Sure? (Y/N): ", "Y")
864 self.ifThenSend("IPS> ", "")
869 class ePowerSwitchGood(PCUControl):
871 # The old code used Python's HTTPPasswordMgrWithDefaultRealm()
872 # For some reason this both doesn't work and in some cases, actually
873 # hangs the PCU. Definitely not what we want.
875 # The code below is much simpler. Just letting things fail first,
876 # and then, trying again with authentication string in the header.
878 def run(self, node_port, dryrun):
879 self.transport = None
880 self.url = "http://%s:%d/" % (self.host,80)
881 uri = "%s:%d" % (self.host,80)
883 req = urllib2.Request(self.url)
885 handle = urllib2.urlopen(req)
887 # NOTE: this is expected to fail initially
894 return "ERROR: not protected by HTTP authentication"
896 if not hasattr(e, 'code') or e.code != 401:
897 return "ERROR: failed for: %s" % str(e)
899 base64data = base64.encodestring("%s:%s" % (self.username, self.password))[:-1]
900 # NOTE: assuming basic realm authentication.
901 authheader = "Basic %s" % base64data
902 req.add_header("Authorization", authheader)
905 f = urllib2.urlopen(req)
907 # failing here means the User/passwd is wrong (hopefully)
908 raise ExceptionPassword("Incorrect username/password")
910 # NOTE: after verifying that the user/password is correct,
911 # actually reboot the given node.
914 data = urllib.urlencode({'P%d' % node_port : "r"})
915 req = urllib2.Request(self.url + "cmd.html")
916 req.add_header("Authorization", authheader)
917 # add data to handler,
918 f = urllib2.urlopen(req, data)
919 if self.verbose: print f.read()
921 import traceback; traceback.print_exc()
923 # fetch url one more time on cmd.html, econtrol.html or whatever.
926 if self.verbose: print f.read()
931 class CustomPCU(PCUControl):
932 def run(self, node_port, dryrun):
933 url = "https://www-itec.uni-klu.ac.at/plab-pcu/index.php"
936 # Turn host off, then on
937 formstr = "plab%s=off" % node_port
938 os.system("curl --user %s:%s --form '%s' --insecure %s" % (self.username, self.password, formstr, url))
940 formstr = "plab%s=on" % node_port
941 os.system("curl --user %s:%s --form '%s' --insecure %s" % (self.username, self.password, formstr, url))
943 os.system("curl --user %s:%s --insecure %s" % (self.username, self.password, url))
946 class ePowerSwitchOld(PCUControl):
947 def run(self, node_port, dryrun):
948 self.url = "http://%s:%d/" % (self.host,80)
949 uri = "%s:%d" % (self.host,80)
952 authinfo = urllib2.HTTPPasswordMgrWithDefaultRealm()
953 authinfo.add_password (None, uri, self.username, self.password)
954 authhandler = urllib2.HTTPBasicAuthHandler( authinfo )
956 # NOTE: it doesn't seem to matter whether this authinfo is here or not.
957 transport = urllib2.build_opener(authinfo)
958 f = transport.open(self.url)
959 if self.verbose: print f.read()
962 transport = urllib2.build_opener(authhandler)
963 f = transport.open(self.url + "cmd.html", "P%d=r" % node_port)
964 if self.verbose: print f.read()
969 class ePowerSwitch(PCUControl):
970 def run(self, node_port, dryrun):
971 self.url = "http://%s:%d/" % (self.host,80)
972 uri = "%s:%d" % (self.host,80)
974 # TODO: I'm still not sure what the deal is here.
975 # two independent calls appear to need to be made before the
976 # reboot will succeed. It doesn't seem to be possible to do
977 # this with a single call. I have no idea why.
980 authinfo = urllib2.HTTPPasswordMgrWithDefaultRealm()
981 authinfo.add_password (None, uri, self.username, self.password)
982 authhandler = urllib2.HTTPBasicAuthHandler( authinfo )
984 # NOTE: it doesn't seem to matter whether this authinfo is here or not.
985 transport = urllib2.build_opener()
986 f = transport.open(self.url + "elogin.html", "pwd=%s" % self.password)
987 if self.verbose: print f.read()
990 transport = urllib2.build_opener(authhandler)
991 f = transport.open(self.url + "econtrol.html", "P%d=r" % node_port)
992 if self.verbose: print f.read()
994 # data= "P%d=r" % node_port
995 #self.open(self.host, self.username, self.password)
996 #self.sendHTTP("elogin.html", "pwd=%s" % self.password)
997 #self.sendHTTP("econtrol.html", data)
998 #self.sendHTTP("cmd.html", data)
1004 ### rebooting european BlackBox PSE boxes
1005 # Thierry Parmentelat - May 11 2005
1006 # tested on 4-ports models known as PSE505-FR
1007 # uses http to POST a data 'P<port>=r'
1008 # relies on basic authentication within http1.0
1009 # first curl-based script was
1010 # curl --http1.0 --basic --user <username>:<password> --data P<port>=r \
1011 # http://<hostname>:<http_port>/cmd.html && echo OK
1013 def bbpse_reboot (pcu_ip,username,password,port_in_pcu,http_port, dryrun):
1017 url = "http://%s:%d/cmd.html" % (pcu_ip,http_port)
1018 data= "P%d=r" % port_in_pcu
1020 logger.debug("POSTing '%s' on %s" % (data,url))
1022 authinfo = urllib2.HTTPPasswordMgrWithDefaultRealm()
1023 uri = "%s:%d" % (pcu_ip,http_port)
1024 authinfo.add_password (None, uri, username, password)
1025 authhandler = urllib2.HTTPBasicAuthHandler( authinfo )
1027 opener = urllib2.build_opener(authhandler)
1028 urllib2.install_opener(opener)
1034 f = urllib2.urlopen(url,data)
1041 except urllib2.URLError,err:
1042 logger.info('Could not open http connection', err)
1043 return "bbpse error"
1045 ### rebooting x10toggle based systems addressed by port
1046 # Marc E. Fiuczynski - May 31 2005
1047 # tested on 4-ports models known as PSE505-FR
1048 # uses ssh and password to login to an account
1049 # that will cause the system to be powercycled.
1051 def x10toggle_reboot(ip, username, password, port, dryrun):
1056 ssh = pyssh.Ssh(username, ip)
1060 telnet_answer(ssh, "password:", password)
1064 telnet_answer(ssh, "x10toggle>", "A%d" % port)
1067 output = ssh.close()
1069 logger.debug(output)
1072 except Exception, err:
1076 output = ssh.close()
1078 logger.debug(output)
1079 return errno.ETIMEDOUT
1081 ### rebooting Dell systems via RAC card
1082 # Marc E. Fiuczynski - June 01 2005
1083 # tested with David Lowenthal's itchy/scratchy nodes at UGA
1086 def runcmd(command, args, username, password, timeout = None):
1089 result_ready = threading.Condition()
1093 result_ready.acquire()
1097 result_ready.notify()
1098 result_ready.release()
1100 def do_command(command, username, password):
1103 # Popen4 is a popen-type class that combines stdout and stderr
1104 p = popen2.Popen4(command)
1106 # read all output data
1107 p.tochild.write("%s\n" % username)
1108 p.tochild.write("%s\n" % password)
1110 data = p.fromchild.read()
1113 # might get interrupted by a signal in poll() or waitpid()
1116 set_result((retval, data))
1119 if ex.errno == errno.EINTR:
1122 except Exception, ex:
1126 command = " ".join([command] + args)
1128 worker = threading.Thread(target = do_command, args = (command, username, password, ))
1129 worker.setDaemon(True)
1130 result_ready.acquire()
1132 result_ready.wait(timeout)
1134 if result == [None]:
1135 raise Exception, "command timed-out: '%s'" % command
1137 result_ready.release()
1140 if isinstance(result, Exception):
1143 (retval, data) = result
1144 if os.WIFEXITED(retval) and os.WEXITSTATUS(retval) == 0:
1147 out = "system command ('%s') " % command
1148 if os.WIFEXITED(retval):
1149 out += "failed, rc = %d" % os.WEXITSTATUS(retval)
1151 out += "killed by signal %d" % os.WTERMSIG(retval)
1153 out += "; output follows:\n" + data
1154 raise Exception, out
1156 def racadm_reboot(host, username, password, port, dryrun):
1159 ip = socket.gethostbyname(host)
1161 cmd = "/usr/sbin/racadm"
1164 output = runcmd(cmd, ["-r %s -i serveraction powercycle" % ip],
1167 output = runcmd(cmd, ["-r %s -i getsysinfo" % ip],
1170 print "RUNCMD: %s" % output
1172 logger.debug(output)
1175 except Exception, err:
1176 logger.debug("runcmd raised exception %s" % err)
1182 if pcu['hostname'] is not None and pcu['hostname'] is not "":
1183 return pcu['hostname']
1184 elif pcu['ip'] is not None and pcu['ip'] is not "":
1190 from monitor import database
1193 def get_pcu_values(pcu_id):
1196 # this shouldn't be loaded each time...
1197 fb = database.dbLoad("findbadpcus")
1200 values = fb['nodes']["id_%s" % pcu_id]['values']
def reboot(nodename):
    """Reboot ``nodename`` using the default policy.

    Delegates to reboot_policy() with ``continue_probe`` set to True and
    ``dryrun`` set to False, and returns whatever it returns.
    """
    outcome = reboot_policy(nodename, continue_probe=True, dryrun=False)
    return outcome
1209 def reboot_policy(nodename, continue_probe, dryrun):
1212 pcu = plc.getpcu(nodename)
1214 logger.debug("no pcu for %s" % hostname)
1215 print "no pcu for %s" % hostname
1216 return False # "%s has no pcu" % nodename
1218 values = get_pcu_values(pcu['pcu_id'])
1220 logger.debug("No values for pcu probe %s" % hostname)
1221 print "No values for pcu probe %s" % hostname
1222 return False #"no info for pcu_id %s" % pcu['pcu_id']
1225 logger.debug("Trying PCU %s %s" % (pcu['hostname'], pcu['model']))
1227 ret = reboot_test(nodename, values, continue_probe, verbose, dryrun)
1236 def reboot_test(nodename, values, continue_probe, verbose, dryrun):
1238 if 'plc_pcu_stats' in values:
1239 values.update(values['plc_pcu_stats'])
1242 # DataProbe iPal (many sites)
1243 if continue_probe and values['model'].find("IP-41x_IP-81x") >= 0:
1244 ipal = IPAL(values, verbose, ['23', '80', '9100'])
1245 rb_ret = ipal.reboot(values[nodename], dryrun)
1247 # APC Masterswitch (Berkeley)
1248 elif continue_probe and ( values['model'].find("AP79xx") >= 0 or \
1249 values['model'].find("Masterswitch") >= 0 ):
1252 # TODO: make a more robust version of APC
1253 if values['pcu_id'] in [1102,1163,1055,1111,1231,1113,1127,1128,1148]:
1254 apc = APCEurope(values, verbose, ['22', '23'])
1255 rb_ret = apc.reboot(values[nodename], dryrun)
1257 elif values['pcu_id'] in [1110,86]:
1258 apc = APCBrazil(values, verbose, ['22', '23'])
1259 rb_ret = apc.reboot(values[nodename], dryrun)
1261 elif values['pcu_id'] in [1221,1225,1220]:
1262 apc = APCBerlin(values, verbose, ['22', '23'])
1263 rb_ret = apc.reboot(values[nodename], dryrun)
1265 elif values['pcu_id'] in [1173,1240]:
1266 apc = APCFolsom(values, verbose, ['22', '23'])
1267 rb_ret = apc.reboot(values[nodename], dryrun)
1270 apc = APCMaster(values, verbose, ['22', '23'])
1271 rb_ret = apc.reboot(values[nodename], dryrun)
1274 elif continue_probe and values['model'].find("DS4-RPC") >= 0:
1275 if values['pcu_id'] in [1056,1237,1052,1209,1002,1008,1041,1013,1022]:
1276 # These require a 'ctrl-c' to be sent...
1277 baytech = BayTechCtrlC(values, verbose, ['22', '23'])
1278 rb_ret = baytech.reboot(values[nodename], dryrun)
1280 elif values['pcu_id'] in [93]:
1281 baytech = BayTechAU(values, verbose, ['22', '23'])
1282 rb_ret = baytech.reboot(values[nodename], dryrun)
1284 elif values['pcu_id'] in [1057]:
1285 # These require a 'ctrl-c' to be sent...
1286 baytech = BayTechCtrlCUnibe(values, verbose, ['22', '23'])
1287 rb_ret = baytech.reboot(values[nodename], dryrun)
1289 elif values['pcu_id'] in [1012]:
1290 # This pcu sometimes doesn't present the 'Username' prompt,
1291 # unless you immediately try again...
1293 baytech = BayTechGeorgeTown(values, verbose, ['22', '23'])
1294 rb_ret = baytech.reboot(values[nodename], dryrun)
1296 baytech = BayTechGeorgeTown(values, verbose, ['22', '23'])
1297 rb_ret = baytech.reboot(values[nodename], dryrun)
1299 baytech = BayTech(values, verbose, ['22', '23'])
1300 rb_ret = baytech.reboot(values[nodename], dryrun)
1303 elif continue_probe and values['model'].find("ilo") >= 0:
1305 hpilo = HPiLO(values, verbose, ['22'])
1306 rb_ret = hpilo.reboot(0, dryrun)
1308 hpilo = HPiLOHttps(values, verbose, ['443'])
1309 rb_ret = hpilo.reboot(0, dryrun)
1311 hpilo = HPiLOHttps(values, verbose, ['443'])
1312 rb_ret = hpilo.reboot(0, dryrun)
1315 elif continue_probe and values['model'].find("DRAC") >= 0:
1316 # TODO: I don't think DRACRacAdm will throw an exception for the
1317 # default method to catch...
1319 drac = DRACRacAdm(values, verbose, ['443', '5869'])
1320 rb_ret = drac.reboot(0, dryrun)
1322 drac = DRAC(values, verbose, ['22'])
1323 rb_ret = drac.reboot(0, dryrun)
1325 elif continue_probe and values['model'].find("WTI IPS-4") >= 0:
1326 wti = WTIIPS4(values, verbose, ['23'])
1327 rb_ret = wti.reboot(values[nodename], dryrun)
1329 elif continue_probe and values['model'].find("AMT") >= 0:
1330 amt = IntelAMT(values, verbose, ['16992'])
1331 rb_ret = amt.reboot(values[nodename], dryrun)
1333 # BlackBox PSExxx-xx (e.g. PSE505-FR)
1334 elif continue_probe and values['model'].find("ePowerSwitch") >=0:
1335 # TODO: allow a different port than http 80.
1336 if values['pcu_id'] in [1089, 1071, 1046, 1035, 1118]:
1337 eps = ePowerSwitchGood(values, verbose, ['80'])
1338 elif values['pcu_id'] in [1003]:
1341 eps = ePowerSwitch(values, verbose, ['80'])
1343 eps = ePowerSwitchGood(values, verbose, ['80'])
1345 rb_ret = eps.reboot(values[nodename], dryrun)
1346 elif continue_probe and values['pcu_id'] in [1122]:
1347 custom = CustomPCU(values, verbose, ['80', '443'])
1348 custom.reboot(values[nodename], dryrun)
1350 elif continue_probe:
1351 rb_ret = "Unsupported_PCU"
1353 elif continue_probe == False:
1354 if 'portstatus' in values:
1361 except ExceptionPort, err:
1366 #elif continue_probe and values['protocol'] == "racadm" and \
1367 # values['model'] == "RAC":
1368 # rb_ret = racadm_reboot(pcu_name(values),
1369 # values['username'],
1370 # values['password'],
1375 logger.setLevel(logging.DEBUG)
1376 ch = logging.StreamHandler()
1377 ch.setLevel(logging.DEBUG)
1378 formatter = logging.Formatter('LOGGER - %(message)s')
1379 ch.setFormatter(formatter)
1380 logger.addHandler(ch)
1383 if "test" in sys.argv:
1388 for node in sys.argv[1:]:
1389 if node == "test": continue
1391 print "Rebooting %s" % node
1392 if reboot_policy(node, True, dryrun):
1396 except Exception, err:
1397 import traceback; traceback.print_exc()
1400 if __name__ == '__main__':
1402 logger = logging.getLogger("monitor")