3 # Reboot specified nodes
9 import errno, time, traceback
12 import threading, popen2
18 from subprocess import PIPE, Popen
19 import ssh.pxssh as pxssh
20 import ssh.pexpect as pexpect
24 # Use our versions of telnetlib and pyssh
25 sys.path.insert(0, os.path.dirname(sys.argv[0]))
27 sys.path.insert(0, os.path.dirname(sys.argv[0]) + "/pyssh")
33 # Event class ID from pcu events
34 #NODE_POWER_CONTROL = 3
37 #MONITOR_USER_ID = 11142
40 logger = logging.getLogger("monitor")
# Error types used by the PCU transports and reboot drivers in this file.
# Each one derives directly from Exception and adds no behaviour of its own.

class ExceptionNoTransport(Exception):
    """An operation needed a transport object but it was None."""


class ExceptionNotFound(Exception):
    """An expected string did not appear in the device output."""


class ExceptionPassword(Exception):
    """The password prompt/login step of a PCU dialogue failed."""


class ExceptionTimeout(Exception):
    """A PCU dialogue step timed out."""


class ExceptionPrompt(Exception):
    """An expected prompt was not seen (default error of ifThenSend)."""


class ExceptionSequence(Exception):
    """A step of a PCU menu/confirmation sequence did not match."""


class ExceptionReset(Exception):
    """Reset-related failure (no raise site in the reviewed extract)."""


class ExceptionPort(Exception):
    """No usable transport could be chosen from the PCU's open ports."""


class ExceptionUsername(Exception):
    """The username prompt was not seen while opening a transport."""
def telnet_answer(telnet, expected, buffer):
    """Wait for `expected` on `telnet`, then answer with `buffer` + CRLF.

    `telnet` only needs `read_until(expected, timeout)` and `write(data)`;
    x10toggle_reboot passes an ssh object here as well.

    Raises ExceptionNotFound when the text read within TELNET_TIMEOUT does
    not contain `expected`; nothing is written in that case.
    """
    reply = telnet.read_until(expected, TELNET_TIMEOUT)
    # logger.debug(reply)
    if expected not in reply:
        raise ExceptionNotFound("'%s' not found" % expected)
    telnet.write(buffer + "\r\n")
66 # PCU has model, host, preferred-port, user, passwd,
68 # This is an object derived directly from the PLCAPI DB fields
70 def __init__(self, plc_pcu_dict):
71 for field in ['username', 'password', 'site_id',
74 'node_ids', 'ports', ]:
75 if field in plc_pcu_dict:
76 self.__setattr__(field, plc_pcu_dict[field])
78 raise Exception("No such field %s in PCU object" % field)
80 # These are the convenience functions built around the PCU object.
82 def __init__(self, plc_pcu_dict):
83 PCU.__init__(self, plc_pcu_dict)
84 self.host = self.pcu_name()
87 if self.hostname is not None and self.hostname is not "":
89 elif self.ip is not None and self.ip is not "":
def nodeidToPort(self, node_id):
    """Return the PCU port paired with `node_id`.

    `self.node_ids` and `self.ports` are treated as parallel sequences:
    the port is taken from the same position at which `node_id` occurs in
    `self.node_ids` (first occurrence).

    NOTE(review): the return statement was not visible in the reviewed
    extract; returning `self.ports[i]` for the matching index is assumed
    from the method name and the PCU field list -- confirm.

    Raises Exception("No such Node ID: ...") when `node_id` is unknown.
    """
    # One lookup instead of a membership test followed by a manual scan.
    try:
        position = self.node_ids.index(node_id)
    except ValueError:
        raise Exception("No such Node ID: %d" % node_id)
    return self.ports[position]
102 # This class captures the observed pcu records from FindBadPCUs.py
104 def __init__(self, pcu_record_dict):
105 for field in ['nodenames', 'portstatus',
108 if field in pcu_record_dict:
109 if field == "reboot":
110 self.__setattr__("reboot_str", pcu_record_dict[field])
112 self.__setattr__(field, pcu_record_dict[field])
114 raise Exception("No such field %s in pcu record dict" % field)
124 def __init__(self, type, verbose):
126 self.verbose = verbose
127 self.transport = None
129 def open(self, host, username=None, password=None, prompt="User Name"):
132 if self.type == self.TELNET:
133 transport = telnetlib.Telnet(host, timeout=self.TELNET_TIMEOUT)
134 transport.set_debuglevel(self.verbose)
135 if username is not None:
136 self.transport = transport
137 self.ifThenSend(prompt, username, ExceptionUsername)
139 elif self.type == self.SSH:
140 if username is not None:
141 transport = pyssh.Ssh(username, host)
142 transport.set_debuglevel(self.verbose)
144 # TODO: have an ssh set_debuglevel() also...
146 raise Exception("Username cannot be None for ssh transport.")
147 elif self.type == self.HTTP:
148 self.url = "http://%s:%d/" % (host,80)
149 uri = "%s:%d" % (host,80)
152 authinfo = urllib2.HTTPPasswordMgrWithDefaultRealm()
153 authinfo.add_password (None, uri, username, password)
154 authhandler = urllib2.HTTPBasicAuthHandler( authinfo )
156 transport = urllib2.build_opener(authhandler)
159 raise Exception("Unknown transport type: %s" % self.type)
161 self.transport = transport
165 if self.type == self.TELNET:
166 self.transport.close()
167 elif self.type == self.SSH:
168 self.transport.close()
169 elif self.type == self.HTTP:
172 raise Exception("Unknown transport type %s" % self.type)
173 self.transport = None
175 def sendHTTP(self, resource, data):
177 print "POSTing '%s' to %s" % (data,self.url + resource)
180 f = self.transport.open(self.url + resource ,data)
185 except urllib2.URLError,err:
186 logger.info('Could not open http connection', err)
187 return "http transport error"
def sendPassword(self, password, prompt=None):
    """Answer the device's password prompt over the current transport.

    TELNET: waits for `prompt`, or for "Password" when no prompt is given.
    SSH:    waits for "password:".
    HTTP:   nothing is sent.

    A missing prompt is reported by ifThenSend as ExceptionPassword; any
    other transport type raises a plain Exception.

    NOTE(review): the `if prompt == None` / `else` / `pass` scaffolding
    was not visible in the reviewed extract and is reconstructed from the
    two telnet calls and the `prompt=None` default -- confirm.
    """
    if self.type == self.TELNET:
        wanted = prompt
        if wanted == None:
            wanted = "Password"
        self.ifThenSend(wanted, password, ExceptionPassword)
        return

    if self.type == self.SSH:
        self.ifThenSend("password:", password, ExceptionPassword)
        return

    if self.type == self.HTTP:
        # Credentials are not sent through this path for HTTP.
        return

    raise Exception("Unknown transport type: %s" % self.type)
def ifThenSend(self, expected, buffer, ErrorClass=ExceptionPrompt):
    """Wait for `expected` on the transport, then send `buffer` + CRLF.

    expected   -- text to wait for, read with self.TELNET_TIMEOUT.
    buffer     -- reply written once `expected` has been seen.
    ErrorClass -- exception type raised when `expected` is not in the
                  text that was read (defaults to ExceptionPrompt).

    Raises ExceptionNoTransport when no transport is set.
    """
    # Identity test: None is a singleton, so `is` is the correct check.
    if self.transport is None:
        raise ExceptionNoTransport("transport object is type None")

    output = self.transport.read_until(expected, self.TELNET_TIMEOUT)
    if expected not in output:
        # Call-form raise replaces the Python-2-only `raise Cls, msg`.
        raise ErrorClass("'%s' not found" % expected)

    self.transport.write(buffer + "\r\n")
def ifElse(self, expected, ErrorClass):
    """Read from the transport until `expected`; map failures to ErrorClass.

    expected   -- text to wait for, read with self.TELNET_TIMEOUT.
    ErrorClass -- exception type raised, with a "Could not find ..."
                  message, when the read itself raises.

    NOTE(review): the try/except lines were not visible in the reviewed
    extract; a catch-all handler around the read is assumed -- confirm.
    """
    try:
        self.transport.read_until(expected, self.TELNET_TIMEOUT)
    except Exception:
        # Only ordinary errors are translated; KeyboardInterrupt and
        # SystemExit are allowed to propagate unchanged.
        raise ErrorClass("Could not find '%s' within timeout" % expected)
222 class PCUControl(Transport,PCUModel,PCURecord):
223 def __init__(self, plc_pcu_record, verbose, supported_ports=[]):
224 PCUModel.__init__(self, plc_pcu_record)
225 PCURecord.__init__(self, plc_pcu_record)
228 if '22' in supported_ports and self.portstatus['22'] == "open":
230 elif '23' in supported_ports and self.portstatus['23'] == "open":
231 type = Transport.TELNET
232 elif '80' in supported_ports and self.portstatus['80'] == "open":
233 type = Transport.HTTP
234 elif '443' in supported_ports and self.portstatus['443'] == "open":
235 type = Transport.HTTP
236 elif '5869' in supported_ports and self.portstatus['5869'] == "open":
237 # For DRAC cards. Racadm opens this port.
238 type = Transport.HTTP
239 elif '9100' in supported_ports and self.portstatus['9100'] == "open":
240 type = Transport.IPAL
241 elif '16992' in supported_ports and self.portstatus['16992'] == "open":
242 type = Transport.HTTP
244 raise ExceptionPort("Unsupported Port: No transport from open ports")
246 raise Exception("No Portstatus: No transport because no open ports")
247 Transport.__init__(self, type, verbose)
249 def run(self, node_port, dryrun):
250 """ This function is to be defined by the specific PCU instance. """
253 def reboot(self, node_port, dryrun):
255 return self.run(node_port, dryrun)
256 except ExceptionNotFound, err:
257 return "error: " + str(err)
258 except ExceptionPassword, err:
259 return "password exception: " + str(err)
260 except ExceptionTimeout, err:
261 return "timeout exception: " + str(err)
262 except ExceptionUsername, err:
263 return "exception: no username prompt: " + str(err)
264 except ExceptionSequence, err:
265 return "sequence error: " + str(err)
266 except ExceptionPrompt, err:
267 return "prompt exception: " + str(err)
268 except ExceptionPort, err:
269 return "no ports exception: " + str(err)
270 except socket.error, err:
271 return "socket error: timeout: " + str(err)
272 except EOFError, err:
274 logger.debug("reboot: EOF")
276 self.transport.close()
278 traceback.print_exc()
279 return "EOF connection reset" + str(err)
281 class IPAL(PCUControl):
283 This now uses a proprietary format for communicating with the PCU. I
284 prefer it to Telnet, and Web access, since it's much lighter weight
285 and, more importantly, IT WORKS!! HHAHHHAHAHAHAHAHA!
def format_msg(self, data, cmd):
    """Build one message for the PCU's port-9100 socket protocol.

    Layout: ESC, self.password, ESC, `data`, `cmd`, where ESC is 0x1b
    and `cmd` is a single command character.  Callers in this class send
    'O' with empty data for a status query, and 'P' / 'E' with the port
    number as data to pulse or switch on an outlet.
    """
    escape = chr(0x1b)
    pieces = (escape, self.password, escape, data, cmd)
    return "%c%s%c%s%c" % pieces
292 def recv_noblock(self, s, count):
296 # TODO: make sleep backoff, before stopping.
298 ret = s.recv(count, socket.MSG_DONTWAIT)
299 except socket.error, e:
300 if e[0] == errno.EAGAIN:
301 raise Exception(e[1])
303 # TODO: not other exceptions.
307 def run(self, node_port, dryrun):
313 s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
316 s.connect((self.host, 9100))
317 except socket.error, e:
319 if e[0] == errno.ECONNREFUSED:
320 # cannot connect to remote host
321 raise Exception(e[1])
323 # TODO: what other conditions are there?
327 print "Checking status"
328 s.send(self.format_msg("", 'O'))
329 ret = self.recv_noblock(s, 8)
330 print "Current status is '%s'" % ret
333 raise Exception("Status returned 'another session already open' %s : %s" % (node_port, ret))
336 if node_port < len(ret):
337 status = ret[node_port]
345 raise Exception("Unknown status for PCU socket %s : %s" % (node_port, ret))
347 raise Exception("Mismatch between configured port and PCU status: %s %s" % (node_port, ret))
352 print "Pulsing %s" % node_port
353 s.send(self.format_msg("%s" % node_port, 'P'))
355 # NOTE: turn power on ; do not pulse the port.
356 print "Power was off, so turning on ..."
357 s.send(self.format_msg("%s" % node_port, 'E'))
358 #s.send(self.format_msg("%s" % node_port, 'P'))
360 print "Receiving response."
361 ret = self.recv_noblock(s, 8)
362 print "Current status is '%s'" % ret
364 if node_port < len(ret):
365 status = ret[node_port]
373 raise Exception("Unknown status for PCU socket %s : %s" % (node_port, ret))
375 raise Exception("Mismatch between configured port and PCU status: %s %s" % (node_port, ret))
380 return "Failed Power On"
385 # TELNET version of protocol...
386 # #self.open(self.host)
387 # ## XXX Some iPals require you to hit Enter a few times first
388 # #self.ifThenSend("Password >", "\r\n\r\n", ExceptionNotFound)
390 # self.ifThenSend("Password >", self.password, ExceptionPassword)
391 # self.transport.write("\r\n\r\n")
392 # if not dryrun: # P# - Pulse relay
393 # print "node_port %s" % node_port
394 # self.ifThenSend("Enter >",
395 # "P7", # % node_port,
397 # print "send newlines"
398 # self.transport.write("\r\n\r\n")
399 # print "after new lines"
400 # # Get the next prompt
401 # print "wait for enter"
402 # self.ifElse("Enter >", ExceptionTimeout)
407 class APCEurope(PCUControl):
408 def run(self, node_port, dryrun):
409 self.open(self.host, self.username)
410 self.sendPassword(self.password)
412 self.ifThenSend("\r\n> ", "1", ExceptionPassword)
413 self.ifThenSend("\r\n> ", "2")
414 self.ifThenSend("\r\n> ", str(node_port))
415 # 3- Immediate Reboot
416 self.ifThenSend("\r\n> ", "3")
419 self.ifThenSend("Enter 'YES' to continue or <ENTER> to cancel",
423 self.ifThenSend("Enter 'YES' to continue or <ENTER> to cancel",
424 "", ExceptionSequence)
425 self.ifThenSend("Press <ENTER> to continue...", "", ExceptionSequence)
430 class APCBrazil(PCUControl):
431 def run(self, node_port, dryrun):
432 self.open(self.host, self.username)
433 self.sendPassword(self.password)
435 self.ifThenSend("\r\n> ", "1", ExceptionPassword)
436 self.ifThenSend("\r\n> ", str(node_port))
437 # 4- Immediate Reboot
438 self.ifThenSend("\r\n> ", "4")
441 self.ifThenSend("Enter 'YES' to continue or <ENTER> to cancel",
445 self.ifThenSend("Enter 'YES' to continue or <ENTER> to cancel",
446 "", ExceptionSequence)
447 self.ifThenSend("Press <ENTER> to continue...", "", ExceptionSequence)
452 class APCBerlin(PCUControl):
453 def run(self, node_port, dryrun):
454 self.open(self.host, self.username)
455 self.sendPassword(self.password)
457 self.ifThenSend("\r\n> ", "1", ExceptionPassword)
458 self.ifThenSend("\r\n> ", "2")
459 self.ifThenSend("\r\n> ", "1")
460 self.ifThenSend("\r\n> ", str(node_port))
461 # 3- Immediate Reboot
462 self.ifThenSend("\r\n> ", "3")
465 self.ifThenSend("Enter 'YES' to continue or <ENTER> to cancel",
469 self.ifThenSend("Enter 'YES' to continue or <ENTER> to cancel",
470 "", ExceptionSequence)
471 self.ifThenSend("Press <ENTER> to continue...", "", ExceptionSequence)
476 class APCFolsom(PCUControl):
477 def run(self, node_port, dryrun):
478 self.open(self.host, self.username)
479 self.sendPassword(self.password)
481 self.ifThenSend("\r\n> ", "1", ExceptionPassword)
482 self.ifThenSend("\r\n> ", "2")
483 self.ifThenSend("\r\n> ", "1")
484 self.ifThenSend("\r\n> ", str(node_port))
485 self.ifThenSend("\r\n> ", "1")
487 # 3- Immediate Reboot
488 self.ifThenSend("\r\n> ", "3")
491 self.ifThenSend("Enter 'YES' to continue or <ENTER> to cancel",
495 self.ifThenSend("Enter 'YES' to continue or <ENTER> to cancel",
496 "", ExceptionSequence)
497 self.ifThenSend("Press <ENTER> to continue...", "", ExceptionSequence)
502 class APCMaster(PCUControl):
503 def run(self, node_port, dryrun):
504 print "Rebooting %s" % self.host
505 self.open(self.host, self.username)
506 self.sendPassword(self.password)
509 self.ifThenSend("\r\n> ", "1", ExceptionPassword)
510 # 3- Outlet Control/Config
511 self.ifThenSend("\r\n> ", "3")
513 self.ifThenSend("\r\n> ", str(node_port))
515 self.ifThenSend("\r\n> ", "1")
516 # 3- Immediate Reboot
517 self.ifThenSend("\r\n> ", "3")
520 self.ifThenSend("Enter 'YES' to continue or <ENTER> to cancel",
524 self.ifThenSend("Enter 'YES' to continue or <ENTER> to cancel",
525 "", ExceptionSequence)
526 self.ifThenSend("Press <ENTER> to continue...", "", ExceptionSequence)
531 class APC(PCUControl):
532 def __init__(self, plc_pcu_record, verbose):
533 PCUControl.__init__(self, plc_pcu_record, verbose)
535 self.master = APCMaster(plc_pcu_record, verbose)
536 self.folsom = APCFolsom(plc_pcu_record, verbose)
537 self.europe = APCEurope(plc_pcu_record, verbose)
539 def run(self, node_port, dryrun):
543 for pcu in [self.master, self.europe, self.folsom]:
546 print "-*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*"
549 time.sleep(sleep_time)
550 ret = pcu.reboot(node_port, dryrun)
551 except ExceptionSequence, err:
557 return "Unknown reboot sequence for APC PCU"
561 class IntelAMT(PCUControl):
562 def run(self, node_port, dryrun):
564 cmd = moncommands.CMD()
565 #[cmd_str = "IntelAMTSDK/Samples/RemoteControl/remoteControl"
566 cmd_str = "cmdamt/remoteControl"
569 # NOTE: -p checks the power state of the host.
570 # TODO: parse the output to find out if it's ok or not.
571 cmd_str += " -p http://%s:16992/RemoteControlService -user admin -pass '%s' " % (self.host, self.password )
573 cmd_str += " -A http://%s:16992/RemoteControlService -user admin -pass '%s' " % (self.host, self.password )
576 return cmd.system(cmd_str, self.TELNET_TIMEOUT)
578 class DRACRacAdm(PCUControl):
579 def run(self, node_port, dryrun):
581 print "trying racadm_reboot..."
582 racadm_reboot(self.host, self.username, self.password, node_port, dryrun)
586 class DRAC(PCUControl):
587 def run(self, node_port, dryrun):
588 self.open(self.host, self.username)
589 self.sendPassword(self.password)
591 print "logging in..."
592 self.transport.write("\r\n")
595 self.ifThenSend("[%s]#" % self.username, "getsysinfo")
598 self.ifThenSend("[%s]#" % self.username, "serveraction powercycle")
600 self.ifThenSend("[%s]#" % self.username, "exit")
605 class HPiLO(PCUControl):
606 def run(self, node_port, dryrun):
607 self.open(self.host, self.username)
608 self.sendPassword(self.password)
611 self.ifThenSend("</>hpiLO->", "cd system1")
613 # Reboot Outlet N (Y/N)?
615 self.ifThenSend("</system1>hpiLO->", "POWER")
618 self.ifThenSend("</system1>hpiLO->", "reset")
620 self.ifThenSend("</system1>hpiLO->", "exit")
626 class HPiLOHttps(PCUControl):
627 def run(self, node_port, dryrun):
629 locfg = moncommands.CMD()
630 cmd = "cmdhttps/locfg.pl -s %s -f %s -u %s -p '%s' | grep 'MESSAGE' | grep -v 'No error'" % (
631 self.host, "iloxml/Get_Network.xml",
632 self.username, self.password)
633 sout, serr = locfg.run_noexcept(cmd)
635 if sout.strip() != "":
636 print "sout: %s" % sout.strip()
640 locfg = moncommands.CMD()
641 cmd = "cmdhttps/locfg.pl -s %s -f %s -u %s -p '%s' | grep 'MESSAGE' | grep -v 'No error'" % (
642 self.host, "iloxml/Reset_Server.xml",
643 self.username, self.password)
644 sout, serr = locfg.run_noexcept(cmd)
646 if sout.strip() != "":
647 print "sout: %s" % sout.strip()
651 class BayTechAU(PCUControl):
652 def run(self, node_port, dryrun):
653 self.open(self.host, self.username, None, "Enter user name:")
654 self.sendPassword(self.password, "Enter Password:")
656 #self.ifThenSend("RPC-16>", "Status")
657 self.ifThenSend("RPC3-NC>", "Reboot %d" % node_port)
659 # Reboot Outlet N (Y/N)?
661 self.ifThenSend("(Y/N)?", "N")
663 self.ifThenSend("(Y/N)?", "Y")
664 self.ifThenSend("RPC3-NC>", "")
669 class BayTechGeorgeTown(PCUControl):
670 def run(self, node_port, dryrun):
671 self.open(self.host, self.username, None, "Enter user name:")
672 self.sendPassword(self.password, "Enter Password:")
674 #self.ifThenSend("RPC-16>", "Status")
676 self.ifThenSend("RPC-16>", "Reboot %d" % node_port)
678 # Reboot Outlet N (Y/N)?
680 self.ifThenSend("(Y/N)?", "N")
682 self.ifThenSend("(Y/N)?", "Y")
683 self.ifThenSend("RPC-16>", "")
688 class BayTechCtrlCUnibe(PCUControl):
690 For some reason, these units let you log in fine, but they hang
691 indefinitely, unless you send a Ctrl-C after the password. No idea
694 def run(self, node_port, dryrun):
695 print "BayTechCtrlC %s" % self.host
697 ssh_options="-o StrictHostKeyChecking=no -o PasswordAuthentication=yes -o PubkeyAuthentication=no"
699 if not s.login(self.host, self.username, self.password, ssh_options):
700 raise ExceptionPassword("Invalid Password")
701 # Otherwise, the login succeeded.
703 # Send a ctrl-c to the remote process.
704 print "sending ctrl-c"
707 # Control Outlets (5 ,1).........5
710 print "Enter Request" in s.before
711 index = s.expect("Enter Request")
716 index = s.expect(["DS-RPC>", "Enter user name:"])
718 s.send(self.username + "\r\n")
719 index = s.expect(["DS-RPC>"])
722 print "Reboot %d" % node_port
723 s.send("Reboot %d\r\n" % node_port)
725 index = s.expect(["(Y/N)?"])
734 #index = s.expect(["DS-RPC>"])
735 #print "got prompt back"
740 raise ExceptionPrompt("EOF before 'Enter Request' Prompt")
741 except pexpect.TIMEOUT:
742 raise ExceptionPrompt("Timeout before 'Enter Request' Prompt")
746 class BayTechCtrlC(PCUControl):
748 For some reason, these units let you log in fine, but they hang
749 indefinitely, unless you send a Ctrl-C after the password. No idea
752 def run(self, node_port, dryrun):
753 print "BayTechCtrlC %s" % self.host
755 ssh_options="-o StrictHostKeyChecking=no -o PasswordAuthentication=yes -o PubkeyAuthentication=no"
757 if not s.login(self.host, self.username, self.password, ssh_options):
758 raise ExceptionPassword("Invalid Password")
759 # Otherwise, the login succeeded.
761 # Send a ctrl-c to the remote process.
762 print "sending ctrl-c"
765 # Control Outlets (5 ,1).........5
767 index = s.expect(["Enter Request :"])
772 index = s.expect(["DS-RPC>", "Enter user name:"])
774 print "sending username"
775 s.send(self.username + "\r\n")
776 index = s.expect(["DS-RPC>"])
779 print "Reboot %d" % node_port
780 s.send("Reboot %d\r\n" % node_port)
782 index = s.expect(["(Y/N)?"])
791 # NOTE: for some reason, the script times out with the
792 # following line. In manual tests, it works correctly, but
793 # with automated tests, evidently it fails.
794 #index = s.expect(["DS-RPC>"])
795 #print "got prompt back"
800 raise ExceptionPrompt("EOF before 'Enter Request' Prompt")
801 except pexpect.TIMEOUT:
802 raise ExceptionPrompt("Timeout before Prompt")
806 class BayTech(PCUControl):
807 def run(self, node_port, dryrun):
808 self.open(self.host, self.username)
809 self.sendPassword(self.password)
811 # Control Outlets (5 ,1).........5
812 self.ifThenSend("Enter Request :", "5")
816 self.ifThenSend("DS-RPC>", "Reboot %d" % node_port, ExceptionNotFound)
817 except ExceptionNotFound, msg:
818 # one machine is configured to ask for a username,
819 # even after login...
820 print "msg: %s" % msg
821 self.transport.write(self.username + "\r\n")
822 self.ifThenSend("DS-RPC>", "Reboot %d" % node_port)
824 # Reboot Outlet N (Y/N)?
826 self.ifThenSend("(Y/N)?", "N")
828 self.ifThenSend("(Y/N)?", "Y")
829 self.ifThenSend("DS-RPC>", "")
834 class WTIIPS4(PCUControl):
835 def run(self, node_port, dryrun):
837 self.sendPassword(self.password, "Enter Password:")
839 self.ifThenSend("IPS> ", "/Boot %s" % node_port)
841 self.ifThenSend("Sure? (Y/N): ", "N")
843 self.ifThenSend("Sure? (Y/N): ", "Y")
845 self.ifThenSend("IPS> ", "")
850 class ePowerSwitchGood(PCUControl):
852 # The old code used Python's HTTPPasswordMgrWithDefaultRealm()
853 # For some reason this both doesn't work and in some cases, actually
854 # hangs the PCU. Definitely not what we want.
856 # The code below is much simpler. Just letting things fail first,
857 # and then, trying again with authentication string in the header.
859 def run(self, node_port, dryrun):
860 self.transport = None
861 self.url = "http://%s:%d/" % (self.host,80)
862 uri = "%s:%d" % (self.host,80)
864 req = urllib2.Request(self.url)
866 handle = urllib2.urlopen(req)
868 # NOTE: this is expected to fail initially
875 return "ERROR: not protected by HTTP authentication"
877 if not hasattr(e, 'code') or e.code != 401:
878 return "ERROR: failed for: %s" % str(e)
880 base64data = base64.encodestring("%s:%s" % (self.username, self.password))[:-1]
881 # NOTE: assuming basic realm authentication.
882 authheader = "Basic %s" % base64data
883 req.add_header("Authorization", authheader)
886 f = urllib2.urlopen(req)
888 # failing here means the User/passwd is wrong (hopefully)
889 raise ExceptionPassword("Incorrect username/password")
891 # NOTE: after verifying that the user/password is correct,
892 # actually reboot the given node.
895 data = urllib.urlencode({'P%d' % node_port : "r"})
896 req = urllib2.Request(self.url + "cmd.html")
897 req.add_header("Authorization", authheader)
898 # add data to handler,
899 f = urllib2.urlopen(req, data)
900 if self.verbose: print f.read()
902 import traceback; traceback.print_exc()
904 # fetch url one more time on cmd.html, econtrol.html or whatever.
907 if self.verbose: print f.read()
912 class CustomPCU(PCUControl):
913 def run(self, node_port, dryrun):
914 url = "https://www-itec.uni-klu.ac.at/plab-pcu/index.php"
917 # Turn host off, then on
918 formstr = "plab%s=off" % node_port
919 os.system("curl --user %s:%s --form '%s' --insecure %s" % (self.username, self.password, formstr, url))
921 formstr = "plab%s=on" % node_port
922 os.system("curl --user %s:%s --form '%s' --insecure %s" % (self.username, self.password, formstr, url))
924 os.system("curl --user %s:%s --insecure %s" % (self.username, self.password, url))
927 class ePowerSwitchOld(PCUControl):
928 def run(self, node_port, dryrun):
929 self.url = "http://%s:%d/" % (self.host,80)
930 uri = "%s:%d" % (self.host,80)
933 authinfo = urllib2.HTTPPasswordMgrWithDefaultRealm()
934 authinfo.add_password (None, uri, self.username, self.password)
935 authhandler = urllib2.HTTPBasicAuthHandler( authinfo )
937 # NOTE: it doesn't seem to matter whether this authinfo is here or not.
938 transport = urllib2.build_opener(authinfo)
939 f = transport.open(self.url)
940 if self.verbose: print f.read()
943 transport = urllib2.build_opener(authhandler)
944 f = transport.open(self.url + "cmd.html", "P%d=r" % node_port)
945 if self.verbose: print f.read()
950 class ePowerSwitch(PCUControl):
951 def run(self, node_port, dryrun):
952 self.url = "http://%s:%d/" % (self.host,80)
953 uri = "%s:%d" % (self.host,80)
955 # TODO: I'm still not sure what the deal is here.
956 # two independent calls appear to need to be made before the
957 # reboot will succeed. It doesn't seem to be possible to do
958 # this with a single call. I have no idea why.
961 authinfo = urllib2.HTTPPasswordMgrWithDefaultRealm()
962 authinfo.add_password (None, uri, self.username, self.password)
963 authhandler = urllib2.HTTPBasicAuthHandler( authinfo )
965 # NOTE: it doesn't seem to matter whether this authinfo is here or not.
966 transport = urllib2.build_opener()
967 f = transport.open(self.url + "elogin.html", "pwd=%s" % self.password)
968 if self.verbose: print f.read()
971 transport = urllib2.build_opener(authhandler)
972 f = transport.open(self.url + "econtrol.html", "P%d=r" % node_port)
973 if self.verbose: print f.read()
975 # data= "P%d=r" % node_port
976 #self.open(self.host, self.username, self.password)
977 #self.sendHTTP("elogin.html", "pwd=%s" % self.password)
978 #self.sendHTTP("econtrol.html", data)
979 #self.sendHTTP("cmd.html", data)
985 ### rebooting european BlackBox PSE boxes
986 # Thierry Parmentelat - May 11 2005
987 # tested on 4-ports models known as PSE505-FR
988 # uses http to POST a data 'P<port>=r'
989 # relies on basic authentication within http1.0
990 # first curl-based script was
991 # curl --http1.0 --basic --user <username>:<password> --data P<port>=r \
992 # http://<hostname>:<http_port>/cmd.html && echo OK
994 def bbpse_reboot (pcu_ip,username,password,port_in_pcu,http_port, dryrun):
998 url = "http://%s:%d/cmd.html" % (pcu_ip,http_port)
999 data= "P%d=r" % port_in_pcu
1001 logger.debug("POSTing '%s' on %s" % (data,url))
1003 authinfo = urllib2.HTTPPasswordMgrWithDefaultRealm()
1004 uri = "%s:%d" % (pcu_ip,http_port)
1005 authinfo.add_password (None, uri, username, password)
1006 authhandler = urllib2.HTTPBasicAuthHandler( authinfo )
1008 opener = urllib2.build_opener(authhandler)
1009 urllib2.install_opener(opener)
1015 f = urllib2.urlopen(url,data)
1022 except urllib2.URLError,err:
1023 logger.info('Could not open http connection', err)
1024 return "bbpse error"
1026 ### rebooting x10toggle based systems addressed by port
1027 # Marc E. Fiuczynski - May 31 2005
1028 # tested on 4-ports models known as PSE505-FR
1029 # uses ssh and password to login to an account
1030 # that will cause the system to be powercycled.
1032 def x10toggle_reboot(ip, username, password, port, dryrun):
1037 ssh = pyssh.Ssh(username, ip)
1041 telnet_answer(ssh, "password:", password)
1045 telnet_answer(ssh, "x10toggle>", "A%d" % port)
1048 output = ssh.close()
1050 logger.debug(output)
1053 except Exception, err:
1057 output = ssh.close()
1059 logger.debug(output)
1060 return errno.ETIMEDOUT
1062 ### rebooting Dell systems via RAC card
1063 # Marc E. Fiuczynski - June 01 2005
1064 # tested with David Lowenthal's itchy/scratchy nodes at UGA
1067 def runcmd(command, args, username, password, timeout = None):
1070 result_ready = threading.Condition()
1074 result_ready.acquire()
1078 result_ready.notify()
1079 result_ready.release()
1081 def do_command(command, username, password):
1084 # Popen4 is a popen-type class that combines stdout and stderr
1085 p = popen2.Popen4(command)
1087 # read all output data
1088 p.tochild.write("%s\n" % username)
1089 p.tochild.write("%s\n" % password)
1091 data = p.fromchild.read()
1094 # might get interrupted by a signal in poll() or waitpid()
1097 set_result((retval, data))
1100 if ex.errno == errno.EINTR:
1103 except Exception, ex:
1107 command = " ".join([command] + args)
1109 worker = threading.Thread(target = do_command, args = (command, username, password, ))
1110 worker.setDaemon(True)
1111 result_ready.acquire()
1113 result_ready.wait(timeout)
1115 if result == [None]:
1116 raise Exception, "command timed-out: '%s'" % command
1118 result_ready.release()
1121 if isinstance(result, Exception):
1124 (retval, data) = result
1125 if os.WIFEXITED(retval) and os.WEXITSTATUS(retval) == 0:
1128 out = "system command ('%s') " % command
1129 if os.WIFEXITED(retval):
1130 out += "failed, rc = %d" % os.WEXITSTATUS(retval)
1132 out += "killed by signal %d" % os.WTERMSIG(retval)
1134 out += "; output follows:\n" + data
1135 raise Exception, out
1137 def racadm_reboot(host, username, password, port, dryrun):
1140 ip = socket.gethostbyname(host)
1142 cmd = "/usr/sbin/racadm"
1145 output = runcmd(cmd, ["-r %s -i serveraction powercycle" % ip],
1148 output = runcmd(cmd, ["-r %s -i getsysinfo" % ip],
1151 print "RUNCMD: %s" % output
1153 logger.debug(output)
1156 except Exception, err:
1157 logger.debug("runcmd raised exception %s" % err)
1163 if pcu['hostname'] is not None and pcu['hostname'] is not "":
1164 return pcu['hostname']
1165 elif pcu['ip'] is not None and pcu['ip'] is not "":
1171 fb =database.dbLoad("findbadpcus")
1173 def get_pcu_values(pcu_id):
1174 # TODO: obviously, this shouldn't be loaded each time...
1177 values = fb['nodes']["id_%s" % pcu_id]['values']
def reboot(nodename):
    """Reboot `nodename` through reboot_policy.

    Probing is enabled (continue_probe=True) and this is not a dry run
    (dryrun=False); the result of reboot_policy is returned unchanged.
    """
    outcome = reboot_policy(nodename, continue_probe=True, dryrun=False)
    return outcome
1186 def reboot_policy(nodename, continue_probe, dryrun):
1189 pcu = plc.getpcu(nodename)
1191 logger.debug("no pcu for %s" % hostname)
1192 print "no pcu for %s" % hostname
1193 return False # "%s has no pcu" % nodename
1195 values = get_pcu_values(pcu['pcu_id'])
1197 logger.debug("No values for pcu probe %s" % hostname)
1198 print "No values for pcu probe %s" % hostname
1199 return False #"no info for pcu_id %s" % pcu['pcu_id']
1202 logger.debug("Trying PCU %s %s" % (pcu['hostname'], pcu['model']))
1204 ret = reboot_test(nodename, values, continue_probe, verbose, dryrun)
1213 def reboot_test(nodename, values, continue_probe, verbose, dryrun):
1217 # DataProbe iPal (many sites)
1218 if continue_probe and values['model'].find("IP-41x_IP-81x") >= 0:
1219 ipal = IPAL(values, verbose, ['23', '80', '9100'])
1220 rb_ret = ipal.reboot(values[nodename], dryrun)
1222 # APC Masterswitch (Berkeley)
1223 elif continue_probe and ( values['model'].find("AP79xx") >= 0 or \
1224 values['model'].find("Masterswitch") >= 0 ):
1227 # TODO: make a more robust version of APC
1228 if values['pcu_id'] in [1163,1055,1111,1231,1113,1127,1128,1148]:
1229 apc = APCEurope(values, verbose, ['22', '23'])
1230 rb_ret = apc.reboot(values[nodename], dryrun)
1232 elif values['pcu_id'] in [1110,86]:
1233 apc = APCBrazil(values, verbose, ['22', '23'])
1234 rb_ret = apc.reboot(values[nodename], dryrun)
1236 elif values['pcu_id'] in [1221,1225]:
1237 apc = APCBerlin(values, verbose, ['22', '23'])
1238 rb_ret = apc.reboot(values[nodename], dryrun)
1240 elif values['pcu_id'] in [1173,1221,1220]:
1241 apc = APCFolsom(values, verbose, ['22', '23'])
1242 rb_ret = apc.reboot(values[nodename], dryrun)
1245 apc = APCMaster(values, verbose, ['22', '23'])
1246 rb_ret = apc.reboot(values[nodename], dryrun)
1249 elif continue_probe and values['model'].find("DS4-RPC") >= 0:
1250 if values['pcu_id'] in [1237,1052,1209,1002,1008,1041,1013,1022]:
1251 # These require a 'ctrl-c' to be sent...
1252 baytech = BayTechCtrlC(values, verbose, ['22', '23'])
1253 rb_ret = baytech.reboot(values[nodename], dryrun)
1255 elif values['pcu_id'] in [93]:
1256 baytech = BayTechAU(values, verbose, ['22', '23'])
1257 rb_ret = baytech.reboot(values[nodename], dryrun)
1259 elif values['pcu_id'] in [1057]:
1260 # These require a 'ctrl-c' to be sent...
1261 baytech = BayTechCtrlCUnibe(values, verbose, ['22', '23'])
1262 rb_ret = baytech.reboot(values[nodename], dryrun)
1264 elif values['pcu_id'] in [1012]:
1265 # This pcu sometimes doesn't present the 'Username' prompt,
1266 # unless you immediately try again...
1268 baytech = BayTechGeorgeTown(values, verbose, ['22', '23'])
1269 rb_ret = baytech.reboot(values[nodename], dryrun)
1271 baytech = BayTechGeorgeTown(values, verbose, ['22', '23'])
1272 rb_ret = baytech.reboot(values[nodename], dryrun)
1274 baytech = BayTech(values, verbose, ['22', '23'])
1275 rb_ret = baytech.reboot(values[nodename], dryrun)
1278 elif continue_probe and values['model'].find("ilo") >= 0:
1280 hpilo = HPiLO(values, verbose, ['22'])
1281 rb_ret = hpilo.reboot(0, dryrun)
1283 hpilo = HPiLOHttps(values, verbose, ['443'])
1284 rb_ret = hpilo.reboot(0, dryrun)
1286 hpilo = HPiLOHttps(values, verbose, ['443'])
1287 rb_ret = hpilo.reboot(0, dryrun)
1290 elif continue_probe and values['model'].find("DRAC") >= 0:
1291 # TODO: I don't think DRACRacAdm will throw an exception for the
1292 # default method to catch...
1294 drac = DRACRacAdm(values, verbose, ['443', '5869'])
1295 rb_ret = drac.reboot(0, dryrun)
1297 drac = DRAC(values, verbose, ['22'])
1298 rb_ret = drac.reboot(0, dryrun)
1300 elif continue_probe and values['model'].find("WTI IPS-4") >= 0:
1301 wti = WTIIPS4(values, verbose, ['23'])
1302 rb_ret = wti.reboot(values[nodename], dryrun)
1304 elif continue_probe and values['model'].find("AMT") >= 0:
1305 amt = IntelAMT(values, verbose, ['16992'])
1306 rb_ret = amt.reboot(values[nodename], dryrun)
1308 # BlackBox PSExxx-xx (e.g. PSE505-FR)
1309 elif continue_probe and values['model'].find("ePowerSwitch") >=0:
1310 # TODO: allow a different port than http 80.
1311 if values['pcu_id'] in [1089, 1071, 1046, 1035, 1118]:
1312 eps = ePowerSwitchGood(values, verbose, ['80'])
1313 elif values['pcu_id'] in [1003]:
1316 eps = ePowerSwitch(values, verbose, ['80'])
1318 eps = ePowerSwitchGood(values, verbose, ['80'])
1320 rb_ret = eps.reboot(values[nodename], dryrun)
1321 elif continue_probe and values['pcu_id'] in [1122]:
1322 custom = CustomPCU(values, verbose, ['80', '443'])
1323 custom.reboot(values[nodename], dryrun)
1325 elif continue_probe:
1326 rb_ret = "Unsupported_PCU"
1328 elif continue_probe == False:
1329 if 'portstatus' in values:
1336 except ExceptionPort, err:
1341 #elif continue_probe and values['protocol'] == "racadm" and \
1342 # values['model'] == "RAC":
1343 # rb_ret = racadm_reboot(pcu_name(values),
1344 # values['username'],
1345 # values['password'],
1350 logger.setLevel(logging.DEBUG)
1351 ch = logging.StreamHandler()
1352 ch.setLevel(logging.DEBUG)
1353 formatter = logging.Formatter('LOGGER - %(message)s')
1354 ch.setFormatter(formatter)
1355 logger.addHandler(ch)
1358 if "test" in sys.argv:
1363 for node in sys.argv[1:]:
1364 if node == "test": continue
1366 print "Rebooting %s" % node
1367 if reboot_policy(node, True, dryrun):
1371 except Exception, err:
1372 import traceback; traceback.print_exc()
1375 if __name__ == '__main__':
1377 logger = logging.getLogger("monitor")