3 # Reboot specified nodes
9 import errno, time, traceback
12 import threading, popen2
14 from monitor.wrapper import plc
16 from subprocess import PIPE, Popen
17 import pcucontrol.transports.ssh.pxssh as pxssh
18 import pcucontrol.transports.ssh.pexpect as pexpect
20 from monitor.util import command
23 # Use our versions of telnetlib and pyssh
24 sys.path.insert(0, os.path.dirname(sys.argv[0]))
25 import pcucontrol.transports.telnetlib as telnetlib
26 sys.path.insert(0, os.path.dirname(sys.argv[0]) + "/pyssh")
27 import pcucontrol.transports.pyssh as pyssh
28 from monitor import config
31 # Event class ID from pcu events
32 #NODE_POWER_CONTROL = 3
35 #MONITOR_USER_ID = 11142
38 logger = logging.getLogger("monitor")
class ExceptionNoTransport(Exception):
    """Raised when an operation needs a transport but the transport is None."""


class ExceptionNotFound(Exception):
    """Reported by catcherror() as an "error: ..." result string."""


class ExceptionPassword(Exception):
    """Raised when the expected password prompt is not seen."""


class ExceptionTimeout(Exception):
    """Reported by catcherror() as a "Timeout exception: ..." result string."""


class ExceptionPrompt(Exception):
    """Default error raised by ifThenSend() when the expected text is absent."""


class ExceptionSequence(Exception):
    """Reported by catcherror() as a "Sequence error: ..." result string."""


class ExceptionReset(Exception):
    """Reset-related failure; raised outside this section (TODO confirm usage)."""


class ExceptionPort(Exception):
    """Raised when none of the open ports maps to a usable transport."""


class ExceptionUsername(Exception):
    """Raised when the expected username prompt is not seen."""
54 # PCU has model, host, preferred-port, user, passwd,
56 # This is an object derived directly from the PLCAPI DB fields
58 def __init__(self, plc_pcu_dict):
59 for field in ['username', 'password', 'site_id',
62 'node_ids', 'ports', ]:
63 if field in plc_pcu_dict:
64 self.__setattr__(field, plc_pcu_dict[field])
66 raise Exception("No such field %s in PCU object" % field)
68 # These are the convenience functions built around the PCU object.
def __init__(self, plc_pcu_dict):
    """Initialise the base PCU fields, then store pcu_name() as ``host``.

    plc_pcu_dict is passed unchanged to PCU.__init__; afterwards
    ``self.host`` holds whatever ``self.pcu_name()`` returned.
    """
    PCU.__init__(self, plc_pcu_dict)
    resolved_name = self.pcu_name()
    self.host = resolved_name
75 if self.hostname is not None and self.hostname is not "":
77 elif self.ip is not None and self.ip is not "":
82 def nodeidToPort(self, node_id):
83 if node_id in self.node_ids:
84 for i in range(0, len(self.node_ids)):
85 if node_id == self.node_ids[i]:
88 raise Exception("No such Node ID: %d" % node_id)
90 # This class captures the observed pcu records from FindBadPCUs.py
92 def __init__(self, pcu_record_dict):
93 for field in ['port_status',
96 if field in pcu_record_dict:
98 self.__setattr__("reboot_str", pcu_record_dict[field])
100 self.__setattr__(field, pcu_record_dict[field])
102 # raise Exception("No such field %s in pcu record dict" % field)
125 def __init__(self, type, verbose):
127 self.verbose = verbose
128 self.transport = None
130 def open(self, host, username=None, password=None, prompt="User Name"):
133 if self.type == self.TELNET:
134 transport = telnetlib.Telnet(host, timeout=self.TELNET_TIMEOUT)
135 transport.set_debuglevel(self.verbose)
136 if username is not None:
137 self.transport = transport
138 self.transport.ifThenSend(prompt, username, ExceptionUsername)
140 elif self.type == self.SSH:
141 if username is not None:
142 transport = pyssh.Ssh(username, host)
143 transport.set_debuglevel(self.verbose)
145 # TODO: have an ssh set_debuglevel() also...
147 raise Exception("Username cannot be None for ssh transport.")
148 elif self.type == self.HTTP:
149 # NOTE: this does not work for all web-based services...
150 self.url = "http://%s:%d/" % (host,80)
151 uri = "%s:%d" % (host,80)
154 authinfo = urllib2.HTTPPasswordMgrWithDefaultRealm()
155 authinfo.add_password (None, uri, username, password)
156 authhandler = urllib2.HTTPBasicAuthHandler( authinfo )
158 transport = urllib2.build_opener(authhandler)
160 raise Exception("Unknown transport type: %s" % self.type)
162 self.transport = transport
166 if self.type == self.TELNET:
167 self.transport.close()
168 elif self.type == self.SSH:
169 self.transport.close()
170 elif self.type == self.HTTP:
173 raise Exception("Unknown transport type %s" % self.type)
174 self.transport = None
def write(self, msg):
    """Alias for send(): forward msg and hand back send()'s result."""
    result = self.send(msg)
    return result
180 if self.transport == None:
181 raise ExceptionNoTransport("transport object is type None")
183 return self.transport.write(msg)
185 def sendPassword(self, password, prompt=None):
186 if self.type == self.TELNET:
188 self.ifThenSend("Password", password, ExceptionPassword)
190 self.ifThenSend(prompt, password, ExceptionPassword)
191 elif self.type == self.SSH:
192 self.ifThenSend("password:", password, ExceptionPassword)
193 elif self.type == self.HTTP:
196 raise Exception("Unknown transport type: %s" % self.type)
198 def sendHTTP(self, resource, data):
200 print "POSTing '%s' to %s" % (data,self.url + resource)
203 f = self.transport.open(self.url + resource ,data)
208 except urllib2.URLError,err:
209 logger.info('Could not open http connection', err)
210 return "http transport error"
214 def ifThenSend(self, expected, buffer, ErrorClass=ExceptionPrompt):
216 if self.transport != None:
217 output = self.transport.read_until(expected, self.TELNET_TIMEOUT)
218 if output.find(expected) == -1:
219 print "OUTPUT: --%s--" % output
220 raise ErrorClass, "'%s' not found" % expected
222 self.transport.write(buffer + "\r\n")
224 raise ExceptionNoTransport("transport object is type None")
226 def ifElse(self, expected, ErrorClass):
228 self.transport.read_until(expected, self.TELNET_TIMEOUT)
230 raise ErrorClass("Could not find '%s' within timeout" % expected)
232 class PCUControl(PCUModel,PCURecord):
235 There are three cases:
236 1) the pcu_record passed below includes port_status from an
238 2) the external probe failed, and the values are empty
239 3) this call is made independent of port_status.
241 In the first case, the first open port is used.
242 In the third case, the ports are tried in sequence.
244 In this way, the port_status value serves only as an optimization,
245 because closed ports are avoided. The supported_ports value should
246 order ports by their preferred usage.
def __init__(self, plc_pcu_record, verbose, ignored=None):
    """Run both parent initialisers against the same record.

    PCUModel.__init__ is called first, then PCURecord.__init__, each
    receiving plc_pcu_record unchanged.
    """
    # Order matters only in that it mirrors the base-class order.
    for parent in (PCUModel, PCURecord):
        parent.__init__(self, plc_pcu_record)
255 def reboot(self, node_port, dryrun):
258 if hasattr(self, 'port_status') and self.port_status:
259 port_list = filter(lambda x: self.port_status[x] == "open" , self.port_status.keys())
260 port_list = [ int(x) for x in port_list ]
262 raise ExceptionPort("Unsupported Port: No transport from open ports")
264 port_list = self.supported_ports
268 ret = "could not run"
269 for port in port_list:
270 if port not in Transport.porttypemap:
273 type = Transport.porttypemap[port]
274 self.transport = Transport(type, verbose)
276 if hasattr(self, "run_%s" % type):
277 fxn = getattr(self, "run_%s" % type)
278 ret = self.catcherror(fxn, node_port, dryrun)
279 if ret == 0: # NOTE: success!, so stop
def run(self, node_port, dryrun):
    """Placeholder to be overridden by the specific PCU implementation.

    node_port and dryrun are accepted so overrides share one signature;
    this base version never uses them.

    Raises:
        NotImplementedError: always. It is a subclass of Exception, so
            callers that catch Exception keep working, and the message
            text is unchanged.
    """
    raise NotImplementedError("This function is not implemented")
291 #def reboot(self, node_port, dryrun):
293 def catcherror(self, function, node_port, dryrun):
295 return function(node_port, dryrun)
296 except ExceptionNotFound, err:
297 return "error: " + str(err)
298 except ExceptionPassword, err:
299 return "Password exception: " + str(err)
300 except ExceptionTimeout, err:
301 return "Timeout exception: " + str(err)
302 except ExceptionUsername, err:
303 return "No username prompt: " + str(err)
304 except ExceptionSequence, err:
305 return "Sequence error: " + str(err)
306 except ExceptionPrompt, err:
307 return "Prompt exception: " + str(err)
308 except ExceptionNoTransport, err:
309 return "No Transport: " + str(err)
310 except ExceptionPort, err:
311 return "No ports exception: " + str(err)
312 except socket.error, err:
313 return "socket error: timeout: " + str(err)
314 except urllib2.HTTPError, err:
315 return "HTTPError: " + str(err)
316 except urllib2.URLError, err:
317 return "URLError: " + str(err)
318 except EOFError, err:
320 logger.debug("reboot: EOF")
322 self.transport.close()
324 traceback.print_exc()
325 return "EOF connection reset" + str(err)
327 from pcucontrol.models import *
330 if pcu['hostname'] is not None and pcu['hostname'] is not "":
331 return pcu['hostname']
332 elif pcu['ip'] is not None and pcu['ip'] is not "":
337 def get_pcu_values(pcu_id):
338 from monitor.database.info.model import FindbadPCURecord
339 print "pcuid: %s" % pcu_id
341 pcurec = FindbadPCURecord.get_latest_by(plc_pcuid=pcu_id).first()
343 values = pcurec.to_dict()
def reboot(nodename):
    """Reboot nodename through reboot_policy with fixed settings.

    Probing is enabled and dry-run is disabled; the return value is
    whatever reboot_policy returns.
    """
    continue_probe = True
    dryrun = False
    return reboot_policy(nodename, continue_probe, dryrun)
354 def reboot_str(nodename):
356 continue_probe = True
359 pcu = plc.getpcu(nodename)
361 logger.debug("no pcu for %s" % nodename)
362 print "no pcu for %s" % nodename
363 return False # "%s has no pcu" % nodename
365 values = get_pcu_values(pcu['pcu_id'])
367 logger.debug("No values for pcu probe %s" % nodename)
368 print "No values for pcu probe %s" % nodename
369 return False #"no info for pcu_id %s" % pcu['pcu_id']
372 logger.debug("Trying PCU %s %s" % (pcu['hostname'], pcu['model']))
374 ret = reboot_test_new(nodename, values, verbose, dryrun)
377 def reboot_policy(nodename, continue_probe, dryrun):
380 pcu = plc.getpcu(nodename)
382 logger.debug("no pcu for %s" % nodename)
383 print "no pcu for %s" % nodename
384 return False # "%s has no pcu" % nodename
386 values = get_pcu_values(pcu['pcu_id'])
388 logger.debug("No values for pcu probe %s" % nodename)
389 print "No values for pcu probe %s" % nodename
390 return False #"no info for pcu_id %s" % pcu['pcu_id']
393 logger.debug("Trying PCU %s %s" % (pcu['hostname'], pcu['model']))
395 ret = reboot_test_new(nodename, values, verbose, dryrun)
class Unknown(PCUControl):
    """PCU control class for a model that is not otherwise recognised.

    NOTE(review): presumably what model_to_object falls back to; confirm
    against the full file.
    """

    # Ports to try; reboot() walks this list when no port_status is known.
    supported_ports = [22, 23, 80, 443, 5869, 9100, 16992]
407 def model_to_object(modelname):
408 if modelname is None:
410 if "AMT" in modelname:
412 elif "BayTech" in modelname:
414 elif "HPiLO" in modelname:
416 elif "IPAL" in modelname:
418 elif "APC" in modelname:
420 elif "DRAC" in modelname:
422 elif "WTI" in modelname:
424 elif "ePowerSwitch" in modelname:
425 return ePowerSwitchNew
426 elif "IPMI" in modelname:
428 elif "BlackBoxPSMaverick" in modelname:
429 return BlackBoxPSMaverick
430 elif "PM211MIP" in modelname:
432 elif "ManualPCU" in modelname:
435 print "UNKNOWN model %s"%modelname
438 def reboot_api(node, pcu): #, verbose, dryrun):
442 modelname = pcu['model']
444 # get object instance
445 instance = eval('%s(pcu, verbose)' % modelname)
447 i = pcu['node_ids'].index(node['node_id'])
450 rb_ret = instance.reboot(p, False)
452 rb_ret = "No modelname in PCU record."
453 # TODO: how to handle the weird, georgetown pcus, the drac faults, and ilo faults
454 except Exception, err:
459 def reboot_test_new(nodename, values, verbose, dryrun):
461 if 'plc_pcu_stats' in values:
462 values.update(values['plc_pcu_stats'])
465 modelname = values['model']
467 object = eval('%s(values, verbose, ["22", "23", "80", "443", "9100", "16992", "5869"])' % modelname)
468 rb_ret = object.reboot(values[nodename], dryrun)
471 # TODO: how to handle the weird, georgetown pcus, the drac faults, and ilo faults
472 except ExceptionPort, err:
474 except NameError, err:
480 logger.setLevel(logging.DEBUG)
481 ch = logging.StreamHandler()
482 ch.setLevel(logging.DEBUG)
483 formatter = logging.Formatter('LOGGER - %(message)s')
484 ch.setFormatter(formatter)
485 logger.addHandler(ch)
488 if "test" in sys.argv:
493 for node in sys.argv[1:]:
494 if node == "test": continue
496 print "Rebooting %s" % node
497 if reboot_policy(node, True, dryrun):
501 except Exception, err:
502 import traceback; traceback.print_exc()
505 if __name__ == '__main__':
506 logger = logging.getLogger("monitor")
508 f = open("/tmp/rebootlog", 'a')
509 f.write("reboot %s\n" % sys.argv)