3 # Reboot specified nodes
9 import errno, time, traceback
12 import threading, popen2
14 from monitor.wrapper import plc
16 from subprocess import PIPE, Popen
17 import pcucontrol.transports.ssh.pxssh as pxssh
18 import pcucontrol.transports.ssh.pexpect as pexpect
20 from monitor.util import command
23 # Use our versions of telnetlib and pyssh
24 sys.path.insert(0, os.path.dirname(sys.argv[0]))
25 import pcucontrol.transports.telnetlib as telnetlib
26 sys.path.insert(0, os.path.dirname(sys.argv[0]) + "/pyssh")
27 import pcucontrol.transports.pyssh as pyssh
28 from monitor import config
30 from monitor.database.info.model import FindbadPCURecord
32 # Event class ID from pcu events
33 #NODE_POWER_CONTROL = 3
36 #MONITOR_USER_ID = 11142
39 logger = logging.getLogger("monitor")
class ExceptionNoTransport(Exception):
    """Raised when a transport operation is attempted with no transport object."""


class ExceptionNotFound(Exception):
    """Generic 'not found' failure; reported by catcherror as "error: ..."."""


class ExceptionPassword(Exception):
    """Signals that the expected password prompt was not seen."""


class ExceptionTimeout(Exception):
    """Timeout failure; reported by catcherror as "Timeout exception: ..."."""


class ExceptionPrompt(Exception):
    """Default error for ifThenSend when the expected prompt is missing."""


class ExceptionSequence(Exception):
    """Sequence failure; reported by catcherror as "Sequence error: ..."."""


class ExceptionReset(Exception):
    """Reset failure (presumably raised by model-specific code; not used here)."""


class ExceptionPort(Exception):
    """Raised when no usable port/transport is available for a PCU."""


class ExceptionUsername(Exception):
    """Signals that the expected username prompt was not seen."""
55 # PCU has model, host, preferred-port, user, passwd,
# This is an object derived directly from the PLCAPI DB fields
59 def __init__(self, plc_pcu_dict):
60 for field in ['username', 'password', 'site_id',
63 'node_ids', 'ports', ]:
64 if field in plc_pcu_dict:
65 self.__setattr__(field, plc_pcu_dict[field])
67 raise Exception("No such field %s in PCU object" % field)
# These are the convenience functions built around the PCU object.
def __init__(self, plc_pcu_dict):
    """Build the model from a PLC PCU dictionary and derive its host name.

    plc_pcu_dict is handed unchanged to PCU.__init__; the result of
    self.pcu_name() is then stored as self.host.
    """
    # The base initialiser must run first: pcu_name() reads attributes
    # (hostname / ip) that are expected to be set from the dictionary.
    PCU.__init__(self, plc_pcu_dict)
    self.host = self.pcu_name()
76 if self.hostname is not None and self.hostname is not "":
78 elif self.ip is not None and self.ip is not "":
83 def nodeidToPort(self, node_id):
84 if node_id in self.node_ids:
85 for i in range(0, len(self.node_ids)):
86 if node_id == self.node_ids[i]:
89 raise Exception("No such Node ID: %d" % node_id)
91 # This class captures the observed pcu records from FindBadPCUs.py
93 def __init__(self, pcu_record_dict):
94 for field in ['port_status',
97 if field in pcu_record_dict:
99 self.__setattr__("reboot_str", pcu_record_dict[field])
101 self.__setattr__(field, pcu_record_dict[field])
103 # raise Exception("No such field %s in pcu record dict" % field)
126 def __init__(self, type, verbose):
128 self.verbose = verbose
129 self.transport = None
131 def open(self, host, username=None, password=None, prompt="User Name"):
134 if self.type == self.TELNET:
135 transport = telnetlib.Telnet(host, timeout=self.TELNET_TIMEOUT)
136 transport.set_debuglevel(self.verbose)
137 if username is not None:
138 self.transport = transport
139 self.transport.ifThenSend(prompt, username, ExceptionUsername)
141 elif self.type == self.SSH:
142 if username is not None:
143 transport = pyssh.Ssh(username, host)
144 transport.set_debuglevel(self.verbose)
146 # TODO: have an ssh set_debuglevel() also...
148 raise Exception("Username cannot be None for ssh transport.")
149 elif self.type == self.HTTP:
150 # NOTE: this does not work for all web-based services...
151 self.url = "http://%s:%d/" % (host,80)
152 uri = "%s:%d" % (host,80)
155 authinfo = urllib2.HTTPPasswordMgrWithDefaultRealm()
156 authinfo.add_password (None, uri, username, password)
157 authhandler = urllib2.HTTPBasicAuthHandler( authinfo )
159 transport = urllib2.build_opener(authhandler)
161 raise Exception("Unknown transport type: %s" % self.type)
163 self.transport = transport
167 if self.type == self.TELNET:
168 self.transport.close()
169 elif self.type == self.SSH:
170 self.transport.close()
171 elif self.type == self.HTTP:
174 raise Exception("Unknown transport type %s" % self.type)
175 self.transport = None
def write(self, msg):
    """Alias for send(): forward msg and hand back whatever send() returns."""
    result = self.send(msg)
    return result
181 if self.transport == None:
182 raise ExceptionNoTransport("transport object is type None")
184 return self.transport.write(msg)
186 def sendPassword(self, password, prompt=None):
187 if self.type == self.TELNET:
189 self.ifThenSend("Password", password, ExceptionPassword)
191 self.ifThenSend(prompt, password, ExceptionPassword)
192 elif self.type == self.SSH:
193 self.ifThenSend("password:", password, ExceptionPassword)
194 elif self.type == self.HTTP:
197 raise Exception("Unknown transport type: %s" % self.type)
199 def sendHTTP(self, resource, data):
201 print "POSTing '%s' to %s" % (data,self.url + resource)
204 f = self.transport.open(self.url + resource ,data)
209 except urllib2.URLError,err:
210 logger.info('Could not open http connection', err)
211 return "http transport error"
def ifThenSend(self, expected, buffer, ErrorClass=ExceptionPrompt):
    """Wait for a prompt on the open transport, then answer it.

    Reads from self.transport with read_until(expected, self.TELNET_TIMEOUT).
    If the data read contains `expected`, `buffer` followed by CRLF is
    written back to the transport.

    Args:
        expected: text that must appear in the data read from the transport.
        buffer: reply to send once `expected` has been seen.
        ErrorClass: exception type raised when `expected` is not found.

    Raises:
        ExceptionNoTransport: no transport object is currently set.
        ErrorClass: the data read does not contain `expected`.
    """
    # Guard first so the happy path below reads straight through.
    if self.transport is None:
        raise ExceptionNoTransport("transport object is type None")

    output = self.transport.read_until(expected, self.TELNET_TIMEOUT)
    if expected not in output:
        # Single-argument print(...) and raise Cls(msg) behave the same on
        # Python 2 and Python 3, unlike the statement forms they replace.
        print("OUTPUT: --%s--" % output)
        raise ErrorClass("'%s' not found" % expected)

    self.transport.write(buffer + "\r\n")
227 def ifElse(self, expected, ErrorClass):
229 self.transport.read_until(expected, self.TELNET_TIMEOUT)
231 raise ErrorClass("Could not find '%s' within timeout" % expected)
233 class PCUControl(PCUModel,PCURecord):
236 There are three cases:
237 1) the pcu_record passed below includes port_status from an
239 2) the external probe failed, and the values are empty
240 3) this call is made independent of port_status.
242 In the first case, the first open port is used.
243 In the third case, the ports are tried in sequence.
245 In this way, the port_status value serves only as an optimization,
246 because closed ports are avoided. The supported_ports value should
247 order ports by their preferred usage.
def __init__(self, plc_pcu_record, verbose, ignored=None):
    """Initialise both base classes from the same PCU record.

    plc_pcu_record is passed to PCUModel.__init__ and then to
    PCURecord.__init__. The `verbose` and `ignored` arguments are
    accepted but not used by this initialiser.
    """
    # NOTE(review): `verbose` is not stored on the instance here - confirm
    # whether later code is expected to read it from somewhere else.
    PCUModel.__init__(self, plc_pcu_record)
    PCURecord.__init__(self, plc_pcu_record)
256 def reboot(self, node_port, dryrun):
259 if hasattr(self, 'port_status') and self.port_status:
260 port_list = filter(lambda x: self.port_status[x] == "open" , self.port_status.keys())
261 port_list = [ int(x) for x in port_list ]
263 raise ExceptionPort("Unsupported Port: No transport from open ports")
265 port_list = self.supported_ports
269 ret = "could not run"
270 for port in port_list:
271 if port not in Transport.porttypemap:
274 type = Transport.porttypemap[port]
275 self.transport = Transport(type, verbose)
277 if hasattr(self, "run_%s" % type):
278 fxn = getattr(self, "run_%s" % type)
279 ret = self.catcherror(fxn, node_port, dryrun)
280 if ret == 0: # NOTE: success!, so stop
def run(self, node_port, dryrun):
    """Placeholder to be defined by the specific PCU instance.

    Args:
        node_port: port of the node to act on (unused here).
        dryrun: dry-run flag (unused here).

    Raises:
        NotImplementedError: always. It is a subclass of Exception, so
            callers that catch Exception keep working.
    """
    raise NotImplementedError("This function is not implemented")
292 #def reboot(self, node_port, dryrun):
294 def catcherror(self, function, node_port, dryrun):
296 return function(node_port, dryrun)
297 except ExceptionNotFound, err:
298 return "error: " + str(err)
299 except ExceptionPassword, err:
300 return "Password exception: " + str(err)
301 except ExceptionTimeout, err:
302 return "Timeout exception: " + str(err)
303 except ExceptionUsername, err:
304 return "No username prompt: " + str(err)
305 except ExceptionSequence, err:
306 return "Sequence error: " + str(err)
307 except ExceptionPrompt, err:
308 return "Prompt exception: " + str(err)
309 except ExceptionNoTransport, err:
310 return "No Transport: " + str(err)
311 except ExceptionPort, err:
312 return "No ports exception: " + str(err)
313 except socket.error, err:
314 return "socket error: timeout: " + str(err)
315 except urllib2.HTTPError, err:
316 return "HTTPError: " + str(err)
317 except urllib2.URLError, err:
318 return "URLError: " + str(err)
319 except EOFError, err:
321 logger.debug("reboot: EOF")
323 self.transport.close()
325 traceback.print_exc()
326 return "EOF connection reset" + str(err)
328 from pcucontrol.models import *
331 if pcu['hostname'] is not None and pcu['hostname'] is not "":
332 return pcu['hostname']
333 elif pcu['ip'] is not None and pcu['ip'] is not "":
338 def get_pcu_values(pcu_id):
339 print "pcuid: %s" % pcu_id
341 pcurec = FindbadPCURecord.get_latest_by(plc_pcuid=pcu_id).first()
343 values = pcurec.to_dict()
def reboot(nodename):
    """Run reboot_policy for nodename with continue_probe=True, dryrun=False."""
    outcome = reboot_policy(nodename, continue_probe=True, dryrun=False)
    return outcome
354 def reboot_str(nodename):
356 continue_probe = True
359 pcu = plc.getpcu(nodename)
361 logger.debug("no pcu for %s" % nodename)
362 print "no pcu for %s" % nodename
363 return False # "%s has no pcu" % nodename
365 values = get_pcu_values(pcu['pcu_id'])
367 logger.debug("No values for pcu probe %s" % nodename)
368 print "No values for pcu probe %s" % nodename
369 return False #"no info for pcu_id %s" % pcu['pcu_id']
372 logger.debug("Trying PCU %s %s" % (pcu['hostname'], pcu['model']))
374 ret = reboot_test_new(nodename, values, verbose, dryrun)
377 def reboot_policy(nodename, continue_probe, dryrun):
380 pcu = plc.getpcu(nodename)
382 logger.debug("no pcu for %s" % nodename)
383 print "no pcu for %s" % nodename
384 return False # "%s has no pcu" % nodename
386 values = get_pcu_values(pcu['pcu_id'])
388 logger.debug("No values for pcu probe %s" % nodename)
389 print "No values for pcu probe %s" % nodename
390 return False #"no info for pcu_id %s" % pcu['pcu_id']
393 logger.debug("Trying PCU %s %s" % (pcu['hostname'], pcu['model']))
395 ret = reboot_test_new(nodename, values, verbose, dryrun)
class Unknown(PCUControl):
    """PCUControl subclass that only declares a list of supported ports.

    Presumably used for PCUs whose model is not recognised - confirm
    against model_to_object.
    """

    supported_ports = [
        22,
        23,
        80,
        443,
        5869,
        9100,
        16992,
    ]
407 def model_to_object(modelname):
408 if modelname is None:
410 if "AMT" in modelname:
412 elif "BayTech" in modelname:
414 elif "HPiLO" in modelname:
416 elif "IPAL" in modelname:
418 elif "APC" in modelname:
420 elif "DRAC" in modelname:
422 elif "WTI" in modelname:
424 elif "ePowerSwitch" in modelname:
425 return ePowerSwitchNew
426 elif "IPMI" in modelname:
428 elif "BlackBoxPSMaverick" in modelname:
429 return BlackBoxPSMaverick
430 elif "PM211MIP" in modelname:
432 elif "ManualPCU" in modelname:
435 print "UNKNOWN model %s"%modelname
438 def reboot_api(node, pcu): #, verbose, dryrun):
442 modelname = pcu['model']
444 # get object instance
445 instance = eval('%s(pcu, verbose)' % modelname)
447 i = pcu['node_ids'].index(node['node_id'])
450 rb_ret = instance.reboot(p, False)
452 rb_ret = "No modelname in PCU record."
453 # TODO: how to handle the weird, georgetown pcus, the drac faults, and ilo faults
454 except Exception, err:
459 def reboot_test_new(nodename, values, verbose, dryrun):
461 if 'plc_pcu_stats' in values:
462 values.update(values['plc_pcu_stats'])
465 modelname = values['model']
467 object = eval('%s(values, verbose, ["22", "23", "80", "443", "9100", "16992", "5869"])' % modelname)
468 rb_ret = object.reboot(values[nodename], dryrun)
471 # TODO: how to handle the weird, georgetown pcus, the drac faults, and ilo faults
472 except ExceptionPort, err:
478 logger.setLevel(logging.DEBUG)
479 ch = logging.StreamHandler()
480 ch.setLevel(logging.DEBUG)
481 formatter = logging.Formatter('LOGGER - %(message)s')
482 ch.setFormatter(formatter)
483 logger.addHandler(ch)
486 if "test" in sys.argv:
491 for node in sys.argv[1:]:
492 if node == "test": continue
494 print "Rebooting %s" % node
495 if reboot_policy(node, True, dryrun):
499 except Exception, err:
500 import traceback; traceback.print_exc()
503 if __name__ == '__main__':
504 logger = logging.getLogger("monitor")