3 # Reboot specified nodes
9 import errno, time, traceback
12 import threading, popen2
14 from monitor.wrapper import plc
16 from subprocess import PIPE, Popen
17 import pcucontrol.transports.ssh.pxssh as pxssh
18 import pcucontrol.transports.ssh.pexpect as pexpect
20 from monitor.util import command
23 # Use our versions of telnetlib and pyssh
24 sys.path.insert(0, os.path.dirname(sys.argv[0]))
25 import pcucontrol.transports.telnetlib as telnetlib
26 sys.path.insert(0, os.path.dirname(sys.argv[0]) + "/pyssh")
27 import pcucontrol.transports.pyssh as pyssh
28 from monitor import config
31 # Event class ID from pcu events
32 #NODE_POWER_CONTROL = 3
35 #MONITOR_USER_ID = 11142
38 logger = logging.getLogger("monitor")
# Failure categories used while talking to a PCU.  Every class derives
# directly from Exception and adds no behaviour of its own; the type alone
# tells the caller what went wrong.


class ExceptionNoTransport(Exception):
    """No transport object is available (the transport is None)."""


class ExceptionNotFound(Exception):
    """Something looked for was not found.

    NOTE(review): no raise site is visible in this part of the file;
    confirm the exact meaning where it is raised.
    """


class ExceptionPassword(Exception):
    """The expected password prompt did not appear."""


class ExceptionTimeout(Exception):
    """An operation timed out.

    NOTE(review): no raise site is visible in this part of the file.
    """


class ExceptionPrompt(Exception):
    """An expected prompt did not appear (default error for ifThenSend)."""


class ExceptionSequence(Exception):
    """A command sequence went wrong.

    NOTE(review): no raise site is visible in this part of the file.
    """


class ExceptionReset(Exception):
    """A reset-related failure.

    NOTE(review): no raise site is visible in this part of the file.
    """


class ExceptionPort(Exception):
    """No usable port is available for reaching the PCU."""


class ExceptionUsername(Exception):
    """The expected username prompt did not appear."""
54 # PCU has model, host, preferred-port, user, passwd,
# This is an object derived directly from the PLCAPI DB fields
58 def __init__(self, plc_pcu_dict):
59 for field in ['username', 'password', 'site_id',
62 'node_ids', 'ports', ]:
63 if field in plc_pcu_dict:
64 self.__setattr__(field, plc_pcu_dict[field])
66 raise Exception("No such field %s in PCU object" % field)
# These are the convenience functions built around the PCU object.
70 def __init__(self, plc_pcu_dict):
71 PCU.__init__(self, plc_pcu_dict)
72 self.host = self.pcu_name()
75 if self.hostname is not None and self.hostname is not "":
77 elif self.ip is not None and self.ip is not "":
82 def nodeidToPort(self, node_id):
83 if node_id in self.node_ids:
84 for i in range(0, len(self.node_ids)):
85 if node_id == self.node_ids[i]:
88 raise Exception("No such Node ID: %d" % node_id)
90 # This class captures the observed pcu records from FindBadPCUs.py
92 def __init__(self, pcu_record_dict):
93 for field in ['port_status',
96 if field in pcu_record_dict:
98 self.__setattr__("reboot_str", pcu_record_dict[field])
100 self.__setattr__(field, pcu_record_dict[field])
102 # raise Exception("No such field %s in pcu record dict" % field)
125 def __init__(self, type, verbose):
127 self.verbose = verbose
128 self.transport = None
130 def open(self, host, username=None, password=None, prompt="User Name"):
133 if self.type == self.TELNET:
134 transport = telnetlib.Telnet(host, timeout=self.TELNET_TIMEOUT)
135 transport.set_debuglevel(self.verbose)
136 if username is not None:
137 self.transport = transport
138 self.ifThenSend(prompt, username, ExceptionUsername)
140 elif self.type == self.SSH:
141 if username is not None:
142 transport = pyssh.Ssh(username, host)
143 transport.set_debuglevel(self.verbose)
145 # TODO: have an ssh set_debuglevel() also...
147 raise Exception("Username cannot be None for ssh transport.")
148 elif self.type == self.HTTP:
149 # NOTE: this does not work for all web-based services...
150 self.url = "http://%s:%d/" % (host,80)
151 uri = "%s:%d" % (host,80)
154 authinfo = urllib2.HTTPPasswordMgrWithDefaultRealm()
155 authinfo.add_password (None, uri, username, password)
156 authhandler = urllib2.HTTPBasicAuthHandler( authinfo )
158 transport = urllib2.build_opener(authhandler)
160 raise Exception("Unknown transport type: %s" % self.type)
162 self.transport = transport
166 if self.type == self.TELNET:
167 self.transport.close()
168 elif self.type == self.SSH:
169 self.transport.close()
170 elif self.type == self.HTTP:
173 raise Exception("Unknown transport type %s" % self.type)
174 self.transport = None
def write(self, msg):
    """Write `msg` through the transport.

    This is only an alias: the message is handed to send() unchanged and
    whatever send() returns is returned to the caller.
    """
    outcome = self.send(msg)
    return outcome
180 if self.transport == None:
181 raise ExceptionNoTransport("transport object is type None")
183 return self.transport.write(msg)
185 def sendPassword(self, password, prompt=None):
186 if self.type == self.TELNET:
188 self.ifThenSend("Password", password, ExceptionPassword)
190 self.ifThenSend(prompt, password, ExceptionPassword)
191 elif self.type == self.SSH:
192 self.ifThenSend("password:", password, ExceptionPassword)
193 elif self.type == self.HTTP:
196 raise Exception("Unknown transport type: %s" % self.type)
198 def sendHTTP(self, resource, data):
200 print "POSTing '%s' to %s" % (data,self.url + resource)
203 f = self.transport.open(self.url + resource ,data)
208 except urllib2.URLError,err:
209 logger.info('Could not open http connection', err)
210 return "http transport error"
214 def ifThenSend(self, expected, buffer, ErrorClass=ExceptionPrompt):
216 if self.transport != None:
217 output = self.transport.read_until(expected, self.TELNET_TIMEOUT)
218 if output.find(expected) == -1:
219 print "OUTPUT: --%s--" % output
220 raise ErrorClass, "'%s' not found" % expected
222 self.transport.write(buffer + "\r\n")
224 raise ExceptionNoTransport("transport object is type None")
226 def ifElse(self, expected, ErrorClass):
228 self.transport.read_until(expected, self.TELNET_TIMEOUT)
230 raise ErrorClass("Could not find '%s' within timeout" % expected)
232 class PCUControl(PCUModel,PCURecord):
235 There are three cases:
236 1) the pcu_record passed below includes port_status from an
238 2) the external probe failed, and the values are empty
239 3) this call is made independent of port_status.
241 In the first case, the first open port is used.
242 In the third case, the ports are tried in sequence.
244 In this way, the port_status value serves only as an optimization,
245 because closed ports are avoided. The supported_ports value should
246 order ports by their preferred usage.
def __init__(self, plc_pcu_record, verbose, ignored=None):
    """Initialise both parent classes from the same PCU record.

    plc_pcu_record   -- dict passed unchanged to PCUModel.__init__ and then
                        to PCURecord.__init__, in that order.
    verbose, ignored -- accepted for the callers' sake; this constructor
                        does not read them.
    """
    for parent in (PCUModel, PCURecord):
        parent.__init__(self, plc_pcu_record)
255 def reboot(self, node_port, dryrun):
258 # There are two sources of potential ports. Those that are open and
259 # those that are part of the PCU's supported_ports.
260 # I think we should start with supported_ports and then filter that
263 port_list = self.supported_ports
265 if hasattr(self, 'port_status') and self.port_status:
266 # get out the open ports
267 port_list = filter(lambda x: self.port_status[x] == "open" , self.port_status.keys())
268 port_list = [ int(x) for x in port_list ]
269 # take only the open ports that are supported_ports
270 port_list = filter(lambda x: x in self.supported_ports, port_list)
272 raise ExceptionPort("No Open Port: No transport from open ports")
276 ret = "No implementation for open ports on selected PCU model"
277 for port in port_list:
278 if port not in Transport.porttypemap:
281 type = Transport.porttypemap[port]
282 self.transport = Transport(type, verbose)
284 print "checking for run_%s" % type
285 if hasattr(self, "run_%s" % type):
286 print "found run_%s" % type
287 fxn = getattr(self, "run_%s" % type)
288 ret = self.catcherror(fxn, node_port, dryrun)
289 if ret == 0: # NOTE: success!, so stop
def run(self, node_port, dryrun):
    """Placeholder that a specific PCU model class is expected to override.

    node_port -- accepted for interface compatibility; unused here.
    dryrun    -- accepted for interface compatibility; unused here.

    Raises NotImplementedError.  It is an Exception subclass, so existing
    `except Exception` handlers still catch it, and the message text is
    unchanged.
    """
    raise NotImplementedError("This function is not implemented")
301 #def reboot(self, node_port, dryrun):
303 def catcherror(self, function, node_port, dryrun):
305 return function(node_port, dryrun)
306 except ExceptionNotFound, err:
307 return "error: " + str(err)
308 except ExceptionPassword, err:
309 return "Password exception: " + str(err)
310 except ExceptionTimeout, err:
311 return "Timeout exception: " + str(err)
312 except ExceptionUsername, err:
313 return "No username prompt: " + str(err)
314 except ExceptionSequence, err:
315 return "Sequence error: " + str(err)
316 except ExceptionPrompt, err:
317 return "Prompt exception: " + str(err)
318 except ExceptionNoTransport, err:
319 return "No Transport: " + str(err)
320 except ExceptionPort, err:
321 return "No ports exception: " + str(err)
322 except socket.error, err:
323 return "socket error: timeout: " + str(err)
324 except urllib2.HTTPError, err:
325 return "HTTPError: " + str(err)
326 except urllib2.URLError, err:
327 return "URLError: " + str(err)
328 except EOFError, err:
329 self.transport.close()
331 traceback.print_exc()
332 return "EOF connection reset" + str(err)
333 except Exception, err:
334 from monitor.common import email_exception
335 email_exception(self.host)
338 from pcucontrol.models import *
341 if pcu['hostname'] is not None and pcu['hostname'] is not "":
342 return pcu['hostname']
343 elif pcu['ip'] is not None and pcu['ip'] is not "":
348 def get_pcu_values(pcu_id):
349 from monitor.database.info.model import FindbadPCURecord
350 print "pcuid: %s" % pcu_id
352 pcurec = FindbadPCURecord.get_latest_by(plc_pcuid=pcu_id).first()
354 values = pcurec.to_dict()
def reboot(nodename):
    """Reboot `nodename` using the default policy settings.

    Delegates to reboot_policy() with continue_probe switched on and
    dryrun switched off, and returns its result unchanged.
    """
    continue_probe = True
    dryrun = False
    return reboot_policy(nodename, continue_probe, dryrun)
365 def reboot_str(nodename):
367 continue_probe = True
370 pcu = plc.getpcu(nodename)
372 logger.debug("no pcu for %s" % nodename)
373 print "no pcu for %s" % nodename
374 return False # "%s has no pcu" % nodename
376 values = get_pcu_values(pcu['pcu_id'])
378 logger.debug("No values for pcu probe %s" % nodename)
379 print "No values for pcu probe %s" % nodename
380 return False #"no info for pcu_id %s" % pcu['pcu_id']
383 logger.debug("Trying PCU %s %s" % (pcu['hostname'], pcu['model']))
385 ret = reboot_test_new(nodename, values, verbose, dryrun)
388 def reboot_policy(nodename, continue_probe, dryrun):
391 pcu = plc.getpcu(nodename)
393 logger.debug("no pcu for %s" % nodename)
394 print "no pcu for %s" % nodename
395 return False # "%s has no pcu" % nodename
397 values = get_pcu_values(pcu['pcu_id'])
399 logger.debug("No values for pcu probe %s" % nodename)
400 print "No values for pcu probe %s" % nodename
401 return False #"no info for pcu_id %s" % pcu['pcu_id']
404 logger.debug("Trying PCU %s %s" % (pcu['hostname'], pcu['model']))
406 ret = reboot_test_new(nodename, values, verbose, dryrun)
class Unknown(PCUControl):
    """PCUControl subclass that only declares a list of supported ports."""

    # Ports this class declares as supported, kept in their listed order.
    supported_ports = [
        22, 23, 80, 443,
        5869, 9100, 16992,
    ]
418 def model_to_object(modelname):
419 if modelname is None:
421 if "AMT" in modelname:
423 elif "BayTech" in modelname:
425 elif "HPiLO" in modelname:
427 elif "IPAL" in modelname:
429 elif "APC" in modelname:
431 elif "DRAC" in modelname:
433 elif "WTI" in modelname:
435 elif "ePowerSwitch" in modelname:
436 return ePowerSwitchNew
437 elif "IPMI" in modelname:
439 elif "BlackBoxPSMaverick" in modelname:
440 return BlackBoxPSMaverick
441 elif "PM211MIP" in modelname:
443 elif "ManualPCU" in modelname:
446 print "UNKNOWN model %s"%modelname
449 def reboot_api(node, pcu): #, verbose, dryrun):
453 modelname = pcu['model']
455 # get object instance
456 instance = eval('%s(pcu, verbose)' % modelname)
458 i = pcu['node_ids'].index(node['node_id'])
461 rb_ret = instance.reboot(p, False)
463 rb_ret = "No modelname in PCU record."
464 # TODO: how to handle the weird, georgetown pcus, the drac faults, and ilo faults
465 except Exception, err:
470 def convert_oldmodelname_to_newmodelname(oldmodelname, pcu_id):
472 update = { 'AP79xx' : 'APCControl13p13',
473 'Masterswitch' : 'APCControl13p13',
474 'DS4-RPC' : 'BayTech',
475 'IP-41x_IP-81x' : 'IPAL',
478 'ePowerSwitch' : 'ePowerSwitchOld',
481 'PM211-MIP' : 'PM211MIP',
482 'AMT2.5' : 'IntelAMT',
483 'AMT3.0' : 'IntelAMT',
484 'WTI_IPS-4' : 'WTIIPS4',
485 'unknown' : 'ManualPCU',
488 'bbsemaverick' : 'BlackBoxPSMaverick',
489 'manualadmin' : 'ManualPCU',
492 if oldmodelname in update:
493 newmodelname = update[oldmodelname]
495 newmodelname = oldmodelname
497 if pcu_id in [1102,1163,1055,1111,1231,1113,1127,1128,1148]:
498 newmodelname = 'APCControl12p3'
499 elif pcu_id in [1110,86]:
500 newmodelname = 'APCControl1p4'
501 elif pcu_id in [1221,1225,1220,1192]:
502 newmodelname = 'APCControl121p3'
503 elif pcu_id in [1173,1240,47,1363,1405,1401,1372,1371]:
504 newmodelname = 'APCControl121p1'
505 elif pcu_id in [1056,1237,1052,1209,1002,1008,1013,1022]:
506 newmodelname = 'BayTechCtrlC'
508 newmodelname = 'BayTechRPC3NC'
509 elif pcu_id in [1057]:
510 newmodelname = 'BayTechCtrlCUnibe'
511 elif pcu_id in [1012]:
512 newmodelname = 'BayTechRPC16'
513 elif pcu_id in [1089, 1071, 1046, 1035, 1118]:
514 newmodelname = 'ePowerSwitchNew'
518 def reboot_test_new(nodename, values, verbose, dryrun):
520 if 'plc_pcu_stats' in values:
521 values.update(values['plc_pcu_stats'])
524 modelname = convert_oldmodelname_to_newmodelname(values['model'], values['pcu_id'])
526 object = eval('%s(values, verbose)' % modelname)
527 rb_ret = object.reboot(values[nodename], dryrun)
530 # TODO: how to handle the weird, georgetown pcus, the drac faults, and ilo faults
531 except ExceptionPort, err:
533 except NameError, err:
539 logger.setLevel(logging.DEBUG)
540 ch = logging.StreamHandler()
541 ch.setLevel(logging.DEBUG)
542 formatter = logging.Formatter('LOGGER - %(message)s')
543 ch.setFormatter(formatter)
544 logger.addHandler(ch)
547 if "test" in sys.argv:
552 for node in sys.argv[1:]:
553 if node == "test": continue
555 print "Rebooting %s" % node
556 if reboot_policy(node, True, dryrun):
560 except Exception, err:
561 import traceback; traceback.print_exc()
562 from monitor.common import email_exception
563 email_exception(node)
566 if __name__ == '__main__':
567 logger = logging.getLogger("monitor")
569 f = open("/tmp/rebootlog", 'a')
570 f.write("reboot %s\n" % sys.argv)