3 # Reboot specified nodes
9 import errno, time, traceback
12 import threading, popen2
15 from subprocess import PIPE, Popen
16 import pcucontrol.transports.ssh.pxssh as pxssh
17 import pcucontrol.transports.ssh.pexpect as pexpect
22 # Use our versions of telnetlib and pyssh
23 sys.path.insert(0, os.path.dirname(sys.argv[0]))
24 import pcucontrol.transports.telnetlib as telnetlib
25 sys.path.insert(0, os.path.dirname(sys.argv[0]) + "/pyssh")
26 import pcucontrol.transports.pyssh as pyssh
28 from monitor import config
29 from monitor.util import command
30 from monitor.wrapper import plc
33 # Event class ID from pcu events
34 #NODE_POWER_CONTROL = 3
37 #MONITOR_USER_ID = 11142
40 logger = logging.getLogger("monitor")
class ExceptionNoTransport(Exception):
    """Raised when a transport operation is attempted while the transport is None."""


class ExceptionNotFound(Exception):
    """Reported by catcherror() with the generic "error: " prefix."""


class ExceptionPassword(Exception):
    """Raised by sendPassword() (via ifThenSend) when the password prompt is not seen."""


class ExceptionTimeout(Exception):
    """Reported by catcherror() as "Timeout exception: "."""


class ExceptionPrompt(Exception):
    """Default error class of ifThenSend() when the expected prompt is not found."""


class ExceptionSequence(Exception):
    """Reported by catcherror() as "Sequence error: "."""


class ExceptionReset(Exception):
    """Reset-related failure.

    NOTE(review): no raise site is visible in this part of the file.
    """


class ExceptionPort(Exception):
    """Raised when no usable (open and supported) port is available for a PCU."""


class ExceptionUsername(Exception):
    """Raised by open() (via ifThenSend) when the username prompt is not seen."""
56 # PCU has model, host, preferred-port, user, passwd,
# This is an object derived directly from the PLCAPI DB fields
60 def __init__(self, plc_pcu_dict):
61 for field in ['username', 'password', 'site_id',
64 'node_ids', 'ports', ]:
65 if field in plc_pcu_dict:
66 self.__setattr__(field, plc_pcu_dict[field])
68 raise Exception("No such field %s in PCU object" % field)
# These are the convenience functions built around the PCU object.
72 def __init__(self, plc_pcu_dict):
73 PCU.__init__(self, plc_pcu_dict)
74 self.host = self.pcu_name()
77 if self.hostname is not None and self.hostname is not "":
79 elif self.ip is not None and self.ip is not "":
84 def nodeidToPort(self, node_id):
85 if node_id in self.node_ids:
86 for i in range(0, len(self.node_ids)):
87 if node_id == self.node_ids[i]:
90 raise Exception("No such Node ID: %d" % node_id)
92 # This class captures the observed pcu records from FindBadPCUs.py
94 def __init__(self, pcu_record_dict):
95 for field in ['port_status',
98 if field in pcu_record_dict:
100 self.__setattr__("reboot_str", pcu_record_dict[field])
102 self.__setattr__(field, pcu_record_dict[field])
104 # raise Exception("No such field %s in pcu record dict" % field)
127 def __init__(self, type, verbose):
129 self.verbose = verbose
130 self.transport = None
132 def open(self, host, username=None, password=None, prompt="User Name"):
135 if self.type == self.TELNET:
136 transport = telnetlib.Telnet(host, timeout=self.TELNET_TIMEOUT)
137 transport.set_debuglevel(self.verbose)
138 if username is not None:
139 self.transport = transport
140 self.ifThenSend(prompt, username, ExceptionUsername)
142 elif self.type == self.SSH:
143 if username is not None:
144 transport = pyssh.Ssh(username, host)
145 transport.set_debuglevel(self.verbose)
147 # TODO: have an ssh set_debuglevel() also...
149 raise Exception("Username cannot be None for ssh transport.")
150 elif self.type == self.HTTP:
151 # NOTE: this does not work for all web-based services...
152 self.url = "http://%s:%d/" % (host,80)
153 uri = "%s:%d" % (host,80)
156 authinfo = urllib2.HTTPPasswordMgrWithDefaultRealm()
157 authinfo.add_password (None, uri, username, password)
158 authhandler = urllib2.HTTPBasicAuthHandler( authinfo )
160 transport = urllib2.build_opener(authhandler)
162 raise Exception("Unknown transport type: %s" % self.type)
164 self.transport = transport
168 if self.type == self.TELNET:
169 self.transport.close()
170 elif self.type == self.SSH:
171 self.transport.close()
172 elif self.type == self.HTTP:
175 raise Exception("Unknown transport type %s" % self.type)
176 self.transport = None
def write(self, msg):
    """Alias for send(): hand ``msg`` to send() and return its result."""
    outcome = self.send(msg)
    return outcome
182 if self.transport == None:
183 raise ExceptionNoTransport("transport object is type None")
185 return self.transport.write(msg)
187 def sendPassword(self, password, prompt=None):
188 if self.type == self.TELNET:
190 self.ifThenSend("Password", password, ExceptionPassword)
192 self.ifThenSend(prompt, password, ExceptionPassword)
193 elif self.type == self.SSH:
194 self.ifThenSend("password:", password, ExceptionPassword)
195 elif self.type == self.HTTP:
198 raise Exception("Unknown transport type: %s" % self.type)
200 def sendHTTP(self, resource, data):
202 print "POSTing '%s' to %s" % (data,self.url + resource)
205 f = self.transport.open(self.url + resource ,data)
210 except urllib2.URLError,err:
211 logger.info('Could not open http connection', err)
212 return "http transport error"
216 def ifThenSend(self, expected, buffer, ErrorClass=ExceptionPrompt):
218 if self.transport != None:
219 output = self.transport.read_until(expected, self.TELNET_TIMEOUT)
220 if output.find(expected) == -1:
221 print "OUTPUT: --%s--" % output
222 raise ErrorClass, "'%s' not found" % expected
224 self.transport.write(buffer + "\r\n")
226 raise ExceptionNoTransport("transport object is type None")
228 def ifElse(self, expected, ErrorClass):
230 self.transport.read_until(expected, self.TELNET_TIMEOUT)
232 raise ErrorClass("Could not find '%s' within timeout" % expected)
234 class PCUControl(PCUModel,PCURecord):
237 There are three cases:
238 1) the pcu_record passed below includes port_status from an
240 2) the external probe failed, and the values are empty
241 3) this call is made independent of port_status.
243 In the first case, the first open port is used.
244 In the third case, the ports are tried in sequence.
246 In this way, the port_status value serves only as an optimization,
247 because closed ports are avoided. The supported_ports value should
248 order ports by their preferred usage.
def __init__(self, plc_pcu_record, verbose, ignored=None):
    """Initialise both base classes from the same PCU record.

    ``plc_pcu_record`` is handed unchanged first to PCUModel.__init__ and
    then to PCURecord.__init__.  ``verbose`` and ``ignored`` are accepted
    but not used here.
    """
    for base in (PCUModel, PCURecord):
        base.__init__(self, plc_pcu_record)
257 def reboot(self, node_port, dryrun):
260 # There are two sources of potential ports. Those that are open and
261 # those that are part of the PCU's supported_ports.
262 # I think we should start with supported_ports and then filter that
265 port_list = self.supported_ports
267 if hasattr(self, 'port_status') and self.port_status:
268 # get out the open ports
269 port_list = filter(lambda x: self.port_status[x] == "open" , self.port_status.keys())
270 port_list = [ int(x) for x in port_list ]
271 # take only the open ports that are supported_ports
272 port_list = filter(lambda x: x in self.supported_ports, port_list)
274 raise ExceptionPort("No Open Port: No transport from open ports")
278 ret = "No implementation for open ports on selected PCU model"
279 for port in port_list:
280 if port not in Transport.porttypemap:
283 type = Transport.porttypemap[port]
284 self.transport = Transport(type, verbose)
286 print "checking for run_%s" % type
287 if hasattr(self, "run_%s" % type):
288 print "found run_%s" % type
289 fxn = getattr(self, "run_%s" % type)
290 ret = self.catcherror(fxn, node_port, dryrun)
291 if ret == 0: # NOTE: success!, so stop
def run(self, node_port, dryrun):
    """Placeholder that is to be defined by the specific PCU instance.

    Args:
        node_port: unused here; part of the signature implementations share.
        dryrun: unused here; part of the signature implementations share.

    Raises:
        NotImplementedError: always.  NotImplementedError is a subclass of
            Exception, so handlers written as ``except Exception`` still
            catch it; the message text is unchanged.
    """
    raise NotImplementedError("This function is not implemented")
303 #def reboot(self, node_port, dryrun):
305 def catcherror(self, function, node_port, dryrun):
307 return function(node_port, dryrun)
308 except ExceptionNotFound, err:
309 return "error: " + str(err)
310 except ExceptionPassword, err:
311 return "Password exception: " + str(err)
312 except ExceptionTimeout, err:
313 return "Timeout exception: " + str(err)
314 except ExceptionUsername, err:
315 return "No username prompt: " + str(err)
316 except ExceptionSequence, err:
317 return "Sequence error: " + str(err)
318 except ExceptionPrompt, err:
319 return "Prompt exception: " + str(err)
320 except ExceptionNoTransport, err:
321 return "No Transport: " + str(err)
322 except ExceptionPort, err:
323 return "No ports exception: " + str(err)
324 except socket.error, err:
325 return "socket error: timeout: " + str(err)
326 except urllib2.HTTPError, err:
327 return "HTTPError: " + str(err)
328 except urllib2.URLError, err:
329 return "URLError: " + str(err)
330 except EOFError, err:
331 self.transport.close()
333 traceback.print_exc()
334 return "EOF connection reset" + str(err)
335 except Exception, err:
336 from monitor.common import email_exception
337 email_exception(self.host)
340 from pcucontrol.models import *
343 if pcu['hostname'] is not None and pcu['hostname'] is not "":
344 return pcu['hostname']
345 elif pcu['ip'] is not None and pcu['ip'] is not "":
350 def get_pcu_values(pcu_id):
351 from monitor.database.info.model import FindbadPCURecord
352 print "pcuid: %s" % pcu_id
354 pcurec = FindbadPCURecord.get_latest_by(plc_pcuid=pcu_id).first()
356 values = pcurec.to_dict()
def reboot(nodename):
    """Run reboot_policy() for ``nodename`` with continue_probe=True and dryrun=False.

    Returns whatever reboot_policy() returns.
    """
    return reboot_policy(nodename, continue_probe=True, dryrun=False)
367 def reboot_str(nodename):
369 continue_probe = True
372 pcu = plc.getpcu(nodename)
374 logger.debug("no pcu for %s" % nodename)
375 print "no pcu for %s" % nodename
376 return False # "%s has no pcu" % nodename
378 values = get_pcu_values(pcu['pcu_id'])
380 logger.debug("No values for pcu probe %s" % nodename)
381 print "No values for pcu probe %s" % nodename
382 return False #"no info for pcu_id %s" % pcu['pcu_id']
385 logger.debug("Trying PCU %s %s" % (pcu['hostname'], pcu['model']))
387 ret = reboot_test_new(nodename, values, verbose, dryrun)
390 def reboot_policy(nodename, continue_probe, dryrun):
393 pcu = plc.getpcu(nodename)
395 logger.debug("no pcu for %s" % nodename)
396 print "no pcu for %s" % nodename
397 return False # "%s has no pcu" % nodename
399 values = get_pcu_values(pcu['pcu_id'])
401 logger.debug("No values for pcu probe %s" % nodename)
402 print "No values for pcu probe %s" % nodename
403 return False #"no info for pcu_id %s" % pcu['pcu_id']
406 logger.debug("Trying PCU %s %s" % (pcu['hostname'], pcu['model']))
408 ret = reboot_test_new(nodename, values, verbose, dryrun)
class Unknown(PCUControl):
    """PCUControl subclass that only declares a list of supported ports."""

    # Ports tried for this model, in the order listed.
    supported_ports = [22, 23, 80, 443, 5869, 9100, 16992]
420 def model_to_object(modelname):
421 if modelname is None:
423 if "AMT" in modelname:
425 elif "BayTech" in modelname:
427 elif "HPiLO" in modelname:
429 elif "IPAL" in modelname:
431 elif "APC" in modelname:
433 elif "DRAC" in modelname:
435 elif "WTI" in modelname:
437 elif "ePowerSwitch" in modelname:
438 return ePowerSwitchNew
439 elif "IPMI" in modelname:
441 elif "BlackBoxPSMaverick" in modelname:
442 return BlackBoxPSMaverick
443 elif "PM211MIP" in modelname:
445 elif "ManualPCU" in modelname:
448 print "UNKNOWN model %s"%modelname
451 def reboot_api(node, pcu): #, verbose, dryrun):
455 modelname = pcu['model']
457 # get object instance
458 instance = eval('%s(pcu, verbose)' % modelname)
460 i = pcu['node_ids'].index(node['node_id'])
463 rb_ret = instance.reboot(p, False)
465 rb_ret = "No modelname in PCU record."
466 # TODO: how to handle the weird, georgetown pcus, the drac faults, and ilo faults
467 except Exception, err:
472 def convert_oldmodelname_to_newmodelname(oldmodelname, pcu_id):
474 update = { 'AP79xx' : 'APCControl13p13',
475 'Masterswitch' : 'APCControl13p13',
476 'DS4-RPC' : 'BayTech',
477 'IP-41x_IP-81x' : 'IPAL',
480 'ePowerSwitch' : 'ePowerSwitchOld',
483 'PM211-MIP' : 'PM211MIP',
484 'AMT2.5' : 'IntelAMT',
485 'AMT3.0' : 'IntelAMT',
486 'WTI_IPS-4' : 'WTIIPS4',
487 'unknown' : 'ManualPCU',
490 'bbsemaverick' : 'BlackBoxPSMaverick',
491 'manualadmin' : 'ManualPCU',
494 if oldmodelname in update:
495 newmodelname = update[oldmodelname]
497 newmodelname = oldmodelname
499 if pcu_id in [1102,1163,1055,1111,1231,1113,1127,1128,1148]:
500 newmodelname = 'APCControl12p3'
501 elif pcu_id in [1110,86]:
502 newmodelname = 'APCControl1p4'
503 elif pcu_id in [1221,1225,1220,1192]:
504 newmodelname = 'APCControl121p3'
505 elif pcu_id in [1173,1240,47,1363,1405,1401,1372,1371]:
506 newmodelname = 'APCControl121p1'
507 elif pcu_id in [1056,1237,1052,1209,1002,1008,1013,1022]:
508 newmodelname = 'BayTechCtrlC'
510 newmodelname = 'BayTechRPC3NC'
511 elif pcu_id in [1057]:
512 newmodelname = 'BayTechCtrlCUnibe'
513 elif pcu_id in [1012]:
514 newmodelname = 'BayTechRPC16'
515 elif pcu_id in [1089, 1071, 1046, 1035, 1118]:
516 newmodelname = 'ePowerSwitchNew'
520 def reboot_test_new(nodename, values, verbose, dryrun):
522 if 'plc_pcu_stats' in values:
523 values.update(values['plc_pcu_stats'])
526 modelname = convert_oldmodelname_to_newmodelname(values['model'], values['pcu_id'])
528 object = eval('%s(values, verbose)' % modelname)
529 rb_ret = object.reboot(values[nodename], dryrun)
532 # TODO: how to handle the weird, georgetown pcus, the drac faults, and ilo faults
533 except ExceptionPort, err:
535 except NameError, err:
541 logger.setLevel(logging.DEBUG)
542 ch = logging.StreamHandler()
543 ch.setLevel(logging.DEBUG)
544 formatter = logging.Formatter('LOGGER - %(message)s')
545 ch.setFormatter(formatter)
546 logger.addHandler(ch)
549 if "test" in sys.argv:
554 for node in sys.argv[1:]:
555 if node == "test": continue
557 print "Rebooting %s" % node
558 if reboot_policy(node, True, dryrun):
562 except Exception, err:
563 import traceback; traceback.print_exc()
564 from monitor.common import email_exception
565 email_exception(node)
568 if __name__ == '__main__':
569 logger = logging.getLogger("monitor")
571 f = open("/tmp/rebootlog", 'a')
572 f.write("reboot %s\n" % sys.argv)