3 # Reboot specified nodes
9 import errno, time, traceback
12 import threading, popen2
14 from monitor.wrapper import plc
16 from subprocess import PIPE, Popen
17 import pcucontrol.transports.ssh.pxssh as pxssh
18 import pcucontrol.transports.ssh.pexpect as pexpect
20 from monitor.util import command
23 # Use our versions of telnetlib and pyssh
24 sys.path.insert(0, os.path.dirname(sys.argv[0]))
25 import pcucontrol.transports.telnetlib as telnetlib
26 sys.path.insert(0, os.path.dirname(sys.argv[0]) + "/pyssh")
27 import pcucontrol.transports.pyssh as pyssh
28 from monitor import config
31 # Event class ID from pcu events
32 #NODE_POWER_CONTROL = 3
35 #MONITOR_USER_ID = 11142
38 logger = logging.getLogger("monitor")
class ExceptionNoTransport(Exception):
    """Raised when a transport operation is attempted but the transport is None."""


class ExceptionNotFound(Exception):
    """Reported by PCUControl.catcherror as an "error: ..." result string."""


class ExceptionPassword(Exception):
    """Error class used when the expected password prompt is not seen."""


class ExceptionTimeout(Exception):
    """Reported by PCUControl.catcherror as a "Timeout exception: ..." string."""


class ExceptionPrompt(Exception):
    """Default error class of ifThenSend when the expected text is not found."""


class ExceptionSequence(Exception):
    """Reported by PCUControl.catcherror as a "Sequence error: ..." string."""


class ExceptionReset(Exception):
    """Reset-related failure (not raised in the portion of the module shown here)."""


class ExceptionPort(Exception):
    """Raised when no open port provides a usable transport."""


class ExceptionUsername(Exception):
    """Error class used when the expected username prompt is not seen."""
54 # PCU has model, host, preferred-port, user, passwd,
# This is an object derived directly from the PLCAPI DB fields
58 def __init__(self, plc_pcu_dict):
59 for field in ['username', 'password', 'site_id',
62 'node_ids', 'ports', ]:
63 if field in plc_pcu_dict:
64 self.__setattr__(field, plc_pcu_dict[field])
66 raise Exception("No such field %s in PCU object" % field)
# These are the convenience functions built around the PCU object.
def __init__(self, plc_pcu_dict):
    """Populate the PCU fields from `plc_pcu_dict`, then cache the PCU name.

    The base-class initialiser runs first because pcu_name() reads
    attributes that it sets; the result is stored on `self.host`.
    """
    PCU.__init__(self, plc_pcu_dict)
    resolved_name = self.pcu_name()
    self.host = resolved_name
75 if self.hostname is not None and self.hostname is not "":
77 elif self.ip is not None and self.ip is not "":
82 def nodeidToPort(self, node_id):
83 if node_id in self.node_ids:
84 for i in range(0, len(self.node_ids)):
85 if node_id == self.node_ids[i]:
88 raise Exception("No such Node ID: %d" % node_id)
90 # This class captures the observed pcu records from FindBadPCUs.py
92 def __init__(self, pcu_record_dict):
93 for field in ['port_status',
96 if field in pcu_record_dict:
98 self.__setattr__("reboot_str", pcu_record_dict[field])
100 self.__setattr__(field, pcu_record_dict[field])
102 # raise Exception("No such field %s in pcu record dict" % field)
125 def __init__(self, type, verbose):
127 self.verbose = verbose
128 self.transport = None
130 def open(self, host, username=None, password=None, prompt="User Name"):
133 if self.type == self.TELNET:
134 transport = telnetlib.Telnet(host, timeout=self.TELNET_TIMEOUT)
135 transport.set_debuglevel(self.verbose)
136 if username is not None:
137 self.transport = transport
138 self.ifThenSend(prompt, username, ExceptionUsername)
140 elif self.type == self.SSH:
141 if username is not None:
142 transport = pyssh.Ssh(username, host)
143 transport.set_debuglevel(self.verbose)
145 # TODO: have an ssh set_debuglevel() also...
147 raise Exception("Username cannot be None for ssh transport.")
148 elif self.type == self.HTTP:
149 # NOTE: this does not work for all web-based services...
150 self.url = "http://%s:%d/" % (host,80)
151 uri = "%s:%d" % (host,80)
154 authinfo = urllib2.HTTPPasswordMgrWithDefaultRealm()
155 authinfo.add_password (None, uri, username, password)
156 authhandler = urllib2.HTTPBasicAuthHandler( authinfo )
158 transport = urllib2.build_opener(authhandler)
160 raise Exception("Unknown transport type: %s" % self.type)
162 self.transport = transport
166 if self.type == self.TELNET:
167 self.transport.close()
168 elif self.type == self.SSH:
169 self.transport.close()
170 elif self.type == self.HTTP:
173 raise Exception("Unknown transport type %s" % self.type)
174 self.transport = None
def write(self, msg):
    """Alias for send(): forward `msg` unchanged and return send()'s result."""
    outcome = self.send(msg)
    return outcome
180 if self.transport == None:
181 raise ExceptionNoTransport("transport object is type None")
183 return self.transport.write(msg)
185 def sendPassword(self, password, prompt=None):
186 if self.type == self.TELNET:
188 self.ifThenSend("Password", password, ExceptionPassword)
190 self.ifThenSend(prompt, password, ExceptionPassword)
191 elif self.type == self.SSH:
192 self.ifThenSend("password:", password, ExceptionPassword)
193 elif self.type == self.HTTP:
196 raise Exception("Unknown transport type: %s" % self.type)
198 def sendHTTP(self, resource, data):
200 print "POSTing '%s' to %s" % (data,self.url + resource)
203 f = self.transport.open(self.url + resource ,data)
208 except urllib2.URLError,err:
209 logger.info('Could not open http connection', err)
210 return "http transport error"
214 def ifThenSend(self, expected, buffer, ErrorClass=ExceptionPrompt):
216 if self.transport != None:
217 output = self.transport.read_until(expected, self.TELNET_TIMEOUT)
218 if output.find(expected) == -1:
219 print "OUTPUT: --%s--" % output
220 raise ErrorClass, "'%s' not found" % expected
222 self.transport.write(buffer + "\r\n")
224 raise ExceptionNoTransport("transport object is type None")
226 def ifElse(self, expected, ErrorClass):
228 self.transport.read_until(expected, self.TELNET_TIMEOUT)
230 raise ErrorClass("Could not find '%s' within timeout" % expected)
232 class PCUControl(PCUModel,PCURecord):
235 There are three cases:
236 1) the pcu_record passed below includes port_status from an
238 2) the external probe failed, and the values are empty
239 3) this call is made independent of port_status.
241 In the first case, the first open port is used.
242 In the third case, the ports are tried in sequence.
244 In this way, the port_status value serves only as an optimization,
245 because closed ports are avoided. The supported_ports value should
246 order ports by their preferred usage.
def __init__(self, plc_pcu_record, verbose, ignored=None):
    """Initialise both base classes from the same PCU record.

    PCUModel is initialised before PCURecord, each with `plc_pcu_record`.
    `verbose` and `ignored` are accepted but not used by this constructor.
    """
    for base in (PCUModel, PCURecord):
        base.__init__(self, plc_pcu_record)
255 def reboot(self, node_port, dryrun):
258 # There are two sources of potential ports. Those that are open and
259 # those that are part of the PCU's supported_ports.
260 # I think we should start with supported_ports and then filter that
263 port_list = self.supported_ports
265 if hasattr(self, 'port_status') and self.port_status:
266 # get out the open ports
267 port_list = filter(lambda x: self.port_status[x] == "open" , self.port_status.keys())
268 port_list = [ int(x) for x in port_list ]
269 # take only the open ports that are supported_ports
270 port_list = filter(lambda x: x in self.supported_ports, port_list)
272 raise ExceptionPort("No Open Port: No transport from open ports")
276 ret = "No implementation for open ports on selected PCU model"
277 for port in port_list:
278 if port not in Transport.porttypemap:
281 type = Transport.porttypemap[port]
282 self.transport = Transport(type, verbose)
284 print "checking for run_%s" % type
285 if hasattr(self, "run_%s" % type):
286 print "found run_%s" % type
287 fxn = getattr(self, "run_%s" % type)
288 ret = self.catcherror(fxn, node_port, dryrun)
289 if ret == 0: # NOTE: success!, so stop
def run(self, node_port, dryrun):
    """Placeholder to be overridden by the specific PCU implementation.

    Both arguments are ignored here; this base version always raises.

    Raises:
        NotImplementedError: always. It is a subclass of Exception, so
            callers that catch Exception keep working, and the message
            is the same as before.
    """
    raise NotImplementedError("This function is not implemented")
301 #def reboot(self, node_port, dryrun):
303 def catcherror(self, function, node_port, dryrun):
305 return function(node_port, dryrun)
306 except ExceptionNotFound, err:
307 return "error: " + str(err)
308 except ExceptionPassword, err:
309 return "Password exception: " + str(err)
310 except ExceptionTimeout, err:
311 return "Timeout exception: " + str(err)
312 except ExceptionUsername, err:
313 return "No username prompt: " + str(err)
314 except ExceptionSequence, err:
315 return "Sequence error: " + str(err)
316 except ExceptionPrompt, err:
317 return "Prompt exception: " + str(err)
318 except ExceptionNoTransport, err:
319 return "No Transport: " + str(err)
320 except ExceptionPort, err:
321 return "No ports exception: " + str(err)
322 except socket.error, err:
323 return "socket error: timeout: " + str(err)
324 except urllib2.HTTPError, err:
325 return "HTTPError: " + str(err)
326 except urllib2.URLError, err:
327 return "URLError: " + str(err)
328 except EOFError, err:
329 self.transport.close()
331 traceback.print_exc()
332 return "EOF connection reset" + str(err)
334 from pcucontrol.models import *
337 if pcu['hostname'] is not None and pcu['hostname'] is not "":
338 return pcu['hostname']
339 elif pcu['ip'] is not None and pcu['ip'] is not "":
344 def get_pcu_values(pcu_id):
345 from monitor.database.info.model import FindbadPCURecord
346 print "pcuid: %s" % pcu_id
348 pcurec = FindbadPCURecord.get_latest_by(plc_pcuid=pcu_id).first()
350 values = pcurec.to_dict()
def reboot(nodename):
    """Reboot `nodename` via reboot_policy with probing on and dryrun off.

    Returns whatever reboot_policy returns.
    """
    return reboot_policy(nodename, continue_probe=True, dryrun=False)
361 def reboot_str(nodename):
363 continue_probe = True
366 pcu = plc.getpcu(nodename)
368 logger.debug("no pcu for %s" % nodename)
369 print "no pcu for %s" % nodename
370 return False # "%s has no pcu" % nodename
372 values = get_pcu_values(pcu['pcu_id'])
374 logger.debug("No values for pcu probe %s" % nodename)
375 print "No values for pcu probe %s" % nodename
376 return False #"no info for pcu_id %s" % pcu['pcu_id']
379 logger.debug("Trying PCU %s %s" % (pcu['hostname'], pcu['model']))
381 ret = reboot_test_new(nodename, values, verbose, dryrun)
384 def reboot_policy(nodename, continue_probe, dryrun):
387 pcu = plc.getpcu(nodename)
389 logger.debug("no pcu for %s" % nodename)
390 print "no pcu for %s" % nodename
391 return False # "%s has no pcu" % nodename
393 values = get_pcu_values(pcu['pcu_id'])
395 logger.debug("No values for pcu probe %s" % nodename)
396 print "No values for pcu probe %s" % nodename
397 return False #"no info for pcu_id %s" % pcu['pcu_id']
400 logger.debug("Trying PCU %s %s" % (pcu['hostname'], pcu['model']))
402 ret = reboot_test_new(nodename, values, verbose, dryrun)
class Unknown(PCUControl):
    """PCUControl subclass that only declares a list of candidate ports."""

    # Kept in the original order; PCUControl documents that supported_ports
    # should order ports by preferred usage.
    supported_ports = [
        22,
        23,
        80,
        443,
        5869,
        9100,
        16992,
    ]
414 def model_to_object(modelname):
415 if modelname is None:
417 if "AMT" in modelname:
419 elif "BayTech" in modelname:
421 elif "HPiLO" in modelname:
423 elif "IPAL" in modelname:
425 elif "APC" in modelname:
427 elif "DRAC" in modelname:
429 elif "WTI" in modelname:
431 elif "ePowerSwitch" in modelname:
432 return ePowerSwitchNew
433 elif "IPMI" in modelname:
435 elif "BlackBoxPSMaverick" in modelname:
436 return BlackBoxPSMaverick
437 elif "PM211MIP" in modelname:
439 elif "ManualPCU" in modelname:
442 print "UNKNOWN model %s"%modelname
445 def reboot_api(node, pcu): #, verbose, dryrun):
449 modelname = pcu['model']
451 # get object instance
452 instance = eval('%s(pcu, verbose)' % modelname)
454 i = pcu['node_ids'].index(node['node_id'])
457 rb_ret = instance.reboot(p, False)
459 rb_ret = "No modelname in PCU record."
460 # TODO: how to handle the weird, georgetown pcus, the drac faults, and ilo faults
461 except Exception, err:
466 def convert_oldmodelname_to_newmodelname(oldmodelname, pcu_id):
468 update = { 'AP79xx' : 'APCControl13p13',
469 'Masterswitch' : 'APCControl13p13',
470 'DS4-RPC' : 'BayTech',
471 'IP-41x_IP-81x' : 'IPAL',
474 'ePowerSwitch' : 'ePowerSwitchOld',
477 'PM211-MIP' : 'PM211MIP',
478 'AMT2.5' : 'IntelAMT',
479 'AMT3.0' : 'IntelAMT',
480 'WTI_IPS-4' : 'WTIIPS4',
481 'unknown' : 'ManualPCU',
484 'bbsemaverick' : 'BlackBoxPSMaverick',
485 'manualadmin' : 'ManualPCU',
488 if oldmodelname in update:
489 newmodelname = update[oldmodelname]
491 newmodelname = oldmodelname
493 if pcu_id in [1102,1163,1055,1111,1231,1113,1127,1128,1148]:
494 newmodelname = 'APCControl12p3'
495 elif pcu_id in [1110,86]:
496 newmodelname = 'APCControl1p4'
497 elif pcu_id in [1221,1225,1220,1192]:
498 newmodelname = 'APCControl121p3'
499 elif pcu_id in [1173,1240,47,1363,1405,1401,1372,1371]:
500 newmodelname = 'APCControl121p1'
501 elif pcu_id in [1056,1237,1052,1209,1002,1008,1013,1022]:
502 newmodelname = 'BayTechCtrlC'
504 newmodelname = 'BayTechRPC3NC'
505 elif pcu_id in [1057]:
506 newmodelname = 'BayTechCtrlCUnibe'
507 elif pcu_id in [1012]:
508 newmodelname = 'BayTechRPC16'
509 elif pcu_id in [1089, 1071, 1046, 1035, 1118]:
510 newmodelname = 'ePowerSwitchNew'
514 def reboot_test_new(nodename, values, verbose, dryrun):
516 if 'plc_pcu_stats' in values:
517 values.update(values['plc_pcu_stats'])
520 modelname = convert_oldmodelname_to_newmodelname(values['model'], values['pcu_id'])
522 object = eval('%s(values, verbose)' % modelname)
523 rb_ret = object.reboot(values[nodename], dryrun)
526 # TODO: how to handle the weird, georgetown pcus, the drac faults, and ilo faults
527 except ExceptionPort, err:
529 except NameError, err:
535 logger.setLevel(logging.DEBUG)
536 ch = logging.StreamHandler()
537 ch.setLevel(logging.DEBUG)
538 formatter = logging.Formatter('LOGGER - %(message)s')
539 ch.setFormatter(formatter)
540 logger.addHandler(ch)
543 if "test" in sys.argv:
548 for node in sys.argv[1:]:
549 if node == "test": continue
551 print "Rebooting %s" % node
552 if reboot_policy(node, True, dryrun):
556 except Exception, err:
557 import traceback; traceback.print_exc()
560 if __name__ == '__main__':
561 logger = logging.getLogger("monitor")
563 f = open("/tmp/rebootlog", 'a')
564 f.write("reboot %s\n" % sys.argv)