improve error handling and reporting for hpilos.
[pcucontrol.git] / pcucontrol / reboot.py
1 #!/usr/bin/python
2 #
3 # Reboot specified nodes
4 #
5
6 import getpass, getopt
7 import os, sys
8 import xml, xmlrpclib
9 import errno, time, traceback
10 import urllib2
11 import urllib
12 import array, struct
13 import base64
14 from subprocess import PIPE, Popen
15 import subprocess
16 import pcucontrol.transports.ssh.pxssh as pxssh
17 import pcucontrol.transports.ssh.pexpect as pexpect
18 import socket
19
20
21
22 # Use our versions of telnetlib and pyssh
23 sys.path.insert(0, os.path.dirname(sys.argv[0]))
24 import pcucontrol.transports.telnetlib as telnetlib
25 sys.path.insert(0, os.path.dirname(sys.argv[0]) + "/pyssh")    
26 import pcucontrol.transports.pyssh as pyssh
27
28 # Event class ID from pcu events
29 #NODE_POWER_CONTROL = 3
30
31 # Monitor user ID
32 #MONITOR_USER_ID = 11142
33
34 import logging
35 verbose = 1
36 #dryrun = 0;
37
class ExceptionNoTransport(Exception):
    """Raised by Transport when an operation needs an open transport but none is set."""


class ExceptionNotFound(Exception):
    """Not raised in this module; PCUControl.catcherror reports it as "error: ..."."""


class ExceptionPassword(Exception):
    """Raised by Transport.sendPassword when the password prompt is not seen."""


class ExceptionTimeout(Exception):
    """Not raised in this module; PCUControl.catcherror reports it as a timeout."""


class ExceptionPrompt(Exception):
    """Default error of Transport.ifThenSend when the expected prompt is missing."""


class ExceptionSequence(Exception):
    """Not raised in this module; PCUControl.catcherror reports it as a sequence error."""


class ExceptionReset(Exception):
    """Neither raised nor handled in this module; kept for code that imports it."""


class ExceptionPort(Exception):
    """Raised by PCUControl.reboot when no supported port is reported open."""


class ExceptionUsername(Exception):
    """Raised by Transport.open when the telnet username prompt is not seen."""
47
48
49
50 # PCU has model, host, preferred-port, user, passwd, 
51
52 # This is an object derived directly from the PLCAPI DB fields
class PCU(object):
    """Attribute-style view of the PLCAPI fields that describe one PCU."""

    def __init__(self, plc_pcu_dict):
        """Copy the required PCU fields from plc_pcu_dict onto the instance.

        Fields are processed in a fixed order; unicode values are converted
        with str() before being stored.  Keys other than the required ones
        are ignored.

        Raises:
            Exception: as soon as a required field is missing (fields that
                precede it in the list have already been set by then).
        """
        required_fields = ('username', 'password', 'site_id',
                           'hostname', 'ip',
                           'pcu_id', 'model',
                           'node_ids', 'ports')
        unicode_type = type(u"")

        for name in required_fields:
            if name not in plc_pcu_dict:
                raise Exception("No such field %s in PCU object" % name)

            value = plc_pcu_dict[name]
            if type(value) == unicode_type:
                # NOTE: if is a unicode string, convert it.
                value = str(value)
            setattr(self, name, value)
67
68 # These are the convenience functions built around the PCU object.
class PCUModel(PCU):
    """PCU record plus helpers for naming the PCU and mapping nodes to ports."""

    def __init__(self, plc_pcu_dict):
        """Load the PCU fields and cache the preferred address in self.host."""
        PCU.__init__(self, plc_pcu_dict)
        self.host = self.pcu_name()

    def pcu_name(self):
        """Return the hostname if set, otherwise the ip, otherwise None.

        A field counts as set when it is neither None nor the empty string.
        """
        for candidate in (self.hostname, self.ip):
            # Compare by value: identity tests against a "" literal only work
            # by accident of string interning.
            if candidate is not None and candidate != "":
                return candidate
        return None

    def nodeidToPort(self, node_id):
        """Return the entry of self.ports at the position of node_id in self.node_ids.

        Raises:
            Exception: if node_id is not listed in self.node_ids.
        """
        for position, known_id in enumerate(self.node_ids):
            if known_id == node_id:
                return self.ports[position]

        # %s rather than %d so that a non-numeric id still yields this error
        # instead of a TypeError from the string formatting itself.
        raise Exception("No such Node ID: %s" % (node_id,))
89
90 # This class captures the observed pcu records from FindBadPCUs.py
class PCURecord:
    """Observed PCU status fields (port_status, dns_status, entry_complete)."""

    def __init__(self, pcu_record_dict):
        """Copy the observed-status fields that are present onto the instance.

        Fields missing from pcu_record_dict are skipped silently, so the
        corresponding attributes may not exist afterwards.
        """
        for field in ['port_status',
                      'dns_status',
                      'entry_complete', ]:
            if field not in pcu_record_dict:
                # Deliberately tolerated: not every record carries every field.
                continue

            # The builtin setattr() is used instead of self.__setattr__
            # because this is an old-style class on Python 2, where a
            # stand-alone instance does not expose __setattr__.
            if field == "reboot":
                # Unreachable with the current field list; preserved so that
                # a re-added "reboot" field is stored as reboot_str.
                setattr(self, "reboot_str", pcu_record_dict[field])
            else:
                setattr(self, field, pcu_record_dict[field])
103
class Transport:
    """Connection wrapper used to talk to a PCU over telnet, ssh or http.

    open() and close() only implement the TELNET, SSH and HTTP types; the
    other type constants exist so that porttypemap can name every known
    PCU port.
    """

    TELNET = "telnet"
    SSH    = "ssh"
    HTTP   = "http"
    HTTPS  = "https"
    IPAL   = "ipal"
    IPMI   = "ipmi"
    DRAC   = "drac"
    AMT    = "amt"

    # Timeout passed to the telnet connection and to every read_until() call.
    TELNET_TIMEOUT = 120

    # Maps a port number to the transport type used on that port.
    porttypemap = {
        5869: DRAC,
        22: SSH,
        23: TELNET,
        443: HTTPS,
        80: HTTP,
        9100: IPAL,
        623: IPMI,
        16992: AMT,
    }

    def __init__(self, type, verbose):
        """Remember the transport type and debug level; nothing is opened yet.

        `type` shadows the builtin, but the name is part of the signature.
        """
        self.type = type
        self.verbose = verbose
        self.transport = None

    def open(self, host, username=None, password=None, prompt="User Name"):
        """Open the underlying connection to host and return True.

        telnet: connects and, when username is given, waits for `prompt`
                and sends the username.
        ssh:    requires username; opens a pyssh session.
        http:   builds a basic-auth opener for http://host:80/.

        Raises:
            ExceptionUsername: telnet username prompt not seen.
            Exception: ssh without a username, or an unsupported type.
        """
        transport = None

        if self.type == self.TELNET:
            transport = telnetlib.Telnet(host, timeout=self.TELNET_TIMEOUT)
            transport.set_debuglevel(self.verbose)
            if username is not None:
                # ifThenSend() talks through self.transport, so publish the
                # connection before the login exchange.
                self.transport = transport
                self.ifThenSend(prompt, username, ExceptionUsername)

        elif self.type == self.SSH:
            if username is None:
                raise Exception("Username cannot be None for ssh transport.")
            transport = pyssh.Ssh(username, host)
            transport.set_debuglevel(self.verbose)
            transport.open()
            # TODO: have an ssh set_debuglevel() also...

        elif self.type == self.HTTP:
            # NOTE: this does not work for all web-based services...
            self.url = "http://%s:%d/" % (host, 80)
            uri = "%s:%d" % (host, 80)

            # create authinfo
            authinfo = urllib2.HTTPPasswordMgrWithDefaultRealm()
            authinfo.add_password(None, uri, username, password)
            authhandler = urllib2.HTTPBasicAuthHandler(authinfo)

            transport = urllib2.build_opener(authhandler)

        else:
            raise Exception("Unknown transport type: %s" % self.type)

        self.transport = transport
        return True

    def close(self):
        """Close the connection (if any) and forget it.

        Safe to call when nothing was opened.  Raises Exception for a
        transport type that open() does not support.
        """
        if self.type in (self.TELNET, self.SSH):
            if self.transport is not None:
                self.transport.close()
        elif self.type == self.HTTP:
            # The opener holds no persistent connection to close.
            pass
        else:
            raise Exception("Unknown transport type %s" % self.type)
        self.transport = None

    def write(self, msg):
        """Alias for send()."""
        return self.send(msg)

    def send(self, msg):
        """Write msg to the open connection.

        Raises:
            ExceptionNoTransport: if no connection is open.
        """
        if self.transport is None:
            raise ExceptionNoTransport("transport object is type None")

        return self.transport.write(msg)

    def sendPassword(self, password, prompt=None):
        """Wait for the password prompt and answer it with password.

        telnet waits for `prompt` (default "Password"), ssh for "password:";
        http does nothing because credentials were given to open().

        Raises:
            ExceptionPassword: the prompt was not seen.
            Exception: unsupported transport type.
        """
        if self.type == self.TELNET:
            expected = "Password" if prompt is None else prompt
            self.ifThenSend(expected, password, ExceptionPassword)
        elif self.type == self.SSH:
            self.ifThenSend("password:", password, ExceptionPassword)
        elif self.type == self.HTTP:
            pass
        else:
            raise Exception("Unknown transport type: %s" % self.type)

    def sendHTTP(self, resource, data):
        """POST data to self.url + resource.

        Returns 0 on success, or the string "http transport error" when
        urllib2 reports a URLError.
        """
        if self.verbose:
            print("POSTing '%s' to %s" % (data, self.url + resource))

        try:
            response = self.transport.open(self.url + resource, data)
            try:
                body = response.read()
            finally:
                # Always release the HTTP response, even if read() fails.
                response.close()
            if self.verbose:
                print(body)

        except urllib2.URLError as err:
            print("Could not open http connection %s" % (err,))
            return "http transport error"

        return 0

    def ifThenSend(self, expected, buffer, ErrorClass=ExceptionPrompt):
        """Read until `expected` shows up, then send `buffer` plus CRLF.

        Raises:
            ExceptionNoTransport: if no connection is open.
            ErrorClass: if the text read does not contain `expected`.
        """
        if self.transport is None:
            raise ExceptionNoTransport("transport object is type None")

        output = self.transport.read_until(expected, self.TELNET_TIMEOUT)
        if output.find(expected) == -1:
            print("OUTPUT: --%s--" % output)
            raise ErrorClass("'%s' not found" % expected)

        self.transport.write(buffer + "\r\n")

    def ifElse(self, expected, ErrorClass):
        """Read until `expected`; turn any read failure into ErrorClass.

        NOTE(review): this only reports a missing prompt if read_until()
        raises; whether the bundled telnetlib/pyssh raise on timeout is not
        visible here -- confirm.
        """
        try:
            self.transport.read_until(expected, self.TELNET_TIMEOUT)
        except Exception:
            # Not a bare except: KeyboardInterrupt/SystemExit must propagate.
            raise ErrorClass("Could not find '%s' within timeout" % expected)
233
class PCUControl(PCUModel, PCURecord):
    """Base class for model-specific PCU controllers.

    There are three cases:
        1) the pcu_record passed below includes port_status from an
           external probe.
        2) the external probe failed, and the values are empty
        3) this call is made independent of port_status.

    In the first case, only ports reported open are used.
    In the other cases, the supported ports are tried in sequence.

    In this way, the port_status value serves only as an optimization,
    because closed ports are avoided.  The supported_ports value should
    order ports by their preferred usage; reboot() always tries ports in
    that order.

    Subclasses provide run_<transport type>(node_port, dryrun) methods
    (e.g. run_telnet); reboot() looks them up by name.
    """

    supported_ports = []

    def __init__(self, plc_pcu_record, verbose, ignored=None):
        """Load PCU and observed-status fields and remember the debug level."""
        PCUModel.__init__(self, plc_pcu_record)
        PCURecord.__init__(self, plc_pcu_record)
        self.verbose = verbose

    def reboot(self, node_port, dryrun):
        """Try each usable port until one run_<type> handler returns 0.

        Returns 0 on success, otherwise the result of the last handler
        tried (an error string from catcherror or whatever the handler
        returned), or a "No implementation" message if no handler ran.

        Raises:
            ExceptionPort: port_status is available but none of the
                supported ports is reported "open".
        """
        port_list = self.supported_ports

        if hasattr(self, 'port_status') and self.port_status:
            # get out the open ports
            open_ports = [int(port)
                          for port, status in self.port_status.items()
                          if status == "open"]
            # Walk supported_ports (not the dict) so that the preference
            # order is kept instead of arbitrary dict ordering.
            port_list = [port for port in self.supported_ports
                         if port in open_ports]
            if not port_list:
                raise ExceptionPort("No Open Port: No transport from open ports")

        print(port_list)

        # Honour the level given to the constructor; fall back to the
        # module default for subclasses that never called our __init__.
        debug_level = getattr(self, 'verbose', verbose)

        ret = "No implementation for open ports on selected PCU model"
        for port in port_list:
            if port not in Transport.porttypemap:
                continue

            transport_type = Transport.porttypemap[port]
            self.transport = Transport(transport_type, debug_level)

            handler_name = "run_%s" % transport_type
            print("checking for %s" % handler_name)
            if not hasattr(self, handler_name):
                continue

            print("found %s" % handler_name)
            ret = self.catcherror(getattr(self, handler_name), node_port, dryrun)
            if ret == 0:  # NOTE: success!, so stop
                break

        return ret

    def run(self, node_port, dryrun):
        """ This function is to be defined by the specific PCU instance.  """
        raise Exception("This function is not implemented")

    def catcherror(self, function, node_port, dryrun):
        """Call function(node_port, dryrun), mapping known errors to strings.

        Returns the function's result, or a descriptive string for the
        transport/protocol errors listed below.  Any other exception is
        re-raised wrapped in a plain Exception.
        """
        try:
            return function(node_port, dryrun)
        except ExceptionNotFound as err:
            return "error: " + str(err)
        except ExceptionPassword as err:
            return "Password exception: " + str(err)
        except ExceptionTimeout as err:
            return "Timeout exception: " + str(err)
        except ExceptionUsername as err:
            return "No username prompt: " + str(err)
        except ExceptionSequence as err:
            return "Sequence error: " + str(err)
        except ExceptionPrompt as err:
            return "Prompt exception: " + str(err)
        except ExceptionNoTransport as err:
            return "No Transport: " + str(err)
        except ExceptionPort as err:
            return "No ports exception: " + str(err)
        except socket.error as err:
            return "socket error: timeout: " + str(err)
        except urllib2.HTTPError as err:
            # Must precede URLError, of which HTTPError is a subclass.
            return "HTTPError: " + str(err)
        except urllib2.URLError as err:
            return "URLError: " + str(err)
        except EOFError as err:
            import traceback
            # Print first: a nested handler would replace the active
            # exception on Python 2.
            traceback.print_exc()
            try:
                self.transport.close()
            except Exception:
                # Best effort only; a failing close must not hide the EOF.
                pass
            return "EOF connection reset" + str(err)
        except Exception as err:
            #from monitor.common import email_exception
            #email_exception(self.host)
            # Kept as a plain Exception so callers see the same type as before.
            raise Exception(err)
339
340 from pcucontrol.util import command
341 from pcucontrol.models import *
342
def pcu_name(pcu):
    """Return pcu['hostname'] if set, else pcu['ip'] if set, else None.

    A value counts as set when it is neither None nor an empty string.
    The comparison is by value, so empty unicode strings from the API are
    treated as empty too (an identity test against "" would miss them).

    Raises:
        KeyError: if pcu lacks the 'hostname' or 'ip' key.
    """
    for key in ('hostname', 'ip'):
        value = pcu[key]
        if value is not None and value != "":
            return value
    return None
350
class Unknown(PCUControl):
    """Controller returned by model_to_object for unrecognised model names."""

    supported_ports = [
        22,      # ssh
        23,      # telnet
        80,      # http
        443,     # https
        5869,    # drac
        9100,    # ipal
        16992,   # amt
    ]
353
def model_to_object(modelname):
    """Return the controller class whose marker substring occurs in modelname.

    None maps to ManualPCU.  Markers are tested in the order listed and the
    first match wins; a name matching none of them is reported on stdout
    and mapped to Unknown.
    """
    if modelname is None:
        return ManualPCU

    # Each class is wrapped in a lambda so that its name is only resolved
    # when the corresponding marker actually matches.
    markers = (
        ("AMT", lambda: IntelAMT),
        ("BayTech", lambda: BayTech),
        ("HPiLO", lambda: HPiLO),
        ("IPAL", lambda: IPAL),
        ("APC", lambda: APCControl),
        ("DRAC", lambda: DRAC),
        ("WTI", lambda: WTIIPS4),
        ("ePowerSwitch", lambda: ePowerSwitchNew),
        ("IPMI", lambda: OpenIPMI),
        ("BlackBoxPSMaverick", lambda: BlackBoxPSMaverick),
        ("PM211MIP", lambda: PM211MIP),
        ("ManualPCU", lambda: ManualPCU),
    )
    for marker, resolve in markers:
        if marker in modelname:
            return resolve()

    print("UNKNOWN model %s"%modelname)
    return Unknown
384
def reboot_api(node, pcu, testrun=False):
    """Reboot node through pcu and return the controller's result.

    The controller class is looked up by pcu['model'], the node's port is
    the entry of pcu['ports'] at the position of node['node_id'] in
    pcu['node_ids'], and testrun is passed to reboot() as its dryrun flag.

    Returns:
        Whatever the controller's reboot() returns, or
        "No modelname in PCU record." when the model is empty, or
        "Exception Model(<model>): <error>" when anything raises.
    """
    # Bound before the try block so the error message below can always be
    # built, even when reading pcu['model'] itself is what failed.
    modelname = None

    try:
        modelname = pcu['model']
        if not modelname:
            return "No modelname in PCU record."

        # get object instance
        # NOTE(review): eval() executes whatever text is stored in the
        # model field; consider a lookup restricted to known controller
        # classes if that field can be written by untrusted parties.
        instance = eval('%s(pcu, verbose)' % modelname)
        # get pcu port
        position = pcu['node_ids'].index(node['node_id'])
        port = pcu['ports'][position]
        # reboot
        return instance.reboot(port, testrun)
        # TODO: how to handle the weird, georgetown pcus, the drac faults, and ilo faults
    except Exception as err:
        return "Exception Model(%s): " % (modelname,) + str(err)
406
def convert_oldmodelname_to_newmodelname(oldmodelname, pcu_id):
    """Translate a legacy model name into the current controller name.

    Names without a translation are returned unchanged.  A handful of PCUs
    are then forced to a specific controller by pcu_id, regardless of the
    model name.
    """
    legacy_names = {
        'AP79xx': 'APCControl13p13',
        'Masterswitch': 'APCControl13p13',
        'DS4-RPC': 'BayTech',
        'IP-41x_IP-81x': 'IPAL',
        'DRAC3': 'DRAC',
        'DRAC4': 'DRAC',
        'ePowerSwitch': 'ePowerSwitchOld',
        'ilo2': 'HPiLO',
        'ilo1': 'HPiLO',
        'PM211-MIP': 'PM211MIP',
        'AMT2.5': 'IntelAMT',
        'AMT3.0': 'IntelAMT',
        'WTI_IPS-4': 'WTIIPS4',
        'unknown': 'ManualPCU',
        'DRAC5': 'DRAC',
        'ipmi': 'OpenIPMI',
        'bbsemaverick': 'BlackBoxPSMaverick',
        'manualadmin': 'ManualPCU',
    }

    # (pcu ids, controller name) pairs; the first group containing pcu_id wins.
    overrides_by_pcu = (
        ((1102, 1163, 1055, 1111, 1231, 1113, 1127, 1128, 1148), 'APCControl12p3'),
        ((1110, 86), 'APCControl1p4'),
        ((1221, 1225, 1220, 1192), 'APCControl121p3'),
        ((1173, 1240, 47, 1363, 1405, 1401, 1372, 1371), 'APCControl121p1'),
        ((1056, 1237, 1052, 1209, 1002, 1008, 1013, 1022), 'BayTechCtrlC'),
        ((93,), 'BayTechRPC3NC'),
        ((1057,), 'BayTechCtrlCUnibe'),
        ((1012,), 'BayTechRPC16'),
        ((1089, 1071, 1046, 1035, 1118), 'ePowerSwitchNew'),
    )

    newmodelname = legacy_names.get(oldmodelname, oldmodelname)

    for pcu_ids, forced_name in overrides_by_pcu:
        if pcu_id in pcu_ids:
            newmodelname = forced_name
            break

    return newmodelname
454
def reboot_test_new(nodename, values, verbose, dryrun):
    """Run a reboot through the controller named by values['model'].

    When values contains 'plc_pcu_stats', that mapping is merged into
    values in place first.  The node's port is read from values[nodename].

    Returns:
        The controller's reboot() result, "Not_Run" when the model is
        empty, or the message of an ExceptionPort / NameError raised on
        the way.  Other exceptions propagate.
    """
    if 'plc_pcu_stats' in values:
        values.update(values['plc_pcu_stats'])

    try:
        #modelname = convert_oldmodelname_to_newmodelname(values['model'], values['pcu_id'])
        modelname = values['model']
        if not modelname:
            return "Not_Run"

        # NOTE(review): eval() executes the text stored in the model field;
        # an unknown model name surfaces as the NameError handled below.
        controller = eval('%s(values, verbose)' % modelname)
        return controller.reboot(values[nodename], dryrun)
        # TODO: how to handle the weird, georgetown pcus, the drac faults, and ilo faults
    except (ExceptionPort, NameError) as err:
        return str(err)
475
def main():
    """Script entry point; only reports that stand-alone use is unsupported."""
    print("this does not work.")


if __name__ == '__main__':
    main()