Includes support for IntelAMT as well as better support for existing IPAL over
[monitor.git] / reboot.py
1 #!/usr/bin/python
2 #
3 # Reboot specified nodes
4 #
5
6 import getpass, getopt
7 import os, sys
8 import xml, xmlrpclib
9 import errno, time, traceback
10 import urllib2
11 import threading, popen2
12 import array, struct
13 #from socket import *
14 import socket
15 import plc
16 import base64
17 from subprocess import PIPE, Popen
18 import ssh.pxssh as pxssh
19 import ssh.pexpect as pexpect
20 import socket
21
22 # Use our versions of telnetlib and pyssh
23 sys.path.insert(0, os.path.dirname(sys.argv[0]))
24 import telnetlib
25 sys.path.insert(0, os.path.dirname(sys.argv[0]) + "/pyssh")    
26 import pyssh
27
28 # Timeouts in seconds
29 TELNET_TIMEOUT = 45
30
31 # Event class ID from pcu events
32 #NODE_POWER_CONTROL = 3
33
34 # Monitor user ID
35 #MONITOR_USER_ID = 11142
36
37 import logging
38 logger = logging.getLogger("monitor")
39 verbose = 1
40 #dryrun = 0;
41
class ExceptionNoTransport(Exception):
        """An operation needed an open transport but none was set."""

class ExceptionNotFound(Exception):
        """An expected string was not found in the text read from the PCU."""

class ExceptionPassword(Exception):
        """The password step of a login did not go as expected."""

class ExceptionTimeout(Exception):
        """Reported by PCUControl.reboot() as a timeout."""

class ExceptionPrompt(Exception):
        """An expected prompt was not seen (default error of ifThenSend)."""

class ExceptionSequence(Exception):
        """A confirmation step of a reboot menu sequence did not appear."""

class ExceptionReset(Exception):
        """Reset failure; not raised by the code in this part of the file."""

class ExceptionPort(Exception):
        """None of the supported ports of the PCU is open."""

class ExceptionUsername(Exception):
        """The username prompt was not seen while logging in."""
51
def telnet_answer(telnet, expected, buffer):
        """Wait for `expected` on `telnet`, then reply with `buffer`.

        Calls telnet.read_until(expected, TELNET_TIMEOUT).  When the text
        that comes back does not contain `expected`, ExceptionNotFound is
        raised; otherwise `buffer` followed by CRLF is written to `telnet`.
        """
        reply = telnet.read_until(expected, TELNET_TIMEOUT)
        if expected not in reply:
                raise ExceptionNotFound("'%s' not found" % expected)
        telnet.write(buffer + "\r\n")
62
63
64 # PCU has model, host, preferred-port, user, passwd, 
65
# This is an object derived directly from the PLCAPI DB fields
class PCU(object):
        """Attribute view of one PCU record.

        Every required key of `plc_pcu_dict` is copied onto the instance as
        an attribute of the same name; other keys are ignored.  An Exception
        naming the key is raised at the first required key that is missing.
        """

        def __init__(self, plc_pcu_dict):
                required = ('username', 'password', 'site_id',
                            'hostname', 'ip',
                            'pcu_id', 'model',
                            'node_ids', 'ports')
                for name in required:
                        if name not in plc_pcu_dict:
                                raise Exception("No such field %s in PCU object" % name)
                        setattr(self, name, plc_pcu_dict[name])
77
# These are the convenience functions built around the PCU object.
class PCUModel(PCU):
        """PCU record plus helpers to name the unit and map nodes to ports."""

        def __init__(self, plc_pcu_dict):
                PCU.__init__(self, plc_pcu_dict)
                # Address of the unit: hostname when set, otherwise ip.
                self.host = self.pcu_name()

        def pcu_name(self):
                """Return hostname if it is set, else ip if it is set, else None.

                "Set" means neither None nor the empty string.
                """
                # Compare by value: `x is not ""` tests object identity, which
                # only works when the interpreter happens to share the
                # empty-string object.
                for candidate in (self.hostname, self.ip):
                        if candidate is not None and candidate != "":
                                return candidate
                return None

        def nodeidToPort(self, node_id):
                """Return the port stored at node_id's position in node_ids.

                node_ids and ports are parallel lists; the first occurrence of
                `node_id` is used.  Raises Exception when `node_id` is not in
                node_ids.
                """
                if node_id in self.node_ids:
                        return self.ports[self.node_ids.index(node_id)]

                # %s rather than %d so that a non-integer id still yields this
                # message instead of a formatting TypeError.
                raise Exception("No such Node ID: %s" % node_id)
99
# This class captures the observed pcu records from FindBadPCUs.py
class PCURecord:
        """Attribute view of the observed state of a PCU.

        Copies 'nodenames', 'portstatus', 'dnsmatch' and 'complete_entry'
        from `pcu_record_dict` onto the instance.  An Exception naming the
        key is raised at the first of these keys that is missing.
        """

        def __init__(self, pcu_record_dict):
                # The old special case that stored a "reboot" entry as
                # reboot_str could never run, because "reboot" is not one of
                # the required fields, so it has been removed.
                for name in ('nodenames', 'portstatus',
                             'dnsmatch',
                             'complete_entry'):
                        if name not in pcu_record_dict:
                                raise Exception("No such field %s in pcu record dict" % name)
                        # setattr() also works when this classic class is
                        # instantiated directly; self.__setattr__ does not.
                        setattr(self, name, pcu_record_dict[name])
113
class Transport:
        """Connection used to talk to a PCU.

        `type` is one of the TELNET / SSH / HTTP / IPAL constants.  open()
        stores the underlying connection object in self.transport; the other
        methods talk through it.  open(), close() and sendPassword() raise
        Exception for a type they do not handle (IPAL among them).
        """
        TELNET = 1
        SSH    = 2
        HTTP   = 3
        IPAL   = 4

        # Timeout in seconds passed to the connection when opening it and
        # when waiting for expected text.
        TELNET_TIMEOUT = 60

        def __init__(self, type, verbose):
                self.type = type
                self.verbose = verbose
                self.transport = None

        def open(self, host, username=None, password=None, prompt="User Name"):
                """Open the connection to `host` and store it in self.transport.

                TELNET: connects and, when `username` is given, waits for
                        `prompt` and sends the username (ExceptionUsername if
                        the prompt is not seen).
                SSH:    requires `username`; raises Exception without it.
                HTTP:   builds a basic-auth opener for http://host:80/ and
                        remembers the base URL in self.url.
                Returns True.
                """
                if self.type == self.TELNET:
                        transport = telnetlib.Telnet(host, timeout=self.TELNET_TIMEOUT)
                        transport.set_debuglevel(self.verbose)
                        if username is not None:
                                # ifThenSend() talks through self.transport, so
                                # it has to be set before the login exchange.
                                self.transport = transport
                                self.ifThenSend(prompt, username, ExceptionUsername)

                elif self.type == self.SSH:
                        if username is None:
                                raise Exception("Username cannot be None for ssh transport.")
                        transport = pyssh.Ssh(username, host)
                        transport.set_debuglevel(self.verbose)
                        transport.open()
                        # TODO: have an ssh set_debuglevel() also...

                elif self.type == self.HTTP:
                        self.url = "http://%s:%d/" % (host, 80)
                        uri = "%s:%d" % (host, 80)

                        # create authinfo
                        authinfo = urllib2.HTTPPasswordMgrWithDefaultRealm()
                        authinfo.add_password(None, uri, username, password)
                        authhandler = urllib2.HTTPBasicAuthHandler(authinfo)

                        transport = urllib2.build_opener(authhandler)

                else:
                        raise Exception("Unknown transport type: %s" % self.type)

                self.transport = transport
                return True

        def close(self):
                """Close the connection, if any, and forget it.

                Safe to call when nothing is open.  Raises Exception for a
                transport type this class does not handle.
                """
                if self.type in (self.TELNET, self.SSH):
                        if self.transport is not None:
                                self.transport.close()
                elif self.type == self.HTTP:
                        # the opener holds no connection that needs closing
                        pass
                else:
                        raise Exception("Unknown transport type %s" % self.type)
                self.transport = None

        def sendHTTP(self, resource, data):
                """Request self.url + resource through the opener, passing `data`.

                Returns 0 on success and the string "http transport error"
                when urllib2 reports a URLError.
                """
                target = self.url + resource
                if self.verbose:
                        print("POSTing '%s' to %s" % (data, target))

                try:
                        response = self.transport.open(target, data)
                        try:
                                body = response.read()
                        finally:
                                # release the HTTP response even if read() fails
                                response.close()
                        if self.verbose:
                                print(body)

                except urllib2.URLError as err:
                        # %s placeholder: without it logging cannot format err
                        logger.info('Could not open http connection: %s', err)
                        return "http transport error"

                return 0

        def sendPassword(self, password, prompt=None):
                """Wait for the password prompt and answer it.

                TELNET waits for `prompt` ("Password" when None), SSH waits
                for "password:", HTTP does nothing.  ExceptionPassword is
                raised when the prompt is not seen.
                """
                if self.type == self.TELNET:
                        if prompt is None:
                                prompt = "Password"
                        self.ifThenSend(prompt, password, ExceptionPassword)
                elif self.type == self.SSH:
                        self.ifThenSend("password:", password, ExceptionPassword)
                elif self.type == self.HTTP:
                        pass
                else:
                        raise Exception("Unknown transport type: %s" % self.type)

        def ifThenSend(self, expected, buffer, ErrorClass=ExceptionPrompt):
                """Wait for `expected`, then send `buffer` followed by CRLF.

                Raises ExceptionNoTransport when nothing is open, and
                ErrorClass when the text read does not contain `expected`.
                """
                if self.transport is None:
                        raise ExceptionNoTransport("transport object is type None")

                output = self.transport.read_until(expected, self.TELNET_TIMEOUT)
                if expected not in output:
                        raise ErrorClass("'%s' not found" % expected)
                self.transport.write(buffer + "\r\n")

        def ifElse(self, expected, ErrorClass):
                """Wait for `expected`; raise ErrorClass if it does not arrive.

                ErrorClass is raised both when reading fails and when the text
                that was read does not contain `expected`.
                """
                message = "Could not find '%s' within timeout" % expected
                try:
                        output = self.transport.read_until(expected, self.TELNET_TIMEOUT)
                except Exception:
                        raise ErrorClass(message)
                # As ifThenSend() shows, read_until can hand back text without
                # `expected` in it, so the result has to be checked as well.
                if expected not in output:
                        raise ErrorClass(message)
218                         
219
class PCUControl(Transport,PCUModel,PCURecord):
        """Base class of the per-model reboot drivers.

        Combines the PCU record, its observed state and a transport.  The
        transport type is chosen from the first port that is both listed in
        `supported_ports` and reported "open" in portstatus.  Subclasses
        implement run(); callers use reboot(), which turns the known
        failures into result strings.
        """

        # (port, transport type) in order of preference.
        _PORT_TRANSPORTS = (
                ('22',    Transport.SSH),
                ('23',    Transport.TELNET),
                ('80',    Transport.HTTP),
                ('443',   Transport.HTTP),
                # For DRAC cards. Racadm opens this port.
                ('5869',  Transport.HTTP),
                ('9100',  Transport.IPAL),
                ('16992', Transport.HTTP),
        )

        # (exception class, prefix of the string returned by reboot()),
        # checked in this order.
        _REBOOT_ERRORS = (
                (ExceptionNotFound, "error: "),
                (ExceptionPassword, "password exception: "),
                (ExceptionTimeout,  "timeout exception: "),
                (ExceptionUsername, "exception: no username prompt: "),
                (ExceptionSequence, "sequence error: "),
                (ExceptionPrompt,   "prompt exception: "),
                (ExceptionPort,     "no ports exception: "),
                (socket.error,      "socket error: timeout: "),
        )

        def __init__(self, plc_pcu_record, verbose, supported_ports=None):
                """Build the controller from one merged PCU record.

                plc_pcu_record  -- dict with the fields required by PCUModel
                                   and PCURecord
                verbose         -- passed on to the transport
                supported_ports -- port numbers (as strings) the driver can
                                   use; none when omitted

                Raises Exception when portstatus is empty and ExceptionPort
                when no supported port is open.
                """
                PCUModel.__init__(self, plc_pcu_record)
                PCURecord.__init__(self, plc_pcu_record)

                if not self.portstatus:
                        raise Exception("No Portstatus: No transport because no open ports")
                if supported_ports is None:
                        supported_ports = ()

                chosen = None
                for port, transport_type in self._PORT_TRANSPORTS:
                        # .get(): a port missing from portstatus counts as
                        # not open instead of raising KeyError.
                        if port in supported_ports and self.portstatus.get(port) == "open":
                                chosen = transport_type
                                break
                if chosen is None:
                        raise ExceptionPort("Unsupported Port: No transport from open ports")

                Transport.__init__(self, chosen, verbose)

        def run(self, node_port, dryrun):
                """ This function is to be defined by the specific PCU instance.  """
                pass

        def reboot(self, node_port, dryrun):
                """Call run() and translate known failures into result strings.

                Returns whatever run() returns, or a descriptive string when
                run() raises one of the exceptions in _REBOOT_ERRORS or
                EOFError.  Any other exception propagates.
                """
                try:
                        return self.run(node_port, dryrun)
                except EOFError as err:
                        if self.verbose:
                                logger.debug("reboot: EOF")
                                logger.debug(err)
                        # run() may fail before any connection was stored
                        if self.transport is not None:
                                self.transport.close()
                        traceback.print_exc()
                        return "EOF connection reset" + str(err)
                except Exception as err:
                        for exc_class, prefix in self._REBOOT_ERRORS:
                                if isinstance(err, exc_class):
                                        return prefix + str(err)
                        raise
278                 
class IPAL(PCUControl):
        """
                Driver for iPal units using their raw protocol on TCP port 9100
                instead of telnet or web access.

                A request is ESC, password, ESC, data, command character.  The
                commands used here are 'O' (query status), 'P' (pulse a port)
                and 'E' (sent to turn a port on before pulsing it).  The reply
                is read as a string in which the character at index node_port
                is '1' for power on and '0' for power off.
        """

        def format_msg(self, data, cmd):
                """Return the request string for `cmd` with payload `data`."""
                esc = "\x1b"
                return "%c%s%c%s%c" % (esc, self.password, esc, data, cmd)

        def recv_noblock(self, s, count):
                """Sleep 4 seconds, then read up to `count` bytes without blocking.

                Raises Exception when nothing is available (EAGAIN) or on any
                other socket error.
                """
                # TODO: make sleep backoff, before stopping.
                time.sleep(4)
                try:
                        return s.recv(count, socket.MSG_DONTWAIT)
                except socket.error as e:
                        if e.args[0] == errno.EAGAIN:
                                # was `return Exception(...)`, which handed the
                                # exception object to the caller as if it were data
                                raise Exception(e.args[1])
                        # TODO: not other exceptions.
                        raise Exception(e)

        def _port_is_on(self, node_port, status):
                """Return True/False for the power state of node_port in `status`.

                Raises Exception when `status` has no entry for the port or
                the entry is neither '1' nor '0'.
                """
                if not node_port < len(status):
                        raise Exception("Mismatch between configured port and PCU status: %s %s" % (node_port, status))
                state = status[node_port]
                if state == '1':
                        return True
                if state == '0':
                        return False
                raise Exception("Unknown status for PCU socket %s : %s" % (node_port, status))

        def run(self, node_port, dryrun):
                """Pulse `node_port`, turning it on first when it is off.

                With `dryrun` only the status query is made.  Returns 0 on
                success or dry run, and "Failed Power On" when the port does
                not report power on after the pulse.  Raises Exception on
                connection or protocol problems.
                """
                print("open socket")
                s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
                try:
                        try:
                                print("connect")
                                s.connect((self.host, 9100))
                        except socket.error as e:
                                if e.args[0] == errno.ECONNREFUSED:
                                        # cannot connect to remote host
                                        raise Exception(e.args[1])
                                # TODO: what other conditions are there?
                                raise Exception(e)

                        # get current status
                        print("Checking status")
                        s.send(self.format_msg("", 'O'))
                        ret = self.recv_noblock(s, 8)
                        print("Current status is '%s'" % ret)
                        power_on = self._port_is_on(node_port, ret)

                        if dryrun:
                                return 0

                        print("Pulsing %s" % node_port)
                        if not power_on:
                                # NOTE: turn power on before pulsing the port.
                                print("power was off, so turning on then pulsing...")
                                s.send(self.format_msg("%s" % node_port, 'E'))
                        s.send(self.format_msg("%s" % node_port, 'P'))

                        print("Receiving response.")
                        ret = self.recv_noblock(s, 8)
                        print("Current status is '%s'" % ret)

                        if self._port_is_on(node_port, ret):
                                return 0
                        return "Failed Power On"
                finally:
                        # the socket is released on every path, including errors
                        s.close()
378
379 # TELNET version of protocol...
380 #               #self.open(self.host)
381 #               ## XXX Some iPals require you to hit Enter a few times first
382 #               #self.ifThenSend("Password >", "\r\n\r\n", ExceptionNotFound)
383 #               # Login
384 #               self.ifThenSend("Password >", self.password, ExceptionPassword)
385 #               self.transport.write("\r\n\r\n")
386 #               if not dryrun: # P# - Pulse relay
387 #                       print "node_port %s" % node_port
388 #                       self.ifThenSend("Enter >", 
389 #                                                       "P7", # % node_port, 
390 #                                                       ExceptionNotFound)
391 #                       print "send newlines"
392 #                       self.transport.write("\r\n\r\n")
393 #                       print "after new lines"
394 #               # Get the next prompt
395 #               print "wait for enter"
396 #               self.ifElse("Enter >", ExceptionTimeout)
397 #               print "closing "
398 #               self.close()
399 #               return 0
400
class APCEurope(PCUControl):
        """APC menu layout: 1, 2, <outlet>, 3 (Immediate Reboot)."""

        def run(self, node_port, dryrun):
                """Log in and walk the menu to reboot outlet `node_port`.

                A dry run cancels at the YES confirmation.  Returns 0; prompt
                failures raise the exceptions of ifThenSend().
                """
                menu_prompt = "\r\n> "
                confirm_prompt = "Enter 'YES' to continue or <ENTER> to cancel"

                self.open(self.host, self.username)
                self.sendPassword(self.password)

                # A missing first menu prompt is reported as a password failure.
                self.ifThenSend(menu_prompt, "1", ExceptionPassword)
                # the final "3" is "Immediate Reboot"
                for choice in ("2", str(node_port), "3"):
                        self.ifThenSend(menu_prompt, choice)

                if dryrun:
                        answer = ""
                else:
                        answer = "YES\r\n"
                self.ifThenSend(confirm_prompt, answer, ExceptionSequence)
                self.ifThenSend("Press <ENTER> to continue...", "", ExceptionSequence)

                self.close()
                return 0
423
class APCBrazil(PCUControl):
        """APC menu layout: 1, <outlet>, 4 (Immediate Reboot)."""

        def run(self, node_port, dryrun):
                """Log in and walk the menu to reboot outlet `node_port`.

                A dry run cancels at the YES confirmation.  Returns 0; prompt
                failures raise the exceptions of ifThenSend().
                """
                menu_prompt = "\r\n> "
                confirm_prompt = "Enter 'YES' to continue or <ENTER> to cancel"

                self.open(self.host, self.username)
                self.sendPassword(self.password)

                # A missing first menu prompt is reported as a password failure.
                self.ifThenSend(menu_prompt, "1", ExceptionPassword)
                # the final "4" is "Immediate Reboot"
                for choice in (str(node_port), "4"):
                        self.ifThenSend(menu_prompt, choice)

                if dryrun:
                        answer = ""
                else:
                        answer = "YES\r\n"
                self.ifThenSend(confirm_prompt, answer, ExceptionSequence)
                self.ifThenSend("Press <ENTER> to continue...", "", ExceptionSequence)

                self.close()
                return 0
445
class APCBerlin(PCUControl):
        """APC menu layout: 1, 2, 1, <outlet>, 3 (Immediate Reboot)."""

        def run(self, node_port, dryrun):
                """Log in and walk the menu to reboot outlet `node_port`.

                A dry run cancels at the YES confirmation.  Returns 0; prompt
                failures raise the exceptions of ifThenSend().
                """
                menu_prompt = "\r\n> "
                confirm_prompt = "Enter 'YES' to continue or <ENTER> to cancel"

                self.open(self.host, self.username)
                self.sendPassword(self.password)

                # A missing first menu prompt is reported as a password failure.
                self.ifThenSend(menu_prompt, "1", ExceptionPassword)
                # the final "3" is "Immediate Reboot"
                for choice in ("2", "1", str(node_port), "3"):
                        self.ifThenSend(menu_prompt, choice)

                if dryrun:
                        answer = ""
                else:
                        answer = "YES\r\n"
                self.ifThenSend(confirm_prompt, answer, ExceptionSequence)
                self.ifThenSend("Press <ENTER> to continue...", "", ExceptionSequence)

                self.close()
                return 0
469
class APCFolsom(PCUControl):
        """APC menu layout: 1, 2, 1, <outlet>, 1, 3 (Immediate Reboot)."""

        def run(self, node_port, dryrun):
                """Log in and walk the menu to reboot outlet `node_port`.

                A dry run cancels at the YES confirmation.  Returns 0; prompt
                failures raise the exceptions of ifThenSend().
                """
                menu_prompt = "\r\n> "
                confirm_prompt = "Enter 'YES' to continue or <ENTER> to cancel"

                self.open(self.host, self.username)
                self.sendPassword(self.password)

                # A missing first menu prompt is reported as a password failure.
                self.ifThenSend(menu_prompt, "1", ExceptionPassword)
                # the final "3" is "Immediate Reboot"
                for choice in ("2", "1", str(node_port), "1", "3"):
                        self.ifThenSend(menu_prompt, choice)

                if dryrun:
                        answer = ""
                else:
                        answer = "YES\r\n"
                self.ifThenSend(confirm_prompt, answer, ExceptionSequence)
                self.ifThenSend("Press <ENTER> to continue...", "", ExceptionSequence)

                self.close()
                return 0
495
class APCMaster(PCUControl):
        """APC menu layout: 1, 3, <outlet>, 1, 3 (Immediate Reboot)."""

        def run(self, node_port, dryrun):
                """Log in and walk the menu to reboot outlet `node_port`.

                A dry run cancels at the YES confirmation.  Returns 0; prompt
                failures raise the exceptions of ifThenSend().
                """
                menu_prompt = "\r\n> "
                confirm_prompt = "Enter 'YES' to continue or <ENTER> to cancel"

                print("Rebooting %s" % self.host)
                self.open(self.host, self.username)
                self.sendPassword(self.password)

                # 1- Device Manager
                # A missing first menu prompt is reported as a password failure.
                self.ifThenSend(menu_prompt, "1", ExceptionPassword)

                remaining = (
                        "3",              # 3- Outlet Control/Config
                        str(node_port),   # n- Outlet n
                        "1",              # 1- Control Outlet
                        "3",              # 3- Immediate Reboot
                )
                for choice in remaining:
                        self.ifThenSend(menu_prompt, choice)

                if dryrun:
                        answer = ""
                else:
                        answer = "YES\r\n"
                self.ifThenSend(confirm_prompt, answer, ExceptionSequence)
                self.ifThenSend("Press <ENTER> to continue...", "", ExceptionSequence)

                self.close()
                return 0
524
class APC(PCUControl):
        """Try the known APC menu layouts in turn until one fits.

        The layouts are tried in the order master, europe, folsom.
        """

        # Prefix of the string PCUControl.reboot() returns when a driver
        # raised ExceptionSequence, i.e. when the menu layout did not match.
        _SEQUENCE_ERROR_PREFIX = "sequence error: "

        def __init__(self, plc_pcu_record, verbose, supported_ports=None):
                """Build one sub-driver per layout from the same record.

                `supported_ports` is passed on to PCUControl.__init__ for this
                object and for every sub-driver.  Without it no port counts as
                supported and PCUControl.__init__ raises, as it always did
                before this parameter existed.
                """
                if supported_ports is None:
                        supported_ports = []
                PCUControl.__init__(self, plc_pcu_record, verbose, supported_ports)

                self.master = APCMaster(plc_pcu_record, verbose, supported_ports)
                self.folsom = APCFolsom(plc_pcu_record, verbose, supported_ports)
                self.europe = APCEurope(plc_pcu_record, verbose, supported_ports)

        def run(self, node_port, dryrun):
                """Reboot `node_port` with the first layout that gets through.

                Returns the result of the first sub-driver whose reboot() does
                not end in a sequence error, or
                "Unknown reboot sequence for APC PCU" when all of them do.
                """
                sleep_time = 1

                for pcu in (self.master, self.europe, self.folsom):
                        print("-*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*")
                        print("sleeping %d" % sleep_time)
                        time.sleep(sleep_time)

                        # reboot() never raises ExceptionSequence; it reports
                        # it through its return string, so test that instead.
                        ret = pcu.reboot(node_port, dryrun)
                        wrong_layout = (isinstance(ret, str) and
                                        ret.startswith(self._SEQUENCE_ERROR_PREFIX))
                        if not wrong_layout:
                                return ret

                        # Drop the half-finished session before the next try.
                        if pcu.transport is not None:
                                pcu.close()
                        # NOTE(review): the long pause before the next layout
                        # is kept from the original; presumably it lets the
                        # unit drop the previous login -- confirm.
                        sleep_time = 130

                return "Unknown reboot sequence for APC PCU"
554
class IntelAMT(PCUControl):
        """Driver that shells out to the Intel AMT SDK remoteControl sample."""

        def run(self, node_port, dryrun):
                """Run remoteControl against port 16992 of the host.

                A dry run passes -p, otherwise -A is passed.  Returns the
                result of soltesz.CMD().system().  `node_port` is not used.
                """
                import soltesz

                runner = soltesz.CMD()
                tool = "IntelAMTSDK/Samples/RemoteControl/remoteControl"
                target = (self.host, self.password)

                # NOTE(review): the password is placed inside single quotes in
                # a command string and the whole string is printed below.
                if dryrun:
                        # NOTE: -p checks the power state of the host.
                        # TODO: parse the output to find out if it's ok or not.
                        options = " -p http://%s:16992/RemoteControlService  -user admin -pass '%s' " % target
                else:
                        options = " -A http://%s:16992/RemoteControlService -user admin -pass '%s' " % target

                cmd_str = tool + options
                print(cmd_str)
                return runner.system(cmd_str, self.TELNET_TIMEOUT)
571
class DRACRacAdm(PCUControl):
        """Driver that delegates to the module-level racadm_reboot()."""

        def run(self, node_port, dryrun):
                """Call racadm_reboot() for this unit and return 0.

                The value returned by racadm_reboot() is not looked at.
                """
                print("trying racadm_reboot...")
                credentials = (self.host, self.username, self.password)
                racadm_reboot(*(credentials + (node_port, dryrun)))
                return 0
579
class DRAC(PCUControl):
        """Driver for units that present a "[<username>]#" command prompt."""

        def run(self, node_port, dryrun):
                """Log in and power-cycle the server; a dry run only queries it.

                Sends "getsysinfo" on a dry run and "serveraction powercycle"
                otherwise, then "exit".  Returns 0.  `node_port` is not used.
                """
                shell_prompt = "[%s]#" % self.username

                self.open(self.host, self.username)
                self.sendPassword(self.password)

                print("logging in...")
                self.transport.write("\r\n")

                if dryrun:
                        # Testing Reboot ?
                        command = "getsysinfo"
                else:
                        # Reset this machine
                        command = "serveraction powercycle"
                self.ifThenSend(shell_prompt, command)
                self.ifThenSend(shell_prompt, "exit")

                self.close()
                return 0
598
class HPiLO(PCUControl):
        """Driver for HP iLO units driven through their "hpiLO->" prompt."""

        def run(self, node_port, dryrun):
                """Log in, enter system1 and reset it; a dry run sends POWER.

                Returns 0.  `node_port` is not used.
                """
                system_prompt = "</system1>hpiLO->"

                self.open(self.host, self.username)
                self.sendPassword(self.password)

                # </>hpiLO->
                self.ifThenSend("</>hpiLO->", "cd system1")

                if dryrun:
                        command = "POWER"
                else:
                        # Reset this machine
                        command = "reset"
                self.ifThenSend(system_prompt, command)
                self.ifThenSend(system_prompt, "exit")

                self.close()
                return 0
618
619                 
class HPiLOHttps(PCUControl):
        """Driver for HP iLO units driven through the locfg.pl script."""

        def _locfg_messages(self, xml_file):
                """Run locfg.pl with `xml_file`; return its filtered, stripped output.

                The pipeline keeps only lines containing 'MESSAGE' that do not
                contain 'No error'.
                """
                import soltesz

                locfg = soltesz.CMD()
                cmd = "cmdhttps/locfg.pl -s %s -f %s -u %s -p '%s' | grep 'MESSAGE' | grep -v 'No error'" % (
                                        self.host, xml_file,
                                        self.username, self.password)
                sout, serr = locfg.run_noexcept(cmd)
                return sout.strip()

        def run(self, node_port, dryrun):
                """Query the unit, then reset the server unless `dryrun`.

                Returns the message text when the initial Get_Network query
                reports one, otherwise 0.  Messages from the reset itself are
                printed but not returned.  `node_port` is not used.
                """
                messages = self._locfg_messages("iloxml/Get_Network.xml")
                if messages != "":
                        print("sout: %s" % messages)
                        return messages

                if dryrun:
                        return 0

                messages = self._locfg_messages("iloxml/Reset_Server.xml")
                if messages != "":
                        print("sout: %s" % messages)
                        #return messages
                return 0
645
class BayTechAU(PCUControl):
        """Driver for BayTech units that show an "RPC3-NC>" prompt."""

        def run(self, node_port, dryrun):
                """Log in and send "Reboot <node_port>".

                The (Y/N)? confirmation is answered "N" on a dry run and "Y"
                otherwise.  Returns 0.
                """
                shell_prompt = "RPC3-NC>"

                self.open(self.host, self.username, None, "Enter user name:")
                self.sendPassword(self.password, "Enter Password:")

                #self.ifThenSend("RPC-16>", "Status")
                self.ifThenSend(shell_prompt, "Reboot %d" % node_port)

                # Reboot Outlet  N        (Y/N)?
                if dryrun:
                        answer = "N"
                else:
                        answer = "Y"
                self.ifThenSend("(Y/N)?", answer)
                self.ifThenSend(shell_prompt, "")

                self.close()
                return 0
663
class BayTechGeorgeTown(PCUControl):
        """Driver for BayTech units that show an "RPC-16>" prompt."""

        def run(self, node_port, dryrun):
                """Log in and send "Reboot <node_port>".

                The (Y/N)? confirmation is answered "N" on a dry run and "Y"
                otherwise.  Returns 0.
                """
                shell_prompt = "RPC-16>"

                self.open(self.host, self.username, None, "Enter user name:")
                self.sendPassword(self.password, "Enter Password:")

                #self.ifThenSend("RPC-16>", "Status")

                self.ifThenSend(shell_prompt, "Reboot %d" % node_port)

                # Reboot Outlet  N        (Y/N)?
                if dryrun:
                        answer = "N"
                else:
                        answer = "Y"
                self.ifThenSend("(Y/N)?", answer)
                self.ifThenSend(shell_prompt, "")

                self.close()
                return 0
682
683 class BayTechCtrlCUnibe(PCUControl):
684         """
685                 For some reason, these units let you log in fine, but they hang
686                 indefinitely, unless you send a Ctrl-C after the password.  No idea
687                 why.
688         """
689         def run(self, node_port, dryrun):
690                 print "BayTechCtrlC %s" % self.host
691
692                 ssh_options="-o StrictHostKeyChecking=no -o PasswordAuthentication=yes -o PubkeyAuthentication=no"
693                 s = pxssh.pxssh()
694                 if not s.login(self.host, self.username, self.password, ssh_options):
695                         raise ExceptionPassword("Invalid Password")
696                 # Otherwise, the login succeeded.
697
698                 # Send a ctrl-c to the remote process.
699                 print "sending ctrl-c"
700                 s.send(chr(3))
701
702                 # Control Outlets  (5 ,1).........5
703                 try:
704                         index = s.expect(["Enter Request :"])
705
706                         if index == 0:
707                                 print "3"
708                                 s.send("3\r\n")
709                                 index = s.expect(["DS-RPC>", "Enter user name:"])
710                                 if index == 1:
711                                         s.send(self.username + "\r\n")
712                                         index = s.expect(["DS-RPC>"])
713
714                                 if index == 0:
715                                         print "Reboot %d" % node_port
716                                         s.send("Reboot %d\r\n" % node_port)
717
718                                         index = s.expect(["(Y/N)?"])
719                                         if index == 0:
720                                                 if dryrun:
721                                                         print "sending N"
722                                                         s.send("N\r\n")
723                                                 else:
724                                                         print "sending Y"
725                                                         s.send("Y\r\n")
726
727                                 #index = s.expect(["DS-RPC>"])
728                                 #print "got prompt back"
729
730                         s.close()
731
732                 except pexpect.EOF:
733                         raise ExceptionPrompt("EOF before 'Enter Request' Prompt")
734                 except pexpect.TIMEOUT:
735                         raise ExceptionPrompt("Timeout before 'Enter Request' Prompt")
736
737                 return 0
738
739 class BayTechCtrlC(PCUControl):
740         """
741                 For some reason, these units let you log in fine, but they hang
742                 indefinitely, unless you send a Ctrl-C after the password.  No idea
743                 why.
744         """
745         def run(self, node_port, dryrun):
746                 print "BayTechCtrlC %s" % self.host
747
748                 ssh_options="-o StrictHostKeyChecking=no -o PasswordAuthentication=yes -o PubkeyAuthentication=no"
749                 s = pxssh.pxssh()
750                 if not s.login(self.host, self.username, self.password, ssh_options):
751                         raise ExceptionPassword("Invalid Password")
752                 # Otherwise, the login succeeded.
753
754                 # Send a ctrl-c to the remote process.
755                 print "sending ctrl-c"
756                 s.send(chr(3))
757
758                 # Control Outlets  (5 ,1).........5
759                 try:
760                         index = s.expect(["Enter Request :"])
761
762                         if index == 0:
763                                 print "5"
764                                 s.send("5\r\n")
765                                 index = s.expect(["DS-RPC>", "Enter user name:"])
766                                 if index == 1:
767                                         print "sending username"
768                                         s.send(self.username + "\r\n")
769                                         index = s.expect(["DS-RPC>"])
770
771                                 if index == 0:
772                                         print "Reboot %d" % node_port
773                                         s.send("Reboot %d\r\n" % node_port)
774
775                                         index = s.expect(["(Y/N)?"])
776                                         if index == 0:
777                                                 if dryrun:
778                                                         print "sending N"
779                                                         s.send("N\r\n")
780                                                 else:
781                                                         print "sending Y"
782                                                         s.send("Y\r\n")
783
784                                 index = s.expect(["DS-RPC>"])
785                                 #print "got prompt back"
786
787                         s.close()
788
789                 except pexpect.EOF:
790                         raise ExceptionPrompt("EOF before 'Enter Request' Prompt")
791                 except pexpect.TIMEOUT:
792                         raise ExceptionPrompt("Timeout before Prompt")
793
794                 return 0
795
796 class BayTech(PCUControl):
797         def run(self, node_port, dryrun):
798                 self.open(self.host, self.username)
799                 self.sendPassword(self.password)
800
801                 # Control Outlets  (5 ,1).........5
802                 self.ifThenSend("Enter Request :", "5")
803
804                 # Reboot N
805                 try:
806                         self.ifThenSend("DS-RPC>", "Reboot %d" % node_port, ExceptionNotFound)
807                 except ExceptionNotFound, msg:
808                         # one machine is configured to ask for a username,
809                         # even after login...
810                         print "msg: %s" % msg
811                         self.transport.write(self.username + "\r\n")
812                         self.ifThenSend("DS-RPC>", "Reboot %d" % node_port)
813
814                 # Reboot Outlet  N        (Y/N)?
815                 if dryrun:
816                         self.ifThenSend("(Y/N)?", "N")
817                 else:
818                         self.ifThenSend("(Y/N)?", "Y")
819                 self.ifThenSend("DS-RPC>", "")
820
821                 self.close()
822                 return 0
823
class WTIIPS4(PCUControl):
        """
        Driver for units that present an "IPS> " command prompt and confirm a
        reboot with "Sure? (Y/N): ".
        """
        def run(self, node_port, dryrun):
                """
                Issue "/Boot <node_port>" and answer the confirmation prompt.

                A dry run answers "N" so nothing is power-cycled; a real run
                answers "Y".  Returns 0 once the dialogue has completed.
                """
                self.open(self.host)
                self.sendPassword(self.password, "Enter Password:")

                self.ifThenSend("IPS> ", "/Boot %s" % node_port)

                # The answers used to be swapped here: a dry run confirmed the
                # reboot and a real run declined it.  Every other driver in
                # this file answers "N" on dryrun.
                if dryrun:
                        self.ifThenSend("Sure? (Y/N): ", "N")
                else:
                        self.ifThenSend("Sure? (Y/N): ", "Y")

                # Wait for the prompt to come back before hanging up.
                self.ifThenSend("IPS> ", "")

                self.close()
                return 0
839
840 class ePowerSwitchGood(PCUControl):
841         # NOTE:
842         #               The old code used Python's HTTPPasswordMgrWithDefaultRealm()
843         #               For some reason this both doesn't work and in some cases, actually
844         #               hangs the PCU.  Definitely not what we want.
845         #               
846         #               The code below is much simpler.  Just letting things fail first,
847         #               and then, trying again with authentication string in the header.
848         #               
849         def run(self, node_port, dryrun):
850                 self.transport = None
851                 self.url = "http://%s:%d/" % (self.host,80)
852                 uri = "%s:%d" % (self.host,80)
853
854                 req = urllib2.Request(self.url)
855                 try:
856                         handle = urllib2.urlopen(req)
857                 except IOError, e:
858                         # NOTE: this is expected to fail initially
859                         pass
860                 else:
861                         print self.url
862                         print "-----------"
863                         print handle.read()
864                         print "-----------"
865                         return "ERROR: not protected by HTTP authentication"
866
867                 if not hasattr(e, 'code') or e.code != 401:
868                         return "ERROR: failed for: %s" % str(e)
869
870                 base64data = base64.encodestring("%s:%s" % (self.username, self.password))[:-1]
871                 # NOTE: assuming basic realm authentication.
872                 authheader = "Basic %s" % base64data
873                 req.add_header("Authorization", authheader)
874
875                 try:
876                         f = urllib2.urlopen(req)
877                 except IOError, e:
878                         # failing here means the User/passwd is wrong (hopefully)
879                         raise ExceptionPassword("Incorrect username/password")
880
881                 # TODO: after verifying that the user/password is correct, we should
882                 # actually reboot the given node.
883
884                 if not dryrun:
885                         # add data to handler,
886                         # fetch url one more time on cmd.html, econtrol.html or whatever.
887                         pass
888
889                 if self.verbose: print f.read()
890
891                 self.close()
892                 return 0
893
894
895 class ePowerSwitchOld(PCUControl):
896         def run(self, node_port, dryrun):
897                 self.url = "http://%s:%d/" % (self.host,80)
898                 uri = "%s:%d" % (self.host,80)
899
900                 # create authinfo
901                 authinfo = urllib2.HTTPPasswordMgrWithDefaultRealm()
902                 authinfo.add_password (None, uri, self.username, self.password)
903                 authhandler = urllib2.HTTPBasicAuthHandler( authinfo )
904
905                 # NOTE: it doesn't seem to matter whether this authinfo is here or not.
906                 transport = urllib2.build_opener(authinfo)
907                 f = transport.open(self.url)
908                 if self.verbose: print f.read()
909
910                 if not dryrun:
911                         transport = urllib2.build_opener(authhandler)
912                         f = transport.open(self.url + "cmd.html", "P%d=r" % node_port)
913                         if self.verbose: print f.read()
914
915                 self.close()
916                 return 0
917
918 class ePowerSwitch(PCUControl):
919         def run(self, node_port, dryrun):
920                 self.url = "http://%s:%d/" % (self.host,80)
921                 uri = "%s:%d" % (self.host,80)
922
923                 # TODO: I'm still not sure what the deal is here.
924                 #               two independent calls appear to need to be made before the
925                 #               reboot will succeed.  It doesn't seem to be possible to do
926                 #               this with a single call.  I have no idea why.
927
928                 # create authinfo
929                 authinfo = urllib2.HTTPPasswordMgrWithDefaultRealm()
930                 authinfo.add_password (None, uri, self.username, self.password)
931                 authhandler = urllib2.HTTPBasicAuthHandler( authinfo )
932
933                 # NOTE: it doesn't seem to matter whether this authinfo is here or not.
934                 transport = urllib2.build_opener()
935                 f = transport.open(self.url + "elogin.html", "pwd=%s" % self.password)
936                 if self.verbose: print f.read()
937
938                 if not dryrun:
939                         transport = urllib2.build_opener(authhandler)
940                         f = transport.open(self.url + "econtrol.html", "P%d=r" % node_port)
941                         if self.verbose: print f.read()
942
943                 #       data= "P%d=r" % node_port
944                 #self.open(self.host, self.username, self.password)
945                 #self.sendHTTP("elogin.html", "pwd=%s" % self.password)
946                 #self.sendHTTP("econtrol.html", data)
947                 #self.sendHTTP("cmd.html", data)
948
949                 self.close()
950                 return 0
951                 
952
### Rebooting European BlackBox PSE boxes
# Thierry Parmentelat - May 11 2005
# Tested on 4-port models known as PSE505-FR.
# Uses HTTP to POST the data 'P<port>=r'.
# Relies on basic authentication within HTTP/1.0.
# The first, curl-based script was:
# curl --http1.0 --basic --user <username>:<password> --data P<port>=r \
#       http://<hostname>:<http_port>/cmd.html && echo OK
961
962 def bbpse_reboot (pcu_ip,username,password,port_in_pcu,http_port, dryrun):
963
964         global verbose
965
966         url = "http://%s:%d/cmd.html" % (pcu_ip,http_port)
967         data= "P%d=r" % port_in_pcu
968         if verbose:
969                 logger.debug("POSTing '%s' on %s" % (data,url))
970
971         authinfo = urllib2.HTTPPasswordMgrWithDefaultRealm()
972         uri = "%s:%d" % (pcu_ip,http_port)
973         authinfo.add_password (None, uri, username, password)
974         authhandler = urllib2.HTTPBasicAuthHandler( authinfo )
975
976         opener = urllib2.build_opener(authhandler)
977         urllib2.install_opener(opener)
978
979         if (dryrun):
980                 return 0
981
982         try:
983                 f = urllib2.urlopen(url,data)
984
985                 r= f.read()
986                 if verbose:
987                         logger.debug(r)
988                 return 0
989
990         except urllib2.URLError,err:
991                 logger.info('Could not open http connection', err)
992                 return "bbpse error"
993
994 ### rebooting x10toggle based systems addressed by port
995 # Marc E. Fiuczynski - May 31 2005
996 # tested on 4-ports models known as PSE505-FR
997 # uses ssh and password to login to an account
998 # that will cause the system to be powercycled.
999
1000 def x10toggle_reboot(ip, username, password, port, dryrun):
1001         global verbose
1002
1003         ssh = None
1004         try:
1005                 ssh = pyssh.Ssh(username, ip)
1006                 ssh.open()
1007
1008                 # Login
1009                 telnet_answer(ssh, "password:", password)
1010
1011                 if not dryrun:
1012                         # Reboot
1013                         telnet_answer(ssh, "x10toggle>", "A%d" % port)
1014
1015                 # Close
1016                 output = ssh.close()
1017                 if verbose:
1018                         logger.debug(output)
1019                 return 0
1020
1021         except Exception, err:
1022                 if verbose:
1023                         logger.debug(err)
1024                 if ssh:
1025                         output = ssh.close()
1026                         if verbose:
1027                                 logger.debug(output)
1028                 return errno.ETIMEDOUT
1029
1030 ### rebooting Dell systems via RAC card
1031 # Marc E. Fiuczynski - June 01 2005
1032 # tested with David Lowenthal's itchy/scratchy nodes at UGA
1033 #
1034
def runcmd(command, args, username, password, timeout = None):
        """
        Run an external command in a worker thread, write username and
        password to its stdin (one per line), and return everything it wrote
        to stdout/stderr.

        command  -- the program to run
        args     -- list of argument strings appended to command, separated by
                    spaces; a false value means "no arguments"
        timeout  -- seconds to wait for the worker; None waits without limit

        Returns the captured output when the command exits with status 0.
        Raises Exception when the wait times out, when the command exits
        non-zero or is killed by a signal, and re-raises any exception caught
        in the worker thread.

        NOTE(review): on timeout nothing here stops the child process or the
        (daemon) worker thread; they are simply abandoned.
        """

        # One-element list so the nested functions can store into it;
        # [None] means "no result yet".
        result = [None]
        result_ready = threading.Condition()

        def set_result(x):
                # Publish x as the result and wake up the waiting caller.

                result_ready.acquire()
                try:
                        result[0] = x
                finally:
                        result_ready.notify()
                        result_ready.release()

        def do_command(command, username, password):
                # Worker body: run the command and hand back either
                # (exit status, output) or the exception that occurred.

                try:
                        # Popen4 is a popen-type class that combines stdout and stderr
                        p = popen2.Popen4(command)

                        # send the credentials, close stdin, then read all output data
                        p.tochild.write("%s\n" % username)
                        p.tochild.write("%s\n" % password)
                        p.tochild.close()
                        data = p.fromchild.read()

                        while True:
                                # might get interrupted by a signal in poll() or waitpid()
                                try:
                                        retval = p.wait()
                                        set_result((retval, data))
                                        break
                                except OSError, ex:
                                        # retry the wait on EINTR, give up on anything else
                                        if ex.errno == errno.EINTR:
                                                continue
                                        raise ex
                except Exception, ex:
                        set_result(ex)

        if args:
                command = " ".join([command] + args)

        worker = threading.Thread(target = do_command, args = (command, username, password, ))
        worker.setDaemon(True)
        # Take the condition's lock before starting the worker, so the worker's
        # set_result() blocks until wait() below has released it and the
        # notification cannot be missed.
        result_ready.acquire()
        worker.start()
        result_ready.wait(timeout)
        try:
                # Still [None] after the wait returned: no result was published.
                if result == [None]:
                        raise Exception, "command timed-out: '%s'" % command
        finally:
                result_ready.release()
        result = result[0]

        if isinstance(result, Exception):
                raise result
        else:
                (retval, data) = result
                # retval is a wait()-style status word; decode it with the os.W* helpers.
                if os.WIFEXITED(retval) and os.WEXITSTATUS(retval) == 0:
                        return data
                else:
                        out = "system command ('%s') " % command
                        if os.WIFEXITED(retval):
                                out += "failed, rc = %d" % os.WEXITSTATUS(retval)
                        else:
                                out += "killed by signal %d" % os.WTERMSIG(retval)
                        if data:
                                out += "; output follows:\n" + data
                        raise Exception, out
1104
1105 def racadm_reboot(host, username, password, port, dryrun):
1106         global verbose
1107
1108         ip = socket.gethostbyname(host)
1109         try:
1110                 cmd = "/usr/sbin/racadm"
1111                 os.stat(cmd)
1112                 if not dryrun:
1113                         output = runcmd(cmd, ["-r %s -i serveraction powercycle" % ip],
1114                                 username, password)
1115                 else:
1116                         output = runcmd(cmd, ["-r %s -i getsysinfo" % ip],
1117                                 username, password)
1118
1119                 print "RUNCMD: %s" % output
1120                 if verbose:
1121                         logger.debug(output)
1122                 return 0
1123
1124         except Exception, err:
1125                 logger.debug("runcmd raised exception %s" % err)
1126                 if verbose:
1127                         logger.debug(err)
1128                 return -1
1129
def pcu_name(pcu):
        """
        Return the address to use for a PCU record: its 'hostname' when that
        is set, otherwise its 'ip', otherwise None.

        A field counts as unset when it is None or an empty string.  The
        check compares by value, so an empty unicode string is treated as
        unset too; the previous identity test ("is not") did not do that.
        The 'ip' key is only looked up when 'hostname' is unset.
        """
        for key in ('hostname', 'ip'):
                value = pcu[key]
                if value is not None and value != "":
                        return value
        return None
1137
# Data set read by get_pcu_values(); loaded once, when this module is imported.
# NOTE(review): presumably the saved output of the "findbadpcus" PCU scan --
# confirm against soltesz.dbLoad.
import soltesz
fb =soltesz.dbLoad("findbadpcus")
1140
def get_pcu_values(pcu_id):
        """
        Return the 'values' entry stored for pcu_id in the module-level fb
        data (under fb['nodes']['id_<pcu_id>']), or None when it cannot be
        looked up for any reason.
        """
        try:
                record = fb['nodes']["id_%s" % pcu_id]
                return record['values']
        except:
                # Deliberately broad: any failure means "no data for this PCU".
                return None
1150
def reboot(nodename):
        """
        Reboot nodename for real: probing enabled, dry run disabled.
        Returns whatever reboot_policy() returns.
        """
        return reboot_policy(nodename, continue_probe=True, dryrun=False)
1153         
1154 def reboot_policy(nodename, continue_probe, dryrun):
1155         global verbose
1156
1157         pcu = plc.getpcu(nodename)
1158         if not pcu:
1159                 logger.debug("no pcu for %s" % hostname)
1160                 print "no pcu for %s" % hostname
1161                 return False # "%s has no pcu" % nodename
1162
1163         values = get_pcu_values(pcu['pcu_id'])
1164         if values == None:
1165                 logger.debug("No values for pcu probe %s" % hostname)
1166                 print "No values for pcu probe %s" % hostname
1167                 return False #"no info for pcu_id %s" % pcu['pcu_id']
1168         
1169         # Try the PCU first
1170         logger.debug("Trying PCU %s %s" % (pcu['hostname'], pcu['model']))
1171
1172         ret = reboot_test(nodename, values, continue_probe, verbose, dryrun)
1173
1174         if ret != 0:
1175                 print ret
1176                 return False
1177         else:
1178                 print "return true"
1179                 return True
1180
def reboot_test(nodename, values, continue_probe, verbose, dryrun):
        """
        Pick the PCU driver that matches values['model'] (and, for some
        models, values['pcu_id']) and ask it to reboot the node.

        nodename       -- key looked up in values to get the argument passed
                          to the driver's reboot() (presumably the outlet
                          number of that node -- confirm)
        values         -- stored data for the PCU; 'model' and 'pcu_id' are
                          read here, the whole dict is handed to the driver
        continue_probe -- when false no driver is tried at all
        verbose, dryrun -- passed through to the driver

        Returns whatever the driver's reboot() returns, or one of
        "Unsupported_PCU", "NetDown", "Not_Run", -1, or str(err) when an
        ExceptionPort is raised.  Other exceptions propagate.

        The branches are tested in order and match on substrings, so their
        order matters.
        """
        rb_ret = ""

        try:
                # DataProbe iPal (many sites)
                if  continue_probe and values['model'].find("Dataprobe IP-41x/IP-81x") >= 0:
                        ipal = IPAL(values, verbose, ['23', '80', '9100'])
                        rb_ret = ipal.reboot(values[nodename], dryrun)

                # APC Masterswitch (Berkeley)
                elif continue_probe and values['model'].find("APC AP79xx/Masterswitch") >= 0:
                        print values

                        # TODO: make a more robust version of APC
                        if values['pcu_id'] in [1163,1055,1111,1231,1113,1127,1128,1148]:
                                apc = APCEurope(values, verbose, ['22', '23'])
                                rb_ret = apc.reboot(values[nodename], dryrun)

                        elif values['pcu_id'] in [1110,86]:
                                apc = APCBrazil(values, verbose, ['22', '23'])
                                rb_ret = apc.reboot(values[nodename], dryrun)

                        elif values['pcu_id'] in [1221,1225]:
                                apc = APCBerlin(values, verbose, ['22', '23'])
                                rb_ret = apc.reboot(values[nodename], dryrun)

                        # NOTE(review): 1221 is also in the Berlin list above, so
                        # it never reaches this branch -- confirm which is right.
                        elif values['pcu_id'] in [1173,1221,1220]:
                                apc = APCFolsom(values, verbose, ['22', '23'])
                                rb_ret = apc.reboot(values[nodename], dryrun)

                        else:
                                apc = APCMaster(values, verbose, ['22', '23'])
                                rb_ret = apc.reboot(values[nodename], dryrun)

                # BayTech DS4-RPC
                elif continue_probe and values['model'].find("Baytech DS4-RPC") >= 0:
                        if values['pcu_id'] in [1237,1052,1209,1002,1008,1041,1013,1022]:
                                # These require a 'ctrl-c' to be sent...
                                baytech = BayTechCtrlC(values, verbose, ['22', '23'])
                                rb_ret = baytech.reboot(values[nodename], dryrun)

                        elif values['pcu_id'] in [93]:
                                baytech = BayTechAU(values, verbose, ['22', '23'])
                                rb_ret = baytech.reboot(values[nodename], dryrun)

                        elif values['pcu_id'] in [1057]:
                                # These require a 'ctrl-c' to be sent...
                                baytech = BayTechCtrlCUnibe(values, verbose, ['22', '23'])
                                rb_ret = baytech.reboot(values[nodename], dryrun)

                        elif values['pcu_id'] in [1012]:
                                # This pcu sometimes doesn't present the 'Username' prompt,
                                # unless you immediately try again...
                                # Any exception from the first attempt triggers one retry.
                                try:
                                        baytech = BayTechGeorgeTown(values, verbose, ['22', '23'])
                                        rb_ret = baytech.reboot(values[nodename], dryrun)
                                except:
                                        baytech = BayTechGeorgeTown(values, verbose, ['22', '23'])
                                        rb_ret = baytech.reboot(values[nodename], dryrun)
                        else:
                                baytech = BayTech(values, verbose, ['22', '23'])
                                rb_ret = baytech.reboot(values[nodename], dryrun)

                # iLO
                elif continue_probe and values['model'].find("HP iLO") >= 0:
                        # Try ssh first; fall back to https when that returns
                        # non-zero or raises.
                        try:
                                hpilo = HPiLO(values, verbose, ['22'])
                                rb_ret = hpilo.reboot(0, dryrun)
                                if rb_ret != 0:
                                        hpilo = HPiLOHttps(values, verbose, ['443'])
                                        rb_ret = hpilo.reboot(0, dryrun)
                        except:
                                hpilo = HPiLOHttps(values, verbose, ['443'])
                                rb_ret = hpilo.reboot(0, dryrun)

                # DRAC ssh
                elif continue_probe and values['model'].find("Dell RAC") >= 0:
                        # TODO: I don't think DRACRacAdm will throw an exception for the
                        # default method to catch...
                        try:
                                drac = DRACRacAdm(values, verbose, ['443', '5869'])
                                rb_ret = drac.reboot(0, dryrun)
                        except:
                                drac = DRAC(values, verbose, ['22'])
                                rb_ret = drac.reboot(0, dryrun)

                elif continue_probe and values['model'].find("WTI IPS-4") >= 0:
                                wti = WTIIPS4(values, verbose, ['23'])
                                rb_ret = wti.reboot(values[nodename], dryrun)

                elif continue_probe and values['model'].find("Intel AMT") >= 0:
                                amt = IntelAMT(values, verbose, ['16992'])
                                rb_ret = amt.reboot(values[nodename], dryrun)

                # BlackBox PSExxx-xx (e.g. PSE505-FR)
                elif continue_probe and \
                        (values['model'].find("BlackBox PS5xx") >= 0 or
                         values['model'].find("ePowerSwitch 1/4/8x") >=0 ):

                        # TODO: allow a different port than http 80.
                        # NOTE(review): the first and last branches pick the same
                        # driver; only pcu_id 1003 gets a different one.
                        if values['pcu_id'] in [1089, 1071, 1046, 1035, 1118]:
                                eps = ePowerSwitchGood(values, verbose, ['80'])
                        elif values['pcu_id'] in [1003]:
                                eps = ePowerSwitch(values, verbose, ['80'])
                        else:
                                eps = ePowerSwitchGood(values, verbose, ['80'])

                        rb_ret = eps.reboot(values[nodename], dryrun)

                # continue_probe is true but no model matched
                elif continue_probe:
                        rb_ret = "Unsupported_PCU"

                elif continue_probe == False:
                        if 'portstatus' in values:
                                rb_ret = "NetDown"
                        else:
                                rb_ret = "Not_Run"
                else:
                        # continue_probe is false-like without being equal to False
                        rb_ret = -1

        except ExceptionPort, err:
                rb_ret = str(err)

        return rb_ret
        # Disabled racadm branch, kept for reference (never executed):
        #elif continue_probe and values['protocol'] == "racadm" and \
        #               values['model'] == "RAC":
        #       rb_ret = racadm_reboot(pcu_name(values),
        #                                                                 values['username'],
        #                                                                 values['password'],
        #                                                                 pcu[nodename],
        #                                                                 dryrun)
1313
1314 def main():
1315         logger.setLevel(logging.DEBUG)
1316         ch = logging.StreamHandler()
1317         ch.setLevel(logging.DEBUG)
1318         formatter = logging.Formatter('LOGGER - %(message)s')
1319         ch.setFormatter(formatter)
1320         logger.addHandler(ch)
1321
1322         try:
1323                 if "test" in sys.argv:
1324                         dryrun = True
1325                 else:
1326                         dryrun = False
1327
1328                 for node in sys.argv[1:]:
1329                         if node == "test": continue
1330
1331                         print "Rebooting %s" % node
1332                         if reboot_policy(node, True, dryrun):
1333                                 print "success"
1334                         else:
1335                                 print "failed"
1336         except Exception, err:
1337                 import traceback; traceback.print_exc()
1338                 print err
1339
# Script entry point.  plc and logger are already bound near the top of this
# module; the import and getLogger() call below rebind the same objects.
if __name__ == '__main__':
        import plc
        logger = logging.getLogger("monitor")
        main()