Add a new BayTech prompt type.
[monitor.git] / reboot.py
1 #!/usr/bin/python
2 #
3 # Reboot specified nodes
4 #
5
6 import getpass, getopt
7 import os, sys
8 import xml, xmlrpclib
9 import errno, time, traceback
10 import urllib2
11 import threading, popen2
12 import array, struct
13 #from socket import *
14 import socket
15 import plc
16 import base64
17 from subprocess import PIPE, Popen
18 import ssh.pxssh as pxssh
19 import ssh.pexpect as pexpect
20
21 plc_lock = threading.Lock()
22
23 # Use our versions of telnetlib and pyssh
24 sys.path.insert(0, os.path.dirname(sys.argv[0]))
25 import telnetlib
26 sys.path.insert(0, os.path.dirname(sys.argv[0]) + "/pyssh")    
27 import pyssh
28
29 # Timeouts in seconds
30 TELNET_TIMEOUT = 45
31
32 # Event class ID from pcu events
33 #NODE_POWER_CONTROL = 3
34
35 # Monitor user ID
36 #MONITOR_USER_ID = 11142
37
38 import logging
39 logger = logging.getLogger("monitor")
40 verbose = 1
41 #dryrun = 0;
42
class ExceptionNoTransport(Exception):
        """A send was attempted while no transport object was open."""

class ExceptionNotFound(Exception):
        """An expected string did not show up in the device output."""

class ExceptionPassword(Exception):
        """The password prompt never appeared, or the login was rejected."""

class ExceptionTimeout(Exception):
        """An expected prompt could not be read within the timeout."""

class ExceptionPrompt(Exception):
        """A command prompt did not appear (default error of ifThenSend)."""

class ExceptionSequence(Exception):
        """A confirmation prompt of a reboot menu sequence did not appear."""

class ExceptionReset(Exception):
        """Presumably a failed reset; not raised in the code reviewed here."""

class ExceptionPort(Exception):
        """None of the PCU's open ports maps to a supported transport."""

class ExceptionUsername(Exception):
        """The username prompt did not appear."""
52
def telnet_answer(telnet, expected, buffer):
        """Wait for a prompt on a telnet session, then answer it.

        Reads from `telnet` until `expected` is seen or TELNET_TIMEOUT seconds
        have passed.  When the text read does not contain `expected`,
        ExceptionNotFound is raised; otherwise `buffer` followed by CRLF is
        written to the session.
        """
        reply = telnet.read_until(expected, TELNET_TIMEOUT)
        if expected not in reply:
                raise ExceptionNotFound("'%s' not found" % expected)
        telnet.write(buffer + "\r\n")
63
64
65 # PCU has model, host, preferred-port, user, passwd, 
66
# This is an object derived directly from the PLCAPI DB fields
class PCU(object):
        """Record of one PCU, filled in from a PLCAPI PCU dictionary.

        Every required key of the dictionary becomes an attribute of the same
        name on the instance.
        """
        def __init__(self, plc_pcu_dict):
                """Copy the required fields; raise Exception on the first missing one."""
                required = ('username', 'password', 'site_id',
                            'hostname', 'ip',
                            'pcu_id', 'model',
                            'node_ids', 'ports')
                for name in required:
                        if name not in plc_pcu_dict:
                                raise Exception("No such field %s in PCU object" % name)
                        setattr(self, name, plc_pcu_dict[name])
78
# These are the convenience functions built around the PCU object.
class PCUModel(PCU):
        """PCU record plus lookup helpers; `self.host` is the address to connect to."""

        def __init__(self, plc_pcu_dict):
                PCU.__init__(self, plc_pcu_dict)
                self.host = self.pcu_name()

        def pcu_name(self):
                """Return the hostname if set, else the ip, else None.

                A field counts as unset when it is None or the empty string.
                """
                for candidate in (self.hostname, self.ip):
                        # Compare by value: `is not ""` tests object identity and
                        # only works when the empty string happens to be interned.
                        if candidate is not None and candidate != "":
                                return candidate
                return None

        def nodeidToPort(self, node_id):
                """Return the port paired with `node_id` (same index in node_ids/ports).

                Raises Exception when `node_id` does not belong to this PCU.
                """
                if node_id in self.node_ids:
                        return self.ports[self.node_ids.index(node_id)]

                # %s rather than %d so that a non-integer id still produces this
                # error instead of a TypeError from the formatting itself.
                raise Exception("No such Node ID: %s" % node_id)
100
101 # This class captures the observed pcu records from FindBadPCUs.py
class PCURecord:
        """Observed state of a PCU, as recorded by FindBadPCUs.py.

        The fields 'nodenames', 'portstatus', 'dnsmatch' and 'complete_entry'
        are copied from the record dictionary onto the instance.
        """
        def __init__(self, pcu_record_dict):
                """Copy the required fields; raise Exception on the first missing one."""
                for field in ('nodenames', 'portstatus',
                              'dnsmatch',
                              'complete_entry'):
                        if field not in pcu_record_dict:
                                raise Exception("No such field %s in pcu record dict" % field)
                        # setattr() also works when this classic class is
                        # instantiated directly; instance.__setattr__ does not
                        # exist there under Python 2.
                        setattr(self, field, pcu_record_dict[field])
114
class Transport:
        """One connection to a PCU over telnet, ssh or HTTP.

        `type` is one of the TELNET, SSH or HTTP class constants.  open()
        stores the live connection object in `self.transport`; close() resets
        it to None.
        """
        TELNET = 1
        SSH    = 2
        HTTP   = 3

        # Seconds allowed for the telnet connect and for each prompt wait.
        TELNET_TIMEOUT = 60

        def __init__(self, type, verbose):
                self.type = type
                self.verbose = verbose
                self.transport = None

        def open(self, host, username=None, password=None, prompt="User Name"):
                """Connect to `host` with the transport chosen at construction.

                TELNET: connects and, when `username` is given, waits for `prompt`
                and answers it with the username (ExceptionUsername if the prompt
                is missing).  SSH: requires `username`.  HTTP: only builds an
                opener carrying basic-auth credentials for port 80.

                Returns True.  Raises Exception for an unknown transport type or
                for an ssh transport without a username.
                """
                transport = None

                if self.type == self.TELNET:
                        transport = telnetlib.Telnet(host, timeout=self.TELNET_TIMEOUT)
                        transport.set_debuglevel(self.verbose)
                        if username is not None:
                                # ifThenSend() talks through self.transport, so the
                                # connection must be published before it is called.
                                self.transport = transport
                                self.ifThenSend(prompt, username, ExceptionUsername)

                elif self.type == self.SSH:
                        if username is None:
                                raise Exception("Username cannot be None for ssh transport.")
                        transport = pyssh.Ssh(username, host)
                        transport.set_debuglevel(self.verbose)
                        transport.open()

                elif self.type == self.HTTP:
                        self.url = "http://%s:%d/" % (host, 80)
                        uri = "%s:%d" % (host, 80)

                        # create authinfo
                        authinfo = urllib2.HTTPPasswordMgrWithDefaultRealm()
                        authinfo.add_password(None, uri, username, password)
                        authhandler = urllib2.HTTPBasicAuthHandler(authinfo)

                        transport = urllib2.build_opener(authhandler)

                else:
                        raise Exception("Unknown transport type: %s" % self.type)

                self.transport = transport
                return True

        def close(self):
                """Close the telnet/ssh connection (nothing to do for HTTP) and forget it.

                Safe to call when nothing is open.  Raises Exception for an
                unknown transport type.
                """
                if self.type in (self.TELNET, self.SSH):
                        if self.transport is not None:
                                self.transport.close()
                elif self.type != self.HTTP:
                        raise Exception("Unknown transport type %s" % self.type)
                self.transport = None

        def sendHTTP(self, resource, data):
                """Send `data` to `resource` below the URL built by open().

                Returns 0 on success, or the string "http transport error" when
                urllib2 reports a URLError.
                """
                target = self.url + resource
                if self.verbose:
                        print("POSTing '%s' to %s" % (data, target))

                try:
                        f = self.transport.open(target, data)
                        r = f.read()
                        if self.verbose:
                                print(r)

                except urllib2.URLError as err:
                        # The error is a logging argument, so the message needs a
                        # placeholder for it; without one logging fails to format.
                        logger.info('Could not open http connection: %s', err)
                        return "http transport error"

                return 0

        def sendPassword(self, password, prompt=None):
                """Answer the password prompt (ExceptionPassword if it is missing).

                TELNET waits for `prompt`, or "Password" when none is given; SSH
                always waits for "password:"; HTTP does nothing.
                """
                if self.type == self.TELNET:
                        if prompt is None:
                                prompt = "Password"
                        self.ifThenSend(prompt, password, ExceptionPassword)
                elif self.type == self.SSH:
                        self.ifThenSend("password:", password, ExceptionPassword)
                elif self.type == self.HTTP:
                        pass
                else:
                        raise Exception("Unknown transport type: %s" % self.type)

        def ifThenSend(self, expected, buffer, ErrorClass=ExceptionPrompt):
                """Wait for `expected`, then send `buffer` followed by CRLF.

                Raises `ErrorClass` when the text read within TELNET_TIMEOUT does
                not contain `expected`, and ExceptionNoTransport when no
                connection is open.
                """
                if self.transport is None:
                        raise ExceptionNoTransport("transport object is type None")

                output = self.transport.read_until(expected, self.TELNET_TIMEOUT)
                if expected not in output:
                        raise ErrorClass("'%s' not found" % expected)
                self.transport.write(buffer + "\r\n")

        def ifElse(self, expected, ErrorClass):
                """Read until `expected`; turn any read failure into `ErrorClass`.

                NOTE(review): the text read is not checked for `expected`.  Whether
                a missed prompt is detected depends on read_until() raising on
                timeout in the bundled telnetlib -- confirm.
                """
                try:
                        self.transport.read_until(expected, self.TELNET_TIMEOUT)
                except Exception:
                        # Not a bare except: Ctrl-C and SystemExit must propagate.
                        raise ErrorClass("Could not find '%s' within timeout" % expected)
222                         
223
class PCUControl(Transport,PCUModel,PCURecord):
        """Base class of the model-specific PCU reboot drivers.

        Combines the PCU description (PCUModel), its observed state (PCURecord)
        and a connection (Transport).  Subclasses implement run(); callers use
        reboot(), which turns the known failure exceptions into result strings.
        """
        def __init__(self, plc_pcu_record, verbose, supported_ports=None):
                """Pick the transport from the PCU's open ports.

                `supported_ports` lists, as strings, the ports the concrete driver
                can use.  The first of 22 (ssh), 23 (telnet), 80, 443 and 5869
                (all HTTP) that is both supported and recorded as "open" in
                `portstatus` wins.  With an empty `portstatus` no transport is
                selected; with a non-empty one and no match ExceptionPort is
                raised.
                """
                PCUModel.__init__(self, plc_pcu_record)
                PCURecord.__init__(self, plc_pcu_record)
                if supported_ports is None:
                        supported_ports = []

                # In order of preference.  5869 is for DRAC cards: not sure how
                # much it's used in the protocol, but racadm opens this port.
                candidates = (('22', Transport.SSH),
                              ('23', Transport.TELNET),
                              ('80', Transport.HTTP),
                              ('443', Transport.HTTP),
                              ('5869', Transport.HTTP))

                transport_type = None
                if self.portstatus:
                        for port, candidate_type in candidates:
                                # .get(): a supported port that was never scanned is
                                # simply not open, rather than a KeyError.
                                if port in supported_ports and self.portstatus.get(port) == "open":
                                        transport_type = candidate_type
                                        break
                        else:
                                raise ExceptionPort("Unsupported Port: No transport from open ports")
                Transport.__init__(self, transport_type, verbose)

        def run(self, node_port, dryrun):
                """ This function is to be defined by the specific PCU instance.  """
                pass

        def reboot(self, node_port, dryrun):
                """Call run() and report known failures as strings.

                Returns whatever run() returns, or a descriptive error string when
                run() raises one of the exceptions handled below.  Any other
                exception propagates to the caller.
                """
                try:
                        return self.run(node_port, dryrun)
                except ExceptionNotFound as err:
                        return "error: " + str(err)
                except ExceptionPassword as err:
                        return "password exception: " + str(err)
                except ExceptionTimeout as err:
                        return "timeout exception: " + str(err)
                except ExceptionUsername as err:
                        return "exception: no username prompt: " + str(err)
                except ExceptionSequence as err:
                        return "sequence error: " + str(err)
                except ExceptionPrompt as err:
                        return "prompt exception: " + str(err)
                except ExceptionPort as err:
                        return "no ports exception: " + str(err)
                except socket.error as err:
                        return "socket error: timeout: " + str(err)
                except EOFError as err:
                        if self.verbose:
                                logger.debug("reboot: EOF")
                                logger.debug(err)
                        # The connection may never have been opened, or run() may
                        # already have closed it.
                        if self.transport is not None:
                                self.transport.close()
                        traceback.print_exc()
                        return "EOF connection reset" + str(err)
286
287                 
class IPAL(PCUControl):
        """Driver for iPal units: password login, then a 'P<port>' pulse command."""

        def run(self, node_port, dryrun):
                """Log in and, unless `dryrun`, pulse the relay of `node_port`; returns 0."""
                password_prompt = "Password >"
                command_prompt = "Enter >"

                self.open(self.host)

                # XXX Some iPals require you to hit Enter a few times first
                self.ifThenSend(password_prompt, "\r\n\r\n", ExceptionNotFound)

                # Login
                self.ifThenSend(password_prompt, self.password, ExceptionPassword)
                self.transport.write("\r\n\r\n")

                # P# - Pulse relay
                if not dryrun:
                        pulse_command = "P%d" % node_port
                        self.ifThenSend(command_prompt, pulse_command, ExceptionNotFound)

                # Get the next prompt
                self.ifElse(command_prompt, ExceptionTimeout)

                self.close()
                return 0
308
def ipal_reboot(ip, password, port, dryrun):
        """Pulse relay `port` of the iPal at `ip` over telnet.

        With `dryrun` set, the login is performed but no pulse command is sent.

        Returns 0 on success, errno.ECONNRESET when the connection hits EOF,
        errno.ETIMEDOUT on a socket error, and the string "ipal error" for any
        other failure.  The telnet session is closed on every path.
        """
        telnet = None

        try:
                telnet = telnetlib.Telnet(ip, timeout=TELNET_TIMEOUT)
                telnet.set_debuglevel(verbose)

                # XXX Some iPals require you to hit Enter a few times first
                telnet_answer(telnet, "Password >", "\r\n\r\n")

                # Login
                telnet_answer(telnet, "Password >", password)

                # XXX Some iPals require you to hit Enter a few times first
                telnet.write("\r\n\r\n")

                # P# - Pulse relay
                if not dryrun:
                        telnet_answer(telnet, "Enter >", "P%d" % port)

                telnet.read_until("Enter >", TELNET_TIMEOUT)
                return 0

        except EOFError as err:
                if verbose:
                        logger.debug("ipal_reboot: EOF")
                        logger.debug(err)
                traceback.print_exc()
                return errno.ECONNRESET
        except socket.error as err:
                logger.debug("ipal_reboot: Socket Error")
                logger.debug(err)
                traceback.print_exc()
                return errno.ETIMEDOUT
        except Exception as err:
                if verbose:
                        logger.debug("ipal_reboot: Exception")
                        logger.debug(err)
                traceback.print_exc()
                return "ipal error"
        finally:
                # One close for all paths: the socket-error path used to leak the
                # session, and `telnet` is still None if the connect itself failed.
                if telnet is not None:
                        telnet.close()
379
class APCEurope(PCUControl):
        """APC menu layout: 1, 2, <outlet>, 3 (immediate reboot)."""

        def run(self, node_port, dryrun):
                """Walk the menu to the outlet's reboot entry; confirm unless `dryrun`."""
                menu_prompt = "\r\n> "

                self.open(self.host, self.username)
                self.sendPassword(self.password)

                # No menu after the login is reported as a password failure.
                self.ifThenSend(menu_prompt, "1", ExceptionPassword)
                # The last selection, 3, is "Immediate Reboot".
                for selection in ("2", str(node_port), "3"):
                        self.ifThenSend(menu_prompt, selection)

                # A dry run answers with a bare <ENTER>, which cancels.
                if dryrun:
                        answer = ""
                else:
                        answer = "YES\r\n"
                self.ifThenSend("Enter 'YES' to continue or <ENTER> to cancel",
                                answer, ExceptionSequence)
                self.ifThenSend("Press <ENTER> to continue...", "", ExceptionSequence)

                self.close()
                return 0
402
class APCBrazil(PCUControl):
        """APC menu layout: 1, <outlet>, 4 (immediate reboot)."""

        def run(self, node_port, dryrun):
                """Walk the menu to the outlet's reboot entry; confirm unless `dryrun`."""
                menu_prompt = "\r\n> "

                self.open(self.host, self.username)
                self.sendPassword(self.password)

                # No menu after the login is reported as a password failure.
                self.ifThenSend(menu_prompt, "1", ExceptionPassword)
                # The last selection, 4, is "Immediate Reboot".
                for selection in (str(node_port), "4"):
                        self.ifThenSend(menu_prompt, selection)

                # A dry run answers with a bare <ENTER>, which cancels.
                if dryrun:
                        answer = ""
                else:
                        answer = "YES\r\n"
                self.ifThenSend("Enter 'YES' to continue or <ENTER> to cancel",
                                answer, ExceptionSequence)
                self.ifThenSend("Press <ENTER> to continue...", "", ExceptionSequence)

                self.close()
                return 0
424
class APCBerlin(PCUControl):
        """APC menu layout: 1, 2, 1, <outlet>, 3 (immediate reboot)."""

        def run(self, node_port, dryrun):
                """Walk the menu to the outlet's reboot entry; confirm unless `dryrun`."""
                menu_prompt = "\r\n> "

                self.open(self.host, self.username)
                self.sendPassword(self.password)

                # No menu after the login is reported as a password failure.
                self.ifThenSend(menu_prompt, "1", ExceptionPassword)
                # The last selection, 3, is "Immediate Reboot".
                for selection in ("2", "1", str(node_port), "3"):
                        self.ifThenSend(menu_prompt, selection)

                # A dry run answers with a bare <ENTER>, which cancels.
                if dryrun:
                        answer = ""
                else:
                        answer = "YES\r\n"
                self.ifThenSend("Enter 'YES' to continue or <ENTER> to cancel",
                                answer, ExceptionSequence)
                self.ifThenSend("Press <ENTER> to continue...", "", ExceptionSequence)

                self.close()
                return 0
448
class APCFolsom(PCUControl):
        """APC menu layout: 1, 2, 1, <outlet>, 1, 3 (immediate reboot)."""

        def run(self, node_port, dryrun):
                """Walk the menu to the outlet's reboot entry; confirm unless `dryrun`."""
                menu_prompt = "\r\n> "

                self.open(self.host, self.username)
                self.sendPassword(self.password)

                # No menu after the login is reported as a password failure.
                self.ifThenSend(menu_prompt, "1", ExceptionPassword)
                # The last selection, 3, is "Immediate Reboot".
                for selection in ("2", "1", str(node_port), "1", "3"):
                        self.ifThenSend(menu_prompt, selection)

                # A dry run answers with a bare <ENTER>, which cancels.
                if dryrun:
                        answer = ""
                else:
                        answer = "YES\r\n"
                self.ifThenSend("Enter 'YES' to continue or <ENTER> to cancel",
                                answer, ExceptionSequence)
                self.ifThenSend("Press <ENTER> to continue...", "", ExceptionSequence)

                self.close()
                return 0
474
class APCMaster(PCUControl):
        """APC menu layout: Device Manager, Outlet Control/Config, outlet, control, reboot."""

        def run(self, node_port, dryrun):
                """Walk the menu to the outlet's reboot entry; confirm unless `dryrun`."""
                menu_prompt = "\r\n> "

                self.open(self.host, self.username)
                self.sendPassword(self.password)

                # 1- Device Manager.  No menu after the login is reported as a
                # password failure.
                self.ifThenSend(menu_prompt, "1", ExceptionPassword)

                remaining_menu = (
                        "3",             # 3- Outlet Control/Config
                        str(node_port),  # n- Outlet n
                        "1",             # 1- Control Outlet
                        "3",             # 3- Immediate Reboot
                )
                for selection in remaining_menu:
                        self.ifThenSend(menu_prompt, selection)

                # A dry run answers with a bare <ENTER>, which cancels.
                if dryrun:
                        answer = ""
                else:
                        answer = "YES\r\n"
                self.ifThenSend("Enter 'YES' to continue or <ENTER> to cancel",
                                answer, ExceptionSequence)
                self.ifThenSend("Press <ENTER> to continue...", "", ExceptionSequence)

                self.close()
                return 0
502
class APC(PCUControl):
        """APC driver that tries the known menu layouts one after another.

        Holds an APCMaster, an APCEurope and an APCFolsom controller for the
        same PCU record and falls back to the next one when a layout's reboot
        sequence does not match the unit.
        """
        def __init__(self, plc_pcu_record, verbose, supported_ports=None):
                """Build the three layout-specific controllers.

                `supported_ports` is handed to PCUControl for this object and for
                each sub-controller; leaving it out keeps the previous behaviour
                of passing no supported ports at all.
                """
                if supported_ports is None:
                        supported_ports = []
                PCUControl.__init__(self, plc_pcu_record, verbose, supported_ports)

                self.master = APCMaster(plc_pcu_record, verbose, supported_ports)
                self.folsom = APCFolsom(plc_pcu_record, verbose, supported_ports)
                self.europe = APCEurope(plc_pcu_record, verbose, supported_ports)

        def run(self, node_port, dryrun):
                """Reboot through the first menu layout whose sequence matches.

                Returns the result of the first controller that does not fail with
                a sequence error, or "Unknown reboot sequence for APC PCU" when
                all three do.
                """
                sleep_time = 1

                for pcu in [self.master, self.europe, self.folsom]:
                        print("-*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*")
                        print("sleeping %d" % sleep_time)
                        time.sleep(sleep_time)
                        try:
                                ret = pcu.reboot(node_port, dryrun)
                                # reboot() never lets ExceptionSequence escape; it
                                # reports it as a "sequence error: ..." string, so
                                # the mismatch has to be recognised from the result.
                                wrong_sequence = (isinstance(ret, str) and
                                                  ret.startswith("sequence error: "))
                        except ExceptionSequence:
                                wrong_sequence = True

                        if not wrong_sequence:
                                return ret

                        # Long pause before the next layout -- presumably so the
                        # unit drops the previous session first; TODO confirm.
                        sleep_time = 130

                return "Unknown reboot sequence for APC PCU"
532
class DRACRacAdm(PCUControl):
        """Driver that hands the reboot to racadm_reboot() instead of a Transport."""

        def run(self, node_port, dryrun):
                """Call racadm_reboot() with this PCU's host and credentials; returns 0."""
                print("trying racadm_reboot...")
                # NOTE(review): the result of racadm_reboot() is discarded, so 0 is
                # reported whatever it returns.
                racadm_reboot(self.host, self.username, self.password,
                              node_port, dryrun)
                return 0
540
class DRAC(PCUControl):
        """Driver for DRAC cards reached through a '[<username>]#' command prompt."""

        def run(self, node_port, dryrun):
                """Log in and power-cycle the server; a dry run sends getsysinfo instead."""
                self.open(self.host, self.username)
                self.sendPassword(self.password)

                print("logging in...")
                self.transport.write("\r\n")

                shell_prompt = "[%s]#" % self.username
                if dryrun:
                        # Testing Reboot ?
                        command = "getsysinfo"
                else:
                        # Reset this machine
                        command = "serveraction powercycle"
                self.ifThenSend(shell_prompt, command)

                self.ifThenSend(shell_prompt, "exit")

                self.close()
                return 0
559
class HPiLO(PCUControl):
        """Driver for HP iLO cards through their 'hpiLO->' command line."""

        def run(self, node_port, dryrun):
                """Enter system1 and send 'reset'; a dry run sends 'POWER' instead."""
                system_prompt = "</system1>hpiLO->"

                self.open(self.host, self.username)
                self.sendPassword(self.password)

                # </>hpiLO->
                self.ifThenSend("</>hpiLO->", "cd system1")

                if dryrun:
                        command = "POWER"
                else:
                        # Reset this machine
                        command = "reset"
                self.ifThenSend(system_prompt, command)

                self.ifThenSend(system_prompt, "exit")

                self.close()
                return 0
579
580                 
class HPiLOHttps(PCUControl):
        """Driver for HP iLO cards using the cmdhttps/locfg.pl script."""

        def _locfg_errors(self, xml_file, stdin=None):
                """Run locfg.pl with `xml_file` and return the errors it reports.

                The script output is reduced to the lines that contain 'MESSAGE'
                but not 'No error'; the stripped result is returned, which is
                empty when nothing was reported.
                """
                # Arguments are passed as a list, so the host and the credentials
                # never go through a shell: spaces, quotes or metacharacters in a
                # password can neither break nor extend the command.
                argv = ["cmdhttps/locfg.pl"]
                for flag, value in (("-s", self.host), ("-f", xml_file),
                                    ("-u", self.username), ("-p", self.password)):
                        argv.extend([flag, "%s" % value])
                p_ilo = Popen(argv, stdin=stdin, stdout=PIPE)

                # This filter is a fixed string and a pipeline, so it does need the
                # shell; without shell=True the whole string is taken as a program
                # name and the call fails.
                p_grep = Popen("grep 'MESSAGE' | grep -v 'No error'",
                               stdin=p_ilo.stdout, stdout=PIPE, stderr=PIPE,
                               shell=True)
                sout, serr = p_grep.communicate()

                p_ilo.wait()
                p_grep.wait()
                return sout.strip()

        def run(self, node_port, dryrun):
                """Query the card, then reset the server unless `dryrun`.

                Returns 0 on success, or the error text reported by locfg.pl for
                whichever step failed.
                """
                errors = self._locfg_errors("iloxml/Get_Network.xml")
                if errors:
                        print("sout: %s" % errors)
                        return errors

                if not dryrun:
                        # The reset call has always been given a stdin pipe; kept.
                        errors = self._locfg_errors("iloxml/Reset_Server.xml", stdin=PIPE)
                        if errors:
                                print("sout: %s" % errors)
                                return errors

                return 0
614
class BayTechAU(PCUControl):
        """Driver for BayTech units whose command prompt is 'RPC3-NC>'."""

        def run(self, node_port, dryrun):
                """Log in and send 'Reboot <port>'; a dry run answers N to the confirmation."""
                command_prompt = "RPC3-NC>"

                self.open(self.host, self.username, None, "Enter user name:")
                self.sendPassword(self.password, "Enter Password:")

                self.ifThenSend(command_prompt, "Reboot %d" % node_port)

                # Reboot Outlet  N        (Y/N)?
                if dryrun:
                        answer = "N"
                else:
                        answer = "Y"
                self.ifThenSend("(Y/N)?", answer)
                self.ifThenSend(command_prompt, "")

                self.close()
                return 0
632
class BayTechGeorgeTown(PCUControl):
        """Driver for BayTech units whose command prompt is 'RPC-16>'."""

        def run(self, node_port, dryrun):
                """Log in and send 'Reboot <port>'; a dry run answers N to the confirmation."""
                command_prompt = "RPC-16>"

                self.open(self.host, self.username, None, "Enter user name:")
                self.sendPassword(self.password, "Enter Password:")

                self.ifThenSend(command_prompt, "Reboot %d" % node_port)

                # Reboot Outlet  N        (Y/N)?
                if dryrun:
                        answer = "N"
                else:
                        answer = "Y"
                self.ifThenSend("(Y/N)?", answer)
                self.ifThenSend(command_prompt, "")

                self.close()
                return 0
651
class BayTechCtrlCUnibe(PCUControl):
        """
                For some reason, these units let you log in fine, but they hang
                indefinitely, unless you send a Ctrl-C after the password.  No idea
                why.

                This variant selects menu entry 3 to reach the 'DS-RPC>' prompt.
        """
        def run(self, node_port, dryrun):
                """Reboot outlet `node_port` over ssh; a dry run answers N.

                Returns 0.  Raises ExceptionPassword when the ssh login fails and
                ExceptionPrompt when the session ends or times out while waiting
                for a prompt.  The ssh session is closed on every path after a
                successful login.
                """
                print("BayTechCtrlC %s" % self.host)

                ssh_options="-o StrictHostKeyChecking=no -o PasswordAuthentication=yes -o PubkeyAuthentication=no"
                s = pxssh.pxssh()
                if not s.login(self.host, self.username, self.password, ssh_options):
                        raise ExceptionPassword("Invalid Password")
                # Otherwise, the login succeeded.

                try:
                        # Send a ctrl-c to the remote process.
                        print("sending ctrl-c")
                        s.send(chr(3))

                        try:
                                # expect() with a single pattern either returns 0 or
                                # raises, so its result needs no checking.
                                s.expect(["Enter Request :"])

                                # Control Outlets  (5 ,1).........5
                                print("3")
                                s.send("3\r\n")
                                if s.expect(["DS-RPC>", "Enter user name:"]) == 1:
                                        s.send(self.username + "\r\n")
                                        s.expect(["DS-RPC>"])

                                print("Reboot %d" % node_port)
                                s.send("Reboot %d\r\n" % node_port)

                                # expect() patterns are regular expressions.  The
                                # unescaped "(Y/N)?" is an optional group that also
                                # matches nothing, so it never waited for the prompt.
                                s.expect([r"\(Y/N\)\?"])
                                if dryrun:
                                        print("sending N")
                                        s.send("N\r\n")
                                else:
                                        print("sending Y")
                                        s.send("Y\r\n")

                        except pexpect.EOF:
                                raise ExceptionPrompt("EOF before 'Enter Request' Prompt")
                        except pexpect.TIMEOUT:
                                raise ExceptionPrompt("Timeout before 'Enter Request' Prompt")
                finally:
                        # Close on failures too, so the ssh session is not leaked.
                        s.close()

                return 0
707
708 class BayTechCtrlC(PCUControl):
709         """
710                 For some reason, these units let you log in fine, but they hang
711                 indefinitely, unless you send a Ctrl-C after the password.  No idea
712                 why.
713         """
714         def run(self, node_port, dryrun):
715                 print "BayTechCtrlC %s" % self.host
716
717                 ssh_options="-o StrictHostKeyChecking=no -o PasswordAuthentication=yes -o PubkeyAuthentication=no"
718                 s = pxssh.pxssh()
719                 if not s.login(self.host, self.username, self.password, ssh_options):
720                         raise ExceptionPassword("Invalid Password")
721                 # Otherwise, the login succeeded.
722
723                 # Send a ctrl-c to the remote process.
724                 print "sending ctrl-c"
725                 s.send(chr(3))
726
727                 # Control Outlets  (5 ,1).........5
728                 try:
729                         index = s.expect(["Enter Request :"])
730
731                         if index == 0:
732                                 print "5"
733                                 s.send("5\r\n")
734                                 index = s.expect(["DS-RPC>", "Enter user name:"])
735                                 if index == 1:
736                                         print "sending username"
737                                         s.send(self.username + "\r\n")
738                                         index = s.expect(["DS-RPC>"])
739
740                                 if index == 0:
741                                         print "Reboot %d" % node_port
742                                         s.send("Reboot %d\r\n" % node_port)
743
744                                         index = s.expect(["(Y/N)?"])
745                                         if index == 0:
746                                                 if dryrun:
747                                                         print "sending N"
748                                                         s.send("N\r\n")
749                                                 else:
750                                                         print "sending Y"
751                                                         s.send("Y\r\n")
752
753                                 #index = s.expect(["DS-RPC>"])
754                                 #print "got prompt back"
755
756                         s.close()
757
758                 except pexpect.EOF:
759                         raise ExceptionPrompt("EOF before 'Enter Request' Prompt")
760                 except pexpect.TIMEOUT:
761                         raise ExceptionPrompt("Timeout before 'Enter Request' Prompt")
762
763                 return 0
764
765 class BayTech(PCUControl):
766         def run(self, node_port, dryrun):
767                 self.open(self.host, self.username)
768                 self.sendPassword(self.password)
769
770                 # Control Outlets  (5 ,1).........5
771                 self.ifThenSend("Enter Request :", "5")
772
773                 # Reboot N
774                 try:
775                         self.ifThenSend("DS-RPC>", "Reboot %d" % node_port, ExceptionNotFound)
776                 except ExceptionNotFound, msg:
777                         # one machine is configured to ask for a username,
778                         # even after login...
779                         print "msg: %s" % msg
780                         self.transport.write(self.username + "\r\n")
781                         self.ifThenSend("DS-RPC>", "Reboot %d" % node_port)
782
783                 # Reboot Outlet  N        (Y/N)?
784                 if dryrun:
785                         self.ifThenSend("(Y/N)?", "N")
786                 else:
787                         self.ifThenSend("(Y/N)?", "Y")
788                 self.ifThenSend("DS-RPC>", "")
789
790                 self.close()
791                 return 0
792
class WTIIPS4(PCUControl):
        """WTI IPS-4 power switch, driven through the PCUControl transport helpers."""
        def run(self, node_port, dryrun):
                """
                Log in with the password, issue "/Boot <node_port>" and answer
                the "Sure? (Y/N): " confirmation.

                node_port -- plug identifier, formatted with %s into the command
                dryrun    -- when true the confirmation is declined with "N",
                             otherwise it is accepted with "Y"

                Returns 0.
                """
                self.open(self.host)
                self.sendPassword(self.password, "Enter Password:")

                self.ifThenSend("IPS> ", "/Boot %s" % node_port)
                # A dry run must decline the confirmation; only a real run
                # may answer "Y".  (The two answers used to be swapped.)
                if dryrun:
                        self.ifThenSend("Sure? (Y/N): ", "N")
                else:
                        self.ifThenSend("Sure? (Y/N): ", "Y")

                self.ifThenSend("IPS> ", "")

                self.close()
                return 0
808
809 class ePowerSwitchGood(PCUControl):
810         # NOTE:
811         #               The old code used Python's HTTPPasswordMgrWithDefaultRealm()
812         #               For some reason this both doesn't work and in some cases, actually
813         #               hangs the PCU.  Definitely not what we want.
814         #               
815         #               The code below is much simpler.  Just letting things fail first,
816         #               and then, trying again with authentication string in the header.
817         #               
818         def run(self, node_port, dryrun):
819                 self.transport = None
820                 self.url = "http://%s:%d/" % (self.host,80)
821                 uri = "%s:%d" % (self.host,80)
822
823                 req = urllib2.Request(self.url)
824                 try:
825                         handle = urllib2.urlopen(req)
826                 except IOError, e:
827                         # NOTE: this is expected to fail initially
828                         pass
829                 else:
830                         print self.url
831                         print "-----------"
832                         print handle.read()
833                         print "-----------"
834                         return "ERROR: not protected by HTTP authentication"
835
836                 if not hasattr(e, 'code') or e.code != 401:
837                         return "ERROR: failed for: %s" % str(e)
838
839                 base64data = base64.encodestring("%s:%s" % (self.username, self.password))[:-1]
840                 # NOTE: assuming basic realm authentication.
841                 authheader = "Basic %s" % base64data
842                 req.add_header("Authorization", authheader)
843
844                 try:
845                         f = urllib2.urlopen(req)
846                 except IOError, e:
847                         # failing here means the User/passwd is wrong (hopefully)
848                         raise ExceptionPassword("Incorrect username/password")
849
850                 # TODO: after verifying that the user/password is correct, we should
851                 # actually reboot the given node.
852
853                 if not dryrun:
854                         # add data to handler,
855                         # fetch url one more time on cmd.html, econtrol.html or whatever.
856                         pass
857
858                 if self.verbose: print f.read()
859
860                 self.close()
861                 return 0
862
863
864 class ePowerSwitchOld(PCUControl):
865         def run(self, node_port, dryrun):
866                 self.url = "http://%s:%d/" % (self.host,80)
867                 uri = "%s:%d" % (self.host,80)
868
869                 # create authinfo
870                 authinfo = urllib2.HTTPPasswordMgrWithDefaultRealm()
871                 authinfo.add_password (None, uri, self.username, self.password)
872                 authhandler = urllib2.HTTPBasicAuthHandler( authinfo )
873
874                 # NOTE: it doesn't seem to matter whether this authinfo is here or not.
875                 transport = urllib2.build_opener(authinfo)
876                 f = transport.open(self.url)
877                 if self.verbose: print f.read()
878
879                 if not dryrun:
880                         transport = urllib2.build_opener(authhandler)
881                         f = transport.open(self.url + "cmd.html", "P%d=r" % node_port)
882                         if self.verbose: print f.read()
883
884                 self.close()
885                 return 0
886
887 class ePowerSwitch(PCUControl):
888         def run(self, node_port, dryrun):
889                 self.url = "http://%s:%d/" % (self.host,80)
890                 uri = "%s:%d" % (self.host,80)
891
892                 # TODO: I'm still not sure what the deal is here.
893                 #               two independent calls appear to need to be made before the
894                 #               reboot will succeed.  It doesn't seem to be possible to do
895                 #               this with a single call.  I have no idea why.
896
897                 # create authinfo
898                 authinfo = urllib2.HTTPPasswordMgrWithDefaultRealm()
899                 authinfo.add_password (None, uri, self.username, self.password)
900                 authhandler = urllib2.HTTPBasicAuthHandler( authinfo )
901
902                 # NOTE: it doesn't seem to matter whether this authinfo is here or not.
903                 transport = urllib2.build_opener()
904                 f = transport.open(self.url + "elogin.html", "pwd=%s" % self.password)
905                 if self.verbose: print f.read()
906
907                 if not dryrun:
908                         transport = urllib2.build_opener(authhandler)
909                         f = transport.open(self.url + "econtrol.html", "P%d=r" % node_port)
910                         if self.verbose: print f.read()
911
912                 #       data= "P%d=r" % node_port
913                 #self.open(self.host, self.username, self.password)
914                 #self.sendHTTP("elogin.html", "pwd=%s" % self.password)
915                 #self.sendHTTP("econtrol.html", data)
916                 #self.sendHTTP("cmd.html", data)
917
918                 self.close()
919                 return 0
920                 
921
922 ### rebooting european BlackBox PSE boxes
923 # Thierry Parmentelat - May 11 2005
924 # tested on 4-ports models known as PSE505-FR
925 # uses http to POST a data 'P<port>=r'
926 # relies on basic authentication within http1.0
927 # first curl-based script was
928 # curl --http1.0 --basic --user <username>:<password> --data P<port>=r \
929 #       http://<hostname>:<http_port>/cmd.html && echo OK
930
931 def bbpse_reboot (pcu_ip,username,password,port_in_pcu,http_port, dryrun):
932
933         global verbose
934
935         url = "http://%s:%d/cmd.html" % (pcu_ip,http_port)
936         data= "P%d=r" % port_in_pcu
937         if verbose:
938                 logger.debug("POSTing '%s' on %s" % (data,url))
939
940         authinfo = urllib2.HTTPPasswordMgrWithDefaultRealm()
941         uri = "%s:%d" % (pcu_ip,http_port)
942         authinfo.add_password (None, uri, username, password)
943         authhandler = urllib2.HTTPBasicAuthHandler( authinfo )
944
945         opener = urllib2.build_opener(authhandler)
946         urllib2.install_opener(opener)
947
948         if (dryrun):
949                 return 0
950
951         try:
952                 f = urllib2.urlopen(url,data)
953
954                 r= f.read()
955                 if verbose:
956                         logger.debug(r)
957                 return 0
958
959         except urllib2.URLError,err:
960                 logger.info('Could not open http connection', err)
961                 return "bbpse error"
962
963 ### rebooting x10toggle based systems addressed by port
964 # Marc E. Fiuczynski - May 31 2005
965 # tested on 4-ports models known as PSE505-FR
966 # uses ssh and password to login to an account
967 # that will cause the system to be powercycled.
968
969 def x10toggle_reboot(ip, username, password, port, dryrun):
970         global verbose
971
972         ssh = None
973         try:
974                 ssh = pyssh.Ssh(username, ip)
975                 ssh.open()
976
977                 # Login
978                 telnet_answer(ssh, "password:", password)
979
980                 if not dryrun:
981                         # Reboot
982                         telnet_answer(ssh, "x10toggle>", "A%d" % port)
983
984                 # Close
985                 output = ssh.close()
986                 if verbose:
987                         logger.debug(output)
988                 return 0
989
990         except Exception, err:
991                 if verbose:
992                         logger.debug(err)
993                 if ssh:
994                         output = ssh.close()
995                         if verbose:
996                                 logger.debug(output)
997                 return errno.ETIMEDOUT
998
999 ### rebooting Dell systems via RAC card
1000 # Marc E. Fiuczynski - June 01 2005
1001 # tested with David Lowenthal's itchy/scratchy nodes at UGA
1002 #
1003
def runcmd(command, args, username, password, timeout = None):
        """
        Run a shell command in a worker thread, feed it a username and a
        password on stdin, and return everything it printed.

        command  -- command string handed to popen2.Popen4
        args     -- list of strings appended to command, separated by spaces;
                    ignored when empty/None
        username -- written to the child's stdin, followed by a newline
        password -- written to the child's stdin, followed by a newline
        timeout  -- passed to Condition.wait(); None means wait without limit

        Returns the child's output when it exits with status 0.

        Raises Exception when no result arrived before the wait returned
        ("command timed-out"), when the child exited with a non-zero status
        or was killed by a signal, and re-raises any exception caught in the
        worker thread.

        NOTE: nothing in this function terminates the child or the worker
        after a timeout; the worker is only marked as a daemon thread.
        """

        # One-element list so the nested functions can store the outcome.
        result = [None]
        result_ready = threading.Condition()

        def set_result(x):
                # Publish x (a (retval, data) tuple or an exception) and wake
                # up the thread waiting on result_ready.

                result_ready.acquire()
                try:
                        result[0] = x
                finally:
                        result_ready.notify()
                        result_ready.release()

        def do_command(command, username, password):
                # Worker body: run the command and report the outcome through
                # set_result(); any exception is reported instead of raised.

                try:
                        # Popen4 is a popen-type class that combines stdout and stderr
                        p = popen2.Popen4(command)

                        # send the credentials, then read all output data
                        p.tochild.write("%s\n" % username)
                        p.tochild.write("%s\n" % password)
                        p.tochild.close()
                        data = p.fromchild.read()

                        while True:
                                # might get interrupted by a signal in poll() or waitpid()
                                try:
                                        retval = p.wait()
                                        set_result((retval, data))
                                        break
                                except OSError, ex:
                                        # retry on EINTR, give up on anything else
                                        if ex.errno == errno.EINTR:
                                                continue
                                        raise ex
                except Exception, ex:
                        set_result(ex)

        if args:
                command = " ".join([command] + args)

        worker = threading.Thread(target = do_command, args = (command, username, password, ))
        worker.setDaemon(True)
        # Take the condition's lock before starting the worker, so that
        # set_result() cannot notify before wait() below has released it.
        result_ready.acquire()
        worker.start()
        result_ready.wait(timeout)
        try:
                # Still the initial placeholder: the worker has not reported.
                if result == [None]:
                        raise Exception, "command timed-out: '%s'" % command
        finally:
                result_ready.release()
        result = result[0]

        if isinstance(result, Exception):
                raise result
        else:
                (retval, data) = result
                if os.WIFEXITED(retval) and os.WEXITSTATUS(retval) == 0:
                        return data
                else:
                        # Build a description of the failure, including any output.
                        out = "system command ('%s') " % command
                        if os.WIFEXITED(retval):
                                out += "failed, rc = %d" % os.WEXITSTATUS(retval)
                        else:
                                out += "killed by signal %d" % os.WTERMSIG(retval)
                        if data:
                                out += "; output follows:\n" + data
                        raise Exception, out
1073
1074 def racadm_reboot(ip, username, password, port, dryrun):
1075         global verbose
1076
1077         try:
1078                 cmd = "/usr/sbin/racadm"
1079                 os.stat(cmd)
1080                 if not dryrun:
1081                         output = runcmd(cmd, ["-r %s -i serveraction powercycle" % ip],
1082                                 username, password)
1083                 else:
1084                         output = runcmd(cmd, ["-r %s -i getsysinfo" % ip],
1085                                 username, password)
1086
1087                 print "RUNCMD: %s" % output
1088                 if verbose:
1089                         logger.debug(output)
1090                 return 0
1091
1092         except Exception, err:
1093                 logger.debug("runcmd raised exception %s" % err)
1094                 if verbose:
1095                         logger.debug(err)
1096                 return -1
1097
def pcu_name(pcu):
        """
        Return the PCU's hostname, or its ip when the hostname is None or
        empty, or None when both are None or empty.

        pcu must provide the keys 'hostname' and 'ip'; 'ip' is only looked
        up when the hostname is unusable.
        """
        for field in ('hostname', 'ip'):
                value = pcu[field]
                # Compare by value: "is not" against a string literal tests
                # object identity and is not a reliable emptiness check.
                if value is not None and value != "":
                        return value
        return None
1105
def get_pcu_values(pcu_id):
        """
        Return the stored 'values' entry for pcu_id from the "findbadpcus"
        database, or None when the lookup fails.
        """
        # TODO: obviously, this shouldn't be loaded each time...
        import soltesz
        fb =soltesz.dbLoad("findbadpcus")

        try:
                values = fb['nodes']["id_%s" % pcu_id]['values']
        except Exception:
                # Missing or malformed entry.  "except Exception" rather than
                # a bare except, so that KeyboardInterrupt and SystemExit are
                # not swallowed on interpreters where they are not Exceptions.
                values = None

        return values
1117
def check_open_port(values, port_list):
        """
        Return True when at least one port in port_list is recorded as
        "open" in values['portstatus'], and False otherwise (including when
        values has no 'portstatus' key).
        """
        if 'portstatus' not in values:
                return False

        status_by_port = values['portstatus']
        open_ports = [candidate for candidate in port_list
                        if candidate in status_by_port
                        and status_by_port[candidate] == "open"]
        return len(open_ports) > 0
1129         
def reboot_policy(nodename, continue_probe, dryrun):
        """
        Look up the PCU for nodename and hand it to reboot_test().

        Returns True only when reboot_test() returns 0.  Returns False when
        the node has no PCU, when no stored values exist for that PCU, or
        when reboot_test() returns anything other than 0.
        """
        global verbose

        pcu = plc.getpcu(nodename)
        if not pcu:
                # "%s has no pcu" % nodename
                return False

        values = get_pcu_values(pcu['pcu_id'])
        if values is None:
                # "no info for pcu_id %s" % pcu['pcu_id']
                return False

        # Try the PCU first
        logger.debug("Trying PCU %s %s" % (pcu['hostname'], pcu['model']))

        outcome = reboot_test(nodename, values, continue_probe, verbose, dryrun)
        return outcome == 0
1150
def reboot_test(nodename, values, continue_probe, verbose, dryrun):
        """
        Pick a PCU control class from values['model'] (substring match) and,
        for several models, from values['pcu_id'], then call its reboot().

        nodename       -- used as a key into values; values[nodename] is
                          passed to reboot() (presumably the node's port on
                          the PCU -- TODO confirm)
        values         -- dict describing the PCU ('model', 'pcu_id', ...)
        continue_probe -- when false no PCU is contacted (see below)
        verbose        -- passed through to the control class constructor
        dryrun         -- passed through to reboot()

        Returns whatever reboot() returned, or one of:
          "Unsupported_PCU"  continue_probe is true but no model matched
          "NetDown"          continue_probe == False and values has 'portstatus'
          "Not_Run"          continue_probe == False and no 'portstatus'
          -1                 continue_probe is false-ish but != False (e.g. None)
          str(err)           an ExceptionPort was raised
        Exceptions other than ExceptionPort propagate, except where a branch
        below catches them to retry or fall back.
        """
        rb_ret = ""

        try:
                # DataProbe iPal (many sites)
                if  continue_probe and values['model'].find("Dataprobe IP-41x/IP-81x") >= 0:
                        ipal = IPAL(values, verbose, ['23'])
                        rb_ret = ipal.reboot(values[nodename], dryrun)

                # APC Masterswitch (Berkeley)
                elif continue_probe and values['model'].find("APC AP79xx/Masterswitch") >= 0:

                        # TODO: make a more robust version of APC
                        if values['pcu_id'] in [1163,1055,1111,1231,1113,1127,1128,1148]:
                                apc = APCEurope(values, verbose, ['22', '23'])
                                rb_ret = apc.reboot(values[nodename], dryrun)

                        elif values['pcu_id'] in [1110,86]:
                                apc = APCBrazil(values, verbose, ['22', '23'])
                                rb_ret = apc.reboot(values[nodename], dryrun)

                        elif values['pcu_id'] in [1221]:
                                apc = APCBerlin(values, verbose, ['22', '23'])
                                rb_ret = apc.reboot(values[nodename], dryrun)

                        # NOTE(review): 1221 is also listed here, but it is
                        # always taken by the APCBerlin branch above.
                        elif values['pcu_id'] in [1173,1221,1220,1225]:
                                apc = APCFolsom(values, verbose, ['22', '23'])
                                rb_ret = apc.reboot(values[nodename], dryrun)

                        else:
                                apc = APCMaster(values, verbose, ['22', '23'])
                                rb_ret = apc.reboot(values[nodename], dryrun)

                # BayTech DS4-RPC
                elif continue_probe and values['model'].find("Baytech DS4-RPC") >= 0:
                        if values['pcu_id'] in [1052,1209,1002,1008,1041,1013,1022]:
                                # These require a 'ctrl-c' to be sent...
                                baytech = BayTechCtrlC(values, verbose, ['22', '23'])
                                rb_ret = baytech.reboot(values[nodename], dryrun)

                        elif values['pcu_id'] in [93]:
                                baytech = BayTechAU(values, verbose, ['22', '23'])
                                rb_ret = baytech.reboot(values[nodename], dryrun)

                        elif values['pcu_id'] in [1057]:
                                # These require a 'ctrl-c' to be sent...
                                baytech = BayTechCtrlCUnibe(values, verbose, ['22', '23'])
                                rb_ret = baytech.reboot(values[nodename], dryrun)

                        elif values['pcu_id'] in [1012]:
                                # This pcu sometimes doesn't present the 'Username' prompt,
                                # unless you immediately try again, so any failure
                                # is followed by exactly one retry.
                                try:
                                        baytech = BayTechGeorgeTown(values, verbose, ['22', '23'])
                                        rb_ret = baytech.reboot(values[nodename], dryrun)
                                except:
                                        baytech = BayTechGeorgeTown(values, verbose, ['22', '23'])
                                        rb_ret = baytech.reboot(values[nodename], dryrun)
                        else:
                                baytech = BayTech(values, verbose, ['22', '23'])
                                rb_ret = baytech.reboot(values[nodename], dryrun)

                # iLO: try ssh (22) first, fall back to https (443) when that
                # returns non-zero or raises.
                elif continue_probe and values['model'].find("HP iLO") >= 0:
                        try:
                                hpilo = HPiLO(values, verbose, ['22'])
                                rb_ret = hpilo.reboot(0, dryrun)
                                if rb_ret != 0:
                                        hpilo = HPiLOHttps(values, verbose, ['443'])
                                        rb_ret = hpilo.reboot(0, dryrun)
                        except:
                                hpilo = HPiLOHttps(values, verbose, ['443'])
                                rb_ret = hpilo.reboot(0, dryrun)

                # DRAC: racadm first, ssh as the fallback when it raises.
                elif continue_probe and values['model'].find("Dell RAC") >= 0:
                        # TODO: I don't think DRACRacAdm will throw an exception for the
                        # default method to catch...
                        try:
                                drac = DRACRacAdm(values, verbose, ['443', '5869'])
                                rb_ret = drac.reboot(0, dryrun)
                        except:
                                drac = DRAC(values, verbose, ['22'])
                                rb_ret = drac.reboot(0, dryrun)

                elif continue_probe and values['model'].find("WTI IPS-4") >= 0:
                                wti = WTIIPS4(values, verbose, ['23'])
                                rb_ret = wti.reboot(values[nodename], dryrun)

                # BlackBox PSExxx-xx (e.g. PSE505-FR)
                elif continue_probe and \
                        (values['model'].find("BlackBox PS5xx") >= 0 or
                         values['model'].find("ePowerSwitch 1/4/8x") >=0 ):

                        # TODO: allow a different port than http 80.
                        # NOTE(review): the first branch and the else branch
                        # select the same class; only pcu_id 1003 differs.
                        if values['pcu_id'] in [1089, 1071, 1046, 1035, 1118]:
                                eps = ePowerSwitchGood(values, verbose, ['80'])
                        elif values['pcu_id'] in [1003]:
                                eps = ePowerSwitch(values, verbose, ['80'])
                        else:
                                eps = ePowerSwitchGood(values, verbose, ['80'])

                        rb_ret = eps.reboot(values[nodename], dryrun)

                elif continue_probe:
                        rb_ret = "Unsupported_PCU"

                # Probing was disabled: report a status without contacting the PCU.
                elif continue_probe == False:
                        if 'portstatus' in values:
                                rb_ret = "NetDown"
                        else:
                                rb_ret = "Not_Run"
                else:
                        rb_ret = -1

        except ExceptionPort, err:
                rb_ret = str(err)

        return rb_ret
1270         # ????
1271         #elif continue_probe and values['protocol'] == "racadm" and \
1272         #               values['model'] == "RAC":
1273         #       rb_ret = racadm_reboot(pcu_name(values),
1274         #                                                                 values['username'],
1275         #                                                                 values['password'],
1276         #                                                                 pcu[nodename],
1277         #                                                                 dryrun)
1278
1279 # Returns true if rebooted via PCU
def reboot_old(nodename, dryrun):
        """
        Older reboot entry point: choose a *_reboot helper from the PCU's
        'model' and 'protocol' fields and call it.

        Returns False when the node has no PCU (after calling
        plc.nodePOD(nodename)) or when the model/protocol combination is not
        handled, and True otherwise.

        NOTE(review): the helper's return value (err) is never inspected, so
        True only means a helper was called, not that the reboot worked.
        NOTE(review): apc_reboot, ipal_reboot and baytech_reboot are not
        defined in the part of the file reviewed here -- confirm they exist.
        """
        pcu = plc.getpcu(nodename)
        if not pcu:
                plc.nodePOD(nodename)
                return False
        # Try the PCU first
        logger.debug("Trying PCU %s %s" % (pcu['hostname'], pcu['model']))

        # APC Masterswitch (Berkeley)
        if pcu['model'] == "APC Masterswitch":
                err = apc_reboot(pcu['ip'], pcu['username'],pcu['password'],
                                pcu[nodename], pcu['protocol'], dryrun)

        # DataProbe iPal (many sites)
        elif pcu['protocol'] == "telnet" and pcu['model'].find("IP-4") >= 0:
                err = ipal_reboot(pcu['ip'],pcu['password'], pcu[nodename], dryrun)

        # BayTech DS4-RPC
        elif pcu['protocol'] == "ssh" and \
        (pcu['model'].find("Baytech") >= 0 or pcu['model'].find("DS4") >= 0):
                err = baytech_reboot(pcu['ip'], pcu['username'],pcu['password'], pcu[nodename], dryrun)

        # BlackBox PSExxx-xx (e.g. PSE505-FR)
        elif pcu['protocol'] == "http" and (pcu['model'] == "bbpse"):
                err = bbpse_reboot(pcu['ip'], pcu['username'], pcu['password'], pcu[nodename],80, dryrun)

        # x10toggle
        elif pcu['protocol'] == "ssh" and (pcu['model'] == "x10toggle"):
                err = x10toggle_reboot(pcu['ip'], pcu['username'],pcu['password'], pcu[nodename], dryrun)

        # Dell RAC via racadm
        elif pcu['protocol'] == "racadm" and (pcu['model'] == "RAC"):
                # The port lookup used the undefined name "pcu_" here, which
                # raised NameError whenever this branch was taken.
                err = racadm_reboot(pcu['ip'], pcu['username'],pcu['password'], pcu[nodename], dryrun)

        # Unknown or unsupported
        else:
                err = errno.EPROTONOSUPPORT
                return False
        return True
1319
1320
1321 def main():
1322         logger.setLevel(logging.DEBUG)
1323         ch = logging.StreamHandler()
1324         ch.setLevel(logging.DEBUG)
1325         formatter = logging.Formatter('LOGGER - %(message)s')
1326         ch.setFormatter(formatter)
1327         logger.addHandler(ch)
1328
1329         try:
1330                 print "Rebooting %s" % sys.argv[1]
1331                 reboot_policy(sys.argv[1], True, False)
1332         except Exception, err:
1333                 print err
1334
# Script entry point: only runs when the file is executed directly.
if __name__ == '__main__':
        # NOTE(review): plc and logger are already bound at the top of the
        # file; these two statements bind the same names again before main().
        import plc
        logger = logging.getLogger("monitor")
        main()