Massive commit. Just put all local changes into svn.
[monitor.git] / reboot.py
1 #!/usr/bin/python
2 #
3 # Reboot specified nodes
4 #
5
6 import getpass, getopt
7 import os, sys
8 import xml, xmlrpclib
9 import errno, time, traceback
10 import urllib2
11 import threading, popen2
12 import array, struct
13 #from socket import *
14 import socket
15 import plc
16 import base64
17 from subprocess import PIPE, Popen
18 import ssh.pxssh as pxssh
19 import ssh.pexpect as pexpect
20 import socket
21
22 # Use our versions of telnetlib and pyssh
23 sys.path.insert(0, os.path.dirname(sys.argv[0]))
24 import telnetlib
25 sys.path.insert(0, os.path.dirname(sys.argv[0]) + "/pyssh")    
26 import pyssh
27
28 # Timeouts in seconds
29 TELNET_TIMEOUT = 45
30
31 # Event class ID from pcu events
32 #NODE_POWER_CONTROL = 3
33
34 # Monitor user ID
35 #MONITOR_USER_ID = 11142
36
37 import logging
38 logger = logging.getLogger("monitor")
39 verbose = 1
40 #dryrun = 0;
41
class ExceptionNoTransport(Exception):
    """An operation was attempted while no transport object was open."""


class ExceptionNotFound(Exception):
    """Text that was expected from the device did not show up."""


class ExceptionPassword(Exception):
    """The password prompt was not seen, or the login was refused."""


class ExceptionTimeout(Exception):
    """An expected prompt was not received within the timeout."""


class ExceptionPrompt(Exception):
    """An expected prompt was not seen (default error for prompt waits)."""


class ExceptionSequence(Exception):
    """The device did not follow the expected menu/confirmation sequence."""


class ExceptionReset(Exception):
    """Reset-related failure (not raised by the code visible in this part)."""


class ExceptionPort(Exception):
    """None of the PCU's open ports maps onto a supported transport."""


class ExceptionUsername(Exception):
    """The username prompt was not seen while opening a session."""
51
def telnet_answer(telnet, expected, buffer):
    """Wait for `expected` on a telnet session, then answer with `buffer`.

    Reads from `telnet` via read_until(expected, TELNET_TIMEOUT).  If the
    returned text contains `expected`, `buffer` plus CRLF is written back
    to the session; otherwise ExceptionNotFound is raised.
    """
    received = telnet.read_until(expected, TELNET_TIMEOUT)
    if expected not in received:
        raise ExceptionNotFound("'%s' not found" % expected)
    telnet.write(buffer + "\r\n")
62
63
64 # PCU has model, host, preferred-port, user, passwd, 
65
# This is an object derived directly from the PLCAPI DB fields
class PCU(object):
    """Plain record of one PCU, populated from a PLCAPI dictionary.

    Every required key of `plc_pcu_dict` is copied onto the instance as an
    attribute of the same name; other keys in the dictionary are ignored.
    """

    def __init__(self, plc_pcu_dict):
        """Copy the required fields; raise Exception on the first one missing."""
        required = ('username', 'password', 'site_id',
                    'hostname', 'ip',
                    'pcu_id', 'model',
                    'node_ids', 'ports')
        for name in required:
            if name not in plc_pcu_dict:
                raise Exception("No such field %s in PCU object" % name)
            setattr(self, name, plc_pcu_dict[name])
77
# These are the convenience functions built around the PCU object.
class PCUModel(PCU):
    """PCU record plus helpers for naming the unit and mapping nodes to ports."""

    def __init__(self, plc_pcu_dict):
        """Load the PCU fields and cache the address to connect to in `host`."""
        PCU.__init__(self, plc_pcu_dict)
        self.host = self.pcu_name()

    def pcu_name(self):
        """Return the hostname if set, else the IP, else None.

        A value counts as unset when it is None or an empty string.  The
        comparison is by value: an identity test against a "" literal
        would treat empty strings that are not that very object (for
        example empty unicode strings) as valid names.
        """
        for candidate in (self.hostname, self.ip):
            if candidate is not None and candidate != "":
                return candidate
        return None

    def nodeidToPort(self, node_id):
        """Return the PCU port listed at the same position as `node_id`.

        `node_ids` and `ports` are treated as parallel sequences.  Raises
        Exception when `node_id` is not attached to this PCU.
        """
        for position, known_id in enumerate(self.node_ids):
            if known_id == node_id:
                return self.ports[position]

        raise Exception("No such Node ID: %d" % node_id)
99
100 # This class captures the observed pcu records from FindBadPCUs.py
class PCURecord:
    """Observed state of a PCU, copied from a record dictionary.

    The fields 'nodenames', 'portstatus', 'dnsmatch' and 'complete_entry'
    are required and become attributes of the same name.
    """

    def __init__(self, pcu_record_dict):
        """Copy the required fields; raise Exception on the first one missing."""
        for field in ('nodenames', 'portstatus',
                      'dnsmatch',
                      'complete_entry'):
            if field not in pcu_record_dict:
                raise Exception("No such field %s in pcu record dict" % field)

            # A 'reboot' field would be stored as 'reboot_str'.  It is not in
            # the required list at present, so this rename is currently
            # inactive; it is kept so that re-adding the field keeps the
            # same attribute name as before.
            if field == "reboot":
                target = "reboot_str"
            else:
                target = field

            # setattr() works for classic and new-style classes alike;
            # instances of a classic class have no __setattr__ attribute
            # to call directly.
            setattr(self, target, pcu_record_dict[field])
113
class Transport:
    """Thin wrapper around a telnet, ssh or HTTP connection to a PCU.

    `type` is one of the TELNET / SSH / HTTP constants and selects what
    open() creates.  The live connection object is kept in `self.transport`
    and is None whenever nothing is open.  All prompt waits use the class
    level TELNET_TIMEOUT.
    """
    TELNET = 1
    SSH    = 2
    HTTP   = 3

    TELNET_TIMEOUT = 60

    def __init__(self, type, verbose):
        self.type = type
        self.verbose = verbose
        self.transport = None

    def open(self, host, username=None, password=None, prompt="User Name"):
        """Create the connection object for `host` and store it.

        TELNET: connects and, when a username is given, waits for `prompt`
                and sends the username (ExceptionUsername if not seen).
        SSH:    requires a username; opens a pyssh session.
        HTTP:   builds a urllib2 opener with basic-auth credentials for
                port 80 and records the base URL in `self.url`.

        Returns True.  Raises Exception for an unknown transport type.
        """
        transport = None

        if self.type == self.TELNET:
            transport = telnetlib.Telnet(host, timeout=self.TELNET_TIMEOUT)
            transport.set_debuglevel(self.verbose)
            if username is not None:
                # ifThenSend() works on self.transport, so publish it early.
                self.transport = transport
                self.ifThenSend(prompt, username, ExceptionUsername)

        elif self.type == self.SSH:
            if username is None:
                raise Exception("Username cannot be None for ssh transport.")
            transport = pyssh.Ssh(username, host)
            transport.set_debuglevel(self.verbose)
            transport.open()

        elif self.type == self.HTTP:
            self.url = "http://%s:%d/" % (host, 80)
            uri = "%s:%d" % (host, 80)

            # create authinfo
            authinfo = urllib2.HTTPPasswordMgrWithDefaultRealm()
            authinfo.add_password(None, uri, username, password)
            authhandler = urllib2.HTTPBasicAuthHandler(authinfo)

            transport = urllib2.build_opener(authhandler)

        else:
            raise Exception("Unknown transport type: %s" % self.type)

        self.transport = transport
        return True

    def close(self):
        """Close the connection (if any) and reset `self.transport` to None.

        Calling this with nothing open is a no-op rather than an error, so
        it is safe to use from cleanup paths.
        """
        if self.type not in (self.TELNET, self.SSH, self.HTTP):
            raise Exception("Unknown transport type %s" % self.type)

        # The HTTP opener has nothing to close.
        if self.type != self.HTTP and self.transport is not None:
            self.transport.close()
        self.transport = None

    def sendHTTP(self, resource, data):
        """POST `data` to `self.url + resource`.

        Returns 0 on success, or the string "http transport error" when
        urllib2 reports a URLError.
        """
        target = self.url + resource
        if self.verbose:
            print("POSTing '%s' to %s" % (data, target))

        try:
            response = self.transport.open(target, data)
            try:
                body = response.read()
            finally:
                # Always release the connection, even if read() fails.
                response.close()
            if self.verbose:
                print(body)

        except urllib2.URLError as err:
            logger.info('Could not open http connection: %s', err)
            return "http transport error"

        return 0

    def sendPassword(self, password, prompt=None):
        """Wait for the password prompt and send `password`.

        TELNET waits for `prompt` (default "Password"), SSH waits for
        "password:", HTTP does nothing.  Raises ExceptionPassword when the
        prompt is not seen, Exception for an unknown transport type.
        """
        if self.type == self.TELNET:
            if prompt is None:
                prompt = "Password"
            self.ifThenSend(prompt, password, ExceptionPassword)
        elif self.type == self.SSH:
            self.ifThenSend("password:", password, ExceptionPassword)
        elif self.type == self.HTTP:
            pass
        else:
            raise Exception("Unknown transport type: %s" % self.type)

    def ifThenSend(self, expected, buffer, ErrorClass=ExceptionPrompt):
        """Wait for `expected`; if seen, send `buffer` followed by CRLF.

        Raises `ErrorClass` when the text returned by read_until() does not
        contain `expected`, and ExceptionNoTransport when nothing is open.
        """
        if self.transport is None:
            raise ExceptionNoTransport("transport object is type None")

        output = self.transport.read_until(expected, self.TELNET_TIMEOUT)
        if output.find(expected) == -1:
            raise ErrorClass("'%s' not found" % expected)
        self.transport.write(buffer + "\r\n")

    def ifElse(self, expected, ErrorClass):
        """Wait for `expected` without sending anything.

        Raises `ErrorClass` when reading fails or when the text read does
        not contain `expected`.
        """
        failure = "Could not find '%s' within timeout" % expected
        try:
            output = self.transport.read_until(expected, self.TELNET_TIMEOUT)
        except Exception:
            # Not a bare except: KeyboardInterrupt/SystemExit must get out.
            raise ErrorClass(failure)

        # A read that gives up is expected to hand back whatever arrived
        # (ifThenSend() relies on the same thing), so check the content too.
        if output.find(expected) == -1:
            raise ErrorClass(failure)
221                         
222
class PCUControl(Transport,PCUModel,PCURecord):
    """Base class of all PCU drivers: PCU data plus a matching transport.

    `plc_pcu_record` must carry both the PCU fields and the observed
    record fields.  `supported_ports` lists, as strings, the ports the
    concrete driver can use; the first of them (in the order 22, 23, 80,
    443, 5869) that `portstatus` reports as "open" selects the transport.
    """

    def __init__(self, plc_pcu_record, verbose, supported_ports=None):
        """Pick the transport type from the open ports.

        Raises ExceptionPort when no supported port is open and Exception
        when there is no port status at all.
        """
        PCUModel.__init__(self, plc_pcu_record)
        PCURecord.__init__(self, plc_pcu_record)

        if supported_ports is None:
            supported_ports = []

        if not self.portstatus:
            raise Exception("No Portstatus: No transport because no open ports")

        # Port 5869 is for DRAC cards.  Not sure how much it's used in the
        # protocol, but racadm opens this port.
        preference = (('22',   Transport.SSH),
                      ('23',   Transport.TELNET),
                      ('80',   Transport.HTTP),
                      ('443',  Transport.HTTP),
                      ('5869', Transport.HTTP))

        transport_type = None
        for port, candidate in preference:
            # .get(): a supported port that was never probed counts as
            # "not open" instead of blowing up with a KeyError.
            if port in supported_ports and self.portstatus.get(port) == "open":
                transport_type = candidate
                break

        if transport_type is None:
            raise ExceptionPort("Unsupported Port: No transport from open ports")

        Transport.__init__(self, transport_type, verbose)

    def run(self, node_port, dryrun):
        """ This function is to be defined by the specific PCU instance.  """
        pass

    def reboot(self, node_port, dryrun):
        """Run the driver and turn known failures into error strings.

        Returns whatever run() returns on success, or a descriptive string
        for the handled exception types.  Any other exception propagates
        to the caller.
        """
        try:
            return self.run(node_port, dryrun)
        except ExceptionNotFound as err:
            return "error: " + str(err)
        except ExceptionPassword as err:
            return "password exception: " + str(err)
        except ExceptionTimeout as err:
            return "timeout exception: " + str(err)
        except ExceptionUsername as err:
            return "exception: no username prompt: " + str(err)
        except ExceptionSequence as err:
            return "sequence error: " + str(err)
        except ExceptionPrompt as err:
            return "prompt exception: " + str(err)
        except ExceptionPort as err:
            return "no ports exception: " + str(err)
        except socket.error as err:
            return "socket error: timeout: " + str(err)
        except EOFError as err:
            if self.verbose:
                logger.debug("reboot: EOF")
                logger.debug(err)
            # The connection may never have been opened (or be closed
            # already); only close what is actually there.
            if self.transport is not None:
                self.close()
            traceback.print_exc()
            return "EOF connection reset" + str(err)
287
288                 
class IPAL(PCUControl):
    """Driver for iPal units: password login, then pulse the node's relay."""

    def run(self, node_port, dryrun):
        """Log in and, unless `dryrun`, send "P<port>" to pulse the relay.

        Returns 0 when the whole exchange completed.
        """
        login_prompt = "Password >"
        menu_prompt = "Enter >"

        self.open(self.host)

        # XXX Some iPals require you to hit Enter a few times first
        self.ifThenSend(login_prompt, "\r\n\r\n", ExceptionNotFound)

        # Login
        self.ifThenSend(login_prompt, self.password, ExceptionPassword)
        self.transport.write("\r\n\r\n")

        if not dryrun:
            # P# - Pulse relay
            self.ifThenSend(menu_prompt, "P%d" % node_port, ExceptionNotFound)

        # Get the next prompt
        self.ifElse(menu_prompt, ExceptionTimeout)

        self.close()
        return 0
309
class APCEurope(PCUControl):
    """APC menu variant: 1, 2, <outlet>, 3 (immediate reboot)."""

    def run(self, node_port, dryrun):
        """Walk the menu to the reboot confirmation; confirm unless `dryrun`.

        Returns 0 when the whole exchange completed.
        """
        menu = "\r\n> "
        confirm = "Enter 'YES' to continue or <ENTER> to cancel"

        self.open(self.host, self.username)
        self.sendPassword(self.password)

        # A missing first menu is reported as a password problem.
        self.ifThenSend(menu, "1", ExceptionPassword)
        # The last choice, 3, is "Immediate Reboot".
        for choice in ("2", str(node_port), "3"):
            self.ifThenSend(menu, choice)

        # An empty answer cancels, which is what a dry run wants.
        answer = "" if dryrun else "YES\r\n"
        self.ifThenSend(confirm, answer, ExceptionSequence)
        self.ifThenSend("Press <ENTER> to continue...", "", ExceptionSequence)

        self.close()
        return 0
332
class APCBrazil(PCUControl):
    """APC menu variant: 1, <outlet>, 4 (immediate reboot)."""

    def run(self, node_port, dryrun):
        """Walk the menu to the reboot confirmation; confirm unless `dryrun`.

        Returns 0 when the whole exchange completed.
        """
        menu = "\r\n> "
        confirm = "Enter 'YES' to continue or <ENTER> to cancel"

        self.open(self.host, self.username)
        self.sendPassword(self.password)

        # A missing first menu is reported as a password problem.
        self.ifThenSend(menu, "1", ExceptionPassword)
        # The last choice, 4, is "Immediate Reboot".
        for choice in (str(node_port), "4"):
            self.ifThenSend(menu, choice)

        # An empty answer cancels, which is what a dry run wants.
        answer = "" if dryrun else "YES\r\n"
        self.ifThenSend(confirm, answer, ExceptionSequence)
        self.ifThenSend("Press <ENTER> to continue...", "", ExceptionSequence)

        self.close()
        return 0
354
class APCBerlin(PCUControl):
    """APC menu variant: 1, 2, 1, <outlet>, 3 (immediate reboot)."""

    def run(self, node_port, dryrun):
        """Walk the menu to the reboot confirmation; confirm unless `dryrun`.

        Returns 0 when the whole exchange completed.
        """
        menu = "\r\n> "
        confirm = "Enter 'YES' to continue or <ENTER> to cancel"

        self.open(self.host, self.username)
        self.sendPassword(self.password)

        # A missing first menu is reported as a password problem.
        self.ifThenSend(menu, "1", ExceptionPassword)
        # The last choice, 3, is "Immediate Reboot".
        for choice in ("2", "1", str(node_port), "3"):
            self.ifThenSend(menu, choice)

        # An empty answer cancels, which is what a dry run wants.
        answer = "" if dryrun else "YES\r\n"
        self.ifThenSend(confirm, answer, ExceptionSequence)
        self.ifThenSend("Press <ENTER> to continue...", "", ExceptionSequence)

        self.close()
        return 0
378
class APCFolsom(PCUControl):
    """APC menu variant: 1, 2, 1, <outlet>, 1, 3 (immediate reboot)."""

    def run(self, node_port, dryrun):
        """Walk the menu to the reboot confirmation; confirm unless `dryrun`.

        Returns 0 when the whole exchange completed.
        """
        menu = "\r\n> "
        confirm = "Enter 'YES' to continue or <ENTER> to cancel"

        self.open(self.host, self.username)
        self.sendPassword(self.password)

        # A missing first menu is reported as a password problem.
        self.ifThenSend(menu, "1", ExceptionPassword)
        # The last choice, 3, is "Immediate Reboot".
        for choice in ("2", "1", str(node_port), "1", "3"):
            self.ifThenSend(menu, choice)

        # An empty answer cancels, which is what a dry run wants.
        answer = "" if dryrun else "YES\r\n"
        self.ifThenSend(confirm, answer, ExceptionSequence)
        self.ifThenSend("Press <ENTER> to continue...", "", ExceptionSequence)

        self.close()
        return 0
404
class APCMaster(PCUControl):
    """APC menu variant: 1, 3, <outlet>, 1, 3 (immediate reboot)."""

    def run(self, node_port, dryrun):
        """Walk the menu to the reboot confirmation; confirm unless `dryrun`.

        Returns 0 when the whole exchange completed.
        """
        menu = "\r\n> "
        confirm = "Enter 'YES' to continue or <ENTER> to cancel"

        print("Rebooting %s" % self.host)
        self.open(self.host, self.username)
        self.sendPassword(self.password)

        # 1- Device Manager (a missing menu is reported as a password problem)
        self.ifThenSend(menu, "1", ExceptionPassword)

        steps = (
            "3",              # 3- Outlet Control/Config
            str(node_port),   # n- Outlet n
            "1",              # 1- Control Outlet
            "3",              # 3- Immediate Reboot
        )
        for choice in steps:
            self.ifThenSend(menu, choice)

        # An empty answer cancels, which is what a dry run wants.
        answer = "" if dryrun else "YES\r\n"
        self.ifThenSend(confirm, answer, ExceptionSequence)
        self.ifThenSend("Press <ENTER> to continue...", "", ExceptionSequence)

        self.close()
        return 0
433
class APC(PCUControl):
    """Generic APC driver that tries the known menu layouts in turn.

    The master, europe and folsom variants are attempted in that order;
    the next one is only tried when the previous one failed because the
    menu sequence did not match.
    """

    # Prefix PCUControl.reboot() puts on a flattened ExceptionSequence.
    SEQUENCE_ERROR_PREFIX = "sequence error: "

    def __init__(self, plc_pcu_record, verbose, supported_ports=None):
        """Build the three variant drivers from the same record.

        `supported_ports` is optional and, when given, is forwarded to
        PCUControl and to every variant; without it the PCUControl default
        applies, exactly as before.
        """
        if supported_ports is None:
            args = (plc_pcu_record, verbose)
        else:
            args = (plc_pcu_record, verbose, supported_ports)

        PCUControl.__init__(self, *args)

        self.master = APCMaster(*args)
        self.folsom = APCFolsom(*args)
        self.europe = APCEurope(*args)

    def run(self, node_port, dryrun):
        """Reboot through the first variant whose menu sequence matches.

        Returns that variant's reboot() result, or the string
        "Unknown reboot sequence for APC PCU" when every variant failed
        with a sequence error.
        """
        sleep_time = 1

        for pcu in (self.master, self.europe, self.folsom):
            print("-*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*")
            print("sleeping %d" % sleep_time)
            time.sleep(sleep_time)

            try:
                ret = pcu.reboot(node_port, dryrun)
            except ExceptionSequence:
                sequence_failed = True
            else:
                # reboot() reports a sequence problem as a string instead
                # of raising, so that is what has to be recognised here.
                sequence_failed = str(ret).startswith(self.SEQUENCE_ERROR_PREFIX)

            if not sequence_failed:
                return ret

            # Drop the half-finished session before trying the next layout,
            # and wait longer before the next attempt.
            if pcu.transport is not None:
                pcu.close()
            sleep_time = 130

        return "Unknown reboot sequence for APC PCU"
463
class DRACRacAdm(PCUControl):
    """DRAC driver that delegates the reboot to racadm_reboot()."""

    def run(self, node_port, dryrun):
        """Call racadm_reboot() with this PCU's credentials; always returns 0."""
        print("trying racadm_reboot...")

        host, user, secret = self.host, self.username, self.password
        # The helper's result is not inspected here.
        racadm_reboot(host, user, secret, node_port, dryrun)

        return 0
471
class DRAC(PCUControl):
    """DRAC driver using the interactive "[<user>]#" command prompt."""

    def run(self, node_port, dryrun):
        """Log in and power-cycle the server, or just query it on `dryrun`.

        Returns 0 when the whole exchange completed.
        """
        shell_prompt = "[%s]#" % self.username

        self.open(self.host, self.username)
        self.sendPassword(self.password)

        print("logging in...")
        self.transport.write("\r\n")

        # A dry run only asks for system info; otherwise reset the machine.
        command = "getsysinfo" if dryrun else "serveraction powercycle"
        self.ifThenSend(shell_prompt, command)

        self.ifThenSend(shell_prompt, "exit")

        self.close()
        return 0
490
class HPiLO(PCUControl):
    """HP iLO driver using the interactive "hpiLO->" command prompt."""

    def run(self, node_port, dryrun):
        """Log in, enter system1 and send "reset" ("POWER" on `dryrun`).

        Returns 0 when the whole exchange completed.
        """
        system_prompt = "</system1>hpiLO->"

        self.open(self.host, self.username)
        self.sendPassword(self.password)

        # </>hpiLO->
        self.ifThenSend("</>hpiLO->", "cd system1")

        # A dry run sends POWER; otherwise reset the machine.
        command = "POWER" if dryrun else "reset"
        self.ifThenSend(system_prompt, command)

        self.ifThenSend(system_prompt, "exit")

        self.close()
        return 0
510
511                 
class HPiLOHttps(PCUControl):
    """HP iLO driver that goes through the locfg.pl https command line tool."""

    def run(self, node_port, dryrun):
        """Check access with Get_Network.xml, then send Reset_Server.xml.

        Returns the filtered tool output when the access check reports a
        message, otherwise 0.  Output of the reset itself is only printed.
        """
        import soltesz

        def locfg_messages(xml_file):
            # Run locfg.pl with one XML request and return the MESSAGE
            # lines that are not 'No error', stripped.
            cmd = "cmdhttps/locfg.pl -s %s -f %s -u %s -p '%s' | grep 'MESSAGE' | grep -v 'No error'" % (
                        self.host, xml_file,
                        self.username, self.password)
            sout, serr = soltesz.CMD().run_noexcept(cmd)
            return sout.strip()

        complaint = locfg_messages("iloxml/Get_Network.xml")
        if complaint != "":
            print("sout: %s" % complaint)
            return complaint

        if not dryrun:
            complaint = locfg_messages("iloxml/Reset_Server.xml")
            if complaint != "":
                # Reported but not returned as an error.
                print("sout: %s" % complaint)

        return 0
537
class BayTechAU(PCUControl):
    """BayTech driver for units that show the "RPC3-NC>" prompt."""

    def run(self, node_port, dryrun):
        """Log in and send "Reboot <port>", answering N on `dryrun`, else Y.

        Returns 0 when the whole exchange completed.
        """
        shell_prompt = "RPC3-NC>"

        self.open(self.host, self.username, None, "Enter user name:")
        self.sendPassword(self.password, "Enter Password:")

        self.ifThenSend(shell_prompt, "Reboot %d" % node_port)

        # Reboot Outlet  N        (Y/N)?
        self.ifThenSend("(Y/N)?", "N" if dryrun else "Y")
        self.ifThenSend(shell_prompt, "")

        self.close()
        return 0
555
class BayTechGeorgeTown(PCUControl):
    """BayTech driver for units that show the "RPC-16>" prompt."""

    def run(self, node_port, dryrun):
        """Log in and send "Reboot <port>", answering N on `dryrun`, else Y.

        Returns 0 when the whole exchange completed.
        """
        shell_prompt = "RPC-16>"

        self.open(self.host, self.username, None, "Enter user name:")
        self.sendPassword(self.password, "Enter Password:")

        self.ifThenSend(shell_prompt, "Reboot %d" % node_port)

        # Reboot Outlet  N        (Y/N)?
        self.ifThenSend("(Y/N)?", "N" if dryrun else "Y")
        self.ifThenSend(shell_prompt, "")

        self.close()
        return 0
574
class BayTechCtrlCUnibe(PCUControl):
    """
        For some reason, these units let you log in fine, but they hang
        indefinitely, unless you send a Ctrl-C after the password.  No idea
        why.

        This variant selects menu entry 3 to reach the outlet prompt.
    """
    def run(self, node_port, dryrun):
        """Log in over ssh and send "Reboot <port>" at the DS-RPC> prompt.

        The confirmation is answered with N on `dryrun`, otherwise Y.
        Returns 0.  Raises ExceptionPassword when the login is refused and
        ExceptionPrompt when the session ends or times out.
        """
        print("BayTechCtrlC %s" % self.host)

        ssh_options="-o StrictHostKeyChecking=no -o PasswordAuthentication=yes -o PubkeyAuthentication=no"
        s = pxssh.pxssh()
        if not s.login(self.host, self.username, self.password, ssh_options):
            raise ExceptionPassword("Invalid Password")
        # Otherwise, the login succeeded.

        # Send a ctrl-c to the remote process.
        print("sending ctrl-c")
        s.send(chr(3))

        try:
            # expect() with a single pattern either matches or raises, so
            # no index check is needed after those calls.
            s.expect(["Enter Request :"])

            # Control Outlets
            print("3")
            s.send("3\r\n")
            index = s.expect(["DS-RPC>", "Enter user name:"])
            if index == 1:
                # Some units ask for the username once more.
                s.send(self.username + "\r\n")
                s.expect(["DS-RPC>"])

            print("Reboot %d" % node_port)
            s.send("Reboot %d\r\n" % node_port)

            # expect() patterns are regular expressions: the prompt text
            # must be escaped, otherwise "(Y/N)?" is an optional group that
            # matches the empty string immediately.
            s.expect([r"\(Y/N\)\?"])
            if dryrun:
                print("sending N")
                s.send("N\r\n")
            else:
                print("sending Y")
                s.send("Y\r\n")

        except pexpect.EOF:
            raise ExceptionPrompt("EOF before 'Enter Request' Prompt")
        except pexpect.TIMEOUT:
            raise ExceptionPrompt("Timeout before 'Enter Request' Prompt")
        finally:
            # Do not leave the ssh session behind when a prompt is missed.
            s.close()

        return 0
630
class BayTechCtrlC(PCUControl):
    """
        For some reason, these units let you log in fine, but they hang
        indefinitely, unless you send a Ctrl-C after the password.  No idea
        why.

        This variant selects menu entry 5 to reach the outlet prompt.
    """
    def run(self, node_port, dryrun):
        """Log in over ssh and send "Reboot <port>" at the DS-RPC> prompt.

        The confirmation is answered with N on `dryrun`, otherwise Y, and
        the prompt is awaited once more afterwards.  Returns 0.  Raises
        ExceptionPassword when the login is refused and ExceptionPrompt
        when the session ends or times out.
        """
        print("BayTechCtrlC %s" % self.host)

        ssh_options="-o StrictHostKeyChecking=no -o PasswordAuthentication=yes -o PubkeyAuthentication=no"
        s = pxssh.pxssh()
        if not s.login(self.host, self.username, self.password, ssh_options):
            raise ExceptionPassword("Invalid Password")
        # Otherwise, the login succeeded.

        # Send a ctrl-c to the remote process.
        print("sending ctrl-c")
        s.send(chr(3))

        try:
            # expect() with a single pattern either matches or raises, so
            # no index check is needed after those calls.
            s.expect(["Enter Request :"])

            # Control Outlets  (5 ,1).........5
            print("5")
            s.send("5\r\n")
            index = s.expect(["DS-RPC>", "Enter user name:"])
            if index == 1:
                # Some units ask for the username once more.
                print("sending username")
                s.send(self.username + "\r\n")
                s.expect(["DS-RPC>"])

            print("Reboot %d" % node_port)
            s.send("Reboot %d\r\n" % node_port)

            # expect() patterns are regular expressions: the prompt text
            # must be escaped, otherwise "(Y/N)?" is an optional group that
            # matches the empty string immediately.
            s.expect([r"\(Y/N\)\?"])
            if dryrun:
                print("sending N")
                s.send("N\r\n")
            else:
                print("sending Y")
                s.send("Y\r\n")

            # Wait until the unit is back at its prompt.
            s.expect(["DS-RPC>"])

        except pexpect.EOF:
            raise ExceptionPrompt("EOF before 'Enter Request' Prompt")
        except pexpect.TIMEOUT:
            raise ExceptionPrompt("Timeout before 'Enter Request' Prompt")
        finally:
            # Do not leave the ssh session behind when a prompt is missed.
            s.close()

        return 0
687
class BayTech(PCUControl):
    """BayTech driver for units with a request menu and a "DS-RPC>" prompt."""

    def run(self, node_port, dryrun):
        """Log in, pick entry 5 and send "Reboot <port>".

        The confirmation is answered with N on `dryrun`, otherwise Y.
        Returns 0 when the whole exchange completed.
        """
        shell_prompt = "DS-RPC>"
        reboot_cmd = "Reboot %d" % node_port

        self.open(self.host, self.username)
        self.sendPassword(self.password)

        # Control Outlets  (5 ,1).........5
        self.ifThenSend("Enter Request :", "5")

        # Reboot N
        try:
            self.ifThenSend(shell_prompt, reboot_cmd, ExceptionNotFound)
        except ExceptionNotFound as msg:
            # one machine is configured to ask for a username,
            # even after login...
            print("msg: %s" % msg)
            self.transport.write(self.username + "\r\n")
            self.ifThenSend(shell_prompt, reboot_cmd)

        # Reboot Outlet  N        (Y/N)?
        self.ifThenSend("(Y/N)?", "N" if dryrun else "Y")
        self.ifThenSend(shell_prompt, "")

        self.close()
        return 0
715
class WTIIPS4(PCUControl):
        """Reboot one plug of a WTI IPS-4 unit through its "IPS> " text prompt."""

        def run(self, node_port, dryrun):
                """Log in and issue "/Boot <node_port>".

                node_port -- plug identifier, formatted with %s into the command.
                dryrun    -- when true the "Sure? (Y/N): " confirmation is declined
                             with "N"; otherwise it is confirmed with "Y".

                Returns 0 after the final "IPS> " prompt has been answered.
                """
                self.open(self.host)
                self.sendPassword(self.password, "Enter Password:")

                self.ifThenSend("IPS> ", "/Boot %s" % node_port)
                # The original test was inverted ("if not dryrun" sent "N"), so a
                # real run never rebooted and a dry run did.  Same polarity as
                # the BayTech controller now: decline only on a dry run.
                if dryrun:
                        self.ifThenSend("Sure? (Y/N): ", "N")
                else:
                        self.ifThenSend("Sure? (Y/N): ", "Y")

                self.ifThenSend("IPS> ", "")

                self.close()
                return 0
731
732 class ePowerSwitchGood(PCUControl):
733         # NOTE:
734         #               The old code used Python's HTTPPasswordMgrWithDefaultRealm()
735         #               For some reason this both doesn't work and in some cases, actually
736         #               hangs the PCU.  Definitely not what we want.
737         #               
738         #               The code below is much simpler.  Just letting things fail first,
739         #               and then, trying again with authentication string in the header.
740         #               
741         def run(self, node_port, dryrun):
742                 self.transport = None
743                 self.url = "http://%s:%d/" % (self.host,80)
744                 uri = "%s:%d" % (self.host,80)
745
746                 req = urllib2.Request(self.url)
747                 try:
748                         handle = urllib2.urlopen(req)
749                 except IOError, e:
750                         # NOTE: this is expected to fail initially
751                         pass
752                 else:
753                         print self.url
754                         print "-----------"
755                         print handle.read()
756                         print "-----------"
757                         return "ERROR: not protected by HTTP authentication"
758
759                 if not hasattr(e, 'code') or e.code != 401:
760                         return "ERROR: failed for: %s" % str(e)
761
762                 base64data = base64.encodestring("%s:%s" % (self.username, self.password))[:-1]
763                 # NOTE: assuming basic realm authentication.
764                 authheader = "Basic %s" % base64data
765                 req.add_header("Authorization", authheader)
766
767                 try:
768                         f = urllib2.urlopen(req)
769                 except IOError, e:
770                         # failing here means the User/passwd is wrong (hopefully)
771                         raise ExceptionPassword("Incorrect username/password")
772
773                 # TODO: after verifying that the user/password is correct, we should
774                 # actually reboot the given node.
775
776                 if not dryrun:
777                         # add data to handler,
778                         # fetch url one more time on cmd.html, econtrol.html or whatever.
779                         pass
780
781                 if self.verbose: print f.read()
782
783                 self.close()
784                 return 0
785
786
787 class ePowerSwitchOld(PCUControl):
788         def run(self, node_port, dryrun):
789                 self.url = "http://%s:%d/" % (self.host,80)
790                 uri = "%s:%d" % (self.host,80)
791
792                 # create authinfo
793                 authinfo = urllib2.HTTPPasswordMgrWithDefaultRealm()
794                 authinfo.add_password (None, uri, self.username, self.password)
795                 authhandler = urllib2.HTTPBasicAuthHandler( authinfo )
796
797                 # NOTE: it doesn't seem to matter whether this authinfo is here or not.
798                 transport = urllib2.build_opener(authinfo)
799                 f = transport.open(self.url)
800                 if self.verbose: print f.read()
801
802                 if not dryrun:
803                         transport = urllib2.build_opener(authhandler)
804                         f = transport.open(self.url + "cmd.html", "P%d=r" % node_port)
805                         if self.verbose: print f.read()
806
807                 self.close()
808                 return 0
809
810 class ePowerSwitch(PCUControl):
811         def run(self, node_port, dryrun):
812                 self.url = "http://%s:%d/" % (self.host,80)
813                 uri = "%s:%d" % (self.host,80)
814
815                 # TODO: I'm still not sure what the deal is here.
816                 #               two independent calls appear to need to be made before the
817                 #               reboot will succeed.  It doesn't seem to be possible to do
818                 #               this with a single call.  I have no idea why.
819
820                 # create authinfo
821                 authinfo = urllib2.HTTPPasswordMgrWithDefaultRealm()
822                 authinfo.add_password (None, uri, self.username, self.password)
823                 authhandler = urllib2.HTTPBasicAuthHandler( authinfo )
824
825                 # NOTE: it doesn't seem to matter whether this authinfo is here or not.
826                 transport = urllib2.build_opener()
827                 f = transport.open(self.url + "elogin.html", "pwd=%s" % self.password)
828                 if self.verbose: print f.read()
829
830                 if not dryrun:
831                         transport = urllib2.build_opener(authhandler)
832                         f = transport.open(self.url + "econtrol.html", "P%d=r" % node_port)
833                         if self.verbose: print f.read()
834
835                 #       data= "P%d=r" % node_port
836                 #self.open(self.host, self.username, self.password)
837                 #self.sendHTTP("elogin.html", "pwd=%s" % self.password)
838                 #self.sendHTTP("econtrol.html", data)
839                 #self.sendHTTP("cmd.html", data)
840
841                 self.close()
842                 return 0
843                 
844
845 ### rebooting european BlackBox PSE boxes
846 # Thierry Parmentelat - May 11 2005
847 # tested on 4-ports models known as PSE505-FR
# uses HTTP to POST the data 'P<port>=r'
849 # relies on basic authentication within http1.0
850 # first curl-based script was
851 # curl --http1.0 --basic --user <username>:<password> --data P<port>=r \
852 #       http://<hostname>:<http_port>/cmd.html && echo OK
853
854 def bbpse_reboot (pcu_ip,username,password,port_in_pcu,http_port, dryrun):
855
856         global verbose
857
858         url = "http://%s:%d/cmd.html" % (pcu_ip,http_port)
859         data= "P%d=r" % port_in_pcu
860         if verbose:
861                 logger.debug("POSTing '%s' on %s" % (data,url))
862
863         authinfo = urllib2.HTTPPasswordMgrWithDefaultRealm()
864         uri = "%s:%d" % (pcu_ip,http_port)
865         authinfo.add_password (None, uri, username, password)
866         authhandler = urllib2.HTTPBasicAuthHandler( authinfo )
867
868         opener = urllib2.build_opener(authhandler)
869         urllib2.install_opener(opener)
870
871         if (dryrun):
872                 return 0
873
874         try:
875                 f = urllib2.urlopen(url,data)
876
877                 r= f.read()
878                 if verbose:
879                         logger.debug(r)
880                 return 0
881
882         except urllib2.URLError,err:
883                 logger.info('Could not open http connection', err)
884                 return "bbpse error"
885
886 ### rebooting x10toggle based systems addressed by port
887 # Marc E. Fiuczynski - May 31 2005
888 # tested on 4-ports models known as PSE505-FR
889 # uses ssh and password to login to an account
890 # that will cause the system to be powercycled.
891
892 def x10toggle_reboot(ip, username, password, port, dryrun):
893         global verbose
894
895         ssh = None
896         try:
897                 ssh = pyssh.Ssh(username, ip)
898                 ssh.open()
899
900                 # Login
901                 telnet_answer(ssh, "password:", password)
902
903                 if not dryrun:
904                         # Reboot
905                         telnet_answer(ssh, "x10toggle>", "A%d" % port)
906
907                 # Close
908                 output = ssh.close()
909                 if verbose:
910                         logger.debug(output)
911                 return 0
912
913         except Exception, err:
914                 if verbose:
915                         logger.debug(err)
916                 if ssh:
917                         output = ssh.close()
918                         if verbose:
919                                 logger.debug(output)
920                 return errno.ETIMEDOUT
921
922 ### rebooting Dell systems via RAC card
923 # Marc E. Fiuczynski - June 01 2005
924 # tested with David Lowenthal's itchy/scratchy nodes at UGA
925 #
926
927 def runcmd(command, args, username, password, timeout = None):
928
929         result = [None]
930         result_ready = threading.Condition()
931
932         def set_result(x):
933
934                 result_ready.acquire()
935                 try:
936                         result[0] = x
937                 finally:
938                         result_ready.notify()
939                         result_ready.release()
940
941         def do_command(command, username, password):
942
943                 try:
944                         # Popen4 is a popen-type class that combines stdout and stderr
945                         p = popen2.Popen4(command)
946
947                         # read all output data
948                         p.tochild.write("%s\n" % username)
949                         p.tochild.write("%s\n" % password)
950                         p.tochild.close()
951                         data = p.fromchild.read()
952
953                         while True:
954                                 # might get interrupted by a signal in poll() or waitpid()
955                                 try:
956                                         retval = p.wait()
957                                         set_result((retval, data))
958                                         break
959                                 except OSError, ex:
960                                         if ex.errno == errno.EINTR:
961                                                 continue
962                                         raise ex
963                 except Exception, ex:
964                         set_result(ex)
965
966         if args:
967                 command = " ".join([command] + args)
968
969         worker = threading.Thread(target = do_command, args = (command, username, password, ))
970         worker.setDaemon(True)
971         result_ready.acquire()
972         worker.start()
973         result_ready.wait(timeout)
974         try:
975                 if result == [None]:
976                         raise Exception, "command timed-out: '%s'" % command
977         finally:
978                 result_ready.release()
979         result = result[0]
980
981         if isinstance(result, Exception):
982                 raise result
983         else:
984                 (retval, data) = result
985                 if os.WIFEXITED(retval) and os.WEXITSTATUS(retval) == 0:
986                         return data
987                 else:
988                         out = "system command ('%s') " % command
989                         if os.WIFEXITED(retval):
990                                 out += "failed, rc = %d" % os.WEXITSTATUS(retval)
991                         else:
992                                 out += "killed by signal %d" % os.WTERMSIG(retval)
993                         if data:
994                                 out += "; output follows:\n" + data
995                         raise Exception, out
996
997 def racadm_reboot(host, username, password, port, dryrun):
998         global verbose
999
1000         ip = socket.gethostbyname(host)
1001         try:
1002                 cmd = "/usr/sbin/racadm"
1003                 os.stat(cmd)
1004                 if not dryrun:
1005                         output = runcmd(cmd, ["-r %s -i serveraction powercycle" % ip],
1006                                 username, password)
1007                 else:
1008                         output = runcmd(cmd, ["-r %s -i getsysinfo" % ip],
1009                                 username, password)
1010
1011                 print "RUNCMD: %s" % output
1012                 if verbose:
1013                         logger.debug(output)
1014                 return 0
1015
1016         except Exception, err:
1017                 logger.debug("runcmd raised exception %s" % err)
1018                 if verbose:
1019                         logger.debug(err)
1020                 return -1
1021
def pcu_name(pcu):
        """Return the PCU's 'hostname', else its 'ip', else None.

        pcu -- mapping with 'hostname' and 'ip' keys.  A field counts as unset
               when it is None or the empty string.  'ip' is only looked up
               when 'hostname' is unset.
        """
        for field in ('hostname', 'ip'):
                value = pcu[field]
                # Compare by value: `is not ""` tested object identity, which
                # only works by interning accident and misfires for u"".
                if value is not None and value != "":
                        return value
        return None
1029
# Loaded once, at import time, into the module-level `fb`; get_pcu_values()
# reads fb['nodes'][...]['values'] from it.
# NOTE(review): presumably the saved output of the "findbadpcus" probe run --
# confirm against soltesz.dbLoad.
import soltesz
fb =soltesz.dbLoad("findbadpcus")
1032
def get_pcu_values(pcu_id):
        """Return fb['nodes']['id_<pcu_id>']['values'], or None if unavailable.

        pcu_id -- PCU identifier, formatted with %s into the "id_..." key.
        """
        try:
                return fb['nodes']["id_%s" % pcu_id]['values']
        except Exception:
                # Missing or malformed entry: report "no values".  A bare
                # `except:` would also swallow KeyboardInterrupt/SystemExit.
                return None
1042
def reboot(nodename):
        """Reboot `nodename` for real: probing enabled, dry run disabled.

        Returns whatever reboot_policy() returns.
        """
        return reboot_policy(nodename, continue_probe=True, dryrun=False)
1045         
1046 def reboot_policy(nodename, continue_probe, dryrun):
1047         global verbose
1048         print "this is a test of reboot_policy()"
1049
1050         pcu = plc.getpcu(nodename)
1051         if not pcu:
1052                 logger.debug("no pcu for %s" % hostname)
1053                 print "no pcu for %s" % hostname
1054                 return False # "%s has no pcu" % nodename
1055
1056         values = get_pcu_values(pcu['pcu_id'])
1057         if values == None:
1058                 logger.debug("No values for pcu probe %s" % hostname)
1059                 print "No values for pcu probe %s" % hostname
1060                 return False #"no info for pcu_id %s" % pcu['pcu_id']
1061         
1062         # Try the PCU first
1063         logger.debug("Trying PCU %s %s" % (pcu['hostname'], pcu['model']))
1064
1065         print "reboot_test"
1066         ret = reboot_test(nodename, values, continue_probe, verbose, dryrun)
1067
1068         if ret != 0:
1069                 print ret
1070                 return False
1071         else:
1072                 print "return true"
1073                 return True
1074
1075 def reboot_test(nodename, values, continue_probe, verbose, dryrun):
1076         rb_ret = ""
1077
1078         try:
1079                 # DataProbe iPal (many sites)
1080                 if  continue_probe and values['model'].find("Dataprobe IP-41x/IP-81x") >= 0:
1081                         ipal = IPAL(values, verbose, ['23'])
1082                         rb_ret = ipal.reboot(values[nodename], dryrun)
1083                                 
1084                 # APC Masterswitch (Berkeley)
1085                 elif continue_probe and values['model'].find("APC AP79xx/Masterswitch") >= 0:
1086                         print values
1087
1088                         # TODO: make a more robust version of APC
1089                         if values['pcu_id'] in [1163,1055,1111,1231,1113,1127,1128,1148]:
1090                                 apc = APCEurope(values, verbose, ['22', '23'])
1091                                 rb_ret = apc.reboot(values[nodename], dryrun)
1092
1093                         elif values['pcu_id'] in [1110,86]:
1094                                 apc = APCBrazil(values, verbose, ['22', '23'])
1095                                 rb_ret = apc.reboot(values[nodename], dryrun)
1096
1097                         elif values['pcu_id'] in [1221,1225]:
1098                                 apc = APCBerlin(values, verbose, ['22', '23'])
1099                                 rb_ret = apc.reboot(values[nodename], dryrun)
1100
1101                         elif values['pcu_id'] in [1173,1221,1220]:
1102                                 apc = APCFolsom(values, verbose, ['22', '23'])
1103                                 rb_ret = apc.reboot(values[nodename], dryrun)
1104
1105                         else:
1106                                 apc = APCMaster(values, verbose, ['22', '23'])
1107                                 rb_ret = apc.reboot(values[nodename], dryrun)
1108
1109                 # BayTech DS4-RPC
1110                 elif continue_probe and values['model'].find("Baytech DS4-RPC") >= 0:
1111                         if values['pcu_id'] in [1237,1052,1209,1002,1008,1041,1013,1022]:
1112                                 # These  require a 'ctrl-c' to be sent... 
1113                                 baytech = BayTechCtrlC(values, verbose, ['22', '23'])
1114                                 rb_ret = baytech.reboot(values[nodename], dryrun)
1115
1116                         elif values['pcu_id'] in [93]:
1117                                 baytech = BayTechAU(values, verbose, ['22', '23'])
1118                                 rb_ret = baytech.reboot(values[nodename], dryrun)
1119
1120                         elif values['pcu_id'] in [1057]:
1121                                 # These  require a 'ctrl-c' to be sent... 
1122                                 baytech = BayTechCtrlCUnibe(values, verbose, ['22', '23'])
1123                                 rb_ret = baytech.reboot(values[nodename], dryrun)
1124
1125                         elif values['pcu_id'] in [1012]:
1126                                 # This pcu sometimes doesn't present the 'Username' prompt,
1127                                 # unless you immediately try again...
1128                                 try:
1129                                         baytech = BayTechGeorgeTown(values, verbose, ['22', '23'])
1130                                         rb_ret = baytech.reboot(values[nodename], dryrun)
1131                                 except:
1132                                         baytech = BayTechGeorgeTown(values, verbose, ['22', '23'])
1133                                         rb_ret = baytech.reboot(values[nodename], dryrun)
1134                         else:
1135                                 baytech = BayTech(values, verbose, ['22', '23'])
1136                                 rb_ret = baytech.reboot(values[nodename], dryrun)
1137
1138                 # iLO
1139                 elif continue_probe and values['model'].find("HP iLO") >= 0:
1140                         try:
1141                                 hpilo = HPiLO(values, verbose, ['22'])
1142                                 rb_ret = hpilo.reboot(0, dryrun)
1143                                 if rb_ret != 0:
1144                                         hpilo = HPiLOHttps(values, verbose, ['443'])
1145                                         rb_ret = hpilo.reboot(0, dryrun)
1146                         except:
1147                                 hpilo = HPiLOHttps(values, verbose, ['443'])
1148                                 rb_ret = hpilo.reboot(0, dryrun)
1149
1150                 # DRAC ssh
1151                 elif continue_probe and values['model'].find("Dell RAC") >= 0:
1152                         # TODO: I don't think DRACRacAdm will throw an exception for the
1153                         # default method to catch...
1154                         try:
1155                                 drac = DRACRacAdm(values, verbose, ['443', '5869'])
1156                                 rb_ret = drac.reboot(0, dryrun)
1157                         except:
1158                                 drac = DRAC(values, verbose, ['22'])
1159                                 rb_ret = drac.reboot(0, dryrun)
1160
1161                 elif continue_probe and values['model'].find("WTI IPS-4") >= 0:
1162                                 wti = WTIIPS4(values, verbose, ['23'])
1163                                 rb_ret = wti.reboot(values[nodename], dryrun)
1164
1165                 # BlackBox PSExxx-xx (e.g. PSE505-FR)
1166                 elif continue_probe and \
1167                         (values['model'].find("BlackBox PS5xx") >= 0 or
1168                          values['model'].find("ePowerSwitch 1/4/8x") >=0 ):
1169
1170                         # TODO: allow a different port than http 80.
1171                         if values['pcu_id'] in [1089, 1071, 1046, 1035, 1118]:
1172                                 eps = ePowerSwitchGood(values, verbose, ['80'])
1173                         elif values['pcu_id'] in [1003]:
1174                                 eps = ePowerSwitch(values, verbose, ['80'])
1175                         else:
1176                                 eps = ePowerSwitchGood(values, verbose, ['80'])
1177
1178                         rb_ret = eps.reboot(values[nodename], dryrun)
1179
1180                 elif continue_probe:
1181                         rb_ret = "Unsupported_PCU"
1182
1183                 elif continue_probe == False:
1184                         if 'portstatus' in values:
1185                                 rb_ret = "NetDown"
1186                         else:
1187                                 rb_ret = "Not_Run"
1188                 else:
1189                         rb_ret = -1
1190
1191         except ExceptionPort, err:
1192                 rb_ret = str(err)
1193
1194         return rb_ret
1195         # ????
1196         #elif continue_probe and values['protocol'] == "racadm" and \
1197         #               values['model'] == "RAC":
1198         #       rb_ret = racadm_reboot(pcu_name(values),
1199         #                                                                 values['username'],
1200         #                                                                 values['password'],
1201         #                                                                 pcu[nodename],
1202         #                                                                 dryrun)
1203
1204 def main():
1205         logger.setLevel(logging.DEBUG)
1206         ch = logging.StreamHandler()
1207         ch.setLevel(logging.DEBUG)
1208         formatter = logging.Formatter('LOGGER - %(message)s')
1209         ch.setFormatter(formatter)
1210         logger.addHandler(ch)
1211
1212         try:
1213                 if "test" in sys.argv:
1214                         dryrun = True
1215                 else:
1216                         dryrun = False
1217
1218                 for node in sys.argv[1:]:
1219                         if node == "test": continue
1220
1221                         print "Rebooting %s" % node
1222                         if reboot_policy(node, True, dryrun):
1223                                 print "success"
1224                         else:
1225                                 print "failed"
1226         except Exception, err:
1227                 import traceback; traceback.print_exc()
1228                 print err
1229
if __name__ == '__main__':
        # Script entry point.  `plc` and the "monitor" logger are already bound
        # at the top of the module; they are bound again here before main() runs.
        import plc
        logger = logging.getLogger("monitor")
        main()