Added two more APC models for brazil and berlin.
[monitor.git] / reboot.py
1 #!/usr/bin/python
2 #
3 # Reboot specified nodes
4 #
5
6 import getpass, getopt
7 import os, sys
8 import xml, xmlrpclib
9 import errno, time, traceback
10 import urllib2
11 import threading, popen2
12 import array, struct
13 #from socket import *
14 import socket
15 import plc
16 import base64
17 from subprocess import PIPE, Popen
18
19 plc_lock = threading.Lock()
20
21 # Use our versions of telnetlib and pyssh
22 sys.path.insert(0, os.path.dirname(sys.argv[0]))
23 import telnetlib
24 sys.path.insert(0, os.path.dirname(sys.argv[0]) + "/pyssh")    
25 import pyssh
26
27 # Timeouts in seconds
28 TELNET_TIMEOUT = 45
29
30 # Event class ID from pcu events
31 #NODE_POWER_CONTROL = 3
32
33 # Monitor user ID
34 #MONITOR_USER_ID = 11142
35
36 import logging
37 logger = logging.getLogger("monitor")
38 verbose = 1
39 #dryrun = 0;
40
class ExceptionNoTransport(Exception):
        """Raised by Transport.ifThenSend when no transport object is open."""


class ExceptionNotFound(Exception):
        """An expected piece of text was not seen in the device output."""


class ExceptionPassword(Exception):
        """The password prompt (or the prompt right after login) was not seen."""


class ExceptionTimeout(Exception):
        """Waiting for a prompt failed; passed to Transport.ifElse by IPAL."""


class ExceptionPrompt(Exception):
        """Default error of Transport.ifThenSend: the prompt was not seen."""


class ExceptionSequence(Exception):
        """A step of a device's menu sequence did not show the expected text."""


class ExceptionReset(Exception):
        """Reset failure (not raised by the code in this part of the file)."""


class ExceptionPort(Exception):
        """No usable transport could be chosen from the PCU's open ports."""


class ExceptionUsername(Exception):
        """The username prompt was not seen while opening a telnet session."""
50
def telnet_answer(telnet, expected, buffer):
        """Wait for a prompt on a telnet session, then answer it.

        Reads from `telnet` until `expected` shows up or TELNET_TIMEOUT
        seconds have passed.  When the text that was read contains
        `expected`, `buffer` is written followed by CR LF.

        Raises ExceptionNotFound when `expected` is not in the text read.
        """
        received = telnet.read_until(expected, TELNET_TIMEOUT)
        if expected not in received:
                raise ExceptionNotFound("'%s' not found" % expected)
        telnet.write(buffer + "\r\n")
61
62
63 # PCU has model, host, preferred-port, user, passwd, 
64
65 # This is an object derived directly from the PLCAPI DB fields
class PCU(object):
        """Plain record of one PCU, copied key by key from a PLCAPI dict."""

        def __init__(self, plc_pcu_dict):
                """Copy every required key of `plc_pcu_dict` onto the instance.

                Raises Exception naming the first required key that is missing.
                """
                required = ('username', 'password', 'site_id',
                            'hostname', 'ip',
                            'pcu_id', 'model',
                            'node_ids', 'ports')
                for name in required:
                        if name not in plc_pcu_dict:
                                raise Exception("No such field %s in PCU object" % name)
                        setattr(self, name, plc_pcu_dict[name])
76
77 # These are the convenience functions built around the PCU object.
class PCUModel(PCU):
        """PCU record plus convenience lookups (contact address, node port)."""

        def __init__(self, plc_pcu_dict):
                """Load the PCU fields and derive `self.host` from them."""
                PCU.__init__(self, plc_pcu_dict)
                # Address used to reach the PCU: hostname when set, else ip.
                self.host = self.pcu_name()

        def pcu_name(self):
                """Return the hostname, else the ip, else None.

                A value counts as unset when it is None or the empty string.
                """
                # Compare by value.  The previous `is not ""` tested object
                # identity, which only works when the empty value happens to
                # be the very same object as the literal.
                for candidate in (self.hostname, self.ip):
                        if candidate is not None and candidate != "":
                                return candidate
                return None

        def nodeidToPort(self, node_id):
                """Return the port listed at the same position as `node_id`.

                `node_ids` and `ports` are treated as parallel lists; the port
                of the first matching node id is returned.

                Raises Exception when `node_id` has no port.
                """
                for known_id, port in zip(self.node_ids, self.ports):
                        if known_id == node_id:
                                return port

                # %s rather than %d so that a non-integer id (None, a string)
                # still produces this error instead of a TypeError.
                raise Exception("No such Node ID: %s" % (node_id,))
98
99 # This class captures the observed pcu records from FindBadPCUs.py
class PCURecord:
        """Observed PCU state copied from a FindBadPCUs.py record dict."""

        def __init__(self, pcu_record_dict):
                """Copy every required key of `pcu_record_dict` onto the instance.

                Raises Exception naming the first required key that is missing.
                """
                # Keys stored under a different attribute name.  "reboot" is
                # not among the required fields below, so this rename is
                # currently inactive; it is kept for when the field returns.
                stored_as = {"reboot": "reboot_str"}

                for field in ('nodenames', 'portstatus',
                              'dnsmatch',
                              'complete_entry'):
                        if field not in pcu_record_dict:
                                raise Exception("No such field %s in pcu record dict" % field)
                        # The builtin setattr() works whether or not the
                        # instance exposes a __setattr__ method to call
                        # (instances of classic Python 2 classes such as this
                        # one do not, unless mixed into a new-style class).
                        setattr(self, stored_as.get(field, field),
                                pcu_record_dict[field])
112
113 class Transport:
114         TELNET = 1
115         SSH    = 2
116         HTTP   = 3
117
118         TELNET_TIMEOUT = 60
119
120         def __init__(self, type, verbose):
121                 self.type = type
122                 self.verbose = verbose
123                 self.transport = None
124
125 #       def __del__(self):
126 #               if self.transport:
127 #                       self.close()
128
129         def open(self, host, username=None, password=None, prompt="User Name"):
130                 transport = None
131
132                 if self.type == self.TELNET:
133                         transport = telnetlib.Telnet(host, timeout=self.TELNET_TIMEOUT)
134                         transport.set_debuglevel(self.verbose)
135                         if username is not None:
136                                 self.transport = transport
137                                 self.ifThenSend(prompt, username, ExceptionUsername)
138
139                 elif self.type == self.SSH:
140                         if username is not None:
141                                 transport = pyssh.Ssh(username, host)
142                                 transport.set_debuglevel(self.verbose)
143                                 transport.open()
144                                 # TODO: have an ssh set_debuglevel() also...
145                         else:
146                                 raise Exception("Username cannot be None for ssh transport.")
147                 elif self.type == self.HTTP:
148                         self.url = "http://%s:%d/" % (host,80)
149                         uri = "%s:%d" % (host,80)
150
151                         # create authinfo
152                         authinfo = urllib2.HTTPPasswordMgrWithDefaultRealm()
153                         authinfo.add_password (None, uri, username, password)
154                         authhandler = urllib2.HTTPBasicAuthHandler( authinfo )
155
156                         transport = urllib2.build_opener(authhandler)
157
158                 else:
159                         raise Exception("Unknown transport type: %s" % self.type)
160
161                 self.transport = transport
162                 return True
163
164         def close(self):
165                 if self.type == self.TELNET:
166                         self.transport.close() 
167                 elif self.type == self.SSH:
168                         self.transport.close() 
169                 elif self.type == self.HTTP:
170                         pass
171                 else:
172                         raise Exception("Unknown transport type %s" % self.type)
173                 self.transport = None
174
175         def sendHTTP(self, resource, data):
176                 if self.verbose:
177                         print "POSTing '%s' to %s" % (data,self.url + resource)
178
179                 try:
180                         f = self.transport.open(self.url + resource ,data)
181                         r = f.read()
182                         if self.verbose:
183                                 print r
184
185                 except urllib2.URLError,err:
186                         logger.info('Could not open http connection', err)
187                         return "http transport error"
188
189                 return 0
190
191         def sendPassword(self, password, prompt=None):
192                 if self.type == self.TELNET:
193                         if prompt == None:
194                                 self.ifThenSend("Password", password, ExceptionPassword)
195                         else:
196                                 self.ifThenSend(prompt, password, ExceptionPassword)
197                 elif self.type == self.SSH:
198                         self.ifThenSend("password:", password, ExceptionPassword)
199                 elif self.type == self.HTTP:
200                         pass
201                 else:
202                         raise Exception("Unknown transport type: %s" % self.type)
203
204         def ifThenSend(self, expected, buffer, ErrorClass=ExceptionPrompt):
205
206                 if self.transport != None:
207                         output = self.transport.read_until(expected, self.TELNET_TIMEOUT)
208                         if output.find(expected) == -1:
209                                 raise ErrorClass, "'%s' not found" % expected
210                         else:
211                                 self.transport.write(buffer + "\r\n")
212                 else:
213                         raise ExceptionNoTransport("transport object is type None")
214
215         def ifElse(self, expected, ErrorClass):
216                 try:
217                         self.transport.read_until(expected, self.TELNET_TIMEOUT)
218                 except:
219                         raise ErrorClass("Could not find '%s' within timeout" % expected)
220                         
221
222 class PCUControl(Transport,PCUModel,PCURecord):
223         def __init__(self, plc_pcu_record, verbose, supported_ports=[]):
224                 PCUModel.__init__(self, plc_pcu_record)
225                 PCURecord.__init__(self, plc_pcu_record)
226                 if self.portstatus:
227                         if '22' in supported_ports and self.portstatus['22'] == "open":
228                                 type = Transport.SSH
229                         elif '23' in supported_ports and self.portstatus['23'] == "open":
230                                 type = Transport.TELNET
231                         elif '80' in supported_ports and self.portstatus['80'] == "open":
232                                 type = Transport.HTTP
233                         elif '443' in supported_ports and self.portstatus['443'] == "open":
234                                 type = Transport.HTTP
235                         elif '5869' in supported_ports and self.portstatus['5869'] == "open":
236                                 # For DRAC cards.  not sure how much it's used in the
237                                 # protocol.. but racadm opens this port.
238                                 type = Transport.HTTP
239                         else:
240                                 raise ExceptionPort("Unsupported Port: No transport from open ports")
241                 Transport.__init__(self, type, verbose)
242
243         def run(self, node_port, dryrun):
244                 """ This function is to be defined by the specific PCU instance.  """
245                 pass
246                 
247         def reboot(self, node_port, dryrun):
248                 try:
249                         return self.run(node_port, dryrun)
250                 except ExceptionNotFound, err:
251                         return "error: " + str(err)
252                 except ExceptionPassword, err:
253                         return "password exception: " + str(err)
254                 except ExceptionTimeout, err:
255                         return "timeout exception: " + str(err)
256                 except ExceptionUsername, err:
257                         return "exception: no username prompt: " + str(err)
258                 except ExceptionSequence, err:
259                         return "sequence error: " + str(err)
260                 except ExceptionPrompt, err:
261                         return "prompt exception: " + str(err)
262                 except ExceptionPort, err:
263                         return "no ports exception: " + str(err)
264                 except socket.error, err:
265                         return "socket error: timeout: " + str(err)
266                 except EOFError, err:
267                         if self.verbose:
268                                 logger.debug("reboot: EOF")
269                                 logger.debug(err)
270                         self.transport.close()
271                         import traceback
272                         traceback.print_exc()
273                         return "EOF connection reset" + str(err)
274                 #except Exception, err:
275                 #       if self.verbose:
276                 #               logger.debug("reboot: Exception")
277                 #               logger.debug(err)
278                 #       if self.transport:
279                 #               self.transport.close()
280                 #       import traceback
281                 #       traceback.print_exc()
282                 #       return  "generic exception; unknown problem."
283
284                 
class IPAL(PCUControl):
        """Driver for iPAL units: password login, then "P<n>" pulses a relay."""

        def run(self, node_port, dryrun):
                """Log in and, unless `dryrun`, pulse relay `node_port`.

                Returns 0.  Prompt failures raise ExceptionNotFound,
                ExceptionPassword or ExceptionTimeout.
                """
                login_prompt = "Password >"
                command_prompt = "Enter >"

                self.open(self.host)

                # XXX Some iPals require you to hit Enter a few times first
                self.ifThenSend(login_prompt, "\r\n\r\n", ExceptionNotFound)

                # Login
                self.ifThenSend(login_prompt, self.password, ExceptionPassword)
                self.transport.write("\r\n\r\n")

                # P# - Pulse relay
                if not dryrun:
                        pulse = "P%d" % node_port
                        self.ifThenSend(command_prompt, pulse, ExceptionNotFound)

                # Get the next prompt
                self.ifElse(command_prompt, ExceptionTimeout)

                self.close()
                return 0
303                 self.close()
304                 return 0
305
306 def ipal_reboot(ip, password, port, dryrun):
307         global verbose
308         global plc_lock
309         telnet = None
310
311         try:
312                 #plc_lock.acquire()
313                 #print "lock acquired"
314
315                 #try:
316                         #telnet = telnetlib.Telnet(ip) # , timeout=TELNET_TIMEOUT)
317                 telnet = telnetlib.Telnet(ip, timeout=TELNET_TIMEOUT)
318                 #except:
319                 #       import traceback
320                 #       traceback.print_exc()
321
322
323                 telnet.set_debuglevel(verbose)
324
325                 # XXX Some iPals require you to hit Enter a few times first
326                 telnet_answer(telnet, "Password >", "\r\n\r\n")
327
328                 # Login
329                 telnet_answer(telnet, "Password >", password)
330
331                 # XXX Some iPals require you to hit Enter a few times first
332                 telnet.write("\r\n\r\n")
333
334                 # P# - Pulse relay
335                 if not dryrun:
336                         telnet_answer(telnet, "Enter >", "P%d" % port)
337
338                 telnet.read_until("Enter >", TELNET_TIMEOUT)
339
340                 # Close
341                 telnet.close()
342
343                 #print "lock released"
344                 #plc_lock.release()
345                 return 0
346
347         except EOFError, err:
348                 if verbose:
349                         logger.debug("ipal_reboot: EOF")
350                         logger.debug(err)
351                 telnet.close()
352                 import traceback
353                 traceback.print_exc()
354                 #print "lock released"
355                 #plc_lock.release()
356                 return errno.ECONNRESET
357         except socket.error, err:
358                 logger.debug("ipal_reboot: Socket Error")
359                 logger.debug(err)
360                 import traceback
361                 traceback.print_exc()
362
363                 return errno.ETIMEDOUT
364                 
365         except Exception, err:
366                 if verbose:
367                         logger.debug("ipal_reboot: Exception")
368                         logger.debug(err)
369                 if telnet:
370                         telnet.close()
371                 import traceback
372                 traceback.print_exc()
373                 #print "lock released"
374                 #plc_lock.release()
375                 return  "ipal error"
376
class APCEurope(PCUControl):
        """APC menu variant: 1, 2, <outlet>, 3 (Immediate Reboot)."""

        def run(self, node_port, dryrun):
                """Walk the menu to outlet `node_port` and confirm the reboot.

                With `dryrun` the confirmation is answered with an empty line
                (cancel) instead of YES.  Returns 0.
                """
                menu = "\r\n> "
                confirm = "Enter 'YES' to continue or <ENTER> to cancel"

                self.open(self.host, self.username)
                self.sendPassword(self.password)

                # A missing first menu right after login is reported as a
                # password problem.
                self.ifThenSend(menu, "1", ExceptionPassword)
                # The final "3" is "Immediate Reboot".
                for choice in ("2", str(node_port), "3"):
                        self.ifThenSend(menu, choice)

                if dryrun:
                        answer = ""
                else:
                        answer = "YES\r\n"
                self.ifThenSend(confirm, answer, ExceptionSequence)
                self.ifThenSend("Press <ENTER> to continue...", "", ExceptionSequence)

                self.close()
                return 0
397                 self.close()
398                 return 0
399
class APCBrazil(PCUControl):
        """APC menu variant: 1, <outlet>, 4 (Immediate Reboot)."""

        def run(self, node_port, dryrun):
                """Walk the menu to outlet `node_port` and confirm the reboot.

                With `dryrun` the confirmation is answered with an empty line
                (cancel) instead of YES.  Returns 0.
                """
                menu = "\r\n> "
                confirm = "Enter 'YES' to continue or <ENTER> to cancel"

                self.open(self.host, self.username)
                self.sendPassword(self.password)

                # A missing first menu right after login is reported as a
                # password problem.
                self.ifThenSend(menu, "1", ExceptionPassword)
                # The final "4" is "Immediate Reboot" on this model.
                for choice in (str(node_port), "4"):
                        self.ifThenSend(menu, choice)

                if dryrun:
                        answer = ""
                else:
                        answer = "YES\r\n"
                self.ifThenSend(confirm, answer, ExceptionSequence)
                self.ifThenSend("Press <ENTER> to continue...", "", ExceptionSequence)

                self.close()
                return 0
419                 self.close()
420                 return 0
421
class APCBerlin(PCUControl):
        """APC menu variant: 1, 2, 1, <outlet>, 3 (Immediate Reboot)."""

        def run(self, node_port, dryrun):
                """Walk the menu to outlet `node_port` and confirm the reboot.

                With `dryrun` the confirmation is answered with an empty line
                (cancel) instead of YES.  Returns 0.
                """
                menu = "\r\n> "
                confirm = "Enter 'YES' to continue or <ENTER> to cancel"

                self.open(self.host, self.username)
                self.sendPassword(self.password)

                # A missing first menu right after login is reported as a
                # password problem.
                self.ifThenSend(menu, "1", ExceptionPassword)
                # The final "3" is "Immediate Reboot".
                for choice in ("2", "1", str(node_port), "3"):
                        self.ifThenSend(menu, choice)

                if dryrun:
                        answer = ""
                else:
                        answer = "YES\r\n"
                self.ifThenSend(confirm, answer, ExceptionSequence)
                self.ifThenSend("Press <ENTER> to continue...", "", ExceptionSequence)

                self.close()
                return 0
443                 self.close()
444                 return 0
445
class APCFolsom(PCUControl):
        """APC menu variant: 1, 2, 1, <outlet>, 1, 3 (Immediate Reboot)."""

        def run(self, node_port, dryrun):
                """Walk the menu to outlet `node_port` and confirm the reboot.

                With `dryrun` the confirmation is answered with an empty line
                (cancel) instead of YES.  Returns 0.
                """
                menu = "\r\n> "
                confirm = "Enter 'YES' to continue or <ENTER> to cancel"

                self.open(self.host, self.username)
                self.sendPassword(self.password)

                # A missing first menu right after login is reported as a
                # password problem.
                self.ifThenSend(menu, "1", ExceptionPassword)
                # The final "3" is "Immediate Reboot".
                for choice in ("2", "1", str(node_port), "1", "3"):
                        self.ifThenSend(menu, choice)

                if dryrun:
                        answer = ""
                else:
                        answer = "YES\r\n"
                self.ifThenSend(confirm, answer, ExceptionSequence)
                self.ifThenSend("Press <ENTER> to continue...", "", ExceptionSequence)

                self.close()
                return 0
469                 self.close()
470                 return 0
471
class APCMaster(PCUControl):
        """APC menu variant: 1, 3, <outlet>, 1, 3 (Immediate Reboot)."""

        def run(self, node_port, dryrun):
                """Walk the menu to outlet `node_port` and confirm the reboot.

                With `dryrun` the confirmation is answered with an empty line
                (cancel) instead of YES.  Returns 0.
                """
                menu = "\r\n> "
                confirm = "Enter 'YES' to continue or <ENTER> to cancel"

                self.open(self.host, self.username)
                self.sendPassword(self.password)

                # 1- Device Manager.  A missing menu right after login is
                # reported as a password problem.
                self.ifThenSend(menu, "1", ExceptionPassword)

                remaining_choices = (
                        "3",                    # 3- Outlet Control/Config
                        str(node_port),         # n- Outlet n
                        "1",                    # 1- Control Outlet
                        "3",                    # 3- Immediate Reboot
                )
                for choice in remaining_choices:
                        self.ifThenSend(menu, choice)

                if dryrun:
                        answer = ""
                else:
                        answer = "YES\r\n"
                self.ifThenSend(confirm, answer, ExceptionSequence)
                self.ifThenSend("Press <ENTER> to continue...", "", ExceptionSequence)

                self.close()
                return 0
497                 self.close()
498                 return 0
499
500 class APC(PCUControl):
501         def __init__(self, plc_pcu_record, verbose):
502                 PCUControl.__init__(self, plc_pcu_record, verbose)
503
504                 self.master = APCMaster(plc_pcu_record, verbose)
505                 self.folsom = APCFolsom(plc_pcu_record, verbose)
506                 self.europe = APCEurope(plc_pcu_record, verbose)
507
508         def run(self, node_port, dryrun):
509                 try_again = True
510                 sleep_time = 1
511
512                 for pcu in [self.master, self.europe, self.folsom]:
513                         if try_again:
514                                 try:
515                                         print "-*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*"
516                                         try_again = False
517                                         print "sleeping 5"
518                                         time.sleep(sleep_time)
519                                         ret = pcu.reboot(node_port, dryrun)
520                                 except ExceptionSequence, err:
521                                         del pcu
522                                         sleep_time = 130
523                                         try_again = True
524
525                 if try_again:
526                         return "Unknown reboot sequence for APC PCU"
527                 else:
528                         return ret
529
class DRACRacAdm(PCUControl):
        """DRAC driver that hands the whole job to racadm_reboot()."""

        def run(self, node_port, dryrun):
                """Call racadm_reboot with this PCU's credentials; returns 0.

                The value returned by racadm_reboot is not looked at.
                """
                print("trying racadm_reboot...")
                host, user, secret = self.host, self.username, self.password
                racadm_reboot(host, user, secret, node_port, dryrun)
                return 0
535
536                 return 0
537
class DRAC(PCUControl):
        """DRAC driver that types commands at the "[<username>]#" prompt."""

        def run(self, node_port, dryrun):
                """Log in and power-cycle the server; returns 0.

                With `dryrun`, "getsysinfo" is sent in place of the
                power-cycle command.  `node_port` is not used.
                """
                self.open(self.host, self.username)
                self.sendPassword(self.password)

                print("logging in...")
                self.transport.write("\r\n")

                shell_prompt = "[%s]#" % self.username
                if dryrun:
                        # Testing Reboot ?
                        command = "getsysinfo"
                else:
                        # Reset this machine
                        command = "serveraction powercycle"
                self.ifThenSend(shell_prompt, command)

                self.ifThenSend(shell_prompt, "exit")

                self.close()
                return 0
554                 self.close()
555                 return 0
556
class HPiLO(PCUControl):
        """HP iLO driver using the interactive "hpiLO->" command prompt."""

        def run(self, node_port, dryrun):
                """Log in, enter system1 and reset the machine; returns 0.

                With `dryrun`, "POWER" is sent in place of "reset".
                `node_port` is not used.
                """
                system_prompt = "</system1>hpiLO->"

                self.open(self.host, self.username)
                self.sendPassword(self.password)

                # </>hpiLO->
                self.ifThenSend("</>hpiLO->", "cd system1")

                if dryrun:
                        command = "POWER"
                else:
                        # Reset this machine
                        command = "reset"
                self.ifThenSend(system_prompt, command)

                self.ifThenSend(system_prompt, "exit")

                self.close()
                return 0
574                 self.close()
575                 return 0
576
577                 
class HPiLOHttps(PCUControl):
        """HP iLO driver that runs cmdhttps/locfg.pl with canned XML requests."""

        def _locfg_errors(self, xml_file):
                """Run locfg.pl with `xml_file`; return its error messages.

                The result is every output line that contains 'MESSAGE' but
                not 'No error', joined by newlines and stripped; it is ""
                when there is nothing to report.
                """
                # Arguments are passed as a list, without a shell, so that
                # characters in the host name or credentials cannot be
                # interpreted as shell syntax.
                argv = ["cmdhttps/locfg.pl"]
                for flag, value in (("-s", self.host), ("-f", xml_file),
                                    ("-u", self.username), ("-p", self.password)):
                        argv.append(flag)
                        argv.append("%s" % value)

                p_ilo = Popen(argv, stdout=PIPE)
                sout, serr = p_ilo.communicate()

                # Same filter as: grep 'MESSAGE' | grep -v 'No error'
                complaints = [line for line in sout.split("\n")
                              if 'MESSAGE' in line and 'No error' not in line]
                return "\n".join(complaints).strip()

        def run(self, node_port, dryrun):
                """Check access with Get_Network.xml, then reset the server.

                Returns the reported error text when either request
                complains, otherwise 0.  With `dryrun` only the access check
                is made.  `node_port` is not used.
                """
                requests = ["iloxml/Get_Network.xml"]
                if not dryrun:
                        requests.append("iloxml/Reset_Server.xml")

                for xml_file in requests:
                        problem = self._locfg_errors(xml_file)
                        if problem != "":
                                print("sout: %s" % problem)
                                return problem

                return 0
609
610                 return 0
611
class BayTechGeorgeTown(PCUControl):
        """BayTech variant with an "RPC-16>" prompt and its own login prompts."""

        def run(self, node_port, dryrun):
                """Log in and reboot outlet `node_port`; returns 0.

                With `dryrun` the (Y/N) confirmation is answered with N.
                """
                shell_prompt = "RPC-16>"

                self.open(self.host, self.username, None, "Enter user name:")
                self.sendPassword(self.password, "Enter Password:")

                self.ifThenSend(shell_prompt, "Reboot %d" % node_port)

                # Reboot Outlet  N        (Y/N)?
                if dryrun:
                        answer = "N"
                else:
                        answer = "Y"
                self.ifThenSend("(Y/N)?", answer)
                self.ifThenSend(shell_prompt, "")

                self.close()
                return 0
628                 self.close()
629                 return 0
630
631 class BayTechCtrlC(PCUControl):
632         """
633                 For some reason, these units let you log in fine, but they hang
634                 indefinitely, unless you send a Ctrl-C after the password.  No idea
635                 why.
636         """
637         def run(self, node_port, dryrun):
638                 print "BayTechCtrlC %s" % self.host
639                 self.open(self.host, self.username)
640                 self.sendPassword(self.password)
641
642                 #self.transport.write('\ 3')
643                 self.transport.write("\r\n")
644                 self.transport.write(pyssh.CTRL_C)
645                 #self.transport.write(chr(3))
646                 #self.transport.write(chr(24))
647                 #self.transport.write(chr(26))
648                 #self.transport.write('\18')
649                 # Control Outlets  (5 ,1).........5
650                 self.ifThenSend("Enter Request :", "5")
651
652                 # Reboot N
653                 try:
654                         self.ifThenSend("DS-RPC>", "Reboot %d" % node_port)
655                 except ExceptionNotFound, msg:
656                         # one machine is configured to ask for a username,
657                         # even after login...
658                         print "msg: %s" % msg
659                         self.transport.write(self.username + "\r\n")
660                         self.ifThenSend("DS-RPC>", "Reboot %d" % node_port)
661                         
662
663                 # Reboot Outlet  N        (Y/N)?
664                 if dryrun:
665                         self.ifThenSend("(Y/N)?", "N")
666                 else:
667                         self.ifThenSend("(Y/N)?", "Y")
668                 self.ifThenSend("DS-RPC>", "")
669
670                 self.close()
671                 return 0
672
673 class BayTech(PCUControl):
674         def run(self, node_port, dryrun):
675                 self.open(self.host, self.username)
676                 self.sendPassword(self.password)
677
678                 # Control Outlets  (5 ,1).........5
679                 self.ifThenSend("Enter Request :", "5")
680
681                 # Reboot N
682                 try:
683                         self.ifThenSend("DS-RPC>", "Reboot %d" % node_port)
684                 except ExceptionNotFound, msg:
685                         # one machine is configured to ask for a username,
686                         # even after login...
687                         print "msg: %s" % msg
688                         self.transport.write(self.username + "\r\n")
689                         self.ifThenSend("DS-RPC>", "Reboot %d" % node_port)
690                         
691
692                 # Reboot Outlet  N        (Y/N)?
693                 if dryrun:
694                         self.ifThenSend("(Y/N)?", "N")
695                 else:
696                         self.ifThenSend("(Y/N)?", "Y")
697                 self.ifThenSend("DS-RPC>", "")
698
699                 self.close()
700                 return 0
701
702 class ePowerSwitchGood(PCUControl):
703         # NOTE:
704         #               The old code used Python's HTTPPasswordMgrWithDefaultRealm()
705         #               For some reason this both doesn't work and in some cases, actually
706         #               hangs the PCU.  Definitely not what we want.
707         #               
708         #               The code below is much simpler.  Just letting things fail first,
709         #               and then, trying again with authentication string in the header.
710         #               
711         def run(self, node_port, dryrun):
712                 self.transport = None
713                 self.url = "http://%s:%d/" % (self.host,80)
714                 uri = "%s:%d" % (self.host,80)
715
716                 req = urllib2.Request(self.url)
717                 try:
718                         handle = urllib2.urlopen(req)
719                 except IOError, e:
720                         # NOTE: this is expected to fail initially
721                         pass
722                 else:
723                         print self.url
724                         print "-----------"
725                         print handle.read()
726                         print "-----------"
727                         return "ERROR: not protected by HTTP authentication"
728
729                 if not hasattr(e, 'code') or e.code != 401:
730                         return "ERROR: failed for: %s" % str(e)
731
732                 base64data = base64.encodestring("%s:%s" % (self.username, self.password))[:-1]
733                 # NOTE: assuming basic realm authentication.
734                 authheader = "Basic %s" % base64data
735                 req.add_header("Authorization", authheader)
736
737                 try:
738                         f = urllib2.urlopen(req)
739                 except IOError, e:
740                         # failing here means the User/passwd is wrong (hopefully)
741                         raise ExceptionPassword("Incorrect username/password")
742
743                 # TODO: after verifying that the user/password is correct, we should
744                 # actually reboot the given node.
745
746                 if not dryrun:
747                         # add data to handler,
748                         # fetch url one more time on cmd.html, econtrol.html or whatever.
749                         pass
750
751                 if self.verbose: print f.read()
752
753                 self.close()
754                 return 0
755
756
757 class ePowerSwitchOld(PCUControl):
758         def run(self, node_port, dryrun):
759                 self.url = "http://%s:%d/" % (self.host,80)
760                 uri = "%s:%d" % (self.host,80)
761
762                 # create authinfo
763                 authinfo = urllib2.HTTPPasswordMgrWithDefaultRealm()
764                 authinfo.add_password (None, uri, self.username, self.password)
765                 authhandler = urllib2.HTTPBasicAuthHandler( authinfo )
766
767                 # NOTE: it doesn't seem to matter whether this authinfo is here or not.
768                 transport = urllib2.build_opener(authinfo)
769                 f = transport.open(self.url)
770                 if self.verbose: print f.read()
771
772                 if not dryrun:
773                         transport = urllib2.build_opener(authhandler)
774                         f = transport.open(self.url + "cmd.html", "P%d=r" % node_port)
775                         if self.verbose: print f.read()
776
777                 self.close()
778                 return 0
779
780 class ePowerSwitch(PCUControl):
781         def run(self, node_port, dryrun):
782                 self.url = "http://%s:%d/" % (self.host,80)
783                 uri = "%s:%d" % (self.host,80)
784
785                 # TODO: I'm still not sure what the deal is here.
786                 #               two independent calls appear to need to be made before the
787                 #               reboot will succeed.  It doesn't seem to be possible to do
788                 #               this with a single call.  I have no idea why.
789
790                 # create authinfo
791                 authinfo = urllib2.HTTPPasswordMgrWithDefaultRealm()
792                 authinfo.add_password (None, uri, self.username, self.password)
793                 authhandler = urllib2.HTTPBasicAuthHandler( authinfo )
794
795                 # NOTE: it doesn't seem to matter whether this authinfo is here or not.
796                 transport = urllib2.build_opener()
797                 f = transport.open(self.url + "elogin.html", "pwd=%s" % self.password)
798                 if self.verbose: print f.read()
799
800                 if not dryrun:
801                         transport = urllib2.build_opener(authhandler)
802                         f = transport.open(self.url + "econtrol.html", "P%d=r" % node_port)
803                         if self.verbose: print f.read()
804
805                 #       data= "P%d=r" % node_port
806                 #self.open(self.host, self.username, self.password)
807                 #self.sendHTTP("elogin.html", "pwd=%s" % self.password)
808                 #self.sendHTTP("econtrol.html", data)
809                 #self.sendHTTP("cmd.html", data)
810
811                 self.close()
812                 return 0
813                 
814
815 ### rebooting european BlackBox PSE boxes
816 # Thierry Parmentelat - May 11 2005
817 # tested on 4-ports models known as PSE505-FR
818 # uses http to POST a data 'P<port>=r'
819 # relies on basic authentication within http1.0
820 # first curl-based script was
821 # curl --http1.0 --basic --user <username>:<password> --data P<port>=r \
822 #       http://<hostname>:<http_port>/cmd.html && echo OK
823
824 def bbpse_reboot (pcu_ip,username,password,port_in_pcu,http_port, dryrun):
825
826         global verbose
827
828         url = "http://%s:%d/cmd.html" % (pcu_ip,http_port)
829         data= "P%d=r" % port_in_pcu
830         if verbose:
831                 logger.debug("POSTing '%s' on %s" % (data,url))
832
833         authinfo = urllib2.HTTPPasswordMgrWithDefaultRealm()
834         uri = "%s:%d" % (pcu_ip,http_port)
835         authinfo.add_password (None, uri, username, password)
836         authhandler = urllib2.HTTPBasicAuthHandler( authinfo )
837
838         opener = urllib2.build_opener(authhandler)
839         urllib2.install_opener(opener)
840
841         if (dryrun):
842                 return 0
843
844         try:
845                 f = urllib2.urlopen(url,data)
846
847                 r= f.read()
848                 if verbose:
849                         logger.debug(r)
850                 return 0
851
852         except urllib2.URLError,err:
853                 logger.info('Could not open http connection', err)
854                 return "bbpse error"
855
856 ### rebooting x10toggle based systems addressed by port
857 # Marc E. Fiuczynski - May 31 2005
858 # tested on 4-ports models known as PSE505-FR
859 # uses ssh and password to login to an account
860 # that will cause the system to be powercycled.
861
862 def x10toggle_reboot(ip, username, password, port, dryrun):
863         global verbose
864
865         ssh = None
866         try:
867                 ssh = pyssh.Ssh(username, ip)
868                 ssh.open()
869
870                 # Login
871                 telnet_answer(ssh, "password:", password)
872
873                 if not dryrun:
874                         # Reboot
875                         telnet_answer(ssh, "x10toggle>", "A%d" % port)
876
877                 # Close
878                 output = ssh.close()
879                 if verbose:
880                         logger.debug(output)
881                 return 0
882
883         except Exception, err:
884                 if verbose:
885                         logger.debug(err)
886                 if ssh:
887                         output = ssh.close()
888                         if verbose:
889                                 logger.debug(output)
890                 return errno.ETIMEDOUT
891
892 ### rebooting Dell systems via RAC card
893 # Marc E. Fiuczynski - June 01 2005
894 # tested with David Lowenthal's itchy/scratchy nodes at UGA
895 #
896
897 def runcmd(command, args, username, password, timeout = None):
898
899         result = [None]
900         result_ready = threading.Condition()
901
902         def set_result(x):
903
904                 result_ready.acquire()
905                 try:
906                         result[0] = x
907                 finally:
908                         result_ready.notify()
909                         result_ready.release()
910
911         def do_command(command, username, password):
912
913                 try:
914                         # Popen4 is a popen-type class that combines stdout and stderr
915                         p = popen2.Popen4(command)
916
917                         # read all output data
918                         p.tochild.write("%s\n" % username)
919                         p.tochild.write("%s\n" % password)
920                         p.tochild.close()
921                         data = p.fromchild.read()
922
923                         while True:
924                                 # might get interrupted by a signal in poll() or waitpid()
925                                 try:
926                                         retval = p.wait()
927                                         set_result((retval, data))
928                                         break
929                                 except OSError, ex:
930                                         if ex.errno == errno.EINTR:
931                                                 continue
932                                         raise ex
933                 except Exception, ex:
934                         set_result(ex)
935
936         if args:
937                 command = " ".join([command] + args)
938
939         worker = threading.Thread(target = do_command, args = (command, username, password, ))
940         worker.setDaemon(True)
941         result_ready.acquire()
942         worker.start()
943         result_ready.wait(timeout)
944         try:
945                 if result == [None]:
946                         raise Exception, "command timed-out: '%s'" % command
947         finally:
948                 result_ready.release()
949         result = result[0]
950
951         if isinstance(result, Exception):
952                 raise result
953         else:
954                 (retval, data) = result
955                 if os.WIFEXITED(retval) and os.WEXITSTATUS(retval) == 0:
956                         return data
957                 else:
958                         out = "system command ('%s') " % command
959                         if os.WIFEXITED(retval):
960                                 out += "failed, rc = %d" % os.WEXITSTATUS(retval)
961                         else:
962                                 out += "killed by signal %d" % os.WTERMSIG(retval)
963                         if data:
964                                 out += "; output follows:\n" + data
965                         raise Exception, out
966
967 def racadm_reboot(ip, username, password, port, dryrun):
968         global verbose
969
970         try:
971                 cmd = "/usr/sbin/racadm"
972                 os.stat(cmd)
973                 if not dryrun:
974                         output = runcmd(cmd, ["-r %s -i serveraction powercycle" % ip],
975                                 username, password)
976                 else:
977                         output = runcmd(cmd, ["-r %s -i getsysinfo" % ip],
978                                 username, password)
979
980                 print "RUNCMD: %s" % output
981                 if verbose:
982                         logger.debug(output)
983                 return 0
984
985         except Exception, err:
986                 logger.debug("runcmd raised exception %s" % err)
987                 if verbose:
988                         logger.debug(err)
989                 return -1
990
def pcu_name(pcu):
        """Return the PCU's hostname, falling back to its IP address.

        pcu is a mapping with 'hostname' and 'ip' keys.  The first of the two
        that is neither None nor an empty string is returned; None is
        returned when both are unset.
        """
        for field in ('hostname', 'ip'):
                value = pcu[field]
                # Compare by value: an identity test against "" is unreliable
                # (it treats an empty unicode string as a usable name).
                if value is not None and value != "":
                        return value
        return None
998
def get_pcu_values(pcu_id):
        """Look up the stored 'values' record for a PCU in the findbadpcus db.

        Returns fb['nodes']['id_<pcu_id>']['values'], or None when that path
        cannot be read from the loaded database.
        """
        # TODO: obviously, this shouldn't be loaded each time...
        import soltesz
        fb =soltesz.dbLoad("findbadpcus")

        try:
                values = fb['nodes']["id_%s" % pcu_id]['values']
        except Exception:
                # A missing or malformed entry means "no info"; unlike a bare
                # except this lets KeyboardInterrupt/SystemExit through.
                values = None

        return values
1010
def check_open_port(values, port_list):
        """Return True when any port in port_list is recorded as "open".

        values is a mapping that may hold a 'portstatus' mapping of
        port -> status string; without it the answer is False.
        """
        if 'portstatus' not in values:
                return False

        status = values['portstatus']
        for port in port_list:
                if port in status and status[port] == "open":
                        return True

        return False
1022         
def reboot_policy(nodename, continue_probe, dryrun):
        """Try to reboot a node through its PCU.

        Looks up the node's PCU with plc.getpcu(), loads the stored values
        for it and hands them to reboot_test().

        Returns True only when reboot_test() returns 0; False when the node
        has no PCU, no stored values exist, or reboot_test() returns
        anything else.
        """
        global verbose

        pcu = plc.getpcu(nodename)
        if not pcu:
                return False # "%s has no pcu" % nodename

        values = get_pcu_values(pcu['pcu_id'])
        if values is None:
                return False #"no info for pcu_id %s" % pcu['pcu_id']

        # Try the PCU first
        logger.debug("Trying PCU %s %s" % (pcu['hostname'], pcu['model']))

        # The result used to be stored in 'ret' but tested as 'rb_ret', which
        # raised NameError on every call that got this far.
        rb_ret = reboot_test(nodename, values, continue_probe, verbose, dryrun)

        return rb_ret == 0
1043
1044 def reboot_test(nodename, values, continue_probe, verbose, dryrun):
1045         rb_ret = ""
1046
1047         try:
1048                 # DataProbe iPal (many sites)
1049                 if  continue_probe and values['model'].find("Dataprobe IP-41x/IP-81x") >= 0:
1050                         ipal = IPAL(values, verbose, ['23'])
1051                         rb_ret = ipal.reboot(values[nodename], dryrun)
1052                                 
1053                 # APC Masterswitch (Berkeley)
1054                 elif continue_probe and values['model'].find("APC AP79xx/Masterswitch") >= 0:
1055
1056                         # TODO: make a more robust version of APC
1057                         if values['pcu_id'] in [1163,1055,1111,1231,1113,1127,1128,1148]:
1058                                 apc = APCEurope(values, verbose, ['22', '23'])
1059                                 rb_ret = apc.reboot(values[nodename], dryrun)
1060
1061                         elif values['pcu_id'] in [1110,86]:
1062                                 apc = APCBrazil(values, verbose, ['22', '23'])
1063                                 rb_ret = apc.reboot(values[nodename], dryrun)
1064
1065                         elif values['pcu_id'] in [1221]:
1066                                 apc = APCBerlin(values, verbose, ['22', '23'])
1067                                 rb_ret = apc.reboot(values[nodename], dryrun)
1068
1069                         elif values['pcu_id'] in [1173,1221,1220,1225]:
1070                                 apc = APCFolsom(values, verbose, ['22', '23'])
1071                                 rb_ret = apc.reboot(values[nodename], dryrun)
1072
1073                         else:
1074                                 apc = APCMaster(values, verbose, ['22', '23'])
1075                                 rb_ret = apc.reboot(values[nodename], dryrun)
1076
1077                 # BayTech DS4-RPC
1078                 elif continue_probe and values['model'].find("Baytech DS4-RPC") >= 0:
1079                         if values['pcu_id'] in [1041,1209,1025,1052,1057]:
1080                                 # These  require a 'ctrl-c' to be sent... 
1081                                 baytech = BayTechCtrlC(values, verbose, ['22', '23'])
1082                                 rb_ret = baytech.reboot(values[nodename], dryrun)
1083
1084                         elif values['pcu_id'] in [1012]:
1085                                 # This pcu sometimes doesn't present the 'Username' prompt,
1086                                 # unless you immediately try again...
1087                                 try:
1088                                         baytech = BayTechGeorgeTown(values, verbose, ['22', '23'])
1089                                         rb_ret = baytech.reboot(values[nodename], dryrun)
1090                                 except:
1091                                         baytech = BayTechGeorgeTown(values, verbose, ['22', '23'])
1092                                         rb_ret = baytech.reboot(values[nodename], dryrun)
1093                         else:
1094                                 baytech = BayTech(values, verbose, ['22', '23'])
1095                                 rb_ret = baytech.reboot(values[nodename], dryrun)
1096
1097                 # iLO
1098                 elif continue_probe and values['model'].find("HP iLO") >= 0:
1099                         hpilo = HPiLO(values, verbose, ['22'])
1100                         rb_ret = hpilo.reboot(0, dryrun)
1101                         if rb_ret != 0:
1102                                 hpilo = HPiLOHttps(values, verbose, ['443'])
1103                                 rb_ret = hpilo.reboot(0, dryrun)
1104
1105                 # DRAC ssh
1106                 elif continue_probe and values['model'].find("Dell RAC") >= 0:
1107                         # TODO: I don't think DRACRacAdm will throw an exception for the
1108                         # default method to catch...
1109                         try:
1110                                 drac = DRACRacAdm(values, verbose, ['443', '5869'])
1111                                 rb_ret = drac.reboot(0, dryrun)
1112                         except:
1113                                 drac = DRAC(values, verbose, ['22'])
1114                                 rb_ret = drac.reboot(0, dryrun)
1115
1116                 # BlackBox PSExxx-xx (e.g. PSE505-FR)
1117                 elif continue_probe and \
1118                         (values['model'].find("BlackBox PS5xx") >= 0 or
1119                          values['model'].find("ePowerSwitch 1/4/8x") >=0 ):
1120
1121                         # TODO: allow a different port than http 80.
1122                         if values['pcu_id'] in [1089, 1071, 1046, 1035, 1118]:
1123                                 eps = ePowerSwitchGood(values, verbose, ['80'])
1124                         elif values['pcu_id'] in [1003]:
1125                                 eps = ePowerSwitch(values, verbose, ['80'])
1126                         else:
1127                                 eps = ePowerSwitchGood(values, verbose, ['80'])
1128
1129                         rb_ret = eps.reboot(values[nodename], dryrun)
1130
1131                 elif continue_probe:
1132                         rb_ret = "Unsupported_PCU"
1133
1134                 elif continue_probe == False:
1135                         if 'portstatus' in values:
1136                                 rb_ret = "NetDown"
1137                         else:
1138                                 rb_ret = "Not_Run"
1139                 else:
1140                         rb_ret = -1
1141
1142         except ExceptionPort, err:
1143                 rb_ret = str(err)
1144
1145         return rb_ret
1146         # ????
1147         #elif continue_probe and values['protocol'] == "racadm" and \
1148         #               values['model'] == "RAC":
1149         #       rb_ret = racadm_reboot(pcu_name(values),
1150         #                                                                 values['username'],
1151         #                                                                 values['password'],
1152         #                                                                 pcu[nodename],
1153         #                                                                 dryrun)
1154
1155 # Returns true if rebooted via PCU
def reboot_old(nodename, dryrun):
        """Legacy reboot path: dispatch on the PCU's model and protocol fields.

        When the node has no PCU, plc.nodePOD(nodename) is called and False
        is returned.  False is also returned for an unknown or unsupported
        PCU.  Otherwise the matching *_reboot helper is called and True is
        returned.

        NOTE(review): the helper's result ('err') is never inspected, so True
        is returned even when the helper reports a failure.  The return
        conventions of apc_reboot, ipal_reboot and baytech_reboot are not
        visible here, so this is left unchanged -- confirm before relying on
        the return value.
        """
        pcu = plc.getpcu(nodename)
        if not pcu:
                plc.nodePOD(nodename)
                return False
        # Try the PCU first
        logger.debug("Trying PCU %s %s" % (pcu['hostname'], pcu['model']))

        # APC Masterswitch (Berkeley)
        if pcu['model'] == "APC Masterswitch":
                err = apc_reboot(pcu['ip'], pcu['username'],pcu['password'],
                                pcu[nodename], pcu['protocol'], dryrun)

        # DataProbe iPal (many sites)
        elif pcu['protocol'] == "telnet" and pcu['model'].find("IP-4") >= 0:
                err = ipal_reboot(pcu['ip'],pcu['password'], pcu[nodename], dryrun)

        # BayTech DS4-RPC
        elif pcu['protocol'] == "ssh" and \
        (pcu['model'].find("Baytech") >= 0 or pcu['model'].find("DS4") >= 0):
                err = baytech_reboot(pcu['ip'], pcu['username'],pcu['password'], pcu[nodename], dryrun)

        # BlackBox PSExxx-xx (e.g. PSE505-FR)
        elif pcu['protocol'] == "http" and (pcu['model'] == "bbpse"):
                err = bbpse_reboot(pcu['ip'], pcu['username'], pcu['password'], pcu[nodename],80, dryrun)

        # x10toggle
        elif pcu['protocol'] == "ssh" and (pcu['model'] == "x10toggle"):
                err = x10toggle_reboot(pcu['ip'], pcu['username'],pcu['password'], pcu[nodename], dryrun)

        # Dell RAC card
        elif pcu['protocol'] == "racadm" and (pcu['model'] == "RAC"):
                # was 'pcu_[nodename]', an undefined name that raised NameError
                err = racadm_reboot(pcu['ip'], pcu['username'],pcu['password'], pcu[nodename], dryrun)

        # Unknown or unsupported
        else:
                err = errno.EPROTONOSUPPORT
                return False
        return True
1195
1196
1197 def main():
1198         logger.setLevel(logging.DEBUG)
1199         ch = logging.StreamHandler()
1200         ch.setLevel(logging.DEBUG)
1201         formatter = logging.Formatter('LOGGER - %(message)s')
1202         ch.setFormatter(formatter)
1203         logger.addHandler(ch)
1204
1205         try:
1206                 reboot("planetlab2.cs.uchicago.edu")
1207                 reboot("alice.cs.princeton.edu")
1208         except Exception, err:
1209                 print err
1210
1211 if __name__ == '__main__':
1212         import plc
1213         logger = logging.getLogger("monitor")
1214         main()