Updated so that plc.py can also be used conveniently from the command line
[monitor.git] / reboot.py
1 #!/usr/bin/python
2 #
3 # Reboot specified nodes
4 #
5
6 import getpass, getopt
7 import os, sys
8 import xml, xmlrpclib
9 import errno, time, traceback
10 import urllib2
11 import threading, popen2
12 import array, struct
13 from socket import *
14 import plc
15
16 # Use our versions of telnetlib and pyssh
17 sys.path.insert(0, os.path.dirname(sys.argv[0]))
18 import telnetlib
19 sys.path.insert(0, os.path.dirname(sys.argv[0]) + "/pyssh")    
20 import pyssh
21
# Seconds to wait for a connection and for each expected prompt
TELNET_TIMEOUT = 20

# Event class ID from pcu events (only used by the commented-out
# add_plc_event code)
#NODE_POWER_CONTROL = 3

# Monitor user ID (only used by the commented-out add_plc_event code)
#MONITOR_USER_ID = 11142

import logging

# Shared "monitor" logger used by every reboot driver in this file
logger = logging.getLogger("monitor")

# Non-zero: drivers log device output and errors at debug level; the same
# value is handed to set_debuglevel() on telnet sessions
verbose = 1

# Non-zero: drivers go through the login steps but do not issue the
# actual power-cycle command
dryrun = 0
35
def telnet_answer(telnet, expected, buffer):
        """Wait for a prompt on an open session, then send the reply to it.

        telnet   -- session object providing read_until() and write(); the
                    ssh based drivers in this file pass a pyssh.Ssh here too
        expected -- text that has to show up within TELNET_TIMEOUT seconds
        buffer   -- reply to send; a CR LF pair is appended to it

        Raises Exception when the expected text is not part of what was read.
        """
        received = telnet.read_until(expected, TELNET_TIMEOUT)
        if verbose:
                logger.debug(received)

        # read_until() hands back whatever arrived before the timeout, so
        # the prompt still has to be looked for explicitly
        if expected not in received:
                raise Exception("'%s' not found" % expected)

        telnet.write(buffer + "\r\n")
46
47
48 def ipal_reboot(ip, password, port):
49         global dryrun, verbose
50
51         telnet = None
52
53         try:
54                 telnet = telnetlib.Telnet(ip, timeout=TELNET_TIMEOUT)
55                 telnet.set_debuglevel(verbose)
56
57                 # XXX Some iPals require you to hit Enter a few times first
58                 telnet_answer(telnet, "Password >", "\r\n\r\n")
59
60                 # Login
61                 telnet_answer(telnet, "Password >", password)
62
63                 # P# - Pulse relay
64                 if not dryrun:
65                         telnet_answer(telnet, "Enter >", "P%d" % port)
66
67                 telnet.read_until("Enter >", TELNET_TIMEOUT)
68
69                 # Close
70                 telnet.close()
71                 return 0
72
73         except EOFError, err:
74                 if verbose:
75                         logger.debug(err)
76                 telnet.close()
77                 return errno.ECONNRESET
78         except Exception, err:
79                 if verbose:
80                         logger.debug(err)
81                 if telnet:
82                         telnet.close()
83                 return errno.ETIMEDOUT
84
85
86 def apc_reboot(ip, username, password, port):
87         global dryrun, verbose
88
89         telnet = None
90
91         try:
92                 telnet = telnetlib.Telnet(ip, timeout=TELNET_TIMEOUT)
93                 telnet.set_debuglevel(verbose)
94
95                 # Login
96                 telnet_answer(telnet, "User Name", username)
97                 telnet_answer(telnet, "Password", password)
98
99                 # 1- Device Manager
100                 # 2- Network
101                 # 3- System
102                 # 4- Logout
103
104                 # 1- Device Manager
105                 telnet_answer(telnet, "\r\n> ", "1")
106
107                 # 1- Phase Monitor/Configuration
108                 # 2- Outlet Restriction Configuration
109                 # 3- Outlet Control/Config
110                 # 4- Power Supply Status
111
112                 # 3- Outlet Control/Config
113                 telnet_answer(telnet, "\r\n> ", "3")
114
115                 # 1- Outlet 1
116                 # 2- Outlet 2
117                 # ...
118
119                 # n- Outlet n
120                 telnet_answer(telnet, "\r\n> ", str(port))
121                 
122                 # 1- Control Outlet
123                 # 2- Configure Outlet
124
125                 # 1- Control Outlet
126                 telnet_answer(telnet, "\r\n> ", "1")
127
128                 # 1- Immediate On                         
129                 # 2- Immediate Off                       
130                 # 3- Immediate Reboot             
131                 # 4- Delayed On                         
132                 # 5- Delayed Off                           
133                 # 6- Delayed Reboot                     
134                 # 7- Cancel                                     
135
136                 # 3- Immediate Reboot             
137                 telnet_answer(telnet, "\r\n> ", "3")
138
139                 if not dryrun:
140                         telnet_answer(telnet, 
141                                 "Enter 'YES' to continue or <ENTER> to cancel", "YES\r\n")
142                         telnet_answer(telnet, 
143                                 "Press <ENTER> to continue...", "")
144
145                 # Close
146                 telnet.close()
147                 return 0
148
149         except EOFError, err:
150                 if verbose:
151                         logger.debug(err)
152                 if telnet:
153                         telnet.close()
154                 return errno.ECONNRESET
155         except Exception, err:
156                 if verbose:
157                         logger.debug(err)
158                 if telnet:
159                         telnet.close()
160                 return errno.ETIMEDOUT
161
162
163 def baytech_reboot(ip, username, password, port):
164         global dryrun, verbose
165
166         ssh = None
167
168         try:
169                 ssh = pyssh.Ssh(username, ip)
170                 ssh.open()
171
172                 # Login
173                 telnet_answer(ssh, "password:", password)
174
175                 # PL1 comm output  (2 ,1).........1
176                 # PL2 comm output  (2 ,2).........2
177                 # PL3 comm output  (2 ,3).........3
178                 # no machine       (2 ,4).........4
179                 # Control Outlets  (5 ,1).........5
180                 # Logout..........................T
181
182                 # Control Outlets  (5 ,1).........5
183                 telnet_answer(ssh, "Enter Request :", "5")
184
185                 # Reboot N
186                 telnet_answer(ssh, "DS-RPC>", "Reboot %d" % port)
187
188                 # Reboot Outlet  N        (Y/N)?
189                 if dryrun:
190                         telnet_answer(ssh, "(Y/N)?", "N")
191                 else:
192                         telnet_answer(ssh, "(Y/N)?", "Y")
193                 telnet_answer(ssh, "DS-RPC>", "")
194
195                 # Close
196                 output = ssh.close()
197                 if verbose:
198                         logger.debug(err)
199                 return 0
200
201         except Exception, err:
202                 if verbose:
203                         logger.debug(err)
204                 if ssh:
205                         output = ssh.close()
206                         if verbose:
207                                 logger.debug(err)
208                 return errno.ETIMEDOUT
209
210 ### rebooting european BlackBox PSE boxes
211 # Thierry Parmentelat - May 11 2005
212 # tested on 4-ports models known as PSE505-FR
213 # uses http to POST a data 'P<port>=r'
214 # relies on basic authentication within http1.0
215 # first curl-based script was
216 # curl --http1.0 --basic --user <username>:<password> --data P<port>=r \
217 #       http://<hostname>:<http_port>/cmd.html && echo OK
218
219 def bbpse_reboot (pcu_ip,username,password,port_in_pcu,http_port):
220
221         global dryrun, verbose
222
223         url = "http://%s:%d/cmd.html" % (pcu_ip,http_port)
224         data= "P%d=r" % port_in_pcu
225         if verbose:
226                 logger.debug("POSTing '%s' on %s" % (data,url))
227
228         authinfo = urllib2.HTTPPasswordMgrWithDefaultRealm()
229         uri = "%s:%d" % (pcu_ip,http_port)
230         authinfo.add_password (None, uri, username, password)
231         authhandler = urllib2.HTTPBasicAuthHandler( authinfo )
232
233         opener = urllib2.build_opener(authhandler)
234         urllib2.install_opener(opener)
235
236         if (dryrun):
237                 return 0
238
239         try:
240                 f = urllib2.urlopen(url,data)
241
242                 r= f.read()
243                 if verbose:
244                         logger.debug(r)
245                 return 0
246
247         except urllib2.URLError,err:
248                 logger.info('Could not open http connection', err)
249                 return -1
250
251 ### rebooting x10toggle based systems addressed by port
252 # Marc E. Fiuczynski - May 31 2005
253 # tested on 4-ports models known as PSE505-FR
254 # uses ssh and password to login to an account
255 # that will cause the system to be powercycled.
256
257 def x10toggle_reboot(ip, username, password, port):
258         global dryrun, verbose
259
260         ssh = None
261         try:
262                 ssh = pyssh.Ssh(username, ip)
263                 ssh.open()
264
265                 # Login
266                 telnet_answer(ssh, "password:", password)
267
268                 if not dryrun:
269                         # Reboot
270                         telnet_answer(ssh, "x10toggle>", "A%d" % port)
271
272                 # Close
273                 output = ssh.close()
274                 if verbose:
275                         logger.debug(output)
276                 return 0
277
278         except Exception, err:
279                 if verbose:
280                         logger.debug(err)
281                 if ssh:
282                         output = ssh.close()
283                         if verbose:
284                                 logger.debug(output)
285                 return errno.ETIMEDOUT
286
287 ### rebooting Dell systems via RAC card
288 # Marc E. Fiuczynski - June 01 2005
289 # tested with David Lowenthal's itchy/scratchy nodes at UGA
290 #
291
def runcmd(command, args, username, password, timeout = None):
        """Run an external command in a worker thread, with an optional timeout.

        command  -- program to run
        args     -- list of argument strings; when non-empty they are joined
                    onto command with single spaces
        username -- written to the child's stdin as the first line
        password -- written to the child's stdin as the second line
        timeout  -- seconds to wait for the worker; None waits indefinitely

        Returns the child's combined stdout/stderr output when it exits with
        status 0.

        Raises Exception when no result arrived within the timeout, when the
        child exits non-zero or is killed by a signal (the message includes
        the output, if any), and re-raises any exception caught in the worker.

        NOTE(review): nothing here terminates the child or the worker thread
        after a timeout; the worker is only marked as a daemon thread.
        NOTE(review): the command is handed to popen2.Popen4 as one string,
        which per the popen2 documentation goes through the shell -- confirm
        that args never carry untrusted text.
        """

        # One-element list so the nested functions can store into it
        result = [None]
        result_ready = threading.Condition()

        def set_result(x):
                """Store x as the result and wake up the waiting caller."""

                result_ready.acquire()
                try:
                        result[0] = x
                finally:
                        result_ready.notify()
                        result_ready.release()

        def do_command(command, username, password):
                """Worker body: run the command and publish (status, output) or the exception."""

                try:
                        # Popen4 is a popen-type class that combines stdout and stderr
                        p = popen2.Popen4(command)

                        # feed the credentials on stdin, then read all output data
                        p.tochild.write("%s\n" % username)
                        p.tochild.write("%s\n" % password)
                        p.tochild.close()
                        data = p.fromchild.read()

                        while True:
                                # might get interrupted by a signal in poll() or waitpid()
                                try:
                                        retval = p.wait()
                                        set_result((retval, data))
                                        break
                                except OSError, ex:
                                        # retry the wait when it was merely interrupted
                                        if ex.errno == errno.EINTR:
                                                continue
                                        raise ex
                except Exception, ex:
                        # hand the failure over to the caller's thread
                        set_result(ex)

        if args:
                command = " ".join([command] + args)

        worker = threading.Thread(target = do_command, args = (command, username, password, ))
        worker.setDaemon(True)
        # Take the lock before starting the worker: set_result() needs the
        # same lock, so it cannot notify before wait() below has released it
        result_ready.acquire()
        worker.start()
        result_ready.wait(timeout)
        try:
                # still the initial placeholder: the worker did not finish in time
                if result == [None]:
                        raise Exception, "command timed-out: '%s'" % command
        finally:
                result_ready.release()
        result = result[0]

        if isinstance(result, Exception):
                raise result
        else:
                # retval is the raw wait status, decoded with the os.W* helpers
                (retval, data) = result
                if os.WIFEXITED(retval) and os.WEXITSTATUS(retval) == 0:
                        return data
                else:
                        out = "system command ('%s') " % command
                        if os.WIFEXITED(retval):
                                out += "failed, rc = %d" % os.WEXITSTATUS(retval)
                        else:
                                out += "killed by signal %d" % os.WTERMSIG(retval)
                        if data:
                                out += "; output follows:\n" + data
                        raise Exception, out
361
362 def racadm_reboot(ip, username, password, port):
363         global dryrun, verbose
364
365         try:
366                 cmd = "/usr/sbin/racadm"
367                 os.stat(cmd)
368                 if not dryrun:
369                         output = runcmd(cmd, ["-r %s -i serveraction powercycle" % ip],
370                                 username, password)
371                 else:
372                         output = "dryrun of racadm command"
373
374                 logger.debug("runcmd returned without output %s" % output)
375                 if verbose:
376                         logger.debug(output)
377                 return 0
378
379         except Exception, err:
380                 logger.debug("runcmd raised exception %s" % err)
381                 if verbose:
382                         logger.debug(err)
383                 return errno.ETIMEDOUT
384
def reboot(nodename):
        """Reboot a node through its power control unit (PCU).

        The PCU record comes from plc.getpcu([nodename]).  It is read here as
        a mapping with the keys 'hostname', 'model', 'protocol', 'ip',
        'username' and 'password', plus an entry keyed by the node name that
        is passed to the driver as the port.

        Returns False when no PCU record is found (plc.nodePOD([nodename]) is
        called in that case) or when the protocol/model pair is unsupported.
        Returns True when one of the PCU drivers was invoked.

        NOTE(review): True is returned even when the driver reports an error;
        the error code is only logged.  Confirm with the callers whether a
        failed PCU reboot should return False instead.
        """
        pcu = plc.getpcu([nodename])
        if not pcu:
                plc.nodePOD([nodename])
                return False

        # Try the PCU first
        logger.debug("Trying PCU %s %s" % (pcu['hostname'], pcu['model']))

        protocol = pcu['protocol']
        model = pcu['model']

        # APC Masterswitch (Berkeley)
        if protocol == "telnet" and model == "APC Masterswitch":
                err = apc_reboot(pcu['ip'], pcu['username'], pcu['password'], pcu[nodename])

        # DataProbe iPal (many sites)
        elif protocol == "telnet" and model.find("IP-4") >= 0:
                err = ipal_reboot(pcu['ip'], pcu['password'], pcu[nodename])

        # BayTech DS4-RPC
        elif protocol == "ssh" and \
        (model.find("Baytech") >= 0 or model.find("DS4") >= 0):
                err = baytech_reboot(pcu['ip'], pcu['username'], pcu['password'], pcu[nodename])

        # BlackBox PSExxx-xx (e.g. PSE505-FR)
        elif protocol == "http" and model == "bbpse":
                err = bbpse_reboot(pcu['ip'], pcu['username'], pcu['password'], pcu[nodename], 80)

        # x10toggle
        elif protocol == "ssh" and model == "x10toggle":
                err = x10toggle_reboot(pcu['ip'], pcu['username'], pcu['password'], pcu[nodename])

        # Dell RAC card, driven through racadm
        elif protocol == "racadm" and model == "RAC":
                # 'pcu_' was a typo here and raised NameError on every RAC reboot
                err = racadm_reboot(pcu['ip'], pcu['username'], pcu['password'], pcu[nodename])

        # Unknown or unsupported
        else:
                return False

        # Keep a trace of driver failures instead of dropping them silently
        if err:
                logger.debug("PCU reboot of %s returned error code %s" % (nodename, err))
        return True
424
425 #def get_suggested(suggestion_id,db):
426 #
427 #       sql= """select node_id,pcu_id from nodes where suggestion = %d """\
428 #                       % (suggestion_id)
429 #       try:
430 #               nodes = db.query(sql).dictresult()
431 #       except pg.ProgrammingError, err:
432 #               print( "Database error for query: %s\n%s" % (sql,err) )
433 #               sys.exit(1)
434 #       return nodes
435
436 #def get_pcu_info(node_id,pcu_id,db):
437 #       sql= """select port_number from pcu_ports where node_id = %d and pcu_id = %d """\
438 #                       % (node_id,pcu_id)
439 #       try:
440 #          port_number = db.query(sql).dictresult()
441 #       except pg.ProgrammingError, err:
442 #               print( "Database error for query: %s\n%s" % (sql,err) )
443 #               sys.exit(1)
444 #       
445 #       sql= """select * from pcu where pcu_id = %d """\
446 #                       % (pcu_id)
447 #       try:
448 #               pcu = db.query(sql).dictresult()
449 #       except pg.ProgrammingError, err:
450 #               print( "Database error for query: %s\n%s" % (sql,err) )
451 #               sys.exit(1)
452 #
453 #       result = {'node_id':node_id,'pcu_id':pcu_id,'port_number':port_number[0]['port_number'], 
454 #                         'ip':pcu[0]['ip'],'username':pcu[0]['username'],'password':pcu[0]['password'],\
455 #                         'model':pcu[0]['model'],'protocol':pcu[0]['protocol'],'hostname':pcu[0]['hostname']}
456 #
457 #       return result
458
459 #def add_plc_event(node_id,err,db):
460 #       site_id = plc_db_utils.get_site_from_node_id(node_id,db)
461 #       message = "PCU reboot by monitor-msgs@planet-lab.org: %s" % os.strerror(err)
462 #
463 #       sql = """insert into events (event_class_id,message,person_id,node_id,site_id) values """\
464 #                 """(%d,'%s',%d,%d,%d)""" % (NODE_POWER_CONTROL,message,MONITOR_USER_ID,node_id,site_id)
465 #       print sql
466 #
467 #       try:
468 #               db.query(sql)
469 #       except pg.ProgrammingError, err:
470 #               print( "Database error for: %s\n%s" % (sql,err) )
471 #               sys.exit(1)
472
473
474 def main():
475         logger.setLevel(logging.DEBUG)
476         ch = logging.StreamHandler()
477         ch.setLevel(logging.DEBUG)
478         formatter = logging.Formatter('LOGGER - %(message)s')
479         ch.setFormatter(formatter)
480         logger.addHandler(ch)
481
482
483         try:
484                 reboot("planetlab2.cs.uchicago.edu")
485                 reboot("alice.cs.princeton.edu")
486         except Exception, err:
487                 print err
488         # used later for pretty printing
489 #       pp = pprint.PrettyPrinter(indent=2)
490
491 #       user = "Monitor"
492 #       password = None
493
494 #       plc_db = plc_dbs.open_plc_db_write()
495 #       mon_db = plc_dbs.open_mon_db()
496
497         # 5 = needs script reboot - fix this later
498 #       nodes = get_suggested(5,mon_db)
499
500 #       for row in nodes:
501                 
502 #               pcu = get_pcu_info(row['node_id'],row['pcu_id'],plc_db)
503 #               add_plc_event(row['node_id'],err,plc_db)
504
if __name__ == "__main__":
        # Script entry point.  Both names below are already bound at module
        # level; they are bound again here before main() runs.
        logger = logging.getLogger("monitor")
        import plc
        main()