3 # Reboot specified nodes
9 import errno, time, traceback
11 import threading, popen2
16 # Use our versions of telnetlib and pyssh
17 sys.path.insert(0, os.path.dirname(sys.argv[0]))
19 sys.path.insert(0, os.path.dirname(sys.argv[0]) + "/pyssh")
25 # Event class ID from pcu events
26 #NODE_POWER_CONTROL = 3
29 #MONITOR_USER_ID = 11142
32 logger = logging.getLogger("monitor")
# Wait for the prompt `expected` on `telnet`, then send `buffer` followed by CRLF.
#   telnet   - object providing read_until(text, timeout) and write(text); the
#              ssh-based callers in this file pass a pyssh.Ssh object instead
#   expected - prompt text to wait for
#   buffer   - reply to send once the prompt has been seen
# Raises Exception when `expected` is not present in what read_until() returned.
36 def telnet_answer(telnet, expected, buffer):
# Read up to the prompt, bounded by TELNET_TIMEOUT (defined outside this excerpt).
39 output = telnet.read_until(expected, TELNET_TIMEOUT)
# The returned text is searched again because it may not contain the prompt.
42 if output.find(expected) == -1:
43 raise Exception, "'%s' not found" % expected
# Prompt seen: send the reply terminated by CRLF.
45 telnet.write(buffer + "\r\n")
# Drive the telnet dialogue of an iPal power controller for outlet `port`.
#   ip       - address of the controller
#   password - login password (no user name is sent)
#   port     - outlet number, sent as "P<port>"
# Visible failure paths return errno.ECONNRESET or errno.ETIMEDOUT.
# NOTE(review): the `try:` line, the handler that owns the ECONNRESET return and
# the success return are not visible in this excerpt.
48 def ipal_reboot(ip, password, port):
49 global dryrun, verbose
54 telnet = telnetlib.Telnet(ip, timeout=TELNET_TIMEOUT)
# `verbose` doubles as the telnetlib debug level.
55 telnet.set_debuglevel(verbose)
57 # XXX Some iPals require you to hit Enter a few times first
58 telnet_answer(telnet, "Password >", "\r\n\r\n")
# Log in.
61 telnet_answer(telnet, "Password >", password)
# Send the per-outlet command "P<port>" at the "Enter >" prompt.
65 telnet_answer(telnet, "Enter >", "P%d" % port)
# Wait for the prompt to come back after the command.
67 telnet.read_until("Enter >", TELNET_TIMEOUT)
77 return errno.ECONNRESET
# Any other failure (including a prompt that never appeared) is reported as a timeout.
78 except Exception, err:
83 return errno.ETIMEDOUT
# Walk the telnet menu of an APC unit to issue "Immediate Reboot" for outlet `port`.
#   ip                 - address of the unit
#   username, password - telnet login credentials
#   port               - outlet number, sent as its decimal string
# Returns errno.ECONNRESET when EOFError is raised and errno.ETIMEDOUT for any
# other exception.
# NOTE(review): the `try:` line and the success return are not visible in this
# excerpt, and neither are the menu listings for some of the choices below.
86 def apc_reboot(ip, username, password, port):
87 global dryrun, verbose
92 telnet = telnetlib.Telnet(ip, timeout=TELNET_TIMEOUT)
# `verbose` doubles as the telnetlib debug level.
93 telnet.set_debuglevel(verbose)
# Log in.
96 telnet_answer(telnet, "User Name", username)
97 telnet_answer(telnet, "Password", password)
# Each step below waits for the "\r\n> " menu prompt and answers with one choice.
105 telnet_answer(telnet, "\r\n> ", "1")
107 # 1- Phase Monitor/Configuration
108 # 2- Outlet Restriction Configuration
109 # 3- Outlet Control/Config
110 # 4- Power Supply Status
112 # 3- Outlet Control/Config
113 telnet_answer(telnet, "\r\n> ", "3")
# Select the outlet by number.
120 telnet_answer(telnet, "\r\n> ", str(port))
123 # 2- Configure Outlet
126 telnet_answer(telnet, "\r\n> ", "1")
130 # 3- Immediate Reboot
136 # 3- Immediate Reboot
137 telnet_answer(telnet, "\r\n> ", "3")
# Confirm; telnet_answer() appends its own CRLF after the one already in the reply.
140 telnet_answer(telnet,
141 "Enter 'YES' to continue or <ENTER> to cancel", "YES\r\n")
# An empty reply sends just CRLF, i.e. presses <ENTER>.
142 telnet_answer(telnet,
143 "Press <ENTER> to continue...", "")
# The connection was closed by the peer.
149 except EOFError, err:
154 return errno.ECONNRESET
# Anything else, including a prompt that was never seen.
155 except Exception, err:
160 return errno.ETIMEDOUT
# Drive the ssh menu of a Baytech unit to reboot outlet `port`.
#   ip                 - address of the unit
#   username, password - ssh login credentials
#   port               - outlet number, sent as "Reboot <port>"
# Returns errno.ETIMEDOUT when any exception is raised.
# NOTE(review): the `try:` line, the success return and the condition selecting
# between the "N" and "Y" answers are not visible in this excerpt.
163 def baytech_reboot(ip, username, password, port):
164 global dryrun, verbose
# The pyssh session is handed to telnet_answer() in place of a telnet object.
169 ssh = pyssh.Ssh(username, ip)
173 telnet_answer(ssh, "password:", password)
175 # PL1 comm output (2 ,1).........1
176 # PL2 comm output (2 ,2).........2
177 # PL3 comm output (2 ,3).........3
178 # no machine (2 ,4).........4
179 # Control Outlets (5 ,1).........5
180 # Logout..........................T
182 # Control Outlets (5 ,1).........5
183 telnet_answer(ssh, "Enter Request :", "5")
# Ask the outlet-control shell to reboot the requested outlet.
186 telnet_answer(ssh, "DS-RPC>", "Reboot %d" % port)
188 # Reboot Outlet N (Y/N)?
# NOTE(review): presumably "N" is the dry-run answer and "Y" the real one — confirm.
190 telnet_answer(ssh, "(Y/N)?", "N")
192 telnet_answer(ssh, "(Y/N)?", "Y")
# Wait for the shell prompt to return; the empty reply sends just CRLF.
193 telnet_answer(ssh, "DS-RPC>", "")
201 except Exception, err:
208 return errno.ETIMEDOUT
210 ### rebooting european BlackBox PSE boxes
211 # Thierry Parmentelat - May 11 2005
212 # tested on 4-ports models known as PSE505-FR
213 # uses http to POST the data 'P<port>=r'
214 # relies on basic authentication within http1.0
215 # first curl-based script was
216 # curl --http1.0 --basic --user <username>:<password> --data P<port>=r \
217 # http://<hostname>:<http_port>/cmd.html && echo OK
219 def bbpse_reboot (pcu_ip,username,password,port_in_pcu,http_port):
221 global dryrun, verbose
223 url = "http://%s:%d/cmd.html" % (pcu_ip,http_port)
224 data= "P%d=r" % port_in_pcu
226 logger.debug("POSTing '%s' on %s" % (data,url))
228 authinfo = urllib2.HTTPPasswordMgrWithDefaultRealm()
229 uri = "%s:%d" % (pcu_ip,http_port)
230 authinfo.add_password (None, uri, username, password)
231 authhandler = urllib2.HTTPBasicAuthHandler( authinfo )
233 opener = urllib2.build_opener(authhandler)
234 urllib2.install_opener(opener)
240 f = urllib2.urlopen(url,data)
247 except urllib2.URLError,err:
248 logger.info('Could not open http connection', err)
251 ### rebooting x10toggle based systems addressed by port
252 # Marc E. Fiuczynski - May 31 2005
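253 # tested on 4-ports models known as PSE505-FR (NOTE(review): this line is identical to the one in the BlackBox PSE header above - confirm it applies to x10toggle)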
254 # uses ssh and password to login to an account
255 # that will cause the system to be powercycled.
# Log in over ssh and send "A<port>" at the "x10toggle>" prompt.
#   ip                 - address of the host to log in to
#   username, password - ssh login credentials
#   port               - port number, sent as "A<port>"
# Returns errno.ETIMEDOUT when any exception is raised.
# NOTE(review): the `try:` line and the success return are not visible in this
# excerpt.
257 def x10toggle_reboot(ip, username, password, port):
258 global dryrun, verbose
# The pyssh session is handed to telnet_answer() in place of a telnet object.
262 ssh = pyssh.Ssh(username, ip)
266 telnet_answer(ssh, "password:", password)
270 telnet_answer(ssh, "x10toggle>", "A%d" % port)
278 except Exception, err:
285 return errno.ETIMEDOUT
287 ### rebooting Dell systems via RAC card
288 # Marc E. Fiuczynski - June 01 2005
289 # tested with David Lowenthal's itchy/scratchy nodes at UGA
# Run an external command in a worker thread, feed it `username` and `password`
# on its stdin, and wait at most `timeout` seconds for it to finish.
#   command  - program to run
#   args     - list of argument strings, joined to `command` with spaces
#   username - written to the child's stdin as the first line
#   password - written to the child's stdin as the second line
#   timeout  - passed to Condition.wait(); None waits without a limit
# Raises Exception("command timed-out: ...") on the time-out path.
# NOTE(review): the header of the nested set_result() helper, several
# `try`/`except`/`else` lines and the return/raise statements are not visible in
# this excerpt.
292 def runcmd(command, args, username, password, timeout = None):
# Condition used to hand the worker's result over to the waiting caller.
295 result_ready = threading.Condition()
# Publish-and-wake sequence; presumably the body of set_result() — confirm.
299 result_ready.acquire()
303 result_ready.notify()
304 result_ready.release()
# Worker body: runs the command and reports the outcome through set_result().
306 def do_command(command, username, password):
309 # Popen4 is a popen-type class that combines stdout and stderr
310 p = popen2.Popen4(command)
312 # read all output data
# The credentials are written to the child's stdin, one per line, before reading.
313 p.tochild.write("%s\n" % username)
314 p.tochild.write("%s\n" % password)
316 data = p.fromchild.read()
319 # might get interrupted by a signal in poll() or waitpid()
322 set_result((retval, data))
# EINTR is singled out here — presumably so the interrupted call is retried; confirm.
325 if ex.errno == errno.EINTR:
328 except Exception, ex:
# Build one command string: the program followed by its arguments.
332 command = " ".join([command] + args)
334 worker = threading.Thread(target = do_command, args = (command, username, password, ))
# Daemon thread: it will not keep the interpreter alive on exit.
335 worker.setDaemon(True)
336 result_ready.acquire()
# Block until notified or until `timeout` seconds have passed.
338 result_ready.wait(timeout)
341 raise Exception, "command timed-out: '%s'" % command
343 result_ready.release()
# The result may be an exception object instead of a (status, output) pair.
346 if isinstance(result, Exception):
349 (retval, data) = result
# Success means the child exited normally with status 0.
350 if os.WIFEXITED(retval) and os.WEXITSTATUS(retval) == 0:
# Otherwise build a message naming the exit code or the killing signal.
353 out = "system command ('%s') " % command
354 if os.WIFEXITED(retval):
355 out += "failed, rc = %d" % os.WEXITSTATUS(retval)
357 out += "killed by signal %d" % os.WTERMSIG(retval)
359 out += "; output follows:\n" + data
# Ask a Dell RAC card to power-cycle its host by running /usr/sbin/racadm.
#   ip                 - address passed to racadm with -r
#   username, password - accepted by this signature; the rest of the runcmd()
#                        call is not visible in this excerpt
#   port               - accepted for signature parity with the other *_reboot
#                        functions; not used on the visible lines
# Returns errno.ETIMEDOUT when any exception is raised.
# NOTE(review): the `try:` line, the dry-run condition and the success return
# are not visible in this excerpt.
362 def racadm_reboot(ip, username, password, port):
363 global dryrun, verbose
366 cmd = "/usr/sbin/racadm"
# All racadm options travel as one pre-formatted argument string.
369 output = runcmd(cmd, ["-r %s -i serveraction powercycle" % ip],
# Placeholder output — presumably used on the dry-run path; confirm.
372 output = "dryrun of racadm command"
374 logger.debug("runcmd returned without output %s" % output)
379 except Exception, err:
380 logger.debug("runcmd raised exception %s" % err)
383 return errno.ETIMEDOUT
385 # Returns true if rebooted via PCU
386 def reboot(nodename):
387 pcu = plc.getpcu(nodename)
389 plc.nodePOD(nodename)
392 logger.debug("Trying PCU %s %s" % (pcu['hostname'], pcu['model']))
394 # APC Masterswitch (Berkeley)
395 if pcu['protocol'] == "telnet" and pcu['model'] == "APC Masterswitch":
396 err = apc_reboot(pcu['ip'], pcu['username'],pcu['password'], pcu[nodename])
398 # DataProbe iPal (many sites)
399 elif pcu['protocol'] == "telnet" and pcu['model'].find("IP-4") >= 0:
400 err = ipal_reboot(pcu['ip'],pcu['password'], pcu[nodename])
403 elif pcu['protocol'] == "ssh" and \
404 (pcu['model'].find("Baytech") >= 0 or pcu['model'].find("DS4") >= 0):
405 err = baytech_reboot(pcu['ip'], pcu['username'],pcu['password'], pcu[nodename])
407 # BlackBox PSExxx-xx (e.g. PSE505-FR)
408 elif pcu['protocol'] == "http" and (pcu['model'] == "bbpse"):
409 err = bbpse_reboot(pcu['ip'], pcu['username'], pcu['password'], pcu[nodename],80)
412 elif pcu['protocol'] == "ssh" and (pcu['model'] == "x10toggle"):
413 err = x10toggle_reboot(pcu['ip'], pcu['username'],pcu['password'], pcu[nodename])
416 elif pcu['protocol'] == "racadm" and (pcu['model'] == "RAC"):
417 err = racadm_reboot(pcu['ip'], pcu['username'],pcu['password'], pcu_[nodename])
419 # Unknown or unsupported
421 err = errno.EPROTONOSUPPORT
425 #def get_suggested(suggestion_id,db):
427 # sql= """select node_id,pcu_id from nodes where suggestion = %d """\
430 # nodes = db.query(sql).dictresult()
431 # except pg.ProgrammingError, err:
432 # print( "Database error for query: %s\n%s" % (sql,err) )
436 #def get_pcu_info(node_id,pcu_id,db):
437 # sql= """select port_number from pcu_ports where node_id = %d and pcu_id = %d """\
440 # port_number = db.query(sql).dictresult()
441 # except pg.ProgrammingError, err:
442 # print( "Database error for query: %s\n%s" % (sql,err) )
445 # sql= """select * from pcu where pcu_id = %d """\
448 # pcu = db.query(sql).dictresult()
449 # except pg.ProgrammingError, err:
450 # print( "Database error for query: %s\n%s" % (sql,err) )
453 # result = {'node_id':node_id,'pcu_id':pcu_id,'port_number':port_number[0]['port_number'],
454 # 'ip':pcu[0]['ip'],'username':pcu[0]['username'],'password':pcu[0]['password'],\
455 # 'model':pcu[0]['model'],'protocol':pcu[0]['protocol'],'hostname':pcu[0]['hostname']}
459 #def add_plc_event(node_id,err,db):
460 # site_id = plc_db_utils.get_site_from_node_id(node_id,db)
461 # message = "PCU reboot by monitor-msgs@planet-lab.org: %s" % os.strerror(err)
463 # sql = """insert into events (event_class_id,message,person_id,node_id,site_id) values """\
464 # """(%d,'%s',%d,%d,%d)""" % (NODE_POWER_CONTROL,message,MONITOR_USER_ID,node_id,site_id)
469 # except pg.ProgrammingError, err:
470 # print( "Database error for: %s\n%s" % (sql,err) )
# Send every record of the module logger, DEBUG and above, to a stream handler
# using the format "LOGGER - <message>", then try to reboot two hard-coded hosts.
# NOTE(review): the header enclosing these statements and the `try:` line are
# not visible in this excerpt.
475 logger.setLevel(logging.DEBUG)
476 ch = logging.StreamHandler()
477 ch.setLevel(logging.DEBUG)
478 formatter = logging.Formatter('LOGGER - %(message)s')
479 ch.setFormatter(formatter)
480 logger.addHandler(ch)
# Hard-coded node names — presumably a manual test run; confirm before reuse.
484 reboot("planetlab2.cs.uchicago.edu")
485 reboot("alice.cs.princeton.edu")
486 except Exception, err:
488 # used later for pretty printing
489 # pp = pprint.PrettyPrinter(indent=2)
494 # plc_db = plc_dbs.open_plc_db_write()
495 # mon_db = plc_dbs.open_mon_db()
497 # 5 = needs script reboot - fix this later
498 # nodes = get_suggested(5,mon_db)
502 # pcu = get_pcu_info(row['node_id'],row['pcu_id'],plc_db)
503 # add_plc_event(row['node_id'],err,plc_db)
# Script entry point: runs only when the file is executed directly.
505 if __name__ == '__main__':
# Fetches the logger named "monitor", the same name used for the module-level logger.
507 logger = logging.getLogger("monitor")