From: Stephen Soltesz
Date: Tue, 28 Jul 2009 22:16:17 +0000 (+0000)
Subject: add email notice for 'failed twice' nodes
X-Git-Tag: Monitor-3.0-19~5
X-Git-Url: http://git.onelab.eu/?p=monitor.git;a=commitdiff_plain;h=28582f7068d5ef8e74cb3b70134f682d4ab471bc

add email notice for 'failed twice' nodes
added extra bootmanager sequence string
---

diff --git a/monitor/bootman.py b/monitor/bootman.py
index 96bc740..7bd0cb3 100755
--- a/monitor/bootman.py
+++ b/monitor/bootman.py
@@ -300,7 +300,7 @@ class PlanetLabSession:
             print ret
             if ret != 0:
                 print "\tFAILED TWICE"
-                #sys.exit(1)
+                email_exception("%s rsync failed twice" % self.node)
                 raise ExceptionDoubleSSHError("Failed twice trying to login with updated ssh host key")
 
         t1 = time.time()
@@ -316,19 +316,6 @@ class PlanetLabSession:
 python Rpyc/Servers/forking_server.py &> server.log &
 echo "done" >> out.log
 EOF""")
-        #cmd = """ssh %(user)s@%(hostname)s """ + \
-        #     """'ps ax | grep Rpyc | grep -v grep | awk "{print \$1}" | xargs kill 2> /dev/null' """
-        #cmd = cmd % args
-        #if self.verbose: print cmd
-        ## TODO: Add timeout
-        #print localos.system(cmd,timeout)
-
-        ## START a new rpyc server.
-        #cmd = """ssh -n %(user)s@%(hostname)s "export PYTHONPATH=\$HOME; """ + \
-        #     """python Rpyc/Servers/forking_server.py &> server.log < /dev/null &" """
-        #cmd = cmd % args
-        #if self.verbose: print cmd
-        #print localos.system(cmd,timeout)
 
         print "setup rpyc server over ssh"
         print ssh.ret
@@ -491,6 +478,7 @@ class DebugInterface:
         for n in ["bminit-cfg-auth-bootcheckfail-authfail-exception-update-bootupdatefail-authfail-debug-validate-exception-done",
                 "bminit-cfg-auth-bootcheckfail-authfail-exception-update-bootupdatefail-authfail-debug-done",
                 "bminit-cfg-auth-bootcheckfail-authfail-exception-update-debug-validate-exception-done",
+                "bminit-cfg-auth-bootcheckfail-authfail-exception-authfail-debug-validate-exception-done",
                 ]:
             sequences.update({n: "repair_node_keys"})
 