try:
k = SSHKnownHosts(); k.update(node); k.write(); del k
except:
+ from monitor.common import email_exception
+ email_exception()
print traceback.print_exc()
return False
else:
session = PlanetLabSession(node, config.nosetup, config.verbose)
except Exception, e:
- print "ERROR setting up session for %s" % hostname
+ msg = "ERROR setting up session for %s" % hostname
+ print msg
print traceback.print_exc()
+ from monitor.common import email_exception
+ email_exception(msg)
print e
return False
conn = session.get_connection(config)
except:
print traceback.print_exc()
+ from monitor.common import email_exception
+ email_exception()
return False
if forced_action == "reboot":
node = api.GetNodes(hostname)[0]
net = api.GetNodeNetworks(node['nodenetwork_ids'])[0]
except:
+ from monitor.common import email_exception
+ email_exception()
print traceback.print_exc()
# TODO: api error. skip email, b/c all info is not available,
# flag_set will not be recorded.
main()
except Exception, err:
print traceback.print_exc()
+ from monitor.common import email_exception
+ email_exception()
print "Exception: %s" % err
print "Saving data... exitting."
sys.exit(0)
time.sleep(1)
except Exception, err:
traceback.print_exc()
+ from monitor.common import email_exception
+ email_exception()
print "Exception: %s" % err
print "Saving data... exitting."
sys.exit(0)
return ret
except Exception,e:
+ email_exception()
print traceback.print_exc(); print e
# NOTE: this failure could be an implementation issue on
return ret
except Exception,e:
+ email_exception()
print traceback.print_exc(); print e
# NOTE: this failure could be an implementation issue on
try:
return mailmonitor.reboot(host)
except Exception, e:
+ email_exception(host)
print traceback.print_exc(); print e
return False
try:
node = api.GetNodes(host)[0]
except:
+ email_exception()
print traceback.print_exc();
print "FAILED GETNODES for host: %s" % host
continue
# todo: send thank you, etc.
mailmonitor.reboot(host)
except Exception, e:
+ email_exception()
print traceback.print_exc(); print e
continue
print "Killed by interrupt"
sys.exit(0)
except:
+ email_exception()
print traceback.print_exc();
print "Continuing..."
return l_nodes
-def email_exception():
- from monitor import config
- import traceback
- msg=traceback.format_exc()
- m=Message("exception running monitor", msg, False)
- m.send([config.cc_email])
- return
+def email_exception(content=None):
+ import config
+ from unified_model import Message
+ import traceback
+ msg=traceback.format_exc()
+ if content:
+ msg = content + "\n" + msg
+ m=Message("exception running monitor", msg, False)
+ m.send([config.cc_email])
+ return
except ExceptionTimeout:
print traceback.print_exc()
return ("", "SCRIPTTIMEOUT")
+ except:
+ from monitor.common import email_exception
+ email_exception()
def system(self, cmd, timeout=COMMAND_TIMEOUT*2):
(o,e) = self.run(cmd, timeout)
import traceback
traceback.print_exc()
return "EOF connection reset" + str(err)
+ except Exception, err:
+ from monitor.common import email_exception
+ email_exception(self.host)
+ raise Exception(err)
from pcucontrol.models import *
print "failed"
except Exception, err:
import traceback; traceback.print_exc()
+ from monitor.common import email_exception
+ email_exception(node)
print err
if __name__ == '__main__':