From 58169a282f00178a05413752d07a63553f3374b1 Mon Sep 17 00:00:00 2001 From: Stephen Soltesz Date: Tue, 10 Mar 2009 20:48:06 +0000 Subject: [PATCH] add email_exception() calls throughout code. --- bootman.py | 11 ++++++++++- findbad.py | 2 ++ findbadpcu.py | 2 ++ grouprins.py | 6 ++++++ monitor/common.py | 17 ++++++++++------- monitor/util/command.py | 3 +++ pcucontrol/reboot.py | 6 ++++++ 7 files changed, 39 insertions(+), 8 deletions(-) diff --git a/bootman.py b/bootman.py index 67ce675..7ec552f 100755 --- a/bootman.py +++ b/bootman.py @@ -331,6 +331,8 @@ def reboot(hostname, config=None, forced_action=None): try: k = SSHKnownHosts(); k.update(node); k.write(); del k except: + from monitor.common import email_exception + email_exception() print traceback.print_exc() return False @@ -340,8 +342,11 @@ def reboot(hostname, config=None, forced_action=None): else: session = PlanetLabSession(node, config.nosetup, config.verbose) except Exception, e: - print "ERROR setting up session for %s" % hostname + msg = "ERROR setting up session for %s" % hostname + print msg print traceback.print_exc() + from monitor.common import email_exception + email_exception(msg) print e return False @@ -355,6 +360,8 @@ def reboot(hostname, config=None, forced_action=None): conn = session.get_connection(config) except: print traceback.print_exc() + from monitor.common import email_exception + email_exception() return False if forced_action == "reboot": @@ -793,6 +800,8 @@ def reboot(hostname, config=None, forced_action=None): node = api.GetNodes(hostname)[0] net = api.GetNodeNetworks(node['nodenetwork_ids'])[0] except: + from monitor.common import email_exception + email_exception() print traceback.print_exc() # TODO: api error. skip email, b/c all info is not available, # flag_set will not be recorded. diff --git a/findbad.py b/findbad.py index 7bb31a0..0d68845 100755 --- a/findbad.py +++ b/findbad.py @@ -175,6 +175,8 @@ if __name__ == '__main__': main() except Exception, err: print traceback.print_exc() + from monitor.common import email_exception + email_exception() print "Exception: %s" % err print "Saving data... exitting." sys.exit(0) diff --git a/findbadpcu.py b/findbadpcu.py index b63a96a..7e84513 100755 --- a/findbadpcu.py +++ b/findbadpcu.py @@ -206,6 +206,8 @@ if __name__ == '__main__': time.sleep(1) except Exception, err: traceback.print_exc() + from monitor.common import email_exception + email_exception() print "Exception: %s" % err print "Saving data... exitting." sys.exit(0) diff --git a/grouprins.py b/grouprins.py index ed6149d..433ecd3 100755 --- a/grouprins.py +++ b/grouprins.py @@ -73,6 +73,7 @@ class Reboot(object): return ret except Exception,e: + email_exception() print traceback.print_exc(); print e # NOTE: this failure could be an implementation issue on @@ -95,6 +96,7 @@ class Reboot(object): return ret except Exception,e: + email_exception() print traceback.print_exc(); print e # NOTE: this failure could be an implementation issue on @@ -138,6 +140,7 @@ class Reboot(object): try: return mailmonitor.reboot(host) except Exception, e: + email_exception(host) print traceback.print_exc(); print e return False @@ -261,6 +264,7 @@ for host in hostnames: try: node = api.GetNodes(host)[0] except: + email_exception() print traceback.print_exc(); print "FAILED GETNODES for host: %s" % host continue @@ -285,6 +289,7 @@ for host in hostnames: # todo: send thank you, etc. mailmonitor.reboot(host) except Exception, e: + email_exception() print traceback.print_exc(); print e continue @@ -355,6 +360,7 @@ for host in hostnames: print "Killed by interrupt" sys.exit(0) except: + email_exception() print traceback.print_exc(); print "Continuing..." diff --git a/monitor/common.py b/monitor/common.py index 65b82b8..8bddae1 100644 --- a/monitor/common.py +++ b/monitor/common.py @@ -212,10 +212,13 @@ def get_nodeset(config): return l_nodes -def email_exception(): - from monitor import config - import traceback - msg=traceback.format_exc() - m=Message("exception running monitor", msg, False) - m.send([config.cc_email]) - return +def email_exception(content=None): + import config + from unified_model import Message + import traceback + msg=traceback.format_exc() + if content: + msg = content + "\n" + msg + m=Message("exception running monitor", msg, False) + m.send([config.cc_email]) + return diff --git a/monitor/util/command.py b/monitor/util/command.py index da7ddae..e3e81ca 100644 --- a/monitor/util/command.py +++ b/monitor/util/command.py @@ -37,6 +37,9 @@ class CMD: except ExceptionTimeout: print traceback.print_exc() return ("", "SCRIPTTIMEOUT") + except: + from monitor.common import email_exception + email_exception() def system(self, cmd, timeout=COMMAND_TIMEOUT*2): (o,e) = self.run(cmd, timeout) diff --git a/pcucontrol/reboot.py b/pcucontrol/reboot.py index 2361d89..b762d89 100755 --- a/pcucontrol/reboot.py +++ b/pcucontrol/reboot.py @@ -330,6 +330,10 @@ class PCUControl(PCUModel,PCURecord): import traceback traceback.print_exc() return "EOF connection reset" + str(err) + except Exception, err: + from monitor.common import email_exception + email_exception(self.host) + raise Exception(err) from pcucontrol.models import * @@ -555,6 +559,8 @@ def main(): print "failed" except Exception, err: import traceback; traceback.print_exc() + from monitor.common import email_exception + email_exception(node) print err if __name__ == '__main__': -- 2.43.0