add a better bmlog history; better late than never.
author Stephen Soltesz <soltesz@cs.princeton.edu>
Fri, 15 May 2009 23:22:21 +0000 (23:22 +0000)
committer Stephen Soltesz <soltesz@cs.princeton.edu>
Fri, 15 May 2009 23:22:21 +0000 (23:22 +0000)
plc: try to bail if we get a series of errors, but this doesn't seem to work...

bootman.py
plc.py

index f8f6d48..6dc7ac8 100755 (executable)
@@ -60,15 +60,18 @@ class NodeConnection:
                        return "unknown"
 
        def get_dmesg(self):
+               t_stamp = time.strftime("%Y-%m-%d-%H:%M")
                self.c.modules.os.system("dmesg > /var/log/dmesg.bm.log")
-               download(self.c, "/var/log/dmesg.bm.log", "log/dmesg.%s.log" % self.node)
+               download(self.c, "/var/log/dmesg.bm.log", "log/history/%s-dmesg.%s.log" % (t_stamp, self.node))
+               os.system("cp log/history/%s-dmesg.%s.log log/dmesg.%s.log" % (t_stamp, self.node, self.node))
                log = open("log/dmesg.%s.log" % self.node, 'r')
                return log
 
        def get_bootmanager_log(self):
-               download(self.c, "/tmp/bm.log", "log/bm.%s.log.gz" % self.node)
+               t_stamp = time.strftime("%Y-%m-%d-%H:%M")
+               download(self.c, "/tmp/bm.log", "log/history/%s-bm.%s.log" % (t_stamp, self.node))
                #os.system("zcat log/bm.%s.log.gz > log/bm.%s.log" % (self.node, self.node))
-               os.system("cp log/bm.%s.log.gz log/bm.%s.log" % (self.node, self.node))
+               os.system("cp log/history/%s-bm.%s.log log/bm.%s.log" % (t_stamp, self.node, self.node))
                log = open("log/bm.%s.log" % self.node, 'r')
                return log
 
diff --git a/plc.py b/plc.py
index a41aecb..6ca4695 100644 (file)
--- a/plc.py
+++ b/plc.py
@@ -45,6 +45,8 @@ except:
 
 api = xmlrpclib.Server(auth.server, verbose=False, allow_none=True)
 
+global_error_count = 0
+
 class PLC:
        def __init__(self, auth, url):
                self.auth = auth
@@ -56,7 +58,17 @@ class PLC:
                if method is None:
                        raise AssertionError("method does not exist")
 
-               return lambda *params : method(self.auth, *params)
+               try:
+                       return lambda *params : method(self.auth, *params)
+               except ProtocolError:
+                       traceback.print_exc()
+                       global_error_count += 1
+                       if global_error_count >= 10:
+                               print "maximum error count exceeded; exiting..."
+                               sys.exit(1)
+                       else:
+                               print "%s errors have occurred" % global_error_count
+                       raise Exception("ProtocolError continuing")
 
        def __repr__(self):
                return self.api.__repr__()