Modified FailureManager to abort only when critical resources fail
[nepi.git] / src / nepi / resources / all / collector.py
index 407d0bb..864750e 100644 (file)
 
 from nepi.execution.attribute import Attribute, Flags, Types
 from nepi.execution.trace import Trace, TraceAttr
-from nepi.execution.resource import ResourceManager, clsinit, ResourceState, \
-        ResourceAction
+from nepi.execution.resource import ResourceManager, clsinit_copy, \
+        ResourceState, ResourceAction, failtrap
 from nepi.util.sshfuncs import ProcStatus
 
 import os
 import tempfile
 
-@clsinit
+@clsinit_copy
 class Collector(ResourceManager):
     """ The collector is reponsible of collecting traces
     of a same type associated to RMs into a local directory.
@@ -37,11 +37,13 @@ class Collector(ResourceManager):
         :type ec: ExperimentController
         :param guid: guid of the RM
         :type guid: int
-        :param creds: Credentials to communicate with the rm (XmppClient)
-        :type creds: dict
 
     """
     _rtype = "Collector"
+    _help = "A Collector can be attached to a trace name on another " \
+        "ResourceManager and will retrieve and store the trace content " \
+        "in a local file at the end of the experiment"
+    _backend_type = "all"
 
     @classmethod
     def _register_attributes(cls):
@@ -50,9 +52,15 @@ class Collector(ResourceManager):
         store_dir = Attribute("storeDir", "Path to local directory to store trace results", 
                 default = tempfile.gettempdir(),
                 flags = Flags.ExecReadOnly)
+        sub_dir = Attribute("subDir", "Sub directory to collect traces into", 
+                flags = Flags.ExecReadOnly)
+        rename = Attribute("rename", "Name to give to the collected trace file", 
+                flags = Flags.ExecReadOnly)
 
         cls._register_attribute(trace_name)
         cls._register_attribute(store_dir)
+        cls._register_attribute(sub_dir)
+        cls._register_attribute(rename)
 
     def __init__(self, ec, guid):
         super(Collector, self).__init__(ec, guid)
@@ -61,7 +69,8 @@ class Collector(ResourceManager):
     @property
     def store_path(self):
         return self._store_path
-    
+   
+    @failtrap
     def provision(self):
         trace_name = self.get("traceName")
         if not trace_name:
@@ -73,6 +82,10 @@ class Collector(ResourceManager):
 
         store_dir = self.get("storeDir")
         self._store_path = os.path.join(store_dir, self.ec.exp_id, self.ec.run_id)
+
+        subdir = self.get("subDir")
+        if subdir:
+            self._store_path = os.path.join(self._store_path, subdir)
         
         msg = "Creating local directory at %s to store %s traces " % (
             store_dir, trace_name)
@@ -85,31 +98,40 @@ class Collector(ResourceManager):
 
         super(Collector, self).provision()
 
+    @failtrap
     def deploy(self):
-        try:
-            self.discover()
-            self.provision()
-        except:
-            self.fail()
-            raise
+        self.discover()
+        self.provision()
 
         super(Collector, self).deploy()
 
     def release(self):
-        trace_name = self.get("traceName")
-
-        msg = "Collecting '%s' traces to local directory %s" % (
-            trace_name, self.store_path)
-        self.info(msg)
-
-        rms = self.get_connected()
-        for rm in rms:
-            result = self.ec.trace(rm.guid, trace_name)
-            fpath = os.path.join(self.store_path, "%d.%s" % (rm.guid, 
-                trace_name))
-            f = open(fpath, "w")
-            f.write(result)
-            f.close()
+        try:
+            trace_name = self.get("traceName")
+            rename = self.get("rename") or trace_name
+
+            msg = "Collecting '%s' traces to local directory %s" % (
+                trace_name, self.store_path)
+            self.info(msg)
+
+            rms = self.get_connected()
+            for rm in rms:
+                result = self.ec.trace(rm.guid, trace_name)
+                fpath = os.path.join(self.store_path, "%d.%s" % (rm.guid, 
+                    rename))
+                try:
+                    f = open(fpath, "w")
+                    f.write(result)
+                    f.close()
+                except:
+                    msg = "Couldn't retrieve trace %s for %d at %s " % (trace_name, 
+                            rm.guid, fpath)
+                    self.error(msg)
+                    continue
+        except:
+            import traceback
+            err = traceback.format_exc()
+            self.error(err)
 
         super(Collector, self).release()