+import functools
+import logging
+import os
+import random
+import sys
+import time
+import threading
+import weakref
+
class FailureLevel(object):
    """ Failure states the system can be in.

    OK means no failure has been recorded, RM_FAILURE is the level set
    when a critical resource (RM) is found in a failed state, and
    EC_FAILURE is the level set when a failure is flagged on the
    Experiment Controller (EC) itself.
    """
    # Plain integer constants, bound in a single statement.
    OK, RM_FAILURE, EC_FAILURE = 1, 2, 3
+
class FailureManager(object):
    """ The FailureManager is responsible for handling errors,
    and deciding whether an experiment should be aborted
    """

    def __init__(self, ec):
        """ Create a manager bound to an Experiment Controller.

        :param ec: Experiment Controller to watch. Only a weak reference
            is stored, so the manager does not keep the controller alive.
        """
        self._ec = weakref.ref(ec)
        self._failure_level = FailureLevel.OK

    @property
    def ec(self):
        """ Returns the Experiment Controller, or None if it no longer exists """
        return self._ec()

    @property
    def abort(self):
        """ Tells whether a failure has been recorded.

        While the recorded level is still OK, every guid in
        ``ec.resources`` is checked; the first one whose state is
        ``ResourceState.FAILED`` and whose "critical" attribute is truthy
        raises the level to RM_FAILURE. Once the level is no longer OK it
        is reported as is, without scanning the resources again.

        :return: True if the failure level is anything other than OK.
        """
        if self._failure_level == FailureLevel.OK:
            # Dereference the weak reference once: the local keeps the
            # controller alive for the whole scan, and a controller that
            # is already gone is skipped instead of raising on None.
            ec = self.ec
            if ec is not None:
                for guid in ec.resources:
                    state = ec.state(guid)
                    critical = ec.get(guid, "critical")
                    if state == ResourceState.FAILED and critical:
                        self._failure_level = FailureLevel.RM_FAILURE
                        ec.logger.debug("RM critical failure occurred on guid %d." \
                                " Setting EC FAILURE LEVEL to RM_FAILURE" % guid)
                        # One critical failure is enough to decide.
                        break

        return self._failure_level != FailureLevel.OK

    def set_ec_failure(self):
        """ Records a failure of the Experiment Controller itself """
        self._failure_level = FailureLevel.EC_FAILURE
+
+