ResourceState, ResourceState2str
from nepi.execution.scheduler import HeapScheduler, Task, TaskStatus
from nepi.execution.trace import TraceAttr
+from nepi.util.serializer import ECSerializer, SFormats
+from nepi.util.plotter import ECPlotter, PFormats
+from nepi.util.netgraph import NetGraph, TopologyType
# TODO: use multiprocessing instead of threading
# TODO: Allow to reconnect to a running experiment instance! (reconnect mode vs deploy mode)
import logging
import os
import sys
+import tempfile
import time
import threading
import weakref
def __init__(self, ec):
self._ec = weakref.ref(ec)
self._failure_level = FailureLevel.OK
+ self._abort = False
@property
def ec(self):
@property
def abort(self):
+ return self._abort
+
+ def eval_failure(self, guid):
if self._failure_level == FailureLevel.OK:
- for guid in self.ec.resources:
- state = self.ec.state(guid)
- critical = self.ec.get(guid, "critical")
- if state == ResourceState.FAILED and critical:
- self._failure_level = FailureLevel.RM_FAILURE
- self.ec.logger.debug("RM critical failure occurred on guid %d." \
- " Setting EC FAILURE LEVEL to RM_FAILURE" % guid)
- break
+ rm = self.ec.get_resource(guid)
+ state = rm.state
+ critical = rm.get("critical")
- return self._failure_level != FailureLevel.OK
+ if state == ResourceState.FAILED and critical:
+ self._failure_level = FailureLevel.RM_FAILURE
+ self._abort = True
+ self.ec.logger.debug("RM critical failure occurred on guid %d." \
+ " Setting EC FAILURE LEVEL to RM_FAILURE" % guid)
def set_ec_failure(self):
self._failure_level = FailureLevel.EC_FAILURE
-
class ECState(object):
""" Possible states for an ExperimentController
.. note::
An experiment, or scenario, is defined by a concrete set of resources,
- behavior, configuration and interconnection of those resources.
+ and the behavior, configuration and interconnection of those resources.
The Experiment Description (ED) is a detailed representation of a
single experiment. It contains all the necessary information to
allow repeating the experiment. NEPI allows to describe
recreated (and re-run) by instantiating an EC and recreating
the same experiment description.
- In NEPI, an experiment is represented as a graph of interconnected
+ An experiment is represented as a graph of interconnected
resources. A resource is a generic concept in the sense that any
component taking part of an experiment, whether physical of
virtual, is considered a resource. A resources could be a host,
single resource. ResourceManagers are specific to a resource
type (i.e. An RM to control a Linux application will not be
the same as the RM used to control a ns-3 simulation).
- To support a new type of resource in NEPI, a new RM must be
- implemented. NEPI already provides a variety of
- RMs to control basic resources, and new can be extended from
- the existing ones.
+ To support a new type of resource, a new RM must be implemented.
+ NEPI already provides a variety of RMs to control basic resources,
+ and new can be extended from the existing ones.
Through the EC interface the user can create ResourceManagers (RMs),
configure them and interconnect them, to describe an experiment.
exp_id, which can be re-used in different ExperimentController,
and the run_id, which is unique to one ExperimentController instance, and
is automatically generated by NEPI.
-
+
"""
- def __init__(self, exp_id = None):
+ @classmethod
+ def load(cls, filepath, format = SFormats.XML):
+ serializer = ECSerializer()
+ ec = serializer.load(filepath)
+ return ec
+
+ def __init__(self, exp_id = None, local_dir = None, persist = False,
+ add_node_callback = None, add_edge_callback = None, **kwargs):
+ """ ExperimentController entity to model an execute a network
+ experiment.
+
+ :param exp_id: Human readable name to identify the experiment
+ :type exp_id: str
+
+ :param local_dir: Path to local directory where to store experiment
+ related files
+ :type local_dir: str
+
+ :param persist: Save an XML description of the experiment after
+ completion at local_dir
+ :type persist: bool
+
+ :param add_node_callback: Callback to invoke for node instantiation
+ when automatic topology creation mode is used
+ :type add_node_callback: function
+
+ :param add_edge_callback: Callback to invoke for edge instantiation
+ when automatic topology creation mode is used
+ :type add_edge_callback: function
+
+ """
super(ExperimentController, self).__init__()
# Logging
# resources used, etc)
self._exp_id = exp_id or "exp-%s" % os.urandom(8).encode('hex')
+ # Local path where to store experiment related files (results, etc)
+ if not local_dir:
+ local_dir = tempfile.gettempdir() # /tmp
+
+ self._local_dir = local_dir
+ self._exp_dir = os.path.join(local_dir, self.exp_id)
+ self._run_dir = os.path.join(self.exp_dir, self.run_id)
+
+ # If True persist the experiment controller in XML format, after completion
+ self._persist = persist
+
# generator of globally unique ids
self._guid_generator = guid.GuidGenerator()
# EC state
self._state = ECState.RUNNING
- # Blacklist file for PL nodes
- nepi_home = os.path.join(os.path.expanduser("~"), ".nepi")
- plblacklist_file = os.path.join(nepi_home, "plblacklist.txt")
- if not os.path.exists(plblacklist_file):
- if os.path.isdir(nepi_home):
- open(plblacklist_file, 'w').close()
- else:
- os.makedirs(nepi_home)
- open(plblacklist_file, 'w').close()
-
+ # Automatically construct experiment description
+ self._netgraph = None
+ if add_node_callback or add_edge_callback or kwargs.get("topology"):
+ self._build_from_netgraph(add_node_callback, add_edge_callback,
+ **kwargs)
+
# The runner is a pool of threads used to parallelize
# execution of tasks
- nthreads = int(os.environ.get("NEPI_NTHREADS", "50"))
- self._runner = ParallelRun(maxthreads = nthreads)
+ self._nthreads = 20
+ self._runner = None
# Event processing thread
self._cond = threading.Condition()
self._thread = threading.Thread(target = self._process)
self._thread.setDaemon(True)
self._thread.start()
-
+
@property
def logger(self):
""" Returns the logger instance of the Experiment Controller
"""
return self._logger
+ @property
+ def failure_level(self):
+ """ Returns the level of FAILURE of th experiment
+
+ """
+
+ return self._fm._failure_level
+
@property
def ecstate(self):
""" Returns the state of the Experiment Controller
"""
return self._run_id
+ @property
+ def nthreads(self):
+ """ Returns the number of processing nthreads used
+
+ """
+ return self._nthreads
+
+ @property
+ def local_dir(self):
+ """ Root local directory for experiment files
+
+ """
+ return self._local_dir
+
+ @property
+ def exp_dir(self):
+ """ Local directory to store results and other files related to the
+ experiment.
+
+ """
+ return self._exp_dir
+
+ @property
+ def run_dir(self):
+ """ Local directory to store results and other files related to the
+ experiment run.
+
+ """
+ return self._run_dir
+
+ @property
+ def persist(self):
+ """ If True, persists the ExperimentController to XML format upon
+ experiment completion
+
+ """
+ return self._persist
+
+ @property
+ def netgraph(self):
+ """ Return NetGraph instance if experiment description was automatically
+ generated
+
+ """
+ return self._netgraph
+
@property
def abort(self):
""" Returns True if the experiment has failed and should be interrupted,
"""
return self._fm.abort
+ def inform_failure(self, guid):
+ """ Reports a failure in a RM to the EC for evaluation
+
+ :param guid: Resource id
+ :type guid: int
+
+ """
+
+ return self._fm.eval_failure(guid)
+
def wait_finished(self, guids):
""" Blocking method that waits until all RMs in the 'guids' list
have reached a state >= STOPPED (i.e. STOPPED, FAILED or
:type guids: list
"""
-
if isinstance(guids, int):
guids = [guids]
break
# If a guid reached one of the target states, remove it from list
- guid = guids[0]
- rstate = self.state(guid)
+ guid = guids.pop()
+ rm = self.get_resource(guid)
+ rstate = rm.state
- hrrstate = ResourceState2str.get(rstate)
- hrstate = ResourceState2str.get(state)
-
if rstate >= state:
- guids.remove(guid)
- rm = self.get_resource(guid)
self.logger.debug(" %s guid %d DONE - state is %s, required is >= %s " % (
- rm.get_rtype(), guid, hrrstate, hrstate))
+ rm.get_rtype(), guid, rstate, state))
else:
# Debug...
self.logger.debug(" WAITING FOR guid %d - state is %s, required is >= %s " % (
- guid, hrrstate, hrstate))
+ guid, rstate, state))
+
+ guids.append(guid)
+
time.sleep(0.5)
-
+
+ def plot(self, dirpath = None, format= PFormats.FIGURE, show = False):
+ plotter = ECPlotter()
+ fpath = plotter.plot(self, dirpath = dirpath, format= format,
+ show = show)
+ return fpath
+
+ def serialize(self, format = SFormats.XML):
+ serializer = ECSerializer()
+ sec = serializer.load(self, format = format)
+ return sec
+
+ def save(self, dirpath = None, format = SFormats.XML):
+ if dirpath == None:
+ dirpath = self.run_dir
+
+ try:
+ os.makedirs(dirpath)
+ except OSError:
+ pass
+
+ serializer = ECSerializer()
+ path = serializer.save(self, dirpath, format = format)
+ return path
+
def get_task(self, tid):
""" Returns a task by its id
def get_resource(self, guid):
""" Returns a registered ResourceManager by its guid
- :param guid: Id of the task
+ :param guid: Id of the resource
:type guid: int
:rtype: ResourceManager
"""
- return self._resources.get(guid)
+ rm = self._resources.get(guid)
+ return rm
+
+ def get_resources_by_type(self, rtype):
+ """ Returns the ResourceManager objects of type rtype
+
+ :param rtype: Resource type
+ :type rtype: string
+
+ :rtype: list of ResourceManagers
+
+ """
+ rms = []
+ for guid, rm in self._resources.iteritems():
+ if rm.get_rtype() == rtype:
+ rms.append(rm)
+ return rms
+
+ def remove_resource(self, guid):
+ del self._resources[guid]
@property
def resources(self):
- """ Returns the set() of guids of all the ResourceManager
+ """ Returns the guids of all ResourceManagers
:return: Set of all RM guids
- :rtype: set
+ :rtype: list
"""
- return self._resources.keys()
+ keys = self._resources.keys()
+
+ return keys
+
+ def filter_resources(self, rtype):
+ """ Returns the guids of all ResourceManagers of type rtype
+
+ :param rtype: Resource type
+ :type rtype: string
+
+ :rtype: list of guids
+
+ """
+ rms = []
+ for guid, rm in self._resources.iteritems():
+ if rm.get_rtype() == rtype:
+ rms.append(rm.guid)
+ return rms
def register_resource(self, rtype, guid = None):
""" Registers a new ResourceManager of type 'rtype' in the experiment
if not guids:
# If no guids list was passed, all 'NEW' RMs will be deployed
guids = []
- for guid in self.resources:
- if self.state(guid) == ResourceState.NEW:
+ for guid, rm in self._resources.iteritems():
+ if rm.state == ResourceState.NEW:
guids.append(guid)
if isinstance(guids, int):
self.wait_released(guids)
+ if self.persist:
+ self.save()
+
for guid in guids:
if self.get(guid, "hardRelease"):
- del self._resources[guid]
+ self.remove_resource(guid)
def shutdown(self):
""" Releases all resources and stops the ExperimentController
"""
+ self._nthreads = int(os.environ.get("NEPI_NTHREADS", str(self._nthreads)))
+ self._runner = ParallelRun(maxthreads = self.nthreads)
self._runner.start()
while not self._stop:
self._cond.notify()
self._cond.release()
+ def _build_from_netgraph(self, add_node_callback, add_edge_callback,
+ **kwargs):
+ """ Automates experiment description using a NetGraph instance.
+ """
+ self._netgraph = NetGraph(**kwargs)
+
+ if add_node_callback:
+ ### Add resources to the EC
+ for nid in self.netgraph.nodes():
+ add_node_callback(self, nid)
+
+ if add_edge_callback:
+ #### Add connections between resources
+ for nid1, nid2 in self.netgraph.edges():
+ add_edge_callback(self, nid1, nid2)
+