from nepi.util.timefuncs import tnow, tdiff, tdiffsec, stabsformat
from nepi.util.logger import Logger
+from nepi.execution.attribute import Attribute, Flags, Types
from nepi.execution.trace import TraceAttr
import copy
import logging
import os
import pkgutil
+import sys
+import threading
import weakref
reschedule_delay = "1s"
def clsinit(cls):
""" Initializes template information (i.e. attributes and traces)
- for the ResourceManager class
- """
+ on classes derived from the ResourceManager class.
+
+ It is used as a decorator in the class declaration as follows:
+
+ @clsinit
+ class MyResourceManager(ResourceManager):
+
+ ...
+
+ """
+
cls._clsinit()
return cls
def clsinit_copy(cls):
""" Initializes template information (i.e. attributes and traces)
- for the ResourceManager class, inheriting attributes and traces
- from the parent class
+ on classes derived from the ResourceManager class.
+ It differs from the clsinit method in that it forces inheritance
+ of attributes and traces from the parent class.
+
+ It is used as a decorator in the class declaration as follows:
+
+ @clsinit_copy
+ class MyResourceManager(ResourceManager):
+
+ ...
+
+
+ clsinit_copy should be preferred to clsinit when creating new
+ ResourceManager child classes.
+
"""
+
cls._clsinit_copy()
return cls
-# Decorator to invoke class initialization method
+def failtrap(func):
+    """ Decorator for RM instance methods that must set the RM state
+    to FAILED when an error is raised.
+
+    The methods that must be decorated are: discover, provision, deploy,
+    start, stop and finish.
+
+    When the decorated method raises, the formatted traceback is logged
+    through self.error, self.fail() is invoked and the original
+    exception is re-raised to the caller. When it does not raise, its
+    return value is passed through unchanged.
+
+    :param func: Instance method to wrap
+    :return: Wrapper that accepts the same arguments as func
+
+    """
+    import functools
+    import traceback
+
+    # functools.wraps keeps the name and docstring of the decorated
+    # method, so introspection and logs do not report 'wrapped'
+    @functools.wraps(func)
+    def wrapped(self, *args, **kwargs):
+        try:
+            return func(self, *args, **kwargs)
+        except:
+            # Bare except on purpose: whatever the error type, the RM
+            # must be marked as FAILED. The error is never swallowed,
+            # it is re-raised below.
+            err = traceback.format_exc()
+            self.error(err)
+            self.debug("SETTING guid %d to state FAILED" % self.guid)
+            self.fail()
+            raise
+
+    return wrapped
+
@clsinit
class ResourceManager(Logger):
+ """ Base class for all ResourceManagers.
+
+ A ResourceManager is specific to a resource type (e.g. Node,
+ Switch, Application, etc) on a specific backend (e.g. PlanetLab,
+ OMF, etc).
+
+ The ResourceManager instances are responsible for interacting with
+ and controlling concrete (physical or virtual) resources in the
+ experimental backends.
+
+ """
_rtype = "Resource"
_attributes = None
_traces = None
+ _help = None
+ _backend = None
@classmethod
def _register_attribute(cls, attr):
resource attribute
"""
+
cls._attributes[attr.name] = attr
@classmethod
resource attribute
"""
+
del cls._attributes[name]
@classmethod
resource trace
"""
+
cls._traces[trace.name] = trace
@classmethod
resource trace
"""
+
del cls._traces[name]
@classmethod
def _register_attributes(cls):
""" Resource subclasses will invoke this method to register
- resource attributes
+ resource attributes.
- """
- pass
+ This method should be overridden in the RMs that define
+ attributes.
+ """
+
+ critical = Attribute("critical",
+ "Defines whether the resource is critical. "
+ "A failure on a critical resource will interrupt "
+ "the experiment. ",
+ type = Types.Bool,
+ default = True,
+ flags = Flags.ExecReadOnly)
+
+ cls._register_attribute(critical)
+
@classmethod
def _register_traces(cls):
""" Resource subclasses will invoke this method to register
resource traces
+ This method should be overridden in the RMs that define traces.
+
"""
+
pass
@classmethod
def _clsinit(cls):
- """ ResourceManager child classes have different attributes and traces.
- Since the templates that hold the information of attributes and traces
- are 'class attribute' dictionaries, initially they all point to the
- parent class ResourceManager instances of those dictionaries.
- In order to make these templates independent from the parent's one,
- it is necessary re-initialize the corresponding dictionaries.
- This is the objective of the _clsinit method
+ """ ResourceManager classes have different attributes and traces.
+ Attribute and traces are stored in 'class attribute' dictionaries.
+ When a new ResourceManager class is created, the _clsinit method is
+ called to create a new instance of those dictionaries and initialize
+ them.
+
+ The _clsinit method is called by the clsinit decorator method.
+
"""
+
# static template for resource attributes
cls._attributes = dict()
cls._register_attributes()
@classmethod
def _clsinit_copy(cls):
- """ Same as _clsinit, except that it also inherits all attributes and traces
- from the parent class.
+ """ Same as _clsinit, except that after creating new instances of the
+ dictionaries it copies all the attributes and traces from the parent
+ class.
+
+ The _clsinit_copy method is called by the clsinit_copy decorator method.
+
"""
# static template for resource attributes
cls._attributes = copy.deepcopy(cls._attributes)
"""
return copy.deepcopy(cls._traces.values())
+ @classmethod
+ def get_help(cls):
+ """ Returns the description of the type of Resource, as stored in
+ the _help class attribute
+
+ """
+ return cls._help
+
+ @classmethod
+ def get_backend(cls):
+ """ Returns the identifier of the backend (i.e. testbed, environment)
+ for the Resource, as stored in the _backend class attribute
+
+ """
+ return cls._backend
+
def __init__(self, ec, guid):
super(ResourceManager, self).__init__(self.rtype())
self._state = ResourceState.NEW
+ # instance lock to synchronize exclusive state change methods (such
+ # as deploy and release methods), in order to prevent them from being
+ # executed at the same time
+ self._release_lock = threading.Lock()
+
@property
def guid(self):
""" Returns the global unique identifier of the RM """
@property
def ec(self):
- """ Returns the Experiment Controller """
+ """ Returns the Experiment Controller of the RM """
return self._ec()
@property
def connections(self):
- """ Returns the set of guids of connected RMs"""
+ """ Returns the set of guids of connected RMs """
return self._connections
@property
def conditions(self):
""" Returns the conditions to which the RM is subjected to.
- The object returned by this method is a dictionary indexed by
- ResourceAction."""
+ This method returns a dictionary of conditions lists indexed by
+ a ResourceAction.
+
+ """
return self._conditions
@property
def start_time(self):
- """ Returns the start time of the RM as a timestamp"""
+ """ Returns the start time of the RM as a timestamp """
return self._start_time
@property
def stop_time(self):
- """ Returns the stop time of the RM as a timestamp"""
+ """ Returns the stop time of the RM as a timestamp """
return self._stop_time
@property
def discover_time(self):
- """ Returns the time discovering was finished for the RM as a timestamp"""
+ """ Returns the discover time of the RM as a timestamp """
return self._discover_time
@property
def provision_time(self):
- """ Returns the time provisioning was finished for the RM as a timestamp"""
+ """ Returns the provision time of the RM as a timestamp """
return self._provision_time
@property
def ready_time(self):
- """ Returns the time deployment was finished for the RM as a timestamp"""
+ """ Returns the deployment time of the RM as a timestamp """
return self._ready_time
@property
def release_time(self):
- """ Returns the release time of the RM as a timestamp"""
+ """ Returns the release time of the RM as a timestamp """
return self._release_time
@property
def finish_time(self):
- """ Returns the finalization time of the RM as a timestamp"""
+ """ Returns the finalization time of the RM as a timestamp """
return self._finish_time
@property
def failed_time(self):
- """ Returns the time failure occured for the RM as a timestamp"""
+ """ Returns the time failure occurred for the RM as a timestamp """
return self._failed_time
@property
def state(self):
- """ Get the state of the current RM """
+ """ Get the current state of the RM """
return self._state
def log_message(self, msg):
:param msg: text message
:type msg: str
:rtype: str
+
"""
return " %s guid: %d - %s " % (self._rtype, self.guid, msg)
def register_connection(self, guid):
""" Registers a connection to the RM identified by guid
+ This method should not be overridden. Specific functionality
+ should be added in the do_connect method.
+
:param guid: Global unique identifier of the RM to connect to
:type guid: int
+
"""
if self.valid_connection(guid):
- self.connect(guid)
+ self.do_connect(guid)
self._connections.add(guid)
def unregister_connection(self, guid):
""" Removes a registered connection to the RM identified by guid
+
+ This method should not be overridden. Specific functionality
+ should be added in the do_disconnect method.
:param guid: Global unique identifier of the RM to disconnect from
:type guid: int
+
"""
if guid in self._connections:
- self.disconnect(guid)
+ self.do_disconnect(guid)
self._connections.remove(guid)
+ @failtrap
def discover(self):
""" Performs resource discovery.
-
- This method is resposible for selecting an individual resource
+
+ This method is responsible for selecting an individual resource
matching user requirements.
- This method should be redefined when necessary in child classes.
+
+ This method should not be overridden directly. Specific functionality
+ should be added in the do_discover method.
+
"""
- self.set_discovered()
+ with self._release_lock:
+ if self._state != ResourceState.RELEASED:
+ self.do_discover()
+ @failtrap
def provision(self):
""" Performs resource provisioning.
- This method is resposible for provisioning one resource.
+ This method is responsible for provisioning one resource.
After this method has been successfully invoked, the resource
- should be acccesible/controllable by the RM.
- This method should be redefined when necessary in child classes.
+ should be accessible/controllable by the RM.
+
+ This method should not be overridden directly. Specific functionality
+ should be added in the do_provision method.
+
"""
- self.set_provisioned()
+ with self._release_lock:
+ if self._state != ResourceState.RELEASED:
+ self.do_provision()
+ @failtrap
def start(self):
- """ Starts the resource.
-
- There is no generic start behavior for all resources.
- This method should be redefined when necessary in child classes.
+ """ Starts the RM (e.g. launch remote process).
+
+ There is no standard start behavior. Some RMs will not need to perform
+ any actions upon start.
+
+ This method should not be overridden directly. Specific functionality
+ should be added in the do_start method.
+
"""
if not self.state in [ResourceState.READY, ResourceState.STOPPED]:
self.error("Wrong state %s for start" % self.state)
return
- self.set_started()
+ with self._release_lock:
+ if self._state != ResourceState.RELEASED:
+ self.do_start()
+ @failtrap
def stop(self):
- """ Stops the resource.
-
- There is no generic stop behavior for all resources.
- This method should be redefined when necessary in child classes.
+ """ Interrupts the RM, stopping any tasks the RM was performing.
+
+ There is no standard stop behavior. Some RMs will not need to perform
+ any actions upon stop.
+
+ This method should not be overridden directly. Specific functionality
+ should be added in the do_stop method.
+
"""
if not self.state in [ResourceState.STARTED]:
self.error("Wrong state %s for stop" % self.state)
return
- self.set_stopped()
+ with self._release_lock:
+ self.do_stop()
+ @failtrap
def deploy(self):
- """ Execute all steps required for the RM to reach the state READY
+ """ Execute all steps required for the RM to reach the state READY.
+ This method is responsible for deploying the resource (and invoking
+ the discover and provision methods).
+
+ This method should not be overridden directly. Specific functionality
+ should be added in the do_deploy method.
+
"""
if self.state > ResourceState.READY:
self.error("Wrong state %s for deploy" % self.state)
return
- self.debug("----- READY ---- ")
- self.set_ready()
+ with self._release_lock:
+ if self._state != ResourceState.RELEASED:
+ self.do_deploy()
+ self.debug("----- READY ---- ")
def release(self):
- self.set_released()
-
+ """ Perform actions to free resources used by the RM.
+
+ This method is responsible for releasing resources that were
+ used during the experiment by the RM.
+
+ This method should not be overridden directly. Specific functionality
+ should be added in the do_release method.
+
+ """
+ with self._release_lock:
+ try:
+ self.do_release()
+ except:
+ import traceback
+ err = traceback.format_exc()
+ self.error(err)
+
+ self.set_released()
+ self.debug("----- RELEASED ---- ")
+
+ @failtrap
def finish(self):
- self.set_finished()
-
+ """ Sets the RM to state FINISHED.
+
+ The FINISHED state is different from STOPPED state in that it
+ should not be directly invoked by the user.
+ STOPPED indicates that the user interrupted the RM, FINISHED means
+ that the RM concluded normally the actions it was supposed to perform.
+
+ This method should not be overridden directly. Specific functionality
+ should be added in the do_finish method.
+
+ """
+ with self._release_lock:
+ if self._state != ResourceState.RELEASED:
+ self.do_finish()
+
def fail(self):
- self.set_failed()
+ """ Sets the RM to state FAILED.
+
+ This method should not be overridden directly. Specific functionality
+ should be added in the do_fail method.
+
+ """
+ with self._release_lock:
+ if self._state != ResourceState.RELEASED:
+ self.do_fail()
def set(self, name, value):
""" Set the value of the attribute
:type action: str
:param group: Group of RMs to wait for (list of guids)
:type group: int or list of int
- :param state: State to wait for on all RM in group. (either 'STARTED' or 'STOPPED')
+ :param state: State to wait for on all RM in group. (either 'STARTED', 'STOPPED' or 'READY')
:type state: str
:param time: Time to wait after 'state' is reached on all RMs in group. (e.g. '2s')
:type time: str
def unregister_condition(self, group, action = None):
""" Removed conditions for a certain group of guids
- :param action: Action to restrict to condition (either 'START' or 'STOP')
+ :param action: Action to restrict to condition (either 'START', 'STOP' or 'READY')
:type action: str
:param group: Group of RMs to wait for (list of guids)
:param group: Group of RMs to wait for (list of guids)
:type group: int or list of int
- :param state: State to wait for on all RM in group. (either 'STARTED' or 'STOPPED')
+ :param state: State to wait for on all RM in group. (either 'STARTED', 'STOPPED' or 'READY')
:type state: str
:param time: Time to wait after 'state' is reached on all RMs in group. (e.g. '2s')
:type time: str
elif state == ResourceState.STOPPED:
t = rm.stop_time
else:
- # Only keep time information for START and STOP
break
# time already elapsed since RM changed state
reschedule = False
delay = reschedule_delay
- ## evaluate if set conditions are met
+ ## evaluate if conditions to start are met
+ if self.ec.abort:
+ return
- # only can start when RM is either STOPPED or READY
+ # Can only start when RM is either STOPPED or READY
if self.state not in [ResourceState.STOPPED, ResourceState.READY]:
reschedule = True
self.debug("---- RESCHEDULING START ---- state %s " % self.state )
reschedule = False
delay = reschedule_delay
- ## evaluate if set conditions are met
+ ## evaluate if conditions to stop are met
+ if self.ec.abort:
+ return
# only can stop when RM is STARTED
if self.state != ResourceState.STARTED:
reschedule = True
+ self.debug("---- RESCHEDULING STOP ---- state %s " % self.state )
else:
self.debug(" ---- STOP CONDITIONS ---- %s" %
self.conditions.get(ResourceAction.STOP))
self.debug(" ----- STOPPING ---- ")
self.stop()
- def connect(self, guid):
+ def deploy_with_conditions(self):
+     """ Deploy the RM when all the conditions registered in
+     self.conditions for the DEPLOY action (ResourceAction.DEPLOY)
+     are satisfied.
+
+     Nothing is done when the experiment was aborted (self.ec.abort).
+     When the RM is not in state NEW, DISCOVERED or PROVISIONED, or
+     when a condition is not yet met, the method re-schedules itself
+     on the EC instead of deploying.
+
+     """
+     reschedule = False
+     delay = reschedule_delay
+
+     ## evaluate if conditions to deploy are met
+     if self.ec.abort:
+         return
+
+     # Can only deploy when RM is either NEW, DISCOVERED or PROVISIONED
+     if self.state not in [ResourceState.NEW, ResourceState.DISCOVERED,
+             ResourceState.PROVISIONED]:
+         reschedule = True
+         self.debug("---- RESCHEDULING DEPLOY ---- state %s " % self.state )
+     else:
+         deploy_conditions = self.conditions.get(ResourceAction.DEPLOY, [])
+
+         self.debug("---- DEPLOY CONDITIONS ---- %s" % deploy_conditions)
+
+         # Verify all deploy conditions are met
+         for (group, state, time) in deploy_conditions:
+             # Uncomment for debug
+             #unmet = []
+             #for guid in group:
+             #    rm = self.ec.get_resource(guid)
+             #    unmet.append((guid, rm._state))
+             #
+             #self.debug("---- WAITED STATES ---- %s" % unmet )
+
+             reschedule, delay = self._needs_reschedule(group, state, time)
+             if reschedule:
+                 # One unmet condition is enough to postpone the deploy
+                 break
+
+     if reschedule:
+         self.ec.schedule(delay, self.deploy_with_conditions)
+     else:
+         self.debug("----- DEPLOYING ---- ")
+         self.deploy()
+
+ def do_connect(self, guid):
""" Performs actions that need to be taken upon associating RMs.
This method should be redefined when necessary in child classes.
"""
pass
- def disconnect(self, guid):
+ def do_disconnect(self, guid):
""" Performs actions that need to be taken upon disassociating RMs.
This method should be redefined when necessary in child classes.
"""
"""
# TODO: Validate!
return True
-
+
+ def do_discover(self):
+ """ Default discover action: marks the RM as DISCOVERED.
+
+ Invoked by discover(). RMs redefine this method to add their
+ specific discovery functionality.
+
+ """
+ self.set_discovered()
+
+ def do_provision(self):
+ """ Default provision action: marks the RM as PROVISIONED.
+
+ Invoked by provision(). RMs redefine this method to add their
+ specific provisioning functionality.
+
+ """
+ self.set_provisioned()
+
+ def do_start(self):
+ """ Default start action: marks the RM as STARTED.
+
+ Invoked by start(). RMs redefine this method to add their
+ specific start functionality.
+
+ """
+ self.set_started()
+
+ def do_stop(self):
+ """ Default stop action: marks the RM as STOPPED.
+
+ Invoked by stop(). RMs redefine this method to add their
+ specific stop functionality.
+
+ """
+ self.set_stopped()
+
+ def do_deploy(self):
+ """ Default deploy action: marks the RM as READY.
+
+ Invoked by deploy(). RMs redefine this method to add their
+ specific deployment functionality.
+
+ """
+ self.set_ready()
+
+ def do_release(self):
+ """ Default release action: does nothing.
+
+ Invoked by release(), which marks the RM as RELEASED afterwards.
+ RMs redefine this method to add their specific release
+ functionality.
+
+ """
+ pass
+
+ def do_finish(self):
+ """ Default finish action: marks the RM as FINISHED.
+
+ Invoked by finish(). RMs redefine this method to add their
+ specific finalization functionality.
+
+ """
+ self.set_finished()
+
+ def do_fail(self):
+ """ Default fail action: marks the RM as FAILED.
+
+ Invoked by fail(). RMs redefine this method to add their
+ specific failure handling.
+
+ """
+ self.set_failed()
+
def set_started(self):
""" Mark ResourceManager as STARTED """
- self._start_time = tnow()
- self._state = ResourceState.STARTED
+ self.set_state(ResourceState.STARTED, "_start_time")
def set_stopped(self):
""" Mark ResourceManager as STOPPED """
- self._stop_time = tnow()
- self._state = ResourceState.STOPPED
+ self.set_state(ResourceState.STOPPED, "_stop_time")
def set_ready(self):
""" Mark ResourceManager as READY """
- self._ready_time = tnow()
- self._state = ResourceState.READY
+ self.set_state(ResourceState.READY, "_ready_time")
def set_released(self):
""" Mark ResourceManager as RELEASED """
- self._release_time = tnow()
- self._state = ResourceState.RELEASED
+ self.set_state(ResourceState.RELEASED, "_release_time")
def set_finished(self):
""" Mark ResourceManager as FINISHED """
- self._finish_time = tnow()
- self._state = ResourceState.FINISHED
+ self.set_state(ResourceState.FINISHED, "_finish_time")
def set_failed(self):
""" Mark ResourceManager as FAILED """
- self._failed_time = tnow()
- self._state = ResourceState.FAILED
+ self.set_state(ResourceState.FAILED, "_failed_time")
def set_discovered(self):
""" Mark ResourceManager as DISCOVERED """
- self._discover_time = tnow()
- self._state = ResourceState.DISCOVERED
+ self.set_state(ResourceState.DISCOVERED, "_discover_time")
def set_provisioned(self):
""" Mark ResourceManager as PROVISIONED """
- self._provision_time = tnow()
- self._state = ResourceState.PROVISIONED
+ self.set_state(ResourceState.PROVISIONED, "_provision_time")
+
+ def set_state(self, state, state_time_attr):
+     """ Moves the RM to a new state and records when it happened.
+
+     RELEASED is final: once the RM is in that state the call is
+     ignored and neither the state nor any timestamp is modified.
+
+     :param state: State to set on the RM (a ResourceState value)
+     :param state_time_attr: Name of the instance attribute that
+         receives the current time (e.g. "_start_time")
+     :type state_time_attr: str
+
+     """
+     still_mutable = (self._state != ResourceState.RELEASED)
+
+     if still_mutable:
+         # Record the timestamp first, then switch the state
+         setattr(self, state_time_attr, tnow())
+         self._state = state
class ResourceFactory(object):
_resource_types = dict()
try:
# Notice: Repeated calls to load_module will act as a reload of the module
- module = loader.load_module(modname)
+ if modname in sys.modules:
+ module = sys.modules.get(modname)
+ else:
+ module = loader.load_module(modname)
for attrname in dir(module):
if attrname.startswith("_"):
if issubclass(attr, ResourceManager):
types.append(attr)
+
+ if not modname in sys.modules:
+ sys.modules[modname] = module
+
except:
import traceback
import logging