src/nepi/execution/runner.py

   1 #
   2 #    NEPI, a framework to manage network experiments
   3 #    Copyright (C) 2013 INRIA
   4 #
   5 #    This program is free software: you can redistribute it and/or modify
   6 #    it under the terms of the GNU General Public License as published by
   7 #    the Free Software Foundation, either version 3 of the License, or
   8 #    (at your option) any later version.
   9 #
  10 #    This program is distributed in the hope that it will be useful,
  11 #    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13 #    GNU General Public License for more details.
  14 #
  15 #    You should have received a copy of the GNU General Public License
  16 #    along with this program.  If not, see <http://www.gnu.org/licenses/>.
  17 #
  18 # Author: Alina Quereilhac <alina.quereilhac@inria.fr>
  19
  20 from nepi.execution.ec import ExperimentController, ECState
  21
  22 import math
  23 import numpy
  24 import os
  25 import time
  26
  27 class ExperimentRunner(object):
  28     """ The ExperimentRunner entity is responsible of
  29     re-running an experiment described by an ExperimentController
  30     multiple time
  31
  32     """
  33     def __init__(self):
  34         super(ExperimentRunner, self).__init__()
  35
  36     def run(self, ec, min_runs = 1, max_runs = -1, wait_time = 0,
  37             wait_guids = [], compute_metric_callback = None,
  38             evaluate_convergence_callback = None ):
  39         """ Run a same experiment independently multiple times, until the
  40         evaluate_convergence_callback function returns True
  41
  42         :param ec: Description of experiment to replicate.
  43             The runner takes care of deploying the EC, so ec.deploy()
  44             must not be invoked directly before or after invoking
  45             runner.run().
  46         :type ec: ExperimentController
  47
  48         :param min_runs: Minimum number of times the experiment must be
  49             replicated
  50         :type min_runs: int
  51
  52         :param max_runs: Maximum number of times the experiment can be
  53             replicated
  54         :type max_runs: int
  55
  56         :param wait_time: Time to wait in seconds on each run between invoking
  57             ec.deploy() and ec.release().
  58         :type wait_time: float
  59
  60         :param wait_guids: List of guids wait for finalization on each run.
  61             This list is passed to ec.wait_finished()
  62         :type wait_guids: list
  63
  64         :param compute_metric_callback: User defined function invoked after
  65             each experiment run to compute a metric. The metric is usually
  66             a network measurement obtained from the data collected
  67             during experiment execution.
  68             The function is invoked passing the ec and the run number as arguments.
  69             It must return the value for the computed metric(s) (usually a single
  70             numerical value, but it can be several).
  71
  72                 metric = compute_metric_callback(ec, run)
  73
  74         :type compute_metric_callback: function
  75
  76         :param evaluate_convergence_callback: User defined function invoked after
  77             computing the metric on each run, to evaluate the experiment was
  78             run enough times. It takes the list of cumulated metrics produced by
  79             the compute_metric_callback up to the current run, and decided
  80             whether the metrics have statistically converged to a meaningful value
  81             or not. It must return either True or False.
  82
  83                 stop = evaluate_convergence_callback(ec, run, metrics)
  84
  85             If stop is True, then the runner will exit.
  86
  87         :type evaluate_convergence_callback: function
  88
  89         """
  90
  91         if (not max_runs or max_runs < 0) and not compute_metric_callback:
  92             msg = "Undefined STOP condition, set stop_callback or max_runs"
  93             raise RuntimeError, msg
  94
  95         if compute_metric_callback and not evaluate_convergence_callback:
  96             evaluate_convergence_callback = self.evaluate_normal_convergence
  97             ec.logger.info(" Treating data as normal to evaluate convergence. "
  98                     "Experiment will stop when the standard error with 95% "
  99                     "confidence interval is >= 5% of the mean of the collected samples ")
 100
 101         # Force persistence of experiment controller
 102         ec._persist = True
 103
 104         filepath = ec.save(dirpath = ec.exp_dir)
 105
 106         samples = []
 107         run = 0
 108         stop = False
 109
 110         while not stop:
 111             run += 1
 112
 113             ec = self.run_experiment(filepath, wait_time, wait_guids)
 114
 115             ec.logger.info(" RUN %d \n" % run)
 116
 117             if compute_metric_callback:
 118                 metric = compute_metric_callback(ec, run)
 119                 if metric is not None:
 120                     samples.append(metric)
 121
 122                     if run >= min_runs and evaluate_convergence_callback:
 123                         if evaluate_convergence_callback(ec, run, samples):
 124                             stop = True
 125
 126             if run >= min_runs and max_runs > -1 and run >= max_runs :
 127                 stop = True
 128
 129             ec.shutdown()
 130             del ec
 131
 132         return run
 133
 134     def evaluate_normal_convergence(self, ec, run, metrics):
 135         """ Returns True when the confidence interval of the sample mean is
 136         less than 5% of the mean value, for a 95% confidence level,
 137         assuming normal distribution of the data
 138         """
 139
 140         if len(metrics) == 0:
 141             msg = "0 samples collected"
 142             raise RuntimeError, msg
 143
 144         x = numpy.array(metrics)
 145         n = len(metrics)
 146         std = x.std()
 147         se = std / math.sqrt(n)
 148         m = x.mean()
 149
 150         # Confidence interval for 95% confidence level,
 151         # assuming normally distributed data.
 152         ci95 = se * 2
 153
 154         ec.logger.info(" RUN %d - SAMPLES %d MEAN %.2f STD %.2f CI (95%%) %.2f \n" % (
 155             run, n, m, std, ci95 ) )
 156
 157         return m * 0.05 >= ci95
 158
 159     def run_experiment(self, filepath, wait_time, wait_guids):
 160         """ Run an experiment based on the description stored
 161         in filepath.
 162
 163         """
 164         ec = ExperimentController.load(filepath)
 165
 166         ec.deploy()
 167
 168         ec.wait_finished(wait_guids)
 169         time.sleep(wait_time)
 170
 171         ec.release()
 172
 173         if ec.state == ECState.FAILED:
 174             raise RuntimeError, "Experiment failed"
 175
 176         return ec
 177