From 56e6fc254d9c11b459e7f8771343fa360939e1ab Mon Sep 17 00:00:00 2001
From: Claudio-Daniel Freire
Date: Thu, 23 Jun 2011 14:09:27 +0200
Subject: [PATCH] Remember blacklisting of nodes, and accelerate detection of
 unresponsive nodes if they haven't been provisioned recently

---
 src/nepi/testbeds/planetlab/execute.py | 47 ++++++++++++++++++++++++--
 src/nepi/testbeds/planetlab/node.py    |  4 +--
 src/nepi/util/environ.py               | 15 +++++++-
 3 files changed, 61 insertions(+), 5 deletions(-)

diff --git a/src/nepi/testbeds/planetlab/execute.py b/src/nepi/testbeds/planetlab/execute.py
index 3a1b65ab..63b035e3 100644
--- a/src/nepi/testbeds/planetlab/execute.py
+++ b/src/nepi/testbeds/planetlab/execute.py
@@ -6,6 +6,7 @@ from nepi.core import testbed_impl
 from nepi.util.constants import TIME_NOW
 from nepi.util.graphtools import mst
 from nepi.util import ipaddr2
+from nepi.util import environ
 import sys
 import os
 import os.path
@@ -39,6 +40,9 @@ class TestbedController(testbed_impl.TestbedController):
         self._app = application
 
         self._blacklist = set()
+        self._just_provisioned = set()
+
+        self._load_blacklist()
 
     @property
     def home_directory(self):
@@ -71,7 +75,34 @@ class TestbedController(testbed_impl.TestbedController):
                 # If it wasn't found, don't remember this failure, keep trying
                 return None
         return self._slice_id
-        
+
+    def _load_blacklist(self):
+        """
+        Fill self._blacklist with the node ids stored, one per line, in the
+        'plblacklist' file; a missing or unreadable file gives an empty set.
+        """
+        blpath = environ.homepath('plblacklist')
+        try:
+            bl = open(blpath, "r")
+        except IOError:
+            self._blacklist = set()
+            return
+
+        try:
+            # Skip blank lines: int('') raises ValueError and aborts the load
+            self._blacklist = set(int(line) for line in bl if line.strip())
+        finally:
+            bl.close()
+
+    def _save_blacklist(self):
+        """ Store self._blacklist in 'plblacklist', one node id per line """
+        blpath = environ.homepath('plblacklist')
+        bl = open(blpath, "w")
+        try:
+            bl.writelines(map('%s\n'.__mod__, self._blacklist))
+        finally:
+            bl.close()
+
     def do_setup(self):
         self._home_directory = self._attributes.\
             get_attribute_value("homeDirectory")
@@ -151,6 +182,7 @@ class TestbedController(testbed_impl.TestbedController):
                 elif not candidates:
                     # Try again including unassigned nodes
                     candidates = node.find_candidates()
+                    candidates -= reserved
                     if len(candidates) > 1:
                         continue
                     if len(candidates) == 1:
@@ -201,6 +233,7 @@ class TestbedController(testbed_impl.TestbedController):
             self.plapi.UpdateSlice(self.slicename, nodes=new_nodes)
 
         # cleanup
+        self._just_provisioned = self._to_provision
         del self._to_provision
 
     def do_wait_nodes(self):
@@ -220,7 +253,9 @@ class TestbedController(testbed_impl.TestbedController):
                     print "Waiting for Node", guid, "configured at", node.hostname,
                     sys.stdout.flush()
 
-                    node.wait_provisioning()
+                    node.wait_provisioning(
+                        (20*60 if node._node_id in self._just_provisioned else 60)
+                    )
 
                     print "READY"
         except self._node.UnresponsiveNodeError:
@@ -235,6 +270,14 @@ class TestbedController(testbed_impl.TestbedController):
                         print "Blacklisting", node.hostname, "for unresponsiveness"
                         self._blacklist.add(node._node_id)
                         node.unassign_node()
+
+            try:
+                self._save_blacklist()
+            except:
+                # not important...
+                import traceback
+                traceback.print_exc()
+
             raise
 
     def do_spanning_deployment_plan(self):
diff --git a/src/nepi/testbeds/planetlab/node.py b/src/nepi/testbeds/planetlab/node.py
index 9e56c32f..e534257f 100644
--- a/src/nepi/testbeds/planetlab/node.py
+++ b/src/nepi/testbeds/planetlab/node.py
@@ -335,7 +335,7 @@ class Node(object):
         if proc.wait():
             raise RuntimeError, "Failed to set up application: %s %s" % (out,err,)
 
-    def wait_provisioning(self):
+    def wait_provisioning(self, timeout = 20*60):
         # recently provisioned nodes may not be up yet
         sleeptime = 1.0
         totaltime = 0.0
@@ -344,7 +344,7 @@ class Node(object):
             totaltime += sleeptime
             sleeptime = min(30.0, sleeptime*1.5)
 
-            if totaltime > 20*60:
+            if totaltime > timeout:
                 # PlanetLab has a 15' delay on configuration propagation
                 # If we're above that delay, the unresponsiveness is not due
                 # to this delay.
diff --git a/src/nepi/util/environ.py b/src/nepi/util/environ.py
index ced1239b..3426ec64 100644
--- a/src/nepi/util/environ.py
+++ b/src/nepi/util/environ.py
@@ -1,6 +1,6 @@
 # vim:ts=4:sw=4:et:ai:sts=4
 
-import os, subprocess
+import os, subprocess, os.path
 
 __all__ = ["python", "ssh_path"]
 __all__ += ["rsh", "tcpdump_path", "sshd_path"]
@@ -57,3 +57,36 @@ def backticks(cmd):
         raise RuntimeError("Error executing `%s': %s" % (" ".join(cmd), err))
     return out
 
+def homepath(path, app='.nepi', mode = 0500):
+    """
+    Return the location of `path` inside the per-user `app` directory
+    (``$HOME/.nepi`` by default), creating the containing directory when
+    it does not exist yet. The file itself is never created here.
+
+    `mode` is accepted but not applied: directories are created with
+    os.makedirs() default permissions.
+    NOTE(review): the 0500 default has no owner write bit, so applying it
+    verbatim would make the directory unusable for new files - confirm
+    the intended value before wiring it in.
+
+    Raises OSError if the directory cannot be created.
+    """
+    home = os.environ.get('HOME')
+    if home is None:
+        # os.getlogin() fails without a controlling terminal and
+        # '/home/<login>' is only a guess: let expanduser() resolve it.
+        home = os.path.expanduser('~')
+
+    path = os.path.join(home, app, path)
+    dirname = os.path.dirname(path)
+    try:
+        os.makedirs(dirname)
+    except OSError:
+        # Already there (maybe created concurrently by another process);
+        # a real failure only if it still is not a directory.
+        if not os.path.isdir(dirname):
+            raise
+
+    return path
+
+
-- 
2.47.0