X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=src%2Fnepi%2Fresources%2Fplanetlab%2Fnode.py;h=dc70df5632e7fa6972876489916f6192b32ac5e4;hb=ac866efb762875550bdc0c05d693e5eb026f435e;hp=34e54886a3434401fb55e0d116de8f6585ca43e0;hpb=b8197bb5e325aaed5f3b1b418a9d152e29b03e56;p=nepi.git diff --git a/src/nepi/resources/planetlab/node.py b/src/nepi/resources/planetlab/node.py index 34e54886..dc70df56 100644 --- a/src/nepi/resources/planetlab/node.py +++ b/src/nepi/resources/planetlab/node.py @@ -20,17 +20,20 @@ from nepi.execution.attribute import Attribute, Flags, Types from nepi.execution.resource import ResourceManager, clsinit_copy, \ - ResourceState, reschedule_delay + ResourceState from nepi.resources.linux.node import LinuxNode from nepi.resources.planetlab.plcapi import PLCAPIFactory from nepi.util.execfuncs import lexec from nepi.util import sshfuncs from random import randint +import re +import os import time import socket import threading import datetime +import weakref @clsinit_copy class PlanetlabNode(LinuxNode): @@ -94,14 +97,6 @@ class PlanetlabNode(LinuxNode): "other"], flags = Flags.Filter) - #site = Attribute("site", "Constrain the PlanetLab site this node \ - # should reside on.", - # type = Types.Enumerate, - # allowed = ["PLE", - # "PLC", - # "PLJ"], - # flags = Flags.Filter) - min_reliability = Attribute("minReliability", "Constrain reliability \ while picking PlanetLab nodes. Specifies a lower \ acceptable bound.", @@ -169,21 +164,19 @@ class PlanetlabNode(LinuxNode): "year"], flags = Flags.Filter) -# plblacklist = Attribute("blacklist", "Take into account the file plblacklist \ -# in the user's home directory under .nepi directory. This file \ -# contains a list of PL nodes to blacklist, and at the end \ -# of the experiment execution the new blacklisted nodes are added.", -# type = Types.Bool, -# default = True, -# flags = Flags.ReadOnly) -# + plblacklist = Attribute("persist_blacklist", "Take into account the file plblacklist \ + in the user's home directory under .nepi directory. This file \ + contains a list of PL nodes to blacklist, and at the end \ + of the experiment execution the new blacklisted nodes are added.", + type = Types.Bool, + default = False, + flags = Flags.Global) cls._register_attribute(ip) cls._register_attribute(pl_url) cls._register_attribute(pl_ptn) cls._register_attribute(pl_user) cls._register_attribute(pl_password) - #cls._register_attribute(site) cls._register_attribute(city) cls._register_attribute(country) cls._register_attribute(region) @@ -198,10 +191,12 @@ class PlanetlabNode(LinuxNode): cls._register_attribute(min_cpu) cls._register_attribute(max_cpu) cls._register_attribute(timeframe) + cls._register_attribute(plblacklist) def __init__(self, ec, guid): super(PlanetlabNode, self).__init__(ec, guid) + self._ecobj = weakref.ref(ec) self._plapi = None self._node_to_provision = None self._slicenode = False @@ -211,6 +206,16 @@ class PlanetlabNode(LinuxNode): self.set("gateway", None) self.set("gatewayUser", None) + # Blacklist file + nepi_home = os.path.join(os.path.expanduser("~"), ".nepi") + plblacklist_file = os.path.join(nepi_home, "plblacklist.txt") + if not os.path.exists(plblacklist_file): + if os.path.isdir(nepi_home): + open(plblacklist_file, 'w').close() + else: + os.makedirs(nepi_home) + open(plblacklist_file, 'w').close() + def _skip_provision(self): pl_user = self.get("pluser") pl_pass = self.get("plpassword") @@ -225,14 +230,15 @@ class PlanetlabNode(LinuxNode): pl_pass = self.get("plpassword") pl_url = self.get("plcApiUrl") pl_ptn = self.get("plcApiPattern") - - self._plapi = PLCAPIFactory.get_api(pl_user, pl_pass, pl_url, - pl_ptn) + _plapi = PLCAPIFactory.get_api(pl_user, pl_pass, pl_url, + pl_ptn, self._ecobj()) - if not self._plapi: + if not _plapi: self.fail_plapi() + + self._plapi = weakref.ref(_plapi) - return self._plapi + return self._plapi() def do_discover(self): """ @@ -324,7 +330,15 @@ class PlanetlabNode(LinuxNode): node = self._node_to_provision if not self._slicenode: self._add_node_to_slice(node) - + if self._check_if_in_slice([node]): + self.debug( "Node added to slice" ) + else: + self.warning(" Could not add to slice ") + with PlanetlabNode.lock: + self._blacklist_node(node) + self.do_discover() + continue + # check ssh connection t = 0 while t < timeout and not ssh_ok: @@ -332,10 +346,12 @@ class PlanetlabNode(LinuxNode): cmd = 'echo \'GOOD NODE\'' ((out, err), proc) = self.execute(cmd) if out.find("GOOD NODE") < 0: + self.debug( "No SSH connection, waiting 60s" ) t = t + 60 time.sleep(60) continue else: + self.debug( "SSH OK" ) ssh_ok = True continue else: @@ -349,7 +365,7 @@ class PlanetlabNode(LinuxNode): # the node is blacklisted, deleted from the slice, and a new # node to provision is discovered with PlanetlabNode.lock: - self.warn(" Could not SSH login ") + self.warning(" Could not SSH login ") self._blacklist_node(node) #self._delete_node_from_slice(node) self.do_discover() @@ -365,7 +381,7 @@ class PlanetlabNode(LinuxNode): if out1.find("/proc type proc") < 0 or \ "Read-only file system".lower() in err2.lower(): with PlanetlabNode.lock: - self.warn(" Corrupted file system ") + self.warning(" Corrupted file system ") self._blacklist_node(node) #self._delete_node_from_slice(node) self.do_discover() @@ -382,6 +398,12 @@ class PlanetlabNode(LinuxNode): super(PlanetlabNode, self).do_provision() + def do_release(self): + super(PlanetlabNode, self).do_release() + if self.state == ResourceState.RELEASED and not self._skip_provision(): + self.debug(" Releasing PLC API ") + self.plapi.release() + def _filter_based_on_attributes(self): """ Retrive the list of nodes ids that match user's constraints @@ -394,7 +416,6 @@ class PlanetlabNode(LinuxNode): 'region' : 'region', 'architecture' : 'arch', 'operatingSystem' : 'fcdistro', - #'site' : 'pldistro', 'minReliability' : 'reliability%s' % timeframe, 'maxReliability' : 'reliability%s' % timeframe, 'minBandwidth' : 'bw%s' % timeframe, @@ -537,7 +558,7 @@ class PlanetlabNode(LinuxNode): ping_ok = self._do_ping(node_id) if not ping_ok: self._set_hostname_attr(node_id) - self.warn(" Node not responding PING ") + self.warning(" Node not responding PING ") self._blacklist_node(node_id) else: # discovered node for provision, added to provision list @@ -552,20 +573,21 @@ class PlanetlabNode(LinuxNode): slicename = self.get("username") with PlanetlabNode.lock: slice_nodes = self.plapi.get_slice_nodes(slicename) + self.debug(" Previous slice nodes %s " % slice_nodes) slice_nodes.append(node_id) self.plapi.add_slice_nodes(slicename, slice_nodes) def _delete_node_from_slice(self, node): - self.warn(" Deleting node from slice ") + self.warning(" Deleting node from slice ") slicename = self.get("username") self.plapi.delete_slice_node(slicename, [node]) def _get_hostname(self): hostname = self.get("hostname") - ip = self.get("ip") if hostname: return hostname - elif ip: + ip = self.get("ip") + if ip: hostname = socket.gethostbyaddr(ip)[0] self.set('hostname', hostname) return hostname @@ -596,22 +618,21 @@ class PlanetlabNode(LinuxNode): """ ping_ok = False ip = self._get_ip(node_id) - if not ip: return ping_ok - - command = "ping -c4 %s" % ip - - (out, err) = lexec(command) - if not str(out).find("2 received") or not str(out).find("3 received") or not \ - str(out).find("4 received") < 0: - ping_ok = True - + if ip: + command = "ping -c4 %s" % ip + (out, err) = lexec(command) + + m = re.search("(\d+)% packet loss", str(out)) + if m and int(m.groups()[0]) < 50: + ping_ok = True + return ping_ok def _blacklist_node(self, node): """ Add node mal functioning node to blacklist """ - self.warn(" Blacklisting malfunctioning node ") + self.warning(" Blacklisting malfunctioning node ") self.plapi.blacklist_host(node) if not self._hostname: self.set('hostname', None) @@ -627,9 +648,10 @@ class PlanetlabNode(LinuxNode): """ Query PLCAPI for the IP of a node with certain node id """ - hostname = self.plapi.get_nodes(node_id, ['hostname'])[0] + hostname = self.get("hostname") or \ + self.plapi.get_nodes(node_id, ['hostname'])[0]['hostname'] try: - ip = sshfuncs.gethostbyname(hostname['hostname']) + ip = sshfuncs.gethostbyname(hostname) except: # Fail while trying to find the IP return None