X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=src%2Fnepi%2Fresources%2Fplanetlab%2Fnode.py;h=eadcd0df4be25905ca2bf65a738dd21eef6045e2;hb=450b5dd0a993f63eb2ec34bbc656c558572eb44c;hp=57196b35f75446f6717563e5675fcc9c3e0d16d7;hpb=99d8b2a4431d8fafd0385e189375106d46f1abd9;p=nepi.git diff --git a/src/nepi/resources/planetlab/node.py b/src/nepi/resources/planetlab/node.py index 57196b35..eadcd0df 100644 --- a/src/nepi/resources/planetlab/node.py +++ b/src/nepi/resources/planetlab/node.py @@ -24,6 +24,7 @@ from nepi.execution.resource import ResourceManager, clsinit_copy, \ from nepi.resources.linux.node import LinuxNode from nepi.resources.planetlab.plcapi import PLCAPIFactory from nepi.util.execfuncs import lexec +from nepi.util import sshfuncs from random import randint import time @@ -36,14 +37,7 @@ class PlanetlabNode(LinuxNode): "associated to a PlanetLab user account" _backend = "planetlab" - blacklist = list() - provisionlist = list() - - lock_blist = threading.Lock() - lock_plist = threading.Lock() - - lock_slice = threading.Lock() - + lock = threading.Lock() @classmethod def _register_attributes(cls): @@ -64,7 +58,7 @@ class PlanetlabNode(LinuxNode): authenticate in the website) ", flags = Flags.Credential) - pl_password = Attribute("password", + pl_password = Attribute("plpassword", "PlanetLab account password, as \ the one to authenticate in the website) ", flags = Flags.Credential) @@ -98,13 +92,13 @@ class PlanetlabNode(LinuxNode): "other"], flags = Flags.Filter) - site = Attribute("site", "Constrain the PlanetLab site this node \ - should reside on.", - type = Types.Enumerate, - allowed = ["PLE", - "PLC", - "PLJ"], - flags = Flags.Filter) + #site = Attribute("site", "Constrain the PlanetLab site this node \ + # should reside on.", + # type = Types.Enumerate, + # allowed = ["PLE", + # "PLC", + # "PLJ"], + # flags = Flags.Filter) min_reliability = Attribute("minReliability", "Constrain reliability \ while picking PlanetLab nodes. Specifies a lower \ @@ -178,7 +172,7 @@ class PlanetlabNode(LinuxNode): cls._register_attribute(pl_ptn) cls._register_attribute(pl_user) cls._register_attribute(pl_password) - cls._register_attribute(site) + #cls._register_attribute(site) cls._register_attribute(city) cls._register_attribute(country) cls._register_attribute(region) @@ -204,7 +198,7 @@ class PlanetlabNode(LinuxNode): def plapi(self): if not self._plapi: pl_user = self.get("pluser") - pl_pass = self.get("password") + pl_pass = self.get("plpassword") pl_url = self.get("plcApiUrl") pl_ptn = self.get("plcApiPattern") @@ -213,35 +207,40 @@ class PlanetlabNode(LinuxNode): return self._plapi - def discoverl(self): + def discover(self): """ Based on the attributes defined by the user, discover the suitable nodes """ - hostname = self.get("hostname") + hostname = self._get_hostname() + print self.guid, hostname if hostname: # the user specified one particular node to be provisioned # check with PLCAPI if it is alvive node_id = self._query_if_alive(hostname=hostname) node_id = node_id.pop() + print self.guid, node_id - # check that the node is not blacklisted or already being provision + # check that the node is not blacklisted or being provisioned # by other RM - blist = PlanetlabNode.blacklist - plist = PlanetlabNode.provisionlist - if node_id not in blist and node_id not in plist: + with PlanetlabNode.lock: + plist = self.plapi.reserved() + blist = self.plapi.blacklisted() + print self.guid,plist + print self.guid,blist + if node_id not in blist and node_id not in plist: - # check that is really alive, by performing ping - ping_ok = self._do_ping(node_id) - if not ping_ok: - self._blacklist_node(node_id) - self.fail_node_not_alive(hostname) - else: - self._node_to_provision = node_id - self._put_node_in_provision(node_id) - super(PlanetlabNode, self).discover() + # check that is really alive, by performing ping + ping_ok = self._do_ping(node_id) + if not ping_ok: + self._blacklist_node(node_id) + self.fail_node_not_alive(hostname) + else: + self._put_node_in_provision(node_id) + self._node_to_provision = node_id + super(PlanetlabNode, self).discover() - else: - self.fail_node_not_available(hostname) + else: + self.fail_node_not_available(hostname) else: # the user specifies constraints based on attributes, zero, one or @@ -258,18 +257,19 @@ class PlanetlabNode(LinuxNode): if nodes_inslice: node_id = self._choose_random_node(nodes_inslice) - if not node_id and nodes_not_inslice: + if not node_id: # Either there were no matching nodes in the user's slice, or # the nodes in the slice were blacklisted or being provisioned # by other RM. Note nodes_not_inslice is never empty node_id = self._choose_random_node(nodes_not_inslice) - if not node_id: - self.fail_not_enough_nodes() - self._node_to_provision = node_id - super(PlanetlabNode, self).discover() + if node_id: + self._node_to_provision = node_id + super(PlanetlabNode, self).discover() + else: + self.fail_not_enough_nodes() - def provisionl(self): + def provision(self): """ Add node to user's slice after verifing that the node is functioning correctly @@ -277,11 +277,20 @@ class PlanetlabNode(LinuxNode): provision_ok = False ssh_ok = False proc_ok = False - timeout = 1200 + timeout = 120 while not provision_ok: node = self._node_to_provision - self._set_hostname_attr(node) + # Adding try catch to set hostname because sometimes MyPLC fails + # when trying to retrive node's hostname + try: + self._set_hostname_attr(node) + except: + with PlanetlabNode.lock: + self._blacklist_node(node) + self.discover() + continue + self._add_node_to_slice(node) # check ssh connection @@ -302,8 +311,10 @@ class PlanetlabNode(LinuxNode): # the timeout was reach without establishing ssh connection # the node is blacklisted, deleted from the slice, and a new # node to provision is discovered - self._blacklist_node(node) - self._delete_node_from_slice(node) + with PlanetlabNode.lock: + self._blacklist_node(node) + self._delete_node_from_slice(node) + self.set('hostname', None) self.discover() continue @@ -312,8 +323,10 @@ class PlanetlabNode(LinuxNode): cmd = 'mount |grep proc' ((out, err), proc) = self.execute(cmd) if out.find("/proc type proc") < 0: - self._blacklist_node(node) - self._delete_node_from_slice(node) + with PlanetlabNode.lock: + self._blacklist_node(node) + self._delete_node_from_slice(node) + self.set('hostname', None) self.discover() continue @@ -368,6 +381,11 @@ class PlanetlabNode(LinuxNode): # filter nodes by range constraints e.g. max bandwidth elif ('min' or 'max') in attr_name: nodes_id = self._filter_by_range_attr(attr_name, attr_value, filters, nodes_id) + + if not filters: + nodes = self.plapi.get_nodes() + for node in nodes: + nodes_id.append(node['node_id']) return nodes_id @@ -473,7 +491,7 @@ class PlanetlabNode(LinuxNode): filters['run_level'] = 'boot' filters['boot_state'] = 'boot' filters['node_type'] = 'regular' - filters['>last_contact'] = int(time.time()) - 2*3600 + #filters['>last_contact'] = int(time.time()) - 2*3600 # adding node_id or hostname to the filters to check for the particular # node @@ -485,7 +503,7 @@ class PlanetlabNode(LinuxNode): alive_nodes_id = self._get_nodes_id(filters) if len(alive_nodes_id) == 0: - self.fail_discovery() + self.fail_node_not_alive(self, hostname) else: nodes_id = list() for node_id in alive_nodes_id: @@ -499,9 +517,6 @@ class PlanetlabNode(LinuxNode): From the possible nodes for provision, choose randomly to decrese the probability of different RMs choosing the same node for provision """ - blist = PlanetlabNode.blacklist - plist = PlanetlabNode.provisionlist - size = len(nodes) while size: size = size - 1 @@ -511,22 +526,28 @@ class PlanetlabNode(LinuxNode): # check the node is not blacklisted or being provision by other RM # and perform ping to check that is really alive - if node_id not in blist and node_id not in plist: - ping_ok = self._do_ping(node_id) - if not ping_ok: - self._blacklist_node(node_id) - else: - # discovered node for provision, added to provision list - self._put_node_in_provision(node_id) - return node_id + with PlanetlabNode.lock: + + blist = self.plapi.blacklisted() + plist = self.plapi.reserved() + if node_id not in blist and node_id not in plist: + ping_ok = self._do_ping(node_id) + print " ### ping_ok #### %s guid %s" % (ping_ok, self.guid) + if not ping_ok: + self._blacklist_node(node_id) + else: + # discovered node for provision, added to provision list + self._put_node_in_provision(node_id) + print "node_id %s , guid %s" % (node_id, self.guid) + return node_id def _get_nodes_id(self, filters): return self.plapi.get_nodes(filters, fields=['node_id']) def _add_node_to_slice(self, node_id): - self.warn(" Adding node to slice ") + self.info(" Selected node to provision ") slicename = self.get("username") - with PlanetlabNode.lock_slice: + with PlanetlabNode.lock: slice_nodes = self.plapi.get_slice_nodes(slicename) slice_nodes.append(node_id) self.plapi.add_slice_nodes(slicename, slice_nodes) @@ -536,6 +557,17 @@ class PlanetlabNode(LinuxNode): slicename = self.get("username") self.plapi.delete_slice_node(slicename, [node]) + def _get_hostname(self): + hostname = self.get("hostname") + ip = self.get("ip") + if hostname: + return hostname + elif ip: + hostname = sshfuncs.gethostbyname(ip) + return hostname + else: + return None + def _set_hostname_attr(self, node): """ Query PLCAPI for the hostname of a certain node id and sets the @@ -552,7 +584,6 @@ class PlanetlabNode(LinuxNode): slicename = self.get("username") slice_nodes = self.plapi.get_slice_nodes(slicename) nodes_inslice = list(set(nodes_id) & set(slice_nodes)) - return nodes_inslice def _do_ping(self, node_id): @@ -561,41 +592,43 @@ class PlanetlabNode(LinuxNode): """ ping_ok = False ip = self._get_ip(node_id) - command = "ping -c2 %s | echo \"PING OK\"" % ip + print "ip de do_ping %s, guid %s" % (ip, self.guid) + if not ip: return ping_ok + + command = "ping -c2 %s" % ip (out, err) = lexec(command) - if not out.find("PING OK") < 0: + print "out de do_ping %s, guid %s" % (out, self.guid) + if not out.find("2 received") < 0: ping_ok = True - + + print "ping_ok de do_ping %s, guid %s" % (ping_ok, self.guid) return ping_ok def _blacklist_node(self, node): """ Add node mal functioning node to blacklist """ - blist = PlanetlabNode.blacklist - self.warn(" Blacklisting malfunctioning node ") - with PlanetlabNode.lock_blist: - blist.append(node) + self._plapi.blacklist_host(node) def _put_node_in_provision(self, node): """ Add node to the list of nodes being provisioned, in order for other RMs to not try to provision the same one again """ - plist = PlanetlabNode.provisionlist - - self.warn(" Provisioning node ") - with PlanetlabNode.lock_plist: - plist.append(node) + self._plapi.reserve_host(node) def _get_ip(self, node_id): """ Query PLCAPI for the IP of a node with certain node id """ - ip = self.plapi.get_interfaces({'node_id':node_id}, fields=['ip']) - ip = ip[0]['ip'] + hostname = self.plapi.get_nodes(node_id, ['hostname'])[0] + print "#### HOSTNAME ##### %s ### guid %s " % (hostname['hostname'], self.guid) + ip = sshfuncs.gethostbyname(hostname['hostname']) + if not ip: + # Fail while trying to find the IP + return None return ip def fail_discovery(self): @@ -604,16 +637,18 @@ class PlanetlabNode(LinuxNode): self.error(msg) raise RuntimeError, msg - def fail_node_not_alive(self, hostname): - msg = "Node %s not alive, pick another node" % hostname + def fail_node_not_alive(self, hostname=None): + self.fail() + msg = "Node %s not alive" % hostname raise RuntimeError, msg def fail_node_not_available(self, hostname): - msg = "Node %s not available for provisioning, pick another \ - node" % hostname + self.fail() + msg = "Node %s not available for provisioning" % hostname raise RuntimeError, msg def fail_not_enough_nodes(self): + self.fail() msg = "Not enough nodes available for provisioning" raise RuntimeError, msg