merged ex_shutdown into nepi-3-dev
[nepi.git] / src / nepi / resources / planetlab / node.py
index 57196b3..eadcd0d 100644
@@ -24,6 +24,7 @@ from nepi.execution.resource import ResourceManager, clsinit_copy, \
 from nepi.resources.linux.node import LinuxNode
 from nepi.resources.planetlab.plcapi import PLCAPIFactory 
 from nepi.util.execfuncs import lexec
+from nepi.util import sshfuncs
 
 from random import randint
 import time
@@ -36,14 +37,7 @@ class PlanetlabNode(LinuxNode):
             "associated to a PlanetLab user account"
     _backend = "planetlab"
 
-    blacklist = list()
-    provisionlist = list()
-
-    lock_blist = threading.Lock()
-    lock_plist = threading.Lock()
-
-    lock_slice = threading.Lock()
-
+    lock = threading.Lock()
 
     @classmethod
     def _register_attributes(cls):
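
The three per-list locks and the in-memory blacklist/provisionlist class attributes are collapsed into a single class-level lock; the lists themselves move behind the PLCAPI wrapper (reserved()/blacklisted(), further down in this diff). A single lock matters because the two lists must be read and updated as one atomic step. A minimal sketch, with illustrative names rather than NEPI code, of the race that per-list locks allow:

    import threading

    lock = threading.Lock()
    reserved = set()
    blacklisted = set()

    def try_reserve(node_id):
        # Check both lists and reserve in one atomic step. With one lock
        # per list, two RMs could each pass both membership checks before
        # either of them reserved the node.
        with lock:
            if node_id in blacklisted or node_id in reserved:
                return False
            reserved.add(node_id)
            return True
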
@@ -64,7 +58,7 @@ class PlanetlabNode(LinuxNode):
                     authenticate on the website ",
                     flags = Flags.Credential)
 
-        pl_password = Attribute("password", 
+        pl_password = Attribute("plpassword", 
                         "PlanetLab account password, as \
                         the one used to authenticate on the website ",
                         flags = Flags.Credential)
@@ -98,13 +92,13 @@ class PlanetlabNode(LinuxNode):
                                         "other"],
                             flags = Flags.Filter)
 
-        site = Attribute("site", "Constrain the PlanetLab site this node \
-                should reside on.",
-                type = Types.Enumerate,
-                allowed = ["PLE",
-                            "PLC",
-                            "PLJ"],
-                flags = Flags.Filter)
+        #site = Attribute("site", "Constrain the PlanetLab site this node \
+        #        should reside on.",
+        #        type = Types.Enumerate,
+        #        allowed = ["PLE",
+        #                    "PLC",
+        #                    "PLJ"],
+        #        flags = Flags.Filter)
 
         min_reliability = Attribute("minReliability", "Constrain reliability \
                             while picking PlanetLab nodes. Specifies a lower \
@@ -178,7 +172,7 @@ class PlanetlabNode(LinuxNode):
         cls._register_attribute(pl_ptn)
         cls._register_attribute(pl_user)
         cls._register_attribute(pl_password)
-        cls._register_attribute(site)
+        #cls._register_attribute(site)
         cls._register_attribute(city)
         cls._register_attribute(country)
         cls._register_attribute(region)
@@ -204,7 +198,7 @@ class PlanetlabNode(LinuxNode):
     def plapi(self):
         if not self._plapi:
             pl_user = self.get("pluser")
-            pl_pass = self.get("password")
+            pl_pass = self.get("plpassword")
             pl_url = self.get("plcApiUrl")
             pl_ptn = self.get("plcApiPattern")
 
@@ -213,35 +207,40 @@ class PlanetlabNode(LinuxNode):
             
         return self._plapi
 
-    def discoverl(self):
+    def discover(self):
         """
         Based on the attributes defined by the user, discover the suitable nodes
         """
-        hostname = self.get("hostname")
+        hostname = self._get_hostname()
+        self.debug("Hostname %s" % hostname)
         if hostname:
             # the user specified one particular node to be provisioned
             # check with PLCAPI if it is alive
             node_id = self._query_if_alive(hostname=hostname)
             node_id = node_id.pop()
+            self.debug("Node id %s" % node_id)
 
-            # check that the node is not blacklisted or already being provision 
+            # check that the node is not blacklisted or being provisioned
             # by other RM
-            blist = PlanetlabNode.blacklist
-            plist = PlanetlabNode.provisionlist
-            if node_id not in blist and node_id not in plist:
+            with PlanetlabNode.lock:
+                plist = self.plapi.reserved()
+                blist = self.plapi.blacklisted()
+                self.debug("Reserved nodes: %s" % plist)
+                self.debug("Blacklisted nodes: %s" % blist)
+                if node_id not in blist and node_id not in plist:
                 
-                # check that is really alive, by performing ping
-                ping_ok = self._do_ping(node_id)
-                if not ping_ok:
-                    self._blacklist_node(node_id)
-                    self.fail_node_not_alive(hostname)
-                else:
-                    self._node_to_provision = node_id
-                    self._put_node_in_provision(node_id)
-                    super(PlanetlabNode, self).discover()
+                    # check that it is really alive by performing a ping
+                    ping_ok = self._do_ping(node_id)
+                    if not ping_ok:
+                        self._blacklist_node(node_id)
+                        self.fail_node_not_alive(hostname)
+                    else:
+                        self._put_node_in_provision(node_id)
+                        self._node_to_provision = node_id
+                        super(PlanetlabNode, self).discover()
                 
-            else:
-                self.fail_node_not_available(hostname)                
+                else:
+                    self.fail_node_not_available(hostname)
         
         else:
             # the user specifies constraints based on attributes, zero, one or 
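
In the hostname branch above, the membership checks, the ping, and the reservation now all run while PlanetlabNode.lock is held, so concurrent RMs serialize on the health check; that costs some parallelism but ensures no two RMs can ping and reserve the same node at once. A condensed sketch of the guarded flow (rm stands for a PlanetlabNode instance; not the literal method body):

    def reserve_alive_node(rm, node_id, hostname):
        with PlanetlabNode.lock:
            # membership checks and reservation form one critical section
            if node_id in rm.plapi.blacklisted() or node_id in rm.plapi.reserved():
                rm.fail_node_not_available(hostname)
            if not rm._do_ping(node_id):
                rm._blacklist_node(node_id)
                rm.fail_node_not_alive(hostname)
            rm._put_node_in_provision(node_id)
            rm._node_to_provision = node_id
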
@@ -258,18 +257,19 @@ class PlanetlabNode(LinuxNode):
             if nodes_inslice:
                 node_id = self._choose_random_node(nodes_inslice)
                 
-            if not node_id and nodes_not_inslice:
+            if not node_id:
                 # Either there were no matching nodes in the user's slice, or
                 # the nodes in the slice were blacklisted or being provisioned
                 # by another RM. Note that nodes_not_inslice is never empty.
                 node_id = self._choose_random_node(nodes_not_inslice)
-            if not node_id:
-                self.fail_not_enough_nodes()
 
-            self._node_to_provision = node_id
-            super(PlanetlabNode, self).discover()
+            if node_id:
+                self._node_to_provision = node_id
+                super(PlanetlabNode, self).discover()
+            else:
+                self.fail_not_enough_nodes()
             
-    def provisionl(self):
+    def provision(self):
         """
         Add node to user's slice after verifying that the node is functioning
         correctly
@@ -277,11 +277,20 @@ class PlanetlabNode(LinuxNode):
         provision_ok = False
         ssh_ok = False
         proc_ok = False
-        timeout = 1200
+        timeout = 120
 
         while not provision_ok:
             node = self._node_to_provision
-            self._set_hostname_attr(node)
+            # Wrap the hostname lookup in a try/except because MyPLC
+            # sometimes fails when retrieving the node's hostname
+            try:
+                self._set_hostname_attr(node)
+            except Exception:
+                with PlanetlabNode.lock:
+                    self._blacklist_node(node)
+                self.discover()
+                continue
+
             self._add_node_to_slice(node)
             
             # check ssh connection
@@ -302,8 +311,10 @@ class PlanetlabNode(LinuxNode):
                 # the timeout was reached without establishing an ssh connection
                 # the node is blacklisted, deleted from the slice, and a new
                 # node to provision is discovered
-                self._blacklist_node(node)
-                self._delete_node_from_slice(node)
+                with PlanetlabNode.lock:
+                    self._blacklist_node(node)
+                    self._delete_node_from_slice(node)
+                self.set('hostname', None)
                 self.discover()
                 continue
             
@@ -312,8 +323,10 @@ class PlanetlabNode(LinuxNode):
                 cmd = 'mount |grep proc'
                 ((out, err), proc) = self.execute(cmd)
                 if out.find("/proc type proc") < 0:
-                    self._blacklist_node(node)
-                    self._delete_node_from_slice(node)
+                    with PlanetlabNode.lock:
+                        self._blacklist_node(node)
+                        self._delete_node_from_slice(node)
+                    self.set('hostname', None)
                     self.discover()
                     continue
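
Both failure paths in provision(), the ssh timeout and the missing /proc mount, now share one recovery pattern: blacklist the node and drop it from the slice under the lock, clear the hostname attribute so discover() will not resolve back to the failed host, and loop. A condensed sketch of the pattern, where node_ok() is a hypothetical stand-in for the ssh and /proc checks:

    def provision_with_retry(rm, node_ok):
        provision_ok = False
        while not provision_ok:
            node = rm._node_to_provision
            if not node_ok(node):
                with PlanetlabNode.lock:
                    rm._blacklist_node(node)
                    rm._delete_node_from_slice(node)
                rm.set('hostname', None)   # forget the failed host
                rm.discover()              # picks a fresh candidate
                continue
            provision_ok = True
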
             
@@ -368,6 +381,11 @@ class PlanetlabNode(LinuxNode):
                 # filter nodes by range constraints e.g. max bandwidth
                 elif 'min' in attr_name or 'max' in attr_name:
                     nodes_id = self._filter_by_range_attr(attr_name, attr_value, filters, nodes_id)
+
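+        # without user-defined filters, every PlanetLab node is a candidate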
+        if not filters:
+            nodes = self.plapi.get_nodes()
+            for node in nodes:
+                nodes_id.append(node['node_id'])
                 
         return nodes_id
                     
@@ -473,7 +491,7 @@ class PlanetlabNode(LinuxNode):
         filters['run_level'] = 'boot'
         filters['boot_state'] = 'boot'
         filters['node_type'] = 'regular' 
-        filters['>last_contact'] =  int(time.time()) - 2*3600
+        #filters['>last_contact'] =  int(time.time()) - 2*3600
 
         # adding node_id or hostname to the filters to check for the particular
         # node
@@ -485,7 +503,7 @@ class PlanetlabNode(LinuxNode):
             alive_nodes_id = self._get_nodes_id(filters)
 
         if len(alive_nodes_id) == 0:
-            self.fail_discovery()
+            self.fail_node_not_alive(hostname)
         else:
             nodes_id = list()
             for node_id in alive_nodes_id:
@@ -499,9 +517,6 @@ class PlanetlabNode(LinuxNode):
         From the possible nodes for provision, choose randomly to decrease the
         probability of different RMs choosing the same node for provision
         """
-        blist = PlanetlabNode.blacklist
-        plist = PlanetlabNode.provisionlist
-
         size = len(nodes)
         while size:
             size = size - 1
@@ -511,22 +526,28 @@ class PlanetlabNode(LinuxNode):
 
             # check that the node is not blacklisted or being provisioned by
             # another RM, and ping it to check that it is really alive
-            if node_id not in blist and node_id not in plist:
-                ping_ok = self._do_ping(node_id)
-                if not ping_ok:
-                    self._blacklist_node(node_id)
-                else:
-                    # discovered node for provision, added to provision list
-                    self._put_node_in_provision(node_id)
-                    return node_id
+            with PlanetlabNode.lock:
+
+                blist = self.plapi.blacklisted()
+                plist = self.plapi.reserved()
+                if node_id not in blist and node_id not in plist:
+                    ping_ok = self._do_ping(node_id)
+                    print " ### ping_ok #### %s guid %s" % (ping_ok, self.guid)
+                    if not ping_ok:
+                        self._blacklist_node(node_id)
+                    else:
+                        # node discovered for provision; add it to the reserved list
+                        self._put_node_in_provision(node_id)
+                        self.debug("Node %s selected for provision" % node_id)
+                        return node_id
 
     def _get_nodes_id(self, filters):
         return self.plapi.get_nodes(filters, fields=['node_id'])
 
     def _add_node_to_slice(self, node_id):
-        self.warn(" Adding node to slice ")
+        self.info(" Selected node to provision ")
         slicename = self.get("username")
-        with PlanetlabNode.lock_slice:
+        with PlanetlabNode.lock:
             slice_nodes = self.plapi.get_slice_nodes(slicename)
             slice_nodes.append(node_id)
             self.plapi.add_slice_nodes(slicename, slice_nodes)
@@ -536,6 +557,17 @@ class PlanetlabNode(LinuxNode):
         slicename = self.get("username")
         self.plapi.delete_slice_node(slicename, [node])
 
+    def _get_hostname(self):
+        hostname = self.get("hostname")
+        ip = self.get("ip")
+        if hostname:
+            return hostname
+        elif ip:
+            hostname = sshfuncs.gethostbyname(ip)
+            return hostname
+        else:
+            return None
+
     def _set_hostname_attr(self, node):
         """
         Query PLCAPI for the hostname of a certain node id and set the
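
The new _get_hostname falls back to the ip attribute when hostname is unset. sshfuncs.gethostbyname is assumed here to behave like a forward DNS lookup that returns None on failure (an IP literal resolves to itself, so the result remains a valid ssh target); roughly the stdlib equivalent:

    import socket

    def gethostbyname_sketch(host):
        # Forward-resolve a hostname (or IP literal) to an IP string.
        # Return None instead of raising, matching how callers in this
        # file test the result with "if not ip".
        try:
            return socket.gethostbyname(host)
        except socket.gaierror:
            return None
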
@@ -552,7 +584,6 @@ class PlanetlabNode(LinuxNode):
         slicename = self.get("username")
         slice_nodes = self.plapi.get_slice_nodes(slicename)
         nodes_inslice = list(set(nodes_id) & set(slice_nodes))
-
         return nodes_inslice
 
     def _do_ping(self, node_id):
@@ -561,41 +592,43 @@ class PlanetlabNode(LinuxNode):
         """
         ping_ok = False
         ip = self._get_ip(node_id)
-        command = "ping -c2 %s | echo \"PING OK\"" % ip
+        print "ip de do_ping %s, guid %s" % (ip, self.guid)
+        if not ip: return ping_ok
+
+        command = "ping -c2 %s" % ip
 
         (out, err) = lexec(command)
-        if not out.find("PING OK") < 0:
+        print "out de do_ping %s, guid %s" % (out, self.guid)
+        if not out.find("2 received") < 0:
             ping_ok = True
-
+
+        self.debug("Ping result: %s" % ping_ok)
         return ping_ok 
 
     def _blacklist_node(self, node):
         """
         Add a malfunctioning node to the blacklist
         """
-        blist = PlanetlabNode.blacklist
-
         self.warn(" Blacklisting malfunctioning node ")
-        with PlanetlabNode.lock_blist:
-            blist.append(node)
+        self.plapi.blacklist_host(node)
 
     def _put_node_in_provision(self, node):
         """
         Add node to the list of nodes being provisioned, so that other RMs
         do not try to provision the same node again
         """
-        plist = PlanetlabNode.provisionlist
-
-        self.warn(" Provisioning node ")
-        with PlanetlabNode.lock_plist:
-            plist.append(node)
+        self.plapi.reserve_host(node)
 
     def _get_ip(self, node_id):
         """
         Query PLCAPI for the IP of a node with certain node id
         """
-        ip = self.plapi.get_interfaces({'node_id':node_id}, fields=['ip'])
-        ip = ip[0]['ip']
+        hostname = self.plapi.get_nodes(node_id, ['hostname'])[0]
+        print "#### HOSTNAME ##### %s ### guid %s " % (hostname['hostname'], self.guid)
+        ip = sshfuncs.gethostbyname(hostname['hostname'])
+        if not ip:
+            # Fail while trying to find the IP
+            return None
         return ip
 
     def fail_discovery(self):
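
A portability caveat on the new ping check: "2 received" matches Linux iputils output, but BSD and macOS print "2 packets received" and localized systems may print neither. A more robust sketch would key on ping's exit status, which is 0 when at least one reply arrived:

    import os
    import subprocess

    def ping_ok_sketch(ip):
        # hedged alternative to scraping ping output: exit status 0 means
        # at least one echo reply was received
        with open(os.devnull, "w") as devnull:
            return subprocess.call(["ping", "-c", "2", ip],
                                   stdout=devnull, stderr=devnull) == 0
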
@@ -604,16 +637,18 @@ class PlanetlabNode(LinuxNode):
         self.error(msg)
         raise RuntimeError, msg
 
-    def fail_node_not_alive(self, hostname):
-        msg = "Node %s not alive, pick another node" % hostname
+    def fail_node_not_alive(self, hostname=None):
+        self.fail()
+        msg = "Node %s not alive" % hostname
         raise RuntimeError, msg
     
     def fail_node_not_available(self, hostname):
-        msg = "Node %s not available for provisioning, pick another \
-                node" % hostname
+        self.fail()
+        msg = "Node %s not available for provisioning" % hostname
         raise RuntimeError, msg
 
     def fail_not_enough_nodes(self):
+        self.fail()
         msg = "Not enough nodes available for provisioning"
         raise RuntimeError, msg