Changing reschedule_delay internals
[nepi.git] / src / nepi / resources / planetlab / node.py
index 34e5488..dc70df5 100644 (file)
 
 from nepi.execution.attribute import Attribute, Flags, Types
 from nepi.execution.resource import ResourceManager, clsinit_copy, \
 
 from nepi.execution.attribute import Attribute, Flags, Types
 from nepi.execution.resource import ResourceManager, clsinit_copy, \
-        ResourceState, reschedule_delay 
+        ResourceState 
 from nepi.resources.linux.node import LinuxNode
 from nepi.resources.planetlab.plcapi import PLCAPIFactory 
 from nepi.util.execfuncs import lexec
 from nepi.util import sshfuncs
 
 from random import randint
 from nepi.resources.linux.node import LinuxNode
 from nepi.resources.planetlab.plcapi import PLCAPIFactory 
 from nepi.util.execfuncs import lexec
 from nepi.util import sshfuncs
 
 from random import randint
+import re
+import os
 import time
 import socket
 import threading
 import datetime
 import time
 import socket
 import threading
 import datetime
+import weakref
 
 @clsinit_copy
 class PlanetlabNode(LinuxNode):
 
 @clsinit_copy
 class PlanetlabNode(LinuxNode):
@@ -94,14 +97,6 @@ class PlanetlabNode(LinuxNode):
                                         "other"],
                             flags = Flags.Filter)
 
                                         "other"],
                             flags = Flags.Filter)
 
-        #site = Attribute("site", "Constrain the PlanetLab site this node \
-        #        should reside on.",
-        #        type = Types.Enumerate,
-        #        allowed = ["PLE",
-        #                    "PLC",
-        #                    "PLJ"],
-        #        flags = Flags.Filter)
-
         min_reliability = Attribute("minReliability", "Constrain reliability \
                             while picking PlanetLab nodes. Specifies a lower \
                             acceptable bound.",
         min_reliability = Attribute("minReliability", "Constrain reliability \
                             while picking PlanetLab nodes. Specifies a lower \
                             acceptable bound.",
@@ -169,21 +164,19 @@ class PlanetlabNode(LinuxNode):
                                     "year"],
                         flags = Flags.Filter)
 
                                     "year"],
                         flags = Flags.Filter)
 
-#        plblacklist = Attribute("blacklist", "Take into account the file plblacklist \
-#                        in the user's home directory under .nepi directory. This file \
-#                        contains a list of PL nodes to blacklist, and at the end \
-#                        of the experiment execution the new blacklisted nodes are added.",
-#                    type = Types.Bool,
-#                    default = True,
-#                    flags = Flags.ReadOnly)
-#
+        plblacklist = Attribute("persist_blacklist", "Take into account the file plblacklist \
+                        in the user's home directory under .nepi directory. This file \
+                        contains a list of PL nodes to blacklist, and at the end \
+                        of the experiment execution the new blacklisted nodes are added.",
+                    type = Types.Bool,
+                    default = False,
+                    flags = Flags.Global)
 
         cls._register_attribute(ip)
         cls._register_attribute(pl_url)
         cls._register_attribute(pl_ptn)
         cls._register_attribute(pl_user)
         cls._register_attribute(pl_password)
 
         cls._register_attribute(ip)
         cls._register_attribute(pl_url)
         cls._register_attribute(pl_ptn)
         cls._register_attribute(pl_user)
         cls._register_attribute(pl_password)
-        #cls._register_attribute(site)
         cls._register_attribute(city)
         cls._register_attribute(country)
         cls._register_attribute(region)
         cls._register_attribute(city)
         cls._register_attribute(country)
         cls._register_attribute(region)
@@ -198,10 +191,12 @@ class PlanetlabNode(LinuxNode):
         cls._register_attribute(min_cpu)
         cls._register_attribute(max_cpu)
         cls._register_attribute(timeframe)
         cls._register_attribute(min_cpu)
         cls._register_attribute(max_cpu)
         cls._register_attribute(timeframe)
+        cls._register_attribute(plblacklist)
 
     def __init__(self, ec, guid):
         super(PlanetlabNode, self).__init__(ec, guid)
 
 
     def __init__(self, ec, guid):
         super(PlanetlabNode, self).__init__(ec, guid)
 
+        self._ecobj = weakref.ref(ec)
         self._plapi = None
         self._node_to_provision = None
         self._slicenode = False
         self._plapi = None
         self._node_to_provision = None
         self._slicenode = False
@@ -211,6 +206,16 @@ class PlanetlabNode(LinuxNode):
             self.set("gateway", None)
             self.set("gatewayUser", None)
 
             self.set("gateway", None)
             self.set("gatewayUser", None)
 
+        # Blacklist file
+        nepi_home = os.path.join(os.path.expanduser("~"), ".nepi")
+        plblacklist_file = os.path.join(nepi_home, "plblacklist.txt")
+        if not os.path.exists(plblacklist_file):
+            if os.path.isdir(nepi_home):
+                open(plblacklist_file, 'w').close()
+            else:
+                os.makedirs(nepi_home)
+                open(plblacklist_file, 'w').close()
+
     def _skip_provision(self):
         pl_user = self.get("pluser")
         pl_pass = self.get("plpassword")
     def _skip_provision(self):
         pl_user = self.get("pluser")
         pl_pass = self.get("plpassword")
@@ -225,14 +230,15 @@ class PlanetlabNode(LinuxNode):
             pl_pass = self.get("plpassword")
             pl_url = self.get("plcApiUrl")
             pl_ptn = self.get("plcApiPattern")
             pl_pass = self.get("plpassword")
             pl_url = self.get("plcApiUrl")
             pl_ptn = self.get("plcApiPattern")
-
-            self._plapi =  PLCAPIFactory.get_api(pl_user, pl_pass, pl_url,
-                pl_ptn)
+            _plapi = PLCAPIFactory.get_api(pl_user, pl_pass, pl_url,
+                pl_ptn, self._ecobj())
             
             
-            if not self._plapi:
+            if not _plapi:
                 self.fail_plapi()
                 self.fail_plapi()
+        
+            self._plapi = weakref.ref(_plapi)
 
 
-        return self._plapi
+        return self._plapi()
 
     def do_discover(self):
         """
 
     def do_discover(self):
         """
@@ -324,7 +330,15 @@ class PlanetlabNode(LinuxNode):
             node = self._node_to_provision
             if not self._slicenode:
                 self._add_node_to_slice(node)
             node = self._node_to_provision
             if not self._slicenode:
                 self._add_node_to_slice(node)
-            
+                if self._check_if_in_slice([node]):
+                    self.debug( "Node added to slice" )
+                else:
+                    self.warning(" Could not add to slice ")
+                    with PlanetlabNode.lock:
+                        self._blacklist_node(node)
+                    self.do_discover()
+                    continue
+
                 # check ssh connection
                 t = 0 
                 while t < timeout and not ssh_ok:
                 # check ssh connection
                 t = 0 
                 while t < timeout and not ssh_ok:
@@ -332,10 +346,12 @@ class PlanetlabNode(LinuxNode):
                     cmd = 'echo \'GOOD NODE\''
                     ((out, err), proc) = self.execute(cmd)
                     if out.find("GOOD NODE") < 0:
                     cmd = 'echo \'GOOD NODE\''
                     ((out, err), proc) = self.execute(cmd)
                     if out.find("GOOD NODE") < 0:
+                        self.debug( "No SSH connection, waiting 60s" )
                         t = t + 60
                         time.sleep(60)
                         continue
                     else:
                         t = t + 60
                         time.sleep(60)
                         continue
                     else:
+                        self.debug( "SSH OK" )
                         ssh_ok = True
                         continue
             else:
                         ssh_ok = True
                         continue
             else:
@@ -349,7 +365,7 @@ class PlanetlabNode(LinuxNode):
                 # the node is blacklisted, deleted from the slice, and a new
                 # node to provision is discovered
                 with PlanetlabNode.lock:
                 # the node is blacklisted, deleted from the slice, and a new
                 # node to provision is discovered
                 with PlanetlabNode.lock:
-                    self.warn(" Could not SSH login ")
+                    self.warning(" Could not SSH login ")
                     self._blacklist_node(node)
                     #self._delete_node_from_slice(node)
                 self.do_discover()
                     self._blacklist_node(node)
                     #self._delete_node_from_slice(node)
                 self.do_discover()
@@ -365,7 +381,7 @@ class PlanetlabNode(LinuxNode):
                 if out1.find("/proc type proc") < 0 or \
                     "Read-only file system".lower() in err2.lower():
                     with PlanetlabNode.lock:
                 if out1.find("/proc type proc") < 0 or \
                     "Read-only file system".lower() in err2.lower():
                     with PlanetlabNode.lock:
-                        self.warn(" Corrupted file system ")
+                        self.warning(" Corrupted file system ")
                         self._blacklist_node(node)
                         #self._delete_node_from_slice(node)
                     self.do_discover()
                         self._blacklist_node(node)
                         #self._delete_node_from_slice(node)
                     self.do_discover()
@@ -382,6 +398,12 @@ class PlanetlabNode(LinuxNode):
             
         super(PlanetlabNode, self).do_provision()
 
             
         super(PlanetlabNode, self).do_provision()
 
+    def do_release(self):
+        super(PlanetlabNode, self).do_release()
+        if self.state == ResourceState.RELEASED and not self._skip_provision():
+            self.debug(" Releasing PLC API ")
+            self.plapi.release()
+
     def _filter_based_on_attributes(self):
         """
         Retrive the list of nodes ids that match user's constraints 
     def _filter_based_on_attributes(self):
         """
         Retrive the list of nodes ids that match user's constraints 
@@ -394,7 +416,6 @@ class PlanetlabNode(LinuxNode):
             'region' : 'region',
             'architecture' : 'arch',
             'operatingSystem' : 'fcdistro',
             'region' : 'region',
             'architecture' : 'arch',
             'operatingSystem' : 'fcdistro',
-            #'site' : 'pldistro',
             'minReliability' : 'reliability%s' % timeframe,
             'maxReliability' : 'reliability%s' % timeframe,
             'minBandwidth' : 'bw%s' % timeframe,
             'minReliability' : 'reliability%s' % timeframe,
             'maxReliability' : 'reliability%s' % timeframe,
             'minBandwidth' : 'bw%s' % timeframe,
@@ -537,7 +558,7 @@ class PlanetlabNode(LinuxNode):
                     ping_ok = self._do_ping(node_id)
                     if not ping_ok:
                         self._set_hostname_attr(node_id)
                     ping_ok = self._do_ping(node_id)
                     if not ping_ok:
                         self._set_hostname_attr(node_id)
-                        self.warn(" Node not responding PING ")
+                        self.warning(" Node not responding PING ")
                         self._blacklist_node(node_id)
                     else:
                         # discovered node for provision, added to provision list
                         self._blacklist_node(node_id)
                     else:
                         # discovered node for provision, added to provision list
@@ -552,20 +573,21 @@ class PlanetlabNode(LinuxNode):
         slicename = self.get("username")
         with PlanetlabNode.lock:
             slice_nodes = self.plapi.get_slice_nodes(slicename)
         slicename = self.get("username")
         with PlanetlabNode.lock:
             slice_nodes = self.plapi.get_slice_nodes(slicename)
+            self.debug(" Previous slice nodes %s " % slice_nodes)
             slice_nodes.append(node_id)
             self.plapi.add_slice_nodes(slicename, slice_nodes)
 
     def _delete_node_from_slice(self, node):
             slice_nodes.append(node_id)
             self.plapi.add_slice_nodes(slicename, slice_nodes)
 
     def _delete_node_from_slice(self, node):
-        self.warn(" Deleting node from slice ")
+        self.warning(" Deleting node from slice ")
         slicename = self.get("username")
         self.plapi.delete_slice_node(slicename, [node])
 
     def _get_hostname(self):
         hostname = self.get("hostname")
         slicename = self.get("username")
         self.plapi.delete_slice_node(slicename, [node])
 
     def _get_hostname(self):
         hostname = self.get("hostname")
-        ip = self.get("ip")
         if hostname:
             return hostname
         if hostname:
             return hostname
-        elif ip:
+        ip = self.get("ip")
+        if ip:
             hostname = socket.gethostbyaddr(ip)[0]
             self.set('hostname', hostname)
             return hostname
             hostname = socket.gethostbyaddr(ip)[0]
             self.set('hostname', hostname)
             return hostname
@@ -596,22 +618,21 @@ class PlanetlabNode(LinuxNode):
         """
         ping_ok = False
         ip = self._get_ip(node_id)
         """
         ping_ok = False
         ip = self._get_ip(node_id)
-        if not ip: return ping_ok
-
-        command = "ping -c4 %s" % ip
-
-        (out, err) = lexec(command)
-        if not str(out).find("2 received") or not str(out).find("3 received") or not \
-            str(out).find("4 received") < 0:
-            ping_ok = True
-        
+        if ip:
+            command = "ping -c4 %s" % ip
+            (out, err) = lexec(command)
+
+            m = re.search("(\d+)% packet loss", str(out))
+            if m and int(m.groups()[0]) < 50:
+                ping_ok = True
+       
         return ping_ok 
 
     def _blacklist_node(self, node):
         """
         Add node mal functioning node to blacklist
         """
         return ping_ok 
 
     def _blacklist_node(self, node):
         """
         Add node mal functioning node to blacklist
         """
-        self.warn(" Blacklisting malfunctioning node ")
+        self.warning(" Blacklisting malfunctioning node ")
         self.plapi.blacklist_host(node)
         if not self._hostname:
             self.set('hostname', None)
         self.plapi.blacklist_host(node)
         if not self._hostname:
             self.set('hostname', None)
@@ -627,9 +648,10 @@ class PlanetlabNode(LinuxNode):
         """
         Query PLCAPI for the IP of a node with certain node id
         """
         """
         Query PLCAPI for the IP of a node with certain node id
         """
-        hostname = self.plapi.get_nodes(node_id, ['hostname'])[0]
+        hostname = self.get("hostname") or \
+            self.plapi.get_nodes(node_id, ['hostname'])[0]['hostname']
         try:
         try:
-            ip = sshfuncs.gethostbyname(hostname['hostname'])
+            ip = sshfuncs.gethostbyname(hostname)
         except:
             # Fail while trying to find the IP
             return None
         except:
             # Fail while trying to find the IP
             return None