Changing reschedule_delay internals
[nepi.git] / src / nepi / resources / planetlab / node.py
index 34e5488..dc70df5 100644 (file)
 
 from nepi.execution.attribute import Attribute, Flags, Types
 from nepi.execution.resource import ResourceManager, clsinit_copy, \
-        ResourceState, reschedule_delay 
+        ResourceState 
 from nepi.resources.linux.node import LinuxNode
 from nepi.resources.planetlab.plcapi import PLCAPIFactory 
 from nepi.util.execfuncs import lexec
 from nepi.util import sshfuncs
 
 from random import randint
+import re
+import os
 import time
 import socket
 import threading
 import datetime
+import weakref
 
 @clsinit_copy
 class PlanetlabNode(LinuxNode):
@@ -94,14 +97,6 @@ class PlanetlabNode(LinuxNode):
                                         "other"],
                             flags = Flags.Filter)
 
-        #site = Attribute("site", "Constrain the PlanetLab site this node \
-        #        should reside on.",
-        #        type = Types.Enumerate,
-        #        allowed = ["PLE",
-        #                    "PLC",
-        #                    "PLJ"],
-        #        flags = Flags.Filter)
-
         min_reliability = Attribute("minReliability", "Constrain reliability \
                             while picking PlanetLab nodes. Specifies a lower \
                             acceptable bound.",
@@ -169,21 +164,19 @@ class PlanetlabNode(LinuxNode):
                                     "year"],
                         flags = Flags.Filter)
 
-#        plblacklist = Attribute("blacklist", "Take into account the file plblacklist \
-#                        in the user's home directory under .nepi directory. This file \
-#                        contains a list of PL nodes to blacklist, and at the end \
-#                        of the experiment execution the new blacklisted nodes are added.",
-#                    type = Types.Bool,
-#                    default = True,
-#                    flags = Flags.ReadOnly)
-#
+        plblacklist = Attribute("persist_blacklist", "Take into account the file plblacklist \
+                        in the user's home directory under .nepi directory. This file \
+                        contains a list of PL nodes to blacklist, and at the end \
+                        of the experiment execution the new blacklisted nodes are added.",
+                    type = Types.Bool,
+                    default = False,
+                    flags = Flags.Global)
 
         cls._register_attribute(ip)
         cls._register_attribute(pl_url)
         cls._register_attribute(pl_ptn)
         cls._register_attribute(pl_user)
         cls._register_attribute(pl_password)
-        #cls._register_attribute(site)
         cls._register_attribute(city)
         cls._register_attribute(country)
         cls._register_attribute(region)
@@ -198,10 +191,12 @@ class PlanetlabNode(LinuxNode):
         cls._register_attribute(min_cpu)
         cls._register_attribute(max_cpu)
         cls._register_attribute(timeframe)
+        cls._register_attribute(plblacklist)
 
     def __init__(self, ec, guid):
         super(PlanetlabNode, self).__init__(ec, guid)
 
+        self._ecobj = weakref.ref(ec)
         self._plapi = None
         self._node_to_provision = None
         self._slicenode = False
@@ -211,6 +206,16 @@ class PlanetlabNode(LinuxNode):
             self.set("gateway", None)
             self.set("gatewayUser", None)
 
+        # Blacklist file
+        nepi_home = os.path.join(os.path.expanduser("~"), ".nepi")
+        plblacklist_file = os.path.join(nepi_home, "plblacklist.txt")
+        if not os.path.exists(plblacklist_file):
+            if os.path.isdir(nepi_home):
+                open(plblacklist_file, 'w').close()
+            else:
+                os.makedirs(nepi_home)
+                open(plblacklist_file, 'w').close()
+
     def _skip_provision(self):
         pl_user = self.get("pluser")
         pl_pass = self.get("plpassword")
@@ -225,14 +230,15 @@ class PlanetlabNode(LinuxNode):
             pl_pass = self.get("plpassword")
             pl_url = self.get("plcApiUrl")
             pl_ptn = self.get("plcApiPattern")
-
-            self._plapi =  PLCAPIFactory.get_api(pl_user, pl_pass, pl_url,
-                pl_ptn)
+            _plapi = PLCAPIFactory.get_api(pl_user, pl_pass, pl_url,
+                pl_ptn, self._ecobj())
             
-            if not self._plapi:
+            if not _plapi:
                 self.fail_plapi()
+        
+            self._plapi = weakref.ref(_plapi)
 
-        return self._plapi
+        return self._plapi()
 
     def do_discover(self):
         """
@@ -324,7 +330,15 @@ class PlanetlabNode(LinuxNode):
             node = self._node_to_provision
             if not self._slicenode:
                 self._add_node_to_slice(node)
-            
+                if self._check_if_in_slice([node]):
+                    self.debug( "Node added to slice" )
+                else:
+                    self.warning(" Could not add to slice ")
+                    with PlanetlabNode.lock:
+                        self._blacklist_node(node)
+                    self.do_discover()
+                    continue
+
                 # check ssh connection
                 t = 0 
                 while t < timeout and not ssh_ok:
@@ -332,10 +346,12 @@ class PlanetlabNode(LinuxNode):
                     cmd = 'echo \'GOOD NODE\''
                     ((out, err), proc) = self.execute(cmd)
                     if out.find("GOOD NODE") < 0:
+                        self.debug( "No SSH connection, waiting 60s" )
                         t = t + 60
                         time.sleep(60)
                         continue
                     else:
+                        self.debug( "SSH OK" )
                         ssh_ok = True
                         continue
             else:
@@ -349,7 +365,7 @@ class PlanetlabNode(LinuxNode):
                 # the node is blacklisted, deleted from the slice, and a new
                 # node to provision is discovered
                 with PlanetlabNode.lock:
-                    self.warn(" Could not SSH login ")
+                    self.warning(" Could not SSH login ")
                     self._blacklist_node(node)
                     #self._delete_node_from_slice(node)
                 self.do_discover()
@@ -365,7 +381,7 @@ class PlanetlabNode(LinuxNode):
                 if out1.find("/proc type proc") < 0 or \
                     "Read-only file system".lower() in err2.lower():
                     with PlanetlabNode.lock:
-                        self.warn(" Corrupted file system ")
+                        self.warning(" Corrupted file system ")
                         self._blacklist_node(node)
                         #self._delete_node_from_slice(node)
                     self.do_discover()
@@ -382,6 +398,12 @@ class PlanetlabNode(LinuxNode):
             
         super(PlanetlabNode, self).do_provision()
 
+    def do_release(self):
+        super(PlanetlabNode, self).do_release()
+        if self.state == ResourceState.RELEASED and not self._skip_provision():
+            self.debug(" Releasing PLC API ")
+            self.plapi.release()
+
     def _filter_based_on_attributes(self):
         """
         Retrive the list of nodes ids that match user's constraints 
@@ -394,7 +416,6 @@ class PlanetlabNode(LinuxNode):
             'region' : 'region',
             'architecture' : 'arch',
             'operatingSystem' : 'fcdistro',
-            #'site' : 'pldistro',
             'minReliability' : 'reliability%s' % timeframe,
             'maxReliability' : 'reliability%s' % timeframe,
             'minBandwidth' : 'bw%s' % timeframe,
@@ -537,7 +558,7 @@ class PlanetlabNode(LinuxNode):
                     ping_ok = self._do_ping(node_id)
                     if not ping_ok:
                         self._set_hostname_attr(node_id)
-                        self.warn(" Node not responding PING ")
+                        self.warning(" Node not responding PING ")
                         self._blacklist_node(node_id)
                     else:
                         # discovered node for provision, added to provision list
@@ -552,20 +573,21 @@ class PlanetlabNode(LinuxNode):
         slicename = self.get("username")
         with PlanetlabNode.lock:
             slice_nodes = self.plapi.get_slice_nodes(slicename)
+            self.debug(" Previous slice nodes %s " % slice_nodes)
             slice_nodes.append(node_id)
             self.plapi.add_slice_nodes(slicename, slice_nodes)
 
     def _delete_node_from_slice(self, node):
-        self.warn(" Deleting node from slice ")
+        self.warning(" Deleting node from slice ")
         slicename = self.get("username")
         self.plapi.delete_slice_node(slicename, [node])
 
     def _get_hostname(self):
         hostname = self.get("hostname")
-        ip = self.get("ip")
         if hostname:
             return hostname
-        elif ip:
+        ip = self.get("ip")
+        if ip:
             hostname = socket.gethostbyaddr(ip)[0]
             self.set('hostname', hostname)
             return hostname
@@ -596,22 +618,21 @@ class PlanetlabNode(LinuxNode):
         """
         ping_ok = False
         ip = self._get_ip(node_id)
-        if not ip: return ping_ok
-
-        command = "ping -c4 %s" % ip
-
-        (out, err) = lexec(command)
-        if not str(out).find("2 received") or not str(out).find("3 received") or not \
-            str(out).find("4 received") < 0:
-            ping_ok = True
-        
+        if ip:
+            command = "ping -c4 %s" % ip
+            (out, err) = lexec(command)
+
+            m = re.search("(\d+)% packet loss", str(out))
+            if m and int(m.groups()[0]) < 50:
+                ping_ok = True
+       
         return ping_ok 
 
     def _blacklist_node(self, node):
         """
         Add node mal functioning node to blacklist
         """
-        self.warn(" Blacklisting malfunctioning node ")
+        self.warning(" Blacklisting malfunctioning node ")
         self.plapi.blacklist_host(node)
         if not self._hostname:
             self.set('hostname', None)
@@ -627,9 +648,10 @@ class PlanetlabNode(LinuxNode):
         """
         Query PLCAPI for the IP of a node with certain node id
         """
-        hostname = self.plapi.get_nodes(node_id, ['hostname'])[0]
+        hostname = self.get("hostname") or \
+            self.plapi.get_nodes(node_id, ['hostname'])[0]['hostname']
         try:
-            ip = sshfuncs.gethostbyname(hostname['hostname'])
+            ip = sshfuncs.gethostbyname(hostname)
         except:
             # Fail while trying to find the IP
             return None