Corrections to chapters 1 and 2
[nepi.git] / src / nepi / resources / planetlab / node.py
index c518fab..2e4c2bd 100644 (file)
@@ -19,8 +19,8 @@
 #         Lucia Guevgeozian <lucia.guevgeozian_odizzio@inria.fr>
 
 from nepi.execution.attribute import Attribute, Flags, Types
-from nepi.execution.resource import ResourceManager, clsinit_copy, ResourceState, \
-        reschedule_delay
+from nepi.execution.resource import ResourceManager, clsinit_copy, \
+        ResourceState, reschedule_delay 
 from nepi.resources.linux.node import LinuxNode
 from nepi.resources.planetlab.plcapi import PLCAPIFactory 
 from nepi.util.execfuncs import lexec
@@ -29,6 +29,7 @@ from nepi.util import sshfuncs
 from random import randint
 import time
 import threading
+import datetime
 
 @clsinit_copy
 class PlanetlabNode(LinuxNode):
@@ -187,13 +188,13 @@ class PlanetlabNode(LinuxNode):
         cls._register_attribute(min_cpu)
         cls._register_attribute(max_cpu)
         cls._register_attribute(timeframe)
-        
 
     def __init__(self, ec, guid):
         super(PlanetlabNode, self).__init__(ec, guid)
 
         self._plapi = None
         self._node_to_provision = None
+        self._slicenode = False
     
     @property
     def plapi(self):
@@ -206,28 +207,28 @@ class PlanetlabNode(LinuxNode):
             self._plapi =  PLCAPIFactory.get_api(pl_user, pl_pass, pl_url,
                     pl_ptn)
             
+            if not self._plapi:
+                self.fail_plapi()
+
         return self._plapi
 
-    def discover(self):
+    def do_discover(self):
         """
-        Based on the attributes defined by the user, discover the suitable nodes
+        Based on the attributes defined by the user, discover the suitable 
+        nodes for provision.
         """
         hostname = self._get_hostname()
-        print self.guid, hostname 
         if hostname:
             # the user specified one particular node to be provisioned
             # check with PLCAPI if it is alvive
             node_id = self._query_if_alive(hostname=hostname)
             node_id = node_id.pop()
-            print self.guid, node_id
 
             # check that the node is not blacklisted or being provisioned
             # by other RM
             with PlanetlabNode.lock:
                 plist = self.plapi.reserved()
                 blist = self.plapi.blacklisted()
-                print self.guid,plist
-                print self.guid,blist
                 if node_id not in blist and node_id not in plist:
                 
                     # check that is really alive, by performing ping
@@ -236,9 +237,11 @@ class PlanetlabNode(LinuxNode):
                         self._blacklist_node(node_id)
                         self.fail_node_not_alive(hostname)
                     else:
+                        if self._check_if_in_slice([node_id]):
+                            self._slicenode = True
                         self._put_node_in_provision(node_id)
                         self._node_to_provision = node_id
-                        super(PlanetlabNode, self).discover()
+                        super(PlanetlabNode, self).do_discover()
                 
                 else:
                     self.fail_node_not_available(hostname)
@@ -257,20 +260,30 @@ class PlanetlabNode(LinuxNode):
             node_id = None
             if nodes_inslice:
                 node_id = self._choose_random_node(nodes_inslice)
+                self._slicenode = True                
                 
             if not node_id:
                 # Either there were no matching nodes in the user's slice, or
                 # the nodes in the slice  were blacklisted or being provisioned
                 # by other RM. Note nodes_not_inslice is never empty
                 node_id = self._choose_random_node(nodes_not_inslice)
+                self._slicenode = False
 
             if node_id:
                 self._node_to_provision = node_id
-                super(PlanetlabNode, self).discover()
+                try:
+                    self._set_hostname_attr(node_id)
+                    self.info(" Selected node to provision ")
+                except:
+                    with PlanetlabNode.lock:
+                        self._blacklist_node(node_id)
+                        self.do_discover()
+                super(PlanetlabNode, self).do_discover()
             else:
                self.fail_not_enough_nodes() 
             
-    def provision(self):
+    def do_provision(self):
         """
         Add node to user's slice after verifing that the node is functioning
         correctly
@@ -278,45 +291,42 @@ class PlanetlabNode(LinuxNode):
         provision_ok = False
         ssh_ok = False
         proc_ok = False
-        timeout = 120
+        timeout = 1800
 
         while not provision_ok:
             node = self._node_to_provision
-            # Adding try catch to set hostname because sometimes MyPLC fails
-            # when trying to retrive node's hostname
-            try:
-                self._set_hostname_attr(node)
-            except:
-                with PlanetlabNode.lock:
-                    self._blacklist_node(node)
-                self.discover()
-                continue
-
-            self._add_node_to_slice(node)
+            if not self._slicenode:
+                self._add_node_to_slice(node)
             
-            # check ssh connection
-            t = 0 
-            while t < timeout and not ssh_ok:
-
+                # check ssh connection
+                t = 0 
+                while t < timeout and not ssh_ok:
+
+                    cmd = 'echo \'GOOD NODE\''
+                    ((out, err), proc) = self.execute(cmd)
+                    if out.find("GOOD NODE") < 0:
+                        t = t + 60
+                        time.sleep(60)
+                        continue
+                    else:
+                        ssh_ok = True
+                        continue
+            else:
                 cmd = 'echo \'GOOD NODE\''
                 ((out, err), proc) = self.execute(cmd)
-                if out.find("GOOD NODE") < 0:
-                    t = t + 60
-                    time.sleep(60)
-                    continue
-                else:
+                if not out.find("GOOD NODE") < 0:
                     ssh_ok = True
-                    continue
 
             if not ssh_ok:
                 # the timeout was reach without establishing ssh connection
                 # the node is blacklisted, deleted from the slice, and a new
                 # node to provision is discovered
                 with PlanetlabNode.lock:
+                    self.warn(" Could not SSH login ")
                     self._blacklist_node(node)
-                    self._delete_node_from_slice(node)
+                    #self._delete_node_from_slice(node)
                 self.set('hostname', None)
-                self.discover()
+                self.do_discover()
                 continue
             
             # check /proc directory is mounted (ssh_ok = True)
@@ -325,10 +335,11 @@ class PlanetlabNode(LinuxNode):
                 ((out, err), proc) = self.execute(cmd)
                 if out.find("/proc type proc") < 0:
                     with PlanetlabNode.lock:
+                        self.warn(" Could not find directory /proc ")
                         self._blacklist_node(node)
-                        self._delete_node_from_slice(node)
+                        #self._delete_node_from_slice(node)
                     self.set('hostname', None)
-                    self.discover()
+                    self.do_discover()
                     continue
             
                 else:
@@ -337,7 +348,7 @@ class PlanetlabNode(LinuxNode):
                     ip = self._get_ip(node)
                     self.set("ip", ip)
             
-        super(PlanetlabNode, self).provision()
+        super(PlanetlabNode, self).do_provision()
 
     def _filter_based_on_attributes(self):
         """
@@ -387,7 +398,7 @@ class PlanetlabNode(LinuxNode):
             nodes = self.plapi.get_nodes()
             for node in nodes:
                 nodes_id.append(node['node_id'])
-                
+        
         return nodes_id
                     
 
@@ -504,7 +515,7 @@ class PlanetlabNode(LinuxNode):
             alive_nodes_id = self._get_nodes_id(filters)
 
         if len(alive_nodes_id) == 0:
-            self.fail_node_not_alive(self, hostname)
+            self.fail_node_not_alive(hostname)
         else:
             nodes_id = list()
             for node_id in alive_nodes_id:
@@ -533,20 +544,21 @@ class PlanetlabNode(LinuxNode):
                 plist = self.plapi.reserved()
                 if node_id not in blist and node_id not in plist:
                     ping_ok = self._do_ping(node_id)
-                    print " ### ping_ok #### %s guid %s" % (ping_ok, self.guid)
                     if not ping_ok:
+                        self._set_hostname_attr(node_id)
+                        self.warn(" Node not responding PING ")
                         self._blacklist_node(node_id)
+                        self.set('hostname', None)
                     else:
                         # discovered node for provision, added to provision list
                         self._put_node_in_provision(node_id)
-                        print "node_id %s , guid %s" % (node_id, self.guid)
                         return node_id
 
     def _get_nodes_id(self, filters):
         return self.plapi.get_nodes(filters, fields=['node_id'])
 
     def _add_node_to_slice(self, node_id):
-        self.info(" Selected node to provision ")
+        self.info(" Adding node to slice ")
         slicename = self.get("username")
         with PlanetlabNode.lock:
             slice_nodes = self.plapi.get_slice_nodes(slicename)
@@ -593,17 +605,14 @@ class PlanetlabNode(LinuxNode):
         """
         ping_ok = False
         ip = self._get_ip(node_id)
-        print "ip de do_ping %s, guid %s" % (ip, self.guid)
         if not ip: return ping_ok
 
         command = "ping -c2 %s" % ip
 
         (out, err) = lexec(command)
-        print "out de do_ping %s, guid %s" % (out, self.guid)
         if not out.find("2 received") < 0:
             ping_ok = True
         
-        print "ping_ok de do_ping %s, guid %s" % (ping_ok, self.guid)
         return ping_ok 
 
     def _blacklist_node(self, node):
@@ -625,34 +634,34 @@ class PlanetlabNode(LinuxNode):
         Query PLCAPI for the IP of a node with certain node id
         """
         hostname = self.plapi.get_nodes(node_id, ['hostname'])[0]
-        print "#### HOSTNAME ##### %s ### guid %s " % (hostname['hostname'], self.guid)
-        ip = sshfuncs.gethostbyname(hostname['hostname'])
-        if not ip:
+        try:
+            ip = sshfuncs.gethostbyname(hostname['hostname'])
+        except:
             # Fail while trying to find the IP
             return None
         return ip
 
     def fail_discovery(self):
-        self.fail()
         msg = "Discovery failed. No candidates found for node"
         self.error(msg)
         raise RuntimeError, msg
 
     def fail_node_not_alive(self, hostname=None):
-        self.fail()
         msg = "Node %s not alive" % hostname
         raise RuntimeError, msg
     
     def fail_node_not_available(self, hostname):
-        self.fail()
         msg = "Node %s not available for provisioning" % hostname
         raise RuntimeError, msg
 
     def fail_not_enough_nodes(self):
-        self.fail()
         msg = "Not enough nodes available for provisioning"
         raise RuntimeError, msg
 
+    def fail_plapi(self):
+        msg = "Failing while trying to instanciate the PLC API"
+        raise RuntimeError, msg
+
     def valid_connection(self, guid):
         # TODO: Validate!
         return True