Minor typos
[nepi.git] / src / neco / resources / linux / node.py
index 4907c21..3f30633 100644 (file)
@@ -15,9 +15,11 @@ import threading
 # TODO: Verify files and dirs exists already
 # TODO: Blacklist nodes!
 # TODO: Unify delays!!
+# TODO: Validate outcome of uploads!! 
 
 reschedule_delay = "0.5s"
 
+
 @clsinit
 class LinuxNode(ResourceManager):
     _rtype = "LinuxNode"
@@ -79,18 +81,16 @@ class LinuxNode(ResourceManager):
 
     @property
     def home(self):
-        return self.get("home") or "/tmp"
+        return self.get("home") or ""
 
     @property
-    def exp_dir(self):
-        exp_dir = os.path.join(self.home, self.ec.exp_id)
-        return exp_dir if exp_dir.startswith('/') or \
-                exp_dir.startswith("~/") else "~/"
+    def exp_home(self):
+        return os.path.join(self.home, self.ec.exp_id)
 
     @property
     def node_home(self):
         node_home = "node-%d" % self.guid
-        return os.path.join(self.exp_dir, node_home)
+        return os.path.join(self.exp_home, node_home)
 
     @property
     def os(self):
@@ -131,8 +131,9 @@ class LinuxNode(ResourceManager):
     def provision(self, filters = None):
         if not self.is_alive():
             self._state = ResourceState.FAILED
-            self.error("Deploy failed. Unresponsive node")
-            return
+            msg = "Deploy failed. Unresponsive node %s" % self.get("hostname")
+            self.error(msg)
+            raise RuntimeError, msg
 
         if self.get("cleanProcesses"):
             self.clean_processes()
@@ -197,10 +198,15 @@ class LinuxNode(ResourceManager):
             
     def clean_home(self):
         self.info("Cleaning up home")
-
-        cmd = ("cd %s ; " % self.home +
-            "find . -maxdepth 1  \( -name '.cache' -o -name '.local' -o -name '.config' -o -name 'nepi-*' \)"+
-            " -execdir rm -rf {} + ")
+        
+        cmd = (
+            # "find . -maxdepth 1  \( -name '.cache' -o -name '.local' -o -name '.config' -o -name 'nepi-*' \)" +
+            "find . -maxdepth 1 -name 'nepi-*' " +
+            " -execdir rm -rf {} + "
+            )
+            
+        if self.home:
+            cmd = "cd %s ; " % self.home + cmd
 
         out = err = ""
         (out, err), proc = self.execute(cmd, with_lock = True)
@@ -407,29 +413,28 @@ class LinuxNode(ResourceManager):
 
         out = err = ""
         try:
-            (out, err), proc = self.execute("echo 'ALIVE'", with_lock = True)
+            # TODO: Fix the NOTALIVE fallback; it practically never runs because `echo 'ALIVE'` succeeds, so `||` is skipped.
+            (out, err), proc = self.execute("echo 'ALIVE' || (echo 'NOTALIVE') >&2", retry = 5, 
+                    with_lock = True)
         except:
             import traceback
             trace = traceback.format_exc()
-            msg = "Unresponsive host "
-            self.warn(msg, out, trace)
+            msg = "Unresponsive host  %s " % err
+            self.error(msg, out, trace)
             return False
 
         if out.strip().startswith('ALIVE'):
             return True
         else:
             msg = "Unresponsive host "
-            self.warn(msg, out, err)
+            self.error(msg, out, err)
             return False
 
-            # TODO!
-            #if self.check_bad_host(out,err):
-            #    self.blacklist()
-
     def copy(self, src, dst):
         if self.localhost:
             (out, err), proc =  execfuncs.lcopy(source, dest, 
-                    recursive = True)
+                    recursive = True,
+                    strict_host_checking = False)
         else:
             with self._lock:
                 (out, err), proc = sshfuncs.rcopy(
@@ -437,7 +442,8 @@ class LinuxNode(ResourceManager):
                     port = self.get("port"),
                     identity = self.get("identity"),
                     server_key = self.get("serverKey"),
-                    recursive = True)
+                    recursive = True,
+                    strict_host_checking = False)
 
         return (out, err), proc
 
@@ -451,6 +457,7 @@ class LinuxNode(ResourceManager):
             retry = 3,
             err_on_timeout = True,
             connect_timeout = 30,
+            strict_host_checking = False,
             persistent = True,
             with_lock = False
             ):
@@ -484,7 +491,8 @@ class LinuxNode(ResourceManager):
                         retry = retry,
                         err_on_timeout = err_on_timeout,
                         connect_timeout = connect_timeout,
-                        persistent = persistent
+                        persistent = persistent,
+                        strict_host_checking = strict_host_checking
                         )
             else:
                 (out, err), proc = sshfuncs.rexec(
@@ -511,7 +519,7 @@ class LinuxNode(ResourceManager):
 
     def run(self, command, 
             home = None,
-            create_home = True,
+            create_home = False,
             pidfile = "pid",
             stdin = None, 
             stdout = 'stdout',