PlanetLab bugfixes: ssh connection timeouts and ControlPath key generation
author: Alina Quereilhac <alina.quereilhac@inria.fr>
Tue, 9 Apr 2013 15:23:36 +0000 (17:23 +0200)
committer: Alina Quereilhac <alina.quereilhac@inria.fr>
Tue, 9 Apr 2013 15:23:36 +0000 (17:23 +0200)
src/nepi/testbeds/planetlab/execute.py
src/nepi/testbeds/planetlab/node.py
src/nepi/testbeds/planetlab/util.py
src/nepi/util/server.py

index 9ae17b4..e39fee0 100644 (file)
@@ -7,6 +7,7 @@ from nepi.util.constants import TIME_NOW
 from nepi.util.graphtools import mst
 from nepi.util import ipaddr2
 from nepi.util import environ
+from nepi.util import server
 from nepi.util.parallel import ParallelRun
 import threading
 import sys
@@ -421,7 +422,7 @@ class TestbedController(testbed_impl.TestbedController):
                 # Obtain all IPs in numeric format
                 # (which means faster distance computations)
                 for dep in group:
-                    dep._ip = socket.gethostbyname(dep.node.hostname)
+                    dep._ip = server.gethostbyname(dep.node.hostname)
                     dep._ip_n = struct.unpack('!L', socket.inet_aton(dep._ip))[0]
                 
                 # Compute plan
index 0710a1a..c4695e0 100644 (file)
@@ -296,7 +296,7 @@ class Node(object):
 
             def resolvable(node_id):
                 try:
-                    addr = socket.gethostbyname(hostnames[node_id])
+                    addr = server.gethostbyname(hostnames[node_id])
                     return addr is not None
                 except:
                     return False
@@ -436,7 +436,7 @@ class Node(object):
             orig_attrs['server_key'] = self.server_key
             self.server_key = info['ssh_rsa_key']
         
-        self.hostip = socket.gethostbyname(self.hostname)
+        self.hostip = server.gethostbyname(self.hostname)
         
         try:
             self.__orig_attrs
@@ -483,7 +483,7 @@ class Node(object):
                 }
             else:
                 rpmFusion = ''
-            
+           
             if rpmFusion:
                 (out,err),proc = server.popen_ssh_command(
                     rpmFusion,
@@ -499,7 +499,7 @@ class Node(object):
                 if proc.wait():
                     if self.check_bad_host(out,err):
                         self.blacklist()
-                    raise RuntimeError, "Failed to set up application: %s %s" % (out,err,)
+                    raise RuntimeError, "Failed to set up application on host %s: %s %s" % (self.hostname, out,err,)
             
             # Launch p2p yum dependency installer
             self._yum_dependencies.async_setup()
@@ -612,7 +612,7 @@ class Node(object):
         self._logger.info("Cleaning up home on %s", self.hostname)
         
         cmds = [
-            "find . -maxdepth 1 -name 'nepi-*' -execdir rm -rf {} + "
+            "find . -maxdepth 1  \( -name '.cache' -o -name '.local' -o -name '.config' -o -name 'nepi-*' \) -execdir rm -rf {} + "
         ]
 
         for cmd in cmds:
index 30fcf9d..e320c4c 100644 (file)
@@ -7,6 +7,7 @@ from nepi.util.constants import TIME_NOW
 from nepi.util.graphtools import mst
 from nepi.util import ipaddr2
 from nepi.util import environ
+from nepi.util import server
 from nepi.util.parallel import ParallelRun
 import sys
 import os
@@ -108,7 +109,7 @@ def getSpanningTree(nodes, root = None, maxbranching = 2, hostgetter = operator.
     # Obtain all IPs in numeric format
     # (which means faster distance computations)
     for node in nodes:
-        node._ip = socket.gethostbyname(hostgetter(node))
+        node._ip = server.gethostbyname(hostgetter(node))
         node._ip_n = struct.unpack('!L', socket.inet_aton(node._ip))[0]
     
     # Compute plan
index 0ece803..918a3e0 100644 (file)
@@ -43,6 +43,13 @@ SHELL_SAFE = re.compile('^[-a-zA-Z0-9_=+:.,/]*$')
 
 hostbyname_cache = dict()
 
+def gethostbyname(host):
+    hostbyname = hostbyname_cache.get(host)
+    if not hostbyname:
+        hostbyname = socket.gethostbyname(host)
+        hostbyname_cache[host] = hostbyname
+    return hostbyname
+
 def openssh_has_persist():
     global OPENSSH_HAS_PERSIST
     if OPENSSH_HAS_PERSIST is None:
@@ -572,10 +579,7 @@ def _make_server_key_args(server_key, host, port, args):
     # Create a temporary server key file
     tmp_known_hosts = tempfile.NamedTemporaryFile()
    
-    hostbyname = hostbyname_cache.get(host)
-    if not hostbyname:
-        hostbyname = socket.gethostbyname(host)
-        hostbyname_cache[host] = hostbyname
+    hostbyname = gethostbyname(host) 
 
     # Add the intended host key
     tmp_known_hosts.write('%s,%s %s\n' % (host, hostbyname, server_key))
@@ -594,12 +598,6 @@ def _make_server_key_args(server_key, host, port, args):
     
     return tmp_known_hosts
 
-def make_connkey(user, host, port):
-    connkey = repr((user,host,port)).encode("base64").strip().replace('/','.')
-    if len(connkey) > 60:
-        connkey = hashlib.sha1(connkey).hexdigest()
-    return connkey
-
 def popen_ssh_command(command, host, port, user, agent, 
         stdin="", 
         ident_key = None,
@@ -608,7 +606,7 @@ def popen_ssh_command(command, host, port, user, agent,
         timeout = None,
         retry = 0,
         err_on_timeout = True,
-        connect_timeout = 1200,
+        connect_timeout = 900,
         persistent = True,
         hostip = None):
     """
@@ -618,10 +616,11 @@ def popen_ssh_command(command, host, port, user, agent,
         print "ssh", host, command
     
     tmp_known_hosts = None
-    connkey = make_connkey(user,host,port)
     args = ['ssh', '-C',
             # Don't bother with localhost. Makes test easier
             '-o', 'NoHostAuthenticationForLocalhost=yes',
+            # XXX: Security vulnerability
+            #'-o', 'StrictHostKeyChecking=no',
             '-o', 'ConnectTimeout=%d' % (int(connect_timeout),),
             '-o', 'ConnectionAttempts=3',
             '-o', 'ServerAliveInterval=30',
@@ -630,7 +629,7 @@ def popen_ssh_command(command, host, port, user, agent,
     if persistent and openssh_has_persist():
         args.extend([
             '-o', 'ControlMaster=auto',
-            '-o', 'ControlPath=/tmp/nepi_ssh_pl_%s' % ( connkey, ),
+            '-o', 'ControlPath=/tmp/nepi_ssh-%r@%h:%p',
             '-o', 'ControlPersist=60' ])
     if agent:
         args.append('-A')
@@ -656,7 +655,7 @@ def popen_ssh_command(command, host, port, user, agent,
         # attach tempfile object to the process, to make sure the file stays
         # alive until the process is finished with it
         proc._known_hosts = tmp_known_hosts
-        
+    
         try:
             out, err = _communicate(proc, stdin, timeout, err_on_timeout)
             if proc.poll():
@@ -733,11 +732,12 @@ def popen_scp(source, dest,
         user,host = remspec.rsplit('@',1)
         tmp_known_hosts = None
         
-        connkey = make_connkey(user,host,port)
         args = ['ssh', '-l', user, '-C',
                 # Don't bother with localhost. Makes test easier
                 '-o', 'NoHostAuthenticationForLocalhost=yes',
-                '-o', 'ConnectTimeout=1200',
+                # XXX: Security vulnerability
+                #'-o', 'StrictHostKeyChecking=no',
+                '-o', 'ConnectTimeout=900',
                 '-o', 'ConnectionAttempts=3',
                 '-o', 'ServerAliveInterval=30',
                 '-o', 'TCPKeepAlive=yes',
@@ -745,7 +745,7 @@ def popen_scp(source, dest,
         if openssh_has_persist():
             args.extend([
                 '-o', 'ControlMaster=auto',
-                '-o', 'ControlPath=/tmp/nepi_ssh_pl_%s' % ( connkey, ),
+                '-o', 'ControlPath=/tmp/nepi_ssh-%r@%h:%p',
                 '-o', 'ControlPersist=60' ])
         if port:
             args.append('-P%d' % port)
@@ -871,7 +871,9 @@ def popen_scp(source, dest,
         args = ['scp', '-q', '-p', '-C',
                 # Don't bother with localhost. Makes test easier
                 '-o', 'NoHostAuthenticationForLocalhost=yes',
-                '-o', 'ConnectTimeout=1200',
+                # XXX: Security vulnerability
+                #'-o', 'StrictHostKeyChecking=no',
+                '-o', 'ConnectTimeout=900',
                 '-o', 'ConnectionAttempts=3',
                 '-o', 'ServerAliveInterval=30',
                 '-o', 'TCPKeepAlive=yes' ]
@@ -890,10 +892,9 @@ def popen_scp(source, dest,
             args.extend(source)
         else:
             if openssh_has_persist():
-                connkey = make_connkey(user,host,port)
                 args.extend([
-                    '-o', 'ControlMaster=no',
-                    '-o', 'ControlPath=/tmp/nepi_ssh_pl_%s' % ( connkey, ) ])
+                    '-o', 'ControlMaster=auto',
+                    '-o', 'ControlPath=/tmp/nepi_ssh-%r@%h:%p'])
             args.append(source)
         args.append(dest)
 
@@ -973,6 +974,8 @@ def popen_python(python_code,
         args = ['ssh', '-C',
                 # Don't bother with localhost. Makes test easier
                 '-o', 'NoHostAuthenticationForLocalhost=yes',
+                # XXX: Security vulnerability
+                #'-o', 'StrictHostKeyChecking=no',
                 '-o', 'ConnectionAttempts=3',
                 '-o', 'ServerAliveInterval=30',
                 '-o', 'TCPKeepAlive=yes',