Fixing CCNx on PlanetLab to accept a user defined bind port.
[nepi.git] / src / nepi / testbeds / planetlab / application.py
index 3d680ea..a192707 100644 (file)
@@ -1,4 +1,3 @@
-#!/usr/bin/env python
 # -*- coding: utf-8 -*-
 
 from constants import TESTBED_ID
@@ -20,6 +19,8 @@ import re
 
 from nepi.util.constants import ApplicationStatus as AS
 
+_ccnre = re.compile("\s*(udp|tcp)\s+(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\s*$")
+
 class Dependency(object):
     """
     A Dependency is in every respect like an application.
@@ -207,10 +208,13 @@ class Dependency(object):
             raise RuntimeError, "Failed to set up application %s: %s %s" % (self.home_path, e.args[0], e.args[1],)
         
         if self.stdin:
+            stdin = self.stdin
+            if not os.path.isfile(stdin):
+                stdin = cStringIO.StringIO(self.stdin)
+
             # Write program input
             try:
-                self._popen_scp(
-                    cStringIO.StringIO(self.stdin),
+                self._popen_scp(stdin,
                     '%s@%s:%s' % (self.node.slicename, self.node.hostname, 
                         os.path.join(self.home_path, 'stdin') ),
                     )
@@ -228,9 +232,10 @@ class Dependency(object):
             .replace("${SOURCES}", root+server.shell_escape(self.home_path))
             .replace("${BUILD}", root+server.shell_escape(os.path.join(self.home_path,'build'))) )
 
-    def _launch_build(self):
+    def _launch_build(self, trial=0):
         if self._master is not None:
-            self._do_install_keys()
+            if not trial or self._master_prk is not None:
+                self._do_install_keys()
             buildscript = self._do_build_slave()
         else:
             buildscript = self._do_build_master()
@@ -266,14 +271,14 @@ class Dependency(object):
         if self.sources:
             sources = self.sources.split(' ')
             files.update(
-                "%s@%s:%s" % (self._master.node.slicename, self._master.node.hostname
+                "%s@%s:%s" % (self._master.node.slicename, self._master.node.hostip
                     os.path.join(self._master.home_path, os.path.basename(source)),)
                 for source in sources
             )
         
         if self.build:
             files.add(
-                "%s@%s:%s" % (self._master.node.slicename, self._master.node.hostname
+                "%s@%s:%s" % (self._master.node.slicename, self._master.node.hostip
                     os.path.join(self._master.home_path, 'build.tar.gz'),)
             )
         
@@ -290,13 +295,22 @@ class Dependency(object):
         kill_agent = "kill $SSH_AGENT_PID"
         
         waitmaster = (
-            "{ . ./.ssh-agent.sh ; "
+            "{ "
+            "echo 'Checking master reachability' ; "
+            "if ping -c 3 %(master_host)s && (. ./.ssh-agent.sh > /dev/null ; ssh -o UserKnownHostsFile=%(hostkey)s %(sshopts)s %(master)s echo MASTER SAYS HI ) ; then "
+            "echo 'Master node reachable' ; "
+            "else "
+            "echo 'MASTER NODE UNREACHABLE' && "
+            "exit 1 ; "
+            "fi ; "
+            ". ./.ssh-agent.sh ; "
             "while [[ $(. ./.ssh-agent.sh > /dev/null ; ssh -q -o UserKnownHostsFile=%(hostkey)s %(sshopts)s %(master)s cat %(token_path)s.retcode || /bin/true) != %(token)s ]] ; do sleep 5 ; done ; "
             "if [[ $(. ./.ssh-agent.sh > /dev/null ; ssh -q -o UserKnownHostsFile=%(hostkey)s %(sshopts)s %(master)s cat %(token_path)s || /bin/true) != %(token)s ]] ; then echo BAD TOKEN ; exit 1 ; fi ; "
             "}" 
         ) % {
             'hostkey' : 'master_known_hosts',
-            'master' : "%s@%s" % (self._master.node.slicename, self._master.node.hostname),
+            'master' : "%s@%s" % (self._master.node.slicename, self._master.node.hostip),
+            'master_host' : self._master.node.hostip,
             'token_path' : os.path.join(self._master.home_path, 'build.token'),
             'token' : server.shell_escape(self._master._master_token),
             'sshopts' : sshopts,
@@ -350,7 +364,8 @@ class Dependency(object):
             user = self.node.slicename,
             agent = None,
             ident_key = self.node.ident_path,
-            server_key = self.node.server_key
+            server_key = self.node.server_key,
+            hostip = self.node.hostip,
             )
         
         if proc.wait():
@@ -369,7 +384,8 @@ class Dependency(object):
                 user = self.node.slicename,
                 agent = None,
                 ident_key = self.node.ident_path,
-                server_key = self.node.server_key
+                server_key = self.node.server_key,
+                hostip = self.node.hostip
                 )
             
             if pidtuple:
@@ -384,7 +400,7 @@ class Dependency(object):
 
         self._logger.info("Deploying %s at %s", self, self.node.hostname)
         
-    def _do_wait_build(self):
+    def _do_wait_build(self, trial=0):
         pid = self._build_pid
         ppid = self._build_ppid
         
@@ -400,13 +416,16 @@ class Dependency(object):
                     user = self.node.slicename,
                     agent = None,
                     ident_key = self.node.ident_path,
-                    server_key = self.node.server_key
+                    server_key = self.node.server_key,
+                    hostip = self.node.hostip
                     )
                 
                 if status is rspawn.FINISHED:
                     self._build_pid = self._build_ppid = None
                     break
                 elif status is not rspawn.RUNNING:
+                    self._logger.warn("Busted waiting for %s to finish building at %s %s", self, self.node.hostname,
+                            "(build slave)" if self._master is not None else "(build master)")
                     bustspin += 1
                     time.sleep(delay*(5.5+random.random()))
                     if bustspin > 12:
@@ -454,12 +473,28 @@ class Dependency(object):
                 
                 if self.check_bad_host(buildlog, err):
                     self.node.blacklist()
-                
-                raise RuntimeError, "Failed to set up application %s: "\
-                        "build failed, got wrong token from pid %s/%s "\
-                        "(expected %r, got %r), see buildlog at %s:\n%s" % (
-                    self.home_path, pid, ppid, self._master_token, slave_token, self.node.hostname, buildlog)
+                elif self._master and trial < 3 and 'BAD TOKEN' in buildlog or 'BAD TOKEN' in err:
+                    # bad sync with master, may try again
+                    # but first wait for master
+                    self._master.async_setup_wait()
+                    self._launch_build(trial+1)
+                    return self._do_wait_build(trial+1)
+                elif trial < 3:
+                    return self._do_wait_build(trial+1)
+                else:
+                    # No longer need'em
+                    self._master_prk = None
+                    self._master_puk = None
+        
+                    raise RuntimeError, "Failed to set up application %s: "\
+                            "build failed, got wrong token from pid %s/%s "\
+                            "(expected %r, got %r), see buildlog at %s:\n%s" % (
+                        self.home_path, pid, ppid, self._master_token, slave_token, self.node.hostname, buildlog)
 
+            # No longer need'em
+            self._master_prk = None
+            self._master_puk = None
+        
             self._logger.info("Built %s at %s", self, self.node.hostname)
 
     def _do_kill_build(self):
@@ -474,7 +509,8 @@ class Dependency(object):
                 port = None,
                 user = self.node.slicename,
                 agent = None,
-                ident_key = self.node.ident_path
+                ident_key = self.node.ident_path,
+                hostip = self.node.hostip
                 )
         
         
@@ -533,7 +569,7 @@ class Dependency(object):
     def _do_install(self):
         if self.install:
             self._logger.info("Installing %s at %s", self, self.node.hostname)
-            
+           
             # Install application
             try:
                 self._popen_ssh_command(
@@ -575,7 +611,7 @@ class Dependency(object):
         try:
             self._popen_scp(
                 cStringIO.StringIO('%s,%s %s\n' % (
-                    self._master.node.hostname, socket.gethostbyname(self._master.node.hostname)
+                    self._master.node.hostname, self._master.node.hostip
                     self._master.node.server_key)),
                 '%s@%s:%s' % (self.node.slicename, self.node.hostname, 
                     os.path.join(self.home_path,"master_known_hosts") )
@@ -584,13 +620,14 @@ class Dependency(object):
             raise RuntimeError, "Failed to set up application deployment keys: %s %s" \
                     % (e.args[0], e.args[1],)
         
-        # No longer need'em
-        self._master_prk = None
-        self._master_puk = None
     
     def cleanup(self):
         # make sure there's no leftover build processes
         self._do_kill_build()
+        
+        # No longer need'em
+        self._master_prk = None
+        self._master_puk = None
 
     @server.eintr_retry
     def _popen_scp(self, src, dst, retry = 3):
@@ -865,13 +902,15 @@ class NS3Dependency(Dependency):
     def __init__(self, api = None):
         super(NS3Dependency, self).__init__(api)
         
-        self.buildDepends = 'make waf gcc gcc-c++ gccxml unzip'
+        self.buildDepends = 'make waf gcc gcc-c++ gccxml unzip bzr'
         
         # We have to download the sources, untar, build...
-        pybindgen_source_url = "http://yans.pl.sophia.inria.fr/trac/nepi/raw-attachment/wiki/WikiStart/pybindgen-r794.tar.gz"
         pygccxml_source_url = "http://leaseweb.dl.sourceforge.net/project/pygccxml/pygccxml/pygccxml-1.0/pygccxml-1.0.0.zip"
-        ns3_source_url = "http://yans.pl.sophia.inria.fr/code/hgwebdir.cgi/ns-3.11-nepi/archive/tip.tar.gz"
-        passfd_source_url = "http://yans.pl.sophia.inria.fr/code/hgwebdir.cgi/python-passfd/archive/tip.tar.gz"
+        ns3_source_url = "http://nepi.pl.sophia.inria.fr/code/nepi-ns3.13/archive/tip.tar.gz"
+        passfd_source_url = "http://nepi.pl.sophia.inria.fr/code/python-passfd/archive/tip.tar.gz"
+        
+        pybindgen_version = "797"
+
         self.build =(
             " ( "
             "  cd .. && "
@@ -885,26 +924,23 @@ class NS3Dependency(Dependency):
                 # Not working, rebuild
                      # Archive SHA1 sums to check
                      "echo '7158877faff2254e6c094bf18e6b4283cac19137  pygccxml-1.0.0.zip' > archive_sums.txt && "
-                     "echo 'a18c2ccffd0df517bc37e2f3a2475092517c43f2  pybindgen-src.tar.gz' >> archive_sums.txt && "
                      " ( " # check existing files
                      " sha1sum -c archive_sums.txt && "
                      " test -f passfd-src.tar.gz && "
                      " test -f ns3-src.tar.gz "
                      " ) || ( " # nope? re-download
-                     " rm -f pybindgen-src.zip pygccxml-1.0.0.zip passfd-src.tar.gz ns3-src.tar.gz && "
-                     " wget -q -c -O pybindgen-src.tar.gz %(pybindgen_source_url)s && " # continue, to exploit the case when it has already been dl'ed
+                     " rm -rf pybindgen pygccxml-1.0.0.zip passfd-src.tar.gz ns3-src.tar.gz && "
+                     " bzr checkout lp:pybindgen -r %(pybindgen_version)s && " # continue, to exploit the case when it has already been dl'ed
                      " wget -q -c -O pygccxml-1.0.0.zip %(pygccxml_source_url)s && " 
                      " wget -q -c -O passfd-src.tar.gz %(passfd_source_url)s && "
                      " wget -q -c -O ns3-src.tar.gz %(ns3_source_url)s && "  
                      " sha1sum -c archive_sums.txt " # Check SHA1 sums when applicable
                      " ) && "
                      "unzip -n pygccxml-1.0.0.zip && "
-                     "mkdir -p pybindgen-src && "
                      "mkdir -p ns3-src && "
                      "mkdir -p passfd-src && "
                      "tar xzf ns3-src.tar.gz --strip-components=1 -C ns3-src && "
                      "tar xzf passfd-src.tar.gz --strip-components=1 -C passfd-src && "
-                     "tar xzf pybindgen-src.tar.gz --strip-components=1 -C pybindgen-src && "
                      "rm -rf target && "    # mv doesn't like unclean targets
                      "mkdir -p target && "
                      "cd pygccxml-1.0.0 && "
@@ -912,7 +948,7 @@ class NS3Dependency(Dependency):
                      "python setup.py build && "
                      "python setup.py install --install-lib ${BUILD}/target && "
                      "python setup.py clean && "
-                     "cd ../pybindgen-src && "
+                     "cd ../pybindgen && "
                      "export PYTHONPATH=$PYTHONPATH:${BUILD}/target && "
                      "./waf configure --prefix=${BUILD}/target -d release && "
                      "./waf && "
@@ -929,12 +965,12 @@ class NS3Dependency(Dependency):
                      "./waf &&"
                      "./waf install && "
                      "rm -f ${BUILD}/target/lib/*.so && "
-                     "cp -a ${BUILD}/ns3-src/build/release/libns3*.so ${BUILD}/target/lib && "
-                     "cp -a ${BUILD}/ns3-src/build/release/bindings/python/ns ${BUILD}/target/lib &&"
+                     "cp -a ${BUILD}/ns3-src/build/libns3*.so ${BUILD}/target/lib && "
+                     "cp -a ${BUILD}/ns3-src/build/bindings/python/ns ${BUILD}/target/lib &&"
                      "./waf clean "
              " )"
                      % dict(
-                        pybindgen_source_url = server.shell_escape(pybindgen_source_url),
+                        pybindgen_version = server.shell_escape(pybindgen_version),
                         pygccxml_source_url = server.shell_escape(pygccxml_source_url),
                         ns3_source_url = server.shell_escape(ns3_source_url),
                         passfd_source_url = server.shell_escape(passfd_source_url),
@@ -1054,6 +1090,171 @@ class YumDependency(Dependency):
                            r'The GPG keys listed for the ".*" repository are already installed but they are not correct for this package'
                            r'|Error: Cannot retrieve repository metadata (repomd.xml) for repository: .*[.] Please verify its path and try again'
                            r'|Error: disk I/O error'
+                           r'|MASTER NODE UNREACHABLE'
                            r')', 
                            re.I)
-        return badre.search(out) or badre.search(err)
+        return badre.search(out) or badre.search(err) or self.node.check_bad_host(out,err)
+
+
+class CCNxDaemon(Application):
+    """
+    An application also has dependencies, but also a command to be ran and monitored.
+    
+    It adds the output of that command as traces.
+    """
+    
+    def __init__(self, api=None):
+        super(CCNxDaemon,self).__init__(api)
+        
+        # Attributes
+        self.ccnLocalPort = None
+        self.ccnRoutes = None
+        self.ccnxVersion = "ccnx-0.6.0"
+        
+        self.ccnx_0_5_1_sources = "http://www.ccnx.org/releases/ccnx-0.5.1.tar.gz"
+        self.ccnx_0_6_0_sources = "http://www.ccnx.org/releases/ccnx-0.6.0.tar.gz"
+        self.buildDepends = 'make gcc development-tools openssl-devel expat-devel libpcap-devel libxml2-devel'
+
+        self.ccnx_0_5_1_build = (
+            " ( "
+            "  cd .. && "
+            "  test -d ccnx-0.5.1-src/build/bin "
+            " ) || ( "
+                # Not working, rebuild
+                "("
+                     " mkdir -p ccnx-0.5.1-src && "
+                     " wget -q -c -O ccnx-0.5.1-src.tar.gz %(ccnx_source_url)s &&"
+                     " tar xf ccnx-0.5.1-src.tar.gz --strip-components=1 -C ccnx-0.5.1-src "
+                ") && "
+                     "cd ccnx-0.5.1-src && "
+                     "mkdir -p build/include &&"
+                     "mkdir -p build/lib &&"
+                     "mkdir -p build/bin &&"
+                     "I=$PWD/build && "
+                     "INSTALL_BASE=$I ./configure &&"
+                     "make && make install"
+             " )") % dict(
+                     ccnx_source_url = server.shell_escape(self.ccnx_0_5_1_sources),
+                )
+
+        self.ccnx_0_5_1_install = (
+            " ( "
+            "  test -d ${BUILD}/ccnx-0.5.1-src/build/bin && "
+            "  cp -r ${BUILD}/ccnx-0.5.1-src/build/bin ${SOURCES}"
+            " )"
+        )
+
+        self.ccnx_0_6_0_build = (
+            " ( "
+            "  cd .. && "
+            "  test -d ccnx-0.6.0-src/build/bin "
+            " ) || ( "
+                # Not working, rebuild
+                "("
+                     " mkdir -p ccnx-0.6.0-src && "
+                     " wget -q -c -O ccnx-0.6.0-src.tar.gz %(ccnx_source_url)s &&"
+                     " tar xf ccnx-0.6.0-src.tar.gz --strip-components=1 -C ccnx-0.6.0-src "
+                ") && "
+                     "cd ccnx-0.6.0-src && "
+                     "./configure && make"
+             " )") % dict(
+                     ccnx_source_url = server.shell_escape(self.ccnx_0_6_0_sources),
+                )
+
+        self.ccnx_0_6_0_install = (
+            " ( "
+            "  test -d ${BUILD}/ccnx-0.6.0-src/bin && "
+            "  cp -r ${BUILD}/ccnx-0.6.0-src/bin ${SOURCES}"
+            " )"
+        )
+
+        self.env['PATH'] = "$PATH:${SOURCES}/bin"
+
+    def setup(self):
+        # setting ccn sources
+        if not self.build:
+            if self.ccnxVersion == 'ccnx-0.6.0':
+                self.build = self.ccnx_0_6_0_build
+            elif self.ccnxVersion == 'ccnx-0.5.1':
+                self.build = self.ccnx_0_5_1_build
+
+        if not self.install:
+            if self.ccnxVersion == 'ccnx-0.6.0':
+                self.install = self.ccnx_0_6_0_install
+            elif self.ccnxVersion == 'ccnx-0.5.1':
+                self.install = self.ccnx_0_5_1_install
+
+        super(CCNxDaemon, self).setup()
+
+    def start(self):
+        self.command = ""
+        if self.ccnLocalPort:
+            self.command = "export CCN_LOCAL_PORT=%s ; " % self.ccnLocalPort
+        self.command += " ccndstart "
+
+        # configure ccn routes
+        if self.ccnRoutes:
+            routes = self.ccnRoutes.split("|")
+            
+            if self.ccnLocalPort:
+                routes = map(lambda route: "%s %s" %(route, 
+                    self.ccnLocalPort) if _ccnre.match(route) else route, 
+                        routes)
+
+            routes = map(lambda route: "ccndc add ccnx:/ %s" % route, 
+                routes)
+
+            routescmd = " ; ".join(routes)
+            self.command += " ; "
+            self.command += routescmd
+
+        # Start will be invoked in prestart step
+        super(CCNxDaemon, self).start()
+            
+    def kill(self):
+        self._logger.info("Killing %s", self)
+
+        command = "${SOURCES}/bin/ccndstop"
+
+        if self.ccnLocalPort:
+            self.command = "export CCN_LOCAL_PORT=%s; %s" % (self.ccnLocalPort, command)
+
+        cmd = self._replace_paths(command)
+        command = cStringIO.StringIO()
+        command.write(cmd)
+        command.seek(0)
+
+        try:
+            self._popen_scp(
+                command,
+                '%s@%s:%s' % (self.node.slicename, self.node.hostname, 
+                    os.path.join(self.home_path, "kill.sh"))
+                )
+        except RuntimeError, e:
+            raise RuntimeError, "Failed to kill ccndxdaemon: %s %s" \
+                    % (e.args[0], e.args[1],)
+        
+
+        script = "bash ./kill.sh"
+        (out,err),proc = rspawn.remote_spawn(
+            script,
+            pidfile = 'kill-pid',
+            home = self.home_path,
+            stdin = '/dev/null',
+            stdout = 'killlog',
+            stderr = rspawn.STDOUT,
+            
+            host = self.node.hostname,
+            port = None,
+            user = self.node.slicename,
+            agent = None,
+            ident_key = self.node.ident_path,
+            server_key = self.node.server_key,
+            hostip = self.node.hostip,
+            )
+        
+        if proc.wait():
+            raise RuntimeError, "Failed to kill cnnxdaemon: %s %s" % (out,err,)
+        
+        super(CCNxDaemon, self).kill()