+ Implemented option to cleanup directories on PlanetLab slivers.
[nepi.git] / src / nepi / testbeds / planetlab / application.py
1 # -*- coding: utf-8 -*-
2
3 from constants import TESTBED_ID
4 import plcapi
5 import operator
6 import os
7 import os.path
8 import sys
9 import nepi.util.server as server
10 import cStringIO
11 import subprocess
12 import rspawn
13 import random
14 import time
15 import socket
16 import threading
17 import logging
18 import re
19
20 from nepi.util.constants import ApplicationStatus as AS
21
22 class Dependency(object):
23     """
24     A Dependency is in every respect like an application.
25     
26     It may depend on packages, require building binaries, and need to deploy
27     them to the node.
28     
29     But it has no command: dependencies are never started or stopped, and have
30     no status.
31     """
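    # A minimal usage sketch (illustrative only; the node object, paths and
    # commands below are hypothetical and are normally wired up by the testbed
    # controller rather than by hand):
    #
    #   dep = Dependency()
    #   dep.node = node                           # a configured PlanetLab node object
    #   dep.home_path = "nepi-deps/mydep"         # relative to ${HOME} on the sliver
    #   dep.buildDepends = "gcc make"             # yum packages needed to build
    #   dep.sources = "/local/path/src.tar.gz"    # local files uploaded to the sliver
    #   dep.build = "tar xzf ${SOURCES}/src.tar.gz && make -C src"
    #   dep.install = "cp src/prog ${SOURCES}"
    #   dep.validate()
    #   dep.async_setup()                         # build in a background thread
    #   dep.async_setup_wait()                    # block until built and installed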
32
33     TRACES = ('buildlog',)
34
35     def __init__(self, api=None):
36         if not api:
37             api = plcapi.PLCAPI()
38         self._api = api
39         
40         # Attributes
41         self.command = None
42         self.sudo = False
43         
44         self.build = None
45         self.install = None
46         self.depends = None
47         self.buildDepends = None
48         self.sources = None
49         self.rpmFusion = False
50         self.env = {}
51         
52         self.stdin = None
53         self.stdout = None
54         self.stderr = None
55         self.buildlog = None
56         
57         self.add_to_path = True
58         
59         # Those are filled when the app is configured
60         self.home_path = None
61         
62         # Those are filled when an actual node is connected
63         self.node = None
64         
65         # Those are filled when the app is started
66         #   Having both pid and ppid makes it harder
67         #   for pid rollover to induce tracking mistakes
68         self._started = False
69         self._setup = False
70         self._setuper = None
71         self._pid = None
72         self._ppid = None
73
74         # Spanning tree deployment
75         self._master = None
76         self._master_passphrase = None
77         self._master_prk = None
78         self._master_puk = None
79         self._master_token = os.urandom(8).encode("hex")
80         self._build_pid = None
81         self._build_ppid = None
82         
83         # Logging
84         self._logger = logging.getLogger('nepi.testbeds.planetlab')
85         
86     
87     def __str__(self):
88         return "%s<%s>" % (
89             self.__class__.__name__,
90             ' '.join(filter(bool,(self.depends, self.sources)))
91         )
92     
93     def validate(self):
94         if self.home_path is None:
95             raise AssertionError, "Misconfigured application: missing home path"
96         if self.node is None:
97             raise AssertionError, "Misconfigured application: unconnected node"
98         if self.node.ident_path is None or not os.access(self.node.ident_path, os.R_OK):
99             raise AssertionError, "Misconfigured application: missing slice SSH key"
100         if self.node.hostname is None:
101             raise AssertionError, "Misconfigured application: misconfigured node"
102         if self.node.slicename is None:
103             raise AssertionError, "Misconfigured application: unspecified slice"
104     
105     def check_bad_host(self, out, err):
106         """
107         Called whenever an operation fails; the output is checked for
108         telltale signs of an unhealthy host.
109         """
110         return False
111     
112     def remote_trace_path(self, whichtrace):
113         if whichtrace in self.TRACES:
114             tracefile = os.path.join(self.home_path, whichtrace)
115         else:
116             tracefile = None
117         
118         return tracefile
119
120     def remote_trace_name(self, whichtrace):
121         if whichtrace in self.TRACES:
122             return whichtrace
123         return None
124
125     def sync_trace(self, local_dir, whichtrace):
126         tracefile = self.remote_trace_path(whichtrace)
127         if not tracefile:
128             return None
129         
130         local_path = os.path.join(local_dir, tracefile)
131         
132         # create parent local folders
133         proc = subprocess.Popen(
134             ["mkdir", "-p", os.path.dirname(local_path)],
135             stdout = open("/dev/null","w"),
136             stdin = open("/dev/null","r"))
137
138         if proc.wait():
139             raise RuntimeError, "Failed to synchronize trace"
140         
141         # sync files
142         try:
143             self._popen_scp(
144                 '%s@%s:%s' % (self.node.slicename, self.node.hostname,
145                     tracefile),
146                 local_path
147                 )
148         except RuntimeError, e:
149             raise RuntimeError, "Failed to synchronize trace: %s %s" \
150                     % (e.args[0], e.args[1],)
151         
152         return local_path
153     
154     def recover(self):
155         # We assume a correct deployment, so recovery only
156         # means we mark this dependency as deployed
157         self._setup = True
158
159     def setup(self):
160         self._logger.info("Setting up %s", self)
161         self._make_home()
162         self._launch_build()
163         self._finish_build()
164         self._setup = True
165     
166     def async_setup(self):
167         if not self._setuper:
168             def setuper():
169                 try:
170                     self.setup()
171                 except:
172                     self._setuper._exc.append(sys.exc_info())
173             self._setuper = threading.Thread(
174                 target = setuper)
175             self._setuper._exc = []
176             self._setuper.start()
177     
178     def async_setup_wait(self):
179         if not self._setup:
180             self._logger.info("Waiting for %s to be setup", self)
181             if self._setuper:
182                 self._setuper.join()
183                 if not self._setup:
184                     if self._setuper._exc:
185                         exctyp,exval,exctrace = self._setuper._exc[0]
186                         raise exctyp,exval,exctrace
187                     else:
188                         raise RuntimeError, "Failed to setup application"
189                 else:
190                     self._logger.info("Setup ready: %s at %s", self, self.node.hostname)
191             else:
192                 self.setup()
193         
194     def _make_home(self):
195         # Make sure all the paths needed for deployment
196         # are created on the node, and stale pid/build
197         # files from previous runs are removed
198         try:
199             self._popen_ssh_command(
200                 "mkdir -p %(home)s && ( rm -f %(home)s/{pid,build-pid,nepi-build.sh} >/dev/null 2>&1 || /bin/true )" \
201                     % { 'home' : server.shell_escape(self.home_path) },
202                 timeout = 120,
203                 retry = 3
204                 )
205         except RuntimeError, e:
206             raise RuntimeError, "Failed to set up application %s: %s %s" % (self.home_path, e.args[0], e.args[1],)
207         
208         if self.stdin:
209             stdin = self.stdin
210             if not os.path.isfile(stdin):
211                 stdin = cStringIO.StringIO(self.stdin)
212
213             # Write program input
214             try:
215                 self._popen_scp(stdin,
216                     '%s@%s:%s' % (self.node.slicename, self.node.hostname, 
217                         os.path.join(self.home_path, 'stdin') ),
218                     )
219             except RuntimeError, e:
220                 raise RuntimeError, "Failed to set up application %s: %s %s" \
221                         % (self.home_path, e.args[0], e.args[1],)
222
223     def _replace_paths(self, command):
224         """
225         Replace all special path tags with shell-escaped actual paths.
226         """
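        # For example, with a (hypothetical) relative home_path of "nepi-exp/app1":
        #   "${SOURCES}/prog"     -> "${HOME}/nepi-exp/app1/prog"
        #   "cd ${BUILD} && make" -> "cd ${HOME}/nepi-exp/app1/build && make"
        # An absolute home_path is used as-is, without the ${HOME} prefix.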
227         # need to append ${HOME} if paths aren't absolute, to MAKE them absolute.
228         root = '' if self.home_path.startswith('/') else "${HOME}/"
229         return ( command
230             .replace("${SOURCES}", root+server.shell_escape(self.home_path))
231             .replace("${BUILD}", root+server.shell_escape(os.path.join(self.home_path,'build'))) )
232
233     def _launch_build(self, trial=0):
234         if self._master is not None:
235             if not trial or self._master_prk is not None:
236                 self._do_install_keys()
237             buildscript = self._do_build_slave()
238         else:
239             buildscript = self._do_build_master()
240             
241         if buildscript is not None:
242             self._logger.info("Building %s at %s", self, self.node.hostname)
243             
244             # upload build script
245             try:
246                 self._popen_scp(
247                     buildscript,
248                     '%s@%s:%s' % (self.node.slicename, self.node.hostname, 
249                         os.path.join(self.home_path, 'nepi-build.sh') )
250                     )
251             except RuntimeError, e:
252                 raise RuntimeError, "Failed to set up application %s: %s %s" \
253                         % (self.home_path, e.args[0], e.args[1],)
254             
255             # launch build
256             self._do_launch_build()
257     
258     def _finish_build(self):
259         self._do_wait_build()
260         self._do_install()
261
262     def _do_build_slave(self):
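        # The generated script turns this node into a build *slave*: it starts
        # an ssh-agent loaded with the master's deployment key, waits for the
        # master to publish its build token, copies the sources (and the
        # prebuilt build.tar.gz, if any) from the master over scp, and finally
        # writes the same token locally so _do_wait_build can verify success.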
263         if not self.sources and not self.build:
264             return None
265             
266         # Create build script
267         files = set()
268         
269         if self.sources:
270             sources = self.sources.split(' ')
271             files.update(
272                 "%s@%s:%s" % (self._master.node.slicename, self._master.node.hostip, 
273                     os.path.join(self._master.home_path, os.path.basename(source)),)
274                 for source in sources
275             )
276         
277         if self.build:
278             files.add(
279                 "%s@%s:%s" % (self._master.node.slicename, self._master.node.hostip, 
280                     os.path.join(self._master.home_path, 'build.tar.gz'),)
281             )
282         
283         sshopts = "-o ConnectTimeout=30 -o ConnectionAttempts=3 -o ServerAliveInterval=30 -o TCPKeepAlive=yes"
284         
285         launch_agent = "{ ( echo -e '#!/bin/sh\\ncat' > .ssh-askpass ) && chmod u+x .ssh-askpass"\
286                         " && export SSH_ASKPASS=$(pwd)/.ssh-askpass "\
287                         " && ssh-agent > .ssh-agent.sh ; } && . ./.ssh-agent.sh && ( echo $NEPI_MASTER_PASSPHRASE | ssh-add %(prk)s ) && rm -rf %(prk)s %(puk)s" %  \
288         {
289             'prk' : server.shell_escape(self._master_prk_name),
290             'puk' : server.shell_escape(self._master_puk_name),
291         }
292         
293         kill_agent = "kill $SSH_AGENT_PID"
294         
295         waitmaster = (
296             "{ "
297             "echo 'Checking master reachability' ; "
298             "if ping -c 3 %(master_host)s && (. ./.ssh-agent.sh > /dev/null ; ssh -o UserKnownHostsFile=%(hostkey)s %(sshopts)s %(master)s echo MASTER SAYS HI ) ; then "
299             "echo 'Master node reachable' ; "
300             "else "
301             "echo 'MASTER NODE UNREACHABLE' && "
302             "exit 1 ; "
303             "fi ; "
304             ". ./.ssh-agent.sh ; "
305             "while [[ $(. ./.ssh-agent.sh > /dev/null ; ssh -q -o UserKnownHostsFile=%(hostkey)s %(sshopts)s %(master)s cat %(token_path)s.retcode || /bin/true) != %(token)s ]] ; do sleep 5 ; done ; "
306             "if [[ $(. ./.ssh-agent.sh > /dev/null ; ssh -q -o UserKnownHostsFile=%(hostkey)s %(sshopts)s %(master)s cat %(token_path)s || /bin/true) != %(token)s ]] ; then echo BAD TOKEN ; exit 1 ; fi ; "
307             "}" 
308         ) % {
309             'hostkey' : 'master_known_hosts',
310             'master' : "%s@%s" % (self._master.node.slicename, self._master.node.hostip),
311             'master_host' : self._master.node.hostip,
312             'token_path' : os.path.join(self._master.home_path, 'build.token'),
313             'token' : server.shell_escape(self._master._master_token),
314             'sshopts' : sshopts,
315         }
316         
317         syncfiles = ". ./.ssh-agent.sh && scp -p -o UserKnownHostsFile=%(hostkey)s %(sshopts)s %(files)s ." % {
318             'hostkey' : 'master_known_hosts',
319             'files' : ' '.join(files),
320             'sshopts' : sshopts,
321         }
322         if self.build:
323             syncfiles += " && tar xzf build.tar.gz"
324         syncfiles += " && ( echo %s > build.token )" % (server.shell_escape(self._master_token),)
325         syncfiles += " && ( echo %s > build.token.retcode )" % (server.shell_escape(self._master_token),)
326         syncfiles = "{ . ./.ssh-agent.sh ; %s ; }" % (syncfiles,)
327         
328         cleanup = "{ . ./.ssh-agent.sh ; kill $SSH_AGENT_PID ; rm -rf %(prk)s %(puk)s master_known_hosts .ssh-askpass ; }" % {
329             'prk' : server.shell_escape(self._master_prk_name),
330             'puk' : server.shell_escape(self._master_puk_name),
331         }
332         
333         slavescript = "( ( %(launch_agent)s && %(waitmaster)s && %(syncfiles)s && %(kill_agent)s && %(cleanup)s ) || %(cleanup)s ) ; echo %(token)s > build.token.retcode" % {
334             'waitmaster' : waitmaster,
335             'syncfiles' : syncfiles,
336             'cleanup' : cleanup,
337             'kill_agent' : kill_agent,
338             'launch_agent' : launch_agent,
339             'home' : server.shell_escape(self.home_path),
340             'token' : server.shell_escape(self._master_token),
341         }
342         
343         return cStringIO.StringIO(slavescript)
344          
345     def _do_launch_build(self):
346         script = "bash ./nepi-build.sh"
347         if self._master_passphrase:
348             script = "NEPI_MASTER_PASSPHRASE=%s %s" % (
349                 server.shell_escape(self._master_passphrase),
350                 script
351             )
352         (out,err),proc = rspawn.remote_spawn(
353             script,
354             pidfile = 'build-pid',
355             home = self.home_path,
356             stdin = '/dev/null',
357             stdout = 'buildlog',
358             stderr = rspawn.STDOUT,
359             
360             host = self.node.hostname,
361             port = None,
362             user = self.node.slicename,
363             agent = None,
364             ident_key = self.node.ident_path,
365             server_key = self.node.server_key,
366             hostip = self.node.hostip,
367             )
368         
369         if proc.wait():
370             if self.check_bad_host(out, err):
371                 self.node.blacklist()
372             raise RuntimeError, "Failed to set up build slave %s: %s %s" % (self.home_path, out,err,)
373         
374         
375         pid = ppid = None
376         delay = 1.0
377         for i in xrange(5):
378             pidtuple = rspawn.remote_check_pid(
379                 os.path.join(self.home_path,'build-pid'),
380                 host = self.node.hostname,
381                 port = None,
382                 user = self.node.slicename,
383                 agent = None,
384                 ident_key = self.node.ident_path,
385                 server_key = self.node.server_key,
386                 hostip = self.node.hostip
387                 )
388             
389             if pidtuple:
390                 pid, ppid = pidtuple
391                 self._build_pid, self._build_ppid = pidtuple
392                 break
393             else:
394                 time.sleep(delay)
395                 delay = min(30,delay*1.2)
396         else:
397             raise RuntimeError, "Failed to set up build slave %s: cannot get pid" % (self.home_path,)
398
399         self._logger.info("Deploying %s at %s", self, self.node.hostname)
400         
401     def _do_wait_build(self, trial=0):
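        # Poll the remote build process until it finishes, then verify the
        # build token written by the build script; a missing or mismatched
        # token means the build (or the sync with the master) failed, and the
        # build is retried up to 3 times before raising with the remote
        # buildlog included in the error message.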
402         pid = self._build_pid
403         ppid = self._build_ppid
404         
405         if pid and ppid:
406             delay = 1.0
407             first = True
408             bustspin = 0
409             while True:
410                 status = rspawn.remote_status(
411                     pid, ppid,
412                     host = self.node.hostname,
413                     port = None,
414                     user = self.node.slicename,
415                     agent = None,
416                     ident_key = self.node.ident_path,
417                     server_key = self.node.server_key,
418                     hostip = self.node.hostip
419                     )
420                 
421                 if status is rspawn.FINISHED:
422                     self._build_pid = self._build_ppid = None
423                     break
424                 elif status is not rspawn.RUNNING:
425                     self._logger.warn("Busted waiting for %s to finish building at %s %s", self, self.node.hostname,
426                             "(build slave)" if self._master is not None else "(build master)")
427                     bustspin += 1
428                     time.sleep(delay*(5.5+random.random()))
429                     if bustspin > 12:
430                         self._build_pid = self._build_ppid = None
431                         break
432                 else:
433                     if first:
434                         self._logger.info("Waiting for %s to finish building at %s %s", self, self.node.hostname,
435                             "(build slave)" if self._master is not None else "(build master)")
436                         
437                         first = False
438                     time.sleep(delay*(0.5+random.random()))
439                     delay = min(30,delay*1.2)
440                     bustspin = 0
441             
442             # check build token
443             slave_token = ""
444             for i in xrange(3):
445                 (out, err), proc = self._popen_ssh_command(
446                     "cat %(token_path)s" % {
447                         'token_path' : os.path.join(self.home_path, 'build.token'),
448                     },
449                     timeout = 120,
450                     noerrors = True)
451                 if not proc.wait() and out:
452                     slave_token = out.strip()
453                 
454                 if slave_token:
455                     break
456                 else:
457                     time.sleep(2)
458             
459             if slave_token != self._master_token:
460                 # Get buildlog for the error message
461
462                 (buildlog, err), proc = self._popen_ssh_command(
463                     "cat %(buildlog)s" % {
464                         'buildlog' : os.path.join(self.home_path, 'buildlog'),
465                         'buildscript' : os.path.join(self.home_path, 'nepi-build.sh'),
466                     },
467                     timeout = 120,
468                     noerrors = True)
469                 
470                 proc.wait()
471                 
472                 if self.check_bad_host(buildlog, err):
473                     self.node.blacklist()
474                 elif self._master and trial < 3 and ('BAD TOKEN' in buildlog or 'BAD TOKEN' in err):
475                     # bad sync with master, may try again
476                     # but first wait for master
477                     self._master.async_setup_wait()
478                     self._launch_build(trial+1)
479                     return self._do_wait_build(trial+1)
480                 elif trial < 3:
481                     return self._do_wait_build(trial+1)
482                 else:
483                     # No longer need'em
484                     self._master_prk = None
485                     self._master_puk = None
486         
487                     raise RuntimeError, "Failed to set up application %s: "\
488                             "build failed, got wrong token from pid %s/%s "\
489                             "(expected %r, got %r), see buildlog at %s:\n%s" % (
490                         self.home_path, pid, ppid, self._master_token, slave_token, self.node.hostname, buildlog)
491
492             # No longer need'em
493             self._master_prk = None
494             self._master_puk = None
495         
496             self._logger.info("Built %s at %s", self, self.node.hostname)
497
498     def _do_kill_build(self):
499         pid = self._build_pid
500         ppid = self._build_ppid
501         
502         if pid and ppid:
503             self._logger.info("Killing build of %s", self)
504             rspawn.remote_kill(
505                 pid, ppid,
506                 host = self.node.hostname,
507                 port = None,
508                 user = self.node.slicename,
509                 agent = None,
510                 ident_key = self.node.ident_path,
511                 hostip = self.node.hostip
512                 )
513         
514         
515     def _do_build_master(self):
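        # The generated script makes this node the build *master*: sources are
        # uploaded to it directly, build dependencies are yum-installed, the
        # build command runs inside ./build, the result is archived as
        # build.tar.gz for the slaves, and the build token is written last.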
516         if not self.sources and not self.build and not self.buildDepends:
517             return None
518             
519         if self.sources:
520             sources = self.sources.split(' ')
521             
522             # Copy all sources
523             try:
524                 self._popen_scp(
525                     sources,
526                     "%s@%s:%s" % (self.node.slicename, self.node.hostname, 
527                         os.path.join(self.home_path,'.'),)
528                     )
529             except RuntimeError, e:
530                 raise RuntimeError, "Failed to upload source file %r: %s %s" \
531                         % (sources, e.args[0], e.args[1],)
532             
533         buildscript = cStringIO.StringIO()
534         
535         buildscript.write("(\n")
536         
537         if self.buildDepends:
538             # Install build dependencies
539             buildscript.write(
540                 "sudo -S yum -y install %(packages)s\n" % {
541                     'packages' : self.buildDepends
542                 }
543             )
544         
545             
546         if self.build:
547             # Build sources
548             buildscript.write(
549                 "mkdir -p build && ( cd build && ( %(command)s ) )\n" % {
550                     'command' : self._replace_paths(self.build),
551                     'home' : server.shell_escape(self.home_path),
552                 }
553             )
554         
555             # Make archive
556             buildscript.write("tar czf build.tar.gz build\n")
557         
558         # Write token
559         buildscript.write("echo %(master_token)s > build.token ) ; echo %(master_token)s > build.token.retcode" % {
560             'master_token' : server.shell_escape(self._master_token)
561         })
562         
563         buildscript.seek(0)
564
565         return buildscript
566
567     def _do_install(self):
568         if self.install:
569             self._logger.info("Installing %s at %s", self, self.node.hostname)
570            
571             # Install application
572             try:
573                 self._popen_ssh_command(
574                     "cd %(home)s && cd build && ( %(command)s ) > ${HOME}/%(home)s/installlog 2>&1 || ( tail ${HOME}/%(home)s/{install,build}log >&2 && false )" % \
575                         {
576                         'command' : self._replace_paths(self.install),
577                         'home' : server.shell_escape(self.home_path),
578                         },
579                     )
580             except RuntimeError, e:
581                 if self.check_bad_host(e.args[0], e.args[1]):
582                     self.node.blacklist()
583                 raise RuntimeError, "Failed to install build sources: %s %s" % (e.args[0], e.args[1],)
584
585     def set_master(self, master):
586         self._master = master
587         
588     def install_keys(self, prk, puk, passphrase):
589         # Install keys
590         self._master_passphrase = passphrase
591         self._master_prk = prk
592         self._master_puk = puk
593         self._master_prk_name = os.path.basename(prk.name)
594         self._master_puk_name = os.path.basename(puk.name)
595         
596     def _do_install_keys(self):
597         prk = self._master_prk
598         puk = self._master_puk
599        
600         try:
601             self._popen_scp(
602                 [ prk.name, puk.name ],
603                 '%s@%s:%s' % (self.node.slicename, self.node.hostname, self.home_path )
604                 )
605         except RuntimeError, e:
606             raise RuntimeError, "Failed to set up application deployment keys: %s %s" \
607                     % (e.args[0], e.args[1],)
608
609         try:
610             self._popen_scp(
611                 cStringIO.StringIO('%s,%s %s\n' % (
612                     self._master.node.hostname, self._master.node.hostip, 
613                     self._master.node.server_key)),
614                 '%s@%s:%s' % (self.node.slicename, self.node.hostname, 
615                     os.path.join(self.home_path,"master_known_hosts") )
616                 )
617         except RuntimeError, e:
618             raise RuntimeError, "Failed to set up application deployment keys: %s %s" \
619                     % (e.args[0], e.args[1],)
620         
621     
622     def cleanup(self):
623         # make sure there's no leftover build processes
624         self._do_kill_build()
625         
626         # No longer need'em
627         self._master_prk = None
628         self._master_puk = None
629
630     @server.eintr_retry
631     def _popen_scp(self, src, dst, retry = 3):
632         while 1:
633             try:
634                 (out,err),proc = server.popen_scp(
635                     src,
636                     dst, 
637                     port = None,
638                     agent = None,
639                     ident_key = self.node.ident_path,
640                     server_key = self.node.server_key
641                     )
642
643                 if server.eintr_retry(proc.wait)():
644                     raise RuntimeError, (out, err)
645                 return (out, err), proc
646             except:
647                 if retry <= 0:
648                     raise
649                 else:
650                     retry -= 1
651   
652
653     @server.eintr_retry
654     def _popen_ssh_command(self, command, retry = 0, noerrors=False, timeout=None):
655         (out,err),proc = server.popen_ssh_command(
656             command,
657             host = self.node.hostname,
658             port = None,
659             user = self.node.slicename,
660             agent = None,
661             ident_key = self.node.ident_path,
662             server_key = self.node.server_key,
663             timeout = timeout,
664             retry = retry
665             )
666
667         if server.eintr_retry(proc.wait)():
668             if not noerrors:
669                 raise RuntimeError, (out, err)
670         return (out, err), proc
671
672 class Application(Dependency):
673     """
674     An Application is a Dependency that also has a command to be run and monitored.
675     
676     It adds the output of that command as traces.
677     """
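    # A minimal usage sketch (illustrative only; the node object and the
    # command are hypothetical and are normally configured by the testbed
    # controller):
    #
    #   app = Application()
    #   app.node = node                       # a configured PlanetLab node object
    #   app.home_path = "nepi-exp/app1"
    #   app.command = "ping -c 10 example.org"
    #   app.stdout = app.stderr = True        # capture stdout/stderr as traces
    #   app.setup()                           # deploy dependencies, if any
    #   app.start()                           # runs ./app.sh detached on the sliver
    #   while app.status() == AS.STATUS_RUNNING:
    #       time.sleep(1)
    #   app.kill()                            # no-op unless still running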
678     
679     TRACES = ('stdout','stderr','buildlog', 'output')
680     
681     def __init__(self, api=None):
682         super(Application,self).__init__(api)
683         
684         # Attributes
685         self.command = None
686         self.sudo = False
687         
688         self.stdin = None
689         self.stdout = None
690         self.stderr = None
691         self.output = None
692         
693         # Those are filled when the app is started
694         #   Having both pid and ppid makes it harder
695         #   for pid rollover to induce tracking mistakes
696         self._started = False
697         self._pid = None
698         self._ppid = None
699
700         # Do not add to the python path of nodes
701         self.add_to_path = False
702     
703     def __str__(self):
704         return "%s<command:%s%s>" % (
705             self.__class__.__name__,
706             "sudo " if self.sudo else "",
707             self.command,
708         )
709     
710     def start(self):
711         self._logger.info("Starting %s", self)
712         
713         # Create shell script with the command
714         # This way, complex commands and scripts can be run seamlessly
715         # sync files
716         command = cStringIO.StringIO()
717         command.write('export PYTHONPATH=$PYTHONPATH:%s\n' % (
718             ':'.join(["${HOME}/"+server.shell_escape(s) for s in self.node.pythonpath])
719         ))
720         command.write('export PATH=$PATH:%s\n' % (
721             ':'.join(["${HOME}/"+server.shell_escape(s) for s in self.node.pythonpath])
722         ))
723         if self.node.env:
724             for envkey, envvals in self.node.env.iteritems():
725                 for envval in envvals:
726                     command.write('export %s=%s\n' % (envkey, envval))
727         command.write(self.command)
728         command.seek(0)
729
730         try:
731             self._popen_scp(
732                 command,
733                 '%s@%s:%s' % (self.node.slicename, self.node.hostname, 
734                     os.path.join(self.home_path, "app.sh"))
735                 )
736         except RuntimeError, e:
737             raise RuntimeError, "Failed to set up application: %s %s" \
738                     % (e.args[0], e.args[1],)
739         
740         # Start process in a "daemonized" way, using nohup and heavy
741         # stdin/out redirection to avoid connection issues
742         (out,err),proc = rspawn.remote_spawn(
743             self._replace_paths("bash ./app.sh"),
744             
745             pidfile = './pid',
746             home = self.home_path,
747             stdin = 'stdin' if self.stdin is not None else '/dev/null',
748             stdout = 'stdout' if self.stdout else '/dev/null',
749             stderr = 'stderr' if self.stderr else '/dev/null',
750             sudo = self.sudo,
751             
752             host = self.node.hostname,
753             port = None,
754             user = self.node.slicename,
755             agent = None,
756             ident_key = self.node.ident_path,
757             server_key = self.node.server_key
758             )
759         
760         if proc.wait():
761             if self.check_bad_host(out, err):
762                 self.node.blacklist()
763             raise RuntimeError, "Failed to set up application: %s %s" % (out,err,)
764
765         self._started = True
766     
767     def recover(self):
768         # Assuming the application is running on PlanetLab,
769         # proper pidfiles should be present at the app's home path.
770         # So we mark this application as started, and check the pidfiles
771         self._started = True
772         self.checkpid()
773
774     def checkpid(self):            
775         # Get PID/PPID
776         # NOTE: wait a bit for the pidfile to be created
777         if self._started and (not self._pid or not self._ppid):
778             pidtuple = rspawn.remote_check_pid(
779                 os.path.join(self.home_path,'pid'),
780                 host = self.node.hostname,
781                 port = None,
782                 user = self.node.slicename,
783                 agent = None,
784                 ident_key = self.node.ident_path,
785                 server_key = self.node.server_key
786                 )
787             
788             if pidtuple:
789                 self._pid, self._ppid = pidtuple
790     
791     def status(self):
792         self.checkpid()
793         if not self._started:
794             return AS.STATUS_NOT_STARTED
795         elif not self._pid or not self._ppid:
796             return AS.STATUS_NOT_STARTED
797         else:
798             status = rspawn.remote_status(
799                 self._pid, self._ppid,
800                 host = self.node.hostname,
801                 port = None,
802                 user = self.node.slicename,
803                 agent = None,
804                 ident_key = self.node.ident_path,
805                 server_key = self.node.server_key
806                 )
807             
808             if status is rspawn.NOT_STARTED:
809                 return AS.STATUS_NOT_STARTED
810             elif status is rspawn.RUNNING:
811                 return AS.STATUS_RUNNING
812             elif status is rspawn.FINISHED:
813                 return AS.STATUS_FINISHED
814             else:
815                 # Unknown status; be conservative
816                 return AS.STATUS_NOT_STARTED
817     
818     def kill(self):
819         status = self.status()
820         if status == AS.STATUS_RUNNING:
821             # kill by ppid+pid - SIGTERM first, then try SIGKILL
822             rspawn.remote_kill(
823                 self._pid, self._ppid,
824                 host = self.node.hostname,
825                 port = None,
826                 user = self.node.slicename,
827                 agent = None,
828                 ident_key = self.node.ident_path,
829                 server_key = self.node.server_key,
830                 sudo = self.sudo
831                 )
832             self._logger.info("Killed %s", self)
833
834
835 class NepiDependency(Dependency):
836     """
837     This dependency adds nepi itself to the python path,
838     so that you may run testbeds within PL nodes.
839     """
840     
841     # Class attribute holding a *weak* reference to the shared NEPI tar file
842     # so that they may share it. Don't operate on the file itself, it would
843     # be a mess, just use its path.
844     _shared_nepi_tar = None
845     
846     def __init__(self, api = None):
847         super(NepiDependency, self).__init__(api)
848         
849         self._tarball = None
850         
851         self.depends = 'python python-ipaddr python-setuptools'
852         
853         # our sources are in our ad-hoc tarball
854         self.sources = self.tarball.name
855         
856         tarname = os.path.basename(self.tarball.name)
857         
858         # it's already built - just move the tarball into place
859         self.build = "mv -f ${SOURCES}/%s ." % (tarname,)
860         
861         # unpack it into sources, and we're done
862         self.install = "tar xzf ${BUILD}/%s -C .." % (tarname,)
863     
864     @property
865     def tarball(self):
866         if self._tarball is None:
867             shared_tar = self._shared_nepi_tar and self._shared_nepi_tar()
868             if shared_tar is not None:
869                 self._tarball = shared_tar
870             else:
871                 # Build an ad-hoc tarball
872                 # Prebuilt
873                 import nepi
874                 import tempfile
875                 
876                 shared_tar = tempfile.NamedTemporaryFile(prefix='nepi-src-', suffix='.tar.gz')
877                 
878                 proc = subprocess.Popen(
879                     ["tar", "czf", shared_tar.name, 
880                         '-C', os.path.join(os.path.dirname(os.path.dirname(nepi.__file__)),'.'), 
881                         'nepi'],
882                     stdout = open("/dev/null","w"),
883                     stdin = open("/dev/null","r"))
884
885                 if proc.wait():
886                     raise RuntimeError, "Failed to create nepi tarball"
887                 
888                 self._tarball = self._shared_nepi_tar = shared_tar
889                 
890         return self._tarball
891
892 class NS3Dependency(Dependency):
893     """
894     This dependency adds NS3 libraries to the library paths,
895     so that you may run the NS3 testbed within PL nodes.
896     
897     You'll also need the NepiDependency.
898     """
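    # Typically paired with a NepiDependency on the same node (illustrative
    # sketch; wiring is normally done by the testbed controller):
    #
    #   ns3dep = NS3Dependency()
    #   ns3dep.node = node
    #   ns3dep.home_path = "nepi-deps/ns3"
    #   ns3dep.setup()
    #   # ns3dep.env already carries NEPI_NS3BINDINGS / NEPI_NS3LIBRARY,
    #   # both pointing at ${SOURCES}/lib on the sliver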
899     
900     def __init__(self, api = None):
901         super(NS3Dependency, self).__init__(api)
902         
903         self.buildDepends = 'make waf gcc gcc-c++ gccxml unzip bzr'
904         
905         # We have to download the sources, untar, build...
906         pygccxml_source_url = "http://leaseweb.dl.sourceforge.net/project/pygccxml/pygccxml/pygccxml-1.0/pygccxml-1.0.0.zip"
907         ns3_source_url = "http://nepi.pl.sophia.inria.fr/code/nepi-ns3.13/archive/tip.tar.gz"
908         passfd_source_url = "http://nepi.pl.sophia.inria.fr/code/python-passfd/archive/tip.tar.gz"
909         
910         pybindgen_version = "797"
911
912         self.build =(
913             " ( "
914             "  cd .. && "
915             "  python -c 'import pygccxml, pybindgen, passfd' && "
916             "  test -f lib/ns/_core.so && "
917             "  test -f lib/ns/__init__.py && "
918             "  test -f lib/ns/core.py && "
919             "  test -f lib/libns3-core.so && "
920             "  LD_LIBRARY_PATH=lib PYTHONPATH=lib python -c 'import ns.core' "
921             " ) || ( "
922                 # Not working, rebuild
923                      # Archive SHA1 sums to check
924                      "echo '7158877faff2254e6c094bf18e6b4283cac19137  pygccxml-1.0.0.zip' > archive_sums.txt && "
925                      " ( " # check existing files
926                      " sha1sum -c archive_sums.txt && "
927                      " test -f passfd-src.tar.gz && "
928                      " test -f ns3-src.tar.gz "
929                      " ) || ( " # nope? re-download
930                      " rm -rf pybindgen pygccxml-1.0.0.zip passfd-src.tar.gz ns3-src.tar.gz && "
931                      " bzr checkout lp:pybindgen -r %(pybindgen_version)s && " # continue, to exploit the case when it has already been dl'ed
932                      " wget -q -c -O pygccxml-1.0.0.zip %(pygccxml_source_url)s && " 
933                      " wget -q -c -O passfd-src.tar.gz %(passfd_source_url)s && "
934                      " wget -q -c -O ns3-src.tar.gz %(ns3_source_url)s && "  
935                      " sha1sum -c archive_sums.txt " # Check SHA1 sums when applicable
936                      " ) && "
937                      "unzip -n pygccxml-1.0.0.zip && "
938                      "mkdir -p ns3-src && "
939                      "mkdir -p passfd-src && "
940                      "tar xzf ns3-src.tar.gz --strip-components=1 -C ns3-src && "
941                      "tar xzf passfd-src.tar.gz --strip-components=1 -C passfd-src && "
942                      "rm -rf target && "    # mv doesn't like unclean targets
943                      "mkdir -p target && "
944                      "cd pygccxml-1.0.0 && "
945                      "rm -rf unittests docs && " # pygccxml has ~100M of unit tests - excessive - docs aren't needed either
946                      "python setup.py build && "
947                      "python setup.py install --install-lib ${BUILD}/target && "
948                      "python setup.py clean && "
949                      "cd ../pybindgen && "
950                      "export PYTHONPATH=$PYTHONPATH:${BUILD}/target && "
951                      "./waf configure --prefix=${BUILD}/target -d release && "
952                      "./waf && "
953                      "./waf install && "
954                      "./waf clean && "
955                      "mv -f ${BUILD}/target/lib/python*/site-packages/pybindgen ${BUILD}/target/. && "
956                      "rm -rf ${BUILD}/target/lib && "
957                      "cd ../passfd-src && "
958                      "python setup.py build && "
959                      "python setup.py install --install-lib ${BUILD}/target && "
960                      "python setup.py clean && "
961                      "cd ../ns3-src && "
962                      "./waf configure --prefix=${BUILD}/target --with-pybindgen=../pybindgen-src -d release --disable-examples --disable-tests && "
963                      "./waf &&"
964                      "./waf install && "
965                      "rm -f ${BUILD}/target/lib/*.so && "
966                      "cp -a ${BUILD}/ns3-src/build/libns3*.so ${BUILD}/target/lib && "
967                      "cp -a ${BUILD}/ns3-src/build/bindings/python/ns ${BUILD}/target/lib &&"
968                      "./waf clean "
969              " )"
970                      % dict(
971                         pybindgen_version = server.shell_escape(pybindgen_version),
972                         pygccxml_source_url = server.shell_escape(pygccxml_source_url),
973                         ns3_source_url = server.shell_escape(ns3_source_url),
974                         passfd_source_url = server.shell_escape(passfd_source_url),
975                      ))
976         
977         # Just move ${BUILD}/target
978         self.install = (
979             " ( "
980             "  cd .. && "
981             "  python -c 'import pygccxml, pybindgen, passfd' && "
982             "  test -f lib/ns/_core.so && "
983             "  test -f lib/ns/__init__.py && "
984             "  test -f lib/ns/core.py && "
985             "  test -f lib/libns3-core.so && "
986             "  LD_LIBRARY_PATH=lib PYTHONPATH=lib python -c 'import ns.core' "
987             " ) || ( "
988                 # Not working, reinstall
989                     "test -d ${BUILD}/target && "
990                     "[[ \"x\" != \"x$(find ${BUILD}/target -mindepth 1 -print -quit)\" ]] &&"
991                     "( for i in ${BUILD}/target/* ; do rm -rf ${SOURCES}/${i##*/} ; done ) && " # mv doesn't like unclean targets
992                     "mv -f ${BUILD}/target/* ${SOURCES}"
993             " )"
994         )
995         
996         # Set extra environment paths
997         self.env['NEPI_NS3BINDINGS'] = "${SOURCES}/lib"
998         self.env['NEPI_NS3LIBRARY'] = "${SOURCES}/lib"
999     
1000     @property
1001     def tarball(self):
1002         if self._tarball is None:
1003             shared_tar = self._shared_nepi_tar and self._shared_nepi_tar()
1004             if shared_tar is not None:
1005                 self._tarball = shared_tar
1006             else:
1007                 # Build an ad-hoc tarball
1008                 # Prebuilt
1009                 import nepi
1010                 import tempfile
1011                 
1012                 shared_tar = tempfile.NamedTemporaryFile(prefix='nepi-src-', suffix='.tar.gz')
1013                 
1014                 proc = subprocess.Popen(
1015                     ["tar", "czf", shared_tar.name, 
1016                         '-C', os.path.join(os.path.dirname(os.path.dirname(nepi.__file__)),'.'), 
1017                         'nepi'],
1018                     stdout = open("/dev/null","w"),
1019                     stdin = open("/dev/null","r"))
1020
1021                 if proc.wait():
1022                     raise RuntimeError, "Failed to create nepi tarball"
1023                 
1024                 self._tarball = self._shared_nepi_tar = shared_tar
1025                 
1026         return self._tarball
1027
1028 class YumDependency(Dependency):
1029     """
1030     This dependency is an internal helper class used to
1031     efficiently distribute yum-downloaded rpms.
1032     
1033     It temporarily sets the yum cache as persistent in the
1034     build master, and installs all the required packages.
1035     
1036     The rpm packages left in the yum cache are gathered and
1037     distributed by the underlying Dependency in an efficient
1038     manner. Build slaves will then install those rpms back in
1039     the cache before issuing the install command.
1040     
1041     When packages have been installed already, nothing but an
1042     empty tar is distributed.
1043     """
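    # A minimal usage sketch (illustrative only; package names and node
    # objects are hypothetical; deployment keys are installed by the
    # testbed controller via install_keys):
    #
    #   master = YumDependency()
    #   master.node = master_node
    #   master.home_path = "nepi-deps/yum"
    #   master.depends = "libpcap tcpdump"
    #
    #   slave = YumDependency()
    #   slave.node = slave_node
    #   slave.home_path = "nepi-deps/yum"
    #   slave.depends = "libpcap tcpdump"
    #   slave.set_master(master)     # rpms are downloaded once and redistributed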
1044     
1045     # Class attribute holding a *weak* reference to the shared NEPI tar file
1046     # so that they may share it. Don't operate on the file itself, it would
1047     # be a mess, just use its path.
1048     _shared_nepi_tar = None
1049     
1050     def _build_get(self):
1051         # canonical representation of dependencies
1052         depends = ' '.join( sorted( (self.depends or "").split(' ') ) )
1053         
1054         # download rpms and pack into a tar archive
1055         return (
1056             "sudo -S nice yum -y makecache && "
1057             "sudo -S sed -i -r 's/keepcache *= *0/keepcache=1/' /etc/yum.conf && "
1058             " ( ( "
1059                 "sudo -S nice yum -y install %s ; "
1060                 "rm -f ${BUILD}/packages.tar ; "
1061                 "tar -C /var/cache/yum -rf ${BUILD}/packages.tar $(cd /var/cache/yum ; find -iname '*.rpm')"
1062             " ) || /bin/true ) && "
1063             "sudo -S sed -i -r 's/keepcache *= *1/keepcache=0/' /etc/yum.conf && "
1064             "( sudo -S nice yum -y clean packages || /bin/true ) "
1065         ) % ( depends, )
1066     def _build_set(self, value):
1067         # ignore
1068         return
1069     build = property(_build_get, _build_set)
1070     
1071     def _install_get(self):
1072         # canonical representation of dependencies
1073         depends = ' '.join( sorted( (self.depends or "").split(' ') ) )
1074         
1075         # unpack cached rpms into yum cache, install, and cleanup
1076         return (
1077             "sudo -S tar -k --keep-newer-files -C /var/cache/yum -xf packages.tar && "
1078             "sudo -S nice yum -y install %s && "
1079             "( sudo -S nice yum -y clean packages || /bin/true ) "
1080         ) % ( depends, )
1081     def _install_set(self, value):
1082         # ignore
1083         return
1084     install = property(_install_get, _install_set)
1085         
1086     def check_bad_host(self, out, err):
1087         badre = re.compile(r'(?:'
1088                            r'The GPG keys listed for the ".*" repository are already installed but they are not correct for this package'
1089                            r'|Error: Cannot retrieve repository metadata (repomd.xml) for repository: .*[.] Please verify its path and try again'
1090                            r'|Error: disk I/O error'
1091                            r'|MASTER NODE UNREACHABLE'
1092                            r')', 
1093                            re.I)
1094         return badre.search(out) or badre.search(err) or self.node.check_bad_host(out,err)
1095
1096
1097 class CCNxDaemon(Application):
1098     """
1099     A CCNxDaemon is an Application that builds and runs the CCNx daemon (ccnd)
1100     on the node, optionally adding CCN routes with ccndc on start, and
1101     stopping the daemon with ccndstop on kill.
1102     """
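    # A minimal usage sketch (illustrative only; the node object and the
    # routes are hypothetical):
    #
    #   ccnd = CCNxDaemon()
    #   ccnd.node = node
    #   ccnd.home_path = "nepi-apps/ccnd"
    #   ccnd.ccnxversion = "ccnx-0.6.0"       # or "ccnx-0.5.1"
    #   ccnd.ccnroutes = "udp host1.example|udp host2.example"   # '|'-separated
    #   ccnd.setup()                          # downloads and builds ccnx
    #   ccnd.start()                          # ccndstart, then ccndc add for each route
    #   ...
    #   ccnd.kill()                           # runs ccndstop, then the usual kill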
1103     
1104     def __init__(self, api=None):
1105         super(CCNxDaemon,self).__init__(api)
1106         
1107         # Attributes
1108         self.ccnroutes = None
1109         self.ccnsources = None
1110         self.ccnxversion = "ccnx-0.6.0"
1111         
1112         self.ccnx_0_5_1_sources = "http://www.ccnx.org/releases/ccnx-0.5.1.tar.gz"
1113         self.ccnx_0_6_0_sources = "http://www.ccnx.org/releases/ccnx-0.6.0.tar.gz"
1114         self.buildDepends = 'make gcc development-tools openssl-devel expat-devel libpcap-devel libxml2-devel'
1115
1116         self.ccnx_0_5_1_build = (
1117             " ( "
1118             "  cd .. && "
1119             "  test -d ccnx-0.5.1-src/build/bin "
1120             " ) || ( "
1121                 # Not working, rebuild
1122                 "("
1123                      " mkdir -p ccnx-0.5.1-src && "
1124                      " wget -q -c -O ccnx-0.5.1-src.tar.gz %(ccnx_source_url)s &&"
1125                      " tar xf ccnx-0.5.1-src.tar.gz --strip-components=1 -C ccnx-0.5.1-src "
1126                 ") && "
1127                      "cd ccnx-0.5.1-src && "
1128                      "mkdir -p build/include &&"
1129                      "mkdir -p build/lib &&"
1130                      "mkdir -p build/bin &&"
1131                      "I=$PWD/build && "
1132                      "INSTALL_BASE=$I ./configure &&"
1133                      "make && make install"
1134              " )") % dict(
1135                      ccnx_source_url = server.shell_escape(self.ccnx_0_5_1_sources),
1136                 )
1137
1138         self.ccnx_0_5_1_install = (
1139             " ( "
1140             "  test -d ${BUILD}/ccnx-0.5.1-src/build/bin && "
1141             "  cp -r ${BUILD}/ccnx-0.5.1-src/build/bin ${SOURCES}"
1142             " )"
1143         )
1144
1145         self.ccnx_0_6_0_build = (
1146             " ( "
1147             "  cd .. && "
1148             "  test -d ccnx-0.6.0-src/build/bin "
1149             " ) || ( "
1150                 # Not working, rebuild
1151                 "("
1152                      " mkdir -p ccnx-0.6.0-src && "
1153                      " wget -q -c -O ccnx-0.6.0-src.tar.gz %(ccnx_source_url)s &&"
1154                      " tar xf ccnx-0.6.0-src.tar.gz --strip-components=1 -C ccnx-0.6.0-src "
1155                 ") && "
1156                      "cd ccnx-0.6.0-src && "
1157                      "./configure && make"
1158              " )") % dict(
1159                      ccnx_source_url = server.shell_escape(self.ccnx_0_6_0_sources),
1160                 )
1161
1162         self.ccnx_0_6_0_install = (
1163             " ( "
1164             "  test -d ${BUILD}/ccnx-0.6.0-src/bin && "
1165             "  cp -r ${BUILD}/ccnx-0.6.0-src/bin ${SOURCES}"
1166             " )"
1167         )
1168
1169         self.env['PATH'] = "$PATH:${SOURCES}/bin"
1170
1171     def setup(self):
1172         # setting ccn sources
1173         if not self.build:
1174             if self.ccnxversion == 'ccnx-0.6.0':
1175                 self.build = self.ccnx_0_6_0_build
1176             elif self.ccnxversion == 'ccnx-0.5.1':
1177                 self.build = self.ccnx_0_5_1_build
1178
1179         if not self.install:
1180             if self.ccnxversion == 'ccnx-0.6.0':
1181                 self.install = self.ccnx_0_6_0_install
1182             elif self.ccnxversion == 'ccnx-0.5.1':
1183                 self.install = self.ccnx_0_5_1_install
1184
1185         super(CCNxDaemon, self).setup()
1186
1187     def start(self):
1188         # configure ccn routes
1189         routes = ""
1190         if self.ccnroutes:
1191             routes = map(lambda route: "ccndc add ccnx:/ %s" % route, 
1192                 self.ccnroutes.split("|"))
1193             routes = "; " + " ; ".join(routes)
1194         self.command = "ccndstart %s" % routes
1195
1196         # Start will be invoked in prestart step
1197         super(CCNxDaemon, self).start()
1198             
1199     def kill(self):
1200         self._logger.info("Killing %s", self)
1201
1202         cmd = self._replace_paths("${SOURCES}/bin/ccndstop")
1203         command = cStringIO.StringIO()
1204         command.write(cmd)
1205         command.seek(0)
1206
1207         try:
1208             self._popen_scp(
1209                 command,
1210                 '%s@%s:%s' % (self.node.slicename, self.node.hostname, 
1211                     os.path.join(self.home_path, "kill.sh"))
1212                 )
1213         except RuntimeError, e:
1214             raise RuntimeError, "Failed to kill ccnx daemon: %s %s" \
1215                     % (e.args[0], e.args[1],)
1216         
1217
1218         script = "bash ./kill.sh"
1219         (out,err),proc = rspawn.remote_spawn(
1220             script,
1221             pidfile = 'kill-pid',
1222             home = self.home_path,
1223             stdin = '/dev/null',
1224             stdout = 'killlog',
1225             stderr = rspawn.STDOUT,
1226             
1227             host = self.node.hostname,
1228             port = None,
1229             user = self.node.slicename,
1230             agent = None,
1231             ident_key = self.node.ident_path,
1232             server_key = self.node.server_key,
1233             hostip = self.node.hostip,
1234             )
1235         
1236         if proc.wait():
1237             raise RuntimeError, "Failed to kill ccnx daemon: %s %s" % (out,err,)
1238         
1239         super(CCNxDaemon, self).kill()
1240