dfe382fd5972306ec8527bc2a7418e0d14a5e796
[nepi.git] / src / nepi / testbeds / planetlab / application.py
1 # -*- coding: utf-8 -*-
2
3 from constants import TESTBED_ID
4 import plcapi
5 import operator
6 import os
7 import os.path
8 import sys
9 import nepi.util.server as server
10 import cStringIO
11 import subprocess
12 import rspawn
13 import random
14 import time
15 import socket
16 import threading
17 import logging
18 import re
19
20 from nepi.util.constants import ApplicationStatus as AS
21
22 class Dependency(object):
23     """
24     A Dependency is in every respect like an application.
25     
26     It depends on some packages, it may require building binaries, it must deploy
27     them...
28     
29     But it has no command. Dependencies aren't ever started, or stopped, and have
30     no status.
31     """
32
33     TRACES = ('buildlog')
34
35     def __init__(self, api=None):
36         if not api:
37             api = plcapi.PLCAPI()
38         self._api = api
39         
40         # Attributes
41         self.command = None
42         self.sudo = False
43         
44         self.build = None
45         self.install = None
46         self.depends = None
47         self.buildDepends = None
48         self.sources = None
49         self.rpmFusion = False
50         self.env = {}
51         
52         self.stdin = None
53         self.stdout = None
54         self.stderr = None
55         self.buildlog = None
56         
57         self.add_to_path = True
58         
59         # Those are filled when the app is configured
60         self.home_path = None
61         
62         # Those are filled when an actual node is connected
63         self.node = None
64         
65         # Those are filled when the app is started
66         #   Having both pid and ppid makes it harder
67         #   for pid rollover to induce tracking mistakes
68         self._started = False
69         self._setup = False
70         self._setuper = None
71         self._pid = None
72         self._ppid = None
73
74         # Spanning tree deployment
75         self._master = None
76         self._master_passphrase = None
77         self._master_prk = None
78         self._master_puk = None
79         self._master_token = os.urandom(8).encode("hex")
80         self._build_pid = None
81         self._build_ppid = None
82         
83         # Logging
84         self._logger = logging.getLogger('nepi.testbeds.planetlab')
85         
86     
87     def __str__(self):
88         return "%s<%s>" % (
89             self.__class__.__name__,
90             ' '.join(filter(bool,(self.depends, self.sources)))
91         )
92     
93     def validate(self):
94         if self.home_path is None:
95             raise AssertionError, "Misconfigured application: missing home path"
96         if self.node.ident_path is None or not os.access(self.node.ident_path, os.R_OK):
97             raise AssertionError, "Misconfigured application: missing slice SSH key"
98         if self.node is None:
99             raise AssertionError, "Misconfigured application: unconnected node"
100         if self.node.hostname is None:
101             raise AssertionError, "Misconfigured application: misconfigured node"
102         if self.node.slicename is None:
103             raise AssertionError, "Misconfigured application: unspecified slice"
104     
105     def check_bad_host(self, out, err):
106         """
107         Called whenever an operation fails, it's given the output to be checked for
108         telltale signs of unhealthy hosts.
109         """
110         return False
111     
112     def remote_trace_path(self, whichtrace):
113         if whichtrace in self.TRACES:
114             tracefile = os.path.join(self.home_path, whichtrace)
115         else:
116             tracefile = None
117         
118         return tracefile
119
120     def remote_trace_name(self, whichtrace):
121         if whichtrace in self.TRACES:
122             return whichtrace
123         return None
124
125     def sync_trace(self, local_dir, whichtrace):
126         tracefile = self.remote_trace_path(whichtrace)
127         if not tracefile:
128             return None
129         
130         local_path = os.path.join(local_dir, tracefile)
131         
132         # create parent local folders
133         proc = subprocess.Popen(
134             ["mkdir", "-p", os.path.dirname(local_path)],
135             stdout = open("/dev/null","w"),
136             stdin = open("/dev/null","r"))
137
138         if proc.wait():
139             raise RuntimeError, "Failed to synchronize trace"
140         
141         # sync files
142         try:
143             self._popen_scp(
144                 '%s@%s:%s' % (self.node.slicename, self.node.hostname,
145                     tracefile),
146                 local_path
147                 )
148         except RuntimeError, e:
149             raise RuntimeError, "Failed to synchronize trace: %s %s" \
150                     % (e.args[0], e.args[1],)
151         
152         return local_path
153     
154     def recover(self):
155         # We assume a correct deployment, so recovery only
156         # means we mark this dependency as deployed
157         self._setup = True
158
159     def setup(self):
160         self._logger.info("Setting up %s", self)
161         self._make_home()
162         self._launch_build()
163         self._finish_build()
164         self._setup = True
165     
166     def async_setup(self):
167         if not self._setuper:
168             def setuper():
169                 try:
170                     self.setup()
171                 except:
172                     self._setuper._exc.append(sys.exc_info())
173             self._setuper = threading.Thread(
174                 target = setuper)
175             self._setuper._exc = []
176             self._setuper.start()
177     
178     def async_setup_wait(self):
179         if not self._setup:
180             self._logger.info("Waiting for %s to be setup", self)
181             if self._setuper:
182                 self._setuper.join()
183                 if not self._setup:
184                     if self._setuper._exc:
185                         exctyp,exval,exctrace = self._setuper._exc[0]
186                         raise exctyp,exval,exctrace
187                     else:
188                         raise RuntimeError, "Failed to setup application"
189                 else:
190                     self._logger.info("Setup ready: %s at %s", self, self.node.hostname)
191             else:
192                 self.setup()
193         
194     def _make_home(self):
195         # Make sure all the paths are created where 
196         # they have to be created for deployment
197         # sync files
198         try:
199             self._popen_ssh_command(
200                 "mkdir -p %(home)s && ( rm -f %(home)s/{pid,build-pid,nepi-build.sh} >/dev/null 2>&1 || /bin/true )" \
201                     % { 'home' : server.shell_escape(self.home_path) },
202                 timeout = 120,
203                 retry = 3
204                 )
205         except RuntimeError, e:
206             raise RuntimeError, "Failed to set up application %s: %s %s" % (self.home_path, e.args[0], e.args[1],)
207         
208         if self.stdin:
209             # Write program input
210             try:
211                 self._popen_scp(
212                     cStringIO.StringIO(self.stdin),
213                     '%s@%s:%s' % (self.node.slicename, self.node.hostname, 
214                         os.path.join(self.home_path, 'stdin') ),
215                     )
216             except RuntimeError, e:
217                 raise RuntimeError, "Failed to set up application %s: %s %s" \
218                         % (self.home_path, e.args[0], e.args[1],)
219
220     def _replace_paths(self, command):
221         """
222         Replace all special path tags with shell-escaped actual paths.
223         """
224         # need to append ${HOME} if paths aren't absolute, to MAKE them absolute.
225         root = '' if self.home_path.startswith('/') else "${HOME}/"
226         return ( command
227             .replace("${SOURCES}", root+server.shell_escape(self.home_path))
228             .replace("${BUILD}", root+server.shell_escape(os.path.join(self.home_path,'build'))) )
229
230     def _launch_build(self, trial=0):
231         if self._master is not None:
232             if not trial or self._master_prk is not None:
233                 self._do_install_keys()
234             buildscript = self._do_build_slave()
235         else:
236             buildscript = self._do_build_master()
237             
238         if buildscript is not None:
239             self._logger.info("Building %s at %s", self, self.node.hostname)
240             
241             # upload build script
242             try:
243                 self._popen_scp(
244                     buildscript,
245                     '%s@%s:%s' % (self.node.slicename, self.node.hostname, 
246                         os.path.join(self.home_path, 'nepi-build.sh') )
247                     )
248             except RuntimeError, e:
249                 raise RuntimeError, "Failed to set up application %s: %s %s" \
250                         % (self.home_path, e.args[0], e.args[1],)
251             
252             # launch build
253             self._do_launch_build()
254     
255     def _finish_build(self):
256         self._do_wait_build()
257         self._do_install()
258
    def _do_build_slave(self):
        """
        Generate the shell script a build slave runs to fetch its files
        from the master node.

        The script starts an ssh-agent loaded with the deployment key, waits
        for the master to publish its token, copies the sources and/or
        build.tar.gz from the master with scp, and finally writes this
        instance's own token to build.token / build.token.retcode.

        Returns a cStringIO holding the script, or None when there are
        neither sources nor a build command (nothing to fetch).
        """
        if not self.sources and not self.build:
            return None
            
        # Create build script
        # Remote (master-side) locations of everything that must be copied
        files = set()
        
        if self.sources:
            # Sources are looked up by basename inside the master's home path
            sources = self.sources.split(' ')
            files.update(
                "%s@%s:%s" % (self._master.node.slicename, self._master.node.hostip, 
                    os.path.join(self._master.home_path, os.path.basename(source)),)
                for source in sources
            )
        
        if self.build:
            # The master packs its build folder as build.tar.gz
            files.add(
                "%s@%s:%s" % (self._master.node.slicename, self._master.node.hostip, 
                    os.path.join(self._master.home_path, 'build.tar.gz'),)
            )
        
        sshopts = "-o ConnectTimeout=30 -o ConnectionAttempts=3 -o ServerAliveInterval=30 -o TCPKeepAlive=yes"
        
        # Start an ssh-agent, feed it the private key using the passphrase
        # taken from $NEPI_MASTER_PASSPHRASE (through a 'cat' askpass helper),
        # then delete both key files from the node.
        launch_agent = "{ ( echo -e '#!/bin/sh\\ncat' > .ssh-askpass ) && chmod u+x .ssh-askpass"\
                        " && export SSH_ASKPASS=$(pwd)/.ssh-askpass "\
                        " && ssh-agent > .ssh-agent.sh ; } && . ./.ssh-agent.sh && ( echo $NEPI_MASTER_PASSPHRASE | ssh-add %(prk)s ) && rm -rf %(prk)s %(puk)s" %  \
        {
            'prk' : server.shell_escape(self._master_prk_name),
            'puk' : server.shell_escape(self._master_puk_name),
        }
        
        kill_agent = "kill $SSH_AGENT_PID"
        
        # Check the master answers ping and ssh, poll (every 5 seconds) until
        # its build.token.retcode file holds the master's token, then verify
        # build.token holds it too; otherwise print BAD TOKEN and exit 1.
        waitmaster = (
            "{ "
            "echo 'Checking master reachability' ; "
            "if ping -c 3 %(master_host)s && (. ./.ssh-agent.sh > /dev/null ; ssh -o UserKnownHostsFile=%(hostkey)s %(sshopts)s %(master)s echo MASTER SAYS HI ) ; then "
            "echo 'Master node reachable' ; "
            "else "
            "echo 'MASTER NODE UNREACHABLE' && "
            "exit 1 ; "
            "fi ; "
            ". ./.ssh-agent.sh ; "
            "while [[ $(. ./.ssh-agent.sh > /dev/null ; ssh -q -o UserKnownHostsFile=%(hostkey)s %(sshopts)s %(master)s cat %(token_path)s.retcode || /bin/true) != %(token)s ]] ; do sleep 5 ; done ; "
            "if [[ $(. ./.ssh-agent.sh > /dev/null ; ssh -q -o UserKnownHostsFile=%(hostkey)s %(sshopts)s %(master)s cat %(token_path)s || /bin/true) != %(token)s ]] ; then echo BAD TOKEN ; exit 1 ; fi ; "
            "}" 
        ) % {
            'hostkey' : 'master_known_hosts',
            'master' : "%s@%s" % (self._master.node.slicename, self._master.node.hostip),
            'master_host' : self._master.node.hostip,
            'token_path' : os.path.join(self._master.home_path, 'build.token'),
            # The token compared against is the master's, not this instance's
            'token' : server.shell_escape(self._master._master_token),
            'sshopts' : sshopts,
        }
        
        # Copy the files from the master into the current folder, unpack the
        # build archive if there is one, then write this instance's own token.
        syncfiles = ". ./.ssh-agent.sh && scp -p -o UserKnownHostsFile=%(hostkey)s %(sshopts)s %(files)s ." % {
            'hostkey' : 'master_known_hosts',
            'files' : ' '.join(files),
            'sshopts' : sshopts,
        }
        if self.build:
            syncfiles += " && tar xzf build.tar.gz"
        syncfiles += " && ( echo %s > build.token )" % (server.shell_escape(self._master_token),)
        syncfiles += " && ( echo %s > build.token.retcode )" % (server.shell_escape(self._master_token),)
        syncfiles = "{ . ./.ssh-agent.sh ; %s ; }" % (syncfiles,)
        
        # Kill the agent and remove the keys, known hosts file and askpass helper
        cleanup = "{ . ./.ssh-agent.sh ; kill $SSH_AGENT_PID ; rm -rf %(prk)s %(puk)s master_known_hosts .ssh-askpass ; }" % {
            'prk' : server.shell_escape(self._master_prk_name),
            'puk' : server.shell_escape(self._master_puk_name),
        }
        
        # Run the whole chain; cleanup also runs when any step fails, and
        # build.token.retcode is written last regardless of the outcome.
        # NOTE: the 'home' key below is not referenced by the format string.
        slavescript = "( ( %(launch_agent)s && %(waitmaster)s && %(syncfiles)s && %(kill_agent)s && %(cleanup)s ) || %(cleanup)s ) ; echo %(token)s > build.token.retcode" % {
            'waitmaster' : waitmaster,
            'syncfiles' : syncfiles,
            'cleanup' : cleanup,
            'kill_agent' : kill_agent,
            'launch_agent' : launch_agent,
            'home' : server.shell_escape(self.home_path),
            'token' : server.shell_escape(self._master_token),
        }
        
        return cStringIO.StringIO(slavescript)
341          
342     def _do_launch_build(self):
343         script = "bash ./nepi-build.sh"
344         if self._master_passphrase:
345             script = "NEPI_MASTER_PASSPHRASE=%s %s" % (
346                 server.shell_escape(self._master_passphrase),
347                 script
348             )
349         (out,err),proc = rspawn.remote_spawn(
350             script,
351             pidfile = 'build-pid',
352             home = self.home_path,
353             stdin = '/dev/null',
354             stdout = 'buildlog',
355             stderr = rspawn.STDOUT,
356             
357             host = self.node.hostname,
358             port = None,
359             user = self.node.slicename,
360             agent = None,
361             ident_key = self.node.ident_path,
362             server_key = self.node.server_key,
363             hostip = self.node.hostip,
364             )
365         
366         if proc.wait():
367             if self.check_bad_host(out, err):
368                 self.node.blacklist()
369             raise RuntimeError, "Failed to set up build slave %s: %s %s" % (self.home_path, out,err,)
370         
371         
372         pid = ppid = None
373         delay = 1.0
374         for i in xrange(5):
375             pidtuple = rspawn.remote_check_pid(
376                 os.path.join(self.home_path,'build-pid'),
377                 host = self.node.hostname,
378                 port = None,
379                 user = self.node.slicename,
380                 agent = None,
381                 ident_key = self.node.ident_path,
382                 server_key = self.node.server_key,
383                 hostip = self.node.hostip
384                 )
385             
386             if pidtuple:
387                 pid, ppid = pidtuple
388                 self._build_pid, self._build_ppid = pidtuple
389                 break
390             else:
391                 time.sleep(delay)
392                 delay = min(30,delay*1.2)
393         else:
394             raise RuntimeError, "Failed to set up build slave %s: cannot get pid" % (self.home_path,)
395
396         self._logger.info("Deploying %s at %s", self, self.node.hostname)
397         
398     def _do_wait_build(self, trial=0):
399         pid = self._build_pid
400         ppid = self._build_ppid
401         
402         if pid and ppid:
403             delay = 1.0
404             first = True
405             bustspin = 0
406             while True:
407                 status = rspawn.remote_status(
408                     pid, ppid,
409                     host = self.node.hostname,
410                     port = None,
411                     user = self.node.slicename,
412                     agent = None,
413                     ident_key = self.node.ident_path,
414                     server_key = self.node.server_key,
415                     hostip = self.node.hostip
416                     )
417                 
418                 if status is rspawn.FINISHED:
419                     self._build_pid = self._build_ppid = None
420                     break
421                 elif status is not rspawn.RUNNING:
422                     self._logger.warn("Busted waiting for %s to finish building at %s %s", self, self.node.hostname,
423                             "(build slave)" if self._master is not None else "(build master)")
424                     bustspin += 1
425                     time.sleep(delay*(5.5+random.random()))
426                     if bustspin > 12:
427                         self._build_pid = self._build_ppid = None
428                         break
429                 else:
430                     if first:
431                         self._logger.info("Waiting for %s to finish building at %s %s", self, self.node.hostname,
432                             "(build slave)" if self._master is not None else "(build master)")
433                         
434                         first = False
435                     time.sleep(delay*(0.5+random.random()))
436                     delay = min(30,delay*1.2)
437                     bustspin = 0
438             
439             # check build token
440             slave_token = ""
441             for i in xrange(3):
442                 (out, err), proc = self._popen_ssh_command(
443                     "cat %(token_path)s" % {
444                         'token_path' : os.path.join(self.home_path, 'build.token'),
445                     },
446                     timeout = 120,
447                     noerrors = True)
448                 if not proc.wait() and out:
449                     slave_token = out.strip()
450                 
451                 if slave_token:
452                     break
453                 else:
454                     time.sleep(2)
455             
456             if slave_token != self._master_token:
457                 # Get buildlog for the error message
458
459                 (buildlog, err), proc = self._popen_ssh_command(
460                     "cat %(buildlog)s" % {
461                         'buildlog' : os.path.join(self.home_path, 'buildlog'),
462                         'buildscript' : os.path.join(self.home_path, 'nepi-build.sh'),
463                     },
464                     timeout = 120,
465                     noerrors = True)
466                 
467                 proc.wait()
468                 
469                 if self.check_bad_host(buildlog, err):
470                     self.node.blacklist()
471                 elif self._master and trial < 3 and 'BAD TOKEN' in buildlog or 'BAD TOKEN' in err:
472                     # bad sync with master, may try again
473                     # but first wait for master
474                     self._master.async_setup_wait()
475                     self._launch_build(trial+1)
476                     return self._do_wait_build(trial+1)
477                 elif trial < 3:
478                     return self._do_wait_build(trial+1)
479                 else:
480                     # No longer need'em
481                     self._master_prk = None
482                     self._master_puk = None
483         
484                     raise RuntimeError, "Failed to set up application %s: "\
485                             "build failed, got wrong token from pid %s/%s "\
486                             "(expected %r, got %r), see buildlog at %s:\n%s" % (
487                         self.home_path, pid, ppid, self._master_token, slave_token, self.node.hostname, buildlog)
488
489             # No longer need'em
490             self._master_prk = None
491             self._master_puk = None
492         
493             self._logger.info("Built %s at %s", self, self.node.hostname)
494
495     def _do_kill_build(self):
496         pid = self._build_pid
497         ppid = self._build_ppid
498         
499         if pid and ppid:
500             self._logger.info("Killing build of %s", self)
501             rspawn.remote_kill(
502                 pid, ppid,
503                 host = self.node.hostname,
504                 port = None,
505                 user = self.node.slicename,
506                 agent = None,
507                 ident_key = self.node.ident_path,
508                 hostip = self.node.hostip
509                 )
510         
511         
512     def _do_build_master(self):
513         if not self.sources and not self.build and not self.buildDepends:
514             return None
515             
516         if self.sources:
517             sources = self.sources.split(' ')
518             
519             # Copy all sources
520             try:
521                 self._popen_scp(
522                     sources,
523                     "%s@%s:%s" % (self.node.slicename, self.node.hostname, 
524                         os.path.join(self.home_path,'.'),)
525                     )
526             except RuntimeError, e:
527                 raise RuntimeError, "Failed upload source file %r: %s %s" \
528                         % (sources, e.args[0], e.args[1],)
529             
530         buildscript = cStringIO.StringIO()
531         
532         buildscript.write("(\n")
533         
534         if self.buildDepends:
535             # Install build dependencies
536             buildscript.write(
537                 "sudo -S yum -y install %(packages)s\n" % {
538                     'packages' : self.buildDepends
539                 }
540             )
541         
542             
543         if self.build:
544             # Build sources
545             buildscript.write(
546                 "mkdir -p build && ( cd build && ( %(command)s ) )\n" % {
547                     'command' : self._replace_paths(self.build),
548                     'home' : server.shell_escape(self.home_path),
549                 }
550             )
551         
552             # Make archive
553             buildscript.write("tar czf build.tar.gz build\n")
554         
555         # Write token
556         buildscript.write("echo %(master_token)s > build.token ) ; echo %(master_token)s > build.token.retcode" % {
557             'master_token' : server.shell_escape(self._master_token)
558         })
559         
560         buildscript.seek(0)
561
562         return buildscript
563
564     def _do_install(self):
565         if self.install:
566             self._logger.info("Installing %s at %s", self, self.node.hostname)
567             
568             # Install application
569             try:
570                 self._popen_ssh_command(
571                     "cd %(home)s && cd build && ( %(command)s ) > ${HOME}/%(home)s/installlog 2>&1 || ( tail ${HOME}/%(home)s/{install,build}log >&2 && false )" % \
572                         {
573                         'command' : self._replace_paths(self.install),
574                         'home' : server.shell_escape(self.home_path),
575                         },
576                     )
577             except RuntimeError, e:
578                 if self.check_bad_host(e.args[0], e.args[1]):
579                     self.node.blacklist()
580                 raise RuntimeError, "Failed install build sources: %s %s" % (e.args[0], e.args[1],)
581
582     def set_master(self, master):
583         self._master = master
584         
585     def install_keys(self, prk, puk, passphrase):
586         # Install keys
587         self._master_passphrase = passphrase
588         self._master_prk = prk
589         self._master_puk = puk
590         self._master_prk_name = os.path.basename(prk.name)
591         self._master_puk_name = os.path.basename(puk.name)
592         
593     def _do_install_keys(self):
594         prk = self._master_prk
595         puk = self._master_puk
596        
597         try:
598             self._popen_scp(
599                 [ prk.name, puk.name ],
600                 '%s@%s:%s' % (self.node.slicename, self.node.hostname, self.home_path )
601                 )
602         except RuntimeError, e:
603             raise RuntimeError, "Failed to set up application deployment keys: %s %s" \
604                     % (e.args[0], e.args[1],)
605
606         try:
607             self._popen_scp(
608                 cStringIO.StringIO('%s,%s %s\n' % (
609                     self._master.node.hostname, self._master.node.hostip, 
610                     self._master.node.server_key)),
611                 '%s@%s:%s' % (self.node.slicename, self.node.hostname, 
612                     os.path.join(self.home_path,"master_known_hosts") )
613                 )
614         except RuntimeError, e:
615             raise RuntimeError, "Failed to set up application deployment keys: %s %s" \
616                     % (e.args[0], e.args[1],)
617         
618     
619     def cleanup(self):
620         # make sure there's no leftover build processes
621         self._do_kill_build()
622         
623         # No longer need'em
624         self._master_prk = None
625         self._master_puk = None
626
627     @server.eintr_retry
628     def _popen_scp(self, src, dst, retry = 3):
629         while 1:
630             try:
631                 (out,err),proc = server.popen_scp(
632                     src,
633                     dst, 
634                     port = None,
635                     agent = None,
636                     ident_key = self.node.ident_path,
637                     server_key = self.node.server_key
638                     )
639
640                 if server.eintr_retry(proc.wait)():
641                     raise RuntimeError, (out, err)
642                 return (out, err), proc
643             except:
644                 if retry <= 0:
645                     raise
646                 else:
647                     retry -= 1
648   
649
650     @server.eintr_retry
651     def _popen_ssh_command(self, command, retry = 0, noerrors=False, timeout=None):
652         (out,err),proc = server.popen_ssh_command(
653             command,
654             host = self.node.hostname,
655             port = None,
656             user = self.node.slicename,
657             agent = None,
658             ident_key = self.node.ident_path,
659             server_key = self.node.server_key,
660             timeout = timeout,
661             retry = retry
662             )
663
664         if server.eintr_retry(proc.wait)():
665             if not noerrors:
666                 raise RuntimeError, (out, err)
667         return (out, err), proc
668
669 class Application(Dependency):
670     """
671     An application also has dependencies, but also a command to be ran and monitored.
672     
673     It adds the output of that command as traces.
674     """
675     
676     TRACES = ('stdout','stderr','buildlog', 'output')
677     
678     def __init__(self, api=None):
679         super(Application,self).__init__(api)
680         
681         # Attributes
682         self.command = None
683         self.sudo = False
684         
685         self.stdin = None
686         self.stdout = None
687         self.stderr = None
688         self.output = None
689         
690         # Those are filled when the app is started
691         #   Having both pid and ppid makes it harder
692         #   for pid rollover to induce tracking mistakes
693         self._started = False
694         self._pid = None
695         self._ppid = None
696
697         # Do not add to the python path of nodes
698         self.add_to_path = False
699     
700     def __str__(self):
701         return "%s<command:%s%s>" % (
702             self.__class__.__name__,
703             "sudo " if self.sudo else "",
704             self.command,
705         )
706     
707     def start(self):
708         self._logger.info("Starting %s", self)
709         
710         # Create shell script with the command
711         # This way, complex commands and scripts can be ran seamlessly
712         # sync files
713         command = cStringIO.StringIO()
714         command.write('export PYTHONPATH=$PYTHONPATH:%s\n' % (
715             ':'.join(["${HOME}/"+server.shell_escape(s) for s in self.node.pythonpath])
716         ))
717         command.write('export PATH=$PATH:%s\n' % (
718             ':'.join(["${HOME}/"+server.shell_escape(s) for s in self.node.pythonpath])
719         ))
720         if self.node.env:
721             for envkey, envvals in self.node.env.iteritems():
722                 for envval in envvals:
723                     command.write('export %s=%s\n' % (envkey, envval))
724         command.write(self.command)
725         command.seek(0)
726
727         try:
728             self._popen_scp(
729                 command,
730                 '%s@%s:%s' % (self.node.slicename, self.node.hostname, 
731                     os.path.join(self.home_path, "app.sh"))
732                 )
733         except RuntimeError, e:
734             raise RuntimeError, "Failed to set up application: %s %s" \
735                     % (e.args[0], e.args[1],)
736         
737         # Start process in a "daemonized" way, using nohup and heavy
738         # stdin/out redirection to avoid connection issues
739         (out,err),proc = rspawn.remote_spawn(
740             self._replace_paths("bash ./app.sh"),
741             
742             pidfile = './pid',
743             home = self.home_path,
744             stdin = 'stdin' if self.stdin is not None else '/dev/null',
745             stdout = 'stdout' if self.stdout else '/dev/null',
746             stderr = 'stderr' if self.stderr else '/dev/null',
747             sudo = self.sudo,
748             
749             host = self.node.hostname,
750             port = None,
751             user = self.node.slicename,
752             agent = None,
753             ident_key = self.node.ident_path,
754             server_key = self.node.server_key
755             )
756         
757         if proc.wait():
758             if self.check_bad_host(out, err):
759                 self.node.blacklist()
760             raise RuntimeError, "Failed to set up application: %s %s" % (out,err,)
761
762         self._started = True
763     
764     def recover(self):
765         # Assuming the application is running on PlanetLab,
766         # proper pidfiles should be present at the app's home path.
767         # So we mark this application as started, and check the pidfiles
768         self._started = True
769         self.checkpid()
770
771     def checkpid(self):            
772         # Get PID/PPID
773         # NOTE: wait a bit for the pidfile to be created
774         if self._started and not self._pid or not self._ppid:
775             pidtuple = rspawn.remote_check_pid(
776                 os.path.join(self.home_path,'pid'),
777                 host = self.node.hostname,
778                 port = None,
779                 user = self.node.slicename,
780                 agent = None,
781                 ident_key = self.node.ident_path,
782                 server_key = self.node.server_key
783                 )
784             
785             if pidtuple:
786                 self._pid, self._ppid = pidtuple
787     
788     def status(self):
789         self.checkpid()
790         if not self._started:
791             return AS.STATUS_NOT_STARTED
792         elif not self._pid or not self._ppid:
793             return AS.STATUS_NOT_STARTED
794         else:
795             status = rspawn.remote_status(
796                 self._pid, self._ppid,
797                 host = self.node.hostname,
798                 port = None,
799                 user = self.node.slicename,
800                 agent = None,
801                 ident_key = self.node.ident_path,
802                 server_key = self.node.server_key
803                 )
804             
805             if status is rspawn.NOT_STARTED:
806                 return AS.STATUS_NOT_STARTED
807             elif status is rspawn.RUNNING:
808                 return AS.STATUS_RUNNING
809             elif status is rspawn.FINISHED:
810                 return AS.STATUS_FINISHED
811             else:
812                 # WTF?
813                 return AS.STATUS_NOT_STARTED
814     
815     def kill(self):
816         status = self.status()
817         if status == AS.STATUS_RUNNING:
818             # kill by ppid+pid - SIGTERM first, then try SIGKILL
819             rspawn.remote_kill(
820                 self._pid, self._ppid,
821                 host = self.node.hostname,
822                 port = None,
823                 user = self.node.slicename,
824                 agent = None,
825                 ident_key = self.node.ident_path,
826                 server_key = self.node.server_key,
827                 sudo = self.sudo
828                 )
829             self._logger.info("Killed %s", self)
830
831
class NepiDependency(Dependency):
    """
    This dependency adds nepi itself to the python path,
    so that you may run testbeds within PL nodes.

    The locally installed nepi package is packed into a temporary
    tarball, which is used as the dependency's sources. The build step
    moves it into place and the install step unpacks it.
    """

    # Class attribute holding a *weak* reference to the shared NEPI tar file
    # so that all instances may share it. Don't operate on the file itself,
    # it would be a mess, just use its path.
    _shared_nepi_tar = None

    def __init__(self, api = None):
        super(NepiDependency, self).__init__(api)

        # Strong reference to the (possibly shared) tarball; it keeps the
        # temporary file alive for as long as this dependency exists.
        self._tarball = None

        self.depends = 'python python-ipaddr python-setuptools'

        # our sources are in our ad-hoc tarball
        self.sources = self.tarball.name

        tarname = os.path.basename(self.tarball.name)

        # it's already built - just move the tarball into place
        self.build = "mv -f ${SOURCES}/%s ." % (tarname,)

        # unpack it into sources, and we're done
        self.install = "tar xzf ${BUILD}/%s -C .." % (tarname,)

    @property
    def tarball(self):
        """
        Temporary file object holding the gzipped nepi sources.

        The tarball is created on first use and cached on the instance.
        A weak reference to it is kept on the class, so other instances
        reuse the same file while at least one of them still holds it.

        Raises RuntimeError if the tar command fails.
        """
        if self._tarball is None:
            shared_ref = NepiDependency._shared_nepi_tar
            shared_tar = shared_ref() if shared_ref is not None else None

            if shared_tar is None:
                # Build an ad-hoc tarball
                # Prebuilt
                import nepi
                import tempfile
                import weakref

                shared_tar = tempfile.NamedTemporaryFile(prefix='nepi-src-', suffix='.tar.gz')

                # Directory containing the nepi package, so that the
                # archive holds a single top-level 'nepi' folder.
                nepi_parent = os.path.join(os.path.dirname(os.path.dirname(nepi.__file__)),'.')

                # Close the /dev/null handles once tar is done instead of
                # leaking them until garbage collection.
                null_in = open("/dev/null","r")
                try:
                    null_out = open("/dev/null","w")
                    try:
                        proc = subprocess.Popen(
                            ["tar", "czf", shared_tar.name,
                                '-C', nepi_parent,
                                'nepi'],
                            stdout = null_out,
                            stdin = null_in)
                        failed = proc.wait()
                    finally:
                        null_out.close()
                finally:
                    null_in.close()

                if failed:
                    raise RuntimeError("Failed to create nepi tarball")

                # Publish on the class (not the instance), and weakly, so
                # the file goes away when the last user drops it.
                NepiDependency._shared_nepi_tar = weakref.ref(shared_tar)

            self._tarball = shared_tar

        return self._tarball
888
class NS3Dependency(Dependency):
    """
    This dependency adds NS3 libraries to the library paths,
    so that you may run the NS3 testbed within PL nodes.
    
    You'll also need the NepiDependency.

    Both the build and the install command start by probing for a working
    installation (python modules importable, ns libraries present); the
    expensive download/compile/move steps only run when that probe fails.
    """

    # Defaults backing the `tarball` property below. This class never
    # assigns either name in __init__, so without them the property would
    # fail on its first attribute lookup (unless the base class happens to
    # provide them).
    _tarball = None
    _shared_nepi_tar = None
    
    def __init__(self, api = None):
        super(NS3Dependency, self).__init__(api)
        
        self.depends = 'bzr'
        
        self.buildDepends = 'make waf gcc gcc-c++ gccxml unzip'
        
        # We have to download the sources, untar, build...
        pygccxml_source_url = "http://leaseweb.dl.sourceforge.net/project/pygccxml/pygccxml/pygccxml-1.0/pygccxml-1.0.0.zip"
        # snapshot lives under /archive/, same layout as the passfd URL below
        ns3_source_url = "http://nepi.pl.sophia.inria.fr/code/nepi-ns3.13/archive/tip.tar.gz"
        passfd_source_url = "http://nepi.pl.sophia.inria.fr/code/python-passfd/archive/tip.tar.gz"
        
        pybindgen_version = "797"

        self.build =(
            " ( "
            "  cd .. && "
            "  python -c 'import pygccxml, pybindgen, passfd' && "
            "  test -f lib/ns/_core.so && "
            "  test -f lib/ns/__init__.py && "
            "  test -f lib/ns/core.py && "
            "  test -f lib/libns3-core.so && "
            "  LD_LIBRARY_PATH=lib PYTHONPATH=lib python -c 'import ns.core' "
            " ) || ( "
                # Not working, rebuild
                     # Archive SHA1 sums to check
                     "echo '7158877faff2254e6c094bf18e6b4283cac19137  pygccxml-1.0.0.zip' > archive_sums.txt && "
                     " ( " # check existing files
                     " sha1sum -c archive_sums.txt && "
                     " test -f passfd-src.tar.gz && "
                     " test -f ns3-src.tar.gz "
                     " ) || ( " # nope? re-download
                     " rm -rf pybindgen pygccxml-1.0.0.zip passfd-src.tar.gz ns3-src.tar.gz && "
                     " bzr checkout lp:pybindgen -r %(pybindgen_version)s && " # continue, to exploit the case when it has already been dl'ed
                     " wget -q -c -O pygccxml-1.0.0.zip %(pygccxml_source_url)s && "
                     " wget -q -c -O passfd-src.tar.gz %(passfd_source_url)s && "
                     " wget -q -c -O ns3-src.tar.gz %(ns3_source_url)s && "
                     " sha1sum -c archive_sums.txt " # Check SHA1 sums when applicable
                     " ) && "
                     "unzip -n pygccxml-1.0.0.zip && "
                     "mkdir -p ns3-src && "
                     "mkdir -p passfd-src && "
                     "tar xzf ns3-src.tar.gz --strip-components=1 -C ns3-src && "
                     "tar xzf passfd-src.tar.gz --strip-components=1 -C passfd-src && "
                     "rm -rf target && "    # mv doesn't like unclean targets
                     "mkdir -p target && "
                     "cd pygccxml-1.0.0 && "
                     "rm -rf unittests docs && " # pygccxml has ~100M of unit tests - excessive - docs aren't needed either
                     "python setup.py build && "
                     "python setup.py install --install-lib ${BUILD}/target && "
                     "python setup.py clean && "
                     "cd ../pybindgen && "
                     "export PYTHONPATH=$PYTHONPATH:${BUILD}/target && "
                     "./waf configure --prefix=${BUILD}/target -d release && "
                     "./waf && "
                     "./waf install && "
                     "./waf clean && "
                     "mv -f ${BUILD}/target/lib/python*/site-packages/pybindgen ${BUILD}/target/. && "
                     "rm -rf ${BUILD}/target/lib && "
                     "cd ../passfd-src && "
                     "python setup.py build && "
                     "python setup.py install --install-lib ${BUILD}/target && "
                     "python setup.py clean && "
                     "cd ../ns3-src && "
                     # NOTE(review): pybindgen is checked out into ./pybindgen above,
                     # not ./pybindgen-src - confirm whether this path is still wanted
                     "./waf configure --prefix=${BUILD}/target --with-pybindgen=../pybindgen-src -d release --disable-examples --disable-tests && "
                     "./waf &&"
                     "./waf install && "
                     "rm -f ${BUILD}/target/lib/*.so && "
                     "cp -a ${BUILD}/ns3-src/build/release/libns3*.so ${BUILD}/target/lib && "
                     "cp -a ${BUILD}/ns3-src/build/release/bindings/python/ns ${BUILD}/target/lib &&"
                     "./waf clean "
             " )"
                     % dict(
                        pybindgen_version = server.shell_escape(pybindgen_version),
                        pygccxml_source_url = server.shell_escape(pygccxml_source_url),
                        ns3_source_url = server.shell_escape(ns3_source_url),
                        passfd_source_url = server.shell_escape(passfd_source_url),
                     ))
        
        # Just move ${BUILD}/target
        self.install = (
            " ( "
            "  cd .. && "
            "  python -c 'import pygccxml, pybindgen, passfd' && "
            "  test -f lib/ns/_core.so && "
            "  test -f lib/ns/__init__.py && "
            "  test -f lib/ns/core.py && "
            "  test -f lib/libns3-core.so && "
            "  LD_LIBRARY_PATH=lib PYTHONPATH=lib python -c 'import ns.core' "
            " ) || ( "
                # Not working, reinstall
                    "test -d ${BUILD}/target && "
                    "[[ \"x\" != \"x$(find ${BUILD}/target -mindepth 1 -print -quit)\" ]] &&"
                    "( for i in ${BUILD}/target/* ; do rm -rf ${SOURCES}/${i##*/} ; done ) && " # mv doesn't like unclean targets
                    "mv -f ${BUILD}/target/* ${SOURCES}"
            " )"
        )
        
        # Set extra environment paths
        self.env['NEPI_NS3BINDINGS'] = "${SOURCES}/lib"
        self.env['NEPI_NS3LIBRARY'] = "${SOURCES}/lib"
    
    @property
    def tarball(self):
        """
        Temporary file object holding a gzipped tarball of the nepi package.

        The tarball is created on first use and cached on the instance.
        A weak reference to it is kept on the class, so other instances
        reuse the same file while at least one of them still holds it.

        Raises RuntimeError if the tar command fails.
        """
        if self._tarball is None:
            shared_ref = NS3Dependency._shared_nepi_tar
            shared_tar = shared_ref() if shared_ref is not None else None

            if shared_tar is None:
                # Build an ad-hoc tarball
                # Prebuilt
                import nepi
                import tempfile
                import weakref
                
                shared_tar = tempfile.NamedTemporaryFile(prefix='nepi-src-', suffix='.tar.gz')

                # Directory containing the nepi package, so that the
                # archive holds a single top-level 'nepi' folder.
                nepi_parent = os.path.join(os.path.dirname(os.path.dirname(nepi.__file__)),'.')

                # Close the /dev/null handles once tar is done instead of
                # leaking them until garbage collection.
                null_in = open("/dev/null","r")
                try:
                    null_out = open("/dev/null","w")
                    try:
                        proc = subprocess.Popen(
                            ["tar", "czf", shared_tar.name,
                                '-C', nepi_parent,
                                'nepi'],
                            stdout = null_out,
                            stdin = null_in)
                        failed = proc.wait()
                    finally:
                        null_out.close()
                finally:
                    null_in.close()

                if failed:
                    raise RuntimeError("Failed to create nepi tarball")

                # Publish on the class (not the instance), and weakly, so
                # the file goes away when the last user drops it.
                NS3Dependency._shared_nepi_tar = weakref.ref(shared_tar)

            self._tarball = shared_tar
                
        return self._tarball
1026
1027 class YumDependency(Dependency):
1028     """
1029     This dependency is an internal helper class used to
1030     efficiently distribute yum-downloaded rpms.
1031     
1032     It temporarily sets the yum cache as persistent in the
1033     build master, and installs all the required packages.
1034     
1035     The rpm packages left in the yum cache are gathered and
1036     distributed by the underlying Dependency in an efficient
1037     manner. Build slaves will then install those rpms back in
1038     the cache before issuing the install command.
1039     
1040     When packages have been installed already, nothing but an
1041     empty tar is distributed.
1042     """
1043     
1044     # Class attribute holding a *weak* reference to the shared NEPI tar file
1045     # so that they may share it. Don't operate on the file itself, it would
1046     # be a mess, just use its path.
1047     _shared_nepi_tar = None
1048     
1049     def _build_get(self):
1050         # canonical representation of dependencies
1051         depends = ' '.join( sorted( (self.depends or "").split(' ') ) )
1052         
1053         # download rpms and pack into a tar archive
1054         return (
1055             "sudo -S nice yum -y makecache && "
1056             "sudo -S sed -i -r 's/keepcache *= *0/keepcache=1/' /etc/yum.conf && "
1057             " ( ( "
1058                 "sudo -S nice yum -y install %s ; "
1059                 "rm -f ${BUILD}/packages.tar ; "
1060                 "tar -C /var/cache/yum -rf ${BUILD}/packages.tar $(cd /var/cache/yum ; find -iname '*.rpm')"
1061             " ) || /bin/true ) && "
1062             "sudo -S sed -i -r 's/keepcache *= *1/keepcache=0/' /etc/yum.conf && "
1063             "( sudo -S nice yum -y clean packages || /bin/true ) "
1064         ) % ( depends, )
1065     def _build_set(self, value):
1066         # ignore
1067         return
1068     build = property(_build_get, _build_set)
1069     
1070     def _install_get(self):
1071         # canonical representation of dependencies
1072         depends = ' '.join( sorted( (self.depends or "").split(' ') ) )
1073         
1074         # unpack cached rpms into yum cache, install, and cleanup
1075         return (
1076             "sudo -S tar -k --keep-newer-files -C /var/cache/yum -xf packages.tar && "
1077             "sudo -S nice yum -y install %s && "
1078             "( sudo -S nice yum -y clean packages || /bin/true ) "
1079         ) % ( depends, )
1080     def _install_set(self, value):
1081         # ignore
1082         return
1083     install = property(_install_get, _install_set)
1084         
1085     def check_bad_host(self, out, err):
1086         badre = re.compile(r'(?:'
1087                            r'The GPG keys listed for the ".*" repository are already installed but they are not correct for this package'
1088                            r'|Error: Cannot retrieve repository metadata (repomd.xml) for repository: .*[.] Please verify its path and try again'
1089                            r'|Error: disk I/O error'
1090                            r'|MASTER NODE UNREACHABLE'
1091                            r')', 
1092                            re.I)
1093         return badre.search(out) or badre.search(err) or self.node.check_bad_host(out,err)