Do not re-install keys on retrials (they're no longer available)
[nepi.git] / src / nepi / testbeds / planetlab / application.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from constants import TESTBED_ID
5 import plcapi
6 import operator
7 import os
8 import os.path
9 import sys
10 import nepi.util.server as server
11 import cStringIO
12 import subprocess
13 import rspawn
14 import random
15 import time
16 import socket
17 import threading
18 import logging
19 import re
20
21 from nepi.util.constants import ApplicationStatus as AS
22
23 class Dependency(object):
24     """
25     A Dependency is in every respect like an application.
26     
27     It depends on some packages, it may require building binaries, it must deploy
28     them...
29     
30     But it has no command. Dependencies aren't ever started, or stopped, and have
31     no status.
32     """
33
34     TRACES = ('buildlog')
35
36     def __init__(self, api=None):
37         if not api:
38             api = plcapi.PLCAPI()
39         self._api = api
40         
41         # Attributes
42         self.command = None
43         self.sudo = False
44         
45         self.build = None
46         self.install = None
47         self.depends = None
48         self.buildDepends = None
49         self.sources = None
50         self.rpmFusion = False
51         self.env = {}
52         
53         self.stdin = None
54         self.stdout = None
55         self.stderr = None
56         self.buildlog = None
57         
58         self.add_to_path = True
59         
60         # Those are filled when the app is configured
61         self.home_path = None
62         
63         # Those are filled when an actual node is connected
64         self.node = None
65         
66         # Those are filled when the app is started
67         #   Having both pid and ppid makes it harder
68         #   for pid rollover to induce tracking mistakes
69         self._started = False
70         self._setup = False
71         self._setuper = None
72         self._pid = None
73         self._ppid = None
74
75         # Spanning tree deployment
76         self._master = None
77         self._master_passphrase = None
78         self._master_prk = None
79         self._master_puk = None
80         self._master_token = os.urandom(8).encode("hex")
81         self._build_pid = None
82         self._build_ppid = None
83         
84         # Logging
85         self._logger = logging.getLogger('nepi.testbeds.planetlab')
86         
87     
88     def __str__(self):
89         return "%s<%s>" % (
90             self.__class__.__name__,
91             ' '.join(filter(bool,(self.depends, self.sources)))
92         )
93     
94     def validate(self):
95         if self.home_path is None:
96             raise AssertionError, "Misconfigured application: missing home path"
97         if self.node.ident_path is None or not os.access(self.node.ident_path, os.R_OK):
98             raise AssertionError, "Misconfigured application: missing slice SSH key"
99         if self.node is None:
100             raise AssertionError, "Misconfigured application: unconnected node"
101         if self.node.hostname is None:
102             raise AssertionError, "Misconfigured application: misconfigured node"
103         if self.node.slicename is None:
104             raise AssertionError, "Misconfigured application: unspecified slice"
105     
106     def check_bad_host(self, out, err):
107         """
108         Called whenever an operation fails, it's given the output to be checked for
109         telltale signs of unhealthy hosts.
110         """
111         return False
112     
113     def remote_trace_path(self, whichtrace):
114         if whichtrace in self.TRACES:
115             tracefile = os.path.join(self.home_path, whichtrace)
116         else:
117             tracefile = None
118         
119         return tracefile
120
121     def remote_trace_name(self, whichtrace):
122         if whichtrace in self.TRACES:
123             return whichtrace
124         return None
125
126     def sync_trace(self, local_dir, whichtrace):
127         tracefile = self.remote_trace_path(whichtrace)
128         if not tracefile:
129             return None
130         
131         local_path = os.path.join(local_dir, tracefile)
132         
133         # create parent local folders
134         proc = subprocess.Popen(
135             ["mkdir", "-p", os.path.dirname(local_path)],
136             stdout = open("/dev/null","w"),
137             stdin = open("/dev/null","r"))
138
139         if proc.wait():
140             raise RuntimeError, "Failed to synchronize trace"
141         
142         # sync files
143         try:
144             self._popen_scp(
145                 '%s@%s:%s' % (self.node.slicename, self.node.hostname,
146                     tracefile),
147                 local_path
148                 )
149         except RuntimeError, e:
150             raise RuntimeError, "Failed to synchronize trace: %s %s" \
151                     % (e.args[0], e.args[1],)
152         
153         return local_path
154     
155     def recover(self):
156         # We assume a correct deployment, so recovery only
157         # means we mark this dependency as deployed
158         self._setup = True
159
160     def setup(self):
161         self._logger.info("Setting up %s", self)
162         self._make_home()
163         self._launch_build()
164         self._finish_build()
165         self._setup = True
166     
167     def async_setup(self):
168         if not self._setuper:
169             def setuper():
170                 try:
171                     self.setup()
172                 except:
173                     self._setuper._exc.append(sys.exc_info())
174             self._setuper = threading.Thread(
175                 target = setuper)
176             self._setuper._exc = []
177             self._setuper.start()
178     
179     def async_setup_wait(self):
180         if not self._setup:
181             self._logger.info("Waiting for %s to be setup", self)
182             if self._setuper:
183                 self._setuper.join()
184                 if not self._setup:
185                     if self._setuper._exc:
186                         exctyp,exval,exctrace = self._setuper._exc[0]
187                         raise exctyp,exval,exctrace
188                     else:
189                         raise RuntimeError, "Failed to setup application"
190                 else:
191                     self._logger.info("Setup ready: %s at %s", self, self.node.hostname)
192             else:
193                 self.setup()
194         
195     def _make_home(self):
196         # Make sure all the paths are created where 
197         # they have to be created for deployment
198         # sync files
199         try:
200             self._popen_ssh_command(
201                 "mkdir -p %(home)s && ( rm -f %(home)s/{pid,build-pid,nepi-build.sh} >/dev/null 2>&1 || /bin/true )" \
202                     % { 'home' : server.shell_escape(self.home_path) },
203                 timeout = 120,
204                 retry = 3
205                 )
206         except RuntimeError, e:
207             raise RuntimeError, "Failed to set up application %s: %s %s" % (self.home_path, e.args[0], e.args[1],)
208         
209         if self.stdin:
210             # Write program input
211             try:
212                 self._popen_scp(
213                     cStringIO.StringIO(self.stdin),
214                     '%s@%s:%s' % (self.node.slicename, self.node.hostname, 
215                         os.path.join(self.home_path, 'stdin') ),
216                     )
217             except RuntimeError, e:
218                 raise RuntimeError, "Failed to set up application %s: %s %s" \
219                         % (self.home_path, e.args[0], e.args[1],)
220
221     def _replace_paths(self, command):
222         """
223         Replace all special path tags with shell-escaped actual paths.
224         """
225         # need to append ${HOME} if paths aren't absolute, to MAKE them absolute.
226         root = '' if self.home_path.startswith('/') else "${HOME}/"
227         return ( command
228             .replace("${SOURCES}", root+server.shell_escape(self.home_path))
229             .replace("${BUILD}", root+server.shell_escape(os.path.join(self.home_path,'build'))) )
230
231     def _launch_build(self, trial=0):
232         if self._master is not None:
233             if not trial:
234                 self._do_install_keys()
235             buildscript = self._do_build_slave()
236         else:
237             buildscript = self._do_build_master()
238             
239         if buildscript is not None:
240             self._logger.info("Building %s at %s", self, self.node.hostname)
241             
242             # upload build script
243             try:
244                 self._popen_scp(
245                     buildscript,
246                     '%s@%s:%s' % (self.node.slicename, self.node.hostname, 
247                         os.path.join(self.home_path, 'nepi-build.sh') )
248                     )
249             except RuntimeError, e:
250                 raise RuntimeError, "Failed to set up application %s: %s %s" \
251                         % (self.home_path, e.args[0], e.args[1],)
252             
253             # launch build
254             self._do_launch_build()
255     
256     def _finish_build(self):
257         self._do_wait_build()
258         self._do_install()
259
    def _do_build_slave(self):
        """
        Generate the build script for a slave node, i.e. one that fetches
        sources and build results from the master node (self._master)
        instead of building them itself.

        Returns a cStringIO object holding the shell script, or None when
        there is neither a source nor a build command (nothing to fetch).
        """
        if not self.sources and not self.build:
            return None

        # Create build script
        # 'files' collects the scp locations (user@host:path) to pull from the master
        files = set()

        if self.sources:
            # Sources are looked up by base name inside the master's home
            sources = self.sources.split(' ')
            files.update(
                "%s@%s:%s" % (self._master.node.slicename, self._master.node.hostname, 
                    os.path.join(self._master.home_path, os.path.basename(source)),)
                for source in sources
            )

        if self.build:
            # The master archives its build results as build.tar.gz
            files.add(
                "%s@%s:%s" % (self._master.node.slicename, self._master.node.hostname, 
                    os.path.join(self._master.home_path, 'build.tar.gz'),)
            )

        sshopts = "-o ConnectTimeout=30 -o ConnectionAttempts=3 -o ServerAliveInterval=30 -o TCPKeepAlive=yes"

        # Start an ssh-agent, add the private key to it (the passphrase is fed
        # from $NEPI_MASTER_PASSPHRASE through a 'cat' askpass helper), then
        # remove both key files from the node
        launch_agent = "{ ( echo -e '#!/bin/sh\\ncat' > .ssh-askpass ) && chmod u+x .ssh-askpass"\
                        " && export SSH_ASKPASS=$(pwd)/.ssh-askpass "\
                        " && ssh-agent > .ssh-agent.sh ; } && . ./.ssh-agent.sh && ( echo $NEPI_MASTER_PASSPHRASE | ssh-add %(prk)s ) && rm -rf %(prk)s %(puk)s" %  \
        {
            'prk' : server.shell_escape(self._master_prk_name),
            'puk' : server.shell_escape(self._master_puk_name),
        }

        kill_agent = "kill $SSH_AGENT_PID"

        # Check the master is reachable (ping + ssh), poll every 5 seconds until
        # its build.token.retcode file holds the master's token, and then
        # require its build.token file to hold it too (else: BAD TOKEN, exit 1)
        waitmaster = (
            "{ "
            "echo 'Checking master reachability' ; "
            "if ping -c 3 %(master_host)s && (. ./.ssh-agent.sh > /dev/null ; ssh -o UserKnownHostsFile=%(hostkey)s %(sshopts)s %(master)s echo MASTER SAYS HI ) ; then "
            "echo 'Master node reachable' ; "
            "else "
            "echo 'MASTER NODE UNREACHABLE' && "
            "exit 1 ; "
            "fi ; "
            ". ./.ssh-agent.sh ; "
            "while [[ $(. ./.ssh-agent.sh > /dev/null ; ssh -q -o UserKnownHostsFile=%(hostkey)s %(sshopts)s %(master)s cat %(token_path)s.retcode || /bin/true) != %(token)s ]] ; do sleep 5 ; done ; "
            "if [[ $(. ./.ssh-agent.sh > /dev/null ; ssh -q -o UserKnownHostsFile=%(hostkey)s %(sshopts)s %(master)s cat %(token_path)s || /bin/true) != %(token)s ]] ; then echo BAD TOKEN ; exit 1 ; fi ; "
            "}" 
        ) % {
            'hostkey' : 'master_known_hosts',
            'master' : "%s@%s" % (self._master.node.slicename, self._master.node.hostname),
            'master_host' : self._master.node.hostname,
            'token_path' : os.path.join(self._master.home_path, 'build.token'),
            'token' : server.shell_escape(self._master._master_token),
            'sshopts' : sshopts,
        }

        # Copy the collected files from the master into the current folder,
        # unpack the build archive if there is one, and write this node's own
        # token to build.token and build.token.retcode
        syncfiles = ". ./.ssh-agent.sh && scp -p -o UserKnownHostsFile=%(hostkey)s %(sshopts)s %(files)s ." % {
            'hostkey' : 'master_known_hosts',
            'files' : ' '.join(files),
            'sshopts' : sshopts,
        }
        if self.build:
            syncfiles += " && tar xzf build.tar.gz"
        syncfiles += " && ( echo %s > build.token )" % (server.shell_escape(self._master_token),)
        syncfiles += " && ( echo %s > build.token.retcode )" % (server.shell_escape(self._master_token),)
        syncfiles = "{ . ./.ssh-agent.sh ; %s ; }" % (syncfiles,)

        # Kill the agent and remove keys, known-hosts file and askpass helper
        cleanup = "{ . ./.ssh-agent.sh ; kill $SSH_AGENT_PID ; rm -rf %(prk)s %(puk)s master_known_hosts .ssh-askpass ; }" % {
            'prk' : server.shell_escape(self._master_prk_name),
            'puk' : server.shell_escape(self._master_puk_name),
        }

        # cleanup runs on the failure path too, and build.token.retcode is
        # written regardless of the outcome.
        # NOTE(review): the 'home' key below is not referenced by the template
        slavescript = "( ( %(launch_agent)s && %(waitmaster)s && %(syncfiles)s && %(kill_agent)s && %(cleanup)s ) || %(cleanup)s ) ; echo %(token)s > build.token.retcode" % {
            'waitmaster' : waitmaster,
            'syncfiles' : syncfiles,
            'cleanup' : cleanup,
            'kill_agent' : kill_agent,
            'launch_agent' : launch_agent,
            'home' : server.shell_escape(self.home_path),
            'token' : server.shell_escape(self._master_token),
        }

        return cStringIO.StringIO(slavescript)
342          
343     def _do_launch_build(self):
344         script = "bash ./nepi-build.sh"
345         if self._master_passphrase:
346             script = "NEPI_MASTER_PASSPHRASE=%s %s" % (
347                 server.shell_escape(self._master_passphrase),
348                 script
349             )
350         (out,err),proc = rspawn.remote_spawn(
351             script,
352             pidfile = 'build-pid',
353             home = self.home_path,
354             stdin = '/dev/null',
355             stdout = 'buildlog',
356             stderr = rspawn.STDOUT,
357             
358             host = self.node.hostname,
359             port = None,
360             user = self.node.slicename,
361             agent = None,
362             ident_key = self.node.ident_path,
363             server_key = self.node.server_key
364             )
365         
366         if proc.wait():
367             if self.check_bad_host(out, err):
368                 self.node.blacklist()
369             raise RuntimeError, "Failed to set up build slave %s: %s %s" % (self.home_path, out,err,)
370         
371         
372         pid = ppid = None
373         delay = 1.0
374         for i in xrange(5):
375             pidtuple = rspawn.remote_check_pid(
376                 os.path.join(self.home_path,'build-pid'),
377                 host = self.node.hostname,
378                 port = None,
379                 user = self.node.slicename,
380                 agent = None,
381                 ident_key = self.node.ident_path,
382                 server_key = self.node.server_key
383                 )
384             
385             if pidtuple:
386                 pid, ppid = pidtuple
387                 self._build_pid, self._build_ppid = pidtuple
388                 break
389             else:
390                 time.sleep(delay)
391                 delay = min(30,delay*1.2)
392         else:
393             raise RuntimeError, "Failed to set up build slave %s: cannot get pid" % (self.home_path,)
394
395         self._logger.info("Deploying %s at %s", self, self.node.hostname)
396         
397     def _do_wait_build(self, trial=0):
398         pid = self._build_pid
399         ppid = self._build_ppid
400         
401         if pid and ppid:
402             delay = 1.0
403             first = True
404             bustspin = 0
405             while True:
406                 status = rspawn.remote_status(
407                     pid, ppid,
408                     host = self.node.hostname,
409                     port = None,
410                     user = self.node.slicename,
411                     agent = None,
412                     ident_key = self.node.ident_path,
413                     server_key = self.node.server_key
414                     )
415                 
416                 if status is rspawn.FINISHED:
417                     self._build_pid = self._build_ppid = None
418                     break
419                 elif status is not rspawn.RUNNING:
420                     bustspin += 1
421                     time.sleep(delay*(5.5+random.random()))
422                     if bustspin > 12:
423                         self._build_pid = self._build_ppid = None
424                         break
425                 else:
426                     if first:
427                         self._logger.info("Waiting for %s to finish building at %s %s", self, self.node.hostname,
428                             "(build slave)" if self._master is not None else "(build master)")
429                         
430                         first = False
431                     time.sleep(delay*(0.5+random.random()))
432                     delay = min(30,delay*1.2)
433                     bustspin = 0
434             
435             # check build token
436             slave_token = ""
437             for i in xrange(3):
438                 (out, err), proc = self._popen_ssh_command(
439                     "cat %(token_path)s" % {
440                         'token_path' : os.path.join(self.home_path, 'build.token'),
441                     },
442                     timeout = 120,
443                     noerrors = True)
444                 if not proc.wait() and out:
445                     slave_token = out.strip()
446                 
447                 if slave_token:
448                     break
449                 else:
450                     time.sleep(2)
451             
452             if slave_token != self._master_token:
453                 # Get buildlog for the error message
454
455                 (buildlog, err), proc = self._popen_ssh_command(
456                     "cat %(buildlog)s" % {
457                         'buildlog' : os.path.join(self.home_path, 'buildlog'),
458                         'buildscript' : os.path.join(self.home_path, 'nepi-build.sh'),
459                     },
460                     timeout = 120,
461                     noerrors = True)
462                 
463                 proc.wait()
464                 
465                 if self.check_bad_host(buildlog, err):
466                     self.node.blacklist()
467                 elif self._master and trial < 3 and 'BAD TOKEN' in buildlog or 'BAD TOKEN' in err:
468                     # bad sync with master, may try again
469                     # but first wait for master
470                     self._master.async_setup_wait()
471                     self._launch_build(trial+1)
472                     self._do_wait_build(trial+1)
473                 else:
474                     raise RuntimeError, "Failed to set up application %s: "\
475                             "build failed, got wrong token from pid %s/%s "\
476                             "(expected %r, got %r), see buildlog at %s:\n%s" % (
477                         self.home_path, pid, ppid, self._master_token, slave_token, self.node.hostname, buildlog)
478
479             self._logger.info("Built %s at %s", self, self.node.hostname)
480
481     def _do_kill_build(self):
482         pid = self._build_pid
483         ppid = self._build_ppid
484         
485         if pid and ppid:
486             self._logger.info("Killing build of %s", self)
487             rspawn.remote_kill(
488                 pid, ppid,
489                 host = self.node.hostname,
490                 port = None,
491                 user = self.node.slicename,
492                 agent = None,
493                 ident_key = self.node.ident_path
494                 )
495         
496         
497     def _do_build_master(self):
498         if not self.sources and not self.build and not self.buildDepends:
499             return None
500             
501         if self.sources:
502             sources = self.sources.split(' ')
503             
504             # Copy all sources
505             try:
506                 self._popen_scp(
507                     sources,
508                     "%s@%s:%s" % (self.node.slicename, self.node.hostname, 
509                         os.path.join(self.home_path,'.'),)
510                     )
511             except RuntimeError, e:
512                 raise RuntimeError, "Failed upload source file %r: %s %s" \
513                         % (sources, e.args[0], e.args[1],)
514             
515         buildscript = cStringIO.StringIO()
516         
517         buildscript.write("(\n")
518         
519         if self.buildDepends:
520             # Install build dependencies
521             buildscript.write(
522                 "sudo -S yum -y install %(packages)s\n" % {
523                     'packages' : self.buildDepends
524                 }
525             )
526         
527             
528         if self.build:
529             # Build sources
530             buildscript.write(
531                 "mkdir -p build && ( cd build && ( %(command)s ) )\n" % {
532                     'command' : self._replace_paths(self.build),
533                     'home' : server.shell_escape(self.home_path),
534                 }
535             )
536         
537             # Make archive
538             buildscript.write("tar czf build.tar.gz build\n")
539         
540         # Write token
541         buildscript.write("echo %(master_token)s > build.token ) ; echo %(master_token)s > build.token.retcode" % {
542             'master_token' : server.shell_escape(self._master_token)
543         })
544         
545         buildscript.seek(0)
546
547         return buildscript
548
549     def _do_install(self):
550         if self.install:
551             self._logger.info("Installing %s at %s", self, self.node.hostname)
552             
553             # Install application
554             try:
555                 self._popen_ssh_command(
556                     "cd %(home)s && cd build && ( %(command)s ) > ${HOME}/%(home)s/installlog 2>&1 || ( tail ${HOME}/%(home)s/{install,build}log >&2 && false )" % \
557                         {
558                         'command' : self._replace_paths(self.install),
559                         'home' : server.shell_escape(self.home_path),
560                         },
561                     )
562             except RuntimeError, e:
563                 if self.check_bad_host(e.args[0], e.args[1]):
564                     self.node.blacklist()
565                 raise RuntimeError, "Failed install build sources: %s %s" % (e.args[0], e.args[1],)
566
567     def set_master(self, master):
568         self._master = master
569         
570     def install_keys(self, prk, puk, passphrase):
571         # Install keys
572         self._master_passphrase = passphrase
573         self._master_prk = prk
574         self._master_puk = puk
575         self._master_prk_name = os.path.basename(prk.name)
576         self._master_puk_name = os.path.basename(puk.name)
577         
578     def _do_install_keys(self):
579         prk = self._master_prk
580         puk = self._master_puk
581        
582         try:
583             self._popen_scp(
584                 [ prk.name, puk.name ],
585                 '%s@%s:%s' % (self.node.slicename, self.node.hostname, self.home_path )
586                 )
587         except RuntimeError, e:
588             raise RuntimeError, "Failed to set up application deployment keys: %s %s" \
589                     % (e.args[0], e.args[1],)
590
591         try:
592             self._popen_scp(
593                 cStringIO.StringIO('%s,%s %s\n' % (
594                     self._master.node.hostname, socket.gethostbyname(self._master.node.hostname), 
595                     self._master.node.server_key)),
596                 '%s@%s:%s' % (self.node.slicename, self.node.hostname, 
597                     os.path.join(self.home_path,"master_known_hosts") )
598                 )
599         except RuntimeError, e:
600             raise RuntimeError, "Failed to set up application deployment keys: %s %s" \
601                     % (e.args[0], e.args[1],)
602         
603         # No longer need'em
604         self._master_prk = None
605         self._master_puk = None
606     
607     def cleanup(self):
608         # make sure there's no leftover build processes
609         self._do_kill_build()
610
611     @server.eintr_retry
612     def _popen_scp(self, src, dst, retry = 3):
613         while 1:
614             try:
615                 (out,err),proc = server.popen_scp(
616                     src,
617                     dst, 
618                     port = None,
619                     agent = None,
620                     ident_key = self.node.ident_path,
621                     server_key = self.node.server_key
622                     )
623
624                 if server.eintr_retry(proc.wait)():
625                     raise RuntimeError, (out, err)
626                 return (out, err), proc
627             except:
628                 if retry <= 0:
629                     raise
630                 else:
631                     retry -= 1
632   
633
634     @server.eintr_retry
635     def _popen_ssh_command(self, command, retry = 0, noerrors=False, timeout=None):
636         (out,err),proc = server.popen_ssh_command(
637             command,
638             host = self.node.hostname,
639             port = None,
640             user = self.node.slicename,
641             agent = None,
642             ident_key = self.node.ident_path,
643             server_key = self.node.server_key,
644             timeout = timeout,
645             retry = retry
646             )
647
648         if server.eintr_retry(proc.wait)():
649             if not noerrors:
650                 raise RuntimeError, (out, err)
651         return (out, err), proc
652
653 class Application(Dependency):
654     """
655     An application also has dependencies, but also a command to be ran and monitored.
656     
657     It adds the output of that command as traces.
658     """
659     
660     TRACES = ('stdout','stderr','buildlog', 'output')
661     
662     def __init__(self, api=None):
663         super(Application,self).__init__(api)
664         
665         # Attributes
666         self.command = None
667         self.sudo = False
668         
669         self.stdin = None
670         self.stdout = None
671         self.stderr = None
672         self.output = None
673         
674         # Those are filled when the app is started
675         #   Having both pid and ppid makes it harder
676         #   for pid rollover to induce tracking mistakes
677         self._started = False
678         self._pid = None
679         self._ppid = None
680
681         # Do not add to the python path of nodes
682         self.add_to_path = False
683     
684     def __str__(self):
685         return "%s<command:%s%s>" % (
686             self.__class__.__name__,
687             "sudo " if self.sudo else "",
688             self.command,
689         )
690     
691     def start(self):
692         self._logger.info("Starting %s", self)
693         
694         # Create shell script with the command
695         # This way, complex commands and scripts can be ran seamlessly
696         # sync files
697         command = cStringIO.StringIO()
698         command.write('export PYTHONPATH=$PYTHONPATH:%s\n' % (
699             ':'.join(["${HOME}/"+server.shell_escape(s) for s in self.node.pythonpath])
700         ))
701         command.write('export PATH=$PATH:%s\n' % (
702             ':'.join(["${HOME}/"+server.shell_escape(s) for s in self.node.pythonpath])
703         ))
704         if self.node.env:
705             for envkey, envvals in self.node.env.iteritems():
706                 for envval in envvals:
707                     command.write('export %s=%s\n' % (envkey, envval))
708         command.write(self.command)
709         command.seek(0)
710
711         try:
712             self._popen_scp(
713                 command,
714                 '%s@%s:%s' % (self.node.slicename, self.node.hostname, 
715                     os.path.join(self.home_path, "app.sh"))
716                 )
717         except RuntimeError, e:
718             raise RuntimeError, "Failed to set up application: %s %s" \
719                     % (e.args[0], e.args[1],)
720         
721         # Start process in a "daemonized" way, using nohup and heavy
722         # stdin/out redirection to avoid connection issues
723         (out,err),proc = rspawn.remote_spawn(
724             self._replace_paths("bash ./app.sh"),
725             
726             pidfile = './pid',
727             home = self.home_path,
728             stdin = 'stdin' if self.stdin is not None else '/dev/null',
729             stdout = 'stdout' if self.stdout else '/dev/null',
730             stderr = 'stderr' if self.stderr else '/dev/null',
731             sudo = self.sudo,
732             
733             host = self.node.hostname,
734             port = None,
735             user = self.node.slicename,
736             agent = None,
737             ident_key = self.node.ident_path,
738             server_key = self.node.server_key
739             )
740         
741         if proc.wait():
742             if self.check_bad_host(out, err):
743                 self.node.blacklist()
744             raise RuntimeError, "Failed to set up application: %s %s" % (out,err,)
745
746         self._started = True
747     
748     def recover(self):
749         # Assuming the application is running on PlanetLab,
750         # proper pidfiles should be present at the app's home path.
751         # So we mark this application as started, and check the pidfiles
752         self._started = True
753         self.checkpid()
754
755     def checkpid(self):            
756         # Get PID/PPID
757         # NOTE: wait a bit for the pidfile to be created
758         if self._started and not self._pid or not self._ppid:
759             pidtuple = rspawn.remote_check_pid(
760                 os.path.join(self.home_path,'pid'),
761                 host = self.node.hostname,
762                 port = None,
763                 user = self.node.slicename,
764                 agent = None,
765                 ident_key = self.node.ident_path,
766                 server_key = self.node.server_key
767                 )
768             
769             if pidtuple:
770                 self._pid, self._ppid = pidtuple
771     
772     def status(self):
773         self.checkpid()
774         if not self._started:
775             return AS.STATUS_NOT_STARTED
776         elif not self._pid or not self._ppid:
777             return AS.STATUS_NOT_STARTED
778         else:
779             status = rspawn.remote_status(
780                 self._pid, self._ppid,
781                 host = self.node.hostname,
782                 port = None,
783                 user = self.node.slicename,
784                 agent = None,
785                 ident_key = self.node.ident_path,
786                 server_key = self.node.server_key
787                 )
788             
789             if status is rspawn.NOT_STARTED:
790                 return AS.STATUS_NOT_STARTED
791             elif status is rspawn.RUNNING:
792                 return AS.STATUS_RUNNING
793             elif status is rspawn.FINISHED:
794                 return AS.STATUS_FINISHED
795             else:
796                 # WTF?
797                 return AS.STATUS_NOT_STARTED
798     
799     def kill(self):
800         status = self.status()
801         if status == AS.STATUS_RUNNING:
802             # kill by ppid+pid - SIGTERM first, then try SIGKILL
803             rspawn.remote_kill(
804                 self._pid, self._ppid,
805                 host = self.node.hostname,
806                 port = None,
807                 user = self.node.slicename,
808                 agent = None,
809                 ident_key = self.node.ident_path,
810                 server_key = self.node.server_key,
811                 sudo = self.sudo
812                 )
813             self._logger.info("Killed %s", self)
814
815
class NepiDependency(Dependency):
    """
    This dependency adds nepi itself to the python path,
    so that you may run testbeds within PL nodes.
    
    The local nepi package is packed into a temporary tarball, which is
    declared as the dependency's sources. The build step only moves that
    tarball into place, and the install step unpacks it.
    """
    
    # Class attribute holding a *weak* reference to the shared NEPI tar file
    # so that all instances may share it. Don't operate on the file itself,
    # it would be a mess, just use its path.
    _shared_nepi_tar = None
    
    def __init__(self, api = None):
        """
        Set up depends/sources/build/install around the nepi tarball.
        
        api is handed over to the base class constructor unchanged.
        """
        super(NepiDependency, self).__init__(api)
        
        self._tarball = None
        
        self.depends = 'python python-ipaddr python-setuptools'
        
        # our sources are in our ad-hoc tarball
        self.sources = self.tarball.name
        
        tarname = os.path.basename(self.tarball.name)
        
        # it's already built - just move the tarball into place
        self.build = "mv -f ${SOURCES}/%s ." % (tarname,)
        
        # unpack it into sources, and we're done
        self.install = "tar xzf ${BUILD}/%s -C .." % (tarname,)
    
    @property
    def tarball(self):
        """
        Temporary file object holding the nepi source tarball.
        
        The tarball is created on first use and shared among all live
        NepiDependency instances through a class-level weak reference.
        Each instance keeps its own strong reference, so the temporary file
        lives as long as any dependency using it.
        
        Raises RuntimeError if the tar command fails.
        """
        if self._tarball is None:
            # Dereference the class-level weak reference, if there's one.
            # It yields None when no live instance holds the tarball anymore.
            shared_ref = NepiDependency._shared_nepi_tar
            shared_tar = shared_ref() if shared_ref is not None else None
            
            if shared_tar is None:
                # Build an ad-hoc tarball
                # Prebuilt
                import nepi
                import tempfile
                import weakref
                
                shared_tar = tempfile.NamedTemporaryFile(prefix='nepi-src-', suffix='.tar.gz')
                
                # Close the /dev/null handles once tar is done with them,
                # instead of leaving them for the garbage collector
                null_out = open("/dev/null","w")
                try:
                    null_in = open("/dev/null","r")
                    try:
                        proc = subprocess.Popen(
                            ["tar", "czf", shared_tar.name, 
                                '-C', os.path.join(os.path.dirname(os.path.dirname(nepi.__file__)),'.'), 
                                'nepi'],
                            stdout = null_out,
                            stdin = null_in)
                        failed = proc.wait()
                    finally:
                        null_in.close()
                finally:
                    null_out.close()
                
                if failed:
                    # Don't leave a useless temporary file behind
                    shared_tar.close()
                    raise RuntimeError("Failed to create nepi tarball")
                
                # Publish on the class (not on the instance), and weakly,
                # so other instances can actually find and reuse it
                NepiDependency._shared_nepi_tar = weakref.ref(shared_tar)
            
            self._tarball = shared_tar
                
        return self._tarball
872
class NS3Dependency(Dependency):
    """
    This dependency adds NS3 libraries to the library paths,
    so that you may run the NS3 testbed within PL nodes.
    
    You'll also need the NepiDependency.
    
    Both the build and the install commands start by checking whether a
    working installation is already in place (the python modules import and
    the expected files exist under lib/), and only do the actual work when
    that check fails.
    """
    
    def __init__(self, api = None):
        """
        Set up build dependencies, the build and install shell commands,
        and the NEPI_NS3BINDINGS / NEPI_NS3LIBRARY environment variables.
        
        api is handed over to the base class constructor unchanged.
        """
        super(NS3Dependency, self).__init__(api)
        
        self.buildDepends = 'make waf gcc gcc-c++ gccxml unzip'
        
        # We have to download the sources, untar, build...
        pybindgen_source_url = "http://yans.pl.sophia.inria.fr/trac/nepi/raw-attachment/wiki/WikiStart/pybindgen-r794.tar.gz"
        pygccxml_source_url = "http://leaseweb.dl.sourceforge.net/project/pygccxml/pygccxml/pygccxml-1.0/pygccxml-1.0.0.zip"
        ns3_source_url = "http://yans.pl.sophia.inria.fr/code/hgwebdir.cgi/ns-3.11-nepi/archive/tip.tar.gz"
        passfd_source_url = "http://yans.pl.sophia.inria.fr/code/hgwebdir.cgi/python-passfd/archive/tip.tar.gz"
        # The adjacent string literals below are concatenated into a single
        # shell command before the URLs are interpolated at the end.
        self.build =(
            # First group: is there a working installation already?
            " ( "
            "  cd .. && "
            "  python -c 'import pygccxml, pybindgen, passfd' && "
            "  test -f lib/ns/_core.so && "
            "  test -f lib/ns/__init__.py && "
            "  test -f lib/ns/core.py && "
            "  test -f lib/libns3-core.so && "
            "  LD_LIBRARY_PATH=lib PYTHONPATH=lib python -c 'import ns.core' "
            " ) || ( "
                # Not working, rebuild
                     # Archive SHA1 sums to check
                     "echo '7158877faff2254e6c094bf18e6b4283cac19137  pygccxml-1.0.0.zip' > archive_sums.txt && "
                     "echo 'a18c2ccffd0df517bc37e2f3a2475092517c43f2  pybindgen-src.tar.gz' >> archive_sums.txt && "
                     " ( " # check existing files
                     " sha1sum -c archive_sums.txt && "
                     " test -f passfd-src.tar.gz && "
                     " test -f ns3-src.tar.gz "
                     " ) || ( " # nope? re-download
                     # NOTE(review): this removes pybindgen-src.zip, whereas the
                     #   archive fetched and checksummed here is named
                     #   pybindgen-src.tar.gz - confirm whether leaving the
                     #   tarball in place (for wget -c) is intentional
                     " rm -f pybindgen-src.zip pygccxml-1.0.0.zip passfd-src.tar.gz ns3-src.tar.gz && "
                     " wget -q -c -O pybindgen-src.tar.gz %(pybindgen_source_url)s && " # continue, to exploit the case when it has already been dl'ed
                     " wget -q -c -O pygccxml-1.0.0.zip %(pygccxml_source_url)s && " 
                     " wget -q -c -O passfd-src.tar.gz %(passfd_source_url)s && "
                     " wget -q -c -O ns3-src.tar.gz %(ns3_source_url)s && "  
                     " sha1sum -c archive_sums.txt " # Check SHA1 sums when applicable
                     " ) && "
                     # Unpack every archive into its own directory
                     "unzip -n pygccxml-1.0.0.zip && "
                     "mkdir -p pybindgen-src && "
                     "mkdir -p ns3-src && "
                     "mkdir -p passfd-src && "
                     "tar xzf ns3-src.tar.gz --strip-components=1 -C ns3-src && "
                     "tar xzf passfd-src.tar.gz --strip-components=1 -C passfd-src && "
                     "tar xzf pybindgen-src.tar.gz --strip-components=1 -C pybindgen-src && "
                     "rm -rf target && "    # mv doesn't like unclean targets
                     "mkdir -p target && "
                     # pygccxml: build and install into ${BUILD}/target
                     "cd pygccxml-1.0.0 && "
                     "rm -rf unittests docs && " # pygccxml has ~100M of unit tests - excessive - docs aren't needed either
                     "python setup.py build && "
                     "python setup.py install --install-lib ${BUILD}/target && "
                     "python setup.py clean && "
                     # pybindgen: waf build, then move the package to the top of target
                     "cd ../pybindgen-src && "
                     "export PYTHONPATH=$PYTHONPATH:${BUILD}/target && "
                     "./waf configure --prefix=${BUILD}/target -d release && "
                     "./waf && "
                     "./waf install && "
                     "./waf clean && "
                     "mv -f ${BUILD}/target/lib/python*/site-packages/pybindgen ${BUILD}/target/. && "
                     "rm -rf ${BUILD}/target/lib && "
                     # passfd: build and install into ${BUILD}/target
                     "cd ../passfd-src && "
                     "python setup.py build && "
                     "python setup.py install --install-lib ${BUILD}/target && "
                     "python setup.py clean && "
                     # ns3: waf build, then copy libraries and python bindings into target/lib
                     "cd ../ns3-src && "
                     "./waf configure --prefix=${BUILD}/target --with-pybindgen=../pybindgen-src -d release --disable-examples --disable-tests && "
                     "./waf &&"
                     "./waf install && "
                     "rm -f ${BUILD}/target/lib/*.so && "
                     "cp -a ${BUILD}/ns3-src/build/release/libns3*.so ${BUILD}/target/lib && "
                     "cp -a ${BUILD}/ns3-src/build/release/bindings/python/ns ${BUILD}/target/lib &&"
                     "./waf clean "
             " )"
                     # URLs are shell-escaped before being placed in the command
                     % dict(
                        pybindgen_source_url = server.shell_escape(pybindgen_source_url),
                        pygccxml_source_url = server.shell_escape(pygccxml_source_url),
                        ns3_source_url = server.shell_escape(ns3_source_url),
                        passfd_source_url = server.shell_escape(passfd_source_url),
                     ))
        
        # Just move ${BUILD}/target
        self.install = (
            # Same "already working?" check the build command starts with
            " ( "
            "  cd .. && "
            "  python -c 'import pygccxml, pybindgen, passfd' && "
            "  test -f lib/ns/_core.so && "
            "  test -f lib/ns/__init__.py && "
            "  test -f lib/ns/core.py && "
            "  test -f lib/libns3-core.so && "
            "  LD_LIBRARY_PATH=lib PYTHONPATH=lib python -c 'import ns.core' "
            " ) || ( "
                # Not working, reinstall
                    # Only go on if ${BUILD}/target exists and isn't empty
                    "test -d ${BUILD}/target && "
                    "[[ \"x\" != \"x$(find ${BUILD}/target -mindepth 1 -print -quit)\" ]] &&"
                    "( for i in ${BUILD}/target/* ; do rm -rf ${SOURCES}/${i##*/} ; done ) && " # mv doesn't like unclean targets
                    "mv -f ${BUILD}/target/* ${SOURCES}"
            " )"
        )
        
        # Set extra environment paths
        self.env['NEPI_NS3BINDINGS'] = "${SOURCES}/lib"
        self.env['NEPI_NS3LIBRARY'] = "${SOURCES}/lib"
    
    @property
    def tarball(self):
        """
        Temporary file object holding a tarball of the nepi package,
        created with tar on first use.
        
        Raises RuntimeError if the tar command fails.
        """
        # NOTE(review): this reads self._tarball and self._shared_nepi_tar,
        #   neither of which is assigned by this class in the code visible
        #   here, and nothing in this class uses the property - presumably
        #   carried over from NepiDependency; confirm before relying on it.
        if self._tarball is None:
            # The shared attribute is called, as a weak reference would be
            shared_tar = self._shared_nepi_tar and self._shared_nepi_tar()
            if shared_tar is not None:
                self._tarball = shared_tar
            else:
                # Build an ad-hoc tarball
                # Prebuilt
                import nepi
                import tempfile
                
                shared_tar = tempfile.NamedTemporaryFile(prefix='nepi-src-', suffix='.tar.gz')
                
                # Pack the "nepi" directory, relative to the directory
                # that contains the nepi package
                proc = subprocess.Popen(
                    ["tar", "czf", shared_tar.name, 
                        '-C', os.path.join(os.path.dirname(os.path.dirname(nepi.__file__)),'.'), 
                        'nepi'],
                    stdout = open("/dev/null","w"),
                    stdin = open("/dev/null","r"))

                # Nonzero exit status means tar failed
                if proc.wait():
                    raise RuntimeError, "Failed to create nepi tarball"
                
                # NOTE(review): this stores the file object itself, on the
                #   instance - not a weak reference on the class
                self._tarball = self._shared_nepi_tar = shared_tar
                
        return self._tarball
1009
1010 class YumDependency(Dependency):
1011     """
1012     This dependency is an internal helper class used to
1013     efficiently distribute yum-downloaded rpms.
1014     
1015     It temporarily sets the yum cache as persistent in the
1016     build master, and installs all the required packages.
1017     
1018     The rpm packages left in the yum cache are gathered and
1019     distributed by the underlying Dependency in an efficient
1020     manner. Build slaves will then install those rpms back in
1021     the cache before issuing the install command.
1022     
1023     When packages have been installed already, nothing but an
1024     empty tar is distributed.
1025     """
1026     
1027     # Class attribute holding a *weak* reference to the shared NEPI tar file
1028     # so that they may share it. Don't operate on the file itself, it would
1029     # be a mess, just use its path.
1030     _shared_nepi_tar = None
1031     
1032     def _build_get(self):
1033         # canonical representation of dependencies
1034         depends = ' '.join( sorted( (self.depends or "").split(' ') ) )
1035         
1036         # download rpms and pack into a tar archive
1037         return (
1038             "sudo -S nice yum -y makecache && "
1039             "sudo -S sed -i -r 's/keepcache *= *0/keepcache=1/' /etc/yum.conf && "
1040             " ( ( "
1041                 "sudo -S nice yum -y install %s ; "
1042                 "rm -f ${BUILD}/packages.tar ; "
1043                 "tar -C /var/cache/yum -rf ${BUILD}/packages.tar $(cd /var/cache/yum ; find -iname '*.rpm')"
1044             " ) || /bin/true ) && "
1045             "sudo -S sed -i -r 's/keepcache *= *1/keepcache=0/' /etc/yum.conf && "
1046             "( sudo -S nice yum -y clean packages || /bin/true ) "
1047         ) % ( depends, )
1048     def _build_set(self, value):
1049         # ignore
1050         return
1051     build = property(_build_get, _build_set)
1052     
1053     def _install_get(self):
1054         # canonical representation of dependencies
1055         depends = ' '.join( sorted( (self.depends or "").split(' ') ) )
1056         
1057         # unpack cached rpms into yum cache, install, and cleanup
1058         return (
1059             "sudo -S tar -k --keep-newer-files -C /var/cache/yum -xf packages.tar && "
1060             "sudo -S nice yum -y install %s && "
1061             "( sudo -S nice yum -y clean packages || /bin/true ) "
1062         ) % ( depends, )
1063     def _install_set(self, value):
1064         # ignore
1065         return
1066     install = property(_install_get, _install_set)
1067         
1068     def check_bad_host(self, out, err):
1069         badre = re.compile(r'(?:'
1070                            r'The GPG keys listed for the ".*" repository are already installed but they are not correct for this package'
1071                            r'|Error: Cannot retrieve repository metadata (repomd.xml) for repository: .*[.] Please verify its path and try again'
1072                            r'|Error: disk I/O error'
1073                            r'|MASTER NODE UNREACHABLE'
1074                            r')', 
1075                            re.I)
1076         return badre.search(out) or badre.search(err)