- Detect SSH misconfigurations in PL nodes
[nepi.git] / src / nepi / testbeds / planetlab / application.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from constants import TESTBED_ID
5 import plcapi
6 import operator
7 import os
8 import os.path
9 import sys
10 import nepi.util.server as server
11 import cStringIO
12 import subprocess
13 import rspawn
14 import random
15 import time
16 import socket
17 import threading
18 import logging
19 import re
20
21 from nepi.util.constants import ApplicationStatus as AS
22
class Dependency(object):
    """
    A Dependency is in every respect like an application.

    It depends on some packages, it may require building binaries, it must
    deploy them...

    But it has no command. Dependencies aren't ever started, or stopped, and
    have no status.
    """

    # Names of the traces this object exposes.
    # This has to be a real tuple: without the trailing comma it is a plain
    # string, and "in" degrades to a substring test (so 'build' or 'log'
    # would be accepted as trace names).
    TRACES = ('buildlog',)
35
36     def __init__(self, api=None):
37         if not api:
38             api = plcapi.PLCAPI()
39         self._api = api
40         
41         # Attributes
42         self.command = None
43         self.sudo = False
44         
45         self.build = None
46         self.install = None
47         self.depends = None
48         self.buildDepends = None
49         self.sources = None
50         self.rpmFusion = False
51         self.env = {}
52         
53         self.stdin = None
54         self.stdout = None
55         self.stderr = None
56         self.buildlog = None
57         
58         self.add_to_path = True
59         
60         # Those are filled when the app is configured
61         self.home_path = None
62         
63         # Those are filled when an actual node is connected
64         self.node = None
65         
66         # Those are filled when the app is started
67         #   Having both pid and ppid makes it harder
68         #   for pid rollover to induce tracking mistakes
69         self._started = False
70         self._setup = False
71         self._setuper = None
72         self._pid = None
73         self._ppid = None
74
75         # Spanning tree deployment
76         self._master = None
77         self._master_passphrase = None
78         self._master_prk = None
79         self._master_puk = None
80         self._master_token = os.urandom(8).encode("hex")
81         self._build_pid = None
82         self._build_ppid = None
83         
84         # Logging
85         self._logger = logging.getLogger('nepi.testbeds.planetlab')
86         
87     
88     def __str__(self):
89         return "%s<%s>" % (
90             self.__class__.__name__,
91             ' '.join(filter(bool,(self.depends, self.sources)))
92         )
93     
94     def validate(self):
95         if self.home_path is None:
96             raise AssertionError, "Misconfigured application: missing home path"
97         if self.node.ident_path is None or not os.access(self.node.ident_path, os.R_OK):
98             raise AssertionError, "Misconfigured application: missing slice SSH key"
99         if self.node is None:
100             raise AssertionError, "Misconfigured application: unconnected node"
101         if self.node.hostname is None:
102             raise AssertionError, "Misconfigured application: misconfigured node"
103         if self.node.slicename is None:
104             raise AssertionError, "Misconfigured application: unspecified slice"
105     
106     def check_bad_host(self, out, err):
107         """
108         Called whenever an operation fails, it's given the output to be checked for
109         telltale signs of unhealthy hosts.
110         """
111         return False
112     
113     def remote_trace_path(self, whichtrace):
114         if whichtrace in self.TRACES:
115             tracefile = os.path.join(self.home_path, whichtrace)
116         else:
117             tracefile = None
118         
119         return tracefile
120
121     def remote_trace_name(self, whichtrace):
122         if whichtrace in self.TRACES:
123             return whichtrace
124         return None
125
126     def sync_trace(self, local_dir, whichtrace):
127         tracefile = self.remote_trace_path(whichtrace)
128         if not tracefile:
129             return None
130         
131         local_path = os.path.join(local_dir, tracefile)
132         
133         # create parent local folders
134         proc = subprocess.Popen(
135             ["mkdir", "-p", os.path.dirname(local_path)],
136             stdout = open("/dev/null","w"),
137             stdin = open("/dev/null","r"))
138
139         if proc.wait():
140             raise RuntimeError, "Failed to synchronize trace"
141         
142         # sync files
143         try:
144             self._popen_scp(
145                 '%s@%s:%s' % (self.node.slicename, self.node.hostname,
146                     tracefile),
147                 local_path
148                 )
149         except RuntimeError, e:
150             raise RuntimeError, "Failed to synchronize trace: %s %s" \
151                     % (e.args[0], e.args[1],)
152         
153         return local_path
154     
155     def recover(self):
156         # We assume a correct deployment, so recovery only
157         # means we mark this dependency as deployed
158         self._setup = True
159
160     def setup(self):
161         self._logger.info("Setting up %s", self)
162         self._make_home()
163         self._launch_build()
164         self._finish_build()
165         self._setup = True
166     
167     def async_setup(self):
168         if not self._setuper:
169             def setuper():
170                 try:
171                     self.setup()
172                 except:
173                     self._setuper._exc.append(sys.exc_info())
174             self._setuper = threading.Thread(
175                 target = setuper)
176             self._setuper._exc = []
177             self._setuper.start()
178     
179     def async_setup_wait(self):
180         if not self._setup:
181             self._logger.info("Waiting for %s to be setup", self)
182             if self._setuper:
183                 self._setuper.join()
184                 if not self._setup:
185                     if self._setuper._exc:
186                         exctyp,exval,exctrace = self._setuper._exc[0]
187                         raise exctyp,exval,exctrace
188                     else:
189                         raise RuntimeError, "Failed to setup application"
190                 else:
191                     self._logger.info("Setup ready: %s at %s", self, self.node.hostname)
192             else:
193                 self.setup()
194         
195     def _make_home(self):
196         # Make sure all the paths are created where 
197         # they have to be created for deployment
198         # sync files
199         try:
200             self._popen_ssh_command(
201                 "mkdir -p %(home)s && ( rm -f %(home)s/{pid,build-pid,nepi-build.sh} >/dev/null 2>&1 || /bin/true )" \
202                     % { 'home' : server.shell_escape(self.home_path) },
203                 timeout = 120,
204                 retry = 3
205                 )
206         except RuntimeError, e:
207             raise RuntimeError, "Failed to set up application %s: %s %s" % (self.home_path, e.args[0], e.args[1],)
208         
209         if self.stdin:
210             # Write program input
211             try:
212                 self._popen_scp(
213                     cStringIO.StringIO(self.stdin),
214                     '%s@%s:%s' % (self.node.slicename, self.node.hostname, 
215                         os.path.join(self.home_path, 'stdin') ),
216                     )
217             except RuntimeError, e:
218                 raise RuntimeError, "Failed to set up application %s: %s %s" \
219                         % (self.home_path, e.args[0], e.args[1],)
220
221     def _replace_paths(self, command):
222         """
223         Replace all special path tags with shell-escaped actual paths.
224         """
225         # need to append ${HOME} if paths aren't absolute, to MAKE them absolute.
226         root = '' if self.home_path.startswith('/') else "${HOME}/"
227         return ( command
228             .replace("${SOURCES}", root+server.shell_escape(self.home_path))
229             .replace("${BUILD}", root+server.shell_escape(os.path.join(self.home_path,'build'))) )
230
231     def _launch_build(self):
232         if self._master is not None:
233             self._do_install_keys()
234             buildscript = self._do_build_slave()
235         else:
236             buildscript = self._do_build_master()
237             
238         if buildscript is not None:
239             self._logger.info("Building %s at %s", self, self.node.hostname)
240             
241             # upload build script
242             try:
243                 self._popen_scp(
244                     buildscript,
245                     '%s@%s:%s' % (self.node.slicename, self.node.hostname, 
246                         os.path.join(self.home_path, 'nepi-build.sh') )
247                     )
248             except RuntimeError, e:
249                 raise RuntimeError, "Failed to set up application %s: %s %s" \
250                         % (self.home_path, e.args[0], e.args[1],)
251             
252             # launch build
253             self._do_launch_build()
254     
255     def _finish_build(self):
256         self._do_wait_build()
257         self._do_install()
258
259     def _do_build_slave(self):
260         if not self.sources and not self.build:
261             return None
262             
263         # Create build script
264         files = set()
265         
266         if self.sources:
267             sources = self.sources.split(' ')
268             files.update(
269                 "%s@%s:%s" % (self._master.node.slicename, self._master.node.hostname, 
270                     os.path.join(self._master.home_path, os.path.basename(source)),)
271                 for source in sources
272             )
273         
274         if self.build:
275             files.add(
276                 "%s@%s:%s" % (self._master.node.slicename, self._master.node.hostname, 
277                     os.path.join(self._master.home_path, 'build.tar.gz'),)
278             )
279         
280         sshopts = "-o ConnectTimeout=30 -o ConnectionAttempts=3 -o ServerAliveInterval=30 -o TCPKeepAlive=yes"
281         
282         launch_agent = "{ ( echo -e '#!/bin/sh\\ncat' > .ssh-askpass ) && chmod u+x .ssh-askpass"\
283                         " && export SSH_ASKPASS=$(pwd)/.ssh-askpass "\
284                         " && ssh-agent > .ssh-agent.sh ; } && . ./.ssh-agent.sh && ( echo $NEPI_MASTER_PASSPHRASE | ssh-add %(prk)s ) && rm -rf %(prk)s %(puk)s" %  \
285         {
286             'prk' : server.shell_escape(self._master_prk_name),
287             'puk' : server.shell_escape(self._master_puk_name),
288         }
289         
290         kill_agent = "kill $SSH_AGENT_PID"
291         
292         waitmaster = (
293             "{ "
294             "echo 'Checking master reachability' ; "
295             "if ping -c 3 %(master_host)s && (. ./.ssh-agent.sh > /dev/null ; ssh -o UserKnownHostsFile=%(hostkey)s %(sshopts)s %(master)s echo MASTER SAYS HI ) ; then "
296             "echo 'Master node reachable' ; "
297             "else "
298             "echo 'MASTER NODE UNREACHABLE' && "
299             "exit 1 ; "
300             "fi ; "
301             ". ./.ssh-agent.sh ; "
302             "while [[ $(. ./.ssh-agent.sh > /dev/null ; ssh -q -o UserKnownHostsFile=%(hostkey)s %(sshopts)s %(master)s cat %(token_path)s.retcode || /bin/true) != %(token)s ]] ; do sleep 5 ; done ; "
303             "if [[ $(. ./.ssh-agent.sh > /dev/null ; ssh -q -o UserKnownHostsFile=%(hostkey)s %(sshopts)s %(master)s cat %(token_path)s || /bin/true) != %(token)s ]] ; then echo BAD TOKEN ; exit 1 ; fi ; "
304             "}" 
305         ) % {
306             'hostkey' : 'master_known_hosts',
307             'master' : "%s@%s" % (self._master.node.slicename, self._master.node.hostname),
308             'master_host' : self._master.node.hostname,
309             'token_path' : os.path.join(self._master.home_path, 'build.token'),
310             'token' : server.shell_escape(self._master._master_token),
311             'sshopts' : sshopts,
312         }
313         
314         syncfiles = ". ./.ssh-agent.sh && scp -p -o UserKnownHostsFile=%(hostkey)s %(sshopts)s %(files)s ." % {
315             'hostkey' : 'master_known_hosts',
316             'files' : ' '.join(files),
317             'sshopts' : sshopts,
318         }
319         if self.build:
320             syncfiles += " && tar xzf build.tar.gz"
321         syncfiles += " && ( echo %s > build.token )" % (server.shell_escape(self._master_token),)
322         syncfiles += " && ( echo %s > build.token.retcode )" % (server.shell_escape(self._master_token),)
323         syncfiles = "{ . ./.ssh-agent.sh ; %s ; }" % (syncfiles,)
324         
325         cleanup = "{ . ./.ssh-agent.sh ; kill $SSH_AGENT_PID ; rm -rf %(prk)s %(puk)s master_known_hosts .ssh-askpass ; }" % {
326             'prk' : server.shell_escape(self._master_prk_name),
327             'puk' : server.shell_escape(self._master_puk_name),
328         }
329         
330         slavescript = "( ( %(launch_agent)s && %(waitmaster)s && %(syncfiles)s && %(kill_agent)s && %(cleanup)s ) || %(cleanup)s ) ; echo %(token)s > build.token.retcode" % {
331             'waitmaster' : waitmaster,
332             'syncfiles' : syncfiles,
333             'cleanup' : cleanup,
334             'kill_agent' : kill_agent,
335             'launch_agent' : launch_agent,
336             'home' : server.shell_escape(self.home_path),
337             'token' : server.shell_escape(self._master_token),
338         }
339         
340         return cStringIO.StringIO(slavescript)
341          
342     def _do_launch_build(self):
343         script = "bash ./nepi-build.sh"
344         if self._master_passphrase:
345             script = "NEPI_MASTER_PASSPHRASE=%s %s" % (
346                 server.shell_escape(self._master_passphrase),
347                 script
348             )
349         (out,err),proc = rspawn.remote_spawn(
350             script,
351             pidfile = 'build-pid',
352             home = self.home_path,
353             stdin = '/dev/null',
354             stdout = 'buildlog',
355             stderr = rspawn.STDOUT,
356             
357             host = self.node.hostname,
358             port = None,
359             user = self.node.slicename,
360             agent = None,
361             ident_key = self.node.ident_path,
362             server_key = self.node.server_key
363             )
364         
365         if proc.wait():
366             if self.check_bad_host(out, err):
367                 self.node.blacklist()
368             raise RuntimeError, "Failed to set up build slave %s: %s %s" % (self.home_path, out,err,)
369         
370         
371         pid = ppid = None
372         delay = 1.0
373         for i in xrange(5):
374             pidtuple = rspawn.remote_check_pid(
375                 os.path.join(self.home_path,'build-pid'),
376                 host = self.node.hostname,
377                 port = None,
378                 user = self.node.slicename,
379                 agent = None,
380                 ident_key = self.node.ident_path,
381                 server_key = self.node.server_key
382                 )
383             
384             if pidtuple:
385                 pid, ppid = pidtuple
386                 self._build_pid, self._build_ppid = pidtuple
387                 break
388             else:
389                 time.sleep(delay)
390                 delay = min(30,delay*1.2)
391         else:
392             raise RuntimeError, "Failed to set up build slave %s: cannot get pid" % (self.home_path,)
393
394         self._logger.info("Deploying %s at %s", self, self.node.hostname)
395         
396     def _do_wait_build(self, trial=0):
397         pid = self._build_pid
398         ppid = self._build_ppid
399         
400         if pid and ppid:
401             delay = 1.0
402             first = True
403             bustspin = 0
404             while True:
405                 status = rspawn.remote_status(
406                     pid, ppid,
407                     host = self.node.hostname,
408                     port = None,
409                     user = self.node.slicename,
410                     agent = None,
411                     ident_key = self.node.ident_path,
412                     server_key = self.node.server_key
413                     )
414                 
415                 if status is rspawn.FINISHED:
416                     self._build_pid = self._build_ppid = None
417                     break
418                 elif status is not rspawn.RUNNING:
419                     bustspin += 1
420                     time.sleep(delay*(5.5+random.random()))
421                     if bustspin > 12:
422                         self._build_pid = self._build_ppid = None
423                         break
424                 else:
425                     if first:
426                         self._logger.info("Waiting for %s to finish building at %s %s", self, self.node.hostname,
427                             "(build slave)" if self._master is not None else "(build master)")
428                         
429                         first = False
430                     time.sleep(delay*(0.5+random.random()))
431                     delay = min(30,delay*1.2)
432                     bustspin = 0
433             
434             # check build token
435             slave_token = ""
436             for i in xrange(3):
437                 (out, err), proc = self._popen_ssh_command(
438                     "cat %(token_path)s" % {
439                         'token_path' : os.path.join(self.home_path, 'build.token'),
440                     },
441                     timeout = 120,
442                     noerrors = True)
443                 if not proc.wait() and out:
444                     slave_token = out.strip()
445                 
446                 if slave_token:
447                     break
448                 else:
449                     time.sleep(2)
450             
451             if slave_token != self._master_token:
452                 # Get buildlog for the error message
453
454                 (buildlog, err), proc = self._popen_ssh_command(
455                     "cat %(buildlog)s" % {
456                         'buildlog' : os.path.join(self.home_path, 'buildlog'),
457                         'buildscript' : os.path.join(self.home_path, 'nepi-build.sh'),
458                     },
459                     timeout = 120,
460                     noerrors = True)
461                 
462                 proc.wait()
463                 
464                 if self.check_bad_host(buildlog, err):
465                     self.node.blacklist()
466                 elif self._master and trial < 3 and 'BAD TOKEN' in buildlog or 'BAD TOKEN' in err:
467                     # bad sync with master, may try again
468                     # but first wait for master
469                     self._master.async_setup_wait()
470                     self._launch_build()
471                     self._do_wait_build(trial+1)
472                 else:
473                     raise RuntimeError, "Failed to set up application %s: "\
474                             "build failed, got wrong token from pid %s/%s "\
475                             "(expected %r, got %r), see buildlog at %s:\n%s" % (
476                         self.home_path, pid, ppid, self._master_token, slave_token, self.node.hostname, buildlog)
477
478             self._logger.info("Built %s at %s", self, self.node.hostname)
479
480     def _do_kill_build(self):
481         pid = self._build_pid
482         ppid = self._build_ppid
483         
484         if pid and ppid:
485             self._logger.info("Killing build of %s", self)
486             rspawn.remote_kill(
487                 pid, ppid,
488                 host = self.node.hostname,
489                 port = None,
490                 user = self.node.slicename,
491                 agent = None,
492                 ident_key = self.node.ident_path
493                 )
494         
495         
496     def _do_build_master(self):
497         if not self.sources and not self.build and not self.buildDepends:
498             return None
499             
500         if self.sources:
501             sources = self.sources.split(' ')
502             
503             # Copy all sources
504             try:
505                 self._popen_scp(
506                     sources,
507                     "%s@%s:%s" % (self.node.slicename, self.node.hostname, 
508                         os.path.join(self.home_path,'.'),)
509                     )
510             except RuntimeError, e:
511                 raise RuntimeError, "Failed upload source file %r: %s %s" \
512                         % (sources, e.args[0], e.args[1],)
513             
514         buildscript = cStringIO.StringIO()
515         
516         buildscript.write("(\n")
517         
518         if self.buildDepends:
519             # Install build dependencies
520             buildscript.write(
521                 "sudo -S yum -y install %(packages)s\n" % {
522                     'packages' : self.buildDepends
523                 }
524             )
525         
526             
527         if self.build:
528             # Build sources
529             buildscript.write(
530                 "mkdir -p build && ( cd build && ( %(command)s ) )\n" % {
531                     'command' : self._replace_paths(self.build),
532                     'home' : server.shell_escape(self.home_path),
533                 }
534             )
535         
536             # Make archive
537             buildscript.write("tar czf build.tar.gz build\n")
538         
539         # Write token
540         buildscript.write("echo %(master_token)s > build.token ) ; echo %(master_token)s > build.token.retcode" % {
541             'master_token' : server.shell_escape(self._master_token)
542         })
543         
544         buildscript.seek(0)
545
546         return buildscript
547
548     def _do_install(self):
549         if self.install:
550             self._logger.info("Installing %s at %s", self, self.node.hostname)
551             
552             # Install application
553             try:
554                 self._popen_ssh_command(
555                     "cd %(home)s && cd build && ( %(command)s ) > ${HOME}/%(home)s/installlog 2>&1 || ( tail ${HOME}/%(home)s/{install,build}log >&2 && false )" % \
556                         {
557                         'command' : self._replace_paths(self.install),
558                         'home' : server.shell_escape(self.home_path),
559                         },
560                     )
561             except RuntimeError, e:
562                 if self.check_bad_host(e.args[0], e.args[1]):
563                     self.node.blacklist()
564                 raise RuntimeError, "Failed install build sources: %s %s" % (e.args[0], e.args[1],)
565
566     def set_master(self, master):
567         self._master = master
568         
569     def install_keys(self, prk, puk, passphrase):
570         # Install keys
571         self._master_passphrase = passphrase
572         self._master_prk = prk
573         self._master_puk = puk
574         self._master_prk_name = os.path.basename(prk.name)
575         self._master_puk_name = os.path.basename(puk.name)
576         
577     def _do_install_keys(self):
578         prk = self._master_prk
579         puk = self._master_puk
580        
581         try:
582             self._popen_scp(
583                 [ prk.name, puk.name ],
584                 '%s@%s:%s' % (self.node.slicename, self.node.hostname, self.home_path )
585                 )
586         except RuntimeError, e:
587             raise RuntimeError, "Failed to set up application deployment keys: %s %s" \
588                     % (e.args[0], e.args[1],)
589
590         try:
591             self._popen_scp(
592                 cStringIO.StringIO('%s,%s %s\n' % (
593                     self._master.node.hostname, socket.gethostbyname(self._master.node.hostname), 
594                     self._master.node.server_key)),
595                 '%s@%s:%s' % (self.node.slicename, self.node.hostname, 
596                     os.path.join(self.home_path,"master_known_hosts") )
597                 )
598         except RuntimeError, e:
599             raise RuntimeError, "Failed to set up application deployment keys: %s %s" \
600                     % (e.args[0], e.args[1],)
601         
602         # No longer need'em
603         self._master_prk = None
604         self._master_puk = None
605     
606     def cleanup(self):
607         # make sure there's no leftover build processes
608         self._do_kill_build()
609
610     @server.eintr_retry
611     def _popen_scp(self, src, dst, retry = 3):
612         while 1:
613             try:
614                 (out,err),proc = server.popen_scp(
615                     src,
616                     dst, 
617                     port = None,
618                     agent = None,
619                     ident_key = self.node.ident_path,
620                     server_key = self.node.server_key
621                     )
622
623                 if server.eintr_retry(proc.wait)():
624                     raise RuntimeError, (out, err)
625                 return (out, err), proc
626             except:
627                 if retry <= 0:
628                     raise
629                 else:
630                     retry -= 1
631   
632
633     @server.eintr_retry
634     def _popen_ssh_command(self, command, retry = 0, noerrors=False, timeout=None):
635         (out,err),proc = server.popen_ssh_command(
636             command,
637             host = self.node.hostname,
638             port = None,
639             user = self.node.slicename,
640             agent = None,
641             ident_key = self.node.ident_path,
642             server_key = self.node.server_key,
643             timeout = timeout,
644             retry = retry
645             )
646
647         if server.eintr_retry(proc.wait)():
648             if not noerrors:
649                 raise RuntimeError, (out, err)
650         return (out, err), proc
651
class Application(Dependency):
    """
    An application is a dependency that also has a command to be run
    and monitored.

    The output of that command is added to the traces.
    """

    TRACES = (
        'stdout',
        'stderr',
        'buildlog',
        'output',
    )
660     
661     def __init__(self, api=None):
662         super(Application,self).__init__(api)
663         
664         # Attributes
665         self.command = None
666         self.sudo = False
667         
668         self.stdin = None
669         self.stdout = None
670         self.stderr = None
671         self.output = None
672         
673         # Those are filled when the app is started
674         #   Having both pid and ppid makes it harder
675         #   for pid rollover to induce tracking mistakes
676         self._started = False
677         self._pid = None
678         self._ppid = None
679
680         # Do not add to the python path of nodes
681         self.add_to_path = False
682     
683     def __str__(self):
684         return "%s<command:%s%s>" % (
685             self.__class__.__name__,
686             "sudo " if self.sudo else "",
687             self.command,
688         )
689     
690     def start(self):
691         self._logger.info("Starting %s", self)
692         
693         # Create shell script with the command
694         # This way, complex commands and scripts can be ran seamlessly
695         # sync files
696         command = cStringIO.StringIO()
697         command.write('export PYTHONPATH=$PYTHONPATH:%s\n' % (
698             ':'.join(["${HOME}/"+server.shell_escape(s) for s in self.node.pythonpath])
699         ))
700         command.write('export PATH=$PATH:%s\n' % (
701             ':'.join(["${HOME}/"+server.shell_escape(s) for s in self.node.pythonpath])
702         ))
703         if self.node.env:
704             for envkey, envvals in self.node.env.iteritems():
705                 for envval in envvals:
706                     command.write('export %s=%s\n' % (envkey, envval))
707         command.write(self.command)
708         command.seek(0)
709
710         try:
711             self._popen_scp(
712                 command,
713                 '%s@%s:%s' % (self.node.slicename, self.node.hostname, 
714                     os.path.join(self.home_path, "app.sh"))
715                 )
716         except RuntimeError, e:
717             raise RuntimeError, "Failed to set up application: %s %s" \
718                     % (e.args[0], e.args[1],)
719         
720         # Start process in a "daemonized" way, using nohup and heavy
721         # stdin/out redirection to avoid connection issues
722         (out,err),proc = rspawn.remote_spawn(
723             self._replace_paths("bash ./app.sh"),
724             
725             pidfile = './pid',
726             home = self.home_path,
727             stdin = 'stdin' if self.stdin is not None else '/dev/null',
728             stdout = 'stdout' if self.stdout else '/dev/null',
729             stderr = 'stderr' if self.stderr else '/dev/null',
730             sudo = self.sudo,
731             
732             host = self.node.hostname,
733             port = None,
734             user = self.node.slicename,
735             agent = None,
736             ident_key = self.node.ident_path,
737             server_key = self.node.server_key
738             )
739         
740         if proc.wait():
741             if self.check_bad_host(out, err):
742                 self.node.blacklist()
743             raise RuntimeError, "Failed to set up application: %s %s" % (out,err,)
744
745         self._started = True
746     
747     def recover(self):
748         # Assuming the application is running on PlanetLab,
749         # proper pidfiles should be present at the app's home path.
750         # So we mark this application as started, and check the pidfiles
751         self._started = True
752         self.checkpid()
753
754     def checkpid(self):            
755         # Get PID/PPID
756         # NOTE: wait a bit for the pidfile to be created
757         if self._started and not self._pid or not self._ppid:
758             pidtuple = rspawn.remote_check_pid(
759                 os.path.join(self.home_path,'pid'),
760                 host = self.node.hostname,
761                 port = None,
762                 user = self.node.slicename,
763                 agent = None,
764                 ident_key = self.node.ident_path,
765                 server_key = self.node.server_key
766                 )
767             
768             if pidtuple:
769                 self._pid, self._ppid = pidtuple
770     
771     def status(self):
772         self.checkpid()
773         if not self._started:
774             return AS.STATUS_NOT_STARTED
775         elif not self._pid or not self._ppid:
776             return AS.STATUS_NOT_STARTED
777         else:
778             status = rspawn.remote_status(
779                 self._pid, self._ppid,
780                 host = self.node.hostname,
781                 port = None,
782                 user = self.node.slicename,
783                 agent = None,
784                 ident_key = self.node.ident_path,
785                 server_key = self.node.server_key
786                 )
787             
788             if status is rspawn.NOT_STARTED:
789                 return AS.STATUS_NOT_STARTED
790             elif status is rspawn.RUNNING:
791                 return AS.STATUS_RUNNING
792             elif status is rspawn.FINISHED:
793                 return AS.STATUS_FINISHED
794             else:
795                 # WTF?
796                 return AS.STATUS_NOT_STARTED
797     
798     def kill(self):
799         status = self.status()
800         if status == AS.STATUS_RUNNING:
801             # kill by ppid+pid - SIGTERM first, then try SIGKILL
802             rspawn.remote_kill(
803                 self._pid, self._ppid,
804                 host = self.node.hostname,
805                 port = None,
806                 user = self.node.slicename,
807                 agent = None,
808                 ident_key = self.node.ident_path,
809                 server_key = self.node.server_key,
810                 sudo = self.sudo
811                 )
812             self._logger.info("Killed %s", self)
813
814
class NepiDependency(Dependency):
    """
    This dependency adds nepi itself to the python path,
    so that you may run testbeds within PL nodes.

    The local nepi package is packed into a temporary tarball; the build
    command moves that tarball into place and the install command unpacks
    it.
    """

    # Class attribute holding a *weak* reference to the shared NEPI tar file
    # so that they may share it. Don't operate on the file itself, it would
    # be a mess, just use its path.
    _shared_nepi_tar = None

    def __init__(self, api = None):
        """
        Set up packages, sources and the build/install commands.

        api -- forwarded unchanged to Dependency.__init__
        """
        super(NepiDependency, self).__init__(api)

        self._tarball = None

        self.depends = 'python python-ipaddr python-setuptools'

        # our sources are in our ad-hoc tarball
        self.sources = self.tarball.name

        tarname = os.path.basename(self.tarball.name)

        # it's already built - just move the tarball into place
        self.build = "mv -f ${SOURCES}/%s ." % (tarname,)

        # unpack it into sources, and we're done
        self.install = "tar xzf ${BUILD}/%s -C .." % (tarname,)

    @property
    def tarball(self):
        """
        Temporary file object holding the nepi source tarball.

        The tarball is built on first access. It is published on the class
        through a weak reference, so other instances reuse it for as long
        as at least one of them keeps it alive; once every holder is gone
        the next access builds a new one.

        Raises RuntimeError if the tar command exits with a non-zero status.
        """
        if self._tarball is None:
            import weakref

            shared_ref = NepiDependency._shared_nepi_tar
            shared_tar = shared_ref() if shared_ref is not None else None

            if shared_tar is None:
                shared_tar = self._build_tarball()

                # Store on the class, and weakly: assigning the file object
                # through `self` only created an instance attribute, so
                # nothing was ever shared between instances.
                NepiDependency._shared_nepi_tar = weakref.ref(shared_tar)

            self._tarball = shared_tar

        return self._tarball

    @staticmethod
    def _build_tarball():
        """Pack the local nepi package into a new temporary .tar.gz file."""
        # Build an ad-hoc tarball
        # Prebuilt
        import nepi
        import tempfile

        tar_file = tempfile.NamedTemporaryFile(prefix='nepi-src-', suffix='.tar.gz')
        nepi_parent = os.path.join(os.path.dirname(os.path.dirname(nepi.__file__)),'.')

        # Close the /dev/null handles once tar is done instead of leaking them
        null_out = open("/dev/null","w")
        try:
            null_in = open("/dev/null","r")
            try:
                proc = subprocess.Popen(
                    ["tar", "czf", tar_file.name,
                        '-C', nepi_parent,
                        'nepi'],
                    stdout = null_out,
                    stdin = null_in)
                failed = proc.wait()
            finally:
                null_in.close()
        finally:
            null_out.close()

        if failed:
            # Closing a NamedTemporaryFile removes it from disk
            tar_file.close()
            raise RuntimeError("Failed to create nepi tarball")

        return tar_file
871
class NS3Dependency(Dependency):
    """
    This dependency adds NS3 libraries to the library paths,
    so that you may run the NS3 testbed within PL nodes.
    
    You'll also need the NepiDependency.
    """
    
    def __init__(self, api = None):
        """
        Set up build dependencies, the build/install shell commands and
        the NS3 environment variables.

        api -- forwarded unchanged to Dependency.__init__
        """
        super(NS3Dependency, self).__init__(api)
        
        self.buildDepends = 'make waf gcc gcc-c++ gccxml unzip'
        
        # We have to download the sources, untar, build...
        pybindgen_source_url = "http://yans.pl.sophia.inria.fr/trac/nepi/raw-attachment/wiki/WikiStart/pybindgen-r794.tar.gz"
        pygccxml_source_url = "http://leaseweb.dl.sourceforge.net/project/pygccxml/pygccxml/pygccxml-1.0/pygccxml-1.0.0.zip"
        ns3_source_url = "http://yans.pl.sophia.inria.fr/code/hgwebdir.cgi/ns-3.11-nepi/archive/tip.tar.gz"
        passfd_source_url = "http://yans.pl.sophia.inria.fr/code/hgwebdir.cgi/python-passfd/archive/tip.tar.gz"

        # The build command first checks whether a working installation is
        # already present one directory up; only when that check fails are
        # the sources fetched (if needed), unpacked and built into
        # ${BUILD}/target.
        self.build =(
            " ( "
            "  cd .. && "
            "  python -c 'import pygccxml, pybindgen, passfd' && "
            "  test -f lib/ns/_core.so && "
            "  test -f lib/ns/__init__.py && "
            "  test -f lib/ns/core.py && "
            "  test -f lib/libns3-core.so && "
            "  LD_LIBRARY_PATH=lib PYTHONPATH=lib python -c 'import ns.core' "
            " ) || ( "
                # Not working, rebuild
                     # Archive SHA1 sums to check
                     "echo '7158877faff2254e6c094bf18e6b4283cac19137  pygccxml-1.0.0.zip' > archive_sums.txt && "
                     "echo 'a18c2ccffd0df517bc37e2f3a2475092517c43f2  pybindgen-src.tar.gz' >> archive_sums.txt && "
                     " ( " # check existing files
                     " sha1sum -c archive_sums.txt && "
                     " test -f passfd-src.tar.gz && "
                     " test -f ns3-src.tar.gz "
                     " ) || ( " # nope? re-download
                     # Remove the archive names that are actually downloaded
                     # below: leaving a corrupt pybindgen-src.tar.gz behind
                     # would make "wget -c" resume onto it and the SHA1
                     # check fail again.
                     " rm -f pybindgen-src.tar.gz pygccxml-1.0.0.zip passfd-src.tar.gz ns3-src.tar.gz && "
                     " wget -q -c -O pybindgen-src.tar.gz %(pybindgen_source_url)s && " # continue, to exploit the case when it has already been dl'ed
                     " wget -q -c -O pygccxml-1.0.0.zip %(pygccxml_source_url)s && " 
                     " wget -q -c -O passfd-src.tar.gz %(passfd_source_url)s && "
                     " wget -q -c -O ns3-src.tar.gz %(ns3_source_url)s && "  
                     " sha1sum -c archive_sums.txt " # Check SHA1 sums when applicable
                     " ) && "
                     "unzip -n pygccxml-1.0.0.zip && "
                     "mkdir -p pybindgen-src && "
                     "mkdir -p ns3-src && "
                     "mkdir -p passfd-src && "
                     "tar xzf ns3-src.tar.gz --strip-components=1 -C ns3-src && "
                     "tar xzf passfd-src.tar.gz --strip-components=1 -C passfd-src && "
                     "tar xzf pybindgen-src.tar.gz --strip-components=1 -C pybindgen-src && "
                     "rm -rf target && "    # mv doesn't like unclean targets
                     "mkdir -p target && "
                     "cd pygccxml-1.0.0 && "
                     "rm -rf unittests docs && " # pygccxml has ~100M of unit tests - excessive - docs aren't needed either
                     "python setup.py build && "
                     "python setup.py install --install-lib ${BUILD}/target && "
                     "python setup.py clean && "
                     "cd ../pybindgen-src && "
                     "export PYTHONPATH=$PYTHONPATH:${BUILD}/target && "
                     "./waf configure --prefix=${BUILD}/target -d release && "
                     "./waf && "
                     "./waf install && "
                     "./waf clean && "
                     "mv -f ${BUILD}/target/lib/python*/site-packages/pybindgen ${BUILD}/target/. && "
                     "rm -rf ${BUILD}/target/lib && "
                     "cd ../passfd-src && "
                     "python setup.py build && "
                     "python setup.py install --install-lib ${BUILD}/target && "
                     "python setup.py clean && "
                     "cd ../ns3-src && "
                     "./waf configure --prefix=${BUILD}/target --with-pybindgen=../pybindgen-src -d release --disable-examples --disable-tests && "
                     "./waf &&"
                     "./waf install && "
                     "rm -f ${BUILD}/target/lib/*.so && "
                     "cp -a ${BUILD}/ns3-src/build/release/libns3*.so ${BUILD}/target/lib && "
                     "cp -a ${BUILD}/ns3-src/build/release/bindings/python/ns ${BUILD}/target/lib &&"
                     "./waf clean "
             " )"
                     % dict(
                        pybindgen_source_url = server.shell_escape(pybindgen_source_url),
                        pygccxml_source_url = server.shell_escape(pygccxml_source_url),
                        ns3_source_url = server.shell_escape(ns3_source_url),
                        passfd_source_url = server.shell_escape(passfd_source_url),
                     ))
        
        # Just move ${BUILD}/target
        # (skipped, like the build, when the installation check succeeds)
        self.install = (
            " ( "
            "  cd .. && "
            "  python -c 'import pygccxml, pybindgen, passfd' && "
            "  test -f lib/ns/_core.so && "
            "  test -f lib/ns/__init__.py && "
            "  test -f lib/ns/core.py && "
            "  test -f lib/libns3-core.so && "
            "  LD_LIBRARY_PATH=lib PYTHONPATH=lib python -c 'import ns.core' "
            " ) || ( "
                # Not working, reinstall
                    "test -d ${BUILD}/target && "
                    "[[ \"x\" != \"x$(find ${BUILD}/target -mindepth 1 -print -quit)\" ]] &&"
                    "( for i in ${BUILD}/target/* ; do rm -rf ${SOURCES}/${i##*/} ; done ) && " # mv doesn't like unclean targets
                    "mv -f ${BUILD}/target/* ${SOURCES}"
            " )"
        )
        
        # Set extra environment paths
        self.env['NEPI_NS3BINDINGS'] = "${SOURCES}/lib"
        self.env['NEPI_NS3LIBRARY'] = "${SOURCES}/lib"
    
    @property
    def tarball(self):
        """
        Temporary file object holding a tarball of the local nepi package.

        NOTE(review): this property looks copied from NepiDependency.
        Nothing in this class initialises self._tarball or
        self._shared_nepi_tar, and neither the build nor the install
        command above uses the tarball - confirm whether it is still
        needed here.

        Raises RuntimeError if the tar command exits with a non-zero status.
        """
        if self._tarball is None:
            shared_tar = self._shared_nepi_tar and self._shared_nepi_tar()
            if shared_tar is not None:
                self._tarball = shared_tar
            else:
                # Build an ad-hoc tarball
                # Prebuilt
                import nepi
                import tempfile
                
                shared_tar = tempfile.NamedTemporaryFile(prefix='nepi-src-', suffix='.tar.gz')
                
                # Close the /dev/null handles once tar is done instead of
                # leaking them
                null_out = open("/dev/null","w")
                try:
                    null_in = open("/dev/null","r")
                    try:
                        proc = subprocess.Popen(
                            ["tar", "czf", shared_tar.name, 
                                '-C', os.path.join(os.path.dirname(os.path.dirname(nepi.__file__)),'.'), 
                                'nepi'],
                            stdout = null_out,
                            stdin = null_in)
                        failed = proc.wait()
                    finally:
                        null_in.close()
                finally:
                    null_out.close()

                if failed:
                    raise RuntimeError("Failed to create nepi tarball")
                
                self._tarball = self._shared_nepi_tar = shared_tar
                
        return self._tarball
1008
class YumDependency(Dependency):
    """
    This dependency is an internal helper class used to
    efficiently distribute yum-downloaded rpms.
    
    It temporarily sets the yum cache as persistent in the
    build master, and installs all the required packages.
    
    The rpm packages left in the yum cache are gathered and
    distributed by the underlying Dependency in an efficient
    manner. Build slaves will then install those rpms back in
    the cache before issuing the install command.
    
    When packages have been installed already, nothing but an
    empty tar is distributed.
    """
    
    # Class attribute holding a *weak* reference to the shared NEPI tar file
    # so that they may share it. Don't operate on the file itself, it would
    # be a mess, just use its path.
    # NOTE(review): nothing in this class reads or writes this attribute;
    # it looks carried over from NepiDependency - confirm before removing.
    _shared_nepi_tar = None

    # Messages in the command output that mark the node as a bad host.
    # The parentheses and dot of "(repomd.xml)" are escaped: left bare they
    # form a regex group, and the literal text printed by yum never matched.
    _BAD_HOST_RE = re.compile(
        r'(?:'
        r'The GPG keys listed for the ".*" repository are already installed but they are not correct for this package'
        r'|Error: Cannot retrieve repository metadata \(repomd\.xml\) for repository: .*[.] Please verify its path and try again'
        r'|Error: disk I/O error'
        r'|MASTER NODE UNREACHABLE'
        r')',
        re.I)

    def _canonical_depends(self):
        """
        Return self.depends as sorted package names joined by single spaces.

        Splitting on any run of whitespace keeps empty tokens out of the
        result, so differently spaced lists of the same packages yield the
        same string. An unset or empty self.depends yields "".
        """
        return ' '.join( sorted( (self.depends or "").split() ) )
    
    def _build_get(self):
        """
        Getter of the read-only `build` command.

        The command refreshes the yum metadata, switches keepcache on in
        /etc/yum.conf, installs the packages, packs every rpm found under
        /var/cache/yum into ${BUILD}/packages.tar, and finally switches
        keepcache off again and cleans the cached packages. Failures of the
        install/pack group are tolerated (|| /bin/true).
        """
        # canonical representation of dependencies
        depends = self._canonical_depends()
        
        # download rpms and pack into a tar archive
        return (
            "sudo -S nice yum -y makecache && "
            "sudo -S sed -i -r 's/keepcache *= *0/keepcache=1/' /etc/yum.conf && "
            " ( ( "
                "sudo -S nice yum -y install %s ; "
                "rm -f ${BUILD}/packages.tar ; "
                "tar -C /var/cache/yum -rf ${BUILD}/packages.tar $(cd /var/cache/yum ; find -iname '*.rpm')"
            " ) || /bin/true ) && "
            "sudo -S sed -i -r 's/keepcache *= *1/keepcache=0/' /etc/yum.conf && "
            "( sudo -S nice yum -y clean packages || /bin/true ) "
        ) % ( depends, )

    def _build_set(self, value):
        """Setter of `build`: the assigned value is deliberately ignored."""
        # ignore
        return

    build = property(_build_get, _build_set)
    
    def _install_get(self):
        """
        Getter of the read-only `install` command.

        The command unpacks packages.tar into /var/cache/yum (keeping
        existing and newer files), installs the packages and then cleans
        the cached packages.
        """
        # canonical representation of dependencies
        depends = self._canonical_depends()
        
        # unpack cached rpms into yum cache, install, and cleanup
        return (
            "sudo -S tar -k --keep-newer-files -C /var/cache/yum -xf packages.tar && "
            "sudo -S nice yum -y install %s && "
            "( sudo -S nice yum -y clean packages || /bin/true ) "
        ) % ( depends, )

    def _install_set(self, value):
        """Setter of `install`: the assigned value is deliberately ignored."""
        # ignore
        return

    install = property(_install_get, _install_set)
        
    def check_bad_host(self, out, err):
        """
        Tell whether the command output reveals an unusable node.

        out, err -- text captured from the command's stdout and stderr

        Returns a match object (true) when either text contains one of the
        known failure messages, case-insensitively; otherwise a false value.
        """
        badre = self._BAD_HOST_RE
        return badre.search(out) or badre.search(err)