Several fixes:
[nepi.git] / src / nepi / testbeds / planetlab / application.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from constants import TESTBED_ID
5 import plcapi
6 import operator
7 import os
8 import os.path
9 import sys
10 import nepi.util.server as server
11 import cStringIO
12 import subprocess
13 import rspawn
14 import random
15 import time
16 import socket
17 import threading
18 import logging
19 import re
20
21 from nepi.util.constants import ApplicationStatus as AS
22
23 class Dependency(object):
24     """
25     A Dependency is in every respect like an application.
26     
27     It depends on some packages, it may require building binaries, it must deploy
28     them...
29     
30     But it has no command. Dependencies aren't ever started, or stopped, and have
31     no status.
32     """
33
34     TRACES = ('buildlog')
35
36     def __init__(self, api=None):
37         if not api:
38             api = plcapi.PLCAPI()
39         self._api = api
40         
41         # Attributes
42         self.command = None
43         self.sudo = False
44         
45         self.build = None
46         self.install = None
47         self.depends = None
48         self.buildDepends = None
49         self.sources = None
50         self.rpmFusion = False
51         self.env = {}
52         
53         self.stdin = None
54         self.stdout = None
55         self.stderr = None
56         self.buildlog = None
57         
58         self.add_to_path = True
59         
60         # Those are filled when the app is configured
61         self.home_path = None
62         
63         # Those are filled when an actual node is connected
64         self.node = None
65         
66         # Those are filled when the app is started
67         #   Having both pid and ppid makes it harder
68         #   for pid rollover to induce tracking mistakes
69         self._started = False
70         self._setup = False
71         self._setuper = None
72         self._pid = None
73         self._ppid = None
74
75         # Spanning tree deployment
76         self._master = None
77         self._master_passphrase = None
78         self._master_prk = None
79         self._master_puk = None
80         self._master_token = os.urandom(8).encode("hex")
81         self._build_pid = None
82         self._build_ppid = None
83         
84         # Logging
85         self._logger = logging.getLogger('nepi.testbeds.planetlab')
86         
87     
88     def __str__(self):
89         return "%s<%s>" % (
90             self.__class__.__name__,
91             ' '.join(filter(bool,(self.depends, self.sources)))
92         )
93     
94     def validate(self):
95         if self.home_path is None:
96             raise AssertionError, "Misconfigured application: missing home path"
97         if self.node.ident_path is None or not os.access(self.node.ident_path, os.R_OK):
98             raise AssertionError, "Misconfigured application: missing slice SSH key"
99         if self.node is None:
100             raise AssertionError, "Misconfigured application: unconnected node"
101         if self.node.hostname is None:
102             raise AssertionError, "Misconfigured application: misconfigured node"
103         if self.node.slicename is None:
104             raise AssertionError, "Misconfigured application: unspecified slice"
105     
106     def check_bad_host(self, out, err):
107         """
108         Called whenever an operation fails, it's given the output to be checked for
109         telltale signs of unhealthy hosts.
110         """
111         return False
112     
113     def remote_trace_path(self, whichtrace):
114         if whichtrace in self.TRACES:
115             tracefile = os.path.join(self.home_path, whichtrace)
116         else:
117             tracefile = None
118         
119         return tracefile
120
121     def remote_trace_name(self, whichtrace):
122         if whichtrace in self.TRACES:
123             return whichtrace
124         return None
125
126     def sync_trace(self, local_dir, whichtrace):
127         tracefile = self.remote_trace_path(whichtrace)
128         if not tracefile:
129             return None
130         
131         local_path = os.path.join(local_dir, tracefile)
132         
133         # create parent local folders
134         proc = subprocess.Popen(
135             ["mkdir", "-p", os.path.dirname(local_path)],
136             stdout = open("/dev/null","w"),
137             stdin = open("/dev/null","r"))
138
139         if proc.wait():
140             raise RuntimeError, "Failed to synchronize trace"
141         
142         # sync files
143         try:
144             self._popen_scp(
145                 '%s@%s:%s' % (self.node.slicename, self.node.hostname,
146                     tracefile),
147                 local_path
148                 )
149         except RuntimeError, e:
150             raise RuntimeError, "Failed to synchronize trace: %s %s" \
151                     % (e.args[0], e.args[1],)
152         
153         return local_path
154     
155     def recover(self):
156         # We assume a correct deployment, so recovery only
157         # means we mark this dependency as deployed
158         self._setup = True
159
160     def setup(self):
161         self._logger.info("Setting up %s", self)
162         self._make_home()
163         self._launch_build()
164         self._finish_build()
165         self._setup = True
166     
167     def async_setup(self):
168         if not self._setuper:
169             def setuper():
170                 try:
171                     self.setup()
172                 except:
173                     self._setuper._exc.append(sys.exc_info())
174             self._setuper = threading.Thread(
175                 target = setuper)
176             self._setuper._exc = []
177             self._setuper.start()
178     
179     def async_setup_wait(self):
180         if not self._setup:
181             self._logger.info("Waiting for %s to be setup", self)
182             if self._setuper:
183                 self._setuper.join()
184                 if not self._setup:
185                     if self._setuper._exc:
186                         exctyp,exval,exctrace = self._setuper._exc[0]
187                         raise exctyp,exval,exctrace
188                     else:
189                         raise RuntimeError, "Failed to setup application"
190                 else:
191                     self._logger.info("Setup ready: %s at %s", self, self.node.hostname)
192             else:
193                 self.setup()
194         
195     def _make_home(self):
196         # Make sure all the paths are created where 
197         # they have to be created for deployment
198         # sync files
199         try:
200             self._popen_ssh_command(
201                 "mkdir -p %(home)s && ( rm -f %(home)s/{pid,build-pid,nepi-build.sh} >/dev/null 2>&1 || /bin/true )" \
202                     % { 'home' : server.shell_escape(self.home_path) },
203                 timeout = 120,
204                 retry = 3
205                 )
206         except RuntimeError, e:
207             raise RuntimeError, "Failed to set up application %s: %s %s" % (self.home_path, e.args[0], e.args[1],)
208         
209         if self.stdin:
210             # Write program input
211             try:
212                 self._popen_scp(
213                     cStringIO.StringIO(self.stdin),
214                     '%s@%s:%s' % (self.node.slicename, self.node.hostname, 
215                         os.path.join(self.home_path, 'stdin') ),
216                     )
217             except RuntimeError, e:
218                 raise RuntimeError, "Failed to set up application %s: %s %s" \
219                         % (self.home_path, e.args[0], e.args[1],)
220
221     def _replace_paths(self, command):
222         """
223         Replace all special path tags with shell-escaped actual paths.
224         """
225         # need to append ${HOME} if paths aren't absolute, to MAKE them absolute.
226         root = '' if self.home_path.startswith('/') else "${HOME}/"
227         return ( command
228             .replace("${SOURCES}", root+server.shell_escape(self.home_path))
229             .replace("${BUILD}", root+server.shell_escape(os.path.join(self.home_path,'build'))) )
230
231     def _launch_build(self):
232         if self._master is not None:
233             self._do_install_keys()
234             buildscript = self._do_build_slave()
235         else:
236             buildscript = self._do_build_master()
237             
238         if buildscript is not None:
239             self._logger.info("Building %s at %s", self, self.node.hostname)
240             
241             # upload build script
242             try:
243                 self._popen_scp(
244                     buildscript,
245                     '%s@%s:%s' % (self.node.slicename, self.node.hostname, 
246                         os.path.join(self.home_path, 'nepi-build.sh') )
247                     )
248             except RuntimeError, e:
249                 raise RuntimeError, "Failed to set up application %s: %s %s" \
250                         % (self.home_path, e.args[0], e.args[1],)
251             
252             # launch build
253             self._do_launch_build()
254     
255     def _finish_build(self):
256         self._do_wait_build()
257         self._do_install()
258
    def _do_build_slave(self):
        """
        Generate the build script for a slave node.

        Instead of building, a slave waits for its master to finish and then
        copies the sources and/or the build.tar.gz archive from the master's
        home path using scp.

        Returns a cStringIO buffer holding the shell script, or None when
        there is neither `sources` nor `build` to fetch.

        Relies on `_master_prk_name` / `_master_puk_name`, which are set by
        install_keys().
        """
        if not self.sources and not self.build:
            return None
            
        # Create build script
        # Set of "user@host:path" locations to fetch from the master
        files = set()
        
        if self.sources:
            sources = self.sources.split(' ')
            files.update(
                "%s@%s:%s" % (self._master.node.slicename, self._master.node.hostname, 
                    os.path.join(self._master.home_path, os.path.basename(source)),)
                for source in sources
            )
        
        if self.build:
            files.add(
                "%s@%s:%s" % (self._master.node.slicename, self._master.node.hostname, 
                    os.path.join(self._master.home_path, 'build.tar.gz'),)
            )
        
        sshopts = "-o ConnectTimeout=30 -o ConnectionAttempts=3 -o ServerAliveInterval=30 -o TCPKeepAlive=yes"
        
        # Writes a small .ssh-askpass script (just `cat`), starts ssh-agent,
        # adds the private key feeding it $NEPI_MASTER_PASSPHRASE on stdin,
        # and then removes both key files.
        launch_agent = "{ ( echo -e '#!/bin/sh\\ncat' > .ssh-askpass ) && chmod u+x .ssh-askpass"\
                        " && export SSH_ASKPASS=$(pwd)/.ssh-askpass "\
                        " && ssh-agent > .ssh-agent.sh ; } && . ./.ssh-agent.sh && ( echo $NEPI_MASTER_PASSPHRASE | ssh-add %(prk)s ) && rm -rf %(prk)s %(puk)s" %  \
        {
            'prk' : server.shell_escape(self._master_prk_name),
            'puk' : server.shell_escape(self._master_puk_name),
        }
        
        kill_agent = "kill $SSH_AGENT_PID"
        
        # Polls the master every 5 seconds until its build.token.retcode file
        # contains the master's token, then requires build.token to contain
        # that token as well; otherwise prints BAD TOKEN and exits 1.
        waitmaster = (
            "{ . ./.ssh-agent.sh ; "
            "while [[ $(. ./.ssh-agent.sh > /dev/null ; ssh -q -o UserKnownHostsFile=%(hostkey)s %(sshopts)s %(master)s cat %(token_path)s.retcode || /bin/true) != %(token)s ]] ; do sleep 5 ; done ; "
            "if [[ $(. ./.ssh-agent.sh > /dev/null ; ssh -q -o UserKnownHostsFile=%(hostkey)s %(sshopts)s %(master)s cat %(token_path)s || /bin/true) != %(token)s ]] ; then echo BAD TOKEN ; exit 1 ; fi ; "
            "}" 
        ) % {
            'hostkey' : 'master_known_hosts',
            'master' : "%s@%s" % (self._master.node.slicename, self._master.node.hostname),
            'token_path' : os.path.join(self._master.home_path, 'build.token'),
            # NOTE: this is the *master's* token, not this slave's own
            'token' : server.shell_escape(self._master._master_token),
            'sshopts' : sshopts,
        }
        
        # Copies the files from the master into the current directory, unpacks
        # the build archive if any, and writes this slave's own token to
        # build.token and build.token.retcode.
        syncfiles = ". ./.ssh-agent.sh && scp -p -o UserKnownHostsFile=%(hostkey)s %(sshopts)s %(files)s ." % {
            'hostkey' : 'master_known_hosts',
            'files' : ' '.join(files),
            'sshopts' : sshopts,
        }
        if self.build:
            syncfiles += " && tar xzf build.tar.gz"
        syncfiles += " && ( echo %s > build.token )" % (server.shell_escape(self._master_token),)
        syncfiles += " && ( echo %s > build.token.retcode )" % (server.shell_escape(self._master_token),)
        syncfiles = "{ . ./.ssh-agent.sh ; %s ; }" % (syncfiles,)
        
        # Kills the agent and removes keys, known hosts file and askpass script
        cleanup = "{ . ./.ssh-agent.sh ; kill $SSH_AGENT_PID ; rm -rf %(prk)s %(puk)s master_known_hosts .ssh-askpass ; }" % {
            'prk' : server.shell_escape(self._master_prk_name),
            'puk' : server.shell_escape(self._master_puk_name),
        }
        
        # Cleanup runs whether or not the main chain succeeds, and
        # build.token.retcode is always written at the very end.
        # NOTE(review): the 'home' key below is not referenced by the template.
        slavescript = "( ( %(launch_agent)s && %(waitmaster)s && %(syncfiles)s && %(kill_agent)s && %(cleanup)s ) || %(cleanup)s ) ; echo %(token)s > build.token.retcode" % {
            'waitmaster' : waitmaster,
            'syncfiles' : syncfiles,
            'cleanup' : cleanup,
            'kill_agent' : kill_agent,
            'launch_agent' : launch_agent,
            'home' : server.shell_escape(self.home_path),
            'token' : server.shell_escape(self._master_token),
        }
        
        return cStringIO.StringIO(slavescript)
332          
333     def _do_launch_build(self):
334         script = "bash ./nepi-build.sh"
335         if self._master_passphrase:
336             script = "NEPI_MASTER_PASSPHRASE=%s %s" % (
337                 server.shell_escape(self._master_passphrase),
338                 script
339             )
340         (out,err),proc = rspawn.remote_spawn(
341             script,
342             pidfile = 'build-pid',
343             home = self.home_path,
344             stdin = '/dev/null',
345             stdout = 'buildlog',
346             stderr = rspawn.STDOUT,
347             
348             host = self.node.hostname,
349             port = None,
350             user = self.node.slicename,
351             agent = None,
352             ident_key = self.node.ident_path,
353             server_key = self.node.server_key
354             )
355         
356         if proc.wait():
357             if self.check_bad_host(out, err):
358                 self.node.blacklist()
359             raise RuntimeError, "Failed to set up build slave %s: %s %s" % (self.home_path, out,err,)
360         
361         
362         pid = ppid = None
363         delay = 1.0
364         for i in xrange(5):
365             pidtuple = rspawn.remote_check_pid(
366                 os.path.join(self.home_path,'build-pid'),
367                 host = self.node.hostname,
368                 port = None,
369                 user = self.node.slicename,
370                 agent = None,
371                 ident_key = self.node.ident_path,
372                 server_key = self.node.server_key
373                 )
374             
375             if pidtuple:
376                 pid, ppid = pidtuple
377                 self._build_pid, self._build_ppid = pidtuple
378                 break
379             else:
380                 time.sleep(delay)
381                 delay = min(30,delay*1.2)
382         else:
383             raise RuntimeError, "Failed to set up build slave %s: cannot get pid" % (self.home_path,)
384
385         self._logger.info("Deploying %s at %s", self, self.node.hostname)
386         
    def _do_wait_build(self):
        """
        Wait for the remote build process to finish and verify its result.

        Does nothing when no build pid/ppid were recorded. Otherwise polls
        the remote process status until it finishes (or can no longer be
        seen running), then reads build.token from the home path and compares
        it with this object's master token.

        Raises RuntimeError, including the remote buildlog, when the token
        does not match; the node is blacklisted first if check_bad_host()
        flags the output.
        """
        pid = self._build_pid
        ppid = self._build_ppid
        
        if pid and ppid:
            delay = 1.0
            first = True
            # Counts consecutive polls that were neither RUNNING nor FINISHED
            bustspin = 0
            while True:
                status = rspawn.remote_status(
                    pid, ppid,
                    host = self.node.hostname,
                    port = None,
                    user = self.node.slicename,
                    agent = None,
                    ident_key = self.node.ident_path,
                    server_key = self.node.server_key
                    )
                
                if status is rspawn.FINISHED:
                    self._build_pid = self._build_ppid = None
                    break
                elif status is not rspawn.RUNNING:
                    # Unexpected status: back off and retry, giving up after
                    # more than 12 consecutive occurrences
                    bustspin += 1
                    time.sleep(delay*(5.5+random.random()))
                    if bustspin > 12:
                        self._build_pid = self._build_ppid = None
                        break
                else:
                    if first:
                        self._logger.info("Waiting for %s to finish building at %s %s", self, self.node.hostname,
                            "(build slave)" if self._master is not None else "(build master)")
                        
                        first = False
                    # Randomized sleep whose base grows by 20% per poll, capped at 30
                    time.sleep(delay*(0.5+random.random()))
                    delay = min(30,delay*1.2)
                    bustspin = 0
            
            # check build token
            # Up to three attempts, two seconds apart, to read a non-empty token
            slave_token = ""
            for i in xrange(3):
                (out, err), proc = self._popen_ssh_command(
                    "cat %(token_path)s" % {
                        'token_path' : os.path.join(self.home_path, 'build.token'),
                    },
                    timeout = 120,
                    noerrors = True)
                if not proc.wait() and out:
                    slave_token = out.strip()
                
                if slave_token:
                    break
                else:
                    time.sleep(2)
            
            if slave_token != self._master_token:
                # Get buildlog for the error message
                # NOTE(review): the 'buildscript' key below is not referenced
                # by the command template.

                (buildlog, err), proc = self._popen_ssh_command(
                    "cat %(buildlog)s" % {
                        'buildlog' : os.path.join(self.home_path, 'buildlog'),
                        'buildscript' : os.path.join(self.home_path, 'nepi-build.sh'),
                    },
                    timeout = 120,
                    noerrors = True)
                
                proc.wait()
                
                if self.check_bad_host(buildlog, err):
                    self.node.blacklist()
                
                raise RuntimeError, "Failed to set up application %s: "\
                        "build failed, got wrong token from pid %s/%s "\
                        "(expected %r, got %r), see buildlog at %s:\n%s" % (
                    self.home_path, pid, ppid, self._master_token, slave_token, self.node.hostname, buildlog)

            self._logger.info("Built %s at %s", self, self.node.hostname)
464
465     def _do_kill_build(self):
466         pid = self._build_pid
467         ppid = self._build_ppid
468         
469         if pid and ppid:
470             self._logger.info("Killing build of %s", self)
471             rspawn.remote_kill(
472                 pid, ppid,
473                 host = self.node.hostname,
474                 port = None,
475                 user = self.node.slicename,
476                 agent = None,
477                 ident_key = self.node.ident_path
478                 )
479         
480         
481     def _do_build_master(self):
482         if not self.sources and not self.build and not self.buildDepends:
483             return None
484             
485         if self.sources:
486             sources = self.sources.split(' ')
487             
488             # Copy all sources
489             try:
490                 self._popen_scp(
491                     sources,
492                     "%s@%s:%s" % (self.node.slicename, self.node.hostname, 
493                         os.path.join(self.home_path,'.'),)
494                     )
495             except RuntimeError, e:
496                 raise RuntimeError, "Failed upload source file %r: %s %s" \
497                         % (sources, e.args[0], e.args[1],)
498             
499         buildscript = cStringIO.StringIO()
500         
501         buildscript.write("(\n")
502         
503         if self.buildDepends:
504             # Install build dependencies
505             buildscript.write(
506                 "sudo -S yum -y install %(packages)s\n" % {
507                     'packages' : self.buildDepends
508                 }
509             )
510         
511             
512         if self.build:
513             # Build sources
514             buildscript.write(
515                 "mkdir -p build && ( cd build && ( %(command)s ) )\n" % {
516                     'command' : self._replace_paths(self.build),
517                     'home' : server.shell_escape(self.home_path),
518                 }
519             )
520         
521             # Make archive
522             buildscript.write("tar czf build.tar.gz build\n")
523         
524         # Write token
525         buildscript.write("echo %(master_token)s > build.token ) ; echo %(master_token)s > build.token.retcode" % {
526             'master_token' : server.shell_escape(self._master_token)
527         })
528         
529         buildscript.seek(0)
530
531         return buildscript
532
533     def _do_install(self):
534         if self.install:
535             self._logger.info("Installing %s at %s", self, self.node.hostname)
536             
537             # Install application
538             try:
539                 self._popen_ssh_command(
540                     "cd %(home)s && cd build && ( %(command)s ) > ${HOME}/%(home)s/installlog 2>&1 || ( tail ${HOME}/%(home)s/{install,build}log >&2 && false )" % \
541                         {
542                         'command' : self._replace_paths(self.install),
543                         'home' : server.shell_escape(self.home_path),
544                         },
545                     )
546             except RuntimeError, e:
547                 if self.check_bad_host(e.args[0], e.args[1]):
548                     self.node.blacklist()
549                 raise RuntimeError, "Failed install build sources: %s %s" % (e.args[0], e.args[1],)
550
551     def set_master(self, master):
552         self._master = master
553         
554     def install_keys(self, prk, puk, passphrase):
555         # Install keys
556         self._master_passphrase = passphrase
557         self._master_prk = prk
558         self._master_puk = puk
559         self._master_prk_name = os.path.basename(prk.name)
560         self._master_puk_name = os.path.basename(puk.name)
561         
562     def _do_install_keys(self):
563         prk = self._master_prk
564         puk = self._master_puk
565        
566         try:
567             self._popen_scp(
568                 [ prk.name, puk.name ],
569                 '%s@%s:%s' % (self.node.slicename, self.node.hostname, self.home_path )
570                 )
571         except RuntimeError, e:
572             raise RuntimeError, "Failed to set up application deployment keys: %s %s" \
573                     % (e.args[0], e.args[1],)
574
575         try:
576             self._popen_scp(
577                 cStringIO.StringIO('%s,%s %s\n' % (
578                     self._master.node.hostname, socket.gethostbyname(self._master.node.hostname), 
579                     self._master.node.server_key)),
580                 '%s@%s:%s' % (self.node.slicename, self.node.hostname, 
581                     os.path.join(self.home_path,"master_known_hosts") )
582                 )
583         except RuntimeError, e:
584             raise RuntimeError, "Failed to set up application deployment keys: %s %s" \
585                     % (e.args[0], e.args[1],)
586         
587         # No longer need'em
588         self._master_prk = None
589         self._master_puk = None
590     
591     def cleanup(self):
592         # make sure there's no leftover build processes
593         self._do_kill_build()
594
595     @server.eintr_retry
596     def _popen_scp(self, src, dst, retry = 3):
597         while 1:
598             try:
599                 (out,err),proc = server.popen_scp(
600                     src,
601                     dst, 
602                     port = None,
603                     agent = None,
604                     ident_key = self.node.ident_path,
605                     server_key = self.node.server_key
606                     )
607
608                 if server.eintr_retry(proc.wait)():
609                     raise RuntimeError, (out, err)
610                 return (out, err), proc
611             except:
612                 if retry <= 0:
613                     raise
614                 else:
615                     retry -= 1
616   
617
618     @server.eintr_retry
619     def _popen_ssh_command(self, command, retry = 0, noerrors=False, timeout=None):
620         (out,err),proc = server.popen_ssh_command(
621             command,
622             host = self.node.hostname,
623             port = None,
624             user = self.node.slicename,
625             agent = None,
626             ident_key = self.node.ident_path,
627             server_key = self.node.server_key,
628             timeout = timeout,
629             retry = retry
630             )
631
632         if server.eintr_retry(proc.wait)():
633             if not noerrors:
634                 raise RuntimeError, (out, err)
635         return (out, err), proc
636
637 class Application(Dependency):
638     """
639     An application also has dependencies, but also a command to be ran and monitored.
640     
641     It adds the output of that command as traces.
642     """
643     
644     TRACES = ('stdout','stderr','buildlog', 'output')
645     
646     def __init__(self, api=None):
647         super(Application,self).__init__(api)
648         
649         # Attributes
650         self.command = None
651         self.sudo = False
652         
653         self.stdin = None
654         self.stdout = None
655         self.stderr = None
656         self.output = None
657         
658         # Those are filled when the app is started
659         #   Having both pid and ppid makes it harder
660         #   for pid rollover to induce tracking mistakes
661         self._started = False
662         self._pid = None
663         self._ppid = None
664
665         # Do not add to the python path of nodes
666         self.add_to_path = False
667     
668     def __str__(self):
669         return "%s<command:%s%s>" % (
670             self.__class__.__name__,
671             "sudo " if self.sudo else "",
672             self.command,
673         )
674     
675     def start(self):
676         self._logger.info("Starting %s", self)
677         
678         # Create shell script with the command
679         # This way, complex commands and scripts can be ran seamlessly
680         # sync files
681         command = cStringIO.StringIO()
682         command.write('export PYTHONPATH=$PYTHONPATH:%s\n' % (
683             ':'.join(["${HOME}/"+server.shell_escape(s) for s in self.node.pythonpath])
684         ))
685         command.write('export PATH=$PATH:%s\n' % (
686             ':'.join(["${HOME}/"+server.shell_escape(s) for s in self.node.pythonpath])
687         ))
688         if self.node.env:
689             for envkey, envvals in self.node.env.iteritems():
690                 for envval in envvals:
691                     command.write('export %s=%s\n' % (envkey, envval))
692         command.write(self.command)
693         command.seek(0)
694
695         try:
696             self._popen_scp(
697                 command,
698                 '%s@%s:%s' % (self.node.slicename, self.node.hostname, 
699                     os.path.join(self.home_path, "app.sh"))
700                 )
701         except RuntimeError, e:
702             raise RuntimeError, "Failed to set up application: %s %s" \
703                     % (e.args[0], e.args[1],)
704         
705         # Start process in a "daemonized" way, using nohup and heavy
706         # stdin/out redirection to avoid connection issues
707         (out,err),proc = rspawn.remote_spawn(
708             self._replace_paths("bash ./app.sh"),
709             
710             pidfile = './pid',
711             home = self.home_path,
712             stdin = 'stdin' if self.stdin is not None else '/dev/null',
713             stdout = 'stdout' if self.stdout else '/dev/null',
714             stderr = 'stderr' if self.stderr else '/dev/null',
715             sudo = self.sudo,
716             
717             host = self.node.hostname,
718             port = None,
719             user = self.node.slicename,
720             agent = None,
721             ident_key = self.node.ident_path,
722             server_key = self.node.server_key
723             )
724         
725         if proc.wait():
726             if self.check_bad_host(out, err):
727                 self.node.blacklist()
728             raise RuntimeError, "Failed to set up application: %s %s" % (out,err,)
729
730         self._started = True
731     
732     def recover(self):
733         # Assuming the application is running on PlanetLab,
734         # proper pidfiles should be present at the app's home path.
735         # So we mark this application as started, and check the pidfiles
736         self._started = True
737         self.checkpid()
738
739     def checkpid(self):            
740         # Get PID/PPID
741         # NOTE: wait a bit for the pidfile to be created
742         if self._started and not self._pid or not self._ppid:
743             pidtuple = rspawn.remote_check_pid(
744                 os.path.join(self.home_path,'pid'),
745                 host = self.node.hostname,
746                 port = None,
747                 user = self.node.slicename,
748                 agent = None,
749                 ident_key = self.node.ident_path,
750                 server_key = self.node.server_key
751                 )
752             
753             if pidtuple:
754                 self._pid, self._ppid = pidtuple
755     
756     def status(self):
757         self.checkpid()
758         if not self._started:
759             return AS.STATUS_NOT_STARTED
760         elif not self._pid or not self._ppid:
761             return AS.STATUS_NOT_STARTED
762         else:
763             status = rspawn.remote_status(
764                 self._pid, self._ppid,
765                 host = self.node.hostname,
766                 port = None,
767                 user = self.node.slicename,
768                 agent = None,
769                 ident_key = self.node.ident_path,
770                 server_key = self.node.server_key
771                 )
772             
773             if status is rspawn.NOT_STARTED:
774                 return AS.STATUS_NOT_STARTED
775             elif status is rspawn.RUNNING:
776                 return AS.STATUS_RUNNING
777             elif status is rspawn.FINISHED:
778                 return AS.STATUS_FINISHED
779             else:
780                 # WTF?
781                 return AS.STATUS_NOT_STARTED
782     
783     def kill(self):
784         status = self.status()
785         if status == AS.STATUS_RUNNING:
786             # kill by ppid+pid - SIGTERM first, then try SIGKILL
787             rspawn.remote_kill(
788                 self._pid, self._ppid,
789                 host = self.node.hostname,
790                 port = None,
791                 user = self.node.slicename,
792                 agent = None,
793                 ident_key = self.node.ident_path,
794                 server_key = self.node.server_key,
795                 sudo = self.sudo
796                 )
797             self._logger.info("Killed %s", self)
798
799
class NepiDependency(Dependency):
    """
    This dependency adds nepi itself to the python path,
    so that you may run testbeds within PL nodes.

    The nepi package is packed into a temporary gzipped tarball which is
    used as the sources of the dependency; the build step moves it into
    place and the install step unpacks it.
    """
    
    # Class attribute holding a *weak* reference to the shared NEPI tar file
    # so that they may share it. Don't operate on the file itself, it would
    # be a mess, just use its path.
    _shared_nepi_tar = None
    
    def __init__(self, api = None):
        """
        Set up sources, build and install commands around the nepi tarball.

        api is handed over unchanged to Dependency.__init__.
        """
        super(NepiDependency, self).__init__(api)
        
        self._tarball = None
        
        self.depends = 'python python-ipaddr python-setuptools'
        
        # our sources are in our ad-hoc tarball
        self.sources = self.tarball.name
        
        tarname = os.path.basename(self.tarball.name)
        
        # it's already built - just move the tarball into place
        self.build = "mv -f ${SOURCES}/%s ." % (tarname,)
        
        # unpack it into sources, and we're done
        self.install = "tar xzf ${BUILD}/%s -C .." % (tarname,)
    
    @property
    def tarball(self):
        """
        Temporary file object holding the gzipped tarball of the nepi package.

        The tarball is created on first access. Instances reuse the tarball
        another instance built for as long as that file object is still
        alive; each instance keeps its own strong reference in _tarball.

        Raises RuntimeError when the tar command exits with a non-zero status.
        """
        if self._tarball is None:
            shared_ref = self._shared_nepi_tar
            shared_tar = shared_ref() if shared_ref is not None else None
            
            if shared_tar is None:
                # Build an ad-hoc tarball
                # Prebuilt
                import nepi
                import tempfile
                import weakref
                
                shared_tar = tempfile.NamedTemporaryFile(prefix='nepi-src-', suffix='.tar.gz')
                
                # Directory that contains the nepi package
                nepi_parent = os.path.join(os.path.dirname(os.path.dirname(nepi.__file__)),'.')
                
                # Both /dev/null handles are closed as soon as tar is done
                with open("/dev/null","w") as null_out:
                    with open("/dev/null","r") as null_in:
                        proc = subprocess.Popen(
                            ["tar", "czf", shared_tar.name, 
                                '-C', nepi_parent, 
                                'nepi'],
                            stdout = null_out,
                            stdin = null_in)
                        failed = proc.wait()
                
                if failed:
                    # Closing the temporary file removes it from disk
                    shared_tar.close()
                    raise RuntimeError("Failed to create nepi tarball")
                
                # Publish on the class, and weakly: the getter above calls
                # the attribute to dereference it, and the file must go away
                # together with the last instance that uses it.
                NepiDependency._shared_nepi_tar = weakref.ref(shared_tar)
            
            self._tarball = shared_tar
        
        return self._tarball
856
class NS3Dependency(Dependency):
    """
    This dependency adds NS3 libraries to the library paths,
    so that you may run the NS3 testbed within PL nodes.
    
    You'll also need the NepiDependency.
    """
    
    def __init__(self, api = None):
        """
        Prepare the shell commands that build and install NS3 and the python
        packages it needs (pygccxml, pybindgen, passfd).

        Both commands start by probing for a working installation and do
        nothing else when the probe succeeds. api is handed over unchanged
        to Dependency.__init__.
        """
        super(NS3Dependency, self).__init__(api)
        
        self.buildDepends = 'make waf gcc gcc-c++ gccxml unzip'
        
        # We have to download the sources, untar, build...
        pybindgen_source_url = "http://yans.pl.sophia.inria.fr/trac/nepi/raw-attachment/wiki/WikiStart/pybindgen-r794.tar.gz"
        pygccxml_source_url = "http://leaseweb.dl.sourceforge.net/project/pygccxml/pygccxml/pygccxml-1.0/pygccxml-1.0.0.zip"
        ns3_source_url = "http://yans.pl.sophia.inria.fr/code/hgwebdir.cgi/ns-3.11-nepi/archive/tip.tar.gz"
        passfd_source_url = "http://yans.pl.sophia.inria.fr/code/hgwebdir.cgi/python-passfd/archive/tip.tar.gz"
        self.build =(
            " ( "
            "  cd .. && "
            "  python -c 'import pygccxml, pybindgen, passfd' && "
            "  test -f lib/ns/_core.so && "
            "  test -f lib/ns/__init__.py && "
            "  test -f lib/ns/core.py && "
            "  test -f lib/libns3-core.so && "
            "  LD_LIBRARY_PATH=lib PYTHONPATH=lib python -c 'import ns.core' "
            " ) || ( "
                # Not working, rebuild
                     # Archive SHA1 sums to check
                     "echo '7158877faff2254e6c094bf18e6b4283cac19137  pygccxml-1.0.0.zip' > archive_sums.txt && "
                     "echo 'a18c2ccffd0df517bc37e2f3a2475092517c43f2  pybindgen-src.tar.gz' >> archive_sums.txt && "
                     " ( " # check existing files
                     " sha1sum -c archive_sums.txt && "
                     " test -f passfd-src.tar.gz && "
                     " test -f ns3-src.tar.gz "
                     " ) || ( " # nope? re-download
                     # Remove every archive that gets downloaded below. The
                     # pybindgen archive is pybindgen-src.tar.gz: leaving it
                     # out would keep a complete but corrupt copy in place,
                     # because wget -c has nothing left to fetch for it.
                     # pybindgen-src.zip is still removed as before.
                     " rm -f pybindgen-src.zip pybindgen-src.tar.gz pygccxml-1.0.0.zip passfd-src.tar.gz ns3-src.tar.gz && "
                     " wget -q -c -O pybindgen-src.tar.gz %(pybindgen_source_url)s && "
                     " wget -q -c -O pygccxml-1.0.0.zip %(pygccxml_source_url)s && "
                     " wget -q -c -O passfd-src.tar.gz %(passfd_source_url)s && "
                     " wget -q -c -O ns3-src.tar.gz %(ns3_source_url)s && "
                     " sha1sum -c archive_sums.txt " # Check SHA1 sums when applicable
                     " ) && "
                     "unzip -n pygccxml-1.0.0.zip && "
                     "mkdir -p pybindgen-src && "
                     "mkdir -p ns3-src && "
                     "mkdir -p passfd-src && "
                     "tar xzf ns3-src.tar.gz --strip-components=1 -C ns3-src && "
                     "tar xzf passfd-src.tar.gz --strip-components=1 -C passfd-src && "
                     "tar xzf pybindgen-src.tar.gz --strip-components=1 -C pybindgen-src && "
                     "rm -rf target && "    # mv doesn't like unclean targets
                     "mkdir -p target && "
                     "cd pygccxml-1.0.0 && "
                     "rm -rf unittests docs && " # pygccxml has ~100M of unit tests - excessive - docs aren't needed either
                     "python setup.py build && "
                     "python setup.py install --install-lib ${BUILD}/target && "
                     "python setup.py clean && "
                     "cd ../pybindgen-src && "
                     "export PYTHONPATH=$PYTHONPATH:${BUILD}/target && "
                     "./waf configure --prefix=${BUILD}/target -d release && "
                     "./waf && "
                     "./waf install && "
                     "./waf clean && "
                     "mv -f ${BUILD}/target/lib/python*/site-packages/pybindgen ${BUILD}/target/. && "
                     "rm -rf ${BUILD}/target/lib && "
                     "cd ../passfd-src && "
                     "python setup.py build && "
                     "python setup.py install --install-lib ${BUILD}/target && "
                     "python setup.py clean && "
                     "cd ../ns3-src && "
                     "./waf configure --prefix=${BUILD}/target --with-pybindgen=../pybindgen-src -d release --disable-examples --disable-tests && "
                     "./waf &&"
                     "./waf install && "
                     "rm -f ${BUILD}/target/lib/*.so && "
                     "cp -a ${BUILD}/ns3-src/build/release/libns3*.so ${BUILD}/target/lib && "
                     "cp -a ${BUILD}/ns3-src/build/release/bindings/python/ns ${BUILD}/target/lib &&"
                     "./waf clean "
             " )"
                     % dict(
                        pybindgen_source_url = server.shell_escape(pybindgen_source_url),
                        pygccxml_source_url = server.shell_escape(pygccxml_source_url),
                        ns3_source_url = server.shell_escape(ns3_source_url),
                        passfd_source_url = server.shell_escape(passfd_source_url),
                     ))
        
        # Just move ${BUILD}/target
        self.install = (
            " ( "
            "  cd .. && "
            "  python -c 'import pygccxml, pybindgen, passfd' && "
            "  test -f lib/ns/_core.so && "
            "  test -f lib/ns/__init__.py && "
            "  test -f lib/ns/core.py && "
            "  test -f lib/libns3-core.so && "
            "  LD_LIBRARY_PATH=lib PYTHONPATH=lib python -c 'import ns.core' "
            " ) || ( "
                # Not working, reinstall
                    "test -d ${BUILD}/target && "
                    "[[ \"x\" != \"x$(find ${BUILD}/target -mindepth 1 -print -quit)\" ]] &&"
                    "( for i in ${BUILD}/target/* ; do rm -rf ${SOURCES}/${i##*/} ; done ) && " # mv doesn't like unclean targets
                    "mv -f ${BUILD}/target/* ${SOURCES}"
            " )"
        )
        
        # Set extra environment paths
        self.env['NEPI_NS3BINDINGS'] = "${SOURCES}/lib"
        self.env['NEPI_NS3LIBRARY'] = "${SOURCES}/lib"
    
    @property
    def tarball(self):
        """
        Temporary file object holding a gzipped tarball of the nepi package.

        Raises RuntimeError when the tar command exits with a non-zero status.

        NOTE(review): this class sets neither _tarball nor _shared_nepi_tar
        itself, and the property mirrors the one in NepiDependency - confirm
        that it is still needed here.
        """
        if self._tarball is None:
            shared_ref = self._shared_nepi_tar
            shared_tar = shared_ref() if shared_ref is not None else None
            
            if shared_tar is None:
                # Build an ad-hoc tarball
                # Prebuilt
                import nepi
                import tempfile
                import weakref
                
                shared_tar = tempfile.NamedTemporaryFile(prefix='nepi-src-', suffix='.tar.gz')
                
                # Directory that contains the nepi package
                nepi_parent = os.path.join(os.path.dirname(os.path.dirname(nepi.__file__)),'.')
                
                # Both /dev/null handles are closed as soon as tar is done
                with open("/dev/null","w") as null_out:
                    with open("/dev/null","r") as null_in:
                        proc = subprocess.Popen(
                            ["tar", "czf", shared_tar.name, 
                                '-C', nepi_parent, 
                                'nepi'],
                            stdout = null_out,
                            stdin = null_in)
                        failed = proc.wait()
                
                if failed:
                    # Closing the temporary file removes it from disk
                    shared_tar.close()
                    raise RuntimeError("Failed to create nepi tarball")
                
                # The getter above calls the attribute to dereference it,
                # so what gets published has to be a weak reference
                NS3Dependency._shared_nepi_tar = weakref.ref(shared_tar)
            
            self._tarball = shared_tar
        
        return self._tarball
993
class YumDependency(Dependency):
    """
    This dependency is an internal helper class used to
    efficiently distribute yum-downloaded rpms.
    
    It temporarily sets the yum cache as persistent in the
    build master, and installs all the required packages.
    
    The rpm packages left in the yum cache are gathered and
    distributed by the underlying Dependency in an efficient
    manner. Build slaves will then install those rpms back in
    the cache before issuing the install command.
    
    When packages have been installed already, nothing but an
    empty tar is distributed.
    """
    
    # Class attribute holding a *weak* reference to the shared NEPI tar file
    # so that they may share it. Don't operate on the file itself, it would
    # be a mess, just use its path.
    # NOTE(review): none of the methods of this class shown here read it.
    _shared_nepi_tar = None
    
    # Fragments of yum output telling that the host cannot be used.
    # Compiled once for the class; matching ignores case. The parentheses
    # around repomd.xml are escaped because they are part of the message.
    _bad_host_re = re.compile(
        r'(?:'
        r'The GPG keys listed for the ".*" repository are already installed but they are not correct for this package'
        r'|Error: Cannot retrieve repository metadata \(repomd\.xml\) for repository: .*[.] Please verify its path and try again'
        r'|Error: disk I/O error'
        r')', 
        re.I)
    
    def _canonical_depends(self):
        """
        Return self.depends as a sorted, single-space separated string.

        Splitting on any run of whitespace keeps empty tokens, tabs and
        newlines out of the shell commands built from the result.
        """
        return ' '.join( sorted( (self.depends or "").split() ) )
    
    def _build_get(self):
        """
        Shell command for the build master: install the packages with the
        yum cache kept, and pack the cached rpms into ${BUILD}/packages.tar.
        """
        # canonical representation of dependencies
        depends = self._canonical_depends()
        
        # download rpms and pack into a tar archive
        return (
            "sudo -S nice yum -y makecache && "
            "sudo -S sed -i -r 's/keepcache *= *0/keepcache=1/' /etc/yum.conf && "
            " ( ( "
                "sudo -S nice yum -y install %s ; "
                "rm -f ${BUILD}/packages.tar ; "
                "tar -C /var/cache/yum -rf ${BUILD}/packages.tar $(cd /var/cache/yum ; find -iname '*.rpm')"
            " ) || /bin/true ) && "
            "sudo -S sed -i -r 's/keepcache *= *1/keepcache=0/' /etc/yum.conf && "
            "( sudo -S nice yum -y clean packages || /bin/true ) "
        ) % ( depends, )
    def _build_set(self, value):
        # ignore: the command is always derived from self.depends, but
        # Dependency.__init__ assigns to self.build, so a setter must exist
        return
    build = property(_build_get, _build_set)
    
    def _install_get(self):
        """
        Shell command for the install step: unpack packages.tar into the yum
        cache, install the packages, and clean the cached packages.
        """
        # canonical representation of dependencies
        depends = self._canonical_depends()
        
        # unpack cached rpms into yum cache, install, and cleanup
        return (
            "sudo -S tar -k --keep-newer-files -C /var/cache/yum -xf packages.tar && "
            "sudo -S nice yum -y install %s && "
            "( sudo -S nice yum -y clean packages || /bin/true ) "
        ) % ( depends, )
    def _install_set(self, value):
        # ignore: same reason as for build - Dependency.__init__ assigns
        # to self.install
        return
    install = property(_install_get, _install_set)
        
    def check_bad_host(self, out, err):
        """
        Look for one of the known fatal yum messages in out, then in err.

        Returns the match object of the first hit, or None when neither
        text contains such a message.
        """
        badre = self._bad_host_re
        return badre.search(out) or badre.search(err)