Merge with head
[nepi.git] / src / nepi / testbeds / planetlab / application.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from constants import TESTBED_ID
5 import plcapi
6 import operator
7 import os
8 import os.path
9 import sys
10 import nepi.util.server as server
11 import cStringIO
12 import subprocess
13 import rspawn
14 import random
15 import time
16 import socket
17 import threading
18 import logging
19 import re
20
21 from nepi.util.constants import ApplicationStatus as AS
22
23 class Dependency(object):
24     """
25     A Dependency is in every respect like an application.
26     
27     It depends on some packages, it may require building binaries, it must deploy
28     them...
29     
30     But it has no command. Dependencies aren't ever started, or stopped, and have
31     no status.
32     """
33
34     TRACES = ('buildlog')
35
36     def __init__(self, api=None):
37         if not api:
38             api = plcapi.PLCAPI()
39         self._api = api
40         
41         # Attributes
42         self.command = None
43         self.sudo = False
44         
45         self.build = None
46         self.install = None
47         self.depends = None
48         self.buildDepends = None
49         self.sources = None
50         self.rpmFusion = False
51         self.env = {}
52         
53         self.stdin = None
54         self.stdout = None
55         self.stderr = None
56         self.buildlog = None
57         
58         self.add_to_path = True
59         
60         # Those are filled when the app is configured
61         self.home_path = None
62         
63         # Those are filled when an actual node is connected
64         self.node = None
65         
66         # Those are filled when the app is started
67         #   Having both pid and ppid makes it harder
68         #   for pid rollover to induce tracking mistakes
69         self._started = False
70         self._setup = False
71         self._setuper = None
72         self._pid = None
73         self._ppid = None
74
75         # Spanning tree deployment
76         self._master = None
77         self._master_passphrase = None
78         self._master_prk = None
79         self._master_puk = None
80         self._master_token = ''.join(map(chr,[rng.randint(0,255) 
81                                       for rng in (random.SystemRandom(),)
82                                       for i in xrange(8)] )).encode("hex")
83         self._build_pid = None
84         self._build_ppid = None
85         
86         # Logging
87         self._logger = logging.getLogger('nepi.testbeds.planetlab')
88         
89     
90     def __str__(self):
91         return "%s<%s>" % (
92             self.__class__.__name__,
93             ' '.join(filter(bool,(self.depends, self.sources)))
94         )
95     
96     def validate(self):
97         if self.home_path is None:
98             raise AssertionError, "Misconfigured application: missing home path"
99         if self.node.ident_path is None or not os.access(self.node.ident_path, os.R_OK):
100             raise AssertionError, "Misconfigured application: missing slice SSH key"
101         if self.node is None:
102             raise AssertionError, "Misconfigured application: unconnected node"
103         if self.node.hostname is None:
104             raise AssertionError, "Misconfigured application: misconfigured node"
105         if self.node.slicename is None:
106             raise AssertionError, "Misconfigured application: unspecified slice"
107     
108     def check_bad_host(self, out, err):
109         """
110         Called whenever an operation fails, it's given the output to be checked for
111         telltale signs of unhealthy hosts.
112         """
113         return False
114     
115     def remote_trace_path(self, whichtrace):
116         if whichtrace in self.TRACES:
117             tracefile = os.path.join(self.home_path, whichtrace)
118         else:
119             tracefile = None
120         
121         return tracefile
122
123     def remote_trace_name(self, whichtrace):
124         if whichtrace in self.TRACES:
125             return whichtrace
126         return None
127
128     def sync_trace(self, local_dir, whichtrace):
129         tracefile = self.remote_trace_path(whichtrace)
130         if not tracefile:
131             return None
132         
133         local_path = os.path.join(local_dir, tracefile)
134         
135         # create parent local folders
136         proc = subprocess.Popen(
137             ["mkdir", "-p", os.path.dirname(local_path)],
138             stdout = open("/dev/null","w"),
139             stdin = open("/dev/null","r"))
140
141         if proc.wait():
142             raise RuntimeError, "Failed to synchronize trace"
143         
144         # sync files
145         try:
146             self._popen_scp(
147                 '%s@%s:%s' % (self.node.slicename, self.node.hostname,
148                     tracefile),
149                 local_path
150                 )
151         except RuntimeError, e:
152             raise RuntimeError, "Failed to synchronize trace: %s %s" \
153                     % (e.args[0], e.args[1],)
154         
155         return local_path
156     
157     def recover(self):
158         # We assume a correct deployment, so recovery only
159         # means we mark this dependency as deployed
160         self._setup = True
161
162     def setup(self):
163         self._logger.info("Setting up %s", self)
164         self._make_home()
165         self._launch_build()
166         self._finish_build()
167         self._setup = True
168     
169     def async_setup(self):
170         if not self._setuper:
171             def setuper():
172                 try:
173                     self.setup()
174                 except:
175                     self._setuper._exc.append(sys.exc_info())
176             self._setuper = threading.Thread(
177                 target = setuper)
178             self._setuper._exc = []
179             self._setuper.start()
180     
181     def async_setup_wait(self):
182         if not self._setup:
183             self._logger.info("Waiting for %s to be setup", self)
184             if self._setuper:
185                 self._setuper.join()
186                 if not self._setup:
187                     if self._setuper._exc:
188                         exctyp,exval,exctrace = self._setuper._exc[0]
189                         raise exctyp,exval,exctrace
190                     else:
191                         raise RuntimeError, "Failed to setup application"
192                 else:
193                     self._logger.info("Setup ready: %s", self)
194             else:
195                 self.setup()
196         
197     def _make_home(self):
198         # Make sure all the paths are created where 
199         # they have to be created for deployment
200         # sync files
201         try:
202             self._popen_ssh_command(
203                 "mkdir -p %(home)s && ( rm -f %(home)s/{pid,build-pid,nepi-build.sh} >/dev/null 2>&1 || /bin/true )" \
204                     % { 'home' : server.shell_escape(self.home_path) },
205                 timeout = 120,
206                 retry = 3
207                 )
208         except RuntimeError, e:
209             raise RuntimeError, "Failed to set up application %s: %s %s" % (self.home_path, e.args[0], e.args[1],)
210         
211         if self.stdin:
212             # Write program input
213             try:
214                 self._popen_scp(
215                     cStringIO.StringIO(self.stdin),
216                     '%s@%s:%s' % (self.node.slicename, self.node.hostname, 
217                         os.path.join(self.home_path, 'stdin') ),
218                     )
219             except RuntimeError, e:
220                 raise RuntimeError, "Failed to set up application %s: %s %s" \
221                         % (self.home_path, e.args[0], e.args[1],)
222
223     def _replace_paths(self, command):
224         """
225         Replace all special path tags with shell-escaped actual paths.
226         """
227         # need to append ${HOME} if paths aren't absolute, to MAKE them absolute.
228         root = '' if self.home_path.startswith('/') else "${HOME}/"
229         return ( command
230             .replace("${SOURCES}", root+server.shell_escape(self.home_path))
231             .replace("${BUILD}", root+server.shell_escape(os.path.join(self.home_path,'build'))) )
232
233     def _launch_build(self):
234         if self._master is not None:
235             self._do_install_keys()
236             buildscript = self._do_build_slave()
237         else:
238             buildscript = self._do_build_master()
239             
240         if buildscript is not None:
241             self._logger.info("Building %s", self)
242             
243             # upload build script
244             try:
245                 self._popen_scp(
246                     buildscript,
247                     '%s@%s:%s' % (self.node.slicename, self.node.hostname, 
248                         os.path.join(self.home_path, 'nepi-build.sh') )
249                     )
250             except RuntimeError, e:
251                 raise RuntimeError, "Failed to set up application %s: %s %s" \
252                         % (self.home_path, e.args[0], e.args[1],)
253             
254             # launch build
255             self._do_launch_build()
256     
257     def _finish_build(self):
258         self._do_wait_build()
259         self._do_install()
260
    def _do_build_slave(self):
        """
        Build the shell script a slave runs to fetch its files from the master.

        The script starts an ssh-agent loaded with the deployment key, polls
        the master until its build.token.retcode holds the master's token,
        checks the master's build.token, copies the sources and/or
        build.tar.gz with scp, writes this element's own token, and cleans
        up the agent and key material.

        Returns a cStringIO holding the script, or None when there are
        neither sources nor a build command.
        """
        if not self.sources and not self.build:
            return None
            
        # Create build script
        files = set()
        
        # Every source is fetched, by base name, from the master's home path
        if self.sources:
            sources = self.sources.split(' ')
            files.update(
                "%s@%s:%s" % (self._master.node.slicename, self._master.node.hostname, 
                    os.path.join(self._master.home_path, os.path.basename(source)),)
                for source in sources
            )
        
        # The build results come as the master's build.tar.gz
        if self.build:
            files.add(
                "%s@%s:%s" % (self._master.node.slicename, self._master.node.hostname, 
                    os.path.join(self._master.home_path, 'build.tar.gz'),)
            )
        
        sshopts = "-o ConnectTimeout=30 -o ConnectionAttempts=3 -o ServerAliveInterval=30 -o TCPKeepAlive=yes"
        
        # Writes a 'cat' script as SSH_ASKPASS, starts ssh-agent saving its
        # environment to .ssh-agent.sh, adds the private key piping in
        # $NEPI_MASTER_PASSPHRASE, and removes both key files
        launch_agent = "{ ( echo -e '#!/bin/sh\\ncat' > .ssh-askpass ) && chmod u+x .ssh-askpass"\
                        " && export SSH_ASKPASS=$(pwd)/.ssh-askpass "\
                        " && ssh-agent > .ssh-agent.sh ; } && . ./.ssh-agent.sh && ( echo $NEPI_MASTER_PASSPHRASE | ssh-add %(prk)s ) && rm -rf %(prk)s %(puk)s" %  \
        {
            'prk' : server.shell_escape(self._master_prk_name),
            'puk' : server.shell_escape(self._master_puk_name),
        }
        
        kill_agent = "kill $SSH_AGENT_PID"
        
        # Polls every 5 seconds until the master's build.token.retcode equals
        # the master's token; then the master's build.token must match too,
        # or the script prints BAD TOKEN and exits 1
        waitmaster = (
            "{ . ./.ssh-agent.sh ; "
            "while [[ $(ssh -q -o UserKnownHostsFile=%(hostkey)s %(sshopts)s %(master)s cat %(token_path)s.retcode || /bin/true) != %(token)s ]] ; do sleep 5 ; done ; "
            "if [[ $(ssh -q -o UserKnownHostsFile=%(hostkey)s %(sshopts)s %(master)s cat %(token_path)s || /bin/true) != %(token)s ]] ; then echo BAD TOKEN ; exit 1 ; fi ; "
            "}" 
        ) % {
            'hostkey' : 'master_known_hosts',
            'master' : "%s@%s" % (self._master.node.slicename, self._master.node.hostname),
            'token_path' : os.path.join(self._master.home_path, 'build.token'),
            'token' : server.shell_escape(self._master._master_token),
            'sshopts' : sshopts,
        }
        
        # Copies the files over, unpacks the build archive if there is one,
        # and writes this element's own token (not the master's) to
        # build.token and build.token.retcode
        # NOTE(review): the file names are joined unescaped into the command
        syncfiles = "scp -p -o UserKnownHostsFile=%(hostkey)s %(sshopts)s %(files)s ." % {
            'hostkey' : 'master_known_hosts',
            'files' : ' '.join(files),
            'sshopts' : sshopts,
        }
        if self.build:
            syncfiles += " && tar xzf build.tar.gz"
        syncfiles += " && ( echo %s > build.token )" % (server.shell_escape(self._master_token),)
        syncfiles += " && ( echo %s > build.token.retcode )" % (server.shell_escape(self._master_token),)
        syncfiles = "{ . ./.ssh-agent.sh ; %s ; }" % (syncfiles,)
        
        # Kills the agent and removes keys, known hosts file and askpass script
        cleanup = "{ . ./.ssh-agent.sh ; kill $SSH_AGENT_PID ; rm -rf %(prk)s %(puk)s master_known_hosts .ssh-askpass ; }" % {
            'prk' : server.shell_escape(self._master_prk_name),
            'puk' : server.shell_escape(self._master_puk_name),
        }
        
        # The cleanup runs whether the chain succeeds or not, and
        # build.token.retcode is written in either case
        # NOTE(review): the 'home' key below is not used by the template
        slavescript = "( ( %(launch_agent)s && %(waitmaster)s && %(syncfiles)s && %(kill_agent)s && %(cleanup)s ) || %(cleanup)s ) ; echo %(token)s > build.token.retcode" % {
            'waitmaster' : waitmaster,
            'syncfiles' : syncfiles,
            'cleanup' : cleanup,
            'kill_agent' : kill_agent,
            'launch_agent' : launch_agent,
            'home' : server.shell_escape(self.home_path),
            'token' : server.shell_escape(self._master_token),
        }
        
        return cStringIO.StringIO(slavescript)
334          
335     def _do_launch_build(self):
336         script = "bash ./nepi-build.sh"
337         if self._master_passphrase:
338             script = "NEPI_MASTER_PASSPHRASE=%s %s" % (
339                 server.shell_escape(self._master_passphrase),
340                 script
341             )
342         (out,err),proc = rspawn.remote_spawn(
343             script,
344             pidfile = 'build-pid',
345             home = self.home_path,
346             stdin = '/dev/null',
347             stdout = 'buildlog',
348             stderr = rspawn.STDOUT,
349             
350             host = self.node.hostname,
351             port = None,
352             user = self.node.slicename,
353             agent = None,
354             ident_key = self.node.ident_path,
355             server_key = self.node.server_key
356             )
357         
358         if proc.wait():
359             if self.check_bad_host(out, err):
360                 self.node.blacklist()
361             raise RuntimeError, "Failed to set up build slave %s: %s %s" % (self.home_path, out,err,)
362         
363         
364         pid = ppid = None
365         delay = 1.0
366         for i in xrange(5):
367             pidtuple = rspawn.remote_check_pid(
368                 os.path.join(self.home_path,'build-pid'),
369                 host = self.node.hostname,
370                 port = None,
371                 user = self.node.slicename,
372                 agent = None,
373                 ident_key = self.node.ident_path,
374                 server_key = self.node.server_key
375                 )
376             
377             if pidtuple:
378                 pid, ppid = pidtuple
379                 self._build_pid, self._build_ppid = pidtuple
380                 break
381             else:
382                 time.sleep(delay)
383                 delay = min(30,delay*1.2)
384         else:
385             raise RuntimeError, "Failed to set up build slave %s: cannot get pid" % (self.home_path,)
386
387         self._logger.info("Deploying %s", self)
388         
389     def _do_wait_build(self):
390         pid = self._build_pid
391         ppid = self._build_ppid
392         
393         if pid and ppid:
394             delay = 1.0
395             first = True
396             bustspin = 0
397             while True:
398                 status = rspawn.remote_status(
399                     pid, ppid,
400                     host = self.node.hostname,
401                     port = None,
402                     user = self.node.slicename,
403                     agent = None,
404                     ident_key = self.node.ident_path,
405                     server_key = self.node.server_key
406                     )
407                 
408                 if status is rspawn.FINISHED:
409                     self._build_pid = self._build_ppid = None
410                     break
411                 elif status is not rspawn.RUNNING:
412                     bustspin += 1
413                     time.sleep(5)
414                     if bustspin > 12:
415                         self._build_pid = self._build_ppid = None
416                         break
417                 else:
418                     if first:
419                         self._logger.info("Waiting for %s to finish building at %s %s", self, self.node.hostname,
420                             "(build slave)" if self._master is not None else "(build master)")
421                         
422                         first = False
423                     time.sleep(delay*(0.5+random.random()))
424                     delay = min(30,delay*1.2)
425                     bustspin = 0
426             
427             # check build token
428             slave_token = ""
429             for i in xrange(3):
430                 (out, err), proc = self._popen_ssh_command(
431                     "cat %(token_path)s" % {
432                         'token_path' : os.path.join(self.home_path, 'build.token'),
433                     },
434                     timeout = 120,
435                     noerrors = True)
436                 if not proc.wait() and out:
437                     slave_token = out.strip()
438                 
439                 if slave_token:
440                     break
441                 else:
442                     time.sleep(2)
443             
444             if slave_token != self._master_token:
445                 # Get buildlog for the error message
446
447                 (buildlog, err), proc = self._popen_ssh_command(
448                     "cat %(buildlog)s" % {
449                         'buildlog' : os.path.join(self.home_path, 'buildlog'),
450                         'buildscript' : os.path.join(self.home_path, 'nepi-build.sh'),
451                     },
452                     timeout = 120,
453                     noerrors = True)
454                 
455                 proc.wait()
456                 
457                 if self.check_bad_host(buildlog, err):
458                     self.node.blacklist()
459                 
460                 raise RuntimeError, "Failed to set up application %s: "\
461                         "build failed, got wrong token from pid %s/%s "\
462                         "(expected %r, got %r), see buildlog at %s:\n%s" % (
463                     self.home_path, pid, ppid, self._master_token, slave_token, self.node.hostname, buildlog)
464
465             self._logger.info("Built %s at %s", self, self.node.hostname)
466
467     def _do_kill_build(self):
468         pid = self._build_pid
469         ppid = self._build_ppid
470         
471         if pid and ppid:
472             self._logger.info("Killing build of %s", self)
473             rspawn.remote_kill(
474                 pid, ppid,
475                 host = self.node.hostname,
476                 port = None,
477                 user = self.node.slicename,
478                 agent = None,
479                 ident_key = self.node.ident_path
480                 )
481         
482         
483     def _do_build_master(self):
484         if not self.sources and not self.build and not self.buildDepends:
485             return None
486             
487         if self.sources:
488             sources = self.sources.split(' ')
489             
490             # Copy all sources
491             try:
492                 self._popen_scp(
493                     sources,
494                     "%s@%s:%s" % (self.node.slicename, self.node.hostname, 
495                         os.path.join(self.home_path,'.'),)
496                     )
497             except RuntimeError, e:
498                 raise RuntimeError, "Failed upload source file %r: %s %s" \
499                         % (sources, e.args[0], e.args[1],)
500             
501         buildscript = cStringIO.StringIO()
502         
503         buildscript.write("(\n")
504         
505         if self.buildDepends:
506             # Install build dependencies
507             buildscript.write(
508                 "sudo -S yum -y install %(packages)s\n" % {
509                     'packages' : self.buildDepends
510                 }
511             )
512         
513             
514         if self.build:
515             # Build sources
516             buildscript.write(
517                 "mkdir -p build && ( cd build && ( %(command)s ) )\n" % {
518                     'command' : self._replace_paths(self.build),
519                     'home' : server.shell_escape(self.home_path),
520                 }
521             )
522         
523             # Make archive
524             buildscript.write("tar czf build.tar.gz build\n")
525         
526         # Write token
527         buildscript.write("echo %(master_token)s > build.token ) ; echo %(master_token)s > build.token.retcode" % {
528             'master_token' : server.shell_escape(self._master_token)
529         })
530         
531         buildscript.seek(0)
532
533         return buildscript
534
535     def _do_install(self):
536         if self.install:
537             self._logger.info("Installing %s", self)
538             
539             # Install application
540             try:
541                 self._popen_ssh_command(
542                     "cd %(home)s && cd build && ( %(command)s ) > ${HOME}/%(home)s/installlog 2>&1 || ( tail ${HOME}/%(home)s/{install,build}log >&2 && false )" % \
543                         {
544                         'command' : self._replace_paths(self.install),
545                         'home' : server.shell_escape(self.home_path),
546                         },
547                     )
548             except RuntimeError, e:
549                 if self.check_bad_host(e.args[0], e.args[1]):
550                     self.node.blacklist()
551                 raise RuntimeError, "Failed install build sources: %s %s" % (e.args[0], e.args[1],)
552
553     def set_master(self, master):
554         self._master = master
555         
556     def install_keys(self, prk, puk, passphrase):
557         # Install keys
558         self._master_passphrase = passphrase
559         self._master_prk = prk
560         self._master_puk = puk
561         self._master_prk_name = os.path.basename(prk.name)
562         self._master_puk_name = os.path.basename(puk.name)
563         
564     def _do_install_keys(self):
565         prk = self._master_prk
566         puk = self._master_puk
567        
568         try:
569             self._popen_scp(
570                 [ prk.name, puk.name ],
571                 '%s@%s:%s' % (self.node.slicename, self.node.hostname, self.home_path )
572                 )
573         except RuntimeError, e:
574             raise RuntimeError, "Failed to set up application deployment keys: %s %s" \
575                     % (e.args[0], e.args[1],)
576
577         try:
578             self._popen_scp(
579                 cStringIO.StringIO('%s,%s %s\n' % (
580                     self._master.node.hostname, socket.gethostbyname(self._master.node.hostname), 
581                     self._master.node.server_key)),
582                 '%s@%s:%s' % (self.node.slicename, self.node.hostname, 
583                     os.path.join(self.home_path,"master_known_hosts") )
584                 )
585         except RuntimeError, e:
586             raise RuntimeError, "Failed to set up application deployment keys: %s %s" \
587                     % (e.args[0], e.args[1],)
588         
589         # No longer need'em
590         self._master_prk = None
591         self._master_puk = None
592     
593     def cleanup(self):
594         # make sure there's no leftover build processes
595         self._do_kill_build()
596
597     @server.eintr_retry
598     def _popen_scp(self, src, dst, retry = 3):
599         while 1:
600             try:
601                 (out,err),proc = server.popen_scp(
602                     src,
603                     dst, 
604                     port = None,
605                     agent = None,
606                     ident_key = self.node.ident_path,
607                     server_key = self.node.server_key
608                     )
609
610                 if server.eintr_retry(proc.wait)():
611                     raise RuntimeError, (out, err)
612                 return (out, err), proc
613             except:
614                 if retry <= 0:
615                     raise
616                 else:
617                     retry -= 1
618   
619
620     @server.eintr_retry
621     def _popen_ssh_command(self, command, retry = 0, noerrors=False, timeout=None):
622         (out,err),proc = server.popen_ssh_command(
623             command,
624             host = self.node.hostname,
625             port = None,
626             user = self.node.slicename,
627             agent = None,
628             ident_key = self.node.ident_path,
629             server_key = self.node.server_key,
630             timeout = timeout,
631             retry = retry
632             )
633
634         if server.eintr_retry(proc.wait)():
635             if not noerrors:
636                 raise RuntimeError, (out, err)
637         return (out, err), proc
638
639 class Application(Dependency):
640     """
641     An application also has dependencies, but also a command to be ran and monitored.
642     
643     It adds the output of that command as traces.
644     """
645     
646     TRACES = ('stdout','stderr','buildlog', 'output')
647     
648     def __init__(self, api=None):
649         super(Application,self).__init__(api)
650         
651         # Attributes
652         self.command = None
653         self.sudo = False
654         
655         self.stdin = None
656         self.stdout = None
657         self.stderr = None
658         self.output = None
659         
660         # Those are filled when the app is started
661         #   Having both pid and ppid makes it harder
662         #   for pid rollover to induce tracking mistakes
663         self._started = False
664         self._pid = None
665         self._ppid = None
666
667         # Do not add to the python path of nodes
668         self.add_to_path = False
669     
670     def __str__(self):
671         return "%s<command:%s%s>" % (
672             self.__class__.__name__,
673             "sudo " if self.sudo else "",
674             self.command,
675         )
676     
677     def start(self):
678         self._logger.info("Starting %s", self)
679         
680         # Create shell script with the command
681         # This way, complex commands and scripts can be ran seamlessly
682         # sync files
683         command = cStringIO.StringIO()
684         command.write('export PYTHONPATH=$PYTHONPATH:%s\n' % (
685             ':'.join(["${HOME}/"+server.shell_escape(s) for s in self.node.pythonpath])
686         ))
687         command.write('export PATH=$PATH:%s\n' % (
688             ':'.join(["${HOME}/"+server.shell_escape(s) for s in self.node.pythonpath])
689         ))
690         if self.node.env:
691             for envkey, envvals in self.node.env.iteritems():
692                 for envval in envvals:
693                     command.write('export %s=%s\n' % (envkey, envval))
694         command.write(self.command)
695         command.seek(0)
696
697         try:
698             self._popen_scp(
699                 command,
700                 '%s@%s:%s' % (self.node.slicename, self.node.hostname, 
701                     os.path.join(self.home_path, "app.sh"))
702                 )
703         except RuntimeError, e:
704             raise RuntimeError, "Failed to set up application: %s %s" \
705                     % (e.args[0], e.args[1],)
706         
707         # Start process in a "daemonized" way, using nohup and heavy
708         # stdin/out redirection to avoid connection issues
709         (out,err),proc = rspawn.remote_spawn(
710             self._replace_paths("bash ./app.sh"),
711             
712             pidfile = './pid',
713             home = self.home_path,
714             stdin = 'stdin' if self.stdin is not None else '/dev/null',
715             stdout = 'stdout' if self.stdout else '/dev/null',
716             stderr = 'stderr' if self.stderr else '/dev/null',
717             sudo = self.sudo,
718             
719             host = self.node.hostname,
720             port = None,
721             user = self.node.slicename,
722             agent = None,
723             ident_key = self.node.ident_path,
724             server_key = self.node.server_key
725             )
726         
727         if proc.wait():
728             if self.check_bad_host(out, err):
729                 self.node.blacklist()
730             raise RuntimeError, "Failed to set up application: %s %s" % (out,err,)
731
732         self._started = True
733     
734     def recover(self):
735         # Assuming the application is running on PlanetLab,
736         # proper pidfiles should be present at the app's home path.
737         # So we mark this application as started, and check the pidfiles
738         self._started = True
739         self.checkpid()
740
741     def checkpid(self):            
742         # Get PID/PPID
743         # NOTE: wait a bit for the pidfile to be created
744         if self._started and not self._pid or not self._ppid:
745             pidtuple = rspawn.remote_check_pid(
746                 os.path.join(self.home_path,'pid'),
747                 host = self.node.hostname,
748                 port = None,
749                 user = self.node.slicename,
750                 agent = None,
751                 ident_key = self.node.ident_path,
752                 server_key = self.node.server_key
753                 )
754             
755             if pidtuple:
756                 self._pid, self._ppid = pidtuple
757     
758     def status(self):
759         self.checkpid()
760         if not self._started:
761             return AS.STATUS_NOT_STARTED
762         elif not self._pid or not self._ppid:
763             return AS.STATUS_NOT_STARTED
764         else:
765             status = rspawn.remote_status(
766                 self._pid, self._ppid,
767                 host = self.node.hostname,
768                 port = None,
769                 user = self.node.slicename,
770                 agent = None,
771                 ident_key = self.node.ident_path,
772                 server_key = self.node.server_key
773                 )
774             
775             if status is rspawn.NOT_STARTED:
776                 return AS.STATUS_NOT_STARTED
777             elif status is rspawn.RUNNING:
778                 return AS.STATUS_RUNNING
779             elif status is rspawn.FINISHED:
780                 return AS.STATUS_FINISHED
781             else:
782                 # WTF?
783                 return AS.STATUS_NOT_STARTED
784     
785     def kill(self):
786         status = self.status()
787         if status == AS.STATUS_RUNNING:
788             # kill by ppid+pid - SIGTERM first, then try SIGKILL
789             rspawn.remote_kill(
790                 self._pid, self._ppid,
791                 host = self.node.hostname,
792                 port = None,
793                 user = self.node.slicename,
794                 agent = None,
795                 ident_key = self.node.ident_path,
796                 server_key = self.node.server_key,
797                 sudo = self.sudo
798                 )
799             self._logger.info("Killed %s", self)
800
801
class NepiDependency(Dependency):
    """
    This dependency adds nepi itself to the python path,
    so that you may run testbeds within PL nodes.

    The locally installed nepi package is packed into a temporary
    .tar.gz file, which becomes the dependency's sources. The build step
    just moves that tarball into the build directory and the install step
    unpacks it.
    """
    
    # Class attribute holding a *weak* reference to the shared NEPI tar file
    # so that all instances may share it. Don't operate on the file itself,
    # it would be a mess, just use its path.
    _shared_nepi_tar = None
    
    def __init__(self, api = None):
        super(NepiDependency, self).__init__(api)
        
        # Strong reference to the tarball; keeps the temporary file alive
        # for as long as this dependency exists
        self._tarball = None
        
        self.depends = 'python python-ipaddr python-setuptools'
        
        # our sources are in our ad-hoc tarball
        # (accessing self.tarball builds it if nobody has done so yet)
        self.sources = self.tarball.name
        
        tarname = os.path.basename(self.tarball.name)
        
        # it's already built - just move the tarball into place
        self.build = "mv -f ${SOURCES}/%s ." % (tarname,)
        
        # unpack it into sources, and we're done
        self.install = "tar xzf ${BUILD}/%s -C .." % (tarname,)
    
    @property
    def tarball(self):
        """
        Temporary file object holding the packed nepi sources.

        The first instance to need it builds the tarball and publishes a
        weak reference on the class; later instances reuse that file while
        at least one instance still holds it.

        Raises RuntimeError if the tar command fails.
        """
        if self._tarball is None:
            import weakref
            
            shared_ref = self._shared_nepi_tar
            shared_tar = shared_ref() if shared_ref is not None else None
            
            if shared_tar is None:
                # Nobody built it yet (or the last holder is gone)
                shared_tar = self._build_tarball()
                
                # Publish on the class, not the instance, so that other
                # instances actually find it; weakly, so that the file
                # disappears along with its last user
                NepiDependency._shared_nepi_tar = weakref.ref(shared_tar)
            
            self._tarball = shared_tar
        
        return self._tarball
    
    @staticmethod
    def _build_tarball():
        """
        Pack the nepi package into a fresh temporary tar.gz and return the
        temporary file object. Raises RuntimeError if tar fails.
        """
        # Build an ad-hoc tarball
        # Prebuilt
        import nepi
        import tempfile
        
        tar_file = tempfile.NamedTemporaryFile(prefix='nepi-src-', suffix='.tar.gz')
        
        # directory that contains the nepi package
        source_root = os.path.join(os.path.dirname(os.path.dirname(nepi.__file__)),'.')
        
        # Nested with-blocks make sure the null device handles get closed
        with open(os.devnull, "w") as null_out:
            with open(os.devnull, "r") as null_in:
                proc = subprocess.Popen(
                    ["tar", "czf", tar_file.name, 
                        '-C', source_root, 
                        'nepi'],
                    stdout = null_out,
                    stdin = null_in)
                failed = proc.wait()
        
        if failed:
            # closing a NamedTemporaryFile removes it from disk
            tar_file.close()
            raise RuntimeError("Failed to create nepi tarball")
        
        return tar_file

858
class NS3Dependency(Dependency):
    """
    This dependency adds NS3 libraries to the library paths,
    so that you may run the NS3 testbed within PL nodes.
    
    You'll also need the NepiDependency.

    Both the build and the install commands first check whether a working
    NS3 installation is already in place, and do nothing in that case.
    """
    
    def __init__(self, api = None):
        super(NS3Dependency, self).__init__(api)
        
        self.buildDepends = 'make waf gcc gcc-c++ gccxml unzip'
        
        # We have to download the sources, untar, build...
        pybindgen_source_url = "http://yans.pl.sophia.inria.fr/trac/nepi/raw-attachment/wiki/WikiStart/pybindgen-r794.tar.gz"
        pygccxml_source_url = "http://leaseweb.dl.sourceforge.net/project/pygccxml/pygccxml/pygccxml-1.0/pygccxml-1.0.0.zip"
        ns3_source_url = "http://yans.pl.sophia.inria.fr/code/hgwebdir.cgi/ns-3.11-nepi/archive/tip.tar.gz"
        passfd_source_url = "http://yans.pl.sophia.inria.fr/code/hgwebdir.cgi/python-passfd/archive/tip.tar.gz"
        
        # Shell check shared by build and install: succeeds only when the
        # python helpers import and the ns3 libraries and bindings are usable
        ns3_ready_check = (
            " ( "
            "  cd .. && "
            "  python -c 'import pygccxml, pybindgen, passfd' && "
            "  test -f lib/ns/_core.so && "
            "  test -f lib/ns/__init__.py && "
            "  test -f lib/ns/core.py && "
            "  test -f lib/libns3-core.so && "
            "  LD_LIBRARY_PATH=lib PYTHONPATH=lib python -c 'import ns.core' "
            " ) "
        )
        
        self.build = ns3_ready_check + (
            "|| ( "
                # Not working, rebuild
                     # Archive SHA1 sums to check
                     "echo '7158877faff2254e6c094bf18e6b4283cac19137  pygccxml-1.0.0.zip' > archive_sums.txt && "
                     "echo 'a18c2ccffd0df517bc37e2f3a2475092517c43f2  pybindgen-src.tar.gz' >> archive_sums.txt && "
                     " ( " # check existing files
                     " sha1sum -c archive_sums.txt && "
                     " test -f passfd-src.tar.gz && "
                     " test -f ns3-src.tar.gz "
                     " ) || ( " # nope? re-download
                     # remove every archive that gets downloaded below, so a
                     # corrupt one cannot survive and fail the checksum again
                     " rm -f pybindgen-src.tar.gz pygccxml-1.0.0.zip passfd-src.tar.gz ns3-src.tar.gz && "
                     " wget -q -c -O pybindgen-src.tar.gz %(pybindgen_source_url)s && " # continue, to exploit the case when it has already been dl'ed
                     " wget -q -c -O pygccxml-1.0.0.zip %(pygccxml_source_url)s && " 
                     " wget -q -c -O passfd-src.tar.gz %(passfd_source_url)s && "
                     " wget -q -c -O ns3-src.tar.gz %(ns3_source_url)s && "  
                     " sha1sum -c archive_sums.txt " # Check SHA1 sums when applicable
                     " ) && "
                     "unzip -n pygccxml-1.0.0.zip && "
                     "mkdir -p pybindgen-src && "
                     "mkdir -p ns3-src && "
                     "mkdir -p passfd-src && "
                     "tar xzf ns3-src.tar.gz --strip-components=1 -C ns3-src && "
                     "tar xzf passfd-src.tar.gz --strip-components=1 -C passfd-src && "
                     "tar xzf pybindgen-src.tar.gz --strip-components=1 -C pybindgen-src && "
                     "rm -rf target && "    # mv doesn't like unclean targets
                     "mkdir -p target && "
                     "cd pygccxml-1.0.0 && "
                     "rm -rf unittests docs && " # pygccxml has ~100M of unit tests - excessive - docs aren't needed either
                     "python setup.py build && "
                     "python setup.py install --install-lib ${BUILD}/target && "
                     "python setup.py clean && "
                     "cd ../pybindgen-src && "
                     "export PYTHONPATH=$PYTHONPATH:${BUILD}/target && "
                     "./waf configure --prefix=${BUILD}/target -d release && "
                     "./waf && "
                     "./waf install && "
                     "./waf clean && "
                     "mv -f ${BUILD}/target/lib/python*/site-packages/pybindgen ${BUILD}/target/. && "
                     "rm -rf ${BUILD}/target/lib && "
                     "cd ../passfd-src && "
                     "python setup.py build && "
                     "python setup.py install --install-lib ${BUILD}/target && "
                     "python setup.py clean && "
                     "cd ../ns3-src && "
                     "./waf configure --prefix=${BUILD}/target --with-pybindgen=../pybindgen-src -d release --disable-examples --disable-tests && "
                     "./waf &&"
                     "./waf install && "
                     "rm -f ${BUILD}/target/lib/*.so && "
                     "cp -a ${BUILD}/ns3-src/build/release/libns3*.so ${BUILD}/target/lib && "
                     "cp -a ${BUILD}/ns3-src/build/release/bindings/python/ns ${BUILD}/target/lib &&"
                     "./waf clean "
             " )"
                     ) % dict(
                        pybindgen_source_url = server.shell_escape(pybindgen_source_url),
                        pygccxml_source_url = server.shell_escape(pygccxml_source_url),
                        ns3_source_url = server.shell_escape(ns3_source_url),
                        passfd_source_url = server.shell_escape(passfd_source_url),
                     )
        
        # Just move ${BUILD}/target
        self.install = ns3_ready_check + (
            "|| ( "
                # Not working, reinstall
                    "test -d ${BUILD}/target && "
                    "[[ \"x\" != \"x$(find ${BUILD}/target -mindepth 1 -print -quit)\" ]] &&"
                    "( for i in ${BUILD}/target/* ; do rm -rf ${SOURCES}/${i##*/} ; done ) && " # mv doesn't like unclean targets
                    "mv -f ${BUILD}/target/* ${SOURCES}"
            " )"
        )
        
        # Set extra environment paths
        self.env['NEPI_NS3BINDINGS'] = "${SOURCES}/lib"
        self.env['NEPI_NS3LIBRARY'] = "${SOURCES}/lib"
    
    @property
    def tarball(self):
        """
        Temporary file object holding a tar.gz of the nepi package.

        Raises RuntimeError if the tar command fails.
        """
        # NOTE(review): this property mirrors NepiDependency.tarball, but
        # nothing in this class defines _tarball or _shared_nepi_tar -
        # confirm the base class provides them, or that this is unused.
        if self._tarball is None:
            shared_tar = self._shared_nepi_tar and self._shared_nepi_tar()
            if shared_tar is not None:
                self._tarball = shared_tar
            else:
                # Build an ad-hoc tarball
                # Prebuilt
                import nepi
                import tempfile
                
                shared_tar = tempfile.NamedTemporaryFile(prefix='nepi-src-', suffix='.tar.gz')
                
                # Nested with-blocks make sure the null device handles
                # get closed once tar is done
                with open(os.devnull, "w") as null_out:
                    with open(os.devnull, "r") as null_in:
                        proc = subprocess.Popen(
                            ["tar", "czf", shared_tar.name, 
                                '-C', os.path.join(os.path.dirname(os.path.dirname(nepi.__file__)),'.'), 
                                'nepi'],
                            stdout = null_out,
                            stdin = null_in)
                        failed = proc.wait()

                if failed:
                    raise RuntimeError("Failed to create nepi tarball")
                
                self._tarball = self._shared_nepi_tar = shared_tar
                
        return self._tarball

995
class YumDependency(Dependency):
    """
    This dependency is an internal helper class used to
    efficiently distribute yum-downloaded rpms.
    
    It temporarily sets the yum cache as persistent in the
    build master, and installs all the required packages.
    
    The rpm packages left in the yum cache are gathered and
    distributed by the underlying Dependency in an efficient
    manner. Build slaves will then install those rpms back in
    the cache before issuing the install command.
    
    When packages have been installed already, nothing but an
    empty tar is distributed.

    The build and install commands are computed from self.depends
    every time they are read; assigning to them is silently ignored.
    """
    
    # Class attribute holding a *weak* reference to the shared NEPI tar file
    # so that they may share it. Don't operate on the file itself, it would
    # be a mess, just use its path.
    # NOTE(review): nothing in this class reads this attribute; it looks
    # like a leftover - confirm before removing.
    _shared_nepi_tar = None
    
    # Yum output that check_bad_host() treats as the sign of a bad host.
    # The parentheses around repomd.xml are matched literally (and are
    # optional, so the message is recognized with or without them).
    _BAD_HOST_RE = re.compile(r'(?:'
                       r'The GPG keys listed for the ".*" repository are already installed but they are not correct for this package'
                       r'|Error: Cannot retrieve repository metadata \(?repomd[.]xml\)? for repository: .*[.] Please verify its path and try again'
                       r'|Error: disk I/O error'
                       r')', 
                       re.I)
    
    def _canonical_depends(self):
        """
        Return self.depends as a sorted, single-space separated package
        list (an empty string when there are no dependencies).
        """
        # split() without arguments also collapses repeated whitespace,
        # so the result never contains empty package names
        return ' '.join( sorted( (self.depends or "").split() ) )
    
    def _build_get(self):
        """
        Build command: installs the packages with the yum cache kept, and
        packs the cached rpms into ${BUILD}/packages.tar.
        """
        # canonical representation of dependencies
        depends = self._canonical_depends()
        
        # download rpms and pack into a tar archive
        return (
            "sudo -S nice yum -y makecache && "
            "sudo -S sed -i -r 's/keepcache *= *0/keepcache=1/' /etc/yum.conf && "
            " ( ( "
                "sudo -S nice yum -y install %s ; "
                "rm -f ${BUILD}/packages.tar ; "
                "tar -C /var/cache/yum -rf ${BUILD}/packages.tar $(cd /var/cache/yum ; find -iname '*.rpm')"
            " ) || /bin/true ) && "
            "sudo -S sed -i -r 's/keepcache *= *1/keepcache=0/' /etc/yum.conf && "
            "( sudo -S nice yum -y clean packages || /bin/true ) "
        ) % ( depends, )
    def _build_set(self, value):
        # ignore
        return
    build = property(_build_get, _build_set)
    
    def _install_get(self):
        """
        Install command: unpacks packages.tar into the yum cache, installs
        the packages and cleans the cache afterwards.
        """
        # canonical representation of dependencies
        depends = self._canonical_depends()
        
        # unpack cached rpms into yum cache, install, and cleanup
        return (
            "sudo -S tar -k --keep-newer-files -C /var/cache/yum -xf packages.tar && "
            "sudo -S nice yum -y install %s && "
            "( sudo -S nice yum -y clean packages || /bin/true ) "
        ) % ( depends, )
    def _install_set(self, value):
        # ignore
        return
    install = property(_install_get, _install_set)
        
    def check_bad_host(self, out, err):
        """
        Tell whether the given command output reveals a broken host.

        out and err are searched (case-insensitively) for the known yum
        failure messages. Returns a match object when one of them is
        found in either text, a false value otherwise.
        """
        badre = self._BAD_HOST_RE
        return badre.search(out) or badre.search(err)