Make application deployment more robust.
[nepi.git] / src / nepi / testbeds / planetlab / application.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from constants import TESTBED_ID
5 import plcapi
6 import operator
7 import os
8 import os.path
9 import sys
10 import nepi.util.server as server
11 import cStringIO
12 import subprocess
13 import rspawn
14 import random
15 import time
16 import socket
17 import threading
18 import logging
19 import re
20
21 from nepi.util.constants import ApplicationStatus as AS
22
23 class Dependency(object):
24     """
25     A Dependency is in every respect like an application.
26     
27     It depends on some packages, it may require building binaries, it must deploy
28     them...
29     
30     But it has no command. Dependencies aren't ever started, or stopped, and have
31     no status.
32     """
33
34     TRACES = ('buildlog')
35
    def __init__(self, api=None):
        """
        Create an unconfigured dependency, not yet bound to any node.

        api: PLCAPI handle to use. When omitted (or false), a new
            plcapi.PLCAPI() is instantiated.
        """
        if not api:
            api = plcapi.PLCAPI()
        self._api = api
        
        # Attributes
        self.command = None
        self.sudo = False
        
        # Build/install recipe: shell commands, package lists and
        # space-separated source files
        self.build = None
        self.install = None
        self.depends = None
        self.buildDepends = None
        self.sources = None
        self.rpmFusion = False
        self.env = {}
        
        self.stdin = None
        self.stdout = None
        self.stderr = None
        self.buildlog = None
        
        self.add_to_path = True
        
        # Those are filled when the app is configured
        self.home_path = None
        
        # Those are filled when an actual node is connected
        self.node = None
        
        # Those are filled when the app is started
        #   Having both pid and ppid makes it harder
        #   for pid rollover to induce tracking mistakes
        self._started = False
        self._setup = False
        # Background thread created by async_setup()
        self._setuper = None
        self._pid = None
        self._ppid = None

        # Spanning tree deployment
        self._master = None
        self._master_passphrase = None
        self._master_prk = None
        self._master_puk = None
        # Random per-instance token (8 random bytes, hex-encoded). The build
        # scripts write it to build.token and _do_wait_build compares it.
        self._master_token = os.urandom(8).encode("hex")
        self._build_pid = None
        self._build_ppid = None
        
        # Logging
        self._logger = logging.getLogger('nepi.testbeds.planetlab')
86         
87     
88     def __str__(self):
89         return "%s<%s>" % (
90             self.__class__.__name__,
91             ' '.join(filter(bool,(self.depends, self.sources)))
92         )
93     
94     def validate(self):
95         if self.home_path is None:
96             raise AssertionError, "Misconfigured application: missing home path"
97         if self.node.ident_path is None or not os.access(self.node.ident_path, os.R_OK):
98             raise AssertionError, "Misconfigured application: missing slice SSH key"
99         if self.node is None:
100             raise AssertionError, "Misconfigured application: unconnected node"
101         if self.node.hostname is None:
102             raise AssertionError, "Misconfigured application: misconfigured node"
103         if self.node.slicename is None:
104             raise AssertionError, "Misconfigured application: unspecified slice"
105     
    def check_bad_host(self, out, err):
        """
        Called whenever an operation fails, it's given the output to be checked for
        telltale signs of unhealthy hosts.

        out, err: the output and error text captured from the failed operation.

        Returns a true value when the host looks unhealthy; the callers in
        this class then blacklist the node. This implementation never flags
        a host.
        """
        return False
112     
113     def remote_trace_path(self, whichtrace):
114         if whichtrace in self.TRACES:
115             tracefile = os.path.join(self.home_path, whichtrace)
116         else:
117             tracefile = None
118         
119         return tracefile
120
121     def remote_trace_name(self, whichtrace):
122         if whichtrace in self.TRACES:
123             return whichtrace
124         return None
125
126     def sync_trace(self, local_dir, whichtrace):
127         tracefile = self.remote_trace_path(whichtrace)
128         if not tracefile:
129             return None
130         
131         local_path = os.path.join(local_dir, tracefile)
132         
133         # create parent local folders
134         proc = subprocess.Popen(
135             ["mkdir", "-p", os.path.dirname(local_path)],
136             stdout = open("/dev/null","w"),
137             stdin = open("/dev/null","r"))
138
139         if proc.wait():
140             raise RuntimeError, "Failed to synchronize trace"
141         
142         # sync files
143         try:
144             self._popen_scp(
145                 '%s@%s:%s' % (self.node.slicename, self.node.hostname,
146                     tracefile),
147                 local_path
148                 )
149         except RuntimeError, e:
150             raise RuntimeError, "Failed to synchronize trace: %s %s" \
151                     % (e.args[0], e.args[1],)
152         
153         return local_path
154     
    def recover(self):
        """
        Mark this dependency as already set up, without contacting the node.
        """
        # We assume a correct deployment, so recovery only
        # means we mark this dependency as deployed
        self._setup = True
159
    def setup(self):
        """
        Deploy this dependency synchronously: create the home directory,
        launch the build, wait for it and install the result.

        The setup flag is only raised once every step has returned, so an
        exception in any of them leaves the object marked as not set up.
        """
        self._logger.info("Setting up %s", self)
        self._make_home()
        self._launch_build()
        self._finish_build()
        self._setup = True
166     
167     def async_setup(self):
168         if not self._setuper:
169             def setuper():
170                 try:
171                     self.setup()
172                 except:
173                     self._setuper._exc.append(sys.exc_info())
174             self._setuper = threading.Thread(
175                 target = setuper)
176             self._setuper._exc = []
177             self._setuper.start()
178     
    def async_setup_wait(self):
        """
        Block until this dependency is set up.

        Does nothing if it is already set up. If async_setup() created a
        background thread, joins it; when the setup did not complete, the
        first exception captured by the thread is re-raised with its original
        traceback, or a RuntimeError if none was captured. If no thread was
        created, setup() is run right here.
        """
        if not self._setup:
            self._logger.info("Waiting for %s to be setup", self)
            if self._setuper:
                self._setuper.join()
                if not self._setup:
                    if self._setuper._exc:
                        # Re-raise in this thread what setup() raised in the other one
                        exctyp,exval,exctrace = self._setuper._exc[0]
                        raise exctyp,exval,exctrace
                    else:
                        raise RuntimeError, "Failed to setup application"
                else:
                    self._logger.info("Setup ready: %s at %s", self, self.node.hostname)
            else:
                # Nobody started an asynchronous setup: do it synchronously
                self.setup()
194         
195     def _make_home(self):
196         # Make sure all the paths are created where 
197         # they have to be created for deployment
198         # sync files
199         try:
200             self._popen_ssh_command(
201                 "mkdir -p %(home)s && ( rm -f %(home)s/{pid,build-pid,nepi-build.sh} >/dev/null 2>&1 || /bin/true )" \
202                     % { 'home' : server.shell_escape(self.home_path) },
203                 timeout = 120,
204                 retry = 3
205                 )
206         except RuntimeError, e:
207             raise RuntimeError, "Failed to set up application %s: %s %s" % (self.home_path, e.args[0], e.args[1],)
208         
209         if self.stdin:
210             # Write program input
211             try:
212                 self._popen_scp(
213                     cStringIO.StringIO(self.stdin),
214                     '%s@%s:%s' % (self.node.slicename, self.node.hostname, 
215                         os.path.join(self.home_path, 'stdin') ),
216                     )
217             except RuntimeError, e:
218                 raise RuntimeError, "Failed to set up application %s: %s %s" \
219                         % (self.home_path, e.args[0], e.args[1],)
220
221     def _replace_paths(self, command):
222         """
223         Replace all special path tags with shell-escaped actual paths.
224         """
225         # need to append ${HOME} if paths aren't absolute, to MAKE them absolute.
226         root = '' if self.home_path.startswith('/') else "${HOME}/"
227         return ( command
228             .replace("${SOURCES}", root+server.shell_escape(self.home_path))
229             .replace("${BUILD}", root+server.shell_escape(os.path.join(self.home_path,'build'))) )
230
231     def _launch_build(self):
232         if self._master is not None:
233             self._do_install_keys()
234             buildscript = self._do_build_slave()
235         else:
236             buildscript = self._do_build_master()
237             
238         if buildscript is not None:
239             self._logger.info("Building %s at %s", self, self.node.hostname)
240             
241             # upload build script
242             try:
243                 self._popen_scp(
244                     buildscript,
245                     '%s@%s:%s' % (self.node.slicename, self.node.hostname, 
246                         os.path.join(self.home_path, 'nepi-build.sh') )
247                     )
248             except RuntimeError, e:
249                 raise RuntimeError, "Failed to set up application %s: %s %s" \
250                         % (self.home_path, e.args[0], e.args[1],)
251             
252             # launch build
253             self._do_launch_build()
254     
    def _finish_build(self):
        """
        Wait for the launched build (if any) to finish, then run the
        install step.
        """
        self._do_wait_build()
        self._do_install()
258
    def _do_build_slave(self):
        """
        Compose the script a slave node runs to fetch the build from its master.

        The script starts an ssh-agent loaded with the deployment key, waits
        until the master's build.token.retcode file holds the master's token,
        checks the master's build.token, copies the source files and/or
        build.tar.gz from the master with scp, unpacks the archive and
        writes this object's own token files. The agent is killed and the keys
        and helper files removed whether or not those steps succeed.

        Returns a file-like object holding the script, or None when there
        are neither sources nor a build command.
        """
        if not self.sources and not self.build:
            return None
            
        # Create build script
        # Remote (scp-style) locations of everything to fetch from the master
        files = set()
        
        if self.sources:
            sources = self.sources.split(' ')
            files.update(
                "%s@%s:%s" % (self._master.node.slicename, self._master.node.hostname, 
                    os.path.join(self._master.home_path, os.path.basename(source)),)
                for source in sources
            )
        
        if self.build:
            files.add(
                "%s@%s:%s" % (self._master.node.slicename, self._master.node.hostname, 
                    os.path.join(self._master.home_path, 'build.tar.gz'),)
            )
        
        sshopts = "-o ConnectTimeout=30 -o ConnectionAttempts=3 -o ServerAliveInterval=30 -o TCPKeepAlive=yes"
        
        # Start an ssh-agent, feed it the private key (the passphrase comes
        # from the NEPI_MASTER_PASSPHRASE environment variable through an
        # askpass helper) and remove the key files once they are loaded
        launch_agent = "{ ( echo -e '#!/bin/sh\\ncat' > .ssh-askpass ) && chmod u+x .ssh-askpass"\
                        " && export SSH_ASKPASS=$(pwd)/.ssh-askpass "\
                        " && ssh-agent > .ssh-agent.sh ; } && . ./.ssh-agent.sh && ( echo $NEPI_MASTER_PASSPHRASE | ssh-add %(prk)s ) && rm -rf %(prk)s %(puk)s" %  \
        {
            'prk' : server.shell_escape(self._master_prk_name),
            'puk' : server.shell_escape(self._master_puk_name),
        }
        
        kill_agent = "kill $SSH_AGENT_PID"
        
        # Ping the master, then poll (every 5 seconds) until its
        # build.token.retcode holds the master's token, and finally make sure
        # its build.token holds that token too
        waitmaster = (
            "{ "
            "echo 'Checking master reachability' ; "
            "if ping -c 3 %(master_host)s ; then "
            "echo 'Master node reachable' ; "
            "else "
            "echo 'MASTER NODE UNREACHABLE' && "
            "exit 1 ; "
            "fi ; "
            ". ./.ssh-agent.sh ; "
            "while [[ $(. ./.ssh-agent.sh > /dev/null ; ssh -q -o UserKnownHostsFile=%(hostkey)s %(sshopts)s %(master)s cat %(token_path)s.retcode || /bin/true) != %(token)s ]] ; do sleep 5 ; done ; "
            "if [[ $(. ./.ssh-agent.sh > /dev/null ; ssh -q -o UserKnownHostsFile=%(hostkey)s %(sshopts)s %(master)s cat %(token_path)s || /bin/true) != %(token)s ]] ; then echo BAD TOKEN ; exit 1 ; fi ; "
            "}" 
        ) % {
            'hostkey' : 'master_known_hosts',
            'master' : "%s@%s" % (self._master.node.slicename, self._master.node.hostname),
            'master_host' : self._master.node.hostname,
            'token_path' : os.path.join(self._master.home_path, 'build.token'),
            'token' : server.shell_escape(self._master._master_token),
            'sshopts' : sshopts,
        }
        
        # Copy the files over, unpack the build and write this object's own
        # token (not the master's) into the local token files
        syncfiles = ". ./.ssh-agent.sh && scp -p -o UserKnownHostsFile=%(hostkey)s %(sshopts)s %(files)s ." % {
            'hostkey' : 'master_known_hosts',
            'files' : ' '.join(files),
            'sshopts' : sshopts,
        }
        if self.build:
            syncfiles += " && tar xzf build.tar.gz"
        syncfiles += " && ( echo %s > build.token )" % (server.shell_escape(self._master_token),)
        syncfiles += " && ( echo %s > build.token.retcode )" % (server.shell_escape(self._master_token),)
        syncfiles = "{ . ./.ssh-agent.sh ; %s ; }" % (syncfiles,)
        
        cleanup = "{ . ./.ssh-agent.sh ; kill $SSH_AGENT_PID ; rm -rf %(prk)s %(puk)s master_known_hosts .ssh-askpass ; }" % {
            'prk' : server.shell_escape(self._master_prk_name),
            'puk' : server.shell_escape(self._master_puk_name),
        }
        
        # cleanup appears twice so that it also runs when an earlier step
        # fails; build.token.retcode is written in either case
        # (the 'home' entry below is not referenced by the template)
        slavescript = "( ( %(launch_agent)s && %(waitmaster)s && %(syncfiles)s && %(kill_agent)s && %(cleanup)s ) || %(cleanup)s ) ; echo %(token)s > build.token.retcode" % {
            'waitmaster' : waitmaster,
            'syncfiles' : syncfiles,
            'cleanup' : cleanup,
            'kill_agent' : kill_agent,
            'launch_agent' : launch_agent,
            'home' : server.shell_escape(self.home_path),
            'token' : server.shell_escape(self._master_token),
        }
        
        return cStringIO.StringIO(slavescript)
341          
    def _do_launch_build(self):
        """
        Start the uploaded nepi-build.sh on the node and record its pid/ppid.

        The script is spawned through rspawn with its output going to the
        'buildlog' file in the home path. When a master passphrase is set, it
        is handed to the script in the NEPI_MASTER_PASSPHRASE variable.

        Raises RuntimeError if spawning fails (blacklisting the node when
        check_bad_host() flags it) or if the pid file cannot be read after
        5 attempts.
        """
        script = "bash ./nepi-build.sh"
        if self._master_passphrase:
            script = "NEPI_MASTER_PASSPHRASE=%s %s" % (
                server.shell_escape(self._master_passphrase),
                script
            )
        (out,err),proc = rspawn.remote_spawn(
            script,
            pidfile = 'build-pid',
            home = self.home_path,
            stdin = '/dev/null',
            stdout = 'buildlog',
            stderr = rspawn.STDOUT,
            
            host = self.node.hostname,
            port = None,
            user = self.node.slicename,
            agent = None,
            ident_key = self.node.ident_path,
            server_key = self.node.server_key
            )
        
        if proc.wait():
            if self.check_bad_host(out, err):
                self.node.blacklist()
            raise RuntimeError, "Failed to set up build slave %s: %s %s" % (self.home_path, out,err,)
        
        
        # Read back the pid file, sleeping a little longer after each miss
        pid = ppid = None
        delay = 1.0
        for i in xrange(5):
            pidtuple = rspawn.remote_check_pid(
                os.path.join(self.home_path,'build-pid'),
                host = self.node.hostname,
                port = None,
                user = self.node.slicename,
                agent = None,
                ident_key = self.node.ident_path,
                server_key = self.node.server_key
                )
            
            if pidtuple:
                pid, ppid = pidtuple
                self._build_pid, self._build_ppid = pidtuple
                break
            else:
                time.sleep(delay)
                delay = min(30,delay*1.2)
        else:
            # Only reached when the loop ran out of attempts without a break
            raise RuntimeError, "Failed to set up build slave %s: cannot get pid" % (self.home_path,)

        self._logger.info("Deploying %s at %s", self, self.node.hostname)
395         
    def _do_wait_build(self):
        """
        Wait for the launched build to finish and verify that it succeeded.

        Does nothing when no build pid/ppid was recorded. Otherwise polls the
        build process until it is reported finished, then reads build.token
        from the home path and compares it against this object's token.

        Raises RuntimeError, including the remote buildlog in the message,
        when the token does not match; the node is blacklisted first if
        check_bad_host() flags the buildlog.
        """
        pid = self._build_pid
        ppid = self._build_ppid
        
        if pid and ppid:
            delay = 1.0
            first = True
            # Consecutive polls that reported neither FINISHED nor RUNNING
            bustspin = 0
            while True:
                status = rspawn.remote_status(
                    pid, ppid,
                    host = self.node.hostname,
                    port = None,
                    user = self.node.slicename,
                    agent = None,
                    ident_key = self.node.ident_path,
                    server_key = self.node.server_key
                    )
                
                if status is rspawn.FINISHED:
                    self._build_pid = self._build_ppid = None
                    break
                elif status is not rspawn.RUNNING:
                    # Unexpected status: retry after a longer, randomized
                    # pause, and stop polling after more than 12 in a row
                    bustspin += 1
                    time.sleep(delay*(5.5+random.random()))
                    if bustspin > 12:
                        self._build_pid = self._build_ppid = None
                        break
                else:
                    if first:
                        self._logger.info("Waiting for %s to finish building at %s %s", self, self.node.hostname,
                            "(build slave)" if self._master is not None else "(build master)")
                        
                        first = False
                    # Still running: randomized sleep, growing up to 30
                    time.sleep(delay*(0.5+random.random()))
                    delay = min(30,delay*1.2)
                    bustspin = 0
            
            # check build token
            # (up to 3 attempts at reading a non-empty token)
            slave_token = ""
            for i in xrange(3):
                (out, err), proc = self._popen_ssh_command(
                    "cat %(token_path)s" % {
                        'token_path' : os.path.join(self.home_path, 'build.token'),
                    },
                    timeout = 120,
                    noerrors = True)
                if not proc.wait() and out:
                    slave_token = out.strip()
                
                if slave_token:
                    break
                else:
                    time.sleep(2)
            
            if slave_token != self._master_token:
                # Get buildlog for the error message
                # (the 'buildscript' entry is not referenced by the template)

                (buildlog, err), proc = self._popen_ssh_command(
                    "cat %(buildlog)s" % {
                        'buildlog' : os.path.join(self.home_path, 'buildlog'),
                        'buildscript' : os.path.join(self.home_path, 'nepi-build.sh'),
                    },
                    timeout = 120,
                    noerrors = True)
                
                proc.wait()
                
                if self.check_bad_host(buildlog, err):
                    self.node.blacklist()
                
                raise RuntimeError, "Failed to set up application %s: "\
                        "build failed, got wrong token from pid %s/%s "\
                        "(expected %r, got %r), see buildlog at %s:\n%s" % (
                    self.home_path, pid, ppid, self._master_token, slave_token, self.node.hostname, buildlog)

            self._logger.info("Built %s at %s", self, self.node.hostname)
473
474     def _do_kill_build(self):
475         pid = self._build_pid
476         ppid = self._build_ppid
477         
478         if pid and ppid:
479             self._logger.info("Killing build of %s", self)
480             rspawn.remote_kill(
481                 pid, ppid,
482                 host = self.node.hostname,
483                 port = None,
484                 user = self.node.slicename,
485                 agent = None,
486                 ident_key = self.node.ident_path
487                 )
488         
489         
490     def _do_build_master(self):
491         if not self.sources and not self.build and not self.buildDepends:
492             return None
493             
494         if self.sources:
495             sources = self.sources.split(' ')
496             
497             # Copy all sources
498             try:
499                 self._popen_scp(
500                     sources,
501                     "%s@%s:%s" % (self.node.slicename, self.node.hostname, 
502                         os.path.join(self.home_path,'.'),)
503                     )
504             except RuntimeError, e:
505                 raise RuntimeError, "Failed upload source file %r: %s %s" \
506                         % (sources, e.args[0], e.args[1],)
507             
508         buildscript = cStringIO.StringIO()
509         
510         buildscript.write("(\n")
511         
512         if self.buildDepends:
513             # Install build dependencies
514             buildscript.write(
515                 "sudo -S yum -y install %(packages)s\n" % {
516                     'packages' : self.buildDepends
517                 }
518             )
519         
520             
521         if self.build:
522             # Build sources
523             buildscript.write(
524                 "mkdir -p build && ( cd build && ( %(command)s ) )\n" % {
525                     'command' : self._replace_paths(self.build),
526                     'home' : server.shell_escape(self.home_path),
527                 }
528             )
529         
530             # Make archive
531             buildscript.write("tar czf build.tar.gz build\n")
532         
533         # Write token
534         buildscript.write("echo %(master_token)s > build.token ) ; echo %(master_token)s > build.token.retcode" % {
535             'master_token' : server.shell_escape(self._master_token)
536         })
537         
538         buildscript.seek(0)
539
540         return buildscript
541
542     def _do_install(self):
543         if self.install:
544             self._logger.info("Installing %s at %s", self, self.node.hostname)
545             
546             # Install application
547             try:
548                 self._popen_ssh_command(
549                     "cd %(home)s && cd build && ( %(command)s ) > ${HOME}/%(home)s/installlog 2>&1 || ( tail ${HOME}/%(home)s/{install,build}log >&2 && false )" % \
550                         {
551                         'command' : self._replace_paths(self.install),
552                         'home' : server.shell_escape(self.home_path),
553                         },
554                     )
555             except RuntimeError, e:
556                 if self.check_bad_host(e.args[0], e.args[1]):
557                     self.node.blacklist()
558                 raise RuntimeError, "Failed install build sources: %s %s" % (e.args[0], e.args[1],)
559
    def set_master(self, master):
        """
        Set the object this one fetches its sources and build from.

        With a master set, _launch_build() uses the slave build script
        instead of the master one.
        """
        self._master = master
562         
563     def install_keys(self, prk, puk, passphrase):
564         # Install keys
565         self._master_passphrase = passphrase
566         self._master_prk = prk
567         self._master_puk = puk
568         self._master_prk_name = os.path.basename(prk.name)
569         self._master_puk_name = os.path.basename(puk.name)
570         
571     def _do_install_keys(self):
572         prk = self._master_prk
573         puk = self._master_puk
574        
575         try:
576             self._popen_scp(
577                 [ prk.name, puk.name ],
578                 '%s@%s:%s' % (self.node.slicename, self.node.hostname, self.home_path )
579                 )
580         except RuntimeError, e:
581             raise RuntimeError, "Failed to set up application deployment keys: %s %s" \
582                     % (e.args[0], e.args[1],)
583
584         try:
585             self._popen_scp(
586                 cStringIO.StringIO('%s,%s %s\n' % (
587                     self._master.node.hostname, socket.gethostbyname(self._master.node.hostname), 
588                     self._master.node.server_key)),
589                 '%s@%s:%s' % (self.node.slicename, self.node.hostname, 
590                     os.path.join(self.home_path,"master_known_hosts") )
591                 )
592         except RuntimeError, e:
593             raise RuntimeError, "Failed to set up application deployment keys: %s %s" \
594                     % (e.args[0], e.args[1],)
595         
596         # No longer need'em
597         self._master_prk = None
598         self._master_puk = None
599     
    def cleanup(self):
        """
        Release what this object may still hold on the node: kills the
        build process if one is still recorded.
        """
        # make sure there's no leftover build processes
        self._do_kill_build()
603
604     @server.eintr_retry
605     def _popen_scp(self, src, dst, retry = 3):
606         while 1:
607             try:
608                 (out,err),proc = server.popen_scp(
609                     src,
610                     dst, 
611                     port = None,
612                     agent = None,
613                     ident_key = self.node.ident_path,
614                     server_key = self.node.server_key
615                     )
616
617                 if server.eintr_retry(proc.wait)():
618                     raise RuntimeError, (out, err)
619                 return (out, err), proc
620             except:
621                 if retry <= 0:
622                     raise
623                 else:
624                     retry -= 1
625   
626
627     @server.eintr_retry
628     def _popen_ssh_command(self, command, retry = 0, noerrors=False, timeout=None):
629         (out,err),proc = server.popen_ssh_command(
630             command,
631             host = self.node.hostname,
632             port = None,
633             user = self.node.slicename,
634             agent = None,
635             ident_key = self.node.ident_path,
636             server_key = self.node.server_key,
637             timeout = timeout,
638             retry = retry
639             )
640
641         if server.eintr_retry(proc.wait)():
642             if not noerrors:
643                 raise RuntimeError, (out, err)
644         return (out, err), proc
645
646 class Application(Dependency):
647     """
648     An application also has dependencies, but also a command to be ran and monitored.
649     
650     It adds the output of that command as traces.
651     """
652     
653     TRACES = ('stdout','stderr','buildlog', 'output')
654     
    def __init__(self, api=None):
        """
        Create an unconfigured application that has not been started.

        api: PLCAPI handle, passed on to Dependency.__init__().
        """
        super(Application,self).__init__(api)
        
        # Attributes
        self.command = None
        self.sudo = False
        
        self.stdin = None
        self.stdout = None
        self.stderr = None
        self.output = None
        
        # Those are filled when the app is started
        #   Having both pid and ppid makes it harder
        #   for pid rollover to induce tracking mistakes
        self._started = False
        self._pid = None
        self._ppid = None

        # Do not add to the python path of nodes
        self.add_to_path = False
676     
677     def __str__(self):
678         return "%s<command:%s%s>" % (
679             self.__class__.__name__,
680             "sudo " if self.sudo else "",
681             self.command,
682         )
683     
684     def start(self):
685         self._logger.info("Starting %s", self)
686         
687         # Create shell script with the command
688         # This way, complex commands and scripts can be ran seamlessly
689         # sync files
690         command = cStringIO.StringIO()
691         command.write('export PYTHONPATH=$PYTHONPATH:%s\n' % (
692             ':'.join(["${HOME}/"+server.shell_escape(s) for s in self.node.pythonpath])
693         ))
694         command.write('export PATH=$PATH:%s\n' % (
695             ':'.join(["${HOME}/"+server.shell_escape(s) for s in self.node.pythonpath])
696         ))
697         if self.node.env:
698             for envkey, envvals in self.node.env.iteritems():
699                 for envval in envvals:
700                     command.write('export %s=%s\n' % (envkey, envval))
701         command.write(self.command)
702         command.seek(0)
703
704         try:
705             self._popen_scp(
706                 command,
707                 '%s@%s:%s' % (self.node.slicename, self.node.hostname, 
708                     os.path.join(self.home_path, "app.sh"))
709                 )
710         except RuntimeError, e:
711             raise RuntimeError, "Failed to set up application: %s %s" \
712                     % (e.args[0], e.args[1],)
713         
714         # Start process in a "daemonized" way, using nohup and heavy
715         # stdin/out redirection to avoid connection issues
716         (out,err),proc = rspawn.remote_spawn(
717             self._replace_paths("bash ./app.sh"),
718             
719             pidfile = './pid',
720             home = self.home_path,
721             stdin = 'stdin' if self.stdin is not None else '/dev/null',
722             stdout = 'stdout' if self.stdout else '/dev/null',
723             stderr = 'stderr' if self.stderr else '/dev/null',
724             sudo = self.sudo,
725             
726             host = self.node.hostname,
727             port = None,
728             user = self.node.slicename,
729             agent = None,
730             ident_key = self.node.ident_path,
731             server_key = self.node.server_key
732             )
733         
734         if proc.wait():
735             if self.check_bad_host(out, err):
736                 self.node.blacklist()
737             raise RuntimeError, "Failed to set up application: %s %s" % (out,err,)
738
739         self._started = True
740     
    def recover(self):
        """
        Mark the application as started and try to read its pid/ppid back
        from the node, without launching anything.
        """
        # Assuming the application is running on PlanetLab,
        # proper pidfiles should be present at the app's home path.
        # So we mark this application as started, and check the pidfiles
        self._started = True
        self.checkpid()
747
748     def checkpid(self):            
749         # Get PID/PPID
750         # NOTE: wait a bit for the pidfile to be created
751         if self._started and not self._pid or not self._ppid:
752             pidtuple = rspawn.remote_check_pid(
753                 os.path.join(self.home_path,'pid'),
754                 host = self.node.hostname,
755                 port = None,
756                 user = self.node.slicename,
757                 agent = None,
758                 ident_key = self.node.ident_path,
759                 server_key = self.node.server_key
760                 )
761             
762             if pidtuple:
763                 self._pid, self._ppid = pidtuple
764     
765     def status(self):
766         self.checkpid()
767         if not self._started:
768             return AS.STATUS_NOT_STARTED
769         elif not self._pid or not self._ppid:
770             return AS.STATUS_NOT_STARTED
771         else:
772             status = rspawn.remote_status(
773                 self._pid, self._ppid,
774                 host = self.node.hostname,
775                 port = None,
776                 user = self.node.slicename,
777                 agent = None,
778                 ident_key = self.node.ident_path,
779                 server_key = self.node.server_key
780                 )
781             
782             if status is rspawn.NOT_STARTED:
783                 return AS.STATUS_NOT_STARTED
784             elif status is rspawn.RUNNING:
785                 return AS.STATUS_RUNNING
786             elif status is rspawn.FINISHED:
787                 return AS.STATUS_FINISHED
788             else:
789                 # WTF?
790                 return AS.STATUS_NOT_STARTED
791     
792     def kill(self):
793         status = self.status()
794         if status == AS.STATUS_RUNNING:
795             # kill by ppid+pid - SIGTERM first, then try SIGKILL
796             rspawn.remote_kill(
797                 self._pid, self._ppid,
798                 host = self.node.hostname,
799                 port = None,
800                 user = self.node.slicename,
801                 agent = None,
802                 ident_key = self.node.ident_path,
803                 server_key = self.node.server_key,
804                 sudo = self.sudo
805                 )
806             self._logger.info("Killed %s", self)
807
808
class NepiDependency(Dependency):
    """
    This dependency adds nepi itself to the python path,
    so that you may run testbeds within PL nodes.

    The local ``nepi`` package is packed into a temporary ``.tar.gz`` that
    becomes this dependency's ``sources``. The build step moves that
    tarball into place and the install step unpacks it one directory up.
    """
    
    # Class attribute holding a *weak* reference to the shared NEPI tar file
    # so that they may share it. Don't operate on the file itself, it would
    # be a mess, just use its path.
    _shared_nepi_tar = None
    
    def __init__(self, api = None):
        super(NepiDependency, self).__init__(api)
        
        self._tarball = None
        
        self.depends = 'python python-ipaddr python-setuptools'
        
        # our sources are in our ad-hoc tarball
        self.sources = self.tarball.name
        
        tarname = os.path.basename(self.tarball.name)
        
        # it's already built - just move the tarball into place
        self.build = "mv -f ${SOURCES}/%s ." % (tarname,)
        
        # unpack it into sources, and we're done
        self.install = "tar xzf ${BUILD}/%s -C .." % (tarname,)
    
    @staticmethod
    def _build_nepi_tarball():
        """
        Pack the local ``nepi`` package into a fresh named temporary file.

        Returns the ``tempfile.NamedTemporaryFile`` object holding the
        archive. Raises ``RuntimeError`` when ``tar`` exits with a
        non-zero status.
        """
        import nepi
        import tempfile
        
        tar_file = tempfile.NamedTemporaryFile(prefix='nepi-src-', suffix='.tar.gz')
        nepi_parent = os.path.join(os.path.dirname(os.path.dirname(nepi.__file__)),'.')
        
        # Close the /dev/null handles once tar is done instead of leaking them
        null_in = open("/dev/null","r")
        null_out = open("/dev/null","w")
        try:
            proc = subprocess.Popen(
                ["tar", "czf", tar_file.name, 
                    '-C', nepi_parent, 
                    'nepi'],
                stdout = null_out,
                stdin = null_in)
            failed = proc.wait()
        finally:
            null_out.close()
            null_in.close()
        
        if failed:
            raise RuntimeError("Failed to create nepi tarball")
        
        return tar_file
    
    @property
    def tarball(self):
        """
        The temporary file object holding the packed nepi sources.

        The first access on an instance reuses the tarball referenced by
        the class-level weak reference when that file object is still
        alive; otherwise a new one is built and published there. The
        instance keeps a strong reference in ``_tarball``.
        """
        if self._tarball is None:
            import weakref
            
            shared_ref = NepiDependency._shared_nepi_tar
            shared_tar = shared_ref() if shared_ref is not None else None
            
            if shared_tar is None:
                # Build an ad-hoc tarball and publish it, weakly, on the
                # class so that other instances can pick it up. Storing it
                # on the instance (or strongly) would defeat the sharing.
                shared_tar = self._build_nepi_tarball()
                NepiDependency._shared_nepi_tar = weakref.ref(shared_tar)
            
            self._tarball = shared_tar
        
        return self._tarball
865
class NS3Dependency(Dependency):
    """
    This dependency adds NS3 libraries to the library paths,
    so that you may run the NS3 testbed within PL nodes.
    
    You'll also need the NepiDependency.

    Both the build and the install command start with the same check
    (python modules importable, ``lib/ns`` files present in the parent
    directory) and only do their real work when that check fails.
    """
    
    def __init__(self, api = None):
        super(NS3Dependency, self).__init__(api)
        
        self.buildDepends = 'make waf gcc gcc-c++ gccxml unzip'
        
        # We have to download the sources, untar, build...
        pybindgen_source_url = "http://yans.pl.sophia.inria.fr/trac/nepi/raw-attachment/wiki/WikiStart/pybindgen-r794.tar.gz"
        pygccxml_source_url = "http://leaseweb.dl.sourceforge.net/project/pygccxml/pygccxml/pygccxml-1.0/pygccxml-1.0.0.zip"
        ns3_source_url = "http://yans.pl.sophia.inria.fr/code/hgwebdir.cgi/ns-3.11-nepi/archive/tip.tar.gz"
        passfd_source_url = "http://yans.pl.sophia.inria.fr/code/hgwebdir.cgi/python-passfd/archive/tip.tar.gz"
        
        # Shell prefix shared by build and install. The first group succeeds
        # when everything is already importable/present; the second group is
        # opened here and must be closed with " )" by whoever appends to it.
        unless_working = (
            " ( "
            "  cd .. && "
            "  python -c 'import pygccxml, pybindgen, passfd' && "
            "  test -f lib/ns/_core.so && "
            "  test -f lib/ns/__init__.py && "
            "  test -f lib/ns/core.py && "
            "  test -f lib/libns3-core.so && "
            "  LD_LIBRARY_PATH=lib PYTHONPATH=lib python -c 'import ns.core' "
            " ) || ( "
        )
        
        self.build = unless_working + (
                # Not working, rebuild
                     # Archive SHA1 sums to check
                     "echo '7158877faff2254e6c094bf18e6b4283cac19137  pygccxml-1.0.0.zip' > archive_sums.txt && "
                     "echo 'a18c2ccffd0df517bc37e2f3a2475092517c43f2  pybindgen-src.tar.gz' >> archive_sums.txt && "
                     " ( " # check existing files
                     " sha1sum -c archive_sums.txt && "
                     " test -f passfd-src.tar.gz && "
                     " test -f ns3-src.tar.gz "
                     " ) || ( " # nope? re-download
                     # Remove the archives that are really used below. The
                     # pybindgen one is a .tar.gz; a stale/corrupt copy left
                     # behind could never be repaired by "wget -c".
                     " rm -f pybindgen-src.tar.gz pygccxml-1.0.0.zip passfd-src.tar.gz ns3-src.tar.gz && "
                     " wget -q -c -O pybindgen-src.tar.gz %(pybindgen_source_url)s && " # continue, to exploit the case when it has already been dl'ed
                     " wget -q -c -O pygccxml-1.0.0.zip %(pygccxml_source_url)s && " 
                     " wget -q -c -O passfd-src.tar.gz %(passfd_source_url)s && "
                     " wget -q -c -O ns3-src.tar.gz %(ns3_source_url)s && "  
                     " sha1sum -c archive_sums.txt " # Check SHA1 sums when applicable
                     " ) && "
                     "unzip -n pygccxml-1.0.0.zip && "
                     "mkdir -p pybindgen-src && "
                     "mkdir -p ns3-src && "
                     "mkdir -p passfd-src && "
                     "tar xzf ns3-src.tar.gz --strip-components=1 -C ns3-src && "
                     "tar xzf passfd-src.tar.gz --strip-components=1 -C passfd-src && "
                     "tar xzf pybindgen-src.tar.gz --strip-components=1 -C pybindgen-src && "
                     "rm -rf target && "    # mv doesn't like unclean targets
                     "mkdir -p target && "
                     "cd pygccxml-1.0.0 && "
                     "rm -rf unittests docs && " # pygccxml has ~100M of unit tests - excessive - docs aren't needed either
                     "python setup.py build && "
                     "python setup.py install --install-lib ${BUILD}/target && "
                     "python setup.py clean && "
                     "cd ../pybindgen-src && "
                     "export PYTHONPATH=$PYTHONPATH:${BUILD}/target && "
                     "./waf configure --prefix=${BUILD}/target -d release && "
                     "./waf && "
                     "./waf install && "
                     "./waf clean && "
                     "mv -f ${BUILD}/target/lib/python*/site-packages/pybindgen ${BUILD}/target/. && "
                     "rm -rf ${BUILD}/target/lib && "
                     "cd ../passfd-src && "
                     "python setup.py build && "
                     "python setup.py install --install-lib ${BUILD}/target && "
                     "python setup.py clean && "
                     "cd ../ns3-src && "
                     "./waf configure --prefix=${BUILD}/target --with-pybindgen=../pybindgen-src -d release --disable-examples --disable-tests && "
                     "./waf &&"
                     "./waf install && "
                     "rm -f ${BUILD}/target/lib/*.so && "
                     "cp -a ${BUILD}/ns3-src/build/release/libns3*.so ${BUILD}/target/lib && "
                     "cp -a ${BUILD}/ns3-src/build/release/bindings/python/ns ${BUILD}/target/lib &&"
                     "./waf clean "
             " )"
                     % dict(
                        pybindgen_source_url = server.shell_escape(pybindgen_source_url),
                        pygccxml_source_url = server.shell_escape(pygccxml_source_url),
                        ns3_source_url = server.shell_escape(ns3_source_url),
                        passfd_source_url = server.shell_escape(passfd_source_url),
                     ))
        
        # Just move ${BUILD}/target
        self.install = unless_working + (
                # Not working, reinstall
                    "test -d ${BUILD}/target && "
                    "[[ \"x\" != \"x$(find ${BUILD}/target -mindepth 1 -print -quit)\" ]] &&"
                    "( for i in ${BUILD}/target/* ; do rm -rf ${SOURCES}/${i##*/} ; done ) && " # mv doesn't like unclean targets
                    "mv -f ${BUILD}/target/* ${SOURCES}"
            " )"
        )
        
        # Set extra environment paths
        self.env['NEPI_NS3BINDINGS'] = "${SOURCES}/lib"
        self.env['NEPI_NS3LIBRARY'] = "${SOURCES}/lib"
    
    @property
    def tarball(self):
        """
        Temporary file object holding a tarball of the nepi sources.

        NOTE(review): this looks copy-pasted from NepiDependency. It packs
        nepi rather than NS3, and nothing visible in this class sets
        ``_tarball`` or ``_shared_nepi_tar`` -- confirm whether the base
        class does, or whether the property can be dropped. It is kept so
        the class interface stays unchanged.

        Raises ``RuntimeError`` when ``tar`` exits with a non-zero status.
        """
        if self._tarball is None:
            shared_tar = self._shared_nepi_tar and self._shared_nepi_tar()
            if shared_tar is not None:
                self._tarball = shared_tar
            else:
                # Build an ad-hoc tarball
                # Prebuilt
                import nepi
                import tempfile
                
                shared_tar = tempfile.NamedTemporaryFile(prefix='nepi-src-', suffix='.tar.gz')
                
                # Close the /dev/null handles once tar is done
                null_in = open("/dev/null","r")
                null_out = open("/dev/null","w")
                try:
                    proc = subprocess.Popen(
                        ["tar", "czf", shared_tar.name, 
                            '-C', os.path.join(os.path.dirname(os.path.dirname(nepi.__file__)),'.'), 
                            'nepi'],
                        stdout = null_out,
                        stdin = null_in)
                    failed = proc.wait()
                finally:
                    null_out.close()
                    null_in.close()

                if failed:
                    raise RuntimeError("Failed to create nepi tarball")
                
                self._tarball = self._shared_nepi_tar = shared_tar
                
        return self._tarball
1002
class YumDependency(Dependency):
    """
    This dependency is an internal helper class used to
    efficiently distribute yum-downloaded rpms.
    
    It temporarily sets the yum cache as persistent in the
    build master, and installs all the required packages.
    
    The rpm packages left in the yum cache are gathered and
    distributed by the underlying Dependency in an efficient
    manner. Build slaves will then install those rpms back in
    the cache before issuing the install command.
    
    When packages have been installed already, nothing but an
    empty tar is distributed.

    ``build`` and ``install`` are read-only in practice: both are
    properties computed from ``depends``, and assignments to them are
    silently ignored.
    """
    
    # Class attribute holding a *weak* reference to the shared NEPI tar file
    # so that they may share it. Don't operate on the file itself, it would
    # be a mess, just use its path.
    _shared_nepi_tar = None
    
    # Output fragments that check_bad_host() looks for, compiled once.
    # The parentheses and dot around "repomd.xml" are escaped so they match
    # the literal text instead of forming a regex group.
    _BAD_HOST_RE = re.compile(
        r'(?:'
        r'The GPG keys listed for the ".*" repository are already installed but they are not correct for this package'
        r'|Error: Cannot retrieve repository metadata \(repomd\.xml\) for repository: .*[.] Please verify its path and try again'
        r'|Error: disk I/O error'
        r'|MASTER NODE UNREACHABLE'
        r')', 
        re.I)
    
    def _canonical_depends(self):
        """
        Return ``depends`` as a sorted, single-space separated string.

        An unset ``depends`` yields the empty string. Runs of whitespace
        are collapsed so that equal package sets give equal strings.
        """
        return ' '.join( sorted( (self.depends or "").split() ) )
    
    def _build_get(self):
        """
        Shell command that downloads the rpms and packs them into a tar.

        The yum ``keepcache`` setting is switched on around the install and
        switched back off afterwards; the rpms found under
        ``/var/cache/yum`` are appended to ``${BUILD}/packages.tar``.
        """
        # canonical representation of dependencies
        depends = self._canonical_depends()
        
        # download rpms and pack into a tar archive
        return (
            "sudo -S nice yum -y makecache && "
            "sudo -S sed -i -r 's/keepcache *= *0/keepcache=1/' /etc/yum.conf && "
            " ( ( "
                "sudo -S nice yum -y install %s ; "
                "rm -f ${BUILD}/packages.tar ; "
                "tar -C /var/cache/yum -rf ${BUILD}/packages.tar $(cd /var/cache/yum ; find -iname '*.rpm')"
            " ) || /bin/true ) && "
            "sudo -S sed -i -r 's/keepcache *= *1/keepcache=0/' /etc/yum.conf && "
            "( sudo -S nice yum -y clean packages || /bin/true ) "
        ) % ( depends, )
    def _build_set(self, value):
        # ignore: the build command is always derived from `depends`
        return
    build = property(_build_get, _build_set)
    
    def _install_get(self):
        """
        Shell command that unpacks ``packages.tar`` into the yum cache,
        installs the dependencies and then cleans the cached packages.
        """
        # canonical representation of dependencies
        depends = self._canonical_depends()
        
        # unpack cached rpms into yum cache, install, and cleanup
        return (
            "sudo -S tar -k --keep-newer-files -C /var/cache/yum -xf packages.tar && "
            "sudo -S nice yum -y install %s && "
            "( sudo -S nice yum -y clean packages || /bin/true ) "
        ) % ( depends, )
    def _install_set(self, value):
        # ignore: the install command is always derived from `depends`
        return
    install = property(_install_get, _install_set)
        
    def check_bad_host(self, out, err):
        """
        Search command output for one of the known bad-host messages.

        ``out`` and ``err`` are the text to scan; ``out`` is searched
        first. Returns the regex match object of the first hit (truthy),
        or ``None`` when neither text contains a known message. Matching
        is case-insensitive.
        """
        badre = self._BAD_HOST_RE
        return badre.search(out) or badre.search(err)