PlanetLab support tune-up: home_cleanup only nepi folders + make server support longe...
[nepi.git] / src / nepi / testbeds / planetlab / application.py
1 # -*- coding: utf-8 -*-
2
3 from constants import TESTBED_ID
4 import plcapi
5 import operator
6 import os
7 import os.path
8 import sys
9 import nepi.util.server as server
10 import cStringIO
11 import subprocess
12 import rspawn
13 import random
14 import time
15 import socket
16 import threading
17 import logging
18 import re
19
20 from nepi.util.constants import ApplicationStatus as AS
21
# Matches "<udp|tcp> <IPv4 address>" with optional surrounding whitespace;
# each octet of the address is restricted to 0-255.
# Raw string: the backslash escapes are meant for the regex engine, not for
# the Python string parser.
_ccnre = re.compile(r"\s*(udp|tcp)\s+(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\s*$")
23
class Dependency(object):
    """
    A Dependency is in every respect like an application.
    
    It depends on some packages, it may require building binaries, it must deploy
    them...
    
    But it has no command. Dependencies aren't ever started, or stopped, and have
    no status.
    """

    # Names of the traces this object exposes.
    # The trailing comma is required: without it this is a plain string and
    # "whichtrace in TRACES" degrades to a substring test.
    TRACES = ('buildlog',)
36
37     def __init__(self, api=None):
38         if not api:
39             api = plcapi.PLCAPI()
40         self._api = api
41         
42         # Attributes
43         self.command = None
44         self.sudo = False
45         
46         self.build = None
47         self.install = None
48         self.depends = None
49         self.buildDepends = None
50         self.sources = None
51         self.rpmFusion = False
52         self.env = {}
53         
54         self.stdin = None
55         self.stdout = None
56         self.stderr = None
57         self.buildlog = None
58         
59         self.add_to_path = True
60         
61         # Those are filled when the app is configured
62         self.home_path = None
63         
64         # Those are filled when an actual node is connected
65         self.node = None
66         
67         # Those are filled when the app is started
68         #   Having both pid and ppid makes it harder
69         #   for pid rollover to induce tracking mistakes
70         self._started = False
71         self._setup = False
72         self._setuper = None
73         self._pid = None
74         self._ppid = None
75
76         # Spanning tree deployment
77         self._master = None
78         self._master_passphrase = None
79         self._master_prk = None
80         self._master_puk = None
81         self._master_token = os.urandom(8).encode("hex")
82         self._build_pid = None
83         self._build_ppid = None
84         
85         # Logging
86         self._logger = logging.getLogger('nepi.testbeds.planetlab')
87         
88     
89     def __str__(self):
90         return "%s<%s>" % (
91             self.__class__.__name__,
92             ' '.join(filter(bool,(self.depends, self.sources)))
93         )
94    
95     def deployed(self):
96         return self._setup
97
98     def validate(self):
99         if self.home_path is None:
100             raise AssertionError, "Misconfigured application: missing home path"
101         if self.node.ident_path is None or not os.access(self.node.ident_path, os.R_OK):
102             raise AssertionError, "Misconfigured application: missing slice SSH key"
103         if self.node is None:
104             raise AssertionError, "Misconfigured application: unconnected node"
105         if self.node.hostname is None:
106             raise AssertionError, "Misconfigured application: misconfigured node"
107         if self.node.slicename is None:
108             raise AssertionError, "Misconfigured application: unspecified slice"
109     
110     def check_bad_host(self, out, err):
111         """
112         Called whenever an operation fails, it's given the output to be checked for
113         telltale signs of unhealthy hosts.
114         """
115         return False
116     
117     def remote_trace_path(self, whichtrace):
118         if whichtrace in self.TRACES:
119             tracefile = os.path.join(self.home_path, whichtrace)
120         else:
121             tracefile = None
122         
123         return tracefile
124
125     def remote_trace_name(self, whichtrace):
126         if whichtrace in self.TRACES:
127             return whichtrace
128         return None
129
130     def sync_trace(self, local_dir, whichtrace):
131         tracefile = self.remote_trace_path(whichtrace)
132         if not tracefile:
133             return None
134         
135         local_path = os.path.join(local_dir, tracefile)
136         
137         # create parent local folders
138         proc = subprocess.Popen(
139             ["mkdir", "-p", os.path.dirname(local_path)],
140             stdout = open("/dev/null","w"),
141             stdin = open("/dev/null","r"))
142
143         if proc.wait():
144             raise RuntimeError, "Failed to synchronize trace"
145         
146         # sync files
147         try:
148             self._popen_scp(
149                 '%s@%s:%s' % (self.node.slicename, self.node.hostname,
150                     tracefile),
151                 local_path
152                 )
153         except RuntimeError, e:
154             raise RuntimeError, "Failed to synchronize trace: %s %s" \
155                     % (e.args[0], e.args[1],)
156         
157         return local_path
158     
159     def recover(self):
160         # We assume a correct deployment, so recovery only
161         # means we mark this dependency as deployed
162         self._setup = True
163
164     def setup(self):
165         self._logger.info("Setting up %s", self)
166         self._make_home()
167         self._launch_build()
168         self._finish_build()
169         self._setup = True
170     
171     def async_setup(self):
172         if not self._setuper:
173             def setuper():
174                 try:
175                     self.setup()
176                 except:
177                     self._setuper._exc.append(sys.exc_info())
178             self._setuper = threading.Thread(
179                 target = setuper)
180             self._setuper._exc = []
181             self._setuper.start()
182     
183     def async_setup_wait(self):
184         if not self._setup:
185             self._logger.info("Waiting for %s to be setup", self)
186             if self._setuper:
187                 self._setuper.join()
188                 if not self._setup:
189                     if self._setuper._exc:
190                         exctyp,exval,exctrace = self._setuper._exc[0]
191                         raise exctyp,exval,exctrace
192                     else:
193                         raise RuntimeError, "Failed to setup application"
194                 else:
195                     self._logger.info("Setup ready: %s at %s", self, self.node.hostname)
196             else:
197                 self.setup()
198         
199     def _make_home(self):
200         # Make sure all the paths are created where 
201         # they have to be created for deployment
202         # sync files
203         try:
204             self._popen_ssh_command(
205                 "mkdir -p %(home)s && ( rm -f %(home)s/{pid,build-pid,nepi-build.sh} >/dev/null 2>&1 || /bin/true )" \
206                     % { 'home' : server.shell_escape(self.home_path) },
207                 timeout = 120,
208                 retry = 3
209                 )
210         except RuntimeError, e:
211             raise RuntimeError, "Failed to set up application %s: %s %s" % (self.home_path, e.args[0], e.args[1],)
212         
213         if self.stdin:
214             stdin = self.stdin
215             if not os.path.isfile(stdin):
216                 stdin = cStringIO.StringIO(self.stdin)
217
218             # Write program input
219             try:
220                 self._popen_scp(stdin,
221                     '%s@%s:%s' % (self.node.slicename, self.node.hostname, 
222                         os.path.join(self.home_path, 'stdin') ),
223                     )
224             except RuntimeError, e:
225                 raise RuntimeError, "Failed to set up application %s: %s %s" \
226                         % (self.home_path, e.args[0], e.args[1],)
227
228     def _replace_paths(self, command):
229         """
230         Replace all special path tags with shell-escaped actual paths.
231         """
232         # need to append ${HOME} if paths aren't absolute, to MAKE them absolute.
233         root = '' if self.home_path.startswith('/') else "${HOME}/"
234         return ( command
235             .replace("${SOURCES}", root+server.shell_escape(self.home_path))
236             .replace("${BUILD}", root+server.shell_escape(os.path.join(self.home_path,'build'))) )
237
238     def _launch_build(self, trial=0):
239         if self._master is not None:
240             if not trial or self._master_prk is not None:
241                 self._do_install_keys()
242             buildscript = self._do_build_slave()
243         else:
244             buildscript = self._do_build_master()
245             
246         if buildscript is not None:
247             self._logger.info("Building %s at %s", self, self.node.hostname)
248             
249             # upload build script
250             try:
251                 self._popen_scp(
252                     buildscript,
253                     '%s@%s:%s' % (self.node.slicename, self.node.hostname, 
254                         os.path.join(self.home_path, 'nepi-build.sh') )
255                     )
256             except RuntimeError, e:
257                 raise RuntimeError, "Failed to set up application %s: %s %s" \
258                         % (self.home_path, e.args[0], e.args[1],)
259             
260             # launch build
261             self._do_launch_build()
262     
263     def _finish_build(self):
264         self._do_wait_build()
265         self._do_install()
266
    def _do_build_slave(self):
        """
        Compose the build script of a slave, which fetches from its master.

        The script starts an ssh-agent holding the deployment key, waits for
        the master to finish, copies the sources and the build archive over
        from the master, writes its own token and cleans up.

        Returns a file-like object holding the script, or None when there
        are neither sources nor a build command.
        """
        if not self.sources and not self.build:
            return None
            
        # Create build script
        # "files" collects the scp locations, on the master, of all to fetch
        files = set()
        
        if self.sources:
            sources = self.sources.split(' ')
            files.update(
                "%s@%s:%s" % (self._master.node.slicename, self._master.node.hostip, 
                    os.path.join(self._master.home_path, os.path.basename(source)),)
                for source in sources
            )
        
        if self.build:
            files.add(
                "%s@%s:%s" % (self._master.node.slicename, self._master.node.hostip, 
                    os.path.join(self._master.home_path, 'build.tar.gz'),)
            )
        
        sshopts = "-o ConnectTimeout=30 -o ConnectionAttempts=3 -o ServerAliveInterval=30 -o TCPKeepAlive=yes"
        
        # Writes an askpass helper that just runs cat, starts an ssh-agent,
        # adds the private key with $NEPI_MASTER_PASSPHRASE piped in, and
        # then removes both key files.
        # The key file names are those recorded by install_keys().
        launch_agent = "{ ( echo -e '#!/bin/sh\\ncat' > .ssh-askpass ) && chmod u+x .ssh-askpass"\
                        " && export SSH_ASKPASS=$(pwd)/.ssh-askpass "\
                        " && ssh-agent > .ssh-agent.sh ; } && . ./.ssh-agent.sh && ( echo $NEPI_MASTER_PASSPHRASE | ssh-add %(prk)s ) && rm -rf %(prk)s %(puk)s" %  \
        {
            'prk' : server.shell_escape(self._master_prk_name),
            'puk' : server.shell_escape(self._master_puk_name),
        }
        
        kill_agent = "kill $SSH_AGENT_PID"
        
        # Checks that the master answers to ping and ssh, polls every 5
        # seconds until the master's build.token.retcode holds the master's
        # token, then fails with BAD TOKEN unless build.token holds it too
        waitmaster = (
            "{ "
            "echo 'Checking master reachability' ; "
            "if ping -c 3 %(master_host)s && (. ./.ssh-agent.sh > /dev/null ; ssh -o UserKnownHostsFile=%(hostkey)s %(sshopts)s %(master)s echo MASTER SAYS HI ) ; then "
            "echo 'Master node reachable' ; "
            "else "
            "echo 'MASTER NODE UNREACHABLE' && "
            "exit 1 ; "
            "fi ; "
            ". ./.ssh-agent.sh ; "
            "while [[ $(. ./.ssh-agent.sh > /dev/null ; ssh -q -o UserKnownHostsFile=%(hostkey)s %(sshopts)s %(master)s cat %(token_path)s.retcode || /bin/true) != %(token)s ]] ; do sleep 5 ; done ; "
            "if [[ $(. ./.ssh-agent.sh > /dev/null ; ssh -q -o UserKnownHostsFile=%(hostkey)s %(sshopts)s %(master)s cat %(token_path)s || /bin/true) != %(token)s ]] ; then echo BAD TOKEN ; exit 1 ; fi ; "
            "}" 
        ) % {
            'hostkey' : 'master_known_hosts',
            'master' : "%s@%s" % (self._master.node.slicename, self._master.node.hostip),
            'master_host' : self._master.node.hostip,
            'token_path' : os.path.join(self._master.home_path, 'build.token'),
            'token' : server.shell_escape(self._master._master_token),
            'sshopts' : sshopts,
        }
        
        # Copies the files over, unpacks the build archive if there is one,
        # and writes this object's own token to build.token and
        # build.token.retcode
        syncfiles = ". ./.ssh-agent.sh && scp -p -o UserKnownHostsFile=%(hostkey)s %(sshopts)s %(files)s ." % {
            'hostkey' : 'master_known_hosts',
            'files' : ' '.join(files),
            'sshopts' : sshopts,
        }
        if self.build:
            syncfiles += " && tar xzf build.tar.gz"
        syncfiles += " && ( echo %s > build.token )" % (server.shell_escape(self._master_token),)
        syncfiles += " && ( echo %s > build.token.retcode )" % (server.shell_escape(self._master_token),)
        syncfiles = "{ . ./.ssh-agent.sh ; %s ; }" % (syncfiles,)
        
        # Kills the agent and removes the keys, the known hosts file and the
        # askpass helper
        cleanup = "{ . ./.ssh-agent.sh ; kill $SSH_AGENT_PID ; rm -rf %(prk)s %(puk)s master_known_hosts .ssh-askpass ; }" % {
            'prk' : server.shell_escape(self._master_prk_name),
            'puk' : server.shell_escape(self._master_puk_name),
        }
        
        # The cleanup runs whether or not the steps before it succeed, and
        # build.token.retcode is written in either case.
        # NOTE: the 'home' entry below is not referenced by the template.
        slavescript = "( ( %(launch_agent)s && %(waitmaster)s && %(syncfiles)s && %(kill_agent)s && %(cleanup)s ) || %(cleanup)s ) ; echo %(token)s > build.token.retcode" % {
            'waitmaster' : waitmaster,
            'syncfiles' : syncfiles,
            'cleanup' : cleanup,
            'kill_agent' : kill_agent,
            'launch_agent' : launch_agent,
            'home' : server.shell_escape(self.home_path),
            'token' : server.shell_escape(self._master_token),
        }
       
        return cStringIO.StringIO(slavescript)
349          
350     def _do_launch_build(self):
351         script = "bash ./nepi-build.sh"
352         if self._master_passphrase:
353             script = "NEPI_MASTER_PASSPHRASE=%s %s" % (
354                 server.shell_escape(self._master_passphrase),
355                 script
356             )
357         (out,err),proc = rspawn.remote_spawn(
358             script,
359             pidfile = 'build-pid',
360             home = self.home_path,
361             stdin = '/dev/null',
362             stdout = 'buildlog',
363             stderr = rspawn.STDOUT,
364             
365             host = self.node.hostname,
366             port = None,
367             user = self.node.slicename,
368             agent = None,
369             ident_key = self.node.ident_path,
370             server_key = self.node.server_key,
371             hostip = self.node.hostip,
372             )
373         
374         if proc.wait():
375             if self.check_bad_host(out, err):
376                 self.node.blacklist()
377             raise RuntimeError, "Failed to set up build slave %s: %s %s" % (self.home_path, out,err,)
378         
379         
380         pid = ppid = None
381         delay = 1.0
382         for i in xrange(5):
383             pidtuple = rspawn.remote_check_pid(
384                 os.path.join(self.home_path,'build-pid'),
385                 host = self.node.hostname,
386                 port = None,
387                 user = self.node.slicename,
388                 agent = None,
389                 ident_key = self.node.ident_path,
390                 server_key = self.node.server_key,
391                 hostip = self.node.hostip
392                 )
393             
394             if pidtuple:
395                 pid, ppid = pidtuple
396                 self._build_pid, self._build_ppid = pidtuple
397                 break
398             else:
399                 time.sleep(delay)
400                 delay = min(30,delay*1.2)
401         else:
402             raise RuntimeError, "Failed to set up build slave %s: cannot get pid" % (self.home_path,)
403
404         self._logger.info("Deploying %s at %s", self, self.node.hostname)
405         
406     def _do_wait_build(self, trial=0):
407         pid = self._build_pid
408         ppid = self._build_ppid
409         
410         if pid and ppid:
411             delay = 1.0
412             first = True
413             bustspin = 0
414             while True:
415                 status = rspawn.remote_status(
416                     pid, ppid,
417                     host = self.node.hostname,
418                     port = None,
419                     user = self.node.slicename,
420                     agent = None,
421                     ident_key = self.node.ident_path,
422                     server_key = self.node.server_key,
423                     hostip = self.node.hostip
424                     )
425                 
426                 if status is rspawn.FINISHED:
427                     self._build_pid = self._build_ppid = None
428                     break
429                 elif status is not rspawn.RUNNING:
430                     self._logger.warn("Busted waiting for %s to finish building at %s %s", self, self.node.hostname,
431                             "(build slave)" if self._master is not None else "(build master)")
432                     bustspin += 1
433                     time.sleep(delay*(5.5+random.random()))
434                     if bustspin > 12:
435                         self._build_pid = self._build_ppid = None
436                         break
437                 else:
438                     if first:
439                         self._logger.info("Waiting for %s to finish building at %s %s", self, self.node.hostname,
440                             "(build slave)" if self._master is not None else "(build master)")
441                         
442                         first = False
443                     time.sleep(delay*(0.5+random.random()))
444                     delay = min(30,delay*1.2)
445                     bustspin = 0
446             
447             # check build token
448             slave_token = ""
449             for i in xrange(3):
450                 (out, err), proc = self._popen_ssh_command(
451                     "cat %(token_path)s" % {
452                         'token_path' : os.path.join(self.home_path, 'build.token'),
453                     },
454                     timeout = 120,
455                     noerrors = True)
456                 if not proc.wait() and out:
457                     slave_token = out.strip()
458                 
459                 if slave_token:
460                     break
461                 else:
462                     time.sleep(2)
463             
464             if slave_token != self._master_token:
465                 # Get buildlog for the error message
466
467                 (buildlog, err), proc = self._popen_ssh_command(
468                     "cat %(buildlog)s" % {
469                         'buildlog' : os.path.join(self.home_path, 'buildlog'),
470                         'buildscript' : os.path.join(self.home_path, 'nepi-build.sh'),
471                     },
472                     timeout = 120,
473                     noerrors = True)
474                 
475                 proc.wait()
476                 
477                 if self.check_bad_host(buildlog, err):
478                     self.node.blacklist()
479                 elif self._master and trial < 3 and 'BAD TOKEN' in buildlog or 'BAD TOKEN' in err:
480                     # bad sync with master, may try again
481                     # but first wait for master
482                     self._master.async_setup_wait()
483                     self._launch_build(trial+1)
484                     return self._do_wait_build(trial+1)
485                 elif trial < 3:
486                     return self._do_wait_build(trial+1)
487                 else:
488                     # No longer need'em
489                     self._master_prk = None
490                     self._master_puk = None
491         
492                     raise RuntimeError, "Failed to set up application %s: "\
493                             "build failed, got wrong token from pid %s/%s "\
494                             "(expected %r, got %r), see buildlog at %s:\n%s" % (
495                         self.home_path, pid, ppid, self._master_token, slave_token, self.node.hostname, buildlog)
496
497             # No longer need'em
498             self._master_prk = None
499             self._master_puk = None
500         
501             self._logger.info("Built %s at %s", self, self.node.hostname)
502
503     def _do_kill_build(self):
504         pid = self._build_pid
505         ppid = self._build_ppid
506         
507         if pid and ppid:
508             self._logger.info("Killing build of %s", self)
509             rspawn.remote_kill(
510                 pid, ppid,
511                 host = self.node.hostname,
512                 port = None,
513                 user = self.node.slicename,
514                 agent = None,
515                 ident_key = self.node.ident_path,
516                 hostip = self.node.hostip
517                 )
518         
519         
520     def _do_build_master(self):
521         if not self.sources and not self.build and not self.buildDepends:
522             return None
523             
524         if self.sources:
525             sources = self.sources.split(' ')
526
527             http_sources = list()
528             for source in list(sources):
529                 if source.startswith("http") or source.startswith("https"):
530                     http_sources.append(source)
531                     sources.remove(source)
532
533             # Download http sources
534             try:
535                 for source in http_sources:
536                     path = os.path.join(self.home_path, source.split("/")[-1])
537                     command = "wget -o %s %s" % (path, source)
538                     self._popen_ssh(command)
539             except RuntimeError, e:
540                 raise RuntimeError, "Failed wget source file %r: %s %s" \
541                         % (sources, e.args[0], e.args[1],)
542
543             # Copy all other sources
544             try:
545                 self._popen_scp(
546                     sources,
547                     "%s@%s:%s" % (self.node.slicename, self.node.hostname, 
548                         os.path.join(self.home_path,'.'),)
549                     )
550             except RuntimeError, e:
551                 raise RuntimeError, "Failed upload source file %r: %s %s" \
552                         % (sources, e.args[0], e.args[1],)
553             
554         buildscript = cStringIO.StringIO()
555         
556         buildscript.write("(\n")
557         
558         if self.buildDepends:
559             # Install build dependencies
560             buildscript.write(
561                 "sudo -S yum -y install %(packages)s\n" % {
562                     'packages' : self.buildDepends
563                 }
564             )
565         
566             
567         if self.build:
568             # Build sources
569             buildscript.write(
570                 "mkdir -p build && ( cd build && ( %(command)s ) )\n" % {
571                     'command' : self._replace_paths(self.build),
572                     'home' : server.shell_escape(self.home_path),
573                 }
574             )
575         
576             # Make archive
577             buildscript.write("tar czf build.tar.gz build\n")
578         
579         # Write token
580         buildscript.write("echo %(master_token)s > build.token ) ; echo %(master_token)s > build.token.retcode" % {
581             'master_token' : server.shell_escape(self._master_token)
582         })
583         
584         buildscript.seek(0)
585
586         return buildscript
587
588     def _do_install(self):
589         if self.install:
590             self._logger.info("Installing %s at %s", self, self.node.hostname)
591            
592             # Install application
593             try:
594                 self._popen_ssh_command(
595                     "cd %(home)s && cd build && ( %(command)s ) > ${HOME}/%(home)s/installlog 2>&1 || ( tail ${HOME}/%(home)s/{install,build}log >&2 && false )" % \
596                         {
597                         'command' : self._replace_paths(self.install),
598                         'home' : server.shell_escape(self.home_path),
599                         },
600                     )
601             except RuntimeError, e:
602                 if self.check_bad_host(e.args[0], e.args[1]):
603                     self.node.blacklist()
604                 raise RuntimeError, "Failed install build sources on node %s: %s %s" % (
605                         self.node.hostname, e.args[0], e.args[1],)
606
607     def set_master(self, master):
608         self._master = master
609         
610     def install_keys(self, prk, puk, passphrase):
611         # Install keys
612         self._master_passphrase = passphrase
613         self._master_prk = prk
614         self._master_puk = puk
615         self._master_prk_name = os.path.basename(prk.name)
616         self._master_puk_name = os.path.basename(puk.name)
617         
618     def _do_install_keys(self):
619         prk = self._master_prk
620         puk = self._master_puk
621        
622         try:
623             self._popen_scp(
624                 [ prk.name, puk.name ],
625                 '%s@%s:%s' % (self.node.slicename, self.node.hostname, self.home_path )
626                 )
627         except RuntimeError, e:
628             raise RuntimeError, "Failed to set up application deployment keys: %s %s" \
629                     % (e.args[0], e.args[1],)
630
631         try:
632             self._popen_scp(
633                 cStringIO.StringIO('%s,%s %s\n' % (
634                     self._master.node.hostname, self._master.node.hostip, 
635                     self._master.node.server_key)),
636                 '%s@%s:%s' % (self.node.slicename, self.node.hostname, 
637                     os.path.join(self.home_path,"master_known_hosts") )
638                 )
639         except RuntimeError, e:
640             raise RuntimeError, "Failed to set up application deployment keys: %s %s" \
641                     % (e.args[0], e.args[1],)
642         
643     
644     def cleanup(self):
645         # make sure there's no leftover build processes
646         self._do_kill_build()
647         
648         # No longer need'em
649         self._master_prk = None
650         self._master_puk = None
651
652     @server.eintr_retry
653     def _popen_scp(self, src, dst, retry = 3):
654         while 1:
655             try:
656                 (out,err),proc = server.popen_scp(
657                     src,
658                     dst, 
659                     port = None,
660                     agent = None,
661                     ident_key = self.node.ident_path,
662                     server_key = self.node.server_key
663                     )
664
665                 if server.eintr_retry(proc.wait)():
666                     raise RuntimeError, (out, err)
667                 return (out, err), proc
668             except:
669                 if retry <= 0:
670                     raise
671                 else:
672                     retry -= 1
673   
674
675     @server.eintr_retry
676     def _popen_ssh_command(self, command, retry = 0, noerrors=False, timeout=None):
677         (out,err),proc = server.popen_ssh_command(
678             command,
679             host = self.node.hostname,
680             port = None,
681             user = self.node.slicename,
682             agent = None,
683             ident_key = self.node.ident_path,
684             server_key = self.node.server_key,
685             timeout = timeout,
686             retry = retry
687             )
688
689         if server.eintr_retry(proc.wait)():
690             if not noerrors:
691                 raise RuntimeError, (out, err)
692         return (out, err), proc
693
class Application(Dependency):
    """
    An application also has dependencies, but also a command to be run and monitored.
    
    It adds the output of that command as traces.
    """
    
    # Trace names accepted by remote_trace_path() and remote_trace_name()
    TRACES = ('stdout','stderr','buildlog', 'output')
702     
703     def __init__(self, api=None):
704         super(Application,self).__init__(api)
705         
706         # Attributes
707         self.command = None
708         self.sudo = False
709         
710         self.stdin = None
711         self.stdout = None
712         self.stderr = None
713         self.output = None
714         
715         # Those are filled when the app is started
716         #   Having both pid and ppid makes it harder
717         #   for pid rollover to induce tracking mistakes
718         self._started = False
719         self._pid = None
720         self._ppid = None
721
722         # Do not add to the python path of nodes
723         self.add_to_path = False
724     
725     def __str__(self):
726         return "%s<command:%s%s>" % (
727             self.__class__.__name__,
728             "sudo " if self.sudo else "",
729             self.command,
730         )
731     
732     def start(self):
733         self._logger.info("Starting %s", self)
734         
735         # Create shell script with the command
736         # This way, complex commands and scripts can be ran seamlessly
737         # sync files
738         command = cStringIO.StringIO()
739         command.write('export PYTHONPATH=$PYTHONPATH:%s\n' % (
740             ':'.join(["${HOME}/"+server.shell_escape(s) for s in self.node.pythonpath])
741         ))
742         command.write('export PATH=$PATH:%s\n' % (
743             ':'.join(["${HOME}/"+server.shell_escape(s) for s in self.node.pythonpath])
744         ))
745         if self.node.env:
746             for envkey, envvals in self.node.env.iteritems():
747                 for envval in envvals:
748                     command.write('export %s=%s\n' % (envkey, envval))
749         command.write(self.command)
750         command.seek(0)
751
752         try:
753             self._popen_scp(
754                 command,
755                 '%s@%s:%s' % (self.node.slicename, self.node.hostname, 
756                     os.path.join(self.home_path, "app.sh"))
757                 )
758         except RuntimeError, e:
759             raise RuntimeError, "Failed to set up application: %s %s" \
760                     % (e.args[0], e.args[1],)
761         
762         # Start process in a "daemonized" way, using nohup and heavy
763         # stdin/out redirection to avoid connection issues
764         (out,err),proc = rspawn.remote_spawn(
765             self._replace_paths("bash ./app.sh"),
766             
767             pidfile = './pid',
768             home = self.home_path,
769             stdin = 'stdin' if self.stdin is not None else '/dev/null',
770             stdout = 'stdout' if self.stdout else '/dev/null',
771             stderr = 'stderr' if self.stderr else '/dev/null',
772             sudo = self.sudo,
773             host = self.node.hostname,
774             port = None,
775             user = self.node.slicename,
776             agent = None,
777             ident_key = self.node.ident_path,
778             server_key = self.node.server_key
779             )
780         
781         if proc.wait():
782             if self.check_bad_host(out, err):
783                 self.node.blacklist()
784             raise RuntimeError, "Failed to set up application: %s %s" % (out,err,)
785
786         self._started = True
787     
788     def recover(self):
789         # Assuming the application is running on PlanetLab,
790         # proper pidfiles should be present at the app's home path.
791         # So we mark this application as started, and check the pidfiles
792         self._started = True
793         self.checkpid()
794
795     def checkpid(self):            
796         # Get PID/PPID
797         # NOTE: wait a bit for the pidfile to be created
798         if self._started and not self._pid or not self._ppid:
799             pidtuple = rspawn.remote_check_pid(
800                 os.path.join(self.home_path,'pid'),
801                 host = self.node.hostname,
802                 port = None,
803                 user = self.node.slicename,
804                 agent = None,
805                 ident_key = self.node.ident_path,
806                 server_key = self.node.server_key
807                 )
808             
809             if pidtuple:
810                 self._pid, self._ppid = pidtuple
811     
812     def status(self):
813         self.checkpid()
814         if not self._started:
815             return AS.STATUS_NOT_STARTED
816         elif not self._pid or not self._ppid:
817             return AS.STATUS_NOT_STARTED
818         else:
819             status = rspawn.remote_status(
820                 self._pid, self._ppid,
821                 host = self.node.hostname,
822                 port = None,
823                 user = self.node.slicename,
824                 agent = None,
825                 ident_key = self.node.ident_path,
826                 server_key = self.node.server_key
827                 )
828             
829             if status is rspawn.NOT_STARTED:
830                 return AS.STATUS_NOT_STARTED
831             elif status is rspawn.RUNNING:
832                 return AS.STATUS_RUNNING
833             elif status is rspawn.FINISHED:
834                 return AS.STATUS_FINISHED
835             else:
836                 # WTF?
837                 return AS.STATUS_NOT_STARTED
838     
839     def kill(self):
840         status = self.status()
841         if status == AS.STATUS_RUNNING:
842             # kill by ppid+pid - SIGTERM first, then try SIGKILL
843             rspawn.remote_kill(
844                 self._pid, self._ppid,
845                 host = self.node.hostname,
846                 port = None,
847                 user = self.node.slicename,
848                 agent = None,
849                 ident_key = self.node.ident_path,
850                 server_key = self.node.server_key,
851                 sudo = self.sudo
852                 )
853             self._logger.info("Killed %s", self)
854
855
class NepiDependency(Dependency):
    """
    This dependency adds nepi itself to the python path,
    so that you may run testbeds within PL nodes.

    The nepi package is packed into a temporary .tar.gz which is used as
    the dependency's sources; the build step just moves the tarball into
    place and the install step unpacks it.
    """
    
    # Class attribute holding a *weak* reference to the shared NEPI tar file
    # so that all instances may share it. Don't operate on the file itself,
    # it would be a mess, just use its path.
    _shared_nepi_tar = None
    
    def __init__(self, api = None):
        super(NepiDependency, self).__init__(api)
        
        # Strong reference to the tarball, keeps the temporary file alive
        # for as long as this dependency exists
        self._tarball = None
        
        self.depends = 'python python-ipaddr python-setuptools'
        
        # our sources are in our ad-hoc tarball
        tar_path = self.tarball.name
        self.sources = tar_path
        
        tarname = os.path.basename(tar_path)
        
        # it's already built - just move the tarball into place
        self.build = "mv -f ${SOURCES}/%s ." % (tarname,)
        
        # unpack it into sources, and we're done
        self.install = "tar xzf ${BUILD}/%s -C .." % (tarname,)
    
    @property
    def tarball(self):
        """
        Temporary file object holding the packed nepi sources.

        The tarball is built at most once per instance. A live tarball
        built by another NepiDependency is reused through the class-level
        weak reference; otherwise a new one is built and published there.

        Raises RuntimeError if the tar command fails.
        """
        if self._tarball is None:
            import weakref
            
            shared_ref = NepiDependency._shared_nepi_tar
            shared_tar = None
            if shared_ref is not None:
                shared_tar = shared_ref()
            
            if shared_tar is None:
                shared_tar = self._build_tarball()
                # Publish on the class (not the instance) and weakly, so
                # other instances can share it without keeping it alive
                NepiDependency._shared_nepi_tar = weakref.ref(shared_tar)
            
            self._tarball = shared_tar
            
        return self._tarball
    
    @staticmethod
    def _build_tarball():
        """Pack the nepi package into a new named temporary .tar.gz file."""
        # Imported here, as the original code did
        import nepi
        import tempfile
        
        tar_file = tempfile.NamedTemporaryFile(prefix='nepi-src-', suffix='.tar.gz')
        
        # The /dev/null handles are only needed while tar runs: close them
        # explicitly instead of leaking them
        null_in = open(os.devnull, "r")
        try:
            null_out = open(os.devnull, "w")
            try:
                proc = subprocess.Popen(
                    ["tar", "czf", tar_file.name, 
                        '-C', os.path.join(os.path.dirname(os.path.dirname(nepi.__file__)),'.'), 
                        'nepi'],
                    stdout = null_out,
                    stdin = null_in)
                failed = proc.wait()
            finally:
                null_out.close()
        finally:
            null_in.close()
        
        if failed:
            # closing the temporary file also removes it
            tar_file.close()
            raise RuntimeError("Failed to create nepi tarball")
        
        return tar_file
912
class NS3Dependency(Dependency):
    """
    This dependency adds NS3 libraries to the library paths,
    so that you may run the NS3 testbed within PL nodes.
    
    You'll also need the NepiDependency.

    The build command checks out pybindgen, downloads pygccxml,
    python-passfd and ns-3, and builds them all into ${BUILD}/target;
    the install command moves the contents of that directory into
    ${SOURCES}. Both are skipped when a working installation is found.
    """
    
    # State read by the `tarball` property. They are defined here so that
    # accessing the property cannot fail with an AttributeError.
    # _shared_nepi_tar holds a *weak* reference to the shared tar file.
    _shared_nepi_tar = None
    _tarball = None
    
    def __init__(self, api = None):
        super(NS3Dependency, self).__init__(api)
        
        self.buildDepends = 'make waf gcc gcc-c++ gccxml unzip bzr'
        
        # We have to download the sources, untar, build...
        #pygccxml_source_url = "http://leaseweb.dl.sourceforge.net/project/pygccxml/pygccxml/pygccxml-1.0/pygccxml-1.0.0.zip"
        pygccxml_source_url = "http://yans.pl.sophia.inria.fr/libs/pygccxml-1.0.0.zip"
        ns3_source_url = "http://nepi.inria.fr/code/nepi-ns3.13/archive/tip.tar.gz"
        passfd_source_url = "http://nepi.inria.fr/code/python-passfd/archive/tip.tar.gz"
        
        pybindgen_version = "797"

        # Shell command: "( is it already working? ) || ( rebuild it all )"
        self.build =(
            " ( "
            "  cd .. && "
            "  python -c 'import pygccxml, pybindgen, passfd' && "
            "  test -f lib/ns/_core.so && "
            "  test -f lib/ns/__init__.py && "
            "  test -f lib/ns/core.py && "
            "  test -f lib/libns3-core.so && "
            "  LD_LIBRARY_PATH=lib PYTHONPATH=lib python -c 'import ns.core' "
            " ) || ( "
                # Not working, rebuild
                     # Archive SHA1 sums to check
                     "echo '7158877faff2254e6c094bf18e6b4283cac19137  pygccxml-1.0.0.zip' > archive_sums.txt && "
                     " ( " # check existing files
                     " sha1sum -c archive_sums.txt && "
                     " test -f passfd-src.tar.gz && "
                     " test -f ns3-src.tar.gz "
                     " ) || ( " # nope? re-download
                     " rm -rf pybindgen pygccxml-1.0.0.zip passfd-src.tar.gz ns3-src.tar.gz && "
                     " bzr checkout lp:pybindgen -r %(pybindgen_version)s && " # continue, to exploit the case when it has already been dl'ed
                     " wget -q -c -O pygccxml-1.0.0.zip %(pygccxml_source_url)s && " 
                     " wget -q -c -O passfd-src.tar.gz %(passfd_source_url)s && "
                     " wget -q -c -O ns3-src.tar.gz %(ns3_source_url)s && "  
                     " sha1sum -c archive_sums.txt " # Check SHA1 sums when applicable
                     " ) && "
                     "unzip -n pygccxml-1.0.0.zip && "
                     "mkdir -p ns3-src && "
                     "mkdir -p passfd-src && "
                     "tar xzf ns3-src.tar.gz --strip-components=1 -C ns3-src && "
                     "tar xzf passfd-src.tar.gz --strip-components=1 -C passfd-src && "
                     "rm -rf target && "    # mv doesn't like unclean targets
                     "mkdir -p target && "
                     "cd pygccxml-1.0.0 && "
                     "rm -rf unittests docs && " # pygccxml has ~100M of unit tests - excessive - docs aren't needed either
                     "python setup.py build && "
                     "python setup.py install --install-lib ${BUILD}/target && "
                     "python setup.py clean && "
                     "cd ../pybindgen && "
                     "export PYTHONPATH=$PYTHONPATH:${BUILD}/target && "
                     "./waf configure --prefix=${BUILD}/target -d release && "
                     "./waf && "
                     "./waf install && "
                     "./waf clean && "
                     "mv -f ${BUILD}/target/lib/python*/site-packages/pybindgen ${BUILD}/target/. && "
                     "rm -rf ${BUILD}/target/lib && "
                     "cd ../passfd-src && "
                     "python setup.py build && "
                     "python setup.py install --install-lib ${BUILD}/target && "
                     "python setup.py clean && "
                     "cd ../ns3-src && "
                     # NOTE(review): pybindgen is checked out into ./pybindgen
                     # above, yet this points at ../pybindgen-src - confirm
                     # whether that path is intended before changing it
                     "./waf configure --prefix=${BUILD}/target --with-pybindgen=../pybindgen-src -d release --disable-examples --disable-tests && "
                     "./waf &&"
                     "./waf install && "
                     "rm -f ${BUILD}/target/lib/*.so && "
                     "cp -a ${BUILD}/ns3-src/build/libns3*.so ${BUILD}/target/lib && "
                     "cp -a ${BUILD}/ns3-src/build/bindings/python/ns ${BUILD}/target/lib &&"
                     "./waf clean "
             " )"
                     % dict(
                        pybindgen_version = server.shell_escape(pybindgen_version),
                        pygccxml_source_url = server.shell_escape(pygccxml_source_url),
                        ns3_source_url = server.shell_escape(ns3_source_url),
                        passfd_source_url = server.shell_escape(passfd_source_url),
                     ))
        
        # Just move ${BUILD}/target
        # (same "already working?" check as the build command comes first)
        self.install = (
            " ( "
            "  cd .. && "
            "  python -c 'import pygccxml, pybindgen, passfd' && "
            "  test -f lib/ns/_core.so && "
            "  test -f lib/ns/__init__.py && "
            "  test -f lib/ns/core.py && "
            "  test -f lib/libns3-core.so && "
            "  LD_LIBRARY_PATH=lib PYTHONPATH=lib python -c 'import ns.core' "
            " ) || ( "
                # Not working, reinstall
                    "test -d ${BUILD}/target && "
                    "[[ \"x\" != \"x$(find ${BUILD}/target -mindepth 1 -print -quit)\" ]] &&"
                    "( for i in ${BUILD}/target/* ; do rm -rf ${SOURCES}/${i##*/} ; done ) && " # mv doesn't like unclean targets
                    "mv -f ${BUILD}/target/* ${SOURCES}"
            " )"
        )
        
        # Set extra environment paths
        self.env['NEPI_NS3BINDINGS'] = "${SOURCES}/lib"
        self.env['NEPI_NS3LIBRARY'] = "${SOURCES}/lib"
    
    @property
    def tarball(self):
        """
        Temporary file object holding the packed nepi sources.

        NOTE(review): nothing in this class's own build/install commands
        uses this property; it looks carried over from NepiDependency -
        confirm whether it is still needed.

        The tarball is built at most once per instance, and a live one
        built by another NS3Dependency is reused through the class-level
        weak reference. Raises RuntimeError if the tar command fails.
        """
        if self._tarball is None:
            import weakref
            
            shared_ref = NS3Dependency._shared_nepi_tar
            shared_tar = None
            if shared_ref is not None:
                shared_tar = shared_ref()
            
            if shared_tar is None:
                # Build an ad-hoc tarball
                import nepi
                import tempfile
                
                shared_tar = tempfile.NamedTemporaryFile(prefix='nepi-src-', suffix='.tar.gz')
                
                # The /dev/null handles are only needed while tar runs:
                # close them explicitly instead of leaking them
                null_in = open(os.devnull, "r")
                try:
                    null_out = open(os.devnull, "w")
                    try:
                        proc = subprocess.Popen(
                            ["tar", "czf", shared_tar.name, 
                                '-C', os.path.join(os.path.dirname(os.path.dirname(nepi.__file__)),'.'), 
                                'nepi'],
                            stdout = null_out,
                            stdin = null_in)
                        failed = proc.wait()
                    finally:
                        null_out.close()
                finally:
                    null_in.close()

                if failed:
                    # closing the temporary file also removes it
                    shared_tar.close()
                    raise RuntimeError("Failed to create nepi tarball")
                
                # Publish on the class, weakly, so instances can share it
                NS3Dependency._shared_nepi_tar = weakref.ref(shared_tar)
            
            self._tarball = shared_tar
                
        return self._tarball
1049
class YumDependency(Dependency):
    """
    This dependency is an internal helper class used to
    efficiently distribute yum-downloaded rpms.
    
    It temporarily sets the yum cache as persistent in the
    build master, and installs all the required packages.
    
    The rpm packages left in the yum cache are gathered and
    distributed by the underlying Dependency in an efficient
    manner. Build slaves will then install those rpms back in
    the cache before issuing the install command.
    
    When packages have been installed already, nothing but an
    empty tar is distributed.

    The `build` and `install` commands are read-only properties computed
    from `depends`; assignments to them are silently ignored.
    """
    
    # Class attribute holding a *weak* reference to the shared NEPI tar file
    # so that they may share it. Don't operate on the file itself, it would
    # be a mess, just use its path.
    _shared_nepi_tar = None
    
    # Output patterns that mark the host as bad. Compiled once for the
    # class. The parentheses and dot of "(repomd.xml)" are escaped so that
    # the pattern matches that literal text instead of opening a group.
    _BAD_HOST_RE = re.compile(r'(?:'
                       r'The GPG keys listed for the ".*" repository are already installed but they are not correct for this package'
                       r'|Error: Cannot retrieve repository metadata \(repomd\.xml\) for repository: .*[.] Please verify its path and try again'
                       r'|Error: disk I/O error'
                       r'|MASTER NODE UNREACHABLE'
                       r')', 
                       re.I)
    
    def _canonical_depends(self):
        """
        Return the package list as a sorted, single-space separated string.

        Splitting on any run of whitespace keeps empty tokens out of the
        result, so equivalent lists always produce the same string.
        """
        return ' '.join( sorted( (self.depends or "").split() ) )
    
    def _build_get(self):
        """Return the shell command that downloads the rpms and packs them."""
        # canonical representation of dependencies
        depends = self._canonical_depends()
        
        # download rpms and pack into a tar archive
        return (
            "sudo -S nice yum -y makecache && "
            "sudo -S sed -i -r 's/keepcache *= *0/keepcache=1/' /etc/yum.conf && "
            " ( ( "
                "sudo -S nice yum -y install %s ; "
                "rm -f ${BUILD}/packages.tar ; "
                "tar -C /var/cache/yum -rf ${BUILD}/packages.tar $(cd /var/cache/yum ; find -iname '*.rpm')"
            " ) || /bin/true ) && "
            "sudo -S sed -i -r 's/keepcache *= *1/keepcache=0/' /etc/yum.conf && "
            "( sudo -S nice yum -y clean packages || /bin/true ) "
        ) % ( depends, )
    def _build_set(self, value):
        # ignore - the command is always derived from `depends`
        return
    build = property(_build_get, _build_set)
    
    def _install_get(self):
        """Return the shell command that installs from the packed rpms."""
        # canonical representation of dependencies
        depends = self._canonical_depends()
        
        # unpack cached rpms into yum cache, install, and cleanup
        return (
            "sudo -S tar -k --keep-newer-files -C /var/cache/yum -xf packages.tar && "
            "sudo -S nice yum -y install %s && "
            "( sudo -S nice yum -y clean packages || /bin/true ) "
        ) % ( depends, )
    def _install_set(self, value):
        # ignore - the command is always derived from `depends`
        return
    install = property(_install_get, _install_set)
        
    def check_bad_host(self, out, err):
        """
        Tell whether the given command output reveals a bad host.

        Returns a truthy value when `out` or `err` contains one of the
        known yum / master-node failure messages, and otherwise whatever
        the node's own check_bad_host(out, err) returns.
        """
        badre = self._BAD_HOST_RE
        return badre.search(out) or badre.search(err) or self.node.check_bad_host(out,err)
1117
1118
class CCNxDaemon(Application):
    """
    Application that builds CCNx from sources and runs its daemon.

    start() launches `ccndstart` and then adds the configured routes
    with `ccndc`; kill() runs `ccndstop` before killing the application.

    Attributes:
        ccnLocalPort: when set, exported as CCN_LOCAL_PORT for the
            start and stop commands.
        ccnRoutes: "|"-separated list of routes to add on start.
        ccnxVersion: 'ccnx-0.6.0' (default) or 'ccnx-0.5.1'; selects
            the default build and install commands in setup().
    """
    
    def __init__(self, api=None):
        super(CCNxDaemon,self).__init__(api)
        
        # Attributes
        self.ccnLocalPort = None
        self.ccnRoutes = None
        self.ccnxVersion = "ccnx-0.6.0"
        
        #self.ccnx_0_5_1_sources = "http://www.ccnx.org/releases/ccnx-0.5.1.tar.gz"
        self.ccnx_0_5_1_sources = "http://yans.pl.sophia.inria.fr/libs/ccnx-0.5.1.tar.gz"
        #self.ccnx_0_6_0_sources = "http://www.ccnx.org/releases/ccnx-0.6.0.tar.gz"
        self.ccnx_0_6_0_sources = "http://yans.pl.sophia.inria.fr/libs/ccnx-0.6.0.tar.gz"
        #self.buildDepends = 'make gcc development-tools openssl-devel expat-devel libpcap-devel libxml2-devel'
        self.buildDepends = 'make gcc openssl-devel expat-devel libpcap-devel libxml2-devel'

        # Shell command: "( already built? ) || ( download and build )"
        self.ccnx_0_5_1_build = (
            " ( "
            "  cd .. && "
            "  test -d ccnx-0.5.1-src/build/bin "
            " ) || ( "
                # Not working, rebuild
                "("
                     " mkdir -p ccnx-0.5.1-src && "
                     " wget -q -c -O ccnx-0.5.1-src.tar.gz %(ccnx_source_url)s &&"
                     " tar xf ccnx-0.5.1-src.tar.gz --strip-components=1 -C ccnx-0.5.1-src "
                ") && "
                     "cd ccnx-0.5.1-src && "
                     "mkdir -p build/include &&"
                     "mkdir -p build/lib &&"
                     "mkdir -p build/bin &&"
                     "I=$PWD/build && "
                     "INSTALL_BASE=$I ./configure &&"
                     "make && make install"
             " )") % dict(
                     ccnx_source_url = server.shell_escape(self.ccnx_0_5_1_sources),
                )

        self.ccnx_0_5_1_install = (
            " ( "
            "  test -d ${BUILD}/ccnx-0.5.1-src/build/bin && "
            "  cp -r ${BUILD}/ccnx-0.5.1-src/build/bin ${SOURCES}"
            " )"
        )

        # NOTE(review): this tests for ccnx-0.6.0-src/build/bin, while the
        # 0.6.0 install command copies from ccnx-0.6.0-src/bin - confirm
        # which directory the 0.6.0 build actually produces
        self.ccnx_0_6_0_build = (
            " ( "
            "  cd .. && "
            "  test -d ccnx-0.6.0-src/build/bin "
            " ) || ( "
                # Not working, rebuild
                "("
                     " mkdir -p ccnx-0.6.0-src && "
                     " wget -q -c -O ccnx-0.6.0-src.tar.gz %(ccnx_source_url)s &&"
                     " tar xf ccnx-0.6.0-src.tar.gz --strip-components=1 -C ccnx-0.6.0-src "
                ") && "
                     "cd ccnx-0.6.0-src && "
                     "./configure && make"
             " )") % dict(
                     ccnx_source_url = server.shell_escape(self.ccnx_0_6_0_sources),
                )

        self.ccnx_0_6_0_install = (
            " ( "
            "  test -d ${BUILD}/ccnx-0.6.0-src/bin && "
            "  cp -r ${BUILD}/ccnx-0.6.0-src/bin ${SOURCES}"
            " )"
        )

        self.env['PATH'] = "$PATH:${SOURCES}/bin"

    def setup(self):
        """
        Pick the default build/install commands, then run the base setup.

        Commands set explicitly by the user are kept. For a ccnxVersion
        other than the two known ones no default is selected.
        """
        # setting ccn sources
        if not self.build:
            if self.ccnxVersion == 'ccnx-0.6.0':
                self.build = self.ccnx_0_6_0_build
            elif self.ccnxVersion == 'ccnx-0.5.1':
                self.build = self.ccnx_0_5_1_build

        if not self.install:
            if self.ccnxVersion == 'ccnx-0.6.0':
                self.install = self.ccnx_0_6_0_install
            elif self.ccnxVersion == 'ccnx-0.5.1':
                self.install = self.ccnx_0_5_1_install

        super(CCNxDaemon, self).setup()

    def start(self):
        """
        Compose the daemon's command line and start the application.

        The command exports CCN_LOCAL_PORT (when ccnLocalPort is set),
        runs `ccndstart` and then one `ccndc add ccnx:/ <route>` per
        configured route. Routes matching _ccnre get the local port
        appended when one is set.
        """
        self.command = ""
        if self.ccnLocalPort:
            self.command = "export CCN_LOCAL_PORT=%s ; " % self.ccnLocalPort
        self.command += " ccndstart "

        # configure ccn routes
        if self.ccnRoutes:
            routes = self.ccnRoutes.split("|")
            
            if self.ccnLocalPort:
                routes = [
                    "%s %s" % (route, self.ccnLocalPort)
                    if _ccnre.match(route) else route
                    for route in routes
                ]

            routes = ["ccndc add ccnx:/ %s" % route for route in routes]

            routescmd = " ; ".join(routes)
            self.command += " ; "
            self.command += routescmd

        # Start will be invoked in prestart step
        super(CCNxDaemon, self).start()
            
    def kill(self):
        """
        Stop the daemon with `ccndstop`, then kill the application.

        A kill.sh script is uploaded to the application's home path and
        spawned remotely (its output goes to 'killlog'); afterwards the
        base class kill() is invoked.

        Raises RuntimeError if the script cannot be uploaded or spawned.
        """
        self._logger.info("Killing %s", self)

        command = "${SOURCES}/bin/ccndstop"

        if self.ccnLocalPort:
            # The export has to end up in the kill script itself; it must
            # not overwrite self.command, which holds the start command
            command = "export CCN_LOCAL_PORT=%s; %s" % (self.ccnLocalPort, command)

        cmd = self._replace_paths(command)
        script_file = cStringIO.StringIO()
        script_file.write(cmd)
        script_file.seek(0)

        try:
            self._popen_scp(
                script_file,
                '%s@%s:%s' % (self.node.slicename, self.node.hostname, 
                    os.path.join(self.home_path, "kill.sh"))
                )
        except RuntimeError:
            # sys.exc_info() is valid syntax on every python version
            e = sys.exc_info()[1]
            raise RuntimeError("Failed to kill ccndxdaemon: %s %s" \
                    % (e.args[0], e.args[1],))
        

        script = "bash ./kill.sh"
        (out,err),proc = rspawn.remote_spawn(
            script,
            pidfile = 'kill-pid',
            home = self.home_path,
            stdin = '/dev/null',
            stdout = 'killlog',
            stderr = rspawn.STDOUT,
            
            host = self.node.hostname,
            port = None,
            user = self.node.slicename,
            agent = None,
            ident_key = self.node.ident_path,
            server_key = self.node.server_key,
            hostip = self.node.hostip,
            )
        
        if proc.wait():
            raise RuntimeError("Failed to kill cnnxdaemon: %s %s" % (out,err,))
        
        super(CCNxDaemon, self).kill()
1283