349ddc27147e43d52a58d4a1828c1be3c5a7da16
[nepi.git] / src / nepi / resources / linux / udptunnel.py
1 #
2 #    NEPI, a framework to manage network experiments
3 #    Copyright (C) 2013 INRIA
4 #
5 #    This program is free software: you can redistribute it and/or modify
6 #    it under the terms of the GNU General Public License as published by
7 #    the Free Software Foundation, either version 3 of the License, or
8 #    (at your option) any later version.
9 #
10 #    This program is distributed in the hope that it will be useful,
11 #    but WITHOUT ANY WARRANTY; without even the implied warranty of
12 #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 #    GNU General Public License for more details.
14 #
15 #    You should have received a copy of the GNU General Public License
16 #    along with this program.  If not, see <http://www.gnu.org/licenses/>.
17 #
18 # Author: Alina Quereilhac <alina.quereilhac@inria.fr>
19
20 from nepi.execution.attribute import Attribute, Flags, Types
21 from nepi.execution.resource import ResourceManager, clsinit_copy, ResourceState, \
22         reschedule_delay
23 from nepi.resources.linux.application import LinuxApplication
24 from nepi.util.sshfuncs import ProcStatus
25 from nepi.util.timefuncs import tnow, tdiffsec
26
27 import os
28 import socket
29 import time
30
@clsinit_copy
class UdpTunnel(LinuxApplication):
    """Resource manager that sets up a UDP tunnel between two endpoints.

    The endpoints are the connected resource managers that expose a
    ``udp_connect_command`` method (see ``get_endpoints``). For each
    endpoint a connect script is uploaded and run on the endpoint's node;
    the two sides then exchange their local ports through files written
    in the per-run tunnel directory.
    """
    _rtype = "UdpTunnel"

    @classmethod
    def _register_attributes(cls):
        """Register the tunnel attributes (cipher, key, queue length, bw)."""
        cipher = Attribute("cipher",
               "Cipher to encript communication. "
                "One of PLAIN, AES, Blowfish, DES, DES3. ",
                default = None,
                allowed = ["PLAIN", "AES", "Blowfish", "DES", "DES3"],
                type = Types.Enumerate,
                flags = Flags.ExecReadOnly)

        cipher_key = Attribute("cipherKey",
                "Specify a symmetric encryption key with which to protect "
                "packets across the tunnel. python-crypto must be installed "
                "on the system." ,
                flags = Flags.ExecReadOnly)

        txqueuelen = Attribute("txQueueLen",
                "Specifies the interface's transmission queue length. "
                "Defaults to 1000. ",
                type = Types.Integer,
                flags = Flags.ExecReadOnly)

        bwlimit = Attribute("bwLimit",
                "Specifies the interface's emulated bandwidth in bytes "
                "per second.",
                type = Types.Integer,
                flags = Flags.ExecReadOnly)

        cls._register_attribute(cipher)
        cls._register_attribute(cipher_key)
        cls._register_attribute(txqueuelen)
        cls._register_attribute(bwlimit)

    def __init__(self, ec, guid):
        """Initialize the tunnel RM.

        :param ec: experiment controller, forwarded to the parent class.
        :param guid: guid of this resource, forwarded to the parent class.
        """
        super(UdpTunnel, self).__init__(ec, guid)
        # Name of the tunnel directory created under each node's exp_home
        self._home = "udp-tunnel-%s" % self.guid
        # pid / ppid of the connect process on endpoint 1 and endpoint 2;
        # they stay None until udp_connect() retrieves them
        self._pid1 = None
        self._ppid1 = None
        self._pid2 = None
        self._ppid2 = None

    def log_message(self, msg):
        """Return msg prefixed with the guid and both endpoint hostnames.

        NOTE(review): both endpoints must be connected when this is
        called, otherwise the attribute access on None fails.
        """
        return " guid %d - tunnel %s - %s - %s " % (self.guid,
                self.endpoint1.node.get("hostname"),
                self.endpoint2.node.get("hostname"),
                msg)

    def get_endpoints(self):
        """ Returns the list of RM that are endpoints to the tunnel

        An RM counts as an endpoint when it is connected to this RM and
        exposes a ``udp_connect_command`` attribute.
        """
        connected = []
        for guid in self.connections:
            rm = self.ec.get_resource(guid)
            if hasattr(rm, "udp_connect_command"):
                connected.append(rm)
        return connected

    @property
    def endpoint1(self):
        """First endpoint RM, or None if no endpoint is connected."""
        endpoints = self.get_endpoints()
        if endpoints:
            return endpoints[0]
        return None

    @property
    def endpoint2(self):
        """Second endpoint RM, or None if fewer than two are connected."""
        endpoints = self.get_endpoints()
        if len(endpoints) > 1:
            return endpoints[1]
        return None

    def app_home(self, endpoint):
        """Return the tunnel directory under the endpoint node's exp_home."""
        return os.path.join(endpoint.node.exp_home, self._home)

    def run_home(self, endpoint):
        """Return the per-run directory (app_home joined with ec.run_id)."""
        return os.path.join(self.app_home(endpoint), self.ec.run_id)

    def udp_connect(self, endpoint, remote_ip):
        """Upload and run the connect script on one endpoint.

        :param endpoint: endpoint RM on whose node the script is run.
        :param remote_ip: IP address passed to the endpoint's
            ``udp_connect_command`` as the remote side.
        :returns: tuple ``(port, pid, ppid)`` with the content of the
            endpoint's ``local_port`` file and the pid / ppid obtained
            from ``node.wait_pid``.
        :raises RuntimeError: if running the script reports a failure,
            if error output is found when no pid / ppid could be
            retrieved, or if the local port file never shows up.
        """
        run_home = self.run_home(endpoint)

        # Get udp connect command
        local_port_file = os.path.join(run_home, "local_port")
        remote_port_file = os.path.join(run_home, "remote_port")
        ret_file = os.path.join(run_home, "ret_file")
        cipher = self.get("cipher")
        cipher_key = self.get("cipherKey")
        bwlimit = self.get("bwLimit")
        txqueuelen = self.get("txQueueLen")
        udp_connect_command = endpoint.udp_connect_command(
                remote_ip, local_port_file, remote_port_file,
                ret_file, cipher, cipher_key, bwlimit, txqueuelen)

        # upload command to connect.sh script
        shfile = os.path.join(self.app_home(endpoint), "udp-connect.sh")
        endpoint.node.upload(udp_connect_command,
                shfile,
                text = True,
                overwrite = False)

        # invoke connect script
        cmd = "bash %s" % shfile
        (out, err), proc = endpoint.node.run(cmd, run_home)

        # check if execution errors occurred
        if proc.poll():
            msg = " Failed to connect endpoints "
            self.fail()
            self.error(msg, out, err)
            raise RuntimeError(msg)

        # Wait for pid file to be generated
        pid, ppid = endpoint.node.wait_pid(run_home)

        # If the process is not running, check for error information
        # on the remote machine
        if not pid or not ppid:
            (out, err), proc = endpoint.node.check_errors(run_home)
            # Out is what was written in the stderr file
            if err:
                self.fail()
                # Report the command that was actually invoked
                msg = " Failed to start command '%s' " % cmd
                self.error(msg, out, err)
                raise RuntimeError(msg)

        # wait until port is written to file
        port = self.wait_local_port(endpoint)
        return (port, pid, ppid)

    def provision(self):
        """Connect both endpoints and exchange their local ports.

        Creates the run directory on each node, runs the connect script
        on both sides, uploads each side's port to the other side and
        waits for both result files before marking the RM PROVISIONED.
        """
        # create run dir for tunnel on each node
        self.endpoint1.node.mkdir(self.run_home(self.endpoint1))
        self.endpoint2.node.mkdir(self.run_home(self.endpoint2))

        # Invoke connect script in endpoint 1
        remote_ip1 = socket.gethostbyname(self.endpoint2.node.get("hostname"))
        (port1, self._pid1, self._ppid1) = self.udp_connect(self.endpoint1,
                remote_ip1)

        # Invoke connect script in endpoint 2
        remote_ip2 = socket.gethostbyname(self.endpoint1.node.get("hostname"))
        (port2, self._pid2, self._ppid2) = self.udp_connect(self.endpoint2,
                remote_ip2)

        # upload file with port 2 to endpoint 1
        self.upload_remote_port(self.endpoint1, port2)

        # upload file with port 1 to endpoint 2
        self.upload_remote_port(self.endpoint2, port1)

        # check if connection was successful on both sides
        self.wait_result(self.endpoint1)
        self.wait_result(self.endpoint2)

        self.info("Provisioning finished")

        self.debug("----- READY ---- ")
        self._provision_time = tnow()
        self._state = ResourceState.PROVISIONED

    def deploy(self):
        """Deploy the tunnel once both endpoints are at least READY.

        While an endpoint is missing or not yet READY, the deploy is
        rescheduled through the experiment controller. Any error during
        discover / provision marks the RM as failed and is re-raised.
        """
        if (not self.endpoint1 or self.endpoint1.state < ResourceState.READY) or \
            (not self.endpoint2 or self.endpoint2.state < ResourceState.READY):
            self.ec.schedule(reschedule_delay, self.deploy)
        else:
            try:
                self.discover()
                self.provision()
            except:
                # Deliberately broad: flag the failure, then propagate
                # the original exception unchanged.
                self.fail()
                raise

            self.debug("----- READY ---- ")
            self._ready_time = tnow()
            self._state = ResourceState.READY

    def start(self):
        """Mark the tunnel as STARTED.

        :raises RuntimeError: if the RM is not in the READY state; the
            RM state is set to FAILED in that case.
        """
        # Read the command up front so both branches can report it
        command = self.get("command")

        if self._state == ResourceState.READY:
            self.info("Starting command '%s'" % command)

            self._start_time = tnow()
            self._state = ResourceState.STARTED
        else:
            # Nothing was executed here, so there is no out / err to log
            msg = " Failed to execute command '%s'" % command
            self.error(msg)
            self._state = ResourceState.FAILED
            raise RuntimeError(msg)

    # XXX: Leaves process unkilled!!
    #       Implement another mechanism to kill the tunnel!
    def stop(self):
        """ Stops application execution

        Only acts when the RM is STARTED. The kill is attempted only if
        the pid and ppid of both sides were retrieved; if it reports an
        error the RM is marked as failed instead of STOPPED.
        """
        if self.state == ResourceState.STARTED:
            stopped = True
            self.info("Stopping tunnel")

            # Only try to kill the process if the pid and ppid
            # were retrieved
            if self._pid1 and self._ppid1 and self._pid2 and self._ppid2:
                (out1, err1), proc1 = self.endpoint1.node.kill(self._pid1,
                        self._ppid1, sudo = True)
                (out2, err2), proc2 = self.endpoint2.node.kill(self._pid2,
                        self._ppid2, sudo = True)

                if err1 or err2 or proc1.poll() or proc2.poll():
                    # check if execution errors occurred
                    msg = " Failed to STOP tunnel"
                    self.error(msg, err1, err2)
                    self.fail()
                    stopped = False

            if stopped:
                self._stop_time = tnow()
                self._state = ResourceState.STOPPED

    @property
    def state(self):
        """ Returns the state of the application

        While STARTED, the remote processes are polled at most once per
        'state_check_delay' seconds. When both report FINISHED the RM
        becomes FINISHED, or failed if error output is found.
        """
        if self._state == ResourceState.STARTED:
            # In order to avoid overwhelming the remote host and
            # the local processor with too many ssh queries, the state is only
            # requested every 'state_check_delay' seconds.
            state_check_delay = 0.5
            if tdiffsec(tnow(), self._last_state_check) > state_check_delay:
                if self._pid1 and self._ppid1 and self._pid2 and self._ppid2:
                    # Make sure the background processes with the
                    # recorded pids are still running.
                    status1 = self.endpoint1.node.status(self._pid1, self._ppid1)
                    status2 = self.endpoint2.node.status(self._pid2, self._ppid2)

                    if status1 == ProcStatus.FINISHED and \
                            status2 == ProcStatus.FINISHED:

                        # check if execution errors occurred
                        (out1, err1), proc1 = self.endpoint1.node.check_errors(
                                self.run_home(self.endpoint1))

                        (out2, err2), proc2 = self.endpoint2.node.check_errors(
                                self.run_home(self.endpoint2))

                        if err1 or err2:
                            msg = "Error occurred in tunnel"
                            self.error(msg, err1, err2)
                            self.fail()
                        else:
                            self._state = ResourceState.FINISHED

                # Refreshed whenever the delay elapsed, even if no pids
                # were available to poll
                self._last_state_check = tnow()

        return self._state

    def wait_local_port(self, endpoint):
        """ Waits until the local_port file for the endpoint is generated,
            and returns the port number """
        return self.wait_file(endpoint, "local_port")

    def wait_result(self, endpoint):
        """ Waits until the return code file for the endpoint is generated """
        return self.wait_file(endpoint, "ret_file")

    def wait_file(self, endpoint, filename):
        """ Waits until file on endpoint is generated

        The file is looked up in the endpoint's run home up to 4 times,
        sleeping between attempts with a delay that starts at 1 second
        and grows by a factor of 1.5 after each miss.

        :returns: the stripped content of the file.
        :raises RuntimeError: if no content was obtained after all the
            attempts; the RM is marked as failed first.
        """
        result = None
        delay = 1.0

        for i in range(4):
            (out, err), proc = endpoint.node.check_output(
                    self.run_home(endpoint), filename)

            if out:
                result = out.strip()
                break
            else:
                time.sleep(delay)
                delay = delay * 1.5
        else:
            # Loop ended without a break: the file never had content
            msg = "Couldn't retrieve %s" % filename
            self.error(msg, out, err)
            self.fail()
            raise RuntimeError(msg)

        return result

    def upload_remote_port(self, endpoint, port):
        """Write port (plus a newline) to the endpoint's remote_port file."""
        # upload remote port number to file
        port = "%s\n" % port
        endpoint.node.upload(port,
                os.path.join(self.run_home(endpoint), "remote_port"),
                text = True,
                overwrite = False)

    def valid_connection(self, guid):
        """Accept any connection; no validation is implemented yet."""
        # TODO: Validate!
        return True
332