2 # NEPI, a framework to manage network experiments
3 # Copyright (C) 2013 INRIA
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published by
7 # the Free Software Foundation, either version 3 of the License, or
8 # (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program. If not, see <http://www.gnu.org/licenses/>.
18 # Author: Alina Quereilhac <alina.quereilhac@inria.fr>
19 # Lucia Guevgeozian <lucia.guevgeozian_odizzio@inria.fr>
21 from nepi.execution.attribute import Attribute, Flags, Types
22 from nepi.execution.resource import ResourceManager, clsinit_copy, \
23 ResourceState, reschedule_delay
24 from nepi.resources.linux.node import LinuxNode
25 from nepi.resources.planetlab.plcapi import PLCAPIFactory
26 from nepi.util.execfuncs import lexec
27 from nepi.util import sshfuncs
29 from random import randint
39 class PlanetlabNode(LinuxNode):
40 _rtype = "PlanetlabNode"
41 _help = "Controls a PlanetLab host accessible using a SSH key " \
42 "associated to a PlanetLab user account"
43 _backend = "planetlab"
45 lock = threading.Lock()
48 def _register_attributes(cls):
49 ip = Attribute("ip", "PlanetLab host public IP address",
52 pl_url = Attribute("plcApiUrl", "URL of PlanetLab PLCAPI host \
53 (e.g. www.planet-lab.eu or www.planet-lab.org) ",
54 default = "www.planet-lab.eu",
55 flags = Flags.Credential)
57 pl_ptn = Attribute("plcApiPattern", "PLC API service regexp pattern \
58 (e.g. https://%(hostname)s:443/PLCAPI/ ) ",
59 default = "https://%(hostname)s:443/PLCAPI/",
62 pl_user = Attribute("pluser", "PlanetLab account user, as the one to \
63 authenticate in the website) ",
64 flags = Flags.Credential)
66 pl_password = Attribute("plpassword",
67 "PlanetLab account password, as \
68 the one to authenticate in the website) ",
69 flags = Flags.Credential)
71 city = Attribute("city", "Constrain location (city) during resource \
72 discovery. May use wildcards.",
75 country = Attribute("country", "Constrain location (country) during \
76 resource discovery. May use wildcards.",
79 region = Attribute("region", "Constrain location (region) during \
80 resource discovery. May use wildcards.",
83 architecture = Attribute("architecture", "Constrain architecture \
84 during resource discovery.",
85 type = Types.Enumerate,
90 operating_system = Attribute("operatingSystem", "Constrain operating \
91 system during resource discovery.",
92 type = Types.Enumerate,
100 min_reliability = Attribute("minReliability", "Constrain reliability \
101 while picking PlanetLab nodes. Specifies a lower \
105 flags = Flags.Filter)
107 max_reliability = Attribute("maxReliability", "Constrain reliability \
108 while picking PlanetLab nodes. Specifies an upper \
112 flags = Flags.Filter)
114 min_bandwidth = Attribute("minBandwidth", "Constrain available \
115 bandwidth while picking PlanetLab nodes. \
116 Specifies a lower acceptable bound.",
119 flags = Flags.Filter)
121 max_bandwidth = Attribute("maxBandwidth", "Constrain available \
122 bandwidth while picking PlanetLab nodes. \
123 Specifies an upper acceptable bound.",
126 flags = Flags.Filter)
128 min_load = Attribute("minLoad", "Constrain node load average while \
129 picking PlanetLab nodes. Specifies a lower acceptable \
133 flags = Flags.Filter)
135 max_load = Attribute("maxLoad", "Constrain node load average while \
136 picking PlanetLab nodes. Specifies an upper acceptable \
140 flags = Flags.Filter)
142 min_cpu = Attribute("minCpu", "Constrain available cpu time while \
143 picking PlanetLab nodes. Specifies a lower acceptable \
147 flags = Flags.Filter)
149 max_cpu = Attribute("maxCpu", "Constrain available cpu time while \
150 picking PlanetLab nodes. Specifies an upper acceptable \
154 flags = Flags.Filter)
156 timeframe = Attribute("timeframe", "Past time period in which to check\
157 information about the node. Values are year,month, \
160 type = Types.Enumerate,
165 flags = Flags.Filter)
167 plblacklist = Attribute("persist_blacklist", "Take into account the file plblacklist \
168 in the user's home directory under .nepi directory. This file \
169 contains a list of PL nodes to blacklist, and at the end \
170 of the experiment execution the new blacklisted nodes are added.",
173 flags = Flags.Global)
176 cls._register_attribute(ip)
177 cls._register_attribute(pl_url)
178 cls._register_attribute(pl_ptn)
179 cls._register_attribute(pl_user)
180 cls._register_attribute(pl_password)
181 cls._register_attribute(city)
182 cls._register_attribute(country)
183 cls._register_attribute(region)
184 cls._register_attribute(architecture)
185 cls._register_attribute(operating_system)
186 cls._register_attribute(min_reliability)
187 cls._register_attribute(max_reliability)
188 cls._register_attribute(min_bandwidth)
189 cls._register_attribute(max_bandwidth)
190 cls._register_attribute(min_load)
191 cls._register_attribute(max_load)
192 cls._register_attribute(min_cpu)
193 cls._register_attribute(max_cpu)
194 cls._register_attribute(timeframe)
195 cls._register_attribute(plblacklist)
197 def __init__(self, ec, guid):
198 super(PlanetlabNode, self).__init__(ec, guid)
200 self._ecobj = weakref.ref(ec)
202 self._node_to_provision = None
203 self._slicenode = False
204 self._hostname = False
206 if self.get("gateway") or self.get("gatewayUser"):
207 self.set("gateway", None)
208 self.set("gatewayUser", None)
211 nepi_home = os.path.join(os.path.expanduser("~"), ".nepi")
212 plblacklist_file = os.path.join(nepi_home, "plblacklist.txt")
213 if not os.path.exists(plblacklist_file):
214 if os.path.isdir(nepi_home):
215 open(plblacklist_file, 'w').close()
217 os.makedirs(nepi_home)
218 open(plblacklist_file, 'w').close()
220 def _skip_provision(self):
221 pl_user = self.get("pluser")
222 pl_pass = self.get("plpassword")
223 if not pl_user and not pl_pass:
230 pl_user = self.get("pluser")
231 pl_pass = self.get("plpassword")
232 pl_url = self.get("plcApiUrl")
233 pl_ptn = self.get("plcApiPattern")
234 _plapi = PLCAPIFactory.get_api(pl_user, pl_pass, pl_url,
235 pl_ptn, self._ecobj())
240 self._plapi = weakref.ref(_plapi)
244 def do_discover(self):
246 Based on the attributes defined by the user, discover the suitable
249 if self._skip_provision():
250 super(PlanetlabNode, self).do_discover()
253 hostname = self._get_hostname()
255 # the user specified one particular node to be provisioned
256 self._hostname = True
257 node_id = self._get_nodes_id({'hostname':hostname})
258 node_id = node_id.pop()['node_id']
260 # check that the node is not blacklisted or being provisioned
262 with PlanetlabNode.lock:
263 plist = self.plapi.reserved()
264 blist = self.plapi.blacklisted()
265 if node_id not in blist and node_id not in plist:
267 # check that is really alive, by performing ping
268 ping_ok = self._do_ping(node_id)
270 self._blacklist_node(node_id)
271 self.fail_node_not_alive(hostname)
273 if self._check_if_in_slice([node_id]):
274 self._slicenode = True
275 self._put_node_in_provision(node_id)
276 self._node_to_provision = node_id
278 self.fail_node_not_available(hostname)
279 super(PlanetlabNode, self).do_discover()
282 # the user specifies constraints based on attributes, zero, one or
283 # more nodes can match these constraints
284 nodes = self._filter_based_on_attributes()
286 # nodes that are already part of user's slice have the priority to
288 nodes_inslice = self._check_if_in_slice(nodes)
289 nodes_not_inslice = list(set(nodes) - set(nodes_inslice))
293 node_id = self._choose_random_node(nodes_inslice)
294 self._slicenode = True
297 # Either there were no matching nodes in the user's slice, or
298 # the nodes in the slice were blacklisted or being provisioned
299 # by other RM. Note nodes_not_inslice is never empty
300 node_id = self._choose_random_node(nodes_not_inslice)
301 self._slicenode = False
304 self._node_to_provision = node_id
306 self._set_hostname_attr(node_id)
307 self.info(" Selected node to provision ")
308 super(PlanetlabNode, self).do_discover()
310 with PlanetlabNode.lock:
311 self._blacklist_node(node_id)
314 self.fail_not_enough_nodes()
316 def do_provision(self):
318 Add node to user's slice after verifing that the node is functioning
321 if self._skip_provision():
322 super(PlanetlabNode, self).do_provision()
330 while not provision_ok:
331 node = self._node_to_provision
332 if not self._slicenode:
333 self._add_node_to_slice(node)
334 if self._check_if_in_slice([node]):
335 self.debug( "Node added to slice" )
337 self.warning(" Could not add to slice ")
338 with PlanetlabNode.lock:
339 self._blacklist_node(node)
343 # check ssh connection
345 while t < timeout and not ssh_ok:
347 cmd = 'echo \'GOOD NODE\''
348 ((out, err), proc) = self.execute(cmd)
349 if out.find("GOOD NODE") < 0:
350 self.debug( "No SSH connection, waiting 60s" )
355 self.debug( "SSH OK" )
359 cmd = 'echo \'GOOD NODE\''
360 ((out, err), proc) = self.execute(cmd)
361 if not out.find("GOOD NODE") < 0:
365 # the timeout was reach without establishing ssh connection
366 # the node is blacklisted, deleted from the slice, and a new
367 # node to provision is discovered
368 with PlanetlabNode.lock:
369 self.warning(" Could not SSH login ")
370 self._blacklist_node(node)
371 #self._delete_node_from_slice(node)
375 # check /proc directory is mounted (ssh_ok = True)
376 # and file system is not read only
378 cmd = 'mount |grep proc'
379 ((out1, err1), proc1) = self.execute(cmd)
380 cmd = 'touch /tmp/tmpfile; rm /tmp/tmpfile'
381 ((out2, err2), proc2) = self.execute(cmd)
382 if out1.find("/proc type proc") < 0 or \
383 "Read-only file system".lower() in err2.lower():
384 with PlanetlabNode.lock:
385 self.warning(" Corrupted file system ")
386 self._blacklist_node(node)
387 #self._delete_node_from_slice(node)
393 if not self.get('hostname'):
394 self._set_hostname_attr(node)
396 ip = self._get_ip(node)
398 self.info(" Node provisioned ")
400 super(PlanetlabNode, self).do_provision()
402 def do_release(self):
403 super(PlanetlabNode, self).do_release()
404 if self.state == ResourceState.RELEASED and not self._skip_provision():
405 self.debug(" Releasing PLC API ")
408 def _filter_based_on_attributes(self):
410 Retrive the list of nodes ids that match user's constraints
412 # Map user's defined attributes with tagnames of PlanetLab
413 timeframe = self.get("timeframe")[0]
416 'country' : 'country',
418 'architecture' : 'arch',
419 'operatingSystem' : 'fcdistro',
420 'minReliability' : 'reliability%s' % timeframe,
421 'maxReliability' : 'reliability%s' % timeframe,
422 'minBandwidth' : 'bw%s' % timeframe,
423 'maxBandwidth' : 'bw%s' % timeframe,
424 'minLoad' : 'load%s' % timeframe,
425 'maxLoad' : 'load%s' % timeframe,
426 'minCpu' : 'cpu%s' % timeframe,
427 'maxCpu' : 'cpu%s' % timeframe,
433 for attr_name, attr_obj in self._attrs.iteritems():
434 attr_value = self.get(attr_name)
436 if attr_value is not None and attr_obj.has_flag(Flags.Filter) and \
437 attr_name != 'timeframe':
439 attr_tag = attr_to_tags[attr_name]
440 filters['tagname'] = attr_tag
442 # filter nodes by fixed constraints e.g. operating system
443 if not 'min' in attr_name and not 'max' in attr_name:
444 filters['value'] = attr_value
445 nodes_id = self._filter_by_fixed_attr(filters, nodes_id)
447 # filter nodes by range constraints e.g. max bandwidth
448 elif ('min' or 'max') in attr_name:
449 nodes_id = self._filter_by_range_attr(attr_name, attr_value, filters, nodes_id)
452 nodes = self._get_nodes_id()
454 nodes_id.append(node['node_id'])
457 def _filter_by_fixed_attr(self, filters, nodes_id):
459 Query PLCAPI for nodes ids matching fixed attributes defined by the
462 node_tags = self.plapi.get_node_tags(filters)
463 if node_tags is not None:
465 if len(nodes_id) == 0:
466 # first attribute being matched
467 for node_tag in node_tags:
468 nodes_id.append(node_tag['node_id'])
470 # remove the nodes ids that don't match the new attribute
471 # that is being match
474 for node_tag in node_tags:
475 if node_tag['node_id'] in nodes_id:
476 nodes_id_tmp.append(node_tag['node_id'])
478 if len(nodes_id_tmp):
479 nodes_id = set(nodes_id) & set(nodes_id_tmp)
481 # no node from before match the new constraint
482 self.fail_discovery()
484 # no nodes match the filter applied
485 self.fail_discovery()
489 def _filter_by_range_attr(self, attr_name, attr_value, filters, nodes_id):
491 Query PLCAPI for nodes ids matching attributes defined in a certain
494 node_tags = self.plapi.get_node_tags(filters)
497 if len(nodes_id) == 0:
498 # first attribute being matched
499 for node_tag in node_tags:
501 # check that matches the min or max restriction
502 if 'min' in attr_name and node_tag['value'] != 'n/a' and \
503 float(node_tag['value']) > attr_value:
504 nodes_id.append(node_tag['node_id'])
506 elif 'max' in attr_name and node_tag['value'] != 'n/a' and \
507 float(node_tag['value']) < attr_value:
508 nodes_id.append(node_tag['node_id'])
511 # remove the nodes ids that don't match the new attribute
512 # that is being match
514 for node_tag in node_tags:
516 # check that matches the min or max restriction and was a
517 # matching previous filters
518 if 'min' in attr_name and node_tag['value'] != 'n/a' and \
519 float(node_tag['value']) > attr_value and \
520 node_tag['node_id'] in nodes_id:
521 nodes_id_tmp.append(node_tag['node_id'])
523 elif 'max' in attr_name and node_tag['value'] != 'n/a' and \
524 float(node_tag['value']) < attr_value and \
525 node_tag['node_id'] in nodes_id:
526 nodes_id_tmp.append(node_tag['node_id'])
528 if len(nodes_id_tmp):
529 nodes_id = set(nodes_id) & set(nodes_id_tmp)
531 # no node from before match the new constraint
532 self.fail_discovery()
535 # no nodes match the filter applied
536 self.fail_discovery()
540 def _choose_random_node(self, nodes):
542 From the possible nodes for provision, choose randomly to decrese the
543 probability of different RMs choosing the same node for provision
548 index = randint(0, size)
549 node_id = nodes[index]
550 nodes[index] = nodes[size]
552 # check the node is not blacklisted or being provision by other RM
553 # and perform ping to check that is really alive
554 with PlanetlabNode.lock:
556 blist = self.plapi.blacklisted()
557 plist = self.plapi.reserved()
558 if node_id not in blist and node_id not in plist:
559 ping_ok = self._do_ping(node_id)
561 self._set_hostname_attr(node_id)
562 self.warning(" Node not responding PING ")
563 self._blacklist_node(node_id)
565 # discovered node for provision, added to provision list
566 self._put_node_in_provision(node_id)
569 def _get_nodes_id(self, filters=None):
570 return self.plapi.get_nodes(filters, fields=['node_id'])
def _add_node_to_slice(self, node_id):
    """
    Append ``node_id`` to the nodes of the slice named by the 'username'
    attribute.

    The current node list is read from the PLC API, extended and written
    back while holding PlanetlabNode.lock, so the whole read-modify-write
    sequence runs as one step with respect to the other holders of that
    lock.
    """
    self.info(" Adding node to slice ")
    slice_name = self.get("username")
    api_lock = PlanetlabNode.lock
    with api_lock:
        current_nodes = self.plapi.get_slice_nodes(slice_name)
        previous_msg = " Previous slice nodes %s " % current_nodes
        self.debug(previous_msg)
        current_nodes.append(node_id)
        self.plapi.add_slice_nodes(slice_name, current_nodes)
581 def _delete_node_from_slice(self, node):
582 self.warning(" Deleting node from slice ")
583 slicename = self.get("username")
584 self.plapi.delete_slice_node(slicename, [node])
586 def _get_hostname(self):
587 hostname = self.get("hostname")
592 hostname = socket.gethostbyaddr(ip)[0]
593 self.set('hostname', hostname)
598 def _set_hostname_attr(self, node):
600 Query PLCAPI for the hostname of a certain node id and sets the
601 attribute hostname, it will over write the previous value
603 hostname = self.plapi.get_nodes(node, ['hostname'])
604 self.set("hostname", hostname[0]['hostname'])
606 def _check_if_in_slice(self, nodes_id):
608 Query PLCAPI to find out if any node id from nodes_id is in the user's
611 slicename = self.get("username")
612 slice_nodes = self.plapi.get_slice_nodes(slicename)
613 nodes_inslice = list(set(nodes_id) & set(slice_nodes))
616 def _do_ping(self, node_id):
618 Perform ping command on node's IP matching node id
621 ip = self._get_ip(node_id)
623 command = "ping -c4 %s" % ip
624 (out, err) = lexec(command)
626 m = re.search("(\d+)% packet loss", str(out))
627 if m and int(m.groups()[0]) < 50:
632 def _blacklist_node(self, node):
634 Add node mal functioning node to blacklist
636 self.warning(" Blacklisting malfunctioning node ")
637 self.plapi.blacklist_host(node)
638 if not self._hostname:
639 self.set('hostname', None)
641 def _put_node_in_provision(self, node):
643 Add node to the list of nodes being provisioned, in order for other RMs
644 to not try to provision the same one again
646 self.plapi.reserve_host(node)
648 def _get_ip(self, node_id):
650 Query PLCAPI for the IP of a node with certain node id
652 hostname = self.get("hostname") or \
653 self.plapi.get_nodes(node_id, ['hostname'])[0]['hostname']
655 ip = sshfuncs.gethostbyname(hostname)
657 # Fail while trying to find the IP
661 def fail_discovery(self):
662 msg = "Discovery failed. No candidates found for node"
664 raise RuntimeError, msg
def fail_node_not_alive(self, hostname=None):
    """
    Abort by raising RuntimeError with the message
    "Node <hostname> not alive".

    ``hostname`` is only used to build the message; with the default
    None the message reads "Node None not alive".
    """
    msg = "Node %s not alive" % hostname
    # call form of raise: valid on Python 2 and Python 3 alike, unlike
    # the Python-2-only "raise RuntimeError, msg" statement
    raise RuntimeError(msg)
def fail_node_not_available(self, hostname):
    """
    Abort by raising RuntimeError with the message
    "Node <hostname> not available for provisioning".

    ``hostname`` is only used to build the message.
    """
    msg = "Node %s not available for provisioning" % hostname
    # call form of raise: valid on Python 2 and Python 3 alike, unlike
    # the Python-2-only "raise RuntimeError, msg" statement
    raise RuntimeError(msg)
def fail_not_enough_nodes(self):
    """
    Abort by raising RuntimeError with the message
    "Not enough nodes available for provisioning".
    """
    msg = "Not enough nodes available for provisioning"
    # call form of raise: valid on Python 2 and Python 3 alike, unlike
    # the Python-2-only "raise RuntimeError, msg" statement
    raise RuntimeError(msg)
def fail_plapi(self):
    """
    Abort by raising RuntimeError telling that the PLC API could not be
    instantiated and that the attributes pluser and plpassword have to
    be set.
    """
    msg = "Failing while trying to instanciate the PLC API.\nSet the" + \
        " attributes pluser and plpassword."
    # call form of raise: valid on Python 2 and Python 3 alike, unlike
    # the Python-2-only "raise RuntimeError, msg" statement
    raise RuntimeError(msg)
683 def valid_connection(self, guid):