X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=src%2Fnepi%2Fresources%2Fomf%2Fwilabt_node.py;h=f9b118f45674053ab0a65ced575505c38a3e9d69;hb=6285ca51026efb69642eea9dfc7c480e722d84a9;hp=80f076c7d24ac0ce62ac2dd2a3f69d488f26cf57;hpb=015685373fef1bce7af361dac8410adadf0c125f;p=nepi.git diff --git a/src/nepi/resources/omf/wilabt_node.py b/src/nepi/resources/omf/wilabt_node.py index 80f076c7..f9b118f4 100644 --- a/src/nepi/resources/omf/wilabt_node.py +++ b/src/nepi/resources/omf/wilabt_node.py @@ -3,9 +3,8 @@ # Copyright (C) 2013 INRIA # # This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation; # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -19,8 +18,8 @@ from nepi.execution.attribute import Attribute, Flags, Types from nepi.execution.resource import ResourceManager, clsinit_copy, \ - ResourceState, reschedule_delay -from nepi.resources.linux.node import LinuxNode + ResourceState +from nepi.resources.omf.node import OMFNode from nepi.util.sfaapi import SFAAPIFactory from nepi.util.execfuncs import lexec from nepi.util import sshfuncs @@ -34,14 +33,23 @@ import threading import datetime @clsinit_copy -class WilabtSfaNode(LinuxNode): - _rtype = "WilabtSfaNode" +class WilabtSfaNode(OMFNode): + _rtype = "wilabt::sfa::Node" _help = "Controls a Wilabt host accessible using a SSH key " \ "and provisioned using SFA" - _backend = "omf" + _platform = "omf" @classmethod def _register_attributes(cls): + + username = Attribute("username", "Local account username", + flags = Flags.Credential) + + identity = Attribute("identity", "SSH identity file", + flags = Flags.Credential) + + server_key = Attribute("serverKey", "Server public key", + flags = Flags.Design) sfa_user = Attribute("sfauser", "SFA user", flags = Flags.Credential) @@ -59,11 +67,22 @@ class WilabtSfaNode(LinuxNode): gateway = Attribute("gateway", "Hostname of the gateway machine", flags = Flags.Design) + host = Attribute("host", "Name of the physical machine", + flags = Flags.Design) + + disk_image = Attribute("disk_image", "Specify a specific disk image for a node", + flags = Flags.Design) + + cls._register_attribute(username) + cls._register_attribute(identity) + cls._register_attribute(server_key) cls._register_attribute(sfa_user) cls._register_attribute(sfa_private_key) cls._register_attribute(slicename) cls._register_attribute(gateway_user) cls._register_attribute(gateway) + cls._register_attribute(host) + cls._register_attribute(disk_image) def __init__(self, ec, guid): super(WilabtSfaNode, self).__init__(ec, guid) @@ -72,7 +91,7 @@ class WilabtSfaNode(LinuxNode): self._sfaapi = None self._node_to_provision = None self._slicenode = False - self._hostname = False + self._host = False self._username = None def _skip_provision(self): @@ -110,17 +129,13 @@ class WilabtSfaNode(LinuxNode): Based on the attributes defined by the user, discover the suitable node for provision. """ - if self._skip_provision(): - super(WilabtSfaNode, self).do_discover() - return - nodes = self.sfaapi.get_resources_hrn() - hostname = self._get_hostname() - if hostname: + host = self._get_host() + if host: # the user specified one particular node to be provisioned - self._hostname = True - host_hrn = nodes[hostname] + self._host = True + host_hrn = nodes[host] # check that the node is not blacklisted or being provisioned # by other RM @@ -129,8 +144,8 @@ class WilabtSfaNode(LinuxNode): if self._check_if_in_slice([host_hrn]): self.debug("Node already in slice %s" % host_hrn) self._slicenode = True - hostname = hostname + '.wilab2.ilabt.iminds.be' - self.set('hostname', hostname) + host = host + '.wilab2.ilabt.iminds.be' + self.set('host', host) self._node_to_provision = host_hrn super(WilabtSfaNode, self).do_discover() @@ -139,10 +154,6 @@ class WilabtSfaNode(LinuxNode): Add node to user's slice and verifing that the node is functioning correctly. Check ssh, omf rc running, hostname, file system. """ - if self._skip_provision(): - super(WilabtSfaNode, self).do_provision() - return - provision_ok = False ssh_ok = False proc_ok = False @@ -150,17 +161,17 @@ class WilabtSfaNode(LinuxNode): while not provision_ok: node = self._node_to_provision - if self._slicenode: - self._delete_from_slice() - self.debug("Waiting 300 seg for re-adding to slice") - time.sleep(300) # Timout for the testbed to allow a new reservation + #if self._slicenode: + # self._delete_from_slice() + # self.debug("Waiting 480 sec for re-adding to slice") + # time.sleep(480) # Timout for the testbed to allow a new reservation self._add_node_to_slice(node) t = 0 while not self._check_if_in_slice([node]) and t < timeout \ and not self._ecobj().abort: t = t + 5 time.sleep(t) - self.debug("Waiting 5 seg for resources to be added") + self.debug("Waiting 5 sec for resources to be added") continue if not self._check_if_in_slice([node]): @@ -185,7 +196,7 @@ class WilabtSfaNode(LinuxNode): if not self._check_fs(): self.do_discover() continue - if not self._check_omf(): + if not self._check_omfrc(): self.do_discover() continue if not self._check_hostname(): @@ -194,12 +205,25 @@ class WilabtSfaNode(LinuxNode): else: provision_ok = True - if not self.get('hostname'): - self._set_hostname_attr(node) + if not self.get('host'): + self._set_host_attr(node) self.info(" Node provisioned ") super(WilabtSfaNode, self).do_provision() + def do_deploy(self): + if self.state == ResourceState.NEW: + self.info("Deploying w-iLab.t node") + self.do_discover() + self.do_provision() + super(WilabtSfaNode, self).do_deploy() + + def do_release(self): + super(WilabtSfaNode, self).do_release() + if self.state == ResourceState.RELEASED and not self._skip_provision(): + self.debug(" Releasing SFA API ") + self.sfaapi.release() + def _blacklisted(self, host_hrn): """ Check in the SFA API that the node is not in the blacklist. @@ -237,15 +261,15 @@ class WilabtSfaNode(LinuxNode): through the gateway because is private testbed. """ t = 0 - timeout = 10 + timeout = 1200 ssh_ok = False while t < timeout and not ssh_ok: cmd = 'echo \'GOOD NODE\'' ((out, err), proc) = self.execute(cmd) if out.find("GOOD NODE") < 0: - self.debug( "No SSH connection, waiting 60s" ) - t = t + 5 - time.sleep(5) + self.debug( "No SSH connection, waiting 20s" ) + t = t + 20 + time.sleep(20) continue else: self.debug( "SSH OK" ) @@ -283,8 +307,48 @@ class WilabtSfaNode(LinuxNode): ((out, err), proc) = self.execute(cmd) if 'localhost' in out.lower(): return False + else: + self.set('hostname', out.strip()) return True + def execute(self, command, + sudo = False, + env = None, + tty = False, + forward_x11 = False, + retry = 3, + connect_timeout = 30, + strict_host_checking = False, + persistent = True, + blocking = True, + ): + """ Notice that this invocation will block until the + execution finishes. If this is not the desired behavior, + use 'run' instead.""" + (out, err), proc = sshfuncs.rexec( + command, + host = self.get("host"), + user = self.get("username"), + port = 22, + gwuser = self.get("gatewayUser"), + gw = self.get("gateway"), + agent = True, + sudo = sudo, + identity = self.get("identity"), + server_key = self.get("serverKey"), + env = env, + tty = tty, + forward_x11 = forward_x11, + retry = retry, + connect_timeout = connect_timeout, + persistent = persistent, + blocking = blocking, + strict_host_checking = strict_host_checking + ) + + return (out, err), proc + + def _add_node_to_slice(self, host_hrn): """ Add node to slice, using SFA API. Actually Wilabt testbed @@ -295,7 +359,12 @@ class WilabtSfaNode(LinuxNode): """ self.info(" Adding node to slice ") slicename = self.get("slicename") - self.sfaapi.add_resource_to_slice_batch(slicename, host_hrn) + disk_image = self.get("disk_image") + if disk_image is not None: + properties = {'disk_image': disk_image} + else: properties = None + #properties = None + self.sfaapi.add_resource_to_slice_batch(slicename, host_hrn, properties=properties) def _delete_from_slice(self): """ @@ -308,26 +377,26 @@ class WilabtSfaNode(LinuxNode): slicename = self.get("slicename") self.sfaapi.remove_all_from_slice(slicename) - def _get_hostname(self): + def _get_host(self): """ Get the attribute hostname. """ - hostname = self.get("hostname") - if hostname: - return hostname + host = self.get("host") + if host: + return host else: return None - def _set_hostname_attr(self, node): + def _set_host_attr(self, node): """ Query SFAAPI for the hostname of a certain host hrn and sets the attribute hostname, it will over write the previous value. """ hosts_hrn = self.sfaapi.get_resources_hrn() - for hostname, hrn in hosts_hrn.iteritems(): + for host, hrn in hosts_hrn.iteritems(): if hrn == node: - hostname = hostname + '.wilab2.ilabt.iminds.be' - self.set("hostname", hostname) + host = host + '.wilab2.ilabt.iminds.be' + self.set("host", host) def _check_if_in_slice(self, hosts_hrn): """ @@ -343,32 +412,16 @@ class WilabtSfaNode(LinuxNode): nodes_inslice = list(set(hosts_hrn) & set(slice_nodes_hrn)) return nodes_inslice - def _do_ping(self, hostname): - """ - Perform ping command on node's IP matching hostname. - """ - ping_ok = False - guser = self.get("gatewayUser") - gw = self.get("gateway") - host = hostname + ".wilab2.ilabt.iminds.be" - command = "ssh %s@%s 'ping -c4 %s'" % (guser, gw, host) - (out, err) = lexec(command) - m = re.search("(\d+)% packet loss", str(out)) - if m and int(m.groups()[0]) < 50: - ping_ok = True - - return ping_ok - def _blacklist_node(self, host_hrn): """ Add mal functioning node to blacklist (in SFA API). """ self.warning(" Blacklisting malfunctioning node ") self.sfaapi.blacklist_resource(host_hrn) - if not self._hostname: - self.set('hostname', None) + if not self._host: + self.set('host', None) else: - self.set('hostname', host_hrn.split('.').pop()) + self.set('host', host_hrn.split('.').pop()) def _put_node_in_provision(self, host_hrn): """ @@ -377,12 +430,12 @@ class WilabtSfaNode(LinuxNode): """ self.sfaapi.reserve_resource(host_hrn) - def _get_ip(self, hostname): + def _get_ip(self, host): """ Query cache for the IP of a node with certain hostname """ try: - ip = sshfuncs.gethostbyname(hostname) + ip = sshfuncs.gethostbyname(host) except: # Fail while trying to find the IP return None @@ -391,24 +444,23 @@ class WilabtSfaNode(LinuxNode): def fail_discovery(self): msg = "Discovery failed. No candidates found for node" self.error(msg) - raise RuntimeError, msg + raise RuntimeError(msg) - def fail_node_not_alive(self, hostname=None): - msg = "Node %s not alive" % hostname - raise RuntimeError, msg + def fail_node_not_alive(self, host=None): + msg = "Node %s not alive" % host + raise RuntimeError(msg) - def fail_node_not_available(self, hostname): - msg = "Node %s not available for provisioning" % hostname - raise RuntimeError, msg + def fail_node_not_available(self, host): + msg = "Some nodes not available for provisioning" + raise RuntimeError(msg) def fail_not_enough_nodes(self): msg = "Not enough nodes available for provisioning" - raise RuntimeError, msg + raise RuntimeError(msg) - def fail_plapi(self): - msg = "Failing while trying to instanciate the PLC API.\nSet the" + \ - " attributes pluser and plpassword." - raise RuntimeError, msg + def fail_sfaapi(self): + msg = "Failing while trying to instanciate the SFA API." + raise RuntimeError(msg) def valid_connection(self, guid): # TODO: Validate!