X-Git-Url: http://git.onelab.eu/?p=nodemanager.git;a=blobdiff_plain;f=sliver_libvirt.py;h=cea8a391471df523f6399ddbe334cefb619d7c01;hp=4eb86af7dd12606303e356b3f9a8f6747c987fae;hb=HEAD;hpb=84936c9dc941fc82cd58be637a2d9344b5d6c3c7 diff --git a/sliver_libvirt.py b/sliver_libvirt.py index 4eb86af..cea8a39 100644 --- a/sliver_libvirt.py +++ b/sliver_libvirt.py @@ -24,12 +24,33 @@ STATES = { libvirt.VIR_DOMAIN_CRASHED: 'crashed', } -REASONS = { - libvirt.VIR_CONNECT_CLOSE_REASON_ERROR: 'Misc I/O error', - libvirt.VIR_CONNECT_CLOSE_REASON_EOF: 'End-of-file from server', - libvirt.VIR_CONNECT_CLOSE_REASON_KEEPALIVE: 'Keepalive timer triggered', - libvirt.VIR_CONNECT_CLOSE_REASON_CLIENT: 'Client requested it', -} +# with fedora24 and (broken) libvirt-python-1.3.3-3, +# the following symbols are not available +# kashyap on IRC reported that libvirt-python-1.3.5-1.fc24.x86_64 +# did not have the issue though +try: + REASONS = { + # 0 + libvirt.VIR_CONNECT_CLOSE_REASON_ERROR: 'Misc I/O error', + # 1 + libvirt.VIR_CONNECT_CLOSE_REASON_EOF: 'End-of-file from server', + # 2 + libvirt.VIR_CONNECT_CLOSE_REASON_KEEPALIVE: 'Keepalive timer triggered', + # 3 + libvirt.VIR_CONNECT_CLOSE_REASON_CLIENT: 'Client requested it', + } +except: + REASONS = { + # libvirt.VIR_CONNECT_CLOSE_REASON_ERROR + 0 : 'Misc I/O error', + # libvirt.VIR_CONNECT_CLOSE_REASON_EOF + 1 : 'End-of-file from server', + # libvirt.VIR_CONNECT_CLOSE_REASON_KEEPALIVE + 2 : 'Keepalive timer triggered', + # libvirt.VIR_CONNECT_CLOSE_REASON_CLIENT + 3 : 'Client requested it', + } + logger.log("WARNING : using hard-wired constants instead of symbolic names for CONNECT_CLOSE*") connections = dict() @@ -41,15 +62,37 @@ class Sliver_Libvirt(Account): @staticmethod def getConnection(sliver_type): - # TODO: error checking - # vtype is of the form sliver.[LXC/QEMU] we need to lower case to lxc/qemu + """ + returns a connection to the underlying libvirt service + a single connection is created and shared among slivers + this call ensures the connection is alive + and will reconnect if it appears to be necessary + """ + # sliver_type comes from rec['type'] and is of the form sliver.{LXC,QEMU} + # so we need to lower case to lxc/qemu vtype = sliver_type.split('.')[1].lower() - uri = vtype + '://' - return connections.setdefault(uri, libvirt.open(uri)) + uri = vtype + ':///' + if uri not in connections: + # create connection + conn = libvirt.open(uri) + connections[uri] = conn + return conn + else: + # connection already available : check for health + conn = connections[uri] + # see if a reconnection is needed + try: + numDomains = conn.numOfDomains() + except: + logger.log("libvirt connection to {} looks broken - reconnecting".format(uri)) + conn = libvirt.open(uri) + # if this fails then an expection is thrown outside of this function + numDomains = conn.numOfDomains() + return conn def __init__(self, rec): self.name = rec['name'] - logger.verbose ('sliver_libvirt: %s init'%(self.name)) + logger.verbose ('sliver_libvirt: {} init'.format(self.name)) # Assume the directory with the image and config files # are in place @@ -65,24 +108,26 @@ class Sliver_Libvirt(Account): try: dom = self.conn.lookupByName(self.name) except: - logger.log('sliver_libvirt: Domain %s does not exist. ' \ - 'Will try to create it again.' % (self.name)) + logger.log('sliver_libvirt: Domain {} does not exist. ' \ + 'Will try to create it again.'.format(self.name)) self.__class__.create(rec['name'], rec) dom = self.conn.lookupByName(self.name) self.dom = dom @staticmethod def dom_details (dom): - output="" - output += " id=%s - OSType=%s"%(dom.ID(),dom.OSType()) + output = "" + output += " id={} - OSType={}".format(dom.ID(), dom.OSType()) # calling state() seems to be working fine - (state,reason)=dom.state() - output += " state=%s, reason=%s"%(STATES.get(state,state),REASONS.get(reason,reason)) + (state, reason) = dom.state() + output += " state={}, reason={}".format(STATES.get(state, state), + REASONS.get(reason, reason)) try: # try to use info() - this however does not work for some reason on f20 # info cannot get info operation failed: Cannot read cputime for domain [state, maxmem, mem, ncpu, cputime] = dom.info() - output += " [info: maxmem = %s, mem = %s, ncpu = %s, cputime = %s]" % (STATES.get(state, state), maxmem, mem, ncpu, cputime) + output += " [info: state={}, maxmem = {}, mem = {}, ncpu = {}, cputime = {}]"\ + .format(STATES.get(state, state), maxmem, mem, ncpu, cputime) except: # too bad but libvirt.py prints out stuff on stdout when this fails, don't know how to get rid of that.. output += " [info: not available]" @@ -90,17 +135,21 @@ class Sliver_Libvirt(Account): def __repr__(self): ''' Helper method to get a "nice" output of the domain struct for debug purposes''' - output="Domain %s"%self.name - dom=self.dom + output = "Domain {}".format(self.name) + dom = self.dom if dom is None: output += " [no attached dom ?!?]" else: output += Sliver_Libvirt.dom_details (dom) return output + # Thierry : I am not quite sure if /etc/libvirt/lxc/<>.xml holds a reliably up-to-date + # copy of the sliver XML config; I feel like issuing a virsh dumpxml first might be safer def repair_veth(self): # See workaround email, 2-14-2014, "libvirt 1.2.1 rollout" - xml = open("/etc/libvirt/lxc/%s.xml" % self.name).read() + xmlfilename = "/etc/libvirt/lxc/{}.xml".format(self.name) + with open(xmlfilename) as xmlfile: + xml = xmlfile.read() veths = re.findall("", xml) veths = [x[13:-3] for x in veths] for veth in veths: @@ -108,54 +157,57 @@ class Sliver_Libvirt(Account): logger.log_call(command) logger.log("trying to redefine the VM") - command = ["virsh", "define", "/etc/libvirt/lxc/%s.xml" % self.name] + command = [ "virsh", "define", xmlfilename ] logger.log_call(command) def start(self, delay=0): - ''' Just start the sliver ''' - logger.verbose('sliver_libvirt: %s start'%(self.name)) + '''Just start the sliver''' + logger.verbose('sliver_libvirt: {} start'.format(self.name)) # Check if it's running to avoid throwing an exception if the - # domain was already running, create actually means start + # domain was already running if not self.is_running(): try: + # create actually means start self.dom.create() - except Exception, e: + except Exception as e: # XXX smbaker: attempt to resolve slivers that are stuck in # "failed to allocate free veth". if "ailed to allocate free veth" in str(e): - logger.log("failed to allocate free veth on %s" % self.name) + logger.log("failed to allocate free veth on {}".format(self.name)) self.repair_veth() logger.log("trying dom.create again") self.dom.create() else: raise else: - logger.verbose('sliver_libvirt: sliver %s already started'%(self.name)) + logger.verbose('sliver_libvirt: sliver {} already started'.format(self.name)) # After the VM is started... we can play with the virtual interface # Create the ebtables rule to mark the packets going out from the virtual # interface to the actual device so the filter canmatch against the mark - bwlimit.ebtables("-A INPUT -i veth%d -j mark --set-mark %d" % \ - (self.xid, self.xid)) + bwlimit.ebtables("-A INPUT -i veth{} -j mark --set-mark {}" + .format(self.xid, self.xid)) + ### this is confusing, because it seems it is not used in fact def stop(self): - logger.verbose('sliver_libvirt: %s stop'%(self.name)) + logger.verbose('sliver_libvirt: {} stop'.format(self.name)) # Remove the ebtables rule before stopping - bwlimit.ebtables("-D INPUT -i veth%d -j mark --set-mark %d" % \ - (self.xid, self.xid)) + bwlimit.ebtables("-D INPUT -i veth{} -j mark --set-mark {}" + .format(self.xid, self.xid)) try: self.dom.destroy() except: - logger.log_exc("in sliver_libvirt.stop",name=self.name) + logger.log_exc("in sliver_libvirt.stop", name=self.name) def is_running(self): ''' Return True if the domain is running ''' - (state,_) = self.dom.state() + (state, _) = self.dom.state() result = (state == libvirt.VIR_DOMAIN_RUNNING) - logger.verbose('sliver_libvirt.is_running: %s => %s'%(self,result)) + logger.verbose('sliver_libvirt.is_running: {} => {}' + .format(self, result)) return result def configure(self, rec): @@ -163,7 +215,7 @@ class Sliver_Libvirt(Account): #sliver.[LXC/QEMU] tolower case #sliver_type = rec['type'].split('.')[1].lower() - #BASE_DIR = '/cgroup/libvirt/%s/%s/'%(sliver_type, self.name) + #BASE_DIR = '/cgroup/libvirt/{}/{}/'.format(sliver_type, self.name) # Disk allocation # No way through cgroups... figure out how to do that with user/dir quotas. @@ -173,16 +225,17 @@ class Sliver_Libvirt(Account): # Btrfs support quota per volumes - if rec.has_key("rspec") and rec["rspec"].has_key("tags"): + if "rspec" in rec and "tags" in rec["rspec"]: if cgroups.get_cgroup_path(self.name) == None: # If configure is called before start, then the cgroups won't exist # yet. NM will eventually re-run configure on the next iteration. # TODO: Add a post-start configure, and move this stuff there - logger.log("Configure: postponing tag check on %s as cgroups are not yet populated" % self.name) + logger.log("Configure: postponing tag check on {} as cgroups are not yet populated" + .format(self.name)) else: tags = rec["rspec"]["tags"] # It will depend on the FS selection - if tags.has_key('disk_max'): + if 'disk_max' in tags: disk_max = tags['disk_max'] if disk_max == 0: # unlimited @@ -192,17 +245,17 @@ class Sliver_Libvirt(Account): pass # Memory allocation - if tags.has_key('memlock_hard'): + if 'memlock_hard' in tags: mem = str(int(tags['memlock_hard']) * 1024) # hard limit in bytes cgroups.write(self.name, 'memory.limit_in_bytes', mem, subsystem="memory") - if tags.has_key('memlock_soft'): + if 'memlock_soft' in tags: mem = str(int(tags['memlock_soft']) * 1024) # soft limit in bytes cgroups.write(self.name, 'memory.soft_limit_in_bytes', mem, subsystem="memory") # CPU allocation # Only cpu_shares until figure out how to provide limits and guarantees # (RT_SCHED?) - if tags.has_key('cpu_share'): + if 'cpu_share' in tags: cpu_share = tags['cpu_share'] cgroups.write(self.name, 'cpu.shares', cpu_share) @@ -211,7 +264,7 @@ class Sliver_Libvirt(Account): @staticmethod def get_unique_vif(): - return 'veth%s' % random.getrandbits(32) + return 'veth{}'.format(random.getrandbits(32)) # A placeholder until we get true VirtualInterface objects @staticmethod @@ -219,9 +272,9 @@ class Sliver_Libvirt(Account): xml = """ - + -""" % (Sliver_Libvirt.get_unique_vif()) +""".format(Sliver_Libvirt.get_unique_vif()) try: tags = rec['rspec']['tags'] if 'interface' in tags: @@ -232,31 +285,31 @@ class Sliver_Libvirt(Account): tag_xml = "" for interface in interfaces: if 'vlan' in interface: - vlanxml = "" % interface['vlan'] + vlanxml = "".format(interface['vlan']) else: vlanxml = "" if 'bridge' in interface: tag_xml = tag_xml + """ - - %s + + {} - + - """ % (interface['bridge'], vlanxml, Sliver_Libvirt.get_unique_vif()) + """.format(interface['bridge'], vlanxml, Sliver_Libvirt.get_unique_vif()) else: tag_xml = tag_xml + """ - + - """ % (Sliver_Libvirt.get_unique_vif()) + """.format(Sliver_Libvirt.get_unique_vif()) xml = tag_xml - logger.log('sliver_libvirty.py: interface XML is: %s' % xml) + logger.log('sliver_libvirty.py: interface XML is: {}'.format(xml)) except: - logger.log('sliver_libvirt.py: ERROR parsing "interface" tag for slice %s' % rec['name']) - logger.log('sliver_libvirt.py: tag value: %s' % tags['interface']) + logger.log('sliver_libvirt.py: ERROR parsing "interface" tag for slice {}'.format(rec['name'])) + logger.log('sliver_libvirt.py: tag value: {}'.format(tags['interface'])) return xml