X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=sliver_libvirt.py;h=5407cdc41249ab2aeda55e67ec4f427d9cebee46;hb=ecee05390277f57b02d21ffca0195292bde1defa;hp=98525c08e2f0b6aabb2ec721fe92f7ade6fd0839;hpb=db1f4974bb5e2f250152890207c9164f8fa4a852;p=nodemanager.git

Review notes (free text ahead of the "diff --git" line is skipped by patch tools):

  * Line structure and indentation were rebuilt from a whitespace-collapsed
    copy of this patch; blank context lines are inferred from the hunk line
    counts.  Apply with whitespace tolerance if a hunk is rejected.
  * dom_details(): the "[info: ...]" format string had four %s placeholders
    but received five values, so it always raised TypeError, which the bare
    "except:" swallowed -- the info section was never printed.  The redundant
    state value (already reported just before) is dropped, and the handler is
    narrowed to "except Exception:" so that KeyboardInterrupt / SystemExit
    are no longer eaten.
  * repair_veth(): the regular expression had been reduced to an empty
    string, which matches at every character position.  It is restored to
    the <target dev='vethN'/> element that the x[13:-3] slice is sized for
    (13 == len("<target dev='"), 3 == len("'/>")).
  * start(): "except Exception, e" is rewritten as "except Exception as e",
    which is accepted by Python 2.6+ as well as Python 3.
  * The post-image content differs from blob 5407cdc, so the hash on the
    "index" line is informational only.

diff --git a/sliver_libvirt.py b/sliver_libvirt.py
index 98525c0..5407cdc 100644
--- a/sliver_libvirt.py
+++ b/sliver_libvirt.py
@@ -2,6 +2,7 @@
 
 import sys
 import os, os.path
+import re
 import subprocess
 import pprint
 import random
@@ -23,6 +24,13 @@ STATES = {
     libvirt.VIR_DOMAIN_CRASHED: 'crashed',
 }
 
+REASONS = {
+    libvirt.VIR_CONNECT_CLOSE_REASON_ERROR: 'Misc I/O error',
+    libvirt.VIR_CONNECT_CLOSE_REASON_EOF: 'End-of-file from server',
+    libvirt.VIR_CONNECT_CLOSE_REASON_KEEPALIVE: 'Keepalive timer triggered',
+    libvirt.VIR_CONNECT_CLOSE_REASON_CLIENT: 'Client requested it',
+}
+
 connections = dict()
 
 # Common Libvirt code
@@ -39,12 +47,6 @@ class Sliver_Libvirt(Account):
         uri = vtype + '://'
         return connections.setdefault(uri, libvirt.open(uri))
 
-    @staticmethod
-    def debuginfo(dom):
-        ''' Helper method to get a "nice" output of the info struct for debug'''
-        [state, maxmem, mem, ncpu, cputime] = dom.info()
-        return '%s is %s, maxmem = %s, mem = %s, ncpu = %s, cputime = %s' % (dom.name(), STATES.get(state, state), maxmem, mem, ncpu, cputime)
-
     def __init__(self, rec):
         self.name = rec['name']
         logger.verbose ('sliver_libvirt: %s init'%(self.name))
@@ -69,6 +71,48 @@ class Sliver_Libvirt(Account):
             dom = self.conn.lookupByName(self.name)
         self.dom = dom
 
+    @staticmethod
+    def dom_details (dom):
+        output=""
+        output += " id=%s - OSType=%s"%(dom.ID(),dom.OSType())
+        # calling state() seems to be working fine
+        (state,reason)=dom.state()
+        output += " state=%s, reason=%s"%(STATES.get(state,state),REASONS.get(reason,reason))
+        try:
+            # try to use info() - this however does not work for some reason on f20
+            # info cannot get info operation failed: Cannot read cputime for domain
+            [state, maxmem, mem, ncpu, cputime] = dom.info()
+            output += " [info: maxmem = %s, mem = %s, ncpu = %s, cputime = %s]" % (maxmem, mem, ncpu, cputime)
+        except Exception:
+            # too bad but libvirt.py prints out stuff on stdout when this fails, don't know how to get rid of that..
+            output += " [info: not available]"
+        return output
+
+    def __repr__(self):
+        ''' Helper method to get a "nice" output of the domain struct for debug purposes'''
+        output="Domain %s"%self.name
+        dom=self.dom
+        if dom is None:
+            output += " [no attached dom ?!?]"
+        else:
+            output += Sliver_Libvirt.dom_details (dom)
+        return output
+
+    # Thierry : I am not quite sure if /etc/libvirt/lxc/<name>.xml holds a reliably up-to-date
+    # copy of the sliver XML config; I feel like issuing a virsh dumpxml first might be safer
+    def repair_veth(self):
+        # See workaround email, 2-14-2014, "libvirt 1.2.1 rollout"
+        xml = open("/etc/libvirt/lxc/%s.xml" % self.name).read()
+        veths = re.findall("<target dev='veth[0-9]*'/>", xml)
+        veths = [x[13:-3] for x in veths]
+        for veth in veths:
+            command = ["ip", "link", "delete", veth]
+            logger.log_call(command)
+
+        logger.log("trying to redefine the VM")
+        command = ["virsh", "define", "/etc/libvirt/lxc/%s.xml" % self.name]
+        logger.log_call(command)
+
     def start(self, delay=0):
         ''' Just start the sliver '''
         logger.verbose('sliver_libvirt: %s start'%(self.name))
@@ -76,7 +120,18 @@ class Sliver_Libvirt(Account):
         # Check if it's running to avoid throwing an exception if the
         # domain was already running, create actually means start
         if not self.is_running():
-            self.dom.create()
+            try:
+                self.dom.create()
+            except Exception as e:
+                # XXX smbaker: attempt to resolve slivers that are stuck in
+                #   "failed to allocate free veth".
+                if "ailed to allocate free veth" in str(e):
+                     logger.log("failed to allocate free veth on %s" % self.name)
+                     self.repair_veth()
+                     logger.log("trying dom.create again")
+                     self.dom.create()
+                else:
+                    raise
         else:
             logger.verbose('sliver_libvirt: sliver %s already started'%(self.name))
 
@@ -98,33 +153,12 @@ class Sliver_Libvirt(Account):
         except:
             logger.log_exc("in sliver_libvirt.stop",name=self.name)
 
-    def is_running (self):
-        result=self._is_running()
-        logger.log("sliver_libvirt.is_running on %s returned %s"%(self.name,result))
-        return result
-
-    def _is_running(self):
+    def is_running(self):
         ''' Return True if the domain is running '''
-        logger.verbose('sliver_libvirt: entering is_running on [%s:%s]'%(self.name,self.dom.ID()))
-        try:
-            state, _, _, _, _ = self.dom.info()
-            if state == libvirt.VIR_DOMAIN_RUNNING:
-                logger.verbose('sliver_libvirt: %s is RUNNING'%self.name)
-                return True
-            else:
-                info = Sliver_Libvirt.debuginfo(self.dom)
-                logger.verbose('sliver_libvirt: %s is ' \
-                        'NOT RUNNING...\n%s'%(self.name, info))
-                return False
-        except:
-            logger.log("Re-fetching dom from name=%s"%self.name)
-            try:
-                self.dom=self.conn.lookupByName(self.name)
-                state, _, _, _, _ = self.dom.info()
-                return state==libvirt.VIR_DOMAIN_RUNNING
-            except:
-                logger.log_exc("in sliver_libvirt.is_running",name=self.name)
-                return False
+        (state,_) = self.dom.state()
+        result = (state == libvirt.VIR_DOMAIN_RUNNING)
+        logger.verbose('sliver_libvirt.is_running: %s => %s'%(self,result))
+        return result
 
     def configure(self, rec):
 