X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=sliver_lxc.py;h=9ca2aa83d5e2d149b7e87d47c80dfc17b71b4645;hb=619f6ee406f8e11a5054c1e2c988f6eb95bce324;hp=aef00add081a10ca0865d527b739e9a65d27ad7b;hpb=ffd0695eb1c979e4dcee9eae29dd3578801971f3;p=nodemanager.git
diff --git a/sliver_lxc.py b/sliver_lxc.py
index aef00ad..9ca2aa8 100644
--- a/sliver_lxc.py
+++ b/sliver_lxc.py
@@ -2,158 +2,361 @@
"""LXC slivers"""
-import accounts
-import logger
import subprocess
-import os
-import libvirt
import sys
+import time
+import os, os.path
+import grp
+from pwd import getpwnam
+from string import Template
-def test_template():
-
- xml_template = """
-
- test_1
- 32768
-
- exe
- /bin/sh
-
- 1
-
- destroy
- restart
- destroy
-
- /usr/libexec/libvirt_lxc
-
-
-
-
-
-
-
-
-
- """
-
- return xml_template
-
-def createConnection():
- conn = libvirt.open('lxc:///')
- if conn == None:
- print 'Failed to open connection to LXC hypervisor'
- sys.exit(1)
- else: return conn
-
-
-states = {
- libvirt.VIR_DOMAIN_NOSTATE: 'no state',
- libvirt.VIR_DOMAIN_RUNNING: 'running',
- libvirt.VIR_DOMAIN_BLOCKED: 'blocked on resource',
- libvirt.VIR_DOMAIN_PAUSED: 'paused by user',
- libvirt.VIR_DOMAIN_SHUTDOWN: 'being shut down',
- libvirt.VIR_DOMAIN_SHUTOFF: 'shut off',
- libvirt.VIR_DOMAIN_CRASHED: 'crashed',
-}
+# vsys probably should not be a plugin
+# the thing is, the right way to handle stuff would be that
+# if slivers get created by doing a,b,c
+# then they should be deleted by doing c,b,a
+# the current ordering model for vsys plugins completely fails to capture that
+from plugins.vsys import removeSliverFromVsys, startService as vsysStartService
-def info(dom):
- [state, maxmem, mem, ncpu, cputime] = dom.info()
- return '%s is %s,\nmaxmem = %s, mem = %s, ncpu = %s, cputime = %s' % (dom.name(), states.get(state, state), maxmem, mem, ncpu, cputime)
+import libvirt
-class Sliver_LXC(accounts.Account):
+import logger
+import plnode.bwlimit as bwlimit
+from initscript import Initscript
+from account import Account
+from sliver_libvirt import Sliver_Libvirt
+
+BTRFS_TIMEOUT=15*60
+
+class Sliver_LXC(Sliver_Libvirt, Initscript):
"""This class wraps LXC commands"""
-
- SHELL = '/bin/sh'
- # Using /bin/bash triggers destroy root/site_admin (?!?)
+
+ SHELL = '/usr/sbin/vsh'
TYPE = 'sliver.LXC'
# Need to add a tag at myplc to actually use this account
# type = 'sliver.LXC'
- def __init__(self, rec):
- self.name = rec['name']
- print "LXC __init__ %s"%(self.name)
- logger.verbose ('sliver_lxc: %s init'%self.name)
-
- self.dir = '/vservers/%s'%(self.name)
-
- # Assume the directory with the image and config files
- # are in place
-
- self.config = '%s/config'%(self.dir)
- self.fstab = '%s/fstab'%(self.dir)
- self.lxc_log = '%s/lxc.log'%(self.dir)
- self.keys = ''
- self.rspec = {}
- self.slice_id = rec['slice_id']
- self.disk_usage_initialized = False
- self.initscript = ''
- self.enabled = True
- self.connection = createConnection()
+ REF_IMG_BASE_DIR = '/vservers/.lvref'
+ CON_BASE_DIR = '/vservers'
+
+ def __init__ (self, rec):
+ name=rec['name']
+ Sliver_Libvirt.__init__ (self,rec)
+ Initscript.__init__ (self,name)
+
+ def configure (self, rec):
+ Sliver_Libvirt.configure (self,rec)
+
+ # in case we update nodemanager..
+ self.install_and_enable_vinit()
+ # do the configure part from Initscript
+ Initscript.configure(self,rec)
+
+ def start(self, delay=0):
+ if 'enabled' in self.rspec and self.rspec['enabled'] <= 0:
+ logger.log('sliver_lxc: not starting %s, is not enabled'%self.name)
+ return
+ # the generic /etc/init.d/vinit script is permanently refreshed, and enabled
+ self.install_and_enable_vinit()
+ # expose .ssh for omf_friendly slivers
+ if 'tags' in self.rspec and 'omf_control' in self.rspec['tags']:
+ Account.mount_ssh_dir(self.name)
+ Sliver_Libvirt.start (self, delay)
+ # if a change has occurred in the slice initscript, reflect this in /etc/init.d/vinit.slice
+ self.refresh_slice_vinit()
+
+ def rerun_slice_vinit (self):
+ """This is called whenever the initscript code changes"""
+ # xxx - todo - not sure exactly how to:
+ # (.) invoke something in the guest
+ # (.) which options of systemctl should be used to trigger a restart
+ # should not prevent the first run from going fine hopefully
+ logger.log("WARNING: sliver_lxc.rerun_slice_vinit not implemented yet")
@staticmethod
- def create(name, rec = None):
+ def create(name, rec=None):
''' Create dirs, copy fs image, lxc_create '''
- print "LXC create %s"%(name)
- logger.verbose ('sliver_lxc: %s create'%name)
- dir = '/vservers/%s'%(name)
- config = '%s/config'%(dir)
- lxc_log = '%s/lxc.log'%(dir)
-
- if not (os.path.isdir(dir) and
- os.access(dir, os.R_OK | os.W_OK | os.X_OK)):
- print 'lxc_create: directory %s does not exist or wrong perms'%(dir)
+ logger.verbose ('sliver_lxc: %s create'%(name))
+ conn = Sliver_Libvirt.getConnection(Sliver_LXC.TYPE)
+
+ # Get the type of image from vref myplc tags specified as:
+ # pldistro = lxc
+ # fcdistro = squeeze
+ # arch x86_64
+
+ arch = 'x86_64'
+ tags = rec['rspec']['tags']
+ if 'arch' in tags:
+ arch = tags['arch']
+ if arch == 'i386':
+ arch = 'i686'
+
+ vref = rec['vref']
+ if vref is None:
+ vref = "lxc-f18-x86_64"
+ logger.log("sliver_libvirt: %s: WARNING - no vref attached, using hard-wired default %s" % (name,vref))
+
+ refImgDir = os.path.join(Sliver_LXC.REF_IMG_BASE_DIR, vref)
+ containerDir = os.path.join(Sliver_LXC.CON_BASE_DIR, name)
+
+ # check the template exists -- there's probably a better way..
+ if not os.path.isdir(refImgDir):
+ logger.log('sliver_lxc: %s: ERROR Could not create sliver - reference image %s not found' % (name,vref))
+ logger.log('sliver_lxc: %s: ERROR Expected reference image in %s'%(name,refImgDir))
+ return
+
+# this hopefully should be fixed now
+# # in fedora20 we have some difficulty in properly cleaning up /vservers/
+# # also note that running e.g. btrfs subvolume create /vservers/.lvref/image /vservers/foo
+# # behaves differently, whether /vservers/foo exists or not:
+# # if /vservers/foo does not exist, it creates /vservers/foo
+# # but if it does exist, then it creates /vservers/foo/image !!
+# # so we need to check the expected container rootfs does not exist yet
+# # this hopefully could be removed in a future release
+# if os.path.exists (containerDir):
+# logger.log("sliver_lxc: %s: WARNING cleaning up pre-existing %s"%(name,containerDir))
+# command = ['btrfs', 'subvolume', 'delete', containerDir]
+# logger.log_call(command, BTRFS_TIMEOUT)
+# # re-check
+# if os.path.exists (containerDir):
+# logger.log('sliver_lxc: %s: ERROR Could not create sliver - could not clean up empty %s'%(name,containerDir))
+# return
+
+ # Snapshot the reference image fs (assume the reference image is in its own
+ # subvolume)
+ command = ['btrfs', 'subvolume', 'snapshot', refImgDir, containerDir]
+ if not logger.log_call(command, timeout=BTRFS_TIMEOUT):
+ logger.log('sliver_lxc: ERROR Could not create BTRFS snapshot at', containerDir)
+ return
+ command = ['chmod', '755', containerDir]
+ logger.log_call(command)
+
+ # TODO: set quotas...
+
+ # Set hostname. A valid hostname cannot have '_'
+ #with open(os.path.join(containerDir, 'etc/hostname'), 'w') as f:
+ # print >>f, name.replace('_', '-')
+
+ # Add slices group if not already present
+ try:
+ group = grp.getgrnam('slices')
+ except:
+ command = ['/usr/sbin/groupadd', 'slices']
+ logger.log_call(command)
+
+ # Add unix account (TYPE is specified in the subclass)
+ command = ['/usr/sbin/useradd', '-g', 'slices', '-s', Sliver_LXC.SHELL, name, '-p', '*']
+ logger.log_call(command)
+ command = ['mkdir', '/home/%s/.ssh'%name]
+ logger.log_call(command)
+
+ # Create a public/private key pair to connect from the host to the guest
+ # without a password... maybe remove the need for authentication inside the
+ # guest?
+ command = ['su', '-s', '/bin/bash', '-c', 'ssh-keygen -t rsa -N "" -f /home/%s/.ssh/id_rsa'%(name)]
+ logger.log_call(command)
+
+ command = ['chown', '-R', '%s.slices'%name, '/home/%s/.ssh'%name]
+ logger.log_call(command)
+
+ command = ['mkdir', '%s/root/.ssh'%containerDir]
+ logger.log_call(command)
+
+ command = ['cp', '/home/%s/.ssh/id_rsa.pub'%name, '%s/root/.ssh/authorized_keys'%containerDir]
+ logger.log_call(command)
+
+ logger.log("creating /etc/slicename file in %s" % os.path.join(containerDir,'etc/slicename'))
+ try:
+ file(os.path.join(containerDir,'etc/slicename'), 'w').write(name)
+ except:
+ logger.log_exc("exception while creating /etc/slicename")
+
+ try:
+ file(os.path.join(containerDir,'etc/slicefamily'), 'w').write(vref)
+ except:
+ logger.log_exc("exception while creating /etc/slicefamily")
+
+ uid = None
+ try:
+ uid = getpwnam(name).pw_uid
+ except KeyError:
+ # KeyError is raised if the user account was not created successfully
+ logger.log_exc("exception while getting user id")
+
+ if uid is not None:
+ logger.log("uid is %d" % uid)
+ command = ['mkdir', '%s/home/%s' % (containerDir, name)]
+ logger.log_call(command)
+ command = ['chown', name, '%s/home/%s' % (containerDir, name)]
+ logger.log_call(command)
+ etcpasswd = os.path.join(containerDir, 'etc/passwd')
+ etcgroup = os.path.join(containerDir, 'etc/group')
+ if os.path.exists(etcpasswd):
+ # create all accounts with gid=1001 - i.e. 'slices' like it is in the root context
+ slices_gid=1001
+ logger.log("adding user %(name)s id %(uid)d gid %(slices_gid)d to %(etcpasswd)s" % (locals()))
+ try:
+ file(etcpasswd,'a').write("%(name)s:x:%(uid)d:%(slices_gid)d::/home/%(name)s:/bin/bash\n" % locals())
+ except:
+ logger.log_exc("exception while updating %s"%etcpasswd)
+ logger.log("adding group slices with gid %(slices_gid)d to %(etcgroup)s"%locals())
+ try:
+ file(etcgroup,'a').write("slices:x:%(slices_gid)d\n"%locals())
+ except:
+ logger.log_exc("exception while updating %s"%etcgroup)
+ sudoers = os.path.join(containerDir, 'etc/sudoers')
+ if os.path.exists(sudoers):
+ try:
+ file(sudoers,'a').write("%s ALL=(ALL) NOPASSWD: ALL\n" % name)
+ except:
+ logger.log_exc("exception while updating /etc/sudoers")
+
+ # customizations for the user environment - root or slice uid
+ # we save the whole business in /etc/planetlab.profile
+ # and source this file for both root and the slice uid's .profile
+ # prompt for slice owner, + LD_PRELOAD to transparently wrap bind
+ pl_profile=os.path.join(containerDir,"etc/planetlab.profile")
+ ld_preload_text="""# by default, we define this setting so that calls to bind(2),
+# when invoked on 0.0.0.0, get transparently redirected to the public interface of this node
+# see https://svn.planet-lab.org/wiki/LxcPortForwarding"""
+ usrmove_path_text="""# VM's before Features/UsrMove need /bin and /sbin in their PATH"""
+ usrmove_path_code="""
+pathmunge () {
+ if ! echo $PATH | /bin/egrep -q "(^|:)$1($|:)" ; then
+ if [ "$2" = "after" ] ; then
+ PATH=$PATH:$1
+ else
+ PATH=$1:$PATH
+ fi
+ fi
+}
+pathmunge /bin after
+pathmunge /sbin after
+unset pathmunge
+"""
+ with open(pl_profile,'w') as f:
+ f.write("export PS1='%s@\H \$ '\n"%(name))
+ f.write("%s\n"%ld_preload_text)
+ f.write("export LD_PRELOAD=/etc/planetlab/lib/bind_public.so\n")
+ f.write("%s\n"%usrmove_path_text)
+ f.write("%s\n"%usrmove_path_code)
+
+ # make sure this file is sourced from both root's and slice's .profile
+ enforced_line = "[ -f /etc/planetlab.profile ] && source /etc/planetlab.profile\n"
+ for path in [ 'root/.profile', 'home/%s/.profile'%name ]:
+ from_root=os.path.join(containerDir,path)
+ # if the directory does not exist yet, skip it for now
+ if not os.path.isdir(os.path.dirname(from_root)): continue
+ found=False
+ try:
+ contents=file(from_root).readlines()
+ for content in contents:
+ if content==enforced_line: found=True
+ except IOError: pass
+ if not found:
+ with open(from_root,"a") as user_profile:
+ user_profile.write(enforced_line)
+ # in case we create the slice's .profile when writing
+ if from_root.find("/home")>=0:
+ command=['chown','%s:slices'%name,from_root]
+ logger.log_call(command)
+
+ # Look up the xid and create the template after the user is created so we
+ # can get the correct xid based on the name of the slice
+ xid = bwlimit.get_xid(name)
+
+ # Template for libvirt sliver configuration
+ template_filename_sliceimage = os.path.join(Sliver_LXC.REF_IMG_BASE_DIR,'lxc_template.xml')
+ if os.path.isfile (template_filename_sliceimage):
+ logger.log("WARNING: using compat template %s"%template_filename_sliceimage)
+ template_filename=template_filename_sliceimage
+ else:
+ logger.log("Cannot find XML template %s"%template_filename_sliceimage)
+ return
+
+ interfaces = Sliver_Libvirt.get_interfaces_xml(rec)
+
+ try:
+ with open(template_filename) as f:
+ template = Template(f.read())
+ xml = template.substitute(name=name, xid=xid, interfaces=interfaces, arch=arch)
+ except IOError:
+ logger.log('Failed to parse or use XML template file %s'%template_filename)
return
- # Assume for now that the directory is there and with a FS
- command=[]
- # be verbose
- command += ['/bin/bash','-x',]
- command += ['/usr/bin/lxc-create', '-n', name, '-f', config, '&']
- print command
- #subprocess.call(command, stdin=open('/dev/null', 'r'), stdout=open('/dev/null', 'w'), stderr=subprocess.STDOUT, shell=False)
- conn = createConnection()
+
+ # Look up the sliver before actually
+ # defining it, just in case it was already defined.
try:
- dom0 = conn.lookupByName(name)
+ dom = conn.lookupByName(name)
except:
- dom0 = conn.defineXML(test_template())
- print info(dom0)
+ dom = conn.defineXML(xml)
+ logger.verbose('lxc_create: %s -> %s'%(name, Sliver_Libvirt.dom_details(dom)))
+
@staticmethod
def destroy(name):
- ''' lxc_destroy '''
- print "LXC destroy %s"%(name)
- dir = '/vservers/%s'%(name)
- lxc_log = '%s/lxc.log'%(dir)
- command=[]
- command += ['/usr/bin/lxc-destroy', '-n', name]
+ # umount .ssh directory - only if mounted
+ Account.umount_ssh_dir(name)
+ logger.verbose ('sliver_lxc: %s destroy'%(name))
+ conn = Sliver_Libvirt.getConnection(Sliver_LXC.TYPE)
- subprocess.call(command, stdin=open('/dev/null', 'r'), stdout=open('/dev/null', 'w'), stderr=subprocess.STDOUT, shell=False)
- print "LXC destroy DONE"
+ containerDir = Sliver_LXC.CON_BASE_DIR + '/%s'%(name)
- def configure(self, rec):
- print "LXC configure %s"%(self.name)
+ try:
+ # Destroy libvirt domain
+ dom = conn.lookupByName(name)
+ except:
+ logger.verbose('sliver_lxc.destroy: Domain %s does not exist!' % name)
+ return
- def start(self, delay=0):
- ''' Check existence? lxc_start '''
- print "LXC start %s"%(self.name)
- command=[]
- command += ['/usr/bin/lxc-start', '-n', self.name, '-d']
- print command
- subprocess.call(command, stdin=open('/dev/null', 'r'), stdout=open('/dev/null', 'w'), stderr=subprocess.STDOUT, shell=False)
-
- def stop(self):
- ''' lxc_stop '''
- print "LXC stop %s"%(self.name)
-
- def is_running(self):
- print "LXC is_running %s"%(self.name)
- command = []
- command += ['/usr/bin/lxc-info -n %s'%(self.name)]
- print command
- p = subprocess.Popen(command, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
- state = p.communicate()[0].split(' ')[2]
- print state
- if state == 'RUNNING': return True
- else: return False
-
-
+ # Slivers with vsys running will fail the subvolume delete
+ # removeSliverFromVsys returns True if it stops vsys, telling us to start it again later
+ vsys_stopped = removeSliverFromVsys (name)
+
+ try:
+ logger.log("sliver_lxc.destroy: destroying domain %s"%name)
+ dom.destroy()
+ except:
+ logger.verbose('sliver_lxc.destroy: Domain %s not running... continuing.' % name)
+
+ try:
+ logger.log("sliver_lxc.destroy: undefining domain %s"%name)
+ dom.undefine()
+ except:
+ logger.verbose('sliver_lxc.destroy: Domain %s is not defined... continuing.' % name)
+
+ # Remove the user after destroying the domain, to force logout
+ command = ['/usr/sbin/userdel', '-f', '-r', name]
+ logger.log_call(command)
+
+ # Remove rootfs of destroyed domain
+ command = ['btrfs', 'subvolume', 'delete', containerDir]
+ logger.log_call(command, timeout=BTRFS_TIMEOUT)
+
+ # For some reason I am seeing this :
+ #log_call: running command btrfs subvolume delete /vservers/inri_sl1
+ #log_call: ERROR: cannot delete '/vservers/inri_sl1' - Device or resource busy
+ #log_call: Delete subvolume '/vservers/inri_sl1'
+ #log_call:end command (btrfs subvolume delete /vservers/inri_sl1) returned with code 1
+ #
+ # something must have an open handle to a file in there, but I can't find out what it is
+ # the following code aims at gathering data on what is going on in the system at this point in time
+ # note that some time later (typically when the sliver gets re-created) the same
+ # attempt at deleting the subvolume does work
+ # also lsof never shows anything relevant; this is painful..
+
+ if not os.path.exists(containerDir):
+ logger.log('sliver_lxc.destroy: %s cleanly destroyed.'%name)
+ else:
+ # we're in /
+ #logger.log("-TMP-cwd %s : %s"%(name,os.getcwd()))
+ # also lsof never shows anything relevant; this is painful..
+ #logger.log("-TMP-lsof %s"%name)
+ #command=['lsof']
+ #logger.log_call(command)
+ logger.log("-TMP-ls-l %s"%name)
+ command = ['ls', '-l', containerDir]
+ logger.log_call(command)
+ if os.path.exists(containerDir):
+ logger.log('sliver_lxc.destroy: ERROR could not cleanly destroy %s - giving up'%name)
+
+ if vsys_stopped: vsysStartService()