require gnupg1 on f>=31; sense the system to use gpg1 when installed
[nodemanager.git] / sliver_lxc.py
index 9ca2aa8..e54c453 100644 (file)
@@ -13,7 +13,7 @@ from string import Template
 # vsys probably should not be a plugin
 # the thing is, the right way to handle stuff would be that
 # if slivers get created by doing a,b,c
-# then they sohuld be delted by doing c,b,a
+# then they should be deleted by doing c,b,a
 # the current ordering model for vsys plugins completely fails to capture that
 from plugins.vsys import removeSliverFromVsys, startService as vsysStartService
 
@@ -25,7 +25,7 @@ from initscript import Initscript
 from account import Account
 from sliver_libvirt import Sliver_Libvirt
 
-BTRFS_TIMEOUT=15*60
+BTRFS_TIMEOUT = 15*60
 
 class Sliver_LXC(Sliver_Libvirt, Initscript):
     """This class wraps LXC commands"""
@@ -38,46 +38,72 @@ class Sliver_LXC(Sliver_Libvirt, Initscript):
     REF_IMG_BASE_DIR = '/vservers/.lvref'
     CON_BASE_DIR     = '/vservers'
 
-    def __init__ (self, rec):
-        name=rec['name']
-        Sliver_Libvirt.__init__ (self,rec)
-        Initscript.__init__ (self,name)
+    def __init__(self, rec):
+        name = rec['name']
+        Sliver_Libvirt.__init__(self, rec)
+        Initscript.__init__(self, name)
 
-    def configure (self, rec):
-        Sliver_Libvirt.configure (self,rec)
+    def configure(self, rec):
+        logger.log('========== sliver_lxc.configure {}'.format(self.name))
+        Sliver_Libvirt.configure(self, rec)
 
         # in case we update nodemanager..
         self.install_and_enable_vinit()
         # do the configure part from Initscript
-        Initscript.configure(self,rec)
+        Initscript.configure(self, rec)
 
+    # remember configure() always gets called *before* start()
+    # in particular the slice initscript
+    # is expected to be in place already at this point
     def start(self, delay=0):
+        logger.log('==================== sliver_lxc.start {}'.format(self.name))
         if 'enabled' in self.rspec and self.rspec['enabled'] <= 0:
-            logger.log('sliver_lxc: not starting %s, is not enabled'%self.name)
+            logger.log('sliver_lxc: not starting {}, is not enabled'.format(self.name))
             return
         # the generic /etc/init.d/vinit script is permanently refreshed, and enabled
         self.install_and_enable_vinit()
         # expose .ssh for omf_friendly slivers
         if 'tags' in self.rspec and 'omf_control' in self.rspec['tags']:
             Account.mount_ssh_dir(self.name)
-        Sliver_Libvirt.start (self, delay)
-        # if a change has occured in the slice initscript, reflect this in /etc/init.d/vinit.slice
-        self.refresh_slice_vinit()
-
-    def rerun_slice_vinit (self):
-        """This is called whenever the initscript code changes"""
-        # xxx - todo - not sure exactly how to:
-        # (.) invoke something in the guest
-        # (.) which options of systemctl should be used to trigger a restart
-        # should not prevent the first run from going fine hopefully
-        logger.log("WARNING: sliver_lxc.rerun_slice_vinit not implemented yet")
+#        logger.log("NM is exiting for debug - just about to start {}".format(self.name))
+#        exit(0)
+        Sliver_Libvirt.start(self, delay)
+
+    def rerun_slice_vinit(self):
+        """This is called at startup, and whenever the initscript code changes"""
+        logger.log("sliver_lxc.rerun_slice_vinit {}".format(self.name))
+        plain = "virsh -c lxc:/// lxc-enter-namespace --noseclabel -- {} /usr/bin/systemctl --system daemon-reload"\
+            .format(self.name)
+        command = plain.split()
+        logger.log_call(command, timeout=3)
+        plain = "virsh -c lxc:/// lxc-enter-namespace --noseclabel -- {} /usr/bin/systemctl restart vinit.service"\
+            .format(self.name)
+        command = plain.split()
+        logger.log_call(command, timeout=3)
+
 
     @staticmethod
     def create(name, rec=None):
-        ''' Create dirs, copy fs image, lxc_create '''
-        logger.verbose ('sliver_lxc: %s create'%(name))
+        '''
+        Create dirs, copy fs image, lxc_create
+        '''
+        logger.verbose('sliver_lxc: {} create'.format(name))
         conn = Sliver_Libvirt.getConnection(Sliver_LXC.TYPE)
 
+        vref = rec['vref']
+        if vref is None:
+            vref = "lxc-f24-x86_64"
+            logger.log("sliver_libvirt: {}: WARNING - no vref attached, using hard-wired default {}"
+                       .format(name, vref))
+
+        # compute guest arch from vref
+        # essentially we want x86_64 (default) or i686 here for libvirt
+        try:
+            (x, y, arch) = vref.split('-')
+            arch = "x86_64" if arch.find("64") >= 0 else "i686"
+        except:
+            arch = 'x86_64'
+
         # Get the type of image from vref myplc tags specified as:
         # pldistro = lxc
         # fcdistro = squeeze
@@ -90,42 +116,43 @@ class Sliver_LXC(Sliver_Libvirt, Initscript):
             if arch == 'i386':
                 arch = 'i686'
 
-        vref = rec['vref']
-        if vref is None:
-            vref = "lxc-f18-x86_64"
-            logger.log("sliver_libvirt: %s: WARNING - no vref attached, using hard-wired default %s" % (name,vref))
 
         refImgDir    = os.path.join(Sliver_LXC.REF_IMG_BASE_DIR, vref)
         containerDir = os.path.join(Sliver_LXC.CON_BASE_DIR, name)
 
         # check the template exists -- there's probably a better way..
         if not os.path.isdir(refImgDir):
-            logger.log('sliver_lxc: %s: ERROR Could not create sliver - reference image %s not found' % (name,vref))
-            logger.log('sliver_lxc: %s: ERROR Expected reference image in %s'%(name,refImgDir))
+            logger.log('sliver_lxc: {}: ERROR Could not create sliver - reference image {} not found'
+                       .format(name, vref))
+            logger.log('sliver_lxc: {}: ERROR Expected reference image in {}'.format(name, refImgDir))
             return
 
-# this hopefully should be fixed now
+# during some time this fragment had been commented out
+# but we're seeing cases where this code might actually be useful, so..
+#        this hopefully should be fixed now
 #        # in fedora20 we have some difficulty in properly cleaning up /vservers/<slicename>
 #        # also note that running e.g. btrfs subvolume create /vservers/.lvref/image /vservers/foo
 #        # behaves differently, whether /vservers/foo exists or not:
 #        # if /vservers/foo does not exist, it creates /vservers/foo
 #        # but if it does exist, then       it creates /vservers/foo/image !!
 #        # so we need to check the expected container rootfs does not exist yet
-#        # this hopefully could be removed in a future release 
-#        if os.path.exists (containerDir):
-#            logger.log("sliver_lxc: %s: WARNING cleaning up pre-existing %s"%(name,containerDir))
-#            command = ['btrfs', 'subvolume', 'delete', containerDir]
-#            logger.log_call(command, BTRFS_TIMEOUT)
-#            # re-check
-#            if os.path.exists (containerDir):
-#                logger.log('sliver_lxc: %s: ERROR Could not create sliver - could not clean up empty %s'%(name,containerDir))
-#                return
-
-        # Snapshot the reference image fs (assume the reference image is in its own
-        # subvolume)
+#        # this hopefully could be removed in a future release
+        if os.path.exists (containerDir):
+            logger.log("sliver_lxc: {}: WARNING cleaning up pre-existing {}".format(name, containerDir))
+            command = ['btrfs', 'subvolume', 'delete', containerDir]
+            logger.log_call(command, BTRFS_TIMEOUT)
+            # re-check
+            if os.path.exists (containerDir):
+                logger.log('sliver_lxc: {}: ERROR Could not create sliver - could not clean up empty {}'
+                           .format(name, containerDir))
+                return
+
+        # Snapshot the reference image fs
+        # this assumes the reference image is in its own subvolume
         command = ['btrfs', 'subvolume', 'snapshot', refImgDir, containerDir]
         if not logger.log_call(command, timeout=BTRFS_TIMEOUT):
-            logger.log('sliver_lxc: ERROR Could not create BTRFS snapshot at', containerDir)
+            logger.log('sliver_lxc: ERROR Could not create BTRFS snapshot at {}'
+                       .format(containerDir))
             return
         command = ['chmod', '755', containerDir]
         logger.log_call(command)
@@ -146,32 +173,36 @@ class Sliver_LXC(Sliver_Libvirt, Initscript):
         # Add unix account (TYPE is specified in the subclass)
         command = ['/usr/sbin/useradd', '-g', 'slices', '-s', Sliver_LXC.SHELL, name, '-p', '*']
         logger.log_call(command)
-        command = ['mkdir', '/home/%s/.ssh'%name]
+        command = ['mkdir', '/home/{}/.ssh'.format(name)]
         logger.log_call(command)
 
         # Create PK pair keys to connect from the host to the guest without
         # password... maybe remove the need for authentication inside the
         # guest?
-        command = ['su', '-s', '/bin/bash', '-c', 'ssh-keygen -t rsa -N "" -f /home/%s/.ssh/id_rsa'%(name)]
+        command = ['su', '-s', '/bin/bash', '-c',
+                   'ssh-keygen -t rsa -N "" -f /home/{}/.ssh/id_rsa'.format(name)]
         logger.log_call(command)
 
-        command = ['chown', '-R', '%s.slices'%name, '/home/%s/.ssh'%name]
+        command = ['chown', '-R', '{}:slices'.format(name), '/home/{}/.ssh'.format(name)]
         logger.log_call(command)
 
-        command = ['mkdir', '%s/root/.ssh'%containerDir]
+        command = ['mkdir', '{}/root/.ssh'.format(containerDir)]
         logger.log_call(command)
 
-        command = ['cp', '/home/%s/.ssh/id_rsa.pub'%name, '%s/root/.ssh/authorized_keys'%containerDir]
+        command = ['cp', '/home/{}/.ssh/id_rsa.pub'.format(name),
+                   '{}/root/.ssh/authorized_keys'.format(containerDir)]
         logger.log_call(command)
 
-        logger.log("creating /etc/slicename file in %s" % os.path.join(containerDir,'etc/slicename'))
+        logger.log("creating /etc/slicename file in {}".format(os.path.join(containerDir, 'etc/slicename')))
         try:
-            file(os.path.join(containerDir,'etc/slicename'), 'w').write(name)
+            with open(os.path.join(containerDir, 'etc/slicename'), 'w') as f:
+                f.write(name)
         except:
             logger.log_exc("exception while creating /etc/slicename")
 
         try:
-            file(os.path.join(containerDir,'etc/slicefamily'), 'w').write(vref)
+            with open(os.path.join(containerDir, 'etc/slicefamily'), 'w') as f:
+                f.write(vref)
         except:
             logger.log_exc("exception while creating /etc/slicefamily")
 
@@ -183,43 +214,50 @@ class Sliver_LXC(Sliver_Libvirt, Initscript):
             logger.log_exc("exception while getting user id")
 
         if uid is not None:
-            logger.log("uid is %d" % uid)
-            command = ['mkdir', '%s/home/%s' % (containerDir, name)]
+            logger.log("uid is {}".format(uid))
+            command = ['mkdir', '{}/home/{}'.format(containerDir, name)]
             logger.log_call(command)
-            command = ['chown', name, '%s/home/%s' % (containerDir, name)]
+            command = ['chown', name, '{}/home/{}'.format(containerDir, name)]
             logger.log_call(command)
             etcpasswd = os.path.join(containerDir, 'etc/passwd')
             etcgroup = os.path.join(containerDir, 'etc/group')
             if os.path.exists(etcpasswd):
                 # create all accounts with gid=1001 - i.e. 'slices' like it is in the root context
-                slices_gid=1001
-                logger.log("adding user %(name)s id %(uid)d gid %(slices_gid)d to %(etcpasswd)s" % (locals()))
+                slices_gid = 1001
+                logger.log("adding user {name} id {uid} gid {slices_gid} to {etcpasswd}"
+                           .format(**(locals())))
                 try:
-                    file(etcpasswd,'a').write("%(name)s:x:%(uid)d:%(slices_gid)d::/home/%(name)s:/bin/bash\n" % locals())
+                    with open(etcpasswd, 'a') as passwdfile:
+                        passwdfile.write("{name}:x:{uid}:{slices_gid}::/home/{name}:/bin/bash\n"
+                                         .format(**locals()))
                 except:
-                    logger.log_exc("exception while updating %s"%etcpasswd)
-                logger.log("adding group slices with gid %(slices_gid)d to %(etcgroup)s"%locals())
+                    logger.log_exc("exception while updating {}".format(etcpasswd))
+                logger.log("adding group slices with gid {slices_gid} to {etcgroup}"
+                           .format(**locals()))
                 try:
-                    file(etcgroup,'a').write("slices:x:%(slices_gid)d\n"%locals())
+                    with open(etcgroup, 'a') as groupfile:
+                        groupfile.write("slices:x:{slices_gid}\n"
+                                        .format(**locals()))
                 except:
-                    logger.log_exc("exception while updating %s"%etcgroup)
+                    logger.log_exc("exception while updating {}".format(etcgroup))
             sudoers = os.path.join(containerDir, 'etc/sudoers')
             if os.path.exists(sudoers):
                 try:
-                    file(sudoers,'a').write("%s ALL=(ALL) NOPASSWD: ALL\n" % name)
+                    with open(sudoers, 'a') as f:
+                        f.write("{} ALL=(ALL) NOPASSWD: ALL\n".format(name))
                 except:
                     logger.log_exc("exception while updating /etc/sudoers")
 
         # customizations for the user environment - root or slice uid
-        # we save the whole business in /etc/planetlab.profile 
+        # we save the whole business in /etc/planetlab.profile
         # and source this file for both root and the slice uid's .profile
         # prompt for slice owner, + LD_PRELOAD for transparently wrap bind
-        pl_profile=os.path.join(containerDir,"etc/planetlab.profile")
-        ld_preload_text="""# by default, we define this setting so that calls to bind(2),
+        pl_profile = os.path.join(containerDir, "etc/planetlab.profile")
+        ld_preload_text = """# by default, we define this setting so that calls to bind(2),
 # when invoked on 0.0.0.0, get transparently redirected to the public interface of this node
 # see https://svn.planet-lab.org/wiki/LxcPortForwarding"""
-        usrmove_path_text="""# VM's before Features/UsrMove need /bin and /sbin in their PATH"""
-        usrmove_path_code="""
+        usrmove_path_text = """# VM's before Features/UsrMove need /bin and /sbin in their PATH"""
+        usrmove_path_code = """
 pathmunge () {
         if ! echo $PATH | /bin/egrep -q "(^|:)$1($|:)" ; then
            if [ "$2" = "after" ] ; then
@@ -233,31 +271,34 @@ pathmunge /bin after
 pathmunge /sbin after
 unset pathmunge
 """
-        with open(pl_profile,'w') as f:
-            f.write("export PS1='%s@\H \$ '\n"%(name))
-            f.write("%s\n"%ld_preload_text)
+        with open(pl_profile, 'w') as f:
+            f.write("export PS1='{}@\H \$ '\n".format(name))
+            f.write("{}\n".format(ld_preload_text))
             f.write("export LD_PRELOAD=/etc/planetlab/lib/bind_public.so\n")
-            f.write("%s\n"%usrmove_path_text)
-            f.write("%s\n"%usrmove_path_code)
+            f.write("{}\n".format(usrmove_path_text))
+            f.write("{}\n".format(usrmove_path_code))
 
         # make sure this file is sourced from both root's and slice's .profile
         enforced_line = "[ -f /etc/planetlab.profile ] && source /etc/planetlab.profile\n"
-        for path in [ 'root/.profile', 'home/%s/.profile'%name ]:
-            from_root=os.path.join(containerDir,path)
+        for path in [ 'root/.profile', 'home/{}/.profile'.format(name) ]:
+            from_root = os.path.join(containerDir, path)
             # if dir is not yet existing let's forget it for now
             if not os.path.isdir(os.path.dirname(from_root)): continue
-            found=False
-            try: 
-                contents=file(from_root).readlines()
-                for content in contents:
-                    if content==enforced_line: found=True
-            except IOError: pass
+            found = False
+            try:
+                with open(from_root) as f:
+                    contents = f.readlines()
+                    for content in contents:
+                        if content == enforced_line:
+                            found = True
+            except IOError:
+                pass
             if not found:
-                with open(from_root,"a") as user_profile:
+                with open(from_root, "a") as user_profile:
                     user_profile.write(enforced_line)
                 # in case we create the slice's .profile when writing
-                if from_root.find("/home")>=0:
-                    command=['chown','%s:slices'%name,from_root]
+                if from_root.find("/home") >= 0:
+                    command = ['chown', '{}:slices'.format(name), from_root]
                     logger.log_call(command)
 
         # Lookup for xid and create template after the user is created so we
@@ -265,12 +306,12 @@ unset pathmunge
         xid = bwlimit.get_xid(name)
 
         # Template for libvirt sliver configuration
-        template_filename_sliceimage = os.path.join(Sliver_LXC.REF_IMG_BASE_DIR,'lxc_template.xml')
-        if os.path.isfile (template_filename_sliceimage):
-            logger.log("WARNING: using compat template %s"%template_filename_sliceimage)
-            template_filename=template_filename_sliceimage
+        template_filename_sliceimage = os.path.join(Sliver_LXC.REF_IMG_BASE_DIR, 'lxc_template.xml')
+        if os.path.isfile(template_filename_sliceimage):
+            logger.verbose("Using XML template {}".format(template_filename_sliceimage))
+            template_filename = template_filename_sliceimage
         else:
-            logger.log("Cannot find XML template %s"%template_filename_sliceimage)
+            logger.log("Cannot find XML template {}".format(template_filename_sliceimage))
             return
 
         interfaces = Sliver_Libvirt.get_interfaces_xml(rec)
@@ -280,7 +321,7 @@ unset pathmunge
                 template = Template(f.read())
                 xml  = template.substitute(name=name, xid=xid, interfaces=interfaces, arch=arch)
         except IOError:
-            logger.log('Failed to parse or use XML template file %s'%template_filename)
+            logger.log('Failed to parse or use XML template file {}'.format(template_filename))
             return
 
         # Lookup for the sliver before actually
@@ -289,23 +330,23 @@ unset pathmunge
             dom = conn.lookupByName(name)
         except:
             dom = conn.defineXML(xml)
-        logger.verbose('lxc_create: %s -> %s'%(name, Sliver_Libvirt.dom_details(dom)))
+        logger.verbose('lxc_create: {} -> {}'.format(name, Sliver_Libvirt.dom_details(dom)))
 
 
     @staticmethod
     def destroy(name):
         # umount .ssh directory - only if mounted
         Account.umount_ssh_dir(name)
-        logger.verbose ('sliver_lxc: %s destroy'%(name))
+        logger.verbose ('sliver_lxc: {} destroy'.format(name))
         conn = Sliver_Libvirt.getConnection(Sliver_LXC.TYPE)
 
-        containerDir = Sliver_LXC.CON_BASE_DIR + '/%s'%(name)
+        containerDir = os.path.join(Sliver_LXC.CON_BASE_DIR, name)
 
         try:
             # Destroy libvirt domain
             dom = conn.lookupByName(name)
         except:
-            logger.verbose('sliver_lxc.destroy: Domain %s does not exist!' % name)
+            logger.verbose('sliver_lxc.destroy: Domain {} does not exist!'.format(name))
             return
 
         # Slivers with vsys running will fail the subvolume delete
@@ -313,25 +354,38 @@ unset pathmunge
         vsys_stopped = removeSliverFromVsys (name)
 
         try:
-            logger.log("sliver_lxc.destroy: destroying domain %s"%name)
+            logger.log("sliver_lxc.destroy: destroying domain {}".format(name))
             dom.destroy()
         except:
-            logger.verbose('sliver_lxc.destroy: Domain %s not running... continuing.' % name)
+            logger.verbose("sliver_lxc.destroy: Domain {} not running... continuing.".format(name))
 
         try:
-            logger.log("sliver_lxc.destroy: undefining domain %s"%name)
+            logger.log("sliver_lxc.destroy: undefining domain {}".format(name))
             dom.undefine()
         except:
-            logger.verbose('sliver_lxc.destroy: Domain %s is not defined... continuing.' % name)
+            logger.verbose('sliver_lxc.destroy: Domain {} is not defined... continuing.'.format(name))
 
         # Remove user after destroy domain to force logout
         command = ['/usr/sbin/userdel', '-f', '-r', name]
         logger.log_call(command)
 
+        # Remove rootfs of destroyed domain
+        command = ['/usr/bin/rm', '-rf', containerDir]
+        logger.log_call(command, timeout=BTRFS_TIMEOUT)
+
+        # ???
+        logger.log("-TMP-ls-l {}".format(name))
+        command = ['ls', '-lR', containerDir]
+        logger.log_call(command)
+        logger.log("-TMP-vsys-status")
+        command = ['/usr/bin/systemctl', 'status', 'vsys']
+        logger.log_call(command)
+        # ???
+
         # Remove rootfs of destroyed domain
         command = ['btrfs', 'subvolume', 'delete', containerDir]
         logger.log_call(command, timeout=BTRFS_TIMEOUT)
-        
+
         # For some reason I am seeing this :
         #log_call: running command btrfs subvolume delete /vservers/inri_sl1
         #log_call: ERROR: cannot delete '/vservers/inri_sl1' - Device or resource busy
@@ -345,18 +399,22 @@ unset pathmunge
         # also lsof never shows anything relevant; this is painful..
 
         if not os.path.exists(containerDir):
-            logger.log('sliver_lxc.destroy: %s cleanly destroyed.'%name)
+            logger.log('sliver_lxc.destroy: {} cleanly destroyed.'.format(name))
         else:
             # we're in /
-            #logger.log("-TMP-cwd %s : %s"%(name,os.getcwd()))
+            #logger.log("-TMP-cwd {} : {}".format(name, os.getcwd()))
             # also lsof never shows anything relevant; this is painful..
-            #logger.log("-TMP-lsof %s"%name)
-            #command=['lsof']
+            #logger.log("-TMP-lsof {}".format(name))
+            #command = ['lsof']
             #logger.log_call(command)
-            logger.log("-TMP-ls-l %s"%name)
-            command = ['ls', '-l', containerDir]
+            logger.log("-TMP-ls-l {}".format(name))
+            command = ['ls', '-lR', containerDir]
+            logger.log_call(command)
+            logger.log("-TMP-lsof")
+            command = ['lsof']
             logger.log_call(command)
             if os.path.exists(containerDir):
-                logger.log('sliver_lxc.destroy: ERROR could not cleanly destroy %s - giving up'%name)
+                logger.log('sliver_lxc.destroy: ERROR could not cleanly destroy {} - giving up'.format(name))
 
-        if vsys_stopped: vsysStartService()
+        if vsys_stopped:
+            vsysStartService()