define a default for 'arch' as I find it sometimes undefined
[lxc-userspace.git] / lxcsu
diff --git a/lxcsu b/lxcsu
index eceea86..97600cf 100644 (file)
--- a/lxcsu
+++ b/lxcsu
@@ -5,74 +5,149 @@ import setns
 import os
 import sys
 
-#from optparse import OptionParser
+from optparse import OptionParser
 
-drop_capabilities='cap_sys_admin,cap_sys_boot,cap_sys_module'.split(',')
+drop_capabilities='cap_sys_admin,cap_sys_boot,cap_sys_module'
+
+def getarch(f):
+    """Return the architecture name for the ELF file at path f.
+
+    Runs `readelf -h` on f and inspects the 'Class' header line:
+    a class containing ELF64 yields 'x86_64', ELF32 yields 'i686'.
+
+    Raises Exception('Could not determine architecture') when the
+    readelf output has no usable 'Class' line (for instance readelf
+    failed or f is not an ELF file) or the class is neither ELF64
+    nor ELF32.
+    """
+    output = os.popen('readelf -h %s 2>&1'%f).readlines()
+    # Match on the field name only (the text before the first ':').
+    classlines = [x for x in output if ('Class' in x.split(':')[0])]
+    if (not classlines):
+        # Without this guard the lookup below fails with a bare IndexError.
+        raise Exception('Could not determine architecture')
+    # partition() yields an empty value when the line has no ':' at all,
+    # which then falls through to the final raise instead of an IndexError.
+    c = classlines[0].partition(':')[2]
+    if ('ELF64' in c):
+        return 'x86_64'
+    elif ('ELF32' in c):
+        return 'i686'
+    else:
+        raise Exception('Could not determine architecture')
+
+def umount(fs_dir):
+    """Unmount fs_dir with /bin/umount.
+
+    Returns False when the command output reports 'device is busy',
+    True otherwise (stderr is merged into the inspected output).
+    """
+    output = os.popen('/bin/umount %s 2>&1'%fs_dir).read()
+    # Inspect what umount printed, not the path that was passed in;
+    # checking fs_dir made this function always return True.
+    # NOTE(review): newer umount versions may word this error differently
+    # (e.g. 'target is busy') - confirm against the deployed util-linux.
+    return ('device is busy' not in output)
 
-"""
 parser = OptionParser()
-parser.add_option("-n", "--net",
+parser.add_option("-n", "--nonet",
                   action="store_true", dest="netns", default=False,
-                  help="Enter network namespace")
-parser.add_option("-m", "--mnt",
+                  help="Don't enter network namespace")
+parser.add_option("-m", "--nomnt",
                   action="store_true", dest="mntns", default=False,
-                  help="Enter mount namespace")
-parser.add_option("-p", "--pid",
+                  help="Don't enter mount namespace")
+parser.add_option("-p", "--nopid",
                   action="store_true", dest="pidns", default=False,
-                  help="Enter pid namespace")
+                  help="Don't enter pid namespace")
+parser.add_option("-r", "--root",
+                  action="store_true", dest="root", default=False,
+                  help="Enter as root: be careful")
 
 (options, args) = parser.parse_args()
-"""
-args = sys.argv[1:]
 
 try:
-       slice_name = args[0]
+    slice_name = args[0]
 except IndexError:
-       print "You must specify a vm name"
-       exit(1)
+    print "You must specify a vm name"
+    exit(1)
 
 try:
-       cmd = 'grep %s /proc/*/cgroup | grep freezer'%slice_name
-       output = os.popen(cmd).readlines()
+    cmd = 'grep %s /proc/*/cgroup | grep freezer'%slice_name
+    output = os.popen(cmd).readlines()
 except:
-       print "Error finding slice %s"%slice_name
-       exit(1)
+    print "Error finding slice %s"%slice_name
+    exit(1)
 
 slice_spec = None
+
+# xxx fixme xxx - provide a default as this is not always properly computed
+arch = 'x86_64'
+
 for e in output:
-       try:
-               l = e.rstrip()
-               path = l.split(':')[0]  
-               comp = l.rsplit(':')[-1]
-               slice_name_check = comp.rsplit('/')[-1]
-
-               if (slice_name_check == slice_name):
-                       slice_path = path
-                       pid = slice_path.split('/')[2]
-                       cmdline = open('/proc/%s/cmdline'%pid).read().rstrip('\n\x00')
-                       if (cmdline == '/sbin/init'):
-                               slice_spec = slice_path
-                               break
-       except:
-               break
+    try:
+        l = e.rstrip()
+        path = l.split(':')[0]  
+        comp = l.rsplit(':')[-1]
+        slice_name_check = comp.rsplit('/')[-1]
+
+        if (slice_name_check == slice_name):
+            slice_path = path
+            pid = slice_path.split('/')[2]
+            cmdline = open('/proc/%s/cmdline'%pid).read().rstrip('\n\x00')
+            if (cmdline == '/sbin/init'):
+                slice_spec = slice_path
+                arch = getarch('/proc/%s/exe'%pid)
+                break
+    except:
+        break
 
 if (not slice_spec or not pid):
     print "Not started: %s"%slice_name
     exit(1)
 
-r1 = setns.chcontext('/proc/%s/ns/utc'%pid)
-r2 = setns.chcontext('/proc/%s/ns/ipc'%pid)
-path = '/proc/%s/ns/net'%pid
-r3 = setns.chcontext(path)
+# Enter cgroups
+try:
+    for subsystem in ['cpuset','memory','blkio']:
+        open('/sys/fs/cgroup/%s/libvirt/lxc/%s/tasks'%(subsystem,slice_name),'w').write(str(os.getpid()))
+
+except:
+    print "Error assigning resources: %s"%slice_name
+    exit(1)
+
+try:
+    open('/sys/fs/cgroup/cpuacct/system/libvirtd.service/libvirt/lxc/%s/tasks'%slice_name,'w').write(str(os.getpid()))
+except:
+    print "Error assigning cpuacct: %s" % slice_name
+    exit(1)
+
+# If the slice is frozen, then we'll get an EBUSY when trying to write to the task
+# list for the freezer cgroup. Since the user couldn't do anything anyway, it's best
+# in this case to error out the shell. (an alternative would be to un-freeze it,
+# add the task, and re-freeze it)
+try:
+    f=open('/sys/fs/cgroup/freezer/libvirt/lxc/%s/tasks'%(slice_name),'w')
+    f.write(str(os.getpid()))
+    # note: we need to call f.close() explicitly, or we'll get an exception in
+    # the object destructor, which will not be caught
+    f.close()
+except:
+    print "Error adding task to freezer cgroup. Slice is probably frozen: %s" % slice_name
+    exit(1)
+
+setns.chcontext('/proc/%s/ns/uts'%pid)
+setns.chcontext('/proc/%s/ns/ipc'%pid)
+
+if (not options.netns):
+  setns.chcontext('/proc/%s/ns/net'%pid)
 
-open('/proc/lxcsu','w').write(pid)
-open('/proc/pidsu','w').write(pid)
+if (not options.mntns):
+  open('/proc/lxcsu','w').write(pid)
+
+if (not options.pidns):
+  open('/proc/pidsu','w').write(pid)
+
+# cgroups is not yet LXC-safe, so we need to use the coarse-grained access control
+# strategy of unmounting the filesystem
+
+umount_result = True
+for subsystem in ['cpuset','cpu,cpuacct','memory','devices','freezer','net_cls','blkio','perf_event']:
+    fs_path = '/sys/fs/cgroup/%s'%subsystem
+    if (not umount(fs_path)):
+        print "Error disabling cgroup access"
+        exit(1)
+
+if (not umount('/sys/fs/cgroup')):
+    print "Error disabling cgroup access"
+    exit(1)
 
 pid = os.fork()
 
-cap_args = map(lambda c:'--drop='+c, drop_capabilities)
-    
 if (pid == 0):
-    os.execv('/usr/sbin/capsh',cap_args+['--']+args[1:])
+    cap_arg = '--drop='+drop_capabilities
+
+    if (not options.root):
+      exec_args = [arch,'/usr/sbin/capsh',cap_arg,'--','--login']+args[1:]
+    else:
+      exec_args = [arch,'/usr/sbin/capsh','--','--login']+args[1:]
+
+    os.environ['SHELL'] = '/bin/sh'
+    os.execv('/usr/bin/setarch',exec_args)
 else:
-    os.waitpid(pid,0)
+    _,status = os.waitpid(pid,0)
+    exit(os.WEXITSTATUS(status))