From a6e1134d8bdb41b0146cdac83e4e82909e74cb47 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Bar=C4=B1=C5=9F=20Metin?= Date: Mon, 10 May 2010 19:24:34 +0000 Subject: [PATCH] merge trellis branch to trunk. 2.6.22 kernels shall use 0.3 branch and 2.6.27 needs 0.4 (trunk). --- Makefile.am | 6 +- python/vserver.py | 197 ++++------------------ python/vserverimpl.c | 115 +------------ scripts/vsh | 6 + scripts/vuseradd | 16 +- src/netns.c | 23 ++- src/planetlab.c | 82 ++++------ src/planetlab.h | 9 +- src/vsh.c | 381 ++++++++----------------------------------- util-vserver-pl.spec | 2 +- 10 files changed, 178 insertions(+), 659 deletions(-) create mode 100755 scripts/vsh diff --git a/Makefile.am b/Makefile.am index bcc0c23..4cb1e63 100644 --- a/Makefile.am +++ b/Makefile.am @@ -25,8 +25,7 @@ python_vserverimpl_la_LDFLAGS = -module -avoid-version src_vip6_autod_SOURCES = src/vip6-autod.c src_vip6_autod_LDADD = -lvserver -lnl -src_vsh_SOURCES = src/vsh.c src/planetlab.c src/netns.c -src_vsh_LDADD = -lvserver +src_vsh_SOURCES = src/vsh.c noinst_HEADERS = src/planetlab.h @@ -43,7 +42,8 @@ sbin_SCRIPTS = python/bwlimit \ scripts/vuseradd \ scripts/vuserdel -sbin_PROGRAMS = src/vsh +sbin_PROGRAMS = src/vsh + sysv_SCRIPTS = if ENSC_HAVE_LIBNL diff --git a/python/vserver.py b/python/vserver.py index 44341a6..55d78f0 100644 --- a/python/vserver.py +++ b/python/vserver.py @@ -12,6 +12,7 @@ import sys import time import traceback import subprocess +import commands import resource import vserverimpl @@ -41,6 +42,8 @@ RLIMITS = { "NSOCK": VLIMIT_NSOCK, "ANON": VLIMIT_ANON, "SHMEM": VLIMIT_SHMEM} +CPU_SHARE_MULT = 1024 + # add in the platform supported rlimits for entry in resource.__dict__.keys(): if entry.find("RLIMIT_")==0: @@ -52,7 +55,6 @@ for entry in resource.__dict__.keys(): class NoSuchVServer(Exception): pass - class VServerConfig: def __init__(self, name, directory): self.name = name @@ -129,26 +131,6 @@ class VServerConfig: os.path.walk(self.dir, add_to_cache, self.cache) -def adjust_lim(goal, curr): - gh = goal[0] - gs = goal[1] - gm = goal[2] - soft = curr[0] - hard = curr[1] - if gm != VC_LIM_KEEP: - if gm > soft or gm == resource.RLIM_INFINITY: - soft = gm - if gm > hard or gm == resource.RLIM_INFINITY: - hard = gm - if gs != VC_LIM_KEEP: - if gs > soft or gs == resource.RLIM_INFINITY: - soft = gs - if gh != VC_LIM_KEEP: - if gh > hard or gh == resource.RLIM_INFINITY: - hard = gh - return (soft, hard) - - class VServer: INITSCRIPTS = [('/etc/rc.vinit', 'start'), @@ -162,7 +144,7 @@ class VServer: os.access(self.dir, os.R_OK | os.W_OK | os.X_OK)): raise NoSuchVServer, "no such vserver: " + name self.config = VServerConfig(name, "/etc/vservers/%s" % name) - self.remove_caps = ~vserverimpl.CAP_SAFE; + #self.remove_caps = ~vserverimpl.CAP_SAFE; if vm_id == None: vm_id = int(self.config.get('context')) self.ctx = vm_id @@ -201,10 +183,6 @@ class VServer: resource_type = RLIMITS[type] try: vserverimpl.setrlimit(self.ctx, resource_type, hard, soft, min) - if hasattr(resource, 'RLIMIT_' + type): - lim = resource.getrlimit(resource_type) - lim = adjust_lim((hard, soft, min), lim) - resource.setrlimit(resource_type, lim) except OSError, e: self.log("Error: setrlimit(%d, %s, %d, %d, %d): %s" % (self.ctx, type.lower(), hard, soft, min, e)) @@ -224,6 +202,9 @@ class VServer: def set_capabilities_config(self, capabilities): bcaps = self.get_bcaps_from_capabilities(capabilities) ccaps = self.get_ccaps_from_capabilities(capabilities) + if len(bcaps) > 0: + bcaps += "," + bcaps += "CAP_NET_RAW" self.config.update('bcapabilities', bcaps) self.config.update('ccapabilities', ccaps) ret = vserverimpl.setbcaps(self.ctx, vserverimpl.text2bcaps(bcaps)) @@ -251,6 +232,7 @@ class VServer: vserverimpl.netadd(self.ctx, a) def set_ipaddresses_config(self, addresses): + return # acb i = 0 for a in addresses.split(","): self.config.update("interfaces/%d/ip" % i, a) @@ -272,6 +254,7 @@ class VServer: def get_ipaddresses(self): # No clean way to do this right now. + self.log("Calling Vserver.get_ipaddresses for slice %s" % self.name) return None def __do_chroot(self): @@ -279,7 +262,6 @@ class VServer: os.chdir("/") def chroot_call(self, fn, *args): - cwd_fd = os.open(".", os.O_RDONLY) try: root_fd = os.open("/", os.O_RDONLY) @@ -301,7 +283,7 @@ class VServer: try: vserverimpl.unsetdlimit(self.dir, self.ctx) except OSError, e: - self.log("Unexpected error with unsetdlimit for context %d: %r" % (self.ctx,e)) + self.log("Unexpected error with unsetdlimit for context %d" % self.ctx) return if self.vm_running: @@ -321,16 +303,15 @@ class VServer: vserverimpl.DLIMIT_INF, # inode limit 2) # %age reserved for root except OSError, e: - self.log("Unexpected error with setdlimit for context %d: %r" % (self.ctx, e)) - + self.log("Unexpected error with setdlimit for context %d" % self.ctx) self.config.update('dlimits/0/space_total', block_limit) def is_running(self): - return vserverimpl.isrunning(self.ctx) + status = subprocess.call(["/usr/sbin/vserver", self.name, "running"], shell=False) + return not status def get_disklimit(self): - try: (self.disk_blocks, block_limit, self.disk_inodes, inode_limit, reserved) = vserverimpl.getdlimit(self.dir, self.ctx) @@ -343,26 +324,28 @@ class VServer: return block_limit def set_sched_config(self, cpu_min, cpu_share): - """ Write current CPU scheduler parameters to the vserver - configuration file. This method does not modify the kernel CPU - scheduling parameters for this context. """ - - self.config.update('sched/fill-rate', cpu_min) - self.config.update('sched/fill-rate2', cpu_share) - if cpu_share == 0: - self.config.unset('sched/idle-time') - + configuration file. Currently, 'cpu_min' is not supported. """ + self.config.update('cgroup/cpu.shares', cpu_share * CPU_SHARE_MULT) if self.is_running(): self.set_sched(cpu_min, cpu_share) def set_sched(self, cpu_min, cpu_share): - """ Update kernel CPU scheduling parameters for this context. """ - vserverimpl.setsched(self.ctx, cpu_min, cpu_share) + """ Update kernel CPU scheduling parameters for this context. + Currently, 'cpu_min' is not supported. """ + try: + cgroup = open('/dev/cgroup/%s/cpu.shares' % name, 'w') + cgroup.write('%s' % (cpu_share * CPU_SHARE_MULT)) + cgroup.close() + except: + pass def get_sched(self): - # have no way of querying scheduler right now on a per vserver basis - return (-1, False) + try: + cpu_share = int(int(self.config.get('cgroup/cpu.shares')) / CPU_SHARE_MULT) + except: + cpu_share = False + return (-1, cpu_share) def set_bwlimit(self, minrate = bwlimit.bwmin, maxrate = None, exempt_min = None, exempt_max = None, @@ -386,71 +369,10 @@ class VServer: return self.chroot_call(open, filename, mode, bufsize) - def __do_chcontext(self, state_file): - - if state_file: - print >>state_file, "%u" % self.ctx - state_file.close() - - if vserverimpl.chcontext(self.ctx, vserverimpl.text2bcaps(self.get_capabilities_config())): - self.set_resources(True) - vserverimpl.setup_done(self.ctx) - - - def __prep(self, runlevel): - - """ Perform all the crap that the vserver script does before - actually executing the startup scripts. """ - - - # set the initial runlevel - vserverimpl.setrunlevel(self.dir + "/var/run/utmp", runlevel) - - # mount /proc and /dev/pts - self.__do_mount("none", self.dir, "/proc", "proc") - # XXX - magic mount options - self.__do_mount("none", self.dir, "/dev/pts", "devpts", 0, "gid=5,mode=0620") - - - def __cleanvar(self): - """ - Clean the /var/ directory so RH startup scripts can run - """ - - RUNDIR = "/var/run" - LOCKDIR = "/var/lock/subsys" - - filter = ["utmp"] - garbage = [] - for topdir in [RUNDIR, LOCKDIR]: - #os.walk() = (dirpath, dirnames, filenames) - for root, dirs, files in os.walk(topdir): - for file in files: - if not file in filter: - garbage.append(root + "/" + file) - - for f in garbage: os.unlink(f) - return garbage - - - def __do_mount(self, *mount_args): - try: - vserverimpl.mount(*mount_args) - except OSError, ex: - if ex.errno == errno.EBUSY: - # assume already mounted - return - raise ex - - def enter(self): - self.config.cache_it() - self.__do_chroot() - self.__do_chcontext(None) - + subprocess.call("/usr/sbin/vserver %s enter" % self.name, shell=True) def start(self, runlevel = 3): - if (os.fork() != 0): # Parent should just return. self.vm_running = True @@ -458,63 +380,14 @@ class VServer: else: # child process try: - # so we don't chcontext with priv'ed fds - close_nonstandard_fds() - - # get a new session - os.setsid() - - # open state file to record vserver info - state_file = open("/var/run/vservers/%s" % self.name, "w") - - # use /dev/null for stdin, /var/log/nm for stdout/err - fd = os.open("/dev/null", os.O_RDONLY) - if fd != 0: - os.dup2(fd, 0) - os.close(fd) - - # perform pre-init cleanup - self.__prep(runlevel) - - self.config.cache_it() - self.__do_chroot() - if not self.is_running(): - removed = self.__cleanvar() - else: - removed = 0 - - log = open("/var/log/nm", "a", 0) - if log.fileno() != 1: - os.dup2(log.fileno(), 1) - os.dup2(1, 2) - - print >>log, ("%s: removing %s" % - (time.asctime(time.gmtime()), removed)) - print >>log, ("%s: starting the virtual server %s" % - (time.asctime(time.gmtime()), self.name)) - # execute each init script in turn - # XXX - we don't support all scripts that vserver script does - self.__do_chcontext(state_file) - for cmd in self.INITSCRIPTS: - try: - # enter vserver context - arg_subst = { 'runlevel': runlevel } - cmd_args = [cmd[0]] + map(lambda x: x % arg_subst, - cmd[1:]) - if os.path.isfile(cmd[0]): - print >>log, "executing '%s'" % " ".join(cmd_args) - os.spawnvp(os.P_NOWAIT,cmd[0],cmd_args) - else: - print >>log, "WARNING: could not run %s"%cmd[0] - except: - print >>log, traceback.format_exc() - + subprocess.call("/usr/sbin/vserver %s start" % self.name, + shell=True) # we get here due to an exception in the top-level child process except Exception, ex: self.log(traceback.format_exc()) os._exit(0) - def set_resources(self,setup=False): + def set_resources(self): """ Called when vserver context is entered for first time, should be overridden by subclass. """ @@ -544,17 +417,15 @@ class VServer: (space, inodes) = line.split() self.disk_inodes = int(inodes) self.disk_blocks = int(space) - #(self.disk_inodes, self.disk_blocks) = vduimpl.vdu(self.dir) return self.disk_blocks * 1024 def stop(self, signal = signal.SIGKILL): - vserverimpl.killall(self.ctx, signal) self.vm_running = False + subprocess.call("/usr/sbin/vserver %s stop" % self.name, shell=True) def setname(self, slice_id): - '''Set vcVHI_CONTEXT field in kernel to slice_id''' - vserverimpl.setname(self.ctx, slice_id) + pass def getname(self): '''Get vcVHI_CONTEXT field in kernel''' diff --git a/python/vserverimpl.c b/python/vserverimpl.c index b9a06aa..2232f73 100644 --- a/python/vserverimpl.c +++ b/python/vserverimpl.c @@ -62,60 +62,6 @@ static inline PyObject *inc_and_ret_none(void) #define PL_INSECURE_BCAPS (vc_get_insecurebcaps() | (1 << VC_CAP_NET_BIND_SERVICE)) #define PL_INSECURE_CCAPS vc_get_insecureccaps() -/* - * context create - */ -static PyObject * -vserver_chcontext(PyObject *self, PyObject *args) -{ - int ctx_is_new; - xid_t ctx; - uint_least64_t bcaps = 0; - - if (!PyArg_ParseTuple(args, "I|K", &ctx, &bcaps)) - return NULL; - bcaps |= ~PL_INSECURE_BCAPS; - - if ((ctx_is_new = pl_chcontext(ctx, bcaps, 0)) < 0) - return PyErr_SetFromErrno(PyExc_OSError); - - return PyBool_FromLong(ctx_is_new); -} - -static PyObject * -vserver_setup_done(PyObject *self, PyObject *args) -{ - xid_t ctx; - - if (!PyArg_ParseTuple(args, "I", &ctx)) - return NULL; - - if (pl_setup_done(ctx) < 0) - return PyErr_SetFromErrno(PyExc_OSError); - - return NONE; -} - -static PyObject * -vserver_isrunning(PyObject *self, PyObject *args) -{ - xid_t ctx; - PyObject *ret; - struct stat statbuf; - char fname[64]; - - if (!PyArg_ParseTuple(args, "I", &ctx)) - return NULL; - - sprintf(fname,"/proc/virtual/%d", ctx); - - if(stat(&fname[0],&statbuf)==0) - ret = PyBool_FromLong(1); - else - ret = PyBool_FromLong(0); - - return ret; -} static PyObject * __vserver_get_rlimit(xid_t xid, int resource) { @@ -152,7 +98,7 @@ vserver_set_rlimit(PyObject *self, PyObject *args) { uint32_t bitmask; xid_t xid; int resource; - PyObject *ret; + PyObject *ret = NULL; limits.min = VC_LIM_KEEP; limits.soft = VC_LIM_KEEP; @@ -177,26 +123,6 @@ vserver_set_rlimit(PyObject *self, PyObject *args) { return ret; } -/* - * setsched - */ -static PyObject * -vserver_setsched(PyObject *self, PyObject *args) -{ - xid_t ctx; - uint32_t cpu_min; - uint32_t cpu_share; - - if (!PyArg_ParseTuple(args, "II|I", &ctx, &cpu_min, &cpu_share)) - return NULL; - - /* ESRCH indicates that there are no processes in the context */ - if (pl_setsched(ctx, cpu_min, cpu_share) && - errno != ESRCH) - return PyErr_SetFromErrno(PyExc_OSError); - - return NONE; -} static PyObject * vserver_get_dlimit(PyObject *self, PyObject *args) @@ -266,34 +192,6 @@ vserver_unset_dlimit(PyObject *self, PyObject *args) return NONE; } -static PyObject * -vserver_killall(PyObject *self, PyObject *args) -{ - xid_t ctx; - int sig; - struct vc_ctx_flags cflags = { - .flagword = 0, - .mask = VC_VXF_PERSISTENT - }; - struct vc_net_flags nflags = { - .flagword = 0, - .mask = VC_NXF_PERSISTENT - }; - - if (!PyArg_ParseTuple(args, "Ii", &ctx, &sig)) - return NULL; - - if (vc_ctx_kill(ctx, 0, sig) && errno != ESRCH) - return PyErr_SetFromErrno(PyExc_OSError); - - if (vc_set_cflags(ctx, &cflags) && errno != ESRCH) - return PyErr_SetFromErrno(PyExc_OSError); - - if (vc_set_nflags(ctx, &nflags) && errno != ESRCH) - return PyErr_SetFromErrno(PyExc_OSError); - - return NONE; -} static PyObject * vserver_set_bcaps(PyObject *self, PyObject *args) @@ -775,13 +673,8 @@ vserver_get_name(PyObject *self, PyObject *args) return ret; } + static PyMethodDef methods[] = { - { "chcontext", vserver_chcontext, METH_VARARGS, - "chcontext to vserver with provided flags" }, - { "setup_done", vserver_setup_done, METH_VARARGS, - "Release vserver setup lock" }, - { "setsched", vserver_setsched, METH_VARARGS, - "Change vserver scheduling attributes for given vserver context" }, { "setdlimit", vserver_set_dlimit, METH_VARARGS, "Set disk limits for given vserver context" }, { "unsetdlimit", vserver_unset_dlimit, METH_VARARGS, @@ -792,10 +685,6 @@ static PyMethodDef methods[] = { "Set resource limits for given resource of a vserver context" }, { "getrlimit", vserver_get_rlimit, METH_VARARGS, "Get resource limits for given resource of a vserver context" }, - { "killall", vserver_killall, METH_VARARGS, - "Send signal to all processes in vserver context" }, - { "isrunning", vserver_isrunning, METH_VARARGS, - "Check if vserver is running"}, { "setbcaps", vserver_set_bcaps, METH_VARARGS, "Set POSIX capabilities of a vserver context" }, { "getbcaps", vserver_get_bcaps, METH_VARARGS, diff --git a/scripts/vsh b/scripts/vsh new file mode 100755 index 0000000..0099b38 --- /dev/null +++ b/scripts/vsh @@ -0,0 +1,6 @@ +#!/bin/sh + +SLICE=`whoami` +SLICEID=`id -u` + +/usr/sbin/vserver_suid_wrapper $SLICE suexec $SLICEID /bin/bash "$@" diff --git a/scripts/vuseradd b/scripts/vuseradd index f77d58b..b54aec9 100755 --- a/scripts/vuseradd +++ b/scripts/vuseradd @@ -78,20 +78,21 @@ if [ ! -d $__CONFDIR/$NAME ] ; then fi $_VSERVER $NAME build -m skeleton --context $USERID \ - --interface nodev:0.0.0.0/0 \ - --flags persistent,~info_init,sched_hard + --interface nodev:`hostname -i` \ + --interface nodev:127.0.0.1 \ + --flags persistent,~info_init RETVAL=$? DIR=$__CONFDIR/$NAME if [ $RETVAL -ne 0 ] ; then echo "Error $RETVAL building $DIR" rm -rf $DIR $__DEFAULT_VSERVERDIR/$NAME fi - mkdir -p $DIR/apps/init $DIR/rlimits $DIR/sched $DIR/dlimits/0 + mkdir -p $DIR/apps/init $DIR/rlimits $DIR/sched $DIR/dlimits/0 $DIR/sysctl/0 echo default > $DIR/apps/init/mark echo 1000 > $DIR/rlimits/nproc.hard # Set persistent for the network context - echo persistent > $DIR/nflags + echo persistent,lback_allow > $DIR/nflags # Set up the scheduler echo 100 > $DIR/sched/interval @@ -109,9 +110,16 @@ if [ ! -d $__CONFDIR/$NAME ] ; then echo -1 > $DIR/dlimits/0/inodes_total echo -1 > $DIR/dlimits/0/space_total + # Set up sysctl variables + echo net.ipv4.ip_forward > $DIR/sysctl/0/setting + echo 1 > $DIR/sysctl/0/value + # Disable mount namespaces touch $DIR/nonamespace + # Add spaces directory + mkdir -p $DIR/spaces + # Remove the basically empty guest directory rm -rf $__DEFAULT_VSERVERDIR/$NAME # Move the guest back diff --git a/src/netns.c b/src/netns.c index 8f0c598..cf91eff 100644 --- a/src/netns.c +++ b/src/netns.c @@ -14,26 +14,33 @@ #define SPACE_FILE "/spaces/net" #define VSERVERCONF "/etc/vservers/" -uint32_t -get_space_flag(xid_t xid) { +int +pl_unshare_netns(xid_t xid) { char *ctx_space_file, *space_name; struct passwd *slice_user; - uint32_t space_flag = 0; + int res = 0; + char buf[100]; + FILE *fb; slice_user = getpwuid(xid); if (!slice_user) - return 0; + return 0; ctx_space_file=(char *) malloc(sizeof(VSERVERCONF SPACE_FILE "Z")+strlen(slice_user->pw_name)); if (!ctx_space_file) - return 0; + return 0; sprintf(ctx_space_file,VSERVERCONF "%s" SPACE_FILE, slice_user->pw_name); - if (access(ctx_space_file, F_OK)==0) - space_flag |= CLONE_NEWNET; + if ((fb = fopen(ctx_space_file, "r")) == NULL) + return 0; + if (fgets(buf, sizeof(buf), fb) != NULL) { + res = atoi(buf); + } + + fclose(fb); free(ctx_space_file); - return space_flag; + return res; } diff --git a/src/planetlab.c b/src/planetlab.c index 73ccfc3..b931e81 100644 --- a/src/planetlab.c +++ b/src/planetlab.c @@ -43,7 +43,6 @@ POSSIBILITY OF SUCH DAMAGE. #include #include #include -#include #include #define _GNU_SOURCE #include @@ -51,9 +50,6 @@ POSSIBILITY OF SUCH DAMAGE. #include "vserver.h" #include "planetlab.h" -/* defined in netns.c */ -extern uint32_t get_space_flag(xid_t); - #ifndef VC_NXC_RAW_SOCKET # define VC_NXC_RAW_SOCKET 0x00000200ull #endif @@ -93,16 +89,19 @@ tag: if (vc_tag_create(ctx) == VC_NOCTX) return -1; +process: + /* * Create context info - this sets the STATE_SETUP and STATE_INIT flags. */ if (vc_ctx_create(ctx, 0) == VC_NOCTX) return -1; - if (unshare_flags != 0) { + if (unshare_flags != 0) { unshare(unshare_flags); unshare_flags |= vc_get_space_mask(); - vc_set_namespace(ctx, unshare_flags); + //printf("vc_set_namespace(%d, %X)\n", ctx, unshare_flags); + //vc_set_namespace(ctx, unshare_flags); } /* Set capabilities - these don't take effect until SETUP flag is unset */ @@ -140,11 +139,12 @@ pl_setup_done(xid_t ctx) #define RETRY_LIMIT 10 int -pl_chcontext(xid_t ctx, uint64_t bcaps, const struct sliver_resources *slr) +pl_chcontext(xid_t ctx, uint64_t bcaps, const struct sliver_resources *slr, + int unshare_netns) { int retry_count = 0; int net_migrated = 0; - + if (pl_set_ulimits(slr) != 0) return -1; @@ -154,12 +154,12 @@ pl_chcontext(xid_t ctx, uint64_t bcaps, const struct sliver_resources *slr) if (vc_get_cflags(ctx, &vc_flags)) { - uint32_t unshare_flags; + uint32_t unshare_flags; if (errno != ESRCH) return -1; - /* Unshare the net namespace if the slice if requested in the local slice configuration */ - unshare_flags = get_space_flag(ctx); + /* Always unshare the net namespace for a new context */ + unshare_flags = CLONE_NEWNET; /* context doesn't exist - create it */ if (create_context(ctx, bcaps, unshare_flags)) @@ -194,13 +194,16 @@ pl_chcontext(xid_t ctx, uint64_t bcaps, const struct sliver_resources *slr) migrate: if (net_migrated || !vc_net_migrate(ctx)) { - uint32_t unshare_flags; - /* Unshare the net namespace if the slice if requested in the local slice configuration */ - unshare_flags = get_space_flag(ctx); - if (unshare_flags != 0) { - unshare_flags |=vc_get_space_mask(); - vc_enter_namespace(ctx, unshare_flags); - } + uint32_t unshare_flags; + + /* Unshare the net namespace if requested in the slice config */ + unshare_flags = unshare_netns ? CLONE_NEWNET : 0; + + if (unshare_flags != 0) { + unshare_flags |=vc_get_space_mask(); + //printf("vc_enter_namespace(%d, %X)\n", ctx, unshare_flags); + //vc_enter_namespace(ctx, unshare_flags); + } if (!vc_tag_migrate(ctx) && !vc_ctx_migrate(ctx, 0)) break; /* done */ @@ -261,8 +264,8 @@ enum { }; struct pl_resources { - char *name; - unsigned type; + char *name; + unsigned type; union { unsigned long long *limit; unsigned long int *personality; @@ -280,8 +283,9 @@ pl_get_limits(const char *context, struct sliver_resources *slr) { FILE *fb; int cwd; + size_t len = strlen(VSERVERCONF) + strlen(context) + NULLBYTE_SIZE; + char *conf = (char *)malloc(len + strlen("rlimits/openfd.hard")); struct pl_resources *r; - struct pl_resources sliver_list[] = { {"sched/fill-rate2", TYPE_LONG, &slr->vs_cpu}, @@ -297,21 +301,15 @@ pl_get_limits(const char *context, struct sliver_resources *slr) {"rlimits/as.soft", TYPE_LONG, &slr->vs_as.soft}, {"rlimits/as.min", TYPE_LONG, &slr->vs_as.min}, - {"rlimits/nofile.hard", TYPE_LONG, &slr->vs_nofile.hard}, - {"rlimits/nofile.soft", TYPE_LONG, &slr->vs_nofile.soft}, - {"rlimits/nofile.min", TYPE_LONG, &slr->vs_nofile.min}, - - {"rlimits/memlock.hard", TYPE_LONG, &slr->vs_memlock.hard}, - {"rlimits/memlock.soft", TYPE_LONG, &slr->vs_memlock.soft}, - {"rlimits/memlock.min", TYPE_LONG, &slr->vs_memlock.min}, + {"rlimits/openfd.hard", TYPE_LONG, &slr->vs_openfd.hard}, + {"rlimits/openfd.soft", TYPE_LONG, &slr->vs_openfd.soft}, + {"rlimits/openfd.min", TYPE_LONG, &slr->vs_openfd.min}, {"personality", TYPE_PERS, &slr->personality}, {0,0} }; - size_t len = strlen(VSERVERCONF) + strlen(context) + NULLBYTE_SIZE; - char *conf = (char *)malloc(len); sprintf(conf, "%s%s", VSERVERCONF, context); slr->vs_rss.hard = VC_LIM_KEEP; @@ -326,13 +324,9 @@ pl_get_limits(const char *context, struct sliver_resources *slr) slr->vs_nproc.soft = VC_LIM_KEEP; slr->vs_nproc.min = VC_LIM_KEEP; - slr->vs_nofile.hard = VC_LIM_KEEP; - slr->vs_nofile.soft = VC_LIM_KEEP; - slr->vs_nofile.min = VC_LIM_KEEP; - - slr->vs_memlock.hard = VC_LIM_KEEP; - slr->vs_memlock.soft = VC_LIM_KEEP; - slr->vs_memlock.min = VC_LIM_KEEP; + slr->vs_openfd.hard = VC_LIM_KEEP; + slr->vs_openfd.soft = VC_LIM_KEEP; + slr->vs_openfd.min = VC_LIM_KEEP; slr->personality = 0; @@ -359,13 +353,8 @@ pl_get_limits(const char *context, struct sliver_resources *slr) buf[len-1]='\0'; len --; } - if (r->type == TYPE_LONG) { - int val; - char *res=0; - errno=0; - val = strtol(buf,&res,0); - if ( !( (val==0 && res) || (errno!=0) ) ) - *r->limit = val; + if ( (r->type == TYPE_LONG) && isdigit(*buf)) { + *r->limit = atoi(buf); } else if ( (r->type == TYPE_PERS) && isalpha(*buf)) { unsigned long int res; res = vc_str2personalitytype(buf,len); @@ -378,7 +367,7 @@ pl_get_limits(const char *context, struct sliver_resources *slr) fclose(fb); } - (void)fchdir(cwd); + fchdir(cwd); out_fd: close(cwd); out: @@ -453,7 +442,6 @@ pl_set_ulimits(const struct sliver_resources *slr) set_one_ulimit(RLIMIT_RSS, &slr->vs_rss); set_one_ulimit(RLIMIT_AS, &slr->vs_as); set_one_ulimit(RLIMIT_NPROC, &slr->vs_nproc); - set_one_ulimit(RLIMIT_NOFILE, &slr->vs_nofile); - set_one_ulimit(RLIMIT_MEMLOCK, &slr->vs_memlock); + set_one_ulimit(RLIMIT_NOFILE, &slr->vs_openfd); return set_personality(slr->personality); } diff --git a/src/planetlab.h b/src/planetlab.h index 3946640..7127c3a 100644 --- a/src/planetlab.h +++ b/src/planetlab.h @@ -45,15 +45,15 @@ struct sliver_resources { struct vc_rlimit vs_rss; struct vc_rlimit vs_as; struct vc_rlimit vs_nproc; - struct vc_rlimit vs_nofile; - struct vc_rlimit vs_memlock; + struct vc_rlimit vs_openfd; unsigned long int personality; }; int adjust_lim(const struct vc_rlimit *vcr, struct rlimit *lim); int -pl_chcontext(xid_t ctx, uint64_t bcaps, const struct sliver_resources *slr); +pl_chcontext(xid_t ctx, uint64_t bcaps, const struct sliver_resources *slr, + int unshare_netns); int pl_setup_done(xid_t ctx); @@ -67,6 +67,9 @@ pl_setsched(xid_t ctx, uint32_t cpu_min, uint32_t cpu_share); void pl_get_limits(const char *, struct sliver_resources *); int pl_set_ulimits(const struct sliver_resources *); +/* For network namespaces */ +int pl_unshare_netns(xid_t xid); + static inline int _PERROR(const char *format, char *file, int line, int _errno, ...) { diff --git a/src/vsh.c b/src/vsh.c index ffbe027..0bbdadf 100644 --- a/src/vsh.c +++ b/src/vsh.c @@ -1,343 +1,90 @@ -/* - * Marc E. Fiuczynski - * - * Copyright (c) 2004 The Trustees of Princeton University (Trustees). - * - * vsh is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * vsh is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - * License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Poptop; see the file COPYING. If not, write to the Free - * Software Foundation, 59 Temple Place - Suite 330, Boston, MA - * 02111-1307, USA. - */ +/* Version 2 of vsh. Basically a wrapper around 'vserver enter.' */ -#ifdef HAVE_CONFIG_H -# include -#endif - -#include -#include -#include -#include -#include -#include #include -#include -#include -#include -#include +#include +#include #include #include -#include -#include -#include -#include - -//-------------------------------------------------------------------- -#include -#include "planetlab.h" - -/* Change to root:root (before entering new context) */ -static int setuidgid_root() -{ - if (setgid(0) < 0) { - PERROR("setgid(0)"); - return -1; - } - if (setuid(0) < 0) { - PERROR("setuid(0)"); - return -1; - } - return 0; -} - -static void compute_new_root(char *base, char **root, const struct passwd *pwd) -{ - int root_len; - - root_len = - strlen(base) + strlen("/") + - strlen(pwd->pw_name) + NULLBYTE_SIZE; - (*root) = (char *)malloc(root_len); - if ((*root) == NULL) { - PERROR("malloc(%d)", root_len); - exit(1); - } - - sprintf((*root), "%s/%s", base, pwd->pw_name); - (*root)[root_len - 1] = '\0'; -} - -static int sandbox_chroot(const struct passwd *pwd) -{ - char *sandbox_root = NULL; - - compute_new_root(DEFAULT_VSERVERDIR,&sandbox_root, pwd); - if (chroot(sandbox_root) < 0) { - PERROR("chroot(%s)", sandbox_root); - exit(1); - } - if (chdir("/") < 0) { - PERROR("chdir(/)"); - exit(1); - } - return 0; -} - -static int sandbox_processes(xid_t ctx, const char *context, const struct passwd *pwd) -{ -#ifdef CONFIG_VSERVER_LEGACY - int flags; - - flags = 0; - flags |= 1; /* VX_INFO_LOCK -- cannot request a new vx_id */ - /* flags |= 4; VX_INFO_NPROC -- limit number of procs in a context */ - - (void) vc_new_s_context(ctx, 0, flags); - - /* use legacy dirty hack for capremove */ - if (vc_new_s_context(VC_SAMECTX, vc_get_insecurebcaps(), flags) == VC_NOCTX) { - PERROR("vc_new_s_context(%u, 0x%16llx, 0x%08x)", - VC_SAMECTX, vc_get_insecurebcaps(), flags); - exit(1); - } -#else - int ctx_is_new; - struct sliver_resources slr; - char hostname[HOST_NAME_MAX+1]; - pl_get_limits(context,&slr); - - if (gethostname(hostname, sizeof hostname) == -1) - { - PERROR("gethostname(...)"); - exit(1); - } - - /* check whether the slice has been suspended */ - if (slr.vs_cpu==0) - { - fprintf(stderr, "*** %s: %s has zero cpu resources and presumably it has been disabled/suspended ***\n", hostname, context); - exit(0); - } - - (void) (sandbox_chroot(pwd)); +#include +#include +#include - if ((ctx_is_new = pl_chcontext(ctx, ~vc_get_insecurebcaps(),&slr)) < 0) - { - PERROR("pl_chcontext(%u)", ctx); - exit(1); - } - if (ctx_is_new) - { - fprintf(stderr, " *** %s: %s has not been started yet, please check back later ***\n", hostname, context); - exit(1); - } +#define VSH_PATH "/usr/sbin/vsh" +#ifndef PATH_MAX +#define PATH_MAX 4096 #endif - return 0; -} +#define VSERVER_PATH "/usr/sbin/vserver" -void runas_slice_user(struct passwd *pwd) +char* get_current_username (unsigned int uid) { - char *username = pwd->pw_name; - char *home_env, *logname_env, *mail_env, *shell_env, *user_env; - int home_len, logname_len, mail_len, shell_len, user_len; - static char *envp[10]; - - if (setgid(pwd->pw_gid) < 0) { - PERROR("setgid(%d)", pwd->pw_gid); - exit(1); - } - - if (setuid(pwd->pw_uid) < 0) { - PERROR("setuid(%d)", pwd->pw_uid); - exit(1); - } - - if (chdir(pwd->pw_dir) < 0) { - PERROR("chdir(%s)", pwd->pw_dir); - exit(1); - } - - home_len = strlen("HOME=") + strlen(pwd->pw_dir) + NULLBYTE_SIZE; - logname_len = strlen("LOGNAME=") + strlen(username) + NULLBYTE_SIZE; - mail_len = strlen("MAIL=/var/spool/mail/") + strlen(username) - + NULLBYTE_SIZE; - shell_len = strlen("SHELL=") + strlen(pwd->pw_shell) + NULLBYTE_SIZE; - user_len = strlen("USER=") + strlen(username) + NULLBYTE_SIZE; - - home_env = (char *)malloc(home_len); - logname_env = (char *)malloc(logname_len); - mail_env = (char *)malloc(mail_len); - shell_env = (char *)malloc(shell_len); - user_env = (char *)malloc(user_len); - - if ((home_env == NULL) || - (logname_env == NULL) || - (mail_env == NULL) || - (shell_env == NULL) || - (user_env == NULL)) { - PERROR("malloc"); - exit(1); - } - - sprintf(home_env, "HOME=%s", pwd->pw_dir); - sprintf(logname_env, "LOGNAME=%s", username); - sprintf(mail_env, "MAIL=/var/spool/mail/%s", username); - sprintf(shell_env, "SHELL=%s", pwd->pw_shell); - sprintf(user_env, "USER=%s", username); - - home_env[home_len - 1] = '\0'; - logname_env[logname_len - 1] = '\0'; - mail_env[mail_len - 1] = '\0'; - shell_env[shell_len - 1] = '\0'; - user_env[user_len - 1] = '\0'; - - envp[0] = home_env; - envp[1] = logname_env; - envp[2] = mail_env; - envp[3] = shell_env; - envp[4] = user_env; - envp[5] = 0; + struct passwd *passwd_entry; + if ((passwd_entry = getpwuid(uid)) == NULL) { + fprintf(stderr, "Could not look up user record for %d\n", uid); + return NULL; + } - if ((putenv(home_env) < 0) || - (putenv(logname_env) < 0) || - (putenv(mail_env) < 0) || - (putenv(shell_env) < 0) || - (putenv(user_env) < 0)) { - PERROR("vserver: putenv error "); - exit(1); - } + return (strdup(passwd_entry->pw_name)); } -void slice_enter(struct passwd *pwd) -{ - if (setuidgid_root() < 0) { /* For chroot, new_s_context */ - fprintf(stderr, "vsh: Could not become root, check that SUID flag is set on binary\n"); - exit(2); - } +char **extend_argv(int argc, char **argv, int num_extra_args) { + int argc2, i; + char **argv2; -#ifdef CONFIG_VSERVER_LEGACY - (void) (sandbox_chroot(pwd)); -#endif + argc2 = argc + num_extra_args; + argv2 = (char **) malloc((argc2 + 1) * sizeof(char *)); - if (sandbox_processes((xid_t) pwd->pw_uid, pwd->pw_name, pwd) < 0) { - fprintf(stderr, "vsh: Could not change context to %d\n", pwd->pw_uid); - exit(2); - } -} + if (!argv2) + return (char **) NULL; -//-------------------------------------------------------------------- + for (i=0; ipw_name); - if (!context) { - PERROR("strdup"); - exit(2); + char *slice_name; + char **argv2; + int argc2; + char slice_id_str[256]; + unsigned int slice_xid; + char *envp[] = { NULL, NULL }; + char home_env_str[256]; + + slice_xid = getuid(); + slice_name = get_current_username(slice_xid); + if (!slice_name) { + fprintf(stderr,"Could not look up slice name\n"); + goto out_exception; } + + argv2 = extend_argv(argc, argv, NUM_VSERVER_SUEXEC_ARGS); + if (!argv2) goto out_exception; + + + // Populate arguments + snprintf(slice_id_str, 255, "%u", slice_xid); + argv2[0] = strdup(VSERVER_PATH); + argv2[1] = strdup(slice_name); + argv2[2] = strdup("suexec"); + argv2[3] = strdup(slice_id_str); + argv2[4] = strdup("/bin/bash"); + argv2[5] = strdup("-login"); - /* enter vserver "context" */ - slice_enter(prechroot); - - /* Get the /etc/passwd entry for this user, this time inside - * the chroot. - */ - username = context; - - pwdBuffer_len = sysconf(_SC_GETPW_R_SIZE_MAX); - if (pwdBuffer_len == -1) { - PERROR("sysconf(_SC_GETPW_R_SIZE_MAX"); - exit(1); - } - pwdBuffer = (char*)malloc(pwdBuffer_len); - if (pwdBuffer == NULL) { - PERROR("malloc(%d)", pwdBuffer_len); - exit(1); - } - - errno = 0; - if ((getpwnam_r(username,postchroot,pwdBuffer,pwdBuffer_len, &result) != 0) || - (errno != 0) || result != postchroot) { - PERROR("getpwnam_r(%s)", username); - exit(1); - } - - /* Now run as username in this context. Note that for PlanetLab's - vserver configuration the context name also happens to be the - "default" username within the vserver context. - */ - runas_slice_user(postchroot); - - /* Make sure pw->pw_shell is non-NULL.*/ - if (postchroot->pw_shell == NULL || postchroot->pw_shell[0] == '\0') { - postchroot->pw_shell = (char *) DEFAULT_SHELL; - } + snprintf(home_env_str, 255, "HOME=/home/%s", slice_name); + envp[0] = home_env_str; - shell = (char *)strdup(postchroot->pw_shell); - if (!shell) { - PERROR("strdup"); - exit(2); - } + if (setuid(geteuid())) goto out_exception; - /* Check whether 'su' or 'sshd' invoked us as a login shell or - not; did this above when testing argv[0]=='-'. - */ - argv[0] = shell; - if (index == 1) { - char **args; - args = (char**)malloc(sizeof(char*)*(argc+2)); - if (!args) { - PERROR("malloc(%d)", sizeof(char*)*(argc+2)); - exit(1); - } - args[0] = argv[0]; - args[1] = "-l"; - for(i=1;i