2.6.22 kernels should use the 0.3 branch; 2.6.27 kernels need 0.4 (trunk).
src_vip6_autod_SOURCES = src/vip6-autod.c
src_vip6_autod_LDADD = -lvserver -lnl
-src_vsh_SOURCES = src/vsh.c src/planetlab.c src/netns.c
-src_vsh_LDADD = -lvserver
+src_vsh_SOURCES = src/vsh.c
noinst_HEADERS = src/planetlab.h
scripts/vuseradd \
scripts/vuserdel
-sbin_PROGRAMS = src/vsh
+sbin_PROGRAMS = src/vsh
+
sysv_SCRIPTS =
if ENSC_HAVE_LIBNL
import time
import traceback
import subprocess
+import commands
import resource
import vserverimpl
"ANON": VLIMIT_ANON,
"SHMEM": VLIMIT_SHMEM}
+CPU_SHARE_MULT = 1024
+
# add in the platform supported rlimits
for entry in resource.__dict__.keys():
if entry.find("RLIMIT_")==0:
class NoSuchVServer(Exception): pass
-
class VServerConfig:
def __init__(self, name, directory):
self.name = name
os.path.walk(self.dir, add_to_cache, self.cache)
-def adjust_lim(goal, curr):
- gh = goal[0]
- gs = goal[1]
- gm = goal[2]
- soft = curr[0]
- hard = curr[1]
- if gm != VC_LIM_KEEP:
- if gm > soft or gm == resource.RLIM_INFINITY:
- soft = gm
- if gm > hard or gm == resource.RLIM_INFINITY:
- hard = gm
- if gs != VC_LIM_KEEP:
- if gs > soft or gs == resource.RLIM_INFINITY:
- soft = gs
- if gh != VC_LIM_KEEP:
- if gh > hard or gh == resource.RLIM_INFINITY:
- hard = gh
- return (soft, hard)
-
-
class VServer:
INITSCRIPTS = [('/etc/rc.vinit', 'start'),
os.access(self.dir, os.R_OK | os.W_OK | os.X_OK)):
raise NoSuchVServer, "no such vserver: " + name
self.config = VServerConfig(name, "/etc/vservers/%s" % name)
- self.remove_caps = ~vserverimpl.CAP_SAFE;
+ #self.remove_caps = ~vserverimpl.CAP_SAFE;
if vm_id == None:
vm_id = int(self.config.get('context'))
self.ctx = vm_id
resource_type = RLIMITS[type]
try:
vserverimpl.setrlimit(self.ctx, resource_type, hard, soft, min)
- if hasattr(resource, 'RLIMIT_' + type):
- lim = resource.getrlimit(resource_type)
- lim = adjust_lim((hard, soft, min), lim)
- resource.setrlimit(resource_type, lim)
except OSError, e:
self.log("Error: setrlimit(%d, %s, %d, %d, %d): %s"
% (self.ctx, type.lower(), hard, soft, min, e))
def set_capabilities_config(self, capabilities):
bcaps = self.get_bcaps_from_capabilities(capabilities)
ccaps = self.get_ccaps_from_capabilities(capabilities)
+ if len(bcaps) > 0:
+ bcaps += ","
+ bcaps += "CAP_NET_RAW"
self.config.update('bcapabilities', bcaps)
self.config.update('ccapabilities', ccaps)
ret = vserverimpl.setbcaps(self.ctx, vserverimpl.text2bcaps(bcaps))
vserverimpl.netadd(self.ctx, a)
def set_ipaddresses_config(self, addresses):
+ return # acb
i = 0
for a in addresses.split(","):
self.config.update("interfaces/%d/ip" % i, a)
def get_ipaddresses(self):
# No clean way to do this right now.
+ self.log("Calling Vserver.get_ipaddresses for slice %s" % self.name)
return None
def __do_chroot(self):
os.chdir("/")
def chroot_call(self, fn, *args):
-
cwd_fd = os.open(".", os.O_RDONLY)
try:
root_fd = os.open("/", os.O_RDONLY)
try:
vserverimpl.unsetdlimit(self.dir, self.ctx)
except OSError, e:
- self.log("Unexpected error with unsetdlimit for context %d: %r" % (self.ctx,e))
+ self.log("Unexpected error with unsetdlimit for context %d" % self.ctx)
return
if self.vm_running:
vserverimpl.DLIMIT_INF, # inode limit
2) # %age reserved for root
except OSError, e:
- self.log("Unexpected error with setdlimit for context %d: %r" % (self.ctx, e))
-
+ self.log("Unexpected error with setdlimit for context %d" % self.ctx)
self.config.update('dlimits/0/space_total', block_limit)
def is_running(self):
- return vserverimpl.isrunning(self.ctx)
+ status = subprocess.call(["/usr/sbin/vserver", self.name, "running"], shell=False)
+ return not status
def get_disklimit(self):
-
try:
(self.disk_blocks, block_limit, self.disk_inodes, inode_limit,
reserved) = vserverimpl.getdlimit(self.dir, self.ctx)
return block_limit
def set_sched_config(self, cpu_min, cpu_share):
-
""" Write current CPU scheduler parameters to the vserver
- configuration file. This method does not modify the kernel CPU
- scheduling parameters for this context. """
-
- self.config.update('sched/fill-rate', cpu_min)
- self.config.update('sched/fill-rate2', cpu_share)
- if cpu_share == 0:
- self.config.unset('sched/idle-time')
-
+ configuration file. Currently, 'cpu_min' is not supported. """
+ self.config.update('cgroup/cpu.shares', cpu_share * CPU_SHARE_MULT)
if self.is_running():
self.set_sched(cpu_min, cpu_share)
def set_sched(self, cpu_min, cpu_share):
- """ Update kernel CPU scheduling parameters for this context. """
- vserverimpl.setsched(self.ctx, cpu_min, cpu_share)
+ """ Update kernel CPU scheduling parameters for this context by
+ writing cpu_share * CPU_SHARE_MULT to the slice's cgroup cpu.shares
+ file. Currently, 'cpu_min' is not supported. This is best effort:
+ a failed write is logged and otherwise ignored. """
+ try:
+ # The cgroup path is built from the slice name. The bare 'name' used
+ # here before raised NameError, which the blanket except swallowed,
+ # so the share was never actually written.
+ cgroup = open('/dev/cgroup/%s/cpu.shares' % self.name, 'w')
+ cgroup.write('%s' % (cpu_share * CPU_SHARE_MULT))
+ cgroup.close()
+ except Exception, e:
+ self.log("Error: could not set cpu.shares for %s: %s" % (self.name, e))
def get_sched(self):
- # have no way of querying scheduler right now on a per vserver basis
- return (-1, False)
+ try:
+ cpu_share = int(int(self.config.get('cgroup/cpu.shares')) / CPU_SHARE_MULT)
+ except:
+ cpu_share = False
+ return (-1, cpu_share)
def set_bwlimit(self, minrate = bwlimit.bwmin, maxrate = None,
exempt_min = None, exempt_max = None,
return self.chroot_call(open, filename, mode, bufsize)
- def __do_chcontext(self, state_file):
-
- if state_file:
- print >>state_file, "%u" % self.ctx
- state_file.close()
-
- if vserverimpl.chcontext(self.ctx, vserverimpl.text2bcaps(self.get_capabilities_config())):
- self.set_resources(True)
- vserverimpl.setup_done(self.ctx)
-
-
- def __prep(self, runlevel):
-
- """ Perform all the crap that the vserver script does before
- actually executing the startup scripts. """
-
-
- # set the initial runlevel
- vserverimpl.setrunlevel(self.dir + "/var/run/utmp", runlevel)
-
- # mount /proc and /dev/pts
- self.__do_mount("none", self.dir, "/proc", "proc")
- # XXX - magic mount options
- self.__do_mount("none", self.dir, "/dev/pts", "devpts", 0, "gid=5,mode=0620")
-
-
- def __cleanvar(self):
- """
- Clean the /var/ directory so RH startup scripts can run
- """
-
- RUNDIR = "/var/run"
- LOCKDIR = "/var/lock/subsys"
-
- filter = ["utmp"]
- garbage = []
- for topdir in [RUNDIR, LOCKDIR]:
- #os.walk() = (dirpath, dirnames, filenames)
- for root, dirs, files in os.walk(topdir):
- for file in files:
- if not file in filter:
- garbage.append(root + "/" + file)
-
- for f in garbage: os.unlink(f)
- return garbage
-
-
- def __do_mount(self, *mount_args):
- try:
- vserverimpl.mount(*mount_args)
- except OSError, ex:
- if ex.errno == errno.EBUSY:
- # assume already mounted
- return
- raise ex
-
-
def enter(self):
- self.config.cache_it()
- self.__do_chroot()
- self.__do_chcontext(None)
-
+ """ Enter this vserver by running '/usr/sbin/vserver <name> enter'. """
+ # Pass an argument list (no shell), as is_running() does, so the slice
+ # name is never interpreted by /bin/sh.
+ subprocess.call(["/usr/sbin/vserver", self.name, "enter"], shell=False)
def start(self, runlevel = 3):
-
+ """ Fork a child that runs '/usr/sbin/vserver <name> start'; the
+ parent only records vm_running = True and returns. 'runlevel' is
+ accepted for compatibility but is not used by the code below. """
if (os.fork() != 0):
# Parent should just return.
self.vm_running = True
else:
# child process
try:
- # so we don't chcontext with priv'ed fds
- close_nonstandard_fds()
-
- # get a new session
- os.setsid()
-
- # open state file to record vserver info
- state_file = open("/var/run/vservers/%s" % self.name, "w")
-
- # use /dev/null for stdin, /var/log/nm for stdout/err
- fd = os.open("/dev/null", os.O_RDONLY)
- if fd != 0:
- os.dup2(fd, 0)
- os.close(fd)
-
- # perform pre-init cleanup
- self.__prep(runlevel)
-
- self.config.cache_it()
- self.__do_chroot()
- if not self.is_running():
- removed = self.__cleanvar()
- else:
- removed = 0
-
- log = open("/var/log/nm", "a", 0)
- if log.fileno() != 1:
- os.dup2(log.fileno(), 1)
- os.dup2(1, 2)
-
- print >>log, ("%s: removing %s" %
- (time.asctime(time.gmtime()), removed))
- print >>log, ("%s: starting the virtual server %s" %
- (time.asctime(time.gmtime()), self.name))
- # execute each init script in turn
- # XXX - we don't support all scripts that vserver script does
- self.__do_chcontext(state_file)
- for cmd in self.INITSCRIPTS:
- try:
- # enter vserver context
- arg_subst = { 'runlevel': runlevel }
- cmd_args = [cmd[0]] + map(lambda x: x % arg_subst,
- cmd[1:])
- if os.path.isfile(cmd[0]):
- print >>log, "executing '%s'" % " ".join(cmd_args)
- os.spawnvp(os.P_NOWAIT,cmd[0],cmd_args)
- else:
- print >>log, "WARNING: could not run %s"%cmd[0]
- except:
- print >>log, traceback.format_exc()
-
+ # Argument list, no shell (as in is_running()): the slice name is
+ # passed to vserver verbatim instead of being parsed by /bin/sh.
+ subprocess.call(["/usr/sbin/vserver", self.name, "start"],
+ shell=False)
# we get here due to an exception in the top-level child process
except Exception, ex:
self.log(traceback.format_exc())
os._exit(0)
- def set_resources(self,setup=False):
+ def set_resources(self):
""" Called when vserver context is entered for first time,
should be overridden by subclass. """
(space, inodes) = line.split()
self.disk_inodes = int(inodes)
self.disk_blocks = int(space)
- #(self.disk_inodes, self.disk_blocks) = vduimpl.vdu(self.dir)
return self.disk_blocks * 1024
def stop(self, signal = signal.SIGKILL):
- vserverimpl.killall(self.ctx, signal)
+ """ Mark the vserver as stopped and run '/usr/sbin/vserver <name> stop'.
+ 'signal' is kept for interface compatibility; it is not used below. """
self.vm_running = False
+ # Argument list, no shell (as in is_running()): the slice name is
+ # passed to vserver verbatim instead of being parsed by /bin/sh.
+ subprocess.call(["/usr/sbin/vserver", self.name, "stop"], shell=False)
def setname(self, slice_id):
- '''Set vcVHI_CONTEXT field in kernel to slice_id'''
- vserverimpl.setname(self.ctx, slice_id)
+ pass
def getname(self):
'''Get vcVHI_CONTEXT field in kernel'''
#define PL_INSECURE_BCAPS (vc_get_insecurebcaps() | (1 << VC_CAP_NET_BIND_SERVICE))
#define PL_INSECURE_CCAPS vc_get_insecureccaps()
-/*
- * context create
- */
-static PyObject *
-vserver_chcontext(PyObject *self, PyObject *args)
-{
- int ctx_is_new;
- xid_t ctx;
- uint_least64_t bcaps = 0;
-
- if (!PyArg_ParseTuple(args, "I|K", &ctx, &bcaps))
- return NULL;
- bcaps |= ~PL_INSECURE_BCAPS;
-
- if ((ctx_is_new = pl_chcontext(ctx, bcaps, 0)) < 0)
- return PyErr_SetFromErrno(PyExc_OSError);
-
- return PyBool_FromLong(ctx_is_new);
-}
-
-static PyObject *
-vserver_setup_done(PyObject *self, PyObject *args)
-{
- xid_t ctx;
-
- if (!PyArg_ParseTuple(args, "I", &ctx))
- return NULL;
-
- if (pl_setup_done(ctx) < 0)
- return PyErr_SetFromErrno(PyExc_OSError);
-
- return NONE;
-}
-
-static PyObject *
-vserver_isrunning(PyObject *self, PyObject *args)
-{
- xid_t ctx;
- PyObject *ret;
- struct stat statbuf;
- char fname[64];
-
- if (!PyArg_ParseTuple(args, "I", &ctx))
- return NULL;
-
- sprintf(fname,"/proc/virtual/%d", ctx);
-
- if(stat(&fname[0],&statbuf)==0)
- ret = PyBool_FromLong(1);
- else
- ret = PyBool_FromLong(0);
-
- return ret;
-}
static PyObject *
__vserver_get_rlimit(xid_t xid, int resource) {
uint32_t bitmask;
xid_t xid;
int resource;
- PyObject *ret;
+ PyObject *ret = NULL;
limits.min = VC_LIM_KEEP;
limits.soft = VC_LIM_KEEP;
return ret;
}
-/*
- * setsched
- */
-static PyObject *
-vserver_setsched(PyObject *self, PyObject *args)
-{
- xid_t ctx;
- uint32_t cpu_min;
- uint32_t cpu_share;
-
- if (!PyArg_ParseTuple(args, "II|I", &ctx, &cpu_min, &cpu_share))
- return NULL;
-
- /* ESRCH indicates that there are no processes in the context */
- if (pl_setsched(ctx, cpu_min, cpu_share) &&
- errno != ESRCH)
- return PyErr_SetFromErrno(PyExc_OSError);
-
- return NONE;
-}
static PyObject *
vserver_get_dlimit(PyObject *self, PyObject *args)
return NONE;
}
-static PyObject *
-vserver_killall(PyObject *self, PyObject *args)
-{
- xid_t ctx;
- int sig;
- struct vc_ctx_flags cflags = {
- .flagword = 0,
- .mask = VC_VXF_PERSISTENT
- };
- struct vc_net_flags nflags = {
- .flagword = 0,
- .mask = VC_NXF_PERSISTENT
- };
-
- if (!PyArg_ParseTuple(args, "Ii", &ctx, &sig))
- return NULL;
-
- if (vc_ctx_kill(ctx, 0, sig) && errno != ESRCH)
- return PyErr_SetFromErrno(PyExc_OSError);
-
- if (vc_set_cflags(ctx, &cflags) && errno != ESRCH)
- return PyErr_SetFromErrno(PyExc_OSError);
-
- if (vc_set_nflags(ctx, &nflags) && errno != ESRCH)
- return PyErr_SetFromErrno(PyExc_OSError);
-
- return NONE;
-}
static PyObject *
vserver_set_bcaps(PyObject *self, PyObject *args)
return ret;
}
+
static PyMethodDef methods[] = {
- { "chcontext", vserver_chcontext, METH_VARARGS,
- "chcontext to vserver with provided flags" },
- { "setup_done", vserver_setup_done, METH_VARARGS,
- "Release vserver setup lock" },
- { "setsched", vserver_setsched, METH_VARARGS,
- "Change vserver scheduling attributes for given vserver context" },
{ "setdlimit", vserver_set_dlimit, METH_VARARGS,
"Set disk limits for given vserver context" },
{ "unsetdlimit", vserver_unset_dlimit, METH_VARARGS,
"Set resource limits for given resource of a vserver context" },
{ "getrlimit", vserver_get_rlimit, METH_VARARGS,
"Get resource limits for given resource of a vserver context" },
- { "killall", vserver_killall, METH_VARARGS,
- "Send signal to all processes in vserver context" },
- { "isrunning", vserver_isrunning, METH_VARARGS,
- "Check if vserver is running"},
{ "setbcaps", vserver_set_bcaps, METH_VARARGS,
"Set POSIX capabilities of a vserver context" },
{ "getbcaps", vserver_get_bcaps, METH_VARARGS,
--- /dev/null
+#!/bin/sh
+
+SLICE=`whoami`
+SLICEID=`id -u`
+
+/usr/sbin/vserver_suid_wrapper $SLICE suexec $SLICEID /bin/bash "$@"
fi
$_VSERVER $NAME build -m skeleton --context $USERID \
- --interface nodev:0.0.0.0/0 \
- --flags persistent,~info_init,sched_hard
+ --interface nodev:`hostname -i` \
+ --interface nodev:127.0.0.1 \
+ --flags persistent,~info_init
RETVAL=$?
DIR=$__CONFDIR/$NAME
if [ $RETVAL -ne 0 ] ; then
echo "Error $RETVAL building $DIR"
rm -rf $DIR $__DEFAULT_VSERVERDIR/$NAME
fi
- mkdir -p $DIR/apps/init $DIR/rlimits $DIR/sched $DIR/dlimits/0
+ mkdir -p $DIR/apps/init $DIR/rlimits $DIR/sched $DIR/dlimits/0 $DIR/sysctl/0
echo default > $DIR/apps/init/mark
echo 1000 > $DIR/rlimits/nproc.hard
# Set persistent for the network context
- echo persistent > $DIR/nflags
+ echo persistent,lback_allow > $DIR/nflags
# Set up the scheduler
echo 100 > $DIR/sched/interval
echo -1 > $DIR/dlimits/0/inodes_total
echo -1 > $DIR/dlimits/0/space_total
+ # Set up sysctl variables
+ echo net.ipv4.ip_forward > $DIR/sysctl/0/setting
+ echo 1 > $DIR/sysctl/0/value
+
# Disable mount namespaces
touch $DIR/nonamespace
+ # Add spaces directory
+ mkdir -p $DIR/spaces
+
# Remove the basically empty guest directory
rm -rf $__DEFAULT_VSERVERDIR/$NAME
# Move the guest back
#define SPACE_FILE "/spaces/net"
#define VSERVERCONF "/etc/vservers/"
-uint32_t
-get_space_flag(xid_t xid) {
+/*
+ * Return the integer stored on the first line of
+ * VSERVERCONF "<slice name>" SPACE_FILE, where the slice name is the
+ * passwd name of uid 'xid'. The value is parsed with atoi().
+ * Returns 0 when the user lookup, the allocation or the open fails,
+ * or when the file is empty.
+ */
+int
+pl_unshare_netns(xid_t xid) {
char *ctx_space_file, *space_name;
struct passwd *slice_user;
- uint32_t space_flag = 0;
+ int res = 0;
+ char buf[100];
+ FILE *fb;
slice_user = getpwuid(xid);
if (!slice_user)
- return 0;
+ return 0;
ctx_space_file=(char *) malloc(sizeof(VSERVERCONF SPACE_FILE "Z")+strlen(slice_user->pw_name));
if (!ctx_space_file)
- return 0;
+ return 0;
sprintf(ctx_space_file,VSERVERCONF "%s" SPACE_FILE, slice_user->pw_name);
- if (access(ctx_space_file, F_OK)==0)
- space_flag |= CLONE_NEWNET;
+ if ((fb = fopen(ctx_space_file, "r")) == NULL) {
+ /* no readable per-slice setting: release the path buffer before
+ bailing out, otherwise it is leaked on every call */
+ free(ctx_space_file);
+ return 0;
+ }
+ if (fgets(buf, sizeof(buf), fb) != NULL) {
+ res = atoi(buf);
+ }
+
+ fclose(fb);
free(ctx_space_file);
- return space_flag;
+ return res;
}
#include <unistd.h>
#include <ctype.h>
#include <sys/resource.h>
-#include <sys/types.h>
#include <fcntl.h>
#define _GNU_SOURCE
#include <sched.h>
#include "vserver.h"
#include "planetlab.h"
-/* defined in netns.c */
-extern uint32_t get_space_flag(xid_t);
-
#ifndef VC_NXC_RAW_SOCKET
# define VC_NXC_RAW_SOCKET 0x00000200ull
#endif
if (vc_tag_create(ctx) == VC_NOCTX)
return -1;
+process:
+
/*
* Create context info - this sets the STATE_SETUP and STATE_INIT flags.
*/
if (vc_ctx_create(ctx, 0) == VC_NOCTX)
return -1;
- if (unshare_flags != 0) {
+ if (unshare_flags != 0) {
unshare(unshare_flags);
unshare_flags |= vc_get_space_mask();
- vc_set_namespace(ctx, unshare_flags);
+ //printf("vc_set_namespace(%d, %X)\n", ctx, unshare_flags);
+ //vc_set_namespace(ctx, unshare_flags);
}
/* Set capabilities - these don't take effect until SETUP flag is unset */
#define RETRY_LIMIT 10
int
-pl_chcontext(xid_t ctx, uint64_t bcaps, const struct sliver_resources *slr)
+pl_chcontext(xid_t ctx, uint64_t bcaps, const struct sliver_resources *slr,
+ int unshare_netns)
{
int retry_count = 0;
int net_migrated = 0;
-
+
if (pl_set_ulimits(slr) != 0)
return -1;
if (vc_get_cflags(ctx, &vc_flags))
{
- uint32_t unshare_flags;
+ uint32_t unshare_flags;
if (errno != ESRCH)
return -1;
- /* Unshare the net namespace if the slice if requested in the local slice configuration */
- unshare_flags = get_space_flag(ctx);
+ /* Always unshare the net namespace for a new context */
+ unshare_flags = CLONE_NEWNET;
/* context doesn't exist - create it */
if (create_context(ctx, bcaps, unshare_flags))
migrate:
if (net_migrated || !vc_net_migrate(ctx))
{
- uint32_t unshare_flags;
- /* Unshare the net namespace if the slice if requested in the local slice configuration */
- unshare_flags = get_space_flag(ctx);
- if (unshare_flags != 0) {
- unshare_flags |=vc_get_space_mask();
- vc_enter_namespace(ctx, unshare_flags);
- }
+ uint32_t unshare_flags;
+
+ /* Unshare the net namespace if requested in the slice config */
+ unshare_flags = unshare_netns ? CLONE_NEWNET : 0;
+
+ if (unshare_flags != 0) {
+ unshare_flags |=vc_get_space_mask();
+ //printf("vc_enter_namespace(%d, %X)\n", ctx, unshare_flags);
+ //vc_enter_namespace(ctx, unshare_flags);
+ }
if (!vc_tag_migrate(ctx) && !vc_ctx_migrate(ctx, 0))
break; /* done */
};
struct pl_resources {
- char *name;
- unsigned type;
+ char *name;
+ unsigned type;
union {
unsigned long long *limit;
unsigned long int *personality;
{
FILE *fb;
int cwd;
+ size_t len = strlen(VSERVERCONF) + strlen(context) + NULLBYTE_SIZE;
+ char *conf = (char *)malloc(len + strlen("rlimits/openfd.hard"));
struct pl_resources *r;
-
struct pl_resources sliver_list[] = {
{"sched/fill-rate2", TYPE_LONG, &slr->vs_cpu},
{"rlimits/as.soft", TYPE_LONG, &slr->vs_as.soft},
{"rlimits/as.min", TYPE_LONG, &slr->vs_as.min},
- {"rlimits/nofile.hard", TYPE_LONG, &slr->vs_nofile.hard},
- {"rlimits/nofile.soft", TYPE_LONG, &slr->vs_nofile.soft},
- {"rlimits/nofile.min", TYPE_LONG, &slr->vs_nofile.min},
-
- {"rlimits/memlock.hard", TYPE_LONG, &slr->vs_memlock.hard},
- {"rlimits/memlock.soft", TYPE_LONG, &slr->vs_memlock.soft},
- {"rlimits/memlock.min", TYPE_LONG, &slr->vs_memlock.min},
+ {"rlimits/openfd.hard", TYPE_LONG, &slr->vs_openfd.hard},
+ {"rlimits/openfd.soft", TYPE_LONG, &slr->vs_openfd.soft},
+ {"rlimits/openfd.min", TYPE_LONG, &slr->vs_openfd.min},
{"personality", TYPE_PERS, &slr->personality},
{0,0}
};
- size_t len = strlen(VSERVERCONF) + strlen(context) + NULLBYTE_SIZE;
- char *conf = (char *)malloc(len);
sprintf(conf, "%s%s", VSERVERCONF, context);
slr->vs_rss.hard = VC_LIM_KEEP;
slr->vs_nproc.soft = VC_LIM_KEEP;
slr->vs_nproc.min = VC_LIM_KEEP;
- slr->vs_nofile.hard = VC_LIM_KEEP;
- slr->vs_nofile.soft = VC_LIM_KEEP;
- slr->vs_nofile.min = VC_LIM_KEEP;
-
- slr->vs_memlock.hard = VC_LIM_KEEP;
- slr->vs_memlock.soft = VC_LIM_KEEP;
- slr->vs_memlock.min = VC_LIM_KEEP;
+ slr->vs_openfd.hard = VC_LIM_KEEP;
+ slr->vs_openfd.soft = VC_LIM_KEEP;
+ slr->vs_openfd.min = VC_LIM_KEEP;
slr->personality = 0;
buf[len-1]='\0';
len --;
}
- if (r->type == TYPE_LONG) {
- int val;
- char *res=0;
- errno=0;
- val = strtol(buf,&res,0);
- if ( !( (val==0 && res) || (errno!=0) ) )
- *r->limit = val;
+ if ( (r->type == TYPE_LONG) && isdigit(*buf)) {
+ *r->limit = atoi(buf);
} else if ( (r->type == TYPE_PERS) && isalpha(*buf)) {
unsigned long int res;
res = vc_str2personalitytype(buf,len);
fclose(fb);
}
- (void)fchdir(cwd);
+ fchdir(cwd);
out_fd:
close(cwd);
out:
set_one_ulimit(RLIMIT_RSS, &slr->vs_rss);
set_one_ulimit(RLIMIT_AS, &slr->vs_as);
set_one_ulimit(RLIMIT_NPROC, &slr->vs_nproc);
- set_one_ulimit(RLIMIT_NOFILE, &slr->vs_nofile);
- set_one_ulimit(RLIMIT_MEMLOCK, &slr->vs_memlock);
+ set_one_ulimit(RLIMIT_NOFILE, &slr->vs_openfd);
return set_personality(slr->personality);
}
struct vc_rlimit vs_rss;
struct vc_rlimit vs_as;
struct vc_rlimit vs_nproc;
- struct vc_rlimit vs_nofile;
- struct vc_rlimit vs_memlock;
+ struct vc_rlimit vs_openfd;
unsigned long int personality;
};
int adjust_lim(const struct vc_rlimit *vcr, struct rlimit *lim);
int
-pl_chcontext(xid_t ctx, uint64_t bcaps, const struct sliver_resources *slr);
+pl_chcontext(xid_t ctx, uint64_t bcaps, const struct sliver_resources *slr,
+ int unshare_netns);
int
pl_setup_done(xid_t ctx);
void pl_get_limits(const char *, struct sliver_resources *);
int pl_set_ulimits(const struct sliver_resources *);
+/* For network namespaces */
+int pl_unshare_netns(xid_t xid);
+
static inline int
_PERROR(const char *format, char *file, int line, int _errno, ...)
{
-/*
- * Marc E. Fiuczynski <mef@cs.princeton.edu>
- *
- * Copyright (c) 2004 The Trustees of Princeton University (Trustees).
- *
- * vsh is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * vsh is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
- * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
- * License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Poptop; see the file COPYING. If not, write to the Free
- * Software Foundation, 59 Temple Place - Suite 330, Boston, MA
- * 02111-1307, USA.
- */
+/* Version 2 of vsh: a thin wrapper that execs 'vserver <slice name> suexec <xid> /bin/bash -login'. */
-#ifdef HAVE_CONFIG_H
-# include <config.h>
-#endif
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <errno.h>
-#include <limits.h>
-#include <pwd.h>
#include <unistd.h>
-#include <syscall.h>
-#include <sys/syscall.h>
-#include <asm/unistd.h>
-#include <sys/mount.h>
+#include <pwd.h>
+#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
-#include <sys/resource.h>
-#include <fcntl.h>
-#include <ctype.h>
-#include <stdarg.h>
-
-//--------------------------------------------------------------------
-#include <vserver.h>
-#include "planetlab.h"
-
-/* Change to root:root (before entering new context) */
-static int setuidgid_root()
-{
- if (setgid(0) < 0) {
- PERROR("setgid(0)");
- return -1;
- }
- if (setuid(0) < 0) {
- PERROR("setuid(0)");
- return -1;
- }
- return 0;
-}
-
-static void compute_new_root(char *base, char **root, const struct passwd *pwd)
-{
- int root_len;
-
- root_len =
- strlen(base) + strlen("/") +
- strlen(pwd->pw_name) + NULLBYTE_SIZE;
- (*root) = (char *)malloc(root_len);
- if ((*root) == NULL) {
- PERROR("malloc(%d)", root_len);
- exit(1);
- }
-
- sprintf((*root), "%s/%s", base, pwd->pw_name);
- (*root)[root_len - 1] = '\0';
-}
-
-static int sandbox_chroot(const struct passwd *pwd)
-{
- char *sandbox_root = NULL;
-
- compute_new_root(DEFAULT_VSERVERDIR,&sandbox_root, pwd);
- if (chroot(sandbox_root) < 0) {
- PERROR("chroot(%s)", sandbox_root);
- exit(1);
- }
- if (chdir("/") < 0) {
- PERROR("chdir(/)");
- exit(1);
- }
- return 0;
-}
-
-static int sandbox_processes(xid_t ctx, const char *context, const struct passwd *pwd)
-{
-#ifdef CONFIG_VSERVER_LEGACY
- int flags;
-
- flags = 0;
- flags |= 1; /* VX_INFO_LOCK -- cannot request a new vx_id */
- /* flags |= 4; VX_INFO_NPROC -- limit number of procs in a context */
-
- (void) vc_new_s_context(ctx, 0, flags);
-
- /* use legacy dirty hack for capremove */
- if (vc_new_s_context(VC_SAMECTX, vc_get_insecurebcaps(), flags) == VC_NOCTX) {
- PERROR("vc_new_s_context(%u, 0x%16llx, 0x%08x)",
- VC_SAMECTX, vc_get_insecurebcaps(), flags);
- exit(1);
- }
-#else
- int ctx_is_new;
- struct sliver_resources slr;
- char hostname[HOST_NAME_MAX+1];
- pl_get_limits(context,&slr);
-
- if (gethostname(hostname, sizeof hostname) == -1)
- {
- PERROR("gethostname(...)");
- exit(1);
- }
-
- /* check whether the slice has been suspended */
- if (slr.vs_cpu==0)
- {
- fprintf(stderr, "*** %s: %s has zero cpu resources and presumably it has been disabled/suspended ***\n", hostname, context);
- exit(0);
- }
-
- (void) (sandbox_chroot(pwd));
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
- if ((ctx_is_new = pl_chcontext(ctx, ~vc_get_insecurebcaps(),&slr)) < 0)
- {
- PERROR("pl_chcontext(%u)", ctx);
- exit(1);
- }
- if (ctx_is_new)
- {
- fprintf(stderr, " *** %s: %s has not been started yet, please check back later ***\n", hostname, context);
- exit(1);
- }
+#define VSH_PATH "/usr/sbin/vsh"
+#ifndef PATH_MAX
+#define PATH_MAX 4096
#endif
- return 0;
-}
+#define VSERVER_PATH "/usr/sbin/vserver"
-void runas_slice_user(struct passwd *pwd)
+char* get_current_username (unsigned int uid)
{
- char *username = pwd->pw_name;
- char *home_env, *logname_env, *mail_env, *shell_env, *user_env;
- int home_len, logname_len, mail_len, shell_len, user_len;
- static char *envp[10];
-
- if (setgid(pwd->pw_gid) < 0) {
- PERROR("setgid(%d)", pwd->pw_gid);
- exit(1);
- }
-
- if (setuid(pwd->pw_uid) < 0) {
- PERROR("setuid(%d)", pwd->pw_uid);
- exit(1);
- }
-
- if (chdir(pwd->pw_dir) < 0) {
- PERROR("chdir(%s)", pwd->pw_dir);
- exit(1);
- }
-
- home_len = strlen("HOME=") + strlen(pwd->pw_dir) + NULLBYTE_SIZE;
- logname_len = strlen("LOGNAME=") + strlen(username) + NULLBYTE_SIZE;
- mail_len = strlen("MAIL=/var/spool/mail/") + strlen(username)
- + NULLBYTE_SIZE;
- shell_len = strlen("SHELL=") + strlen(pwd->pw_shell) + NULLBYTE_SIZE;
- user_len = strlen("USER=") + strlen(username) + NULLBYTE_SIZE;
-
- home_env = (char *)malloc(home_len);
- logname_env = (char *)malloc(logname_len);
- mail_env = (char *)malloc(mail_len);
- shell_env = (char *)malloc(shell_len);
- user_env = (char *)malloc(user_len);
-
- if ((home_env == NULL) ||
- (logname_env == NULL) ||
- (mail_env == NULL) ||
- (shell_env == NULL) ||
- (user_env == NULL)) {
- PERROR("malloc");
- exit(1);
- }
-
- sprintf(home_env, "HOME=%s", pwd->pw_dir);
- sprintf(logname_env, "LOGNAME=%s", username);
- sprintf(mail_env, "MAIL=/var/spool/mail/%s", username);
- sprintf(shell_env, "SHELL=%s", pwd->pw_shell);
- sprintf(user_env, "USER=%s", username);
-
- home_env[home_len - 1] = '\0';
- logname_env[logname_len - 1] = '\0';
- mail_env[mail_len - 1] = '\0';
- shell_env[shell_len - 1] = '\0';
- user_env[user_len - 1] = '\0';
-
- envp[0] = home_env;
- envp[1] = logname_env;
- envp[2] = mail_env;
- envp[3] = shell_env;
- envp[4] = user_env;
- envp[5] = 0;
+ struct passwd *passwd_entry;
+ if ((passwd_entry = getpwuid(uid)) == NULL) {
+ fprintf(stderr, "Could not look up user record for %d\n", uid);
+ return NULL;
+ }
- if ((putenv(home_env) < 0) ||
- (putenv(logname_env) < 0) ||
- (putenv(mail_env) < 0) ||
- (putenv(shell_env) < 0) ||
- (putenv(user_env) < 0)) {
- PERROR("vserver: putenv error ");
- exit(1);
- }
+ return (strdup(passwd_entry->pw_name));
}
-void slice_enter(struct passwd *pwd)
-{
- if (setuidgid_root() < 0) { /* For chroot, new_s_context */
- fprintf(stderr, "vsh: Could not become root, check that SUID flag is set on binary\n");
- exit(2);
- }
+char **extend_argv(int argc, char **argv, int num_extra_args) {
+ int argc2, i;
+ char **argv2;
-#ifdef CONFIG_VSERVER_LEGACY
- (void) (sandbox_chroot(pwd));
-#endif
+ argc2 = argc + num_extra_args;
+ argv2 = (char **) malloc((argc2 + 1) * sizeof(char *));
- if (sandbox_processes((xid_t) pwd->pw_uid, pwd->pw_name, pwd) < 0) {
- fprintf(stderr, "vsh: Could not change context to %d\n", pwd->pw_uid);
- exit(2);
- }
-}
+ if (!argv2)
+ return (char **) NULL;
-//--------------------------------------------------------------------
+ for (i=0; i<argc; i++) {
+ argv2[i+num_extra_args]=strdup(argv[i]);
+ }
+ argv2[argc2]=NULL;
-#define DEFAULT_SHELL "/bin/sh"
+ return argv2;
+}
-/* Exit statuses for programs like 'env' that exec other programs.
- EXIT_FAILURE might not be 1, so use EXIT_FAIL in such programs. */
-enum
-{
- EXIT_CANNOT_INVOKE = 126,
- EXIT_ENOENT = 127
-};
+#define NUM_VSERVER_SUEXEC_ARGS 5
int main(int argc, char **argv)
{
- struct passwd pwdd, *result, *prechroot, *postchroot = &pwdd;
- char *context, *username, *shell, *pwdBuffer;
- long pwdBuffer_len;
- uid_t uid;
- int index, i;
-
- if (argv[0][0]=='-')
- index = 1;
- else
- index = 0;
-
- uid = getuid();
- if ((prechroot = getpwuid(uid)) == NULL) {
- PERROR("getpwuid(%d)", uid);
- exit(1);
- }
-
- context = (char*)strdup(prechroot->pw_name);
- if (!context) {
- PERROR("strdup");
- exit(2);
+ /*
+ * Look up the caller's slice name from its real uid, then exec
+ * VSERVER_PATH <slice> suexec <uid> /bin/bash -login <original args>
+ * with an environment containing only HOME=/home/<slice>.
+ * Returns non-zero on any failure; does not return on success.
+ */
+ char *slice_name;
+ char **argv2;
+ int argc2;
+ char slice_id_str[256];
+ unsigned int slice_xid;
+ char *envp[] = { NULL, NULL };
+ char home_env_str[256];
+ int saved_errno;
+
+ slice_xid = getuid();
+ slice_name = get_current_username(slice_xid);
+ if (!slice_name) {
+ fprintf(stderr,"Could not look up slice name\n");
+ goto out_exception;
}
+
+ argv2 = extend_argv(argc, argv, NUM_VSERVER_SUEXEC_ARGS);
+ if (!argv2) goto out_exception;
+
+
+ // Populate arguments
+ snprintf(slice_id_str, 255, "%u", slice_xid);
+ argv2[0] = strdup(VSERVER_PATH);
+ argv2[1] = strdup(slice_name);
+ argv2[2] = strdup("suexec");
+ argv2[3] = strdup(slice_id_str);
+ argv2[4] = strdup("/bin/bash");
+ // Slot 5 held the copy of the original argv[0]; it is replaced by -login
+ argv2[5] = strdup("-login");
- /* enter vserver "context" */
- slice_enter(prechroot);
-
- /* Get the /etc/passwd entry for this user, this time inside
- * the chroot.
- */
- username = context;
-
- pwdBuffer_len = sysconf(_SC_GETPW_R_SIZE_MAX);
- if (pwdBuffer_len == -1) {
- PERROR("sysconf(_SC_GETPW_R_SIZE_MAX");
- exit(1);
- }
- pwdBuffer = (char*)malloc(pwdBuffer_len);
- if (pwdBuffer == NULL) {
- PERROR("malloc(%d)", pwdBuffer_len);
- exit(1);
- }
-
- errno = 0;
- if ((getpwnam_r(username,postchroot,pwdBuffer,pwdBuffer_len, &result) != 0) ||
- (errno != 0) || result != postchroot) {
- PERROR("getpwnam_r(%s)", username);
- exit(1);
- }
-
- /* Now run as username in this context. Note that for PlanetLab's
- vserver configuration the context name also happens to be the
- "default" username within the vserver context.
- */
- runas_slice_user(postchroot);
-
- /* Make sure pw->pw_shell is non-NULL.*/
- if (postchroot->pw_shell == NULL || postchroot->pw_shell[0] == '\0') {
- postchroot->pw_shell = (char *) DEFAULT_SHELL;
- }
+ snprintf(home_env_str, 255, "HOME=/home/%s", slice_name);
+ envp[0] = home_env_str;
- shell = (char *)strdup(postchroot->pw_shell);
- if (!shell) {
- PERROR("strdup");
- exit(2);
- }
+ if (setuid(geteuid())) goto out_exception;
- /* Check whether 'su' or 'sshd' invoked us as a login shell or
- not; did this above when testing argv[0]=='-'.
- */
- argv[0] = shell;
- if (index == 1) {
- char **args;
- args = (char**)malloc(sizeof(char*)*(argc+2));
- if (!args) {
- PERROR("malloc(%d)", sizeof(char*)*(argc+2));
- exit(1);
- }
- args[0] = argv[0];
- args[1] = "-l";
- for(i=1;i<argc+1;i++) {
- args[i+1] = argv[i];
- }
- argv = args;
- }
- (void) execvp(shell,argv);
- {
- int exit_status = (errno == ENOENT ? EXIT_ENOENT : EXIT_CANNOT_INVOKE);
- exit (exit_status);
- }
+ execve(VSERVER_PATH, argv2, envp);
- return 0; /* shutup compiler */
+out_exception:
+ /* Reached on any setup failure, or when execve() returns (it only
+ returns on error). Capture errno before stdio can change it, report
+ on stderr, and never return 0: errno may be unset by the call that
+ failed, which previously made a failure exit with success. */
+ saved_errno = errno;
+ if (saved_errno)
+ fprintf(stderr, "%s\n", strerror(saved_errno));
+ return saved_errno ? saved_errno : 1;
}
%define name util-vserver-pl
%define version 0.4
-%define taglevel 4
+%define taglevel 5
%define release %{taglevel}%{?pldistro:.%{pldistro}}%{?date:.%{date}}