#include <unistd.h>
#include <ctype.h>
#include <sys/resource.h>
+#include <sys/types.h>
#include <fcntl.h>
+#define _GNU_SOURCE
+#include <sched.h>
#include "vserver.h"
#include "planetlab.h"
+/* defined in netns.c */
+extern uint32_t get_space_flag(xid_t);
+
+#ifndef VC_NXC_RAW_SOCKET
+# define VC_NXC_RAW_SOCKET 0x00000200ull
+#endif
+#ifndef VC_NXC_RAW_SEND
+# define VC_NXC_RAW_SEND 0x00000400ull
+#endif
+#ifndef VC_NXF_LBACK_ALLOW
+# define VC_NXF_LBACK_ALLOW 0x00000400ull
+#endif
+
static int
-create_context(xid_t ctx, uint64_t bcaps)
+create_context(xid_t ctx, uint64_t bcaps, uint32_t unshare_flags)
{
- struct vc_ctx_caps vc_caps;
+ struct vc_ctx_caps vc_caps;
struct vc_net_flags vc_nf;
+ struct vc_net_caps vc_ncaps;
/* Create network context */
if (vc_net_create(ctx) == VC_NOCTX) {
if (errno == EEXIST)
- goto process;
+ goto tag;
return -1;
}
/* Make the network context persistent */
- vc_nf.mask = vc_nf.flagword = VC_NXF_PERSISTENT;
+ vc_nf.mask = vc_nf.flagword = VC_NXF_PERSISTENT | VC_NXF_LBACK_ALLOW;
if (vc_set_nflags(ctx, &vc_nf))
return -1;
-process:
+ /* Give it raw socket capabilities */
+ vc_ncaps.ncaps = vc_ncaps.cmask = VC_NXC_RAW_ICMP | VC_NXC_RAW_SOCKET;
+ if (vc_set_ncaps(ctx, &vc_ncaps))
+ return -1;
+
+tag:
+ /* Create tag context */
+ if (vc_tag_create(ctx) == VC_NOCTX)
+ return -1;
+
/*
* Create context info - this sets the STATE_SETUP and STATE_INIT flags.
*/
if (vc_ctx_create(ctx, 0) == VC_NOCTX)
return -1;
+ if (unshare_flags != 0) {
+ unshare(unshare_flags);
+ unshare_flags |= vc_get_space_mask();
+ vc_set_namespace(ctx, unshare_flags);
+ }
+
/* Set capabilities - these don't take effect until SETUP flag is unset */
vc_caps.bcaps = bcaps;
vc_caps.bmask = ~0ULL; /* currently unused */
if (vc_set_ccaps(ctx, &vc_caps))
return -1;
- if (pl_setsched(ctx, 1, 0) < 0) {
+ if (pl_setsched(ctx, 0, 1) < 0) {
PERROR("pl_setsched(%u)", ctx);
exit(1);
}
int retry_count = 0;
int net_migrated = 0;
- pl_set_ulimits(slr);
+ if (pl_set_ulimits(slr) != 0)
+ return -1;
for (;;)
{
if (vc_get_cflags(ctx, &vc_flags))
{
+ uint32_t unshare_flags;
if (errno != ESRCH)
return -1;
+ /* Unshare the net namespace for the slice if requested in the local slice configuration */
+ unshare_flags = get_space_flag(ctx);
+
/* context doesn't exist - create it */
- if (create_context(ctx, bcaps))
+ if (create_context(ctx, bcaps, unshare_flags))
{
if (errno == EEXIST)
/* another process beat us in a race */
migrate:
if (net_migrated || !vc_net_migrate(ctx))
{
- if (!vc_ctx_migrate(ctx, 0))
+ uint32_t unshare_flags;
+ /* Enter the slice's existing namespaces if unsharing is requested in the local slice configuration */
+ unshare_flags = get_space_flag(ctx);
+ if (unshare_flags != 0) {
+ unshare_flags |=vc_get_space_mask();
+ vc_enter_namespace(ctx, unshare_flags);
+ }
+
+ if (!vc_tag_migrate(ctx) && !vc_ctx_migrate(ctx, 0))
break; /* done */
net_migrated = 1;
}
while (0)
int
-pl_setsched(xid_t ctx, uint32_t cpu_share, uint32_t cpu_sched_flags)
+pl_setsched(xid_t ctx, uint32_t cpu_min, uint32_t cpu_share) /*
+ * Program the scheduler parameters of context ctx.
+ *
+ * cpu_min   is stored as fill_rate over an interval of 100, i.e. it is
+ *           read as a reserved percentage.
+ * cpu_share is stored as fill_rate2 over interval2 = 1000.  Passing
+ *           VC_LIM_KEEP removes both fill rates from the set mask; any
+ *           other non-zero value additionally requests VC_VXSM_IDLE_TIME.
+ *
+ * Returns 0 once both VC_SYSCALL-wrapped calls have been issued.  What
+ * VC_SYSCALL does on failure is defined outside this view.
+ */
{
struct vc_set_sched vc_sched;
struct vc_ctx_flags vc_flags;
- uint32_t new_flags;
vc_sched.set_mask = (VC_VXSM_FILL_RATE | VC_VXSM_INTERVAL | VC_VXSM_TOKENS |
VC_VXSM_TOKENS_MIN | VC_VXSM_TOKENS_MAX | VC_VXSM_MSEC |
- VC_VXSM_FILL_RATE2 | VC_VXSM_INTERVAL2 | VC_VXSM_FORCE |
- VC_VXSM_IDLE_TIME);
- vc_sched.fill_rate = 0;
- vc_sched.fill_rate2 = cpu_share; /* tokens accumulated per interval */
- vc_sched.interval = vc_sched.interval2 = 1000; /* milliseconds */
+ VC_VXSM_FILL_RATE2 | VC_VXSM_INTERVAL2 | VC_VXSM_FORCE);
+ vc_sched.fill_rate = cpu_min; /* percent reserved */
+ vc_sched.interval = 100; /* fill_rate out of 100, hence a percentage */
+ vc_sched.fill_rate2 = cpu_share; /* best-effort fair share of unreserved */
+ vc_sched.interval2 = 1000; /* milliseconds */
vc_sched.tokens = 100; /* initial allocation of tokens */
vc_sched.tokens_min = 50; /* need this many tokens to run */
vc_sched.tokens_max = 100; /* max accumulated number of tokens */
- if (cpu_share == (uint32_t)VC_LIM_KEEP)
- vc_sched.set_mask &= ~(VC_VXSM_FILL_RATE|VC_VXSM_FILL_RATE2);
-
- /* guaranteed CPU corresponds to SCHED_SHARE flag being cleared */
- if (cpu_sched_flags & VS_SCHED_CPU_GUARANTEED) {
- new_flags = 0;
- vc_sched.fill_rate = vc_sched.fill_rate2;
+ if (cpu_share) { /* cpu_share == 0 leaves set_mask exactly as built above */
+ if (cpu_share == (uint32_t)VC_LIM_KEEP)
+ vc_sched.set_mask &= ~(VC_VXSM_FILL_RATE|VC_VXSM_FILL_RATE2); /* neither fill rate is submitted, cpu_min included */
+ else
+ vc_sched.set_mask |= VC_VXSM_IDLE_TIME; /* NOTE(review): presumably lets the context consume otherwise idle CPU - confirm */
}
- else
- new_flags = VC_VXF_SCHED_SHARE;
VC_SYSCALL(vc_set_sched(ctx, &vc_sched));
vc_flags.mask = VC_VXF_SCHED_FLAGS;
- vc_flags.flagword = new_flags | VC_VXF_SCHED_HARD;
+ vc_flags.flagword = VC_VXF_SCHED_HARD; /* of the bits covered by the mask, only SCHED_HARD is requested */
VC_SYSCALL(vc_set_cflags(ctx, &vc_flags));
return 0;
}
+enum { /* kinds of value a struct pl_resources entry can describe */
+ TYPE_LONG = 1, /* numeric limit: parsed with strtol() and stored through .limit */
+ TYPE_PERS = 2, /* personality name: converted with vc_str2personalitytype() and stored through .personality */
+};
+
struct pl_resources {
- char *name;
- unsigned long long *limit;
+ char *name; /* file name handed to fopen() when the setting is read */
+ unsigned type; /* TYPE_LONG or TYPE_PERS; selects the union member in use */
+ union { /* destination the parsed value is written to */
+ unsigned long long *limit; /* used when type == TYPE_LONG */
+ unsigned long int *personality; /* used when type == TYPE_PERS */
+ };
};
#define WHITESPACE(buffer,index,len) \
if (index < len) index++; else goto out;
#define VSERVERCONF "/etc/vservers/"
+
void
pl_get_limits(const char *context, struct sliver_resources *slr)
{
FILE *fb;
int cwd;
- size_t len = strlen(VSERVERCONF) + strlen(context) + NULLBYTE_SIZE;
- char *conf = (char *)malloc(len + strlen("rlimits/openfd.hard"));
struct pl_resources *r;
+
struct pl_resources sliver_list[] = {
- {"sched/fill-rate2", &slr->vs_cpu},
+ {"sched/fill-rate2", TYPE_LONG, &slr->vs_cpu},
- {"rlimits/nproc.hard", &slr->vs_nproc.hard},
- {"rlimits/nproc.soft", &slr->vs_nproc.soft},
- {"rlimits/nproc.min", &slr->vs_nproc.min},
+ {"rlimits/nproc.hard", TYPE_LONG, &slr->vs_nproc.hard},
+ {"rlimits/nproc.soft", TYPE_LONG, &slr->vs_nproc.soft},
+ {"rlimits/nproc.min", TYPE_LONG, &slr->vs_nproc.min},
- {"rlimits/rss.hard", &slr->vs_rss.hard},
- {"rlimits/rss.soft", &slr->vs_rss.soft},
- {"rlimits/rss.min", &slr->vs_rss.min},
+ {"rlimits/rss.hard", TYPE_LONG, &slr->vs_rss.hard},
+ {"rlimits/rss.soft", TYPE_LONG, &slr->vs_rss.soft},
+ {"rlimits/rss.min", TYPE_LONG, &slr->vs_rss.min},
- {"rlimits/as.hard", &slr->vs_as.hard},
- {"rlimits/as.soft", &slr->vs_as.soft},
- {"rlimits/as.min", &slr->vs_as.min},
+ {"rlimits/as.hard", TYPE_LONG, &slr->vs_as.hard},
+ {"rlimits/as.soft", TYPE_LONG, &slr->vs_as.soft},
+ {"rlimits/as.min", TYPE_LONG, &slr->vs_as.min},
- {"rlimits/openfd.hard", &slr->vs_openfd.hard},
- {"rlimits/openfd.soft", &slr->vs_openfd.soft},
- {"rlimits/openfd.min", &slr->vs_openfd.min},
+ {"rlimits/nofile.hard", TYPE_LONG, &slr->vs_nofile.hard},
+ {"rlimits/nofile.soft", TYPE_LONG, &slr->vs_nofile.soft},
+ {"rlimits/nofile.min", TYPE_LONG, &slr->vs_nofile.min},
+
+ {"rlimits/memlock.hard", TYPE_LONG, &slr->vs_memlock.hard},
+ {"rlimits/memlock.soft", TYPE_LONG, &slr->vs_memlock.soft},
+ {"rlimits/memlock.min", TYPE_LONG, &slr->vs_memlock.min},
+
+ {"personality", TYPE_PERS, &slr->personality},
{0,0}
};
+ size_t len = strlen(VSERVERCONF) + strlen(context) + NULLBYTE_SIZE;
+ char *conf = (char *)malloc(len);
sprintf(conf, "%s%s", VSERVERCONF, context);
slr->vs_rss.hard = VC_LIM_KEEP;
slr->vs_nproc.soft = VC_LIM_KEEP;
slr->vs_nproc.min = VC_LIM_KEEP;
- slr->vs_openfd.hard = VC_LIM_KEEP;
- slr->vs_openfd.soft = VC_LIM_KEEP;
- slr->vs_openfd.min = VC_LIM_KEEP;
+ slr->vs_nofile.hard = VC_LIM_KEEP;
+ slr->vs_nofile.soft = VC_LIM_KEEP;
+ slr->vs_nofile.min = VC_LIM_KEEP;
+
+ slr->vs_memlock.hard = VC_LIM_KEEP;
+ slr->vs_memlock.soft = VC_LIM_KEEP;
+ slr->vs_memlock.min = VC_LIM_KEEP;
+
+ slr->personality = 0;
cwd = open(".", O_RDONLY);
if (cwd == -1) {
fb = fopen(r->name, "r");
if (fb == NULL)
continue;
- if (fgets(buf, sizeof(buf), fb) != NULL && isdigit(*buf))
- *r->limit = atoi(buf);
+ if (fgets(buf, sizeof(buf), fb) != NULL) {
+   len = strlen(buf);
+   /* remove trailing newline; the len check keeps an empty string
+    * (e.g. a file that starts with a NUL byte) from indexing buf[-1] */
+   if (len > 0 && buf[len-1] == '\n') {
+     buf[len-1] = '\0';
+     len--;
+   }
+   if (r->type == TYPE_LONG) {
+     long val; /* same width as strtol()'s result, so large limits are not truncated */
+     char *res = NULL;
+     errno = 0;
+     val = strtol(buf, &res, 0);
+     /* A result of 0 is never stored: that covers unparsable text and,
+      * exactly as before, an explicit "0".  strtol() always sets res, so
+      * the old "&& res" test added nothing.  Range errors are dropped.
+      * NOTE(review): confirm that an explicit 0 limit should be ignored. */
+     if (val != 0 && errno == 0)
+       *r->limit = val;
+   } else if ((r->type == TYPE_PERS) && isalpha((unsigned char)*buf)) {
+     /* <ctype.h> functions need an unsigned char value; a plain char
+      * with the high bit set would be undefined behaviour */
+     unsigned long int res;
+     res = vc_str2personalitytype(buf, len);
+     if (res != VC_BAD_PERSONALITY) {
+       *r->personality = res;
+     }
+   }
+ }
+
fclose(fb);
}
- fchdir(cwd);
+ (void)fchdir(cwd);
out_fd:
close(cwd);
out:
if (vcr->hard < vcr->min)
break;
case 0:
- lim->rlim_cur = vcr->hard;
+ lim->rlim_max = vcr->hard;
adjusted = 1;
}
}
setrlimit(resource, &lim);
}
-void
+/*
+ * Switch the calling process to the execution personality given in
+ * personality_arg by calling personality().  A value of 0 is treated as
+ * "nothing to do" and skips the call.
+ *
+ * Returns 0 when the call succeeded or was skipped, -1 when
+ * personality() reported an error.
+ */
+static inline int
+set_personality(unsigned long int personality_arg)
+{
+  if (personality_arg != 0 && personality(personality_arg) < 0)
+    return -1;
+
+  return 0;
+}
+
+int /*
+ * Apply the resource limits recorded in slr to the calling process
+ * (RSS, address space, process count, open files, locked memory) and
+ * then switch to the requested personality.
+ *
+ * Returns 0 when slr is NULL; otherwise returns whatever
+ * set_personality() returns.  The set_one_ulimit() calls are not checked
+ * here.
+ */
pl_set_ulimits(const struct sliver_resources *slr)
{
if (!slr)
- return;
+ return 0; /* no resources supplied: nothing to apply */
set_one_ulimit(RLIMIT_RSS, &slr->vs_rss);
set_one_ulimit(RLIMIT_AS, &slr->vs_as);
set_one_ulimit(RLIMIT_NPROC, &slr->vs_nproc);
- set_one_ulimit(RLIMIT_NOFILE, &slr->vs_openfd);
+ set_one_ulimit(RLIMIT_NOFILE, &slr->vs_nofile);
+ set_one_ulimit(RLIMIT_MEMLOCK, &slr->vs_memlock);
+ return set_personality(slr->personality); /* the only step whose failure is reported to the caller */
}