X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=lib%2Fplanetlab.c;h=b1a4e8edec939760ef1d19547cb6f052b1f090db;hb=130eeb12b73458d3d8a23dd9decf20a120013a32;hp=6b5d936450f7983b937b3317660af47490b3258c;hpb=d967219815660f527f556e134ab1e45d92f48788;p=util-vserver.git diff --git a/lib/planetlab.c b/lib/planetlab.c index 6b5d936..b1a4e8e 100644 --- a/lib/planetlab.c +++ b/lib/planetlab.c @@ -31,29 +31,50 @@ POSSIBILITY OF SUCH DAMAGE. */ +#ifdef HAVE_CONFIG_H +# include +#endif +#include +#include +#include #include #include +#include #include +#include #include +#include -#include "config.h" -#include "planetlab.h" -#include "sched_cmd.h" -#include "virtual.h" #include "vserver.h" +#include "planetlab.h" static int -create_context(xid_t ctx, uint32_t flags, uint64_t bcaps, const rspec_t *rspec) +create_context(xid_t ctx, uint64_t bcaps, struct sliver_resources *slr) { struct vc_ctx_caps vc_caps; - struct vc_ctx_flags vc_flags; - struct vc_rlimit vc_rlimit; + struct vc_net_nx vc_net; + struct vc_net_flags vc_nf; - /* create context info */ + /* Create network context */ + if (vc_net_create(ctx) == VC_NOCTX) { + if (errno == EEXIST) + goto process; + return -1; + } + + /* Make the network context persistent */ + vc_nf.mask = vc_nf.flagword = VC_NXF_PERSISTENT; + if (vc_set_nflags(ctx, &vc_nf)) + return -1; + +process: + /* + * Create context info - this sets the STATE_SETUP and STATE_INIT flags. + */ if (vc_ctx_create(ctx) == VC_NOCTX) return -1; - /* set capabilities - these don't take effect until SETUP flags is unset */ + /* Set capabilities - these don't take effect until SETUP flag is unset */ vc_caps.bcaps = bcaps; vc_caps.bmask = ~0ULL; /* currently unused */ vc_caps.ccaps = 0; /* don't want any of these */ @@ -61,45 +82,46 @@ create_context(xid_t ctx, uint32_t flags, uint64_t bcaps, const rspec_t *rspec) if (vc_set_ccaps(ctx, &vc_caps)) return -1; - /* ignore all flags except SETUP and scheduler flags */ - vc_flags.mask = VC_VXF_STATE_SETUP | VC_VXF_SCHED_FLAGS; - /* don't let user change scheduler flags */ - vc_flags.flagword = flags & ~VC_VXF_SCHED_FLAGS; /* SETUP not set */ - - /* set scheduler parameters */ - vc_flags.flagword |= rspec->cpu_sched_flags; - pl_setsched(ctx, rspec->cpu_share, rspec->cpu_sched_flags); + pl_set_limits(ctx, slr); - /* set resource limits */ - vc_rlimit.min = VC_LIM_KEEP; - vc_rlimit.soft = VC_LIM_KEEP; - vc_rlimit.hard = rspec->mem_limit; - if (vc_set_rlimit(ctx, RLIMIT_RSS, &vc_rlimit)) - return -1; + return 0; +} - /* assume min and soft unchanged by set_rlimit */ - vc_rlimit.hard = rspec->task_limit; - if (vc_set_rlimit(ctx, RLIMIT_NPROC, &vc_rlimit)) - return -1; +int +pl_setup_done(xid_t ctx) +{ + struct vc_ctx_flags vc_flags; - /* set flags, unset SETUP flag - this allows other processes to migrate */ + /* unset SETUP flag - this allows other processes to migrate */ + /* set the PERSISTENT flag - so the context doesn't vanish */ + /* Don't clear the STATE_INIT flag, as that would make us the init task. */ + vc_flags.mask = VC_VXF_STATE_SETUP|VC_VXF_PERSISTENT; + vc_flags.flagword = VC_VXF_PERSISTENT; if (vc_set_cflags(ctx, &vc_flags)) return -1; return 0; } +#define RETRY_LIMIT 10 + int -pl_chcontext(xid_t ctx, uint32_t flags, uint64_t bcaps, const rspec_t *rspec) +pl_chcontext(xid_t ctx, uint64_t bcaps, struct sliver_resources *slr) { + int retry_count = 0; + int net_migrated = 0; + for (;;) { struct vc_ctx_flags vc_flags; if (vc_get_cflags(ctx, &vc_flags)) { + if (errno != ESRCH) + return -1; + /* context doesn't exist - create it */ - if (create_context(ctx, flags, bcaps, rspec)) + if (create_context(ctx, bcaps, slr)) { if (errno == EEXIST) /* another process beat us in a race */ @@ -111,21 +133,30 @@ pl_chcontext(xid_t ctx, uint32_t flags, uint64_t bcaps, const rspec_t *rspec) } /* created context and migrated to it i.e., we're done */ - break; + return 1; } /* check the SETUP flag */ if (vc_flags.flagword & VC_VXF_STATE_SETUP) { /* context is still being setup - wait a while then retry */ + if (retry_count++ >= RETRY_LIMIT) + { + errno = EBUSY; + return -1; + } sleep(1); continue; } /* context has been setup */ migrate: - if (!vc_ctx_migrate(ctx)) - break; /* done */ + if (net_migrated || !vc_net_migrate(ctx)) + { + if (!vc_ctx_migrate(ctx, 0)) + break; /* done */ + net_migrated = 1; + } /* context disappeared - retry */ } @@ -133,18 +164,272 @@ pl_chcontext(xid_t ctx, uint32_t flags, uint64_t bcaps, const rspec_t *rspec) return 0; } +/* it's okay for a syscall to fail because the context doesn't exist */ +#define VC_SYSCALL(x) \ +do \ +{ \ + if (x) \ + return errno == ESRCH ? 0 : -1; \ +} \ +while (0) + int pl_setsched(xid_t ctx, uint32_t cpu_share, uint32_t cpu_sched_flags) { struct vc_set_sched vc_sched; + struct vc_ctx_flags vc_flags; + uint32_t new_flags; vc_sched.set_mask = (VC_VXSM_FILL_RATE | VC_VXSM_INTERVAL | VC_VXSM_TOKENS | - VC_VXSM_TOKENS_MIN | VC_VXSM_TOKENS_MAX); - vc_sched.fill_rate = cpu_share; /* tokens accumulated per interval */ - vc_sched.interval = 1000; /* milliseconds */ + VC_VXSM_TOKENS_MIN | VC_VXSM_TOKENS_MAX | VC_VXSM_MSEC | + VC_VXSM_FILL_RATE2 | VC_VXSM_INTERVAL2 | VC_VXSM_FORCE | + VC_VXSM_IDLE_TIME); + vc_sched.fill_rate = 0; + vc_sched.fill_rate2 = cpu_share; /* tokens accumulated per interval */ + vc_sched.interval = vc_sched.interval2 = 1000; /* milliseconds */ vc_sched.tokens = 100; /* initial allocation of tokens */ vc_sched.tokens_min = 50; /* need this many tokens to run */ vc_sched.tokens_max = 100; /* max accumulated number of tokens */ - return vc_set_sched(ctx, &vc_sched); + if (cpu_share == (uint32_t)VC_LIM_KEEP) + vc_sched.set_mask &= ~(VC_VXSM_FILL_RATE|VC_VXSM_FILL_RATE2); + + /* guaranteed CPU corresponds to SCHED_SHARE flag being cleared */ + if (cpu_sched_flags & VS_SCHED_CPU_GUARANTEED) { + new_flags = 0; + vc_sched.fill_rate = vc_sched.fill_rate2; + } + else + new_flags = VC_VXF_SCHED_SHARE; + + VC_SYSCALL(vc_set_sched(ctx, &vc_sched)); + + vc_flags.mask = VC_VXF_SCHED_FLAGS; + vc_flags.flagword = new_flags | VC_VXF_SCHED_HARD; + VC_SYSCALL(vc_set_cflags(ctx, &vc_flags)); + + return 0; +} + +struct pl_resources { + char *name; + unsigned long long *limit; +}; + +#define WHITESPACE(buffer,index,len) \ + while(isspace((int)buffer[index])) \ + if (index < len) index++; else goto out; + +#define VSERVERCONF "/etc/vservers/" +void +pl_get_limits(char *context, struct sliver_resources *slr) +{ + FILE *fb; + int cwd; + size_t len = strlen(VSERVERCONF) + strlen(context) + NULLBYTE_SIZE; + char *conf = (char *)malloc(len + strlen("rlimits/openfd.hard")); + struct pl_resources *r; + struct pl_resources sliver_list[] = { + {"sched/fill-rate2", &slr->vs_cpu}, + {"sched/fill-rate", &slr->vs_cpuguaranteed}, + + {"rlimits/nproc.hard", &slr->vs_nproc.hard}, + {"rlimits/nproc.soft", &slr->vs_nproc.soft}, + {"rlimits/nproc.min", &slr->vs_nproc.min}, + + {"rlimits/rss.hard", &slr->vs_rss.hard}, + {"rlimits/rss.soft", &slr->vs_rss.soft}, + {"rlimits/rss.min", &slr->vs_rss.min}, + + {"rlimits/as.hard", &slr->vs_as.hard}, + {"rlimits/as.soft", &slr->vs_as.soft}, + {"rlimits/as.min", &slr->vs_as.min}, + + {"rlimits/openfd.hard", &slr->vs_openfd.hard}, + {"rlimits/openfd.soft", &slr->vs_openfd.soft}, + {"rlimits/openfd.min", &slr->vs_openfd.min}, + + {"bcapabilities", NULL}, + {0,0} + }; + + sprintf(conf, "%s%s", VSERVERCONF, context); + + slr->vs_cpu = VC_LIM_KEEP; + slr->vs_cpuguaranteed = 0; + + slr->vs_rss.hard = VC_LIM_KEEP; + slr->vs_rss.soft = VC_LIM_KEEP; + slr->vs_rss.min = VC_LIM_KEEP; + + slr->vs_as.hard = VC_LIM_KEEP; + slr->vs_as.soft = VC_LIM_KEEP; + slr->vs_as.min = VC_LIM_KEEP; + + + slr->vs_nproc.hard = VC_LIM_KEEP; + slr->vs_nproc.soft = VC_LIM_KEEP; + slr->vs_nproc.min = VC_LIM_KEEP; + + slr->vs_openfd.hard = VC_LIM_KEEP; + slr->vs_openfd.soft = VC_LIM_KEEP; + slr->vs_openfd.min = VC_LIM_KEEP; + + slr->vs_capabilities.bcaps = 0; + slr->vs_capabilities.bmask = 0; + slr->vs_capabilities.ccaps = 0; + slr->vs_capabilities.cmask = 0; + + cwd = open(".", O_RDONLY); + if (cwd == -1) { + perror("cannot get a handle on ."); + goto out; + } + if (chdir(conf) == -1) { + fprintf(stderr, "cannot chdir to "); + perror(conf); + goto out_fd; + } + + for (r = &sliver_list[0]; r->name; r++) { + char buf[1000]; + fb = fopen(r->name, "r"); + if (fb == NULL) + continue; + /* XXX: UGLY. */ + if (strcmp(r->name, "bcapabilities") == 0) { + size_t len, i; + struct vc_err_listparser err; + + len = fread(buf, 1, sizeof(buf), fb); + for (i = 0; i < len; i++) { + if (buf[i] == '\n') + buf[i] = ','; + } + vc_list2bcap(buf, len, &err, &slr->vs_capabilities); + } + else + if (fgets(buf, sizeof(buf), fb) != NULL && isdigit(*buf)) + *r->limit = atoi(buf); + fclose(fb); + } + + fchdir(cwd); +out_fd: + close(cwd); +out: + free(conf); +} + +int +adjust_lim(struct vc_rlimit *vcr, struct rlimit *lim) +{ + int adjusted = 0; + if (vcr->min != VC_LIM_KEEP) { + if (vcr->min > lim->rlim_cur) { + lim->rlim_cur = vcr->min; + adjusted = 1; + } + if (vcr->min > lim->rlim_max) { + lim->rlim_max = vcr->min; + adjusted = 1; + } + } + + if (vcr->soft != VC_LIM_KEEP) { + switch (vcr->min != VC_LIM_KEEP) { + case 1: + if (vcr->soft < vcr->min) + break; + case 0: + lim->rlim_cur = vcr->soft; + adjusted = 1; + } + } + + if (vcr->hard != VC_LIM_KEEP) { + switch (vcr->min != VC_LIM_KEEP) { + case 1: + if (vcr->hard < vcr->min) + break; + case 0: + lim->rlim_cur = vcr->hard; + adjusted = 1; + } + } + return adjusted; +} + +void +pl_set_limits(xid_t ctx, struct sliver_resources *slr) +{ + struct rlimit lim; /* getrlimit values */ + unsigned long long vs_cpu; + uint32_t cpu_sched_flags; + + if (slr != 0) { + /* set memory limits */ + getrlimit(RLIMIT_RSS,&lim); + if (adjust_lim(&slr->vs_rss, &lim)) { + setrlimit(RLIMIT_RSS, &lim); + if (vc_set_rlimit(ctx, RLIMIT_RSS, &slr->vs_rss)) + { + PERROR("pl_setrlimit(%u, RLIMIT_RSS)", ctx); + exit(1); + } + } + + /* set address space limits */ + getrlimit(RLIMIT_AS,&lim); + if (adjust_lim(&slr->vs_as, &lim)) { + setrlimit(RLIMIT_AS, &lim); + if (vc_set_rlimit(ctx, RLIMIT_AS, &slr->vs_as)) + { + PERROR("pl_setrlimit(%u, RLIMIT_AS)", ctx); + exit(1); + } + } + /* set nrpoc limit */ + getrlimit(RLIMIT_NPROC,&lim); + if (adjust_lim(&slr->vs_nproc, &lim)) { + setrlimit(RLIMIT_NPROC, &lim); + if (vc_set_rlimit(ctx, RLIMIT_NPROC, &slr->vs_nproc)) + { + PERROR("pl_setrlimit(%u, RLIMIT_NPROC)", ctx); + exit(1); + } + } + + /* set openfd limit */ + getrlimit(RLIMIT_NOFILE,&lim); + if (adjust_lim(&slr->vs_openfd, &lim)) { + setrlimit(RLIMIT_NOFILE, &lim); + if (vc_set_rlimit(ctx, RLIMIT_NOFILE, &slr->vs_openfd)) + { + PERROR("pl_setrlimit(%u, RLIMIT_NOFILE)", ctx); + exit(1); + } + if (vc_set_rlimit(ctx, VC_VLIMIT_OPENFD, &slr->vs_openfd)) + { + PERROR("pl_setrlimit(%u, VLIMIT_OPENFD)", ctx); + exit(1); + } + } + vs_cpu = slr->vs_cpu; + cpu_sched_flags = slr->vs_cpuguaranteed & VS_SCHED_CPU_GUARANTEED; + + slr->vs_capabilities.bmask = vc_get_insecurebcaps(); + if (vc_set_ccaps(ctx, &slr->vs_capabilities) < 0) { + PERROR("pl_setcaps(%u)", ctx); + exit(1); + } + } else { + vs_cpu = 1; + cpu_sched_flags = 0; + } + + if (pl_setsched(ctx, vs_cpu, cpu_sched_flags) < 0) { + PERROR("pl_setsched(%u)", ctx); + exit(1); + } }