1 /* Copyright 2005 Princeton University
3 Redistribution and use in source and binary forms, with or without
4 modification, are permitted provided that the following conditions
7 * Redistributions of source code must retain the above copyright
8 notice, this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above
11 copyright notice, this list of conditions and the following
12 disclaimer in the documentation and/or other materials provided
13 with the distribution.
15 * Neither the name of the copyright holder nor the names of its
16 contributors may be used to endorse or promote products derived
17 from this software without specific prior written permission.
19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PRINCETON
23 UNIVERSITY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
25 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
26 OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
27 AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
29 WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 POSSIBILITY OF SUCH DAMAGE.
45 #include <sys/resource.h>
49 #include "planetlab.h"
/*
 * create_context - build the network context and the process context for
 * `ctx`, then apply the sliver's resource limits.
 *
 * Steps visible here, in order: vc_net_create(), vc_set_nflags() with
 * VC_NXF_PERSISTENT, vc_net_add() of an IPv4 entry, vc_ctx_create(),
 * vc_set_ccaps() and finally pl_set_limits(ctx, slr).
 *
 * ctx   - context id, used for both the network and the process context
 * bcaps - copied into vc_caps.bcaps before vc_set_ccaps()
 * slr   - passed through unchanged to pl_set_limits()
 *
 * NOTE(review): the error branches and return statements are not visible in
 * this view.  pl_chcontext() treats a non-zero result as "could not create"
 * - confirm the exact return values against the full function.
 */
52 create_context(xid_t ctx, uint64_t bcaps, struct sliver_resources *slr)
54 struct vc_ctx_caps vc_caps;
55 struct vc_net_nx vc_net;
56 struct vc_net_flags vc_nf;
58 /* Create network context */
59 if (vc_net_create(ctx) == VC_NOCTX) {
65 /* Make the network context persistent */
/* mask and flagword carry the same bit; presumably mask selects the flag to
 * change and flagword gives its new value, as in pl_setup_done() - confirm. */
66 vc_nf.mask = vc_nf.flagword = VC_NXF_PERSISTENT;
67 if (vc_set_nflags(ctx, &vc_nf))
70 /* XXX: Allow access to all IPv4 addresses (for now) */
71 vc_net.type = vcNET_IPV4;
75 if (vc_net_add(ctx, &vc_net) == -1)
80 * Create context info - this sets the STATE_SETUP and STATE_INIT flags.
82 if (vc_ctx_create(ctx) == VC_NOCTX)
85 /* Set capabilities - these don't take effect until SETUP flag is unset */
86 vc_caps.bcaps = bcaps;
87 vc_caps.bmask = ~0ULL; /* currently unused */
88 vc_caps.ccaps = 0; /* don't want any of these */
89 vc_caps.cmask = ~0ULL;
90 if (vc_set_ccaps(ctx, &vc_caps))
/* Apply the rlimits, capabilities and scheduler settings held in slr. */
93 pl_set_limits(ctx, slr);
/*
 * pl_setup_done - mark context `ctx` as fully set up.
 *
 * Issues one vc_set_cflags() call whose mask covers VC_VXF_STATE_SETUP and
 * VC_VXF_PERSISTENT and whose flagword contains only VC_VXF_PERSISTENT, i.e.
 * SETUP is cleared and PERSISTENT is set.  VC_VXF_STATE_INIT is not in the
 * mask.
 *
 * NOTE(review): the return statements are not visible in this view.
 */
99 pl_setup_done(xid_t ctx)
101 struct vc_ctx_flags vc_flags;
103 /* unset SETUP flag - this allows other processes to migrate */
104 /* set the PERSISTENT flag - so the context doesn't vanish */
105 /* Don't clear the STATE_INIT flag, as that would make us the init task. */
106 vc_flags.mask = VC_VXF_STATE_SETUP|VC_VXF_PERSISTENT;
107 vc_flags.flagword = VC_VXF_PERSISTENT;
108 if (vc_set_cflags(ctx, &vc_flags))
/* Bound that pl_chcontext() compares retry_count against while the context
 * still has VC_VXF_STATE_SETUP set. */
114 #define RETRY_LIMIT 10
/*
 * pl_chcontext - move the caller into context `ctx`, creating the context
 * first if it does not exist yet.
 *
 * Visible flow:
 *   - vc_get_cflags() probes the context; on failure the context is created
 *     through create_context(ctx, bcaps, slr).
 *   - while VC_VXF_STATE_SETUP is set, another process is still creating the
 *     context; the code retries, bounded by RETRY_LIMIT.
 *   - once set up, vc_net_migrate() and vc_ctx_migrate() are called; if the
 *     context disappeared in between, the whole sequence is retried.
 *
 * ctx   - context id to enter
 * bcaps - forwarded to create_context()
 * slr   - forwarded to create_context()
 *
 * NOTE(review): the loop construct, the delay between retries and all return
 * statements are not visible in this view.
 */
117 pl_chcontext(xid_t ctx, uint64_t bcaps, struct sliver_resources *slr)
120 int net_migrated = 0;
124 struct vc_ctx_flags vc_flags;
126 if (vc_get_cflags(ctx, &vc_flags))
131 /* context doesn't exist - create it */
132 if (create_context(ctx, bcaps, slr))
135 /* another process beat us in a race */
138 /* another process is creating - poll the SETUP flag */
143 /* created context and migrated to it i.e., we're done */
147 /* check the SETUP flag */
148 if (vc_flags.flagword & VC_VXF_STATE_SETUP)
150 /* context is still being setup - wait a while then retry */
/* post-increment: the comparison uses the count before this attempt */
151 if (retry_count++ >= RETRY_LIMIT)
160 /* context has been setup */
/* net_migrated short-circuits the call, so vc_net_migrate() is skipped once
 * the flag is non-zero - presumably it is set after the first successful
 * network migration (assignment not visible here; confirm). */
162 if (net_migrated || !vc_net_migrate(ctx))
164 if (!vc_ctx_migrate(ctx, 0))
169 /* context disappeared - retry */
/*
 * VC_SYSCALL(x) - wrapper placed around the vserver calls in pl_setsched().
 *
 * The visible part of the expansion executes
 *     return errno == ESRCH ? 0 : -1;
 * so the macro returns from the function that uses it: 0 when errno is
 * ESRCH, -1 for any other errno.  It can therefore only be used inside a
 * function returning an integer.
 *
 * NOTE(review): the condition guarding that return is not visible in this
 * view - presumably it tests whether x failed; confirm.
 */
175 /* it's okay for a syscall to fail because the context doesn't exist */
176 #define VC_SYSCALL(x) \
180 return errno == ESRCH ? 0 : -1; \
/*
 * pl_setsched - program the CPU scheduler parameters and scheduler flags of
 * context `ctx`.
 *
 * ctx             - context id
 * cpu_share       - stored in fill_rate2; the value (uint32_t)VC_LIM_KEEP
 *                   removes both fill-rate bits from set_mask instead
 * cpu_sched_flags - only VS_SCHED_CPU_GUARANTEED is examined
 *
 * Both vserver calls go through VC_SYSCALL(), which returns from this
 * function with 0 when errno is ESRCH and with -1 for any other errno.
 *
 * NOTE(review): the declaration of new_flags, the else branch of the
 * "guaranteed" test and the final return are not visible in this view.
 */
185 pl_setsched(xid_t ctx, uint32_t cpu_share, uint32_t cpu_sched_flags)
187 struct vc_set_sched vc_sched;
188 struct vc_ctx_flags vc_flags;
191 vc_sched.set_mask = (VC_VXSM_FILL_RATE | VC_VXSM_INTERVAL | VC_VXSM_TOKENS |
192 VC_VXSM_TOKENS_MIN | VC_VXSM_TOKENS_MAX | VC_VXSM_MSEC |
193 VC_VXSM_FILL_RATE2 | VC_VXSM_INTERVAL2 | VC_VXSM_FORCE |
195 vc_sched.fill_rate = 0;
196 vc_sched.fill_rate2 = cpu_share; /* tokens accumulated per interval */
197 vc_sched.interval = vc_sched.interval2 = 1000; /* milliseconds */
198 vc_sched.tokens = 100; /* initial allocation of tokens */
199 vc_sched.tokens_min = 50; /* need this many tokens to run */
200 vc_sched.tokens_max = 100; /* max accumulated number of tokens */
/* VC_LIM_KEEP: drop the fill-rate bits from set_mask - presumably so that
 * vc_set_sched() leaves the current rates untouched (confirm). */
202 if (cpu_share == (uint32_t)VC_LIM_KEEP)
203 vc_sched.set_mask &= ~(VC_VXSM_FILL_RATE|VC_VXSM_FILL_RATE2);
205 /* guaranteed CPU corresponds to SCHED_SHARE flag being cleared */
206 if (cpu_sched_flags & VS_SCHED_CPU_GUARANTEED) {
/* guaranteed slivers get the same rate in fill_rate as in fill_rate2 */
208 vc_sched.fill_rate = vc_sched.fill_rate2;
/* presumably the non-guaranteed (else) branch - confirm */
211 new_flags = VC_VXF_SCHED_SHARE;
213 VC_SYSCALL(vc_set_sched(ctx, &vc_sched));
/* VC_VXF_SCHED_HARD is requested in both cases; only SCHED_SHARE varies */
215 vc_flags.mask = VC_VXF_SCHED_FLAGS;
216 vc_flags.flagword = new_flags | VC_VXF_SCHED_HARD;
217 VC_SYSCALL(vc_set_cflags(ctx, &vc_flags));
/*
 * One entry of the table walked by pl_get_limits(): a configuration file
 * name (accessed as r->name there; the member declaration is not visible in
 * this view) paired with the location that receives the parsed value.
 */
222 struct pl_resources {
/* destination for the parsed number; NULL for the "bcapabilities" entry */
224 unsigned long long *limit;
/*
 * WHITESPACE(buffer, index, len) - advance `index` past whitespace in
 * `buffer`; jumps to a label named `out` in the enclosing function once
 * `index` is no longer below `len`.
 *
 * NOTE(review): buffer[index] is read by isspace() before `index < len` is
 * tested, so buffer[len] may be read.  The arguments are expanded without
 * parentheses and `index` is evaluated several times, so pass plain
 * variables only.  No use of this macro appears in the lines visible here.
 */
227 #define WHITESPACE(buffer,index,len) \
228 while(isspace((int)buffer[index])) \
229 if (index < len) index++; else goto out;
/* Directory prefix; pl_get_limits() appends the context name to it and
 * chdir()s into the result. */
231 #define VSERVERCONF "/etc/vservers/"
/*
 * pl_get_limits - fill *slr from the files found under
 * VSERVERCONF "<context>/".
 *
 * context - name appended to VSERVERCONF to form the directory
 * slr     - output; every field is first reset to a default (VC_LIM_KEEP for
 *           the CPU share and all rlimits, 0 for vs_cpuguaranteed, 1 for
 *           vs_whitelisted, zeroed capabilities) and then overwritten by
 *           whatever the files provide
 *
 * The function opens "." to keep a handle on the current directory, changes
 * into the configuration directory and opens each sliver_list entry by its
 * relative name.  "bcapabilities" is parsed with vc_list2bcap(); every other
 * file is read with fgets() and converted with atoi() when its first
 * character is a digit.
 *
 * NOTE(review): the declarations of cwd, fb, buf and i, the handling of a
 * failed fopen(), the cleanup code and the return statements are not visible
 * in this view.
 */
233 pl_get_limits(char *context, struct sliver_resources *slr)
/* NOTE(review): the malloc() result is used by sprintf() below without a
 * visible NULL check.  The buffer is sized for the directory plus the
 * longest relative file name, although only the directory is written in the
 * lines visible here. */
237 size_t len = strlen(VSERVERCONF) + strlen(context) + NULLBYTE_SIZE;
238 char *conf = (char *)malloc(len + strlen("rlimits/openfd.hard"));
239 struct pl_resources *r;
/* table of relative file names and the slr fields they populate */
240 struct pl_resources sliver_list[] = {
241 {"sched/fill-rate2", &slr->vs_cpu},
242 {"sched/fill-rate", &slr->vs_cpuguaranteed},
244 {"rlimits/nproc.hard", &slr->vs_nproc.hard},
245 {"rlimits/nproc.soft", &slr->vs_nproc.soft},
246 {"rlimits/nproc.min", &slr->vs_nproc.min},
248 {"rlimits/rss.hard", &slr->vs_rss.hard},
249 {"rlimits/rss.soft", &slr->vs_rss.soft},
250 {"rlimits/rss.min", &slr->vs_rss.min},
252 {"rlimits/as.hard", &slr->vs_as.hard},
253 {"rlimits/as.soft", &slr->vs_as.soft},
254 {"rlimits/as.min", &slr->vs_as.min},
256 {"rlimits/openfd.hard", &slr->vs_openfd.hard},
257 {"rlimits/openfd.soft", &slr->vs_openfd.soft},
258 {"rlimits/openfd.min", &slr->vs_openfd.min},
260 {"whitelisted", &slr->vs_whitelisted},
/* no numeric destination: this entry is handled by name in the loop below */
262 {"bcapabilities", NULL},
266 sprintf(conf, "%s%s", VSERVERCONF, context);
/* defaults, used for every file that is absent or does not start with a digit */
268 slr->vs_cpu = VC_LIM_KEEP;
269 slr->vs_cpuguaranteed = 0;
271 slr->vs_rss.hard = VC_LIM_KEEP;
272 slr->vs_rss.soft = VC_LIM_KEEP;
273 slr->vs_rss.min = VC_LIM_KEEP;
275 slr->vs_as.hard = VC_LIM_KEEP;
276 slr->vs_as.soft = VC_LIM_KEEP;
277 slr->vs_as.min = VC_LIM_KEEP;
280 slr->vs_nproc.hard = VC_LIM_KEEP;
281 slr->vs_nproc.soft = VC_LIM_KEEP;
282 slr->vs_nproc.min = VC_LIM_KEEP;
284 slr->vs_openfd.hard = VC_LIM_KEEP;
285 slr->vs_openfd.soft = VC_LIM_KEEP;
286 slr->vs_openfd.min = VC_LIM_KEEP;
288 slr->vs_whitelisted = 1;
290 slr->vs_capabilities.bcaps = 0;
291 slr->vs_capabilities.bmask = 0;
292 slr->vs_capabilities.ccaps = 0;
293 slr->vs_capabilities.cmask = 0;
/* keep a descriptor for the current directory - presumably so it can be
 * restored after the chdir() below (restore code not visible; confirm) */
295 cwd = open(".", O_RDONLY);
297 perror("cannot get a handle on .");
300 if (chdir(conf) == -1) {
301 fprintf(stderr, "cannot chdir to ");
/* the walk stops at the first entry whose name is NULL */
306 for (r = &sliver_list[0]; r->name; r++) {
308 fb = fopen(r->name, "r");
312 if (strcmp(r->name, "bcapabilities") == 0) {
314 struct vc_err_listparser err;
/* len is reused here for the number of bytes read from the file */
316 len = fread(buf, 1, sizeof(buf), fb);
317 for (i = 0; i < len; i++) {
321 vc_list2bcap(buf, len, &err, &slr->vs_capabilities);
/* NOTE(review): atoi() yields an int, so values beyond the int range cannot
 * be represented even though the destination is unsigned long long. */
324 if (fgets(buf, sizeof(buf), fb) != NULL && isdigit(*buf))
325 *r->limit = atoi(buf);
/*
 * adjust_lim - fold the vserver limit triple *vcr (min / soft / hard) into
 * the process rlimit *lim.  A field equal to VC_LIM_KEEP is skipped.
 *
 *   min  - raises lim->rlim_cur and lim->rlim_max to at least vcr->min
 *   soft - written to lim->rlim_cur; compared against vcr->min when min is set
 *   hard - see the review note at the assignment below
 *
 * pl_set_limits() uses the result as a boolean: non-zero means "call
 * setrlimit()/vc_set_rlimit()".
 *
 * NOTE(review): the case labels, break statements and return statement are
 * not visible in this view.
 */
337 adjust_lim(struct vc_rlimit *vcr, struct rlimit *lim)
340 if (vcr->min != VC_LIM_KEEP) {
341 if (vcr->min > lim->rlim_cur) {
342 lim->rlim_cur = vcr->min;
345 if (vcr->min > lim->rlim_max) {
346 lim->rlim_max = vcr->min;
351 if (vcr->soft != VC_LIM_KEEP) {
/* the controlling expression is a comparison, so it is 0 or 1; presumably
 * the "min is set" case skips the assignment when soft < min - confirm */
352 switch (vcr->min != VC_LIM_KEEP) {
354 if (vcr->soft < vcr->min)
357 lim->rlim_cur = vcr->soft;
362 if (vcr->hard != VC_LIM_KEEP) {
363 switch (vcr->min != VC_LIM_KEEP) {
365 if (vcr->hard < vcr->min)
/* NOTE(review): the hard limit is stored in rlim_cur, the same field the
 * soft branch writes; rlim_max is never set from vcr->hard in the visible
 * code.  This looks like it may be meant to be rlim_max - confirm. */
368 lim->rlim_cur = vcr->hard;
/*
 * pl_set_limits - apply the settings held in *slr to context `ctx`.
 *
 * For RLIMIT_RSS, RLIMIT_AS, RLIMIT_NPROC and RLIMIT_NOFILE the current
 * process limit is fetched with getrlimit() and merged with the sliver's
 * values by adjust_lim(); when that reports a change, the limit is written
 * with setrlimit() and vc_set_rlimit().  VC_VLIMIT_OPENFD is also set from
 * vs_openfd.  Afterwards the capabilities are installed with vc_set_ccaps()
 * and the scheduler is configured through pl_setsched().
 *
 * ctx - context id
 * slr - sliver settings; vs_capabilities.bmask is overwritten here with the
 *       result of vc_get_insecurebcaps()
 *
 * NOTE(review): the return values of getrlimit() and setrlimit() are not
 * checked.  What follows each PERROR() call, and the end of the function,
 * are not visible in this view.
 */
376 pl_set_limits(xid_t ctx, struct sliver_resources *slr)
378 struct rlimit lim; /* getrlimit values */
379 unsigned long long vs_cpu;
380 uint32_t cpu_sched_flags;
383 /* set memory limits */
384 getrlimit(RLIMIT_RSS,&lim);
385 if (adjust_lim(&slr->vs_rss, &lim)) {
386 setrlimit(RLIMIT_RSS, &lim);
387 if (vc_set_rlimit(ctx, RLIMIT_RSS, &slr->vs_rss))
389 PERROR("pl_setrlimit(%u, RLIMIT_RSS)", ctx);
394 /* set address space limits */
395 getrlimit(RLIMIT_AS,&lim);
396 if (adjust_lim(&slr->vs_as, &lim)) {
397 setrlimit(RLIMIT_AS, &lim);
398 if (vc_set_rlimit(ctx, RLIMIT_AS, &slr->vs_as))
400 PERROR("pl_setrlimit(%u, RLIMIT_AS)", ctx);
404 /* set nproc limit */
405 getrlimit(RLIMIT_NPROC,&lim);
406 if (adjust_lim(&slr->vs_nproc, &lim)) {
407 setrlimit(RLIMIT_NPROC, &lim);
408 if (vc_set_rlimit(ctx, RLIMIT_NPROC, &slr->vs_nproc))
410 PERROR("pl_setrlimit(%u, RLIMIT_NPROC)", ctx);
415 /* set openfd limit */
416 getrlimit(RLIMIT_NOFILE,&lim);
417 if (adjust_lim(&slr->vs_openfd, &lim)) {
418 setrlimit(RLIMIT_NOFILE, &lim);
419 if (vc_set_rlimit(ctx, RLIMIT_NOFILE, &slr->vs_openfd))
421 PERROR("pl_setrlimit(%u, RLIMIT_NOFILE)", ctx);
/* the same vs_openfd values are also applied to the VC_VLIMIT_OPENFD resource */
424 if (vc_set_rlimit(ctx, VC_VLIMIT_OPENFD, &slr->vs_openfd))
426 PERROR("pl_setrlimit(%u, VLIMIT_OPENFD)", ctx);
/* vs_cpu is narrowed to pl_setsched()'s uint32_t cpu_share parameter; that
 * function compares against (uint32_t)VC_LIM_KEEP, presumably so the "keep"
 * value is still recognised after the conversion. */
430 vs_cpu = slr->vs_cpu;
/* only the VS_SCHED_CPU_GUARANTEED bit of vs_cpuguaranteed is forwarded */
431 cpu_sched_flags = slr->vs_cpuguaranteed & VS_SCHED_CPU_GUARANTEED;
433 slr->vs_capabilities.bmask = vc_get_insecurebcaps();
434 if (vc_set_ccaps(ctx, &slr->vs_capabilities) < 0) {
435 PERROR("pl_setcaps(%u)", ctx);
443 if (pl_setsched(ctx, vs_cpu, cpu_sched_flags) < 0) {
444 PERROR("pl_setsched(%u)", ctx);