/* Copyright 2005 Princeton University

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:

      * Redistributions of source code must retain the above copyright
        notice, this list of conditions and the following disclaimer.

      * Redistributions in binary form must reproduce the above
        copyright notice, this list of conditions and the following
        disclaimer in the documentation and/or other materials provided
        with the distribution.

      * Neither the name of the copyright holder nor the names of its
        contributors may be used to endorse or promote products derived
        from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PRINCETON
UNIVERSITY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
#include <fcntl.h>
#include <ctype.h>
#include <stdint.h>
#include <sys/resource.h>

#include "vserver.h"
#include "planetlab.h"

#ifndef NULLBYTE_SIZE
/* assumed fallback in case the headers do not define it: room for the '\0' */
#define NULLBYTE_SIZE 1
#endif

static int
create_context(xid_t ctx, uint64_t bcaps, struct sliver_resources *slr)
{
  struct vc_ctx_caps  vc_caps;
  struct vc_net_nx    vc_net;
  struct vc_net_flags vc_nf;

  /* Create network context */
  if (vc_net_create(ctx) == VC_NOCTX) {
    if (errno == EEXIST)
      goto process;  /* network context already exists - reuse it */
    return -1;
  }

  /* Make the network context persistent */
  vc_nf.mask = vc_nf.flagword = VC_NXF_PERSISTENT;
  if (vc_set_nflags(ctx, &vc_nf))
    return -1;

process:
  /*
   * Create context info - this sets the STATE_SETUP and STATE_INIT flags.
   */
  if (vc_ctx_create(ctx) == VC_NOCTX)
    return -1;

  /* Set capabilities - these don't take effect until SETUP flag is unset */
  vc_caps.bcaps = bcaps;
  vc_caps.bmask = ~0ULL;  /* currently unused */
  vc_caps.ccaps = 0;      /* don't want any of these */
  vc_caps.cmask = ~0ULL;
  if (vc_set_ccaps(ctx, &vc_caps))
    return -1;

  pl_set_limits(ctx, slr);

  return 0;
}

int
pl_setup_done(xid_t ctx)
{
  struct vc_ctx_flags vc_flags;

  /* unset SETUP flag - this allows other processes to migrate */
  /* set the PERSISTENT flag - so the context doesn't vanish */
  /* Don't clear the STATE_INIT flag, as that would make us the init task. */
  vc_flags.mask = VC_VXF_STATE_SETUP|VC_VXF_PERSISTENT;
  vc_flags.flagword = VC_VXF_PERSISTENT;
  if (vc_set_cflags(ctx, &vc_flags))
    return -1;

  return 0;
}

#define RETRY_LIMIT  10

int
pl_chcontext(xid_t ctx, uint64_t bcaps, struct sliver_resources *slr)
{
  int retry_count = 0;
  int net_migrated = 0;

  for (;;) {
    struct vc_ctx_flags vc_flags;

    if (vc_get_cflags(ctx, &vc_flags)) {
      if (errno != ESRCH)
        return -1;

      /* context doesn't exist - create it */
      if (create_context(ctx, bcaps, slr)) {
        if (errno == EEXIST)
          goto migrate;   /* another process beat us in a race */
        if (errno == EBUSY)
          continue;       /* another process is creating - poll the SETUP flag */
        return -1;
      }

      /* created context and migrated to it i.e., we're done */
      break;
    }

    /* check the SETUP flag */
    if (vc_flags.flagword & VC_VXF_STATE_SETUP) {
      /* context is still being setup - wait a while then retry */
      if (retry_count++ >= RETRY_LIMIT) {
        errno = EBUSY;
        return -1;
      }
      sleep(1);
      continue;
    }

    /* context has been setup */
  migrate:
    if (net_migrated || !vc_net_migrate(ctx)) {
      if (!vc_ctx_migrate(ctx, 0))
        break;
      net_migrated = 1;
    }

    /* context disappeared - retry */
  }

  return 0;
}

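/*
 * Illustrative sketch (not part of the original source): one plausible way a
 * tool that needs to run inside a sliver might combine pl_get_limits(),
 * pl_chcontext(), and pl_setup_done().  The function name, the slice name
 * passed by the caller, and the PL_USAGE_EXAMPLES guard are assumptions made
 * for this example only.
 */
#ifdef PL_USAGE_EXAMPLES
static int
example_enter_sliver(xid_t xid, char *slice_name)
{
  struct sliver_resources slr;

  /* read per-sliver limits from the /etc/vservers/<slice_name>/ config tree */
  pl_get_limits(slice_name, &slr);

  /* create the context if needed and migrate this process into it,
   * granting every bcap except the ones considered insecure */
  if (pl_chcontext(xid, ~vc_get_insecurebcaps(), &slr))
    return -1;

  /* clear the SETUP flag so other processes may migrate into the context */
  return pl_setup_done(xid);
}
#endif /* PL_USAGE_EXAMPLES */
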
/* it's okay for a syscall to fail because the context doesn't exist */
#define VC_SYSCALL(x)                           \
  do {                                          \
    if (x)                                      \
      return errno == ESRCH ? 0 : -1;           \
  } while (0)

int
pl_setsched(xid_t ctx, uint32_t cpu_share, uint32_t cpu_sched_flags)
{
  struct vc_set_sched vc_sched;
  struct vc_ctx_flags vc_flags;
  uint32_t new_flags;

  vc_sched.set_mask = (VC_VXSM_FILL_RATE | VC_VXSM_INTERVAL | VC_VXSM_TOKENS |
                       VC_VXSM_TOKENS_MIN | VC_VXSM_TOKENS_MAX | VC_VXSM_MSEC |
                       VC_VXSM_FILL_RATE2 | VC_VXSM_INTERVAL2 | VC_VXSM_FORCE |
                       VC_VXSM_IDLE_TIME);
  vc_sched.fill_rate = 0;
  vc_sched.fill_rate2 = cpu_share;  /* tokens accumulated per interval */
  vc_sched.interval = vc_sched.interval2 = 1000;  /* milliseconds */
  vc_sched.tokens = 100;      /* initial allocation of tokens */
  vc_sched.tokens_min = 50;   /* need this many tokens to run */
  vc_sched.tokens_max = 100;  /* max accumulated number of tokens */

  if (cpu_share == (uint32_t)VC_LIM_KEEP)
    vc_sched.set_mask &= ~(VC_VXSM_FILL_RATE|VC_VXSM_FILL_RATE2);

  /* guaranteed CPU corresponds to SCHED_SHARE flag being cleared */
  if (cpu_sched_flags & VS_SCHED_CPU_GUARANTEED) {
    new_flags = 0;
    vc_sched.fill_rate = vc_sched.fill_rate2;
  }
  else
    new_flags = VC_VXF_SCHED_SHARE;

  VC_SYSCALL(vc_set_sched(ctx, &vc_sched));

  vc_flags.mask = VC_VXF_SCHED_FLAGS;
  vc_flags.flagword = new_flags | VC_VXF_SCHED_HARD;
  VC_SYSCALL(vc_set_cflags(ctx, &vc_flags));

  return 0;
}

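/*
 * Illustrative sketch (not part of the original source): how the scheduler
 * parameters above are meant to be driven.  cpu_share becomes fill_rate2
 * (tokens added per 1000 ms interval), so cpu_share/1000 is roughly the
 * fraction of one CPU the sliver receives; VS_SCHED_CPU_GUARANTEED also
 * copies the share into fill_rate, turning it into a reserved minimum.  The
 * xid values and the PL_USAGE_EXAMPLES guard are assumptions for this sketch.
 */
#ifdef PL_USAGE_EXAMPLES
static void
example_setsched(void)
{
  /* best-effort share of roughly a quarter of one CPU */
  pl_setsched(1001, 250, 0);

  /* the same share, but reserved (guaranteed) for the sliver */
  pl_setsched(1002, 250, VS_SCHED_CPU_GUARANTEED);

  /* leave the share untouched; only refresh the scheduler flags */
  pl_setsched(1003, (uint32_t)VC_LIM_KEEP, 0);
}
#endif /* PL_USAGE_EXAMPLES */
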
struct pl_resources {
  char *name;
  unsigned long long *limit;
};

#define WHITESPACE(buffer,index,len)     \
  while(isspace((int)buffer[index]))     \
    if (index < len) index++; else goto out;

#define VSERVERCONF "/etc/vservers/"
void
pl_get_limits(char *context, struct sliver_resources *slr)
{
  FILE *fb;
  int cwd;
  size_t len = strlen(VSERVERCONF) + strlen(context) + NULLBYTE_SIZE;
  char *conf = (char *)malloc(len + strlen("rlimits/openfd.hard"));
  struct pl_resources *r;
  struct pl_resources sliver_list[] = {
    {"sched/fill-rate2", &slr->vs_cpu},
    {"sched/fill-rate", &slr->vs_cpuguaranteed},

    {"rlimits/nproc.hard", &slr->vs_nproc.hard},
    {"rlimits/nproc.soft", &slr->vs_nproc.soft},
    {"rlimits/nproc.min", &slr->vs_nproc.min},

    {"rlimits/rss.hard", &slr->vs_rss.hard},
    {"rlimits/rss.soft", &slr->vs_rss.soft},
    {"rlimits/rss.min", &slr->vs_rss.min},

    {"rlimits/as.hard", &slr->vs_as.hard},
    {"rlimits/as.soft", &slr->vs_as.soft},
    {"rlimits/as.min", &slr->vs_as.min},

    {"rlimits/openfd.hard", &slr->vs_openfd.hard},
    {"rlimits/openfd.soft", &slr->vs_openfd.soft},
    {"rlimits/openfd.min", &slr->vs_openfd.min},

    {"whitelisted", &slr->vs_whitelisted},

    {"bcapabilities", NULL},

    {0, 0}  /* terminator for the r->name loop below */
  };

  sprintf(conf, "%s%s", VSERVERCONF, context);

  /* defaults, used when a config file is missing or unreadable */
  slr->vs_cpu = VC_LIM_KEEP;
  slr->vs_cpuguaranteed = 0;

  slr->vs_rss.hard = VC_LIM_KEEP;
  slr->vs_rss.soft = VC_LIM_KEEP;
  slr->vs_rss.min = VC_LIM_KEEP;

  slr->vs_as.hard = VC_LIM_KEEP;
  slr->vs_as.soft = VC_LIM_KEEP;
  slr->vs_as.min = VC_LIM_KEEP;

  slr->vs_nproc.hard = VC_LIM_KEEP;
  slr->vs_nproc.soft = VC_LIM_KEEP;
  slr->vs_nproc.min = VC_LIM_KEEP;

  slr->vs_openfd.hard = VC_LIM_KEEP;
  slr->vs_openfd.soft = VC_LIM_KEEP;
  slr->vs_openfd.min = VC_LIM_KEEP;

  slr->vs_whitelisted = 1;

  slr->vs_capabilities.bcaps = 0;
  slr->vs_capabilities.bmask = 0;
  slr->vs_capabilities.ccaps = 0;
  slr->vs_capabilities.cmask = 0;

  cwd = open(".", O_RDONLY);
  if (cwd == -1) {
    perror("cannot get a handle on .");
    goto out;
  }
  if (chdir(conf) == -1) {
    fprintf(stderr, "cannot chdir to ");
    perror(conf);
    goto out_fd;
  }

  for (r = &sliver_list[0]; r->name; r++) {
    char buf[1000];

    fb = fopen(r->name, "r");
    if (fb == NULL)
      continue;

    if (strcmp(r->name, "bcapabilities") == 0) {
      size_t i;
      struct vc_err_listparser err;

      len = fread(buf, 1, sizeof(buf), fb);
      for (i = 0; i < len; i++) {
        /* vc_list2bcap expects a comma-separated list */
        if (buf[i] == '\n')
          buf[i] = ',';
      }
      vc_list2bcap(buf, len, &err, &slr->vs_capabilities);
    }
    else if (fgets(buf, sizeof(buf), fb) != NULL && isdigit(*buf))
      *r->limit = atoi(buf);

    fclose(fb);
  }

  fchdir(cwd);
out_fd:
  close(cwd);
out:
  free(conf);
}

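/*
 * Illustrative sketch (not part of the original source): the on-disk layout
 * that pl_get_limits() reads, derived from the sliver_list table above.  Each
 * entry is a small text file under /etc/vservers/<context>/ holding a number
 * (parsed with atoi), except bcapabilities, which holds a capability list:
 *
 *   /etc/vservers/example_slice/sched/fill-rate2      CPU share
 *   /etc/vservers/example_slice/rlimits/nproc.hard    process-count limit
 *   /etc/vservers/example_slice/rlimits/openfd.soft   open-file limit
 *   /etc/vservers/example_slice/bcapabilities         bounding capabilities
 *
 * "example_slice" is an assumed name used only for illustration.
 */
#ifdef PL_USAGE_EXAMPLES
static void
example_get_limits(void)
{
  struct sliver_resources slr;

  /* missing files simply leave the corresponding VC_LIM_KEEP defaults */
  pl_get_limits("example_slice", &slr);
}
#endif /* PL_USAGE_EXAMPLES */
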
static int
adjust_lim(struct vc_rlimit *vcr, struct rlimit *lim)
{
  int adjusted = 0;

  if (vcr->min != VC_LIM_KEEP) {
    if (vcr->min > lim->rlim_cur) {
      lim->rlim_cur = vcr->min;
      adjusted = 1;
    }
    if (vcr->min > lim->rlim_max) {
      lim->rlim_max = vcr->min;
      adjusted = 1;
    }
  }

  /* apply the soft limit unless it would fall below an explicit minimum */
  if (vcr->soft != VC_LIM_KEEP &&
      (vcr->min == VC_LIM_KEEP || vcr->soft >= vcr->min)) {
    lim->rlim_cur = vcr->soft;
    adjusted = 1;
  }

  /* apply the hard limit unless it would fall below an explicit minimum */
  if (vcr->hard != VC_LIM_KEEP &&
      (vcr->min == VC_LIM_KEEP || vcr->hard >= vcr->min)) {
    lim->rlim_max = vcr->hard;
    adjusted = 1;
  }

  return adjusted;
}

void
pl_set_limits(xid_t ctx, struct sliver_resources *slr)
{
  struct rlimit lim;  /* getrlimit values */
  unsigned long long vs_cpu;
  uint32_t cpu_sched_flags;

  /* set memory limits */
  getrlimit(RLIMIT_RSS, &lim);
  if (adjust_lim(&slr->vs_rss, &lim)) {
    setrlimit(RLIMIT_RSS, &lim);
    if (vc_set_rlimit(ctx, RLIMIT_RSS, &slr->vs_rss)) {
      PERROR("pl_setrlimit(%u, RLIMIT_RSS)", ctx);
      exit(1);
    }
  }

  /* set address space limits */
  getrlimit(RLIMIT_AS, &lim);
  if (adjust_lim(&slr->vs_as, &lim)) {
    setrlimit(RLIMIT_AS, &lim);
    if (vc_set_rlimit(ctx, RLIMIT_AS, &slr->vs_as)) {
      PERROR("pl_setrlimit(%u, RLIMIT_AS)", ctx);
      exit(1);
    }
  }

  /* set nproc limit */
  getrlimit(RLIMIT_NPROC, &lim);
  if (adjust_lim(&slr->vs_nproc, &lim)) {
    setrlimit(RLIMIT_NPROC, &lim);
    if (vc_set_rlimit(ctx, RLIMIT_NPROC, &slr->vs_nproc)) {
      PERROR("pl_setrlimit(%u, RLIMIT_NPROC)", ctx);
      exit(1);
    }
  }

  /* set openfd limit */
  getrlimit(RLIMIT_NOFILE, &lim);
  if (adjust_lim(&slr->vs_openfd, &lim)) {
    setrlimit(RLIMIT_NOFILE, &lim);
    if (vc_set_rlimit(ctx, RLIMIT_NOFILE, &slr->vs_openfd)) {
      PERROR("pl_setrlimit(%u, RLIMIT_NOFILE)", ctx);
      exit(1);
    }
    if (vc_set_rlimit(ctx, VC_VLIMIT_OPENFD, &slr->vs_openfd)) {
      PERROR("pl_setrlimit(%u, VLIMIT_OPENFD)", ctx);
      exit(1);
    }
  }

  vs_cpu = slr->vs_cpu;
  cpu_sched_flags = slr->vs_cpuguaranteed & VS_SCHED_CPU_GUARANTEED;

  slr->vs_capabilities.bmask = vc_get_insecurebcaps();
  if (vc_set_ccaps(ctx, &slr->vs_capabilities) < 0) {
    PERROR("pl_setcaps(%u)", ctx);
    exit(1);
  }

  if (pl_setsched(ctx, vs_cpu, cpu_sched_flags) < 0) {
    PERROR("pl_setsched(%u)", ctx);
    exit(1);
  }
}
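
/*
 * Illustrative sketch (not part of the original source): re-applying limits to
 * a context that is already running, e.g. after its configuration under
 * /etc/vservers/ has changed.  The xid, slice name, and the PL_USAGE_EXAMPLES
 * guard are assumptions made for this example only.
 */
#ifdef PL_USAGE_EXAMPLES
static void
example_refresh_limits(void)
{
  struct sliver_resources slr;

  pl_get_limits("example_slice", &slr);

  /* pushes rlimits, capabilities, and the CPU share into context 1234 */
  pl_set_limits(1234, &slr);
}
#endif /* PL_USAGE_EXAMPLES */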