1 /* Copyright 2005 Princeton University
3 Redistribution and use in source and binary forms, with or without
4 modification, are permitted provided that the following conditions
7 * Redistributions of source code must retain the above copyright
8 notice, this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above
11 copyright notice, this list of conditions and the following
12 disclaimer in the documentation and/or other materials provided
13 with the distribution.
15 * Neither the name of the copyright holder nor the names of its
16 contributors may be used to endorse or promote products derived
17 from this software without specific prior written permission.
19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PRINCETON
23 UNIVERSITY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
25 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
26 OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
27 AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
29 WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 POSSIBILITY OF SUCH DAMAGE.
45 #include <sys/resource.h>
49 #include "planetlab.h"
52 create_context(xid_t ctx, uint64_t bcaps, struct sliver_resources *slr)
54 struct vc_ctx_caps vc_caps;
55 struct vc_net_nx vc_net;
56 struct vc_net_flags vc_nf;
58 /* Create network context */
59 if (vc_net_create(ctx) == VC_NOCTX)
62 /* Make the network context persistent */
63 vc_nf.mask = vc_nf.flagword = VC_NXF_PERSISTENT;
64 if (vc_set_nflags(ctx, &vc_nf))
67 /* XXX: Allow access to all IPv4 addresses (for now) */
68 vc_net.type = vcNET_IPV4;
72 if (vc_net_add(ctx, &vc_net) == -1)
76 * Create context info - this sets the STATE_SETUP and STATE_INIT flags.
78 if (vc_ctx_create(ctx) == VC_NOCTX)
81 /* Set capabilities - these don't take effect until SETUP flag is unset */
82 vc_caps.bcaps = bcaps;
83 vc_caps.bmask = ~0ULL; /* currently unused */
84 vc_caps.ccaps = 0; /* don't want any of these */
85 vc_caps.cmask = ~0ULL;
86 if (vc_set_ccaps(ctx, &vc_caps))
89 pl_set_limits(ctx, slr);
95 pl_setup_done(xid_t ctx)
97 struct vc_ctx_flags vc_flags;
99 /* unset SETUP flag - this allows other processes to migrate */
100 /* set the PERSISTENT flag - so the context doesn't vanish */
101 /* Don't clear the STATE_INIT flag, as that would make us the init task. */
102 vc_flags.mask = VC_VXF_STATE_SETUP|VC_VXF_PERSISTENT;
103 vc_flags.flagword = VC_VXF_PERSISTENT;
104 if (vc_set_cflags(ctx, &vc_flags))
110 #define RETRY_LIMIT 10
113 pl_chcontext(xid_t ctx, uint64_t bcaps, struct sliver_resources *slr)
116 int net_migrated = 0;
120 struct vc_ctx_flags vc_flags;
122 if (vc_get_cflags(ctx, &vc_flags))
127 /* context doesn't exist - create it */
128 if (create_context(ctx, bcaps,slr))
131 /* another process beat us in a race */
134 /* another process is creating - poll the SETUP flag */
139 /* created context and migrated to it i.e., we're done */
143 /* check the SETUP flag */
144 if (vc_flags.flagword & VC_VXF_STATE_SETUP)
146 /* context is still being set up - wait a while then retry */
147 if (retry_count++ >= RETRY_LIMIT)
156 /* context has been set up */
158 if (net_migrated || !vc_net_migrate(ctx))
160 if (!vc_ctx_migrate(ctx, 0))
165 /* context disappeared - retry */
171 /* it's okay for a syscall to fail because the context doesn't exist */
172 #define VC_SYSCALL(x) \
176 return errno == ESRCH ? 0 : -1; \
181 pl_setsched(xid_t ctx, uint32_t cpu_share, uint32_t cpu_sched_flags)
183 struct vc_set_sched vc_sched;
184 struct vc_ctx_flags vc_flags;
187 vc_sched.set_mask = (VC_VXSM_FILL_RATE | VC_VXSM_INTERVAL | VC_VXSM_TOKENS |
188 VC_VXSM_TOKENS_MIN | VC_VXSM_TOKENS_MAX | VC_VXSM_MSEC |
189 VC_VXSM_FILL_RATE2 | VC_VXSM_INTERVAL2 | VC_VXSM_FORCE |
191 vc_sched.fill_rate = 0;
192 vc_sched.fill_rate2 = cpu_share; /* tokens accumulated per interval */
193 vc_sched.interval = vc_sched.interval2 = 1000; /* milliseconds */
194 vc_sched.tokens = 100; /* initial allocation of tokens */
195 vc_sched.tokens_min = 50; /* need this many tokens to run */
196 vc_sched.tokens_max = 100; /* max accumulated number of tokens */
198 if (cpu_share == VC_LIM_KEEP)
199 vc_sched.set_mask &= ~(VC_VXSM_FILL_RATE|VC_VXSM_FILL_RATE2);
201 VC_SYSCALL(vc_set_sched(ctx, &vc_sched));
203 /* get current flag values */
204 VC_SYSCALL(vc_get_cflags(ctx, &vc_flags));
206 /* guaranteed CPU corresponds to SCHED_SHARE flag being cleared */
207 if (cpu_sched_flags & VS_SCHED_CPU_GUARANTEED) {
208 new_flags = VC_VXF_SCHED_SHARE;
209 vc_sched.fill_rate = vc_sched.fill_rate2;
214 if ((vc_flags.flagword & VC_VXF_SCHED_SHARE) != new_flags)
216 vc_flags.mask = VC_VXF_SCHED_FLAGS;
217 vc_flags.flagword = new_flags | VC_VXF_SCHED_HARD;
218 VC_SYSCALL(vc_set_cflags(ctx, &vc_flags));
224 struct pl_resources {
226 unsigned long long *limit;
/*
 * WHITESPACE(buffer, index, len)
 *
 * Advance `index` past any run of whitespace characters in `buffer`.
 * If `index` reaches `len` while still on whitespace, control jumps to a
 * label named `out`, which the calling function must provide.
 *
 * `index` must be a modifiable lvalue; it is evaluated more than once.
 *
 * The character is converted to unsigned char before being handed to
 * isspace(): passing a negative plain-char value (any byte >= 0x80 on
 * platforms where char is signed) is undefined behavior.
 *
 * The body is wrapped in do { ... } while (0) so the macro expands to a
 * single statement and cannot capture a following `else`.
 */
#define WHITESPACE(buffer,index,len) \
  do { \
    while (isspace((unsigned char)(buffer)[(index)])) { \
      if ((index) < (len)) \
        (index)++; \
      else \
        goto out; \
    } \
  } while (0)
233 #define VSERVERCONF "/etc/vservers/"
235 pl_get_limits(char *context, struct sliver_resources *slr)
239 size_t len = strlen(VSERVERCONF) + strlen(context) + NULLBYTE_SIZE;
240 char *conf = (char *)malloc(len + strlen("rlimits/openfd.hard"));
241 struct pl_resources *r;
242 struct pl_resources sliver_list[] = {
243 {"sched/fill-rate2", &slr->vs_cpu},
244 {"sched/fill-rate", &slr->vs_cpuguaranteed},
246 {"rlimits/nproc.hard", &slr->vs_nproc.hard},
247 {"rlimits/nproc.soft", &slr->vs_nproc.soft},
248 {"rlimits/nproc.min", &slr->vs_nproc.min},
250 {"rlimits/rss.hard", &slr->vs_rss.hard},
251 {"rlimits/rss.soft", &slr->vs_rss.soft},
252 {"rlimits/rss.min", &slr->vs_rss.min},
254 {"rlimits/as.hard", &slr->vs_as.hard},
255 {"rlimits/as.soft", &slr->vs_as.soft},
256 {"rlimits/as.min", &slr->vs_as.min},
258 {"rlimits/openfd.hard", &slr->vs_openfd.hard},
259 {"rlimits/openfd.soft", &slr->vs_openfd.soft},
260 {"rlimits/openfd.min", &slr->vs_openfd.min},
262 {"whitelisted", &slr->vs_whitelisted},
266 sprintf(conf, "%s%s", VSERVERCONF, context);
268 slr->vs_cpu = VC_LIM_KEEP;
269 slr->vs_cpuguaranteed = 0;
271 slr->vs_rss.hard = VC_LIM_KEEP;
272 slr->vs_rss.soft = VC_LIM_KEEP;
273 slr->vs_rss.min = VC_LIM_KEEP;
275 slr->vs_as.hard = VC_LIM_KEEP;
276 slr->vs_as.soft = VC_LIM_KEEP;
277 slr->vs_as.min = VC_LIM_KEEP;
280 slr->vs_nproc.hard = VC_LIM_KEEP;
281 slr->vs_nproc.soft = VC_LIM_KEEP;
282 slr->vs_nproc.min = VC_LIM_KEEP;
284 slr->vs_openfd.hard = VC_LIM_KEEP;
285 slr->vs_openfd.soft = VC_LIM_KEEP;
286 slr->vs_openfd.min = VC_LIM_KEEP;
288 slr->vs_whitelisted = 1;
290 cwd = open(".", O_RDONLY);
292 perror("cannot get a handle on .");
295 if (chdir(conf) == -1) {
296 fprintf(stderr, "cannot chdir to ");
301 for (r = &sliver_list[0]; r->name; r++) {
303 fb = fopen(r->name, "r");
306 if (fgets(buf, sizeof(buf), fb) != NULL && isdigit(*buf)) {
307 *r->limit = atoi(buf);
317 /* open the conf file for reading */
318 fb = fopen(conf,"r");
321 char *buffer = malloc(1000);
324 /* the conf file exists */
325 while((p=fgets(buffer,1000-1,fb))!=NULL) {
327 len = strnlen(buffer,1000);
328 WHITESPACE(buffer,index,len);
329 if (buffer[index] == '#')
332 for (r=&sliver_list[0]; r->name; r++)
333 if ((p=strstr(&buffer[index],r->name))!=NULL) {
334 /* adjust index into buffer */
335 index+= (p-&buffer[index])+strlen(r->name);
337 /* skip over whitespace */
338 WHITESPACE(buffer,index,len);
340 /* expecting to see = sign */
341 if (buffer[index++]!='=') goto out;
343 /* skip over whitespace */
344 WHITESPACE(buffer,index,len);
346 /* expecting to see a digit for number */
347 if (!isdigit((int)buffer[index])) goto out;
349 *r->limit = atoi(&buffer[index]);
350 if (0) /* for debugging only */
351 fprintf(stderr,"pl_get_limits found %s=%lld\n",
360 fprintf(stderr,"cannot open %s\n",conf);
367 adjust_lim(struct vc_rlimit *vcr, struct rlimit *lim)
370 if (vcr->min != VC_LIM_KEEP) {
371 if (vcr->min > lim->rlim_cur) {
372 lim->rlim_cur = vcr->min;
375 if (vcr->min > lim->rlim_max) {
376 lim->rlim_max = vcr->min;
381 if (vcr->soft != VC_LIM_KEEP) {
382 switch (vcr->min != VC_LIM_KEEP) {
384 if (vcr->soft < vcr->min)
387 lim->rlim_cur = vcr->soft;
392 if (vcr->hard != VC_LIM_KEEP) {
393 switch (vcr->min != VC_LIM_KEEP) {
395 if (vcr->hard < vcr->min)
398 lim->rlim_cur = vcr->hard;
407 pl_set_limits(xid_t ctx, struct sliver_resources *slr)
409 struct rlimit lim; /* getrlimit values */
410 unsigned long long vs_cpu;
411 uint32_t cpu_sched_flags;
414 /* set memory limits */
415 getrlimit(RLIMIT_RSS,&lim);
416 if (adjust_lim(&slr->vs_rss, &lim)) {
417 setrlimit(RLIMIT_RSS, &lim);
418 if (vc_set_rlimit(ctx, RLIMIT_RSS, &slr->vs_rss))
420 PERROR("pl_setrlimit(%u, RLIMIT_RSS)", ctx);
425 /* set address space limits */
426 getrlimit(RLIMIT_AS,&lim);
427 if (adjust_lim(&slr->vs_as, &lim)) {
428 setrlimit(RLIMIT_AS, &lim);
429 if (vc_set_rlimit(ctx, RLIMIT_AS, &slr->vs_as))
431 PERROR("pl_setrlimit(%u, RLIMIT_AS)", ctx);
435 /* set nproc limit */
436 getrlimit(RLIMIT_NPROC,&lim);
437 if (adjust_lim(&slr->vs_nproc, &lim)) {
438 setrlimit(RLIMIT_NPROC, &lim);
439 if (vc_set_rlimit(ctx, RLIMIT_NPROC, &slr->vs_nproc))
441 PERROR("pl_setrlimit(%u, RLIMIT_NPROC)", ctx);
446 /* set openfd limit */
447 getrlimit(RLIMIT_NOFILE,&lim);
448 if (adjust_lim(&slr->vs_openfd, &lim)) {
449 setrlimit(RLIMIT_NOFILE, &lim);
450 if (vc_set_rlimit(ctx, RLIMIT_NOFILE, &slr->vs_openfd))
452 PERROR("pl_setrlimit(%u, RLIMIT_NOFILE)", ctx);
455 if (vc_set_rlimit(ctx, VC_VLIMIT_OPENFD, &slr->vs_openfd))
457 PERROR("pl_setrlimit(%u, VLIMIT_OPENFD)", ctx);
461 vs_cpu = slr->vs_cpu;
462 cpu_sched_flags = slr->vs_cpuguaranteed & VS_SCHED_CPU_GUARANTEED;
468 if (pl_setsched(ctx, vs_cpu, cpu_sched_flags) < 0) {
469 PERROR("pl_setsched(&u)", ctx);