1 /* Copyright 2005 Princeton University
3 Redistribution and use in source and binary forms, with or without
4 modification, are permitted provided that the following conditions
7 * Redistributions of source code must retain the above copyright
8 notice, this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above
11 copyright notice, this list of conditions and the following
12 disclaimer in the documentation and/or other materials provided
13 with the distribution.
15 * Neither the name of the copyright holder nor the names of its
16 contributors may be used to endorse or promote products derived
17 from this software without specific prior written permission.
19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PRINCETON
23 UNIVERSITY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
25 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
26 OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
27 AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
29 WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 POSSIBILITY OF SUCH DAMAGE.
45 #include <sys/resource.h>
49 #include "planetlab.h"
/*
 * create_context - build a brand-new context `ctx` and configure it.
 *
 * Steps visible below, in order:
 *   1. create the network context and flag it persistent,
 *   2. create the process context,
 *   3. install the capability set (bcaps from the caller, no ccaps),
 *   4. apply the resource limits in `slr` through pl_set_limits().
 *
 * ctx:   id of the context to create
 * bcaps: capability bits stored into vc_caps.bcaps
 * slr:   limits handed unchanged to pl_set_limits()
 *
 * NOTE(review): this listing omits the return type, the braces and the
 * statements executed when each check below fails, so the error paths and
 * the return value are not documented here.
 */
52 create_context(xid_t ctx, uint64_t bcaps, struct sliver_resources *slr)
54 struct vc_ctx_caps vc_caps;
55 struct vc_net_flags vc_nf;
57 /* Create network context */
58 if (vc_net_create(ctx) == VC_NOCTX) {
64 /* Make the network context persistent */
/* mask and flagword carry the same single bit: only PERSISTENT is touched */
65 vc_nf.mask = vc_nf.flagword = VC_NXF_PERSISTENT;
66 if (vc_set_nflags(ctx, &vc_nf))
71 * Create context info - this sets the STATE_SETUP and STATE_INIT flags.
73 if (vc_ctx_create(ctx) == VC_NOCTX)
76 /* Set capabilities - these don't take effect until SETUP flag is unset */
77 vc_caps.bcaps = bcaps;
78 vc_caps.bmask = ~0ULL; /* currently unused */
79 vc_caps.ccaps = 0; /* don't want any of these */
80 vc_caps.cmask = ~0ULL;
81 if (vc_set_ccaps(ctx, &vc_caps))
/* the result of pl_set_limits() is not used on this line */
84 pl_set_limits(ctx, slr);
/*
 * pl_setup_done - mark context `ctx` as fully set up.
 *
 * A single vc_set_cflags() call changes two flags: STATE_SETUP is cleared
 * and PERSISTENT is set. STATE_INIT is deliberately left out of the mask.
 *
 * NOTE(review): the return type and the statements following the failure
 * check are not part of this listing.
 */
90 pl_setup_done(xid_t ctx)
92 struct vc_ctx_flags vc_flags;
94 /* unset SETUP flag - this allows other processes to migrate */
95 /* set the PERSISTENT flag - so the context doesn't vanish */
96 /* Don't clear the STATE_INIT flag, as that would make us the init task. */
/* both bits are selected by the mask; only PERSISTENT is present in flagword */
97 vc_flags.mask = VC_VXF_STATE_SETUP|VC_VXF_PERSISTENT;
98 vc_flags.flagword = VC_VXF_PERSISTENT;
99 if (vc_set_cflags(ctx, &vc_flags))
/*
 * Upper bound for retry_count while waiting for another process to finish
 * setting up the context (see the SETUP-flag check in pl_chcontext).
 */
105 #define RETRY_LIMIT 10
/*
 * pl_chcontext - move the calling process into context `ctx`, creating the
 * context first when it does not exist yet.
 *
 * ctx:   target context id
 * bcaps: capability bits, forwarded to create_context()
 * slr:   resource limits, forwarded to create_context()
 *
 * Flow, as far as the visible lines and their comments show:
 *   - query the context flags; if that fails the context is created,
 *   - if someone else is still creating it, poll the SETUP flag up to
 *     RETRY_LIMIT times,
 *   - once set up, migrate the network context and then the process context,
 *   - if the context vanished in between, retry.
 *
 * NOTE(review): the loop construct, the errno tests, the sleep between
 * retries and every return statement are missing from this listing; confirm
 * against the full file before relying on details.
 */
108 pl_chcontext(xid_t ctx, uint64_t bcaps, struct sliver_resources *slr)
111 int net_migrated = 0;
117 struct vc_ctx_flags vc_flags;
119 if (vc_get_cflags(ctx, &vc_flags))
124 /* context doesn't exist - create it */
125 if (create_context(ctx, bcaps, slr))
128 /* another process beat us in a race */
131 /* another process is creating - poll the SETUP flag */
136 /* created context and migrated to it i.e., we're done */
140 /* check the SETUP flag */
141 if (vc_flags.flagword & VC_VXF_STATE_SETUP)
143 /* context is still being setup - wait a while then retry */
144 if (retry_count++ >= RETRY_LIMIT)
153 /* context has been setup */
/*
 * net_migrated short-circuits the call, so vc_net_migrate() is skipped when
 * the flag is already non-zero (presumably set after a first successful
 * network migration - the assignment is not visible here).
 */
155 if (net_migrated || !vc_net_migrate(ctx))
157 if (!vc_ctx_migrate(ctx, 0))
162 /* context disappeared - retry */
/*
 * VC_SYSCALL(x) - wrapper around a vserver call expression `x`.
 *
 * It is okay for a syscall to fail because the context doesn't exist: on the
 * visible failure path the macro makes the ENCLOSING function return 0 when
 * errno is ESRCH and -1 for any other errno. Because it expands to a
 * `return`, it can only be used inside functions returning an integer.
 *
 * NOTE(review): the macro lines that evaluate `x` and test its result are
 * not part of this listing.
 */
169 #define VC_SYSCALL(x) \
173 return errno == ESRCH ? 0 : -1; \
/*
 * pl_setsched - program the CPU scheduler parameters of context `ctx`.
 *
 * ctx:             context to configure
 * cpu_share:       stored in fill_rate2 (tokens accumulated per interval);
 *                  when equal to (uint32_t)VC_LIM_KEEP neither fill rate is
 *                  changed
 * cpu_sched_flags: when VS_SCHED_CPU_GUARANTEED is set, fill_rate is set to
 *                  the same value as fill_rate2
 *
 * The parameters are sent with vc_set_sched(), then the scheduler flags are
 * updated with vc_set_cflags(). Both calls go through VC_SYSCALL, which
 * returns from this function early when the call fails.
 *
 * NOTE(review): the return type, the declaration of new_flags, the tail of
 * the set_mask expression and the else-branch structure are missing from
 * this listing.
 */
178 pl_setsched(xid_t ctx, uint32_t cpu_share, uint32_t cpu_sched_flags)
180 struct vc_set_sched vc_sched;
181 struct vc_ctx_flags vc_flags;
/* select which vc_sched fields are applied (expression continues on a line not shown) */
184 vc_sched.set_mask = (VC_VXSM_FILL_RATE | VC_VXSM_INTERVAL | VC_VXSM_TOKENS |
185 VC_VXSM_TOKENS_MIN | VC_VXSM_TOKENS_MAX | VC_VXSM_MSEC |
186 VC_VXSM_FILL_RATE2 | VC_VXSM_INTERVAL2 | VC_VXSM_FORCE |
188 vc_sched.fill_rate = 0;
189 vc_sched.fill_rate2 = cpu_share; /* tokens accumulated per interval */
190 vc_sched.interval = vc_sched.interval2 = 1000; /* milliseconds */
191 vc_sched.tokens = 100; /* initial allocation of tokens */
192 vc_sched.tokens_min = 50; /* need this many tokens to run */
193 vc_sched.tokens_max = 100; /* max accumulated number of tokens */
/* caller asked to keep the current share: drop both fill rates from the mask */
195 if (cpu_share == (uint32_t)VC_LIM_KEEP)
196 vc_sched.set_mask &= ~(VC_VXSM_FILL_RATE|VC_VXSM_FILL_RATE2);
198 /* guaranteed CPU corresponds to SCHED_SHARE flag being cleared */
199 if (cpu_sched_flags & VS_SCHED_CPU_GUARANTEED) {
201 vc_sched.fill_rate = vc_sched.fill_rate2;
/* presumably the non-guaranteed (else) branch - the `else` line is not shown */
204 new_flags = VC_VXF_SCHED_SHARE;
206 VC_SYSCALL(vc_set_sched(ctx, &vc_sched));
/* rewrite all scheduler flags; SCHED_HARD is always included */
208 vc_flags.mask = VC_VXF_SCHED_FLAGS;
209 vc_flags.flagword = new_flags | VC_VXF_SCHED_HARD;
210 VC_SYSCALL(vc_set_cflags(ctx, &vc_flags));
/*
 * One entry of the table used by pl_get_limits(): a configuration file name
 * paired with the location that receives the value read from that file.
 * NOTE(review): the `name` member (used as r->name in pl_get_limits) and the
 * closing brace are not part of this listing.
 */
215 struct pl_resources {
/* destination for the parsed value; NULL for the "bcapabilities" entry */
217 unsigned long long *limit;
/*
 * WHITESPACE(buffer, index, len) - advance `index` past whitespace characters
 * in `buffer`. When `index` is no longer below `len` the macro jumps to a
 * label named `out`, which the enclosing function must provide.
 *
 * NOTE(review): buffer[index] is read by isspace() before `index` is compared
 * with `len`, and the macro arguments are not parenthesized.
 */
220 #define WHITESPACE(buffer,index,len) \
221 while(isspace((int)buffer[index])) \
222 if (index < len) index++; else goto out;
/* Directory prefix; the per-context configuration directory is VSERVERCONF<context> */
224 #define VSERVERCONF "/etc/vservers/"
/*
 * pl_get_limits - fill *slr with the resource limits configured for the
 * context named `context`.
 *
 * context: name appended to VSERVERCONF to form the configuration directory
 * slr:     output; every field assigned below is overwritten
 *
 * The function first resets all limits to VC_LIM_KEEP (vs_cpuguaranteed and
 * the capability words to 0), changes into the configuration directory and
 * then tries each file listed in sliver_list:
 *   - "bcapabilities" is read as a whole and parsed with vc_list2bcap(),
 *   - every other file contributes one number, parsed from its first line
 *     when that line starts with a digit.
 *
 * NOTE(review): the return type, several local declarations, the table
 * terminator, the error branches and the cleanup code are missing from this
 * listing.
 */
226 pl_get_limits(char *context, struct sliver_resources *slr)
230 size_t len = strlen(VSERVERCONF) + strlen(context) + NULLBYTE_SIZE;
/*
 * Buffer sized for the directory path plus the longest file name in the
 * table. NOTE(review): no NULL check on the malloc result is visible before
 * conf is used by sprintf below.
 */
231 char *conf = (char *)malloc(len + strlen("rlimits/openfd.hard"));
232 struct pl_resources *r;
/* file name (relative to the configuration directory) -> destination field */
233 struct pl_resources sliver_list[] = {
234 {"sched/fill-rate2", &slr->vs_cpu},
235 {"sched/fill-rate", &slr->vs_cpuguaranteed},
237 {"rlimits/nproc.hard", &slr->vs_nproc.hard},
238 {"rlimits/nproc.soft", &slr->vs_nproc.soft},
239 {"rlimits/nproc.min", &slr->vs_nproc.min},
241 {"rlimits/rss.hard", &slr->vs_rss.hard},
242 {"rlimits/rss.soft", &slr->vs_rss.soft},
243 {"rlimits/rss.min", &slr->vs_rss.min},
245 {"rlimits/as.hard", &slr->vs_as.hard},
246 {"rlimits/as.soft", &slr->vs_as.soft},
247 {"rlimits/as.min", &slr->vs_as.min},
249 {"rlimits/openfd.hard", &slr->vs_openfd.hard},
250 {"rlimits/openfd.soft", &slr->vs_openfd.soft},
251 {"rlimits/openfd.min", &slr->vs_openfd.min},
/* no destination pointer: this entry is handled separately in the loop */
253 {"bcapabilities", NULL},
257 sprintf(conf, "%s%s", VSERVERCONF, context);
/* defaults used when a configuration file is absent or not numeric */
259 slr->vs_cpu = VC_LIM_KEEP;
260 slr->vs_cpuguaranteed = 0;
262 slr->vs_rss.hard = VC_LIM_KEEP;
263 slr->vs_rss.soft = VC_LIM_KEEP;
264 slr->vs_rss.min = VC_LIM_KEEP;
266 slr->vs_as.hard = VC_LIM_KEEP;
267 slr->vs_as.soft = VC_LIM_KEEP;
268 slr->vs_as.min = VC_LIM_KEEP;
271 slr->vs_nproc.hard = VC_LIM_KEEP;
272 slr->vs_nproc.soft = VC_LIM_KEEP;
273 slr->vs_nproc.min = VC_LIM_KEEP;
275 slr->vs_openfd.hard = VC_LIM_KEEP;
276 slr->vs_openfd.soft = VC_LIM_KEEP;
277 slr->vs_openfd.min = VC_LIM_KEEP;
279 slr->vs_capabilities.bcaps = 0;
280 slr->vs_capabilities.bmask = 0;
281 slr->vs_capabilities.ccaps = 0;
282 slr->vs_capabilities.cmask = 0;
/*
 * Keep a descriptor for the current directory before chdir() below;
 * presumably used to return there afterwards (that code is not shown).
 */
284 cwd = open(".", O_RDONLY);
286 perror("cannot get a handle on .");
/* the table's file names are relative, so work from inside the config directory */
289 if (chdir(conf) == -1) {
290 fprintf(stderr, "cannot chdir to ");
/* iterate until an entry with a NULL name is reached */
295 for (r = &sliver_list[0]; r->name; r++) {
297 fb = fopen(r->name, "r");
301 if (strcmp(r->name, "bcapabilities") == 0) {
303 struct vc_err_listparser err;
/* `len` is reused here as the number of bytes read from the file */
305 len = fread(buf, 1, sizeof(buf), fb);
306 for (i = 0; i < len; i++) {
310 vc_list2bcap(buf, len, &err, &slr->vs_capabilities);
/*
 * Numeric file: only accepted when the first character is a digit.
 * NOTE(review): atoi() yields an int, so values beyond INT_MAX cannot be
 * represented even though the destination is unsigned long long.
 */
313 if (fgets(buf, sizeof(buf), fb) != NULL && isdigit(*buf))
314 *r->limit = atoi(buf);
/*
 * adjust_lim - merge a vserver limit triple (min/soft/hard) into a process
 * rlimit structure.
 *
 * vcr: requested limits; a field equal to VC_LIM_KEEP is ignored
 * lim: rlimit to update in place
 *
 * Visible behavior:
 *   - min:  raises rlim_cur and rlim_max to at least vcr->min,
 *   - soft: written to rlim_cur,
 *   - hard: written to rlim_cur as well (see note on that line).
 * The soft/hard values are compared against min when min is set.
 *
 * NOTE(review): the return type, the `case` labels, the statements guarded
 * by the `<` comparisons and any bookkeeping of whether something changed
 * are missing from this listing.
 */
326 adjust_lim(struct vc_rlimit *vcr, struct rlimit *lim)
329 if (vcr->min != VC_LIM_KEEP) {
/* current limit below the configured minimum: lift it */
330 if (vcr->min > lim->rlim_cur) {
331 lim->rlim_cur = vcr->min;
/* same for the maximum */
334 if (vcr->min > lim->rlim_max) {
335 lim->rlim_max = vcr->min;
340 if (vcr->soft != VC_LIM_KEEP) {
/* switches on a 0/1 value: whether a minimum is configured */
341 switch (vcr->min != VC_LIM_KEEP) {
343 if (vcr->soft < vcr->min)
346 lim->rlim_cur = vcr->soft;
351 if (vcr->hard != VC_LIM_KEEP) {
352 switch (vcr->min != VC_LIM_KEEP) {
354 if (vcr->hard < vcr->min)
/*
 * NOTE(review): the hard limit is stored in rlim_cur, the same field the
 * soft limit uses above; rlim_max may have been intended - confirm.
 */
357 lim->rlim_cur = vcr->hard;
/*
 * set_one_ulimit - apply one vserver limit triple to a process rlimit.
 *
 * resource: RLIMIT_* identifier passed to getrlimit()/setrlimit()
 * limit:    requested min/soft/hard values, merged by adjust_lim()
 *
 * The current limit is read, adjusted and written back. On the visible lines
 * the results of getrlimit(), adjust_lim() and setrlimit() are not checked.
 * NOTE(review): the return type and the declaration of `lim` are not part of
 * this listing.
 */
365 set_one_ulimit(int resource, struct vc_rlimit *limit)
368 getrlimit(resource, &lim);
369 adjust_lim(limit, &lim);
370 setrlimit(resource, &lim);
/*
 * pl_set_ulimits - apply the limits in `slr` to the rlimits RSS, AS, NPROC
 * and NOFILE via set_one_ulimit(), in that order.
 *
 * slr: source of the vs_rss, vs_as, vs_nproc and vs_openfd limit triples
 *
 * NOTE(review): the return type and braces are not part of this listing.
 */
374 pl_set_ulimits(struct sliver_resources *slr)
379 set_one_ulimit(RLIMIT_RSS, &slr->vs_rss);
380 set_one_ulimit(RLIMIT_AS, &slr->vs_as);
381 set_one_ulimit(RLIMIT_NPROC, &slr->vs_nproc);
382 set_one_ulimit(RLIMIT_NOFILE, &slr->vs_openfd);
/*
 * pl_set_limits - push the limits in `slr` into context `ctx`.
 *
 * ctx: context to configure
 * slr: limits to apply; slr->vs_capabilities.bmask is overwritten here
 *
 * Order of operations: context rlimits (RSS, AS, NPROC, NOFILE and
 * VC_VLIMIT_OPENFD), capabilities, then the CPU scheduler through
 * pl_setsched(). Each failing step reports through PERROR.
 *
 * NOTE(review): the return type and the statements that follow each PERROR
 * call are missing from this listing, so it is not visible whether a failure
 * aborts the remaining steps.
 */
386 pl_set_limits(xid_t ctx, struct sliver_resources *slr)
388 unsigned long long vs_cpu;
389 uint32_t cpu_sched_flags;
392 /* set memory limits */
393 if (vc_set_rlimit(ctx, RLIMIT_RSS, &slr->vs_rss)) {
394 PERROR("pl_setrlimit(%u, RLIMIT_RSS)", ctx);
398 /* set address space limits */
399 if (vc_set_rlimit(ctx, RLIMIT_AS, &slr->vs_as)) {
400 PERROR("pl_setrlimit(%u, RLIMIT_AS)", ctx);
404 /* set nproc limit */
405 if (vc_set_rlimit(ctx, RLIMIT_NPROC, &slr->vs_nproc)) {
406 PERROR("pl_setrlimit(%u, RLIMIT_NPROC)", ctx);
410 /* set openfd limit */
411 if (vc_set_rlimit(ctx, RLIMIT_NOFILE, &slr->vs_openfd)) {
412 PERROR("pl_setrlimit(%u, RLIMIT_NOFILE)", ctx);
/* the same openfd triple is also applied to the VC_VLIMIT_OPENFD resource */
415 if (vc_set_rlimit(ctx, VC_VLIMIT_OPENFD, &slr->vs_openfd)) {
416 PERROR("pl_setrlimit(%u, VLIMIT_OPENFD)", ctx);
/*
 * vs_cpu is unsigned long long but pl_setsched() takes a uint32_t share, so
 * the value is narrowed at the call below; pl_setsched() compares against
 * (uint32_t)VC_LIM_KEEP, which presumably relies on that narrowing.
 */
420 vs_cpu = slr->vs_cpu;
/* keep only the GUARANTEED bit of the configured value */
421 cpu_sched_flags = slr->vs_cpuguaranteed & VS_SCHED_CPU_GUARANTEED;
/* the mask comes from the library, not from the configuration */
423 slr->vs_capabilities.bmask = vc_get_insecurebcaps();
424 if (vc_set_ccaps(ctx, &slr->vs_capabilities) < 0) {
425 PERROR("pl_setcaps(%u)", ctx);
433 if (pl_setsched(ctx, vs_cpu, cpu_sched_flags) < 0) {
434 PERROR("pl_setsched(%u)", ctx);