1 /* Copyright 2005 Princeton University
3 Redistribution and use in source and binary forms, with or without
4 modification, are permitted provided that the following conditions
7 * Redistributions of source code must retain the above copyright
8 notice, this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above
11 copyright notice, this list of conditions and the following
12 disclaimer in the documentation and/or other materials provided
13 with the distribution.
15 * Neither the name of the copyright holder nor the names of its
16 contributors may be used to endorse or promote products derived
17 from this software without specific prior written permission.
19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PRINCETON
23 UNIVERSITY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
25 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
26 OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
27 AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
29 WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 POSSIBILITY OF SUCH DAMAGE.
45 #include <sys/resource.h>
49 #include "planetlab.h"
/*
 * Fallback values for three constants used below. Each one is defined here
 * only when it has not already been defined by an earlier header.
 */
51 #ifndef VC_NXC_RAW_SOCKET
52 # define VC_NXC_RAW_SOCKET 0x00000200ull
54 #ifndef VC_NXC_RAW_SEND
55 # define VC_NXC_RAW_SEND 0x00000400ull
57 #ifndef VC_NXF_LBACK_ALLOW
58 # define VC_NXF_LBACK_ALLOW 0x00000400ull
/*
 * create_context - create and configure the contexts identified by ctx.
 *
 * ctx:   context id handed to every vc_* call below.
 * bcaps: stored in vc_caps.bcaps and applied with vc_set_ccaps().
 *
 * Visible steps, in order: vc_net_create(), vc_set_nflags() with
 * PERSISTENT | LBACK_ALLOW, vc_set_ncaps() with RAW_ICMP | RAW_SOCKET,
 * vc_tag_create(), vc_ctx_create(), an optional sys_unshare(),
 * vc_set_ccaps(), and finally pl_setsched(ctx, 0, 1).
 *
 * NOTE(review): the statements executed when a step fails, and the return
 * statements, are not visible in this excerpt - confirm the return
 * convention against the full file.
 */
62 create_context(xid_t ctx, uint64_t bcaps)
64 struct vc_ctx_caps vc_caps;
65 struct vc_net_flags vc_nf;
66 struct vc_net_caps vc_ncaps;
67 uint32_t unshare_mask;
69 /* Create network context */
70 if (vc_net_create(ctx) == VC_NOCTX) {
76 /* Make the network context persistent */
/* mask and flagword carry the same bits, so both flags are selected and set */
77 vc_nf.mask = vc_nf.flagword = VC_NXF_PERSISTENT | VC_NXF_LBACK_ALLOW;
78 if (vc_set_nflags(ctx, &vc_nf))
81 /* Give it raw sockets capabilities */
82 vc_ncaps.ncaps = vc_ncaps.cmask = VC_NXC_RAW_ICMP | VC_NXC_RAW_SOCKET;
83 if (vc_set_ncaps(ctx, &vc_ncaps))
87 /* Create tag context */
88 if (vc_tag_create(ctx) == VC_NOCTX)
94 * Create context info - this sets the STATE_SETUP and STATE_INIT flags.
96 if (vc_ctx_create(ctx, 0) == VC_NOCTX)
99 /* Unshare the NET namespace of the slice if requested in the local slice configuration */
100 unshare_mask = get_space_flag(ctx);
/* a zero mask means nothing was requested, so sys_unshare() is skipped */
101 if (unshare_mask != 0) {
102 sys_unshare(unshare_mask);
105 /* Set capabilities - these don't take effect until SETUP flag is unset */
106 vc_caps.bcaps = bcaps;
107 vc_caps.bmask = ~0ULL; /* currently unused */
108 vc_caps.ccaps = 0; /* don't want any of these */
109 vc_caps.cmask = ~0ULL;
110 if (vc_set_ccaps(ctx, &vc_caps))
/* initial scheduler setup: cpu_min = 0, cpu_share = 1 */
113 if (pl_setsched(ctx, 0, 1) < 0) {
114 PERROR("pl_setsched(%u)", ctx);
/*
 * pl_setup_done - mark context ctx as fully set up.
 *
 * The mask selects both VC_VXF_STATE_SETUP and VC_VXF_PERSISTENT while the
 * flagword carries only VC_VXF_PERSISTENT, so a single vc_set_cflags() call
 * clears SETUP and sets PERSISTENT.
 *
 * NOTE(review): what happens when vc_set_cflags() fails is not visible in
 * this excerpt.
 */
122 pl_setup_done(xid_t ctx)
124 struct vc_ctx_flags vc_flags;
126 /* unset SETUP flag - this allows other processes to migrate */
127 /* set the PERSISTENT flag - so the context doesn't vanish */
128 /* Don't clear the STATE_INIT flag, as that would make us the init task. */
129 vc_flags.mask = VC_VXF_STATE_SETUP|VC_VXF_PERSISTENT;
130 vc_flags.flagword = VC_VXF_PERSISTENT;
131 if (vc_set_cflags(ctx, &vc_flags))
/* Bound compared against retry_count while the SETUP flag is still set. */
137 #define RETRY_LIMIT 10
/*
 * pl_chcontext - migrate into context ctx, creating it first when needed.
 *
 * ctx:   target context id.
 * bcaps: forwarded to create_context() when the context has to be created.
 * slr:   resource settings applied with pl_set_ulimits() before anything
 *        else is attempted.
 *
 * The visible logic queries the context flags with vc_get_cflags(), calls
 * create_context() when the context does not exist, retries while
 * VC_VXF_STATE_SETUP is set (bounded by RETRY_LIMIT), and otherwise migrates
 * with vc_net_migrate(), vc_tag_migrate() and vc_ctx_migrate().
 *
 * NOTE(review): the loop construct, the errno checks and the return
 * statements are not visible in this excerpt.
 */
140 pl_chcontext(xid_t ctx, uint64_t bcaps, const struct sliver_resources *slr)
/* when non-zero, the vc_net_migrate() call below is short-circuited */
143 int net_migrated = 0;
145 if (pl_set_ulimits(slr) != 0)
150 struct vc_ctx_flags vc_flags;
152 if (vc_get_cflags(ctx, &vc_flags))
157 /* context doesn't exist - create it */
158 if (create_context(ctx, bcaps))
161 /* another process beat us in a race */
164 /* another process is creating - poll the SETUP flag */
169 /* created context and migrated to it i.e., we're done */
173 /* check the SETUP flag */
174 if (vc_flags.flagword & VC_VXF_STATE_SETUP)
176 /* context is still being setup - wait a while then retry */
177 if (retry_count++ >= RETRY_LIMIT)
186 /* context has been setup */
/* vc_net_migrate() is only called when net_migrated is still zero */
188 if (net_migrated || !vc_net_migrate(ctx))
/* tag migration first; the process context is entered only if that succeeds */
190 if (!vc_tag_migrate(ctx) && !vc_ctx_migrate(ctx, 0))
195 /* context disappeared - retry */
/*
 * VC_SYSCALL(x) - wrapper placed around the vc_* calls in pl_setsched().
 * The visible part of the expansion returns from the enclosing function:
 * 0 when errno is ESRCH, -1 otherwise.
 * NOTE(review): the lines that evaluate x and guard this return are not
 * visible in this excerpt.
 */
201 /* it's okay for a syscall to fail because the context doesn't exist */
202 #define VC_SYSCALL(x) \
206 return errno == ESRCH ? 0 : -1; \
/*
 * pl_setsched - program the scheduler parameters of context ctx.
 *
 * ctx:       context to configure.
 * cpu_min:   stored as fill_rate (percent reserved).
 * cpu_share: stored as fill_rate2 (best-effort share of unreserved CPU).
 *            When it equals (uint32_t)VC_LIM_KEEP, both fill rates are
 *            removed from set_mask.
 *
 * The settings are applied with vc_set_sched(); afterwards
 * VC_VXF_SCHED_HARD is set within the VC_VXF_SCHED_FLAGS mask through
 * vc_set_cflags(). Both calls are wrapped in VC_SYSCALL.
 */
211 pl_setsched(xid_t ctx, uint32_t cpu_min, uint32_t cpu_share)
213 struct vc_set_sched vc_sched;
214 struct vc_ctx_flags vc_flags;
/* start with every listed field selected; adjusted below */
216 vc_sched.set_mask = (VC_VXSM_FILL_RATE | VC_VXSM_INTERVAL | VC_VXSM_TOKENS |
217 VC_VXSM_TOKENS_MIN | VC_VXSM_TOKENS_MAX | VC_VXSM_MSEC |
218 VC_VXSM_FILL_RATE2 | VC_VXSM_INTERVAL2 | VC_VXSM_FORCE);
219 vc_sched.fill_rate = cpu_min; /* percent reserved */
220 vc_sched.interval = 100;
221 vc_sched.fill_rate2 = cpu_share; /* best-effort fair share of unreserved */
222 vc_sched.interval2 = 1000; /* milliseconds */
223 vc_sched.tokens = 100; /* initial allocation of tokens */
224 vc_sched.tokens_min = 50; /* need this many tokens to run */
225 vc_sched.tokens_max = 100; /* max accumulated number of tokens */
/* VC_LIM_KEEP as cpu_share: drop both fill rates from the mask */
228 if (cpu_share == (uint32_t)VC_LIM_KEEP)
229 vc_sched.set_mask &= ~(VC_VXSM_FILL_RATE|VC_VXSM_FILL_RATE2);
/* NOTE(review): a line is missing between the statement above and the one
 * below in this excerpt, so it cannot be told from here whether IDLE_TIME
 * is added unconditionally. */
231 vc_sched.set_mask |= VC_VXSM_IDLE_TIME;
234 VC_SYSCALL(vc_set_sched(ctx, &vc_sched));
236 vc_flags.mask = VC_VXF_SCHED_FLAGS;
237 vc_flags.flagword = VC_VXF_SCHED_HARD;
238 VC_SYSCALL(vc_set_cflags(ctx, &vc_flags));
/*
 * One entry of the table walked by pl_get_limits().
 * NOTE(review): the leading members and the closing brace are not visible
 * in this excerpt.
 */
248 struct pl_resources {
/* destination written for entries of type TYPE_LONG */
252 unsigned long long *limit;
/* destination written for entries of type TYPE_PERS */
253 unsigned long int *personality;
/*
 * WHITESPACE(buffer, index, len) - advance index past whitespace in buffer.
 *
 * While buffer[index] is whitespace, index is incremented as long as it is
 * below len; if the character is still whitespace once index has reached
 * len, control jumps to the `out` label of the enclosing function.
 *
 * The expansion ends with its own ';', so it may be written with or without
 * a trailing semicolon at the call site.
 *
 * The character is converted to unsigned char before being handed to
 * isspace(): passing a negative char value is undefined behaviour.
 * Arguments are parenthesised so that expressions can be passed safely.
 * index is evaluated several times and must be a side-effect-free lvalue.
 */
#define WHITESPACE(buffer,index,len) \
  while (isspace((unsigned char)(buffer)[(index)])) \
    if ((index) < (len)) (index)++; else goto out;
/* Directory prefix; the context name is appended to it in pl_get_limits(). */
261 #define VSERVERCONF "/etc/vservers/"
/*
 * pl_get_limits - fill *slr from the files found under VSERVERCONF<context>.
 *
 * context: name appended to VSERVERCONF to form the directory that is
 *          entered with chdir().
 * slr:     output. The rss/as/nproc/openfd limits are first set to
 *          VC_LIM_KEEP and personality to 0; values read from the files
 *          named in sliver_list then overwrite those defaults.
 *
 * For each table entry the first line of the file is read with fgets().
 * A TYPE_LONG entry whose line starts with a digit is parsed with atoi();
 * a TYPE_PERS entry whose line starts with a letter is converted with
 * vc_str2personalitytype() and stored unless the result is
 * VC_BAD_PERSONALITY.
 *
 * NOTE(review): no check of the malloc() result is visible in this excerpt,
 * and atoi() yields an int before the store through an
 * unsigned long long pointer - confirm both against the full file.
 */
264 pl_get_limits(const char *context, struct sliver_resources *slr)
268 size_t len = strlen(VSERVERCONF) + strlen(context) + NULLBYTE_SIZE;
/* extra room sized after the file name "rlimits/openfd.hard" */
269 char *conf = (char *)malloc(len + strlen("rlimits/openfd.hard"));
270 struct pl_resources *r;
/* file name (relative to the context directory), value type, destination */
271 struct pl_resources sliver_list[] = {
272 {"sched/fill-rate2", TYPE_LONG, &slr->vs_cpu},
274 {"rlimits/nproc.hard", TYPE_LONG, &slr->vs_nproc.hard},
275 {"rlimits/nproc.soft", TYPE_LONG, &slr->vs_nproc.soft},
276 {"rlimits/nproc.min", TYPE_LONG, &slr->vs_nproc.min},
278 {"rlimits/rss.hard", TYPE_LONG, &slr->vs_rss.hard},
279 {"rlimits/rss.soft", TYPE_LONG, &slr->vs_rss.soft},
280 {"rlimits/rss.min", TYPE_LONG, &slr->vs_rss.min},
282 {"rlimits/as.hard", TYPE_LONG, &slr->vs_as.hard},
283 {"rlimits/as.soft", TYPE_LONG, &slr->vs_as.soft},
284 {"rlimits/as.min", TYPE_LONG, &slr->vs_as.min},
286 {"rlimits/openfd.hard", TYPE_LONG, &slr->vs_openfd.hard},
287 {"rlimits/openfd.soft", TYPE_LONG, &slr->vs_openfd.soft},
288 {"rlimits/openfd.min", TYPE_LONG, &slr->vs_openfd.min},
290 {"personality", TYPE_PERS, &slr->personality},
295 sprintf(conf, "%s%s", VSERVERCONF, context);
/* defaults: VC_LIM_KEEP for every limit, 0 for personality */
297 slr->vs_rss.hard = VC_LIM_KEEP;
298 slr->vs_rss.soft = VC_LIM_KEEP;
299 slr->vs_rss.min = VC_LIM_KEEP;
301 slr->vs_as.hard = VC_LIM_KEEP;
302 slr->vs_as.soft = VC_LIM_KEEP;
303 slr->vs_as.min = VC_LIM_KEEP;
305 slr->vs_nproc.hard = VC_LIM_KEEP;
306 slr->vs_nproc.soft = VC_LIM_KEEP;
307 slr->vs_nproc.min = VC_LIM_KEEP;
309 slr->vs_openfd.hard = VC_LIM_KEEP;
310 slr->vs_openfd.soft = VC_LIM_KEEP;
311 slr->vs_openfd.min = VC_LIM_KEEP;
313 slr->personality = 0;
/* descriptor for the current directory, taken before the chdir() below */
315 cwd = open(".", O_RDONLY);
317 perror("cannot get a handle on .");
320 if (chdir(conf) == -1) {
321 fprintf(stderr, "cannot chdir to ");
/* the table is walked until an entry with a null name is reached */
326 for (r = &sliver_list[0]; r->name; r++) {
328 fb = fopen(r->name, "r");
/* only the first line of each file is considered */
331 if (fgets(buf, sizeof(buf), fb) != NULL) {
333 /* remove trailing newline */
334 if (buf[len-1] == '\n') {
338 if ( (r->type == TYPE_LONG) && isdigit(*buf)) {
339 *r->limit = atoi(buf);
340 } else if ( (r->type == TYPE_PERS) && isalpha(*buf)) {
341 unsigned long int res;
342 res = vc_str2personalitytype(buf,len);
/* an unrecognised personality leaves the default in place */
343 if (res != VC_BAD_PERSONALITY) {
344 *r->personality = res;
/*
 * adjust_lim - fold the limits in *vcr into the rlimit *lim.
 *
 * Fields of vcr equal to VC_LIM_KEEP are skipped.
 *   min:  rlim_cur and rlim_max are each raised to min when below it.
 *   soft: candidate for rlim_cur; compared against min when min is set.
 *   hard: candidate for rlim_max; compared against min when min is set.
 *
 * NOTE(review): the case labels and the statements taken when soft/hard is
 * below min are not visible in this excerpt.
 */
360 adjust_lim(const struct vc_rlimit *vcr, struct rlimit *lim)
363 if (vcr->min != VC_LIM_KEEP) {
364 if (vcr->min > lim->rlim_cur) {
365 lim->rlim_cur = vcr->min;
368 if (vcr->min > lim->rlim_max) {
369 lim->rlim_max = vcr->min;
374 if (vcr->soft != VC_LIM_KEEP) {
/* the switch operand is the 0/1 result of the comparison */
375 switch (vcr->min != VC_LIM_KEEP) {
377 if (vcr->soft < vcr->min)
380 lim->rlim_cur = vcr->soft;
385 if (vcr->hard != VC_LIM_KEEP) {
/* same 0/1 switch as for the soft limit */
386 switch (vcr->min != VC_LIM_KEEP) {
388 if (vcr->hard < vcr->min)
391 lim->rlim_max = vcr->hard;
/*
 * set_one_ulimit - read the current limit for `resource`, adjust it with
 * adjust_lim() according to *limit, and write it back with setrlimit().
 * The results of getrlimit() and setrlimit() are discarded.
 */
399 set_one_ulimit(int resource, const struct vc_rlimit *limit)
402 getrlimit(resource, &lim);
403 adjust_lim(limit, &lim);
404 setrlimit(resource, &lim);
/*
 * set_personality - apply personality_arg through personality().
 * A value of 0 is handled separately before the call, and a negative result
 * from personality() takes the error branch.
 * NOTE(review): the bodies of both branches and the return statements are
 * not visible in this excerpt.
 */
408 set_personality(unsigned long int personality_arg)
410 if (personality_arg == 0)
412 if (personality(personality_arg) < 0) {
/*
 * pl_set_ulimits - apply the limits and personality stored in *slr.
 *
 * RLIMIT_RSS, RLIMIT_AS, RLIMIT_NPROC and RLIMIT_NOFILE are updated from
 * vs_rss, vs_as, vs_nproc and vs_openfd respectively through
 * set_one_ulimit(). The return value is that of
 * set_personality(slr->personality).
 * NOTE(review): the lines between the signature and the first call are not
 * visible in this excerpt.
 */
419 pl_set_ulimits(const struct sliver_resources *slr)
424 set_one_ulimit(RLIMIT_RSS, &slr->vs_rss);
425 set_one_ulimit(RLIMIT_AS, &slr->vs_as);
426 set_one_ulimit(RLIMIT_NPROC, &slr->vs_nproc);
427 set_one_ulimit(RLIMIT_NOFILE, &slr->vs_openfd);
428 return set_personality(slr->personality);