1 /* Copyright 2005 Princeton University
3 Redistribution and use in source and binary forms, with or without
4 modification, are permitted provided that the following conditions
7 * Redistributions of source code must retain the above copyright
8 notice, this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above
11 copyright notice, this list of conditions and the following
12 disclaimer in the documentation and/or other materials provided
13 with the distribution.
15 * Neither the name of the copyright holder nor the names of its
16 contributors may be used to endorse or promote products derived
17 from this software without specific prior written permission.
19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PRINCETON
23 UNIVERSITY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
25 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
26 OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
27 AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
29 WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 POSSIBILITY OF SUCH DAMAGE.
45 #include <sys/resource.h>
51 #include "planetlab.h"
/*
 * Fallback definitions: each constant below is defined here only when no
 * macro of that name is already visible at this point.  In create_context()
 * VC_NXC_RAW_SOCKET is OR-ed into the network capability word and
 * VC_NXF_LBACK_ALLOW into the network flag word.
 */
53 #ifndef VC_NXC_RAW_SOCKET
54 # define VC_NXC_RAW_SOCKET 0x00000200ull
56 #ifndef VC_NXC_RAW_SEND
57 # define VC_NXC_RAW_SEND 0x00000400ull
59 #ifndef VC_NXF_LBACK_ALLOW
60 # define VC_NXF_LBACK_ALLOW 0x00000400ull
/*
 * create_context - create the network, tag and process contexts for ctx
 * and configure them.
 *
 * ctx:           context id handed to every vc_* call below
 * bcaps:         copied into vc_caps.bcaps and applied with vc_set_ccaps()
 * unshare_flags: when non-zero, passed to unshare(), then OR-ed with
 *                vc_get_space_mask() and passed to vc_set_namespace()
 *
 * Finally calls pl_setsched(ctx, 0, 1) and reports its failure via PERROR.
 *
 * NOTE(review): the error-handling and return statements are not shown in
 * this listing; the caller tests the result for non-zero, so a non-zero
 * return presumably means failure - confirm against the full file.
 */
64 create_context(xid_t ctx, uint64_t bcaps, uint32_t unshare_flags)
66 struct vc_ctx_caps vc_caps;
67 struct vc_net_flags vc_nf;
68 struct vc_net_caps vc_ncaps;
70 /* Create network context */
71 if (vc_net_create(ctx) == VC_NOCTX) {
77 /* Make the network context persistent and set the LBACK_ALLOW flag */
78 vc_nf.mask = vc_nf.flagword = VC_NXF_PERSISTENT | VC_NXF_LBACK_ALLOW;
79 if (vc_set_nflags(ctx, &vc_nf))
82 /* Grant the RAW_ICMP and RAW_SOCKET network capabilities */
83 vc_ncaps.ncaps = vc_ncaps.cmask = VC_NXC_RAW_ICMP | VC_NXC_RAW_SOCKET;
84 if (vc_set_ncaps(ctx, &vc_ncaps))
88 /* Create tag context */
89 if (vc_tag_create(ctx) == VC_NOCTX)
95 * Create context info - this sets the STATE_SETUP and STATE_INIT flags.
97 if (vc_ctx_create(ctx, 0) == VC_NOCTX)
/* NOTE(review): the results of unshare() and vc_set_namespace() are not
 * checked in the statements below. */
100 if (unshare_flags != 0) {
101 unshare(unshare_flags);
102 unshare_flags |= vc_get_space_mask();
103 vc_set_namespace(ctx, unshare_flags);
106 /* Set capabilities - these don't take effect until SETUP flag is unset */
107 vc_caps.bcaps = bcaps;
108 vc_caps.bmask = ~0ULL; /* currently unused */
109 vc_caps.ccaps = 0; /* don't want any of these */
110 vc_caps.cmask = ~0ULL;
111 if (vc_set_ccaps(ctx, &vc_caps))
114 if (pl_setsched(ctx, 0, 1) < 0) {
115 PERROR("pl_setsched(%u)", ctx);
/*
 * pl_setup_done - mark context ctx as fully set up.
 *
 * Builds a flag update whose mask covers STATE_SETUP and PERSISTENT and
 * whose flagword contains only PERSISTENT, then applies it with
 * vc_set_cflags().  What happens when vc_set_cflags() reports failure is
 * not shown in this listing.
 */
123 pl_setup_done(xid_t ctx)
125 struct vc_ctx_flags vc_flags;
127 /* unset SETUP flag - this allows other processes to migrate */
128 /* set the PERSISTENT flag - so the context doesn't vanish */
129 /* Don't clear the STATE_INIT flag, as that would make us the init task. */
130 vc_flags.mask = VC_VXF_STATE_SETUP|VC_VXF_PERSISTENT;
131 vc_flags.flagword = VC_VXF_PERSISTENT;
132 if (vc_set_cflags(ctx, &vc_flags))
/* Bound compared against retry_count in pl_chcontext() while the context
 * still has its SETUP flag set. */
138 #define RETRY_LIMIT 10
/*
 * pl_chcontext - apply the sliver's ulimits, then move the calling process
 * into context ctx, creating the context first if it does not exist.
 *
 * ctx:   target context id
 * bcaps: forwarded to create_context() when the context has to be created
 * slr:   resource settings forwarded to pl_set_ulimits()
 *
 * Visible steps: query the context flags with vc_get_cflags(); on the
 * creation path call create_context(); while VC_VXF_STATE_SETUP is set,
 * retry up to RETRY_LIMIT times; once set up, migrate the network, tag and
 * process contexts (vc_net_migrate, vc_tag_migrate, vc_ctx_migrate).
 *
 * NOTE(review): the loop construct, errno tests and return statements are
 * not shown in this listing - confirm the exact control flow and return
 * convention against the full file.
 */
141 pl_chcontext(xid_t ctx, uint64_t bcaps, const struct sliver_resources *slr)
144 int net_migrated = 0;
146 if (pl_set_ulimits(slr) != 0)
151 struct vc_ctx_flags vc_flags;
153 if (vc_get_cflags(ctx, &vc_flags))
155 uint32_t unshare_flags;
159 /* Unshare the net namespace of the slice if requested in the local slice configuration */
160 unshare_flags = get_space_flag(ctx);
162 /* context doesn't exist - create it */
163 if (create_context(ctx, bcaps, unshare_flags))
166 /* another process beat us in a race */
169 /* another process is creating - poll the SETUP flag */
174 /* created context and migrated to it i.e., we're done */
178 /* check the SETUP flag */
179 if (vc_flags.flagword & VC_VXF_STATE_SETUP)
181 /* context is still being setup - wait a while then retry */
182 if (retry_count++ >= RETRY_LIMIT)
191 /* context has been setup */
/* net_migrated short-circuits the call so vc_net_migrate() is skipped once
 * the flag is non-zero. */
193 if (net_migrated || !vc_net_migrate(ctx))
195 uint32_t unshare_flags;
196 /* Enter the slice's namespace if requested in the local slice configuration */
197 unshare_flags = get_space_flag(ctx);
/* NOTE(review): the result of vc_enter_namespace() is not checked below. */
198 if (unshare_flags != 0) {
199 unshare_flags |=vc_get_space_mask();
200 vc_enter_namespace(ctx, unshare_flags);
203 if (!vc_tag_migrate(ctx) && !vc_ctx_migrate(ctx, 0))
208 /* context disappeared - retry */
214 /* it's okay for a syscall to fail because the context doesn't exist */
/*
 * The visible part of the expansion contains a `return`, so the macro
 * returns from the *enclosing function*: 0 when errno is ESRCH, -1 for any
 * other errno.  NOTE(review): the condition guarding that return is not
 * shown in this listing - presumably it tests the result of x; confirm.
 */
215 #define VC_SYSCALL(x) \
219 return errno == ESRCH ? 0 : -1; \
/*
 * pl_setsched - configure the scheduler parameters of context ctx.
 *
 * ctx:       context id passed to vc_set_sched() and vc_set_cflags()
 * cpu_min:   stored in vc_sched.fill_rate
 * cpu_share: stored in vc_sched.fill_rate2; when equal to
 *            (uint32_t)VC_LIM_KEEP the FILL_RATE and FILL_RATE2 bits are
 *            removed from set_mask
 *
 * Both vserver calls go through VC_SYSCALL, which can return from this
 * function on failure (0 if errno is ESRCH, -1 otherwise).  The return on
 * the success path is not shown in this listing.
 */
224 pl_setsched(xid_t ctx, uint32_t cpu_min, uint32_t cpu_share)
226 struct vc_set_sched vc_sched;
227 struct vc_ctx_flags vc_flags;
229 vc_sched.set_mask = (VC_VXSM_FILL_RATE | VC_VXSM_INTERVAL | VC_VXSM_TOKENS |
230 VC_VXSM_TOKENS_MIN | VC_VXSM_TOKENS_MAX | VC_VXSM_MSEC |
231 VC_VXSM_FILL_RATE2 | VC_VXSM_INTERVAL2 | VC_VXSM_FORCE);
232 vc_sched.fill_rate = cpu_min; /* percent reserved */
233 vc_sched.interval = 100;
234 vc_sched.fill_rate2 = cpu_share; /* best-effort fair share of unreserved */
235 vc_sched.interval2 = 1000; /* milliseconds */
236 vc_sched.tokens = 100; /* initial allocation of tokens */
237 vc_sched.tokens_min = 50; /* need this many tokens to run */
238 vc_sched.tokens_max = 100; /* max accumulated number of tokens */
/* VC_LIM_KEEP in cpu_share: leave both fill rates out of the update */
241 if (cpu_share == (uint32_t)VC_LIM_KEEP)
242 vc_sched.set_mask &= ~(VC_VXSM_FILL_RATE|VC_VXSM_FILL_RATE2);
/* NOTE(review): the guard for the IDLE_TIME statement is not shown in this
 * listing - confirm under which condition it runs. */
244 vc_sched.set_mask |= VC_VXSM_IDLE_TIME;
247 VC_SYSCALL(vc_set_sched(ctx, &vc_sched));
/* Of the bits covered by VC_VXF_SCHED_FLAGS, request only SCHED_HARD */
249 vc_flags.mask = VC_VXF_SCHED_FLAGS;
250 vc_flags.flagword = VC_VXF_SCHED_HARD;
251 VC_SYSCALL(vc_set_cflags(ctx, &vc_flags));
/*
 * One entry of the table walked by pl_get_limits(): the table initialisers
 * supply a file name, a TYPE_* tag and the address of the destination field
 * in struct sliver_resources.  Only the two destination pointer members are
 * shown here; the name/type members (read as r->name and r->type) are
 * declared on lines missing from this listing.
 * NOTE(review): limit and personality look like alternative views of one
 * destination pointer (presumably a union) - confirm against the full file.
 */
261 struct pl_resources {
265 unsigned long long *limit;
266 unsigned long int *personality;
/*
 * WHITESPACE - advance index past whitespace characters in buffer.
 *
 * While buffer[index] is whitespace: increment index if it is below len,
 * otherwise jump to a label named `out`, which must exist in the function
 * that uses the macro.
 *
 * NOTE(review): buffer[index] is read before index is compared with len.
 * NOTE(review): the expansion is an unbraced while/if/else and the
 * arguments are not parenthesised, so it is only safe as a stand-alone
 * statement with simple arguments.
 */
270 #define WHITESPACE(buffer,index,len) \
271 while(isspace((int)buffer[index])) \
272 if (index < len) index++; else goto out;
/* Directory prefix under which the per-context configuration directory
 * (VSERVERCONF followed by the context name) is looked up. */
274 #define VSERVERCONF "/etc/vservers/"
/*
 * pl_get_limits - fill *slr from the files in VSERVERCONF<context>/.
 *
 * context: name appended to VSERVERCONF to form the directory path
 * slr:     destination; every rss/as/nproc/openfd/memlock hard, soft and
 *          min field is first set to VC_LIM_KEEP and personality to 0
 *
 * The function opens "." to keep a handle on the current directory, changes
 * into the configuration directory, and for each table entry opens the file
 * named r->name, reads one line with fgets() and stores the parsed value:
 * TYPE_LONG entries go through strtol(), TYPE_PERS entries through
 * vc_str2personalitytype() and are stored only when the result is not
 * VC_BAD_PERSONALITY.
 *
 * NOTE(review): cleanup (closing files, returning to the saved directory,
 * freeing conf) and the handling of a NULL malloc() result are not shown in
 * this listing - confirm against the full file.
 */
277 pl_get_limits(const char *context, struct sliver_resources *slr)
/* len covers the directory path plus terminator; the extra strlen() added
 * to the allocation is slack, since the only visible write into conf is the
 * sprintf() of VSERVERCONF and context below. */
281 size_t len = strlen(VSERVERCONF) + strlen(context) + NULLBYTE_SIZE;
282 char *conf = (char *)malloc(len + strlen("rlimits/openfd.hard"));
283 struct pl_resources *r;
/* File name (relative to the config directory), value type, destination */
284 struct pl_resources sliver_list[] = {
285 {"sched/fill-rate2", TYPE_LONG, &slr->vs_cpu},
287 {"rlimits/nproc.hard", TYPE_LONG, &slr->vs_nproc.hard},
288 {"rlimits/nproc.soft", TYPE_LONG, &slr->vs_nproc.soft},
289 {"rlimits/nproc.min", TYPE_LONG, &slr->vs_nproc.min},
291 {"rlimits/rss.hard", TYPE_LONG, &slr->vs_rss.hard},
292 {"rlimits/rss.soft", TYPE_LONG, &slr->vs_rss.soft},
293 {"rlimits/rss.min", TYPE_LONG, &slr->vs_rss.min},
295 {"rlimits/as.hard", TYPE_LONG, &slr->vs_as.hard},
296 {"rlimits/as.soft", TYPE_LONG, &slr->vs_as.soft},
297 {"rlimits/as.min", TYPE_LONG, &slr->vs_as.min},
299 {"rlimits/openfd.hard", TYPE_LONG, &slr->vs_openfd.hard},
300 {"rlimits/openfd.soft", TYPE_LONG, &slr->vs_openfd.soft},
301 {"rlimits/openfd.min", TYPE_LONG, &slr->vs_openfd.min},
303 {"rlimits/memlock.hard", TYPE_LONG, &slr->vs_memlock.hard},
304 {"rlimits/memlock.soft", TYPE_LONG, &slr->vs_memlock.soft},
305 {"rlimits/memlock.min", TYPE_LONG, &slr->vs_memlock.min},
307 {"personality", TYPE_PERS, &slr->personality},
312 sprintf(conf, "%s%s", VSERVERCONF, context);
/* Defaults: VC_LIM_KEEP for every limit, 0 for personality */
314 slr->vs_rss.hard = VC_LIM_KEEP;
315 slr->vs_rss.soft = VC_LIM_KEEP;
316 slr->vs_rss.min = VC_LIM_KEEP;
318 slr->vs_as.hard = VC_LIM_KEEP;
319 slr->vs_as.soft = VC_LIM_KEEP;
320 slr->vs_as.min = VC_LIM_KEEP;
322 slr->vs_nproc.hard = VC_LIM_KEEP;
323 slr->vs_nproc.soft = VC_LIM_KEEP;
324 slr->vs_nproc.min = VC_LIM_KEEP;
326 slr->vs_openfd.hard = VC_LIM_KEEP;
327 slr->vs_openfd.soft = VC_LIM_KEEP;
328 slr->vs_openfd.min = VC_LIM_KEEP;
330 slr->vs_memlock.hard = VC_LIM_KEEP;
331 slr->vs_memlock.soft = VC_LIM_KEEP;
332 slr->vs_memlock.min = VC_LIM_KEEP;
334 slr->personality = 0;
/* Keep a descriptor for the current directory before changing into conf */
336 cwd = open(".", O_RDONLY);
338 perror("cannot get a handle on .");
341 if (chdir(conf) == -1) {
342 fprintf(stderr, "cannot chdir to ");
/* The loop stops at the first entry whose name is NULL; that terminating
 * entry is not shown in this listing. */
347 for (r = &sliver_list[0]; r->name; r++) {
349 fb = fopen(r->name, "r");
352 if (fgets(buf, sizeof(buf), fb) != NULL) {
354 /* remove trailing newline */
355 if (buf[len-1] == '\n') {
359 if (r->type == TYPE_LONG) {
/* NOTE(review): strtol() always stores a non-NULL end pointer in res, so the
 * test below rejects every parsed value of 0 as well as any non-zero errno;
 * whether errno is cleared before the call is not shown in this listing. */
363 val = strtol(buf,&res,0);
364 if ( !( (val==0 && res) || (errno!=0) ) )
366 } else if ( (r->type == TYPE_PERS) && isalpha(*buf)) {
367 unsigned long int res;
368 res = vc_str2personalitytype(buf,len);
369 if (res != VC_BAD_PERSONALITY) {
370 *r->personality = res;
/*
 * adjust_lim - fold the min/soft/hard values of *vcr into *lim.
 *
 * vcr: requested values; a field equal to VC_LIM_KEEP is skipped
 * lim: rlimit pair updated in place
 *
 * min raises rlim_cur and rlim_max when they are below it.  soft is written
 * to rlim_cur and hard to rlim_max.
 *
 * NOTE(review): the case labels of the two switch statements and the
 * statements following the "< vcr->min" tests are not shown in this
 * listing; presumably a soft/hard value below min is not applied when min
 * is set - confirm against the full file.
 */
386 adjust_lim(const struct vc_rlimit *vcr, struct rlimit *lim)
389 if (vcr->min != VC_LIM_KEEP) {
390 if (vcr->min > lim->rlim_cur) {
391 lim->rlim_cur = vcr->min;
394 if (vcr->min > lim->rlim_max) {
395 lim->rlim_max = vcr->min;
400 if (vcr->soft != VC_LIM_KEEP) {
/* The switch operand is the 0/1 result of the comparison */
401 switch (vcr->min != VC_LIM_KEEP) {
403 if (vcr->soft < vcr->min)
406 lim->rlim_cur = vcr->soft;
411 if (vcr->hard != VC_LIM_KEEP) {
412 switch (vcr->min != VC_LIM_KEEP) {
414 if (vcr->hard < vcr->min)
417 lim->rlim_max = vcr->hard;
/*
 * set_one_ulimit - read the current limit for resource, adjust it with
 * adjust_lim() according to *limit, and write it back.
 *
 * NOTE(review): the results of getrlimit() and setrlimit() are not checked
 * in the statements below.
 */
425 set_one_ulimit(int resource, const struct vc_rlimit *limit)
428 getrlimit(resource, &lim);
429 adjust_lim(limit, &lim);
430 setrlimit(resource, &lim);
/*
 * set_personality - apply personality_arg with personality().
 *
 * A value of 0 is special-cased before the call and a negative result from
 * personality() is treated as an error.  NOTE(review): the statements taken
 * on those two branches and the return values are not shown in this
 * listing - confirm against the full file.
 */
434 set_personality(unsigned long int personality_arg)
436 if (personality_arg == 0)
438 if (personality(personality_arg) < 0) {
/*
 * pl_set_ulimits - apply the sliver's resource limits to the calling
 * process.
 *
 * slr: source of the rss, as, nproc, openfd and memlock limits and of the
 *      personality value
 *
 * Each limit is applied with set_one_ulimit(); the function's result is
 * whatever set_personality(slr->personality) returns.
 */
445 pl_set_ulimits(const struct sliver_resources *slr)
450 set_one_ulimit(RLIMIT_RSS, &slr->vs_rss);
451 set_one_ulimit(RLIMIT_AS, &slr->vs_as);
452 set_one_ulimit(RLIMIT_NPROC, &slr->vs_nproc);
453 set_one_ulimit(RLIMIT_NOFILE, &slr->vs_openfd);
454 set_one_ulimit(RLIMIT_MEMLOCK, &slr->vs_memlock);
455 return set_personality(slr->personality);