1 /* Copyright 2005 Princeton University
3 Redistribution and use in source and binary forms, with or without
4 modification, are permitted provided that the following conditions
7 * Redistributions of source code must retain the above copyright
8 notice, this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above
11 copyright notice, this list of conditions and the following
12 disclaimer in the documentation and/or other materials provided
13 with the distribution.
15 * Neither the name of the copyright holder nor the names of its
16 contributors may be used to endorse or promote products derived
17 from this software without specific prior written permission.
19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PRINCETON
23 UNIVERSITY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
25 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
26 OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
27 AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
29 WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 POSSIBILITY OF SUCH DAMAGE.
45 #include <sys/resource.h>
46 #include <sys/types.h>
52 #include "planetlab.h"
54 /* defined in netns.c */
55 extern uint32_t get_space_flag(xid_t);
/*
 * Fallback values for vserver constants; each one is defined here only when
 * the name is not already defined at this point.
 * create_context() puts VC_NXC_RAW_SOCKET into the network capability word
 * and VC_NXF_LBACK_ALLOW into the network flag word.
 * NOTE(review): VC_NXC_RAW_SEND and VC_NXF_LBACK_ALLOW share the value
 * 0x400; presumably harmless because they belong to different words -
 * confirm.
 */
57 #ifndef VC_NXC_RAW_SOCKET
58 # define VC_NXC_RAW_SOCKET 0x00000200ull
60 #ifndef VC_NXC_RAW_SEND
61 # define VC_NXC_RAW_SEND 0x00000400ull
63 #ifndef VC_NXF_LBACK_ALLOW
64 # define VC_NXF_LBACK_ALLOW 0x00000400ull
/*
 * create_context - build the network, tag and process contexts for ctx.
 *
 * ctx:           context id handed to every vc_* call below
 * bcaps:         copied into vc_caps.bcaps for vc_set_ccaps()
 * unshare_flags: when non-zero, passed to unshare() and then, OR-ed with
 *                vc_get_space_mask(), to vc_set_namespace()
 *
 * pl_chcontext() tests the result of this function for non-zero.
 * NOTE(review): confirm the exact return values on each failure path.
 */
68 create_context(xid_t ctx, uint64_t bcaps, uint32_t unshare_flags)
70 struct vc_ctx_caps vc_caps;
71 struct vc_net_flags vc_nf;
72 struct vc_net_caps vc_ncaps;
74 /* Create network context */
75 if (vc_net_create(ctx) == VC_NOCTX) {
81 /* Make the network context persistent */
/* mask and flagword carry the same bits, i.e. both flags are selected and
 * given a set value (presumably "switch on" - confirm) */
82 vc_nf.mask = vc_nf.flagword = VC_NXF_PERSISTENT | VC_NXF_LBACK_ALLOW;
83 if (vc_set_nflags(ctx, &vc_nf))
86 /* Give it raw sockets capabilities */
87 vc_ncaps.ncaps = vc_ncaps.cmask = VC_NXC_RAW_ICMP | VC_NXC_RAW_SOCKET;
88 if (vc_set_ncaps(ctx, &vc_ncaps))
92 /* Create tag context */
93 if (vc_tag_create(ctx) == VC_NOCTX)
97 * Create context info - this sets the STATE_SETUP and STATE_INIT flags.
99 if (vc_ctx_create(ctx, 0) == VC_NOCTX)
102 if (unshare_flags != 0) {
/* NOTE(review): the result of unshare() is not checked */
103 unshare(unshare_flags);
/* the value handed to vc_set_namespace() is the requested flags plus
 * whatever vc_get_space_mask() reports */
104 unshare_flags |= vc_get_space_mask();
105 #ifdef VC_VXC_NAMESPACE
106 /* this is a hack to make util-vserver-pl compile with
107 util-vserver-0.30.216, which changes the signature for
108 vc_{set,enter}_namespace functions. */
109 vc_set_namespace(ctx, unshare_flags, NULL);
111 vc_set_namespace(ctx, unshare_flags);
115 /* Set capabilities - these don't take effect until SETUP flag is unset */
116 vc_caps.bcaps = bcaps;
117 vc_caps.bmask = ~0ULL; /* currently unused */
118 vc_caps.ccaps = 0; /* don't want any of these */
119 vc_caps.cmask = ~0ULL;
120 if (vc_set_ccaps(ctx, &vc_caps))
/* initial scheduler setup: cpu_min = 0, cpu_share = 1 */
123 if (pl_setsched(ctx, 0, 1) < 0) {
124 PERROR("pl_setsched(%u)", ctx);
/*
 * pl_setup_done - mark the context ctx as fully set up.
 *
 * Issues one vc_set_cflags() call whose mask selects VC_VXF_STATE_SETUP and
 * VC_VXF_PERSISTENT and whose flagword contains only VC_VXF_PERSISTENT.
 */
132 pl_setup_done(xid_t ctx)
134 struct vc_ctx_flags vc_flags;
136 /* unset SETUP flag - this allows other processes to migrate */
137 /* set the PERSISTENT flag - so the context doesn't vanish */
138 /* Don't clear the STATE_INIT flag, as that would make us the init task. */
139 vc_flags.mask = VC_VXF_STATE_SETUP|VC_VXF_PERSISTENT;
140 vc_flags.flagword = VC_VXF_PERSISTENT;
141 if (vc_set_cflags(ctx, &vc_flags))
/* Upper bound compared against retry_count while the context still has its
 * SETUP flag set (see pl_chcontext) */
147 #define RETRY_LIMIT 10
/*
 * pl_chcontext - move the calling process into context ctx, creating the
 * context first when it does not exist yet.
 *
 * ctx:   context id to enter
 * bcaps: forwarded to create_context() when the context has to be created
 * slr:   resource settings applied through pl_set_ulimits() before anything
 *        else is attempted
 *
 * Visible steps: read the context flags with vc_get_cflags(); when that
 * fails, create the context; while VC_VXF_STATE_SETUP is set, wait and
 * retry; otherwise migrate the network context, enter the namespace when
 * get_space_flag() asks for one, then migrate the tag and process contexts.
 * NOTE(review): confirm the loop structure and the return values.
 */
150 pl_chcontext(xid_t ctx, uint64_t bcaps, const struct sliver_resources *slr)
153 int net_migrated = 0;
155 if (pl_set_ulimits(slr) != 0)
160 struct vc_ctx_flags vc_flags;
162 if (vc_get_cflags(ctx, &vc_flags))
164 uint32_t unshare_flags;
168 /* Unshare the net namespace of the slice if requested in the local slice configuration */
169 unshare_flags = get_space_flag(ctx);
171 /* context doesn't exist - create it */
172 if (create_context(ctx, bcaps, unshare_flags))
175 /* another process beat us in a race */
178 /* another process is creating - poll the SETUP flag */
183 /* created context and migrated to it i.e., we're done */
187 /* check the SETUP flag */
188 if (vc_flags.flagword & VC_VXF_STATE_SETUP)
190 /* context is still being set up - wait a while then retry */
191 if (retry_count++ >= RETRY_LIMIT)
200 /* context has been set up */
/* vc_net_migrate() is only attempted while net_migrated is still zero */
202 if (net_migrated || !vc_net_migrate(ctx))
204 uint32_t unshare_flags;
205 /* Unshare the net namespace of the slice if requested in the local slice configuration */
206 unshare_flags = get_space_flag(ctx);
207 if (unshare_flags != 0) {
208 unshare_flags |=vc_get_space_mask();
209 #ifdef VC_VXC_NAMESPACE
210 /* this is a hack to make util-vserver-pl compile with
211 util-vserver-0.30.216, which changes the signature for
212 vc_{set,enter}_namespace functions. */
213 vc_enter_namespace(ctx, unshare_flags, NULL);
215 vc_enter_namespace(ctx, unshare_flags);
/* both migrations have to return zero for this branch to be taken */
219 if (!vc_tag_migrate(ctx) && !vc_ctx_migrate(ctx, 0))
224 /* context disappeared - retry */
230 /* it's okay for a syscall to fail because the context doesn't exist.
 *
 * The expansion contains a return statement, so it can leave the function
 * that uses the macro: the value is 0 when errno is ESRCH and -1 for any
 * other errno.
 * NOTE(review): presumably the return is reached only when x reports a
 * failure - confirm against the full macro body. */
231 #define VC_SYSCALL(x) \
235 return errno == ESRCH ? 0 : -1; \
/*
 * pl_setsched - program the CPU scheduler parameters of context ctx.
 *
 * ctx:       context id
 * cpu_min:   stored as fill_rate (percent reserved)
 * cpu_share: stored as fill_rate2 (best-effort share); when it equals
 *            (uint32_t)VC_LIM_KEEP both fill-rate bits are removed from
 *            set_mask, so vc_set_sched() is not asked to change them
 *
 * Both vserver calls go through VC_SYSCALL, which can return from this
 * function (0 when errno is ESRCH, -1 otherwise).
 * create_context() treats a negative result as an error.
 */
240 pl_setsched(xid_t ctx, uint32_t cpu_min, uint32_t cpu_share)
242 struct vc_set_sched vc_sched;
243 struct vc_ctx_flags vc_flags;
/* start with every field listed below marked as "to be set" */
245 vc_sched.set_mask = (VC_VXSM_FILL_RATE | VC_VXSM_INTERVAL | VC_VXSM_TOKENS |
246 VC_VXSM_TOKENS_MIN | VC_VXSM_TOKENS_MAX | VC_VXSM_MSEC |
247 VC_VXSM_FILL_RATE2 | VC_VXSM_INTERVAL2 | VC_VXSM_FORCE);
248 vc_sched.fill_rate = cpu_min; /* percent reserved */
249 vc_sched.interval = 100;
250 vc_sched.fill_rate2 = cpu_share; /* best-effort fair share of unreserved */
251 vc_sched.interval2 = 1000; /* milliseconds */
252 vc_sched.tokens = 100; /* initial allocation of tokens */
253 vc_sched.tokens_min = 50; /* need this many tokens to run */
254 vc_sched.tokens_max = 100; /* max accumulated number of tokens */
257 if (cpu_share == (uint32_t)VC_LIM_KEEP)
258 vc_sched.set_mask &= ~(VC_VXSM_FILL_RATE|VC_VXSM_FILL_RATE2);
/* NOTE(review): confirm which condition guards the IDLE_TIME bit */
260 vc_sched.set_mask |= VC_VXSM_IDLE_TIME;
263 VC_SYSCALL(vc_set_sched(ctx, &vc_sched));
/* within the VC_VXF_SCHED_FLAGS group, leave only VC_VXF_SCHED_HARD set */
265 vc_flags.mask = VC_VXF_SCHED_FLAGS;
266 vc_flags.flagword = VC_VXF_SCHED_HARD;
267 VC_SYSCALL(vc_set_cflags(ctx, &vc_flags));
/*
 * One entry of the table walked by pl_get_limits(): where the value read
 * for a configuration item is stored.
 */
277 struct pl_resources {
/* destination pointer; table entries tagged TYPE_LONG are initialised with
 * the address of a limit field */
281 unsigned long long *limit;
/* destination written by pl_get_limits() for TYPE_PERS entries */
282 unsigned long int *personality;
/*
 * WHITESPACE - advance index while buffer[index] is a whitespace character.
 * When a whitespace character is found and index is not below len, the
 * macro jumps to a label named `out`, which the enclosing function must
 * provide.
 *
 * NOTE(review): buffer[index] is read by the loop condition before index is
 * compared with len. The (int) cast yields a negative value for bytes
 * >= 0x80 where char is signed; isspace() expects an unsigned char value or
 * EOF, so an (unsigned char) cast would be safer.
 * NOTE(review): the expansion is a bare while/if/else, not wrapped in
 * do { } while (0), so it must not be followed by a dangling else.
 */
286 #define WHITESPACE(buffer,index,len) \
287 while(isspace((int)buffer[index])) \
288 if (index < len) index++; else goto out;
/* Directory prefix that pl_get_limits() prepends to the context name */
290 #define VSERVERCONF "/etc/vservers/"
/*
 * pl_get_limits - fill *slr from the per-context configuration directory.
 *
 * context: name appended to VSERVERCONF to form the directory to read
 * slr:     output; the rlimit fields listed below start out as VC_LIM_KEEP
 *          and personality as 0, then each file that can be opened and
 *          parsed overrides its field
 *
 * The function changes the working directory to the configuration
 * directory and opens each table entry by its relative name; a descriptor
 * for "." is opened beforehand (presumably to change back afterwards -
 * confirm).
 */
293 pl_get_limits(const char *context, struct sliver_resources *slr)
297 struct pl_resources *r;
/* relative file name, value type, destination field in slr */
299 struct pl_resources sliver_list[] = {
300 {"sched/fill-rate2", TYPE_LONG, &slr->vs_cpu},
302 {"rlimits/nproc.hard", TYPE_LONG, &slr->vs_nproc.hard},
303 {"rlimits/nproc.soft", TYPE_LONG, &slr->vs_nproc.soft},
304 {"rlimits/nproc.min", TYPE_LONG, &slr->vs_nproc.min},
306 {"rlimits/rss.hard", TYPE_LONG, &slr->vs_rss.hard},
307 {"rlimits/rss.soft", TYPE_LONG, &slr->vs_rss.soft},
308 {"rlimits/rss.min", TYPE_LONG, &slr->vs_rss.min},
310 {"rlimits/as.hard", TYPE_LONG, &slr->vs_as.hard},
311 {"rlimits/as.soft", TYPE_LONG, &slr->vs_as.soft},
312 {"rlimits/as.min", TYPE_LONG, &slr->vs_as.min},
314 {"rlimits/nofile.hard", TYPE_LONG, &slr->vs_nofile.hard},
315 {"rlimits/nofile.soft", TYPE_LONG, &slr->vs_nofile.soft},
316 {"rlimits/nofile.min", TYPE_LONG, &slr->vs_nofile.min},
318 {"rlimits/memlock.hard", TYPE_LONG, &slr->vs_memlock.hard},
319 {"rlimits/memlock.soft", TYPE_LONG, &slr->vs_memlock.soft},
320 {"rlimits/memlock.min", TYPE_LONG, &slr->vs_memlock.min},
322 {"personality", TYPE_PERS, &slr->personality},
/* room for prefix + context name + terminator */
327 size_t len = strlen(VSERVERCONF) + strlen(context) + NULLBYTE_SIZE;
/* NOTE(review): the malloc() result is used by sprintf() without a NULL
 * check */
328 char *conf = (char *)malloc(len);
329 sprintf(conf, "%s%s", VSERVERCONF, context);
/* defaults: VC_LIM_KEEP makes adjust_lim() leave that limit untouched */
331 slr->vs_rss.hard = VC_LIM_KEEP;
332 slr->vs_rss.soft = VC_LIM_KEEP;
333 slr->vs_rss.min = VC_LIM_KEEP;
335 slr->vs_as.hard = VC_LIM_KEEP;
336 slr->vs_as.soft = VC_LIM_KEEP;
337 slr->vs_as.min = VC_LIM_KEEP;
339 slr->vs_nproc.hard = VC_LIM_KEEP;
340 slr->vs_nproc.soft = VC_LIM_KEEP;
341 slr->vs_nproc.min = VC_LIM_KEEP;
343 slr->vs_nofile.hard = VC_LIM_KEEP;
344 slr->vs_nofile.soft = VC_LIM_KEEP;
345 slr->vs_nofile.min = VC_LIM_KEEP;
347 slr->vs_memlock.hard = VC_LIM_KEEP;
348 slr->vs_memlock.soft = VC_LIM_KEEP;
349 slr->vs_memlock.min = VC_LIM_KEEP;
/* 0 is the value set_personality() special-cases */
351 slr->personality = 0;
353 cwd = open(".", O_RDONLY);
355 perror("cannot get a handle on .");
358 if (chdir(conf) == -1) {
359 fprintf(stderr, "cannot chdir to ");
/* the table is walked until an entry whose name is NULL */
364 for (r = &sliver_list[0]; r->name; r++) {
366 fb = fopen(r->name, "r");
/* only the first line of each file is read */
369 if (fgets(buf, sizeof(buf), fb) != NULL) {
371 /* remove trailing newline */
372 if (buf[len-1] == '\n') {
376 if (r->type == TYPE_LONG) {
/* base 0: strtol() accepts decimal, octal (0...) and hex (0x...) input */
380 val = strtol(buf,&res,0);
/* NOTE(review): strtol() always stores an end pointer in res, so
 * "val==0 && res" is true for every parsed zero; a literal 0 in the file is
 * therefore treated like a parse failure - confirm this is intended */
381 if ( !( (val==0 && res) || (errno!=0) ) )
/* personality entries are only looked up when the text starts with a
 * letter */
383 } else if ( (r->type == TYPE_PERS) && isalpha(*buf)) {
384 unsigned long int res;
385 res = vc_str2personalitytype(buf,len);
386 if (res != VC_BAD_PERSONALITY) {
387 *r->personality = res;
/*
 * adjust_lim - merge the configured limits in *vcr into *lim.
 *
 * vcr: configured min/soft/hard values; a field equal to VC_LIM_KEEP is
 *      skipped
 * lim: in/out rlimit pair, updated in place
 *
 * min raises rlim_cur and rlim_max when they are below it; soft is written
 * to rlim_cur and hard to rlim_max. When min is set as well, soft and hard
 * are first compared with it.
 * NOTE(review): confirm what happens when soft or hard is below min.
 */
403 adjust_lim(const struct vc_rlimit *vcr, struct rlimit *lim)
406 if (vcr->min != VC_LIM_KEEP) {
407 if (vcr->min > lim->rlim_cur) {
408 lim->rlim_cur = vcr->min;
411 if (vcr->min > lim->rlim_max) {
412 lim->rlim_max = vcr->min;
417 if (vcr->soft != VC_LIM_KEEP) {
/* the switch operand is a comparison result, i.e. 0 or 1 */
418 switch (vcr->min != VC_LIM_KEEP) {
420 if (vcr->soft < vcr->min)
423 lim->rlim_cur = vcr->soft;
428 if (vcr->hard != VC_LIM_KEEP) {
/* same 0/1 switch as for the soft limit */
429 switch (vcr->min != VC_LIM_KEEP) {
431 if (vcr->hard < vcr->min)
434 lim->rlim_max = vcr->hard;
/*
 * set_one_ulimit - read the current rlimit for resource, merge the
 * configured values from *limit into it with adjust_lim(), and write the
 * result back.
 *
 * NOTE(review): the return values of getrlimit() and setrlimit() are not
 * checked, so a rejected limit goes unnoticed here.
 */
442 set_one_ulimit(int resource, const struct vc_rlimit *limit)
445 getrlimit(resource, &lim);
446 adjust_lim(limit, &lim);
447 setrlimit(resource, &lim);
/*
 * set_personality - apply personality_arg through personality().
 *
 * A value of 0 is special-cased before the call (presumably "leave the
 * personality unchanged" - confirm). A negative result from personality()
 * selects the failure branch.
 * The result of this function is what pl_set_ulimits() returns.
 */
451 set_personality(unsigned long int personality_arg)
453 if (personality_arg == 0)
455 if (personality(personality_arg) < 0) {
/*
 * pl_set_ulimits - apply the resource limits and personality stored in
 * *slr.
 *
 * The RSS, AS, NPROC, NOFILE and MEMLOCK limits are applied one after the
 * other through set_one_ulimit(); the value returned is the result of
 * set_personality(slr->personality). pl_chcontext() treats any non-zero
 * result as a failure.
 */
462 pl_set_ulimits(const struct sliver_resources *slr)
467 set_one_ulimit(RLIMIT_RSS, &slr->vs_rss);
468 set_one_ulimit(RLIMIT_AS, &slr->vs_as);
469 set_one_ulimit(RLIMIT_NPROC, &slr->vs_nproc);
470 set_one_ulimit(RLIMIT_NOFILE, &slr->vs_nofile);
471 set_one_ulimit(RLIMIT_MEMLOCK, &slr->vs_memlock);
472 return set_personality(slr->personality);