1 /* Copyright 2005 Princeton University
3 Redistribution and use in source and binary forms, with or without
4 modification, are permitted provided that the following conditions
7 * Redistributions of source code must retain the above copyright
8 notice, this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above
11 copyright notice, this list of conditions and the following
12 disclaimer in the documentation and/or other materials provided
13 with the distribution.
15 * Neither the name of the copyright holder nor the names of its
16 contributors may be used to endorse or promote products derived
17 from this software without specific prior written permission.
19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PRINCETON
23 UNIVERSITY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
25 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
26 OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
27 AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
29 WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 POSSIBILITY OF SUCH DAMAGE.
45 #include <sys/resource.h>
46 #include <sys/types.h>
52 #include "planetlab.h"
/*
 * get_space_flag() is called from pl_chcontext() with a context id; its
 * result is used there as the set of flags handed to unshare(),
 * vc_set_namespace() and vc_enter_namespace().
 */
54 /* defined in netns.c */
55 extern uint32_t get_space_flag(xid_t);
/*
 * Fallback values for network-context constants, defined here only when the
 * included headers do not already provide them.
 * VC_NXC_RAW_SOCKET is used as a network capability in create_context() and
 * VC_NXF_LBACK_ALLOW as a network flag there.
 * NOTE(review): VC_NXC_RAW_SEND is not referenced by the code visible in this
 * file - confirm whether it is still needed.
 */
57 #ifndef VC_NXC_RAW_SOCKET
58 # define VC_NXC_RAW_SOCKET 0x00000200ull
60 #ifndef VC_NXC_RAW_SEND
61 # define VC_NXC_RAW_SEND 0x00000400ull
63 #ifndef VC_NXF_LBACK_ALLOW
64 # define VC_NXF_LBACK_ALLOW 0x00000400ull
/*
 * create_context - create and configure the network, tag and process
 * contexts for context id `ctx`.
 *
 * ctx:           context id passed to every vc_* call below
 * bcaps:         stored in vc_caps.bcaps and applied with vc_set_ccaps()
 * unshare_flags: when non-zero, passed to unshare() and then, OR-ed with
 *                vc_get_space_mask(), to vc_set_namespace()
 *
 * The steps run in this order: vc_net_create, vc_set_nflags, vc_set_ncaps,
 * vc_tag_create, vc_ctx_create, optional namespace setup, vc_set_ccaps,
 * pl_setsched.
 *
 * NOTE(review): the statements executed on each failure test are not shown
 * here. pl_chcontext() branches on a non-zero result of this function -
 * confirm the exact return convention.
 */
68 create_context(xid_t ctx, uint64_t bcaps, uint32_t unshare_flags)
70 struct vc_ctx_caps vc_caps;
71 struct vc_net_flags vc_nf;
72 struct vc_net_caps vc_ncaps;
74 /* Create network context */
75 if (vc_net_create(ctx) == VC_NOCTX) {
81 /* Make the network context persistent */
/* The same bit set (PERSISTENT and LBACK_ALLOW) is written to both mask and flagword. */
82 vc_nf.mask = vc_nf.flagword = VC_NXF_PERSISTENT | VC_NXF_LBACK_ALLOW;
83 if (vc_set_nflags(ctx, &vc_nf))
86 /* Give it raw sockets capabilities */
/* The same bit set (RAW_ICMP and RAW_SOCKET) is written to both ncaps and cmask. */
87 vc_ncaps.ncaps = vc_ncaps.cmask = VC_NXC_RAW_ICMP | VC_NXC_RAW_SOCKET;
88 if (vc_set_ncaps(ctx, &vc_ncaps))
92 /* Create tag context */
93 if (vc_tag_create(ctx) == VC_NOCTX)
97 * Create context info - this sets the STATE_SETUP and STATE_INIT flags.
99 if (vc_ctx_create(ctx, 0) == VC_NOCTX)
102 if (unshare_flags != 0) {
/* NOTE(review): the results of unshare() and vc_set_namespace() are not checked. */
103 unshare(unshare_flags);
104 unshare_flags |= vc_get_space_mask();
105 vc_set_namespace(ctx, unshare_flags);
108 /* Set capabilities - these don't take effect until SETUP flag is unset */
109 vc_caps.bcaps = bcaps;
110 vc_caps.bmask = ~0ULL; /* currently unused */
111 vc_caps.ccaps = 0; /* don't want any of these */
112 vc_caps.cmask = ~0ULL;
113 if (vc_set_ccaps(ctx, &vc_caps))
/* Initial scheduler setup: cpu_min = 0, cpu_share = 1 (see pl_setsched). */
116 if (pl_setsched(ctx, 0, 1) < 0) {
117 PERROR("pl_setsched(%u)", ctx);
/*
 * pl_setup_done - mark context `ctx` as fully set up.
 *
 * Issues one vc_set_cflags() call whose mask covers VC_VXF_STATE_SETUP and
 * VC_VXF_PERSISTENT and whose flagword contains only VC_VXF_PERSISTENT, so
 * SETUP is cleared and PERSISTENT is set in the same request.
 *
 * NOTE(review): the statement run when vc_set_cflags() returns non-zero, and
 * the function's return value, are not shown here.
 */
125 pl_setup_done(xid_t ctx)
127 struct vc_ctx_flags vc_flags;
129 /* unset SETUP flag - this allows other processes to migrate */
130 /* set the PERSISTENT flag - so the context doesn't vanish */
131 /* Don't clear the STATE_INIT flag, as that would make us the init task. */
132 vc_flags.mask = VC_VXF_STATE_SETUP|VC_VXF_PERSISTENT;
133 vc_flags.flagword = VC_VXF_PERSISTENT;
134 if (vc_set_cflags(ctx, &vc_flags))
/* Upper bound for retry_count while pl_chcontext() waits on a context whose SETUP flag is still set. */
140 #define RETRY_LIMIT 10
/*
 * pl_chcontext - apply the sliver's resource limits, then move the calling
 * process into context `ctx`, creating the context first if it does not
 * exist yet.
 *
 * ctx:   context id handed to the vc_* calls below
 * bcaps: forwarded to create_context() when the context has to be created
 * slr:   limits applied up front through pl_set_ulimits()
 *
 * NOTE(review): the loop construct, the wait between retries and the return
 * statements are not shown here - confirm the return convention against the
 * callers.
 */
143 pl_chcontext(xid_t ctx, uint64_t bcaps, const struct sliver_resources *slr)
/* Non-zero makes the test further down skip vc_net_migrate(). */
146 int net_migrated = 0;
/* pl_set_ulimits() returns the result of set_personality(); non-zero is treated as failure. */
148 if (pl_set_ulimits(slr) != 0)
153 struct vc_ctx_flags vc_flags;
/* A non-zero result from vc_get_cflags() leads into the creation path below. */
155 if (vc_get_cflags(ctx, &vc_flags))
157 uint32_t unshare_flags;
161 /* Unshare the net namespace of the slice if requested in the local slice configuration */
162 unshare_flags = get_space_flag(ctx);
164 /* context doesn't exist - create it */
165 if (create_context(ctx, bcaps, unshare_flags))
168 /* another process beat us in a race */
171 /* another process is creating - poll the SETUP flag */
176 /* created context and migrated to it i.e., we're done */
180 /* check the SETUP flag */
181 if (vc_flags.flagword & VC_VXF_STATE_SETUP)
183 /* context is still being setup - wait a while then retry */
184 if (retry_count++ >= RETRY_LIMIT)
193 /* context has been setup */
/* Proceeds when the network context was already entered or vc_net_migrate() returns 0. */
195 if (net_migrated || !vc_net_migrate(ctx))
197 uint32_t unshare_flags;
198 /* Unshare the net namespace of the slice if requested in the local slice configuration */
199 unshare_flags = get_space_flag(ctx);
200 if (unshare_flags != 0) {
201 unshare_flags |=vc_get_space_mask();
/* NOTE(review): the result of vc_enter_namespace() is not checked. */
202 vc_enter_namespace(ctx, unshare_flags);
/* Both migrations must return 0 for this branch to be taken. */
205 if (!vc_tag_migrate(ctx) && !vc_ctx_migrate(ctx, 0))
210 /* context disappeared - retry */
/*
 * VC_SYSCALL(x) - wrapper around a vserver call `x`.
 *
 * The macro contains a `return` statement, so it can leave the *enclosing
 * function*: on that path the function returns 0 when errno is ESRCH and -1
 * otherwise. It may therefore only be used inside functions returning int.
 * NOTE(review): the condition that triggers the return is not shown here.
 */
216 /* it's okay for a syscall to fail because the context doesn't exist */
217 #define VC_SYSCALL(x) \
221 return errno == ESRCH ? 0 : -1; \
/*
 * pl_setsched - configure the scheduler parameters of context `ctx`.
 *
 * ctx:       context id passed to vc_set_sched() and vc_set_cflags()
 * cpu_min:   copied to vc_sched.fill_rate
 * cpu_share: copied to vc_sched.fill_rate2; when it equals
 *            (uint32_t)VC_LIM_KEEP, both fill-rate bits are removed from
 *            set_mask
 *
 * Both vserver calls go through VC_SYSCALL, which can return from this
 * function early (0 when errno is ESRCH, -1 otherwise).
 * NOTE(review): the final return statement is not shown here.
 */
226 pl_setsched(xid_t ctx, uint32_t cpu_min, uint32_t cpu_share)
228 struct vc_set_sched vc_sched;
229 struct vc_ctx_flags vc_flags;
/* Start with every listed field selected; the mask is adjusted below. */
231 vc_sched.set_mask = (VC_VXSM_FILL_RATE | VC_VXSM_INTERVAL | VC_VXSM_TOKENS |
232 VC_VXSM_TOKENS_MIN | VC_VXSM_TOKENS_MAX | VC_VXSM_MSEC |
233 VC_VXSM_FILL_RATE2 | VC_VXSM_INTERVAL2 | VC_VXSM_FORCE);
234 vc_sched.fill_rate = cpu_min; /* percent reserved */
235 vc_sched.interval = 100;
236 vc_sched.fill_rate2 = cpu_share; /* best-effort fair share of unreserved */
237 vc_sched.interval2 = 1000; /* milliseconds */
238 vc_sched.tokens = 100; /* initial allocation of tokens */
239 vc_sched.tokens_min = 50; /* need this many tokens to run */
240 vc_sched.tokens_max = 100; /* max accumulated number of tokens */
/* VC_LIM_KEEP as cpu_share: leave both fill rates untouched. */
243 if (cpu_share == (uint32_t)VC_LIM_KEEP)
244 vc_sched.set_mask &= ~(VC_VXSM_FILL_RATE|VC_VXSM_FILL_RATE2);
/* NOTE(review): presumably the alternative branch of the KEEP test above - confirm. */
246 vc_sched.set_mask |= VC_VXSM_IDLE_TIME;
249 VC_SYSCALL(vc_set_sched(ctx, &vc_sched));
/* Within the VC_VXF_SCHED_FLAGS mask, only VC_VXF_SCHED_HARD is set. */
251 vc_flags.mask = VC_VXF_SCHED_FLAGS;
252 vc_flags.flagword = VC_VXF_SCHED_HARD;
253 VC_SYSCALL(vc_set_cflags(ctx, &vc_flags));
/*
 * struct pl_resources - one entry of the table walked by pl_get_limits().
 *
 * pl_get_limits() reads three things from each entry: `name` (the file it
 * opens), `type` (compared against TYPE_LONG and TYPE_PERS) and a
 * destination pointer. The two members below are the destination pointers.
 * NOTE(review): the declarations of `name` and `type`, and how `limit` and
 * `personality` are grouped, are not shown here.
 */
263 struct pl_resources {
/* Destination for numeric entries (presumably TYPE_LONG - confirm). */
267 unsigned long long *limit;
/* Destination written by pl_get_limits() for TYPE_PERS entries. */
268 unsigned long int *personality;
/*
 * WHITESPACE(buffer, index, len) - advance `index` while buffer[index] is a
 * whitespace character (isspace()).
 *
 * If `index` is no longer below `len` while still on whitespace, control
 * jumps to a label named `out`, which the enclosing function must provide.
 *
 * Caveats:
 *  - buffer[index] is read before `index < len` is tested.
 *  - the arguments are expanded several times and are not parenthesized.
 *  - the expansion is a bare while statement, not wrapped in do { } while (0).
 */
272 #define WHITESPACE(buffer,index,len) \
273 while(isspace((int)buffer[index])) \
274 if (index < len) index++; else goto out;
/* Directory prefix; pl_get_limits() appends the context name to it. */
276 #define VSERVERCONF "/etc/vservers/"
/*
 * pl_get_limits - read the per-sliver limits from the configuration
 * directory VSERVERCONF<context> into *slr.
 *
 * context: name appended to VSERVERCONF to form the directory to chdir() into
 * slr:     destination. The rlimit fields visible below are first set to
 *          VC_LIM_KEEP and personality to 0, then overwritten from the files
 *          named in sliver_list.
 *
 * Each table entry names a file relative to that directory. One line is read
 * from it with fgets() and parsed as a number (TYPE_LONG) or as a
 * personality name (TYPE_PERS).
 *
 * NOTE(review): cleanup (closing files, restoring the working directory,
 * freeing `conf`) and the return statement are not shown here.
 */
279 pl_get_limits(const char *context, struct sliver_resources *slr)
283 struct pl_resources *r;
/* Maps file names (relative to the config directory) to fields of *slr. */
285 struct pl_resources sliver_list[] = {
286 {"sched/fill-rate2", TYPE_LONG, &slr->vs_cpu},
288 {"rlimits/nproc.hard", TYPE_LONG, &slr->vs_nproc.hard},
289 {"rlimits/nproc.soft", TYPE_LONG, &slr->vs_nproc.soft},
290 {"rlimits/nproc.min", TYPE_LONG, &slr->vs_nproc.min},
292 {"rlimits/rss.hard", TYPE_LONG, &slr->vs_rss.hard},
293 {"rlimits/rss.soft", TYPE_LONG, &slr->vs_rss.soft},
294 {"rlimits/rss.min", TYPE_LONG, &slr->vs_rss.min},
296 {"rlimits/as.hard", TYPE_LONG, &slr->vs_as.hard},
297 {"rlimits/as.soft", TYPE_LONG, &slr->vs_as.soft},
298 {"rlimits/as.min", TYPE_LONG, &slr->vs_as.min},
300 {"rlimits/nofile.hard", TYPE_LONG, &slr->vs_nofile.hard},
301 {"rlimits/nofile.soft", TYPE_LONG, &slr->vs_nofile.soft},
302 {"rlimits/nofile.min", TYPE_LONG, &slr->vs_nofile.min},
304 {"rlimits/memlock.hard", TYPE_LONG, &slr->vs_memlock.hard},
305 {"rlimits/memlock.soft", TYPE_LONG, &slr->vs_memlock.soft},
306 {"rlimits/memlock.min", TYPE_LONG, &slr->vs_memlock.min},
308 {"personality", TYPE_PERS, &slr->personality},
/* Buffer size: prefix + context name + NULLBYTE_SIZE for the terminator. */
313 size_t len = strlen(VSERVERCONF) + strlen(context) + NULLBYTE_SIZE;
/* NOTE(review): the malloc() result is passed to sprintf() without a NULL check. */
314 char *conf = (char *)malloc(len);
315 sprintf(conf, "%s%s", VSERVERCONF, context);
/* Defaults: VC_LIM_KEEP marks a limit as "leave unchanged" (see adjust_lim). */
317 slr->vs_rss.hard = VC_LIM_KEEP;
318 slr->vs_rss.soft = VC_LIM_KEEP;
319 slr->vs_rss.min = VC_LIM_KEEP;
321 slr->vs_as.hard = VC_LIM_KEEP;
322 slr->vs_as.soft = VC_LIM_KEEP;
323 slr->vs_as.min = VC_LIM_KEEP;
325 slr->vs_nproc.hard = VC_LIM_KEEP;
326 slr->vs_nproc.soft = VC_LIM_KEEP;
327 slr->vs_nproc.min = VC_LIM_KEEP;
329 slr->vs_nofile.hard = VC_LIM_KEEP;
330 slr->vs_nofile.soft = VC_LIM_KEEP;
331 slr->vs_nofile.min = VC_LIM_KEEP;
333 slr->vs_memlock.hard = VC_LIM_KEEP;
334 slr->vs_memlock.soft = VC_LIM_KEEP;
335 slr->vs_memlock.min = VC_LIM_KEEP;
/* Personality 0 is the "do nothing" value tested in set_personality(). */
337 slr->personality = 0;
/* Take a descriptor on the current directory before changing into the config directory. */
339 cwd = open(".", O_RDONLY);
341 perror("cannot get a handle on .");
344 if (chdir(conf) == -1) {
345 fprintf(stderr, "cannot chdir to ");
/* Walk the table until an entry whose name is NULL; the table is expected to end with one. */
350 for (r = &sliver_list[0]; r->name; r++) {
352 fb = fopen(r->name, "r");
/* Only the first line of each file is used. */
355 if (fgets(buf, sizeof(buf), fb) != NULL) {
357 /* remove trailing newline */
358 if (buf[len-1] == '\n') {
362 if (r->type == TYPE_LONG) {
/* Base 0: accepts decimal, octal (leading 0) and hex (leading 0x). */
366 val = strtol(buf,&res,0);
/*
 * NOTE(review): strtol() always stores a non-NULL end pointer, so
 * (val==0 && res) is true for every input that parses to 0. The guarded
 * statement is therefore skipped for the value 0 as well as on errno
 * errors - confirm this is intended.
 */
367 if ( !( (val==0 && res) || (errno!=0) ) )
/* Personality entries are parsed only when the line starts with a letter. */
369 } else if ( (r->type == TYPE_PERS) && isalpha(*buf)) {
370 unsigned long int res;
371 res = vc_str2personalitytype(buf,len);
/* Unknown names leave slr->personality unchanged. */
372 if (res != VC_BAD_PERSONALITY) {
373 *r->personality = res;
/*
 * adjust_lim - merge a vserver limit triple (min/soft/hard) into a struct
 * rlimit in place.
 *
 * vcr: requested limits; a field equal to VC_LIM_KEEP is ignored
 * lim: current limits, modified in place
 *
 * Visible effects:
 *  - min:  raises rlim_cur and rlim_max to at least vcr->min
 *  - soft: assigned to rlim_cur
 *  - hard: assigned to rlim_max
 * For soft and hard, a switch on whether min is set leads to a comparison
 * against vcr->min.
 * NOTE(review): the case labels and the action taken when soft/hard is below
 * min are not shown here.
 */
389 adjust_lim(const struct vc_rlimit *vcr, struct rlimit *lim)
/* Enforce the minimum on both the current and the maximum limit. */
392 if (vcr->min != VC_LIM_KEEP) {
393 if (vcr->min > lim->rlim_cur) {
394 lim->rlim_cur = vcr->min;
397 if (vcr->min > lim->rlim_max) {
398 lim->rlim_max = vcr->min;
/* Soft limit -> rlim_cur. */
403 if (vcr->soft != VC_LIM_KEEP) {
/* The switch operand is the 0/1 result of the comparison. */
404 switch (vcr->min != VC_LIM_KEEP) {
406 if (vcr->soft < vcr->min)
409 lim->rlim_cur = vcr->soft;
/* Hard limit -> rlim_max, same structure as the soft limit above. */
414 if (vcr->hard != VC_LIM_KEEP) {
415 switch (vcr->min != VC_LIM_KEEP) {
417 if (vcr->hard < vcr->min)
420 lim->rlim_max = vcr->hard;
/*
 * set_one_ulimit - read the current limit for `resource`, merge `limit`
 * into it with adjust_lim(), and write the result back.
 *
 * resource: RLIMIT_* constant passed to getrlimit()/setrlimit()
 * limit:    requested min/soft/hard values
 *
 * NOTE(review): the results of getrlimit() and setrlimit() are ignored, so a
 * failure to read or apply the limit goes unnoticed.
 */
428 set_one_ulimit(int resource, const struct vc_rlimit *limit)
431 getrlimit(resource, &lim);
432 adjust_lim(limit, &lim);
433 setrlimit(resource, &lim);
/*
 * set_personality - apply an execution personality through personality().
 *
 * personality_arg: value handed to personality(); 0 is special-cased first
 *                  (presumably "nothing to do" - the statement taken in that
 *                  case is not shown here)
 *
 * A negative result from personality() selects the failure branch. The
 * function's result is returned unchanged by pl_set_ulimits().
 */
437 set_personality(unsigned long int personality_arg)
439 if (personality_arg == 0)
441 if (personality(personality_arg) < 0) {
/*
 * pl_set_ulimits - apply the sliver's resource limits to the calling process.
 *
 * slr: source of the RSS, address-space, process-count, open-file and
 *      locked-memory limits and of the personality value
 *
 * Returns the result of set_personality(slr->personality). The individual
 * set_one_ulimit() calls report nothing, so rlimit failures do not affect
 * the return value.
 */
448 pl_set_ulimits(const struct sliver_resources *slr)
453 set_one_ulimit(RLIMIT_RSS, &slr->vs_rss);
454 set_one_ulimit(RLIMIT_AS, &slr->vs_as);
455 set_one_ulimit(RLIMIT_NPROC, &slr->vs_nproc);
456 set_one_ulimit(RLIMIT_NOFILE, &slr->vs_nofile);
457 set_one_ulimit(RLIMIT_MEMLOCK, &slr->vs_memlock);
458 return set_personality(slr->personality);