1 /* Copyright 2005 Princeton University
3 Redistribution and use in source and binary forms, with or without
4 modification, are permitted provided that the following conditions
7 * Redistributions of source code must retain the above copyright
8 notice, this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above
11 copyright notice, this list of conditions and the following
12 disclaimer in the documentation and/or other materials provided
13 with the distribution.
15 * Neither the name of the copyright holder nor the names of its
16 contributors may be used to endorse or promote products derived
17 from this software without specific prior written permission.
19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PRINCETON
23 UNIVERSITY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
25 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
26 OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
27 AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
29 WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 POSSIBILITY OF SUCH DAMAGE.
42 #include <sys/resource.h>
45 #include "sched_cmd.h"
48 #include "planetlab.h"
/*
 * create_context - create the vserver context `ctx` and give it its
 * initial capabilities and resource limits.
 *
 * ctx    context id passed to every vc_* call below
 * bcaps  copied into vc_caps.bcaps before vc_set_ccaps()
 * slr    forwarded unchanged to pl_set_limits()
 *
 * Visible sequence: vc_ctx_create() -> vc_set_ccaps() -> pl_set_limits().
 * NOTE(review): the statements executed when vc_ctx_create() or
 * vc_set_ccaps() fail, and the function's return value, are not visible
 * in this listing - confirm against the complete source.
 */
51 create_context(xid_t ctx, uint64_t bcaps, struct sliver_resources *slr)
53 struct vc_ctx_caps vc_caps;
56 * Create context info - this sets the STATE_SETUP and STATE_INIT flags.
/* VC_NOCTX is the value vc_ctx_create() is compared against to detect failure */
58 if (vc_ctx_create(ctx) == VC_NOCTX)
61 /* Set capabilities - these don't take effect until SETUP flag is unset */
62 vc_caps.bcaps = bcaps;
63 vc_caps.bmask = ~0ULL; /* currently unused */
64 vc_caps.ccaps = 0; /* don't want any of these */
/* all-ones mask combined with ccaps == 0 (see the line above) */
65 vc_caps.cmask = ~0ULL;
66 if (vc_set_ccaps(ctx, &vc_caps))
/* apply the sliver's limits to the new context; any result of the call is not examined here */
69 pl_set_limits(ctx, slr);
/*
 * pl_setup_done - mark context `ctx` as fully set up.
 *
 * Issues a single vc_set_cflags() call whose mask selects only
 * VC_VXF_STATE_SETUP and whose flagword is 0, i.e. only that flag is
 * cleared; every other context flag is outside the mask.
 * NOTE(review): the action taken when vc_set_cflags() fails and the
 * return value are not visible in this listing.
 */
75 pl_setup_done(xid_t ctx)
77 struct vc_ctx_flags vc_flags;
79 /* unset SETUP flag - this allows other processes to migrate */
81 /* Don't clear the STATE_INIT flag, as that would make us the init task. */
82 vc_flags.mask = VC_VXF_STATE_SETUP;
83 vc_flags.flagword = 0;
84 if (vc_set_cflags(ctx, &vc_flags))
/* Upper bound compared against retry_count while waiting for another process to finish setting up the context. */
90 #define RETRY_LIMIT 10
/*
 * pl_chcontext - move the calling process into context `ctx`, creating
 * the context first when it does not exist yet.
 *
 * ctx    context id to enter
 * bcaps  forwarded to create_context() when the context must be created
 * slr    forwarded to create_context() when the context must be created
 *
 * Visible logic: query the context flags with vc_get_cflags(); if that
 * fails, try create_context(); if the context exists but still has
 * VC_VXF_STATE_SETUP set, wait and retry (bounded by RETRY_LIMIT);
 * otherwise call vc_ctx_migrate().
 * NOTE(review): the enclosing loop, the errno checks, the wait itself
 * and every return statement are missing from this listing, so the exact
 * control flow and return values must be confirmed against the full file.
 */
93 pl_chcontext(xid_t ctx, uint64_t bcaps, struct sliver_resources *slr)
99 struct vc_ctx_flags vc_flags;
101 if (vc_get_cflags(ctx, &vc_flags))
106 /* context doesn't exist - create it */
107 if (create_context(ctx, bcaps,slr))
110 /* another process beat us in a race */
113 /* another process is creating - poll the SETUP flag */
118 /* created context and migrated to it i.e., we're done */
122 /* check the SETUP flag */
123 if (vc_flags.flagword & VC_VXF_STATE_SETUP)
125 /* context is still being set up - wait a while then retry */
/* post-increment: the comparison uses the count from before this attempt */
126 if (retry_count++ >= RETRY_LIMIT)
135 /* context has been set up */
/* the branch is taken when vc_ctx_migrate() returns 0 */
137 if (!vc_ctx_migrate(ctx))
140 /* context disappeared - retry */
/*
 * VC_SYSCALL(x) - wrapper used around vserver calls in pl_setsched().
 *
 * The expansion contains a `return`, so it returns from the function
 * that uses the macro: 0 when errno is ESRCH, -1 for any other errno.
 * It can therefore only be used inside functions returning an integer.
 * NOTE(review): the condition guarding the return (presumably "x failed")
 * is on lines missing from this listing - confirm.
 */
146 /* it's okay for a syscall to fail because the context doesn't exist */
147 #define VC_SYSCALL(x) \
151 return errno == ESRCH ? 0 : -1; \
/*
 * pl_setsched - program the token-bucket CPU scheduler of context `ctx`.
 *
 * ctx              context id passed to the vc_* calls
 * cpu_share        stored as the bucket fill rate
 * cpu_sched_flags  tested against VS_SCHED_CPU_GUARANTEED to pick the
 *                  scheduler flag value
 *
 * Every vserver call is wrapped in VC_SYSCALL, which can return from this
 * function early (0 if errno is ESRCH, otherwise -1).
 * NOTE(review): the declaration of new_flags and the final return are not
 * visible in this listing.
 */
156 pl_setsched(xid_t ctx, uint32_t cpu_share, uint32_t cpu_sched_flags)
158 struct vc_set_sched vc_sched;
159 struct vc_ctx_flags vc_flags;
/* set_mask names the vc_sched fields that are filled in below */
162 vc_sched.set_mask = (VC_VXSM_FILL_RATE | VC_VXSM_INTERVAL | VC_VXSM_TOKENS |
163 VC_VXSM_TOKENS_MIN | VC_VXSM_TOKENS_MAX);
164 vc_sched.fill_rate = cpu_share; /* tokens accumulated per interval */
165 vc_sched.interval = 1000; /* milliseconds */
166 vc_sched.tokens = 100; /* initial allocation of tokens */
167 vc_sched.tokens_min = 50; /* need this many tokens to run */
168 vc_sched.tokens_max = 100; /* max accumulated number of tokens */
170 VC_SYSCALL(vc_set_sched(ctx, &vc_sched));
172 /* get current flag values */
173 VC_SYSCALL(vc_get_cflags(ctx, &vc_flags));
175 /* guaranteed CPU corresponds to SCHED_SHARE flag being cleared */
/* NOTE(review): the "? ..." arm of this conditional is missing from the listing; per the comment above it presumably yields 0 - confirm */
176 new_flags = (cpu_sched_flags & VS_SCHED_CPU_GUARANTEED
178 : VC_VXF_SCHED_SHARE);
/* only rewrite the flags when the current SCHED_SHARE bit differs from the wanted one */
179 if ((vc_flags.flagword & VC_VXF_SCHED_SHARE) != new_flags)
181 vc_flags.mask = VC_VXF_SCHED_FLAGS;
/* VC_VXF_SCHED_HARD is always included in the flagword that is written */
182 vc_flags.flagword = new_flags | VC_VXF_SCHED_HARD;
183 VC_SYSCALL(vc_set_cflags(ctx, &vc_flags));
/*
 * One entry of the keyword table used by pl_get_limits(): `limit` points
 * at the sliver_resources field that receives the parsed value.
 * NOTE(review): pl_get_limits() also reads a `name` member (r->name) and
 * initializes entries with a string first; that member's declaration and
 * the closing brace are missing from this listing.
 */
189 struct pl_resources {
191 unsigned long long *limit;
/*
 * WHITESPACE(buffer,index,len) - advance `index` past whitespace in
 * `buffer`; jump to the label `out` if `index` reaches `len` first.
 *
 * Requirements on the call site: `index` must be a modifiable lvalue and a
 * label named `out` must exist in the enclosing function.
 * NOTE(review): buffer[index] is read by isspace() before `index < len`
 * is tested, and the arguments are not parenthesized in the expansion.
 * NOTE(review): the value is cast to int before isspace(); a negative
 * char value would be passed through unchanged - confirm this is intended.
 */
194 #define WHITESPACE(buffer,index,len) \
195 while(isspace((int)buffer[index])) \
196 if (index < len) index++; else goto out;
/* Directory prefix of the per-context configuration file; pl_get_limits() appends "<context>.conf". */
198 #define VSERVERCONF "/etc/vservers/"
/*
 * pl_get_limits - fill *slr from the file VSERVERCONF "<context>.conf".
 *
 * context  name used to build the configuration file path
 * slr      output; every field is first reset to a default (VC_LIM_KEEP
 *          for limits and vs_cpu, 0 for vs_cpuguaranteed, 1 for
 *          vs_whitelisted) and then overwritten by matching
 *          "KEYWORD = <digits>" lines from the file
 *
 * If the file cannot be opened a message is written to stderr.
 * NOTE(review): declarations of fb, p and index, the cleanup code (free /
 * fclose), the `out` label used by WHITESPACE and by the goto statements,
 * and the return are all missing from this listing.
 */
200 pl_get_limits(char *context, struct sliver_resources *slr)
/* room for prefix + context + ".conf" + NULLBYTE_SIZE */
203 size_t len = strlen(VSERVERCONF) + strlen(context) + strlen(".conf") + NULLBYTE_SIZE;
/* NOTE(review): no NULL check on this allocation is visible before conf is used in sprintf() below */
204 char *conf = (char *)malloc(len);
205 struct pl_resources *r;
/*
 * Keyword -> destination table. "CPULIMIT" and "CPUSHARE" both store into
 * slr->vs_cpu, and the two "backwards compatible" keywords alias the
 * corresponding *_HARD fields, so whichever matching line is parsed last
 * determines the final value.
 */
206 struct pl_resources sliver_list[] = {
207 {"CPULIMIT", &slr->vs_cpu},
208 {"CPUSHARE", &slr->vs_cpu},
209 {"CPUGUARANTEED", &slr->vs_cpuguaranteed},
211 {"TASKLIMIT", &slr->vs_nproc.hard}, /* backwards compatible */
212 {"VS_NPROC_HARD", &slr->vs_nproc.hard},
213 {"VS_NPROC_SOFT", &slr->vs_nproc.soft},
214 {"VS_NPROC_MINIMUM", &slr->vs_nproc.min},
216 {"MEMLIMIT", &slr->vs_rss.hard}, /* backwards compatible */
217 {"VS_RSS_HARD", &slr->vs_rss.hard},
218 {"VS_RSS_SOFT", &slr->vs_rss.soft},
219 {"VS_RSS_MINIMUM", &slr->vs_rss.min},
221 {"VS_AS_HARD", &slr->vs_as.hard},
222 {"VS_AS_SOFT", &slr->vs_as.soft},
223 {"VS_AS_MINIMUM", &slr->vs_as.min},
225 {"VS_OPENFD_HARD", &slr->vs_openfd.hard},
226 {"VS_OPENFD_SOFT", &slr->vs_openfd.soft},
227 {"VS_OPENFD_MINIMUM", &slr->vs_openfd.min},
229 {"VS_WHITELISTED", &slr->vs_whitelisted},
/* build the full path of the configuration file in conf */
233 sprintf(conf, "%s%s.conf", VSERVERCONF, context);
/* defaults used for every field that the file does not set */
235 slr->vs_cpu = VC_LIM_KEEP;
236 slr->vs_cpuguaranteed = 0;
238 slr->vs_rss.hard = VC_LIM_KEEP;
239 slr->vs_rss.soft = VC_LIM_KEEP;
240 slr->vs_rss.min = VC_LIM_KEEP;
242 slr->vs_as.hard = VC_LIM_KEEP;
243 slr->vs_as.soft = VC_LIM_KEEP;
244 slr->vs_as.min = VC_LIM_KEEP;
247 slr->vs_nproc.hard = VC_LIM_KEEP;
248 slr->vs_nproc.soft = VC_LIM_KEEP;
249 slr->vs_nproc.min = VC_LIM_KEEP;
251 slr->vs_openfd.hard = VC_LIM_KEEP;
252 slr->vs_openfd.soft = VC_LIM_KEEP;
253 slr->vs_openfd.min = VC_LIM_KEEP;
255 slr->vs_whitelisted = 1;
257 /* open the conf file for reading */
258 fb = fopen(conf,"r");
/* NOTE(review): no NULL check on this allocation is visible before buffer is handed to fgets() */
261 char *buffer = malloc(1000);
264 /* the conf file exists */
/* read one line at a time; fgets() is given 999 as its size argument */
265 while((p=fgets(buffer,1000-1,fb))!=NULL) {
267 len = strnlen(buffer,1000);
/* skip leading whitespace; lines whose first non-blank character is '#' are treated as comments */
268 WHITESPACE(buffer,index,len);
269 if (buffer[index] == '#')
/* the table is walked until an entry with a null name - that terminating entry is not visible in this listing */
272 for (r=&sliver_list[0]; r->name; r++)
/* strstr() accepts the keyword anywhere in the rest of the line, not only at its start */
273 if ((p=strstr(&buffer[index],r->name))!=NULL) {
274 /* adjust index into buffer */
275 index+= (p-&buffer[index])+strlen(r->name);
277 /* skip over whitespace */
278 WHITESPACE(buffer,index,len);
280 /* expecting to see = sign */
281 if (buffer[index++]!='=') goto out;
283 /* skip over whitespace */
284 WHITESPACE(buffer,index,len);
286 /* expecting to see a digit for number */
287 if (!isdigit((int)buffer[index])) goto out;
/* NOTE(review): atoi() returns int, so values beyond the int range cannot be represented even though *r->limit is unsigned long long */
289 *r->limit = atoi(&buffer[index]);
290 if (0) /* for debugging only */
/* NOTE(review): the arguments are not visible; if *r->limit is printed, %ld does not match unsigned long long */
291 fprintf(stderr,"pl_get_limits found %s=%ld\n",
/* reached when the configuration file could not be opened (the branch structure itself is not visible here) */
300 fprintf(stderr,"cannot open %s\n",conf);
/*
 * adjust_lim - fold the min/soft/hard values of *vcr into the process
 * rlimit *lim.
 *
 * vcr  requested limits; a field equal to VC_LIM_KEEP is left alone
 * lim  rlimit previously read by the caller; modified in place
 *
 * Callers in this file call setrlimit() only when the result is non-zero,
 * so the return value is used as a "lim was changed" indicator.
 * NOTE(review): the local flag, the `case` labels of both switch
 * statements and the return are missing from this listing.
 */
306 adjust_lim(struct vc_rlimit *vcr, struct rlimit *lim)
/* min acts as a floor: raise rlim_cur and rlim_max up to it when they are lower */
309 if (vcr->min != VC_LIM_KEEP) {
310 if (vcr->min > lim->rlim_cur) {
311 lim->rlim_cur = vcr->min;
314 if (vcr->min > lim->rlim_max) {
315 lim->rlim_max = vcr->min;
/* soft limit: the switch operand is the 0/1 result of the comparison (is a minimum set?) */
320 if (vcr->soft != VC_LIM_KEEP) {
321 switch (vcr->min != VC_LIM_KEEP) {
/* soft is compared against the minimum before it is applied */
323 if (vcr->soft < vcr->min)
326 lim->rlim_cur = vcr->soft;
/* hard limit: same structure as the soft-limit branch above */
331 if (vcr->hard != VC_LIM_KEEP) {
332 switch (vcr->min != VC_LIM_KEEP) {
334 if (vcr->hard < vcr->min)
/* NOTE(review): the hard value is written to rlim_cur, overwriting any soft value set above; rlim_max looks like the intended target - confirm */
337 lim->rlim_cur = vcr->hard;
/*
 * pl_set_limits - apply the limits in *slr to context `ctx` and to the
 * calling process.
 *
 * ctx  context id passed to vc_set_rlimit() and pl_setsched()
 * slr  limits previously filled in (e.g. by pl_get_limits())
 *
 * For RSS, AS, NPROC and NOFILE the same visible pattern is used:
 * getrlimit() -> adjust_lim() -> setrlimit() -> vc_set_rlimit(), with
 * PERROR() reporting a vc_set_rlimit() failure. Afterwards the CPU share
 * and the guaranteed flag are handed to pl_setsched().
 * The return values of getrlimit() and setrlimit() are not examined.
 * NOTE(review): closing braces, any statements after the PERROR() calls
 * and the end of the function are missing from this listing.
 */
346 pl_set_limits(xid_t ctx, struct sliver_resources *slr)
348 struct rlimit lim; /* getrlimit values */
349 unsigned long long vs_cpu;
350 uint32_t cpu_sched_flags;
353 /* set memory limits */
354 getrlimit(RLIMIT_RSS,&lim);
355 if (adjust_lim(&slr->vs_rss, &lim)) {
356 setrlimit(RLIMIT_RSS, &lim);
357 if (vc_set_rlimit(ctx, RLIMIT_RSS, &slr->vs_rss))
359 PERROR("pl_setrlimit(%u, RLIMIT_RSS)", ctx);
364 /* set address space limits */
365 getrlimit(RLIMIT_AS,&lim);
366 if (adjust_lim(&slr->vs_as, &lim)) {
367 setrlimit(RLIMIT_AS, &lim);
368 if (vc_set_rlimit(ctx, RLIMIT_AS, &slr->vs_as))
370 PERROR("pl_setrlimit(%u, RLIMIT_AS)", ctx);
374 /* set nproc limit */
375 getrlimit(RLIMIT_NPROC,&lim);
376 if (adjust_lim(&slr->vs_nproc, &lim)) {
377 setrlimit(RLIMIT_NPROC, &lim);
378 if (vc_set_rlimit(ctx, RLIMIT_NPROC, &slr->vs_nproc))
380 PERROR("pl_setrlimit(%u, RLIMIT_NPROC)", ctx);
385 /* set openfd limit */
386 getrlimit(RLIMIT_NOFILE,&lim);
387 if (adjust_lim(&slr->vs_openfd, &lim)) {
388 setrlimit(RLIMIT_NOFILE, &lim);
389 if (vc_set_rlimit(ctx, RLIMIT_NOFILE, &slr->vs_openfd))
391 PERROR("pl_setrlimit(%u, RLIMIT_NOFILE)", ctx);
/* fallback definition for builds whose headers do not provide VLIMIT_OPENFD; the #warning makes that visible at compile time */
394 #ifndef VLIMIT_OPENFD
395 #warning VLIMIT_OPENFD should be defined from standard header
396 #define VLIMIT_OPENFD 17
/* the same vs_openfd values are also applied to the VLIMIT_OPENFD resource */
398 if (vc_set_rlimit(ctx, VLIMIT_OPENFD, &slr->vs_openfd))
400 PERROR("pl_setrlimit(%u, VLIMIT_OPENFD)", ctx);
404 vs_cpu = slr->vs_cpu;
/* keep only the VS_SCHED_CPU_GUARANTEED bit of the configured value */
405 cpu_sched_flags = slr->vs_cpuguaranteed & VS_SCHED_CPU_GUARANTEED;
/* vs_cpu is unsigned long long but is passed as pl_setsched()'s uint32_t cpu_share parameter */
411 if (pl_setsched(ctx, vs_cpu, cpu_sched_flags) < 0) {
/* NOTE(review): the format string contains "&u" while every other PERROR() call here uses "%u"; as written ctx has no conversion to consume it - likely a typo, confirm */
412 PERROR("pl_setsched(&u)", ctx);