1 /* Copyright 2005 Princeton University
3 Redistribution and use in source and binary forms, with or without
4 modification, are permitted provided that the following conditions
7 * Redistributions of source code must retain the above copyright
8 notice, this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above
11 copyright notice, this list of conditions and the following
12 disclaimer in the documentation and/or other materials provided
13 with the distribution.
15 * Neither the name of the copyright holder nor the names of its
16 contributors may be used to endorse or promote products derived
17 from this software without specific prior written permission.
19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PRINCETON
23 UNIVERSITY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
25 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
26 OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
27 AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
29 WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 POSSIBILITY OF SUCH DAMAGE.
42 #include <sys/resource.h>
45 #include "sched_cmd.h"
48 #include "planetlab.h"
/*
 * create_context - create vserver context ctx, then install its
 * capabilities and resource limits.
 *
 * ctx:   id passed to vc_ctx_create(), vc_set_ccaps() and pl_set_limits().
 * flags: not referenced by any statement shown in this function (see the
 *        XXX note below). NOTE(review): confirm it is intentionally unused.
 * bcaps: copied into vc_caps.bcaps before the vc_set_ccaps() call.
 * slr:   forwarded unchanged to pl_set_limits().
 *
 * Two conditions are tested as failures: vc_ctx_create() returning
 * VC_NOCTX, and vc_set_ccaps() returning non-zero.
 */
51 create_context(xid_t ctx, uint32_t flags, uint64_t bcaps, struct sliver_resources *slr)
53 struct vc_ctx_caps vc_caps;
56 * Create context info - this sets the STATE_SETUP and STATE_INIT flags.
57 * Don't ever clear the STATE_INIT flag, that makes us the init task.
59 * XXX - the kernel code allows initial flags to be passed as an arg.
61 if (vc_ctx_create(ctx) == VC_NOCTX)
64 /* set capabilities - these don't take effect until SETUP flag is unset */
65 vc_caps.bcaps = bcaps;
66 vc_caps.bmask = ~0ULL; /* currently unused */
67 vc_caps.ccaps = 0; /* don't want any of these */
68 vc_caps.cmask = ~0ULL;
69 if (vc_set_ccaps(ctx, &vc_caps))
/* apply the per-sliver limits (and scheduler settings) to the new context */
72 pl_set_limits(ctx, slr);
/*
 * pl_setup_done - mark context ctx as finished with setup.
 *
 * Builds a vc_ctx_flags value whose mask is VC_VXF_STATE_SETUP and whose
 * flagword is 0, and hands it to vc_set_cflags(). A non-zero result from
 * vc_set_cflags() is tested as the failure condition.
 */
78 pl_setup_done(xid_t ctx)
80 struct vc_ctx_flags vc_flags;
82 /* unset SETUP flag - this allows other processes to migrate */
83 vc_flags.mask = VC_VXF_STATE_SETUP;
84 vc_flags.flagword = 0;
85 if (vc_set_cflags(ctx, &vc_flags))
/* bound compared against retry_count while the context still has SETUP set */
91 #define RETRY_LIMIT 10
/*
 * pl_chcontext - move the calling process into context ctx, creating the
 * context first when it does not exist yet.
 *
 * ctx:               context to enter.
 * flags, bcaps, slr: passed through to create_context() when the context
 *                    has to be created.
 *
 * Visible steps:
 *  - vc_get_cflags() is used to probe the context; a non-zero result
 *    leads to the create_context() path.
 *  - When the flag word has VC_VXF_STATE_SETUP set, another process is
 *    still setting the context up; retry_count is incremented and
 *    checked against RETRY_LIMIT.
 *  - Otherwise vc_ctx_migrate() is called; a zero result is the tested
 *    success condition, and per the comment below a failure there is
 *    treated as the context having disappeared, followed by a retry.
 */
94 pl_chcontext(xid_t ctx, uint32_t flags, uint64_t bcaps, struct sliver_resources *slr)
100 struct vc_ctx_flags vc_flags;
102 if (vc_get_cflags(ctx, &vc_flags))
107 /* context doesn't exist - create it */
108 if (create_context(ctx, flags, bcaps,slr))
111 /* another process beat us in a race */
114 /* another process is creating - poll the SETUP flag */
119 /* created context and migrated to it i.e., we're done */
123 /* check the SETUP flag */
124 if (vc_flags.flagword & VC_VXF_STATE_SETUP)
126 /* context is still being setup - wait a while then retry */
127 if (retry_count++ >= RETRY_LIMIT)
136 /* context has been setup */
138 if (!vc_ctx_migrate(ctx))
141 /* context disappeared - retry */
/*
 * VC_SYSCALL(x) - wrapper used by pl_setsched() around vserver calls.
 *
 * The expansion contains a return statement, so it leaves the function
 * that uses the macro: the returned value is 0 when errno is ESRCH and
 * -1 for any other errno.
 * NOTE(review): presumably the return is taken only when x reports
 * failure -- confirm against the full macro body.
 */
147 /* it's okay for a syscall to fail because the context doesn't exist */
148 #define VC_SYSCALL(x) \
152 return errno == ESRCH ? 0 : -1; \
/*
 * pl_setsched - set the CPU scheduler parameters and scheduler flags of
 * context ctx.
 *
 * ctx:             context to configure.
 * cpu_share:       stored as the scheduler fill_rate.
 * cpu_sched_flags: tested against VS_SCHED_CPU_GUARANTEED to choose
 *                  whether VC_VXF_SCHED_SHARE should be set.
 *
 * Every vserver call is wrapped in VC_SYSCALL(), which contains a return
 * statement; its visible part returns 0 when errno is ESRCH and -1
 * otherwise. The caller in this file treats a negative result as an
 * error.
 */
157 pl_setsched(xid_t ctx, uint32_t cpu_share, uint32_t cpu_sched_flags)
159 struct vc_set_sched vc_sched;
160 struct vc_ctx_flags vc_flags;
/* set_mask names the five fields that are filled in below */
163 vc_sched.set_mask = (VC_VXSM_FILL_RATE | VC_VXSM_INTERVAL | VC_VXSM_TOKENS |
164 VC_VXSM_TOKENS_MIN | VC_VXSM_TOKENS_MAX);
165 vc_sched.fill_rate = cpu_share; /* tokens accumulated per interval */
166 vc_sched.interval = 1000; /* milliseconds */
167 vc_sched.tokens = 100; /* initial allocation of tokens */
168 vc_sched.tokens_min = 50; /* need this many tokens to run */
169 vc_sched.tokens_max = 100; /* max accumulated number of tokens */
171 VC_SYSCALL(vc_set_sched(ctx, &vc_sched));
173 /* get current flag values */
174 VC_SYSCALL(vc_get_cflags(ctx, &vc_flags));
176 /* guaranteed CPU corresponds to SCHED_SHARE flag being cleared */
177 new_flags = (cpu_sched_flags & VS_SCHED_CPU_GUARANTEED
179 : VC_VXF_SCHED_SHARE);
/* only write the flags back when the SCHED_SHARE bit actually changes */
180 if ((vc_flags.flagword & VC_VXF_SCHED_SHARE) != new_flags)
/* the written flagword always has VC_VXF_SCHED_HARD OR-ed in */
182 vc_flags.mask = VC_VXF_SCHED_FLAGS;
183 vc_flags.flagword = new_flags | VC_VXF_SCHED_HARD;
184 VC_SYSCALL(vc_set_cflags(ctx, &vc_flags));
/*
 * One entry of the key table used by pl_get_limits(): pairs a
 * configuration key (accessed as r->name there) with the location that
 * receives the parsed value.
 */
190 struct pl_resources {
/* destination written through by pl_get_limits() when the key is found */
192 unsigned long long *limit;
/*
 * WHITESPACE(buffer,index,len) - advance index past whitespace in buffer.
 *
 * While buffer[index] is whitespace, index is incremented as long as it
 * is below len; otherwise control jumps to a label named out, which the
 * function using the macro must provide.
 *
 * NOTE(review): buffer[index] is read by isspace() before index is
 * compared with len, so the bound check comes after the access.
 * NOTE(review): the expansion is a bare while/if/else statement and the
 * arguments are not parenthesized; using it as the body of an unbraced
 * if/else would change how the else binds.
 * NOTE(review): a plain char with a negative value is passed to
 * isspace() via an int cast; the usual idiom casts to unsigned char.
 */
195 #define WHITESPACE(buffer,index,len) \
196 while(isspace((int)buffer[index])) \
197 if (index < len) index++; else goto out;
/* directory prefix of the per-context configuration file */
199 #define VSERVERCONF "/etc/vservers/"
/*
 * pl_get_limits - fill *slr from the configuration file of a context.
 *
 * context: name used to build the path VSERVERCONF + context + ".conf".
 * slr:     output; every field listed below is first reset to its
 *          default and then overwritten for each key found in the file.
 *
 * Defaults: vs_cpu and all hard/soft/min members of vs_rss, vs_as,
 * vs_nproc and vs_openfd are set to VC_LIM_KEEP, vs_cpuguaranteed to 0
 * and vs_whitelisted to 1.
 *
 * File format accepted by the visible parser: a line whose first
 * non-whitespace character is # is tested for separately (comment
 * line); otherwise each table key is searched for with strstr(), and
 * must be followed by optional whitespace, an = sign, optional
 * whitespace and a decimal digit. The number is converted with atoi().
 *
 * When the file cannot be opened a message is printed to stderr.
 *
 * NOTE(review): the result of malloc() for conf is not checked in the
 * lines shown, and sprintf() is used where snprintf(conf, len, ...)
 * would be bounded.
 * NOTE(review): atoi() yields an int that is stored into an unsigned
 * long long, so values outside the int range cannot be represented.
 * NOTE(review): strstr() matches a key anywhere in the rest of the
 * line, not only at the start of the line.
 */
201 pl_get_limits(char *context, struct sliver_resources *slr)
/* room for prefix, context name, ".conf" suffix and the terminator */
204 size_t len = strlen(VSERVERCONF) + strlen(context) + strlen(".conf") + NULLBYTE_SIZE;
205 char *conf = (char *)malloc(len);
206 struct pl_resources *r;
/* key table; two keys may share one destination (old and new spellings) */
207 struct pl_resources sliver_list[] = {
208 {"CPULIMIT", &slr->vs_cpu},
209 {"CPUSHARE", &slr->vs_cpu},
210 {"CPUGUARANTEED", &slr->vs_cpuguaranteed},
212 {"TASKLIMIT", &slr->vs_nproc.hard}, /* backwards compatible */
213 {"VS_NPROC_HARD", &slr->vs_nproc.hard},
214 {"VS_NPROC_SOFT", &slr->vs_nproc.soft},
215 {"VS_NPROC_MINIMUM", &slr->vs_nproc.min},
217 {"MEMLIMIT", &slr->vs_rss.hard}, /* backwards compatible */
218 {"VS_RSS_HARD", &slr->vs_rss.hard},
219 {"VS_RSS_SOFT", &slr->vs_rss.soft},
220 {"VS_RSS_MINIMUM", &slr->vs_rss.min},
222 {"VS_AS_HARD", &slr->vs_as.hard},
223 {"VS_AS_SOFT", &slr->vs_as.soft},
224 {"VS_AS_MINIMUM", &slr->vs_as.min},
226 {"VS_OPENFD_HARD", &slr->vs_openfd.hard},
227 {"VS_OPENFD_SOFT", &slr->vs_openfd.soft},
228 {"VS_OPENFD_MINIMUM", &slr->vs_openfd.min},
230 {"VS_WHITELISTED", &slr->vs_whitelisted},
/* build the configuration file path */
234 sprintf(conf, "%s%s.conf", VSERVERCONF, context);
/* reset every output field to its default before parsing */
236 slr->vs_cpu = VC_LIM_KEEP;
237 slr->vs_cpuguaranteed = 0;
239 slr->vs_rss.hard = VC_LIM_KEEP;
240 slr->vs_rss.soft = VC_LIM_KEEP;
241 slr->vs_rss.min = VC_LIM_KEEP;
243 slr->vs_as.hard = VC_LIM_KEEP;
244 slr->vs_as.soft = VC_LIM_KEEP;
245 slr->vs_as.min = VC_LIM_KEEP;
248 slr->vs_nproc.hard = VC_LIM_KEEP;
249 slr->vs_nproc.soft = VC_LIM_KEEP;
250 slr->vs_nproc.min = VC_LIM_KEEP;
252 slr->vs_openfd.hard = VC_LIM_KEEP;
253 slr->vs_openfd.soft = VC_LIM_KEEP;
254 slr->vs_openfd.min = VC_LIM_KEEP;
256 slr->vs_whitelisted = 1;
258 /* open the conf file for reading */
259 fb = fopen(conf,"r");
/* line buffer; fgets() below is given a limit of 1000-1 bytes */
262 char *buffer = malloc(1000);
265 /* the conf file exists */
266 while((p=fgets(buffer,1000-1,fb))!=NULL) {
/* len is reused here for the length of the current line */
268 len = strnlen(buffer,1000);
269 WHITESPACE(buffer,index,len);
270 if (buffer[index] == '#')
/* try every key; the table walk stops at an entry whose name is NULL */
273 for (r=&sliver_list[0]; r->name; r++)
274 if ((p=strstr(&buffer[index],r->name))!=NULL) {
275 /* adjust index into buffer */
276 index+= (p-&buffer[index])+strlen(r->name);
278 /* skip over whitespace */
279 WHITESPACE(buffer,index,len);
281 /* expecting to see = sign */
282 if (buffer[index++]!='=') goto out;
284 /* skip over whitespace */
285 WHITESPACE(buffer,index,len);
287 /* expecting to see a digit for number */
288 if (!isdigit((int)buffer[index])) goto out;
/* store the parsed value through the table entry */
290 *r->limit = atoi(&buffer[index]);
291 if (0) /* for debugging only */
292 fprintf(stderr,"pl_get_limits found %s=%ld\n",
301 fprintf(stderr,"cannot open %s\n",conf);
/*
 * pl_set_limits - apply the limits in *slr to context ctx and configure
 * its CPU scheduling.
 *
 * ctx: context passed to vc_set_rlimit() and pl_setsched().
 * slr: limits to apply (vs_rss, vs_as, vs_nproc, vs_openfd, vs_cpu,
 *      vs_cpuguaranteed).
 *
 * The same sequence is repeated for RLIMIT_RSS, RLIMIT_AS, RLIMIT_NPROC
 * and RLIMIT_NOFILE:
 *  1. getrlimit() reads the limit of the calling process into olim.
 *  2. When the configured minimum is not VC_LIM_KEEP and is above the
 *     current soft limit, the process limit is raised with setrlimit():
 *     rlim_cur becomes the minimum, and rlim_max is assigned from the
 *     minimum when that exceeds the old maximum and from the old
 *     maximum as well (presumably in an else branch -- confirm).
 *  3. vc_set_rlimit() installs the limit on the context; a non-zero
 *     result is reported through PERROR().
 * vs_openfd is additionally applied as VLIMIT_OPENFD. Finally
 * pl_setsched() is called, and a negative result is reported through
 * PERROR().
 *
 * NOTE(review): the results of getrlimit() and setrlimit() are ignored.
 * NOTE(review): slr is tested for NULL only in the pl_setsched() call
 * while the visible statements before it dereference slr -- confirm
 * that a guard exists around them.
 */
307 pl_set_limits(xid_t ctx, struct sliver_resources *slr)
309 struct rlimit olim; /* current limit values */
310 struct rlimit nlim; /* new limit values */
313 /* set memory limits */
314 getrlimit(RLIMIT_RSS,&olim);
315 if (0) /* for debugging only */
316 fprintf(stderr,"rss cur = %ld, max = %ld, vs_rss min = %ld\n",olim.rlim_cur,olim.rlim_max,slr->vs_rss.min);
317 if ((slr->vs_rss.min != VC_LIM_KEEP) && (slr->vs_rss.min > olim.rlim_cur)) {
318 nlim.rlim_cur = slr->vs_rss.min;
319 if (slr->vs_rss.min > olim.rlim_max) {
320 nlim.rlim_max = slr->vs_rss.min;
322 nlim.rlim_max = olim.rlim_max;
324 setrlimit(RLIMIT_RSS, &nlim);
326 if (vc_set_rlimit(ctx, RLIMIT_RSS, &slr->vs_rss))
328 PERROR("pl_setrlimit(%u, RLIMIT_RSS)", ctx);
332 /* set address space limits */
333 getrlimit(RLIMIT_AS,&olim);
334 if (0) /* for debugging only */
335 fprintf(stderr,"as cur = %ld, max = %ld, vs_as min = %ld\n",olim.rlim_cur,olim.rlim_max,slr->vs_as.min);
336 if ((slr->vs_as.min != VC_LIM_KEEP) && (slr->vs_as.min > olim.rlim_cur)) {
337 nlim.rlim_cur = slr->vs_as.min;
338 if (slr->vs_as.min > olim.rlim_max) {
339 nlim.rlim_max = slr->vs_as.min;
341 nlim.rlim_max = olim.rlim_max;
343 setrlimit(RLIMIT_AS, &nlim);
345 if (vc_set_rlimit(ctx, RLIMIT_AS, &slr->vs_as))
347 PERROR("pl_setrlimit(%u, RLIMIT_AS)", ctx);
351 /* set nproc limit */
352 getrlimit(RLIMIT_NPROC,&olim);
353 if (0) /* for debugging only */
354 fprintf(stderr,"nproc cur = %ld, max = %ld, vs_nproc min = %ld\n",olim.rlim_cur,olim.rlim_max,slr->vs_nproc.min);
355 if ((slr->vs_nproc.min != VC_LIM_KEEP) && (slr->vs_nproc.min > olim.rlim_cur)) {
356 nlim.rlim_cur = slr->vs_nproc.min;
357 if (slr->vs_nproc.min > olim.rlim_max) {
358 nlim.rlim_max = slr->vs_nproc.min;
360 nlim.rlim_max = olim.rlim_max;
362 setrlimit(RLIMIT_NPROC, &nlim);
364 if (vc_set_rlimit(ctx, RLIMIT_NPROC, &slr->vs_nproc))
366 PERROR("pl_setrlimit(%u, RLIMIT_NPROC)", ctx);
370 /* set openfd limit */
371 getrlimit(RLIMIT_NOFILE,&olim);
372 if (0) /* for debugging only */
373 fprintf(stderr,"NOFILE cur = %ld, max = %ld, vs_openfd min = %ld\n",olim.rlim_cur,olim.rlim_max,slr->vs_openfd.min);
374 if ((slr->vs_openfd.min != VC_LIM_KEEP) && (slr->vs_openfd.min > olim.rlim_cur)) {
375 nlim.rlim_cur = slr->vs_openfd.min;
376 if (slr->vs_openfd.min > olim.rlim_max) {
377 nlim.rlim_max = slr->vs_openfd.min;
379 nlim.rlim_max = olim.rlim_max;
381 setrlimit(RLIMIT_NOFILE, &nlim);
382 if (vc_set_rlimit(ctx, RLIMIT_NOFILE, &slr->vs_openfd))
384 PERROR("pl_setrlimit(%u, RLIMIT_NOFILE)", ctx);
/* fallback value used only when no included header defines VLIMIT_OPENFD */
388 #ifndef VLIMIT_OPENFD
389 #warning VLIMIT_OPENFD should be defined from standard header
390 #define VLIMIT_OPENFD 17
/* the openfd limit is installed a second time under the VLIMIT_OPENFD id */
392 if (vc_set_rlimit(ctx, VLIMIT_OPENFD, &slr->vs_openfd))
394 PERROR("pl_setrlimit(%u, VLIMIT_OPENFD)", ctx);
/* without slr the scheduler gets a share of 1 and no guarantee flag */
399 if (pl_setsched(ctx, slr ? slr->vs_cpu : 1, slr ? (slr->vs_cpuguaranteed & VS_SCHED_CPU_GUARANTEED) : 0 ) < 0)
/* NOTE(review): the ampersand-u in the format string below looks like a typo for percent-u, as used by the other PERROR calls -- confirm */
401 PERROR("pl_setsched(&u)", ctx);