X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=lib%2Fplanetlab.c;h=8bef6dc2c4057807d0bb97cfd373073b178548d5;hb=6471cae28b5b32ff2d425b3d103a4ceac7550125;hp=879cb5d8967a9acb8a2d1d49ed742959743f83dc;hpb=f061f5d3ea230ebc5132315131c00852ffb58269;p=util-vserver.git

diff --git a/lib/planetlab.c b/lib/planetlab.c
index 879cb5d..8bef6dc 100644
--- a/lib/planetlab.c
+++ b/lib/planetlab.c
@@ -31,28 +31,32 @@ POSSIBILITY OF SUCH DAMAGE.
 
 */
 
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
 #include <errno.h>
 #include <stdint.h>
-#include <stdio.h>
+#include <stdarg.h>
 #include <unistd.h>
+#include <ctype.h>
 #include <sys/resource.h>
 
 #include "config.h"
-#include "planetlab.h"
 #include "sched_cmd.h"
 #include "virtual.h"
 #include "vserver.h"
+#include "planetlab.h"
 
 static int
-create_context(xid_t ctx, uint32_t flags, uint64_t bcaps, const rspec_t *rspec)
+create_context(xid_t ctx, uint32_t flags, uint64_t bcaps, struct sliver_resources *slr)
 {
   struct vc_ctx_caps  vc_caps;
-  struct vc_ctx_flags  vc_flags;
-  struct vc_rlimit  vc_rlimit;
 
   /*
    * Create context info - this sets the STATE_SETUP and STATE_INIT flags.
    * Don't ever clear the STATE_INIT flag, that makes us the init task.
+   *
+   * XXX - the kernel code allows initial flags to be passed as an arg.
    */
   if (vc_ctx_create(ctx) == VC_NOCTX)
     return -1;
@@ -65,43 +69,43 @@ create_context(xid_t ctx, uint32_t flags, uint64_t bcaps, const rspec_t *rspec)
   if (vc_set_ccaps(ctx, &vc_caps))
     return -1;
 
-  /* set scheduler parameters */
-  if (pl_setsched(ctx, rspec->cpu_share, rspec->cpu_sched_flags))
-    return -1;
+  pl_set_limits(ctx, slr);
 
-  /* set resource limits */
-  vc_rlimit.min = VC_LIM_KEEP;
-  vc_rlimit.soft = VC_LIM_KEEP;
-  vc_rlimit.hard = rspec->mem_limit;
-  if (vc_set_rlimit(ctx, RLIMIT_RSS, &vc_rlimit))
-    return -1;
+  return 0;
+}
 
-  /* assume min and soft unchanged by set_rlimit */
-  vc_rlimit.hard = rspec->task_limit;
-  if (vc_set_rlimit(ctx, RLIMIT_NPROC, &vc_rlimit))
-    return -1;
+int
+pl_setup_done(xid_t ctx)
+{
+  struct vc_ctx_flags  vc_flags;
 
-  /* set flags, unset SETUP flag - this allows other processes to migrate */
-  vc_flags.mask = VC_VXF_STATE_SETUP | VC_VXF_SCHED_FLAGS;
-  flags = 0;  /* XXX - ignore flags parameter */
-  vc_flags.flagword = flags | rspec->cpu_sched_flags;  /* SETUP cleared */
+  /* unset SETUP flag - this allows other processes to migrate */
+  vc_flags.mask = VC_VXF_STATE_SETUP;
+  vc_flags.flagword = 0;
   if (vc_set_cflags(ctx, &vc_flags))
     return -1;
 
   return 0;
 }
 
+#define RETRY_LIMIT  10
+
 int
-pl_chcontext(xid_t ctx, uint32_t flags, uint64_t bcaps, const rspec_t *rspec)
+pl_chcontext(xid_t ctx, uint32_t flags, uint64_t bcaps, struct sliver_resources *slr)
 {
+  int  retry_count = 0;
+
   for (;;)
     {
       struct vc_ctx_flags  vc_flags;
 
       if (vc_get_cflags(ctx, &vc_flags))
 	{
+	  if (errno != ESRCH)
+	    return -1;
+
 	  /* context doesn't exist - create it */
-	  if (create_context(ctx, flags, bcaps, rspec))
+	  if (create_context(ctx, flags, bcaps,slr))
 	    {
 	      if (errno == EEXIST)
 		/* another process beat us in a race */
@@ -113,13 +117,18 @@ pl_chcontext(xid_t ctx, uint32_t flags, uint64_t bcaps, const rspec_t *rspec)
 	    }
 
 	  /* created context and migrated to it i.e., we're done */
-	  break;
+	  return 1;
 	}
 
       /* check the SETUP flag */
       if (vc_flags.flagword & VC_VXF_STATE_SETUP)
 	{
 	  /* context is still being setup - wait a while then retry */
+	  if (retry_count++ >= RETRY_LIMIT)
+	    {
+	      errno = EBUSY;
+	      return -1;
+	    }
 	  sleep(1);
 	  continue;
 	}
@@ -144,18 +153,12 @@ do						\
 }						\
 while (0)
 
-
 int
 pl_setsched(xid_t ctx, uint32_t cpu_share, uint32_t cpu_sched_flags)
 {
   struct vc_set_sched  vc_sched;
   struct vc_ctx_flags  vc_flags;
-
-  if (cpu_sched_flags & ~VC_VXF_SCHED_FLAGS)
-    {
-      errno = EINVAL;
-      return -1;
-    }
+  uint32_t  new_flags;
 
   vc_sched.set_mask = (VC_VXSM_FILL_RATE | VC_VXSM_INTERVAL | VC_VXSM_TOKENS |
 		       VC_VXSM_TOKENS_MIN | VC_VXSM_TOKENS_MAX);
@@ -170,13 +173,232 @@ pl_setsched(xid_t ctx, uint32_t cpu_share, uint32_t cpu_sched_flags)
   /* get current flag values */
   VC_SYSCALL(vc_get_cflags(ctx, &vc_flags));
 
-  /* the only flag which ever changes is the SCHED_SHARE bit */
-  if ((vc_flags.flagword ^ cpu_sched_flags) & VC_VXF_SCHED_SHARE)
+  /* guaranteed CPU corresponds to SCHED_SHARE flag being cleared */
+  new_flags = (cpu_sched_flags & VS_SCHED_CPU_GUARANTEED
+	       ? 0
+	       : VC_VXF_SCHED_SHARE);
+  if ((vc_flags.flagword & VC_VXF_SCHED_SHARE) != new_flags)
     {
-      vc_flags.mask = VC_VXF_SCHED_SHARE;
-      vc_flags.flagword = cpu_sched_flags & VC_VXF_SCHED_FLAGS;
+      vc_flags.mask = VC_VXF_SCHED_FLAGS;
+      vc_flags.flagword = new_flags | VC_VXF_SCHED_HARD;
       VC_SYSCALL(vc_set_cflags(ctx, &vc_flags));
     }
 
   return 0;
 }
+
+struct pl_resources {			/* one recognised conf-file key */
+	const char *name;		/* key text searched for in a conf line */
+	unsigned long long *limit;	/* where the parsed value is stored */
+};
+
+#define WHITESPACE(buffer,index,len)  /* advance index past whitespace, never beyond len */ \
+  do { while ((index) < (len) && isspace((unsigned char)(buffer)[(index)])) \
+	(index)++; } while (0)
+
+#define VSERVERCONF "/etc/vservers/"
+/* Read limits for sliver <context> from VSERVERCONF<context>.conf into *slr; keys not found keep their defaults. */
+void
+pl_get_limits(char *context, struct sliver_resources *slr)
+{
+  FILE *fb;
+  size_t len = strlen(VSERVERCONF) + strlen(context) + strlen(".conf") + NULLBYTE_SIZE;
+  char *conf;
+  struct pl_resources *r;
+  struct pl_resources sliver_list[] = {
+    {"CPULIMIT", &slr->vs_cpu},
+    {"CPUSHARE", &slr->vs_cpu},
+    {"CPUGUARANTEED", &slr->vs_cpuguaranteed},
+
+    {"TASKLIMIT", &slr->vs_nproc.hard}, /* backwards compatible */
+    {"VS_NPROC_HARD", &slr->vs_nproc.hard},
+    {"VS_NPROC_SOFT", &slr->vs_nproc.soft},
+    {"VS_NPROC_MINIMUM", &slr->vs_nproc.min},
+
+    {"MEMLIMIT", &slr->vs_rss.hard}, /* backwards compatible */
+    {"VS_RSS_HARD", &slr->vs_rss.hard},
+    {"VS_RSS_SOFT", &slr->vs_rss.soft},
+    {"VS_RSS_MINIMUM", &slr->vs_rss.min},
+
+    {"VS_AS_HARD", &slr->vs_as.hard},
+    {"VS_AS_SOFT", &slr->vs_as.soft},
+    {"VS_AS_MINIMUM", &slr->vs_as.min},
+
+    {"VS_OPENFD_HARD", &slr->vs_openfd.hard},
+    {"VS_OPENFD_SOFT", &slr->vs_openfd.soft},
+    {"VS_OPENFD_MINIMUM", &slr->vs_openfd.min},
+
+    {"VS_WHITELISTED", &slr->vs_whitelisted},
+    {0,0}
+  };
+
+  /* defaults used for every key the conf file does not set */
+  slr->vs_cpu = VC_LIM_KEEP;
+  slr->vs_cpuguaranteed = 0;
+
+  slr->vs_rss.hard = VC_LIM_KEEP;
+  slr->vs_rss.soft = VC_LIM_KEEP;
+  slr->vs_rss.min = VC_LIM_KEEP;
+
+  slr->vs_as.hard = VC_LIM_KEEP;
+  slr->vs_as.soft = VC_LIM_KEEP;
+  slr->vs_as.min = VC_LIM_KEEP;
+
+  slr->vs_nproc.hard = VC_LIM_KEEP;
+  slr->vs_nproc.soft = VC_LIM_KEEP;
+  slr->vs_nproc.min = VC_LIM_KEEP;
+
+  slr->vs_openfd.hard = VC_LIM_KEEP;
+  slr->vs_openfd.soft = VC_LIM_KEEP;
+  slr->vs_openfd.min = VC_LIM_KEEP;
+
+  slr->vs_whitelisted = 1;
+
+  conf = malloc(len); /* allocated after the defaults so a failure still leaves *slr initialised */
+  if (conf == NULL) {
+    fprintf(stderr,"cannot allocate memory for %s%s.conf\n",VSERVERCONF,context);
+    return;
+  }
+  snprintf(conf, len, "%s%s.conf", VSERVERCONF, context);
+
+  /* open the conf file for reading */
+  fb = fopen(conf,"r");
+  if (fb != NULL) {
+    size_t index;
+    char buffer[1000];
+    char *p;
+
+    /* the conf file exists - parse it one line at a time */
+    while((p=fgets(buffer,sizeof(buffer),fb))!=NULL) {
+      index = 0;
+      len = strlen(buffer);
+      WHITESPACE(buffer,index,len);
+      if (buffer[index] == '#') /* comment line */
+	continue;
+
+      for (r=&sliver_list[0]; r->name; r++) /* first key found in the line wins */
+	if ((p=strstr(&buffer[index],r->name))!=NULL) {
+	  /* adjust index into buffer */
+	  index+= (p-&buffer[index])+strlen(r->name);
+	  /* skip over whitespace */
+	  WHITESPACE(buffer,index,len);
+	  /* expecting to see = sign */
+	  if (buffer[index++]!='=') goto out; /* malformed: stop parsing the file */
+	  /* skip over whitespace */
+	  WHITESPACE(buffer,index,len);
+	  /* expecting to see a digit for number */
+	  if (!isdigit((unsigned char)buffer[index])) goto out; /* malformed: stop parsing */
+	  *r->limit = strtoull(&buffer[index], NULL, 10); /* atoi would truncate large limits */
+	  if (0) /* for debugging only */
+	    fprintf(stderr,"pl_get_limits found %s=%llu\n",
+		    r->name,*r->limit);
+	  break;
+	}
+    }
+  out:
+    fclose(fb);
+  } else {
+    fprintf(stderr,"cannot open %s\n",conf);
+  }
+  free(conf);
+}
+
+/*
+ * Raise the calling process's own rlimit for `resource` so that it is at
+ * least `min`.  Does nothing when min is VC_LIM_KEEP, when the current limit
+ * cannot be read, or when the current soft limit is already >= min.  The
+ * result of setrlimit() is not checked.
+ *
+ * Returns 1 if setrlimit() was called, 0 otherwise.
+ */
+static int
+pl_raise_own_rlimit(int resource, unsigned long long min)
+{
+  struct rlimit olim; /* current limit values */
+  struct rlimit nlim; /* new limit values */
+
+  /* nothing requested, or the current limit cannot be read */
+  if (min == VC_LIM_KEEP || getrlimit(resource, &olim) != 0)
+    return 0;
+  /* the soft limit is already high enough */
+  if (min <= olim.rlim_cur)
+    return 0;
+
+  nlim.rlim_cur = min;
+  /* never lower the hard limit: keep the larger of min and the current max */
+  nlim.rlim_max = (min > olim.rlim_max) ? min : olim.rlim_max;
+  setrlimit(resource, &nlim);
+  return 1;
+}
+
+/*
+ * Apply the limits in *slr to context ctx, then set its scheduler parameters.
+ *
+ * When slr is NULL no rlimits are touched and pl_setsched() is called with
+ * a cpu share of 1 and no flags.  Before each context limit is set, the
+ * caller's own rlimit is raised to the configured minimum if it is lower.
+ *
+ * A failing vc_set_rlimit() or pl_setsched() call is reported through PERROR
+ * and terminates the process with exit(1).
+ */
+void
+pl_set_limits(xid_t ctx, struct sliver_resources *slr)
+{
+  if (slr != 0) {
+    /* set memory limits */
+    pl_raise_own_rlimit(RLIMIT_RSS, slr->vs_rss.min);
+    if (vc_set_rlimit(ctx, RLIMIT_RSS, &slr->vs_rss))
+      {
+	PERROR("pl_setrlimit(%u, RLIMIT_RSS)", ctx);
+	exit(1);
+      }
+
+    /* set address space limits */
+    pl_raise_own_rlimit(RLIMIT_AS, slr->vs_as.min);
+    if (vc_set_rlimit(ctx, RLIMIT_AS, &slr->vs_as))
+      {
+	PERROR("pl_setrlimit(%u, RLIMIT_AS)", ctx);
+	exit(1);
+      }
+
+    /* set nproc limit */
+    pl_raise_own_rlimit(RLIMIT_NPROC, slr->vs_nproc.min);
+    if (vc_set_rlimit(ctx, RLIMIT_NPROC, &slr->vs_nproc))
+      {
+	PERROR("pl_setrlimit(%u, RLIMIT_NPROC)", ctx);
+	exit(1);
+      }
+
+    /*
+     * set openfd limit - unlike the limits above, the context's
+     * RLIMIT_NOFILE is only set when the process limit had to be raised;
+     * VLIMIT_OPENFD below is always set.
+     */
+    if (pl_raise_own_rlimit(RLIMIT_NOFILE, slr->vs_openfd.min)
+	&& vc_set_rlimit(ctx, RLIMIT_NOFILE, &slr->vs_openfd))
+      {
+	PERROR("pl_setrlimit(%u, RLIMIT_NOFILE)", ctx);
+	exit(1);
+      }
+
+    /* fallback value when the included headers do not provide VLIMIT_OPENFD */
+#ifndef VLIMIT_OPENFD
+#warning VLIMIT_OPENFD should be defined from standard header
+#define VLIMIT_OPENFD	17
+#endif
+    if (vc_set_rlimit(ctx, VLIMIT_OPENFD, &slr->vs_openfd))
+      {
+	PERROR("pl_setrlimit(%u, VLIMIT_OPENFD)", ctx);
+	exit(1);
+      }
+  }
+
+  /* with no slr the context gets a cpu share of 1 and no scheduler flags */
+  if (pl_setsched(ctx,
+		  slr ? slr->vs_cpu : 1,
+		  slr ? (slr->vs_cpuguaranteed & VS_SCHED_CPU_GUARANTEED) : 0) < 0)
+    {
+      PERROR("pl_setsched(%u)", ctx);
+      exit(1);
+    }
+}