Changes to support Andy's new CPU scheduler
author Andy Bavier <acb@cs.princeton.edu>
Thu, 8 Sep 2005 03:10:24 +0000 (03:10 +0000)
committer Andy Bavier <acb@cs.princeton.edu>
Thu, 8 Sep 2005 03:10:24 +0000 (03:10 +0000)
configs/kernel-2.6.12-i686-planetlab.config
include/linux/vs_sched.h
include/linux/vserver/context.h
include/linux/vserver/sched_def.h
kernel/sched.c
kernel/vserver/Kconfig
kernel/vserver/sched.c
kernel/vserver/sched_init.h

configs/kernel-2.6.12-i686-planetlab.config
index 0e3ae64..241cc9b 100644
@@ -1408,7 +1408,8 @@ CONFIG_VSERVER_LEGACY=y
 # CONFIG_VSERVER_NGNET is not set
 # CONFIG_VSERVER_PROC_SECURE is not set
 CONFIG_VSERVER_HARDCPU=y
-# CONFIG_VSERVER_HARDCPU_IDLE is not set
+CONFIG_VSERVER_HARDCPU_IDLE=y
+CONFIG_VSERVER_ACB_SCHED=y
 # CONFIG_INOXID_NONE is not set
 # CONFIG_INOXID_UID16 is not set
 # CONFIG_INOXID_GID16 is not set
include/linux/vs_sched.h
index 42fca7d..aa0ff71 100644
 #define MAX_PRIO_BIAS           20
 #define MIN_PRIO_BIAS          -20
 
+#ifdef CONFIG_VSERVER_ACB_SCHED
+
+#define VX_INVALID_TICKS        -1000000  /* sentinel: no token deficit recorded */
+#define IS_BEST_EFFORT(vxi)     (vx_info_flags(vxi, VXF_SCHED_SHARE, 0))
+
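+/* out-of-line versions implemented in kernel/vserver/sched.c */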
+int vx_tokens_avail(struct vx_info *vxi);
+void vx_consume_token(struct vx_info *vxi);
+void vx_scheduler_tick(void);
+void vx_advance_best_effort_ticks(int ticks);
+void vx_advance_guaranteed_ticks(int ticks);
+
+#else
 
 static inline int vx_tokens_avail(struct vx_info *vxi)
 {
@@ -24,6 +36,8 @@ static inline void vx_consume_token(struct vx_info *vxi)
        atomic_dec(&vxi->sched.tokens);
 }
 
+#endif /* CONFIG_VSERVER_ACB_SCHED */
+
 static inline int vx_need_resched(struct task_struct *p)
 {
 #ifdef CONFIG_VSERVER_HARDCPU
include/linux/vserver/context.h
index 79f9053..48ed9ca 100644
@@ -24,7 +24,8 @@
 #define VXF_SCHED_HARD         0x00000100
 #define VXF_SCHED_PRIO         0x00000200
 #define VXF_SCHED_PAUSE                0x00000400
-
+#define VXF_SCHED_SHARE         0x00000800
 #define VXF_VIRT_MEM           0x00010000
 #define VXF_VIRT_UPTIME                0x00020000
 #define VXF_VIRT_CPU           0x00040000
include/linux/vserver/sched_def.h
index 3a57761..4d915a2 100644
@@ -15,9 +15,26 @@ struct _vx_ticks {
        uint64_t unused[5];             /* cacheline ? */
 };
 
+#ifdef CONFIG_VSERVER_ACB_SCHED
+enum {
+    /* different scheduling classes */
+    SCH_GUARANTEE = 0,
+    SCH_BEST_EFFORT = 1,
+    SCH_NUM_CLASSES = 2,
+    /* states */
+    SCH_UNINITIALIZED,
+    SCH_INITIALIZED,
+};
+#endif
+
 /* context sub struct */
 
 struct _vx_sched {
+#ifdef CONFIG_VSERVER_ACB_SCHED
+        uint64_t ticks[SCH_NUM_CLASSES];      /* baseline on the class tick clock */
+        uint64_t last_ticks[SCH_NUM_CLASSES]; /* class clock value at last recalc */
+        int      state[SCH_NUM_CLASSES];      /* SCH_UNINITIALIZED or SCH_INITIALIZED */
+#endif
        atomic_t tokens;                /* number of CPU tokens */
        spinlock_t tokens_lock;         /* lock for token bucket */
 
kernel/sched.c
index daf0319..4e10dc6 100644
@@ -2453,6 +2453,10 @@ void scheduler_tick(void)
 
        rq->timestamp_last_tick = now;
 
+#if defined(CONFIG_VSERVER_HARDCPU) && defined(CONFIG_VSERVER_ACB_SCHED) 
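+	/* advance the global per-class token clocks once per timer tick */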
+       vx_scheduler_tick();
+#endif
+
        if (p == rq->idle) {
                if (wake_priority_sleeper(rq))
                        goto out;
@@ -2711,6 +2715,10 @@ asmlinkage void __sched schedule(void)
        struct vx_info *vxi;
 #ifdef CONFIG_VSERVER_HARDCPU
        int maxidle = -HZ;
+# ifdef CONFIG_VSERVER_ACB_SCHED
+        int min_guarantee_ticks = VX_INVALID_TICKS;
+        int min_best_effort_ticks = VX_INVALID_TICKS;
+# endif
 #endif
        int cpu, idx;
 
@@ -2781,6 +2789,9 @@ need_resched_nonpreemptible:
        }
 
 #ifdef CONFIG_VSERVER_HARDCPU
+# ifdef CONFIG_VSERVER_ACB_SCHED
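+/*
+ * Re-entry point: switch_tasks below jumps back here after fast-forwarding
+ * a class tick clock, so held contexts can be rechecked for release.
+ */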
+drain_hold_queue:
+# endif        
        if (!list_empty(&rq->hold_queue)) {
                struct list_head *l, *n;
                int ret;
@@ -2800,6 +2811,17 @@ need_resched_nonpreemptible:
                        }
                        if ((ret < 0) && (maxidle < ret))
                                maxidle = ret;
+# ifdef CONFIG_VSERVER_ACB_SCHED
+                       if (ret < 0) {
+                               if (IS_BEST_EFFORT(vxi)) {
+                                       if (min_best_effort_ticks < ret) 
+                                               min_best_effort_ticks = ret;
+                               } else {
+                                       if (min_guarantee_ticks < ret)
+                                               min_guarantee_ticks = ret;
+                               }
+                       }
+# endif
                }
        }
        rq->idle_tokens = -maxidle;
@@ -2860,8 +2882,19 @@ go_idle:
                int ret = vx_tokens_recalc(vxi);
 
                if (unlikely(ret <= 0)) {
-                       if (ret && (rq->idle_tokens > -ret))
-                               rq->idle_tokens = -ret;
+                       if (ret) {
+                               if ((rq->idle_tokens > -ret))
+                                       rq->idle_tokens = -ret;
+# ifdef CONFIG_VSERVER_ACB_SCHED
+                               if (IS_BEST_EFFORT(vxi)) {
+                                       if (min_best_effort_ticks < ret) 
+                                               min_best_effort_ticks = ret;
+                               } else {
+                                       if (min_guarantee_ticks < ret)
+                                               min_guarantee_ticks = ret;
+                               }
+# endif
+                       }
                        vx_hold_task(vxi, next, rq);
                        goto pick_next;
                }
@@ -2885,6 +2918,18 @@ go_idle:
        }
        next->activated = 0;
 switch_tasks:
+#if defined(CONFIG_VSERVER_HARDCPU) && defined(CONFIG_VSERVER_ACB_SCHED)
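+	/*
+	 * About to idle while contexts sit on the hold queue: fast-forward
+	 * the matching class clock by the smallest recorded token deficit,
+	 * so that at least one held context becomes runnable, then retry.
+	 */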
+       if (next == rq->idle && !list_empty(&rq->hold_queue)) {
+               if (min_best_effort_ticks != VX_INVALID_TICKS) {
+                       vx_advance_best_effort_ticks(-min_best_effort_ticks);
+                       goto drain_hold_queue;
+               } 
+               if (min_guarantee_ticks != VX_INVALID_TICKS) {
+                       vx_advance_guaranteed_ticks(-min_guarantee_ticks);
+                       goto drain_hold_queue;
+               }
+       }
+#endif
        if (next == rq->idle)
                schedstat_inc(rq, sched_goidle);
        prefetch(next);
kernel/vserver/Kconfig
index f820729..b4ee9b6 100644
@@ -101,6 +101,13 @@ config     VSERVER_HARDCPU_IDLE
          This might improve interactivity and latency, but
          will also marginally increase scheduling overhead.
 
+config VSERVER_ACB_SCHED
+       bool    "Guaranteed/fair share scheduler"
+       depends on VSERVER_HARDCPU
+       default n
+       help
+	  Andy Bavier's experimental CPU scheduler.  Contexts flagged
+	  with VXF_SCHED_SHARE share the CPU best-effort; all other
+	  contexts receive tokens at their guaranteed fill rate.
+
 choice
        prompt  "Persistent Inode Context Tagging"
        default INOXID_UGID24
kernel/vserver/sched.c
index 60f3c6a..e978e7a 100644
 #include <asm/errno.h>
 #include <asm/uaccess.h>
 
+#ifdef CONFIG_VSERVER_ACB_SCHED
+
+#define TICK_SCALE 1000
+#define TICKS_PER_TOKEN(vxi) \
+        ((vxi->sched.interval * TICK_SCALE) / vxi->sched.fill_rate)
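+/* e.g. a fill_rate of 1 token per interval of 4 jiffies gives
+ * TICKS_PER_TOKEN = (4 * 1000) / 1 = 4000 scaled ticks per token
+ * (integer division; assumes a non-zero fill_rate) */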
+#define CLASS(vxi) \
+    (IS_BEST_EFFORT(vxi) ? SCH_BEST_EFFORT : SCH_GUARANTEE)
+#define GLOBAL_TICKS(vxi) \
+    (IS_BEST_EFFORT(vxi) ? vx_best_effort_ticks : vx_guaranteed_ticks)
+
+uint64_t vx_guaranteed_ticks = 0;
+uint64_t vx_best_effort_ticks = 0;
+
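+/*
+ * Accounting invariant: sched.ticks[class] is a per-context baseline on
+ * the class-wide clock, so the current token count is always
+ *
+ *     tokens = (GLOBAL_TICKS(vxi) - ticks[class]) / TICKS_PER_TOKEN(vxi)
+ *
+ * Tokens accrue implicitly as the global clock advances; consuming one
+ * just moves the baseline forward (see vx_consume_token below).
+ */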
+void vx_tokens_set(struct vx_info *vxi, int tokens)
+{
+    int class = CLASS(vxi);
+
+    vxi->sched.ticks[class] = GLOBAL_TICKS(vxi);
+    vxi->sched.ticks[class] -= tokens * TICKS_PER_TOKEN(vxi);
+}
+
+void vx_scheduler_tick(void)
+{
+    vx_guaranteed_ticks += TICK_SCALE;
+    vx_best_effort_ticks += TICK_SCALE;
+}
+
+void vx_advance_best_effort_ticks(int ticks)
+{
+    vx_best_effort_ticks += TICK_SCALE * ticks;
+}
+
+void vx_advance_guaranteed_ticks(int ticks)
+{
+    vx_guaranteed_ticks += TICK_SCALE * ticks;
+}
+
+int vx_tokens_avail(struct vx_info *vxi)
+{
+    uint64_t diff;
+    int tokens;
+    long rem;
+    int class = CLASS(vxi);
+
+    if (vxi->sched.state[class] == SCH_UNINITIALIZED) {
+       /* Set the "real" token count */
+       tokens = atomic_read(&vxi->sched.tokens);
+       vx_tokens_set(vxi, tokens);
+       vxi->sched.state[class] = SCH_INITIALIZED;
+       goto out;
+    } 
+
+    if (vxi->sched.last_ticks[class] == GLOBAL_TICKS(vxi)) {
+       tokens = atomic_read(&vxi->sched.tokens);
+       goto out;
+    }
+
+    /* Fixed-point arithmetic (TICK_SCALE = 1000) limits these
+     * calculations: TICKS_PER_TOKEN() truncates toward zero, the
+     * division below floors the token count, and fill_rate must be
+     * non-zero.
+     */
+    diff = GLOBAL_TICKS(vxi) - vxi->sched.ticks[class];
+    tokens = div_long_long_rem(diff, TICKS_PER_TOKEN(vxi), &rem);
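+    /* the remainder is carried implicitly, since ticks[class] is not
+     * advanced here */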
+
+    if (tokens > vxi->sched.tokens_max) {
+       vx_tokens_set(vxi, vxi->sched.tokens_max);
+       tokens = vxi->sched.tokens_max;
+    }
+
+    atomic_set(&vxi->sched.tokens, tokens);
+
+out:
+    vxi->sched.last_ticks[class] = GLOBAL_TICKS(vxi);
+    return tokens;
+}
+
+void vx_consume_token(struct vx_info *vxi)
+{
+    int class = CLASS(vxi);
+
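+    /* one token == TICKS_PER_TOKEN(vxi) ticks on the class clock, so
+     * advancing the baseline charges exactly one token */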
+    vxi->sched.ticks[class] += TICKS_PER_TOKEN(vxi);
+}
+
+/*
+ * recalculate the context's scheduling tokens
+ *
+ * ret > 0 : number of tokens available
+ * ret = 0 : context is paused
+ * ret < 0 : number of jiffies until new tokens arrive
+ *
+ */
+int vx_tokens_recalc(struct vx_info *vxi)
+{
+        long delta, tokens;
+
+       if (vx_info_flags(vxi, VXF_SCHED_PAUSE, 0))
+               /* we are paused */
+               return 0;
+
+       tokens = vx_tokens_avail(vxi);
+       if (tokens <= 0)
+           vxi->vx_state |= VXS_ONHOLD;
+       if (tokens < vxi->sched.tokens_min) {
+           delta = tokens - vxi->sched.tokens_min;
+           /* enough tokens will be available in */
+           return (delta * vxi->sched.interval) / vxi->sched.fill_rate;
+       }
+
+       /* we have some tokens left */
+       if (vx_info_state(vxi, VXS_ONHOLD) &&
+               (tokens >= vxi->sched.tokens_min))
+               vxi->vx_state &= ~VXS_ONHOLD;
+       if (vx_info_state(vxi, VXS_ONHOLD))
+               tokens -= vxi->sched.tokens_min;
+
+       return tokens;
+}
+
+#else
 
 /*
  * recalculate the context's scheduling tokens
@@ -81,6 +195,8 @@ int vx_tokens_recalc(struct vx_info *vxi)
        return tokens;
 }
 
+#endif /* CONFIG_VSERVER_ACB_SCHED */
+
 /*
  * effective_prio - return the priority that is based on the static
  * priority but is modified by bonuses/penalties.
@@ -159,6 +275,10 @@ int vc_set_sched_v2(uint32_t xid, void __user *data)
        if (vxi->sched.tokens_min > vxi->sched.tokens_max)
                vxi->sched.tokens_min = vxi->sched.tokens_max;
 
+#ifdef CONFIG_VSERVER_ACB_SCHED
+       vx_tokens_set(vxi, atomic_read(&vxi->sched.tokens));
+#endif
+
        spin_unlock(&vxi->sched.tokens_lock);
        put_vx_info(vxi);
        return 0;
@@ -211,6 +331,10 @@ int vc_set_sched(uint32_t xid, void __user *data)
        if (vxi->sched.priority_bias < MIN_PRIO_BIAS)
                vxi->sched.priority_bias = MIN_PRIO_BIAS;
 
+#ifdef CONFIG_VSERVER_ACB_SCHED
+       vx_tokens_set(vxi, atomic_read(&vxi->sched.tokens));
+#endif
+
        spin_unlock(&vxi->sched.tokens_lock);
        put_vx_info(vxi);
        return 0;
kernel/vserver/sched_init.h
index 90d1396..6724c28 100644
@@ -11,6 +11,14 @@ static inline void vx_info_init_sched(struct _vx_sched *sched)
        sched->jiffies          = jiffies;
        sched->tokens_lock      = SPIN_LOCK_UNLOCKED;
 
+#ifdef CONFIG_VSERVER_ACB_SCHED
+	{
+		int i;
+
+		/* We can't set the "real" token count here because we don't
+		 * have access to the vx_info struct.  Do it later... */
+		for (i = 0; i < SCH_NUM_CLASSES; i++)
+			sched->state[i] = SCH_UNINITIALIZED;
+	}
+#endif
+
        atomic_set(&sched->tokens, HZ >> 2);
        sched->cpus_allowed     = CPU_MASK_ALL;
        sched->priority_bias    = 0;