backported the vs2.1.x fix for IRQ handling, which had been causing incorrect scheduler behavior
[linux-2.6.git] kernel/vserver/sched.c
index a75195a..b76a72c 100644
--- a/kernel/vserver/sched.c
+++ b/kernel/vserver/sched.c
  *
  *  Virtual Server: Scheduler Support
  *
- *  Copyright (C) 2004  Herbert Pötzl
+ *  Copyright (C) 2004-2005  Herbert Pötzl
  *
  *  V0.01  adapted Sam Vilains version to 2.6.3
  *  V0.02  removed legacy interface
  *
  */
 
-#include <linux/config.h>
 #include <linux/sched.h>
-#include <linux/vinline.h>
-#include <linux/vserver/context.h>
-#include <linux/vserver/sched.h>
+#include <linux/vs_base.h>
+#include <linux/vs_context.h>
+#include <linux/vs_sched.h>
+#include <linux/vserver/sched_cmd.h>
 
 #include <asm/errno.h>
 #include <asm/uaccess.h>
 
+#ifdef CONFIG_VSERVER_ACB_SCHED
+
+#define TICK_SCALE 1000
+#define TICKS_PER_TOKEN(vxi) \
+        ((vxi->sched.interval * TICK_SCALE) / vxi->sched.fill_rate)
+#define CLASS(vxi) \
+    (IS_BEST_EFFORT(vxi) ? SCH_BEST_EFFORT : SCH_GUARANTEE)
+#define GLOBAL_TICKS(vxi) \
+    (IS_BEST_EFFORT(vxi) ? vx_best_effort_ticks : vx_guaranteed_ticks)
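+
+/* Example: with fill_rate 1 and interval 4, TICKS_PER_TOKEN comes to
+ * (4 * 1000) / 1 = 4000; since each scheduler tick advances the global
+ * counters by TICK_SCALE (1000), one token accrues every four ticks. */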
+
+uint64_t vx_guaranteed_ticks = 0;
+uint64_t vx_best_effort_ticks = 0;
+
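+/* A context's token balance is stored inverted, as the value of the
+ * global tick counter at which the balance would be zero; hence
+ * tokens = (GLOBAL_TICKS - ticks[class]) / TICKS_PER_TOKEN. */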
+void vx_tokens_set(struct vx_info *vxi, int tokens)
+{
+	int class = CLASS(vxi);
+	uint64_t tmp;
+
+	tmp = GLOBAL_TICKS(vxi);
+	tmp -= tokens * TICKS_PER_TOKEN(vxi);
+
+	vxi->sched.ticks[class] = tmp;
+}
+
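+/* Per-tick work is just advancing the global counters; every context's
+ * balance moves implicitly, with no per-context accounting here. */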
+void vx_scheduler_tick(void)
+{
+	vx_guaranteed_ticks += TICK_SCALE;
+	vx_best_effort_ticks += TICK_SCALE;
+}
+
+void vx_advance_best_effort_ticks(int ticks)
+{
+	vx_best_effort_ticks += TICK_SCALE * ticks;
+}
+
+void vx_advance_guaranteed_ticks(int ticks)
+{
+	vx_guaranteed_ticks += TICK_SCALE * ticks;
+}
+
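+/* Lazily recompute the token balance from the global clock; if the
+ * clock has not moved since the last call, the cached atomic value
+ * is returned unchanged. */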
+int vx_tokens_avail(struct vx_info *vxi)
+{
+	uint64_t diff, max_ticks;
+	int tokens;
+	long tpt, rem;
+	int class = CLASS(vxi);
+
+	if (vxi->sched.state[class] == SCH_UNINITIALIZED) {
+		/* Set the "real" token count */
+		tokens = atomic_read(&vxi->sched.tokens);
+		vx_tokens_set(vxi, tokens);
+		vxi->sched.state[class] = SCH_INITIALIZED;
+		goto out;
+	}
+
+	if (vxi->sched.last_ticks[class] == GLOBAL_TICKS(vxi)) {
+		tokens = atomic_read(&vxi->sched.tokens);
+		goto out;
+	}
+
+	/* The fixed-point arithmetic has two known limitations:
+	 * TICKS_PER_TOKEN truncates towards zero, and
+	 * div_long_long_rem can only return a quotient that fits
+	 * in a long, hence the clamp below.
+	 */
+	tpt = TICKS_PER_TOKEN(vxi);
+	max_ticks = (uint64_t)tpt * vxi->sched.tokens_max;
+	diff = GLOBAL_TICKS(vxi) - vxi->sched.ticks[class];
+
+	/* Avoid an overflow from div_long_long_rem */
+	if (diff >= max_ticks) {
+		vx_tokens_set(vxi, vxi->sched.tokens_max);
+		tokens = vxi->sched.tokens_max;
+	} else {
+		/* Divide ticks by ticks per token to get tokens */
+		tokens = div_long_long_rem(diff, tpt, &rem);
+	}
+
+	atomic_set(&vxi->sched.tokens, tokens);
+
+out:
+	vxi->sched.last_ticks[class] = GLOBAL_TICKS(vxi);
+	return tokens;
+}
+
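+/* Consuming a token moves the baseline one token's worth of ticks
+ * forward; the atomic counter is decremented to keep the cached
+ * value in step. */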
+void vx_consume_token(struct vx_info *vxi)
+{
+	int class = CLASS(vxi);
+
+	vxi->sched.ticks[class] += TICKS_PER_TOKEN(vxi);
+	atomic_dec(&vxi->sched.tokens);
+}
+
+/*
+ * recalculate the context's scheduling tokens
+ *
+ * ret > 0 : number of tokens available
+ * ret = 0 : context is paused
+ * ret < 0 : number of jiffies until new tokens arrive
+ *
+ */
+int vx_tokens_recalc(struct vx_info *vxi)
+{
+	long delta, tokens;
+
+	if (vx_info_flags(vxi, VXF_SCHED_PAUSE, 0))
+		/* we are paused */
+		return 0;
+
+	tokens = vx_tokens_avail(vxi);
+	if (tokens <= 0)
+		vxi->vx_state |= VXS_ONHOLD;
+	if (tokens < vxi->sched.tokens_min) {
+		delta = tokens - vxi->sched.tokens_min;
+		/* enough tokens will be available in */
+		return (delta * vxi->sched.interval) / vxi->sched.fill_rate;
+	}
+
+	/* we have some tokens left */
+	if (vx_info_state(vxi, VXS_ONHOLD) &&
+		(tokens >= vxi->sched.tokens_min))
+		vxi->vx_state &= ~VXS_ONHOLD;
+	if (vx_info_state(vxi, VXS_ONHOLD))
+		tokens -= vxi->sched.tokens_min;
+
+	return tokens;
+}
+
+#else
 
 /*
  * recalculate the context's scheduling tokens
@@ -32,7 +155,7 @@ int vx_tokens_recalc(struct vx_info *vxi)
 {
        long delta, tokens = 0;
 
-       if (__vx_flags(vxi->vx_flags, VXF_SCHED_PAUSE, 0))
+       if (vx_info_flags(vxi, VXF_SCHED_PAUSE, 0))
                /* we are paused */
                return 0;
 
@@ -51,7 +174,7 @@ int vx_tokens_recalc(struct vx_info *vxi)
                atomic_add(tokens, &vxi->sched.tokens);
                vxi->sched.jiffies += delta;
                tokens = atomic_read(&vxi->sched.tokens);
-       
+
                if (tokens > vxi->sched.tokens_max) {
                        tokens = vxi->sched.tokens_max;
                        atomic_set(&vxi->sched.tokens, tokens);
@@ -59,7 +182,10 @@ int vx_tokens_recalc(struct vx_info *vxi)
                spin_unlock(&vxi->sched.tokens_lock);
        } else {
                /* no new tokens */
-               if ((tokens = vx_tokens_avail(vxi)) < vxi->sched.tokens_min) {
+               tokens = vx_tokens_avail(vxi);
+               if (tokens <= 0)
+                       vxi->vx_state |= VXS_ONHOLD;
+               if (tokens < vxi->sched.tokens_min) {
                        /* enough tokens will be available in */
                        if (vxi->sched.tokens_min == 0)
                                return delta - vxi->sched.interval;
@@ -67,10 +193,19 @@ int vx_tokens_recalc(struct vx_info *vxi)
                                vxi->sched.tokens_min / vxi->sched.fill_rate;
                }
        }
+
        /* we have some tokens left */
+       if (vx_info_state(vxi, VXS_ONHOLD) &&
+               (tokens >= vxi->sched.tokens_min))
+               vxi->vx_state &= ~VXS_ONHOLD;
+       if (vx_info_state(vxi, VXS_ONHOLD))
+               tokens -= vxi->sched.tokens_min;
+
        return tokens;
 }
 
+#endif /* CONFIG_VSERVER_ACB_SCHED */
+
 /*
  * effective_prio - return the priority that is based on the static
  * priority but is modified by bonuses/penalties.
@@ -92,9 +227,8 @@ int vx_tokens_recalc(struct vx_info *vxi)
  *
  * Both properties are important to certain workloads.
  */
-int effective_vavavoom(task_t *p, int max_prio)
+int vx_effective_vavavoom(struct vx_info *vxi, int max_prio)
 {
-       struct vx_info *vxi = p->vx_info;
        int vavavoom, max;
 
        /* lots of tokens = lots of vavavoom
@@ -105,30 +239,25 @@ int effective_vavavoom(task_t *p, int max_prio)
                max = max * max;
                vavavoom = max_prio * VAVAVOOM_RATIO / 100
                        * (vavavoom*vavavoom - (max >> 2)) / max;
-               /*  alternative, geometric mapping
-               vavavoom = -( MAX_USER_PRIO*VAVAVOOM_RATIO/100 * vavavoom
-                       / vxi->sched.tokens_max -
-                       MAX_USER_PRIO*VAVAVOOM_RATIO/100/2); */
        } else
                vavavoom = 0;
-       /* vavavoom = ( MAX_USER_PRIO*VAVAVOOM_RATIO/100*tokens_left(p) -
-               MAX_USER_PRIO*VAVAVOOM_RATIO/100/2); */
 
-       return vavavoom;
+       vxi->sched.vavavoom = vavavoom;
+       return vavavoom + vxi->sched.priority_bias;
 }
 
 
-int vc_set_sched(uint32_t xid, void __user *data)
+int vc_set_sched_v2(uint32_t xid, void __user *data)
 {
        struct vcmd_set_sched_v2 vc_data;
        struct vx_info *vxi;
 
        if (copy_from_user (&vc_data, data, sizeof(vc_data)))
                return -EFAULT;
-       
-       vxi = find_vx_info(xid);
+
+       vxi = lookup_vx_info(xid);
        if (!vxi)
-               return -EINVAL;
+               return -ESRCH;
 
        spin_lock(&vxi->sched.tokens_lock);
 
@@ -155,6 +284,66 @@ int vc_set_sched(uint32_t xid, void __user *data)
        if (vxi->sched.tokens_min > vxi->sched.tokens_max)
                vxi->sched.tokens_min = vxi->sched.tokens_max;
 
+#ifdef CONFIG_VSERVER_ACB_SCHED
+       vx_tokens_set(vxi, atomic_read(&vxi->sched.tokens));
+#endif
+
+       spin_unlock(&vxi->sched.tokens_lock);
+       put_vx_info(vxi);
+       return 0;
+}
+
+
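+/* v3 of the scheduler command: set_mask selects which fields of the
+ * request are applied, so a caller can change one parameter without
+ * rewriting the others. */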
+int vc_set_sched(uint32_t xid, void __user *data)
+{
+       struct vcmd_set_sched_v3 vc_data;
+       struct vx_info *vxi;
+       unsigned int set_mask;
+
+       if (copy_from_user (&vc_data, data, sizeof(vc_data)))
+               return -EFAULT;
+
+       vxi = lookup_vx_info(xid);
+       if (!vxi)
+               return -ESRCH;
+
+       set_mask = vc_data.set_mask;
+
+       spin_lock(&vxi->sched.tokens_lock);
+
+       if (set_mask & VXSM_FILL_RATE)
+               vxi->sched.fill_rate = vc_data.fill_rate;
+       if (set_mask & VXSM_INTERVAL)
+               vxi->sched.interval = vc_data.interval;
+       if (set_mask & VXSM_TOKENS)
+               atomic_set(&vxi->sched.tokens, vc_data.tokens);
+       if (set_mask & VXSM_TOKENS_MIN)
+               vxi->sched.tokens_min = vc_data.tokens_min;
+       if (set_mask & VXSM_TOKENS_MAX)
+               vxi->sched.tokens_max = vc_data.tokens_max;
+       if (set_mask & VXSM_PRIO_BIAS)
+               vxi->sched.priority_bias = vc_data.priority_bias;
+
+       /* Sanity check the resultant values */
+       if (vxi->sched.fill_rate <= 0)
+               vxi->sched.fill_rate = 1;
+       if (vxi->sched.interval <= 0)
+               vxi->sched.interval = HZ;
+       if (vxi->sched.tokens_max == 0)
+               vxi->sched.tokens_max = 1;
+       if (atomic_read(&vxi->sched.tokens) > vxi->sched.tokens_max)
+               atomic_set(&vxi->sched.tokens, vxi->sched.tokens_max);
+       if (vxi->sched.tokens_min > vxi->sched.tokens_max)
+               vxi->sched.tokens_min = vxi->sched.tokens_max;
+       if (vxi->sched.priority_bias > MAX_PRIO_BIAS)
+               vxi->sched.priority_bias = MAX_PRIO_BIAS;
+       if (vxi->sched.priority_bias < MIN_PRIO_BIAS)
+               vxi->sched.priority_bias = MIN_PRIO_BIAS;
+
+#ifdef CONFIG_VSERVER_ACB_SCHED
+       vx_tokens_set(vxi, atomic_read(&vxi->sched.tokens));
+#endif
+
        spin_unlock(&vxi->sched.tokens_lock);
        put_vx_info(vxi);
        return 0;
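
To make the tick-offset accounting above concrete, here is a minimal standalone sketch of the ACB arithmetic in plain userspace C. It is illustrative only: struct sched_state, global_ticks and the helper names are simplifications invented for this note, not kernel API, and the atomics and locking of the real code are omitted.

#include <stdio.h>
#include <stdint.h>

#define TICK_SCALE 1000

struct sched_state {
	int fill_rate;		/* tokens gained per interval */
	int interval;		/* interval length, in scheduler ticks */
	int tokens_max;		/* bucket ceiling */
	uint64_t ticks;		/* global tick value at which balance is zero */
};

static uint64_t global_ticks;	/* stands in for vx_*_ticks */

static long ticks_per_token(const struct sched_state *s)
{
	return ((long)s->interval * TICK_SCALE) / s->fill_rate;
}

/* mirrors vx_tokens_set(): store the balance as a tick offset */
static void tokens_set(struct sched_state *s, int tokens)
{
	s->ticks = global_ticks - (uint64_t)tokens * ticks_per_token(s);
}

/* mirrors the clamp-and-divide in vx_tokens_avail(); the real code
 * also resets the baseline when it clamps */
static int tokens_avail(const struct sched_state *s)
{
	uint64_t diff = global_ticks - s->ticks;
	long tpt = ticks_per_token(s);

	if (diff >= (uint64_t)tpt * s->tokens_max)
		return s->tokens_max;
	return (int)(diff / tpt);
}

/* mirrors vx_consume_token(): move the baseline one token forward */
static void consume_token(struct sched_state *s)
{
	s->ticks += ticks_per_token(s);
}

int main(void)
{
	struct sched_state s = { .fill_rate = 1, .interval = 4, .tokens_max = 10 };

	tokens_set(&s, 0);
	global_ticks += 12 * TICK_SCALE;	/* twelve scheduler ticks pass */

	printf("available: %d\n", tokens_avail(&s));	/* prints 3 */
	consume_token(&s);
	printf("after consume: %d\n", tokens_avail(&s));	/* prints 2 */
	return 0;
}

Run, this prints 3 and then 2: twelve ticks at one token per four ticks yields three tokens, and consuming one moves the baseline forward so two remain.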