kernel/vserver/sched.c

   1 /*
   2  *  linux/kernel/vserver/sched.c
   3  *
   4  *  Virtual Server: Scheduler Support
   5  *
   6  *  Copyright (C) 2004-2005  Herbert Pötzl
   7  *
   8  *  V0.01  adapted Sam Vilain's version to 2.6.3
   9  *  V0.02  removed legacy interface
  10  *
  11  */
  12
  13 #include <linux/config.h>
  14 #include <linux/sched.h>
  15 #include <linux/vs_context.h>
  16 #include <linux/vs_sched.h>
  17 #include <linux/vserver/sched_cmd.h>
  18
  19 #include <asm/errno.h>
  20 #include <asm/uaccess.h>
  21
  22 #ifdef CONFIG_VSERVER_ACB_SCHED
  23
  24 #define TICK_SCALE 1000
  25 #define TICKS_PER_TOKEN(vxi) \
  26         ((vxi->sched.interval * TICK_SCALE) / vxi->sched.fill_rate)
  27 #define CLASS(vxi) \
  28     (IS_BEST_EFFORT(vxi) ? SCH_BEST_EFFORT : SCH_GUARANTEE)
  29 #define GLOBAL_TICKS(vxi) \
  30     (IS_BEST_EFFORT(vxi) ? vx_best_effort_ticks : vx_guaranteed_ticks)
  31
  32 uint64_t vx_guaranteed_ticks = 0;
  33 uint64_t vx_best_effort_ticks = 0;
  34
  35 void vx_tokens_set(struct vx_info *vxi, int tokens) {
  36     int class = CLASS(vxi);
  37
  38     vxi->sched.ticks[class] = GLOBAL_TICKS(vxi);
  39     vxi->sched.ticks[class] -=  tokens * TICKS_PER_TOKEN(vxi);
  40 }
  41
  42 void vx_scheduler_tick(void) {
  43     vx_guaranteed_ticks += TICK_SCALE;
  44     vx_best_effort_ticks += TICK_SCALE;
  45 }
  46
  47 void vx_advance_best_effort_ticks(int ticks) {
  48     vx_best_effort_ticks += TICK_SCALE * ticks;
  49 }
  50
  51 void vx_advance_guaranteed_ticks(int ticks) {
  52     vx_guaranteed_ticks += TICK_SCALE * ticks;
  53 }
  54
  55 int vx_tokens_avail(struct vx_info *vxi)
  56 {
  57     uint64_t diff;
  58     int tokens;
  59     long rem;
  60     int class = CLASS(vxi);
  61
  62     if (vxi->sched.state[class] == SCH_UNINITIALIZED) {
  63         /* Set the "real" token count */
  64         tokens = atomic_read(&vxi->sched.tokens);
  65         vx_tokens_set(vxi, tokens);
  66         vxi->sched.state[class] = SCH_INITIALIZED;
  67         goto out;
  68     }
  69
  70     if (vxi->sched.last_ticks[class] == GLOBAL_TICKS(vxi)) {
  71         tokens = atomic_read(&vxi->sched.tokens);
  72         goto out;
  73     }
  74
  75     /* Use of fixed-point arithmetic in these calculations leads to
  76      * some limitations.  These should be made explicit.
  77      */
  78     diff = GLOBAL_TICKS(vxi) - vxi->sched.ticks[class];
  79     tokens = div_long_long_rem(diff, TICKS_PER_TOKEN(vxi), &rem);
  80
  81     if (tokens > vxi->sched.tokens_max) {
  82         vx_tokens_set(vxi, vxi->sched.tokens_max);
  83         tokens = vxi->sched.tokens_max;
  84     }
  85
  86     atomic_set(&vxi->sched.tokens, tokens);
  87
  88 out:
  89     vxi->sched.last_ticks[class] = GLOBAL_TICKS(vxi);
  90     return tokens;
  91 }
  92
  93 void vx_consume_token(struct vx_info *vxi)
  94 {
  95     int class = CLASS(vxi);
  96
  97     vxi->sched.ticks[class] += TICKS_PER_TOKEN(vxi);
  98 }
  99
 100 /*
 101  * recalculate the context's scheduling tokens
 102  *
 103  * ret > 0 : number of tokens available
 104  * ret = 0 : context is paused
 105  * ret < 0 : number of jiffies until new tokens arrive
 106  *
 107  */
 108 int vx_tokens_recalc(struct vx_info *vxi)
 109 {
 110         long delta, tokens;
 111
 112         if (vx_info_flags(vxi, VXF_SCHED_PAUSE, 0))
 113                 /* we are paused */
 114                 return 0;
 115
 116         tokens = vx_tokens_avail(vxi);
 117         if (tokens <= 0)
 118             vxi->vx_state |= VXS_ONHOLD;
 119         if (tokens < vxi->sched.tokens_min) {
 120             delta = tokens - vxi->sched.tokens_min;
 121             /* enough tokens will be available in */
 122             return (delta * vxi->sched.interval) / vxi->sched.fill_rate;
 123         }
 124
 125         /* we have some tokens left */
 126         if (vx_info_state(vxi, VXS_ONHOLD) &&
 127                 (tokens >= vxi->sched.tokens_min))
 128                 vxi->vx_state &= ~VXS_ONHOLD;
 129         if (vx_info_state(vxi, VXS_ONHOLD))
 130                 tokens -= vxi->sched.tokens_min;
 131
 132         return tokens;
 133 }
 134
 135 #else
 136
 137 /*
 138  * recalculate the context's scheduling tokens
 139  *
 140  * ret > 0 : number of tokens available
 141  * ret = 0 : context is paused
 142  * ret < 0 : number of jiffies until new tokens arrive
 143  *
 144  */
 145 int vx_tokens_recalc(struct vx_info *vxi)
 146 {
 147         long delta, tokens = 0;
 148
 149         if (vx_info_flags(vxi, VXF_SCHED_PAUSE, 0))
 150                 /* we are paused */
 151                 return 0;
 152
 153         delta = jiffies - vxi->sched.jiffies;
 154
 155         if (delta >= vxi->sched.interval) {
 156                 /* lockdown scheduler info */
 157                 spin_lock(&vxi->sched.tokens_lock);
 158
 159                 /* calc integral token part */
 160                 delta = jiffies - vxi->sched.jiffies;
 161                 tokens = delta / vxi->sched.interval;
 162                 delta = tokens * vxi->sched.interval;
 163                 tokens *= vxi->sched.fill_rate;
 164
 165                 atomic_add(tokens, &vxi->sched.tokens);
 166                 vxi->sched.jiffies += delta;
 167                 tokens = atomic_read(&vxi->sched.tokens);
 168
 169                 if (tokens > vxi->sched.tokens_max) {
 170                         tokens = vxi->sched.tokens_max;
 171                         atomic_set(&vxi->sched.tokens, tokens);
 172                 }
 173                 spin_unlock(&vxi->sched.tokens_lock);
 174         } else {
 175                 /* no new tokens */
 176                 tokens = vx_tokens_avail(vxi);
 177                 if (tokens <= 0)
 178                         vxi->vx_state |= VXS_ONHOLD;
 179                 if (tokens < vxi->sched.tokens_min) {
 180                         /* enough tokens will be available in */
 181                         if (vxi->sched.tokens_min == 0)
 182                                 return delta - vxi->sched.interval;
 183                         return delta - vxi->sched.interval *
 184                                 vxi->sched.tokens_min / vxi->sched.fill_rate;
 185                 }
 186         }
 187
 188         /* we have some tokens left */
 189         if (vx_info_state(vxi, VXS_ONHOLD) &&
 190                 (tokens >= vxi->sched.tokens_min))
 191                 vxi->vx_state &= ~VXS_ONHOLD;
 192         if (vx_info_state(vxi, VXS_ONHOLD))
 193                 tokens -= vxi->sched.tokens_min;
 194
 195         return tokens;
 196 }
 197
 198 #endif /* CONFIG_VSERVER_ACB_SCHED */
 199
 200 /*
 201  * effective_prio - return the priority that is based on the static
 202  * priority but is modified by bonuses/penalties.
 203  *
 204  * We scale the actual sleep average [0 .... MAX_SLEEP_AVG]
 205  * into a -4 ... 0 ... +4 bonus/penalty range.
 206  *
 207  * Additionally, we scale another amount based on the number of
 208  * CPU tokens currently held by the context, if the process is
 209  * part of a context (and the appropriate SCHED flag is set).
 210  * This ranges from -5 ... 0 ... +15, quadratically.
 211  *
 212  * So, the total bonus is -9 .. 0 .. +19
 213  * We use ~50% of the full 0...39 priority range so that:
 214  *
 215  * 1) nice +19 interactive tasks do not preempt nice 0 CPU hogs.
 216  * 2) nice -20 CPU hogs do not get preempted by nice 0 tasks.
 217  *    unless that context is far exceeding its CPU allocation.
 218  *
 219  * Both properties are important to certain workloads.
 220  */
 221 int vx_effective_vavavoom(struct vx_info *vxi, int max_prio)
 222 {
 223         int vavavoom, max;
 224
 225         /* lots of tokens = lots of vavavoom
 226          *      no tokens = no vavavoom      */
 227         if ((vavavoom = atomic_read(&vxi->sched.tokens)) >= 0) {
 228                 max = vxi->sched.tokens_max;
 229                 vavavoom = max - vavavoom;
 230                 max = max * max;
 231                 vavavoom = max_prio * VAVAVOOM_RATIO / 100
 232                         * (vavavoom*vavavoom - (max >> 2)) / max;
 233         } else
 234                 vavavoom = 0;
 235
 236         vxi->sched.vavavoom = vavavoom;
 237         return vavavoom;
 238 }
 239
 240
 241 int vc_set_sched_v2(uint32_t xid, void __user *data)
 242 {
 243         struct vcmd_set_sched_v2 vc_data;
 244         struct vx_info *vxi;
 245
 246         if (copy_from_user (&vc_data, data, sizeof(vc_data)))
 247                 return -EFAULT;
 248
 249         vxi = locate_vx_info(xid);
 250         if (!vxi)
 251                 return -EINVAL;
 252
 253         spin_lock(&vxi->sched.tokens_lock);
 254
 255         if (vc_data.interval != SCHED_KEEP)
 256                 vxi->sched.interval = vc_data.interval;
 257         if (vc_data.fill_rate != SCHED_KEEP)
 258                 vxi->sched.fill_rate = vc_data.fill_rate;
 259         if (vc_data.tokens_min != SCHED_KEEP)
 260                 vxi->sched.tokens_min = vc_data.tokens_min;
 261         if (vc_data.tokens_max != SCHED_KEEP)
 262                 vxi->sched.tokens_max = vc_data.tokens_max;
 263         if (vc_data.tokens != SCHED_KEEP)
 264                 atomic_set(&vxi->sched.tokens, vc_data.tokens);
 265
 266         /* Sanity check the resultant values */
 267         if (vxi->sched.fill_rate <= 0)
 268                 vxi->sched.fill_rate = 1;
 269         if (vxi->sched.interval <= 0)
 270                 vxi->sched.interval = HZ;
 271         if (vxi->sched.tokens_max == 0)
 272                 vxi->sched.tokens_max = 1;
 273         if (atomic_read(&vxi->sched.tokens) > vxi->sched.tokens_max)
 274                 atomic_set(&vxi->sched.tokens, vxi->sched.tokens_max);
 275         if (vxi->sched.tokens_min > vxi->sched.tokens_max)
 276                 vxi->sched.tokens_min = vxi->sched.tokens_max;
 277
 278 #ifdef CONFIG_VSERVER_ACB_SCHED
 279         vx_tokens_set(vxi, atomic_read(&vxi->sched.tokens));
 280 #endif
 281
 282         spin_unlock(&vxi->sched.tokens_lock);
 283         put_vx_info(vxi);
 284         return 0;
 285 }
 286
 287
 288 int vc_set_sched(uint32_t xid, void __user *data)
 289 {
 290         struct vcmd_set_sched_v3 vc_data;
 291         struct vx_info *vxi;
 292         unsigned int set_mask;
 293
 294         if (copy_from_user (&vc_data, data, sizeof(vc_data)))
 295                 return -EFAULT;
 296
 297         vxi = locate_vx_info(xid);
 298         if (!vxi)
 299                 return -EINVAL;
 300
 301         set_mask = vc_data.set_mask;
 302
 303         spin_lock(&vxi->sched.tokens_lock);
 304
 305         if (set_mask & VXSM_FILL_RATE)
 306                 vxi->sched.fill_rate = vc_data.fill_rate;
 307         if (set_mask & VXSM_INTERVAL)
 308                 vxi->sched.interval = vc_data.interval;
 309         if (set_mask & VXSM_TOKENS)
 310                 atomic_set(&vxi->sched.tokens, vc_data.tokens);
 311         if (set_mask & VXSM_TOKENS_MIN)
 312                 vxi->sched.tokens_min = vc_data.tokens_min;
 313         if (set_mask & VXSM_TOKENS_MAX)
 314                 vxi->sched.tokens_max = vc_data.tokens_max;
 315         if (set_mask & VXSM_PRIO_BIAS)
 316                 vxi->sched.priority_bias = vc_data.priority_bias;
 317
 318         /* Sanity check the resultant values */
 319         if (vxi->sched.fill_rate <= 0)
 320                 vxi->sched.fill_rate = 1;
 321         if (vxi->sched.interval <= 0)
 322                 vxi->sched.interval = HZ;
 323         if (vxi->sched.tokens_max == 0)
 324                 vxi->sched.tokens_max = 1;
 325         if (atomic_read(&vxi->sched.tokens) > vxi->sched.tokens_max)
 326                 atomic_set(&vxi->sched.tokens, vxi->sched.tokens_max);
 327         if (vxi->sched.tokens_min > vxi->sched.tokens_max)
 328                 vxi->sched.tokens_min = vxi->sched.tokens_max;
 329         if (vxi->sched.priority_bias > MAX_PRIO_BIAS)
 330                 vxi->sched.priority_bias = MAX_PRIO_BIAS;
 331         if (vxi->sched.priority_bias < MIN_PRIO_BIAS)
 332                 vxi->sched.priority_bias = MIN_PRIO_BIAS;
 333
 334 #ifdef CONFIG_VSERVER_ACB_SCHED
 335         vx_tokens_set(vxi, atomic_read(&vxi->sched.tokens));
 336 #endif
 337
 338         spin_unlock(&vxi->sched.tokens_lock);
 339         put_vx_info(vxi);
 340         return 0;
 341 }
 342