kernel/vserver/sched.c

   1 /*
   2  *  linux/kernel/vserver/sched.c
   3  *
   4  *  Virtual Server: Scheduler Support
   5  *
   6  *  Copyright (C) 2004-2005  Herbert Pötzl
   7  *
   8  *  V0.01  adapted Sam Vilain's version to 2.6.3
   9  *  V0.02  removed legacy interface
  10  *
  11  */
  12
  13 #include <linux/config.h>
  14 #include <linux/sched.h>
  15 #include <linux/vs_base.h>
  16 #include <linux/vs_context.h>
  17 #include <linux/vs_sched.h>
  18 #include <linux/vserver/sched_cmd.h>
  19
  20 #include <asm/errno.h>
  21 #include <asm/uaccess.h>
  22
  23 #ifdef CONFIG_VSERVER_ACB_SCHED
  24
  25 #define TICK_SCALE 1000
  26 #define TICKS_PER_TOKEN(vxi) \
  27         ((vxi->sched.interval * TICK_SCALE) / vxi->sched.fill_rate)
  28 #define CLASS(vxi) \
  29     (IS_BEST_EFFORT(vxi) ? SCH_BEST_EFFORT : SCH_GUARANTEE)
  30 #define GLOBAL_TICKS(vxi) \
  31     (IS_BEST_EFFORT(vxi) ? vx_best_effort_ticks : vx_guaranteed_ticks)
  32
  33 uint64_t vx_guaranteed_ticks = 0;
  34 uint64_t vx_best_effort_ticks = 0;
  35
  36 void vx_tokens_set(struct vx_info *vxi, int tokens) {
  37     int class = CLASS(vxi);
  38     uint64_t tmp;
  39
  40     tmp = GLOBAL_TICKS(vxi);
  41     tmp -= tokens * TICKS_PER_TOKEN(vxi);
  42
  43     vxi->sched.ticks[class] = tmp;
  44 }
  45
  46 void vx_scheduler_tick(void) {
  47     vx_guaranteed_ticks += TICK_SCALE;
  48     vx_best_effort_ticks += TICK_SCALE;
  49 }
  50
  51 void vx_advance_best_effort_ticks(int ticks) {
  52     vx_best_effort_ticks += TICK_SCALE * ticks;
  53 }
  54
  55 void vx_advance_guaranteed_ticks(int ticks) {
  56     vx_guaranteed_ticks += TICK_SCALE * ticks;
  57 }
  58
  59 int vx_tokens_avail(struct vx_info *vxi)
  60 {
  61     uint64_t diff, max_ticks;
  62     int tokens;
  63     long tpt, rem;
  64     int class = CLASS(vxi);
  65
  66     if (vxi->sched.state[class] == SCH_UNINITIALIZED) {
  67         /* Set the "real" token count */
  68         tokens = atomic_read(&vxi->sched.tokens);
  69         vx_tokens_set(vxi, tokens);
  70         vxi->sched.state[class] = SCH_INITIALIZED;
  71         goto out;
  72     }
  73
  74     if (vxi->sched.last_ticks[class] == GLOBAL_TICKS(vxi)) {
  75         tokens = atomic_read(&vxi->sched.tokens);
  76         goto out;
  77     }
  78
  79     /* Use of fixed-point arithmetic in these calculations leads to
  80      * some limitations.  These should be made explicit.
  81      */
  82     max_ticks = (tpt = TICKS_PER_TOKEN(vxi));
  83     max_ticks *= vxi->sched.tokens_max;
  84     diff = GLOBAL_TICKS(vxi) - vxi->sched.ticks[class];
  85
  86     /* Avoid an overflow from div_long_long_rem */
  87     if (diff >= max_ticks) {
  88         vx_tokens_set(vxi, vxi->sched.tokens_max);
  89         tokens = vxi->sched.tokens_max;
  90     } else {
  91             /* Divide ticks by ticks per token to get tokens */
  92             tokens = div_long_long_rem(diff, tpt, &rem);
  93     }
  94
  95     atomic_set(&vxi->sched.tokens, tokens);
  96
  97 out:
  98     vxi->sched.last_ticks[class] = GLOBAL_TICKS(vxi);
  99     return tokens;
 100 }
 101
 102 void vx_consume_token(struct vx_info *vxi)
 103 {
 104     int class = CLASS(vxi);
 105
 106     vxi->sched.ticks[class] += TICKS_PER_TOKEN(vxi);
 107     atomic_dec(&vxi->sched.tokens);
 108 }
 109
 110 /*
 111  * recalculate the context's scheduling tokens
 112  *
 113  * ret > 0 : number of tokens available
 114  * ret = 0 : context is paused
 115  * ret < 0 : number of jiffies until new tokens arrive
 116  *
 117  */
 118 int vx_tokens_recalc(struct vx_info *vxi)
 119 {
 120         long delta, tokens;
 121
 122         if (vx_info_flags(vxi, VXF_SCHED_PAUSE, 0))
 123                 /* we are paused */
 124                 return 0;
 125
 126         tokens = vx_tokens_avail(vxi);
 127         if (tokens <= 0)
 128             vxi->vx_state |= VXS_ONHOLD;
 129         if (tokens < vxi->sched.tokens_min) {
 130             delta = tokens - vxi->sched.tokens_min;
 131             /* enough tokens will be available in */
 132             return (delta * vxi->sched.interval) / vxi->sched.fill_rate;
 133         }
 134
 135         /* we have some tokens left */
 136         if (vx_info_state(vxi, VXS_ONHOLD) &&
 137                 (tokens >= vxi->sched.tokens_min))
 138                 vxi->vx_state &= ~VXS_ONHOLD;
 139         if (vx_info_state(vxi, VXS_ONHOLD))
 140                 tokens -= vxi->sched.tokens_min;
 141
 142         return tokens;
 143 }
 144
 145 #else
 146
 147 /*
 148  * recalculate the context's scheduling tokens
 149  *
 150  * ret > 0 : number of tokens available
 151  * ret = 0 : context is paused
 152  * ret < 0 : number of jiffies until new tokens arrive
 153  *
 154  */
 155 int vx_tokens_recalc(struct vx_info *vxi)
 156 {
 157         long delta, tokens = 0;
 158
 159         if (vx_info_flags(vxi, VXF_SCHED_PAUSE, 0))
 160                 /* we are paused */
 161                 return 0;
 162
 163         delta = jiffies - vxi->sched.jiffies;
 164
 165         if (delta >= vxi->sched.interval) {
 166                 /* lockdown scheduler info */
 167                 spin_lock(&vxi->sched.tokens_lock);
 168
 169                 /* calc integral token part */
 170                 delta = jiffies - vxi->sched.jiffies;
 171                 tokens = delta / vxi->sched.interval;
 172                 delta = tokens * vxi->sched.interval;
 173                 tokens *= vxi->sched.fill_rate;
 174
 175                 atomic_add(tokens, &vxi->sched.tokens);
 176                 vxi->sched.jiffies += delta;
 177                 tokens = atomic_read(&vxi->sched.tokens);
 178
 179                 if (tokens > vxi->sched.tokens_max) {
 180                         tokens = vxi->sched.tokens_max;
 181                         atomic_set(&vxi->sched.tokens, tokens);
 182                 }
 183                 spin_unlock(&vxi->sched.tokens_lock);
 184         } else {
 185                 /* no new tokens */
 186                 tokens = vx_tokens_avail(vxi);
 187                 if (tokens <= 0)
 188                         vxi->vx_state |= VXS_ONHOLD;
 189                 if (tokens < vxi->sched.tokens_min) {
 190                         /* enough tokens will be available in */
 191                         if (vxi->sched.tokens_min == 0)
 192                                 return delta - vxi->sched.interval;
 193                         return delta - vxi->sched.interval *
 194                                 vxi->sched.tokens_min / vxi->sched.fill_rate;
 195                 }
 196         }
 197
 198         /* we have some tokens left */
 199         if (vx_info_state(vxi, VXS_ONHOLD) &&
 200                 (tokens >= vxi->sched.tokens_min))
 201                 vxi->vx_state &= ~VXS_ONHOLD;
 202         if (vx_info_state(vxi, VXS_ONHOLD))
 203                 tokens -= vxi->sched.tokens_min;
 204
 205         return tokens;
 206 }
 207
 208 #endif /* CONFIG_VSERVER_ACB_SCHED */
 209
 210 /*
 211  * effective_prio - return the priority that is based on the static
 212  * priority but is modified by bonuses/penalties.
 213  *
 214  * We scale the actual sleep average [0 .... MAX_SLEEP_AVG]
 215  * into a -4 ... 0 ... +4 bonus/penalty range.
 216  *
 217  * Additionally, we scale another amount based on the number of
 218  * CPU tokens currently held by the context, if the process is
 219  * part of a context (and the appropriate SCHED flag is set).
 220  * This ranges from -5 ... 0 ... +15, quadratically.
 221  *
 222  * So, the total bonus is -9 .. 0 .. +19
 223  * We use ~50% of the full 0...39 priority range so that:
 224  *
 225  * 1) nice +19 interactive tasks do not preempt nice 0 CPU hogs.
 226  * 2) nice -20 CPU hogs do not get preempted by nice 0 tasks.
 227  *    unless that context is far exceeding its CPU allocation.
 228  *
 229  * Both properties are important to certain workloads.
 230  */
 231 int vx_effective_vavavoom(struct vx_info *vxi, int max_prio)
 232 {
 233         int vavavoom, max;
 234
 235         /* lots of tokens = lots of vavavoom
 236          *      no tokens = no vavavoom      */
 237         if ((vavavoom = atomic_read(&vxi->sched.tokens)) >= 0) {
 238                 max = vxi->sched.tokens_max;
 239                 vavavoom = max - vavavoom;
 240                 max = max * max;
 241                 vavavoom = max_prio * VAVAVOOM_RATIO / 100
 242                         * (vavavoom*vavavoom - (max >> 2)) / max;
 243         } else
 244                 vavavoom = 0;
 245
 246         vxi->sched.vavavoom = vavavoom;
 247         return vavavoom + vxi->sched.priority_bias;
 248 }
 249
 250
 251 int vc_set_sched_v2(uint32_t xid, void __user *data)
 252 {
 253         struct vcmd_set_sched_v2 vc_data;
 254         struct vx_info *vxi;
 255
 256         if (copy_from_user (&vc_data, data, sizeof(vc_data)))
 257                 return -EFAULT;
 258
 259         vxi = lookup_vx_info(xid);
 260         if (!vxi)
 261                 return -ESRCH;
 262
 263         spin_lock(&vxi->sched.tokens_lock);
 264
 265         if (vc_data.interval != SCHED_KEEP)
 266                 vxi->sched.interval = vc_data.interval;
 267         if (vc_data.fill_rate != SCHED_KEEP)
 268                 vxi->sched.fill_rate = vc_data.fill_rate;
 269         if (vc_data.tokens_min != SCHED_KEEP)
 270                 vxi->sched.tokens_min = vc_data.tokens_min;
 271         if (vc_data.tokens_max != SCHED_KEEP)
 272                 vxi->sched.tokens_max = vc_data.tokens_max;
 273         if (vc_data.tokens != SCHED_KEEP)
 274                 atomic_set(&vxi->sched.tokens, vc_data.tokens);
 275
 276         /* Sanity check the resultant values */
 277         if (vxi->sched.fill_rate <= 0)
 278                 vxi->sched.fill_rate = 1;
 279         if (vxi->sched.interval <= 0)
 280                 vxi->sched.interval = HZ;
 281         if (vxi->sched.tokens_max == 0)
 282                 vxi->sched.tokens_max = 1;
 283         if (atomic_read(&vxi->sched.tokens) > vxi->sched.tokens_max)
 284                 atomic_set(&vxi->sched.tokens, vxi->sched.tokens_max);
 285         if (vxi->sched.tokens_min > vxi->sched.tokens_max)
 286                 vxi->sched.tokens_min = vxi->sched.tokens_max;
 287
 288 #ifdef CONFIG_VSERVER_ACB_SCHED
 289         vx_tokens_set(vxi, atomic_read(&vxi->sched.tokens));
 290 #endif
 291
 292         spin_unlock(&vxi->sched.tokens_lock);
 293         put_vx_info(vxi);
 294         return 0;
 295 }
 296
 297
 298 int vc_set_sched(uint32_t xid, void __user *data)
 299 {
 300         struct vcmd_set_sched_v3 vc_data;
 301         struct vx_info *vxi;
 302         unsigned int set_mask;
 303
 304         if (copy_from_user (&vc_data, data, sizeof(vc_data)))
 305                 return -EFAULT;
 306
 307         vxi = lookup_vx_info(xid);
 308         if (!vxi)
 309                 return -ESRCH;
 310
 311         set_mask = vc_data.set_mask;
 312
 313         spin_lock(&vxi->sched.tokens_lock);
 314
 315         if (set_mask & VXSM_FILL_RATE)
 316                 vxi->sched.fill_rate = vc_data.fill_rate;
 317         if (set_mask & VXSM_INTERVAL)
 318                 vxi->sched.interval = vc_data.interval;
 319         if (set_mask & VXSM_TOKENS)
 320                 atomic_set(&vxi->sched.tokens, vc_data.tokens);
 321         if (set_mask & VXSM_TOKENS_MIN)
 322                 vxi->sched.tokens_min = vc_data.tokens_min;
 323         if (set_mask & VXSM_TOKENS_MAX)
 324                 vxi->sched.tokens_max = vc_data.tokens_max;
 325         if (set_mask & VXSM_PRIO_BIAS)
 326                 vxi->sched.priority_bias = vc_data.priority_bias;
 327
 328         /* Sanity check the resultant values */
 329         if (vxi->sched.fill_rate <= 0)
 330                 vxi->sched.fill_rate = 1;
 331         if (vxi->sched.interval <= 0)
 332                 vxi->sched.interval = HZ;
 333         if (vxi->sched.tokens_max == 0)
 334                 vxi->sched.tokens_max = 1;
 335         if (atomic_read(&vxi->sched.tokens) > vxi->sched.tokens_max)
 336                 atomic_set(&vxi->sched.tokens, vxi->sched.tokens_max);
 337         if (vxi->sched.tokens_min > vxi->sched.tokens_max)
 338                 vxi->sched.tokens_min = vxi->sched.tokens_max;
 339         if (vxi->sched.priority_bias > MAX_PRIO_BIAS)
 340                 vxi->sched.priority_bias = MAX_PRIO_BIAS;
 341         if (vxi->sched.priority_bias < MIN_PRIO_BIAS)
 342                 vxi->sched.priority_bias = MIN_PRIO_BIAS;
 343
 344 #ifdef CONFIG_VSERVER_ACB_SCHED
 345         vx_tokens_set(vxi, atomic_read(&vxi->sched.tokens));
 346 #endif
 347
 348         spin_unlock(&vxi->sched.tokens_lock);
 349         put_vx_info(vxi);
 350         return 0;
 351 }
 352