kernel/vserver/sched.c

   1 /*
   2  *  linux/kernel/vserver/sched.c
   3  *
   4  *  Virtual Server: Scheduler Support
   5  *
   6  *  Copyright (C) 2004-2005  Herbert Pötzl
   7  *
   8  *  V0.01  adapted Sam Vilain's version to 2.6.3
   9  *  V0.02  removed legacy interface
  10  *
  11  */
  12
  13 #include <linux/sched.h>
  14 #include <linux/vs_context.h>
  15 #include <linux/vs_sched.h>
  16 #include <linux/vserver/sched_cmd.h>
  17
  18 #include <asm/errno.h>
  19 #include <asm/uaccess.h>
  20
  21 #ifdef CONFIG_VSERVER_ACB_SCHED
  22
  23 #define TICK_SCALE 1000
  24 #define TICKS_PER_TOKEN(vxi) \
  25         ((vxi->sched.interval * TICK_SCALE) / vxi->sched.fill_rate)
  26 #define CLASS(vxi) \
  27     (IS_BEST_EFFORT(vxi) ? SCH_BEST_EFFORT : SCH_GUARANTEE)
  28 #define GLOBAL_TICKS(vxi) \
  29     (IS_BEST_EFFORT(vxi) ? vx_best_effort_ticks : vx_guaranteed_ticks)
  30
  31 uint64_t vx_guaranteed_ticks = 0;
  32 uint64_t vx_best_effort_ticks = 0;
  33
  34 void vx_tokens_set(struct vx_info *vxi, int tokens) {
  35     int class = CLASS(vxi);
  36     uint64_t tmp;
  37
  38     tmp = GLOBAL_TICKS(vxi);
  39     tmp -= tokens * TICKS_PER_TOKEN(vxi);
  40
  41     vxi->sched.ticks[class] = tmp;
  42 }
  43
  44 void vx_scheduler_tick(void) {
  45     vx_guaranteed_ticks += TICK_SCALE;
  46     vx_best_effort_ticks += TICK_SCALE;
  47 }
  48
  49 void vx_advance_best_effort_ticks(int ticks) {
  50     vx_best_effort_ticks += TICK_SCALE * ticks;
  51 }
  52
  53 void vx_advance_guaranteed_ticks(int ticks) {
  54     vx_guaranteed_ticks += TICK_SCALE * ticks;
  55 }
  56
  57 int vx_tokens_avail(struct vx_info *vxi)
  58 {
  59     uint64_t diff, max_ticks;
  60     int tokens;
  61     long tpt, rem;
  62     int class = CLASS(vxi);
  63
  64     if (vxi->sched.state[class] == SCH_UNINITIALIZED) {
  65         /* Set the "real" token count */
  66         tokens = atomic_read(&vxi->sched.tokens);
  67         vx_tokens_set(vxi, tokens);
  68         vxi->sched.state[class] = SCH_INITIALIZED;
  69         goto out;
  70     }
  71
  72     if (vxi->sched.last_ticks[class] == GLOBAL_TICKS(vxi)) {
  73         tokens = atomic_read(&vxi->sched.tokens);
  74         goto out;
  75     }
  76
  77     /* Use of fixed-point arithmetic in these calculations leads to
  78      * some limitations.  These should be made explicit.
  79      */
  80     max_ticks = (tpt = TICKS_PER_TOKEN(vxi));
  81     max_ticks *= vxi->sched.tokens_max;
  82     diff = GLOBAL_TICKS(vxi) - vxi->sched.ticks[class];
  83
  84     /* Avoid an overflow from div_long_long_rem */
  85     if (diff >= max_ticks) {
  86         vx_tokens_set(vxi, vxi->sched.tokens_max);
  87         tokens = vxi->sched.tokens_max;
  88     } else {
  89             /* Divide ticks by ticks per token to get tokens */
  90             tokens = div_long_long_rem(diff, tpt, &rem);
  91     }
  92
  93     atomic_set(&vxi->sched.tokens, tokens);
  94
  95 out:
  96     vxi->sched.last_ticks[class] = GLOBAL_TICKS(vxi);
  97     return tokens;
  98 }
  99
 100 void vx_consume_token(struct vx_info *vxi)
 101 {
 102     int class = CLASS(vxi);
 103
 104     vxi->sched.ticks[class] += TICKS_PER_TOKEN(vxi);
 105     atomic_dec(&vxi->sched.tokens);
 106 }
 107
 108 /*
 109  * recalculate the context's scheduling tokens
 110  *
 111  * ret > 0 : number of tokens available
 112  * ret = 0 : context is paused
 113  * ret < 0 : number of jiffies until new tokens arrive
 114  *
 115  */
 116 int vx_tokens_recalc(struct vx_info *vxi)
 117 {
 118         long delta, tokens;
 119
 120         if (vx_info_flags(vxi, VXF_SCHED_PAUSE, 0))
 121                 /* we are paused */
 122                 return 0;
 123
 124         tokens = vx_tokens_avail(vxi);
 125         if (tokens <= 0)
 126             vxi->vx_state |= VXS_ONHOLD;
 127         if (tokens < vxi->sched.tokens_min) {
 128             delta = tokens - vxi->sched.tokens_min;
 129             /* enough tokens will be available in */
 130             return (delta * vxi->sched.interval) / vxi->sched.fill_rate;
 131         }
 132
 133         /* we have some tokens left */
 134         if (vx_info_state(vxi, VXS_ONHOLD) &&
 135                 (tokens >= vxi->sched.tokens_min))
 136                 vxi->vx_state &= ~VXS_ONHOLD;
 137         if (vx_info_state(vxi, VXS_ONHOLD))
 138                 tokens -= vxi->sched.tokens_min;
 139
 140         return tokens;
 141 }
 142
 143 #else
 144
 145 /*
 146  * recalculate the context's scheduling tokens
 147  *
 148  * ret > 0 : number of tokens available
 149  * ret = 0 : context is paused
 150  * ret < 0 : number of jiffies until new tokens arrive
 151  *
 152  */
 153 int vx_tokens_recalc(struct vx_info *vxi)
 154 {
 155         long delta, tokens = 0;
 156
 157         if (vx_info_flags(vxi, VXF_SCHED_PAUSE, 0))
 158                 /* we are paused */
 159                 return 0;
 160
 161         delta = jiffies - vxi->sched.jiffies;
 162
 163         if (delta >= vxi->sched.interval) {
 164                 /* lockdown scheduler info */
 165                 spin_lock(&vxi->sched.tokens_lock);
 166
 167                 /* calc integral token part */
 168                 delta = jiffies - vxi->sched.jiffies;
 169                 tokens = delta / vxi->sched.interval;
 170                 delta = tokens * vxi->sched.interval;
 171                 tokens *= vxi->sched.fill_rate;
 172
 173                 atomic_add(tokens, &vxi->sched.tokens);
 174                 vxi->sched.jiffies += delta;
 175                 tokens = atomic_read(&vxi->sched.tokens);
 176
 177                 if (tokens > vxi->sched.tokens_max) {
 178                         tokens = vxi->sched.tokens_max;
 179                         atomic_set(&vxi->sched.tokens, tokens);
 180                 }
 181                 spin_unlock(&vxi->sched.tokens_lock);
 182         } else {
 183                 /* no new tokens */
 184                 tokens = vx_tokens_avail(vxi);
 185                 if (tokens <= 0)
 186                         vxi->vx_state |= VXS_ONHOLD;
 187                 if (tokens < vxi->sched.tokens_min) {
 188                         /* enough tokens will be available in */
 189                         if (vxi->sched.tokens_min == 0)
 190                                 return delta - vxi->sched.interval;
 191                         return delta - vxi->sched.interval *
 192                                 vxi->sched.tokens_min / vxi->sched.fill_rate;
 193                 }
 194         }
 195
 196         /* we have some tokens left */
 197         if (vx_info_state(vxi, VXS_ONHOLD) &&
 198                 (tokens >= vxi->sched.tokens_min))
 199                 vxi->vx_state &= ~VXS_ONHOLD;
 200         if (vx_info_state(vxi, VXS_ONHOLD))
 201                 tokens -= vxi->sched.tokens_min;
 202
 203         return tokens;
 204 }
 205
 206 #endif /* CONFIG_VSERVER_ACB_SCHED */
 207
 208 /*
 209  * effective_prio - return the priority that is based on the static
 210  * priority but is modified by bonuses/penalties.
 211  *
 212  * We scale the actual sleep average [0 .... MAX_SLEEP_AVG]
 213  * into a -4 ... 0 ... +4 bonus/penalty range.
 214  *
 215  * Additionally, we scale another amount based on the number of
 216  * CPU tokens currently held by the context, if the process is
 217  * part of a context (and the appropriate SCHED flag is set).
 218  * This ranges from -5 ... 0 ... +15, quadratically.
 219  *
 220  * So, the total bonus is -9 .. 0 .. +19
 221  * We use ~50% of the full 0...39 priority range so that:
 222  *
 223  * 1) nice +19 interactive tasks do not preempt nice 0 CPU hogs.
 224  * 2) nice -20 CPU hogs do not get preempted by nice 0 tasks.
 225  *    unless that context is far exceeding its CPU allocation.
 226  *
 227  * Both properties are important to certain workloads.
 228  */
 229 int vx_effective_vavavoom(struct vx_info *vxi, int max_prio)
 230 {
 231         int vavavoom, max;
 232
 233         /* lots of tokens = lots of vavavoom
 234          *      no tokens = no vavavoom      */
 235         if ((vavavoom = atomic_read(&vxi->sched.tokens)) >= 0) {
 236                 max = vxi->sched.tokens_max;
 237                 vavavoom = max - vavavoom;
 238                 max = max * max;
 239                 vavavoom = max_prio * VAVAVOOM_RATIO / 100
 240                         * (vavavoom*vavavoom - (max >> 2)) / max;
 241         } else
 242                 vavavoom = 0;
 243
 244         vxi->sched.vavavoom = vavavoom;
 245         return vavavoom + vxi->sched.priority_bias;
 246 }
 247
 248
 249 int vc_set_sched_v2(uint32_t xid, void __user *data)
 250 {
 251         struct vcmd_set_sched_v2 vc_data;
 252         struct vx_info *vxi;
 253
 254         if (copy_from_user (&vc_data, data, sizeof(vc_data)))
 255                 return -EFAULT;
 256
 257         vxi = lookup_vx_info(xid);
 258         if (!vxi)
 259                 return -ESRCH;
 260
 261         spin_lock(&vxi->sched.tokens_lock);
 262
 263         if (vc_data.interval != SCHED_KEEP)
 264                 vxi->sched.interval = vc_data.interval;
 265         if (vc_data.fill_rate != SCHED_KEEP)
 266                 vxi->sched.fill_rate = vc_data.fill_rate;
 267         if (vc_data.tokens_min != SCHED_KEEP)
 268                 vxi->sched.tokens_min = vc_data.tokens_min;
 269         if (vc_data.tokens_max != SCHED_KEEP)
 270                 vxi->sched.tokens_max = vc_data.tokens_max;
 271         if (vc_data.tokens != SCHED_KEEP)
 272                 atomic_set(&vxi->sched.tokens, vc_data.tokens);
 273
 274         /* Sanity check the resultant values */
 275         if (vxi->sched.fill_rate <= 0)
 276                 vxi->sched.fill_rate = 1;
 277         if (vxi->sched.interval <= 0)
 278                 vxi->sched.interval = HZ;
 279         if (vxi->sched.tokens_max == 0)
 280                 vxi->sched.tokens_max = 1;
 281         if (atomic_read(&vxi->sched.tokens) > vxi->sched.tokens_max)
 282                 atomic_set(&vxi->sched.tokens, vxi->sched.tokens_max);
 283         if (vxi->sched.tokens_min > vxi->sched.tokens_max)
 284                 vxi->sched.tokens_min = vxi->sched.tokens_max;
 285
 286 #ifdef CONFIG_VSERVER_ACB_SCHED
 287         vx_tokens_set(vxi, atomic_read(&vxi->sched.tokens));
 288 #endif
 289
 290         spin_unlock(&vxi->sched.tokens_lock);
 291         put_vx_info(vxi);
 292         return 0;
 293 }
 294
 295
 296 int vc_set_sched(uint32_t xid, void __user *data)
 297 {
 298         struct vcmd_set_sched_v3 vc_data;
 299         struct vx_info *vxi;
 300         unsigned int set_mask;
 301
 302         if (copy_from_user (&vc_data, data, sizeof(vc_data)))
 303                 return -EFAULT;
 304
 305         vxi = lookup_vx_info(xid);
 306         if (!vxi)
 307                 return -ESRCH;
 308
 309         set_mask = vc_data.set_mask;
 310
 311         spin_lock(&vxi->sched.tokens_lock);
 312
 313         if (set_mask & VXSM_FILL_RATE)
 314                 vxi->sched.fill_rate = vc_data.fill_rate;
 315         if (set_mask & VXSM_INTERVAL)
 316                 vxi->sched.interval = vc_data.interval;
 317         if (set_mask & VXSM_TOKENS)
 318                 atomic_set(&vxi->sched.tokens, vc_data.tokens);
 319         if (set_mask & VXSM_TOKENS_MIN)
 320                 vxi->sched.tokens_min = vc_data.tokens_min;
 321         if (set_mask & VXSM_TOKENS_MAX)
 322                 vxi->sched.tokens_max = vc_data.tokens_max;
 323         if (set_mask & VXSM_PRIO_BIAS)
 324                 vxi->sched.priority_bias = vc_data.priority_bias;
 325
 326         /* Sanity check the resultant values */
 327         if (vxi->sched.fill_rate <= 0)
 328                 vxi->sched.fill_rate = 1;
 329         if (vxi->sched.interval <= 0)
 330                 vxi->sched.interval = HZ;
 331         if (vxi->sched.tokens_max == 0)
 332                 vxi->sched.tokens_max = 1;
 333         if (atomic_read(&vxi->sched.tokens) > vxi->sched.tokens_max)
 334                 atomic_set(&vxi->sched.tokens, vxi->sched.tokens_max);
 335         if (vxi->sched.tokens_min > vxi->sched.tokens_max)
 336                 vxi->sched.tokens_min = vxi->sched.tokens_max;
 337         if (vxi->sched.priority_bias > MAX_PRIO_BIAS)
 338                 vxi->sched.priority_bias = MAX_PRIO_BIAS;
 339         if (vxi->sched.priority_bias < MIN_PRIO_BIAS)
 340                 vxi->sched.priority_bias = MIN_PRIO_BIAS;
 341
 342 #ifdef CONFIG_VSERVER_ACB_SCHED
 343         vx_tokens_set(vxi, atomic_read(&vxi->sched.tokens));
 344 #endif
 345
 346         spin_unlock(&vxi->sched.tokens_lock);
 347         put_vx_info(vxi);
 348         return 0;
 349 }
 350