kernel/ckrm_sched.c

   1 /* kernel/ckrm_sched.c - Supporting functions for ckrm scheduling
   2  *
   3  * Copyright (C) Haoqiang Zheng,  IBM Corp. 2004
   4  *           (C) Hubertus Franke, IBM Corp. 2004
   5  *
   6  * Latest version, more details at http://ckrm.sf.net
   7  *
   8  * This program is free software; you can redistribute it and/or modify
   9  * it under the terms of the GNU General Public License as published by
  10  * the Free Software Foundation; either version 2 of the License, or
  11  * (at your option) any later version.
  12  *
  13  */
  14 #include <linux/mm.h>
  15 #include <linux/init.h>
  16 #include <linux/ckrm_sched.h>
  17
  18 rwlock_t   class_list_lock = RW_LOCK_UNLOCKED;
  19 LIST_HEAD(active_cpu_classes);   // list of active cpu classes; anchor
  20
  21 struct ckrm_cpu_class default_cpu_class_obj;
  22
  23 unsigned int ckrm_sched_mode __cacheline_aligned_in_smp =
  24 #ifdef CONFIG_CKRM_CPU_SCHEDULE_AT_BOOT
  25                         CKRM_SCHED_MODE_ENABLED;
  26 #else
  27                         CKRM_SCHED_MODE_DISABLED;
  28 #endif
  29
  30 static int __init ckrm_cpu_enabled_setup(char *str)
  31 {
  32         ckrm_sched_mode = CKRM_SCHED_MODE_ENABLED;
  33         return 1;
  34 }
  35
  36 static int __init ckrm_cpu_disabled_setup(char *str)
  37 {
  38         ckrm_sched_mode = CKRM_SCHED_MODE_DISABLED;
  39         return 1;
  40 }
  41
  42 __setup("ckrmcpu",  ckrm_cpu_enabled_setup);
  43 __setup("nockrmcpu",ckrm_cpu_disabled_setup);
  44
  45 struct ckrm_cpu_class * get_default_cpu_class(void) {
  46         return (&default_cpu_class_obj);
  47 }
  48
  49 /*******************************************************/
  50 /*                CVT Management                       */
  51 /*******************************************************/
  52
  53 //an absolute bonus of 200ms for classes when reactivated
  54 #define INTERACTIVE_BONUS(lrq) ((200*NSEC_PER_MS)/local_class_weight(lrq))
  55
  56 static void check_inactive_class(ckrm_lrq_t * lrq,CVT_t cur_cvt)
  57 {
  58         CVT_t min_cvt;
  59         CVT_t bonus;
  60
  61         //just a safty measure
  62         if (unlikely(! cur_cvt))
  63                 return;
  64
  65         /*
  66          * Always leaving a small bonus for inactive classes
  67          * allows them to compete for cycles immediately when the become
  68          * active. This should improve interactive behavior
  69          */
  70         bonus = INTERACTIVE_BONUS(lrq);
  71         //cvt can't be negative
  72         if (likely(cur_cvt > bonus))
  73                 min_cvt = cur_cvt - bonus;
  74         else
  75                 min_cvt = 0;
  76
  77         if (lrq->local_cvt < min_cvt) {
  78                 //      if (lrq->local_cvt < min_cvt && ! lrq_nr_running(lrq)) {
  79                 CVT_t lost_cvt;
  80
  81                 if (unlikely(lrq->local_cvt == 0)) {
  82                         lrq->local_cvt = cur_cvt;
  83                         return;
  84                 }
  85                 lost_cvt = min_cvt - lrq->local_cvt;
  86                 lost_cvt *= local_class_weight(lrq);
  87                 lrq->local_cvt = min_cvt;
  88                 BUG_ON(lost_cvt < 0);
  89
  90                 /* add what the class lost to its savings*/
  91 #if 1 /*zhq debugging*/
  92                 lrq->savings += lost_cvt;
  93 #endif
  94                 if (lrq->savings > MAX_SAVINGS)
  95                         lrq->savings = MAX_SAVINGS;
  96 #if 0 /* zhq debugging*/
  97                 printk("lrq= %x savings: %llu lost= %llu\n",(int)lrq,lrq->savings,lost_cvt);
  98 #endif
  99         }
 100 }
 101
 102 /*
 103  * return the max_cvt of all the classes
 104  */
 105 CVT_t get_max_cvt(int this_cpu)
 106 {
 107         struct ckrm_cpu_class *clsptr;
 108         ckrm_lrq_t * lrq;
 109         CVT_t max_cvt;
 110
 111         max_cvt = 0;
 112
 113         list_for_each_entry(clsptr, &active_cpu_classes, links) {
 114                 lrq = get_ckrm_lrq(clsptr, this_cpu);
 115                 if (lrq->local_cvt > max_cvt)
 116                         max_cvt = lrq->local_cvt;
 117         }
 118
 119         return max_cvt;
 120 }
 121
 122 CVT_t get_min_cvt(int this_cpu)
 123 {
 124         struct ckrm_cpu_class *clsptr;
 125         ckrm_lrq_t * lrq;
 126         CVT_t max_cvt;
 127
 128         max_cvt = 0xFFFFFFFFFFFFFLLU;
 129
 130         list_for_each_entry(clsptr, &active_cpu_classes, links) {
 131                 lrq = get_ckrm_lrq(clsptr, this_cpu);
 132                 if (lrq->local_cvt < max_cvt)
 133                         max_cvt = lrq->local_cvt;
 134         }
 135
 136         return max_cvt;
 137 }
 138
 139 /**
 140  * update_class_cputime - updates cvt of inactive classes
 141  * -- an inactive class shouldn't starve others when it comes back
 142  * -- the cpu time it lost when it's inactive should be accumulated
 143  * -- its accumulated saving should be compensated (in a leaky bucket fashion)
 144  *
 145  * class_list_lock must have been acquired
 146  */
 147 void update_class_cputime(int this_cpu, int idle)
 148 {
 149         struct ckrm_cpu_class *clsptr;
 150         ckrm_lrq_t * lrq;
 151         CVT_t cur_cvt;
 152
 153         /*
 154          *  a class's local_cvt must not be significantly smaller than min_cvt
 155          *  of active classes otherwise, it will starve other classes when it
 156          *  is reactivated.
 157          *
 158          *  Hence we keep all local_cvt's within a range of the min_cvt off
 159          *  all active classes (approximated by the local_cvt of the currently
 160          *  running class) and account for how many cycles where thus taken
 161          *  from an inactive class building a savings (not to exceed a few seconds)
 162          *  for a class to gradually make up upon reactivation, without
 163          *  starvation of other classes.
 164          *
 165          */
 166         cur_cvt = get_local_cur_cvt(this_cpu);
 167
 168         /*
 169          * cur_cvt == 0 means the system is now idle
 170          * in this case, we use max_cvt as cur_cvt
 171          * max_cvt roughly represents the cvt of the class
 172          * that has just finished running
 173          *
 174          * fairness wouldn't be a problem since we account for whatever lost in savings
 175          * if the system is not busy, the system responsiveness is not a problem.
 176          * still fine if the sytem is busy, but happened to be idle at this certain point
 177          * since bias toward interactive classes (class priority) is a more important way to improve system responsiveness
 178          */
 179         if (unlikely(! cur_cvt))  {
 180                 cur_cvt = get_max_cvt(this_cpu);
 181                 //return;
 182         }
 183
 184         /*
 185          *  - check the local cvt of all the classes
 186          *  - update total_ns received by the class
 187          *  - do a usage sampling for the whole class
 188          */
 189         list_for_each_entry(clsptr, &active_cpu_classes, links) {
 190                 lrq = get_ckrm_lrq(clsptr, this_cpu);
 191
 192                 spin_lock(&clsptr->stat.stat_lock);
 193                 clsptr->stat.total_ns += lrq->uncounted_ns;
 194                 ckrm_sample_usage(clsptr);
 195                 spin_unlock(&clsptr->stat.stat_lock);
 196                 lrq->uncounted_ns = 0;
 197
 198                 check_inactive_class(lrq,cur_cvt);
 199         }
 200 }
 201
 202 /*******************************************************/
 203 /*                PID load balancing stuff             */
 204 /*******************************************************/
 205 #define PID_KP 20
 206 #define PID_KI 60
 207 #define PID_KD 20
 208
 209 /*
 210  * runqueue load is the local_weight of all the classes on this cpu
 211  * must be called with class_list_lock held
 212  */
 213 static unsigned long ckrm_cpu_load(int cpu)
 214 {
 215         struct ckrm_cpu_class *clsptr;
 216         ckrm_lrq_t* lrq;
 217         struct ckrm_cpu_demand_stat* l_stat;
 218         int total_load = 0;
 219         int load;
 220
 221         list_for_each_entry(clsptr,&active_cpu_classes,links) {
 222                 lrq =  get_ckrm_lrq(clsptr,cpu);
 223                 l_stat = get_cls_local_stat(clsptr,cpu);
 224
 225                 load = WEIGHT_TO_SHARE(lrq->local_weight);
 226
 227                 if (l_stat->cpu_demand < load)
 228                         load = l_stat->cpu_demand;
 229                 total_load += load;
 230         }
 231         return total_load;
 232 }
 233
 234
 235 /**
 236  * sample pid load periodically
 237  */
 238
 239 void ckrm_load_sample(ckrm_load_t* pid,int cpu)
 240 {
 241         long load;
 242         long err;
 243
 244         load = ckrm_cpu_load(cpu);
 245         err = load - pid->load_p;
 246         pid->load_d = err;
 247         pid->load_p = load;
 248         pid->load_i *= 9;
 249         pid->load_i += load;
 250         pid->load_i /= 10;
 251 }
 252
 253 long ckrm_get_pressure(ckrm_load_t* ckrm_load, int local_group)
 254 {
 255         long pressure;
 256         pressure = ckrm_load->load_p * PID_KP;
 257         pressure += ckrm_load->load_i * PID_KI;
 258         pressure += ckrm_load->load_d * PID_KD;
 259         pressure /= 100;
 260         return pressure;
 261 }
 262
 263 /*
 264  *  called after a task is switched out. Update the local cvt accounting
 265  *  we need to stick with long instead of long long due to nonexistent
 266  *  64-bit division
 267  */
 268 void update_local_cvt(struct task_struct *p, unsigned long nsec)
 269 {
 270         ckrm_lrq_t * lrq = get_task_lrq(p);
 271         unsigned long cvt_inc;
 272
 273         /*
 274          * consume from savings if eshare is larger than egrt
 275          */
 276         if (lrq->savings && lrq->over_weight) {
 277                 unsigned long savings_used;
 278
 279                 savings_used = nsec;
 280                 savings_used >>= CKRM_WEIGHT_SHIFT;
 281                 savings_used *= lrq->over_weight;
 282                 if (savings_used > lrq->savings)
 283                         savings_used = lrq->savings;
 284                 lrq->savings -= savings_used;
 285         }
 286
 287         //BUG_ON(local_class_weight(lrq) == 0);
 288         cvt_inc = nsec / local_class_weight(lrq);
 289
 290         /*
 291          * For a certain processor, CKRM allocates CPU time propotional
 292          * to the class's local_weight. So once a class consumed nsec,
 293          * it will wait for X (nsec) for its next turn.
 294          *
 295          * X is calculated based on the following fomular
 296          *     nsec / local_weight < X / (CKRM_MAX_WEIGHT - local_weight)
 297          * if local_weight is small, then approximated as
 298          *     nsec / local_weight < X / (CKRM_MAX_WEIGHT)
 299          */
 300 #define CVT_STARVATION_LIMIT (200LL*NSEC_PER_MS)
 301 #define CVT_STARVATION_INC_LIMIT (CVT_STARVATION_LIMIT >> CKRM_WEIGHT_SHIFT)
 302
 303         if (unlikely(lrq->skewed_weight)) {
 304                 unsigned long long starvation_limit = CVT_STARVATION_INC_LIMIT;
 305
 306                 starvation_limit *= local_class_weight(lrq);
 307                 if (unlikely(cvt_inc > starvation_limit))
 308                         cvt_inc = nsec / lrq->skewed_weight;
 309         }
 310
 311         /* now update the CVT accounting */
 312
 313         lrq->local_cvt += cvt_inc;
 314         lrq->uncounted_ns += nsec;
 315         update_class_priority(lrq);
 316 }