CKRM cpu controller version 8.2.

author    Marc Fiuczynski <mef@cs.princeton.edu>
          Thu, 30 Sep 2004 12:45:42 +0000 (12:45 +0000)
committer Marc Fiuczynski <mef@cs.princeton.edu>
          Thu, 30 Sep 2004 12:45:42 +0000 (12:45 +0000)
include/linux/ckrm_sched.h
kernel/ckrm/ckrm_cpu_class.c
kernel/ckrm/ckrm_cpu_monitor.c
kernel/ckrm_sched.c

diff --git a/include/linux/ckrm_sched.h b/include/linux/ckrm_sched.h
index b7e6b30..62b3ba2 100644
@@ -71,8 +71,6 @@ struct ckrm_runqueue {
         * 
         * initialized to be 0
         * a class can't accumulate more than SAVING_THRESHOLD of savings
-        * savings are kept in normalized form (like cvt)
-        * so when task share change the savings should be scaled accordingly
         */
        unsigned long long savings;
 
@@ -256,7 +254,7 @@ void ckrm_cpu_change_class(void *task, void *old, void *new);
 #define CPU_DEMAND_INIT 3
 
 /*functions exported by ckrm_cpu_monitor.c*/
-void ckrm_cpu_monitor(void);
+void ckrm_cpu_monitor(int check_min);
 int ckrm_cpu_monitor_init(void);
 void ckrm_cpu_stat_init(struct ckrm_cpu_class_stat *stat);
 void cpu_demand_event(struct ckrm_cpu_demand_stat* local_stat, int event, unsigned long long len);
@@ -274,18 +272,21 @@ void adjust_local_weight(void);
  * 
  * CLASS_QUANTIZER:
  * 
- * A class with 5% share, can execute 50M nsecs / per sec ~ 2^28.
+ * A class with 50% share can execute 500 ms per sec ~ 2^29 ns.
 * Its share will be set to 512 = 2^9. The global CLASSQUEUE_SIZE is set to 2^7.
  * With CLASS_QUANTIZER=16, the local_cvt of this class will increase
- * by 2^28/2^9 = 2^19 = 512K.
- * Setting CLASS_QUANTIZER to 16, 2^(19-16) = 8 slots / per second.
- * A class with 5% shares, will cover 80 slots / per second.
+ * by 2^29/2^9 = 2^20 = 1024K.
+ * Setting CLASS_QUANTIZER to 16 gives 2^(20-16) = 16 slots per second.
+ * By the same math, a class with any share value will cover 16 slots per second.
+ * So the 2^7 total slots can track 8 seconds of system execution.
  *
  * PRIORITY_QUANTIZER:
  *
 * How much the top priority of a class can impact the slot bonus.
- * There are 40 nice priorities. "2" will allow upto 10 slots improvement
- * in the RQ thus for 50% class it can perform ~1sec starvation.
+ * There are 40 nice priorities, ranging from -20 to 19, with a default nice of 0.
+ * "2" will allow up to 5 slots of improvement in the RQ
+ * when a task within the class has a nice value of -20;
+ * thus a 50% class can perform ~300 msec of starvation.
  *
  *******************************************************************/
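To make the arithmetic above concrete, here is a small standalone userspace sketch; the 50% share and quantizer values come from the comment, while the variable names and everything else are invented for illustration:

#include <stdio.h>

int main(void)
{
        /* a class with 50% share executes ~2^29 ns of CPU time per second */
        unsigned long long ns_per_sec = 1ULL << 29;
        unsigned long share = 1UL << 9;          /* 50% share -> 512 */
        int class_quantizer = 16;

        /* local_cvt grows by the consumed time divided by the share */
        unsigned long long cvt_inc = ns_per_sec / share;        /* 2^20 */
        /* the class advances cvt_inc >> CLASS_QUANTIZER slots per second */
        unsigned long long slots_per_sec = cvt_inc >> class_quantizer; /* 16 */

        printf("cvt increase: %llu/sec, slots: %llu/sec\n",
               cvt_inc, slots_per_sec);
        return 0;
}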
 
@@ -322,7 +323,7 @@ void adjust_local_weight(void);
 /*
  * to improve system responsiveness
  * an inactive class is put a little bit ahead of the current class when it wakes up
- * the amount is set in normalized termis to simplify the calculation
+ * the amount is set in normalized terms to simplify the calculation
  * for class with 100% share, it can be 2s ahead
  * while for class with 10% share, it can be 200ms ahead
  */
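A quick back-of-envelope for the wakeup bonus above, assuming (purely for illustration) that a single normalized amount maps to wall time in proportion to share:

#include <stdio.h>

int main(void)
{
        /* one normalized bonus value, expressed here as 2s of wall time
         * for a 100% class; a 10% class sees a tenth of that */
        unsigned long long bonus_ns_at_full_share = 2000000000ULL;
        int shares[] = { 100, 10 };

        for (int i = 0; i < 2; i++)
                printf("%3d%% share -> %llu ms ahead\n", shares[i],
                       bonus_ns_at_full_share * shares[i] / 100 / 1000000);
        return 0;
}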
diff --git a/kernel/ckrm/ckrm_cpu_class.c b/kernel/ckrm/ckrm_cpu_class.c
index cdb8af7..09ea6ba 100644
@@ -180,6 +180,9 @@ static void ckrm_free_cpu_class(void *my_res)
        write_unlock(&class_list_lock);
 
        kfree(cls);
+
+       //call ckrm_cpu_monitor after the class is removed
+       ckrm_cpu_monitor(0);
 }                              
 
 /*
@@ -220,6 +223,10 @@ int ckrm_cpu_set_share(void *my_res, struct ckrm_shares *new_share)
        if (cls->parent) {
                spin_unlock(&parres->cnt_lock);
        }
+
+       //call ckrm_cpu_monitor after the shares are changed
+       ckrm_cpu_monitor(0);
+
        return rc;
 }                                                      
                        
@@ -269,7 +276,7 @@ int ckrm_cpu_get_stats(void *my_res, struct seq_file * sfile)
                   );
        for_each_online_cpu(i) {
                lrq = get_ckrm_lrq(cls,i);              
-               seq_printf(sfile, "\tlrq %d demand= %lu weight= %d lrq_load= %lu cvt= %llu sav=%llu\n",i,stat->local_stats[i].cpu_demand,local_class_weight(lrq),lrq->lrq_load,lrq->local_cvt,lrq->savings);
+               seq_printf(sfile, "\tlrq %d demand= %lu weight= %d lrq_load= %lu cvt= %llu sav= %llu\n",i,stat->local_stats[i].cpu_demand,local_class_weight(lrq),lrq->lrq_load,lrq->local_cvt,lrq->savings);
        }
 
        seq_printf(sfile, "-------- CPU Class Status END ---------\n");
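The two ckrm_cpu_monitor(0) calls added above force an immediate recompute after a configuration change, while periodic callers pass 1 and stay rate-limited. A minimal userspace mock of that gate, with sched_clock() stubbed and the 100ms interval assumed from the ckrm_cpu_monitor.c hunk further down:

#include <stdio.h>

#define MIN_CPU_MONITOR_INTERVAL (100ULL * 1000 * 1000)  /* 100ms in ns */

static unsigned long long fake_clock;  /* stand-in for sched_clock() */

static void cpu_monitor(int check_min)
{
        static unsigned long long last_check;
        unsigned long long now = fake_clock;

        if (check_min && (now - last_check < MIN_CPU_MONITOR_INTERVAL)) {
                printf("t=%lluns: skipped (too soon)\n", now);
                return;
        }
        last_check = now;
        printf("t=%lluns: monitor ran (check_min=%d)\n", now, check_min);
}

int main(void)
{
        fake_clock = 150ULL * 1000 * 1000;
        cpu_monitor(1);                    /* periodic: runs */
        fake_clock += 50ULL * 1000 * 1000;
        cpu_monitor(1);                    /* periodic: skipped, <100ms apart */
        cpu_monitor(0);                    /* share change/class removal: forced */
        return 0;
}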
diff --git a/kernel/ckrm/ckrm_cpu_monitor.c b/kernel/ckrm/ckrm_cpu_monitor.c
index 09bdb21..11f65d7 100644
@@ -357,6 +357,10 @@ static int update_child_effective(struct ckrm_core_class *parent)
                    c_cls->stat.ehl *
                    get_myhard_limit(c_cls) / c_cls->shares.total_guarantee;
 
+               set_eshare(&c_cls->stat,c_cls->stat.egrt);
+               set_meshare(&c_cls->stat,c_cls->stat.megrt);
+
+
                child_core = ckrm_get_next_child(parent, child_core);
        };
        return 0;
@@ -386,15 +390,18 @@ static int update_effectives(struct ckrm_core_class *root_core)
                / cls->shares.total_guarantee;
        cls->stat.mehl = cls->stat.ehl * get_myhard_limit(cls)
                / cls->shares.total_guarantee;
-       
+       set_eshare(&cls->stat,cls->stat.egrt);
+       set_meshare(&cls->stat,cls->stat.megrt);
+
  repeat:
        //check exit
        if (!cur_core)
                return 0;
 
-       //visit this node
-       if (update_child_effective(cur_core) < 0)
-               return ret; //invalid cur_core node
+       //visit this node only once
+       if (! child_core)
+               if (update_child_effective(cur_core) < 0)
+                       return ret; //invalid cur_core node
        
        //next child
        child_core = ckrm_get_next_child(cur_core, child_core);
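The new "visit this node only once" guard matters because the repeat loop re-enters a node every time it finishes a child. A standalone toy of the cur_core/child_core walk; the tree shape and the next_child() helper are invented stand-ins for ckrm_get_next_child():

#include <stdio.h>

struct node {
        const char *name;
        struct node *parent;
        struct node *child;    /* first child */
        struct node *sibling;  /* next sibling */
};

/* analogous to ckrm_get_next_child(): NULL prev means first child */
static struct node *next_child(struct node *parent, struct node *prev)
{
        return prev ? prev->sibling : parent->child;
}

int main(void)
{
        struct node c2 = { "c2", NULL, NULL, NULL };
        struct node c1 = { "c1", NULL, NULL, &c2 };
        struct node root = { "root", NULL, &c1, NULL };
        struct node *cur = &root, *child = NULL;

        c1.parent = c2.parent = &root;
repeat:
        if (!cur)
                return 0;
        if (!child)                    /* first entry: process exactly once */
                printf("process %s\n", cur->name);
        child = next_child(cur, child);
        if (child) {                   /* descend into the next child */
                cur = child;
                child = NULL;
        } else {                       /* all children done: go back up */
                child = cur;
                cur = cur->parent;
        }
        goto repeat;
}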
@@ -439,37 +446,30 @@ static inline int get_my_node_surplus(struct ckrm_cpu_class *cls)
 }
 
 /**
- * node_surplus_consume: consume the surplus
- * @ckeck_sl: if check_sl is set, then check soft_limit
- * @total_grt: total guarantee 
+ * consume_surplus: decides how much surplus a node can consume
+ * @check_sl: if check_sl is set, then check the soft limit
  * return how much consumed
- * return -1 on error
  *
 * implements all the CKRM scheduling requirements
- * update total_grt if necessary 
+ * assumes c_cls is valid
  */
-static inline int node_surplus_consume(int surplus,
-                                      struct ckrm_core_class *child_core,
+static inline int consume_surplus(int surplus,
+                                      struct ckrm_cpu_class *c_cls,
                                       struct ckrm_cpu_class *p_cls,
                                       int check_sl
                                       )
 {
        int consumed = 0;
        int inc_limit;
-       int glut = 1;
-
-       struct ckrm_cpu_class *c_cls = ckrm_get_cpu_class(child_core);
        int total_grt = p_cls->shares.total_guarantee;
 
        BUG_ON(surplus < 0);
 
-       if (! c_cls || ! total_grt)
-               goto out;
-
        /*can't consume more than demand or hard limit*/
        if (c_cls->stat.eshare >= c_cls->stat.max_demand)
                goto out;
 
+       //the surplus allocation is proportional to grt
        consumed =
                surplus * c_cls->shares.my_guarantee / total_grt;
 
@@ -481,25 +481,106 @@ static inline int node_surplus_consume(int surplus,
 
        if (check_sl) {
                int esl = p_cls->stat.eshare * get_soft_limit(c_cls)
-                       /p_cls->shares.total_guarantee;
+                       /total_grt;
                if (esl < c_cls->stat.max_demand)
                        inc_limit = esl - c_cls->stat.eshare;
        }
 
-
        if (consumed > inc_limit)
                consumed = inc_limit;
-       else
-               glut = 0;
 
         BUG_ON(consumed < 0);
-       set_eshare(&c_cls->stat,c_cls->stat.eshare + consumed);
-        BUG_ON(c_cls->stat.eshare < 0);
+ out:          
+       return consumed;
+}
+
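Plugging toy numbers into the consume_surplus() logic above (all values invented): the child is offered a guarantee-proportional cut of the surplus, then capped by its remaining demand headroom:

#include <stdio.h>

int main(void)
{
        int surplus = 200;          /* what the parent has to hand out */
        int my_guarantee = 30, total_grt = 100;
        int max_demand = 250, eshare = 210;

        int consumed = surplus * my_guarantee / total_grt;  /* 60 */
        int inc_limit = max_demand - eshare;                /* 40 */

        if (consumed > inc_limit)   /* demand/hard limit wins */
                consumed = inc_limit;

        printf("consumed = %d\n", consumed);                /* 40 */
        return 0;
}

consume_self_surplus() below follows the same shape, with unused_guarantee and meshare standing in for my_guarantee and eshare.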
+/*
+ * how much can a node consume for itself?
+ */
+static inline int consume_self_surplus(int surplus,
+                                      struct ckrm_cpu_class *p_cls,
+                                      int check_sl
+                                      )
+{
+       int consumed = 0;
+       int inc_limit;
+       int total_grt = p_cls->shares.total_guarantee;
+       int max_demand = get_mmax_demand(&p_cls->stat);
+
+       BUG_ON(surplus < 0);
 
+       /*can't consume more than demand or hard limit*/
+       if (p_cls->stat.meshare >= max_demand)
+               goto out;
+
+       //the surplus allocation is proportional to grt
+       consumed =
+               surplus * p_cls->shares.unused_guarantee / total_grt;
+
+       if (! consumed) //no more share
+               goto out;
+
+       //hard limit and demand limit
+       inc_limit = max_demand - p_cls->stat.meshare;
+
+       if (check_sl) {
+               int mesl = p_cls->stat.eshare * get_mysoft_limit(p_cls)
+                       /total_grt;
+               if (mesl < max_demand)
+                       inc_limit = mesl - p_cls->stat.meshare;
+       }
+
+       if (consumed > inc_limit)
+               consumed = inc_limit;
+
+        BUG_ON(consumed < 0);
  out:          
        return consumed;
 }
 
+
+/*
+ * allocate surplus to all the parent's children and also to its default class
+ */
+static int alloc_surplus_single_round(
+                                     int surplus,
+                                     struct ckrm_core_class *parent,
+                                     struct ckrm_cpu_class *p_cls,
+                                     int check_sl)
+{
+       struct ckrm_cpu_class *c_cls;
+       struct ckrm_core_class *child_core = NULL;
+       int total_consumed = 0,consumed;
+
+       //first allocate to the default class
+       consumed  =
+               consume_self_surplus(surplus,p_cls,check_sl);
+
+       if (consumed > 0) {
+               set_meshare(&p_cls->stat,p_cls->stat.meshare + consumed);
+               total_consumed += consumed;
+       }
+
+       do {
+               child_core = ckrm_get_next_child(parent, child_core);
+               if (child_core)  {
+                       c_cls = ckrm_get_cpu_class(child_core);
+                       if (! c_cls)
+                               return -1;
+
+                       consumed    =
+                               consume_surplus(surplus, c_cls,
+                                                    p_cls,check_sl);
+                       if (consumed > 0) {
+                               set_eshare(&c_cls->stat,c_cls->stat.eshare + consumed);
+                               total_consumed += consumed;
+                       }
+               }
+       } while (child_core);
+
+       return total_consumed;
+}
+
 /**
  * alloc_surplus_node: re-allocate the shares for children under parent
  * @parent: parent node
@@ -512,80 +593,63 @@ static inline int node_surplus_consume(int surplus,
  */
 static int alloc_surplus_node(struct ckrm_core_class *parent)
 {
-       int total_surplus , old_surplus;
-       struct ckrm_cpu_class *p_cls = ckrm_get_cpu_class(parent);
-       struct ckrm_core_class *child_core = NULL;
-       int self_share;
+       struct ckrm_cpu_class *p_cls,*c_cls;
+       int total_surplus,consumed;
        int check_sl;
        int ret = -1;
+       struct ckrm_core_class *child_core = NULL;
 
+       p_cls = ckrm_get_cpu_class(parent);
        if (! p_cls)
-               return ret;
-
-       total_surplus = get_my_node_surplus(p_cls);
+               goto realloc_out;
 
        /*
-        * initialize effective_share
+        * get total surplus
         */
+       total_surplus = p_cls->stat.eshare - p_cls->stat.egrt;
+       BUG_ON(total_surplus < 0);
+       total_surplus += get_my_node_surplus(p_cls);
+
        do {
                child_core = ckrm_get_next_child(parent, child_core);
                if (child_core) {
-                       struct ckrm_cpu_class *c_cls;
-
                        c_cls = ckrm_get_cpu_class(child_core);                         
                        if (! c_cls)
-                               return ret; 
+                               goto realloc_out;
 
                        total_surplus += get_node_surplus(c_cls);
-
-                       set_eshare(&c_cls->stat, c_cls->stat.egrt);
                }
        } while (child_core);
 
-       if (! total_surplus)
+
+       if (! total_surplus) {
+               ret = 0;
                goto realloc_out;
+       }
 
-       /* distribute the surplus */
-       child_core = NULL;
+       /*
+        * distribute the surplus
+        * first with check_sl enabled
+        * once every class has reached its soft limit, disable check_sl and try again
+        */
+       
        check_sl = 1;
-       old_surplus = 0;
        do {
-               if (!child_core) {//start a new round
+               consumed = alloc_surplus_single_round(total_surplus,parent,p_cls,check_sl);
+               if (consumed < 0) //something is wrong
+                       goto realloc_out;
 
-                       //ok, everybody reached the soft limit
-                       if (old_surplus == total_surplus) 
-                               check_sl = 0;
-                       old_surplus = total_surplus;
-               }
+               if (! consumed)
+                       check_sl = 0;
+               else
+                       total_surplus -= consumed;
 
-               child_core = ckrm_get_next_child(parent, child_core);
-               if (child_core)  {
-                       int consumed = 0;
-                       consumed -=
-                               node_surplus_consume(old_surplus, child_core,
-                                                    p_cls,check_sl);
-                       if (consumed >= 0) 
-                               total_surplus -= consumed;
-                       else
-                               return ret;     
-               }
-               //start a new round if something is allocated in the last round
-       } while (child_core || check_sl || total_surplus != old_surplus);
+       } while ((total_surplus > 0) && (consumed || check_sl) );
 
- realloc_out:
-       /*how much for itself*/
-       self_share = p_cls->stat.eshare *
-           p_cls->shares.unused_guarantee / p_cls->shares.total_guarantee;
-
-       if (self_share < p_cls->stat.max_demand) {
-               /*any remaining surplus goes to the default class*/
-               self_share += total_surplus;    
-               if (self_share > p_cls->stat.max_demand)
-                       self_share = p_cls->stat.max_demand;
-       }
+       ret = 0;
        
-       set_meshare(&p_cls->stat, self_share);
-       return 0;
+ realloc_out:
+       return ret;
 }
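A toy run of the strategy the comment above describes: hand out surplus in rounds, first honoring soft limits, then without them once a soft-limited round consumes nothing. The loop below spells the retry out explicitly (the patch folds it into its while condition), and the headroom model and numbers are invented:

#include <stdio.h>

static int soft_headroom = 40;    /* room left while soft limits are enforced */
static int hard_headroom = 100;   /* room left once soft limits are dropped */

/* stand-in for alloc_surplus_single_round() */
static int single_round(int surplus, int check_sl)
{
        int *room = check_sl ? &soft_headroom : &hard_headroom;
        int got = surplus < *room ? surplus : *room;

        *room -= got;
        return got;
}

int main(void)
{
        int total_surplus = 120, check_sl = 1, consumed;

        do {
                consumed = single_round(total_surplus, check_sl);
                printf("round: consumed %d (check_sl=%d)\n", consumed, check_sl);
                if (consumed > 0) {
                        total_surplus -= consumed;
                        continue;
                }
                if (!check_sl)
                        break;      /* no headroom anywhere: done */
                check_sl = 0;       /* soft limits reached: try again without them */
        } while (total_surplus > 0);

        printf("surplus left: %d\n", total_surplus);
        return 0;
}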
 
 /**
@@ -597,29 +661,27 @@ static int alloc_surplus_node(struct ckrm_core_class *parent)
 static int alloc_surplus(struct ckrm_core_class *root_core)
 {
        struct ckrm_core_class *cur_core, *child_core;
-       struct ckrm_cpu_class *cls;
+       //      struct ckrm_cpu_class *cls;
        int ret = -1;
 
        /*initialize*/
        cur_core = root_core;
        child_core = NULL;
-       cls = ckrm_get_cpu_class(cur_core);
-
-       //set root eshare
-       set_eshare(&cls->stat, cls->stat.egrt);
+       //      cls = ckrm_get_cpu_class(cur_core);
 
        /*the ckrm idle tasks get all what's remaining*/
        /*hzheng: uncomment the following line for hard limit support */
        //      update_ckrm_idle(CKRM_SHARE_MAX - cls->stat.max_demand);
        
-    repeat:
+ repeat:
        //check exit
        if (!cur_core)
                return 0;
 
-       //visit this node
-       if ( alloc_surplus_node(cur_core) < 0 )
-               return ret;
+       //visit this node only once
+       if (! child_core) 
+               if ( alloc_surplus_node(cur_core) < 0 )
+                       return ret;
 
        //next child
        child_core = ckrm_get_next_child(cur_core, child_core);
@@ -708,7 +770,7 @@ static int ckrm_cpu_idled(void *nothing)
        /*similar to cpu_idle */
        while (1) {
                while (!need_resched()) {
-                       ckrm_cpu_monitor();
+                       ckrm_cpu_monitor(1);
                        if (current_cpu_data.hlt_works_ok) {
                                local_irq_disable();
                                if (!need_resched()) {
@@ -830,12 +892,13 @@ void adjust_local_weight(void)
 /**********************************************/
 /**
  *ckrm_cpu_monitor - adjust relative shares of the classes based on their progress
+ * @check_min: if check_min is set, consecutive calls less than 100ms apart are skipped
  *
  * this function is called every CPU_MONITOR_INTERVAL
  * it computes the cpu demand of each class
  * and re-allocates the unused shares to other classes
  */
-void ckrm_cpu_monitor(void)
+void ckrm_cpu_monitor(int check_min)
 {
        static spinlock_t lock = SPIN_LOCK_UNLOCKED; 
        static unsigned long long last_check = 0;
@@ -855,9 +918,9 @@ void ckrm_cpu_monitor(void)
        now = sched_clock();
 
        //consecutive check should be at least 100ms apart
-       if (now - last_check < MIN_CPU_MONITOR_INTERVAL) {
+       if (check_min && (now - last_check < MIN_CPU_MONITOR_INTERVAL))
                goto outunlock;
-       }
+
        last_check = now;
 
        if (update_effectives(root_core) != 0)
@@ -889,7 +952,7 @@ static int ckrm_cpu_monitord(void *nothing)
                /*sleep for sometime before next try*/
                set_current_state(TASK_INTERRUPTIBLE);
                schedule_timeout(CPU_MONITOR_INTERVAL);
-               ckrm_cpu_monitor();
+               ckrm_cpu_monitor(1);
                if (thread_exit) {
                        break;
                }
@@ -910,8 +973,6 @@ void ckrm_start_monitor(void)
 
 void ckrm_kill_monitor(void)
 {
-       // int interval = HZ;
-
        printk("killing process %d\n", cpu_monitor_pid);
        if (cpu_monitor_pid > 0) {
                thread_exit = 1;
diff --git a/kernel/ckrm_sched.c b/kernel/ckrm_sched.c
index 9c653a3..1ca2611 100644
@@ -77,7 +77,7 @@ static inline void check_inactive_class(ckrm_lrq_t * lrq,CVT_t cur_cvt)
                lrq->savings -= savings_used;
                unscale_cvt(savings_used,lrq);
                BUG_ON(lrq->local_cvt < savings_used);
-               // lrq->local_cvt -= savings_used;
+               lrq->local_cvt -= savings_used;
        }               
 }
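The re-enabled subtraction above is where accumulated savings actually pull a waking class's local_cvt back so it runs sooner. A minimal sketch of that flow, with the kernel's unscale_cvt() stubbed as identity (which it is not in the real code) and the cap value invented:

#include <stdio.h>

struct toy_lrq {
        unsigned long long savings;
        unsigned long long local_cvt;
};

/* stub: the kernel's unscale_cvt() converts from normalized units */
static unsigned long long unscale(unsigned long long v) { return v; }

int main(void)
{
        struct toy_lrq lrq = { .savings = 300, .local_cvt = 1000 };
        unsigned long long savings_used = lrq.savings;

        if (savings_used > 250)         /* cap how much is burned at once */
                savings_used = 250;

        lrq.savings -= savings_used;
        savings_used = unscale(savings_used);
        lrq.local_cvt -= savings_used;  /* the subtraction re-enabled here */

        printf("local_cvt=%llu savings=%llu\n", lrq.local_cvt, lrq.savings);
        return 0;
}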