CKRM cpu controller version 8.2.

author    Marc Fiuczynski <mef@cs.princeton.edu>
          Thu, 30 Sep 2004 12:45:42 +0000 (12:45 +0000)
committer Marc Fiuczynski <mef@cs.princeton.edu>
          Thu, 30 Sep 2004 12:45:42 +0000 (12:45 +0000)
include/linux/ckrm_sched.h
kernel/ckrm/ckrm_cpu_class.c
kernel/ckrm/ckrm_cpu_monitor.c
kernel/ckrm_sched.c

diff --git a/include/linux/ckrm_sched.h b/include/linux/ckrm_sched.h
index b7e6b30..62b3ba2 100644
@@ -71,8 +71,6 @@ struct ckrm_runqueue {
         * 
         * initialized to be 0
         * a class can't accumulate more than SAVING_THRESHOLD of savings
-        * savings are kept in normalized form (like cvt)
-        * so when task share change the savings should be scaled accordingly
         */
        unsigned long long savings;
 
@@ -256,7 +254,7 @@ void ckrm_cpu_change_class(void *task, void *old, void *new);
 #define CPU_DEMAND_INIT 3
 
 /*functions exported by ckrm_cpu_monitor.c*/
-void ckrm_cpu_monitor(void);
+void ckrm_cpu_monitor(int check_min);
 int ckrm_cpu_monitor_init(void);
 void ckrm_cpu_stat_init(struct ckrm_cpu_class_stat *stat);
 void cpu_demand_event(struct ckrm_cpu_demand_stat* local_stat, int event, unsigned long long len);
@@ -274,18 +272,21 @@ void adjust_local_weight(void);
  * 
  * CLASS_QUANTIZER:
  * 
- * A class with 5% share, can execute 50M nsecs / per sec ~ 2^28.
+ * A class with 50% share can execute 500 ms per sec ~ 2^29 ns.
 * Its share will be set to 512 = 2^9. The global CLASSQUEUE_SIZE is set to 2^7.
  * With CLASS_QUANTIZER=16, the local_cvt of this class will increase
- * by 2^28/2^9 = 2^19 = 512K.
- * Setting CLASS_QUANTIZER to 16, 2^(19-16) = 8 slots / per second.
- * A class with 5% shares, will cover 80 slots / per second.
+ * by 2^29/2^9 = 2^20 = 1024K.
+ * Setting CLASS_QUANTIZER to 16 gives 2^(20-16) = 16 slots per second.
+ * By the same math, a class with any share value will cover 16 slots per second.
+ * So the 2^7 total slots can track 8 seconds of system execution.
  *
  * PRIORITY_QUANTIZER:
  *
 * How much the top priority of a class can impact the slot bonus.
- * There are 40 nice priorities. "2" will allow upto 10 slots improvement
- * in the RQ thus for 50% class it can perform ~1sec starvation.
+ * There are 40 nice priorities, ranging from -20 to 19, with a default nice of 0.
+ * "2" will allow up to 5 slots of improvement in the RQ
+ * when a task within the class has a nice value of -20;
+ * thus a 50% class can perform ~300 msec of starvation.
  *
  *******************************************************************/
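To make the arithmetic above concrete, here is a small standalone userspace sketch; the 50% share and quantizer values come from the comment, while the variable names and everything else are invented for illustration:

#include <stdio.h>

int main(void)
{
        /* a class with 50% share executes ~2^29 ns of CPU time per second */
        unsigned long long ns_per_sec = 1ULL << 29;
        unsigned long share = 1UL << 9;          /* 50% share -> 512 */
        int class_quantizer = 16;

        /* local_cvt grows by the consumed time divided by the share */
        unsigned long long cvt_inc = ns_per_sec / share;        /* 2^20 */
        /* the class advances cvt_inc >> CLASS_QUANTIZER slots per second */
        unsigned long long slots_per_sec = cvt_inc >> class_quantizer; /* 16 */

        printf("cvt increase: %llu/sec, slots: %llu/sec\n",
               cvt_inc, slots_per_sec);
        return 0;
}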
 
@@ -322,7 +323,7 @@ void adjust_local_weight(void);
 /*
  * to improve system responsiveness
  * an inactive class is put a little bit ahead of the current class when it wakes up
- * the amount is set in normalized termis to simplify the calculation
+ * the amount is set in normalized terms to simplify the calculation
  * for class with 100% share, it can be 2s ahead
  * while for class with 10% share, it can be 200ms ahead
  */
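A quick back-of-envelope for the wakeup bonus above, assuming (purely for illustration) that a single normalized amount maps to wall time in proportion to share:

#include <stdio.h>

int main(void)
{
        /* one normalized bonus value, expressed here as 2s of wall time
         * for a 100% class; a 10% class sees a tenth of that */
        unsigned long long bonus_ns_at_full_share = 2000000000ULL;
        int shares[] = { 100, 10 };

        for (int i = 0; i < 2; i++)
                printf("%3d%% share -> %llu ms ahead\n", shares[i],
                       bonus_ns_at_full_share * shares[i] / 100 / 1000000);
        return 0;
}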
diff --git a/kernel/ckrm/ckrm_cpu_class.c b/kernel/ckrm/ckrm_cpu_class.c
index cdb8af7..09ea6ba 100644
@@ -180,6 +180,9 @@ static void ckrm_free_cpu_class(void *my_res)
        write_unlock(&class_list_lock);
 
        kfree(cls);
+
+       //call ckrm_cpu_monitor after the class is removed
+       ckrm_cpu_monitor(0);
 }                              
 
 /*
@@ -220,6 +223,10 @@ int ckrm_cpu_set_share(void *my_res, struct ckrm_shares *new_share)
        if (cls->parent) {
                spin_unlock(&parres->cnt_lock);
        }
+
+       //call ckrm_cpu_monitor after the shares are changed
+       ckrm_cpu_monitor(0);
+
        return rc;
 }                                                      
                        
@@ -269,7 +276,7 @@ int ckrm_cpu_get_stats(void *my_res, struct seq_file * sfile)
                   );
        for_each_online_cpu(i) {
                lrq = get_ckrm_lrq(cls,i);              
-               seq_printf(sfile, "\tlrq %d demand= %lu weight= %d lrq_load= %lu cvt= %llu sav=%llu\n",i,stat->local_stats[i].cpu_demand,local_class_weight(lrq),lrq->lrq_load,lrq->local_cvt,lrq->savings);
+               seq_printf(sfile, "\tlrq %d demand= %lu weight= %d lrq_load= %lu cvt= %llu sav= %llu\n",i,stat->local_stats[i].cpu_demand,local_class_weight(lrq),lrq->lrq_load,lrq->local_cvt,lrq->savings);
        }
 
        seq_printf(sfile, "-------- CPU Class Status END ---------\n");
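The two ckrm_cpu_monitor(0) calls added above force an immediate recompute after a configuration change, while periodic callers pass 1 and stay rate-limited. A minimal userspace mock of that gate, with sched_clock() stubbed and the 100ms interval assumed from the ckrm_cpu_monitor.c hunk further down:

#include <stdio.h>

#define MIN_CPU_MONITOR_INTERVAL (100ULL * 1000 * 1000)  /* 100ms in ns */

static unsigned long long fake_clock;  /* stand-in for sched_clock() */

static void cpu_monitor(int check_min)
{
        static unsigned long long last_check;
        unsigned long long now = fake_clock;

        if (check_min && (now - last_check < MIN_CPU_MONITOR_INTERVAL)) {
                printf("t=%lluns: skipped (too soon)\n", now);
                return;
        }
        last_check = now;
        printf("t=%lluns: monitor ran (check_min=%d)\n", now, check_min);
}

int main(void)
{
        fake_clock = 150ULL * 1000 * 1000;
        cpu_monitor(1);                    /* periodic: runs */
        fake_clock += 50ULL * 1000 * 1000;
        cpu_monitor(1);                    /* periodic: skipped, <100ms apart */
        cpu_monitor(0);                    /* share change/class removal: forced */
        return 0;
}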
diff --git a/kernel/ckrm/ckrm_cpu_monitor.c b/kernel/ckrm/ckrm_cpu_monitor.c
index 09bdb21..11f65d7 100644
@@ -357,6 +357,10 @@ static int update_child_effective(struct ckrm_core_class *parent)
                    c_cls->stat.ehl *
                    get_myhard_limit(c_cls) / c_cls->shares.total_guarantee;
 
+               set_eshare(&c_cls->stat,c_cls->stat.egrt);
+               set_meshare(&c_cls->stat,c_cls->stat.megrt);
+
+
                child_core = ckrm_get_next_child(parent, child_core);
        };
        return 0;
@@ -386,15 +390,18 @@ static int update_effectives(struct ckrm_core_class *root_core)
                / cls->shares.total_guarantee;
        cls->stat.mehl = cls->stat.ehl * get_myhard_limit(cls)
                / cls->shares.total_guarantee;
-       
+       set_eshare(&cls->stat,cls->stat.egrt);
+       set_meshare(&cls->stat,cls->stat.megrt);
+
  repeat:
        //check exit
        if (!cur_core)
                return 0;
 
-       //visit this node
-       if (update_child_effective(cur_core) < 0)
-               return ret; //invalid cur_core node
+       //visit this node only once
+       if (! child_core)
+               if (update_child_effective(cur_core) < 0)
+                       return ret; //invalid cur_core node
        
        //next child
        child_core = ckrm_get_next_child(cur_core, child_core);
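The new "visit this node only once" guard matters because the repeat loop re-enters a node every time it finishes a child. A standalone toy of the cur_core/child_core walk; the tree shape and the next_child() helper are invented stand-ins for ckrm_get_next_child():

#include <stdio.h>

struct node {
        const char *name;
        struct node *parent;
        struct node *child;    /* first child */
        struct node *sibling;  /* next sibling */
};

/* analogous to ckrm_get_next_child(): NULL prev means first child */
static struct node *next_child(struct node *parent, struct node *prev)
{
        return prev ? prev->sibling : parent->child;
}

int main(void)
{
        struct node c2 = { "c2", NULL, NULL, NULL };
        struct node c1 = { "c1", NULL, NULL, &c2 };
        struct node root = { "root", NULL, &c1, NULL };
        struct node *cur = &root, *child = NULL;

        c1.parent = c2.parent = &root;
repeat:
        if (!cur)
                return 0;
        if (!child)                    /* first entry: process exactly once */
                printf("process %s\n", cur->name);
        child = next_child(cur, child);
        if (child) {                   /* descend into the next child */
                cur = child;
                child = NULL;
        } else {                       /* all children done: go back up */
                child = cur;
                cur = cur->parent;
        }
        goto repeat;
}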
@@ -439,37 +446,30 @@ static inline int get_my_node_surplus(struct ckrm_cpu_class *cls)
 }
 
 /**
- * node_surplus_consume: consume the surplus
- * @ckeck_sl: if check_sl is set, then check soft_limit
- * @total_grt: total guarantee 
+ * consume_surplus: decides how much surplus a node can consume
+ * @check_sl: if check_sl is set, then check the soft limit
  * return how much consumed
- * return -1 on error
  *
 * implements all the CKRM scheduling requirements
- * update total_grt if necessary 
+ * assumes c_cls is valid
  */
-static inline int node_surplus_consume(int surplus,
-                                      struct ckrm_core_class *child_core,
+static inline int consume_surplus(int surplus,
+                                      struct ckrm_cpu_class *c_cls,
                                       struct ckrm_cpu_class *p_cls,
                                       int check_sl
                                       )
 {
        int consumed = 0;
        int inc_limit;
-       int glut = 1;
-
-       struct ckrm_cpu_class *c_cls = ckrm_get_cpu_class(child_core);
        int total_grt = p_cls->shares.total_guarantee;
 
        BUG_ON(surplus < 0);
 
-       if (! c_cls || ! total_grt)
-               goto out;
-
        /*can't consume more than demand or hard limit*/
        if (c_cls->stat.eshare >= c_cls->stat.max_demand)
                goto out;
 
+       //the surplus allocation is proportional to grt
        consumed =
                surplus * c_cls->shares.my_guarantee / total_grt;
 
@@ -481,25 +481,106 @@ static inline int node_surplus_consume(int surplus,
 
        if (check_sl) {
                int esl = p_cls->stat.eshare * get_soft_limit(c_cls)
-                       /p_cls->shares.total_guarantee;
+                       /total_grt;
                if (esl < c_cls->stat.max_demand)
                        inc_limit = esl - c_cls->stat.eshare;
        }
 
-
        if (consumed > inc_limit)
                consumed = inc_limit;
-       else
-               glut = 0;
 
         BUG_ON(consumed < 0);
-       set_eshare(&c_cls->stat,c_cls->stat.eshare + consumed);
-        BUG_ON(c_cls->stat.eshare < 0);
+ out:          
+       return consumed;
+}
+
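Plugging toy numbers into the consume_surplus() logic above (all values invented): the child is offered a guarantee-proportional cut of the surplus, then capped by its remaining demand headroom:

#include <stdio.h>

int main(void)
{
        int surplus = 200;          /* what the parent has to hand out */
        int my_guarantee = 30, total_grt = 100;
        int max_demand = 250, eshare = 210;

        int consumed = surplus * my_guarantee / total_grt;  /* 60 */
        int inc_limit = max_demand - eshare;                /* 40 */

        if (consumed > inc_limit)   /* demand/hard limit wins */
                consumed = inc_limit;

        printf("consumed = %d\n", consumed);                /* 40 */
        return 0;
}

consume_self_surplus() below follows the same shape, with unused_guarantee and meshare standing in for my_guarantee and eshare.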
+/*
+ * how much can a node consume for itself?
+ */
+static inline int consume_self_surplus(int surplus,
+                                      struct ckrm_cpu_class *p_cls,
+                                      int check_sl
+                                      )
+{
+       int consumed = 0;
+       int inc_limit;
+       int total_grt = p_cls->shares.total_guarantee;
+       int max_demand = get_mmax_demand(&p_cls->stat);
+
+       BUG_ON(surplus < 0);
 
+       /*can't consume more than demand or hard limit*/
+       if (p_cls->stat.meshare >= max_demand)
+               goto out;
+
+       //the surplus allocation is proportional to grt
+       consumed =
+               surplus * p_cls->shares.unused_guarantee / total_grt;
+
+       if (! consumed) //no more share
+               goto out;
+
+       //hard limit and demand limit
+       inc_limit = max_demand - p_cls->stat.meshare;
+
+       if (check_sl) {
+               int mesl = p_cls->stat.eshare * get_mysoft_limit(p_cls)
+                       /total_grt;
+               if (mesl < max_demand)
+                       inc_limit = mesl - p_cls->stat.meshare;
+       }
+
+       if (consumed > inc_limit)
+               consumed = inc_limit;
+
+        BUG_ON(consumed < 0);
  out:          
        return consumed;
 }
 
+
+/*
+ * allocate surplus to all the parent's children and also to its default class
+ */
+static int alloc_surplus_single_round(
+                                     int surplus,
+                                     struct ckrm_core_class *parent,
+                                     struct ckrm_cpu_class *p_cls,
+                                     int check_sl)
+{
+       struct ckrm_cpu_class *c_cls;
+       struct ckrm_core_class *child_core = NULL;
+       int total_consumed = 0,consumed;
+
+       //first allocate to the default class
+       consumed  =
+               consume_self_surplus(surplus,p_cls,check_sl);
+
+       if (consumed > 0) {
+               set_meshare(&p_cls->stat,p_cls->stat.meshare + consumed);
+               total_consumed += consumed;
+       }
+
+       do {
+               child_core = ckrm_get_next_child(parent, child_core);
+               if (child_core)  {
+                       c_cls = ckrm_get_cpu_class(child_core);
+                       if (! c_cls)
+                               return -1;
+
+                       consumed    =
+                               consume_surplus(surplus, c_cls,
+                                                    p_cls,check_sl);
+                       if (consumed > 0) {
+                               set_eshare(&c_cls->stat,c_cls->stat.eshare + consumed);
+                               total_consumed += consumed;
+                       }
+               }
+       } while (child_core);
+
+       return total_consumed;
+}
+
 /**
  * alloc_surplus_node: re-allocate the shares for children under parent
  * @parent: parent node
@@ -512,80 +593,63 @@ static inline int node_surplus_consume(int surplus,
  */
 static int alloc_surplus_node(struct ckrm_core_class *parent)
 {
-       int total_surplus , old_surplus;
-       struct ckrm_cpu_class *p_cls = ckrm_get_cpu_class(parent);
-       struct ckrm_core_class *child_core = NULL;
-       int self_share;
+       struct ckrm_cpu_class *p_cls,*c_cls;
+       int total_surplus,consumed;
        int check_sl;
        int ret = -1;
+       struct ckrm_core_class *child_core = NULL;
 
+       p_cls = ckrm_get_cpu_class(parent);
        if (! p_cls)
-               return ret;
-
-       total_surplus = get_my_node_surplus(p_cls);
+               goto realloc_out;
 
        /*
-        * initialize effective_share
+        * get total surplus
         */
+       total_surplus = p_cls->stat.eshare - p_cls->stat.egrt;
+       BUG_ON(total_surplus < 0);
+       total_surplus += get_my_node_surplus(p_cls);
+
        do {
                child_core = ckrm_get_next_child(parent, child_core);
                if (child_core) {
-                       struct ckrm_cpu_class *c_cls;
-
                        c_cls = ckrm_get_cpu_class(child_core);                         
                        if (! c_cls)
-                               return ret; 
+                               goto realloc_out;
 
                        total_surplus += get_node_surplus(c_cls);
-
-                       set_eshare(&c_cls->stat, c_cls->stat.egrt);
                }
        } while (child_core);
 
-       if (! total_surplus)
+
+       if (! total_surplus) {
+               ret = 0;
                goto realloc_out;
+       }
 
-       /* distribute the surplus */
-       child_core = NULL;
+       /*
+        * distribute the surplus
+        * first with check_sl enabled
+        * once every class has reached its soft limit, disable check_sl and try again
+        */
+       
        check_sl = 1;
-       old_surplus = 0;
        do {
-               if (!child_core) {//start a new round
+               consumed = alloc_surplus_single_round(total_surplus,parent,p_cls,check_sl);
+               if (consumed < 0) //something is wrong
+                       goto realloc_out;
 
-                       //ok, everybody reached the soft limit
-                       if (old_surplus == total_surplus) 
-                               check_sl = 0;
-                       old_surplus = total_surplus;
-               }
+               if (! consumed)
+                       check_sl = 0;
+               else
+                       total_surplus -= consumed;
 
-               child_core = ckrm_get_next_child(parent, child_core);
-               if (child_core)  {
-                       int consumed = 0;
-                       consumed -=
-                               node_surplus_consume(old_surplus, child_core,
-                                                    p_cls,check_sl);
-                       if (consumed >= 0) 
-                               total_surplus -= consumed;
-                       else
-                               return ret;     
-               }
-               //start a new round if something is allocated in the last round
-       } while (child_core || check_sl || total_surplus != old_surplus);
+       } while ((total_surplus > 0) && (consumed || check_sl) );
 
- realloc_out:
-       /*how much for itself*/
-       self_share = p_cls->stat.eshare *
-           p_cls->shares.unused_guarantee / p_cls->shares.total_guarantee;
-
-       if (self_share < p_cls->stat.max_demand) {
-               /*any remaining surplus goes to the default class*/
-               self_share += total_surplus;    
-               if (self_share > p_cls->stat.max_demand)
-                       self_share = p_cls->stat.max_demand;
-       }
+       ret = 0;
        
-       set_meshare(&p_cls->stat, self_share);
-       return 0;
+ realloc_out:
+       return ret;
 }
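A toy run of the strategy the comment above describes: hand out surplus in rounds, first honoring soft limits, then without them once a soft-limited round consumes nothing. The loop below spells the retry out explicitly (the patch folds it into its while condition), and the headroom model and numbers are invented:

#include <stdio.h>

static int soft_headroom = 40;    /* room left while soft limits are enforced */
static int hard_headroom = 100;   /* room left once soft limits are dropped */

/* stand-in for alloc_surplus_single_round() */
static int single_round(int surplus, int check_sl)
{
        int *room = check_sl ? &soft_headroom : &hard_headroom;
        int got = surplus < *room ? surplus : *room;

        *room -= got;
        return got;
}

int main(void)
{
        int total_surplus = 120, check_sl = 1, consumed;

        do {
                consumed = single_round(total_surplus, check_sl);
                printf("round: consumed %d (check_sl=%d)\n", consumed, check_sl);
                if (consumed > 0) {
                        total_surplus -= consumed;
                        continue;
                }
                if (!check_sl)
                        break;      /* no headroom anywhere: done */
                check_sl = 0;       /* soft limits reached: try again without them */
        } while (total_surplus > 0);

        printf("surplus left: %d\n", total_surplus);
        return 0;
}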
 
 /**
@@ -597,29 +661,27 @@ static int alloc_surplus_node(struct ckrm_core_class *parent)
 static int alloc_surplus(struct ckrm_core_class *root_core)
 {
        struct ckrm_core_class *cur_core, *child_core;
-       struct ckrm_cpu_class *cls;
+       //      struct ckrm_cpu_class *cls;
        int ret = -1;
 
        /*initialize*/
        cur_core = root_core;
        child_core = NULL;
-       cls = ckrm_get_cpu_class(cur_core);
-
-       //set root eshare
-       set_eshare(&cls->stat, cls->stat.egrt);
+       //      cls = ckrm_get_cpu_class(cur_core);
 
        /*the ckrm idle tasks get all what's remaining*/
        /*hzheng: uncomment the following line for hard limit support */
        //      update_ckrm_idle(CKRM_SHARE_MAX - cls->stat.max_demand);
        
-    repeat:
+ repeat:
        //check exit
        if (!cur_core)
                return 0;
 
-       //visit this node
-       if ( alloc_surplus_node(cur_core) < 0 )
-               return ret;
+       //visit this node only once
+       if (! child_core) 
+               if ( alloc_surplus_node(cur_core) < 0 )
+                       return ret;
 
        //next child
        child_core = ckrm_get_next_child(cur_core, child_core);
@@ -708,7 +770,7 @@ static int ckrm_cpu_idled(void *nothing)
        /*similar to cpu_idle */
        while (1) {
                while (!need_resched()) {
-                       ckrm_cpu_monitor();
+                       ckrm_cpu_monitor(1);
                        if (current_cpu_data.hlt_works_ok) {
                                local_irq_disable();
                                if (!need_resched()) {
@@ -830,12 +892,13 @@ void adjust_local_weight(void)
 /**********************************************/
 /**
  *ckrm_cpu_monitor - adjust relative shares of the classes based on their progress
+ * @check_min: if check_min is set, consecutive calls less than 100ms apart are skipped
  *
  * this function is called every CPU_MONITOR_INTERVAL
  * it computes the cpu demand of each class
  * and re-allocates the unused shares to other classes
  */
-void ckrm_cpu_monitor(void)
+void ckrm_cpu_monitor(int check_min)
 {
        static spinlock_t lock = SPIN_LOCK_UNLOCKED; 
        static unsigned long long last_check = 0;
@@ -855,9 +918,9 @@ void ckrm_cpu_monitor(void)
        now = sched_clock();
 
        //consecutive check should be at least 100ms apart
-       if (now - last_check < MIN_CPU_MONITOR_INTERVAL) {
+       if (check_min && (now - last_check < MIN_CPU_MONITOR_INTERVAL))
                goto outunlock;
-       }
+
        last_check = now;
 
        if (update_effectives(root_core) != 0)
@@ -889,7 +952,7 @@ static int ckrm_cpu_monitord(void *nothing)
                /*sleep for sometime before next try*/
                set_current_state(TASK_INTERRUPTIBLE);
                schedule_timeout(CPU_MONITOR_INTERVAL);
-               ckrm_cpu_monitor();
+               ckrm_cpu_monitor(1);
                if (thread_exit) {
                        break;
                }
@@ -910,8 +973,6 @@ void ckrm_start_monitor(void)
 
 void ckrm_kill_monitor(void)
 {
-       // int interval = HZ;
-
        printk("killing process %d\n", cpu_monitor_pid);
        if (cpu_monitor_pid > 0) {
                thread_exit = 1;
diff --git a/kernel/ckrm_sched.c b/kernel/ckrm_sched.c
index 9c653a3..1ca2611 100644
@@ -77,7 +77,7 @@ static inline void check_inactive_class(ckrm_lrq_t * lrq,CVT_t cur_cvt)
                lrq->savings -= savings_used;
                unscale_cvt(savings_used,lrq);
                BUG_ON(lrq->local_cvt < savings_used);
-               // lrq->local_cvt -= savings_used;
+               lrq->local_cvt -= savings_used;
        }               
 }
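The re-enabled subtraction above is where accumulated savings actually pull a waking class's local_cvt back so it runs sooner. A minimal sketch of that flow, with the kernel's unscale_cvt() stubbed as identity (which it is not in the real code) and the cap value invented:

#include <stdio.h>

struct toy_lrq {
        unsigned long long savings;
        unsigned long long local_cvt;
};

/* stub: the kernel's unscale_cvt() converts from normalized units */
static unsigned long long unscale(unsigned long long v) { return v; }

int main(void)
{
        struct toy_lrq lrq = { .savings = 300, .local_cvt = 1000 };
        unsigned long long savings_used = lrq.savings;

        if (savings_used > 250)         /* cap how much is burned at once */
                savings_used = 250;

        lrq.savings -= savings_used;
        savings_used = unscale(savings_used);
        lrq.local_cvt -= savings_used;  /* the subtraction re-enabled here */

        printf("local_cvt=%llu savings=%llu\n", lrq.local_cvt, lrq.savings);
        return 0;
}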