ckrm_E16rc1 cpu controller v7
authorMarc Fiuczynski <mef@cs.princeton.edu>
Mon, 27 Sep 2004 02:39:16 +0000 (02:39 +0000)
committerMarc Fiuczynski <mef@cs.princeton.edu>
Mon, 27 Sep 2004 02:39:16 +0000 (02:39 +0000)
include/linux/ckrm_sched.h
kernel/ckrm/ckrm_cpu_class.c
kernel/ckrm/ckrm_cpu_monitor.c
kernel/ckrm_sched.c
kernel/sched.c

index b3e180a..fc62d99 100644 (file)
@@ -117,7 +117,7 @@ struct ckrm_usage {
        unsigned long samples[USAGE_WINDOW_SIZE]; //record usages 
        unsigned long sample_pointer; //pointer for the sliding window
        unsigned long long last_ns; //ns for last sample
-       unsigned long long last_sample_jiffies; //in number of jiffies
+       long long last_sample_jiffies; //in number of jiffies
 };
 
 /*
@@ -169,19 +169,27 @@ static inline void ckrm_sample_usage(struct ckrm_cpu_class* clsptr)
        unsigned long long cur_sample;
        int duration = jiffies - usage->last_sample_jiffies;
 
-//     printk("\tckrm_sample_usage %ld %p: %lld\n",jiffies, clsptr,cur_sample);
+       //jiffies doesn't start from 0
+       //so it needs to be properly handled
+       if (unlikely(!usage->last_sample_jiffies)) 
+               usage->last_sample_jiffies = jiffies;
 
+       //called too frequently
        if (duration < USAGE_SAMPLE_FREQ)
                return;
 
+       usage->last_sample_jiffies = jiffies;
+
        cur_sample = clsptr->stat.total_ns - usage->last_ns; 
+       usage->last_ns = clsptr->stat.total_ns;
+
        //scale it based on the sample duration
-       cur_sample *= ((duration << 10)/USAGE_SAMPLE_FREQ);
-       cur_sample >>= 10;
+       cur_sample *= ((USAGE_SAMPLE_FREQ<< 15)/duration);
+       cur_sample >>= 15;
+       usage->samples[usage->sample_pointer] = cur_sample;
+       //      printk("sample = %llu jiffies=%lu \n",cur_sample, jiffies);
 
-       usage->samples[usage->sample_pointer++] = cur_sample;
-       usage->last_sample_jiffies = jiffies;
-       usage->last_ns = clsptr->stat.total_ns;
+       usage->sample_pointer ++;
        if (usage->sample_pointer >= USAGE_WINDOW_SIZE)
                usage->sample_pointer = 0;
 }
@@ -208,7 +216,7 @@ static inline int get_ckrm_usage(struct ckrm_cpu_class* clsptr, int duration)
         total *= 100;
         do_div(total,nr_samples);
         do_div(total,NS_PER_SAMPLE);
-        // printk("percent %lld\n",total);
+       do_div(total,cpus_weight(cpu_online_map));
         return total;
 }
 
@@ -258,8 +266,32 @@ void adjust_local_weight(void);
 #define get_cls_local_stat(cls,cpu) (&(cls)->stat.local_stats[cpu])
 #define get_rq_local_stat(lrq,cpu) (get_cls_local_stat((lrq)->cpu_class,cpu))
 
-#define CLASS_QUANTIZER 22     //shift from ns to increase class bonus
-#define PRIORITY_QUANTIZER 0   //controls how much a high prio task can borrow
+/********************************************************************
+ * Parameters that determine how quickly CVT's progress and how
+ * priority can impact a LRQ's runqueue position. See also
+ * get_effective_prio(). These parameters need to be adjusted
+ * in accordance to the following example and understanding.
+ * 
+ * CLASS_QUANTIZER:
+ * 
+ * A class with 5% share, can execute 50M nsecs / per sec ~ 2^28.
+ * Its share will be set to 512 = 2^9. The global CLASSQUEUE_SIZE is set to 2^7.
+ * With CLASS_QUANTIZER=16, the local_cvt of this class will increase
+ * by 2^28/2^9 = 2^19 = 512K.
+ * Setting CLASS_QUANTIZER to 16, 2^(19-16) = 8 slots / per second.
+ * A class with 5% shares, will cover 80 slots / per second.
+ *
+ * PRIORITY_QUANTIZER:
+ *
+ * How much can top priorities of class impact slot bonus.
+ * There are 40 nice priorities. "2" will allow upto 10 slots improvement
+ * in the RQ thus for 50% class it can perform ~1sec starvation.
+ *
+ *******************************************************************/
+
+#define CLASS_QUANTIZER 16     //shift from ns to increase class bonus
+#define PRIORITY_QUANTIZER 2   //controls how much a high prio task can borrow
+
 #define CKRM_SHARE_ACCURACY 10
 #define NSEC_PER_MS 1000000
 #define NSEC_PER_JIFFIES (NSEC_PER_SEC/HZ)
@@ -504,15 +536,16 @@ void ckrm_load_sample(ckrm_load_t* ckrm_load,int cpu);
 long pid_get_pressure(ckrm_load_t* ckrm_load, int local_group);
 #define rq_ckrm_load(rq) (&((rq)->ckrm_load))
 
-static inline void ckrm_sched_tick(int j,int this_cpu,struct ckrm_load_struct* ckrm_load)
+static inline void ckrm_sched_tick(unsigned long j,int this_cpu,struct ckrm_load_struct* ckrm_load)
 {
        read_lock(&class_list_lock);
-
+       
 #ifdef CONFIG_SMP
        ckrm_load_sample(ckrm_load,this_cpu);
 #endif
 
-       if (!(j % CVT_UPDATE_TICK)) {
+       if (! (j % CVT_UPDATE_TICK)) {
+               //              printk("ckrm_sched j=%lu\n",j);
                classqueue_update_base(get_cpu_classqueue(this_cpu));
                update_class_cputime(this_cpu);
        }
index 2624a47..ad45380 100644 (file)
@@ -269,7 +269,7 @@ int ckrm_cpu_get_stats(void *my_res, struct seq_file * sfile)
                   );
        for_each_online_cpu(i) {
                lrq = get_ckrm_lrq(cls,i);              
-               seq_printf(sfile, "\tlrq %d demand= %lu weight= %d lrq_load= %lu cvt= %llu\n",i,stat->local_stats[i].cpu_demand,local_class_weight(lrq),lrq->lrq_load,lrq->local_cvt);
+               seq_printf(sfile, "\tlrq %d demand= %lu weight= %d lrq_load= %lu cvt= %llu sav=%lu\n",i,stat->local_stats[i].cpu_demand,local_class_weight(lrq),lrq->lrq_load,lrq->local_cvt,lrq->savings);
        }
 
        seq_printf(sfile, "-------- CPU Class Status END ---------\n");
index 70e155a..c83c83f 100644 (file)
@@ -884,14 +884,11 @@ static int thread_exit = 0;
 
 static int ckrm_cpu_monitord(void *nothing)
 {
-       wait_queue_head_t wait;
-
-       init_waitqueue_head(&wait);
-
        daemonize("ckrm_cpu_ctrld");
        for (;;) {
                /*sleep for sometime before next try*/
-               interruptible_sleep_on_timeout(&wait, CPU_MONITOR_INTERVAL);
+               set_current_state(TASK_INTERRUPTIBLE);
+               schedule_timeout(CPU_MONITOR_INTERVAL);
                ckrm_cpu_monitor();
                if (thread_exit) {
                        break;
@@ -913,15 +910,14 @@ void ckrm_start_monitor(void)
 
 void ckrm_kill_monitor(void)
 {
-       wait_queue_head_t wait;
        int interval = HZ;
-       init_waitqueue_head(&wait);
 
        printk("killing process %d\n", cpu_monitor_pid);
        if (cpu_monitor_pid > 0) {
                thread_exit = 1;
                while (thread_exit != 2) {
-                       interruptible_sleep_on_timeout(&wait, interval);
+                       set_current_state(TASK_INTERRUPTIBLE);
+                       schedule_timeout(CPU_MONITOR_INTERVAL);
                }
        }
 }
index e762b2d..9c653a3 100644 (file)
@@ -28,6 +28,80 @@ struct ckrm_cpu_class * get_default_cpu_class(void) {
 /*                CVT Management                       */
 /*******************************************************/
 
+static inline void check_inactive_class(ckrm_lrq_t * lrq,CVT_t cur_cvt)
+{
+       CVT_t min_cvt;
+       CVT_t bonus;
+
+       //just a safety measure
+       if (unlikely(! cur_cvt))
+               return; 
+
+       /*
+        * Always leaving a small bonus for inactive classes 
+        * allows them to compete for cycles immediately when the become
+        * active. This should improve interactive behavior
+        */
+       bonus = INTERACTIVE_BONUS(lrq);
+       //cvt can't be negative
+       if (cur_cvt > bonus)
+               min_cvt = cur_cvt - bonus;
+       else
+               min_cvt = 0;
+       
+       if (lrq->local_cvt < min_cvt) {
+               CVT_t lost_cvt;
+
+               lost_cvt = scale_cvt(min_cvt - lrq->local_cvt,lrq);
+               lrq->local_cvt = min_cvt;
+
+               /* add what the class lost to its savings*/
+               lrq->savings += lost_cvt;
+               if (lrq->savings > MAX_SAVINGS)
+                       lrq->savings = MAX_SAVINGS; 
+       } else if (lrq->savings) {
+               /*
+                * if a class is saving and falling behind
+                * then start to use its savings in a leaky-bucket way
+                */
+               CVT_t savings_used;
+
+               savings_used = scale_cvt((lrq->local_cvt - min_cvt),lrq);
+               if (savings_used > lrq->savings)
+                       savings_used = lrq->savings;
+               
+               if (savings_used > SAVINGS_LEAK_SPEED)
+                       savings_used = SAVINGS_LEAK_SPEED;
+
+               BUG_ON(lrq->savings < savings_used);
+               lrq->savings -= savings_used;
+               unscale_cvt(savings_used,lrq);
+               BUG_ON(lrq->local_cvt < savings_used);
+               // lrq->local_cvt -= savings_used;
+       }               
+}
+
+/*
+ * return the max_cvt of all the classes
+ */
+static inline CVT_t get_max_cvt(int this_cpu)
+{
+        struct ckrm_cpu_class *clsptr;
+        ckrm_lrq_t * lrq;
+        CVT_t max_cvt;
+
+        max_cvt = 0;
+
+        /*update class time, at the same time get max_cvt */
+        list_for_each_entry(clsptr, &active_cpu_classes, links) {
+                lrq = get_ckrm_lrq(clsptr, this_cpu);
+                if (lrq->local_cvt > max_cvt)
+                        max_cvt = lrq->local_cvt;
+        }
+
+       return max_cvt;
+}
+
 /**
  * update_class_cputime - updates cvt of inactive classes
  * -- an inactive class shouldn't starve others when it comes back
@@ -40,7 +114,7 @@ void update_class_cputime(int this_cpu)
 {
        struct ckrm_cpu_class *clsptr;
        ckrm_lrq_t * lrq;
-       CVT_t cur_cvt,min_cvt;
+       CVT_t cur_cvt;
 
        /*
         *  a class's local_cvt must not be significantly smaller than min_cvt 
@@ -55,11 +129,24 @@ void update_class_cputime(int this_cpu)
         *  starvation of other classes.
          *  
         */
-
-       // printk("update_class_cputime(%d)\n",this_cpu);
-
        cur_cvt = get_local_cur_cvt(this_cpu);
 
+       /*
+        * cur_cvt == 0 means the system is now idle
+        * in this case, we use max_cvt as cur_cvt
+        * max_cvt roughly represents the cvt of the class 
+        * that has just finished running
+        *
+        * fairness wouldn't be a problem since we account for whatever is lost in savings
+        * if the system is not busy, the system responsiveness is not a problem.
+        * still fine if the system is busy, but happens to be idle at this particular point
+        * since bias toward interactive classes (class priority) is a more important way to improve system responsiveness
+        */
+       if (unlikely(! cur_cvt))  {
+               cur_cvt = get_max_cvt(this_cpu);
+               //return;
+       }
+
        /* 
         *  - check the local cvt of all the classes 
         *  - update total_ns received by the class
@@ -72,45 +159,9 @@ void update_class_cputime(int this_cpu)
                clsptr->stat.total_ns += lrq->uncounted_ns;
                ckrm_sample_usage(clsptr);
                spin_unlock(&clsptr->stat.stat_lock);
-
                lrq->uncounted_ns = 0;
 
-               /*
-                * Always leaving a small bonus for inactive classes 
-                * allows them to compete for cycles immediately when the become
-                * active. This should improve interactive behavior
-                */
-               min_cvt = cur_cvt - INTERACTIVE_BONUS(lrq);
-               
-               if (lrq->local_cvt < min_cvt) {
-                       CVT_t lost_cvt;
-
-                       lost_cvt = scale_cvt(min_cvt - lrq->local_cvt,lrq);
-                       lrq->local_cvt = min_cvt;
-
-                       /* add what the class lost to its savings*/
-                       lrq->savings += lost_cvt;
-                       if (lrq->savings > MAX_SAVINGS)
-                               lrq->savings = MAX_SAVINGS; 
-
-               } else if (lrq->savings) {
-                       /*
-                        *if a class saving and falling behind
-                        * then start to use it saving in a leaking bucket way
-                        */
-                       CVT_t savings_used;
-
-                       savings_used = scale_cvt((lrq->local_cvt - min_cvt),lrq);
-                       if (savings_used > lrq->savings)
-                               savings_used = lrq->savings;
-
-                       if (savings_used > SAVINGS_LEAK_SPEED)
-                               savings_used = SAVINGS_LEAK_SPEED;
-
-                       lrq->savings -= savings_used;
-                       unscale_cvt(savings_used,lrq);
-                       lrq->local_cvt -= savings_used;
-               }               
+               check_inactive_class(lrq,cur_cvt);              
        }
 }
 
index 148d1ac..85fb705 100644 (file)
  *  otherwise compare task priority 
  */
 #define TASK_PREEMPTS_CURR(p, rq) \
-       (((p)->cpu_class != (rq)->curr->cpu_class) && ((rq)->curr != (rq)->idle))? class_preempts_curr((p),(rq)->curr) : ((p)->prio < (rq)->curr->prio)
-
+       ( ((p)->cpu_class != (rq)->curr->cpu_class) \
+         && ((rq)->curr != (rq)->idle) && ((p) != (rq)->idle )) \
+         ? class_preempts_curr((p),(rq)->curr)  \
+         : ((p)->prio < (rq)->curr->prio)
 #else
-
 #define TASK_PREEMPTS_CURR(p, rq) \
        ((p)->prio < (rq)->curr->prio)
 #endif
@@ -2568,7 +2569,7 @@ void scheduler_tick(int user_ticks, int sys_ticks)
                        cpustat->idle += sys_ticks;
                if (wake_priority_sleeper(rq))
                        goto out;
-//will break   ckrm_sched_tick(jiffies,cpu,rq_ckrm_load(rq));
+               ckrm_sched_tick(jiffies,cpu,rq_ckrm_load(rq));
                rebalance_tick(cpu, rq, IDLE);
                return;
        }