From 539b61e8a29c8630e352afa21a2acd482c30edf1 Mon Sep 17 00:00:00 2001
From: Marc Fiuczynski
Date: Mon, 27 Sep 2004 02:39:16 +0000
Subject: [PATCH] ckrm_E16rc1 cpu controller v7

---
 include/linux/ckrm_sched.h     |  59 +++++++++++----
 kernel/ckrm/ckrm_cpu_class.c   |   2 +-
 kernel/ckrm/ckrm_cpu_monitor.c |  12 +--
 kernel/ckrm_sched.c            | 133 +++++++++++++++++++++++----------
 kernel/sched.c                 |   9 ++-
 5 files changed, 148 insertions(+), 67 deletions(-)

diff --git a/include/linux/ckrm_sched.h b/include/linux/ckrm_sched.h
index b3e180a5e..fc62d99cd 100644
--- a/include/linux/ckrm_sched.h
+++ b/include/linux/ckrm_sched.h
@@ -117,7 +117,7 @@ struct ckrm_usage {
 	unsigned long samples[USAGE_WINDOW_SIZE]; //record usages
 	unsigned long sample_pointer; //pointer for the sliding window
 	unsigned long long last_ns; //ns for last sample
-	unsigned long long last_sample_jiffies; //in number of jiffies
+	long long last_sample_jiffies; //in number of jiffies
 };
 
 /*
@@ -169,19 +169,27 @@ static inline void ckrm_sample_usage(struct ckrm_cpu_class* clsptr)
 	unsigned long long cur_sample;
 	int duration = jiffies - usage->last_sample_jiffies;
 
-//	printk("\tckrm_sample_usage %ld %p: %lld\n",jiffies, clsptr,cur_sample);
+	//jiffies doesn't start from 0,
+	//so it needs to be handled properly
+	if (unlikely(!usage->last_sample_jiffies))
+		usage->last_sample_jiffies = jiffies;
 
+	//called too frequently
 	if (duration < USAGE_SAMPLE_FREQ)
 		return;
 
+	usage->last_sample_jiffies = jiffies;
+
 	cur_sample = clsptr->stat.total_ns - usage->last_ns;
+	usage->last_ns = clsptr->stat.total_ns;
+
 	//scale it based on the sample duration
-	cur_sample *= ((duration << 10)/USAGE_SAMPLE_FREQ);
-	cur_sample >>= 10;
+	cur_sample *= ((USAGE_SAMPLE_FREQ << 15)/duration);
+	cur_sample >>= 15;
+	usage->samples[usage->sample_pointer] = cur_sample;
+	//	printk("sample = %llu jiffies=%lu \n",cur_sample, jiffies);
 
-	usage->samples[usage->sample_pointer++] = cur_sample;
-	usage->last_sample_jiffies = jiffies;
-	usage->last_ns = clsptr->stat.total_ns;
+	usage->sample_pointer++;
 	if (usage->sample_pointer >= USAGE_WINDOW_SIZE)
 		usage->sample_pointer = 0;
 }
@@ -208,7 +216,7 @@ static inline int get_ckrm_usage(struct ckrm_cpu_class* clsptr, int duration)
 	total *= 100;
 	do_div(total,nr_samples);
 	do_div(total,NS_PER_SAMPLE);
-	// printk("percent %lld\n",total);
+	do_div(total,cpus_weight(cpu_online_map));
 	return total;
 }
 
@@ -258,8 +266,32 @@ void adjust_local_weight(void);
 #define get_cls_local_stat(cls,cpu) (&(cls)->stat.local_stats[cpu])
 #define get_rq_local_stat(lrq,cpu) (get_cls_local_stat((lrq)->cpu_class,cpu))
 
-#define CLASS_QUANTIZER 22	//shift from ns to increase class bonus
-#define PRIORITY_QUANTIZER 0	//controls how much a high prio task can borrow
+/********************************************************************
+ * Parameters that determine how quickly CVTs progress and how much
+ * priority can impact an LRQ's runqueue position. See also
+ * get_effective_prio(). These parameters need to be adjusted
+ * in accordance with the following example and understanding.
+ *
+ * CLASS_QUANTIZER:
+ *
+ * A class with a 5% share can execute 50M nsecs per second, ~2^28.
+ * Its share will be set to 512 = 2^9. The global CLASSQUEUE_SIZE is set to 2^7.
+ * With CLASS_QUANTIZER=16, the local_cvt of this class will increase
+ * by 2^28/2^9 = 2^19 = 512K per second,
+ * i.e. 2^(19-16) = 8 slots per second.
+ * A class with a 5% share will cover 80 slots per second.
+ *
+ * PRIORITY_QUANTIZER:
+ *
+ * How much the top priorities of a class can impact the slot bonus.
+ * There are 40 nice priorities. "2" will allow up to 10 slots of improvement
+ * in the RQ; thus, for a 50% class, it can cause ~1 sec of starvation.
+ *
+ *******************************************************************/
+
+#define CLASS_QUANTIZER 16	//shift from ns to increase class bonus
+#define PRIORITY_QUANTIZER 2	//controls how much a high prio task can borrow
+
 #define CKRM_SHARE_ACCURACY 10
 #define NSEC_PER_MS 1000000
 #define NSEC_PER_JIFFIES (NSEC_PER_SEC/HZ)
@@ -504,15 +536,16 @@ void ckrm_load_sample(ckrm_load_t* ckrm_load,int cpu);
 long pid_get_pressure(ckrm_load_t* ckrm_load, int local_group);
 
 #define rq_ckrm_load(rq) (&((rq)->ckrm_load))
-static inline void ckrm_sched_tick(int j,int this_cpu,struct ckrm_load_struct* ckrm_load)
+static inline void ckrm_sched_tick(unsigned long j,int this_cpu,struct ckrm_load_struct* ckrm_load)
 {
 	read_lock(&class_list_lock);
-	
+
 #ifdef CONFIG_SMP
 	ckrm_load_sample(ckrm_load,this_cpu);
 #endif
 
-	if (!(j % CVT_UPDATE_TICK)) {
+	if (! (j % CVT_UPDATE_TICK)) {
+		//	printk("ckrm_sched j=%lu\n",j);
 		classqueue_update_base(get_cpu_classqueue(this_cpu));
 		update_class_cputime(this_cpu);
 	}
diff --git a/kernel/ckrm/ckrm_cpu_class.c b/kernel/ckrm/ckrm_cpu_class.c
index 2624a4797..ad45380ee 100644
--- a/kernel/ckrm/ckrm_cpu_class.c
+++ b/kernel/ckrm/ckrm_cpu_class.c
@@ -269,7 +269,7 @@ int ckrm_cpu_get_stats(void *my_res, struct seq_file * sfile)
 		);
 	for_each_online_cpu(i) {
 		lrq = get_ckrm_lrq(cls,i);
-		seq_printf(sfile, "\tlrq %d demand= %lu weight= %d lrq_load= %lu cvt= %llu\n",i,stat->local_stats[i].cpu_demand,local_class_weight(lrq),lrq->lrq_load,lrq->local_cvt);
+		seq_printf(sfile, "\tlrq %d demand= %lu weight= %d lrq_load= %lu cvt= %llu sav=%lu\n",i,stat->local_stats[i].cpu_demand,local_class_weight(lrq),lrq->lrq_load,lrq->local_cvt,lrq->savings);
 	}
 
 	seq_printf(sfile, "-------- CPU Class Status END ---------\n");
diff --git a/kernel/ckrm/ckrm_cpu_monitor.c b/kernel/ckrm/ckrm_cpu_monitor.c
index 70e155a79..c83c83fca 100644
--- a/kernel/ckrm/ckrm_cpu_monitor.c
+++ b/kernel/ckrm/ckrm_cpu_monitor.c
@@ -884,14 +884,11 @@ static int thread_exit = 0;
 
 static int ckrm_cpu_monitord(void *nothing)
 {
-	wait_queue_head_t wait;
-
-	init_waitqueue_head(&wait);
-
 	daemonize("ckrm_cpu_ctrld");
 	for (;;) {
 		/*sleep for sometime before next try*/
-		interruptible_sleep_on_timeout(&wait, CPU_MONITOR_INTERVAL);
+		set_current_state(TASK_INTERRUPTIBLE);
+		schedule_timeout(CPU_MONITOR_INTERVAL);
 		ckrm_cpu_monitor();
 		if (thread_exit) {
 			break;
@@ -913,15 +910,14 @@ void ckrm_start_monitor(void)
 
 void ckrm_kill_monitor(void)
 {
-	wait_queue_head_t wait;
 	int interval = HZ;
 
-	init_waitqueue_head(&wait);
 	printk("killing process %d\n", cpu_monitor_pid);
 	if (cpu_monitor_pid > 0) {
 		thread_exit = 1;
 		while (thread_exit != 2) {
-			interruptible_sleep_on_timeout(&wait, interval);
+			set_current_state(TASK_INTERRUPTIBLE);
+			schedule_timeout(interval);
 		}
 	}
 }
diff --git a/kernel/ckrm_sched.c b/kernel/ckrm_sched.c
index e762b2d7c..9c653a3b6 100644
--- a/kernel/ckrm_sched.c
+++ b/kernel/ckrm_sched.c
@@ -28,6 +28,80 @@ struct ckrm_cpu_class * get_default_cpu_class(void) {
 /*                   CVT Management                    */
 /*******************************************************/
 
+static inline void check_inactive_class(ckrm_lrq_t * lrq,CVT_t cur_cvt)
+{
+	CVT_t min_cvt;
+	CVT_t bonus;
+
+	//just a safety measure
+	if (unlikely(!cur_cvt))
+		return;
+
+	/*
+	 * Always leaving a small bonus for inactive classes
+	 * allows them to compete for cycles immediately when they become
+	 * active. This should improve interactive behavior.
+	 */
+	bonus = INTERACTIVE_BONUS(lrq);
+	//cvt can't be negative
+	if (cur_cvt > bonus)
+		min_cvt = cur_cvt - bonus;
+	else
+		min_cvt = 0;
+
+	if (lrq->local_cvt < min_cvt) {
+		CVT_t lost_cvt;
+
+		lost_cvt = scale_cvt(min_cvt - lrq->local_cvt,lrq);
+		lrq->local_cvt = min_cvt;
+
+		/* add what the class lost to its savings*/
+		lrq->savings += lost_cvt;
+		if (lrq->savings > MAX_SAVINGS)
+			lrq->savings = MAX_SAVINGS;
+	} else if (lrq->savings) {
+		/*
+		 * if a class has savings and is falling behind,
+		 * start to use its savings in a leaky-bucket way
+		 */
+		CVT_t savings_used;
+
+		savings_used = scale_cvt((lrq->local_cvt - min_cvt),lrq);
+		if (savings_used > lrq->savings)
+			savings_used = lrq->savings;
+
+		if (savings_used > SAVINGS_LEAK_SPEED)
+			savings_used = SAVINGS_LEAK_SPEED;
+
+		BUG_ON(lrq->savings < savings_used);
+		lrq->savings -= savings_used;
+		unscale_cvt(savings_used,lrq);
+		BUG_ON(lrq->local_cvt < savings_used);
+		//	lrq->local_cvt -= savings_used;
+	}
+}
+
+/*
+ * return the max_cvt of all the classes
+ */
+static inline CVT_t get_max_cvt(int this_cpu)
+{
+	struct ckrm_cpu_class *clsptr;
+	ckrm_lrq_t * lrq;
+	CVT_t max_cvt;
+
+	max_cvt = 0;
+
+	/* find the maximum local_cvt across all the classes */
+	list_for_each_entry(clsptr, &active_cpu_classes, links) {
+		lrq = get_ckrm_lrq(clsptr, this_cpu);
+		if (lrq->local_cvt > max_cvt)
+			max_cvt = lrq->local_cvt;
+	}
+
+	return max_cvt;
+}
+
 /**
  * update_class_cputime - updates cvt of inactive classes
  * -- an inactive class shouldn't starve others when it comes back
@@ -40,7 +114,7 @@ void update_class_cputime(int this_cpu)
 {
 	struct ckrm_cpu_class *clsptr;
 	ckrm_lrq_t * lrq;
-	CVT_t cur_cvt,min_cvt;
+	CVT_t cur_cvt;
 
 	/*
 	 * a class's local_cvt must not be significantly smaller than min_cvt
@@ -55,11 +129,24 @@ void update_class_cputime(int this_cpu)
 	 * starvation of other classes.
 	 *
 	 */
-
-	// printk("update_class_cputime(%d)\n",this_cpu);
 	cur_cvt = get_local_cur_cvt(this_cpu);
 
+	/*
+	 * cur_cvt == 0 means the system is now idle;
+	 * in this case, we use max_cvt as cur_cvt.
+	 * max_cvt roughly represents the cvt of the class
+	 * that has just finished running.
+	 *
+	 * Fairness won't be a problem, since we account for whatever is
+	 * lost in savings. If the system is not busy, responsiveness is
+	 * not a problem either; it is still fine if the system is busy but
+	 * happens to be idle at this particular point, since the bias
+	 * toward interactive classes (class priority) is a more important
+	 * way to improve system responsiveness.
+	 */
+	if (unlikely(!cur_cvt)) {
+		cur_cvt = get_max_cvt(this_cpu);
+		//return;
+	}
+
 	/*
 	 * - check the local cvt of all the classes
 	 * - update total_ns received by the class
@@ -72,45 +159,9 @@ void update_class_cputime(int this_cpu)
 		clsptr->stat.total_ns += lrq->uncounted_ns;
 		ckrm_sample_usage(clsptr);
 		spin_unlock(&clsptr->stat.stat_lock);
-
 		lrq->uncounted_ns = 0;
 
-		/*
-		 * Always leaving a small bonus for inactive classes
-		 * allows them to compete for cycles immediately when the become
-		 * active. This should improve interactive behavior
-		 */
-
-		min_cvt = cur_cvt - INTERACTIVE_BONUS(lrq);
-
-		if (lrq->local_cvt < min_cvt) {
-			CVT_t lost_cvt;
-
-			lost_cvt = scale_cvt(min_cvt - lrq->local_cvt,lrq);
-			lrq->local_cvt = min_cvt;
-
-			/* add what the class lost to its savings*/
-			lrq->savings += lost_cvt;
-			if (lrq->savings > MAX_SAVINGS)
-				lrq->savings = MAX_SAVINGS;
-
-		} else if (lrq->savings) {
-			/*
-			 *if a class saving and falling behind
-			 * then start to use it saving in a leaking bucket way
-			 */
-			CVT_t savings_used;
-
-			savings_used = scale_cvt((lrq->local_cvt - min_cvt),lrq);
-			if (savings_used > lrq->savings)
-				savings_used = lrq->savings;
-
-			if (savings_used > SAVINGS_LEAK_SPEED)
-				savings_used = SAVINGS_LEAK_SPEED;
-
-			lrq->savings -= savings_used;
-			unscale_cvt(savings_used,lrq);
-			lrq->local_cvt -= savings_used;
-		}
+		check_inactive_class(lrq,cur_cvt);
 	}
 }
 
diff --git a/kernel/sched.c b/kernel/sched.c
index 148d1ac9b..85fb705c1 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -162,10 +162,11 @@
  * otherwise compare task priority
  */
 #define TASK_PREEMPTS_CURR(p, rq) \
-	(((p)->cpu_class != (rq)->curr->cpu_class) && ((rq)->curr != (rq)->idle))? class_preempts_curr((p),(rq)->curr) : ((p)->prio < (rq)->curr->prio)
-
+	( ((p)->cpu_class != (rq)->curr->cpu_class) \
+	  && ((rq)->curr != (rq)->idle) && ((p) != (rq)->idle )) \
+	? class_preempts_curr((p),(rq)->curr) \
+	: ((p)->prio < (rq)->curr->prio)
 #else
-
 #define TASK_PREEMPTS_CURR(p, rq) \
 	((p)->prio < (rq)->curr->prio)
 #endif
@@ -2568,7 +2569,7 @@ void scheduler_tick(int user_ticks, int sys_ticks)
 		cpustat->idle += sys_ticks;
 		if (wake_priority_sleeper(rq))
 			goto out;
-//will break	ckrm_sched_tick(jiffies,cpu,rq_ckrm_load(rq));
+		ckrm_sched_tick(jiffies,cpu,rq_ckrm_load(rq));
 		rebalance_tick(cpu, rq, IDLE);
 		return;
 	}
-- 
2.47.0
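
A note on the rescaling in ckrm_sample_usage(): the new code normalizes the nanoseconds consumed over an arbitrary duration (in jiffies) to the nominal USAGE_SAMPLE_FREQ window, replacing the old duration-based scaling. The fixed-point trick is (sample * ((FREQ << 15) / duration)) >> 15, which approximates sample * FREQ / duration without floating point. A minimal userspace sketch of that arithmetic, assuming an illustrative USAGE_SAMPLE_FREQ of 128 jiffies (not necessarily the kernel's value):

#include <stdio.h>

#define USAGE_SAMPLE_FREQ 128	/* illustrative window length, in jiffies */

/* Normalize ns consumed over 'duration' jiffies to one
 * USAGE_SAMPLE_FREQ-sized window, in 15-bit fixed point. */
static unsigned long long scale_sample(unsigned long long sample, long duration)
{
	sample *= ((unsigned long long)USAGE_SAMPLE_FREQ << 15) / duration;
	return sample >> 15;
}

int main(void)
{
	/* 200M ns over a 256-jiffy interval and 50M ns over a 64-jiffy
	 * interval both normalize to 100M ns per 128-jiffy window. */
	printf("%llu\n", scale_sample(200000000ULL, 256));
	printf("%llu\n", scale_sample(50000000ULL, 64));
	return 0;
}

Because the early return in the kernel guarantees duration >= USAGE_SAMPLE_FREQ, the fixed-point factor is at most 1 << 15 there, keeping the multiplication well within 64 bits for realistic samples.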
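
The CLASS_QUANTIZER example in the new ckrm_sched.h comment block can be checked with a few lines of integer arithmetic. The sketch below takes the comment's own illustrative figures as given (about 2^28 ns of execution per second against a share weight of 2^9); these are assumptions from the comment, not values read from the kernel headers:

#include <stdio.h>

#define CLASS_QUANTIZER 16	/* shift from cvt to classqueue slots */

int main(void)
{
	unsigned long long ns_per_sec = 1ULL << 28;	/* ~5% class, per the comment */
	unsigned long long weight = 1ULL << 9;	/* share set to 512 */

	unsigned long long cvt_per_sec = ns_per_sec / weight;	/* 2^19 = 512K */
	unsigned long long slots_per_sec = cvt_per_sec >> CLASS_QUANTIZER;	/* 2^3 */

	printf("cvt/sec = %llu, slots/sec = %llu\n", cvt_per_sec, slots_per_sec);
	return 0;
}

This prints cvt/sec = 524288, slots/sec = 8, matching the "8 slots per second" figure; a larger CLASS_QUANTIZER slows slot progression and therefore weakens the per-second class bonus.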
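
check_inactive_class() amounts to a capped savings account drained leaky-bucket style: a class that fell behind the min_cvt floor is pulled forward and banks the skipped cvt as savings, while a class ahead of the floor leaks savings at a bounded rate. The sketch below mirrors that policy in userspace with hypothetical MAX_SAVINGS and SAVINGS_LEAK_SPEED values and with the scale_cvt()/unscale_cvt() weighting elided; like the patch, which leaves the local_cvt decrement commented out, it does not move local_cvt when savings are consumed:

#include <stdio.h>

typedef unsigned long long CVT_t;

#define MAX_SAVINGS 4000ULL	/* hypothetical cap */
#define SAVINGS_LEAK_SPEED 300ULL	/* hypothetical per-update leak */

struct lrq { CVT_t local_cvt; CVT_t savings; };

static void settle(struct lrq *q, CVT_t min_cvt)
{
	if (q->local_cvt < min_cvt) {
		/* behind the floor: catch up and bank the difference */
		CVT_t lost = min_cvt - q->local_cvt;
		q->local_cvt = min_cvt;
		q->savings += lost;
		if (q->savings > MAX_SAVINGS)
			q->savings = MAX_SAVINGS;
	} else if (q->savings) {
		/* ahead of the floor: leak savings at a bounded rate */
		CVT_t used = q->local_cvt - min_cvt;
		if (used > q->savings)
			used = q->savings;
		if (used > SAVINGS_LEAK_SPEED)
			used = SAVINGS_LEAK_SPEED;
		q->savings -= used;
	}
}

int main(void)
{
	struct lrq q = { 100, 0 };

	settle(&q, 1000);	/* banks 900 of skipped cvt */
	printf("cvt=%llu savings=%llu\n", q.local_cvt, q.savings);
	settle(&q, 500);	/* drains at most SAVINGS_LEAK_SPEED */
	printf("cvt=%llu savings=%llu\n", q.local_cvt, q.savings);
	return 0;
}

Expected output: cvt=1000 savings=900, then cvt=1000 savings=600.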