*
* initialized to 0
* a class can't accumulate more than SAVING_THRESHOLD of savings
- * savings are kept in normalized form (like cvt)
- * so when task share change the savings should be scaled accordingly
*/
unsigned long long savings;
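A minimal sketch of the re-normalization the comment above describes; the helper name and the exact scaling direction are assumptions for illustration, not part of the patch:

static inline void scale_savings_example(unsigned long long *savings,
					 unsigned long old_share,
					 unsigned long new_share)
{
	/* savings are kept in normalized (cvt-like) units, so the real-time
	 * value is roughly savings * share; keep that product constant when
	 * the share changes (assumed direction of the scaling) */
	if (new_share)
		*savings = *savings * old_share / new_share;
}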
#define CPU_DEMAND_INIT 3
/*functions exported by ckrm_cpu_monitor.c*/
-void ckrm_cpu_monitor(void);
+void ckrm_cpu_monitor(int check_min);
int ckrm_cpu_monitor_init(void);
void ckrm_cpu_stat_init(struct ckrm_cpu_class_stat *stat);
void cpu_demand_event(struct ckrm_cpu_demand_stat* local_stat, int event, unsigned long long len);
*
* CLASS_QUANTIZER:
*
- * A class with 5% share, can execute 50M nsecs / per sec ~ 2^28.
+ * A class with 50% share can execute for 500 ms per second, i.e. ~2^29 ns.
* Its share will be set to 512 = 2^9. The global CLASSQUEUE_SIZE is set to 2^7.
* With CLASS_QUANTIZER=16, the local_cvt of this class will increase
- * by 2^28/2^9 = 2^19 = 512K.
- * Setting CLASS_QUANTIZER to 16, 2^(19-16) = 8 slots / per second.
- * A class with 5% shares, will cover 80 slots / per second.
+ * by 2^29/2^9 = 2^20 = 1024K per second.
+ * With CLASS_QUANTIZER set to 16, that is 2^(20-16) = 16 slots per second.
+ * The same math shows that a class with any share value covers 16 slots per second,
+ * so the 2^7 total slots track roughly 8 seconds of system execution.
*
* PRIORITY_QUANTIZER:
*
* How much can top priorities of class impact slot bonus.
- * There are 40 nice priorities. "2" will allow upto 10 slots improvement
- * in the RQ thus for 50% class it can perform ~1sec starvation.
+ * There are 40 nice priorities, ranging from -20 to 19, with a default nice of 0.
+ * A value of "2" allows up to 5 slots of improvement in the RQ when some task
+ * within the class has a nice value of -20; for a 50% class that amounts to
+ * roughly 300 msec of starvation.
*
*******************************************************************/
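To make the arithmetic above concrete, here is a minimal sketch; EX_CLASS_QUANTIZER, EX_CLASSQUEUE_SIZE and the helper are illustrative names only, mirroring the values quoted in the comment (share out of 1024, one second approximated as 2^30 nsec):

#define EX_CLASS_QUANTIZER	16		/* cvt -> classqueue slot shift */
#define EX_CLASSQUEUE_SIZE	(1 << 7)	/* 128 slots */

/* how many classqueue slots a class advances per second of wall-clock time */
static inline unsigned long slots_per_sec_example(unsigned long share)
{
	unsigned long long executed, cvt_growth;

	if (!share)
		return 0;
	executed = ((1ULL << 30) * share) / 1024;	/* nsec executed per second */
	cvt_growth = executed / share;			/* == 2^20, independent of share */
	return (unsigned long)(cvt_growth >> EX_CLASS_QUANTIZER);	/* 2^(20-16) = 16 */
}

At 16 slots per second, the 2^7 = 128 slots of the classqueue cover about 8 seconds of execution, which is the tracking window the comment refers to.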
/*
* to improve system responsiveness
* an inactive class is put a little bit ahead of the current class when it wakes up
- * the amount is set in normalized termis to simplify the calculation
+ * the amount is set in normalized terms to simplify the calculation
* for class with 100% share, it can be 2s ahead
* while for class with 10% share, it can be 200ms ahead
*/
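A small sketch of why a single normalized bonus yields the share-proportional head start described above; the constant and helper are hypothetical, only the 2s / 200ms proportionality is taken from the comment:

/* fixed head start, expressed in normalized (cvt) units */
#define EX_WAKEUP_BONUS_CVT	2000000ULL	/* hypothetical value */

/* wall-clock head start this bonus buys a class with the given share (out of 1024) */
static inline unsigned long long wakeup_headstart_nsec_example(unsigned long share)
{
	/* real run time ~ normalized units * share, so the same bonus is worth
	 * ten times less wall-clock time to a 10% class than to a 100% class */
	return EX_WAKEUP_BONUS_CVT * share;
}

With share = 1024 (100%) this is about 2 seconds ahead; with share = 102 (10%) it is about 200 ms, matching the comment.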
c_cls->stat.ehl *
get_myhard_limit(c_cls) / c_cls->shares.total_guarantee;
+ set_eshare(&c_cls->stat,c_cls->stat.egrt);
+ set_meshare(&c_cls->stat,c_cls->stat.megrt);
+
+
child_core = ckrm_get_next_child(parent, child_core);
};
return 0;
/ cls->shares.total_guarantee;
cls->stat.mehl = cls->stat.ehl * get_myhard_limit(cls)
/ cls->shares.total_guarantee;
-
+ set_eshare(&cls->stat,cls->stat.egrt);
+ set_meshare(&cls->stat,cls->stat.megrt);
+
repeat:
//check exit
if (!cur_core)
return 0;
- //visit this node
- if (update_child_effective(cur_core) < 0)
- return ret; //invalid cur_core node
+ //visit this node only once
+ if (! child_core)
+ if (update_child_effective(cur_core) < 0)
+ return ret; //invalid cur_core node
//next child
child_core = ckrm_get_next_child(cur_core, child_core);
}
/**
- * node_surplus_consume: consume the surplus
- * @ckeck_sl: if check_sl is set, then check soft_limit
- * @total_grt: total guarantee
+ * consume_surplus: decides how much surplus a node can consume
+ * @check_sl: if check_sl is set, then check the soft limit
* return how much consumed
- * return -1 on error
*
* implements all the CKRM Scheduling Requirement
- * update total_grt if necessary
+ * assumes c_cls is valid
*/
-static inline int node_surplus_consume(int surplus,
- struct ckrm_core_class *child_core,
+static inline int consume_surplus(int surplus,
+ struct ckrm_cpu_class *c_cls,
struct ckrm_cpu_class *p_cls,
int check_sl
)
{
int consumed = 0;
int inc_limit;
- int glut = 1;
-
- struct ckrm_cpu_class *c_cls = ckrm_get_cpu_class(child_core);
int total_grt = p_cls->shares.total_guarantee;
BUG_ON(surplus < 0);
- if (! c_cls || ! total_grt)
- goto out;
-
/*can't consume more than demand or hard limit*/
if (c_cls->stat.eshare >= c_cls->stat.max_demand)
goto out;
+ //the surplus allocation is proportional to grt
consumed =
surplus * c_cls->shares.my_guarantee / total_grt;
if (check_sl) {
int esl = p_cls->stat.eshare * get_soft_limit(c_cls)
- /p_cls->shares.total_guarantee;
+ /total_grt;
if (esl < c_cls->stat.max_demand)
inc_limit = esl - c_cls->stat.eshare;
}
-
if (consumed > inc_limit)
consumed = inc_limit;
- else
- glut = 0;
BUG_ON(consumed < 0);
- set_eshare(&c_cls->stat,c_cls->stat.eshare + consumed);
- BUG_ON(c_cls->stat.eshare < 0);
+ out:
+ return consumed;
+}
+
+/*
+ * how much a node can consume for itself?
+ */
+static inline int consume_self_surplus(int surplus,
+ struct ckrm_cpu_class *p_cls,
+ int check_sl
+ )
+{
+ int consumed = 0;
+ int inc_limit;
+ int total_grt = p_cls->shares.total_guarantee;
+ int max_demand = get_mmax_demand(&p_cls->stat);
+
+ BUG_ON(surplus < 0);
+ /*can't consume more than demand or hard limit*/
+ if (p_cls->stat.meshare >= max_demand)
+ goto out;
+
+ //the surplus allocation is proportional to grt
+ consumed =
+ surplus * p_cls->shares.unused_guarantee / total_grt;
+
+ if (! consumed) //no more share
+ goto out;
+
+ //hard limit and demand limit
+ inc_limit = max_demand - p_cls->stat.meshare;
+
+ if (check_sl) {
+ int mesl = p_cls->stat.eshare * get_mysoft_limit(p_cls)
+ /total_grt;
+ if (mesl < max_demand)
+ inc_limit = mesl - p_cls->stat.meshare;
+ }
+
+ if (consumed > inc_limit)
+ consumed = inc_limit;
+
+ BUG_ON(consumed < 0);
out:
return consumed;
}
+
+/*
+ * allocate surplus to all its children and also its default class
+ */
+static int alloc_surplus_single_round(
+ int surplus,
+ struct ckrm_core_class *parent,
+ struct ckrm_cpu_class *p_cls,
+ int check_sl)
+{
+ struct ckrm_cpu_class *c_cls;
+ struct ckrm_core_class *child_core = NULL;
+ int total_consumed = 0,consumed;
+
+ //first allocate to the default class
+ consumed =
+ consume_self_surplus(surplus,p_cls,check_sl);
+
+ if (consumed > 0) {
+ set_meshare(&p_cls->stat,p_cls->stat.meshare + consumed);
+ total_consumed += consumed;
+ }
+
+ do {
+ child_core = ckrm_get_next_child(parent, child_core);
+ if (child_core) {
+ c_cls = ckrm_get_cpu_class(child_core);
+ if (! c_cls)
+ return -1;
+
+ consumed =
+ consume_surplus(surplus, c_cls,
+ p_cls,check_sl);
+ if (consumed > 0) {
+ set_eshare(&c_cls->stat,c_cls->stat.eshare + consumed);
+ total_consumed += consumed;
+ }
+ }
+ } while (child_core);
+
+ return total_consumed;
+}
+
/**
* alloc_surplus_node: re-allocate the shares for children under parent
* @parent: parent node
*/
static int alloc_surplus_node(struct ckrm_core_class *parent)
{
- int total_surplus , old_surplus;
- struct ckrm_cpu_class *p_cls = ckrm_get_cpu_class(parent);
- struct ckrm_core_class *child_core = NULL;
- int self_share;
+ struct ckrm_cpu_class *p_cls,*c_cls;
+ int total_surplus,consumed;
int check_sl;
int ret = -1;
+ struct ckrm_core_class *child_core = NULL;
+ p_cls = ckrm_get_cpu_class(parent);
if (! p_cls)
- return ret;
-
- total_surplus = get_my_node_surplus(p_cls);
+ goto realloc_out;
/*
- * initialize effective_share
+ * get total surplus
*/
+ total_surplus = p_cls->stat.eshare - p_cls->stat.egrt;
+ BUG_ON(total_surplus < 0);
+ total_surplus += get_my_node_surplus(p_cls);
+
do {
child_core = ckrm_get_next_child(parent, child_core);
if (child_core) {
- struct ckrm_cpu_class *c_cls;
-
c_cls = ckrm_get_cpu_class(child_core);
if (! c_cls)
- return ret;
+ goto realloc_out;
total_surplus += get_node_surplus(c_cls);
-
- set_eshare(&c_cls->stat, c_cls->stat.egrt);
}
} while (child_core);
- if (! total_surplus)
+
+ if (! total_surplus) {
+ ret = 0;
goto realloc_out;
+ }
- /* distribute the surplus */
- child_core = NULL;
+ /*
+ * distributing the surplus
+ * first with check_sl enabled;
+ * once all the tasks have reached the soft limit, disable check_sl and try again
+ */
+
check_sl = 1;
- old_surplus = 0;
do {
- if (!child_core) {//start a new round
+ consumed = alloc_surplus_single_round(total_surplus,parent,p_cls,check_sl);
+ if (consumed < 0) //something is wrong
+ goto realloc_out;
- //ok, everybody reached the soft limit
- if (old_surplus == total_surplus)
- check_sl = 0;
- old_surplus = total_surplus;
- }
+ if (! consumed)
+ check_sl = 0;
+ else
+ total_surplus -= consumed;
- child_core = ckrm_get_next_child(parent, child_core);
- if (child_core) {
- int consumed = 0;
- consumed -=
- node_surplus_consume(old_surplus, child_core,
- p_cls,check_sl);
- if (consumed >= 0)
- total_surplus -= consumed;
- else
- return ret;
- }
- //start a new round if something is allocated in the last round
- } while (child_core || check_sl || total_surplus != old_surplus);
+ } while ((total_surplus > 0) && (consumed || check_sl) );
- realloc_out:
- /*how much for itself*/
- self_share = p_cls->stat.eshare *
- p_cls->shares.unused_guarantee / p_cls->shares.total_guarantee;
-
- if (self_share < p_cls->stat.max_demand) {
- /*any remaining surplus goes to the default class*/
- self_share += total_surplus;
- if (self_share > p_cls->stat.max_demand)
- self_share = p_cls->stat.max_demand;
- }
+ ret = 0;
- set_meshare(&p_cls->stat, self_share);
- return 0;
+ realloc_out:
+ return ret;
}
/**
static int alloc_surplus(struct ckrm_core_class *root_core)
{
struct ckrm_core_class *cur_core, *child_core;
- struct ckrm_cpu_class *cls;
+ // struct ckrm_cpu_class *cls;
int ret = -1;
/*initialize*/
cur_core = root_core;
child_core = NULL;
- cls = ckrm_get_cpu_class(cur_core);
-
- //set root eshare
- set_eshare(&cls->stat, cls->stat.egrt);
+ // cls = ckrm_get_cpu_class(cur_core);
/*the ckrm idle tasks get whatever is remaining*/
/*hzheng: uncomment the following line for hard limit support */
// update_ckrm_idle(CKRM_SHARE_MAX - cls->stat.max_demand);
- repeat:
+ repeat:
//check exit
if (!cur_core)
return 0;
- //visit this node
- if ( alloc_surplus_node(cur_core) < 0 )
- return ret;
+ //visit this node only once
+ if (! child_core)
+ if ( alloc_surplus_node(cur_core) < 0 )
+ return ret;
//next child
child_core = ckrm_get_next_child(cur_core, child_core);
/*similar to cpu_idle */
while (1) {
while (!need_resched()) {
- ckrm_cpu_monitor();
+ ckrm_cpu_monitor(1);
if (current_cpu_data.hlt_works_ok) {
local_irq_disable();
if (!need_resched()) {
/**********************************************/
/**
*ckrm_cpu_monitor - adjust relative shares of the classes based on their progress
+ *@check_min: if check_min is set, the call is ignored when it falls within 100ms of the last call
*
* this function is called every CPU_MONITOR_INTERVAL
* it computes the cpu demand of each class
* and re-allocates the unused shares to other classes
*/
-void ckrm_cpu_monitor(void)
+void ckrm_cpu_monitor(int check_min)
{
static spinlock_t lock = SPIN_LOCK_UNLOCKED;
static unsigned long long last_check = 0;
now = sched_clock();
//consecutive checks should be at least 100ms apart
- if (now - last_check < MIN_CPU_MONITOR_INTERVAL) {
+ if (check_min && (now - last_check < MIN_CPU_MONITOR_INTERVAL))
goto outunlock;
- }
+
last_check = now;
if (update_effectives(root_core) != 0)
/*sleep for sometime before next try*/
set_current_state(TASK_INTERRUPTIBLE);
schedule_timeout(CPU_MONITOR_INTERVAL);
- ckrm_cpu_monitor();
+ ckrm_cpu_monitor(1);
if (thread_exit) {
break;
}
void ckrm_kill_monitor(void)
{
- // int interval = HZ;
-
printk("killing process %d\n", cpu_monitor_pid);
if (cpu_monitor_pid > 0) {
thread_exit = 1;