/* ckrm_cpu_monitor.c - Hierarchical CKRM CPU Resource Monitor
 *
 * Copyright (C) Haoqiang Zheng,  IBM Corp. 2004
 *           (C) Hubertus Franke, IBM Corp. 2004
 *
 * Latest version, more details at http://ckrm.sf.net
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 */

/* Changes
 *
 * 23 June 2004: Created
 */
#include <linux/module.h>
#include <linux/init.h>
#include <asm/errno.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/ckrm.h>
#include <linux/ckrm_rc.h>
#include <linux/ckrm_tc.h>
#include <asm/div64.h>
#include <linux/ckrm_sched.h>
// #define CONFIG_CKRM_SUPPORT_MAXLIMITS

#define CPU_MONITOR_INTERVAL (HZ) /*how often do we adjust the shares*/

#define CKRM_CPU_DEMAND_RUN 0
#define CKRM_CPU_DEMAND_SLEEP 1
//sample task cpu demand every 32ms, class cpu demand every 256ms
#define CPU_DEMAND_TASK_RECALC  ( 32*1000*1000LL)
#define CPU_DEMAND_CLASS_RECALC (256*1000*1000LL)
#define CPU_DEMAND_TP_CLASS 0
#define CPU_DEMAND_TP_TASK 1
static void update_ckrm_idle(unsigned long surplus);

void cpu_demand_check_sleep(struct ckrm_cpu_class_stat *stat, int cpu);
int alloc_surplus(struct ckrm_core_class *root_core);
extern struct ckrm_cpu_class *ckrm_get_cpu_class(struct ckrm_core_class *core);
/*interface to share definitions*/
static inline int get_my_grt(struct ckrm_cpu_class *cls)
{
        return cls->shares.unused_guarantee;
}

static inline int get_soft_limit(struct ckrm_cpu_class *cls)
{
        return cls->shares.my_limit;
}

static inline int get_mysoft_limit(struct ckrm_cpu_class *cls)
{
        return cls->shares.total_guarantee;
}

static inline int get_hard_limit(struct ckrm_cpu_class *cls)
{
        return cls->shares.total_guarantee;
}

static inline int get_myhard_limit(struct ckrm_cpu_class *cls)
{
        return cls->shares.total_guarantee;
}
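/*
 * Note on the limit accessors above: with maxlimit support compiled
 * out (CONFIG_CKRM_SUPPORT_MAXLIMITS is commented out at the top of
 * this file), both hard-limit accessors just return total_guarantee.
 * In update_effectives() the child's effective hard limit then becomes
 * ehl = parent_ehl * total_guarantee / parent_total_guarantee, which
 * is simply parent_ehl whenever parent and child use the same
 * total_guarantee scale (the common case), i.e. effectively no cap;
 * only the soft limit (my_limit) actually constrains a class.
 */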
static inline void set_eshare(struct ckrm_cpu_class_stat *stat,
                              int new_share)
{
        if (!new_share)
                new_share = 1;

        BUG_ON(new_share < 0);
        stat->eshare = new_share;
}

static inline void set_meshare(struct ckrm_cpu_class_stat *stat,
                               int new_share)
{
        if (!new_share)
                new_share = 1;

        BUG_ON(new_share < 0);
        stat->meshare = new_share;
}
/**
 * get_self_cpu_demand - get the cpu demand of the class itself (excluding children)
 *
 * self_cpu_demand = sum(cpu demand of all local queues) / online cpus
 */
static inline unsigned long get_self_cpu_demand(struct ckrm_cpu_class_stat *stat)
{
        int cpu_demand = 0;
        int i;
        int cpuonline = 0;

        for_each_online_cpu(i) {
                cpu_demand_check_sleep(stat,i);
                cpu_demand += stat->local_stats[i].cpu_demand;
                cpuonline ++;
        }

        return (cpu_demand/cpuonline);
}
/*
 * my max demand = min(cpu_demand, my effective hard limit)
 */
static inline unsigned long get_mmax_demand(struct ckrm_cpu_class_stat* stat)
{
        unsigned long mmax_demand = get_self_cpu_demand(stat);

        if (mmax_demand > stat->mehl)
                mmax_demand = stat->mehl;

        return mmax_demand;
}
static inline void cpu_demand_stat_init(struct ckrm_cpu_demand_stat* local_stat, int type)
{
        unsigned long long now = sched_clock();

        local_stat->run = 0;
        local_stat->total = 0;
        local_stat->last_sleep = now;
        switch (type) {
        case CPU_DEMAND_TP_CLASS:
                local_stat->recalc_interval = CPU_DEMAND_CLASS_RECALC;
                local_stat->cpu_demand = 0;
                break;
        case CPU_DEMAND_TP_TASK:
                local_stat->recalc_interval = CPU_DEMAND_TASK_RECALC;
                //for a task, the initial cpu_demand is copied from its parent
                break;
        default:
                BUG();
        }
}
void ckrm_cpu_stat_init(struct ckrm_cpu_class_stat *stat, int eshares)
{
        int i;

        stat->stat_lock = SPIN_LOCK_UNLOCKED;
        stat->total_ns = 0;
        stat->max_demand = 0;

        for (i=0; i<NR_CPUS; i++) {
                cpu_demand_stat_init(&stat->local_stats[i],CPU_DEMAND_TP_CLASS);
        }

        stat->egrt = 0;
        stat->megrt = 0;
        stat->ehl = CKRM_SHARE_MAX; /*default: no limit*/
        stat->mehl = CKRM_SHARE_MAX; /*default: no limit */

        stat->eshare = eshares;
        stat->meshare = eshares;

        stat->has_savings = 0;
        stat->demand_per_share = 0;
}
#if 0 // keep handy for debugging if necessary
void ckrm_cpu_class_dump(struct ckrm_cpu_class *clsptr,int num)
{
        struct ckrm_cpu_class_stat* stat = &clsptr->stat;

        printk("%d> %p[%d] mg=%d lim=%d tg=%d maxlim=%d ug=%d\n",num,
               clsptr, (clsptr == get_default_cpu_class()),
               clsptr->shares.my_guarantee,
               clsptr->shares.my_limit,
               clsptr->shares.total_guarantee,
               clsptr->shares.max_limit,
               clsptr->shares.unused_guarantee);
        printk("      egrt=%d megrt=%d ehl=%d mehl=%d esh=%d mesh=%d\n",
               stat->egrt,stat->megrt,stat->ehl,stat->mehl,
               stat->eshare,stat->meshare);
}
#endif
/**********************************************/
/*          surplus allocation                */
/**********************************************/
/*
 * surplus = egrt - demand
 * if surplus < 0, surplus = 0
 */
static inline int get_node_surplus(struct ckrm_cpu_class *cls)
{
        int surplus = cls->stat.egrt - cls->stat.max_demand;

        if (surplus < 0)
                surplus = 0;

        return surplus;
}
/*
 * consume savings in advance because this class gives surplus to others
 * this is a quick hack, should be integrated with balance_savings()
 */
static inline void consumed_surplus_savings(struct ckrm_cpu_class *clsptr,
                                            int savings_consumed)
{
        long long total_savings;
        ckrm_lrq_t* lrq;
        int i;
        int cpu_online = 0;

        total_savings = 0;
        for_each_online_cpu(i) {
                lrq = get_ckrm_lrq(clsptr,i);
                total_savings += lrq->savings;
                cpu_online ++;
        }

        total_savings -= savings_consumed;
        if (total_savings < 0)
                total_savings = 0;

        //get the average savings
        do_div(total_savings,cpu_online);
        for_each_online_cpu(i) {
                lrq = get_ckrm_lrq(clsptr,i);
                lrq->savings = total_savings;
        }
}
static inline int get_my_node_surplus(struct ckrm_cpu_class *cls)
{
        int surplus = cls->stat.megrt - get_mmax_demand(&cls->stat);
        int savings_consumed;

        if (surplus < 0)
                surplus = 0;

        /*
         * a quick hack about the hierarchy savings distribution
         * may not be the right way to do it
         *
         * since this node gives its surplus to other nodes,
         * its savings should be consumed
         * suppose CPU_MONITOR_INTERVAL = (HZ)
         * savings_consumed is roughly how much savings will be consumed for the next second
         */
        if (surplus) {
                savings_consumed = surplus * HZ * (NSEC_PER_MS >> CKRM_SHARE_SHIFT);
                consumed_surplus_savings(cls, savings_consumed);
        }

        return surplus;
}
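/*
 * Dimensional check of savings_consumed (illustrative, assuming
 * CKRM_SHARE_MAX == 1 << CKRM_SHARE_SHIFT and HZ == 1000): surplus is
 * a share value, i.e. the class frees up surplus/CKRM_SHARE_MAX of one
 * cpu.  Over the next second that is worth
 * (surplus/CKRM_SHARE_MAX) * 1000 * NSEC_PER_MS ns of cpu time, which
 * is roughly surplus * HZ * (NSEC_PER_MS >> CKRM_SHARE_SHIFT), the
 * expression used above.
 */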
/*
 * all the classes in the queue consume the surplus in order
 * each class consumes an amount proportional to its egrt
 */
static int consume_surplus_in_order(struct list_head* queue,
                                    struct ckrm_cpu_class *p_cls,
                                    int total_surplus)
{
        int total_grt = 0;
        struct ckrm_cpu_class *clsptr;

        /*
         * get the total_grt of the classes in the queue
         * total_grt could be maintained instead of re-calculated each time
         */
        list_for_each_entry(clsptr,queue,surplus_queue) {
                if (unlikely(clsptr == p_cls))
                        total_grt += clsptr->stat.megrt;
                else
                        total_grt += clsptr->stat.egrt;
        }

        if (! total_grt)
                goto consume_out;

        //allocate in order
        list_for_each_entry(clsptr,queue,surplus_queue) {
                int surplus_per_share;
                int consumed, my_grt;

                BUG_ON(! total_grt);
                surplus_per_share =
                        (total_surplus << CKRM_SHARE_SHIFT) / total_grt;

                if (surplus_per_share <= 0)
                        break;

                if (unlikely(clsptr == p_cls)) //self_node consuming
                        my_grt = clsptr->stat.megrt;
                else
                        my_grt = clsptr->stat.egrt;

                BUG_ON(clsptr->stat.demand_per_share <= 0);

                //a class can't consume more than its remaining demand
                if (clsptr->stat.demand_per_share < surplus_per_share)
                        surplus_per_share = clsptr->stat.demand_per_share;

                consumed = surplus_per_share * my_grt;
                consumed >>= CKRM_SHARE_SHIFT;
                total_surplus -= consumed;
                BUG_ON(total_surplus < 0);
                total_grt -= my_grt;

                if (unlikely(clsptr == p_cls))
                        set_meshare(&clsptr->stat,clsptr->stat.meshare + consumed);
                else
                        set_eshare(&clsptr->stat,clsptr->stat.eshare + consumed);
        }

 consume_out:
        if (total_surplus <= 1) //if total_surplus is too small, no need to allocate again
                total_surplus = 0;
        return total_surplus;
}
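/*
 * Worked example (illustrative numbers): two classes are queued with
 * guarantees 30 and 10 (total_grt = 40) and total_surplus = 20.  For
 * the first class, surplus_per_share = (20 << CKRM_SHARE_SHIFT) / 40,
 * capped at its demand_per_share, and it consumes at most
 * surplus_per_share * 30 >> CKRM_SHARE_SHIFT = 15; the second class
 * then sees the remaining 5 against total_grt = 10 and consumes at
 * most 5.  Each class thus takes surplus in proportion to its
 * guarantee, and whatever a low-demand class declines flows to the
 * classes behind it in the queue.
 */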
/*
 * link all the children of parent and the parent itself using their surplus_queue field
 * link the whole queue using src_queue
 * if anything goes wrong return -1
 */
static int get_class_surplus_queue(struct ckrm_core_class *parent,
                                   struct list_head* src_queue)
{
        struct ckrm_core_class *child_core = NULL;
        struct ckrm_cpu_class *p_cls,*c_cls;
        int ret = -1;

        p_cls = ckrm_get_cpu_class(parent);
        if (! p_cls)
                goto link_out;

        INIT_LIST_HEAD(src_queue);

        //add the parent node itself
        list_add(&p_cls->surplus_queue,src_queue);
        do {
                child_core = ckrm_get_next_child(parent, child_core);
                if (child_core) {
                        c_cls = ckrm_get_cpu_class(child_core);
                        if (! c_cls)
                                goto link_out;
                        list_add(&c_cls->surplus_queue,src_queue);
                }
        } while (child_core);

        ret = 0;
 link_out:
        return ret;
}
/*
 * insert the class into the queue based on stat->demand_per_share
 */
static void insert_surplus_queue(struct list_head* queue, struct ckrm_cpu_class *clsptr)
{
        struct ckrm_cpu_class *cur_cls = NULL;
        int end_of_queue = 1;

        list_for_each_entry(cur_cls,queue,surplus_queue) {
                if (cur_cls->stat.demand_per_share >= clsptr->stat.demand_per_share) {
                        end_of_queue = 0;
                        break;
                }
        }

        //insert clsptr
        if (! cur_cls || end_of_queue)
                list_add_tail(&clsptr->surplus_queue,queue);
        else
                list_add_tail(&clsptr->surplus_queue,&cur_cls->surplus_queue);
}
/*
 * copy all classes in src_queue to dst_queue,
 * reorder the classes based on their normalized demand
 * if a class is already saturated (eshare >= demand), also remove it from src_queue
 *
 * @src_queue: source queue
 * @dst_queue: destination queue
 * @check_sl: check soft limit
 * @check_savings: only classes with savings should be considered
 */
static unsigned long reorder_surplus_queue(struct list_head* src_queue,
                                           struct list_head* dst_queue,
                                           int check_sl, int check_savings,
                                           struct ckrm_cpu_class *p_cls)
{
        struct ckrm_cpu_class *clsptr, *tmp;

        INIT_LIST_HEAD(dst_queue);

        list_for_each_entry_safe(clsptr,tmp,src_queue,surplus_queue) {
                struct ckrm_cpu_class_stat* stat = &clsptr->stat;
                int inc_limit;
                int max_demand, eshare, esl,grt;

                if (unlikely(clsptr == p_cls)) {
                        max_demand = get_mmax_demand(stat);
                        eshare = stat->meshare;
                        esl = get_mysoft_limit(clsptr);
                        grt = stat->megrt;
                } else {
                        max_demand = stat->max_demand;
                        eshare = stat->eshare;
                        esl = get_soft_limit(clsptr);
                        grt = stat->egrt;
                }

                //hard limit and demand limit
                inc_limit = max_demand - eshare;

                //no additional share needed
                if (inc_limit <= 0 || ! grt) {
                        list_del(&clsptr->surplus_queue);
                        continue;
                }

                //only consider classes with savings in this round
                if (check_savings && ! stat->has_savings)
                        continue;

                //check the soft limit
                if (check_sl) {
                        int soft_limit;

                        soft_limit = p_cls->stat.eshare * esl
                                / p_cls->shares.total_guarantee;

                        if (soft_limit < max_demand)
                                inc_limit = soft_limit - eshare;
                        if ( inc_limit <= 0) /* can turn negative */
                                continue;
                }

                //get the stat->demand_per_share
                stat->demand_per_share =
                        (inc_limit << CKRM_SHARE_SHIFT) / grt;

                list_del_init(&clsptr->surplus_queue);
                //insert the class into the dst_queue
                insert_surplus_queue(dst_queue,clsptr);
        }
        return 0;
}
/*
 * get all the surplus that should be reallocated to the children
 */
static inline int get_total_surplus(struct ckrm_cpu_class *p_cls,
                                    struct ckrm_core_class *parent)
{
        struct ckrm_cpu_class *c_cls;
        int total_surplus;
        struct ckrm_core_class *child_core = NULL;

        //additional share assigned to this sub node from the parent
        total_surplus = p_cls->stat.eshare - p_cls->stat.egrt;
        BUG_ON(total_surplus < 0);

        //surplus of this node
        total_surplus += get_my_node_surplus(p_cls);

        //add the surplus of all children
        do {
                child_core = ckrm_get_next_child(parent, child_core);
                if (child_core) {
                        c_cls = ckrm_get_cpu_class(child_core);
                        if (! c_cls) {
                                total_surplus = 0;
                                break;
                        }

                        total_surplus += get_node_surplus(c_cls);
                }
        } while (child_core);

        return total_surplus;
}
/**
 * alloc_surplus_node: re-allocate the shares for a single level
 * @parent: parent node
 * return the remaining surplus
 *
 * The surplus reallocation policy works as follows.
 * -- classes that have eshare >= demand don't need any additional share,
 *    so they don't participate in the surplus allocation.
 * -- all the other classes receive shares in this order:
 *    1. has savings, not over soft limit
 *    2. has savings, but over soft limit
 *    3. no savings, not over soft limit
 *    4. no savings, over soft limit
 *
 * In each of the 4 rounds above, classes get surplus proportionally to their guarantees
 */
static int alloc_surplus_node(struct ckrm_core_class *parent)
{
        struct ckrm_cpu_class *p_cls;
        int total_surplus;
        int ret = -1;
        struct list_head src_queue, dst_queue;

        p_cls = ckrm_get_cpu_class(parent);
        if (! p_cls) //safety check
                goto realloc_out;

        ret = 0;
        total_surplus = get_total_surplus(p_cls,parent);

        if (! total_surplus) //no surplus to be allocated
                goto realloc_out;

        /*
         * first round, allocate to classes with savings, check_sl
         */
        get_class_surplus_queue(parent,&src_queue);
        reorder_surplus_queue(&src_queue, &dst_queue, 1, 1,p_cls);
        if (! list_empty(&dst_queue)) {
                total_surplus = consume_surplus_in_order(&dst_queue,p_cls,total_surplus);
                if (! total_surplus)
                        goto realloc_out;
        }

        /*
         * second round, check savings, but no check_sl
         */
        //merge the src_queue and dst_queue and reorder
        list_splice(&dst_queue, &src_queue);
        reorder_surplus_queue(&src_queue, &dst_queue, 0, 1,p_cls);
        if (! list_empty(&dst_queue)) {
                total_surplus = consume_surplus_in_order(&dst_queue,p_cls,total_surplus);
                if (! total_surplus)
                        goto realloc_out;
        }

        /*
         * third round, no check savings, but check_sl
         */
        //merge the src_queue and dst_queue and reorder
        list_splice(&dst_queue, &src_queue);
        reorder_surplus_queue(&src_queue, &dst_queue, 1, 0,p_cls);
        if (! list_empty(&dst_queue)) {
                total_surplus = consume_surplus_in_order(&dst_queue,p_cls,total_surplus);
                if (! total_surplus)
                        goto realloc_out;
        }

        /*
         * fourth round, no check savings, no check_sl
         */
        //merge the src_queue and dst_queue and reorder
        list_splice(&dst_queue, &src_queue);
        reorder_surplus_queue(&src_queue, &dst_queue, 0, 0,p_cls);
        if (! list_empty(&dst_queue))
                total_surplus = consume_surplus_in_order(&dst_queue,p_cls,total_surplus);

 realloc_out:
        return ret;
}
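/*
 * Worked example of the four rounds (illustrative): suppose classes
 * A (has savings, under its soft limit), B (has savings, over its
 * soft limit) and C (no savings, under its soft limit) all have unmet
 * demand.  Round 1 (check_sl=1, check_savings=1) offers the surplus
 * to A only; whatever remains goes in round 2 (0,1) to A and B, in
 * round 3 (1,0) also to C, and round 4 (0,0) finally lets any
 * still-hungry class absorb the rest regardless of soft limit or
 * savings.
 */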
/*
 * return true if the class's total savings > CLASS_MIN_SAVINGS
 */
static int balance_local_savings(struct ckrm_cpu_class *clsptr, int cpu_online)
{
        unsigned long long total_savings;
        ckrm_lrq_t* lrq;
        int i;
#define CLASS_MIN_SAVINGS (10 * NSEC_PER_MS)

        total_savings = 0;
        for_each_online_cpu(i) {
                lrq = get_ckrm_lrq(clsptr,i);
                total_savings += lrq->savings;
        }

        if (total_savings < CLASS_MIN_SAVINGS)
                return 0;

        //get the average savings
        do_div(total_savings,cpu_online);
        for_each_online_cpu(i) {
                lrq = get_ckrm_lrq(clsptr,i);
                lrq->savings = total_savings;
        }

        /*
         * hzheng: this is another quick hack
         * only say I have savings when this node has more demand,
         * ignoring the requirements of the child classes
         */
        if (clsptr->stat.megrt < get_mmax_demand(&clsptr->stat))
                return 1;
        else
                return 0;
}
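/*
 * Design note: balance_local_savings() levels the per-cpu savings
 * first, so a class whose tasks migrate between cpus sees a single
 * uniform savings balance and the has_savings decision is made once
 * per class rather than per cpu.  The final megrt < mmax_demand test
 * only reports savings while this node itself could still use more
 * cpu time.
 */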
/*
 * check savings status
 * set the has_savings field if the class or its sub class has savings
 */
static void check_savings_status(struct ckrm_core_class *root_core)
{
        struct ckrm_cpu_class *clsptr;
        int cpu_online;

        cpu_online = cpus_weight(cpu_online_map);

        //update the has_savings flag of each active class
        list_for_each_entry(clsptr,&active_cpu_classes,links)
                clsptr->stat.has_savings = balance_local_savings(clsptr,cpu_online);
}
/**
 * alloc_surplus - reallocate unused shares
 *
 * class A's unused share should be allocated to its siblings
 * the re-allocation goes downward from the top
 */
int alloc_surplus(struct ckrm_core_class *root_core)
{
        struct ckrm_core_class *cur_core, *child_core;
        // struct ckrm_cpu_class *cls;
        int ret = -1;

        check_savings_status(root_core);

        /*initialize*/
        cur_core = root_core;
        child_core = NULL;
        // cls = ckrm_get_cpu_class(cur_core);

        /*the ckrm idle tasks get all that is remaining*/
        /*hzheng: uncomment the following line for hard limit support */
        // update_ckrm_idle(CKRM_SHARE_MAX - cls->stat.max_demand);

 repeat:
        //check exit
        if (!cur_core)
                return 0;

        //visit this node only once
        if (! child_core)
                if ( alloc_surplus_node(cur_core) < 0 )
                        return ret;

        //next child
        child_core = ckrm_get_next_child(cur_core, child_core);
        if (child_core) {
                //go down
                cur_core = child_core;
                child_core = NULL;
                goto repeat;
        } else { //no more children, go back
                child_core = cur_core;
                cur_core = child_core->hnode.parent;
        }
        goto repeat;
}
/**********************************************/
/*          CPU demand                        */
/**********************************************/

/*
 * How CPU demand is calculated:
 * consider the class local runqueue (clr) first
 * at any time, a clr can be in one of the following three states
 * -- run: a task belonging to this class is running on this cpu
 * -- wait: at least one of its tasks is runnable, but the class is not running
 * -- sleep: none of the tasks of this class is runnable
 *
 * with r(t1,t2) the run time and s(t1,t2) the sleep time in (t1,t2):
 * cpu_demand(t1,t2) = r(t1,t2)/(r(t1,t2)+s(t1,t2))
 *
 * the cpu_demand of a class =
 * sum of the cpu_demand of all the class local runqueues
 */
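/*
 * Worked example (illustrative numbers): over a 256ms class recalc
 * window, suppose a local runqueue accumulates r = 64ms of run time
 * and s = 192ms of sleep time.  Then cpu_demand = 64/(64+192) = 25%
 * of one cpu, stored as a fixed-point fraction of CKRM_SHARE_MAX.
 * Note that wait time counts toward neither r nor s, so contention
 * from other classes does not distort the demand estimate.
 */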
/**
 * update_cpu_demand_stat -
 *
 * should be called whenever the state of a task/task local queue changes
 * -- when deschedule: report how long it ran
 * -- when enqueue: report how long it slept
 *
 * the recalc_interval controls how often the cpu demand is recalculated;
 * the number is in ns
 */
static inline void update_cpu_demand_stat(struct ckrm_cpu_demand_stat* local_stat,
                                          int state, unsigned long long len)
{
        local_stat->total += len;
        if (state == CKRM_CPU_DEMAND_RUN)
                local_stat->run += len;

        if (local_stat->total >= local_stat->recalc_interval) {
                local_stat->total >>= CKRM_SHARE_SHIFT;
                if (unlikely(local_stat->run > ULONG_MAX))
                        local_stat->run = ULONG_MAX;

                if (unlikely(local_stat->total > ULONG_MAX))
                        local_stat->total = ULONG_MAX;

                do_div(local_stat->run,(unsigned long)local_stat->total);

                if (unlikely(local_stat->total == ULONG_MAX)) {
                        //saturated after a very long sleep: take the new ratio as is
                        local_stat->cpu_demand = local_stat->run;
                } else {
                        local_stat->cpu_demand =
                                (local_stat->cpu_demand + local_stat->run) >> 1;
                }
                local_stat->total = 0;
                local_stat->run = 0;
        }
}
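/*
 * Fixed-point walkthrough (illustrative, assuming CKRM_SHARE_MAX ==
 * 1 << CKRM_SHARE_SHIFT): with run = 64ms and total = 256ms worth of
 * ns, run / (total >> CKRM_SHARE_SHIFT) equals
 * (run << CKRM_SHARE_SHIFT) / total, i.e. a demand of 25% of
 * CKRM_SHARE_MAX.  Averaging the new sample with the previous
 * cpu_demand acts as a cheap exponential decay with factor 1/2.
 */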
/**
 * cpu_demand_event - a cpu_demand event occurred
 * @event: one of the following events:
 *    CPU_DEMAND_ENQUEUE: local class enqueue
 *    CPU_DEMAND_DEQUEUE: local class dequeue
 *    CPU_DEMAND_DESCHEDULE: a task belonging to a certain local class deschedules
 *    CPU_DEMAND_INIT: task init
 * @len: valid only for CPU_DEMAND_DESCHEDULE, how long the task has been running
 */
void cpu_demand_event(struct ckrm_cpu_demand_stat* local_stat, int event, unsigned long long len)
{
        switch (event) {
        case CPU_DEMAND_ENQUEUE:
                len = sched_clock() - local_stat->last_sleep;
                local_stat->last_sleep = 0;
                update_cpu_demand_stat(local_stat,CKRM_CPU_DEMAND_SLEEP,len);
                break;
        case CPU_DEMAND_DEQUEUE:
                if (! local_stat->last_sleep) {
                        local_stat->last_sleep = sched_clock();
                }
                break;
        case CPU_DEMAND_DESCHEDULE:
                update_cpu_demand_stat(local_stat,CKRM_CPU_DEMAND_RUN,len);
                break;
        case CPU_DEMAND_INIT: //for task init only
                cpu_demand_stat_init(local_stat,CPU_DEMAND_TP_TASK);
                break;
        default:
                BUG();
        }
}
/**
 * check all the class local queues
 *
 * to deal with excessively long run/sleep states
 * -- whenever ckrm_cpu_monitor is called, check if the class is in sleep state; if yes, update the sleep record
 */
void cpu_demand_check_sleep(struct ckrm_cpu_class_stat *stat, int cpu)
{
        struct ckrm_cpu_demand_stat * local_stat = &stat->local_stats[cpu];
        unsigned long long sleep,now;

        if (local_stat->last_sleep) {
                now = sched_clock();
                sleep = now - local_stat->last_sleep;
                local_stat->last_sleep = now;
                update_cpu_demand_stat(local_stat,CKRM_CPU_DEMAND_SLEEP,sleep);
        }
}
/**
 * update_max_demand: update the effective cpu demand for each class
 * return -1 on error
 *
 * Assume: the root_core->parent == NULL
 */
static int update_max_demand(struct ckrm_core_class *root_core)
{
        struct ckrm_core_class *cur_core, *child_core;
        struct ckrm_cpu_class *cls,*c_cls;
        int ret = -1;

        cur_core = root_core;
        child_core = NULL;

 repeat:
        if (!cur_core) { //normal exit
                ret = 0;
                goto out;
        }

        cls = ckrm_get_cpu_class(cur_core);
        if (! cls) //invalid class, abort
                goto out;

        if (!child_core) //first child
                cls->stat.max_demand = get_mmax_demand(&cls->stat);
        else {
                c_cls = ckrm_get_cpu_class(child_core);
                if (c_cls)
                        cls->stat.max_demand += c_cls->stat.max_demand;
                else //invalid c_cls, abort
                        goto out;
        }

        //check the class hard limit
        if (cls->stat.max_demand > cls->stat.ehl)
                cls->stat.max_demand = cls->stat.ehl;

        //next child
        child_core = ckrm_get_next_child(cur_core, child_core);
        if (child_core) {
                //go down
                cur_core = child_core;
                child_core = NULL;
                goto repeat;
        } else { //no more children, go back
                child_core = cur_core;
                cur_core = child_core->hnode.parent;
        }
        goto repeat;
 out:
        return ret;
}
/**********************************************/
/*          effective guarantee & limit       */
/**********************************************/

/**
 * update_child_effective - update egrt, ehl, mehl for all children of parent
 * @parent: the parent node
 * return -1 if anything goes wrong
 */
static int update_child_effective(struct ckrm_core_class *parent)
{
        struct ckrm_cpu_class *p_cls = ckrm_get_cpu_class(parent);
        struct ckrm_core_class *child_core;
        int ret = -1;

        if (! p_cls)
                return ret;

        child_core = ckrm_get_next_child(parent, NULL);
        while (child_core) {
                struct ckrm_cpu_class *c_cls = ckrm_get_cpu_class(child_core);

                if (! c_cls)
                        return ret;

                c_cls->stat.egrt =
                        p_cls->stat.egrt *
                        c_cls->shares.my_guarantee / p_cls->shares.total_guarantee;

                c_cls->stat.megrt = c_cls->stat.egrt * get_my_grt(c_cls)
                        / c_cls->shares.total_guarantee;

                c_cls->stat.ehl =
                        p_cls->stat.ehl *
                        get_hard_limit(c_cls) / p_cls->shares.total_guarantee;

                c_cls->stat.mehl =
                        c_cls->stat.ehl *
                        get_myhard_limit(c_cls) / c_cls->shares.total_guarantee;

                set_eshare(&c_cls->stat,c_cls->stat.egrt);
                set_meshare(&c_cls->stat,c_cls->stat.megrt);

                child_core = ckrm_get_next_child(parent, child_core);
        };
        return 0;
}
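/*
 * Worked example (illustrative numbers): suppose the parent has
 * egrt = 60% of CKRM_SHARE_MAX and total_guarantee = 100.  A child
 * with my_guarantee = 50 receives egrt = 60% * 50/100 = 30%.  If the
 * child keeps unused_guarantee = 20 out of its own total_guarantee
 * of 100, its megrt (the part reserved for the child's own tasks
 * rather than its children) is 30% * 20/100 = 6%.
 */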
/**
 * update_effectives: update egrt, ehl, mehl for the whole tree
 * should be called only when the class structure has changed
 *
 * return -1 if anything went wrong (eg: the structure changed during the process)
 */
int update_effectives(void)
{
        struct ckrm_core_class *root_core = get_default_cpu_class()->core;
        struct ckrm_core_class *cur_core, *child_core;
        struct ckrm_cpu_class *cls;
        int ret = -1;

        cur_core = root_core;
        child_core = NULL;
        cls = ckrm_get_cpu_class(cur_core);

        //initialize the effectives for the root
        cls->stat.egrt = CKRM_SHARE_MAX; /*egrt of the root is always 100% */
        cls->stat.megrt = cls->stat.egrt * get_my_grt(cls)
                / cls->shares.total_guarantee;
        cls->stat.ehl = CKRM_SHARE_MAX * get_hard_limit(cls)
                / cls->shares.total_guarantee;
        cls->stat.mehl = cls->stat.ehl * get_myhard_limit(cls)
                / cls->shares.total_guarantee;
        set_eshare(&cls->stat,cls->stat.egrt);
        set_meshare(&cls->stat,cls->stat.megrt);

 repeat:
        //check exit
        if (!cur_core)
                return 0;

        //visit this node only once
        if (! child_core)
                if (update_child_effective(cur_core) < 0)
                        return ret; //invalid cur_core node

        //next child
        child_core = ckrm_get_next_child(cur_core, child_core);

        if (child_core) {
                //go down to the next hierarchy level
                cur_core = child_core;
                child_core = NULL;
        } else { //no more children, go back
                child_core = cur_core;
                cur_core = child_core->hnode.parent;
        }
        goto repeat;
}
/**********************************************/
/*          CKRM Idle Tasks                   */
/**********************************************/

#ifdef CONFIG_CKRM_SUPPORT_MAXLIMITS

struct ckrm_cpu_class ckrm_idle_class_obj, *ckrm_idle_class;
struct task_struct* ckrm_idle_tasks[NR_CPUS];
/*how many ckrm idle tasks should I wake up*/
static inline int get_nr_idle(unsigned long surplus)
{
        int cpu_online = cpus_weight(cpu_online_map);
        int nr_idle = 0;

        nr_idle = surplus * cpu_online;
        nr_idle >>= CKRM_SHARE_SHIFT;

        if (surplus)
                nr_idle ++;

        if (nr_idle > cpu_online)
                nr_idle = cpu_online;

        return nr_idle;
}
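/*
 * Example (illustrative): with 4 cpus online and a surplus of 37% of
 * CKRM_SHARE_MAX, surplus * cpu_online covers 1.48 cpus worth of idle
 * time; the shift truncates that to 1 and the surplus != 0 check
 * rounds up, so 2 idle tasks are woken.
 */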
/**
 * update_ckrm_idle: update the status of the idle class according
 * to the new system surplus
 * surplus: new system surplus
 *
 * Tasks:
 * -- update the share of the idle class
 * -- wake up idle tasks according to the surplus
 */
void update_ckrm_idle(unsigned long surplus)
{
        int nr_idle = get_nr_idle(surplus);
        int i;
        struct task_struct* idle_task;

        set_eshare(&ckrm_idle_class->stat,surplus);
        set_meshare(&ckrm_idle_class->stat,surplus);
        /*wake up nr_idle idle tasks*/
        for_each_online_cpu(i) {
                idle_task = ckrm_idle_tasks[i];
                if (! idle_task)
                        continue;
                if (unlikely(idle_task->cpu_class != ckrm_idle_class)) {
                        ckrm_cpu_change_class(idle_task,
                                              idle_task->cpu_class,
                                              ckrm_idle_class);
                }
                if (i < nr_idle) {
                        //activate it
                        wake_up_process(idle_task);
                } else {
                        //deactivate it
                        idle_task->state = TASK_INTERRUPTIBLE;
                        set_tsk_need_resched(idle_task);
                }
        }
}
static int ckrm_cpu_idled(void *nothing)
{
        set_user_nice(current,19);
        daemonize("ckrm_idle_task");

        //deactivate it, it will be awakened by ckrm_cpu_monitor
        current->state = TASK_INTERRUPTIBLE;
        schedule();

        /*similar to cpu_idle */
        while (1) {
                while (!need_resched()) {
                        ckrm_cpu_monitor(1);
                        if (current_cpu_data.hlt_works_ok) {
                                local_irq_disable();
                                if (!need_resched()) {
                                        set_tsk_need_resched(current);
                                        safe_halt();
                                } else
                                        local_irq_enable();
                        }
                }
                schedule();
        }
        return 0;
}
/**
 * ckrm_start_ckrm_idle:
 * create the ckrm_idle_class and start the idle tasks
 */
void ckrm_start_ckrm_idle(void)
{
        int i;
        int ret;
        ckrm_shares_t shares;

        ckrm_idle_class = &ckrm_idle_class_obj;
        memset(ckrm_idle_class,0,sizeof(*ckrm_idle_class));
        /*don't care about the shares */
        init_cpu_class(ckrm_idle_class,&shares);
        printk(KERN_INFO"ckrm idle class %p created\n",ckrm_idle_class);

        for_each_online_cpu(i) {
                ret = kernel_thread(ckrm_cpu_idled, 0, CLONE_KERNEL);

                /*warn on error, but the system should still work without it*/
                if (ret < 0)
                        printk(KERN_ERR"Warn: can't start ckrm idle tasks\n");
                else {
                        ckrm_idle_tasks[i] = find_task_by_pid(ret);
                        if (!ckrm_idle_tasks[i])
                                printk(KERN_ERR"Warn: can't find ckrm idle task %d\n",ret);
                }
        }
}

void ckrm_stop_ckrm_idle(void)
{
        BUG_ON(1); // not yet implemented
}
#else

static inline void ckrm_start_ckrm_idle(void) { };
static inline void ckrm_stop_ckrm_idle(void) { };
static inline void update_ckrm_idle(unsigned long surplus) { };

#endif
/**********************************************/
/*          local weight                      */
/**********************************************/

/*
 * adjust_lrq_weight: adjust the local weight for each cpu
 *
 * lrq->weight = lrq->pressure * class->weight / total_pressure
 */
static void adjust_lrq_weight(struct ckrm_cpu_class *clsptr, int cpu_online)
{
        unsigned long total_pressure = 0;
        ckrm_lrq_t* lrq;
        int i;
        unsigned long class_weight;
        unsigned long long lw;
        struct ckrm_cpu_class_stat *stat;
        unsigned long oweight;
        unsigned long skewed_limit;
        /*
         * if a local queue gets less than 1/SKEWED_SHARE_RATIO of the eshare
         * then we set the skewed_share
         */
#define SKEWED_SHARE_RATIO 8
#define SKEWED_WEIGHT_MIN 3

        /* get the total pressure of the class; if there is no pressure (i.e.
         * the class is idle), then leave the weights as they are
         */
        for_each_online_cpu(i) {
                lrq = get_ckrm_lrq(clsptr,i);
                total_pressure += lrq->lrq_load;
        }

        if (! total_pressure)
                return;

        stat = &clsptr->stat;

        class_weight = cpu_class_weight(clsptr) * cpu_online;

        /* calculate the skewed limit weight */
        skewed_limit = SHARE_TO_WEIGHT(stat->meshare/SKEWED_SHARE_RATIO);
        if (skewed_limit < SKEWED_WEIGHT_MIN)
                skewed_limit = SKEWED_WEIGHT_MIN;

        /* calculate the over_weight */
        BUG_ON(stat->meshare < stat->megrt);
        oweight = ((stat->meshare - stat->megrt) << CKRM_SHARE_SHIFT) / stat->meshare;
        oweight = SHARE_TO_WEIGHT(oweight);

        /*
         * update the weight for each cpu, minimum is 1
         */
        for_each_online_cpu(i) {
                lrq = get_ckrm_lrq(clsptr,i);
                lrq->over_weight = oweight;
                if (! lrq->lrq_load) {
                        /* give an idle class a high share to boost
                         * interactiveness
                         */
                        lw = cpu_class_weight(clsptr);
                        if (unlikely(lw==0))
                                lw = 1;
                } else {
                        lw = lrq->lrq_load;
                        lw *= class_weight;
                        do_div(lw,total_pressure);
                        if (unlikely(lw==0))
                                lw = 1;
                        else if (unlikely(lw > CKRM_MAX_WEIGHT))
                                lw = CKRM_MAX_WEIGHT;
                }
                BUG_ON(lw > CKRM_MAX_WEIGHT);

                /*
                 * set skewed_weight and local_weight in the proper order
                 * to avoid a race condition
                 */
                lrq->local_weight = lw;
                if (lw < skewed_limit)
                        lrq->skewed_weight = skewed_limit;
                else
                        lrq->skewed_weight = 0;
                BUG_ON((local_class_weight(lrq) == 1) && (! lrq->skewed_weight));
        }
}
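/*
 * Example (illustrative numbers): with 2 cpus online and per-class
 * weight w, class_weight = 2w.  If the local loads are 300 and 100,
 * total_pressure = 400, so the local weights become 300*2w/400 = 1.5w
 * and 100*2w/400 = 0.5w: the busier cpu gets the larger slice while
 * the class weight still sums to 2w across both cpus.
 */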
/*
 * assume called with class_list_lock read lock held
 */
void adjust_local_weight(void)
{
        static spinlock_t lock = SPIN_LOCK_UNLOCKED;
        struct ckrm_cpu_class *clsptr;
        int cpu_online;

        //do nothing if someone else is already holding the lock
        if (! spin_trylock(&lock))
                return;

        cpu_online = cpus_weight(cpu_online_map);

        //adjust the local weights of each active class
        list_for_each_entry(clsptr,&active_cpu_classes,links) {
                adjust_lrq_weight(clsptr,cpu_online);
        }

        spin_unlock(&lock);
}
/**********************************************/
/*          main monitor control              */
/**********************************************/

/**
 * ckrm_cpu_monitor - adjust relative shares of the classes based on their progress
 * @check_min: if check_min is set, the call can't be within 100ms of the last call
 *
 * this function is called every CPU_MONITOR_INTERVAL
 * it computes the cpu demand of each class
 * and re-allocates the unused shares to other classes
 */
void ckrm_cpu_monitor(int check_min)
{
        static spinlock_t lock = SPIN_LOCK_UNLOCKED;
        static unsigned long long last_check = 0;
        struct ckrm_core_class *root_core = get_default_cpu_class()->core;
        unsigned long long now;
        int loc;

#define MIN_CPU_MONITOR_INTERVAL (100*1000*1000) /* 100 MSEC */

        if (ckrm_cpu_disabled() || !root_core)
                return;

        //do nothing if someone else is already holding the lock
        if (! spin_trylock(&lock))
                return;

        read_lock(&class_list_lock);

        now = sched_clock();

        //consecutive checks should be at least 100ms apart
        if (check_min && (now - last_check < MIN_CPU_MONITOR_INTERVAL))
                goto outunlock_np;
        last_check = now;

        if (update_effectives() != 0) {
                loc = 0;
                goto outunlock;
        }

        if (update_max_demand(root_core) != 0) {
                loc = 1;
                goto outunlock;
        }

#warning mef: alloc_surplus call back in system;
        if (alloc_surplus(root_core) != 0) {
                loc = 2;
                goto outunlock;
        }

        adjust_local_weight();

 outunlock_np:
        read_unlock(&class_list_lock);
        spin_unlock(&lock);
        return;

 outunlock:
        printk("ckrm_cpu_monitor(%d) exits prematurely cause=%d\n",check_min,loc);
        goto outunlock_np;
}
/*****************************************************/
/*          Supporting Functions                     */
/*****************************************************/
static pid_t cpu_monitor_pid = -1;
static int thread_exit = 0;

static int ckrm_cpu_monitord(void *nothing)
{
        daemonize("ckrm_cpu_ctrld");
        printk("cpu_monitord started\n");
        thread_exit = 0;
        for (;;) {
                /*sleep for some time before the next try*/
                set_current_state(TASK_INTERRUPTIBLE);
                schedule_timeout(CPU_MONITOR_INTERVAL);
                ckrm_cpu_monitor(1);
                if (thread_exit)
                        break;
        }
        cpu_monitor_pid = -1;
        thread_exit = 2;
        printk(KERN_DEBUG "cpu_monitord exit\n");
        return 0;
}
void ckrm_cpu_start_monitor(void)
{
        if (cpu_monitor_pid != -1) {
                /* already started ... */
                return;
        }
        cpu_monitor_pid = kernel_thread(ckrm_cpu_monitord, 0, CLONE_KERNEL);
        if (cpu_monitor_pid < 0) {
                printk(KERN_DEBUG "ckrm_cpu_monitord failed to start\n");
        }
}
void ckrm_cpu_kill_monitor(void)
{
        printk(KERN_DEBUG "killing process %d\n", cpu_monitor_pid);
        if (cpu_monitor_pid > 0) {
                thread_exit = 1;
                while (thread_exit != 2) {
                        set_current_state(TASK_INTERRUPTIBLE);
                        schedule_timeout(CPU_MONITOR_INTERVAL);
                }
        }
}
static int __init ckrm_cpu_init_monitor(void)
{
        if (ckrm_cpu_enabled())
                ckrm_cpu_start_monitor();
        return 0;
}

__initcall(ckrm_cpu_init_monitor);