1 /* ckrm_cpu_monitor.c - Hierarchical CKRM CPU Resource Monitor
3 * Copyright (C) Haoqiang Zheng, IBM Corp. 2004
4 * (C) Hubertus Franke, IBM Corp. 2004
6 * Latest version, more details at http://ckrm.sf.net
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
17 * 23 June 2004: Created
20 #include <linux/module.h>
21 #include <linux/init.h>
22 #include <asm/errno.h>
23 #include <linux/list.h>
24 #include <linux/spinlock.h>
25 #include <linux/ckrm.h>
26 #include <linux/ckrm_rc.h>
27 #include <linux/ckrm_tc.h>
28 #include <asm/div64.h>
29 #include <linux/ckrm_sched.h>
31 #define CPU_MONITOR_INTERVAL (4*HZ) /*how often do we adjust the shares*/
32 #define CKRM_SHARE_ACCURACY 7
33 #define CKRM_SHARE_MAX (1<<CKRM_SHARE_ACCURACY)
35 extern struct ckrm_cpu_class *ckrm_get_cpu_class(struct ckrm_core_class *core);
37 void ckrm_cpu_stat_init(struct ckrm_cpu_class_stat *stat)
40 struct ckrm_cpu_class_local_stat* local_stat;
41 unsigned long long now = sched_clock();
43 stat->stat_lock = SPIN_LOCK_UNLOCKED;
47 for (i=0; i< NR_CPUS; i++) {
48 local_stat = &stat->local_stats[i];
50 local_stat->total = 0;
51 local_stat->last_sleep = now;
52 local_stat->cpu_demand = 0;
55 stat->effective_guarantee = 0;
56 stat->effective_limit = 0;
58 stat->effective_share = 100;
59 stat->self_effective_share = 100;
61 /**********************************************/
63 /**********************************************/
66 * How CPU demand is calculated:
67 * consider class local runqueue (clr) first
68 * at any time, a clr can at the following three states
 * -- run: a task belonging to this class is running on this cpu
70 * -- wait: at least one of its task is running, but the class is not running
71 * -- sleep: none of the task of this class is runnable
73 * cpu_demand(t1,t2) = r(t1,t2)/(r(t1,t2)+s(t1,t2))
75 * the cpu_demand of a class =
76 * sum of cpu_demand of all the class local runqueues
80 * update_cpu_demand - update a state change
82 * should be called whenever the state of a local queue changes
83 * -- when deschedule : report how much run
84 * -- when enqueue: report how much sleep
86 * to deal with excessive long run/sleep state
 * -- whenever ckrm_cpu_monitor is called, check if the class is in sleep state; if yes, update the sleep record
89 #define CKRM_CPU_DEMAND_RUN 0
90 #define CKRM_CPU_DEMAND_SLEEP 1
91 //how often should we recalculate the cpu demand, in ns
92 #define CPU_DEMAND_CAL_THRESHOLD (1000000000LL)
93 static inline void update_local_cpu_demand(struct ckrm_cpu_class_local_stat* local_stat,int state, unsigned long long len)
95 local_stat->total += len;
96 if (state == CKRM_CPU_DEMAND_RUN)
97 local_stat->run += len;
99 if (local_stat->total >= CPU_DEMAND_CAL_THRESHOLD) {
100 local_stat->total >>= CKRM_SHARE_ACCURACY;
101 if (local_stat->total > 0xFFFFFFFF)
102 local_stat->total = 0xFFFFFFFF;
104 do_div(local_stat->run,(unsigned long)local_stat->total);
105 local_stat->cpu_demand +=local_stat->run;
106 local_stat->cpu_demand >>= 1;
107 local_stat->total = 0;
112 static inline void cpu_demand_update_run(struct ckrm_cpu_class_local_stat* local_stat, unsigned long long len)
114 update_local_cpu_demand(local_stat,CKRM_CPU_DEMAND_RUN,len);
117 static inline void cpu_demand_update_sleep(struct ckrm_cpu_class_local_stat* local_stat, unsigned long long len)
119 update_local_cpu_demand(local_stat,CKRM_CPU_DEMAND_SLEEP,len);
122 #define CPU_DEMAND_ENQUEUE 0
123 #define CPU_DEMAND_DEQUEUE 1
124 #define CPU_DEMAND_DESCHEDULE 2
 * cpu_demand_event - a cpu_demand event occurred
128 * @event: one of the following three events:
129 * CPU_DEMAND_ENQUEUE: local class enqueue
130 * CPU_DEMAND_DEQUEUE: local class dequeue
 *   CPU_DEMAND_DESCHEDULE: a task belonging to a certain local class was descheduled
132 * @len: valid only for CPU_DEMAND_DESCHEDULE, how long the task has been run
134 void cpu_demand_event(struct ckrm_cpu_class_local_stat* local_stat, int event, unsigned long long len)
137 case CPU_DEMAND_ENQUEUE:
138 len = sched_clock() - local_stat->last_sleep;
139 local_stat->last_sleep = 0;
140 cpu_demand_update_sleep(local_stat,len);
142 case CPU_DEMAND_DEQUEUE:
143 local_stat->last_sleep = sched_clock();
145 case CPU_DEMAND_DESCHEDULE:
146 cpu_demand_update_run(local_stat,len);
154 * check all the class local queue
 * if the local queue is not in the runqueue, then it's in sleep state
156 * if compare to last sleep,
158 static inline void cpu_demand_check_sleep(struct ckrm_cpu_class_stat *stat, int cpu)
160 struct ckrm_cpu_class_local_stat * local_stat = &stat->local_stats[cpu];
161 unsigned long long sleep,now;
162 if (local_stat->last_sleep) {
164 sleep = now - local_stat->last_sleep;
165 local_stat->last_sleep = now;
166 cpu_demand_update_sleep(local_stat,sleep);
171 *get_self_cpu_demand - get cpu demand of the class itself (excluding children)
173 * self_cpu_demand = sum(cpu demand of all local queues)
175 static unsigned long get_self_cpu_demand(struct ckrm_cpu_class_stat
181 for_each_online_cpu(i) {
182 cpu_demand_check_sleep(stat,i);
183 cpu_demand += stat->local_stats[i].cpu_demand;
186 if (cpu_demand > CKRM_SHARE_MAX)
187 cpu_demand = CKRM_SHARE_MAX;
192 * update effective cpu demand for each class
193 * assume the root_core->parent == NULL
195 static void update_cpu_demand(struct ckrm_core_class *root_core)
197 struct ckrm_core_class *cur_core, *child_core;
198 struct ckrm_cpu_class *cls;
200 cur_core = root_core;
204 * update cpu_demand of each node
210 cls = ckrm_get_cpu_class(cur_core);
211 if (!child_core) //first child
212 cls->stat.cpu_demand = get_self_cpu_demand(&cls->stat);
214 cls->stat.cpu_demand +=
215 ckrm_get_cpu_class(child_core)->stat.cpu_demand;
216 if (cls->stat.cpu_demand > CKRM_SHARE_MAX)
217 cls->stat.cpu_demand = CKRM_SHARE_MAX;
221 child_core = ckrm_get_next_child(cur_core, child_core);
224 cur_core = child_core;
227 } else { //no more child, go back
228 child_core = cur_core;
229 cur_core = child_core->hnode.parent;
234 /**********************************************/
235 /* effective guarantee & limit */
236 /**********************************************/
237 static inline void set_effective_share(struct ckrm_cpu_class_stat *stat,
242 stat->effective_share = new_share;
245 static inline void set_self_effective_share(struct ckrm_cpu_class_stat *stat,
250 stat->self_effective_share = new_share;
253 static inline void update_child_effective(struct ckrm_core_class *parent)
255 struct ckrm_cpu_class *p_cls = ckrm_get_cpu_class(parent);
256 struct ckrm_core_class *child_core = ckrm_get_next_child(parent, NULL);
259 struct ckrm_cpu_class *c_cls = ckrm_get_cpu_class(child_core);
261 c_cls->stat.effective_guarantee =
262 p_cls->stat.effective_guarantee *
263 c_cls->shares.my_guarantee / p_cls->shares.total_guarantee;
264 c_cls->stat.effective_limit =
265 p_cls->stat.effective_guarantee * c_cls->shares.my_limit /
266 p_cls->shares.total_guarantee;
268 child_core = ckrm_get_next_child(parent, child_core);
274 * update effective guarantee and effective limit
275 * -- effective share = parent->effective->share * share/parent->total_share
276 * -- effective limit = parent->effective->share * limit/parent->total_share
277 * should be called only when class structure changed
279 static void update_effective_guarantee_limit(struct ckrm_core_class *root_core)
281 struct ckrm_core_class *cur_core, *child_core = NULL;
282 struct ckrm_cpu_class *cls;
284 cur_core = root_core;
285 cls = ckrm_get_cpu_class(cur_core);
286 cls->stat.effective_guarantee = CKRM_SHARE_MAX;
287 cls->stat.effective_limit = cls->stat.effective_guarantee;
295 update_child_effective(cur_core);
297 child_core = ckrm_get_next_child(cur_core, child_core);
300 cur_core = child_core;
303 } else { //no more child, go back
304 child_core = cur_core;
305 cur_core = child_core->hnode.parent;
310 /**********************************************/
311 /* surplus allocation */
312 /**********************************************/
315 * surplus = my_effective_share - demand
316 * if surplus < 0, surplus = 0
318 static inline int get_node_surplus(struct ckrm_cpu_class *cls)
320 int surplus = cls->stat.effective_guarantee - cls->stat.cpu_demand;
329 * consume the surplus
330 * return how much consumed
331 * set glut when necessary
333 static inline int node_surplus_consume(int old_surplus,
334 struct ckrm_core_class *child_core,
335 struct ckrm_cpu_class *p_cls)
340 struct ckrm_cpu_class *c_cls = ckrm_get_cpu_class(child_core);
342 if (c_cls->stat.glut)
346 if (c_cls->stat.effective_share >= c_cls->stat.cpu_demand) {
347 c_cls->stat.glut = 1;
352 old_surplus * c_cls->shares.my_guarantee /
353 p_cls->shares.total_guarantee;
356 inc_limit = c_cls->stat.effective_limit - c_cls->stat.effective_share;
357 if (inc_limit <= consumed) {
358 c_cls->stat.glut = 1;
359 consumed = inc_limit;
362 c_cls->stat.effective_share += consumed;
368 * re-allocate the shares for all the childs under this node
370 * 1. get total surplus
371 * 2. allocate surplus
372 * 3. set the effective_share of each node
374 static void alloc_surplus_node(struct ckrm_core_class *parent)
376 int total_surplus = 0, old_surplus = 0;
377 struct ckrm_cpu_class *p_cls = ckrm_get_cpu_class(parent);
378 struct ckrm_core_class *child_core = NULL;
383 * total_surplus = sum(child_surplus)
385 * initialize effective_share
388 child_core = ckrm_get_next_child(parent, child_core);
390 struct ckrm_cpu_class *c_cls =
391 ckrm_get_cpu_class(child_core);
392 ckrm_stat_t *stat = &c_cls->stat;
394 total_surplus += get_node_surplus(c_cls);
396 set_effective_share(stat, stat->effective_guarantee);
398 } while (child_core);
400 /*distribute the surplus */
403 if (!child_core) //keep the surplus of last round
404 old_surplus = total_surplus;
406 child_core = ckrm_get_next_child(parent, child_core);
409 node_surplus_consume(old_surplus, child_core,
412 //start a new round if something is allocated in the last round
413 } while (child_core || (total_surplus != old_surplus));
415 //any remaining surplus goes to the default class
416 self_share = p_cls->stat.effective_share *
417 p_cls->shares.unused_guarantee / p_cls->shares.total_guarantee;
418 self_share += total_surplus;
420 set_self_effective_share(&p_cls->stat, self_share);
424 * alloc_surplus - reallocate unused shares
 * class A's unused share should be allocated to its siblings
428 static void alloc_surplus(struct ckrm_core_class *root_core)
430 struct ckrm_core_class *cur_core, *child_core = NULL;
431 struct ckrm_cpu_class *cls;
433 cur_core = root_core;
434 cls = ckrm_get_cpu_class(cur_core);
436 set_effective_share(&cls->stat, cls->stat.effective_guarantee);
443 alloc_surplus_node(cur_core);
445 child_core = ckrm_get_next_child(cur_core, child_core);
448 cur_core = child_core;
451 } else { //no more child, go back
452 child_core = cur_core;
453 cur_core = child_core->hnode.parent;
459 *ckrm_cpu_monitor - adjust relative shares of the classes based on their progress
461 * this function is called every CPU_MONITOR_INTERVAL
462 * it computes the cpu demand of each class
463 * and re-allocate the un-used shares to other classes
465 void ckrm_cpu_monitor(void)
467 struct ckrm_core_class *root_core = default_cpu_class->core;
471 update_effective_guarantee_limit(root_core);
472 update_cpu_demand(root_core);
473 alloc_surplus(root_core);
476 /*****************************************************/
477 /* Supporting Functions */
478 /*****************************************************/
479 static pid_t cpu_monitor_pid = -1;
480 static int thread_exit = 0;
482 static int ckrm_cpu_monitord(void *nothing)
484 wait_queue_head_t wait;
486 init_waitqueue_head(&wait);
488 daemonize("ckrm_cpu_ctrld");
490 /*sleep for sometime before next try*/
491 interruptible_sleep_on_timeout(&wait, CPU_MONITOR_INTERVAL);
497 cpu_monitor_pid = -1;
499 printk("cpu_monitord exit\n");
503 void ckrm_start_monitor(void)
505 cpu_monitor_pid = kernel_thread(ckrm_cpu_monitord, 0, CLONE_KERNEL);
506 if (cpu_monitor_pid < 0) {
507 printk("ckrm_cpu_monitord for failed\n");
511 void ckrm_kill_monitor(void)
513 wait_queue_head_t wait;
515 init_waitqueue_head(&wait);
517 printk("killing process %d\n", cpu_monitor_pid);
518 if (cpu_monitor_pid > 0) {
520 while (thread_exit != 2) {
521 interruptible_sleep_on_timeout(&wait, interval);
/*
 * ckrm_cpu_monitor_init - module init: start the monitor daemon
 */
int ckrm_cpu_monitor_init(void)
{
	ckrm_start_monitor();
	return 0;
}
/*
 * ckrm_cpu_monitor_exit - module exit: stop the monitor daemon
 */
void ckrm_cpu_monitor_exit(void)
{
	ckrm_kill_monitor();
}
537 module_init(ckrm_cpu_monitor_init);
538 module_exit(ckrm_cpu_monitor_exit);
540 MODULE_AUTHOR("Haoqiang Zheng <hzheng@cs.columbia.edu>");
541 MODULE_DESCRIPTION("Hierarchical CKRM CPU Resource Monitor");
542 MODULE_LICENSE("GPL");