/* kernel/ckrm_sched.c - Supporting functions for ckrm scheduling
 *
 * Copyright (C) Haoqiang Zheng, IBM Corp. 2004
 * (C) Hubertus Franke, IBM Corp. 2004
 *
 * Latest version, more details at http://ckrm.sf.net
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 */
#include <linux/init.h>
#include <linux/ckrm_sched.h>
18 rwlock_t class_list_lock = RW_LOCK_UNLOCKED;
19 LIST_HEAD(active_cpu_classes); // list of active cpu classes; anchor
21 struct ckrm_cpu_class default_cpu_class_obj;
23 unsigned int ckrm_sched_mode __cacheline_aligned_in_smp =
24 #ifdef CONFIG_CKRM_CPU_SCHEDULE_AT_BOOT
25 CKRM_SCHED_MODE_ENABLED;
27 CKRM_SCHED_MODE_DISABLED;
30 static int __init ckrm_cpu_enabled_setup(char *str)
32 ckrm_sched_mode = CKRM_SCHED_MODE_ENABLED;
36 static int __init ckrm_cpu_disabled_setup(char *str)
38 ckrm_sched_mode = CKRM_SCHED_MODE_DISABLED;
42 __setup("ckrmcpu", ckrm_cpu_enabled_setup);
43 __setup("nockrmcpu",ckrm_cpu_disabled_setup);
45 struct ckrm_cpu_class * get_default_cpu_class(void) {
46 return (&default_cpu_class_obj);
/*******************************************************/
/*                  CVT Management                     */
/*******************************************************/
/* an absolute bonus of 200ms for classes when reactivated */
#define INTERACTIVE_BONUS(lrq) ((200*NSEC_PER_MS)/local_class_weight(lrq))
56 static void check_inactive_class(ckrm_lrq_t * lrq,CVT_t cur_cvt)
61 //just a safty measure
62 if (unlikely(! cur_cvt))
66 * Always leaving a small bonus for inactive classes
67 * allows them to compete for cycles immediately when the become
68 * active. This should improve interactive behavior
70 bonus = INTERACTIVE_BONUS(lrq);
71 //cvt can't be negative
72 if (likely(cur_cvt > bonus))
73 min_cvt = cur_cvt - bonus;
77 if (lrq->local_cvt < min_cvt) {
78 // if (lrq->local_cvt < min_cvt && ! lrq_nr_running(lrq)) {
81 if (unlikely(lrq->local_cvt == 0)) {
82 lrq->local_cvt = cur_cvt;
85 lost_cvt = min_cvt - lrq->local_cvt;
86 lost_cvt *= local_class_weight(lrq);
87 lrq->local_cvt = min_cvt;
90 /* add what the class lost to its savings*/
91 #if 1 /*zhq debugging*/
92 lrq->savings += lost_cvt;
94 if (lrq->savings > MAX_SAVINGS)
95 lrq->savings = MAX_SAVINGS;
96 #if 0 /* zhq debugging*/
97 printk("lrq= %x savings: %llu lost= %llu\n",(int)lrq,lrq->savings,lost_cvt);
103 * return the max_cvt of all the classes
105 CVT_t get_max_cvt(int this_cpu)
107 struct ckrm_cpu_class *clsptr;
113 list_for_each_entry(clsptr, &active_cpu_classes, links) {
114 lrq = get_ckrm_lrq(clsptr, this_cpu);
115 if (lrq->local_cvt > max_cvt)
116 max_cvt = lrq->local_cvt;
122 CVT_t get_min_cvt(int this_cpu)
124 struct ckrm_cpu_class *clsptr;
128 max_cvt = 0xFFFFFFFFFFFFFLLU;
130 list_for_each_entry(clsptr, &active_cpu_classes, links) {
131 lrq = get_ckrm_lrq(clsptr, this_cpu);
132 if (lrq->local_cvt < max_cvt)
133 max_cvt = lrq->local_cvt;
140 * update_class_cputime - updates cvt of inactive classes
141 * -- an inactive class shouldn't starve others when it comes back
142 * -- the cpu time it lost when it's inactive should be accumulated
143 * -- its accumulated saving should be compensated (in a leaky bucket fashion)
145 * class_list_lock must have been acquired
147 void update_class_cputime(int this_cpu, int idle)
149 struct ckrm_cpu_class *clsptr;
154 * a class's local_cvt must not be significantly smaller than min_cvt
155 * of active classes otherwise, it will starve other classes when it
158 * Hence we keep all local_cvt's within a range of the min_cvt off
159 * all active classes (approximated by the local_cvt of the currently
160 * running class) and account for how many cycles where thus taken
161 * from an inactive class building a savings (not to exceed a few seconds)
162 * for a class to gradually make up upon reactivation, without
163 * starvation of other classes.
166 cur_cvt = get_local_cur_cvt(this_cpu);
169 * cur_cvt == 0 means the system is now idle
170 * in this case, we use max_cvt as cur_cvt
171 * max_cvt roughly represents the cvt of the class
172 * that has just finished running
174 * fairness wouldn't be a problem since we account for whatever lost in savings
175 * if the system is not busy, the system responsiveness is not a problem.
176 * still fine if the sytem is busy, but happened to be idle at this certain point
177 * since bias toward interactive classes (class priority) is a more important way to improve system responsiveness
179 if (unlikely(! cur_cvt)) {
180 cur_cvt = get_max_cvt(this_cpu);
185 * - check the local cvt of all the classes
186 * - update total_ns received by the class
187 * - do a usage sampling for the whole class
189 list_for_each_entry(clsptr, &active_cpu_classes, links) {
190 lrq = get_ckrm_lrq(clsptr, this_cpu);
192 spin_lock(&clsptr->stat.stat_lock);
193 clsptr->stat.total_ns += lrq->uncounted_ns;
194 ckrm_sample_usage(clsptr);
195 spin_unlock(&clsptr->stat.stat_lock);
196 lrq->uncounted_ns = 0;
198 check_inactive_class(lrq,cur_cvt);
/*******************************************************/
/*           PID load balancing stuff                  */
/*******************************************************/
210 * runqueue load is the local_weight of all the classes on this cpu
211 * must be called with class_list_lock held
213 static unsigned long ckrm_cpu_load(int cpu)
215 struct ckrm_cpu_class *clsptr;
217 struct ckrm_cpu_demand_stat* l_stat;
221 list_for_each_entry(clsptr,&active_cpu_classes,links) {
222 lrq = get_ckrm_lrq(clsptr,cpu);
223 l_stat = get_cls_local_stat(clsptr,cpu);
225 load = WEIGHT_TO_SHARE(lrq->local_weight);
227 if (l_stat->cpu_demand < load)
228 load = l_stat->cpu_demand;
236 * sample pid load periodically
239 void ckrm_load_sample(ckrm_load_t* pid,int cpu)
244 load = ckrm_cpu_load(cpu);
245 err = load - pid->load_p;
253 long ckrm_get_pressure(ckrm_load_t* ckrm_load, int local_group)
256 pressure = ckrm_load->load_p * PID_KP;
257 pressure += ckrm_load->load_i * PID_KI;
258 pressure += ckrm_load->load_d * PID_KD;
264 * called after a task is switched out. Update the local cvt accounting
265 * we need to stick with long instead of long long due to nonexistent
268 void update_local_cvt(struct task_struct *p, unsigned long nsec)
270 ckrm_lrq_t * lrq = get_task_lrq(p);
271 unsigned long cvt_inc;
274 * consume from savings if eshare is larger than egrt
276 if (lrq->savings && lrq->over_weight) {
277 unsigned long savings_used;
280 savings_used >>= CKRM_WEIGHT_SHIFT;
281 savings_used *= lrq->over_weight;
282 if (savings_used > lrq->savings)
283 savings_used = lrq->savings;
284 lrq->savings -= savings_used;
287 //BUG_ON(local_class_weight(lrq) == 0);
288 cvt_inc = nsec / local_class_weight(lrq);
291 * For a certain processor, CKRM allocates CPU time propotional
292 * to the class's local_weight. So once a class consumed nsec,
293 * it will wait for X (nsec) for its next turn.
295 * X is calculated based on the following fomular
296 * nsec / local_weight < X / (CKRM_MAX_WEIGHT - local_weight)
297 * if local_weight is small, then approximated as
298 * nsec / local_weight < X / (CKRM_MAX_WEIGHT)
300 #define CVT_STARVATION_LIMIT (200LL*NSEC_PER_MS)
301 #define CVT_STARVATION_INC_LIMIT (CVT_STARVATION_LIMIT >> CKRM_WEIGHT_SHIFT)
303 if (unlikely(lrq->skewed_weight)) {
304 unsigned long long starvation_limit = CVT_STARVATION_INC_LIMIT;
306 starvation_limit *= local_class_weight(lrq);
307 if (unlikely(cvt_inc > starvation_limit))
308 cvt_inc = nsec / lrq->skewed_weight;
311 /* now update the CVT accounting */
313 lrq->local_cvt += cvt_inc;
314 lrq->uncounted_ns += nsec;
315 update_class_priority(lrq);