/* kernel/ckrm_sched.c - Supporting functions for ckrm scheduling
 *
 * Copyright (C) Haoqiang Zheng,  IBM Corp. 2004
 *           (C) Hubertus Franke, IBM Corp. 2004
 *
 * Latest version, more details at http://ckrm.sf.net
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 */

/* the original header names were stripped from this copy; these are assumed */
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/ckrm_sched.h>

rwlock_t class_list_lock = RW_LOCK_UNLOCKED;
LIST_HEAD(active_cpu_classes);   // list of active cpu classes; anchor

struct ckrm_cpu_class default_cpu_class_obj;

unsigned int ckrm_sched_mode __cacheline_aligned_in_smp =
#ifdef CONFIG_CKRM_CPU_SCHEDULE_AT_BOOT
	CKRM_SCHED_MODE_ENABLED;
#else
	CKRM_SCHED_MODE_DISABLED;
#endif

static int __init ckrm_cpu_enabled_setup(char *str)
{
	ckrm_sched_mode = CKRM_SCHED_MODE_ENABLED;
	return 1;
}

static int __init ckrm_cpu_disabled_setup(char *str)
{
	ckrm_sched_mode = CKRM_SCHED_MODE_DISABLED;
	return 1;
}

__setup("ckrmcpu", ckrm_cpu_enabled_setup);
__setup("nockrmcpu", ckrm_cpu_disabled_setup);

struct ckrm_cpu_class *get_default_cpu_class(void)
{
	return &default_cpu_class_obj;
}

/*******************************************************/
/*                  CVT Management                     */
/*******************************************************/

//an absolute bonus of 200ms for classes when reactivated
#define INTERACTIVE_BONUS(lrq) ((200*NSEC_PER_MS)/local_class_weight(lrq))

static void check_inactive_class(ckrm_lrq_t *lrq, CVT_t cur_cvt)
{
	CVT_t min_cvt;
	CVT_t bonus;

	//just a safety measure
	if (unlikely(!cur_cvt))
		return;

	/*
	 * Always leaving a small bonus for inactive classes
	 * allows them to compete for cycles immediately when they become
	 * active. This should improve interactive behavior.
	 */
	bonus = INTERACTIVE_BONUS(lrq);

	//cvt can't be negative
	if (likely(cur_cvt > bonus))
		min_cvt = cur_cvt - bonus;
	else
		min_cvt = 0;

	if (lrq->local_cvt < min_cvt) {
		// if (lrq->local_cvt < min_cvt && !lrq_nr_running(lrq)) {
		CVT_t lost_cvt;

		if (unlikely(lrq->local_cvt == 0)) {
			lrq->local_cvt = cur_cvt;
			return;
		}
		lost_cvt = min_cvt - lrq->local_cvt;
		lost_cvt *= local_class_weight(lrq);
		lrq->local_cvt = min_cvt;
		BUG_ON(lost_cvt < 0);

		/* add what the class lost to its savings */
#if 1 /*zhq debugging*/
		lrq->savings += lost_cvt;
#endif
		if (lrq->savings > MAX_SAVINGS)
			lrq->savings = MAX_SAVINGS;
#if 0 /* zhq debugging*/
		printk("lrq= %x savings: %llu lost= %llu\n",
		       (int)lrq, lrq->savings, lost_cvt);
#endif
	}
}
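/*
 * Worked example (illustrative only, not part of the original file;
 * assumes local_class_weight(lrq) == 1 so cvt units and nanoseconds
 * coincide): with cur_cvt = 1000ms and lrq->local_cvt = 300ms, the bonus
 * is 200ms, so min_cvt = 800ms.  The class is then pulled forward:
 * lost_cvt = 500ms is credited to lrq->savings (capped at MAX_SAVINGS)
 * and local_cvt becomes 800ms, leaving only the 200ms interactive bonus
 * as an immediate head start when the class is reactivated.
 */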
/*
 * return the max_cvt of all the classes
 */
CVT_t get_max_cvt(int this_cpu)
{
	struct ckrm_cpu_class *clsptr;
	ckrm_lrq_t *lrq;
	CVT_t max_cvt;

	max_cvt = 0;

	list_for_each_entry(clsptr, &active_cpu_classes, links) {
		lrq = get_ckrm_lrq(clsptr, this_cpu);
		if (lrq->local_cvt > max_cvt)
			max_cvt = lrq->local_cvt;
	}

	return max_cvt;
}

/*
 * return the min_cvt of all the classes
 */
CVT_t get_min_cvt(int this_cpu)
{
	struct ckrm_cpu_class *clsptr;
	ckrm_lrq_t *lrq;
	CVT_t min_cvt;

	min_cvt = 0xFFFFFFFFFFFFFLLU;

	list_for_each_entry(clsptr, &active_cpu_classes, links) {
		lrq = get_ckrm_lrq(clsptr, this_cpu);
		if (lrq->local_cvt < min_cvt)
			min_cvt = lrq->local_cvt;
	}

	return min_cvt;
}

/**
 * update_class_cputime - updates cvt of inactive classes
 * -- an inactive class shouldn't starve others when it comes back
 * -- the cpu time it lost while inactive should be accumulated
 * -- its accumulated savings should be compensated (in a leaky-bucket fashion)
 *
 * class_list_lock must have been acquired
 */
void update_class_cputime(int this_cpu, int idle)
{
	struct ckrm_cpu_class *clsptr;
	ckrm_lrq_t *lrq;
	CVT_t cur_cvt;

	/*
	 * A class's local_cvt must not be significantly smaller than the
	 * min_cvt of the active classes; otherwise it will starve the other
	 * classes when it is reactivated.
	 *
	 * Hence we keep all local_cvt's within a range of the min_cvt of
	 * all active classes (approximated by the local_cvt of the currently
	 * running class) and account for how many cycles were thus taken
	 * from an inactive class, building a savings (not to exceed a few
	 * seconds) that the class can gradually make up upon reactivation,
	 * without starving the other classes.
	 */
	cur_cvt = get_local_cur_cvt(this_cpu);

	/*
	 * cur_cvt == 0 means the system is now idle.
	 * In this case, we use max_cvt as cur_cvt; max_cvt roughly
	 * represents the cvt of the class that has just finished running.
	 *
	 * Fairness isn't a problem, since whatever is lost is accounted for
	 * in savings.  If the system is not busy, responsiveness is not a
	 * problem either.  It is still fine if the system is busy but happens
	 * to be idle at this particular point, since the bias toward
	 * interactive classes (class priority) is a more important way to
	 * improve system responsiveness.
	 */
	if (unlikely(!cur_cvt)) {
		cur_cvt = get_max_cvt(this_cpu);
		//return;
	}

	/*
	 * - check the local cvt of all the classes
	 * - update total_ns received by the class
	 * - do a usage sampling for the whole class
	 */
	list_for_each_entry(clsptr, &active_cpu_classes, links) {
		lrq = get_ckrm_lrq(clsptr, this_cpu);

		spin_lock(&clsptr->stat.stat_lock);
		clsptr->stat.total_ns += lrq->uncounted_ns;
		ckrm_sample_usage(clsptr);
		spin_unlock(&clsptr->stat.stat_lock);

		lrq->uncounted_ns = 0;

		check_inactive_class(lrq, cur_cvt);
	}
}

/*******************************************************/
/*              PID load balancing stuff               */
/*******************************************************/
#define PID_KP 20
#define PID_KI 60
#define PID_KD 20

/*
 * runqueue load is the local_weight of all the classes on this cpu
 * must be called with class_list_lock held
 */
static unsigned long ckrm_cpu_load(int cpu)
{
	struct ckrm_cpu_class *clsptr;
	ckrm_lrq_t *lrq;
	struct ckrm_cpu_demand_stat *l_stat;
	int total_load = 0;
	int load;

	list_for_each_entry(clsptr, &active_cpu_classes, links) {
		lrq = get_ckrm_lrq(clsptr, cpu);
		l_stat = get_cls_local_stat(clsptr, cpu);

		load = WEIGHT_TO_SHARE(lrq->local_weight);
		if (l_stat->cpu_demand < load)
			load = l_stat->cpu_demand;
		total_load += load;
	}
	return total_load;
}

/**
 * sample the pid load periodically
 */
void ckrm_load_sample(ckrm_load_t *pid, int cpu)
{
	long load;
	long err;

	load = ckrm_cpu_load(cpu);
	err = load - pid->load_p;
	pid->load_d = err;
	pid->load_p = load;
	pid->load_i *= 9;
	pid->load_i += load;
	pid->load_i /= 10;
}

long ckrm_get_pressure(ckrm_load_t *ckrm_load, int local_group)
{
	long pressure;

	pressure = ckrm_load->load_p * PID_KP;
	pressure += ckrm_load->load_i * PID_KI;
	pressure += ckrm_load->load_d * PID_KD;
	pressure /= 100;

	return pressure;
}
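/*
 * Illustrative sketch (not part of the original file): the same P/I/D
 * arithmetic as ckrm_load_sample()/ckrm_get_pressure() above, folded into
 * one hypothetical helper so the numbers are easy to follow.  Feeding a
 * constant load of 100 into a zeroed ckrm_load_t yields a pressure of 46
 * on the first sample, 31 on the second, and roughly 80 once the decaying
 * integral term has converged to the load.
 */
#if 0 /* example only */
static long ckrm_pid_example(ckrm_load_t *pid, long load)
{
	pid->load_d = load - pid->load_p;            /* derivative: change since last sample */
	pid->load_p = load;                          /* proportional: current load */
	pid->load_i = (pid->load_i * 9 + load) / 10; /* integral: decaying average */

	return (pid->load_p * PID_KP +
		pid->load_i * PID_KI +
		pid->load_d * PID_KD) / 100;
}
#endif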
/*
 * called after a task is switched out. Update the local cvt accounting.
 * We need to stick with long instead of long long because no 64-bit
 * division is available here.
 */
void update_local_cvt(struct task_struct *p, unsigned long nsec)
{
	ckrm_lrq_t *lrq = get_task_lrq(p);
	unsigned long cvt_inc;

	/*
	 * consume from savings if eshare is larger than egrt
	 */
	if (lrq->savings && lrq->over_weight) {
		unsigned long savings_used;

		savings_used = nsec;
		savings_used >>= CKRM_WEIGHT_SHIFT;
		savings_used *= lrq->over_weight;
		if (savings_used > lrq->savings)
			savings_used = lrq->savings;
		lrq->savings -= savings_used;
	}

	//BUG_ON(local_class_weight(lrq) == 0);
	cvt_inc = nsec / local_class_weight(lrq);

	/*
	 * For a given processor, CKRM allocates CPU time proportional
	 * to the class's local_weight. So once a class has consumed nsec,
	 * it will wait for X (nsec) before its next turn.
	 *
	 * X is calculated based on the following formula:
	 *    nsec / local_weight < X / (CKRM_MAX_WEIGHT - local_weight)
	 * If local_weight is small, this is approximated as
	 *    nsec / local_weight < X / CKRM_MAX_WEIGHT
	 */
#define CVT_STARVATION_LIMIT     (200LL*NSEC_PER_MS)
#define CVT_STARVATION_INC_LIMIT (CVT_STARVATION_LIMIT >> CKRM_WEIGHT_SHIFT)

	if (unlikely(lrq->skewed_weight)) {
		unsigned long long starvation_limit = CVT_STARVATION_INC_LIMIT;

		starvation_limit *= local_class_weight(lrq);
		if (unlikely(cvt_inc > starvation_limit))
			cvt_inc = nsec / lrq->skewed_weight;
	}

	/* now update the CVT accounting */
	lrq->local_cvt += cvt_inc;
	lrq->uncounted_ns += nsec;
	update_class_priority(lrq);
}
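/*
 * Worked example for the formula above (illustrative, not part of the
 * original file; CKRM_MAX_WEIGHT == 100 is a hypothetical value chosen
 * only to make the arithmetic readable): with local_weight = 25, a class
 * that consumes nsec = 10ms advances its cvt by 10ms/25, so it waits
 * roughly 10ms * (100 - 25) / 25 = 30ms before its next turn, i.e. the
 * class receives about 25% of the CPU, matching its share of the weight.
 */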