From 4878cdce5aea21aca101f10f081af23e0e7bc539 Mon Sep 17 00:00:00 2001 From: Mark Huang Date: Fri, 10 Sep 2004 15:15:24 +0000 Subject: [PATCH] Kernel crashes, back out ckrm-e15-bugfixes-merge for now --- include/linux/ckrm_classqueue.h | 2 +- include/linux/ckrm_sched.h | 305 +++++------- include/linux/sched.h | 26 +- init/Kconfig | 11 +- init/main.c | 7 +- kernel/Makefile | 5 +- kernel/ckrm/Makefile | 3 +- kernel/ckrm/ckrm_cpu_class.c | 129 +++-- kernel/ckrm/ckrm_cpu_monitor.c | 686 ++++++------------------- kernel/ckrm/ckrm_tc.c | 1 - kernel/ckrm/rbce/rbcemod.c | 32 +- kernel/ckrm_classqueue.c | 49 +- kernel/ckrm_sched.c | 113 ++--- kernel/sched.c | 852 +++++++++++++------------------- kernel/vserver/dlimit.c | 4 - kernel/vserver/sysctl.c | 3 - 16 files changed, 735 insertions(+), 1493 deletions(-) diff --git a/include/linux/ckrm_classqueue.h b/include/linux/ckrm_classqueue.h index a825336cb..1bdf9b775 100644 --- a/include/linux/ckrm_classqueue.h +++ b/include/linux/ckrm_classqueue.h @@ -116,7 +116,7 @@ void classqueue_update_prio(struct classqueue_struct *cq, cq_node_t * node, int cq_node_t *classqueue_get_head(struct classqueue_struct *cq); /*update the base priority of the classqueue*/ -void classqueue_update_base(struct classqueue_struct *cq); +void classqueue_update_base(struct classqueue_struct *cq, int new_base); /** * class_compare_prio: compare the priority of this two nodes diff --git a/include/linux/ckrm_sched.h b/include/linux/ckrm_sched.h index 6b55e2c7f..9d82214fb 100644 --- a/include/linux/ckrm_sched.h +++ b/include/linux/ckrm_sched.h @@ -15,35 +15,30 @@ #ifndef _CKRM_SCHED_H #define _CKRM_SCHED_H +#define CC_BUG_ON_DO(cond,action) do { if (cond) action; BUG_ON(cond); } while(0) +#define CC_BUG_ON(cond) BUG_ON(cond) + #include #include #include -#include -#define BITMAP_SIZE ((((MAX_PRIO+1+7)/8)+sizeof(long)-1)/sizeof(long)) +//update every second +#define CVT_UPDATE_TICK (1*HZ/1 ?: 1) +#define CLASS_BONUS_RATE 22 // shift from ns to increase class bonus +#define PRIORITY_BONUS_RATE 0 // ?? 
Hubertus +#define BITMAP_SIZE ((((MAX_PRIO+1+7)/8)+sizeof(long)-1)/sizeof(long)) struct prio_array { - unsigned int nr_active; + int nr_active; unsigned long bitmap[BITMAP_SIZE]; struct list_head queue[MAX_PRIO]; }; -#ifdef CONFIG_CKRM_CPU_SCHEDULE -#define rq_active(p,rq) (get_task_lrq(p)->active) -#define rq_expired(p,rq) (get_task_lrq(p)->expired) -int __init init_ckrm_sched_res(void); -#else -#define rq_active(p,rq) (rq->active) -#define rq_expired(p,rq) (rq->expired) -static inline void init_ckrm_sched_res(void) {} -static inline int ckrm_cpu_monitor_init(void) {return 0;} -#endif - -#ifdef CONFIG_CKRM_CPU_SCHEDULE -struct ckrm_runqueue { +struct ckrm_local_runqueue { cq_node_t classqueue_linkobj; /*links in classqueue */ struct ckrm_cpu_class *cpu_class; // class it belongs to struct classqueue_struct *classqueue; // classqueue it belongs tow + CVT_t uncounted_cvt; unsigned long long uncounted_ns; prio_array_t *active, *expired, arrays[2]; @@ -60,15 +55,19 @@ struct ckrm_runqueue { * updated on enqueue, dequeue */ int top_priority; - CVT_t local_cvt; - - unsigned long lrq_load; - int local_weight; - + CVT_t local_cvt; // snapshot of local_cvt, update on every loadbalance unsigned long magic; //for debugging }; -typedef struct ckrm_runqueue ckrm_lrq_t; +/** + * @last_sleep: the last time it sleeps, last_sleep = 0 when not sleeping + */ +struct ckrm_cpu_class_local_stat { + unsigned long long run; + unsigned long long total; + unsigned long long last_sleep; + unsigned long cpu_demand; /*estimated cpu demand */ +}; /** * ckrm_cpu_class_stat - cpu usage statistics maintained for each class @@ -79,24 +78,23 @@ struct ckrm_cpu_class_stat { unsigned long long total_ns; /*how much nano-secs it has consumed */ - struct ckrm_cpu_demand_stat local_stats[NR_CPUS]; - - /* - * - */ - unsigned long max_demand; /* the maximun a class can consume */ - int egrt,megrt; /*effective guarantee*/ - int ehl,mehl; /*effective hard limit, my effective hard limit*/ + struct ckrm_cpu_class_local_stat local_stats[NR_CPUS]; + unsigned long cpu_demand; + /*temp stat used by cpu monitor */ + int effective_guarantee; + int effective_limit; + int glut; //true or false /* - * eshare: for both default class and its children - * meshare: just for the default class + * effective_share: for both default class and its children + * self_effective_share: just for the default class */ - int eshare; - int meshare; + int effective_share; + int self_effective_share; }; -#define CKRM_CPU_CLASS_MAGIC 0x7af2abe3 +typedef struct ckrm_cpu_class_stat ckrm_stat_t; + /* * manages the class status * there should be only one instance of this object for each class in the whole system @@ -106,67 +104,72 @@ struct ckrm_cpu_class { struct ckrm_core_class *parent; struct ckrm_shares shares; spinlock_t cnt_lock; // always grab parent's lock first and then child's + CVT_t global_cvt; // total cummulative virtual time struct ckrm_cpu_class_stat stat; struct list_head links; // for linking up in cpu classes - ckrm_lrq_t local_queues[NR_CPUS]; // runqueues - unsigned long magic; //for debugging + struct ckrm_local_runqueue local_queues[NR_CPUS]; // runqueues }; -#define cpu_class_weight(cls) (cls->stat.meshare) -#define local_class_weight(lrq) (lrq->local_weight) +#if CONFIG_CKRM_CPU_SCHEDULE +#define rq_active(p,rq) (get_task_class_queue(p)->active) +#define rq_expired(p,rq) (get_task_class_queue(p)->expired) +#else +#define rq_active(p,rq) (rq->active) +#define rq_expired(p,rq) (rq->expired) +#endif -static inline int valid_cpu_class(struct 
ckrm_cpu_class * cls) -{ - return (cls && cls->magic == CKRM_CPU_CLASS_MAGIC); -} +//#define cpu_class_weight(cls) (cls->shares.my_guarantee) +#define cpu_class_weight(cls) (cls->stat.self_effective_share) + +#define bpt_queue(cpu) (& (cpu_rq(cpu)->classqueue) ) +CVT_t get_min_cvt(int cpu); struct classqueue_struct *get_cpu_classqueue(int cpu); -struct ckrm_cpu_class * get_default_cpu_class(void); -#define lrq_nr_running(lrq) \ - (lrq->active->nr_active + lrq->expired->nr_active) +extern struct ckrm_cpu_class default_cpu_class_obj; +#define default_cpu_class (&default_cpu_class_obj) -static inline ckrm_lrq_t * -get_ckrm_lrq(struct ckrm_cpu_class*cls, int cpu) +#define local_queue_nr_running(local_queue) \ + (local_queue->active->nr_active + local_queue->expired->nr_active) + +static inline struct ckrm_local_runqueue * +get_ckrm_local_runqueue(struct ckrm_cpu_class*cls, int cpu) { return &(cls->local_queues[cpu]); } -static inline ckrm_lrq_t *get_task_lrq(struct task_struct *p) +static inline struct ckrm_local_runqueue *get_task_class_queue(struct task_struct *p) { return &(p->cpu_class->local_queues[task_cpu(p)]); } #define task_list_entry(list) list_entry(list,struct task_struct,run_list) -#define class_list_entry(list) list_entry(list,struct ckrm_runqueue,classqueue_linkobj) +#define class_list_entry(list) list_entry(list,struct ckrm_local_runqueue,classqueue_linkobj) /* some additional interfaces exported from sched.c */ struct runqueue; +void dequeue_task(struct task_struct *p, prio_array_t * array); +void enqueue_task(struct task_struct *p, prio_array_t * array); +struct runqueue *task_rq_lock(task_t * p, unsigned long *flags); +void task_rq_unlock(struct runqueue *rq, unsigned long *flags); +extern spinlock_t cvt_lock; extern rwlock_t class_list_lock; extern struct list_head active_cpu_classes; -unsigned int task_timeslice(task_t *p); -void _ckrm_cpu_change_class(task_t *task, struct ckrm_cpu_class *newcls); +/*functions exported by ckrm_cpu_class.c*/ +int __init init_ckrm_sched_res(void); void init_cpu_classes(void); -void init_cpu_class(struct ckrm_cpu_class *cls,ckrm_shares_t* shares); -void ckrm_cpu_change_class(void *task, void *old, void *new); - - -#define CPU_DEMAND_ENQUEUE 0 -#define CPU_DEMAND_DEQUEUE 1 -#define CPU_DEMAND_DESCHEDULE 2 -#define CPU_DEMAND_INIT 3 /*functions exported by ckrm_cpu_monitor.c*/ void ckrm_cpu_monitor(void); -int ckrm_cpu_monitor_init(void); void ckrm_cpu_stat_init(struct ckrm_cpu_class_stat *stat); -void cpu_demand_event(struct ckrm_cpu_demand_stat* local_stat, int event, unsigned long long len); -void adjust_local_weight(void); +#define CPU_DEMAND_ENQUEUE 0 +#define CPU_DEMAND_DEQUEUE 1 +#define CPU_DEMAND_DESCHEDULE 2 +void cpu_demand_event(struct ckrm_cpu_class_local_stat* local_stat, int event, unsigned long long len); -#define get_task_lrq_stat(p) (&(p)->cpu_class->stat.local_stats[task_cpu(p)]) -#define get_cls_local_stat(cls,cpu) (&(cls)->stat.local_stats[cpu]) -#define get_rq_local_stat(lrq,cpu) (get_cls_local_stat((lrq)->cpu_class,cpu)) +#define get_task_local_stat(p) (&(p)->cpu_class->stat.local_stats[task_cpu(p)]) +#define get_rq_local_stat(lrq,cpu) (&(lrq)->cpu_class->stat.local_stats[cpu]) /** * get_effective_prio: return the effective priority of a class local queue @@ -178,15 +181,14 @@ void adjust_local_weight(void); * currently, prio increases by 1 if either: top_priority increase by one * or, local_cvt increases by 4ms */ -#define CLASS_QUANTIZER 22 //shift from ns to increase class bonus -#define PRIORITY_QUANTIZER 0 //controls 
how much a high prio task can borrow -#define CVT_INTERACTIVE_BONUS ((CLASSQUEUE_SIZE << CLASS_QUANTIZER)*2) -static inline int get_effective_prio(ckrm_lrq_t * lrq) +static inline int get_effective_prio(struct ckrm_local_runqueue * lcq) { int prio; - prio = lrq->local_cvt >> CLASS_QUANTIZER; // cumulative usage - prio += lrq->top_priority >> PRIORITY_QUANTIZER; // queue urgency + // cumulative usage + prio = lcq->local_cvt >> CLASS_BONUS_RATE; + // queue urgency + prio += lcq->top_priority >> PRIORITY_BONUS_RATE; return prio; } @@ -204,8 +206,9 @@ static inline int get_effective_prio(ckrm_lrq_t * lrq) * -- rq_get_next_task (queue switch) * -- update_local_cvt * -- schedule + * -- update_global_cvt */ -static inline void update_class_priority(ckrm_lrq_t *local_rq) +static inline void update_class_priority(struct ckrm_local_runqueue *local_rq) { int effective_prio = get_effective_prio(local_rq); classqueue_update_prio(local_rq->classqueue, @@ -217,81 +220,42 @@ static inline void update_class_priority(ckrm_lrq_t *local_rq) * set the new top priority and reposition the queue * called when: task enqueue/dequeue and queue switch */ -static inline void set_top_priority(ckrm_lrq_t *lrq, +static inline void set_top_priority(struct ckrm_local_runqueue *class_queue, int new_priority) { - lrq->top_priority = new_priority; - update_class_priority(lrq); -} - -/* - * task_load: how much load this task counts - */ -static inline unsigned long task_load(struct task_struct* p) -{ - return (task_timeslice(p) * p->demand_stat.cpu_demand); -} - -/* - * runqueue load is the local_weight of all the classes on this cpu - * must be called with class_list_lock held - */ -static inline unsigned long ckrm_cpu_load(int cpu) -{ - struct ckrm_cpu_class *clsptr; - ckrm_lrq_t* lrq; - struct ckrm_cpu_demand_stat* l_stat; - int total_load = 0; - int load; - - list_for_each_entry(clsptr,&active_cpu_classes,links) { - lrq = get_ckrm_lrq(clsptr,cpu); - l_stat = get_cls_local_stat(clsptr,cpu); - load = lrq->local_weight; - if (l_stat->cpu_demand < load) - load = l_stat->cpu_demand; - total_load += load; - } - return total_load; + class_queue->top_priority = new_priority; + update_class_priority(class_queue); } static inline void class_enqueue_task(struct task_struct *p, prio_array_t * array) { - ckrm_lrq_t *lrq; + struct ckrm_local_runqueue *queue; int effective_prio; + queue = get_task_class_queue(p); - lrq = get_task_lrq(p); - - cpu_demand_event(&p->demand_stat,CPU_DEMAND_ENQUEUE,0); - lrq->lrq_load += task_load(p); - - if ((p->prio < lrq->top_priority) && (array == lrq->active)) - set_top_priority(lrq, p->prio); - - if (! cls_in_classqueue(&lrq->classqueue_linkobj)) { - cpu_demand_event(get_task_lrq_stat(p),CPU_DEMAND_ENQUEUE,0); - effective_prio = get_effective_prio(lrq); - classqueue_enqueue(lrq->classqueue, &lrq->classqueue_linkobj, effective_prio); + if (! 
cls_in_classqueue(&queue->classqueue_linkobj)) { + cpu_demand_event(get_task_local_stat(p),CPU_DEMAND_ENQUEUE,0); + /*make sure the cvt of this class is up to date*/ + queue->local_cvt = get_min_cvt(task_cpu(p)); + effective_prio = get_effective_prio(queue); + classqueue_enqueue(queue->classqueue, &queue->classqueue_linkobj, effective_prio); } + + if ((p->prio < queue->top_priority) && (array == queue->active)) + set_top_priority(queue, p->prio); } static inline void class_dequeue_task(struct task_struct *p, prio_array_t * array) { - ckrm_lrq_t *lrq = get_task_lrq(p); - unsigned long load = task_load(p); - - BUG_ON(lrq->lrq_load < load); - lrq->lrq_load -= load; + struct ckrm_local_runqueue *queue = get_task_class_queue(p); - cpu_demand_event(&p->demand_stat,CPU_DEMAND_DEQUEUE,0); - - if ((array == lrq->active) && (p->prio == lrq->top_priority) + if ((array == queue->active) && (p->prio == queue->top_priority) && list_empty(&(array->queue[p->prio]))) - set_top_priority(lrq, + set_top_priority(queue, find_next_bit(array->bitmap, MAX_PRIO, p->prio)); } @@ -302,81 +266,32 @@ static inline void class_dequeue_task(struct task_struct *p, */ static inline void update_local_cvt(struct task_struct *p, unsigned long nsec) { - ckrm_lrq_t * lrq = get_task_lrq(p); - - unsigned long cvt_inc = nsec / local_class_weight(lrq); + struct ckrm_local_runqueue *class_queue = get_task_class_queue(p); + struct ckrm_cpu_class *cls = class_queue->cpu_class; - lrq->local_cvt += cvt_inc; - lrq->uncounted_ns += nsec; + unsigned long cvt_inc = nsec / cpu_class_weight(cls); - update_class_priority(lrq); -} - -static inline int class_preempts_curr(struct task_struct * p, struct task_struct* curr) -{ - struct cq_node_struct* node1 = &(get_task_lrq(p)->classqueue_linkobj); - struct cq_node_struct* node2 = &(get_task_lrq(curr)->classqueue_linkobj); + class_queue->local_cvt += cvt_inc; + class_queue->uncounted_cvt += cvt_inc; - return (class_compare_prio(node1,node2) < 0); + class_queue->uncounted_ns += nsec; + update_class_priority(class_queue); } /* - * return a random value with range [0, (val-1)] + * called during loadbalancing + * to charge the class with locally accumulated cvt */ -static inline int get_ckrm_rand(unsigned long val) -{ - int rand; - - if (! 
val) - return 0; - - get_random_bytes(&rand,sizeof(rand)); - return (rand % val); -} - -void update_class_cputime(int this_cpu); - -/**********************************************/ -/* PID_LOAD_BALANCING */ -/**********************************************/ -struct ckrm_load_struct { - unsigned long load_p; /*propotional*/ - unsigned long load_i; /*integral */ - long load_d; /*derivative */ -}; - -typedef struct ckrm_load_struct ckrm_load_t; +void update_global_cvts(int this_cpu); -static inline void ckrm_load_init(ckrm_load_t* ckrm_load) { - ckrm_load->load_p = 0; - ckrm_load->load_i = 0; - ckrm_load->load_d = 0; -} - -void ckrm_load_sample(ckrm_load_t* ckrm_load,int cpu); -long pid_get_pressure(ckrm_load_t* ckrm_load, int local_group); -#define rq_ckrm_load(rq) (&((rq)->ckrm_load)) - -static inline void ckrm_sched_tick(int j,int this_cpu,struct ckrm_load_struct* ckrm_load) +/** + * + */ +static inline int class_preempts_curr(struct task_struct * p, struct task_struct* curr) { -#define CVT_UPDATE_TICK ((HZ/2)?:1) -#define CKRM_BASE_UPDATE_RATE 400 - - read_lock(&class_list_lock); - -#ifdef CONFIG_SMP - ckrm_load_sample(ckrm_load,this_cpu); -#endif + struct cq_node_struct* node1 = &(get_task_class_queue(p)->classqueue_linkobj); + struct cq_node_struct* node2 = &(get_task_class_queue(curr)->classqueue_linkobj); - if (!(j % CVT_UPDATE_TICK)) - update_class_cputime(this_cpu); - - if (! (j % CKRM_BASE_UPDATE_RATE)) - classqueue_update_base(get_cpu_classqueue(this_cpu)); - - read_unlock(&class_list_lock); + return (class_compare_prio(node1,node2) < 0); } - -#endif /*CONFIG_CKRM_CPU_SCHEDULE */ - #endif diff --git a/include/linux/sched.h b/include/linux/sched.h index 0199d9de6..b922e873e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -398,24 +398,6 @@ int set_current_groups(struct group_info *group_info); struct audit_context; /* See audit.c */ struct mempolicy; -#ifdef CONFIG_CKRM_CPU_SCHEDULE -/** - * ckrm_cpu_demand_stat - used to track the cpu demand of a task/class - * @run: how much time it has been running since the counter started - * @total: total time since the counter started - * @last_sleep: the last time it sleeps, last_sleep = 0 when not sleeping - * @recalc_interval: how often do we recalculate the cpu_demand - * @cpu_demand: moving average of run/total - */ -struct ckrm_cpu_demand_stat { - unsigned long long run; - unsigned long long total; - unsigned long long last_sleep; - unsigned long long recalc_interval; - unsigned long cpu_demand; /*estimated cpu demand */ -}; -#endif - struct task_struct { volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ struct thread_info *thread_info; @@ -507,6 +489,7 @@ struct task_struct { /* signal handlers */ struct signal_struct *signal; struct sighand_struct *sighand; + sigset_t blocked, real_blocked; struct sigpending pending; @@ -565,9 +548,7 @@ struct task_struct { struct list_head taskclass_link; #ifdef CONFIG_CKRM_CPU_SCHEDULE struct ckrm_cpu_class *cpu_class; - //track cpu demand of this task - struct ckrm_cpu_demand_stat demand_stat; -#endif //CONFIG_CKRM_CPU_SCHEDULE +#endif #endif // CONFIG_CKRM_TYPE_TASKCLASS #endif // CONFIG_CKRM @@ -893,7 +874,6 @@ static inline int capable(int cap) } #endif - /* * Routines for handling mm_structs */ @@ -1027,7 +1007,7 @@ static inline struct mm_struct * get_task_mm(struct task_struct * task) return mm; } - + /* set thread flags in other task's structures * - see asm/thread_info.h for TIF_xxxx flags available */ diff --git a/init/Kconfig b/init/Kconfig index 
1c01815d6..77387418e 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -172,12 +172,21 @@ config CKRM_RES_NUMTASKS config CKRM_CPU_SCHEDULE bool "CKRM CPU scheduler" depends on CKRM_TYPE_TASKCLASS - default y + default m help Use CKRM CPU scheduler instead of Linux Scheduler Say N if unsure, Y to use the feature. +config CKRM_CPU_MONITOR + bool "CKRM CPU Resoure Monitor" + depends on CKRM_CPU_SCHEDULE + default m + help + Monitor CPU Resource Usage of the classes + + Say N if unsure, Y to use the feature. + config CKRM_TYPE_SOCKETCLASS bool "Class Manager for socket groups" depends on CKRM diff --git a/init/main.c b/init/main.c index 502ae948e..5c3a795b7 100644 --- a/init/main.c +++ b/init/main.c @@ -50,7 +50,11 @@ #include #include -#include +#ifdef CONFIG_CKRM_CPU_SCHEDULE +int __init init_ckrm_sched_res(void); +#else +#define init_ckrm_sched_res() ((void)0) +#endif /* * This is one of the first .c files built. Error out early @@ -462,7 +466,6 @@ asmlinkage void __init start_kernel(void) * printk() and can access its per-cpu storage. */ smp_prepare_boot_cpu(); - /* * Set up the scheduler prior starting any interrupts (such as the * timer interrupt). Full topology setup happens at smp_init() diff --git a/kernel/Makefile b/kernel/Makefile index ec5001052..905f3c59d 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -27,9 +27,12 @@ obj-$(CONFIG_COMPAT) += compat.o obj-$(CONFIG_IKCONFIG) += configs.o obj-$(CONFIG_IKCONFIG_PROC) += configs.o obj-$(CONFIG_STOP_MACHINE) += stop_machine.o -obj-$(CONFIG_CKRM_CPU_SCHEDULE) += ckrm_classqueue.o ckrm_sched.o +obj-$(CONFIG_CKRM_CPU_SCHEDULE) += ckrm_classqueue.o +obj-$(CONFIG_CKRM_CPU_SCHEDULE) += ckrm_sched.o obj-$(CONFIG_AUDIT) += audit.o obj-$(CONFIG_AUDITSYSCALL) += auditsc.o +obj-$(CONFIG_KGDB) += kgdbstub.o + ifneq ($(CONFIG_IA64),y) # According to Alan Modra , the -fno-omit-frame-pointer is diff --git a/kernel/ckrm/Makefile b/kernel/ckrm/Makefile index c039f8e79..3da88775d 100644 --- a/kernel/ckrm/Makefile +++ b/kernel/ckrm/Makefile @@ -9,4 +9,5 @@ endif obj-$(CONFIG_CKRM_RES_NUMTASKS) += ckrm_tasks.o obj-$(CONFIG_CKRM_TYPE_SOCKETCLASS) += ckrm_sockc.o obj-$(CONFIG_CKRM_RES_LISTENAQ) += ckrm_listenaq.o - obj-$(CONFIG_CKRM_CPU_SCHEDULE) += ckrm_cpu_class.o ckrm_cpu_monitor.o + obj-$(CONFIG_CKRM_CPU_SCHEDULE) += ckrm_cpu_class.o + obj-$(CONFIG_CKRM_CPU_MONITOR) += ckrm_cpu_monitor.o diff --git a/kernel/ckrm/ckrm_cpu_class.c b/kernel/ckrm/ckrm_cpu_class.c index a066e7330..0ded7f3c6 100644 --- a/kernel/ckrm/ckrm_cpu_class.c +++ b/kernel/ckrm/ckrm_cpu_class.c @@ -23,31 +23,17 @@ #include #include -struct ckrm_res_ctlr cpu_rcbs; -/** - * insert_cpu_class - insert a class to active_cpu_class list - * - * insert the class in decreasing order of class weight - */ -static inline void insert_cpu_class(struct ckrm_cpu_class *cls) -{ - list_add(&cls->links,&active_cpu_classes); -} +struct ckrm_res_ctlr cpu_rcbs; /* * initialize a class object and its local queues */ -void init_cpu_class(struct ckrm_cpu_class *cls,ckrm_shares_t* shares) + static void init_cpu_class(struct ckrm_cpu_class *cls,ckrm_shares_t* shares) { int i,j,k; prio_array_t *array; - ckrm_lrq_t* queue; - - cls->shares = *shares; - cls->cnt_lock = SPIN_LOCK_UNLOCKED; - ckrm_cpu_stat_init(&cls->stat); - cls->magic = CKRM_CPU_CLASS_MAGIC; + struct ckrm_local_runqueue* queue; for (i = 0 ; i < NR_CPUS ; i++) { queue = &cls->local_queues[i]; @@ -71,37 +57,35 @@ void init_cpu_class(struct ckrm_cpu_class *cls,ckrm_shares_t* shares) queue->classqueue = get_cpu_classqueue(i); queue->top_priority = 
MAX_PRIO; cq_node_init(&queue->classqueue_linkobj); - queue->local_cvt = CVT_INTERACTIVE_BONUS; - queue->lrq_load = 0; - queue->local_weight = cpu_class_weight(cls); + queue->local_cvt = 0; + queue->uncounted_cvt = 0; queue->uncounted_ns = 0; queue->magic = 0x43FF43D7; } + cls->shares = *shares; + cls->global_cvt = 0; + cls->cnt_lock = SPIN_LOCK_UNLOCKED; + ckrm_cpu_stat_init(&cls->stat); + // add to class list write_lock(&class_list_lock); - insert_cpu_class(cls); + list_add(&cls->links,&active_cpu_classes); write_unlock(&class_list_lock); } static inline void set_default_share(ckrm_shares_t *shares) { shares->my_guarantee = 0; - shares->total_guarantee = CKRM_SHARE_DFLT_TOTAL_GUARANTEE; - shares->unused_guarantee = CKRM_SHARE_DFLT_TOTAL_GUARANTEE; shares->my_limit = CKRM_SHARE_DFLT_MAX_LIMIT; + shares->total_guarantee = CKRM_SHARE_DFLT_TOTAL_GUARANTEE; shares->max_limit = CKRM_SHARE_DFLT_MAX_LIMIT; - shares->cur_max_limit = 0; + shares->unused_guarantee = CKRM_SHARE_DFLT_TOTAL_GUARANTEE; + shares->cur_max_limit = CKRM_SHARE_DFLT_MAX_LIMIT; } -struct ckrm_cpu_class * ckrm_get_cpu_class(struct ckrm_core_class *core) -{ - struct ckrm_cpu_class * cls; - cls = ckrm_get_res_class(core, cpu_rcbs.resid, struct ckrm_cpu_class); - if (valid_cpu_class(cls)) - return cls; - else - return NULL; +struct ckrm_cpu_class * ckrm_get_cpu_class(struct ckrm_core_class *core) { + return ckrm_get_res_class(core, cpu_rcbs.resid, struct ckrm_cpu_class); } @@ -110,7 +94,7 @@ void* ckrm_alloc_cpu_class(struct ckrm_core_class *core, struct ckrm_core_class struct ckrm_cpu_class *cls; if (! parent) /*root class*/ - cls = get_default_cpu_class(); + cls = default_cpu_class; else cls = (struct ckrm_cpu_class *) kmalloc(sizeof(struct ckrm_cpu_class),GFP_ATOMIC); @@ -129,7 +113,7 @@ void* ckrm_alloc_cpu_class(struct ckrm_core_class *core, struct ckrm_core_class cls->parent = parent; } } else - printk(KERN_ERR"alloc_cpu_class failed\n"); + printk("alloc_cpu_class failed GFP_ATOMIC\n"); return cls; } @@ -148,7 +132,7 @@ static void ckrm_free_cpu_class(void *my_res) return; /*the default class can't be freed*/ - if (cls == get_default_cpu_class()) + if (cls == default_cpu_class) return; // Assuming there will be no children when this function is called @@ -203,16 +187,7 @@ int ckrm_cpu_set_share(void *my_res, struct ckrm_shares *new_share) parres = NULL; } - /* - * hzheng: CKRM_SHARE_DONTCARE should be handled - */ - if (new_share->my_guarantee == CKRM_SHARE_DONTCARE) - new_share->my_guarantee = 0; - rc = set_shares(new_share, cur, par); - if (cur->my_limit == CKRM_SHARE_DONTCARE) - cur->my_limit = cur->max_limit; - spin_unlock(&cls->cnt_lock); if (cls->parent) { @@ -221,6 +196,9 @@ int ckrm_cpu_set_share(void *my_res, struct ckrm_shares *new_share) return rc; } +/* + * translate the global_CVT to ticks + */ static int ckrm_cpu_get_share(void *my_res, struct ckrm_shares *shares) { @@ -235,54 +213,64 @@ static int ckrm_cpu_get_share(void *my_res, int ckrm_cpu_get_stats(void *my_res, struct seq_file * sfile) { struct ckrm_cpu_class *cls = my_res; - struct ckrm_cpu_class_stat* stat = &cls->stat; - ckrm_lrq_t* lrq; - int i; if (!cls) return -EINVAL; seq_printf(sfile, "-------- CPU Class Status Start---------\n"); - seq_printf(sfile, "Share:\n\tgrt= %d limit= %d total_grt= %d max_limit= %d\n", + seq_printf(sfile, " gua= %d limit= %d\n", cls->shares.my_guarantee, - cls->shares.my_limit, + cls->shares.my_limit); + seq_printf(sfile, " total_gua= %d limit= %d\n", cls->shares.total_guarantee, cls->shares.max_limit); - 
seq_printf(sfile, "\tunused_grt= %d cur_max_limit= %d\n", + seq_printf(sfile, " used_gua= %d cur_limit= %d\n", cls->shares.unused_guarantee, cls->shares.cur_max_limit); - seq_printf(sfile, "Effective:\n\tegrt= %d\n",stat->egrt); - seq_printf(sfile, "\tmegrt= %d\n",stat->megrt); - seq_printf(sfile, "\tehl= %d\n",stat->ehl); - seq_printf(sfile, "\tmehl= %d\n",stat->mehl); - seq_printf(sfile, "\teshare= %d\n",stat->eshare); - seq_printf(sfile, "\tmeshare= %d\n",cpu_class_weight(cls)); - seq_printf(sfile, "\ttotal_ns= %llu\n",stat->total_ns); - seq_printf(sfile, "\tmax_demand= %lu\n",stat->max_demand); - for_each_online_cpu(i) { - lrq = get_ckrm_lrq(cls,i); - seq_printf(sfile, "\tlrq %d demand= %lu weight= %d lrq_load= %lu cvt= %llu\n",i,stat->local_stats[i].cpu_demand,local_class_weight(lrq),lrq->lrq_load,lrq->local_cvt); - } - + seq_printf(sfile, " Share= %d\n",cpu_class_weight(cls)); + seq_printf(sfile, " cvt= %llu\n",cls->local_queues[0].local_cvt); + seq_printf(sfile, " total_ns= %llu\n",cls->stat.total_ns); + seq_printf(sfile, " prio= %d\n",cls->local_queues[0].classqueue_linkobj.prio); + seq_printf(sfile, " index= %d\n",cls->local_queues[0].classqueue_linkobj.index); + seq_printf(sfile, " run= %llu\n",cls->stat.local_stats[0].run); + seq_printf(sfile, " total= %llu\n",cls->stat.local_stats[0].total); + seq_printf(sfile, " cpu_demand= %lu\n",cls->stat.cpu_demand); + + seq_printf(sfile, " effective_guarantee= %d\n",cls->stat.effective_guarantee); + seq_printf(sfile, " effective_limit= %d\n",cls->stat.effective_limit); + seq_printf(sfile, " effective_share= %d\n",cls->stat.effective_share); seq_printf(sfile, "-------- CPU Class Status END ---------\n"); + return 0; } /* * task will remain in the same cpu but on a different local runqueue */ -void ckrm_cpu_change_class(void *task, void *old, void *new) +static void ckrm_cpu_change_class(void *task, void *old, void *new) { struct task_struct *tsk = task; struct ckrm_cpu_class *newcls = new; + unsigned long flags; + struct runqueue *rq; + prio_array_t *array; /*sanity checking*/ if (!task || ! 
old || !new) return; - _ckrm_cpu_change_class(tsk,newcls); + rq = task_rq_lock(tsk,&flags); + array = tsk->array; + if (array) { + dequeue_task(tsk,array); + tsk->cpu_class = newcls; + enqueue_task(tsk,rq_active(tsk,rq)); + } else { + tsk->cpu_class = newcls; + } + task_rq_unlock(rq,&flags); } /*dummy function, not used*/ @@ -309,7 +297,7 @@ static int ckrm_cpu_set_config(void *my_res, const char *cfgstr) } struct ckrm_res_ctlr cpu_rcbs = { - .res_name = "cpu", + .res_name = "CKRM CPU Class", .res_hdepth = 1, .resid = -1, .res_alloc = ckrm_alloc_cpu_class, @@ -351,11 +339,10 @@ void init_cpu_classes(void) //init classqueues for each processor for (i=0; i < NR_CPUS; i++) classqueue_init(get_cpu_classqueue(i)); - - /* - * hzheng: initialize the default cpu class - * required for E14/E15 since ckrm_init is called after sched_init - */ +/* + * hzheng: initialize the default cpu class + * required for E14 since ckrm_init is called after sched_init + */ ckrm_alloc_cpu_class(NULL,NULL); } diff --git a/kernel/ckrm/ckrm_cpu_monitor.c b/kernel/ckrm/ckrm_cpu_monitor.c index 8d6f301dc..674ee6e50 100644 --- a/kernel/ckrm/ckrm_cpu_monitor.c +++ b/kernel/ckrm/ckrm_cpu_monitor.c @@ -28,85 +28,36 @@ #include #include -#define CPU_MONITOR_INTERVAL (2*HZ) /*how often do we adjust the shares*/ -#define CKRM_SHARE_ACCURACY 10 +#define CPU_MONITOR_INTERVAL (4*HZ) /*how often do we adjust the shares*/ +#define CKRM_SHARE_ACCURACY 7 #define CKRM_SHARE_MAX (1<shares.my_limit; -} - -static inline int get_mysoft_limit(struct ckrm_cpu_class *cls) -{ - return cls->shares.total_guarantee; -} - -static inline int get_hard_limit(struct ckrm_cpu_class *cls) -{ - return cls->shares.total_guarantee; -} - -static inline int get_myhard_limit(struct ckrm_cpu_class *cls) -{ - return cls->shares.total_guarantee; -} - - -static inline void cpu_demand_stat_init(struct ckrm_cpu_demand_stat* local_stat, int type) -{ - unsigned long long now = sched_clock(); - - local_stat->run = 0; - local_stat->total = 0; - local_stat->last_sleep = now; - switch (type) { - case CPU_DEMAND_TP_CLASS: - local_stat->recalc_interval = CPU_DEMAND_CLASS_RECALC; - local_stat->cpu_demand = 0; - break; - case CPU_DEMAND_TP_TASK: - local_stat->recalc_interval = CPU_DEMAND_TASK_RECALC; - //for task, the init cpu_demand is copied from its parent - break; - default: - BUG(); - } -} void ckrm_cpu_stat_init(struct ckrm_cpu_class_stat *stat) { int i; + struct ckrm_cpu_class_local_stat* local_stat; + unsigned long long now = sched_clock(); stat->stat_lock = SPIN_LOCK_UNLOCKED; stat->total_ns = 0; - stat->max_demand = 0; + stat->cpu_demand = 0; for (i=0; i< NR_CPUS; i++) { - cpu_demand_stat_init(&stat->local_stats[i],CPU_DEMAND_TP_CLASS); + local_stat = &stat->local_stats[i]; + local_stat->run = 0; + local_stat->total = 0; + local_stat->last_sleep = now; + local_stat->cpu_demand = 0; } - stat->egrt = 0; - stat->megrt = 0; - stat->ehl = CKRM_SHARE_MAX; /*default: no limit*/ - stat->mehl = CKRM_SHARE_MAX; /*default: no limit */ - - stat->eshare = CKRM_SHARE_MAX; - stat->meshare = CKRM_SHARE_MAX; + stat->effective_guarantee = 0; + stat->effective_limit = 0; + stat->glut = 0; + stat->effective_share = 100; + stat->self_effective_share = 100; } - /**********************************************/ /* cpu demand */ /**********************************************/ @@ -126,42 +77,52 @@ void ckrm_cpu_stat_init(struct ckrm_cpu_class_stat *stat) */ /** - * update_cpu_demand_stat - + * update_cpu_demand - update a state change * - * should be called whenever the state of a 
task/task local queue changes + * should be called whenever the state of a local queue changes * -- when deschedule : report how much run * -- when enqueue: report how much sleep * - * how often should we recalculate the cpu demand - * the number is in ns + * to deal with excessive long run/sleep state + * -- whenever the the ckrm_cpu_monitor is called, check if the class is in sleep state, if yes, then update sleep record */ -static inline void update_cpu_demand_stat(struct ckrm_cpu_demand_stat* local_stat,int state, unsigned long long len) +#define CKRM_CPU_DEMAND_RUN 0 +#define CKRM_CPU_DEMAND_SLEEP 1 +//how often should we recalculate the cpu demand, in ns +#define CPU_DEMAND_CAL_THRESHOLD (1000000000LL) +static inline void update_local_cpu_demand(struct ckrm_cpu_class_local_stat* local_stat,int state, unsigned long long len) { local_stat->total += len; if (state == CKRM_CPU_DEMAND_RUN) local_stat->run += len; - if (local_stat->total >= local_stat->recalc_interval) { + if (local_stat->total >= CPU_DEMAND_CAL_THRESHOLD) { local_stat->total >>= CKRM_SHARE_ACCURACY; - if (unlikely(local_stat->run > 0xFFFFFFFF)) - local_stat->run = 0xFFFFFFFF; - - if (local_stat->total > 0xFFFFFFFF) + if (local_stat->total > 0xFFFFFFFF) local_stat->total = 0xFFFFFFFF; - - do_div(local_stat->run,(unsigned long)local_stat->total); - if (local_stat->total > 0xFFFFFFFF) //happens after very long sleep - local_stat->cpu_demand = local_stat->run; - else { - local_stat->cpu_demand += local_stat->run; - local_stat->cpu_demand >>= 1; - } + do_div(local_stat->run,(unsigned long)local_stat->total); + local_stat->cpu_demand +=local_stat->run; + local_stat->cpu_demand >>= 1; local_stat->total = 0; local_stat->run = 0; } } +static inline void cpu_demand_update_run(struct ckrm_cpu_class_local_stat* local_stat, unsigned long long len) +{ + update_local_cpu_demand(local_stat,CKRM_CPU_DEMAND_RUN,len); +} + +static inline void cpu_demand_update_sleep(struct ckrm_cpu_class_local_stat* local_stat, unsigned long long len) +{ + update_local_cpu_demand(local_stat,CKRM_CPU_DEMAND_SLEEP,len); +} + +#define CPU_DEMAND_ENQUEUE 0 +#define CPU_DEMAND_DEQUEUE 1 +#define CPU_DEMAND_DESCHEDULE 2 + /** * cpu_demand_event - and cpu_demand event occured * @event: one of the following three events: @@ -170,24 +131,19 @@ static inline void update_cpu_demand_stat(struct ckrm_cpu_demand_stat* local_sta * CPU_DEMAND_DESCHEDULE: one task belong a certain local class deschedule * @len: valid only for CPU_DEMAND_DESCHEDULE, how long the task has been run */ -void cpu_demand_event(struct ckrm_cpu_demand_stat* local_stat, int event, unsigned long long len) +void cpu_demand_event(struct ckrm_cpu_class_local_stat* local_stat, int event, unsigned long long len) { switch (event) { case CPU_DEMAND_ENQUEUE: len = sched_clock() - local_stat->last_sleep; local_stat->last_sleep = 0; - update_cpu_demand_stat(local_stat,CKRM_CPU_DEMAND_SLEEP,len); + cpu_demand_update_sleep(local_stat,len); break; case CPU_DEMAND_DEQUEUE: - if (! 
local_stat->last_sleep) { - local_stat->last_sleep = sched_clock(); - } + local_stat->last_sleep = sched_clock(); break; case CPU_DEMAND_DESCHEDULE: - update_cpu_demand_stat(local_stat,CKRM_CPU_DEMAND_RUN,len); - break; - case CPU_DEMAND_INIT: //for task init only - cpu_demand_stat_init(local_stat,CPU_DEMAND_TP_TASK); + cpu_demand_update_run(local_stat,len); break; default: BUG(); @@ -196,19 +152,18 @@ void cpu_demand_event(struct ckrm_cpu_demand_stat* local_stat, int event, unsign /** * check all the class local queue - * - * to deal with excessive long run/sleep state - * -- whenever the the ckrm_cpu_monitor is called, check if the class is in sleep state, if yes, then update sleep record + * if local queueu is not in runqueue, then it's in sleep state + * if compare to last sleep, */ static inline void cpu_demand_check_sleep(struct ckrm_cpu_class_stat *stat, int cpu) { - struct ckrm_cpu_demand_stat * local_stat = &stat->local_stats[cpu]; + struct ckrm_cpu_class_local_stat * local_stat = &stat->local_stats[cpu]; unsigned long long sleep,now; if (local_stat->last_sleep) { now = sched_clock(); sleep = now - local_stat->last_sleep; local_stat->last_sleep = now; - update_cpu_demand_stat(local_stat,CKRM_CPU_DEMAND_SLEEP,sleep); + cpu_demand_update_sleep(local_stat,sleep); } } @@ -217,72 +172,51 @@ static inline void cpu_demand_check_sleep(struct ckrm_cpu_class_stat *stat, int * * self_cpu_demand = sum(cpu demand of all local queues) */ -static inline unsigned long get_self_cpu_demand(struct ckrm_cpu_class_stat *stat) +static unsigned long get_self_cpu_demand(struct ckrm_cpu_class_stat + *stat) { int cpu_demand = 0; int i; - int cpuonline = 0; for_each_online_cpu(i) { cpu_demand_check_sleep(stat,i); cpu_demand += stat->local_stats[i].cpu_demand; - cpuonline ++; } - return (cpu_demand/cpuonline); + if (cpu_demand > CKRM_SHARE_MAX) + cpu_demand = CKRM_SHARE_MAX; + return cpu_demand; } /* - * my max demand = min(cpu_demand, my effective hard limit) + * update effective cpu demand for each class + * assume the root_core->parent == NULL */ -static inline unsigned long get_mmax_demand(struct ckrm_cpu_class_stat* stat) -{ - unsigned long mmax_demand = get_self_cpu_demand(stat); - if (mmax_demand > stat->mehl) - mmax_demand = stat->mehl; - - return mmax_demand; -} - -/** - * update_max_demand: update effective cpu demand for each class - * return -1 on error - * - * Assume: the root_core->parent == NULL - */ -static int update_max_demand(struct ckrm_core_class *root_core) +static void update_cpu_demand(struct ckrm_core_class *root_core) { struct ckrm_core_class *cur_core, *child_core; - struct ckrm_cpu_class *cls,*c_cls; - int ret = -1; + struct ckrm_cpu_class *cls; cur_core = root_core; child_core = NULL; - - repeat: - if (!cur_core) { //normal exit - ret = 0; - goto out; - } + /* + * iterate the tree + * update cpu_demand of each node + */ + repeat: + if (!cur_core) + return; cls = ckrm_get_cpu_class(cur_core); - if (! 
cls) //invalid c_cls, abort - goto out; - if (!child_core) //first child - cls->stat.max_demand = get_mmax_demand(&cls->stat); + cls->stat.cpu_demand = get_self_cpu_demand(&cls->stat); else { - c_cls = ckrm_get_cpu_class(child_core); - if (c_cls) - cls->stat.max_demand += c_cls->stat.max_demand; - else //invalid c_cls, abort - goto out; + cls->stat.cpu_demand += + ckrm_get_cpu_class(child_core)->stat.cpu_demand; + if (cls->stat.cpu_demand > CKRM_SHARE_MAX) + cls->stat.cpu_demand = CKRM_SHARE_MAX; } - //check class hard limit - if (cls->stat.max_demand > cls->stat.ehl) - cls->stat.max_demand = cls->stat.ehl; - //next child child_core = ckrm_get_next_child(cur_core, child_core); if (child_core) { @@ -295,111 +229,78 @@ static int update_max_demand(struct ckrm_core_class *root_core) cur_core = child_core->hnode.parent; } goto repeat; - out: - return ret; } /**********************************************/ /* effective guarantee & limit */ /**********************************************/ -static inline void set_eshare(struct ckrm_cpu_class_stat *stat, +static inline void set_effective_share(struct ckrm_cpu_class_stat *stat, int new_share) { if (!new_share) new_share = 1; - stat->eshare = new_share; + stat->effective_share = new_share; } -static inline void set_meshare(struct ckrm_cpu_class_stat *stat, +static inline void set_self_effective_share(struct ckrm_cpu_class_stat *stat, int new_share) { if (!new_share) new_share = 1; - stat->meshare = new_share; + stat->self_effective_share = new_share; } -/** - *update_child_effective - update egrt, ehl, mehl for all children of parent - *@parent: the parent node - *return -1 if anything wrong - * - */ -static int update_child_effective(struct ckrm_core_class *parent) +static inline void update_child_effective(struct ckrm_core_class *parent) { struct ckrm_cpu_class *p_cls = ckrm_get_cpu_class(parent); - struct ckrm_core_class *child_core; - - if (! p_cls) - return -1; + struct ckrm_core_class *child_core = ckrm_get_next_child(parent, NULL); - child_core = ckrm_get_next_child(parent, NULL); while (child_core) { struct ckrm_cpu_class *c_cls = ckrm_get_cpu_class(child_core); - if (! 
c_cls) - return -1; - c_cls->stat.egrt = - p_cls->stat.egrt * + c_cls->stat.effective_guarantee = + p_cls->stat.effective_guarantee * c_cls->shares.my_guarantee / p_cls->shares.total_guarantee; - - c_cls->stat.megrt = c_cls->stat.egrt * c_cls->shares.unused_guarantee - / c_cls->shares.total_guarantee; - - c_cls->stat.ehl = - p_cls->stat.ehl * - get_hard_limit(c_cls) / p_cls->shares.total_guarantee; - - c_cls->stat.mehl = - c_cls->stat.ehl * - get_myhard_limit(c_cls) / c_cls->shares.total_guarantee; + c_cls->stat.effective_limit = + p_cls->stat.effective_guarantee * c_cls->shares.my_limit / + p_cls->shares.total_guarantee; child_core = ckrm_get_next_child(parent, child_core); }; - return 0; + } -/** - * update_effectives: update egrt, ehl, mehl for the whole tree +/* + * update effective guarantee and effective limit + * -- effective share = parent->effective->share * share/parent->total_share + * -- effective limit = parent->effective->share * limit/parent->total_share * should be called only when class structure changed - * - * return -1 if anything wrong happened (eg: the structure changed during the process) */ -static int update_effectives(struct ckrm_core_class *root_core) +static void update_effective_guarantee_limit(struct ckrm_core_class *root_core) { - struct ckrm_core_class *cur_core, *child_core; + struct ckrm_core_class *cur_core, *child_core = NULL; struct ckrm_cpu_class *cls; cur_core = root_core; - child_core = NULL; cls = ckrm_get_cpu_class(cur_core); + cls->stat.effective_guarantee = CKRM_SHARE_MAX; + cls->stat.effective_limit = cls->stat.effective_guarantee; - //initialize the effectives for root - cls->stat.egrt = CKRM_SHARE_MAX; /*egrt of the root is always 100% */ - cls->stat.megrt = cls->stat.egrt * cls->shares.unused_guarantee - / cls->shares.total_guarantee; - cls->stat.ehl = CKRM_SHARE_MAX * get_hard_limit(cls) - / cls->shares.total_guarantee; - cls->stat.mehl = cls->stat.ehl * get_myhard_limit(cls) - / cls->shares.total_guarantee; - - repeat: + repeat: //check exit if (!cur_core) - return 0; + return; //visit this node - if (update_child_effective(cur_core) == -1) { - return -1; //invalid cur_core node - } - + update_child_effective(cur_core); //next child child_core = ckrm_get_next_child(cur_core, child_core); - if (child_core) { - //go down to the next hier + //go down cur_core = child_core; child_core = NULL; - } else { //no more child, go back + goto repeat; + } else { //no more child, go back child_core = cur_core; cur_core = child_core->hnode.parent; } @@ -411,12 +312,12 @@ static int update_effectives(struct ckrm_core_class *root_core) /**********************************************/ /* - * surplus = egrt - demand + * surplus = my_effective_share - demand * if surplus < 0, surplus = 0 */ static inline int get_node_surplus(struct ckrm_cpu_class *cls) { - int surplus = cls->stat.egrt - cls->stat.max_demand; + int surplus = cls->stat.effective_guarantee - cls->stat.cpu_demand; if (surplus < 0) surplus = 0; @@ -424,81 +325,47 @@ static inline int get_node_surplus(struct ckrm_cpu_class *cls) return surplus; } -static inline int get_my_node_surplus(struct ckrm_cpu_class *cls) -{ - int surplus = cls->stat.megrt - get_mmax_demand(&cls->stat); - - if (surplus < 0) - surplus = 0; - - return surplus; -} - -/** - * node_surplus_consume: consume the surplus - * @ckeck_sl: if check_sl is set, then check soft_limit - * @total_grt: total guarantee +/* + * consume the surplus * return how much consumed - * - * implements all the CKRM Scheduling Requirement - * update 
total_grt if necessary + * set glut when necessary */ -static inline int node_surplus_consume(int surplus, +static inline int node_surplus_consume(int old_surplus, struct ckrm_core_class *child_core, - struct ckrm_cpu_class *p_cls, - int check_sl, - int *total_grt - ) + struct ckrm_cpu_class *p_cls) { int consumed = 0; int inc_limit; - int glut = 1; struct ckrm_cpu_class *c_cls = ckrm_get_cpu_class(child_core); - if (! c_cls || ! *total_grt) + if (c_cls->stat.glut) goto out; - /*can't consume more than demand or hard limit*/ - if (c_cls->stat.eshare >= c_cls->stat.max_demand) + //check demand + if (c_cls->stat.effective_share >= c_cls->stat.cpu_demand) { + c_cls->stat.glut = 1; goto out; - - consumed = - surplus * c_cls->shares.my_guarantee / *total_grt; - - if (! consumed) //no more share - goto out; - - //hard limit and demand limit - inc_limit = c_cls->stat.max_demand - c_cls->stat.eshare; - - if (check_sl) { - int esl = p_cls->stat.eshare * get_soft_limit(c_cls) - /p_cls->shares.total_guarantee; - if (esl < c_cls->stat.max_demand) - inc_limit = esl - c_cls->stat.eshare; } + consumed = + old_surplus * c_cls->shares.my_guarantee / + p_cls->shares.total_guarantee; - if (consumed > inc_limit) + //check limit + inc_limit = c_cls->stat.effective_limit - c_cls->stat.effective_share; + if (inc_limit <= consumed) { + c_cls->stat.glut = 1; consumed = inc_limit; - else - glut = 0; - - c_cls->stat.eshare += consumed; - - out: - if (glut) - *total_grt -= c_cls->shares.my_guarantee; + } + c_cls->stat.effective_share += consumed; + out: return consumed; } -/** - * alloc_surplus_node: re-allocate the shares for children under parent - * @parent: parent node - * return the remaining surplus - * +/* + * re-allocate the shares for all the childs under this node * task: * 1. get total surplus * 2. allocate surplus @@ -506,99 +373,71 @@ static inline int node_surplus_consume(int surplus, */ static void alloc_surplus_node(struct ckrm_core_class *parent) { - int total_surplus , old_surplus; + int total_surplus = 0, old_surplus = 0; struct ckrm_cpu_class *p_cls = ckrm_get_cpu_class(parent); struct ckrm_core_class *child_core = NULL; int self_share; - int total_grt = p_cls->shares.total_guarantee; - int check_sl; - - if (! p_cls) - return; - total_surplus = get_my_node_surplus(p_cls); /* + * calculate surplus + * total_surplus = sum(child_surplus) + * reset glut flag * initialize effective_share */ do { child_core = ckrm_get_next_child(parent, child_core); if (child_core) { - struct ckrm_cpu_class *c_cls; - - c_cls = ckrm_get_cpu_class(child_core); if (! c_cls) - return; + struct ckrm_cpu_class *c_cls = + ckrm_get_cpu_class(child_core); + ckrm_stat_t *stat = &c_cls->stat; total_surplus += get_node_surplus(c_cls); - - set_eshare(&c_cls->stat, c_cls->stat.egrt); + stat->glut = 0; + set_effective_share(stat, stat->effective_guarantee); } } while (child_core); - if (! 
total_surplus) - goto realloc_out; - - /* distribute the surplus */ + /*distribute the surplus */ child_core = NULL; - check_sl = 1; - old_surplus = 0; do { - if (!child_core) {//start a new round - - //ok, everybody reached the soft limit - if (old_surplus == total_surplus) - check_sl = 0; - + if (!child_core) //keep the surplus of last round old_surplus = total_surplus; - } child_core = ckrm_get_next_child(parent, child_core); - if (child_core) + if (child_core) { total_surplus -= - node_surplus_consume(old_surplus, child_core, - p_cls,check_sl,&total_grt); + node_surplus_consume(old_surplus, child_core, + p_cls); + } //start a new round if something is allocated in the last round - } while (child_core || check_sl || total_surplus != old_surplus); + } while (child_core || (total_surplus != old_surplus)); - realloc_out: - /*how much for itself*/ - self_share = p_cls->stat.eshare * + //any remaining surplus goes to the default class + self_share = p_cls->stat.effective_share * p_cls->shares.unused_guarantee / p_cls->shares.total_guarantee; + self_share += total_surplus; - if (self_share < p_cls->stat.max_demand) { - /*any remaining surplus goes to the default class*/ - self_share += total_surplus; - if (self_share > p_cls->stat.max_demand) - self_share = p_cls->stat.max_demand; - } - - set_meshare(&p_cls->stat, self_share); + set_self_effective_share(&p_cls->stat, self_share); } /** * alloc_surplus - reallocate unused shares * * class A's usused share should be allocated to its siblings - * the re-allocation goes downward from the top */ -static int alloc_surplus(struct ckrm_core_class *root_core) +static void alloc_surplus(struct ckrm_core_class *root_core) { - struct ckrm_core_class *cur_core, *child_core; + struct ckrm_core_class *cur_core, *child_core = NULL; struct ckrm_cpu_class *cls; - int ret = 0; - /*initialize*/ cur_core = root_core; - child_core = NULL; cls = ckrm_get_cpu_class(cur_core); - set_eshare(&cls->stat, cls->stat.egrt); - /*the ckrm idle tasks get all what's remaining*/ - /*hzheng: uncomment the following like for hard limit support */ - // update_ckrm_idle(CKRM_SHARE_MAX - cls->stat.max_demand); - + cls->stat.glut = 0; + set_effective_share(&cls->stat, cls->stat.effective_guarantee); repeat: //check exit if (!cur_core) - return ret; + return; //visit this node alloc_surplus_node(cur_core); @@ -616,199 +455,6 @@ static int alloc_surplus(struct ckrm_core_class *root_core) goto repeat; } -/**********************************************/ -/* CKRM Idle Tasks */ -/**********************************************/ -struct ckrm_cpu_class ckrm_idle_class_obj, *ckrm_idle_class; -struct task_struct* ckrm_idle_tasks[NR_CPUS]; - -/*how many ckrm idle tasks should I wakeup*/ -static inline int get_nr_idle(unsigned long surplus) -{ - int cpu_online = cpus_weight(cpu_online_map); - int nr_idle = 0; - - nr_idle = surplus * cpu_online; - nr_idle >>= CKRM_SHARE_ACCURACY; - - if (surplus) - nr_idle ++; - - if (nr_idle > cpu_online) - nr_idle = cpu_online; - - return nr_idle; -} - -/** - * update_ckrm_idle: update the status of the idle class according to the new surplus - * surplus: new system surplus - * - * Task: - * -- update share of the idle class - * -- wakeup idle tasks according to surplus - */ -void update_ckrm_idle(unsigned long surplus) -{ - int nr_idle = get_nr_idle(surplus); - int i; - struct task_struct* idle_task; - - set_eshare(&ckrm_idle_class->stat,surplus); - set_meshare(&ckrm_idle_class->stat,surplus); - /*wake up nr_idle idle tasks*/ - for_each_online_cpu(i) { - 
idle_task = ckrm_idle_tasks[i]; - if (unlikely(idle_task->cpu_class != ckrm_idle_class)) { - ckrm_cpu_change_class(idle_task, - idle_task->cpu_class, - ckrm_idle_class); - } - if (! idle_task) - continue; - if (i < nr_idle) { - //activate it - wake_up_process(idle_task); - } else { - //deactivate it - idle_task->state = TASK_INTERRUPTIBLE; - set_tsk_need_resched(idle_task); - } - } -} - -static int ckrm_cpu_idled(void *nothing) -{ - set_user_nice(current,19); - daemonize("ckrm_idle_task"); - - //deactivate it, it will be waked up by ckrm_cpu_monitor - current->state = TASK_INTERRUPTIBLE; - schedule(); - - /*similar to cpu_idle */ - while (1) { - while (!need_resched()) { - ckrm_cpu_monitor(); - if (current_cpu_data.hlt_works_ok) { - local_irq_disable(); - if (!need_resched()) { - set_tsk_need_resched(current); - safe_halt(); - } else - local_irq_enable(); - } - } - schedule(); - } - return 0; -} - -/** - * ckrm_start_ckrm_idle: - * create the ckrm_idle_class and starts the idle tasks - * - */ -void ckrm_start_ckrm_idle(void) -{ - int i; - int ret; - ckrm_shares_t shares; - - ckrm_idle_class = &ckrm_idle_class_obj; - memset(ckrm_idle_class,0,sizeof(shares)); - /*don't care about the shares */ - init_cpu_class(ckrm_idle_class,&shares); - printk(KERN_INFO"ckrm idle class %x created\n",(int)ckrm_idle_class); - - for_each_online_cpu(i) { - ret = kernel_thread(ckrm_cpu_idled, 0, CLONE_KERNEL); - - /*warn on error, but the system should still work without it*/ - if (ret < 0) - printk(KERN_ERR"Warn: can't start ckrm idle tasks\n"); - else { - ckrm_idle_tasks[i] = find_task_by_pid(ret); - if (!ckrm_idle_tasks[i]) - printk(KERN_ERR"Warn: can't find ckrm idle tasks %d\n",ret); - } - } -} - -/**********************************************/ -/* Local Weight */ -/**********************************************/ -/** - * adjust_class_local_weight: adjust the local weight for each cpu - * - * lrq->weight = lpr->pressure * class->weight / total_pressure - */ -static void adjust_lrq_weight(struct ckrm_cpu_class *clsptr, int cpu_online) -{ - unsigned long total_pressure = 0; - ckrm_lrq_t* lrq; - int i; - unsigned long class_weight; - unsigned long long lw; - - //get total pressure - for_each_online_cpu(i) { - lrq = get_ckrm_lrq(clsptr,i); - total_pressure += lrq->lrq_load; - } - - if (! total_pressure) - return; - - class_weight = cpu_class_weight(clsptr) * cpu_online; - - /* - * update weight for each cpu, minimun is 1 - */ - for_each_online_cpu(i) { - lrq = get_ckrm_lrq(clsptr,i); - if (! lrq->lrq_load) - /*give idle class a high share to boost interactiveness */ - lw = cpu_class_weight(clsptr); - else { - lw = lrq->lrq_load * class_weight; - do_div(lw,total_pressure); - if (!lw) - lw = 1; - else if (lw > CKRM_SHARE_MAX) - lw = CKRM_SHARE_MAX; - } - - lrq->local_weight = lw; - } -} - -/* - * assume called with class_list_lock read lock held - */ -void adjust_local_weight(void) -{ - static spinlock_t lock = SPIN_LOCK_UNLOCKED; - struct ckrm_cpu_class *clsptr; - int cpu_online; - - //do nothing if someone already holding the lock - if (! 
spin_trylock(&lock)) - return; - - cpu_online = cpus_weight(cpu_online_map); - - //class status: demand, share,total_ns prio, index - list_for_each_entry(clsptr,&active_cpu_classes,links) { - adjust_lrq_weight(clsptr,cpu_online); - } - - spin_unlock(&lock); -} - -/**********************************************/ -/* Main */ -/**********************************************/ /** *ckrm_cpu_monitor - adjust relative shares of the classes based on their progress * @@ -818,43 +464,13 @@ void adjust_local_weight(void) */ void ckrm_cpu_monitor(void) { - static spinlock_t lock = SPIN_LOCK_UNLOCKED; - static unsigned long long last_check = 0; - struct ckrm_core_class *root_core = get_default_cpu_class()->core; - unsigned long long now; -#define MIN_CPU_MONITOR_INTERVAL 100000000UL - + struct ckrm_core_class *root_core = default_cpu_class->core; if (!root_core) return; - //do nothing if someone already holding the lock - if (! spin_trylock(&lock)) - return; - - read_lock(&class_list_lock); - - now = sched_clock(); - - //consecutive check should be at least 100ms apart - if (now - last_check < MIN_CPU_MONITOR_INTERVAL) { - goto outunlock; - } - last_check = now; - - - if (update_effectives(root_core) != 0) - goto outunlock; - - if (update_max_demand(root_core) != 0) - goto outunlock; - - if (alloc_surplus(root_core) != 0) - goto outunlock; - - adjust_local_weight(); - outunlock: - read_unlock(&class_list_lock); - spin_unlock(&lock); + update_effective_guarantee_limit(root_core); + update_cpu_demand(root_core); + alloc_surplus(root_core); } /*****************************************************/ @@ -910,8 +526,6 @@ void ckrm_kill_monitor(void) int ckrm_cpu_monitor_init(void) { ckrm_start_monitor(); - /*hzheng: uncomment the following like for hard limit support */ - // ckrm_start_ckrm_idle(); return 0; } diff --git a/kernel/ckrm/ckrm_tc.c b/kernel/ckrm/ckrm_tc.c index 590972736..316266494 100644 --- a/kernel/ckrm/ckrm_tc.c +++ b/kernel/ckrm/ckrm_tc.c @@ -490,7 +490,6 @@ static void ckrm_reclassify_all_tasks(void) } else { read_unlock(&tasklist_lock); } - pos++; } } diff --git a/kernel/ckrm/rbce/rbcemod.c b/kernel/ckrm/rbce/rbcemod.c index 4ecb673a5..fa8d2c470 100644 --- a/kernel/ckrm/rbce/rbcemod.c +++ b/kernel/ckrm/rbce/rbcemod.c @@ -1374,32 +1374,28 @@ int reclassify_pid(int pid) int set_tasktag(int pid, char *tag) { char *tp; - int rc = 0; struct task_struct *tsk; struct rbce_private_data *pdata; - int len; if (!tag) { return -EINVAL; } - len = strlen(tag) + 1; - tp = kmalloc(len, GFP_ATOMIC); - if (!tp) { - return -ENOMEM; - } - strncpy(tp,tag,len); - read_lock(&tasklist_lock); if ((tsk = find_task_by_pid(pid)) == NULL) { - rc = -EINVAL; - goto out; + return -EINVAL; + } + + tp = kmalloc(strlen(tag) + 1, GFP_ATOMIC); + + if (!tp) { + return -ENOMEM; } if (unlikely(!RBCE_DATA(tsk))) { RBCE_DATAP(tsk) = create_private_data(NULL, 0); if (!RBCE_DATA(tsk)) { - rc = -ENOMEM; - goto out; + kfree(tp); + return -ENOMEM; } } pdata = RBCE_DATA(tsk); @@ -1407,14 +1403,10 @@ int set_tasktag(int pid, char *tag) kfree(pdata->app_tag); } pdata->app_tag = tp; + strcpy(pdata->app_tag, tag); + rbce_ckrm_reclassify(pid); - out: - read_unlock(&tasklist_lock); - if (rc != 0) - kfree(tp); - else - rbce_ckrm_reclassify(pid); - return rc; + return 0; } /*====================== Classification Functions =======================*/ diff --git a/kernel/ckrm_classqueue.c b/kernel/ckrm_classqueue.c index 0400844a3..1929aaf4e 100644 --- a/kernel/ckrm_classqueue.c +++ b/kernel/ckrm_classqueue.c @@ -133,42 +133,11 @@ void 
classqueue_update_prio(struct classqueue_struct *cq, //add to new positon, round robin for classes with same priority list_add_tail(&(node->list), &cq->array.queue[index]); - __set_bit(index, cq->array.bitmap); + __set_bit(index, cq->array.bitmap); + node->index = index; } -/** - *classqueue_get_min_prio: return the priority of the last node in queue - * - * this function can be called without runqueue lock held - */ -static inline int classqueue_get_min_prio(struct classqueue_struct *cq) -{ - cq_node_t *result = NULL; - int pos; - - /* - * search over the bitmap to get the first class in the queue - */ - pos = find_next_bit(cq->array.bitmap, CLASSQUEUE_SIZE, cq->base_offset); - //do circular search from the beginning - if (pos >= CLASSQUEUE_SIZE) - pos = find_first_bit(cq->array.bitmap, CLASSQUEUE_SIZE); - - if (pos < CLASSQUEUE_SIZE) { - result = list_entry(cq->array.queue[pos].next, cq_node_t, list); - if (list_empty(&cq->array.queue[pos])) - result = NULL; - } - if (result) - return result->prio; - else - return 0; -} - -/** - * this function must be called with runqueue lock held - */ cq_node_t *classqueue_get_head(struct classqueue_struct *cq) { cq_node_t *result = NULL; @@ -178,9 +147,9 @@ cq_node_t *classqueue_get_head(struct classqueue_struct *cq) * search over the bitmap to get the first class in the queue */ pos = find_next_bit(cq->array.bitmap, CLASSQUEUE_SIZE, cq->base_offset); - //do circular search from the beginning - if (pos >= CLASSQUEUE_SIZE) + if (pos >= CLASSQUEUE_SIZE) { //do circular search from the beginning pos = find_first_bit(cq->array.bitmap, CLASSQUEUE_SIZE); + } if (pos < CLASSQUEUE_SIZE) { BUG_ON(list_empty(&cq->array.queue[pos])); @@ -193,17 +162,15 @@ cq_node_t *classqueue_get_head(struct classqueue_struct *cq) * Moving the end of queue forward * the new_base here is logical, we need to translate to the abosule position */ -void classqueue_update_base(struct classqueue_struct *cq) +void classqueue_update_base(struct classqueue_struct *cq, int new_base) { - int new_base; - - if (! 
cq_nr_member(cq)) { + if (!cq_nr_member(cq)) { cq->base_offset = -1; //not defined return; } - new_base = classqueue_get_min_prio(cq); - + // assert(new_base >= cq->base); + if (new_base > cq->base) { cq->base_offset = get_index(cq, &new_base); cq->base = new_base; diff --git a/kernel/ckrm_sched.c b/kernel/ckrm_sched.c index 5ba06e1ac..ba716d4c5 100644 --- a/kernel/ckrm_sched.c +++ b/kernel/ckrm_sched.c @@ -15,90 +15,57 @@ #include #include -rwlock_t class_list_lock = RW_LOCK_UNLOCKED; -LIST_HEAD(active_cpu_classes); // list of active cpu classes; anchor - -struct ckrm_cpu_class default_cpu_class_obj; - -struct ckrm_cpu_class * get_default_cpu_class(void) { - return (&default_cpu_class_obj); -} - /*******************************************************/ /* CVT Management */ /*******************************************************/ -/** - * update_class_cputime - update the total cpu time received by a class - * - * class_list_lock must have been acquired +#define CVT_WINDOW_SIZE (CLASSQUEUE_SIZE << CLASS_BONUS_RATE) +static CVT_t max_CVT = CVT_WINDOW_SIZE; + +/* + * Also ensure that the classes global cvt is upgraded to the + * minimum CVT in the system, as a class might not have run for a while */ -void update_class_cputime(int this_cpu) +static void update_global_cvt(struct ckrm_cpu_class *cpu_class, int cpu) { - struct ckrm_cpu_class *clsptr; - ckrm_lrq_t * lrq; - CVT_t max_cvt, min_cvt; - - max_cvt = 0; - - /*update class time, at the same time get max_cvt */ - list_for_each_entry(clsptr, &active_cpu_classes, links) { - lrq = get_ckrm_lrq(clsptr, this_cpu); + struct ckrm_local_runqueue *class_queue = + get_ckrm_local_runqueue(cpu_class, cpu); + CVT_t min_cvt; + CVT_t local_cvt_old = class_queue->local_cvt; - spin_lock(&clsptr->stat.stat_lock); - clsptr->stat.total_ns += lrq->uncounted_ns; - spin_unlock(&clsptr->stat.stat_lock); - - lrq->uncounted_ns = 0; - if (lrq->local_cvt > max_cvt) - max_cvt = lrq->local_cvt; + spin_lock(&cvt_lock); + if (class_queue->uncounted_cvt) { + cpu_class->global_cvt += class_queue->uncounted_cvt; + class_queue->uncounted_cvt = 0; } - min_cvt = max_cvt - CVT_INTERACTIVE_BONUS; - BUG_ON(min_cvt < 0); + min_cvt = max_CVT - CVT_WINDOW_SIZE; + if (cpu_class->global_cvt < min_cvt) + cpu_class->global_cvt = min_cvt; + else if (cpu_class->global_cvt > max_CVT) + max_CVT = cpu_class->global_cvt; - /*check again, make sure no one get too small cvt*/ - list_for_each_entry(clsptr, &active_cpu_classes, links) { - lrq = get_ckrm_lrq(clsptr, this_cpu); - if (lrq->local_cvt < min_cvt) - lrq->local_cvt = min_cvt; - } -} +/* update local cvt from global cvt*/ +#if 0 + class_queue->local_cvt = cpu_class->global_cvt; +#endif + spin_unlock(&cvt_lock); -/*******************************************************/ -/* PID load balancing stuff */ -/*******************************************************/ -#define PID_SAMPLE_T 32 -#define PID_KP 20 -#define PID_KI 60 -#define PID_KD 20 + if (class_queue->local_cvt != local_cvt_old) + update_class_priority(class_queue); +} -/** - * sample pid load periodically +/* + * class_list_lock must have been acquired */ -void ckrm_load_sample(ckrm_load_t* pid,int cpu) +void update_global_cvts(int this_cpu) { - long load; - long err; - - if (jiffies % PID_SAMPLE_T) - return; - - adjust_local_weight(); - - load = ckrm_cpu_load(cpu); - err = load - pid->load_p; - pid->load_d = err; - pid->load_p = load; - pid->load_i *= 9; - pid->load_i += load; - pid->load_i /= 10; -} + struct ckrm_cpu_class *clsptr; + struct ckrm_local_runqueue *class_queue; 
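/*
 * [Editorial sketch -- not part of the patch.]  The pressure-based balancer
 * that this revert backs out estimated per-CPU load with a small PID-style
 * filter: ckrm_load_sample() above keeps a proportional term (the latest
 * load), a derivative term (its change since the previous sample) and a
 * decaying integral term, and pid_get_pressure() below mixes them with fixed
 * gains of 20/60/20.  A self-contained restatement of that arithmetic, with
 * placeholder names:
 */
struct pid_load_sketch {
	long load_p;	/* latest sampled load (proportional term)       */
	long load_i;	/* decaying running average (integral term)      */
	long load_d;	/* change since the previous sample (derivative) */
};

static void pid_load_sample_sketch(struct pid_load_sketch *s, long load)
{
	s->load_d = load - s->load_p;		 /* derivative          */
	s->load_p = load;			 /* proportional        */
	s->load_i = (s->load_i * 9 + load) / 10; /* integral, 10% decay */
}

static long pid_load_pressure_sketch(const struct pid_load_sketch *s)
{
	/* weighted mix, scaled back down by 100 as in the removed code */
	return (s->load_p * 20 + s->load_i * 60 + s->load_d * 20) / 100;
}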
-long pid_get_pressure(ckrm_load_t* ckrm_load, int local_group) -{ - long pressure; - pressure = ckrm_load->load_p * PID_KP; - pressure += ckrm_load->load_i * PID_KI; - pressure += ckrm_load->load_d * PID_KD; - pressure /= 100; - return pressure; + /*for each class*/ + list_for_each_entry(clsptr, &active_cpu_classes, links) { + update_global_cvt(clsptr, this_cpu); + class_queue = get_ckrm_local_runqueue(clsptr, this_cpu); + clsptr->stat.total_ns += class_queue->uncounted_ns; + class_queue->uncounted_ns = 0; + } } diff --git a/kernel/sched.c b/kernel/sched.c index f0e2dce7d..fa04c39c8 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -159,20 +159,6 @@ #define LOW_CREDIT(p) \ ((p)->interactive_credit < -CREDIT_LIMIT) -#ifdef CONFIG_CKRM_CPU_SCHEDULE -/* - * if belong to different class, compare class priority - * otherwise compare task priority - */ -#define TASK_PREEMPTS_CURR(p, rq) \ - (((p)->cpu_class != (rq)->curr->cpu_class) && ((rq)->curr != (rq)->idle))? class_preempts_curr((p),(rq)->curr) : ((p)->prio < (rq)->curr->prio) - -#else - -#define TASK_PREEMPTS_CURR(p, rq) \ - ((p)->prio < (rq)->curr->prio) -#endif - /* * BASE_TIMESLICE scales user-nice values [ -20 ... 19 ] * to time slice values. @@ -188,7 +174,7 @@ ((MAX_TIMESLICE - MIN_TIMESLICE) * \ (MAX_PRIO-1 - (p)->static_prio) / (MAX_USER_PRIO-1))) -unsigned int task_timeslice(task_t *p) +static unsigned int task_timeslice(task_t *p) { return BASE_TIMESLICE(p); } @@ -199,8 +185,32 @@ unsigned int task_timeslice(task_t *p) * These are the runqueue data structures: */ typedef struct runqueue runqueue_t; + +#ifdef CONFIG_CKRM_CPU_SCHEDULE #include -#include +#endif + +#ifdef CONFIG_CKRM_CPU_SCHEDULE + +/** + * if belong to different class, compare class priority + * otherwise compare task priority + */ +#define TASK_PREEMPTS_CURR(p, rq) \ + (((p)->cpu_class != (rq)->curr->cpu_class) && ((rq)->curr != (rq)->idle))? class_preempts_curr((p),(rq)->curr) : ((p)->prio < (rq)->curr->prio) +#else +#define BITMAP_SIZE ((((MAX_PRIO+1+7)/8)+sizeof(long)-1)/sizeof(long)) +struct prio_array { + unsigned int nr_active; + unsigned long bitmap[BITMAP_SIZE]; + struct list_head queue[MAX_PRIO]; +}; +#define rq_active(p,rq) (rq->active) +#define rq_expired(p,rq) (rq->expired) +#define ckrm_rebalance_tick(j,this_cpu) do {} while (0) +#define TASK_PREEMPTS_CURR(p, rq) \ + ((p)->prio < (rq)->curr->prio) +#endif /* * This is the main, per-CPU runqueue data structure. @@ -217,7 +227,7 @@ struct runqueue { * remote CPUs use both these fields when doing load calculation. */ unsigned long nr_running; -#ifdef CONFIG_SMP +#if defined(CONFIG_SMP) unsigned long cpu_load; #endif unsigned long long nr_switches, nr_preempt; @@ -226,8 +236,8 @@ struct runqueue { task_t *curr, *idle; struct mm_struct *prev_mm; #ifdef CONFIG_CKRM_CPU_SCHEDULE + unsigned long ckrm_cpu_load; struct classqueue_struct classqueue; - ckrm_load_t ckrm_load; #else prio_array_t *active, *expired, arrays[2]; #endif @@ -267,52 +277,77 @@ static DEFINE_PER_CPU(struct runqueue, runqueues); # define task_running(rq, p) ((rq)->curr == (p)) #endif +#ifdef CONFIG_CKRM_CPU_SCHEDULE +#include +spinlock_t cvt_lock = SPIN_LOCK_UNLOCKED; +rwlock_t class_list_lock = RW_LOCK_UNLOCKED; +LIST_HEAD(active_cpu_classes); // list of active cpu classes; anchor +struct ckrm_cpu_class default_cpu_class_obj; + /* - * task_rq_lock - lock the runqueue a given task resides on and disable - * interrupts. Note the ordering: we can safely lookup the task_rq without - * explicitly disabling preemption. 
+ * the minimum CVT allowed is the base_cvt + * otherwise, it will starve others */ -static runqueue_t *task_rq_lock(task_t *p, unsigned long *flags) +CVT_t get_min_cvt(int cpu) { - struct runqueue *rq; + cq_node_t *node; + struct ckrm_local_runqueue * lrq; + CVT_t min_cvt; -repeat_lock_task: - local_irq_save(*flags); - rq = task_rq(p); - spin_lock(&rq->lock); - if (unlikely(rq != task_rq(p))) { - spin_unlock_irqrestore(&rq->lock, *flags); - goto repeat_lock_task; - } - return rq; -} - -static inline void task_rq_unlock(runqueue_t *rq, unsigned long *flags) -{ - spin_unlock_irqrestore(&rq->lock, *flags); + node = classqueue_get_head(bpt_queue(cpu)); + lrq = (node) ? class_list_entry(node) : NULL; + + if (lrq) + min_cvt = lrq->local_cvt; + else + min_cvt = 0; + + return min_cvt; } /* - * rq_lock - lock a given runqueue and disable interrupts. + * update the classueue base for all the runqueues + * TODO: we can only update half of the min_base to solve the movebackward issue */ -static runqueue_t *this_rq_lock(void) -{ - runqueue_t *rq; +static inline void check_update_class_base(int this_cpu) { + unsigned long min_base = 0xFFFFFFFF; + cq_node_t *node; + int i; - local_irq_disable(); - rq = this_rq(); - spin_lock(&rq->lock); + if (! cpu_online(this_cpu)) return; - return rq; + /* + * find the min_base across all the processors + */ + for_each_online_cpu(i) { + /* + * I should change it to directly use bpt->base + */ + node = classqueue_get_head(bpt_queue(i)); + if (node && node->prio < min_base) { + min_base = node->prio; + } + } + if (min_base != 0xFFFFFFFF) + classqueue_update_base(bpt_queue(this_cpu),min_base); } -static inline void rq_unlock(runqueue_t *rq) +static inline void ckrm_rebalance_tick(int j,int this_cpu) { - spin_unlock_irq(&rq->lock); +#ifdef CONFIG_CKRM_CPU_SCHEDULE + read_lock(&class_list_lock); + if (!(j % CVT_UPDATE_TICK)) + update_global_cvts(this_cpu); + +#define CKRM_BASE_UPDATE_RATE 400 + if (! (jiffies % CKRM_BASE_UPDATE_RATE)) + check_update_class_base(this_cpu); + + read_unlock(&class_list_lock); +#endif } -#ifdef CONFIG_CKRM_CPU_SCHEDULE -static inline ckrm_lrq_t *rq_get_next_class(struct runqueue *rq) +static inline struct ckrm_local_runqueue *rq_get_next_class(struct runqueue *rq) { cq_node_t *node = classqueue_get_head(&rq->classqueue); return ((node) ? class_list_entry(node) : NULL); @@ -322,8 +357,7 @@ static inline struct task_struct * rq_get_next_task(struct runqueue* rq) { prio_array_t *array; struct task_struct *next; - ckrm_lrq_t *queue; - int idx; + struct ckrm_local_runqueue *queue; int cpu = smp_processor_id(); next = rq->idle; @@ -331,7 +365,7 @@ static inline struct task_struct * rq_get_next_task(struct runqueue* rq) if ((queue = rq_get_next_class(rq))) { array = queue->active; //check switch active/expired queue - if (unlikely(!array->nr_active)) { + if (unlikely(!queue->active->nr_active)) { queue->active = queue->expired; queue->expired = array; queue->expired_timestamp = 0; @@ -344,20 +378,20 @@ static inline struct task_struct * rq_get_next_task(struct runqueue* rq) &queue->classqueue_linkobj); cpu_demand_event(get_rq_local_stat(queue,cpu),CPU_DEMAND_DEQUEUE,0); } - goto retry_next_class; - } - BUG_ON(!array->nr_active); - idx = queue->top_priority; - if (queue->top_priority == MAX_PRIO) { - BUG_ON(1); + goto retry_next_class; } - - next = task_list_entry(array->queue[idx].next); + BUG_ON(!queue->active->nr_active); + next = task_list_entry(array->queue[queue->top_priority].next); } return next; } -#else /*! 
CONFIG_CKRM_CPU_SCHEDULE*/ + +static inline void rq_load_inc(runqueue_t *rq, struct task_struct *p) { rq->ckrm_cpu_load += cpu_class_weight(p->cpu_class); } +static inline void rq_load_dec(runqueue_t *rq, struct task_struct *p) { rq->ckrm_cpu_load -= cpu_class_weight(p->cpu_class); } + +#else /*CONFIG_CKRM_CPU_SCHEDULE*/ + static inline struct task_struct * rq_get_next_task(struct runqueue* rq) { prio_array_t *array; @@ -384,14 +418,59 @@ static inline struct task_struct * rq_get_next_task(struct runqueue* rq) static inline void class_enqueue_task(struct task_struct* p, prio_array_t *array) { } static inline void class_dequeue_task(struct task_struct* p, prio_array_t *array) { } static inline void init_cpu_classes(void) { } -#define rq_ckrm_load(rq) NULL -static inline void ckrm_sched_tick(int j,int this_cpu,void* name) {} +static inline void rq_load_inc(runqueue_t *rq, struct task_struct *p) { } +static inline void rq_load_dec(runqueue_t *rq, struct task_struct *p) { } #endif /* CONFIG_CKRM_CPU_SCHEDULE */ + +/* + * task_rq_lock - lock the runqueue a given task resides on and disable + * interrupts. Note the ordering: we can safely lookup the task_rq without + * explicitly disabling preemption. + */ +runqueue_t *task_rq_lock(task_t *p, unsigned long *flags) +{ + struct runqueue *rq; + +repeat_lock_task: + local_irq_save(*flags); + rq = task_rq(p); + spin_lock(&rq->lock); + if (unlikely(rq != task_rq(p))) { + spin_unlock_irqrestore(&rq->lock, *flags); + goto repeat_lock_task; + } + return rq; +} + +void task_rq_unlock(runqueue_t *rq, unsigned long *flags) +{ + spin_unlock_irqrestore(&rq->lock, *flags); +} + +/* + * rq_lock - lock a given runqueue and disable interrupts. + */ +static runqueue_t *this_rq_lock(void) +{ + runqueue_t *rq; + + local_irq_disable(); + rq = this_rq(); + spin_lock(&rq->lock); + + return rq; +} + +static inline void rq_unlock(runqueue_t *rq) +{ + spin_unlock_irq(&rq->lock); +} + /* * Adding/removing a task to/from a priority array: */ -static void dequeue_task(struct task_struct *p, prio_array_t *array) +void dequeue_task(struct task_struct *p, prio_array_t *array) { BUG_ON(! 
array); array->nr_active--; @@ -401,7 +480,7 @@ static void dequeue_task(struct task_struct *p, prio_array_t *array) class_dequeue_task(p,array); } -static void enqueue_task(struct task_struct *p, prio_array_t *array) +void enqueue_task(struct task_struct *p, prio_array_t *array) { list_add_tail(&p->run_list, array->queue + p->prio); __set_bit(p->prio, array->bitmap); @@ -465,6 +544,7 @@ static inline void __activate_task(task_t *p, runqueue_t *rq) { enqueue_task(p, rq_active(p,rq)); rq->nr_running++; + rq_load_inc(rq,p); } /* @@ -474,6 +554,7 @@ static inline void __activate_idle_task(task_t *p, runqueue_t *rq) { enqueue_task_head(p, rq_active(p,rq)); rq->nr_running++; + rq_load_inc(rq,p); } static void recalc_task_prio(task_t *p, unsigned long long now) @@ -605,6 +686,7 @@ static void activate_task(task_t *p, runqueue_t *rq, int local) static void deactivate_task(struct task_struct *p, runqueue_t *rq) { rq->nr_running--; + rq_load_dec(rq,p); if (p->state == TASK_UNINTERRUPTIBLE) rq->nr_uninterruptible++; dequeue_task(p, p->array); @@ -978,10 +1060,6 @@ void fastcall sched_fork(task_t *p) INIT_LIST_HEAD(&p->run_list); p->array = NULL; spin_lock_init(&p->switch_lock); -#ifdef CONFIG_CKRM_CPU_SCHEDULE - cpu_demand_event(&p->demand_stat,CPU_DEMAND_INIT,0); -#endif - #ifdef CONFIG_PREEMPT /* * During context-switch we hold precisely one spinlock, which @@ -1057,7 +1135,7 @@ void fastcall wake_up_forked_process(task_t * p) p->array = current->array; p->array->nr_active++; rq->nr_running++; - class_enqueue_task(p,p->array); + rq_load_inc(rq,p); } task_rq_unlock(rq, &flags); } @@ -1390,7 +1468,7 @@ lock_again: p->array = current->array; p->array->nr_active++; rq->nr_running++; - class_enqueue_task(p,p->array); + rq_load_inc(rq,p); } } else { /* Not the local CPU - must adjust timestamp */ @@ -1495,9 +1573,13 @@ void pull_task(runqueue_t *src_rq, prio_array_t *src_array, task_t *p, { dequeue_task(p, src_array); src_rq->nr_running--; + rq_load_dec(src_rq,p); + set_task_cpu(p, this_cpu); this_rq->nr_running++; + rq_load_inc(this_rq,p); enqueue_task(p, this_array); + p->timestamp = (p->timestamp - src_rq->timestamp_last_tick) + this_rq->timestamp_last_tick; /* @@ -1537,61 +1619,133 @@ int can_migrate_task(task_t *p, runqueue_t *rq, int this_cpu, } #ifdef CONFIG_CKRM_CPU_SCHEDULE -static inline int ckrm_preferred_task(task_t *tmp,long min, long max, - int phase, enum idle_type idle) + +struct ckrm_cpu_class *find_unbalanced_class(int busiest_cpu, int this_cpu, unsigned long *cls_imbalance) { - long pressure = task_load(tmp); - - if (pressure > max) - return 0; + struct ckrm_cpu_class *most_unbalanced_class = NULL; + struct ckrm_cpu_class *clsptr; + int max_unbalance = 0; - if ((idle == NOT_IDLE) && ! 
phase && (pressure <= min)) - return 0; - return 1; + list_for_each_entry(clsptr,&active_cpu_classes,links) { + struct ckrm_local_runqueue *this_lrq = get_ckrm_local_runqueue(clsptr,this_cpu); + struct ckrm_local_runqueue *busiest_lrq = get_ckrm_local_runqueue(clsptr,busiest_cpu); + int unbalance_degree; + + unbalance_degree = (local_queue_nr_running(busiest_lrq) - local_queue_nr_running(this_lrq)) * cpu_class_weight(clsptr); + if (unbalance_degree >= *cls_imbalance) + continue; // already looked at this class + + if (unbalance_degree > max_unbalance) { + max_unbalance = unbalance_degree; + most_unbalanced_class = clsptr; + } + } + *cls_imbalance = max_unbalance; + return most_unbalanced_class; } + /* - * move tasks for a specic local class - * return number of tasks pulled + * find_busiest_queue - find the busiest runqueue among the cpus in cpumask. */ -static inline int ckrm_cls_move_tasks(ckrm_lrq_t* src_lrq,ckrm_lrq_t*dst_lrq, - runqueue_t *this_rq, - runqueue_t *busiest, - struct sched_domain *sd, - int this_cpu, - enum idle_type idle, - long* pressure_imbalance) +static int find_busiest_cpu(runqueue_t *this_rq, int this_cpu, int idle, + int *imbalance) { - prio_array_t *array, *dst_array; + int cpu_load, load, max_load, i, busiest_cpu; + runqueue_t *busiest, *rq_src; + + + /*Hubertus ... the concept of nr_running is replace with cpu_load */ + cpu_load = this_rq->ckrm_cpu_load; + + busiest = NULL; + busiest_cpu = -1; + + max_load = -1; + for_each_online_cpu(i) { + rq_src = cpu_rq(i); + load = rq_src->ckrm_cpu_load; + + if ((load > max_load) && (rq_src != this_rq)) { + busiest = rq_src; + busiest_cpu = i; + max_load = load; + } + } + + if (likely(!busiest)) + goto out; + + *imbalance = max_load - cpu_load; + + /* It needs an at least ~25% imbalance to trigger balancing. */ + if (!idle && ((*imbalance)*4 < max_load)) { + busiest = NULL; + goto out; + } + + double_lock_balance(this_rq, busiest); + /* + * Make sure nothing changed since we checked the + * runqueue length. + */ + if (busiest->ckrm_cpu_load <= cpu_load) { + spin_unlock(&busiest->lock); + busiest = NULL; + } +out: + return (busiest ? 
busiest_cpu : -1); +} + +static int load_balance(int this_cpu, runqueue_t *this_rq, + struct sched_domain *sd, enum idle_type idle) +{ + int imbalance, idx; + int busiest_cpu; + runqueue_t *busiest; + prio_array_t *array; struct list_head *head, *curr; task_t *tmp; - int idx; - int pulled = 0; - int phase = -1; - long pressure_min, pressure_max; - /*hzheng: magic : 90% balance is enough*/ - long balance_min = *pressure_imbalance / 10; -/* - * we don't want to migrate tasks that will reverse the balance - * or the tasks that make too small difference - */ -#define CKRM_BALANCE_MAX_RATIO 100 -#define CKRM_BALANCE_MIN_RATIO 1 - start: - phase ++; + struct ckrm_local_runqueue * busiest_local_queue; + struct ckrm_cpu_class *clsptr; + int weight; + unsigned long cls_imbalance; // so we can retry other classes + + // need to update global CVT based on local accumulated CVTs + read_lock(&class_list_lock); + busiest_cpu = find_busiest_cpu(this_rq, this_cpu, idle, &imbalance); + if (busiest_cpu == -1) + goto out; + + busiest = cpu_rq(busiest_cpu); + + /* + * We only want to steal a number of tasks equal to 1/2 the imbalance, + * otherwise we'll just shift the imbalance to the new queue: + */ + imbalance /= 2; + + /* now find class on that runqueue with largest inbalance */ + cls_imbalance = 0xFFFFFFFF; + + retry_other_class: + clsptr = find_unbalanced_class(busiest_cpu, this_cpu, &cls_imbalance); + if (!clsptr) + goto out_unlock; + + busiest_local_queue = get_ckrm_local_runqueue(clsptr,busiest_cpu); + weight = cpu_class_weight(clsptr); + /* * We first consider expired tasks. Those will likely not be * executed in the near future, and they are most likely to * be cache-cold, thus switching CPUs has the least effect * on them. */ - if (src_lrq->expired->nr_active) { - array = src_lrq->expired; - dst_array = dst_lrq->expired; - } else { - array = src_lrq->active; - dst_array = dst_lrq->active; - } + if (busiest_local_queue->expired->nr_active) + array = busiest_local_queue->expired; + else + array = busiest_local_queue->active; new_array: /* Start searching at priority 0: */ @@ -1602,15 +1756,11 @@ static inline int ckrm_cls_move_tasks(ckrm_lrq_t* src_lrq,ckrm_lrq_t*dst_lrq, else idx = find_next_bit(array->bitmap, MAX_PRIO, idx); if (idx >= MAX_PRIO) { - if (array == src_lrq->expired && src_lrq->active->nr_active) { - array = src_lrq->active; - dst_array = dst_lrq->active; + if (array == busiest_local_queue->expired && busiest_local_queue->active->nr_active) { + array = busiest_local_queue->active; goto new_array; } - if ((! phase) && (! 
pulled) && (idle != IDLE)) - goto start; //try again - else - goto out; //finished search for this lrq + goto retry_other_class; } head = array->queue + idx; @@ -1620,365 +1770,42 @@ static inline int ckrm_cls_move_tasks(ckrm_lrq_t* src_lrq,ckrm_lrq_t*dst_lrq, curr = curr->prev; - if (!can_migrate_task(tmp, busiest, this_cpu, sd, idle)) { + if (!can_migrate_task(tmp, busiest, this_cpu, sd,idle)) { if (curr != head) goto skip_queue; idx++; goto skip_bitmap; } - - pressure_min = *pressure_imbalance * CKRM_BALANCE_MIN_RATIO/100; - pressure_max = *pressure_imbalance * CKRM_BALANCE_MAX_RATIO/100; + pull_task(busiest, array, tmp, this_rq, rq_active(tmp,this_rq),this_cpu); /* - * skip the tasks that will reverse the balance too much + * tmp BUG FIX: hzheng + * load balancing can make the busiest local queue empty + * thus it should be removed from bpt */ - if (ckrm_preferred_task(tmp,pressure_min,pressure_max,phase,idle)) { - *pressure_imbalance -= task_load(tmp); - pull_task(busiest, array, tmp, - this_rq, dst_array, this_cpu); - pulled++; - - if (*pressure_imbalance <= balance_min) - goto out; + if (! local_queue_nr_running(busiest_local_queue)) { + classqueue_dequeue(busiest_local_queue->classqueue,&busiest_local_queue->classqueue_linkobj); + cpu_demand_event(get_rq_local_stat(busiest_local_queue,busiest_cpu),CPU_DEMAND_DEQUEUE,0); } - - if (curr != head) - goto skip_queue; - idx++; - goto skip_bitmap; - out: - return pulled; -} - -static inline long ckrm_rq_imbalance(runqueue_t *this_rq,runqueue_t *dst_rq) -{ - long imbalance; - /* - * make sure after balance, imbalance' > - imbalance/2 - * we don't want the imbalance be reversed too much - */ - imbalance = pid_get_pressure(rq_ckrm_load(dst_rq),0) - - pid_get_pressure(rq_ckrm_load(this_rq),1); - imbalance /= 2; - return imbalance; -} - -/* - * try to balance the two runqueues - * - * Called with both runqueues locked. - * if move_tasks is called, it will try to move at least one task over - */ -static int move_tasks(runqueue_t *this_rq, int this_cpu, runqueue_t *busiest, - unsigned long max_nr_move, struct sched_domain *sd, - enum idle_type idle) -{ - struct ckrm_cpu_class *clsptr,*vip_cls = NULL; - ckrm_lrq_t* src_lrq,*dst_lrq; - long pressure_imbalance, pressure_imbalance_old; - int src_cpu = task_cpu(busiest->curr); - struct list_head *list; - int pulled = 0; - long imbalance; - - imbalance = ckrm_rq_imbalance(this_rq,busiest); - - if ((idle == NOT_IDLE && imbalance <= 0) || busiest->nr_running <= 1) - goto out; - //try to find the vip class - list_for_each_entry(clsptr,&active_cpu_classes,links) { - src_lrq = get_ckrm_lrq(clsptr,src_cpu); - - if (! lrq_nr_running(src_lrq)) - continue; - - if (! vip_cls || cpu_class_weight(vip_cls) < cpu_class_weight(clsptr) ) - { - vip_cls = clsptr; - } + imbalance -= weight; + if (!idle && (imbalance>0)) { + if (curr != head) + goto skip_queue; + idx++; + goto skip_bitmap; } - - /* - * do search from the most significant class - * hopefully, less tasks will be migrated this way - */ - clsptr = vip_cls; - - move_class: - if (! clsptr) - goto out; - - - src_lrq = get_ckrm_lrq(clsptr,src_cpu); - if (! lrq_nr_running(src_lrq)) - goto other_class; - - dst_lrq = get_ckrm_lrq(clsptr,this_cpu); - - //how much pressure for this class should be transferred - pressure_imbalance = src_lrq->lrq_load * imbalance/src_lrq->local_weight; - if (pulled && ! 
pressure_imbalance) - goto other_class; - - pressure_imbalance_old = pressure_imbalance; - - //move tasks - pulled += - ckrm_cls_move_tasks(src_lrq,dst_lrq, - this_rq, - busiest, - sd,this_cpu,idle, - &pressure_imbalance); - - /* - * hzheng: 2 is another magic number - * stop balancing if the imbalance is less than 25% of the orig - */ - if (pressure_imbalance <= (pressure_imbalance_old >> 2)) - goto out; - - //update imbalance - imbalance *= pressure_imbalance / pressure_imbalance_old; - other_class: - //who is next? - list = clsptr->links.next; - if (list == &active_cpu_classes) - list = list->next; - clsptr = list_entry(list, typeof(*clsptr), links); - if (clsptr != vip_cls) - goto move_class; + out_unlock: + spin_unlock(&busiest->lock); out: - return pulled; -} - -/** - * ckrm_check_balance - is load balancing necessary? - * return 0 if load balancing is not necessary - * otherwise return the average load of the system - * also, update nr_group - * - * heuristics: - * no load balancing if it's load is over average - * no load balancing if it's load is far more than the min - * task: - * read the status of all the runqueues - */ -static unsigned long ckrm_check_balance(struct sched_domain *sd, int this_cpu, - enum idle_type idle, int* nr_group) -{ - struct sched_group *group = sd->groups; - unsigned long min_load, max_load, avg_load; - unsigned long total_load, this_load, total_pwr; - - max_load = this_load = total_load = total_pwr = 0; - min_load = 0xFFFFFFFF; - *nr_group = 0; - - do { - cpumask_t tmp; - unsigned long load; - int local_group; - int i, nr_cpus = 0; - - /* Tally up the load of all CPUs in the group */ - cpus_and(tmp, group->cpumask, cpu_online_map); - if (unlikely(cpus_empty(tmp))) - goto nextgroup; - - avg_load = 0; - local_group = cpu_isset(this_cpu, group->cpumask); - - for_each_cpu_mask(i, tmp) { - load = pid_get_pressure(rq_ckrm_load(cpu_rq(i)),local_group); - nr_cpus++; - avg_load += load; - } - - if (!nr_cpus) - goto nextgroup; - - total_load += avg_load; - total_pwr += group->cpu_power; - - /* Adjust by relative CPU power of the group */ - avg_load = (avg_load * SCHED_LOAD_SCALE) / group->cpu_power; - - if (local_group) { - this_load = avg_load; - goto nextgroup; - } else if (avg_load > max_load) { - max_load = avg_load; - } - if (avg_load < min_load) { - min_load = avg_load; - } -nextgroup: - group = group->next; - *nr_group = *nr_group + 1; - } while (group != sd->groups); - - if (!max_load || this_load >= max_load) - goto out_balanced; - - avg_load = (SCHED_LOAD_SCALE * total_load) / total_pwr; - - /* hzheng: debugging: 105 is a magic number - * 100*max_load <= sd->imbalance_pct*this_load) - * should use imbalance_pct instead - */ - if (this_load > avg_load - || 100*max_load < 105*this_load - || 100*min_load < 70*this_load - ) - goto out_balanced; - - return avg_load; - out_balanced: - return 0; -} - -/** - * any group that has above average load is considered busy - * find the busiest queue from any of busy group - */ -static runqueue_t * -ckrm_find_busy_queue(struct sched_domain *sd, int this_cpu, - unsigned long avg_load, enum idle_type idle, - int nr_group) -{ - struct sched_group *group; - runqueue_t * busiest=NULL; - unsigned long rand; - - group = sd->groups; - rand = get_ckrm_rand(nr_group); - nr_group = 0; - - do { - unsigned long load,total_load,max_load; - cpumask_t tmp; - int i; - runqueue_t * grp_busiest; - - cpus_and(tmp, group->cpumask, cpu_online_map); - if (unlikely(cpus_empty(tmp))) - goto find_nextgroup; - - total_load = 0; - max_load = 0; - 
grp_busiest = NULL; - for_each_cpu_mask(i, tmp) { - load = pid_get_pressure(rq_ckrm_load(cpu_rq(i)),0); - total_load += load; - if (load > max_load) { - max_load = load; - grp_busiest = cpu_rq(i); - } - } - - total_load = (total_load * SCHED_LOAD_SCALE) / group->cpu_power; - if (total_load > avg_load) { - busiest = grp_busiest; - if (nr_group >= rand) - break; - } - find_nextgroup: - group = group->next; - nr_group ++; - } while (group != sd->groups); - - return busiest; -} - -/** - * load_balance - pressure based load balancing algorithm used by ckrm - */ -static int ckrm_load_balance(int this_cpu, runqueue_t *this_rq, - struct sched_domain *sd, enum idle_type idle) -{ - runqueue_t *busiest; - unsigned long avg_load; - int nr_moved,nr_group; - - avg_load = ckrm_check_balance(sd, this_cpu, idle, &nr_group); - if (! avg_load) - goto out_balanced; - - busiest = ckrm_find_busy_queue(sd,this_cpu,avg_load,idle,nr_group); - if (! busiest) - goto out_balanced; - /* - * This should be "impossible", but since load - * balancing is inherently racy and statistical, - * it could happen in theory. - */ - if (unlikely(busiest == this_rq)) { - WARN_ON(1); - goto out_balanced; - } - - nr_moved = 0; - if (busiest->nr_running > 1) { - /* - * Attempt to move tasks. If find_busiest_group has found - * an imbalance but busiest->nr_running <= 1, the group is - * still unbalanced. nr_moved simply stays zero, so it is - * correctly treated as an imbalance. - */ - double_lock_balance(this_rq, busiest); - nr_moved = move_tasks(this_rq, this_cpu, busiest, - 0,sd, idle); - spin_unlock(&busiest->lock); - if (nr_moved) { - adjust_local_weight(); - } - } - - if (!nr_moved) - sd->nr_balance_failed ++; - else - sd->nr_balance_failed = 0; - - /* We were unbalanced, so reset the balancing interval */ - sd->balance_interval = sd->min_interval; - - return nr_moved; - -out_balanced: - /* tune up the balancing interval */ - if (sd->balance_interval < sd->max_interval) - sd->balance_interval *= 2; - + read_unlock(&class_list_lock); return 0; } -/* - * this_rq->lock is already held - */ -static inline int load_balance_newidle(int this_cpu, runqueue_t *this_rq, - struct sched_domain *sd) -{ - int ret; - read_lock(&class_list_lock); - ret = ckrm_load_balance(this_cpu,this_rq,sd,NEWLY_IDLE); - read_unlock(&class_list_lock); - return ret; -} -static inline int load_balance(int this_cpu, runqueue_t *this_rq, - struct sched_domain *sd, enum idle_type idle) +static inline void idle_balance(int this_cpu, runqueue_t *this_rq) { - int ret; - - spin_lock(&this_rq->lock); - read_lock(&class_list_lock); - ret= ckrm_load_balance(this_cpu,this_rq,sd,NEWLY_IDLE); - read_unlock(&class_list_lock); - spin_unlock(&this_rq->lock); - return ret; } -#else /*! CONFIG_CKRM_CPU_SCHEDULE */ +#else /* CONFIG_CKRM_CPU_SCHEDULE */ /* * move_tasks tries to move up to max_nr_move tasks from busiest to this_rq, * as part of a balancing operation within "domain". Returns the number of @@ -2343,8 +2170,6 @@ static int load_balance_newidle(int this_cpu, runqueue_t *this_rq, out: return nr_moved; } -#endif /* CONFIG_CKRM_CPU_SCHEDULE*/ - /* * idle_balance is called by schedule() if this_cpu is about to become @@ -2430,6 +2255,7 @@ next_group: group = group->next; } while (group != sd->groups); } +#endif /* CONFIG_CKRM_CPU_SCHEDULE*/ /* * rebalance_tick will get called every timer tick, on every CPU. 
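(Editor's note, illustrative only.) The pressure-based balancer removed in the hunks above (ckrm_check_balance(), ckrm_find_busy_queue(), ckrm_load_balance()) gives way to the simpler find_busiest_cpu()/load_balance() pair restored earlier in this file, which keeps the classic O(1)-scheduler heuristics: ignore an imbalance below roughly 25% of the busiest load unless this CPU is idle, and steal only half of the measured difference so the imbalance does not simply flip to the other runqueue. A stand-alone restatement of that decision, with invented names:

static int worth_balancing_sketch(int busiest_load, int this_load, int idle,
				  int *imbalance)
{
	*imbalance = busiest_load - this_load;

	/* a busy CPU only pulls once the gap is about 25% of the peak load */
	if (!idle && (*imbalance) * 4 < busiest_load)
		return 0;

	/* move half the gap, otherwise the imbalance just changes sides */
	*imbalance /= 2;
	return 1;
}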
@@ -2450,6 +2276,8 @@ static void rebalance_tick(int this_cpu, runqueue_t *this_rq, unsigned long j = jiffies + CPU_OFFSET(this_cpu); struct sched_domain *sd; + ckrm_rebalance_tick(j,this_cpu); + /* Update our load */ old_load = this_rq->cpu_load; this_load = this_rq->nr_running * SCHED_LOAD_SCALE; @@ -2488,7 +2316,9 @@ static void rebalance_tick(int this_cpu, runqueue_t *this_rq, */ static inline void rebalance_tick(int cpu, runqueue_t *rq, enum idle_type idle) { + ckrm_rebalance_tick(jiffies,cpu); } + static inline void idle_balance(int cpu, runqueue_t *rq) { } @@ -2510,6 +2340,7 @@ static inline int wake_priority_sleeper(runqueue_t *rq) } DEFINE_PER_CPU(struct kernel_stat, kstat) = { { 0 } }; + EXPORT_PER_CPU_SYMBOL(kstat); /* @@ -2533,7 +2364,7 @@ EXPORT_PER_CPU_SYMBOL(kstat); #define EXPIRED_STARVING(rq) \ (STARVATION_LIMIT && ((rq)->expired_timestamp && \ (jiffies - (rq)->expired_timestamp >= \ - STARVATION_LIMIT * (lrq_nr_running(rq)) + 1))) + STARVATION_LIMIT * (local_queue_nr_running(rq)) + 1))) #endif /* @@ -2616,7 +2447,7 @@ void scheduler_tick(int user_ticks, int sys_ticks) if (vx_need_resched(p)) { #ifdef CONFIG_CKRM_CPU_SCHEDULE /* Hubertus ... we can abstract this out */ - ckrm_lrq_t* rq = get_task_lrq(p); + struct ckrm_local_runqueue* rq = get_task_class_queue(p); #endif dequeue_task(p, rq->active); set_tsk_need_resched(p); @@ -2663,7 +2494,6 @@ void scheduler_tick(int user_ticks, int sys_ticks) out_unlock: spin_unlock(&rq->lock); out: - ckrm_sched_tick(jiffies,cpu,rq_ckrm_load(rq)); rebalance_tick(cpu, rq, NOT_IDLE); } @@ -2804,19 +2634,6 @@ need_resched: spin_lock_irq(&rq->lock); -#ifdef CONFIG_CKRM_CPU_SCHEDULE - if (prev != rq->idle) { - unsigned long long run = now - prev->timestamp; - ckrm_lrq_t * lrq = get_task_lrq(prev); - - lrq->lrq_load -= task_load(prev); - cpu_demand_event(&prev->demand_stat,CPU_DEMAND_DESCHEDULE,run); - lrq->lrq_load += task_load(prev); - - cpu_demand_event(get_task_lrq_stat(prev),CPU_DEMAND_DESCHEDULE,run); - update_local_cvt(prev, run); - } -#endif /* * if entering off of a kernel preemption go straight * to picking the next task. 
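(Editor's note, illustrative only.) The hunk above and the switch_tasks hunk that follows move the CKRM accounting for the outgoing task: instead of charging it right after schedule() takes the runqueue lock, the restored code charges it just before the context switch, and the amount charged is simply the wall-clock interval since the task last went on the CPU. A hedged sketch of that bookkeeping, with invented names standing in for cpu_demand_event() and update_local_cvt():

struct outgoing_task_sketch {
	unsigned long long timestamp;	/* ns: when the task started running  */
	unsigned long long demand_ns;	/* stand-in for the demand estimator  */
	unsigned long long class_cvt;	/* stand-in for the class's local CVT */
};

static void charge_descheduled_sketch(struct outgoing_task_sketch *p,
				      unsigned long long now)
{
	unsigned long long run = now - p->timestamp;

	p->demand_ns += run;	/* deschedule event feeds the demand estimate */
	p->class_cvt += run;	/* advances the class's virtual time */
}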
@@ -2865,14 +2682,17 @@ pick_next: #endif if (unlikely(!rq->nr_running)) { idle_balance(cpu, rq); + if (!rq->nr_running) { + next = rq->idle; + rq->expired_timestamp = 0; + wake_sleeping_dependent(cpu, rq); + goto switch_tasks; + } } next = rq_get_next_task(rq); - if (next == rq->idle) { - rq->expired_timestamp = 0; - wake_sleeping_dependent(cpu, rq); + if (next == rq->idle) goto switch_tasks; - } if (dependent_sleeper(cpu, rq, next)) { next = rq->idle; @@ -2914,6 +2734,14 @@ switch_tasks: rq->nr_preempt++; RCU_qsctr(task_cpu(prev))++; +#ifdef CONFIG_CKRM_CPU_SCHEDULE + if (prev != rq->idle) { + unsigned long long run = now - prev->timestamp; + cpu_demand_event(get_task_local_stat(prev),CPU_DEMAND_DESCHEDULE,run); + update_local_cvt(prev, run); + } +#endif + prev->sleep_avg -= run_time; if ((long)prev->sleep_avg <= 0) { prev->sleep_avg = 0; @@ -2946,6 +2774,7 @@ switch_tasks: } EXPORT_SYMBOL(schedule); + #ifdef CONFIG_PREEMPT /* * this is is the entry point to schedule() from in-kernel preemption @@ -4095,7 +3924,9 @@ static int migration_thread(void * data) } if (rq->active_balance) { +#ifndef CONFIG_CKRM_CPU_SCHEDULE active_load_balance(rq, cpu); +#endif rq->active_balance = 0; } @@ -4570,6 +4401,9 @@ void __init sched_init(void) { runqueue_t *rq; int i; +#ifndef CONFIG_CKRM_CPU_SCHEDULE + int j, k; +#endif #ifdef CONFIG_SMP /* Set up an initial dummy domain for early boot */ @@ -4592,35 +4426,22 @@ void __init sched_init(void) for (i = 0; i < NR_CPUS; i++) { #ifndef CONFIG_CKRM_CPU_SCHEDULE - int j, k; prio_array_t *array; - +#endif rq = cpu_rq(i); spin_lock_init(&rq->lock); - for (j = 0; j < 2; j++) { - array = rq->arrays + j; - for (k = 0; k < MAX_PRIO; k++) { - INIT_LIST_HEAD(array->queue + k); - __clear_bit(k, array->bitmap); - } - // delimiter for bitsearch - __set_bit(MAX_PRIO, array->bitmap); - } +#ifndef CONFIG_CKRM_CPU_SCHEDULE rq->active = rq->arrays; rq->expired = rq->arrays + 1; #else - rq = cpu_rq(i); - spin_lock_init(&rq->lock); + rq->ckrm_cpu_load = 0; #endif rq->best_expired_prio = MAX_PRIO; #ifdef CONFIG_SMP rq->sd = &sched_domain_init; rq->cpu_load = 0; -#ifdef CONFIG_CKRM_CPU_SCHEDULE - ckrm_load_init(rq_ckrm_load(rq)); -#endif rq->active_balance = 0; rq->push_cpu = 0; rq->migration_thread = NULL; @@ -4629,6 +4450,17 @@ void __init sched_init(void) INIT_LIST_HEAD(&rq->hold_queue); atomic_set(&rq->nr_iowait, 0); +#ifndef CONFIG_CKRM_CPU_SCHEDULE + for (j = 0; j < 2; j++) { + array = rq->arrays + j; + for (k = 0; k < MAX_PRIO; k++) { + INIT_LIST_HEAD(array->queue + k); + __clear_bit(k, array->bitmap); + } + // delimiter for bitsearch + __set_bit(MAX_PRIO, array->bitmap); + } +#endif } /* @@ -4640,7 +4472,7 @@ void __init sched_init(void) rq->idle = current; set_task_cpu(current, smp_processor_id()); #ifdef CONFIG_CKRM_CPU_SCHEDULE - current->cpu_class = get_default_cpu_class(); + current->cpu_class = default_cpu_class; current->array = NULL; #endif wake_up_forked_process(current); @@ -4734,30 +4566,10 @@ EXPORT_SYMBOL(task_running_sys); #ifdef CONFIG_CKRM_CPU_SCHEDULE /** * return the classqueue object of a certain processor + * Note: not supposed to be used in performance sensitive functions */ struct classqueue_struct * get_cpu_classqueue(int cpu) { return (& (cpu_rq(cpu)->classqueue) ); } - -/** - * _ckrm_cpu_change_class - change the class of a task - */ -void _ckrm_cpu_change_class(task_t *tsk, struct ckrm_cpu_class *newcls) -{ - prio_array_t *array; - struct runqueue *rq; - unsigned long flags; - - rq = task_rq_lock(tsk,&flags); - array = tsk->array; - if (array) 
{
-		dequeue_task(tsk,array);
-		tsk->cpu_class = newcls;
-		enqueue_task(tsk,rq_active(tsk,rq));
-	} else
-		tsk->cpu_class = newcls;
-
-	task_rq_unlock(rq,&flags);
-}
 #endif
diff --git a/kernel/vserver/dlimit.c b/kernel/vserver/dlimit.c
index 3f58b7300..c7cbe7dc7 100644
--- a/kernel/vserver/dlimit.c
+++ b/kernel/vserver/dlimit.c
@@ -437,7 +437,3 @@ no_blim:
 	return;
 }
 
-#include 
-
-EXPORT_SYMBOL_GPL(locate_dl_info);
-EXPORT_SYMBOL_GPL(rcu_free_dl_info);
diff --git a/kernel/vserver/sysctl.c b/kernel/vserver/sysctl.c
index 79df3cff3..32fde9a93 100644
--- a/kernel/vserver/sysctl.c
+++ b/kernel/vserver/sysctl.c
@@ -158,6 +158,3 @@ static ctl_table vserver_table[] = {
 	{ .ctl_name = 0 }
 };
 
-#include 
-
-EXPORT_SYMBOL_GPL(vx_debug_dlimit);
-- 
2.47.0
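(Editor's note, placed below the patch trailer so it is clearly not part of the patch itself.) Both classqueue_get_head() and the removed classqueue_get_min_prio() in the kernel/ckrm_classqueue.c hunk earlier rely on the same circular scan of the priority bitmap: search from the queue's base offset, and wrap around to the beginning if nothing is set above it. A minimal stand-alone sketch of that scan, using a plain bit array instead of the kernel's find_next_bit()/find_first_bit() helpers:

#define SKETCH_QUEUE_SIZE 128			/* stands in for CLASSQUEUE_SIZE */
#define SKETCH_BPL (8 * sizeof(unsigned long))	/* bits per long */

/* Return the index of the first set bit at or after 'base', wrapping around
 * once; SKETCH_QUEUE_SIZE means the bitmap is empty. */
static int circular_first_set_sketch(const unsigned long *bitmap, int base)
{
	int i;

	for (i = base; i < SKETCH_QUEUE_SIZE; i++)
		if (bitmap[i / SKETCH_BPL] & (1UL << (i % SKETCH_BPL)))
			return i;
	for (i = 0; i < base; i++)
		if (bitmap[i / SKETCH_BPL] & (1UL << (i % SKETCH_BPL)))
			return i;
	return SKETCH_QUEUE_SIZE;
}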