From 2405d9c7d438e26dbf782e26edda03daaab279ac Mon Sep 17 00:00:00 2001 From: Planet-Lab Support Date: Fri, 16 Jul 2004 20:14:56 +0000 Subject: [PATCH] This commit was manufactured by cvs2svn to create branch 'ckrm'. --- include/linux/ckrm_classqueue.h | 129 ++++++++ include/linux/ckrm_sched.h | 297 +++++++++++++++++ kernel/ckrm/ckrm_cpu_class.c | 350 +++++++++++++++++++++ kernel/ckrm/ckrm_cpu_monitor.c | 542 ++++++++++++++++++++++++++++++++ kernel/ckrm_classqueue.c | 178 +++++++++++ kernel/ckrm_sched.c | 71 +++++ 6 files changed, 1567 insertions(+) create mode 100644 include/linux/ckrm_classqueue.h create mode 100644 include/linux/ckrm_sched.h create mode 100644 kernel/ckrm/ckrm_cpu_class.c create mode 100644 kernel/ckrm/ckrm_cpu_monitor.c create mode 100644 kernel/ckrm_classqueue.c create mode 100644 kernel/ckrm_sched.c diff --git a/include/linux/ckrm_classqueue.h b/include/linux/ckrm_classqueue.h new file mode 100644 index 000000000..1bdf9b775 --- /dev/null +++ b/include/linux/ckrm_classqueue.h @@ -0,0 +1,129 @@ +/* include/linux/ckrm_classqueue.h : cpu control for CKRM + * + * Copyright (C) Haoqiang Zheng, IBM Corp. 2003 + * (C) Hubertus Franke, IBM Corp. 2003 + * + * Circular queue functionality for CKRM cpu controller + * + * Latest version, more details at http://ckrm.sf.net + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + */ + +/* Changes + * + * Aug 28, 2003 + * Created. + * July 07, 2004 + * clean up, add comments + * + */ + +#ifndef _CKRM_CLASSQUEUE_H +#define _CKRM_CLASSQUEUE_H + +#include + +#define CLASSQUEUE_SIZE 128 +#define CQ_BITMAP_SIZE ((((CLASSQUEUE_SIZE+1+7)/8)+sizeof(long)-1)/sizeof(long)) + +/** + * struct cq_prio_array: duplicates prio_array defined in sched.c + * + * I duplicate this data structure to make ckrm_classqueue implementation more modular + */ +struct cq_prio_array { + int nr_active; + unsigned long bitmap[CQ_BITMAP_SIZE]; + struct list_head queue[CLASSQUEUE_SIZE]; +}; + +/** + * struct classqueue_struct - a runqueue of class local runqueues + * @array: priority array + * @base: base priority + * @base_offset: index in array for the base + * + * classqueue can be thought of as runqueue of classes (instead of runqueue of tasks) + * as task runqueue, each processor has a classqueue + * a class enters the classqueue when the first task in this class local runqueue shows up + * a class enters the classqueue when the last task in the local runqueue leaves + * class local runqueues are ordered based their priority + * + * status: + * hzheng: is 32bit base long enough? 
+ */ +struct classqueue_struct { + struct cq_prio_array array; + unsigned long base; + unsigned long base_offset; +}; + +/** + * struct cq_node_struct - the link object between class local runqueue and classqueue + * @list: links the class local runqueue to classqueue + * @prio: class priority, which is caculated based on it's progress (cvt) and urgency (top_priority) + * @index: real index into the classqueue array, calculated based on priority + * + * NOTE: make sure list is empty when it's not in classqueue + */ +struct cq_node_struct { + struct list_head list; + int prio; + int index; +}; +typedef struct cq_node_struct cq_node_t; + +typedef unsigned long long CVT_t; // cummulative virtual time + +static inline void cq_node_init(cq_node_t * node) +{ + node->prio = 0; + node->index = -1; + INIT_LIST_HEAD(&node->list); +} + +/*if the class is in classqueue*/ +static inline int cls_in_classqueue(cq_node_t * node) +{ + return !list_empty(&node->list); +} + +/*initialize the data structure*/ +int classqueue_init(struct classqueue_struct *cq); + +/*add the class to classqueue*/ +void classqueue_enqueue(struct classqueue_struct *cq, cq_node_t * node, int prio); + +/** + * classqueue_dequeue - remove the class from classqueue + * + * internal: + * called when the last task is removed from the queue + * checked on load balancing and schedule + * hzheng: why don't I call it on class_dequeue_task? + */ +void classqueue_dequeue(struct classqueue_struct *cq, cq_node_t * node); + +/*change the position of the class in classqueue*/ +void classqueue_update_prio(struct classqueue_struct *cq, cq_node_t * node, int new_prio); + +/*return the first class in classqueue*/ +cq_node_t *classqueue_get_head(struct classqueue_struct *cq); + +/*update the base priority of the classqueue*/ +void classqueue_update_base(struct classqueue_struct *cq, int new_base); + +/** + * class_compare_prio: compare the priority of this two nodes + */ +static inline int class_compare_prio(struct cq_node_struct* node1, struct cq_node_struct* node2) +{ + return ( node1->prio - node2->prio); +} + +#endif diff --git a/include/linux/ckrm_sched.h b/include/linux/ckrm_sched.h new file mode 100644 index 000000000..9d82214fb --- /dev/null +++ b/include/linux/ckrm_sched.h @@ -0,0 +1,297 @@ +/* include/linux/ckrm_sched.h - Supports CKRM scheduling + * + * Copyright (C) Haoqiang Zheng, IBM Corp. 2004 + * Copyright (C) Hubertus Franke, IBM Corp. 2004 + * + * Latest version, more details at http://ckrm.sf.net + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + */ + +#ifndef _CKRM_SCHED_H +#define _CKRM_SCHED_H + +#define CC_BUG_ON_DO(cond,action) do { if (cond) action; BUG_ON(cond); } while(0) +#define CC_BUG_ON(cond) BUG_ON(cond) + +#include +#include +#include + +//update every second +#define CVT_UPDATE_TICK (1*HZ/1 ?: 1) +#define CLASS_BONUS_RATE 22 // shift from ns to increase class bonus +#define PRIORITY_BONUS_RATE 0 // ?? 
Hubertus + +#define BITMAP_SIZE ((((MAX_PRIO+1+7)/8)+sizeof(long)-1)/sizeof(long)) +struct prio_array { + int nr_active; + unsigned long bitmap[BITMAP_SIZE]; + struct list_head queue[MAX_PRIO]; +}; + +struct ckrm_local_runqueue { + cq_node_t classqueue_linkobj; /*links in classqueue */ + struct ckrm_cpu_class *cpu_class; // class it belongs to + struct classqueue_struct *classqueue; // classqueue it belongs tow + CVT_t uncounted_cvt; + unsigned long long uncounted_ns; + + prio_array_t *active, *expired, arrays[2]; + /* + set to 0 on init, become null or array switch + set to jiffies whenever an non-interactive job expires + reset to jiffies if expires + */ + unsigned long expired_timestamp; + + /* + * highest priority of tasks in active + * initialized to be MAX_PRIO + * updated on enqueue, dequeue + */ + int top_priority; + CVT_t local_cvt; // snapshot of local_cvt, update on every loadbalance + unsigned long magic; //for debugging +}; + +/** + * @last_sleep: the last time it sleeps, last_sleep = 0 when not sleeping + */ +struct ckrm_cpu_class_local_stat { + unsigned long long run; + unsigned long long total; + unsigned long long last_sleep; + unsigned long cpu_demand; /*estimated cpu demand */ +}; + +/** + * ckrm_cpu_class_stat - cpu usage statistics maintained for each class + * + */ +struct ckrm_cpu_class_stat { + spinlock_t stat_lock; + + unsigned long long total_ns; /*how much nano-secs it has consumed */ + + struct ckrm_cpu_class_local_stat local_stats[NR_CPUS]; + unsigned long cpu_demand; + + /*temp stat used by cpu monitor */ + int effective_guarantee; + int effective_limit; + int glut; //true or false + /* + * effective_share: for both default class and its children + * self_effective_share: just for the default class + */ + int effective_share; + int self_effective_share; +}; + +typedef struct ckrm_cpu_class_stat ckrm_stat_t; + +/* + * manages the class status + * there should be only one instance of this object for each class in the whole system + */ +struct ckrm_cpu_class { + struct ckrm_core_class *core; + struct ckrm_core_class *parent; + struct ckrm_shares shares; + spinlock_t cnt_lock; // always grab parent's lock first and then child's + CVT_t global_cvt; // total cummulative virtual time + struct ckrm_cpu_class_stat stat; + struct list_head links; // for linking up in cpu classes + struct ckrm_local_runqueue local_queues[NR_CPUS]; // runqueues +}; + +#if CONFIG_CKRM_CPU_SCHEDULE +#define rq_active(p,rq) (get_task_class_queue(p)->active) +#define rq_expired(p,rq) (get_task_class_queue(p)->expired) +#else +#define rq_active(p,rq) (rq->active) +#define rq_expired(p,rq) (rq->expired) +#endif + +//#define cpu_class_weight(cls) (cls->shares.my_guarantee) +#define cpu_class_weight(cls) (cls->stat.self_effective_share) + +#define bpt_queue(cpu) (& (cpu_rq(cpu)->classqueue) ) +CVT_t get_min_cvt(int cpu); + +struct classqueue_struct *get_cpu_classqueue(int cpu); + +extern struct ckrm_cpu_class default_cpu_class_obj; +#define default_cpu_class (&default_cpu_class_obj) + +#define local_queue_nr_running(local_queue) \ + (local_queue->active->nr_active + local_queue->expired->nr_active) + +static inline struct ckrm_local_runqueue * +get_ckrm_local_runqueue(struct ckrm_cpu_class*cls, int cpu) +{ + return &(cls->local_queues[cpu]); +} + +static inline struct ckrm_local_runqueue *get_task_class_queue(struct task_struct *p) +{ + return &(p->cpu_class->local_queues[task_cpu(p)]); +} + +#define task_list_entry(list) list_entry(list,struct task_struct,run_list) +#define 
class_list_entry(list) list_entry(list,struct ckrm_local_runqueue,classqueue_linkobj) + +/* some additional interfaces exported from sched.c */ +struct runqueue; +void dequeue_task(struct task_struct *p, prio_array_t * array); +void enqueue_task(struct task_struct *p, prio_array_t * array); +struct runqueue *task_rq_lock(task_t * p, unsigned long *flags); +void task_rq_unlock(struct runqueue *rq, unsigned long *flags); +extern spinlock_t cvt_lock; +extern rwlock_t class_list_lock; +extern struct list_head active_cpu_classes; + +/*functions exported by ckrm_cpu_class.c*/ +int __init init_ckrm_sched_res(void); +void init_cpu_classes(void); + +/*functions exported by ckrm_cpu_monitor.c*/ +void ckrm_cpu_monitor(void); +void ckrm_cpu_stat_init(struct ckrm_cpu_class_stat *stat); +#define CPU_DEMAND_ENQUEUE 0 +#define CPU_DEMAND_DEQUEUE 1 +#define CPU_DEMAND_DESCHEDULE 2 +void cpu_demand_event(struct ckrm_cpu_class_local_stat* local_stat, int event, unsigned long long len); + +#define get_task_local_stat(p) (&(p)->cpu_class->stat.local_stats[task_cpu(p)]) +#define get_rq_local_stat(lrq,cpu) (&(lrq)->cpu_class->stat.local_stats[cpu]) + +/** + * get_effective_prio: return the effective priority of a class local queue + * + * class priority = progress * a + urgency * b + * progress = queue cvt + * urgency = queue top priority + * a and b are scaling factors + * currently, prio increases by 1 if either: top_priority increase by one + * or, local_cvt increases by 4ms + */ +static inline int get_effective_prio(struct ckrm_local_runqueue * lcq) +{ + int prio; + + // cumulative usage + prio = lcq->local_cvt >> CLASS_BONUS_RATE; + // queue urgency + prio += lcq->top_priority >> PRIORITY_BONUS_RATE; + + return prio; +} + +/** + * update_class_priority: + * + * called whenever cvt or top_priority changes + * + * internal: (calling structure) + * update_class_priority + * -- set_top_priority + * -- class_enqueue_task + * -- class_dequeue_task + * -- rq_get_next_task (queue switch) + * -- update_local_cvt + * -- schedule + * -- update_global_cvt + */ +static inline void update_class_priority(struct ckrm_local_runqueue *local_rq) +{ + int effective_prio = get_effective_prio(local_rq); + classqueue_update_prio(local_rq->classqueue, + &local_rq->classqueue_linkobj, + effective_prio); +} + +/* + * set the new top priority and reposition the queue + * called when: task enqueue/dequeue and queue switch + */ +static inline void set_top_priority(struct ckrm_local_runqueue *class_queue, + int new_priority) +{ + class_queue->top_priority = new_priority; + update_class_priority(class_queue); +} + +static inline void class_enqueue_task(struct task_struct *p, + prio_array_t * array) +{ + struct ckrm_local_runqueue *queue; + int effective_prio; + + queue = get_task_class_queue(p); + + if (! 
cls_in_classqueue(&queue->classqueue_linkobj)) { + cpu_demand_event(get_task_local_stat(p),CPU_DEMAND_ENQUEUE,0); + /*make sure the cvt of this class is up to date*/ + queue->local_cvt = get_min_cvt(task_cpu(p)); + effective_prio = get_effective_prio(queue); + classqueue_enqueue(queue->classqueue, &queue->classqueue_linkobj, effective_prio); + } + + if ((p->prio < queue->top_priority) && (array == queue->active)) + set_top_priority(queue, p->prio); + +} + +static inline void class_dequeue_task(struct task_struct *p, + prio_array_t * array) +{ + struct ckrm_local_runqueue *queue = get_task_class_queue(p); + + if ((array == queue->active) && (p->prio == queue->top_priority) + && list_empty(&(array->queue[p->prio]))) + set_top_priority(queue, + find_next_bit(array->bitmap, MAX_PRIO, + p->prio)); +} + +/* + * called after a task is switched out. Update the local cvt accounting + * we need to stick with long instead of long long due to nonexistent 64-bit division + */ +static inline void update_local_cvt(struct task_struct *p, unsigned long nsec) +{ + struct ckrm_local_runqueue *class_queue = get_task_class_queue(p); + struct ckrm_cpu_class *cls = class_queue->cpu_class; + + unsigned long cvt_inc = nsec / cpu_class_weight(cls); + + class_queue->local_cvt += cvt_inc; + class_queue->uncounted_cvt += cvt_inc; + + class_queue->uncounted_ns += nsec; + update_class_priority(class_queue); +} + +/* + * called during loadbalancing + * to charge the class with locally accumulated cvt + */ +void update_global_cvts(int this_cpu); + +/** + * + */ +static inline int class_preempts_curr(struct task_struct * p, struct task_struct* curr) +{ + struct cq_node_struct* node1 = &(get_task_class_queue(p)->classqueue_linkobj); + struct cq_node_struct* node2 = &(get_task_class_queue(curr)->classqueue_linkobj); + + return (class_compare_prio(node1,node2) < 0); +} +#endif diff --git a/kernel/ckrm/ckrm_cpu_class.c b/kernel/ckrm/ckrm_cpu_class.c new file mode 100644 index 000000000..0ded7f3c6 --- /dev/null +++ b/kernel/ckrm/ckrm_cpu_class.c @@ -0,0 +1,350 @@ +/* kernel/ckrm/ckrm_cpu_class.c - CPU Class resource controller for CKRM + * + * Copyright (C) Haoqiang Zheng, IBM Corp. 2004 + * (C) Hubertus Franke, IBM Corp. 2004 + * + * Latest version, more details at http://ckrm.sf.net + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +struct ckrm_res_ctlr cpu_rcbs; + +/* + * initialize a class object and its local queues + */ + static void init_cpu_class(struct ckrm_cpu_class *cls,ckrm_shares_t* shares) +{ + int i,j,k; + prio_array_t *array; + struct ckrm_local_runqueue* queue; + + for (i = 0 ; i < NR_CPUS ; i++) { + queue = &cls->local_queues[i]; + queue->active = queue->arrays; + queue->expired = queue->arrays+1; + + for (j = 0; j < 2; j++) { + array = queue->arrays + j; + for (k = 0; k < MAX_PRIO; k++) { + INIT_LIST_HEAD(array->queue + k); + __clear_bit(k, array->bitmap); + } + // delimiter for bitsearch + __set_bit(MAX_PRIO, array->bitmap); + array->nr_active = 0; + } + + queue->expired_timestamp = 0; + + queue->cpu_class = cls; + queue->classqueue = get_cpu_classqueue(i); + queue->top_priority = MAX_PRIO; + cq_node_init(&queue->classqueue_linkobj); + queue->local_cvt = 0; + queue->uncounted_cvt = 0; + queue->uncounted_ns = 0; + queue->magic = 0x43FF43D7; + } + + cls->shares = *shares; + cls->global_cvt = 0; + cls->cnt_lock = SPIN_LOCK_UNLOCKED; + ckrm_cpu_stat_init(&cls->stat); + + // add to class list + write_lock(&class_list_lock); + list_add(&cls->links,&active_cpu_classes); + write_unlock(&class_list_lock); +} + +static inline void set_default_share(ckrm_shares_t *shares) +{ + shares->my_guarantee = 0; + shares->my_limit = CKRM_SHARE_DFLT_MAX_LIMIT; + shares->total_guarantee = CKRM_SHARE_DFLT_TOTAL_GUARANTEE; + shares->max_limit = CKRM_SHARE_DFLT_MAX_LIMIT; + shares->unused_guarantee = CKRM_SHARE_DFLT_TOTAL_GUARANTEE; + shares->cur_max_limit = CKRM_SHARE_DFLT_MAX_LIMIT; +} + +struct ckrm_cpu_class * ckrm_get_cpu_class(struct ckrm_core_class *core) { + return ckrm_get_res_class(core, cpu_rcbs.resid, struct ckrm_cpu_class); +} + + +void* ckrm_alloc_cpu_class(struct ckrm_core_class *core, struct ckrm_core_class *parent) +{ + struct ckrm_cpu_class *cls; + + if (! parent) /*root class*/ + cls = default_cpu_class; + else + cls = (struct ckrm_cpu_class *) kmalloc(sizeof(struct ckrm_cpu_class),GFP_ATOMIC); + + if (cls) { + ckrm_shares_t shares; + if ((! 
parent) && (core)) { + /* + * the default class is already initialized + * so only update the core structure + */ + cls->core = core; + } else { + set_default_share(&shares); + init_cpu_class(cls,&shares); + cls->core = core; + cls->parent = parent; + } + } else + printk("alloc_cpu_class failed GFP_ATOMIC\n"); + + return cls; +} + +/* + * hzheng: this is not a stable implementation + * need to check race condition issue here + */ +static void ckrm_free_cpu_class(void *my_res) +{ + struct ckrm_cpu_class *cls = my_res, *parres, *childres; + ckrm_core_class_t *child = NULL; + int maxlimit; + + if (!cls) + return; + + /*the default class can't be freed*/ + if (cls == default_cpu_class) + return; + + // Assuming there will be no children when this function is called + parres = ckrm_get_cpu_class(cls->parent); + + // return child's limit/guarantee to parent node + spin_lock(&parres->cnt_lock); + child_guarantee_changed(&parres->shares, cls->shares.my_guarantee, 0); + // run thru parent's children and get the new max_limit of the parent + ckrm_lock_hier(parres->core); + maxlimit = 0; + while ((child = ckrm_get_next_child(parres->core, child)) != NULL) { + childres = ckrm_get_cpu_class(child); + if (maxlimit < childres->shares.my_limit) { + maxlimit = childres->shares.my_limit; + } + } + ckrm_unlock_hier(parres->core); + if (parres->shares.cur_max_limit < maxlimit) { + parres->shares.cur_max_limit = maxlimit; + } + + spin_unlock(&parres->cnt_lock); + + write_lock(&class_list_lock); + list_del(&cls->links); + write_unlock(&class_list_lock); + + kfree(cls); +} + +/* + * the system will adjust to the new share automatically + */ +int ckrm_cpu_set_share(void *my_res, struct ckrm_shares *new_share) +{ + struct ckrm_cpu_class *parres, *cls = my_res; + struct ckrm_shares *cur = &cls->shares, *par; + int rc = -EINVAL; + + if (!cls) + return rc; + + if (cls->parent) { + parres = ckrm_get_cpu_class(cls->parent); + spin_lock(&parres->cnt_lock); + spin_lock(&cls->cnt_lock); + par = &parres->shares; + } else { + spin_lock(&cls->cnt_lock); + par = NULL; + parres = NULL; + } + + rc = set_shares(new_share, cur, par); + + spin_unlock(&cls->cnt_lock); + if (cls->parent) { + spin_unlock(&parres->cnt_lock); + } + return rc; +} + +/* + * translate the global_CVT to ticks + */ +static int ckrm_cpu_get_share(void *my_res, + struct ckrm_shares *shares) +{ + struct ckrm_cpu_class *cls = my_res; + + if (!cls) + return -EINVAL; + *shares = cls->shares; + return 0; +} + +int ckrm_cpu_get_stats(void *my_res, struct seq_file * sfile) +{ + struct ckrm_cpu_class *cls = my_res; + + if (!cls) + return -EINVAL; + + seq_printf(sfile, "-------- CPU Class Status Start---------\n"); + seq_printf(sfile, " gua= %d limit= %d\n", + cls->shares.my_guarantee, + cls->shares.my_limit); + seq_printf(sfile, " total_gua= %d limit= %d\n", + cls->shares.total_guarantee, + cls->shares.max_limit); + seq_printf(sfile, " used_gua= %d cur_limit= %d\n", + cls->shares.unused_guarantee, + cls->shares.cur_max_limit); + + seq_printf(sfile, " Share= %d\n",cpu_class_weight(cls)); + seq_printf(sfile, " cvt= %llu\n",cls->local_queues[0].local_cvt); + seq_printf(sfile, " total_ns= %llu\n",cls->stat.total_ns); + seq_printf(sfile, " prio= %d\n",cls->local_queues[0].classqueue_linkobj.prio); + seq_printf(sfile, " index= %d\n",cls->local_queues[0].classqueue_linkobj.index); + seq_printf(sfile, " run= %llu\n",cls->stat.local_stats[0].run); + seq_printf(sfile, " total= %llu\n",cls->stat.local_stats[0].total); + seq_printf(sfile, " cpu_demand= 
%lu\n",cls->stat.cpu_demand); + + seq_printf(sfile, " effective_guarantee= %d\n",cls->stat.effective_guarantee); + seq_printf(sfile, " effective_limit= %d\n",cls->stat.effective_limit); + seq_printf(sfile, " effective_share= %d\n",cls->stat.effective_share); + seq_printf(sfile, "-------- CPU Class Status END ---------\n"); + + + return 0; +} + +/* + * task will remain in the same cpu but on a different local runqueue + */ +static void ckrm_cpu_change_class(void *task, void *old, void *new) +{ + struct task_struct *tsk = task; + struct ckrm_cpu_class *newcls = new; + unsigned long flags; + struct runqueue *rq; + prio_array_t *array; + + /*sanity checking*/ + if (!task || ! old || !new) + return; + + rq = task_rq_lock(tsk,&flags); + array = tsk->array; + if (array) { + dequeue_task(tsk,array); + tsk->cpu_class = newcls; + enqueue_task(tsk,rq_active(tsk,rq)); + } else { + tsk->cpu_class = newcls; + } + task_rq_unlock(rq,&flags); +} + +/*dummy function, not used*/ +static int ckrm_cpu_show_config(void *my_res, struct seq_file *sfile) +{ + struct ckrm_cpu_class *cls = my_res; + + if (!cls) + return -EINVAL; + + seq_printf(sfile, "cls=%s,parameter=somevalue\n","ckrm_cpu class"); + return 0; +} + +/*dummy function, not used*/ +static int ckrm_cpu_set_config(void *my_res, const char *cfgstr) +{ + struct ckrm_cpu_class *cls = my_res; + + if (!cls) + return -EINVAL; + printk("ckrm_cpu config='%s'\n",cfgstr); + return 0; +} + +struct ckrm_res_ctlr cpu_rcbs = { + .res_name = "CKRM CPU Class", + .res_hdepth = 1, + .resid = -1, + .res_alloc = ckrm_alloc_cpu_class, + .res_free = ckrm_free_cpu_class, + .set_share_values = ckrm_cpu_set_share, + .get_share_values = ckrm_cpu_get_share, + .get_stats = ckrm_cpu_get_stats, + .show_config = ckrm_cpu_show_config, + .set_config = ckrm_cpu_set_config, + .change_resclass = ckrm_cpu_change_class, +}; + +int __init init_ckrm_sched_res(void) +{ + struct ckrm_classtype *clstype; + int resid = cpu_rcbs.resid; + + clstype = ckrm_find_classtype_by_name("taskclass"); + if (clstype == NULL) { + printk(KERN_INFO" Unknown ckrm classtype"); + return -ENOENT; + } + + if (resid == -1) { /*not registered */ + resid = ckrm_register_res_ctlr(clstype,&cpu_rcbs); + printk("........init_ckrm_sched_res , resid= %d\n",resid); + } + return 0; +} + +/* + * initialize the class structure + * add the default class: class 0 + */ +void init_cpu_classes(void) +{ + int i; + + //init classqueues for each processor + for (i=0; i < NR_CPUS; i++) + classqueue_init(get_cpu_classqueue(i)); +/* + * hzheng: initialize the default cpu class + * required for E14 since ckrm_init is called after sched_init + */ + ckrm_alloc_cpu_class(NULL,NULL); +} + + +EXPORT_SYMBOL(ckrm_get_cpu_class); diff --git a/kernel/ckrm/ckrm_cpu_monitor.c b/kernel/ckrm/ckrm_cpu_monitor.c new file mode 100644 index 000000000..674ee6e50 --- /dev/null +++ b/kernel/ckrm/ckrm_cpu_monitor.c @@ -0,0 +1,542 @@ +/* ckrm_cpu_monitor.c - Hierarchical CKRM CPU Resource Monitor + * + * Copyright (C) Haoqiang Zheng, IBM Corp. 2004 + * (C) Hubertus Franke, IBM Corp. 2004 + * + * Latest version, more details at http://ckrm.sf.net + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ *
+ */
+
+/* Changes
+ *
+ * 23 June 2004: Created
+ *
+ */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#define CPU_MONITOR_INTERVAL (4*HZ) /*how often do we adjust the shares*/
+#define CKRM_SHARE_ACCURACY 7
+#define CKRM_SHARE_MAX (1<<CKRM_SHARE_ACCURACY)
+
+void ckrm_cpu_stat_init(struct ckrm_cpu_class_stat *stat)
+{
+	int i;
+	struct ckrm_cpu_class_local_stat *local_stat;
+	unsigned long long now = sched_clock();
+
+	stat->stat_lock = SPIN_LOCK_UNLOCKED;
+	stat->total_ns = 0;
+	stat->cpu_demand = 0;
+
+	for (i = 0; i < NR_CPUS; i++) {
+		local_stat = &stat->local_stats[i];
+		local_stat->run = 0;
+		local_stat->total = 0;
+		local_stat->last_sleep = now;
+		local_stat->cpu_demand = 0;
+	}
+
+	stat->effective_guarantee = 0;
+	stat->effective_limit = 0;
+	stat->glut = 0;
+	stat->effective_share = 100;
+	stat->self_effective_share = 100;
+}
+/**********************************************/
+/* cpu demand */
+/**********************************************/
+
+/*
+ * How CPU demand is calculated:
+ * consider the class local runqueue (clr) first
+ * at any time, a clr can be in one of the following three states
+ * -- run: a task belonging to this class is running on this cpu
+ * -- wait: at least one of its tasks is runnable, but the class is not running
+ * -- sleep: none of the tasks of this class is runnable
+ *
+ * cpu_demand(t1,t2) = r(t1,t2)/(r(t1,t2)+s(t1,t2))
+ *
+ * the cpu_demand of a class =
+ * sum of the cpu_demand of all its class local runqueues
+ */
+
+/**
+ * update_cpu_demand - record a state change
+ *
+ * should be called whenever the state of a local queue changes
+ * -- on deschedule: report how long the class ran
+ * -- on enqueue: report how long it slept
+ *
+ * to deal with excessively long run/sleep states
+ * -- whenever ckrm_cpu_monitor is called, check if the class is in the sleep
+ *    state; if so, update the sleep record
+ */
+#define CKRM_CPU_DEMAND_RUN 0
+#define CKRM_CPU_DEMAND_SLEEP 1
+//how often should we recalculate the cpu demand, in ns
+#define CPU_DEMAND_CAL_THRESHOLD (1000000000LL)
+static inline void update_local_cpu_demand(struct ckrm_cpu_class_local_stat *local_stat, int state, unsigned long long len)
+{
+	local_stat->total += len;
+	if (state == CKRM_CPU_DEMAND_RUN)
+		local_stat->run += len;
+
+	if (local_stat->total >= CPU_DEMAND_CAL_THRESHOLD) {
+		local_stat->total >>= CKRM_SHARE_ACCURACY;
+		if (local_stat->total > 0xFFFFFFFF)
+			local_stat->total = 0xFFFFFFFF;
+
+		do_div(local_stat->run, (unsigned long)local_stat->total);
+		local_stat->cpu_demand += local_stat->run;
+		local_stat->cpu_demand >>= 1;
+		local_stat->total = 0;
+		local_stat->run = 0;
+	}
+}
+
+static inline void cpu_demand_update_run(struct ckrm_cpu_class_local_stat *local_stat, unsigned long long len)
+{
+	update_local_cpu_demand(local_stat, CKRM_CPU_DEMAND_RUN, len);
+}
+
+static inline void cpu_demand_update_sleep(struct ckrm_cpu_class_local_stat *local_stat, unsigned long long len)
+{
+	update_local_cpu_demand(local_stat, CKRM_CPU_DEMAND_SLEEP, len);
+}
+
+#define CPU_DEMAND_ENQUEUE 0
+#define CPU_DEMAND_DEQUEUE 1
+#define CPU_DEMAND_DESCHEDULE 2
+
+/**
+ * cpu_demand_event - a cpu_demand event occurred
+ * @event: one of the following three events:
+ *    CPU_DEMAND_ENQUEUE: local class enqueue
+ *    CPU_DEMAND_DEQUEUE: local class dequeue
+ *    CPU_DEMAND_DESCHEDULE: a task belonging to this local class was descheduled
+ * @len: valid only for CPU_DEMAND_DESCHEDULE, how long the task has run
+ */
+void cpu_demand_event(struct ckrm_cpu_class_local_stat *local_stat, int event, unsigned long long len)
+{
+	switch (event) {
+	case CPU_DEMAND_ENQUEUE:
+		len = sched_clock() - local_stat->last_sleep;
+		local_stat->last_sleep = 0;
+
cpu_demand_update_sleep(local_stat,len); + break; + case CPU_DEMAND_DEQUEUE: + local_stat->last_sleep = sched_clock(); + break; + case CPU_DEMAND_DESCHEDULE: + cpu_demand_update_run(local_stat,len); + break; + default: + BUG(); + } +} + +/** + * check all the class local queue + * if local queueu is not in runqueue, then it's in sleep state + * if compare to last sleep, + */ +static inline void cpu_demand_check_sleep(struct ckrm_cpu_class_stat *stat, int cpu) +{ + struct ckrm_cpu_class_local_stat * local_stat = &stat->local_stats[cpu]; + unsigned long long sleep,now; + if (local_stat->last_sleep) { + now = sched_clock(); + sleep = now - local_stat->last_sleep; + local_stat->last_sleep = now; + cpu_demand_update_sleep(local_stat,sleep); + } +} + +/** + *get_self_cpu_demand - get cpu demand of the class itself (excluding children) + * + * self_cpu_demand = sum(cpu demand of all local queues) + */ +static unsigned long get_self_cpu_demand(struct ckrm_cpu_class_stat + *stat) +{ + int cpu_demand = 0; + int i; + + for_each_online_cpu(i) { + cpu_demand_check_sleep(stat,i); + cpu_demand += stat->local_stats[i].cpu_demand; + } + + if (cpu_demand > CKRM_SHARE_MAX) + cpu_demand = CKRM_SHARE_MAX; + return cpu_demand; +} + +/* + * update effective cpu demand for each class + * assume the root_core->parent == NULL + */ +static void update_cpu_demand(struct ckrm_core_class *root_core) +{ + struct ckrm_core_class *cur_core, *child_core; + struct ckrm_cpu_class *cls; + + cur_core = root_core; + child_core = NULL; + /* + * iterate the tree + * update cpu_demand of each node + */ + repeat: + if (!cur_core) + return; + + cls = ckrm_get_cpu_class(cur_core); + if (!child_core) //first child + cls->stat.cpu_demand = get_self_cpu_demand(&cls->stat); + else { + cls->stat.cpu_demand += + ckrm_get_cpu_class(child_core)->stat.cpu_demand; + if (cls->stat.cpu_demand > CKRM_SHARE_MAX) + cls->stat.cpu_demand = CKRM_SHARE_MAX; + } + + //next child + child_core = ckrm_get_next_child(cur_core, child_core); + if (child_core) { + //go down + cur_core = child_core; + child_core = NULL; + goto repeat; + } else { //no more child, go back + child_core = cur_core; + cur_core = child_core->hnode.parent; + } + goto repeat; +} + +/**********************************************/ +/* effective guarantee & limit */ +/**********************************************/ +static inline void set_effective_share(struct ckrm_cpu_class_stat *stat, + int new_share) +{ + if (!new_share) + new_share = 1; + stat->effective_share = new_share; +} + +static inline void set_self_effective_share(struct ckrm_cpu_class_stat *stat, + int new_share) +{ + if (!new_share) + new_share = 1; + stat->self_effective_share = new_share; +} + +static inline void update_child_effective(struct ckrm_core_class *parent) +{ + struct ckrm_cpu_class *p_cls = ckrm_get_cpu_class(parent); + struct ckrm_core_class *child_core = ckrm_get_next_child(parent, NULL); + + while (child_core) { + struct ckrm_cpu_class *c_cls = ckrm_get_cpu_class(child_core); + + c_cls->stat.effective_guarantee = + p_cls->stat.effective_guarantee * + c_cls->shares.my_guarantee / p_cls->shares.total_guarantee; + c_cls->stat.effective_limit = + p_cls->stat.effective_guarantee * c_cls->shares.my_limit / + p_cls->shares.total_guarantee; + + child_core = ckrm_get_next_child(parent, child_core); + }; + +} + +/* + * update effective guarantee and effective limit + * -- effective share = parent->effective->share * share/parent->total_share + * -- effective limit = parent->effective->share * 
limit/parent->total_share + * should be called only when class structure changed + */ +static void update_effective_guarantee_limit(struct ckrm_core_class *root_core) +{ + struct ckrm_core_class *cur_core, *child_core = NULL; + struct ckrm_cpu_class *cls; + + cur_core = root_core; + cls = ckrm_get_cpu_class(cur_core); + cls->stat.effective_guarantee = CKRM_SHARE_MAX; + cls->stat.effective_limit = cls->stat.effective_guarantee; + + repeat: + //check exit + if (!cur_core) + return; + + //visit this node + update_child_effective(cur_core); + //next child + child_core = ckrm_get_next_child(cur_core, child_core); + if (child_core) { + //go down + cur_core = child_core; + child_core = NULL; + goto repeat; + } else { //no more child, go back + child_core = cur_core; + cur_core = child_core->hnode.parent; + } + goto repeat; +} + +/**********************************************/ +/* surplus allocation */ +/**********************************************/ + +/* + * surplus = my_effective_share - demand + * if surplus < 0, surplus = 0 + */ +static inline int get_node_surplus(struct ckrm_cpu_class *cls) +{ + int surplus = cls->stat.effective_guarantee - cls->stat.cpu_demand; + + if (surplus < 0) + surplus = 0; + + return surplus; +} + +/* + * consume the surplus + * return how much consumed + * set glut when necessary + */ +static inline int node_surplus_consume(int old_surplus, + struct ckrm_core_class *child_core, + struct ckrm_cpu_class *p_cls) +{ + int consumed = 0; + int inc_limit; + + struct ckrm_cpu_class *c_cls = ckrm_get_cpu_class(child_core); + + if (c_cls->stat.glut) + goto out; + + //check demand + if (c_cls->stat.effective_share >= c_cls->stat.cpu_demand) { + c_cls->stat.glut = 1; + goto out; + } + + consumed = + old_surplus * c_cls->shares.my_guarantee / + p_cls->shares.total_guarantee; + + //check limit + inc_limit = c_cls->stat.effective_limit - c_cls->stat.effective_share; + if (inc_limit <= consumed) { + c_cls->stat.glut = 1; + consumed = inc_limit; + } + + c_cls->stat.effective_share += consumed; + out: + return consumed; +} + +/* + * re-allocate the shares for all the childs under this node + * task: + * 1. get total surplus + * 2. allocate surplus + * 3. 
set the effective_share of each node + */ +static void alloc_surplus_node(struct ckrm_core_class *parent) +{ + int total_surplus = 0, old_surplus = 0; + struct ckrm_cpu_class *p_cls = ckrm_get_cpu_class(parent); + struct ckrm_core_class *child_core = NULL; + int self_share; + + /* + * calculate surplus + * total_surplus = sum(child_surplus) + * reset glut flag + * initialize effective_share + */ + do { + child_core = ckrm_get_next_child(parent, child_core); + if (child_core) { + struct ckrm_cpu_class *c_cls = + ckrm_get_cpu_class(child_core); + ckrm_stat_t *stat = &c_cls->stat; + + total_surplus += get_node_surplus(c_cls); + stat->glut = 0; + set_effective_share(stat, stat->effective_guarantee); + } + } while (child_core); + + /*distribute the surplus */ + child_core = NULL; + do { + if (!child_core) //keep the surplus of last round + old_surplus = total_surplus; + + child_core = ckrm_get_next_child(parent, child_core); + if (child_core) { + total_surplus -= + node_surplus_consume(old_surplus, child_core, + p_cls); + } + //start a new round if something is allocated in the last round + } while (child_core || (total_surplus != old_surplus)); + + //any remaining surplus goes to the default class + self_share = p_cls->stat.effective_share * + p_cls->shares.unused_guarantee / p_cls->shares.total_guarantee; + self_share += total_surplus; + + set_self_effective_share(&p_cls->stat, self_share); +} + +/** + * alloc_surplus - reallocate unused shares + * + * class A's usused share should be allocated to its siblings + */ +static void alloc_surplus(struct ckrm_core_class *root_core) +{ + struct ckrm_core_class *cur_core, *child_core = NULL; + struct ckrm_cpu_class *cls; + + cur_core = root_core; + cls = ckrm_get_cpu_class(cur_core); + cls->stat.glut = 0; + set_effective_share(&cls->stat, cls->stat.effective_guarantee); + repeat: + //check exit + if (!cur_core) + return; + + //visit this node + alloc_surplus_node(cur_core); + //next child + child_core = ckrm_get_next_child(cur_core, child_core); + if (child_core) { + //go down + cur_core = child_core; + child_core = NULL; + goto repeat; + } else { //no more child, go back + child_core = cur_core; + cur_core = child_core->hnode.parent; + } + goto repeat; +} + +/** + *ckrm_cpu_monitor - adjust relative shares of the classes based on their progress + * + * this function is called every CPU_MONITOR_INTERVAL + * it computes the cpu demand of each class + * and re-allocate the un-used shares to other classes + */ +void ckrm_cpu_monitor(void) +{ + struct ckrm_core_class *root_core = default_cpu_class->core; + if (!root_core) + return; + + update_effective_guarantee_limit(root_core); + update_cpu_demand(root_core); + alloc_surplus(root_core); +} + +/*****************************************************/ +/* Supporting Functions */ +/*****************************************************/ +static pid_t cpu_monitor_pid = -1; +static int thread_exit = 0; + +static int ckrm_cpu_monitord(void *nothing) +{ + wait_queue_head_t wait; + + init_waitqueue_head(&wait); + + daemonize("ckrm_cpu_ctrld"); + for (;;) { + /*sleep for sometime before next try*/ + interruptible_sleep_on_timeout(&wait, CPU_MONITOR_INTERVAL); + ckrm_cpu_monitor(); + if (thread_exit) { + break; + } + } + cpu_monitor_pid = -1; + thread_exit = 2; + printk("cpu_monitord exit\n"); + return 0; +} + +void ckrm_start_monitor(void) +{ + cpu_monitor_pid = kernel_thread(ckrm_cpu_monitord, 0, CLONE_KERNEL); + if (cpu_monitor_pid < 0) { + printk("ckrm_cpu_monitord for failed\n"); + } +} + +void 
ckrm_kill_monitor(void) +{ + wait_queue_head_t wait; + int interval = HZ; + init_waitqueue_head(&wait); + + printk("killing process %d\n", cpu_monitor_pid); + if (cpu_monitor_pid > 0) { + thread_exit = 1; + while (thread_exit != 2) { + interruptible_sleep_on_timeout(&wait, interval); + } + } +} + +int ckrm_cpu_monitor_init(void) +{ + ckrm_start_monitor(); + return 0; +} + +void ckrm_cpu_monitor_exit(void) +{ + ckrm_kill_monitor(); +} + +module_init(ckrm_cpu_monitor_init); +module_exit(ckrm_cpu_monitor_exit); + +MODULE_AUTHOR("Haoqiang Zheng "); +MODULE_DESCRIPTION("Hierarchical CKRM CPU Resource Monitor"); +MODULE_LICENSE("GPL"); diff --git a/kernel/ckrm_classqueue.c b/kernel/ckrm_classqueue.c new file mode 100644 index 000000000..1929aaf4e --- /dev/null +++ b/kernel/ckrm_classqueue.c @@ -0,0 +1,178 @@ +/* kernel/ckrm_classqueue.c : implements the class queue + * + * Copyright (C) Haoqiang Zheng, IBM Corp. 2003 + * (C) Hubertus Franke, IBM Corp. 2003 + * + * Class queue functionality for CKRM cpu controller + * + * Latest version, more details at http://ckrm.sf.net + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + */ + +/* Changes + * + * Aug 28, 2003 + * Created. + * July 08, 2004 + * classqueue now has a fixed size + * major clean up + * function/structure names are changed to more intuitive ones + */ +#include +#include + +#define cq_nr_member(cq) (cq->array.nr_active) + +/** + * get_index - translate the logical priority to the real index in the queue + * + * validate the position + * a valid prio is [cq->base,cq->base + size -1] + */ +static inline unsigned long get_index(struct classqueue_struct *cq, int *prio) +{ + unsigned long index; + int max_prio; + + if (!cq_nr_member(cq)) + return 0; + + max_prio = cq->base + (CLASSQUEUE_SIZE - 1); + if (*prio > max_prio) + *prio = max_prio; + if (*prio < cq->base) + *prio = cq->base; + + index = (cq->base_offset + (*prio - cq->base)) ; + if (index >= CLASSQUEUE_SIZE) + index -= CLASSQUEUE_SIZE; + + return index; +} + +/** + * initialize a class queue object + */ +int classqueue_init(struct classqueue_struct *cq) +{ + int i; + struct cq_prio_array *array; + + array = &cq->array; + for (i = 0; i < CLASSQUEUE_SIZE; i++) { + INIT_LIST_HEAD(array->queue + i); + __clear_bit(i, array->bitmap); + } + // delimiter for bitsearch + __set_bit(CLASSQUEUE_SIZE, array->bitmap); + array->nr_active = 0; + + cq->base = 0; + cq->base_offset = -1; //not valid yet + + return 0; +} + +/** + *classqueue_enqueue - add the class to classqueue based on its prio + */ +void classqueue_enqueue(struct classqueue_struct *cq, + cq_node_t * node, int prio) +{ + int index; + + //get real index + if (cq_nr_member(cq)) { + index = get_index(cq, &prio); + } else { //the first one + cq->base = prio; + cq->base_offset = 0; + index = 0; + } + + //add to the queue + list_add(&(node->list), &cq->array.queue[index]); + __set_bit(index, cq->array.bitmap); + cq->array.nr_active++; + + node->index = index; + node->prio = prio; +} + +void classqueue_dequeue(struct classqueue_struct *cq, cq_node_t * node) +{ + //delete from queue + list_del_init(&(node->list)); + cq->array.nr_active--; + + //check clear the bitmap + if (list_empty(&cq->array.queue[node->index])) + __clear_bit(node->index, cq->array.bitmap); +} + +void classqueue_update_prio(struct classqueue_struct *cq, + 
cq_node_t * node, int new_pos) +{ + int index; + + if (! cls_in_classqueue(node)) + return; + + index = get_index(cq, &new_pos); + node->prio = new_pos; + + //remove from the original position + list_del_init(&(node->list)); + if (list_empty(&cq->array.queue[node->index])) + __clear_bit(node->index, cq->array.bitmap); + + //add to new positon, round robin for classes with same priority + list_add_tail(&(node->list), &cq->array.queue[index]); + __set_bit(index, cq->array.bitmap); + + node->index = index; +} + +cq_node_t *classqueue_get_head(struct classqueue_struct *cq) +{ + cq_node_t *result = NULL; + int pos; + + /* + * search over the bitmap to get the first class in the queue + */ + pos = find_next_bit(cq->array.bitmap, CLASSQUEUE_SIZE, cq->base_offset); + if (pos >= CLASSQUEUE_SIZE) { //do circular search from the beginning + pos = find_first_bit(cq->array.bitmap, CLASSQUEUE_SIZE); + } + + if (pos < CLASSQUEUE_SIZE) { + BUG_ON(list_empty(&cq->array.queue[pos])); + result = list_entry(cq->array.queue[pos].next, cq_node_t, list); + } + return result; +} + +/** + * Moving the end of queue forward + * the new_base here is logical, we need to translate to the abosule position + */ +void classqueue_update_base(struct classqueue_struct *cq, int new_base) +{ + if (!cq_nr_member(cq)) { + cq->base_offset = -1; //not defined + return; + } + + // assert(new_base >= cq->base); + + if (new_base > cq->base) { + cq->base_offset = get_index(cq, &new_base); + cq->base = new_base; + } +} diff --git a/kernel/ckrm_sched.c b/kernel/ckrm_sched.c new file mode 100644 index 000000000..ba716d4c5 --- /dev/null +++ b/kernel/ckrm_sched.c @@ -0,0 +1,71 @@ +/* kernel/ckrm_sched.c - Supporting functions for ckrm scheduling + * + * Copyright (C) Haoqiang Zheng, IBM Corp. 2004 + * (C) Hubertus Franke, IBM Corp. 2004 + * + * Latest version, more details at http://ckrm.sf.net + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + */ +#include +#include +#include + +/*******************************************************/ +/* CVT Management */ +/*******************************************************/ +#define CVT_WINDOW_SIZE (CLASSQUEUE_SIZE << CLASS_BONUS_RATE) +static CVT_t max_CVT = CVT_WINDOW_SIZE; + +/* + * Also ensure that the classes global cvt is upgraded to the + * minimum CVT in the system, as a class might not have run for a while + */ +static void update_global_cvt(struct ckrm_cpu_class *cpu_class, int cpu) +{ + struct ckrm_local_runqueue *class_queue = + get_ckrm_local_runqueue(cpu_class, cpu); + CVT_t min_cvt; + CVT_t local_cvt_old = class_queue->local_cvt; + + spin_lock(&cvt_lock); + if (class_queue->uncounted_cvt) { + cpu_class->global_cvt += class_queue->uncounted_cvt; + class_queue->uncounted_cvt = 0; + } + min_cvt = max_CVT - CVT_WINDOW_SIZE; + if (cpu_class->global_cvt < min_cvt) + cpu_class->global_cvt = min_cvt; + else if (cpu_class->global_cvt > max_CVT) + max_CVT = cpu_class->global_cvt; + +/* update local cvt from global cvt*/ +#if 0 + class_queue->local_cvt = cpu_class->global_cvt; +#endif + spin_unlock(&cvt_lock); + + if (class_queue->local_cvt != local_cvt_old) + update_class_priority(class_queue); +} + +/* + * class_list_lock must have been acquired + */ +void update_global_cvts(int this_cpu) +{ + struct ckrm_cpu_class *clsptr; + struct ckrm_local_runqueue *class_queue; + + /*for each class*/ + list_for_each_entry(clsptr, &active_cpu_classes, links) { + update_global_cvt(clsptr, this_cpu); + class_queue = get_ckrm_local_runqueue(clsptr, this_cpu); + clsptr->stat.total_ns += class_queue->uncounted_ns; + class_queue->uncounted_ns = 0; + } +} -- 2.43.0
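The stand-alone sketches below illustrate the scheduler arithmetic introduced by this patch; they are not part of the commit, and any name not quoted from the patch is invented for the example. This first one restates the window clamp in update_global_cvt() from kernel/ckrm_sched.c: a class that has slept for a long time is pulled up to max_CVT - CVT_WINDOW_SIZE so its stale virtual time cannot starve everyone else, while a class that has run ahead pushes max_CVT forward. The cvt_lock, the uncounted_cvt transfer and the static max_CVT variable are omitted, and clamp_global_cvt() is a made-up helper name.

/* User-space model of the global-CVT catch-up clamp (illustrative only). */
#include <stdio.h>

typedef unsigned long long CVT_t;

#define CLASSQUEUE_SIZE  128                 /* same constants as the patch */
#define CLASS_BONUS_RATE 22
#define CVT_WINDOW_SIZE  ((CVT_t)CLASSQUEUE_SIZE << CLASS_BONUS_RATE)

/* Pull a long-sleeping class up to the bottom of the CVT window; let a class
 * that ran ahead raise the window's top. */
static CVT_t clamp_global_cvt(CVT_t global_cvt, CVT_t *max_cvt)
{
	CVT_t min_cvt = *max_cvt - CVT_WINDOW_SIZE;

	if (global_cvt < min_cvt)
		global_cvt = min_cvt;
	else if (global_cvt > *max_cvt)
		*max_cvt = global_cvt;
	return global_cvt;
}

int main(void)
{
	CVT_t max_cvt = 10 * CVT_WINDOW_SIZE;
	CVT_t idle_class = 2 * CVT_WINDOW_SIZE;  /* slept for a long time */

	/* prints the window bottom: the idle class is caught up, not rewarded */
	printf("global_cvt -> %llu\n", clamp_global_cvt(idle_class, &max_cvt));
	return 0;
}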
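get_effective_prio() in include/linux/ckrm_sched.h ranks a class local runqueue by prio = (local_cvt >> CLASS_BONUS_RATE) + (top_priority >> PRIORITY_BONUS_RATE). With CLASS_BONUS_RATE = 22 and PRIORITY_BONUS_RATE = 0 as defined in that header, one priority step corresponds to 2^22 ns, about 4.2 ms of weighted CPU time, which is the "4ms" mentioned in the function's comment. A throwaway restatement of the arithmetic, not part of the patch:

/* Effective-priority arithmetic with the patch's scaling constants. */
#include <stdio.h>

#define CLASS_BONUS_RATE    22  /* shift from ns of cvt to priority units */
#define PRIORITY_BONUS_RATE 0   /* top_priority is used as-is */

static int effective_prio(unsigned long long local_cvt, int top_priority)
{
	int prio = (int)(local_cvt >> CLASS_BONUS_RATE);  /* cumulative usage */
	prio += top_priority >> PRIORITY_BONUS_RATE;      /* queue urgency */
	return prio;
}

int main(void)
{
	/* 8.4 ms of weighted cvt adds 2 steps on top of a top_priority of 115 */
	printf("prio = %d\n", effective_prio(8400000ULL, 115));
	return 0;
}

Lower values win: classqueue_get_head() picks the lowest occupied slot, so a class that has consumed less weighted CPU time, or holds a more urgent task, is scheduled first.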
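The priority-to-slot mapping used by get_index() in kernel/ckrm_classqueue.c is the part of the circular queue that is easiest to misread, so here is a small restatement (illustrative, not from the patch): a priority is first clamped into [base, base + CLASSQUEUE_SIZE - 1] and then placed on the 128-slot ring relative to base_offset. The struct and function names below are mine, and the empty-queue special case (the patch returns index 0 when the queue has no members) is dropped for brevity.

/* Stand-alone model of the classqueue priority -> ring-slot mapping. */
#include <stdio.h>

#define CLASSQUEUE_SIZE 128

struct ring {
	int base;         /* lowest logical priority the window can hold */
	int base_offset;  /* ring slot that 'base' currently maps to */
};

/* Clamp prio into the window, then translate it to a slot index. */
static unsigned int ring_index(const struct ring *cq, int *prio)
{
	int max_prio = cq->base + (CLASSQUEUE_SIZE - 1);
	unsigned int index;

	if (*prio > max_prio)
		*prio = max_prio;
	if (*prio < cq->base)
		*prio = cq->base;

	index = cq->base_offset + (*prio - cq->base);
	if (index >= CLASSQUEUE_SIZE)
		index -= CLASSQUEUE_SIZE;     /* wrap around the ring */
	return index;
}

int main(void)
{
	struct ring cq = { .base = 100, .base_offset = 120 };
	int prio = 110;  /* 10 above base -> slot (120 + 10) mod 128 = 2 */

	printf("prio %d -> slot %u\n", prio, ring_index(&cq, &prio));
	return 0;
}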
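update_local_cpu_demand() in kernel/ckrm/ckrm_cpu_monitor.c closes a measurement window roughly once per second of run+sleep time and folds the window's run/total ratio, rescaled to 0..CKRM_SHARE_MAX, into an exponential average. The sketch below is a user-space model of that arithmetic, not kernel code: it reuses CKRM_SHARE_ACCURACY = 7 and CPU_DEMAND_CAL_THRESHOLD = 1 s from the patch, but replaces do_div() with plain 64-bit division and drops the 32-bit clamp on the divisor.

/* User-space model of the per-cpu demand estimate (illustrative only). */
#include <stdio.h>

#define CKRM_SHARE_ACCURACY      7
#define CKRM_SHARE_MAX           (1 << CKRM_SHARE_ACCURACY)   /* 128 */
#define CPU_DEMAND_CAL_THRESHOLD 1000000000ULL                /* ~1 s in ns */

struct demand_stat {
	unsigned long long run;    /* ns spent running in the current window */
	unsigned long long total;  /* ns of run + sleep in the current window */
	unsigned long cpu_demand;  /* running average, 0..CKRM_SHARE_MAX */
};

/* Fold one run or sleep interval into the window; close the window at ~1 s. */
static void demand_update(struct demand_stat *s, int running, unsigned long long len)
{
	s->total += len;
	if (running)
		s->run += len;

	if (s->total >= CPU_DEMAND_CAL_THRESHOLD) {
		/* sample = run / (total / 128): the ratio scaled to 0..128 */
		unsigned long sample =
			(unsigned long)(s->run / (s->total >> CKRM_SHARE_ACCURACY));
		/* exponential average: new = (old + sample) / 2 */
		s->cpu_demand = (s->cpu_demand + sample) >> 1;
		s->total = 0;
		s->run = 0;
	}
}

int main(void)
{
	struct demand_stat s = { 0, 0, 0 };

	demand_update(&s, 1, 600000000ULL);   /* 600 ms running */
	demand_update(&s, 0, 400000000ULL);   /* 400 ms sleeping: closes the window */
	/* the window sample is ~60% of 128 (76); averaged with the initial 0 -> 38 */
	printf("cpu_demand = %lu of %d\n", s.cpu_demand, CKRM_SHARE_MAX);
	return 0;
}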
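update_child_effective() in ckrm_cpu_monitor.c pushes guarantees down the class tree with effective_guarantee(child) = effective_guarantee(parent) * my_guarantee / total_guarantee, and alloc_surplus_node() later redistributes whatever a child does not demand to its siblings (and ultimately to the parent's default class) using the same my_guarantee / total_guarantee ratios, capped by each child's effective limit. The sketch below models only the first, deterministic split for one parent and two children; the flat array and the helper name are invented, only the formula comes from the patch.

/* Model of the effective-guarantee split across one parent's children. */
#include <stdio.h>

#define CKRM_SHARE_MAX 128  /* scale used for the root class in the patch */

struct cls {
	int my_guarantee;         /* share requested under the parent */
	int effective_guarantee;  /* absolute share after propagation */
};

static void split_guarantee(int parent_effective, int total_guarantee,
                            struct cls *children, int n)
{
	for (int i = 0; i < n; i++)
		children[i].effective_guarantee =
			parent_effective * children[i].my_guarantee / total_guarantee;
}

int main(void)
{
	/* root owns the whole scale (128); children asked for 30 and 50 out of 100 */
	struct cls kids[2] = { { .my_guarantee = 30 }, { .my_guarantee = 50 } };

	split_guarantee(CKRM_SHARE_MAX, 100, kids, 2);
	printf("child0 = %d, child1 = %d of %d\n",
	       kids[0].effective_guarantee, kids[1].effective_guarantee,
	       CKRM_SHARE_MAX);
	return 0;
}

The remaining 20 units of guarantee (the unused_guarantee) plus any surplus the children do not demand end up in the parent's self_effective_share, which is what cpu_class_weight() uses when scaling a task's consumed nanoseconds into CVT.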