/* kernel/ckrm/ckrm_cpu_class.c - CPU Class resource controller for CKRM
 *
 * Copyright (C) Haoqiang Zheng,  IBM Corp. 2004
 *           (C) Hubertus Franke, IBM Corp. 2004
 *
 * Latest version, more details at http://ckrm.sf.net
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 */
#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <asm/errno.h>
#include <linux/sched.h>
#include <linux/ckrm.h>
#include <linux/ckrm_rc.h>
#include <linux/ckrm_tc.h>
#include <linux/ckrm_sched.h>
#include <linux/ckrm_classqueue.h>
#include <linux/seq_file.h>
#include <linux/parser.h>
#define CPU_CTRL_NAME "cpu"

struct ckrm_res_ctlr cpu_rcbs;

#define CKRM_CPU_USAGE_DETAIL_MAX 3
static int usage_detail = 3;    /* 0: show usage only
                                 * 1: also show share settings
                                 * 2: also show effective shares
                                 * 3: also show per runqueue stats
                                 */
static int ckrm_cpu_set_mode(enum ckrm_sched_mode mode);

/*
 * update effective share settings after:
 * -- a class is removed
 * -- a class share is changed
 * we don't need to call update_effectives() when adding a new class since
 * the default grt of a new class is 0
 * CAUTION: might need a lock here
 */
static inline void update_class_effectives(void)
{
        // update_effectives();
}
/**
 * insert_cpu_class - insert a class into the active_cpu_classes list
 * insert the class in decreasing order of class weight
 */
static inline void insert_cpu_class(struct ckrm_cpu_class *cls)
{
        list_add(&cls->links, &active_cpu_classes);
}
/*
 * initialize a class object and its local queues
 */

CVT_t get_min_cvt_locking(int cpu);
ckrm_lrq_t *rq_get_dflt_lrq(int cpu);
static void init_cpu_class_lrq(struct ckrm_cpu_class *cls,
                               int cpu, int isdflt)
{
        int j, k;
        ckrm_lrq_t *queue = cls->local_queues[cpu];

        queue->active  = queue->arrays;
        queue->expired = queue->arrays + 1;

        for (j = 0; j < 2; j++) {
                prio_array_t *array = queue->arrays + j;
                for (k = 0; k < MAX_PRIO; k++) {
                        INIT_LIST_HEAD(array->queue + k);
                        __clear_bit(k, array->bitmap);
                }
                // delimiter for bitsearch
                __set_bit(MAX_PRIO, array->bitmap);
        }

        queue->expired_timestamp = 0;
        queue->best_expired_prio = MAX_PRIO;

        queue->cpu_class = cls;
        queue->classqueue = get_cpu_classqueue(cpu);
        queue->top_priority = MAX_PRIO;
        cq_node_init(&queue->classqueue_linkobj);
        queue->local_cvt = isdflt ? 0 : get_min_cvt_locking(cpu);
        queue->local_weight = cpu_class_weight(cls);
        if (queue->local_weight == 0)
                queue->local_weight = 1;
        queue->over_weight = 0;
        queue->skewed_weight = CKRM_MAX_WEIGHT/2; /* otherwise class might starve on start */
        queue->uncounted_ns = 0;
        queue->magic = CKRM_LRQ_MAGIC;
}
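
/*
 * init_cpu_class - initialize the shares, statistics and per-CPU local
 * runqueues of a class, then link it onto the active class list.
 * The default class reuses the runqueues' built-in default lrqs; all other
 * classes allocate their own local queues.
 */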
void init_cpu_class(struct ckrm_cpu_class *cls, ckrm_shares_t *shares)
{
        int i;
        int isdflt;
        struct ckrm_cpu_class *dfltcls;

        dfltcls = get_default_cpu_class();
        isdflt = (cls == dfltcls);

        cls->shares = *shares;
        cls->cnt_lock = SPIN_LOCK_UNLOCKED;
        ckrm_cpu_stat_init(&cls->stat, isdflt ? CKRM_SHARE_MAX : 1);
        ckrm_usage_init(&cls->usage);
        cls->magic = CKRM_CPU_CLASS_MAGIC;

        memset(cls->local_queues, 0, NR_CPUS * sizeof(ckrm_lrq_t *));

        if (isdflt) {
                for (i = 0; i < NR_CPUS; i++) {
                        cls->local_queues[i] = rq_get_dflt_lrq(i);
                        init_cpu_class_lrq(cls, i, 1);
                }
        } else {
                for (i = 0; i < NR_CPUS; i++) {
                        cls->local_queues[i] = kmalloc(sizeof(ckrm_lrq_t),
                                                       GFP_KERNEL);
                        BUG_ON(cls->local_queues[i] == NULL);
                        init_cpu_class_lrq(cls, i, 0);
                }
        }

        write_lock(&class_list_lock);
        insert_cpu_class(cls);
        write_unlock(&class_list_lock);
}
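
/*
 * set_default_share - fill a ckrm_shares_t with the controller defaults:
 * no guarantee of its own and the maximum allowed limit.
 */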
static inline void set_default_share(ckrm_shares_t *shares)
{
        shares->my_guarantee = 0;
        shares->total_guarantee = CKRM_SHARE_DFLT_TOTAL_GUARANTEE;
        shares->unused_guarantee = CKRM_SHARE_DFLT_TOTAL_GUARANTEE;
        shares->my_limit = CKRM_SHARE_DFLT_MAX_LIMIT;
        shares->max_limit = CKRM_SHARE_DFLT_MAX_LIMIT;
        shares->cur_max_limit = 0;
}
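
/*
 * ckrm_get_cpu_class - map a core class to its cpu class; falls back to the
 * default cpu class when CKRM CPU scheduling is disabled, and returns NULL
 * for an invalid class.
 */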
struct ckrm_cpu_class *ckrm_get_cpu_class(struct ckrm_core_class *core)
{
        struct ckrm_cpu_class *cls;

        cls = ckrm_get_res_class(core, cpu_rcbs.resid, struct ckrm_cpu_class);
        if (valid_cpu_class(cls))
                return (ckrm_cpu_enabled() ? cls : get_default_cpu_class());
        else
                return NULL;
}
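
/*
 * ckrm_alloc_cpu_class - res_alloc callback: the root class maps onto the
 * statically allocated default cpu class, all other classes are allocated
 * here and initialized with default shares.
 */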
void *ckrm_alloc_cpu_class(struct ckrm_core_class *core,
                           struct ckrm_core_class *parent)
{
        struct ckrm_cpu_class *cls;

        if (!parent) /* root class */
                cls = get_default_cpu_class();
        else
                cls = (struct ckrm_cpu_class *) kmalloc(sizeof(struct ckrm_cpu_class), GFP_ATOMIC);

        if (cls) {
                ckrm_shares_t shares;
                if ((!parent) && (core)) {
                        /*
                         * the default class is already initialized
                         * so only update the core structure
                         */
                        cls->core = core;
                } else {
                        set_default_share(&shares);
                        init_cpu_class(cls, &shares);
                        cls->core = core;
                        cls->parent = parent;
                }
        } else
                printk(KERN_ERR "alloc_cpu_class failed\n");

        return cls;
}
void ckrm_cpu_class_queue_delete_sync(struct ckrm_cpu_class *clsptr);
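
/*
 * ckrm_free_cpu_class - res_free callback: return the class's guarantee to
 * its parent, recompute the parent's cur_max_limit, unlink the class and
 * free its local runqueues.  The default class is never freed.
 */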
static void ckrm_free_cpu_class(void *my_res)
{
        struct ckrm_cpu_class *cls = my_res, *parres, *childres;
        ckrm_core_class_t *child = NULL;
        int maxlimit;
        int i;

        /* the default class can't be freed */
        if (cls == get_default_cpu_class())
                return;

        // Assuming there will be no children when this function is called
        parres = ckrm_get_cpu_class(cls->parent);

        // return child's limit/guarantee to parent node
        spin_lock(&parres->cnt_lock);
        child_guarantee_changed(&parres->shares, cls->shares.my_guarantee, 0);
        // run thru parent's children and get the new max_limit of the parent
        ckrm_lock_hier(parres->core);
        maxlimit = 0;
        while ((child = ckrm_get_next_child(parres->core, child)) != NULL) {
                childres = ckrm_get_cpu_class(child);
                if (maxlimit < childres->shares.my_limit) {
                        maxlimit = childres->shares.my_limit;
                }
        }
        ckrm_unlock_hier(parres->core);
        if (parres->shares.cur_max_limit < maxlimit) {
                parres->shares.cur_max_limit = maxlimit;
        }
        spin_unlock(&parres->cnt_lock);

        write_lock(&class_list_lock);
        list_del(&cls->links);
        write_unlock(&class_list_lock);

        ckrm_cpu_class_queue_delete_sync(cls);

        /* free the per-CPU local runqueues allocated in init_cpu_class() */
        for (i = 0; i < NR_CPUS; i++) {
                ckrm_lrq_t *lrq = get_ckrm_lrq(cls, i);
                kfree(lrq);
        }
        kfree(cls);

        // call ckrm_cpu_monitor after class is removed
        if (ckrm_cpu_enabled())
                update_class_effectives();
}
/*
 * the system will adjust to the new share automatically
 */
int ckrm_cpu_set_share(void *my_res, struct ckrm_shares *new_share)
{
        struct ckrm_cpu_class *parres, *cls = my_res;
        struct ckrm_shares *cur = &cls->shares, *par;
        int rc;

        if (ckrm_cpu_disabled())
                return -ENOSYS;
        if (new_share->total_guarantee > CKRM_SHARE_MAX)
                return -EINVAL;

        if (cls->parent) {
                parres = ckrm_get_cpu_class(cls->parent);
                spin_lock(&parres->cnt_lock);
                spin_lock(&cls->cnt_lock);
                par = &parres->shares;
        } else {
                spin_lock(&cls->cnt_lock);
                par = NULL;
                parres = NULL;
        }
        /*
         * hzheng: CKRM_SHARE_DONTCARE should be handled
         */
        if (new_share->my_guarantee == CKRM_SHARE_DONTCARE)
                new_share->my_guarantee = 0;

        rc = set_shares(new_share, cur, par);
        if (!rc && cur->my_limit == CKRM_SHARE_DONTCARE)
                cur->my_limit = cur->max_limit;

        spin_unlock(&cls->cnt_lock);
        if (cls->parent)
                spin_unlock(&parres->cnt_lock);

        // call ckrm_cpu_monitor after the shares are changed
        update_class_effectives();
        return rc;
}
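
/*
 * ckrm_cpu_get_share - res_ctlr callback: copy the class's current share
 * settings back to the caller.
 */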
static int ckrm_cpu_get_share(void *my_res,
                              struct ckrm_shares *shares)
{
        struct ckrm_cpu_class *cls = my_res;

        if (ckrm_cpu_disabled())
                return -ENOSYS;
        *shares = cls->shares;
        return 0;
}
/*
 * obtain a sequence of <num> usage reports
 * returns the number of usages reported.
 *
 * report IN:  specifies the sequence of durations (in jiffies) to report,
 *             must be ordered (smallest first)
 *        OUT: returns the usage in each field
 */
int ckrm_cpu_get_usage(struct ckrm_cpu_class *clsptr,
                       int num, ulong report[])
{
        struct ckrm_usage *usage = &clsptr->usage;
        unsigned long long total = 0;
        int i, idx, cur, num_ofs;

        num_ofs = cur = i = 0;
        idx = usage->sample_pointer;

        for (num_ofs = 0; num_ofs < num; num_ofs++) {
                int nr_samples;
                int duration = report[num_ofs];
                unsigned long long totval = 0;

                nr_samples = duration / USAGE_SAMPLE_FREQ ?: 1;
                if (nr_samples > USAGE_MAX_HISTORY)
                        nr_samples = USAGE_MAX_HISTORY;

                /* walk backwards through the circular sample history */
                for (; i < nr_samples; i++) {
                        if (!idx)
                                idx = USAGE_MAX_HISTORY;
                        idx--;
                        total += usage->samples[idx];
                }
                totval = total * 1000;
                do_div(totval, NS_PER_SAMPLE);
                do_div(totval, nr_samples * cpus_weight(cpu_online_map));
                report[num_ofs] = totval;
        }
        return num;
}
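
/*
 * ckrm_cpu_get_stats - res_ctlr get_stats callback: report usage, share
 * settings, effective shares and per runqueue statistics, depending on the
 * configured usage_detail level.
 */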
int ckrm_cpu_get_stats(void *my_res, struct seq_file *sfile)
{
        struct ckrm_cpu_class *cls = my_res;
        struct ckrm_cpu_class_stat *stat = &cls->stat;
        ckrm_lrq_t *lrq;
        int i;
        ulong usage[3] = { 2*HZ, 10*HZ, 60*HZ };

        if (!cls || ckrm_cpu_disabled())
                return -EINVAL;

        ckrm_cpu_get_usage(cls, 3, usage);

        /* this will after full stabilization become the only cpu usage stats
         */
        seq_printf(sfile, "cpu-usage(2,10,60)= %lu %lu %lu\n",
                   usage[0], usage[1], usage[2]);

        if (usage_detail < 1)
                return 0;

        /* for the extended statistics we can decide whether to make the
         * additional statistics available over config options;
         * either way they should be reported in a more concise form.
         * during stabilization, this is OK
         */
        seq_printf(sfile, "-------- CPU Class Status Start---------\n");
        seq_printf(sfile, "Share:\n\tgrt= %d limit= %d total_grt= %d max_limit= %d\n",
                   cls->shares.my_guarantee,
                   cls->shares.my_limit,
                   cls->shares.total_guarantee,
                   cls->shares.max_limit);
        seq_printf(sfile, "\tunused_grt= %d cur_max_limit= %d\n",
                   cls->shares.unused_guarantee,
                   cls->shares.cur_max_limit);

        if (usage_detail < 2)
                goto out;

        seq_printf(sfile, "Effective:\n\tegrt= %d\n", stat->egrt);
        seq_printf(sfile, "\tmegrt= %d\n", stat->megrt);
        seq_printf(sfile, "\tehl= %d\n", stat->ehl);
        seq_printf(sfile, "\tmehl= %d\n", stat->mehl);
        seq_printf(sfile, "\teshare= %d\n", stat->eshare);
        seq_printf(sfile, "\tmeshare= %d\n", stat->meshare);
        seq_printf(sfile, "\tmax_demand= %lu\n", stat->max_demand);
        seq_printf(sfile, "\ttotal_ns= %llu\n", stat->total_ns);
        seq_printf(sfile, "\tusage(2,10,60)= %lu %lu %lu\n",
                   usage[0], usage[1], usage[2]);

        if (usage_detail < 3)
                goto out;

        /* provide per run queue information */
        for_each_online_cpu(i) {
                lrq = get_ckrm_lrq(cls, i);
                seq_printf(sfile, "\tlrq %d demand= %lu weight= %d "
                           "lrq_load= %lu cvt= %llu sav= %llu\n",
                           i, stat->local_stats[i].cpu_demand,
                           local_class_weight(lrq), lrq->lrq_load,
                           lrq->local_cvt, lrq->savings);
        }

out:
        seq_printf(sfile, "-------- CPU Class Status END ---------\n");
        return 0;
}
/*
 * the task will remain on the same cpu but move to a different local runqueue
 */
void ckrm_cpu_change_class(void *task, void *old, void *new)
{
        struct task_struct *tsk = task;
        struct ckrm_cpu_class *newcls = new;

        /* sanity checking */
        if (!task || !old || !new)
                return;

        if (ckrm_cpu_disabled())
                newcls = get_default_cpu_class();
        _ckrm_cpu_change_class(tsk, newcls);
}
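
/*
 * configuration interface: "mode=enabled|disabled" switches the CKRM CPU
 * scheduler on or off, "usage_detail=<n>" selects how much detail
 * ckrm_cpu_get_stats() reports.
 */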
enum config_token_t {
        config_usage_detail,    /* define usage level                      */
        config_disable,         /* always use default linux scheduling     */
                                /* effectively disables the ckrm scheduler */
        config_enable,          /* always uses ckrm scheduling behavior    */
        config_err              /* parsing error                           */
};

#define CKRM_SCHED_MODE_DISABLED_STR "disabled"
#define CKRM_SCHED_MODE_ENABLED_STR  "enabled"

static char *ckrm_sched_mode_str[] = {
        CKRM_SCHED_MODE_DISABLED_STR,
        CKRM_SCHED_MODE_ENABLED_STR
};

static match_table_t config_tokens = {
        { config_disable,      "mode=" CKRM_SCHED_MODE_DISABLED_STR },
        { config_enable,       "mode=" CKRM_SCHED_MODE_ENABLED_STR },
        { config_usage_detail, "usage_detail=%u" },
        { config_err,          NULL }
};
static int ckrm_cpu_show_config(void *my_res, struct seq_file *sfile)
{
        struct ckrm_cpu_class *cls = my_res;

        if (!cls)
                return -EINVAL;

        seq_printf(sfile, "res=%s,mode=%s",
                   CPU_CTRL_NAME, ckrm_sched_mode_str[ckrm_sched_mode]);
        if (!ckrm_cpu_disabled())  /* enabled || mixed */
                seq_printf(sfile, ",usage_detail=%u", usage_detail);
        seq_printf(sfile, "\n");
        return 0;
}
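
/*
 * ckrm_cpu_set_config - parse a comma separated configuration string
 * (e.g. "mode=enabled,usage_detail=2") and apply the requested scheduler
 * mode and usage_detail level.
 */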
static int ckrm_cpu_set_config(void *my_res, const char *cfgstr)
{
        struct ckrm_cpu_class *cls = my_res;
        char *p;
        char **cfgstr_p = (char **)&cfgstr;
        substring_t args[MAX_OPT_ARGS];
        int option = -1, token, rc;
        enum ckrm_sched_mode new_sched_mode;

        if (!cls)
                return -EINVAL;
        new_sched_mode = ckrm_sched_mode;

        while ((p = strsep(cfgstr_p, ",")) != NULL) {
                token = match_token(p, config_tokens, args);
                switch (token) {
                case config_usage_detail:
                        if (ckrm_cpu_disabled() ||
                            (match_int(&args[0], &option)) ||
                            (option > CKRM_CPU_USAGE_DETAIL_MAX))
                                return -EINVAL;
                        usage_detail = option;
                        break;
                case config_disable:
                        new_sched_mode = CKRM_SCHED_MODE_DISABLED;
                        break;
                case config_enable:
                        new_sched_mode = CKRM_SCHED_MODE_ENABLED;
                        break;
                case config_err:
                        return -EINVAL;
                }
        }
        rc = ckrm_cpu_set_mode(new_sched_mode);
        return rc;
}
struct ckrm_res_ctlr cpu_rcbs = {
        .res_name         = CPU_CTRL_NAME,
        .resid            = -1,
        .res_alloc        = ckrm_alloc_cpu_class,
        .res_free         = ckrm_free_cpu_class,
        .set_share_values = ckrm_cpu_set_share,
        .get_share_values = ckrm_cpu_get_share,
        .get_stats        = ckrm_cpu_get_stats,
        .show_config      = ckrm_cpu_show_config,
        .set_config       = ckrm_cpu_set_config,
        .change_resclass  = ckrm_cpu_change_class,
};
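
/*
 * init_ckrm_sched_res - register the cpu controller with the "taskclass"
 * classtype, unless it has been registered already (resid != -1).
 */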
int __init init_ckrm_sched_res(void)
{
        struct ckrm_classtype *clstype;
        int resid = cpu_rcbs.resid;

        clstype = ckrm_find_classtype_by_name("taskclass");
        if (clstype == NULL) {
                printk(KERN_INFO " Unknown ckrm classtype<taskclass>");
                return -ENOENT;
        }

        if (resid == -1) { /* not registered */
                resid = ckrm_register_res_ctlr(clstype, &cpu_rcbs);
                printk(KERN_DEBUG "........init_ckrm_sched_res, resid= %d\n", resid);
        }
        return 0;
}
/*
 * initialize the class structure
 * add the default class: class 0
 */
void init_cpu_classes(void)
{
        int i;

        // init classqueues for each processor
        for (i = 0; i < NR_CPUS; i++)
                classqueue_init(get_cpu_classqueue(i), ckrm_cpu_enabled());

        ckrm_alloc_cpu_class(NULL, NULL);
}
void ckrm_cpu_class_queue_update(int on);
void ckrm_cpu_start_monitor(void);
void ckrm_cpu_kill_monitor(void);
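
/*
 * ckrm_cpu_set_mode - switch between the CKRM CPU scheduler and default
 * Linux scheduling.  All tasks are moved to the appropriate class (the
 * default class on disable, their task class's cpu class on enable), and
 * the CPU monitor is stopped or started accordingly.
 */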
static int ckrm_cpu_set_mode(enum ckrm_sched_mode mode)
{
        struct task_struct *proc, *tsk;
        struct ckrm_cpu_class *new_cls = NULL;
        int i;

        if (mode == ckrm_sched_mode)
                return 0;

        printk("ckrm_cpu_set_mode from <%s> to <%s> pid=%d\n",
               ckrm_sched_mode_str[ckrm_sched_mode],
               ckrm_sched_mode_str[mode],
               current->pid);

        if (mode == CKRM_SCHED_MODE_DISABLED) {
                ckrm_cpu_kill_monitor();
                new_cls = get_default_cpu_class();
        } else {
                ckrm_cpu_class_queue_update(1);
        }

        /* run twice through the list to catch everyone,
         * current and transient once
         */
        read_lock(&tasklist_lock);

        ckrm_sched_mode = mode;
        /* we have to run through the list twice
         * first catch all existing tasks
         * and then deal with some potential race condition
         */
        for (i = 2; i--; ) {
                /* lock class_list_lock ? */

                do_each_thread(proc, tsk) {
                        if (mode == CKRM_SCHED_MODE_ENABLED) {
                                new_cls = ckrm_get_res_class(class_core(tsk->taskclass),
                                                             cpu_rcbs.resid,
                                                             struct ckrm_cpu_class);
                        }
                        _ckrm_cpu_change_class(tsk, new_cls);
                } while_each_thread(proc, tsk);
        }
        read_unlock(&tasklist_lock);

        if (mode == CKRM_SCHED_MODE_DISABLED)
                ckrm_cpu_class_queue_update(0);
        else
                ckrm_cpu_start_monitor();
        return 0;
}
EXPORT_SYMBOL(ckrm_get_cpu_class);