/* kernel/ckrm/ckrm_cpu_class.c - CPU Class resource controller for CKRM
 *
 * Copyright (C) Haoqiang Zheng,  IBM Corp. 2004
 *           (C) Hubertus Franke, IBM Corp. 2004
 *
 * Latest version, more details at http://ckrm.sf.net
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 */
#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <asm/errno.h>
#include <linux/sched.h>
#include <linux/ckrm.h>
#include <linux/ckrm_rc.h>
#include <linux/ckrm_tc.h>
#include <linux/ckrm_sched.h>
#include <linux/ckrm_classqueue.h>
#include <linux/seq_file.h>
#include <linux/parser.h>

#define CPU_CTRL_NAME  "cpu"

struct ckrm_res_ctlr cpu_rcbs;
#define CKRM_CPU_USAGE_DETAIL_MAX 3
static int usage_detail = 3;  /* cumulative detail levels:
                               * 0: show usage only
                               * 1: also show share settings
                               * 2: also show effective shares
                               * 3: also show per-runqueue stats
                               */

static int ckrm_cpu_set_mode(enum ckrm_sched_mode mode);

/*
 * update the effective share settings after:
 * -- removing a class
 * -- changing a class's share
 * we don't need to call update_effectives() when adding a new class,
 * since the default guarantee (grt) of a new class is 0
 * CAUTION: might need a lock here
 */
static inline void update_class_effectives(void)
{
        //      update_effectives();
        ckrm_cpu_monitor(0);
}

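/*
 * Current callers, both of which run after the class topology or the
 * shares change (see further down in this file):
 *
 *      ckrm_free_cpu_class()  - after a class has been removed
 *      ckrm_cpu_set_share()   - after a class's shares have changed
 */
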
/**
 * insert_cpu_class - insert a class into the active_cpu_class list
 *
 * The intent is to keep the list in decreasing order of class weight;
 * the current implementation simply inserts at the head.
 */
static inline void insert_cpu_class(struct ckrm_cpu_class *cls)
{
        list_add(&cls->links, &active_cpu_classes);
}

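/*
 * A minimal sketch (not wired up, name is hypothetical) of the
 * decreasing-weight insertion the comment above intends, assuming
 * cpu_class_weight() is stable while class_list_lock is write-held:
 */
static inline void insert_cpu_class_ordered(struct ckrm_cpu_class *cls)
{
        struct list_head *where = &active_cpu_classes;
        struct ckrm_cpu_class *pos;

        /* stop at the first entry lighter than us, remembering
         * the last entry that is at least as heavy */
        list_for_each_entry(pos, &active_cpu_classes, links) {
                if (cpu_class_weight(pos) < cpu_class_weight(cls))
                        break;
                where = &pos->links;
        }
        list_add(&cls->links, where);
}
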
/*
 * initialize a class object and its local queues
 */

CVT_t get_min_cvt_locking(int cpu);
ckrm_lrq_t *rq_get_dflt_lrq(int cpu);

static void init_cpu_class_lrq(struct ckrm_cpu_class *cls,
                               int cpu, int isdflt)
{
        int j, k;
        ckrm_lrq_t *queue = cls->local_queues[cpu];

        queue->active   = queue->arrays;
        queue->expired  = queue->arrays + 1;

        for (j = 0; j < 2; j++) {
                prio_array_t *array = queue->arrays + j;
                for (k = 0; k < MAX_PRIO; k++) {
                        INIT_LIST_HEAD(array->queue + k);
                        __clear_bit(k, array->bitmap);
                }
                // delimiter for bitsearch
                __set_bit(MAX_PRIO, array->bitmap);
                array->nr_active = 0;
        }

        queue->expired_timestamp = 0;
        queue->best_expired_prio = MAX_PRIO;

        queue->cpu_class = cls;
        queue->classqueue = get_cpu_classqueue(cpu);
        queue->top_priority = MAX_PRIO;
        cq_node_init(&queue->classqueue_linkobj);
        queue->local_cvt = isdflt ? 0 : get_min_cvt_locking(cpu);
        queue->lrq_load = 0;
        queue->local_weight = cpu_class_weight(cls);
        if (queue->local_weight == 0)
                queue->local_weight = 1;
        queue->over_weight = 0;
        queue->skewed_weight = CKRM_MAX_WEIGHT/2; /* otherwise the class might starve on start */
        queue->uncounted_ns = 0;
        queue->savings = 0;
        queue->magic = CKRM_LRQ_MAGIC;
}

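/*
 * Why the MAX_PRIO delimiter bit set above matters, as a sketch: with an
 * empty array only the delimiter bit is set, so a bitmap search stops at
 * MAX_PRIO instead of running off the end of the bitmap. Treating
 * sched_find_first_bit(), the accessor the O(1) scheduler uses, as
 * available here is an assumption for illustration, as is this helper's
 * name:
 */
static inline int lrq_top_prio_sketch(prio_array_t *array)
{
        /* returns MAX_PRIO when no task is queued in this array */
        return sched_find_first_bit(array->bitmap);
}
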
void init_cpu_class(struct ckrm_cpu_class *cls, ckrm_shares_t *shares)
{
        int i;
        int isdflt;
        struct ckrm_cpu_class *dfltcls;

        dfltcls = get_default_cpu_class();

        isdflt = (cls == dfltcls);

        cls->shares = *shares;
        cls->cnt_lock = SPIN_LOCK_UNLOCKED;
        ckrm_cpu_stat_init(&cls->stat, isdflt ? CKRM_SHARE_MAX : 1);
        ckrm_usage_init(&cls->usage);
        cls->magic = CKRM_CPU_CLASS_MAGIC;

        memset(cls->local_queues, 0, NR_CPUS * sizeof(ckrm_lrq_t *));

        if (isdflt) {
                for (i = 0; i < NR_CPUS; i++) {
                        cls->local_queues[i] = rq_get_dflt_lrq(i);
                        init_cpu_class_lrq(cls, i, 1);
                }
        } else {
                for_each_cpu(i) {
                        cls->local_queues[i] = kmalloc(sizeof(ckrm_lrq_t),
                                                       GFP_KERNEL);
                        BUG_ON(cls->local_queues[i] == NULL);
                        init_cpu_class_lrq(cls, i, 0);
                }
        }

        write_lock(&class_list_lock);
        insert_cpu_class(cls);
        write_unlock(&class_list_lock);
}

static inline void set_default_share(ckrm_shares_t *shares)
{
        shares->my_guarantee     = 0;
        shares->total_guarantee  = CKRM_SHARE_DFLT_TOTAL_GUARANTEE;
        shares->unused_guarantee = CKRM_SHARE_DFLT_TOTAL_GUARANTEE;
        shares->my_limit         = CKRM_SHARE_DFLT_MAX_LIMIT;
        shares->max_limit        = CKRM_SHARE_DFLT_MAX_LIMIT;
        shares->cur_max_limit    = 0;
}

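/*
 * Worked example of these fields, assuming for illustration that
 * CKRM_SHARE_DFLT_TOTAL_GUARANTEE is 100: a fresh class reserves nothing
 * for itself (my_guarantee = 0) and can hand out all 100 units to its
 * children (unused_guarantee = 100). If a child later takes
 * my_guarantee = 25, child_guarantee_changed() brings unused_guarantee
 * down to 75; removing that child (see ckrm_free_cpu_class()) returns
 * the 25 units.
 */
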
struct ckrm_cpu_class *ckrm_get_cpu_class(struct ckrm_core_class *core)
{
        struct ckrm_cpu_class *cls;

        cls = ckrm_get_res_class(core, cpu_rcbs.resid, struct ckrm_cpu_class);
        if (valid_cpu_class(cls))
                return (ckrm_cpu_enabled() ? cls : get_default_cpu_class());
        else
                return NULL;
}

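/*
 * The resulting lookup behavior, summarized:
 *
 *      class lookup | scheduler | returned
 *      -------------+-----------+------------------------
 *      valid        | enabled   | the class itself
 *      valid        | disabled  | the default cpu class
 *      invalid      | either    | NULL
 */
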
void *ckrm_alloc_cpu_class(struct ckrm_core_class *core,
                           struct ckrm_core_class *parent)
{
        struct ckrm_cpu_class *cls;

        if (!parent) /* root class */
                cls = get_default_cpu_class();
        else
                cls = kmalloc(sizeof(struct ckrm_cpu_class), GFP_ATOMIC);

        if (cls) {
                ckrm_shares_t shares;

                if ((!parent) && (core)) {
                        /*
                         * the default class is already initialized,
                         * so only update the core structure
                         */
                        cls->core = core;
                } else {
                        set_default_share(&shares);
                        init_cpu_class(cls, &shares);
                        cls->core = core;
                        cls->parent = parent;
                }
        } else
                printk(KERN_ERR "alloc_cpu_class failed\n");

        return cls;
}

void ckrm_cpu_class_queue_delete_sync(struct ckrm_cpu_class *clsptr);

static void ckrm_free_cpu_class(void *my_res)
{
        struct ckrm_cpu_class *cls = my_res, *parres, *childres;
        ckrm_core_class_t *child = NULL;
        int maxlimit;
        int i;

        if (!cls)
                return;

        /* the default class can't be freed */
        if (cls == get_default_cpu_class())
                return;

        // assuming there will be no children when this function is called
        parres = ckrm_get_cpu_class(cls->parent);

        // return the child's limit/guarantee to the parent node
        spin_lock(&parres->cnt_lock);
        child_guarantee_changed(&parres->shares, cls->shares.my_guarantee, 0);
        // run through the parent's children and get the parent's new max_limit
        ckrm_lock_hier(parres->core);
        maxlimit = 0;
        while ((child = ckrm_get_next_child(parres->core, child)) != NULL) {
                childres = ckrm_get_cpu_class(child);
                if (childres && maxlimit < childres->shares.my_limit)
                        maxlimit = childres->shares.my_limit;
        }
        ckrm_unlock_hier(parres->core);
        if (parres->shares.cur_max_limit < maxlimit)
                parres->shares.cur_max_limit = maxlimit;

        spin_unlock(&parres->cnt_lock);

        write_lock(&class_list_lock);
        list_del(&cls->links);
        write_unlock(&class_list_lock);

        ckrm_cpu_class_queue_delete_sync(cls);

        for_each_cpu(i) {
                ckrm_lrq_t *lrq = get_ckrm_lrq(cls, i);
                if (!lrq)
                        continue;
                lrq->magic = -99;
                kfree(lrq);
        }
        kfree(cls);

        // call ckrm_cpu_monitor after the class is removed
        if (ckrm_cpu_enabled())
                update_class_effectives();
}

/*
 * the system will adjust to the new shares automatically
 */
int ckrm_cpu_set_share(void *my_res, struct ckrm_shares *new_share)
{
        struct ckrm_cpu_class *parres, *cls = my_res;
        struct ckrm_shares *cur = &cls->shares, *par;
        int rc = -EINVAL;

        if (ckrm_cpu_disabled())
                return -ENOSYS;
        if (!cls)
                return rc;
        if (new_share->total_guarantee > CKRM_SHARE_MAX)
                return -E2BIG;

        if (cls->parent) {
                parres = ckrm_get_cpu_class(cls->parent);
                spin_lock(&parres->cnt_lock);
                spin_lock(&cls->cnt_lock);
                par = &parres->shares;
        } else {
                spin_lock(&cls->cnt_lock);
                par = NULL;
                parres = NULL;
        }

        /*
         * hzheng: CKRM_SHARE_DONTCARE should be handled
         */
        if (new_share->my_guarantee == CKRM_SHARE_DONTCARE)
                new_share->my_guarantee = 0;

        rc = set_shares(new_share, cur, par);
        if (!rc && cur->my_limit == CKRM_SHARE_DONTCARE)
                cur->my_limit = cur->max_limit;

        spin_unlock(&cls->cnt_lock);
        if (cls->parent)
                spin_unlock(&parres->cnt_lock);

        // call ckrm_cpu_monitor after the shares have changed
        update_class_effectives();

        return rc;
}

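/*
 * Sketch of the CKRM_SHARE_DONTCARE normalization above: a caller that
 * writes
 *
 *      new_share->my_guarantee = CKRM_SHARE_DONTCARE;
 *
 * ends up with my_guarantee = 0 (no reservation), and a my_limit still
 * set to DONTCARE after set_shares() is widened to max_limit. In other
 * words, "don't care" means "no guarantee, but allow up to the maximum
 * limit".
 */
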
static int ckrm_cpu_get_share(void *my_res,
                              struct ckrm_shares *shares)
{
        struct ckrm_cpu_class *cls = my_res;

        if (ckrm_cpu_disabled())
                return -ENOSYS;
        if (!cls)
                return -EINVAL;

        *shares = cls->shares;
        return 0;
}

/*
 *   ckrm_cpu_get_usage():
 *     obtain a sequence of <num> usage reports
 *     returns the number of usages reported
 *
 *     report IN:  specifies the sequence of durations (in jiffies) to
 *                 report on; must be ordered (smallest first)
 *            OUT: returns the usage for each requested duration
 */

int ckrm_cpu_get_usage(struct ckrm_cpu_class *clsptr,
                       int num, ulong report[])
{
        struct ckrm_usage *usage = &clsptr->usage;
        unsigned long long total = 0;
        int i, idx, num_ofs;

        i = 0;
        idx = usage->sample_pointer;

        for (num_ofs = 0; num_ofs < num; num_ofs++) {
                int nr_samples;
                int duration = report[num_ofs];
                unsigned long long totval = 0;

                nr_samples = duration / USAGE_SAMPLE_FREQ ? : 1;

                if (nr_samples > USAGE_MAX_HISTORY)
                        nr_samples = USAGE_MAX_HISTORY;

                /* walk backwards through the sample history; 'i' carries
                 * over between durations so each sample is added only once
                 */
                for ( ; i < nr_samples; i++) {
                        if (!idx)
                                idx = USAGE_MAX_HISTORY;
                        idx--;
                        total += usage->samples[idx];
                }
                totval = total * 1000;
                do_div(totval, NS_PER_SAMPLE);
                do_div(totval, nr_samples * cpus_weight(cpu_online_map));
                report[num_ofs] = totval;
        }

        return num;
}

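/*
 * Example caller, mirroring ckrm_cpu_get_stats() below: request the
 * average usage over the last 2, 10 and 60 seconds in one call. Given
 * the "* 1000" and the division by the number of online cpus above, the
 * result is roughly per-mille of total machine capacity.
 *
 *      ulong report[3] = { 2*HZ, 10*HZ, 60*HZ };
 *
 *      ckrm_cpu_get_usage(cls, 3, report);
 *      // report[0..2] now hold the three averaged usage values
 */
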
int ckrm_cpu_get_stats(void *my_res, struct seq_file *sfile)
{
        struct ckrm_cpu_class *cls = my_res;
        struct ckrm_cpu_class_stat *stat;
        ckrm_lrq_t *lrq;
        int i;
        ulong usage[3] = { 2*HZ, 10*HZ, 60*HZ };

        if (!cls || ckrm_cpu_disabled())
                return -EINVAL;

        stat = &cls->stat;
        ckrm_cpu_get_usage(cls, 3, usage);

        /* after full stabilization this will become the only
         * cpu usage statistic
         */
        seq_printf(sfile, "cpu-usage(2,10,60)= %lu %lu %lu\n",
                   usage[0], usage[1], usage[2]);

        if (usage_detail < 1)
                return 0;

        /* For the extended statistics we can decide later whether to make
         * them available through config options; either way they should be
         * reported in a more concise form. During stabilization this is OK.
         */

        seq_printf(sfile, "-------- CPU Class Status Start---------\n");
        seq_printf(sfile, "Share:\n\tgrt= %d limit= %d total_grt= %d max_limit= %d\n",
                   cls->shares.my_guarantee,
                   cls->shares.my_limit,
                   cls->shares.total_guarantee,
                   cls->shares.max_limit);
        seq_printf(sfile, "\tunused_grt= %d cur_max_limit= %d\n",
                   cls->shares.unused_guarantee,
                   cls->shares.cur_max_limit);

        if (usage_detail < 2)
                goto out;

        seq_printf(sfile, "Effective:\n\tegrt= %d\n", stat->egrt);
        seq_printf(sfile, "\tmegrt= %d\n", stat->megrt);
        seq_printf(sfile, "\tehl= %d\n", stat->ehl);
        seq_printf(sfile, "\tmehl= %d\n", stat->mehl);
        seq_printf(sfile, "\teshare= %d\n", stat->eshare);
        seq_printf(sfile, "\tmeshare= %d\n", stat->meshare);
        seq_printf(sfile, "\tmax_demand= %lu\n", stat->max_demand);
        seq_printf(sfile, "\ttotal_ns= %llu\n", stat->total_ns);
        seq_printf(sfile, "\tusage(2,10,60)= %lu %lu %lu\n",
                   usage[0], usage[1], usage[2]);

        if (usage_detail < 3)
                goto out;

        /* provide per-runqueue information */
        for_each_online_cpu(i) {
                lrq = get_ckrm_lrq(cls, i);
                seq_printf(sfile, "\tlrq %d demand= %lu weight= %d "
                           "lrq_load= %lu cvt= %llu sav= %llu\n",
                           i, stat->local_stats[i].cpu_demand,
                           local_class_weight(lrq), lrq->lrq_load,
                           lrq->local_cvt, lrq->savings);
        }

out:
        seq_printf(sfile, "-------- CPU Class Status END ---------\n");
        return 0;
}

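/*
 * Illustrative output for usage_detail == 1 (the numbers are made up),
 * following the format strings above:
 *
 *      cpu-usage(2,10,60)= 120 105 98
 *      -------- CPU Class Status Start---------
 *      Share:
 *              grt= 0 limit= 100 total_grt= 100 max_limit= 100
 *              unused_grt= 100 cur_max_limit= 0
 *      -------- CPU Class Status END ---------
 */
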
/*
 * the task will remain on the same cpu but on a different local runqueue
 */
void ckrm_cpu_change_class(void *task, void *old, void *new)
{
        struct task_struct *tsk = task;
        struct ckrm_cpu_class *newcls = new;

        /* sanity checking */
        if (!task || !old || !new)
                return;

        if (ckrm_cpu_disabled())
                newcls = get_default_cpu_class();
        _ckrm_cpu_change_class(tsk, newcls);
}

enum config_token_t {
        config_usage_detail,   /* define usage level                      */
        config_disable,        /* always use default linux scheduling     */
                               /* effectively disables the ckrm scheduler */
        config_enable,         /* always use ckrm scheduling behavior     */
        config_err             /* parsing error */
};

#define CKRM_SCHED_MODE_DISABLED_STR "disabled"
#define CKRM_SCHED_MODE_ENABLED_STR  "enabled"

static char *ckrm_sched_mode_str[] = {
                CKRM_SCHED_MODE_DISABLED_STR,
                CKRM_SCHED_MODE_ENABLED_STR
};

static match_table_t config_tokens = {
        { config_disable,      "mode="CKRM_SCHED_MODE_DISABLED_STR },
        { config_enable,       "mode="CKRM_SCHED_MODE_ENABLED_STR  },
        { config_usage_detail, "usage_detail=%u"                   },
        { config_err,          NULL                                }
};

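/*
 * Example strings accepted by ckrm_cpu_set_config() below:
 *
 *      "mode=enabled"
 *      "mode=disabled"
 *      "usage_detail=2"
 *      "mode=enabled,usage_detail=3"
 *
 * Note that usage_detail is rejected with -EINVAL while the scheduler is
 * still disabled, because the new mode is only applied after the whole
 * string has been parsed.
 */
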
static int ckrm_cpu_show_config(void *my_res, struct seq_file *sfile)
{
        struct ckrm_cpu_class *cls = my_res;

        if (!cls)
                return -EINVAL;

        seq_printf(sfile, "res=%s,mode=%s",
                   CPU_CTRL_NAME, ckrm_sched_mode_str[ckrm_sched_mode]);
        if (!ckrm_cpu_disabled())  /* enabled || mixed */
                seq_printf(sfile, ",usage_detail=%u", usage_detail);
        seq_printf(sfile, "\n");
        return 0;
}

static int ckrm_cpu_set_config(void *my_res, const char *cfgstr)
{
        struct ckrm_cpu_class *cls = my_res;
        char *p;
        char **cfgstr_p = (char **)&cfgstr;
        substring_t args[MAX_OPT_ARGS];
        int option, rc;
        enum ckrm_sched_mode new_sched_mode;

        if (!cls)
                return -EINVAL;

        new_sched_mode = ckrm_sched_mode;
        rc = 0;

        while ((p = strsep(cfgstr_p, ",")) != NULL) {
                int token;

                if (!*p)
                        continue;

                token = match_token(p, config_tokens, args);
                switch (token) {
                case config_usage_detail:
                        if (ckrm_cpu_disabled() ||
                            match_int(&args[0], &option) ||
                            (option > CKRM_CPU_USAGE_DETAIL_MAX))
                                return -EINVAL;
                        usage_detail = option;
                        break;
                case config_disable:
                        new_sched_mode = CKRM_SCHED_MODE_DISABLED;
                        break;
                case config_enable:
                        new_sched_mode = CKRM_SCHED_MODE_ENABLED;
                        break;
                case config_err:
                        return -EINVAL;
                }
        }
        rc = ckrm_cpu_set_mode(new_sched_mode);
        return rc;
}

struct ckrm_res_ctlr cpu_rcbs = {
        .res_name          = CPU_CTRL_NAME,
        .res_hdepth        = 1,
        .resid             = -1,
        .res_alloc         = ckrm_alloc_cpu_class,
        .res_free          = ckrm_free_cpu_class,
        .set_share_values  = ckrm_cpu_set_share,
        .get_share_values  = ckrm_cpu_get_share,
        .get_stats         = ckrm_cpu_get_stats,
        .show_config       = ckrm_cpu_show_config,
        .set_config        = ckrm_cpu_set_config,
        .change_resclass   = ckrm_cpu_change_class,
};

int __init init_ckrm_sched_res(void)
{
        struct ckrm_classtype *clstype;
        int resid = cpu_rcbs.resid;

        clstype = ckrm_find_classtype_by_name("taskclass");
        if (clstype == NULL) {
                printk(KERN_INFO "Unknown ckrm classtype <taskclass>\n");
                return -ENOENT;
        }

        if (resid == -1) { /* not registered */
                resid = ckrm_register_res_ctlr(clstype, &cpu_rcbs);
                printk(KERN_DEBUG "........init_ckrm_sched_res, resid= %d\n", resid);
        }
        return 0;
}

/*
 * initialize the class structures
 * add the default class: class 0
 */
void init_cpu_classes(void)
{
        int i;

        // init the classqueue for each processor
        for (i = 0; i < NR_CPUS; i++)
                classqueue_init(get_cpu_classqueue(i), ckrm_cpu_enabled());

        ckrm_alloc_cpu_class(NULL, NULL);
}

void ckrm_cpu_class_queue_update(int on);
void ckrm_cpu_start_monitor(void);
void ckrm_cpu_kill_monitor(void);

static int ckrm_cpu_set_mode(enum ckrm_sched_mode mode)
{
        struct task_struct *proc, *tsk;
        struct ckrm_cpu_class *new_cls = NULL;
        int i;

        if (mode == ckrm_sched_mode)
                return 0;

        printk(KERN_INFO "ckrm_cpu_set_mode from <%s> to <%s> pid=%d\n",
               ckrm_sched_mode_str[ckrm_sched_mode],
               ckrm_sched_mode_str[mode],
               current->pid);

        if (mode == CKRM_SCHED_MODE_DISABLED) {
                ckrm_cpu_kill_monitor();
                new_cls = get_default_cpu_class();
        } else {
                ckrm_cpu_class_queue_update(1);
        }

        read_lock(&tasklist_lock);

        ckrm_sched_mode = mode;
        /* we have to run through the task list twice:
         * the first pass catches all existing tasks, the second pass
         * catches tasks that raced with the mode switch
         */
        for (i = 2; i--; ) {
                /* lock class_list_lock ? */

                do_each_thread(proc, tsk) {
                        if (mode == CKRM_SCHED_MODE_ENABLED)
                                new_cls = ckrm_get_res_class(class_core(tsk->taskclass),
                                                             cpu_rcbs.resid,
                                                             struct ckrm_cpu_class);
                        _ckrm_cpu_change_class(tsk, new_cls);
                } while_each_thread(proc, tsk);
        }
        read_unlock(&tasklist_lock);

        if (mode == CKRM_SCHED_MODE_DISABLED)
                ckrm_cpu_class_queue_update(0);
        else
                ckrm_cpu_start_monitor();
        return 0;
}

EXPORT_SYMBOL(ckrm_get_cpu_class);