This commit was manufactured by cvs2svn to create tag
[linux-2.6.git] / kernel / ckrm / ckrm_numtasks.c
1 /* ckrm_numtasks.c - "Number of tasks" resource controller for CKRM
2  *
3  * Copyright (C) Chandra Seetharaman,  IBM Corp. 2003
4  * 
5  * Latest version, more details at http://ckrm.sf.net
6  * 
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  */
13
14 /* Changes
15  * 
16  * 31 Mar 2004: Created
17  * 
18  */
19
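/*
 * Code Description:
 *
 * Each class carries a ckrm_numtasks object that counts the tasks charged
 * to it.  A class may hold up to cnt_unused tasks on its own; beyond that
 * it must borrow a slot from its parent class, and a non-forced request
 * that would exceed cnt_limit is refused.  Non-forced requests are also
 * throttled to at most 'forkrate' grants per 'forkrate_interval' seconds
 * per class.  The system-wide total and the fork-rate parameters are
 * tunable through the controller's config interface.
 */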
23
#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/jiffies.h>
#include <linux/parser.h>
#include <linux/ckrm_rc.h>
#include <linux/ckrm_tc.h>
#include <linux/ckrm_tsk.h>
#include <asm/errno.h>
#include <asm/div64.h>
35
36 #define DEF_TOTAL_NUM_TASKS (131072)    // 128 K
37 #define DEF_FORKRATE (1000000)                  // 1 million tasks
38 #define DEF_FORKRATE_INTERVAL (3600)    // per hour
39 #define NUMTASKS_DEBUG
40 #define NUMTASKS_NAME "numtasks"
41 #define SYS_TOTAL_TASKS "sys_total_tasks"
42 #define FORKRATE "forkrate"
43 #define FORKRATE_INTERVAL "forkrate_interval"
44
45 static int total_numtasks = DEF_TOTAL_NUM_TASKS;
46 static int total_cnt_alloc = 0;
47 static int forkrate = DEF_FORKRATE;
48 static int forkrate_interval = DEF_FORKRATE_INTERVAL;
49 static ckrm_core_class_t *root_core;
50
/*
 * Per-class state of the numtasks controller.  One object is allocated
 * for every core class by numtasks_res_alloc().
 */
typedef struct ckrm_numtasks {
        struct ckrm_core_class *core;   // the core class this object belongs to
        struct ckrm_core_class *parent; // parent of the core above (NULL for root)
        struct ckrm_shares shares;      // share values as configured
        spinlock_t cnt_lock;    // always grab parent's lock before child's
        int cnt_guarantee;      // num_tasks guarantee in local units
        int cnt_unused;         // has to borrow if more than this is needed
        int cnt_limit;          // no tasks over this limit.
        atomic_t cnt_cur_alloc; // current alloc from self
        atomic_t cnt_borrowed;  // borrowed from the parent

        int over_guarantee;     // set to 1 on a successful borrow, cleared
                                // when an allocation fits within cnt_unused

        // internally maintained statistics to compare with max numbers;
        // reset each time over_guarantee is cleared
        int limit_failures;     // # failures as request was over the limit
        int borrow_sucesses;    // # successful borrows
        int borrow_failures;    // # borrow failures

        // Maximum each of the statistics above has reached.
        int max_limit_failures;
        int max_borrow_sucesses;
        int max_borrow_failures;

        // Running totals of the same statistics (never reset by the
        // allocation path)
        int tot_limit_failures;
        int tot_borrow_sucesses;
        int tot_borrow_failures;

        // fork rate fields
        int forks_in_period;            // non-forced grants in the current period
        unsigned long period_start;     // jiffies value at start of the period
} ckrm_numtasks_t;
84
/* Forward declaration; the initialised definition is further down the file. */
struct ckrm_res_ctlr numtasks_rcbs;
86
87 /* Initialize rescls values
88  * May be called on each rcfs unmount or as part of error recovery
89  * to make share values sane.
90  * Does not traverse hierarchy reinitializing children.
91  */
92 static void numtasks_res_initcls_one(ckrm_numtasks_t * res)
93 {
94         res->shares.my_guarantee = CKRM_SHARE_DONTCARE;
95         res->shares.my_limit = CKRM_SHARE_DONTCARE;
96         res->shares.total_guarantee = CKRM_SHARE_DFLT_TOTAL_GUARANTEE;
97         res->shares.max_limit = CKRM_SHARE_DFLT_MAX_LIMIT;
98         res->shares.unused_guarantee = CKRM_SHARE_DFLT_TOTAL_GUARANTEE;
99         res->shares.cur_max_limit = 0;
100
101         res->cnt_guarantee = CKRM_SHARE_DONTCARE;
102         res->cnt_unused = CKRM_SHARE_DONTCARE;
103         res->cnt_limit = CKRM_SHARE_DONTCARE;
104
105         res->over_guarantee = 0;
106
107         res->limit_failures = 0;
108         res->borrow_sucesses = 0;
109         res->borrow_failures = 0;
110
111         res->max_limit_failures = 0;
112         res->max_borrow_sucesses = 0;
113         res->max_borrow_failures = 0;
114
115         res->tot_limit_failures = 0;
116         res->tot_borrow_sucesses = 0;
117         res->tot_borrow_failures = 0;
118
119         res->forks_in_period = 0;
120         res->period_start = jiffies;
121
122         atomic_set(&res->cnt_cur_alloc, 0);
123         atomic_set(&res->cnt_borrowed, 0);
124         return;
125 }
126
#if 0
/*
 * Compiled out: placeholder for a hierarchy-wide re-initialisation
 * callback.  It currently has no body beyond the TODO below.
 */
static void numtasks_res_initcls(void *my_res)
{
        ckrm_numtasks_t *res = my_res;

        /* Write a version which propagates values all the way down 
           and replace rcbs callback with that version */

}
#endif
137
138 static int numtasks_get_ref_local(void *arg, int force)
139 {
140         int rc, resid = numtasks_rcbs.resid, borrowed = 0;
141         unsigned long now = jiffies, chg_at;
142         ckrm_numtasks_t *res;
143         ckrm_core_class_t *core = arg;
144
145         if ((resid < 0) || (core == NULL))
146                 return 1;
147
148         res = ckrm_get_res_class(core, resid, ckrm_numtasks_t);
149         if (res == NULL)
150                 return 1;
151
152         // force is not associated with fork. So, if force is specified
153         // we don't have to bother about forkrate.
154         if (!force) {
155                 // Take care of wraparound situation
156                 chg_at = res->period_start + forkrate_interval * HZ;
157                 if (chg_at < res->period_start) {
158                         chg_at += forkrate_interval * HZ;
159                         now += forkrate_interval * HZ;
160                 }
161                 if (chg_at <= now) {
162                         res->period_start = now;
163                         res->forks_in_period = 0;
164                 }
165         
166                 if (res->forks_in_period >= forkrate) {
167                         return 0;
168                 }
169         }
170
171         atomic_inc(&res->cnt_cur_alloc);
172
173         rc = 1;
174         if (((res->parent) && (res->cnt_unused == CKRM_SHARE_DONTCARE)) ||
175             (atomic_read(&res->cnt_cur_alloc) > res->cnt_unused)) {
176
177                 rc = 0;
178                 if (!force && (res->cnt_limit != CKRM_SHARE_DONTCARE) &&
179                     (atomic_read(&res->cnt_cur_alloc) > res->cnt_limit)) {
180                         res->limit_failures++;
181                         res->tot_limit_failures++;
182                 } else if (res->parent != NULL) {
183                         if ((rc =
184                              numtasks_get_ref_local(res->parent, force)) == 1) {
185                                 atomic_inc(&res->cnt_borrowed);
186                                 res->borrow_sucesses++;
187                                 res->tot_borrow_sucesses++;
188                                 res->over_guarantee = 1;
189                                 borrowed++;
190                         } else {
191                                 res->borrow_failures++;
192                                 res->tot_borrow_failures++;
193                         }
194                 } else {
195                         rc = force;
196                 }
197         } else if (res->over_guarantee) {
198                 res->over_guarantee = 0;
199
200                 if (res->max_limit_failures < res->limit_failures) {
201                         res->max_limit_failures = res->limit_failures;
202                 }
203                 if (res->max_borrow_sucesses < res->borrow_sucesses) {
204                         res->max_borrow_sucesses = res->borrow_sucesses;
205                 }
206                 if (res->max_borrow_failures < res->borrow_failures) {
207                         res->max_borrow_failures = res->borrow_failures;
208                 }
209                 res->limit_failures = 0;
210                 res->borrow_sucesses = 0;
211                 res->borrow_failures = 0;
212         }
213
214         if (!rc) {
215                 atomic_dec(&res->cnt_cur_alloc);
216         } else if (!borrowed) { 
217                 total_cnt_alloc++;
218                 if (!force) { // force is not associated with a real fork.
219                         res->forks_in_period++;
220                 }
221         }
222         return rc;
223 }
224
225 static void numtasks_put_ref_local(void *arg)
226 {
227         int resid = numtasks_rcbs.resid;
228         ckrm_numtasks_t *res;
229         ckrm_core_class_t *core = arg;
230
231         if ((resid == -1) || (core == NULL)) {
232                 return;
233         }
234
235         res = ckrm_get_res_class(core, resid, ckrm_numtasks_t);
236         if (res == NULL)
237                 return;
238         if (unlikely(atomic_read(&res->cnt_cur_alloc) == 0)) {
239                 printk(KERN_WARNING "numtasks_put_ref: Trying to decrement "
240                                         "counter below 0\n");
241                 return;
242         }
243         atomic_dec(&res->cnt_cur_alloc);
244         if (atomic_read(&res->cnt_borrowed) > 0) {
245                 atomic_dec(&res->cnt_borrowed);
246                 numtasks_put_ref_local(res->parent);
247         } else {
248                 total_cnt_alloc--;
249         }
250                 
251         return;
252 }
253
254 static void *numtasks_res_alloc(struct ckrm_core_class *core,
255                                 struct ckrm_core_class *parent)
256 {
257         ckrm_numtasks_t *res;
258
259         res = kmalloc(sizeof(ckrm_numtasks_t), GFP_ATOMIC);
260
261         if (res) {
262                 memset(res, 0, sizeof(ckrm_numtasks_t));
263                 res->core = core;
264                 res->parent = parent;
265                 numtasks_res_initcls_one(res);
266                 res->cnt_lock = SPIN_LOCK_UNLOCKED;
267                 if (parent == NULL) {
268                         // I am part of root class. So set the max tasks 
269                         // to available default
270                         res->cnt_guarantee = total_numtasks;
271                         res->cnt_unused = total_numtasks;
272                         res->cnt_limit = total_numtasks;
273                         root_core = core; // store the root core.
274                 }
275                 try_module_get(THIS_MODULE);
276         } else {
277                 printk(KERN_ERR
278                        "numtasks_res_alloc: failed GFP_ATOMIC alloc\n");
279         }
280         return res;
281 }
282
283 /*
284  * No locking of this resource class object necessary as we are not
285  * supposed to be assigned (or used) when/after this function is called.
286  */
287 static void numtasks_res_free(void *my_res)
288 {
289         ckrm_numtasks_t *res = my_res, *parres, *childres;
290         ckrm_core_class_t *child = NULL;
291         int i, borrowed, maxlimit, resid = numtasks_rcbs.resid;
292
293         if (!res)
294                 return;
295
296         // Assuming there will be no children when this function is called
297
298         parres = ckrm_get_res_class(res->parent, resid, ckrm_numtasks_t);
299
300         if (unlikely(atomic_read(&res->cnt_cur_alloc) < 0)) {
301                 printk(KERN_WARNING "numtasks_res: counter below 0\n");
302         }
303         if (unlikely(atomic_read(&res->cnt_cur_alloc) > 0 ||
304                                 atomic_read(&res->cnt_borrowed) > 0)) {
305                 printk(KERN_WARNING "numtasks_res_free: resource still "
306                        "alloc'd %p\n", res);
307                 if ((borrowed = atomic_read(&res->cnt_borrowed)) > 0) {
308                         for (i = 0; i < borrowed; i++) {
309                                 numtasks_put_ref_local(parres->core);
310                         }
311                 }
312         }
313         // return child's limit/guarantee to parent node
314         spin_lock(&parres->cnt_lock);
315         child_guarantee_changed(&parres->shares, res->shares.my_guarantee, 0);
316
317         // run thru parent's children and get the new max_limit of the parent
318         ckrm_lock_hier(parres->core);
319         maxlimit = 0;
320         while ((child = ckrm_get_next_child(parres->core, child)) != NULL) {
321                 childres = ckrm_get_res_class(child, resid, ckrm_numtasks_t);
322                 if (maxlimit < childres->shares.my_limit) {
323                         maxlimit = childres->shares.my_limit;
324                 }
325         }
326         ckrm_unlock_hier(parres->core);
327         if (parres->shares.cur_max_limit < maxlimit) {
328                 parres->shares.cur_max_limit = maxlimit;
329         }
330
331         spin_unlock(&parres->cnt_lock);
332         kfree(res);
333         module_put(THIS_MODULE);
334         return;
335 }
336
337
338 /*
339  * Recalculate the guarantee and limit in real units... and propagate the
340  * same to children.
341  * Caller is responsible for protecting res and for the integrity of parres
342  */
343 static void
344 recalc_and_propagate(ckrm_numtasks_t * res, ckrm_numtasks_t * parres)
345 {
346         ckrm_core_class_t *child = NULL;
347         ckrm_numtasks_t *childres;
348         int resid = numtasks_rcbs.resid;
349
350         if (parres) {
351                 struct ckrm_shares *par = &parres->shares;
352                 struct ckrm_shares *self = &res->shares;
353
354                 // calculate cnt_guarantee and cnt_limit
355                 //
356                 if (parres->cnt_guarantee == CKRM_SHARE_DONTCARE) {
357                         res->cnt_guarantee = CKRM_SHARE_DONTCARE;
358                 } else if (par->total_guarantee) {
359                         u64 temp = (u64) self->my_guarantee * parres->cnt_guarantee;
360                         do_div(temp, par->total_guarantee);
361                         res->cnt_guarantee = (int) temp;
362                 } else {
363                         res->cnt_guarantee = 0;
364                 }
365
366                 if (parres->cnt_limit == CKRM_SHARE_DONTCARE) {
367                         res->cnt_limit = CKRM_SHARE_DONTCARE;
368                 } else if (par->max_limit) {
369                         u64 temp = (u64) self->my_limit * parres->cnt_limit;
370                         do_div(temp, par->max_limit);
371                         res->cnt_limit = (int) temp;
372                 } else {
373                         res->cnt_limit = 0;
374                 }
375
376                 // Calculate unused units
377                 if (res->cnt_guarantee == CKRM_SHARE_DONTCARE) {
378                         res->cnt_unused = CKRM_SHARE_DONTCARE;
379                 } else if (self->total_guarantee) {
380                         u64 temp = (u64) self->unused_guarantee * res->cnt_guarantee;
381                         do_div(temp, self->total_guarantee);
382                         res->cnt_unused = (int) temp;
383                 } else {
384                         res->cnt_unused = 0;
385                 }
386         }
387         // propagate to children
388         ckrm_lock_hier(res->core);
389         while ((child = ckrm_get_next_child(res->core, child)) != NULL) {
390                 childres = ckrm_get_res_class(child, resid, ckrm_numtasks_t);
391                 if (childres) {
392                     spin_lock(&childres->cnt_lock);
393                     recalc_and_propagate(childres, res);
394                     spin_unlock(&childres->cnt_lock);
395                 } else {
396                         printk(KERN_ERR "%s: numtasks resclass missing\n",__FUNCTION__);
397                 }
398         }
399         ckrm_unlock_hier(res->core);
400         return;
401 }
402
403 static int numtasks_set_share_values(void *my_res, struct ckrm_shares *new)
404 {
405         ckrm_numtasks_t *parres, *res = my_res;
406         struct ckrm_shares *cur = &res->shares, *par;
407         int rc = -EINVAL, resid = numtasks_rcbs.resid;
408
409         if (!res)
410                 return rc;
411
412         if (res->parent) {
413                 parres =
414                     ckrm_get_res_class(res->parent, resid, ckrm_numtasks_t);
415                 spin_lock(&parres->cnt_lock);
416                 spin_lock(&res->cnt_lock);
417                 par = &parres->shares;
418         } else {
419                 spin_lock(&res->cnt_lock);
420                 par = NULL;
421                 parres = NULL;
422         }
423
424         rc = set_shares(new, cur, par);
425
426         if ((rc == 0) && parres) {
427                 // Calculate parent's unused units
428                 if (parres->cnt_guarantee == CKRM_SHARE_DONTCARE) {
429                         parres->cnt_unused = CKRM_SHARE_DONTCARE;
430                 } else if (par->total_guarantee) {
431                         u64 temp = (u64) par->unused_guarantee * parres->cnt_guarantee;
432                         do_div(temp, par->total_guarantee);
433                         parres->cnt_unused = (int) temp;
434                 } else {
435                         parres->cnt_unused = 0;
436                 }
437                 recalc_and_propagate(res, parres);
438         }
439         spin_unlock(&res->cnt_lock);
440         if (res->parent) {
441                 spin_unlock(&parres->cnt_lock);
442         }
443         return rc;
444 }
445
446 static int numtasks_get_share_values(void *my_res, struct ckrm_shares *shares)
447 {
448         ckrm_numtasks_t *res = my_res;
449
450         if (!res)
451                 return -EINVAL;
452         *shares = res->shares;
453         return 0;
454 }
455
456 static int numtasks_get_stats(void *my_res, struct seq_file *sfile)
457 {
458         ckrm_numtasks_t *res = my_res;
459
460         if (!res)
461                 return -EINVAL;
462
463         seq_printf(sfile, "Number of tasks resource:\n");
464         seq_printf(sfile, "Total Over limit failures: %d\n",
465                    res->tot_limit_failures);
466         seq_printf(sfile, "Total Over guarantee sucesses: %d\n",
467                    res->tot_borrow_sucesses);
468         seq_printf(sfile, "Total Over guarantee failures: %d\n",
469                    res->tot_borrow_failures);
470
471         seq_printf(sfile, "Maximum Over limit failures: %d\n",
472                    res->max_limit_failures);
473         seq_printf(sfile, "Maximum Over guarantee sucesses: %d\n",
474                    res->max_borrow_sucesses);
475         seq_printf(sfile, "Maximum Over guarantee failures: %d\n",
476                    res->max_borrow_failures);
477 #ifdef NUMTASKS_DEBUG
478         seq_printf(sfile,
479                    "cur_alloc %d; borrowed %d; cnt_guar %d; cnt_limit %d "
480                    "cnt_unused %d, unused_guarantee %d, cur_max_limit %d\n",
481                    atomic_read(&res->cnt_cur_alloc),
482                    atomic_read(&res->cnt_borrowed), res->cnt_guarantee,
483                    res->cnt_limit, res->cnt_unused,
484                    res->shares.unused_guarantee,
485                    res->shares.cur_max_limit);
486 #endif
487
488         return 0;
489 }
490
491 static int numtasks_show_config(void *my_res, struct seq_file *sfile)
492 {
493         ckrm_numtasks_t *res = my_res;
494
495         if (!res)
496                 return -EINVAL;
497
498         seq_printf(sfile, "res=%s,%s=%d,%s=%d,%s=%d\n", NUMTASKS_NAME,
499                         SYS_TOTAL_TASKS, total_numtasks,
500                         FORKRATE, forkrate,
501                         FORKRATE_INTERVAL, forkrate_interval);
502         return 0;
503 }
504
/* Tokens recognised in the config string parsed by numtasks_set_config(). */
enum numtasks_token_t {
        numtasks_token_total,           /* sys_total_tasks=<n> */
        numtasks_token_forkrate,        /* forkrate=<n> */
        numtasks_token_interval,        /* forkrate_interval=<n> */
        numtasks_token_err              /* table terminator / no match */
};
511
/*
 * Pattern table handed to match_token(); each key takes one integer
 * argument.  The entry with a NULL pattern ends the table.
 */
static match_table_t numtasks_tokens = {
        {numtasks_token_total, SYS_TOTAL_TASKS "=%d"},
        {numtasks_token_forkrate, FORKRATE "=%d"},
        {numtasks_token_interval, FORKRATE_INTERVAL "=%d"},
        {numtasks_token_err, NULL},
};
518
519 static void reset_forkrates(ckrm_core_class_t *parent, unsigned long now)
520 {
521         ckrm_numtasks_t *parres;
522         ckrm_core_class_t *child = NULL;
523
524         parres = ckrm_get_res_class(parent, numtasks_rcbs.resid,
525                                  ckrm_numtasks_t);
526         if (!parres) {
527                 return;
528         }
529         parres->forks_in_period = 0;
530         parres->period_start = now;
531
532         ckrm_lock_hier(parent);
533         while ((child = ckrm_get_next_child(parent, child)) != NULL) {
534                 reset_forkrates(child, now);
535         }
536         ckrm_unlock_hier(parent);
537 }
538
539 static int numtasks_set_config(void *my_res, const char *cfgstr)
540 {
541         char *p;
542         ckrm_numtasks_t *res = my_res;
543         int new_total, fr = 0, itvl = 0, err = 0;
544
545         if (!res)
546                 return -EINVAL;
547
548         while ((p = strsep((char**)&cfgstr, ",")) != NULL) {
549                 substring_t args[MAX_OPT_ARGS];
550                 int token;
551                 if (!*p)
552                         continue;
553
554                 token = match_token(p, numtasks_tokens, args);
555                 switch (token) {
556                 case numtasks_token_total:
557                         if (match_int(args, &new_total) ||
558                                                 (new_total < total_cnt_alloc)) {
559                                 err = -EINVAL;
560                         } else {
561                                 total_numtasks = new_total;
562                         
563                                 // res is the default class, as config is present only
564                                 // in that directory
565                                 spin_lock(&res->cnt_lock);
566                                 res->cnt_guarantee = total_numtasks;
567                                 res->cnt_unused = total_numtasks;
568                                 res->cnt_limit = total_numtasks;
569                                 recalc_and_propagate(res, NULL);
570                                 spin_unlock(&res->cnt_lock);
571                         }
572                         break;
573                 case numtasks_token_forkrate:
574                         if (match_int(args, &fr) || (fr <= 0)) {
575                                 err = -EINVAL;
576                         } else {
577                                 forkrate = fr;
578                         }
579                         break;
580                 case numtasks_token_interval:
581                         if (match_int(args, &itvl) || (itvl <= 0)) {
582                                 err = -EINVAL;
583                         } else {
584                                 forkrate_interval = itvl;
585                         }
586                         break;
587                 default:
588                         err = -EINVAL;
589                 }
590         }
591         if ((fr > 0) || (itvl > 0)) {
592                 reset_forkrates(root_core, jiffies);
593         }
594         return err;
595 }
596
597 static void numtasks_change_resclass(void *task, void *old, void *new)
598 {
599         ckrm_numtasks_t *oldres = old;
600         ckrm_numtasks_t *newres = new;
601
602         if (oldres != (void *)-1) {
603                 struct task_struct *tsk = task;
604                 if (!oldres) {
605                         struct ckrm_core_class *old_core =
606                             &(tsk->parent->taskclass->core);
607                         oldres =
608                             ckrm_get_res_class(old_core, numtasks_rcbs.resid,
609                                                ckrm_numtasks_t);
610                 }
611                 numtasks_put_ref_local(oldres->core);
612         }
613         if (newres) {
614                 (void)numtasks_get_ref_local(newres->core, 1);
615         }
616 }
617
/*
 * Callback table passed to ckrm_register_res_ctlr().  resid starts at -1,
 * the value this file uses to mean "not registered".
 */
struct ckrm_res_ctlr numtasks_rcbs = {
        .res_name = NUMTASKS_NAME,
        .res_hdepth = 1,
        .resid = -1,
        .res_alloc = numtasks_res_alloc,
        .res_free = numtasks_res_free,
        .set_share_values = numtasks_set_share_values,
        .get_share_values = numtasks_get_share_values,
        .get_stats = numtasks_get_stats,
        .show_config = numtasks_show_config,
        .set_config = numtasks_set_config,
        .change_resclass = numtasks_change_resclass,
};
631
632 int __init init_ckrm_numtasks_res(void)
633 {
634         struct ckrm_classtype *clstype;
635         int resid = numtasks_rcbs.resid;
636
637         clstype = ckrm_find_classtype_by_name("taskclass");
638         if (clstype == NULL) {
639                 printk(KERN_INFO " Unknown ckrm classtype<taskclass>");
640                 return -ENOENT;
641         }
642
643         if (resid == -1) {
644                 resid = ckrm_register_res_ctlr(clstype, &numtasks_rcbs);
645                 printk(KERN_DEBUG "........init_ckrm_numtasks_res -> %d\n", resid);
646                 if (resid != -1) {
647                         ckrm_numtasks_register(numtasks_get_ref_local,
648                                                numtasks_put_ref_local);
649                         numtasks_rcbs.classtype = clstype;
650                 }
651         }
652         return 0;
653 }
654
655 void __exit exit_ckrm_numtasks_res(void)
656 {
657         if (numtasks_rcbs.resid != -1) {
658                 ckrm_numtasks_register(NULL, NULL);
659         }
660         ckrm_unregister_res_ctlr(&numtasks_rcbs);
661         numtasks_rcbs.resid = -1;
662 }
663
664 module_init(init_ckrm_numtasks_res)
665     module_exit(exit_ckrm_numtasks_res)
666
667     MODULE_LICENSE("GPL");