/* linux/drivers/block/ckrm_io.c : Block I/O Resource Controller for CKRM
 *
 * Copyright (C) Shailabh Nagar, IBM Corp. 2004
 *
 * Provides best-effort block I/O bandwidth control for CKRM.
 * This file provides the CKRM API. The underlying scheduler is a
 * modified Complete-Fair Queueing (CFQ) iosched.
 *
 * Latest version, more details at http://ckrm.sf.net
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * Third complete rewrite for CKRM's current API
 */
26 #include <linux/module.h>
27 #include <linux/slab.h>
28 #include <linux/string.h>
29 #include <asm/errno.h>
30 #include <asm/div64.h>
31 #include <linux/list.h>
32 #include <linux/spinlock.h>
35 #include <linux/ckrm_tc.h>
36 #include <linux/ckrm-io.h>
/*
 * Constants tying CKRM I/O shares to CFQ I/O priority values.
 * NOTE(review): IOPRIO_NORM and IOPRIO_NR are not defined in this file;
 * presumably they come from one of the included headers - confirm.
 */
38 /* Tie to cfq priorities */
39 #define CKI_IOPRIO_NORM IOPRIO_NORM
41 /* Divisor to get fraction of bandwidth represented by an IOPRIO value */
42 /* FIXME: Will not work if IOPRIO_NR > 100 */
43 #define CKI_IOPRIO_DIV (IOPRIO_NR-1)
44 /* Minimum ioprio value to be assigned to a class */
45 #define CKI_IOPRIO_MIN 1
/* Unit size that init_icls_stats() stores in the blksz statistics field */
47 #define CKI_IOUSAGE_UNIT 512
/*
 * Per-class I/O statistics: an epoch start time, the size of one accounting
 * unit, and atomic counters of read and write units.
 * init_icls_stats() fills in epochstart and blksz; cki_reset_stats() zeroes
 * the two counters.
 */
49 typedef struct ckrm_io_stats{
50 struct timeval epochstart ; /* all measurements relative to this
52 unsigned long blksz; /* size of bandwidth unit */
53 atomic_t blkrd; /* read units submitted to DD */
54 atomic_t blkwr; /* write units submitted to DD */
56 } cki_stats_t; /* per class I/O statistics */
/*
 * Currently local unit == CFQ I/O priority directly.
 * CFQ ionice values have an implied bandwidth share, so they
 * can be added, subdivided, etc., as long as the initial allocation
 * of the systemwide default's total is set to the highest CFQ ionice
 * value (== 100% of disk bandwidth).
 */
/*
 * Per-class state of the I/O controller: links to the owning core class and
 * its parent, the relative shares with the lock protecting them, and the
 * absolute allocations (cnt_guarantee, cnt_unused) derived from them.
 * NOTE(review): other code in this file uses this type as cki_icls_t and
 * reads its stats and mystats members; those declarations and the end of
 * the struct are not visible in the reviewed listing - confirm against the
 * full file.
 */
66 typedef struct ckrm_io_class {
68 struct ckrm_core_class *core;
69 struct ckrm_core_class *parent;
71 struct ckrm_shares shares;
72 spinlock_t shares_lock; /* protect share changes */
74 /* Absolute shares of this class
78 int cnt_guarantee; /* Allocation as parent */
79 int cnt_unused; /* Allocation to default subclass */
81 /* Statistics, for class and default subclass */
/*
 * Forward declarations: internal helpers, the CFQ hook installer, the
 * callbacks wired into cki_rcbs, and cki_rcbs itself.  cki_rcbs is declared
 * here without an initializer because functions defined before its
 * initializer read cki_rcbs.resid.
 */
89 /* Internal functions */
90 static inline void cki_reset_stats(cki_stats_t *usg);
91 static inline void init_icls_one(cki_icls_t *icls);
92 static inline int cki_div(int *a, int b, int c);
/* The cki_recalc() prototype is commented out, although a definition of
 * that name still appears later in the file. */
93 //static inline int cki_recalc(cki_icls_t *icls, int rel2abs);
94 static void cki_recalc_propagate(cki_icls_t *res, cki_icls_t *parres);
/* Defined outside this file.  cki_init() passes cki_tsk_icls and
 * cki_tsk_ioprio; cki_exit() passes NULL for both arguments. */
96 extern void cki_cfq_set(icls_tsk_t tskicls, icls_ioprio_t tskioprio);
98 /* CKRM Resource Controller API functions */
99 static void * cki_alloc(struct ckrm_core_class *this,
100 struct ckrm_core_class * parent);
101 static void cki_free(void *res);
102 static int cki_setshare(void *res, struct ckrm_shares * shares);
103 static int cki_getshare(void *res, struct ckrm_shares * shares);
104 static int cki_getstats(void *res, struct seq_file *);
105 static int cki_resetstats(void *res);
106 static int cki_showconfig(void *res, struct seq_file *sfile);
107 static int cki_setconfig(void *res, const char *cfgstr);
108 static void cki_chgcls(void *tsk, void *oldres, void *newres);
111 struct ckrm_res_ctlr cki_rcbs;
/*
 * Zero the read and write unit counters of one statistics block.
 * The statements shown here only touch blkrd and blkwr; epochstart and
 * blksz are not modified by them.
 * NOTE(review): lines between the signature and the first statement are
 * not visible in the reviewed listing - confirm against the full file.
 */
113 static inline void cki_reset_stats(cki_stats_t *stats)
116 atomic_set(&stats->blkrd,0);
117 atomic_set(&stats->blkwr,0);
121 static inline void init_icls_stats(cki_icls_t *icls)
125 do_gettimeofday(&tv);
126 icls->stats.epochstart = icls->mystats.epochstart = tv;
127 icls->stats.blksz = icls->mystats.blksz = CKI_IOUSAGE_UNIT;
128 cki_reset_stats(&icls->stats);
129 cki_reset_stats(&icls->mystats);
132 /* Initialize icls to default values
133 * No other classes touched, locks not reinitialized.
136 static inline void init_icls_one(cki_icls_t *icls)
138 // Assign zero as initial guarantee otherwise creations
139 // could fail due to inadequate share
141 //icls->shares.my_guarantee =
142 // (CKI_IOPRIO_MIN * CKRM_SHARE_DFLT_TOTAL_GUARANTEE) /
144 icls->shares.my_guarantee = 0;
145 icls->shares.my_limit = CKRM_SHARE_DFLT_TOTAL_GUARANTEE;
146 icls->shares.total_guarantee = CKRM_SHARE_DFLT_TOTAL_GUARANTEE;
147 icls->shares.max_limit = CKRM_SHARE_DFLT_TOTAL_GUARANTEE;
149 icls->shares.unused_guarantee = icls->shares.total_guarantee -
150 icls->shares.my_guarantee;
151 icls->shares.cur_max_limit = CKRM_SHARE_DFLT_TOTAL_GUARANTEE;
154 icls->cnt_guarantee = icls->cnt_unused = IOPRIO_IDLE;
156 //Same rationale icls->ioprio = CKI_IOPRIO_MIN;
157 //IOPRIO_IDLE equivalence to zero my_guarantee (set above) relies
158 //on former being zero.
160 init_icls_stats(icls);
/*
 * Scale the product b * c down by CKI_IOPRIO_DIV, using a 64-bit
 * intermediate for the multiplication.
 * NOTE(review): do_div() presumably divides temp in place - confirm against
 * asm/div64.h.  The lines visible here never store through a or return a
 * value; the rest of the body is not visible in the reviewed listing.
 */
164 static inline int cki_div(int *a, int b, int c)
166 u64 temp = (u64) b * c ;
167 do_div(temp,CKI_IOPRIO_DIV);
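/* Recalculate absolute shares from relative (rel2abs=1)
 * or vice versa (rel2abs=0).
 * Caller should have a lock on icls.
 * (The names icls and rel2abs match the parameters of cki_recalc(), not
 * those of the function that follows.)
 */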
/*
 * Recompute the absolute allocations of res from its relative shares and
 * the parent's absolute guarantee, then repeat for every child of res.
 *
 *   res    - class being recalculated
 *   parres - resource class of res's parent; its cnt_guarantee is the
 *            amount being subdivided
 *
 * Children are visited while the hierarchy is held via ckrm_lock_hier(),
 * and each child is processed with its own shares_lock taken.  The function
 * calls itself for each child.
 * NOTE(review): several lines of this body (else branches, continuation
 * lines) are not visible in the reviewed listing.
 */
179 static void cki_recalc_propagate(cki_icls_t *res, cki_icls_t *parres)
182 ckrm_core_class_t *child = NULL;
183 cki_icls_t *childres;
184 int resid = cki_rcbs.resid;
187 struct ckrm_shares *par = &parres->shares;
188 struct ckrm_shares *self = &res->shares;
/* cnt_guarantee: DONTCARE is inherited from the parent unchanged; otherwise
 * it is my_guarantee scaled by parent cnt_guarantee / parent total_guarantee.
 * The final assignment of 0 presumably covers a zero total_guarantee. */
192 if (parres->cnt_guarantee == CKRM_SHARE_DONTCARE) {
193 res->cnt_guarantee = CKRM_SHARE_DONTCARE;
194 } else if (par->total_guarantee) {
195 u64 temp = (u64) self->my_guarantee *
196 parres->cnt_guarantee;
197 do_div(temp, par->total_guarantee);
198 res->cnt_guarantee = (int) temp;
200 res->cnt_guarantee = 0;
/* cnt_unused: derived from unused_guarantee over this class's own
 * total_guarantee; the second multiplicand is not visible here. */
203 if (res->cnt_guarantee == CKRM_SHARE_DONTCARE) {
204 res->cnt_unused = CKRM_SHARE_DONTCARE;
205 } else if (self->total_guarantee) {
206 u64 temp = (u64) self->unused_guarantee *
208 do_div(temp, self->total_guarantee);
209 res->cnt_unused = (int) temp;
214 // propagate to children
215 ckrm_lock_hier(res->core);
216 while ((child = ckrm_get_next_child(res->core,child)) != NULL){
217 childres = ckrm_get_res_class(child, resid,
/* res acts as the parent for the recursive step */
220 spin_lock(&childres->shares_lock);
221 cki_recalc_propagate(childres, res);
222 spin_unlock(&childres->shares_lock);
224 ckrm_unlock_hier(res->core);
/*
 * Share recalculation expressed in terms of ioprio, total and unused
 * members of the class.
 * NOTE(review): the prototype of this function is commented out in the
 * forward declarations, and the members used here (total, ioprio, unused)
 * are not among the struct fields visible in the reviewed listing.  This
 * looks like disabled legacy code - confirm before relying on it.
 */
228 static inline int cki_recalc(cki_icls_t *icls, int rel2abs)
/* Root class: scale my_guarantee onto the IOPRIO_NR-1 range */
232 if (icls->parent == NULL) {
233 /* Root, as parent, always gets all */
235 temp = icls->shares.my_guarantee * (IOPRIO_NR-1);
236 do_div(temp, icls->shares.total_guarantee);
238 icls->total = IOPRIO_NR-1;
239 icls->ioprio = temp ;
240 icls->unused = icls->total - icls->ioprio;
241 // icls->unused = (IOPRIO_NR-1)-icls->ioprio;
/* Non-root: the scaling is relative to the parent's resource class */
247 parres = ckrm_get_res_class(icls->parent,
251 printk(KERN_ERR "cki_recalc: error getting "
252 "resclass from core \n");
257 temp = (icls->shares.my_guarantee *
259 do_div(temp, parres->shares.total_guarantee);
271 void *cki_tsk_icls(struct task_struct *tsk)
273 return (void *) ckrm_get_res_class(class_core(tsk->taskclass),
274 cki_rcbs.resid, cki_icls_t);
277 int cki_tsk_ioprio(struct task_struct *tsk)
279 cki_icls_t *icls = ckrm_get_res_class(class_core(tsk->taskclass),
280 cki_rcbs.resid, cki_icls_t);
281 return icls->cnt_unused;
/*
 * res_alloc callback: allocate and initialize the I/O class for a new core
 * class.
 *
 *   core   - core class the new I/O class is created for
 *   parent - parent core class, or NULL when creating the root
 *
 * The object is allocated with GFP_ATOMIC and zeroed with memset().  For
 * the root (parent == NULL) the share fields are set to
 * CKRM_SHARE_DFLT_TOTAL_GUARANTEE and the absolute counts to IOPRIO_NR-1.
 * A module reference is taken here and dropped again in cki_free().
 * NOTE(review): the non-root branch, the allocation-failure return and the
 * final return are not visible in the reviewed listing.
 */
284 static void *cki_alloc(struct ckrm_core_class *core,
285 struct ckrm_core_class *parent)
289 icls = kmalloc(sizeof(cki_icls_t), GFP_ATOMIC);
291 printk(KERN_ERR "cki_res_alloc failed GFP_ATOMIC\n");
295 memset(icls, 0, sizeof(cki_icls_t));
297 icls->parent = parent;
298 icls->shares_lock = SPIN_LOCK_UNLOCKED;
/* Root class: there is no parent allocation to derive shares from */
300 if (parent == NULL) {
302 /* Root class gets same as "normal" CFQ priorities to
303 * retain compatibility of behaviour in the absence of
307 icls->cnt_guarantee = icls->cnt_unused = IOPRIO_NR-1;
309 /* Default gets normal, not minimum */
310 //icls->unused = IOPRIO_NORM;
311 //icls->unused = icls->guarantee-icls->myguarantee;
312 //icls->limit = icls->mylimit = IOPRIO_NR;
314 /* Compute shares in abstract units */
315 icls->shares.total_guarantee = CKRM_SHARE_DFLT_TOTAL_GUARANTEE;
317 // my_guarantee for root is meaningless. Set to default
318 icls->shares.my_guarantee = CKRM_SHARE_DFLT_TOTAL_GUARANTEE;
320 icls->shares.unused_guarantee =
321 CKRM_SHARE_DFLT_TOTAL_GUARANTEE;
323 //temp = (u64) icls->cnt_unused * icls->shares.total_guarantee;
324 //do_div(temp, CKI_IOPRIO_DIV);
325 // temp now has root's default's share
326 //icls->shares.unused_guarantee =
327 // icls->shares.total_guarantee - temp;
329 icls->shares.my_limit = CKRM_SHARE_DFLT_TOTAL_GUARANTEE;
330 icls->shares.max_limit = CKRM_SHARE_DFLT_TOTAL_GUARANTEE;
331 icls->shares.cur_max_limit = CKRM_SHARE_DFLT_TOTAL_GUARANTEE;
335 /* No propagation to parent needed if icls'
336 initial share is zero */
/* NOTE(review): the result of try_module_get() is not checked here */
338 try_module_get(THIS_MODULE);
/*
 * res_free callback: release an I/O class.
 *
 * The class's guarantee is handed back to its parent while holding the
 * parent's shares_lock: the relative share through
 * child_guarantee_changed(), the absolute share by adding cnt_guarantee to
 * the parent's cnt_unused.  The module reference taken in cki_alloc() is
 * dropped at the end.
 * NOTE(review): the statement that frees the memory itself, the early
 * returns and the handling of a class without a parent are not visible in
 * the reviewed listing.
 */
342 static void cki_free(void *res)
344 cki_icls_t *icls = res, *parres;
349 /* Deallocate CFQ queues */
351 /* Currently CFQ queues are deallocated when empty. Since no task
352 * should belong to this icls, no new requests will get added to the
355 * When CFQ switches to persistent queues, call its "put" function
356 * so it gets deallocated after the last pending request is serviced.
360 parres = ckrm_get_res_class(icls->parent,
364 printk(KERN_ERR "cki_free: error getting "
365 "resclass from core \n");
369 /* Update parent's shares */
370 spin_lock(&parres->shares_lock);
371 child_guarantee_changed(&parres->shares, icls->shares.my_guarantee, 0);
372 parres->cnt_unused += icls->cnt_guarantee;
373 spin_unlock(&parres->shares_lock);
376 module_put(THIS_MODULE);
/*
 * set_share_values callback: apply new relative shares to a class and
 * refresh the absolute allocations that depend on them.
 *
 *   res - the class (used as cki_icls_t *) whose shares change
 *   new - requested values; max_limit and my_limit must both be
 *         CKRM_SHARE_UNCHANGED because limits are not supported
 *
 * When a parent exists its shares_lock is taken before the class's own.
 * rc starts as -EINVAL and receives the result of set_shares().
 * NOTE(review): the branch conditions around the locking and the return
 * statements are not visible in the reviewed listing.
 */
381 static int cki_setshare(void *res, struct ckrm_shares *new)
383 cki_icls_t *icls = res, *parres;
384 struct ckrm_shares *cur, *par;
385 int rc = -EINVAL, resid = cki_rcbs.resid;
388 printk(KERN_ERR "No class\n");
394 /* limits not supported */
395 if ((new->max_limit != CKRM_SHARE_UNCHANGED)
396 || (new->my_limit != CKRM_SHARE_UNCHANGED)) {
397 printk(KERN_ERR "limits not supported\n");
403 ckrm_get_res_class(icls->parent, resid, cki_icls_t);
405 printk(KERN_ERR "cki_setshare: error getting "
406 "resclass from core \n");
/* Lock order: parent's shares_lock first, then this class's */
409 spin_lock(&parres->shares_lock);
410 spin_lock(&icls->shares_lock);
411 par = &parres->shares;
413 spin_lock(&icls->shares_lock);
418 rc = set_shares(new, cur, par);
/* NOTE(review): printed at KERN_ERR right after every set_shares() call,
 * whatever the result - looks like leftover debugging output */
419 printk(KERN_ERR "rc from set_shares %d\n", rc);
421 if ((!rc) && parres) {
/* Recompute the parent's cnt_unused from its unused_guarantee */
423 if (parres->cnt_guarantee == CKRM_SHARE_DONTCARE) {
424 parres->cnt_unused = CKRM_SHARE_DONTCARE;
425 } else if (par->total_guarantee) {
426 u64 temp = (u64) par->unused_guarantee *
427 parres->cnt_guarantee;
428 do_div(temp, par->total_guarantee);
429 parres->cnt_unused = (int) temp;
431 parres->cnt_unused = 0;
/* Push the new allocation down to this class and its descendants */
433 cki_recalc_propagate(res, parres);
/* NOTE(review): the next lines use ioprio/unused members and cki_recalc(),
 * unlike the cnt_* fields used above - presumably disabled code; confirm */
436 int old = icls->ioprio;
438 rc = cki_recalc(icls,0);
441 int raise_tot = icls->ioprio - old ;
442 parres->unused -= raise_tot ;
446 spin_unlock(&icls->shares_lock);
448 spin_unlock(&parres->shares_lock);
/*
 * get_share_values callback: copy the class's current relative shares into
 * *shares by plain struct assignment.  shares_lock is not taken in the
 * lines shown here.
 * NOTE(review): argument checks and return statements are not visible in
 * the reviewed listing.
 */
453 static int cki_getshare(void *res, struct ckrm_shares * shares)
455 cki_icls_t *icls = res;
459 *shares = icls->shares;
/*
 * get_stats callback: print the class's counters to sfile, one
 * "<value> <label>" pair per line.  Read and write units are printed for
 * mystats first and stats second, followed by cnt_guarantee
 * ("total ioprio") and cnt_unused ("unused/default ioprio").
 * NOTE(review): argument checks and return statements are not visible in
 * the reviewed listing.
 */
463 static int cki_getstats(void *res, struct seq_file *sfile)
465 cki_icls_t *icls = res;
471 seq_printf(sfile, "%d my_read\n",atomic_read(&icls->mystats.blkrd));
472 seq_printf(sfile, "%d my_write\n",atomic_read(&icls->mystats.blkwr));
473 seq_printf(sfile, "%d total_read\n",atomic_read(&icls->stats.blkrd));
474 seq_printf(sfile, "%d total_write\n",atomic_read(&icls->stats.blkwr));
477 seq_printf(sfile, "%d total ioprio\n",icls->cnt_guarantee);
478 seq_printf(sfile, "%d unused/default ioprio\n",icls->cnt_unused);
/*
 * reset_stats callback: restart statistics for the class through
 * init_icls_stats(), which zeroes the counters and also re-stamps the
 * epoch start time and unit size.
 * NOTE(review): argument checks and return statements are not visible in
 * the reviewed listing.
 */
483 static int cki_resetstats(void *res)
485 cki_icls_t *icls = res;
490 init_icls_stats(icls);
/*
 * show_config callback registered in cki_rcbs.
 * NOTE(review): the body is not visible in the reviewed listing.
 */
494 static int cki_showconfig(void *res, struct seq_file *sfile)
/*
 * set_config callback registered in cki_rcbs.
 * NOTE(review): the body is not visible in the reviewed listing.
 */
499 static int cki_setconfig(void *res, const char *cfgstr)
/*
 * change_resclass callback, taking the task and its old and new resource
 * classes.  According to the note in the body no work is required at
 * present; the only reference to the arguments shown is commented out.
 */
504 static void cki_chgcls(void *tsk, void *oldres, void *newres)
506 /* cki_icls_t *oldicls = oldres, *newicls = newres; */
508 /* Nothing needs to be done
509 * Future requests from task will go to the new class's CFQ q
510 * Old ones will continue to get satisfied from the original q
512 * Once CFQ moves to a persistent queue model and if refcounts on
513 * icls's CFQ queues are used, a decrement op would be needed here
/*
 * Callback table that cki_init() registers with CKRM.  Each member below
 * points at the matching cki_* function in this file.
 * NOTE(review): initializers ahead of .res_alloc and the end of the
 * initializer are not visible in the reviewed listing; resid and classtype
 * are members read or written elsewhere in this file.
 */
521 struct ckrm_res_ctlr cki_rcbs = {
525 .res_alloc = cki_alloc,
526 .res_free = cki_free,
527 .set_share_values = cki_setshare,
528 .get_share_values = cki_getshare,
529 .get_stats = cki_getstats,
530 .reset_stats = cki_resetstats,
531 .show_config = cki_showconfig,
532 .set_config = cki_setconfig,
533 .change_resclass = cki_chgcls,
/*
 * Module init: look up the "taskclass" classtype, register cki_rcbs with
 * it, record the classtype in cki_rcbs and hand the per-task lookup
 * helpers to CFQ through cki_cfq_set().
 * NOTE(review): the return statements and any check of the value returned
 * by ckrm_register_res_ctlr() are not visible in the reviewed listing.
 */
538 int __init cki_init(void)
540 struct ckrm_classtype *clstype;
/* NOTE(review): this initial value appears to be overwritten by the
 * registration result below before any visible use */
541 int resid = cki_rcbs.resid;
543 clstype = ckrm_find_classtype_by_name("taskclass");
544 if (clstype == NULL) {
545 printk(KERN_INFO "init_cki: classtype<taskclass> not found\n");
550 resid = ckrm_register_res_ctlr(clstype, &cki_rcbs);
552 cki_rcbs.classtype = clstype;
553 cki_cfq_set(cki_tsk_icls,cki_tsk_ioprio);
/*
 * Module exit: unregister the controller, clear its classtype and remove
 * the CFQ hooks by passing NULL for both callbacks to cki_cfq_set().
 */
560 void __exit cki_exit(void)
562 ckrm_unregister_res_ctlr(&cki_rcbs);
564 cki_rcbs.classtype = NULL;
565 cki_cfq_set(NULL,NULL);
/* Module entry and exit points, followed by the module metadata */
568 module_init(cki_init)
569 module_exit(cki_exit)
571 MODULE_AUTHOR("Shailabh Nagar <nagar@watson.ibm.com>");
572 MODULE_DESCRIPTION("CKRM Disk I/O Resource Controller");
573 MODULE_LICENSE("GPL");