1 /* linux/drivers/block/ckrm_io.c : Block I/O Resource Controller for CKRM
3 * Copyright (C) Shailabh Nagar, IBM Corp. 2004
6 * Provides best-effort block I/O bandwidth control for CKRM
7 * This file provides the CKRM API. The underlying scheduler is a
8 * modified Complete-Fair Queueing (CFQ) iosched.
10 * Latest version, more details at http://ckrm.sf.net
12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License as published by
14 * the Free Software Foundation; either version 2 of the License, or
15 * (at your option) any later version.
22 * Third complete rewrite for CKRM's current API
26 #include <linux/module.h>
27 #include <linux/slab.h>
28 #include <linux/string.h>
29 #include <asm/errno.h>
30 #include <asm/div64.h>
31 #include <linux/list.h>
32 #include <linux/spinlock.h>
35 #include <linux/ckrm_tc.h>
36 #include <linux/ckrm-io.h>
// Controller-wide constants. IOPRIO_NORM and IOPRIO_NR are not defined in the
// visible source; presumably they come from one of the included headers
// (TODO confirm which one).
38 /* Tie to cfq priorities */
// NOTE(review): CKI_IOPRIO_NORM has no other reference in the visible source.
39 #define CKI_IOPRIO_NORM IOPRIO_NORM
41 /* Divisor to get fraction of bandwidth represented by an IOPRIO value */
42 /* FIXME: Will not work if IOPRIO_NR > 100 */
// Used as the divisor in cki_div().
43 #define CKI_IOPRIO_DIV (IOPRIO_NR-1)
44 /* Minimum ioprio value to be assigned to a class */
// NOTE(review): only referenced from commented-out code in the visible source.
45 #define CKI_IOPRIO_MIN 1
// Stored into the blksz member of both per-class statistics records by
// init_icls_stats().
47 #define CKI_IOUSAGE_UNIT 512
// Per-class I/O statistics record. Each I/O class carries two of these
// (members stats and mystats, both initialised by init_icls_stats()).
// NOTE(review): this listing skips embedded line numbers 51 and 55, so the
// tail of the epochstart comment, and possibly a member, are not visible.
49 typedef struct ckrm_io_stats{
50 struct timeval epochstart ; /* all measurements relative to this
// blksz is set to CKI_IOUSAGE_UNIT by init_icls_stats().
52 unsigned long blksz; /* size of bandwidth unit */
// blkrd and blkwr are zeroed by cki_reset_stats() and printed by
// cki_getstats(); no code that increments them is visible in this listing.
53 atomic_t blkrd; /* read units submitted to DD */
54 atomic_t blkwr; /* write units submitted to DD */
56 } cki_stats_t; /* per class I/O statistics */
59 * Currently local unit == CFQ I/O priority directly.
60 * CFQ ionice values have an implied bandwidth share so they
61 * can be added, subdivided etc. as long as the initial allocation
62 * of the systemwide default's total is set to the highest CFQ ionice
63 * value (== 100% of disk bandwidth)
// Per-class state of the I/O controller; cki_alloc() allocates and fills one
// of these.
// NOTE(review): the end of this definition is cut from the listing. Other
// code in the file uses members named stats and mystats (both handed to
// cki_reset_stats()) and the type name cki_icls_t, so those declarations
// presumably follow here; confirm against the full source.
66 typedef struct ckrm_io_class {
// core is the class whose children cki_recalc_propagate() walks; parent is
// NULL for the root class (see the parent == NULL test in cki_alloc()).
68 struct ckrm_core_class *core;
69 struct ckrm_core_class *parent;
// Relative shares, updated through set_shares() in cki_setshare().
71 struct ckrm_shares shares;
72 spinlock_t shares_lock; /* protect share changes */
74 /* Absolute shares of this class
// cnt_unused is the value cki_tsk_ioprio() returns for a task of this class.
78 int cnt_guarantee; /* Allocation as parent */
79 int cnt_unused; /* Allocation to default subclass */
81 /* Statistics, for class and default subclass */
// Forward declarations for the definitions that follow in this file.
89 /* Internal functions */
90 static inline void cki_reset_stats(cki_stats_t *usg);
91 static inline void init_icls_one(cki_icls_t *icls);
92 static inline int cki_div(int *a, int b, int c);
// NOTE(review): the cki_recalc() prototype is commented out, although a
// definition of cki_recalc() still appears later in the listing.
93 //static inline int cki_recalc(cki_icls_t *icls, int rel2abs);
94 static void cki_recalc_propagate(cki_icls_t *res, cki_icls_t *parres);
96 /* External functions e.g. interface to ioscheduler */
// These two are the callbacks handed to cki_cfq_set() in cki_init().
97 void *cki_tsk_icls (struct task_struct *tsk);
98 int cki_tsk_ioprio (struct task_struct *tsk);
// cki_cfq_set() is not defined in the visible source; icls_tsk_t and
// icls_ioprio_t are presumably declared in linux/ckrm-io.h (TODO confirm).
100 extern void cki_cfq_set(icls_tsk_t tskicls, icls_ioprio_t tskioprio);
102 /* CKRM Resource Controller API functions */
// Each function below is installed in the cki_rcbs callback table that is
// initialised near the end of the file.
103 static void * cki_alloc(struct ckrm_core_class *this,
104 struct ckrm_core_class * parent);
105 static void cki_free(void *res);
106 static int cki_setshare(void *res, struct ckrm_shares * shares);
107 static int cki_getshare(void *res, struct ckrm_shares * shares);
108 static int cki_getstats(void *res, struct seq_file *);
109 static int cki_resetstats(void *res);
110 static int cki_showconfig(void *res, struct seq_file *sfile);
111 static int cki_setconfig(void *res, const char *cfgstr);
112 static void cki_chgcls(void *tsk, void *oldres, void *newres);
// Tentative definition, so that functions placed before the initialised
// definition can read cki_rcbs.resid.
115 struct ckrm_res_ctlr cki_rcbs;
// Zero the read and write unit counters of one statistics record.
// NOTE(review): the listing skips embedded lines 118-119 and 122-123, so any
// guard around the two stores (for example a NULL check) is not visible.
117 static inline void cki_reset_stats(cki_stats_t *stats)
120 atomic_set(&stats->blkrd,0);
121 atomic_set(&stats->blkwr,0);
// (Re)initialise both statistics records of a class: stamp them with the
// current time of day as epoch start, set the unit size to CKI_IOUSAGE_UNIT
// and zero the counters. Also called from cki_resetstats().
// The declaration of tv is on a line missing from this listing; tv is passed
// to do_gettimeofday() and assigned to the struct timeval member epochstart.
125 static inline void init_icls_stats(cki_icls_t *icls)
129 do_gettimeofday(&tv);
130 icls->stats.epochstart = icls->mystats.epochstart = tv;
131 icls->stats.blksz = icls->mystats.blksz = CKI_IOUSAGE_UNIT;
132 cki_reset_stats(&icls->stats);
133 cki_reset_stats(&icls->mystats);
// Resulting state after this function: my_guarantee is 0; my_limit,
// total_guarantee, max_limit and cur_max_limit are all
// CKRM_SHARE_DFLT_TOTAL_GUARANTEE; unused_guarantee is total_guarantee minus
// my_guarantee; both absolute counts are IOPRIO_IDLE; statistics are reset
// through init_icls_stats().
// NOTE(review): no call to this function is visible in the listing; the
// non-root branch of cki_alloc(), where one would be expected, is cut.
136 /* Initialize icls to default values
137 * No other classes touched, locks not reinitialized.
140 static inline void init_icls_one(cki_icls_t *icls)
142 // Assign zero as initial guarantee otherwise creations
143 // could fail due to inadequate share
145 //icls->shares.my_guarantee =
146 // (CKI_IOPRIO_MIN * CKRM_SHARE_DFLT_TOTAL_GUARANTEE) /
148 icls->shares.my_guarantee = 0;
149 icls->shares.my_limit = CKRM_SHARE_DFLT_TOTAL_GUARANTEE;
150 icls->shares.total_guarantee = CKRM_SHARE_DFLT_TOTAL_GUARANTEE;
151 icls->shares.max_limit = CKRM_SHARE_DFLT_TOTAL_GUARANTEE;
153 icls->shares.unused_guarantee = icls->shares.total_guarantee -
154 icls->shares.my_guarantee;
155 icls->shares.cur_max_limit = CKRM_SHARE_DFLT_TOTAL_GUARANTEE;
158 icls->cnt_guarantee = icls->cnt_unused = IOPRIO_IDLE;
160 //Same rationale icls->ioprio = CKI_IOPRIO_MIN;
161 //IOPRIO_IDLE equivalence to zero my_guarantee (set above) relies
162 //on former being zero.
164 init_icls_stats(icls);
// Forms the product of b and c in 64 bits and divides it in place by
// CKI_IOPRIO_DIV using do_div().
// NOTE(review): the rest of the body (embedded lines 172-176) is missing from
// this listing, so how the quotient reaches *a and what is returned cannot be
// stated from here. No caller of cki_div() is visible in the listing either.
168 static inline int cki_div(int *a, int b, int c)
170 u64 temp = (u64) b * c ;
171 do_div(temp,CKI_IOPRIO_DIV);
// Recompute the absolute shares (cnt_guarantee, cnt_unused) of res from its
// relative shares and the absolute guarantee of parres, then do the same for
// every child of res.
// NOTE(review): rel2abs in the comment below is not a parameter of this
// function; the wording looks inherited from cki_recalc().
178 /* Recalculate absolute shares from relative (rel2abs=1)
179 * or vice versa (rel2abs=0)
180 * Caller should have a lock on icls
183 static void cki_recalc_propagate(cki_icls_t *res, cki_icls_t *parres)
186 ckrm_core_class_t *child = NULL;
187 cki_icls_t *childres;
188 int resid = cki_rcbs.resid;
191 struct ckrm_shares *par = &parres->shares;
192 struct ckrm_shares *self = &res->shares;
// Step 1: cnt_guarantee = my_guarantee * parent cnt_guarantee / parent
// total_guarantee, with the product formed in 64 bits. A parent value of
// CKRM_SHARE_DONTCARE is inherited as is. The assignment of 0 further down
// presumably belongs to an else branch for a zero parent total (the branch
// line itself is missing from the listing).
196 if (parres->cnt_guarantee == CKRM_SHARE_DONTCARE) {
197 res->cnt_guarantee = CKRM_SHARE_DONTCARE;
198 } else if (par->total_guarantee) {
199 u64 temp = (u64) self->my_guarantee *
200 parres->cnt_guarantee;
201 do_div(temp, par->total_guarantee);
202 res->cnt_guarantee = (int) temp;
204 res->cnt_guarantee = 0;
// Step 2: cnt_unused is derived from unused_guarantee and the total_guarantee
// of this class. The second multiplicand and the fallback branch are on lines
// missing from the listing.
207 if (res->cnt_guarantee == CKRM_SHARE_DONTCARE) {
208 res->cnt_unused = CKRM_SHARE_DONTCARE;
209 } else if (self->total_guarantee) {
210 u64 temp = (u64) self->unused_guarantee *
212 do_div(temp, self->total_guarantee);
213 res->cnt_unused = (int) temp;
// Step 3: walk the children between ckrm_lock_hier() and ckrm_unlock_hier();
// each child is recalculated recursively with its own shares_lock held.
// NOTE(review): no NULL check on childres is visible before its lock is
// taken; confirm against the full source.
218 // propagate to children
219 ckrm_lock_hier(res->core);
220 while ((child = ckrm_get_next_child(res->core,child)) != NULL){
221 childres = ckrm_get_res_class(child, resid,
224 spin_lock(&childres->shares_lock);
225 cki_recalc_propagate(childres, res);
226 spin_unlock(&childres->shares_lock);
228 ckrm_unlock_hier(res->core);
// Older share recalculation routine.
// NOTE(review): this body uses members ioprio, total and unused, none of which
// appear among the visible members of cki_icls_t, and its prototype is
// commented out in the declaration list. It is presumably compiled out by
// preprocessor lines missing from this listing; confirm before relying on it.
232 static inline int cki_recalc(cki_icls_t *icls, int rel2abs)
236 if (icls->parent == NULL) {
237 /* Root, as parent, always gets all */
// NOTE(review): unlike cki_recalc_propagate(), no 64-bit cast is visible on
// this product, and no zero check on total_guarantee is visible before the
// division.
239 temp = icls->shares.my_guarantee * (IOPRIO_NR-1);
240 do_div(temp, icls->shares.total_guarantee);
242 icls->total = IOPRIO_NR-1;
243 icls->ioprio = temp ;
244 icls->unused = icls->total - icls->ioprio;
245 // icls->unused = (IOPRIO_NR-1)-icls->ioprio;
251 parres = ckrm_get_res_class(icls->parent,
255 printk(KERN_ERR "cki_recalc: error getting "
256 "resclass from core \n");
261 temp = (icls->shares.my_guarantee *
263 do_div(temp, parres->shares.total_guarantee);
275 void *cki_tsk_icls(struct task_struct *tsk)
277 return (void *) ckrm_get_res_class(class_core(tsk->taskclass),
278 cki_rcbs.resid, cki_icls_t);
281 int cki_tsk_ioprio(struct task_struct *tsk)
283 cki_icls_t *icls = ckrm_get_res_class(class_core(tsk->taskclass),
284 cki_rcbs.resid, cki_icls_t);
285 return icls->cnt_unused;
// res_alloc callback: allocate and initialise the I/O class object for a core
// class. The object comes from kmalloc() and is zeroed with memset(). For the
// root class (parent == NULL) both absolute counts are set to IOPRIO_NR-1 and
// every relative share field to CKRM_SHARE_DFLT_TOTAL_GUARANTEE.
// NOTE(review): the declaration of icls, the test guarding the failure printk,
// the non-root branch and the return statements are on lines missing from
// this listing.
288 static void *cki_alloc(struct ckrm_core_class *core,
289 struct ckrm_core_class *parent)
// NOTE(review): GFP_ATOMIC is used here; whether this callback may sleep is
// not visible from this file. If it may, GFP_KERNEL would be less likely to
// fail; confirm against the CKRM core.
293 icls = kmalloc(sizeof(cki_icls_t), GFP_ATOMIC);
295 printk(KERN_ERR "cki_res_alloc failed GFP_ATOMIC\n");
299 memset(icls, 0, sizeof(cki_icls_t));
301 icls->parent = parent;
302 icls->shares_lock = SPIN_LOCK_UNLOCKED;
304 if (parent == NULL) {
306 /* Root class gets same as "normal" CFQ priorities to
307 * retain compatibility of behaviour in the absence of
311 icls->cnt_guarantee = icls->cnt_unused = IOPRIO_NR-1;
313 /* Default gets normal, not minimum */
314 //icls->unused = IOPRIO_NORM;
315 //icls->unused = icls->guarantee-icls->myguarantee;
316 //icls->limit = icls->mylimit = IOPRIO_NR;
318 /* Compute shares in abstract units */
319 icls->shares.total_guarantee = CKRM_SHARE_DFLT_TOTAL_GUARANTEE;
321 // my_guarantee for root is meaningless. Set to default
322 icls->shares.my_guarantee = CKRM_SHARE_DFLT_TOTAL_GUARANTEE;
324 icls->shares.unused_guarantee =
325 CKRM_SHARE_DFLT_TOTAL_GUARANTEE;
327 //temp = (u64) icls->cnt_unused * icls->shares.total_guarantee;
328 //do_div(temp, CKI_IOPRIO_DIV);
329 // temp now has root's default's share
330 //icls->shares.unused_guarantee =
331 // icls->shares.total_guarantee - temp;
333 icls->shares.my_limit = CKRM_SHARE_DFLT_TOTAL_GUARANTEE;
334 icls->shares.max_limit = CKRM_SHARE_DFLT_TOTAL_GUARANTEE;
335 icls->shares.cur_max_limit = CKRM_SHARE_DFLT_TOTAL_GUARANTEE;
339 /* No propagation to parent needed if icls'
340 initial share is zero */
// One module reference is taken per allocated class; cki_free() drops it with
// module_put(). NOTE(review): the return value of try_module_get() is not
// checked in the visible line.
342 try_module_get(THIS_MODULE);
// res_free callback: release the I/O class object res. The visible part
// adjusts the shares of the parent and drops the module reference taken in
// cki_alloc().
// NOTE(review): no kfree() of the object is visible; embedded lines 378-379
// are missing from the listing, so confirm the release against the full
// source. Handling of a NULL res or of the root class is likewise not
// visible.
346 static void cki_free(void *res)
348 cki_icls_t *icls = res, *parres;
353 /* Deallocate CFQ queues */
355 /* Currently CFQ queues are deallocated when empty. Since no task
356 * should belong to this icls, no new requests will get added to the
359 * When CFQ switches to persistent queues, call its "put" function
360 * so it gets deallocated after the last pending request is serviced.
364 parres = ckrm_get_res_class(icls->parent,
368 printk(KERN_ERR "cki_free: error getting "
369 "resclass from core \n");
// Under the parent shares_lock: child_guarantee_changed() is called with the
// old guarantee of this class and a new value of 0, and the absolute
// guarantee of this class is added to the parent cnt_unused.
373 /* Update parent's shares */
374 spin_lock(&parres->shares_lock);
375 child_guarantee_changed(&parres->shares, icls->shares.my_guarantee, 0);
376 parres->cnt_unused += icls->cnt_guarantee;
377 spin_unlock(&parres->shares_lock);
380 module_put(THIS_MODULE);
// set_share_values callback: apply the relative shares in new to the class
// res, then refresh the absolute shares of the parent and of the subtree
// under res. rc starts as -EINVAL and takes the result of set_shares().
// NOTE(review): several control-flow lines (tests, else branches, returns)
// are missing from this listing, so the exact branch structure has to be
// confirmed against the full source.
385 static int cki_setshare(void *res, struct ckrm_shares *new)
387 cki_icls_t *icls = res, *parres;
388 struct ckrm_shares *cur, *par;
389 int rc = -EINVAL, resid = cki_rcbs.resid;
392 printk(KERN_ERR "No class\n");
// Any request that changes max_limit or my_limit is reported as unsupported.
398 /* limits not supported */
399 if ((new->max_limit != CKRM_SHARE_UNCHANGED)
400 || (new->my_limit != CKRM_SHARE_UNCHANGED)) {
401 printk(KERN_ERR "limits not supported\n");
407 ckrm_get_res_class(icls->parent, resid, cki_icls_t);
409 printk(KERN_ERR "cki_setshare: error getting "
410 "resclass from core \n");
// Lock order when a parent exists: parent shares_lock first, then the lock of
// this class. The lone spin_lock further down presumably belongs to the
// branch without a parent.
413 spin_lock(&parres->shares_lock);
414 spin_lock(&icls->shares_lock);
415 par = &parres->shares;
417 spin_lock(&icls->shares_lock);
422 rc = set_shares(new, cur, par);
// NOTE(review): this message is printed at KERN_ERR on every call, success
// included; it reads like leftover debug output.
423 printk(KERN_ERR "rc from set_shares %d\n", rc);
425 if ((!rc) && parres) {
// Recompute the parent cnt_unused from its unused_guarantee (same formula as
// in cki_recalc_propagate()), then propagate down from this class.
427 if (parres->cnt_guarantee == CKRM_SHARE_DONTCARE) {
428 parres->cnt_unused = CKRM_SHARE_DONTCARE;
429 } else if (par->total_guarantee) {
430 u64 temp = (u64) par->unused_guarantee *
431 parres->cnt_guarantee;
432 do_div(temp, par->total_guarantee);
433 parres->cnt_unused = (int) temp;
435 parres->cnt_unused = 0;
437 cki_recalc_propagate(res, parres);
// NOTE(review): the next four lines use members ioprio and unused, which are
// not among the visible members of cki_icls_t; presumably compiled out by
// preprocessor lines missing from this listing.
440 int old = icls->ioprio;
442 rc = cki_recalc(icls,0);
445 int raise_tot = icls->ioprio - old ;
446 parres->unused -= raise_tot ;
// Locks are released in the reverse order of acquisition.
450 spin_unlock(&icls->shares_lock);
452 spin_unlock(&parres->shares_lock);
// get_share_values callback: copy the relative shares of the class res into
// *shares by structure assignment.
// NOTE(review): embedded lines 460-462 and 464 are missing from the listing,
// so any NULL check and the return value are not visible. No shares_lock is
// taken in the visible lines.
457 static int cki_getshare(void *res, struct ckrm_shares * shares)
459 cki_icls_t *icls = res;
463 *shares = icls->shares;
// get_stats callback: print the statistics of the class res to sfile, one
// value per line: the read and write counters of mystats, the read and write
// counters of stats, then cnt_guarantee and cnt_unused.
// NOTE(review): lines between the declaration and the first seq_printf(), and
// the return statement, are missing from this listing.
467 static int cki_getstats(void *res, struct seq_file *sfile)
469 cki_icls_t *icls = res;
475 seq_printf(sfile, "%d my_read\n",atomic_read(&icls->mystats.blkrd));
476 seq_printf(sfile, "%d my_write\n",atomic_read(&icls->mystats.blkwr));
477 seq_printf(sfile, "%d total_read\n",atomic_read(&icls->stats.blkrd));
478 seq_printf(sfile, "%d total_write\n",atomic_read(&icls->stats.blkwr));
481 seq_printf(sfile, "%d total ioprio\n",icls->cnt_guarantee);
482 seq_printf(sfile, "%d unused/default ioprio\n",icls->cnt_unused);
// reset_stats callback: reinitialise the statistics of the class res through
// init_icls_stats(), which zeroes the counters and also re-stamps epochstart
// and blksz.
// NOTE(review): any NULL check and the return value are on lines missing from
// this listing.
487 static int cki_resetstats(void *res)
489 cki_icls_t *icls = res;
494 init_icls_stats(icls);
// show_config callback installed in cki_rcbs.
// NOTE(review): the body is entirely missing from this listing, so its
// behaviour and return value cannot be stated from here.
498 static int cki_showconfig(void *res, struct seq_file *sfile)
// set_config callback installed in cki_rcbs.
// NOTE(review): the body is entirely missing from this listing, so its
// behaviour and return value cannot be stated from here.
503 static int cki_setconfig(void *res, const char *cfgstr)
// change_resclass callback. The visible body consists of comments only; per
// those comments nothing has to be done when a task moves between classes.
508 static void cki_chgcls(void *tsk, void *oldres, void *newres)
510 /* cki_icls_t *oldicls = oldres, *newicls = newres; */
512 /* Nothing needs to be done
513 * Future requests from task will go to the new class's CFQ q
514 * Old ones will continue to get satisfied from the original q
516 * Once CFQ moves to a persistent queue model and if refcounts on
517 * icls's CFQ queues are used, a decrement op would be needed here
// Callback table of this resource controller; cki_init() passes it to
// ckrm_register_res_ctlr().
// NOTE(review): embedded lines 526-528 and the closing lines are missing from
// this listing, so further initialisers (the resid member is read elsewhere
// in this file) are not visible.
525 struct ckrm_res_ctlr cki_rcbs = {
529 .res_alloc = cki_alloc,
530 .res_free = cki_free,
531 .set_share_values = cki_setshare,
532 .get_share_values = cki_getshare,
533 .get_stats = cki_getstats,
534 .reset_stats = cki_resetstats,
535 .show_config = cki_showconfig,
536 .set_config = cki_setconfig,
537 .change_resclass = cki_chgcls,
// Module init: look up the classtype named taskclass, register cki_rcbs with
// it, record the classtype in cki_rcbs and hand the two per-task lookup
// callbacks to cki_cfq_set().
// NOTE(review): the failure return after the lookup, any test on the resid
// returned by ckrm_register_res_ctlr() and the final return are on lines
// missing from this listing.
542 int __init cki_init(void)
544 struct ckrm_classtype *clstype;
545 int resid = cki_rcbs.resid;
547 clstype = ckrm_find_classtype_by_name("taskclass");
548 if (clstype == NULL) {
549 printk(KERN_INFO "init_cki: classtype<taskclass> not found\n");
554 resid = ckrm_register_res_ctlr(clstype, &cki_rcbs);
556 cki_rcbs.classtype = clstype;
557 cki_cfq_set(cki_tsk_icls,cki_tsk_ioprio);
// Module exit: unregister the controller, clear the recorded classtype and
// pass NULL for both callbacks to cki_cfq_set().
// NOTE(review): embedded line 567 is missing from this listing and may hold a
// further statement.
564 void __exit cki_exit(void)
566 ckrm_unregister_res_ctlr(&cki_rcbs);
568 cki_rcbs.classtype = NULL;
569 cki_cfq_set(NULL,NULL);
// Module entry and exit points, followed by author, description and licence
// metadata.
572 module_init(cki_init)
573 module_exit(cki_exit)
575 MODULE_AUTHOR("Shailabh Nagar <nagar@watson.ibm.com>");
576 MODULE_DESCRIPTION("CKRM Disk I/O Resource Controller");
577 MODULE_LICENSE("GPL");