1 /* linux/drivers/block/ckrm_io.c : Block I/O Resource Controller for CKRM
3 * Copyright (C) Shailabh Nagar, IBM Corp. 2004
6 * Provides best-effort block I/O bandwidth control for CKRM
7 * This file provides the CKRM API. The underlying scheduler is a
8 * modified Complete-Fair Queueing (CFQ) iosched.
10 * Latest version, more details at http://ckrm.sf.net
12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License as published by
14 * the Free Software Foundation; either version 2 of the License, or
15 * (at your option) any later version.
22 * Third complete rewrite for CKRM's current API
26 #include <linux/module.h>
27 #include <linux/slab.h>
28 #include <linux/string.h>
29 #include <asm/errno.h>
30 #include <asm/div64.h>
31 #include <linux/list.h>
32 #include <linux/spinlock.h>
35 #include <linux/ckrm_tc.h>
36 #include <linux/ckrm-io.h>
/*
 * Constants used when converting between CKRM share values and CFQ ioprio
 * values. IOPRIO_NORM and IOPRIO_NR are not defined in this file; presumably
 * they come from one of the included headers -- TODO confirm.
 */
38 /* Tie to cfq priorities */
39 #define CKI_IOPRIO_NORM IOPRIO_NORM
41 /* Divisor to get fraction of bandwidth represented by an IOPRIO value */
42 /* FIXME: Will not work if IOPRIO_NR > 100 */
43 #define CKI_IOPRIO_DIV (IOPRIO_NR-1)
44 /* Minimum ioprio value to be assigned to a class */
45 #define CKI_IOPRIO_MIN 1
/* Stored into the blksz field of both statistics blocks by init_icls_stats() */
47 #define CKI_IOUSAGE_UNIT 512
/*
 * Per-class I/O statistics block: the start of the measurement epoch, the
 * size of one bandwidth unit, and atomic counters of the read and write units
 * submitted. Each class carries two of these (stats and mystats), both
 * initialised by init_icls_stats().
 */
49 typedef struct ckrm_io_stats{
50 struct timeval epochstart ; /* all measurements relative to this
52 unsigned long blksz; /* size of bandwidth unit */
53 atomic_t blkrd; /* read units submitted to DD */
54 atomic_t blkwr; /* write units submitted to DD */
56 } cki_stats_t; /* per class I/O statistics */
59 * Currently local unit == CFQ I/O priority directly.
60 * CFQ ionice values have an implied bandwidth share so they
61 * can be added, subdivided etc. as long as the initial allocation
62 * of the systemwide default's total is set to the highest CFQ ionice
63 * value (== 100% of disk bandwidth)
/*
 * Per-class state of the I/O controller: the owning core class, its parent
 * core class, the CKRM share values and the lock protecting share changes.
 * The tail of the structure is not visible in this excerpt; other code in
 * this file also uses members named ioprio, unused, stats and mystats, and
 * refers to the type as cki_icls_t.
 */
66 typedef struct ckrm_io_class {
68 struct ckrm_core_class *core;
69 struct ckrm_core_class *parent;
71 struct ckrm_shares shares;
72 spinlock_t shares_lock; /* protect share changes */
74 /* Absolute shares of this class
81 /* Statistics, for class and default subclass */
/*
 * Forward declarations for helpers and CKRM callbacks defined later in this
 * file, plus the hook used to hand accessors to the I/O scheduler.
 */
89 /* Internal functions */
90 static inline void cki_reset_stats(cki_stats_t *usg);
91 static inline void init_icls_one(cki_icls_t *icls);
92 static inline int cki_div(int *a, int b, int c);
93 static inline int cki_recalc(cki_icls_t *icls, int rel2abs);
/*
 * NOTE(review): the two names declared under this #ifdef (cki_tsk_icls and
 * cki_tsk_ioprio) do not match the accessors this file actually defines
 * (cki_icls_tsk and cki_icls_ioprio). The matching #endif is not visible in
 * this excerpt.
 */
95 #ifdef DOES_NOT_WORK_AND_NOT_NEEDED
96 /* External functions e.g. interface to ioscheduler */
97 inline void *cki_tsk_icls(struct task_struct *tsk);
98 inline int cki_tsk_ioprio(struct task_struct *tsk);
/*
 * Defined outside this file. cki_init() passes it the two accessors and
 * cki_exit() passes it NULL, NULL.
 */
101 extern void cki_cfq_set(icls_tsk_t tskicls, icls_ioprio_t tskioprio);
103 /* CKRM Resource Controller API functions */
104 static void * cki_alloc(struct ckrm_core_class *this,
105 struct ckrm_core_class * parent);
106 static void cki_free(void *res);
107 static int cki_setshare(void *res, struct ckrm_shares * shares);
108 static int cki_getshare(void *res, struct ckrm_shares * shares);
109 static int cki_getstats(void *res, struct seq_file *);
110 static int cki_resetstats(void *res);
111 static int cki_showconfig(void *res, struct seq_file *sfile);
112 static int cki_setconfig(void *res, const char *cfgstr);
113 static void cki_chgcls(void *tsk, void *oldres, void *newres);
/*
 * Declared here without an initialiser so that functions placed before the
 * initialised definition further down can read cki_rcbs.resid.
 */
116 struct ckrm_res_ctlr cki_rcbs;
/*
 * cki_reset_stats - zero the read and write counters of one statistics block.
 * @stats: statistics block whose blkrd and blkwr counters are cleared.
 *
 * In the lines visible here only the two atomic counters are written.
 */
118 static inline void cki_reset_stats(cki_stats_t *stats)
121 atomic_set(&stats->blkrd,0);
122 atomic_set(&stats->blkwr,0);
/*
 * init_icls_stats - start a new measurement epoch for a class.
 * @icls: class whose stats and mystats blocks are reinitialised.
 *
 * Stamps both blocks with the current time of day, sets their unit size to
 * CKI_IOUSAGE_UNIT and zeroes their counters. The declaration of tv is not
 * visible in this excerpt.
 */
126 static inline void init_icls_stats(cki_icls_t *icls)
130 do_gettimeofday(&tv);
/* both blocks share the same epoch start and unit size */
131 icls->stats.epochstart = icls->mystats.epochstart = tv;
132 icls->stats.blksz = icls->mystats.blksz = CKI_IOUSAGE_UNIT;
133 cki_reset_stats(&icls->stats);
134 cki_reset_stats(&icls->mystats);
137 /* Initialize icls to default values
138 * No other classes touched, locks not reinitialized.
/*
 * init_icls_one - put one class back to its default share and priority values.
 * @icls: class to reinitialise.
 *
 * Sets the share fields from CKRM_SHARE_DFLT_TOTAL_GUARANTEE, sets ioprio to
 * CKI_IOPRIO_MIN and restarts the statistics epoch. No caller of this helper
 * is visible in this excerpt.
 */
141 static inline void init_icls_one(cki_icls_t *icls)
/* the divisor of this expression sits on a line not visible in this excerpt */
143 icls->shares.my_guarantee =
144 (CKI_IOPRIO_MIN * CKRM_SHARE_DFLT_TOTAL_GUARANTEE) /
/* limits are set to the same default total as the guarantee */
146 icls->shares.my_limit = CKRM_SHARE_DFLT_TOTAL_GUARANTEE;
147 icls->shares.total_guarantee = CKRM_SHARE_DFLT_TOTAL_GUARANTEE;
148 icls->shares.max_limit = CKRM_SHARE_DFLT_TOTAL_GUARANTEE;
/* whatever this class does not keep for itself is recorded as unused */
150 icls->shares.unused_guarantee = icls->shares.total_guarantee -
151 icls->shares.my_guarantee;
152 icls->shares.cur_max_limit = CKRM_SHARE_DFLT_TOTAL_GUARANTEE;
155 icls->ioprio = CKI_IOPRIO_MIN;
158 init_icls_stats(icls);
/*
 * cki_div - scale the product b * c down by CKI_IOPRIO_DIV.
 * @a: presumably receives the quotient; the store and the return statement
 *     are not visible in this excerpt -- TODO confirm.
 * @b: first factor.
 * @c: second factor.
 *
 * No caller of this helper is visible in this excerpt.
 */
162 static inline int cki_div(int *a, int b, int c)
/* widen to 64 bits before multiplying */
164 u64 temp = (u64) b * c ;
/* do_div() leaves the quotient in temp */
165 do_div(temp,CKI_IOPRIO_DIV);
172 /* Recalculate absolute shares from relative (rel2abs=1)
173 * or vice versa (rel2abs=0)
174 * Caller should have a lock on icls
/*
 * cki_recalc - derive a class's ioprio from its guarantee.
 * @icls: class to update.
 * @rel2abs: conversion direction; not referenced in the lines visible here.
 *
 * Root class: ioprio = my_guarantee * (IOPRIO_NR-1) / total_guarantee, and
 * unused is what remains of IOPRIO_NR-1.
 * Child class: my_guarantee is scaled by the parent's ioprio + unused and
 * divided by the parent's total_guarantee; what is done with that result is
 * not visible in this excerpt.
 *
 * An error is logged when the parent's resource class cannot be looked up;
 * the value returned in that case is not visible here.
 */
177 static inline int cki_recalc(cki_icls_t *icls, int rel2abs)
181 if (icls->parent == NULL) {
182 /* Root, as parent, always gets all */
/*
 * NOTE(review): unlike the equivalent computation in cki_alloc(), the product
 * is not cast to u64 before multiplying, so it is evaluated in the type of
 * my_guarantee -- confirm that this cannot overflow.
 */
184 temp = icls->shares.my_guarantee * (IOPRIO_NR-1);
185 do_div(temp, icls->shares.total_guarantee);
187 icls->ioprio = temp ;
188 icls->unused = (IOPRIO_NR-1)-icls->ioprio;
/* non-root: look up the parent's I/O class through the core class */
194 parres = ckrm_get_res_class(icls->parent,
198 printk(KERN_ERR "cki_recalc: error getting "
199 "resclass from core \n");
/* NOTE(review): partot holds the same sum the next statement recomputes inline */
203 partot = parres->ioprio + parres->unused;
205 temp = (icls->shares.my_guarantee * (parres->ioprio + parres->unused));
206 do_div(temp, parres->shares.total_guarantee);
/*
 * cki_icls_tsk - look up the I/O resource class of a task.
 * @tsk: task whose taskclass is consulted.
 *
 * Returns, as an untyped pointer, the cki_icls_t found under cki_rcbs.resid
 * for the core class of the task's taskclass. cki_init() passes this function
 * to cki_cfq_set().
 */
218 inline void *cki_icls_tsk(struct task_struct *tsk)
220 return (void *) ckrm_get_res_class(class_core(tsk->taskclass),
221 cki_rcbs.resid, cki_icls_t);
/*
 * cki_icls_ioprio - I/O priority accessor handed to cki_cfq_set() in cki_init().
 * @tsk: task whose taskclass is consulted.
 *
 * Performs the same class lookup as cki_icls_tsk(). The return statement is
 * not visible in this excerpt; presumably it yields icls->ioprio -- TODO
 * confirm.
 */
224 inline int cki_icls_ioprio(struct task_struct *tsk)
226 cki_icls_t *icls = ckrm_get_res_class(class_core(tsk->taskclass),
227 cki_rcbs.resid, cki_icls_t);
/*
 * cki_alloc - res_alloc callback: create the I/O class for a new core class.
 * @core: core class being created.
 * @parent: its parent core class, or NULL for the root.
 *
 * Allocates a cki_icls_t, zeroes it, records the parent and initialises the
 * share lock. For the root (parent == NULL) the class is given IOPRIO_NORM
 * and the share fields are derived from that priority. The handling of
 * non-root classes and the return statements are not visible in this excerpt.
 */
231 static void *cki_alloc(struct ckrm_core_class *core,
232 struct ckrm_core_class *parent)
/* GFP_ATOMIC: presumably the caller may not sleep here -- TODO confirm */
236 icls = kmalloc(sizeof(cki_icls_t), GFP_ATOMIC);
/* NOTE(review): the message names cki_res_alloc, but this function is cki_alloc */
238 printk(KERN_ERR "cki_res_alloc failed GFP_ATOMIC\n");
/* start from an all-zero structure */
242 memset(icls, 0, sizeof(cki_icls_t));
244 icls->parent = parent;
245 icls->shares_lock = SPIN_LOCK_UNLOCKED;
247 if (parent == NULL) {
250 /* Root class gets same as "normal" CFQ priorities to
251 * retain compatibility of behaviour in the absence of
255 icls->ioprio = IOPRIO_NORM;
256 icls->unused = (IOPRIO_NR-1)-IOPRIO_NORM;
258 /* Default gets normal, not minimum */
259 //icls->unused = IOPRIO_NORM;
260 //icls->unused = icls->guarantee-icls->myguarantee;
261 //icls->limit = icls->mylimit = IOPRIO_NR;
263 /* Compute shares in abstract units */
264 icls->shares.total_guarantee = CKRM_SHARE_DFLT_TOTAL_GUARANTEE;
/* my_guarantee = ioprio * total_guarantee / CKI_IOPRIO_DIV, computed in 64 bits */
265 temp = (u64) icls->ioprio * icls->shares.total_guarantee;
266 do_div(temp, CKI_IOPRIO_DIV);
267 icls->shares.my_guarantee = (int) temp;
269 //icls->shares.my_limit = CKRM_SHARE_DFLT_MAX_LIMIT;
270 //icls->shares.max_limit = CKRM_SHARE_DFLT_MAX_LIMIT;
271 icls->shares.my_limit = CKRM_SHARE_DFLT_TOTAL_GUARANTEE;
272 icls->shares.max_limit = CKRM_SHARE_DFLT_TOTAL_GUARANTEE;
275 icls->shares.unused_guarantee =
276 icls->shares.total_guarantee -
277 icls->shares.my_guarantee;
278 //icls->shares.cur_max_limit = CKRM_SHARE_DFLT_MAX_LIMIT;
279 icls->shares.cur_max_limit = CKRM_SHARE_DFLT_TOTAL_GUARANTEE;
/*
 * Module reference; cki_free() calls module_put().
 * NOTE(review): the result of try_module_get() is discarded here.
 */
284 try_module_get(THIS_MODULE);
/*
 * cki_free - res_free callback: release an I/O class.
 * @res: class being released, used as a cki_icls_t.
 *
 * Under the parent's shares_lock, reports the loss of this child's guarantee
 * to the parent's share bookkeeping and adds this class's ioprio to the
 * parent's unused count, then calls module_put(). The freeing of the
 * structure itself is not visible in this excerpt.
 */
288 static void cki_free(void *res)
290 cki_icls_t *icls = res, *parres;
295 /* Deallocate CFQ queues */
297 /* Currently CFQ queues are deallocated when empty. Since no task
298 * should belong to this icls, no new requests will get added to the
301 * When CFQ switches to persistent queues, call its "put" function
302 * so it gets deallocated after the last pending request is serviced.
306 parres = ckrm_get_res_class(icls->parent,
310 printk(KERN_ERR "cki_free: error getting "
311 "resclass from core \n");
315 /* Update parent's shares */
316 spin_lock(&parres->shares_lock);
/* presumably the last two arguments are (current, new) guarantee -- TODO confirm */
317 child_guarantee_changed(&parres->shares, icls->shares.my_guarantee, 0);
/* hand this class's priority units back to the parent */
318 parres->unused += icls->ioprio;
319 spin_unlock(&parres->shares_lock);
322 module_put(THIS_MODULE);
/*
 * cki_setshare - set_share_values callback.
 * @res: class being changed, used as a cki_icls_t.
 * @new: requested share values.
 *
 * Logs an error when max_limit or my_limit is anything other than
 * CKRM_SHARE_UNCHANGED (limits are not supported); the control flow after
 * that message is not visible in this excerpt. Otherwise the request is
 * passed to set_shares(), the class's ioprio is recomputed with cki_recalc(),
 * and the parent's unused count is adjusted by the change in ioprio.
 *
 * rc starts out as -EINVAL and is overwritten by set_shares() and then by
 * cki_recalc().
 */
327 static int cki_setshare(void *res, struct ckrm_shares *new)
329 cki_icls_t *icls = res, *parres;
330 struct ckrm_shares *cur, *par;
331 int rc = -EINVAL, resid = cki_rcbs.resid;
334 printk(KERN_ERR "No class\n");
340 /* limits not supported */
341 if ((new->max_limit != CKRM_SHARE_UNCHANGED)
342 || (new->my_limit != CKRM_SHARE_UNCHANGED)) {
343 printk(KERN_ERR "limits changed max_limit %d my_limit %d\n",
344 new->max_limit, new->my_limit);
351 ckrm_get_res_class(icls->parent, resid, cki_icls_t);
353 printk(KERN_ERR "cki_setshare: error getting "
354 "resclass from core \n");
/* lock order: parent first, then this class */
357 spin_lock(&parres->shares_lock);
358 spin_lock(&icls->shares_lock);
359 par = &parres->shares;
/* presumably the no-parent branch; the enclosing else is not visible here */
361 spin_lock(&icls->shares_lock);
366 rc = set_shares(new, cur, par);
368 printk(KERN_ERR "rc from set_shares %d\n", rc);
/* remember the old priority so the change can be applied to the parent */
371 int old = icls->ioprio;
372 rc = cki_recalc(icls,0);
/*
 * NOTE(review): cki_free() adds a class's ioprio to the parent's unused count
 * when the class goes away, which suggests unused should shrink when a
 * child's ioprio grows. Here the increase is added instead -- confirm the
 * intended sign.
 */
375 int raise_tot = icls->ioprio - old ;
376 parres->unused += raise_tot ;
/* release in the reverse order of acquisition */
379 spin_unlock(&icls->shares_lock);
381 spin_unlock(&parres->shares_lock);
/*
 * cki_getshare - get_share_values callback.
 * @res: class being queried, used as a cki_icls_t.
 * @shares: destination that receives a copy of the class's share values.
 *
 * The return statements are not visible in this excerpt.
 */
386 static int cki_getshare(void *res, struct ckrm_shares * shares)
388 cki_icls_t *icls = res;
/* structure copy of the whole share record */
392 *shares = icls->shares;
/*
 * cki_getstats - get_stats callback.
 * @res: class being queried, used as a cki_icls_t.
 * @sfile: seq_file that receives the report.
 *
 * Writes one "<value> <label>" line per item: the read and write counters of
 * mystats and of stats, followed by the class's ioprio and unused values.
 * The return statements are not visible in this excerpt.
 */
396 static int cki_getstats(void *res, struct seq_file *sfile)
398 cki_icls_t *icls = res;
404 seq_printf(sfile, "%d my_read\n",atomic_read(&icls->mystats.blkrd));
405 seq_printf(sfile, "%d my_write\n",atomic_read(&icls->mystats.blkwr));
406 seq_printf(sfile, "%d total_read\n",atomic_read(&icls->stats.blkrd));
407 seq_printf(sfile, "%d total_write\n",atomic_read(&icls->stats.blkwr));
410 seq_printf(sfile, "%d ioprio\n",icls->ioprio);
411 seq_printf(sfile, "%d unused\n",icls->unused);
/*
 * cki_resetstats - reset_stats callback.
 * @res: class whose statistics are reset, used as a cki_icls_t.
 *
 * Delegates to init_icls_stats(), so the epoch start time and unit size are
 * rewritten along with the counters. The return statements are not visible
 * in this excerpt.
 */
416 static int cki_resetstats(void *res)
418 cki_icls_t *icls = res;
423 init_icls_stats(icls);
/*
 * cki_showconfig - show_config callback.
 * @res: class being queried.
 * @sfile: seq_file for output.
 *
 * The body is not visible in this excerpt.
 */
427 static int cki_showconfig(void *res, struct seq_file *sfile)
/*
 * cki_setconfig - set_config callback.
 * @res: class being configured.
 * @cfgstr: configuration string.
 *
 * The body is not visible in this excerpt.
 */
432 static int cki_setconfig(void *res, const char *cfgstr)
/*
 * cki_chgcls - change_resclass callback, invoked with a task and its old and
 * new resource classes.
 * @tsk: task being moved.
 * @oldres: class the task is leaving.
 * @newres: class the task is joining.
 *
 * No statements are visible in this excerpt; the comments in the body say
 * that nothing needs to be done.
 */
437 static void cki_chgcls(void *tsk, void *oldres, void *newres)
439 /* cki_icls_t *oldicls = oldres, *newicls = newres; */
441 /* Nothing needs to be done
442 * Future requests from task will go to the new class's CFQ q
443 * Old ones will continue to get satisfied from the original q
445 * Once CFQ moves to a persistent queue model and if refcounts on
446 * icls's CFQ queues are used, a decrement op would be needed here
/*
 * Callback table for this controller. cki_init() registers it with
 * ckrm_register_res_ctlr() and cki_exit() unregisters it. The first members
 * of the initialiser are not visible in this excerpt.
 */
454 struct ckrm_res_ctlr cki_rcbs = {
458 .res_alloc = cki_alloc,
459 .res_free = cki_free,
460 .set_share_values = cki_setshare,
461 .get_share_values = cki_getshare,
462 .get_stats = cki_getstats,
463 .reset_stats = cki_resetstats,
464 .show_config = cki_showconfig,
465 .set_config = cki_setconfig,
466 .change_resclass = cki_chgcls,
/*
 * cki_init - module entry point.
 *
 * Looks up the "taskclass" class type, registers cki_rcbs with it, records
 * the class type in cki_rcbs and hands the two task accessors to the I/O
 * scheduler through cki_cfq_set(). The checks on the registration result and
 * the return statements are not visible in this excerpt.
 */
471 int __init cki_init(void)
473 struct ckrm_classtype *clstype;
/* this initial value is overwritten by the registration result below */
474 int resid = cki_rcbs.resid;
476 clstype = ckrm_find_classtype_by_name("taskclass");
477 if (clstype == NULL) {
/* NOTE(review): the message prefix reads init_cki, the function is cki_init */
478 printk(KERN_INFO "init_cki: classtype<taskclass> not found\n");
483 resid = ckrm_register_res_ctlr(clstype, &cki_rcbs);
485 cki_rcbs.classtype = clstype;
/* install the class and priority accessors */
486 cki_cfq_set(cki_icls_tsk,cki_icls_ioprio);
/*
 * cki_exit - module exit point.
 *
 * Unregisters cki_rcbs, clears its classtype, and clears the accessors that
 * cki_init() installed by passing NULL, NULL to cki_cfq_set().
 */
493 void __exit cki_exit(void)
495 ckrm_unregister_res_ctlr(&cki_rcbs);
497 cki_rcbs.classtype = NULL;
498 cki_cfq_set(NULL,NULL);
/* Module entry and exit hooks, followed by the module's metadata. */
501 module_init(cki_init)
502 module_exit(cki_exit)
504 MODULE_AUTHOR("Shailabh Nagar <nagar@watson.ibm.com>");
505 MODULE_DESCRIPTION("CKRM Disk I/O Resource Controller");
506 MODULE_LICENSE("GPL");