From d79d5cae5e2a33c62ed0b67cd4c395e32e8d13a6 Mon Sep 17 00:00:00 2001
From: Planet-Lab Support <support@planet-lab.org>
Date: Mon, 13 Sep 2004 03:06:07 +0000
Subject: [PATCH] This commit was manufactured by cvs2svn to create branch
 'ckrm'.

---
 drivers/block/cfq-iosched-orig.c | 706 +++++++++++++++++++++++++++++++
 drivers/block/ckrm-io.c          | 507 ++++++++++++++++++++++
 drivers/block/ckrm-iostub.c      |  64 +++
 include/linux/ckrm-io.h          |  43 ++
 4 files changed, 1320 insertions(+)
 create mode 100644 drivers/block/cfq-iosched-orig.c
 create mode 100644 drivers/block/ckrm-io.c
 create mode 100644 drivers/block/ckrm-iostub.c
 create mode 100644 include/linux/ckrm-io.h

diff --git a/drivers/block/cfq-iosched-orig.c b/drivers/block/cfq-iosched-orig.c
new file mode 100644
index 000000000..977d32ddd
--- /dev/null
+++ b/drivers/block/cfq-iosched-orig.c
@@ -0,0 +1,706 @@
+/*
+ * linux/drivers/block/cfq-iosched.c
+ *
+ * CFQ, or complete fairness queueing, disk scheduler.
+ *
+ * Based on ideas from a previously unfinished io
+ * scheduler (round robin per-process disk scheduling) and Andrea Arcangeli.
+ *
+ * Copyright (C) 2003 Jens Axboe <axboe@suse.de>
+ */
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/blkdev.h>
+#include <linux/elevator.h>
+#include <linux/bio.h>
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/compiler.h>
+#include <linux/hash.h>
+#include <linux/rbtree.h>
+#include <linux/mempool.h>
+
+/*
+ * tunables
+ */
+static int cfq_quantum = 4;
+static int cfq_queued = 8;
+
+#define CFQ_QHASH_SHIFT		6
+#define CFQ_QHASH_ENTRIES	(1 << CFQ_QHASH_SHIFT)
+#define list_entry_qhash(entry)	list_entry((entry), struct cfq_queue, cfq_hash)
+
+#define CFQ_MHASH_SHIFT		8
+#define CFQ_MHASH_BLOCK(sec)	((sec) >> 3)
+#define CFQ_MHASH_ENTRIES	(1 << CFQ_MHASH_SHIFT)
+#define CFQ_MHASH_FN(sec)	(hash_long(CFQ_MHASH_BLOCK((sec)),CFQ_MHASH_SHIFT))
+#define ON_MHASH(crq)		!list_empty(&(crq)->hash)
+#define rq_hash_key(rq)		((rq)->sector + (rq)->nr_sectors)
+#define list_entry_hash(ptr)	list_entry((ptr), struct cfq_rq, hash)
+
+#define list_entry_cfqq(ptr)	list_entry((ptr), struct cfq_queue, cfq_list)
+
+#define RQ_DATA(rq)		((struct cfq_rq *) (rq)->elevator_private)
+
+static kmem_cache_t *crq_pool;
+static kmem_cache_t *cfq_pool;
+static mempool_t *cfq_mpool;
+
+struct cfq_data {
+	struct list_head rr_list;
+	struct list_head *dispatch;
+	struct list_head *cfq_hash;
+
+	struct list_head *crq_hash;
+
+	unsigned int busy_queues;
+	unsigned int max_queued;
+
+	mempool_t *crq_pool;
+};
+
+struct cfq_queue {
+	struct list_head cfq_hash;
+	struct list_head cfq_list;
+	struct rb_root sort_list;
+	int pid;
+	int queued[2];
+#if 0
+	/*
+	 * with a simple addition like this, we can do io priorities. almost.
+	 * does need a split request free list, too.
+	 */
+	int io_prio
+#endif
+};
+
+struct cfq_rq {
+	struct rb_node rb_node;
+	sector_t rb_key;
+
+	struct request *request;
+
+	struct cfq_queue *cfq_queue;
+
+	struct list_head hash;
+};
+
+static void cfq_put_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq);
+static struct cfq_queue *cfq_find_cfq_hash(struct cfq_data *cfqd, int pid);
+static void cfq_dispatch_sort(struct list_head *head, struct cfq_rq *crq);
+
+/*
+ * lots of deadline iosched dupes, can be abstracted later...
+ */
+static inline void __cfq_del_crq_hash(struct cfq_rq *crq)
+{
+	list_del_init(&crq->hash);
+}
+
+static inline void cfq_del_crq_hash(struct cfq_rq *crq)
+{
+	if (ON_MHASH(crq))
+		__cfq_del_crq_hash(crq);
+}
+
+static void cfq_remove_merge_hints(request_queue_t *q, struct cfq_rq *crq)
+{
+	cfq_del_crq_hash(crq);
+
+	if (q->last_merge == crq->request)
+		q->last_merge = NULL;
+}
+
+static inline void cfq_add_crq_hash(struct cfq_data *cfqd, struct cfq_rq *crq)
+{
+	struct request *rq = crq->request;
+
+	BUG_ON(ON_MHASH(crq));
+
+	list_add(&crq->hash, &cfqd->crq_hash[CFQ_MHASH_FN(rq_hash_key(rq))]);
+}
+
+static struct request *cfq_find_rq_hash(struct cfq_data *cfqd, sector_t offset)
+{
+	struct list_head *hash_list = &cfqd->crq_hash[CFQ_MHASH_FN(offset)];
+	struct list_head *entry, *next = hash_list->next;
+
+	while ((entry = next) != hash_list) {
+		struct cfq_rq *crq = list_entry_hash(entry);
+		struct request *__rq = crq->request;
+
+		next = entry->next;
+
+		BUG_ON(!ON_MHASH(crq));
+
+		if (!rq_mergeable(__rq)) {
+			__cfq_del_crq_hash(crq);
+			continue;
+		}
+
+		if (rq_hash_key(__rq) == offset)
+			return __rq;
+	}
+
+	return NULL;
+}
+
+/*
+ * rb tree support functions
+ */
+#define RB_NONE		(2)
+#define RB_EMPTY(node)	((node)->rb_node == NULL)
+#define RB_CLEAR(node)	((node)->rb_color = RB_NONE)
+#define RB_CLEAR_ROOT(root)	((root)->rb_node = NULL)
+#define ON_RB(node)	((node)->rb_color != RB_NONE)
+#define rb_entry_crq(node)	rb_entry((node), struct cfq_rq, rb_node)
+#define rq_rb_key(rq)	(rq)->sector
+
+static inline void cfq_del_crq_rb(struct cfq_queue *cfqq, struct cfq_rq *crq)
+{
+	if (ON_RB(&crq->rb_node)) {
+		cfqq->queued[rq_data_dir(crq->request)]--;
+		rb_erase(&crq->rb_node, &cfqq->sort_list);
+		crq->cfq_queue = NULL;
+	}
+}
+
+static struct cfq_rq *
+__cfq_add_crq_rb(struct cfq_queue *cfqq, struct cfq_rq *crq)
+{
+	struct rb_node **p = &cfqq->sort_list.rb_node;
+	struct rb_node *parent = NULL;
+	struct cfq_rq *__crq;
+
+	while (*p) {
+		parent = *p;
+		__crq = rb_entry_crq(parent);
+
+		if (crq->rb_key < __crq->rb_key)
+			p = &(*p)->rb_left;
+		else if (crq->rb_key > __crq->rb_key)
+			p = &(*p)->rb_right;
+		else
+			return __crq;
+	}
+
+	rb_link_node(&crq->rb_node, parent, p);
+	return 0;
+}
+
+static void
+cfq_add_crq_rb(struct cfq_data *cfqd, struct cfq_queue *cfqq,struct cfq_rq *crq)
+{
+	struct request *rq = crq->request;
+	struct cfq_rq *__alias;
+
+	crq->rb_key = rq_rb_key(rq);
+	cfqq->queued[rq_data_dir(rq)]++;
+retry:
+	__alias = __cfq_add_crq_rb(cfqq, crq);
+	if (!__alias) {
+		rb_insert_color(&crq->rb_node, &cfqq->sort_list);
+		crq->cfq_queue = cfqq;
+		return;
+	}
+
+	cfq_del_crq_rb(cfqq, __alias);
+	cfq_dispatch_sort(cfqd->dispatch, __alias);
+	goto retry;
+}
+
+static struct request *
+cfq_find_rq_rb(struct cfq_data *cfqd, sector_t sector)
+{
+	struct cfq_queue *cfqq = cfq_find_cfq_hash(cfqd, current->tgid);
+	struct rb_node *n;
+
+	if (!cfqq)
+		goto out;
+
+	n = cfqq->sort_list.rb_node;
+	while (n) {
+		struct cfq_rq *crq = rb_entry_crq(n);
+
+		if (sector < crq->rb_key)
+			n = n->rb_left;
+		else if (sector > crq->rb_key)
+			n = n->rb_right;
+		else
+			return crq->request;
+	}
+
+out:
+	return NULL;
+}
+
+static void cfq_remove_request(request_queue_t *q, struct request *rq)
+{
+	struct cfq_data *cfqd = q->elevator.elevator_data;
+	struct cfq_rq *crq = RQ_DATA(rq);
+
+	if (crq) {
+		struct cfq_queue *cfqq = crq->cfq_queue;
+
+		cfq_remove_merge_hints(q, crq);
+		list_del_init(&rq->queuelist);
+
+		if (cfqq) {
+			cfq_del_crq_rb(cfqq, crq);
+
+			if (RB_EMPTY(&cfqq->sort_list))
+				cfq_put_queue(cfqd, cfqq);
+		}
+	}
+}
+
+static int
+cfq_merge(request_queue_t *q, struct request **req, struct bio *bio)
+{
+	struct cfq_data *cfqd = q->elevator.elevator_data;
+	struct request *__rq;
+	int ret;
+
+	ret = elv_try_last_merge(q, bio);
+	if (ret != ELEVATOR_NO_MERGE) {
+		__rq = q->last_merge;
+		goto out_insert;
+	}
+
+	__rq = cfq_find_rq_hash(cfqd, bio->bi_sector);
+	if (__rq) {
+		BUG_ON(__rq->sector + __rq->nr_sectors != bio->bi_sector);
+
+		if (elv_rq_merge_ok(__rq, bio)) {
+			ret = ELEVATOR_BACK_MERGE;
+			goto out;
+		}
+	}
+
+	__rq = cfq_find_rq_rb(cfqd, bio->bi_sector + bio_sectors(bio));
+	if (__rq) {
+		if (elv_rq_merge_ok(__rq, bio)) {
+			ret = ELEVATOR_FRONT_MERGE;
+			goto out;
+		}
+	}
+
+	return ELEVATOR_NO_MERGE;
+out:
+	q->last_merge = __rq;
+out_insert:
+	*req = __rq;
+	return ret;
+}
+
+static void cfq_merged_request(request_queue_t *q, struct request *req)
+{
+	struct cfq_data *cfqd = q->elevator.elevator_data;
+	struct cfq_rq *crq = RQ_DATA(req);
+
+	cfq_del_crq_hash(crq);
+	cfq_add_crq_hash(cfqd, crq);
+
+	if (ON_RB(&crq->rb_node) && (rq_rb_key(req) != crq->rb_key)) {
+		struct cfq_queue *cfqq = crq->cfq_queue;
+
+		cfq_del_crq_rb(cfqq, crq);
+		cfq_add_crq_rb(cfqd, cfqq, crq);
+	}
+
+	q->last_merge = req;
+}
+
+static void
+cfq_merged_requests(request_queue_t *q, struct request *req,
+		    struct request *next)
+{
+	cfq_merged_request(q, req);
+	cfq_remove_request(q, next);
+}
+
+static void cfq_dispatch_sort(struct list_head *head, struct cfq_rq *crq)
+{
+	struct list_head *entry = head;
+	struct request *__rq;
+
+	if (!list_empty(head)) {
+		__rq = list_entry_rq(head->next);
+
+		if (crq->request->sector < __rq->sector) {
+			entry = head->prev;
+			goto link;
+		}
+	}
+
+	while ((entry = entry->prev) != head) {
+		__rq = list_entry_rq(entry);
+
+		if (crq->request->sector <= __rq->sector)
+			break;
+	}
+
+link:
+	list_add_tail(&crq->request->queuelist, entry);
+}
+
+static inline void
+__cfq_dispatch_requests(request_queue_t *q, struct cfq_data *cfqd,
+			struct cfq_queue *cfqq)
+{
+	struct cfq_rq *crq = rb_entry_crq(rb_first(&cfqq->sort_list));
+
+	cfq_del_crq_rb(cfqq, crq);
+	cfq_remove_merge_hints(q, crq);
+	cfq_dispatch_sort(cfqd->dispatch, crq);
+}
+
+static int cfq_dispatch_requests(request_queue_t *q, struct cfq_data *cfqd)
+{
+	struct cfq_queue *cfqq;
+	struct list_head *entry, *tmp;
+	int ret, queued, good_queues;
+
+	if (list_empty(&cfqd->rr_list))
+		return 0;
+
+	queued = ret = 0;
+restart:
+	good_queues = 0;
+	list_for_each_safe(entry, tmp, &cfqd->rr_list) {
+		cfqq = list_entry_cfqq(cfqd->rr_list.next);
+
+		BUG_ON(RB_EMPTY(&cfqq->sort_list));
+
+		__cfq_dispatch_requests(q, cfqd, cfqq);
+
+		if (RB_EMPTY(&cfqq->sort_list))
+			cfq_put_queue(cfqd, cfqq);
+		else
+			good_queues++;
+
+		queued++;
+		ret = 1;
+	}
+
+	if ((queued < cfq_quantum) && good_queues)
+		goto restart;
+
+	return ret;
+}
+
+static struct request *cfq_next_request(request_queue_t *q)
+{
+	struct cfq_data *cfqd = q->elevator.elevator_data;
+	struct request *rq;
+
+	if (!list_empty(cfqd->dispatch)) {
+		struct cfq_rq *crq;
+dispatch:
+		rq = list_entry_rq(cfqd->dispatch->next);
+
+		crq = RQ_DATA(rq);
+		if (crq)
+			cfq_remove_merge_hints(q, crq);
+
+		return rq;
+	}
+
+	if (cfq_dispatch_requests(q, cfqd))
+		goto dispatch;
+
+	return NULL;
+}
+
+static inline struct cfq_queue *
+__cfq_find_cfq_hash(struct cfq_data *cfqd, int pid, const int hashval)
+{
+	struct list_head *hash_list = &cfqd->cfq_hash[hashval];
+	struct list_head *entry;
+
+	list_for_each(entry, hash_list) {
+		struct cfq_queue *__cfqq = list_entry_qhash(entry);
+
+		if (__cfqq->pid == pid)
+			return __cfqq;
+	}
+
+	return NULL;
+}
+
+static struct cfq_queue *cfq_find_cfq_hash(struct cfq_data *cfqd, int pid)
+{
+	const int hashval = hash_long(current->tgid, CFQ_QHASH_SHIFT);
+
+	return __cfq_find_cfq_hash(cfqd, pid, hashval);
+}
+
+static void cfq_put_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq)
+{
+	cfqd->busy_queues--;
+	list_del(&cfqq->cfq_list);
+	list_del(&cfqq->cfq_hash);
+	mempool_free(cfqq, cfq_mpool);
+}
+
+static struct cfq_queue *cfq_get_queue(struct cfq_data *cfqd, int pid)
+{
+	const int hashval = hash_long(current->tgid, CFQ_QHASH_SHIFT);
+	struct cfq_queue *cfqq = __cfq_find_cfq_hash(cfqd, pid, hashval);
+
+	if (!cfqq) {
+		cfqq = mempool_alloc(cfq_mpool, GFP_NOIO);
+
+		INIT_LIST_HEAD(&cfqq->cfq_hash);
+		INIT_LIST_HEAD(&cfqq->cfq_list);
+		RB_CLEAR_ROOT(&cfqq->sort_list);
+
+		cfqq->pid = pid;
+		cfqq->queued[0] = cfqq->queued[1] = 0;
+		list_add(&cfqq->cfq_hash, &cfqd->cfq_hash[hashval]);
+	}
+
+	return cfqq;
+}
+
+static void cfq_enqueue(struct cfq_data *cfqd, struct cfq_rq *crq)
+{
+	struct cfq_queue *cfqq;
+
+	cfqq = cfq_get_queue(cfqd, current->tgid);
+
+	cfq_add_crq_rb(cfqd, cfqq, crq);
+
+	if (list_empty(&cfqq->cfq_list)) {
+		list_add(&cfqq->cfq_list, &cfqd->rr_list);
+		cfqd->busy_queues++;
+	}
+}
+
+static void
+cfq_insert_request(request_queue_t *q, struct request *rq, int where)
+{
+	struct cfq_data *cfqd = q->elevator.elevator_data;
+	struct cfq_rq *crq = RQ_DATA(rq);
+
+	switch (where) {
+		case ELEVATOR_INSERT_BACK:
+			while (cfq_dispatch_requests(q, cfqd))
+				;
+			list_add_tail(&rq->queuelist, cfqd->dispatch);
+			break;
+		case ELEVATOR_INSERT_FRONT:
+			list_add(&rq->queuelist, cfqd->dispatch);
+			break;
+		case ELEVATOR_INSERT_SORT:
+			BUG_ON(!blk_fs_request(rq));
+			cfq_enqueue(cfqd, crq);
+			break;
+		default:
+			printk("%s: bad insert point %d\n", __FUNCTION__,where);
+			return;
+	}
+
+	if (rq_mergeable(rq)) {
+		cfq_add_crq_hash(cfqd, crq);
+
+		if (!q->last_merge)
+			q->last_merge = rq;
+	}
+}
+
+static int cfq_queue_empty(request_queue_t *q)
+{
+	struct cfq_data *cfqd = q->elevator.elevator_data;
+
+	if (list_empty(cfqd->dispatch) && list_empty(&cfqd->rr_list))
+		return 1;
+
+	return 0;
+}
+
+static struct request *
+cfq_former_request(request_queue_t *q, struct request *rq)
+{
+	struct cfq_rq *crq = RQ_DATA(rq);
+	struct rb_node *rbprev = rb_prev(&crq->rb_node);
+
+	if (rbprev)
+		return rb_entry_crq(rbprev)->request;
+
+	return NULL;
+}
+
+static struct request *
+cfq_latter_request(request_queue_t *q, struct request *rq)
+{
+	struct cfq_rq *crq = RQ_DATA(rq);
+	struct rb_node *rbnext = rb_next(&crq->rb_node);
+
+	if (rbnext)
+		return rb_entry_crq(rbnext)->request;
+
+	return NULL;
+}
+
+static int cfq_may_queue(request_queue_t *q, int rw)
+{
+	struct cfq_data *cfqd = q->elevator.elevator_data;
+	struct cfq_queue *cfqq;
+	int ret = 1;
+
+	if (!cfqd->busy_queues)
+		goto out;
+
+	cfqq = cfq_find_cfq_hash(cfqd, current->tgid);
+	if (cfqq) {
+		int limit = (q->nr_requests - cfq_queued) / cfqd->busy_queues;
+
+		if (limit < 3)
+			limit = 3;
+		else if (limit > cfqd->max_queued)
+			limit = cfqd->max_queued;
+
+		if (cfqq->queued[rw] > limit)
+			ret = 0;
+	}
+out:
+	return ret;
+}
+
+static void cfq_put_request(request_queue_t *q, struct request *rq)
+{
+	struct cfq_data *cfqd = q->elevator.elevator_data;
+	struct cfq_rq *crq = RQ_DATA(rq);
+
+	if (crq) {
+		BUG_ON(q->last_merge == rq);
+		BUG_ON(ON_MHASH(crq));
+
+		mempool_free(crq, cfqd->crq_pool);
+		rq->elevator_private = NULL;
+	}
+}
+
+static int
+cfq_set_request(request_queue_t *q, struct request *rq, int gfp_mask)
+{
+	struct cfq_data *cfqd = q->elevator.elevator_data;
+	struct cfq_rq *crq = mempool_alloc(cfqd->crq_pool, gfp_mask);
+
+	if (crq) {
+		RB_CLEAR(&crq->rb_node);
+		crq->request = rq;
+		crq->cfq_queue = NULL;
+		INIT_LIST_HEAD(&crq->hash);
+		rq->elevator_private = crq;
+		return 0;
+	}
+
+	return 1;
+}
+
+static void cfq_exit(request_queue_t *q, elevator_t *e)
+{
+	struct cfq_data *cfqd = e->elevator_data;
+
+	e->elevator_data = NULL;
+	mempool_destroy(cfqd->crq_pool);
+	kfree(cfqd->crq_hash);
+	kfree(cfqd->cfq_hash);
+	kfree(cfqd);
+}
+
+static int cfq_init(request_queue_t *q, elevator_t *e)
+{
+	struct cfq_data *cfqd;
+	int i;
+
+	cfqd = kmalloc(sizeof(*cfqd), GFP_KERNEL);
+	if (!cfqd)
+		return -ENOMEM;
+
+	memset(cfqd, 0, sizeof(*cfqd));
+	INIT_LIST_HEAD(&cfqd->rr_list);
+
+	cfqd->crq_hash = kmalloc(sizeof(struct list_head) * CFQ_MHASH_ENTRIES, GFP_KERNEL);
+	if (!cfqd->crq_hash)
+		goto out_crqhash;
+
+	cfqd->cfq_hash = kmalloc(sizeof(struct list_head) * CFQ_QHASH_ENTRIES, GFP_KERNEL);
+	if (!cfqd->cfq_hash)
+		goto out_cfqhash;
+
+	cfqd->crq_pool = mempool_create(BLKDEV_MIN_RQ, mempool_alloc_slab, mempool_free_slab, crq_pool);
+	if (!cfqd->crq_pool)
+		goto out_crqpool;
+
+	for (i = 0; i < CFQ_MHASH_ENTRIES; i++)
+		INIT_LIST_HEAD(&cfqd->crq_hash[i]);
+	for (i = 0; i < CFQ_QHASH_ENTRIES; i++)
+		INIT_LIST_HEAD(&cfqd->cfq_hash[i]);
+
+	cfqd->dispatch = &q->queue_head;
+	e->elevator_data = cfqd;
+
+	/*
+	 * just set it to some high value, we want anyone to be able to queue
+	 * some requests. fairness is handled differently
+	 */
+	cfqd->max_queued = q->nr_requests;
+	q->nr_requests = 8192;
+
+	return 0;
+out_crqpool:
+	kfree(cfqd->cfq_hash);
+out_cfqhash:
+	kfree(cfqd->crq_hash);
+out_crqhash:
+	kfree(cfqd);
+	return -ENOMEM;
+}
+
+static int __init cfq_slab_setup(void)
+{
+	crq_pool = kmem_cache_create("crq_pool", sizeof(struct cfq_rq), 0, 0,
+					NULL, NULL);
+
+	if (!crq_pool)
+		panic("cfq_iosched: can't init crq pool\n");
+
+	cfq_pool = kmem_cache_create("cfq_pool", sizeof(struct cfq_queue), 0, 0,
+					NULL, NULL);
+
+	if (!cfq_pool)
+		panic("cfq_iosched: can't init cfq pool\n");
+
+	cfq_mpool = mempool_create(64, mempool_alloc_slab, mempool_free_slab, cfq_pool);
+
+	if (!cfq_mpool)
+		panic("cfq_iosched: can't init cfq mpool\n");
+
+	return 0;
+}
+
+subsys_initcall(cfq_slab_setup);
+
+elevator_t iosched_cfq = {
+	.elevator_name =		"cfq",
+	.elevator_merge_fn =		cfq_merge,
+	.elevator_merged_fn =		cfq_merged_request,
+	.elevator_merge_req_fn =	cfq_merged_requests,
+	.elevator_next_req_fn =		cfq_next_request,
+	.elevator_add_req_fn =		cfq_insert_request,
+	.elevator_remove_req_fn =	cfq_remove_request,
+	.elevator_queue_empty_fn =	cfq_queue_empty,
+	.elevator_former_req_fn =	cfq_former_request,
+	.elevator_latter_req_fn =	cfq_latter_request,
+	.elevator_set_req_fn =		cfq_set_request,
+	.elevator_put_req_fn =		cfq_put_request,
+	.elevator_may_queue_fn =	cfq_may_queue,
+	.elevator_init_fn =		cfq_init,
+	.elevator_exit_fn =		cfq_exit,
+};
+
+EXPORT_SYMBOL(iosched_cfq);
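
The scheduler above keeps one cfq_queue per process (hashed by tgid), sorts each
queue's pending requests by sector in an rb-tree, and services the busy queues
round-robin, at most cfq_quantum requests per pass, so a heavy submitter cannot
starve lighter ones. A minimal userspace sketch of that dispatch discipline
(illustrative only: the file name, the fixed request table, and all identifiers
here are invented, not kernel API):

/* fairness_sim.c - round-robin dispatch across per-process queues, in the
 * spirit of cfq_dispatch_requests() above. Build: cc fairness_sim.c */
#include <stdio.h>

#define NQUEUES 3	/* three "processes" */
#define NREQS   4	/* pending requests each */

int main(void)
{
	/* pending[q][] holds sector numbers, pre-sorted per queue the way
	 * each cfq_queue's rb-tree keeps requests ordered by rq_rb_key */
	long pending[NQUEUES][NREQS] = {
		{ 100, 104, 108, 112 },	/* sequential reader */
		{ 500, 501, 502, 503 },	/* sequential writer */
		{ 900, 908, 916, 924 },	/* another reader */
	};
	int head[NQUEUES] = { 0, 0, 0 };
	int remaining = NQUEUES * NREQS;

	/* one request from every busy queue per pass: bandwidth is split
	 * evenly no matter how many requests one process has queued */
	while (remaining) {
		for (int q = 0; q < NQUEUES; q++) {
			if (head[q] < NREQS) {
				printf("dispatch: queue %d sector %ld\n",
				       q, pending[q][head[q]++]);
				remaining--;
			}
		}
	}
	return 0;
}

The interleaved output (100, 500, 900, 104, 501, 908, ...) mirrors what the
rr_list walk produces; the kernel additionally sector-sorts the dispatch list
in cfq_dispatch_sort() so the drive still sees mostly ascending offsets.
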
diff --git a/drivers/block/ckrm-io.c b/drivers/block/ckrm-io.c
new file mode 100644
index 000000000..ce166e855
--- /dev/null
+++ b/drivers/block/ckrm-io.c
@@ -0,0 +1,507 @@
+/* linux/drivers/block/ckrm_io.c : Block I/O Resource Controller for CKRM
+ *
+ * Copyright (C) Shailabh Nagar, IBM Corp. 2004
+ *
+ *
+ * Provides best-effort block I/O bandwidth control for CKRM.
+ * This file provides the CKRM API. The underlying scheduler is a
+ * modified Complete-Fair Queueing (CFQ) iosched.
+ *
+ * Latest version, more details at http://ckrm.sf.net
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ */
+
+/* Changes
+ *
+ * 29 July 2004
+ *	Third complete rewrite for CKRM's current API
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/spinlock.h>
+#include <linux/seq_file.h>
+#include <asm/div64.h>
+
+#include <linux/ckrm_rc.h>
+#include <linux/ckrm-io.h>
+
+/* Tie to cfq priorities */
+#define CKI_IOPRIO_NORM		IOPRIO_NORM
+
+/* Divisor to get fraction of bandwidth represented by an IOPRIO value */
+/* FIXME: Will not work if IOPRIO_NR > 100 */
+#define CKI_IOPRIO_DIV		(IOPRIO_NR-1)
+/* Minimum ioprio value to be assigned to a class */
+#define CKI_IOPRIO_MIN		1
+
+#define CKI_IOUSAGE_UNIT	512
+
+typedef struct ckrm_io_stats{
+	struct timeval epochstart ; /* all measurements relative to this
+				       start time */
+	unsigned long blksz;	/* size of bandwidth unit */
+	atomic_t blkrd;		/* read units submitted to DD */
+	atomic_t blkwr;		/* write units submitted to DD */
+
+} cki_stats_t;		/* per class I/O statistics */
+
+/* Note
+ * Currently local unit == CFQ I/O priority directly.
+ * CFQ ionice values have an implied bandwidth share so they
+ * can be added, subdivided etc. as long as the initial allocation
+ * of the systemwide default's total is set to the highest CFQ ionice
+ * value (== 100% of disk bandwidth)
+ */
+
+typedef struct ckrm_io_class {
+
+	struct ckrm_core_class *core;
+	struct ckrm_core_class *parent;
+
+	struct ckrm_shares shares;
+	spinlock_t shares_lock;	/* protect share changes */
+
+	/* Absolute shares of this class
+	 * in local units.
+	 */
+
+	int ioprio;
+	int unused;
+
+	/* Statistics, for class and default subclass */
+	cki_stats_t stats;
+	cki_stats_t mystats;
+
+} cki_icls_t;
+
+
+
+/* Internal functions */
+static inline void cki_reset_stats(cki_stats_t *usg);
+static inline void init_icls_one(cki_icls_t *icls);
+static inline int cki_div(int *a, int b, int c);
+static inline int cki_recalc(cki_icls_t *icls, int rel2abs);
+
+#ifdef DOES_NOT_WORK_AND_NOT_NEEDED
+/* External functions e.g. interface to ioscheduler */
+inline void *cki_tsk_icls(struct task_struct *tsk);
+inline int cki_tsk_ioprio(struct task_struct *tsk);
+#endif
+
+extern void cki_cfq_set(icls_tsk_t tskicls, icls_ioprio_t tskioprio);
+
+/* CKRM Resource Controller API functions */
+static void * cki_alloc(struct ckrm_core_class *this,
+			struct ckrm_core_class * parent);
+static void cki_free(void *res);
+static int cki_setshare(void *res, struct ckrm_shares * shares);
+static int cki_getshare(void *res, struct ckrm_shares * shares);
+static int cki_getstats(void *res, struct seq_file *);
+static int cki_resetstats(void *res);
+static int cki_showconfig(void *res, struct seq_file *sfile);
+static int cki_setconfig(void *res, const char *cfgstr);
+static void cki_chgcls(void *tsk, void *oldres, void *newres);
+
+
+struct ckrm_res_ctlr cki_rcbs;
+
+static inline void cki_reset_stats(cki_stats_t *stats)
+{
+	if (stats) {
+		atomic_set(&stats->blkrd,0);
+		atomic_set(&stats->blkwr,0);
+	}
+}
+
+static inline void init_icls_stats(cki_icls_t *icls)
+{
+	struct timeval tv;
+
+	do_gettimeofday(&tv);
+	icls->stats.epochstart = icls->mystats.epochstart = tv;
+	icls->stats.blksz = icls->mystats.blksz = CKI_IOUSAGE_UNIT;
+	cki_reset_stats(&icls->stats);
+	cki_reset_stats(&icls->mystats);
+}
+
+/* Initialize icls to default values
+ * No other classes touched, locks not reinitialized.
+ */
+
+static inline void init_icls_one(cki_icls_t *icls)
+{
+	icls->shares.my_guarantee =
+		(CKI_IOPRIO_MIN * CKRM_SHARE_DFLT_TOTAL_GUARANTEE) /
+		CKI_IOPRIO_DIV ;
+	icls->shares.my_limit = CKRM_SHARE_DFLT_TOTAL_GUARANTEE;
+	icls->shares.total_guarantee = CKRM_SHARE_DFLT_TOTAL_GUARANTEE;
+	icls->shares.max_limit = CKRM_SHARE_DFLT_TOTAL_GUARANTEE;
+
+	icls->shares.unused_guarantee = icls->shares.total_guarantee -
+		icls->shares.my_guarantee;
+	icls->shares.cur_max_limit = CKRM_SHARE_DFLT_TOTAL_GUARANTEE;
+
+
+	icls->ioprio = CKI_IOPRIO_MIN;
+	icls->unused = 0 ;
+
+	init_icls_stats(icls);
+}
+
+
+static inline int cki_div(int *a, int b, int c)
+{
+	u64 temp = (u64) b * c ;
+	do_div(temp,CKI_IOPRIO_DIV);
+	*a = (int) temp;
+
+	return 0;
+}
+
+
+/* Recalculate absolute shares from relative (rel2abs=1)
+ * or vice versa (rel2abs=0)
+ * Caller should have a lock on icls
+ */
+
+static inline int cki_recalc(cki_icls_t *icls, int rel2abs)
+{
+	u64 temp;
+
+	if (icls->parent == NULL) {
+		/* Root, as parent, always gets all */
+
+		temp = icls->shares.my_guarantee * (IOPRIO_NR-1);
+		do_div(temp, icls->shares.total_guarantee);
+
+		icls->ioprio = temp ;
+		icls->unused = (IOPRIO_NR-1)-icls->ioprio;
+
+	} else {
+		cki_icls_t *parres;
+		int partot ;
+
+		parres = ckrm_get_res_class(icls->parent,
+					    cki_rcbs.resid,
+					    cki_icls_t);
+		if (!parres) {
+			printk(KERN_ERR "cki_recalc: error getting "
+			       "resclass from core \n");
+			return -EINVAL;
+		}
+
+		partot = parres->ioprio + parres->unused;
+
+		temp = (icls->shares.my_guarantee * (parres->ioprio + parres->unused));
+		do_div(temp, parres->shares.total_guarantee);
+
+		icls->ioprio = temp;
+		icls->unused = 0;
+
+	}
+
+	return 0;
+
+}
+
+
+inline void *cki_icls_tsk(struct task_struct *tsk)
+{
+	return (void *) ckrm_get_res_class(class_core(tsk->taskclass),
+					   cki_rcbs.resid, cki_icls_t);
+}
+
+inline int cki_icls_ioprio(struct task_struct *tsk)
+{
+	cki_icls_t *icls = ckrm_get_res_class(class_core(tsk->taskclass),
+					      cki_rcbs.resid, cki_icls_t);
+	return icls->ioprio;
+}
+
+static void *cki_alloc(struct ckrm_core_class *core,
+		       struct ckrm_core_class *parent)
+{
+	cki_icls_t *icls;
+
+	icls = kmalloc(sizeof(cki_icls_t), GFP_ATOMIC);
+	if (!icls) {
+		printk(KERN_ERR "cki_res_alloc failed GFP_ATOMIC\n");
+		return NULL;
+	}
+
+	memset(icls, 0, sizeof(cki_icls_t));
+	icls->core = core;
+	icls->parent = parent;
+	icls->shares_lock = SPIN_LOCK_UNLOCKED;
+
+	if (parent == NULL) {
+		u64 temp;
+
+		/* Root class gets same as "normal" CFQ priorities to
+		 * retain compatibility of behaviour in the absence of
+		 * other classes
+		 */
+
+		icls->ioprio = IOPRIO_NORM;
+		icls->unused = (IOPRIO_NR-1)-IOPRIO_NORM;
+
+		/* Default gets normal, not minimum */
+		//icls->unused = IOPRIO_NORM;
+		//icls->unused = icls->guarantee-icls->myguarantee;
+		//icls->limit = icls->mylimit = IOPRIO_NR;
+
+		/* Compute shares in abstract units */
+		icls->shares.total_guarantee = CKRM_SHARE_DFLT_TOTAL_GUARANTEE;
+		temp = (u64) icls->ioprio * icls->shares.total_guarantee;
+		do_div(temp, CKI_IOPRIO_DIV);
+		icls->shares.my_guarantee = (int) temp;
+
+		//icls->shares.my_limit = CKRM_SHARE_DFLT_MAX_LIMIT;
+		//icls->shares.max_limit = CKRM_SHARE_DFLT_MAX_LIMIT;
+		icls->shares.my_limit = CKRM_SHARE_DFLT_TOTAL_GUARANTEE;
+		icls->shares.max_limit = CKRM_SHARE_DFLT_TOTAL_GUARANTEE;
+
+
+		icls->shares.unused_guarantee =
+			icls->shares.total_guarantee -
+			icls->shares.my_guarantee;
+		//icls->shares.cur_max_limit = CKRM_SHARE_DFLT_MAX_LIMIT;
+		icls->shares.cur_max_limit = CKRM_SHARE_DFLT_TOTAL_GUARANTEE;
+
+	} else {
+		init_icls_one(icls);
+	}
+	try_module_get(THIS_MODULE);
+	return icls;
+}
+
+static void cki_free(void *res)
+{
+	cki_icls_t *icls = res, *parres;
+
+	if (!res)
+		return;
+
+	/* Deallocate CFQ queues */
+
+	/* Currently CFQ queues are deallocated when empty. Since no task
+	 * should belong to this icls, no new requests will get added to the
+	 * CFQ queue.
+	 *
+	 * When CFQ switches to persistent queues, call its "put" function
+	 * so it gets deallocated after the last pending request is serviced.
+	 *
+	 */
+
+	parres = ckrm_get_res_class(icls->parent,
+				    cki_rcbs.resid,
+				    cki_icls_t);
+	if (!parres) {
+		printk(KERN_ERR "cki_free: error getting "
+		       "resclass from core \n");
+		return;
+	}
+
+	/* Update parent's shares */
+	spin_lock(&parres->shares_lock);
+	child_guarantee_changed(&parres->shares, icls->shares.my_guarantee, 0);
+	parres->unused += icls->ioprio;
+	spin_unlock(&parres->shares_lock);
+
+	kfree(res);
+	module_put(THIS_MODULE);
+	return;
+}
+
+
+static int cki_setshare(void *res, struct ckrm_shares *new)
+{
+	cki_icls_t *icls = res, *parres;
+	struct ckrm_shares *cur, *par;
+	int rc = -EINVAL, resid = cki_rcbs.resid;
+
+	if (!icls) {
+		printk(KERN_ERR "No class\n");
+		return rc;
+	}
+
+	cur = &icls->shares;
+
+	/* limits not supported */
+	if ((new->max_limit != CKRM_SHARE_UNCHANGED)
+	    || (new->my_limit != CKRM_SHARE_UNCHANGED)) {
+		printk(KERN_ERR "limits changed max_limit %d my_limit %d\n",
+		       new->max_limit, new->my_limit);
+
+		return -EINVAL;
+	}
+
+	if (icls->parent) {
+		parres =
+			ckrm_get_res_class(icls->parent, resid, cki_icls_t);
+		if (!parres) {
+			printk(KERN_ERR "cki_setshare: error getting "
+			       "resclass from core \n");
+			return -EINVAL;
+		}
+		spin_lock(&parres->shares_lock);
+		spin_lock(&icls->shares_lock);
+		par = &parres->shares;
+	} else {
+		spin_lock(&icls->shares_lock);
+		parres = NULL;
+		par = NULL;
+	}
+
+	rc = set_shares(new, cur, par);
+
+	printk(KERN_ERR "rc from set_shares %d\n", rc);
+
+	if (!rc) {
+		int old = icls->ioprio;
+		rc = cki_recalc(icls,0);
+
+		if (!rc && parres) {
+			int raise_tot = icls->ioprio - old ;
+			parres->unused += raise_tot ;
+		}
+	}
+	spin_unlock(&icls->shares_lock);
+	if (icls->parent) {
+		spin_unlock(&parres->shares_lock);
+	}
+	return rc;
+}
+
+static int cki_getshare(void *res, struct ckrm_shares * shares)
+{
+	cki_icls_t *icls = res;
+
+	if (!icls)
+		return -EINVAL;
+	*shares = icls->shares;
+	return 0;
+}
+
+static int cki_getstats(void *res, struct seq_file *sfile)
+{
+	cki_icls_t *icls = res;
+
+	if (!icls)
+		return -EINVAL;
+
+/*
+	seq_printf(sfile, "%d my_read\n",atomic_read(&icls->mystats.blkrd));
+	seq_printf(sfile, "%d my_write\n",atomic_read(&icls->mystats.blkwr));
+	seq_printf(sfile, "%d total_read\n",atomic_read(&icls->stats.blkrd));
+	seq_printf(sfile, "%d total_write\n",atomic_read(&icls->stats.blkwr));
+*/
+
+	seq_printf(sfile, "%d ioprio\n",icls->ioprio);
+	seq_printf(sfile, "%d unused\n",icls->unused);
+
+	return 0;
+}
+
+static int cki_resetstats(void *res)
+{
+	cki_icls_t *icls = res;
+
+	if (!res)
+		return -EINVAL;
+
+	init_icls_stats(icls);
+	return 0;
+}
+
+static int cki_showconfig(void *res, struct seq_file *sfile)
+{
+	return -ENOSYS;
+}
+
+static int cki_setconfig(void *res, const char *cfgstr)
+{
+	return -ENOSYS;
+}
+
+static void cki_chgcls(void *tsk, void *oldres, void *newres)
+{
+	/* cki_icls_t *oldicls = oldres, *newicls = newres; */
+
+	/* Nothing needs to be done
+	 * Future requests from task will go to the new class's CFQ q
+	 * Old ones will continue to get satisfied from the original q
+	 *
+	 * Once CFQ moves to a persistent queue model and if refcounts on
+	 * icls's CFQ queues are used, a decrement op would be needed here
+	 */
+
+	return;
+}
+
+
+
+struct ckrm_res_ctlr cki_rcbs = {
+	.res_name = "cki",
+	.res_hdepth = 1,
+	.resid = -1,
+	.res_alloc = cki_alloc,
+	.res_free = cki_free,
+	.set_share_values = cki_setshare,
+	.get_share_values = cki_getshare,
+	.get_stats = cki_getstats,
+	.reset_stats = cki_resetstats,
+	.show_config = cki_showconfig,
+	.set_config = cki_setconfig,
+	.change_resclass = cki_chgcls,
+};
+
+
+
+int __init cki_init(void)
+{
+	struct ckrm_classtype *clstype;
+	int resid = cki_rcbs.resid;
+
+	clstype = ckrm_find_classtype_by_name("taskclass");
+	if (clstype == NULL) {
+		printk(KERN_INFO "init_cki: classtype not found\n");
+		return -ENOENT;
+	}
+
+	if (resid == -1) {
+		resid = ckrm_register_res_ctlr(clstype, &cki_rcbs);
+		if (resid != -1) {
+			cki_rcbs.classtype = clstype;
+			cki_cfq_set(cki_icls_tsk,cki_icls_ioprio);
+		}
+	}
+
+	return 0;
+}
+
+void __exit cki_exit(void)
+{
+	ckrm_unregister_res_ctlr(&cki_rcbs);
+	cki_rcbs.resid = -1;
+	cki_rcbs.classtype = NULL;
+	cki_cfq_set(NULL,NULL);
+}
+
+module_init(cki_init)
+module_exit(cki_exit)
+
+MODULE_AUTHOR("Shailabh Nagar <nagar@watson.ibm.com>");
+MODULE_DESCRIPTION("CKRM Disk I/O Resource Controller");
+MODULE_LICENSE("GPL");
+
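
cki_recalc() above is the heart of the controller: it converts a class's
relative guarantee into an absolute CFQ priority. The root class maps
my_guarantee/total_guarantee onto the (IOPRIO_NR-1) available units, and a
child receives the same fraction of its parent's (ioprio + unused) pool. A
standalone sketch of that arithmetic, assuming IOPRIO_NR is 21 and a default
share base of 100 (both values are assumptions here; the real ones come from
the kernel headers):

/* share_calc.c - the share-to-ioprio mapping of cki_recalc(), in userspace.
 * Build: cc share_calc.c */
#include <stdio.h>

#define IOPRIO_NR            21   /* assumed; set by the modified CFQ */
#define DFLT_TOTAL_GUARANTEE 100  /* assumed CKRM_SHARE_DFLT_TOTAL_GUARANTEE */

/* root: my_guarantee/total_guarantee of all (IOPRIO_NR-1) units */
static int root_ioprio(int my_guarantee, int total_guarantee)
{
	return (int)((long long)my_guarantee * (IOPRIO_NR - 1)
		     / total_guarantee);
}

/* child: same fraction, taken from the parent's (ioprio + unused) pool */
static int child_ioprio(int my_guarantee, int parent_total_guarantee,
			int parent_ioprio, int parent_unused)
{
	return (int)((long long)my_guarantee
		     * (parent_ioprio + parent_unused)
		     / parent_total_guarantee);
}

int main(void)
{
	int root = root_ioprio(DFLT_TOTAL_GUARANTEE, DFLT_TOTAL_GUARANTEE);

	printf("root class: ioprio %d of %d units\n", root, IOPRIO_NR - 1);
	printf("25%% child : ioprio %d\n",
	       child_ioprio(25, DFLT_TOTAL_GUARANTEE, root, 0));
	printf("10%% child : ioprio %d\n",
	       child_ioprio(10, DFLT_TOTAL_GUARANTEE, root, 0));
	return 0;
}

The widening to long long plays the role of the kernel's u64 and do_div() in
cki_div()/cki_recalc(): the product could overflow an int before the divide.
With these assumed constants, a 25% guarantee of the root's 20 units yields
ioprio 5 and a 10% guarantee yields 2, matching the integer truncation the
kernel code performs.
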
diff --git a/drivers/block/ckrm-iostub.c b/drivers/block/ckrm-iostub.c
new file mode 100644
index 000000000..63beff3e3
--- /dev/null
+++ b/drivers/block/ckrm-iostub.c
@@ -0,0 +1,64 @@
+/* ckrm-iostub.c - Stub file for ckrm_io module
+ *
+ * Copyright (C) Shailabh Nagar, IBM Corp. 2004
+ *
+ * Latest version, more details at http://ckrm.sf.net
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ */
+
+/* Changes
+ *
+ * 07 Aug 2004:	Created
+ *
+ */
+
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/ckrm-io.h>
+
+static spinlock_t stub_lock = SPIN_LOCK_UNLOCKED;
+
+static icls_tsk_t tskiclstub;
+static icls_ioprio_t tskiopriostub;
+
+
+void cki_cfq_set(icls_tsk_t tskicls, icls_ioprio_t tskioprio)
+{
+	spin_lock(&stub_lock);
+	tskiclstub = tskicls;
+	tskiopriostub = tskioprio;
+	spin_unlock(&stub_lock);
+}
+
+inline void *cki_hash_key(struct task_struct *tsk)
+{
+	void *ret;
+	spin_lock(&stub_lock);
+	if (tskiclstub)
+		ret = (*tskiclstub)(tsk);
+	else
+		ret = (void *) tsk->tgid;
+	spin_unlock(&stub_lock);
+	return ret;
+}
+
+inline int cki_ioprio(struct task_struct *tsk)
+{
+	int ret;
+	spin_lock(&stub_lock);
+	if (tskiopriostub)
+		ret = (*tskiopriostub)(tsk);
+	else
+		ret = tsk->ioprio;
+	spin_unlock(&stub_lock);
+	return ret;
+}
+
+EXPORT_SYMBOL(cki_cfq_set);
+EXPORT_SYMBOL(cki_hash_key);
+EXPORT_SYMBOL(cki_ioprio);
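
ckrm-iostub.c exists so these entry points can be built into the kernel while
the controller itself stays a module: until ckrm_io installs its callbacks via
cki_cfq_set(), cki_hash_key() and cki_ioprio() fall back to per-process
behaviour (the tgid as queue key, the task's own ioprio). The same registration
pattern as a self-contained userspace sketch (a pthread mutex stands in for
stub_lock, and every identifier here is invented for illustration):

/* hook_stub.c - default-plus-override hook registration, in the spirit of
 * cki_cfq_set()/cki_ioprio() above. Build: cc hook_stub.c -lpthread */
#include <stdio.h>
#include <pthread.h>

typedef int (*ioprio_fn)(int pid);

static pthread_mutex_t stub_lock = PTHREAD_MUTEX_INITIALIZER;
static ioprio_fn ioprio_hook;	/* NULL until the "module" loads */

static int default_ioprio(int pid)
{
	(void)pid;
	return 4;		/* stand-in for the task's own ioprio */
}

/* analogue of cki_cfq_set(): install or clear the controller's callback */
void set_hook(ioprio_fn fn)
{
	pthread_mutex_lock(&stub_lock);
	ioprio_hook = fn;
	pthread_mutex_unlock(&stub_lock);
}

/* analogue of cki_ioprio(): callers never see whether the hook is set */
int get_ioprio(int pid)
{
	int ret;

	pthread_mutex_lock(&stub_lock);
	if (ioprio_hook)
		ret = ioprio_hook(pid);
	else
		ret = default_ioprio(pid);
	pthread_mutex_unlock(&stub_lock);
	return ret;
}

static int class_ioprio(int pid)
{
	return pid % 8;		/* pretend class-based priority */
}

int main(void)
{
	printf("before load : %d\n", get_ioprio(42));	/* default path */
	set_hook(class_ioprio);				/* module_init */
	printf("after load  : %d\n", get_ioprio(42));	/* class path */
	set_hook(NULL);					/* module_exit */
	printf("after unload: %d\n", get_ioprio(42));	/* default again */
	return 0;
}

Holding stub_lock around the indirect call is what lets cki_exit() clear the
pointers safely while another CPU may be inside cki_ioprio(); the sketch keeps
the same discipline with the mutex.
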
diff --git a/include/linux/ckrm-io.h b/include/linux/ckrm-io.h
new file mode 100644
index 000000000..6d6e12749
--- /dev/null
+++ b/include/linux/ckrm-io.h
@@ -0,0 +1,43 @@
+/* linux/include/linux/ckrm-io.h : Block I/O Resource Controller for CKRM
+ *
+ * Copyright (C) Shailabh Nagar, IBM Corp. 2004
+ *
+ *
+ * Provides best-effort block I/O bandwidth control for CKRM.
+ * This file provides the CKRM API. The underlying scheduler is a
+ * modified Complete-Fair Queueing (CFQ) iosched.
+ *
+ * Latest version, more details at http://ckrm.sf.net
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ */
+
+/* Changes
+ *
+ * 29 July 2004
+ *	Third complete rewrite for CKRM's current API
+ *
+ */
+
+
+#ifndef _LINUX_CKRM_IO_H
+#define _LINUX_CKRM_IO_H
+
+typedef void *(*icls_tsk_t) (struct task_struct *tsk);
+typedef int (*icls_ioprio_t) (struct task_struct *tsk);
+
+
+#ifdef CONFIG_CKRM_RES_BLKIO
+
+#ifdef DOES_NOT_WORK_AND_NOT_NEEDED
+extern inline icls_tsk_t cki_tsk_icls;
+extern inline icls_ioprio_t cki_tsk_ioprio;
+#endif
+
+#endif /* CONFIG_CKRM_RES_BLKIO */
+
+#endif
-- 
2.43.0