/*
 * Copyright (C) 2003 Sistina Software
 *
 * This file is released under the GPL.
 */
#include "dm-io.h"

#include <linux/bio.h>
#include <linux/mempool.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/slab.h>

#define BIO_POOL_SIZE 256
/*-----------------------------------------------------------------
 * Bio set, move this to bio.c
 *---------------------------------------------------------------*/
#define BV_NAME_SIZE 16
struct biovec_pool {
        int nr_vecs;
        char name[BV_NAME_SIZE];
        kmem_cache_t *slab;
        mempool_t *pool;
        atomic_t allocated;     /* FIXME: debug */
};

#define BIOVEC_NR_POOLS 6
struct bio_set {
        char name[BV_NAME_SIZE];
        kmem_cache_t *bio_slab;
        mempool_t *bio_pool;
        struct biovec_pool pools[BIOVEC_NR_POOLS];
};
static void bio_set_exit(struct bio_set *bs)
{
        unsigned i;
        struct biovec_pool *bp;

        if (bs->bio_pool)
                mempool_destroy(bs->bio_pool);

        if (bs->bio_slab)
                kmem_cache_destroy(bs->bio_slab);

        for (i = 0; i < BIOVEC_NR_POOLS; i++) {
                bp = bs->pools + i;
                if (bp->pool)
                        mempool_destroy(bp->pool);

                if (bp->slab)
                        kmem_cache_destroy(bp->slab);
        }
}
static void mk_name(char *str, size_t len, const char *prefix, unsigned count)
{
        snprintf(str, len, "%s-%u", prefix, count);
}
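/*
 * Worked example (illustrative, not in the original source):
 * mk_name(bp->name, sizeof(bp->name), "dm-io", 2) produces the slab
 * name "dm-io-2", used for the third biovec pool set up below.
 */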
static int bio_set_init(struct bio_set *bs, const char *slab_prefix,
                        unsigned pool_entries, unsigned scale)
{
        /* FIXME: this must match bvec_index(), why not go the
         * whole hog and have a pool per power of 2? */
        static unsigned _vec_lengths[BIOVEC_NR_POOLS] = {
                1, 4, 16, 64, 128, BIO_MAX_PAGES
        };

        unsigned i, size;
        struct biovec_pool *bp;

        /* zero the bs so we can tear down properly on error */
        memset(bs, 0, sizeof(*bs));

        /*
         * Set up the bio pool.
         */
        snprintf(bs->name, sizeof(bs->name), "%s-bio", slab_prefix);

        bs->bio_slab = kmem_cache_create(bs->name, sizeof(struct bio), 0,
                                         SLAB_HWCACHE_ALIGN, NULL, NULL);
        if (!bs->bio_slab) {
                DMWARN("can't init bio slab");
                return -ENOMEM;
        }

        bs->bio_pool = mempool_create(pool_entries, mempool_alloc_slab,
                                      mempool_free_slab, bs->bio_slab);
        if (!bs->bio_pool) {
                DMWARN("can't init bio pool");
                goto bad;
        }

        /*
         * Set up the biovec pools.
         */
        for (i = 0; i < BIOVEC_NR_POOLS; i++) {
                bp = bs->pools + i;
                bp->nr_vecs = _vec_lengths[i];
                atomic_set(&bp->allocated, 1);  /* FIXME: debug */

                size = bp->nr_vecs * sizeof(struct bio_vec);

                mk_name(bp->name, sizeof(bp->name), slab_prefix, i);
                bp->slab = kmem_cache_create(bp->name, size, 0,
                                             SLAB_HWCACHE_ALIGN, NULL, NULL);
                if (!bp->slab) {
                        DMWARN("can't init biovec slab cache");
                        goto bad;
                }

                if (i >= scale)
                        pool_entries >>= 1;

                bp->pool = mempool_create(pool_entries, mempool_alloc_slab,
                                          mempool_free_slab, bp->slab);
                if (!bp->pool) {
                        DMWARN("can't init biovec mempool");
                        goto bad;
                }
        }

        return 0;

 bad:
        bio_set_exit(bs);
        return -ENOMEM;
}
static inline unsigned bvec_index(unsigned nr)
{
        switch (nr) {
        case 1:         return 0;
        case 2 ... 4:   return 1;
        case 5 ... 16:  return 2;
        case 17 ... 64: return 3;
        case 65 ... 128: return 4;
        case 129 ... BIO_MAX_PAGES: return 5;
        }

        BUG();
        return 0;
}
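/*
 * Worked example (illustrative, not in the original source): a request
 * for 7 iovecs gives bvec_index(7) == 2, i.e. the 16-vec pool, so
 * bio_set_alloc() below rounds the allocation up to the next entry in
 * _vec_lengths[].
 */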
static unsigned _bio_count = 0;
struct bio *bio_set_alloc(struct bio_set *bs, int gfp_mask, int nr_iovecs)
{
        struct biovec_pool *bp;
        struct bio_vec *bv = NULL;
        unsigned long idx;
        struct bio *bio;

        bio = mempool_alloc(bs->bio_pool, gfp_mask);
        if (unlikely(!bio))
                return NULL;

        bio_init(bio);

        if (likely(nr_iovecs)) {
                idx = bvec_index(nr_iovecs);
                bp = bs->pools + idx;
                bv = mempool_alloc(bp->pool, gfp_mask);
                if (!bv) {
                        mempool_free(bio, bs->bio_pool);
                        return NULL;
                }

                memset(bv, 0, bp->nr_vecs * sizeof(*bv));
                bio->bi_flags |= idx << BIO_POOL_OFFSET;
                bio->bi_max_vecs = bp->nr_vecs;
                atomic_inc(&bp->allocated);
        }

        bio->bi_io_vec = bv;
        return bio;
}
static void bio_set_free(struct bio_set *bs, struct bio *bio)
{
        struct biovec_pool *bp = bs->pools + BIO_POOL_IDX(bio);

        if (atomic_dec_and_test(&bp->allocated))
                BUG();

        mempool_free(bio->bi_io_vec, bp->pool);
        mempool_free(bio, bs->bio_pool);
}
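/*
 * Minimal usage sketch of the two helpers above (illustrative only,
 * not part of the original file); the real callers are do_region()
 * and bio_dtr() further down.  The GFP flags and vec count here are
 * arbitrary examples.
 */
#if 0
static void example_bio_set_roundtrip(struct bio_set *bs)
{
        struct bio *bio = bio_set_alloc(bs, GFP_NOIO, 4);  /* served from the 4-vec pool */

        if (bio)
                bio_set_free(bs, bio);  /* returns both the bio and its bvec array */
}
#endif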
/*-----------------------------------------------------------------
 *---------------------------------------------------------------*/
static struct bio_set _bios;

/* FIXME: can we shrink this? */
struct io {
        unsigned long error;
        atomic_t count;
        struct task_struct *sleeper;
        io_notify_fn callback;
        void *context;
};

/*
 * io contexts are only dynamically allocated for asynchronous
 * io.  Since async io is likely to be the majority of io we'll
 * have the same number of io contexts as buffer heads! (FIXME:
 * must reduce this.)
 */
static unsigned _num_ios;
static mempool_t *_io_pool;
static void *alloc_io(int gfp_mask, void *pool_data)
{
        return kmalloc(sizeof(struct io), gfp_mask);
}

static void free_io(void *element, void *pool_data)
{
        kfree(element);
}

static unsigned int pages_to_ios(unsigned int pages)
{
        return 4 * pages;       /* too many? */
}
static int resize_pool(unsigned int new_ios)
{
        int r = 0;

        if (_io_pool) {
                if (new_ios == 0) {
                        /* free off the pool */
                        mempool_destroy(_io_pool);
                        _io_pool = NULL;
                        bio_set_exit(&_bios);

                } else {
                        /* resize the pool */
                        r = mempool_resize(_io_pool, new_ios, GFP_KERNEL);
                }

        } else {
                /* create new pool */
                _io_pool = mempool_create(new_ios, alloc_io, free_io, NULL);
                if (!_io_pool)
                        return -ENOMEM;

                r = bio_set_init(&_bios, "dm-io", 512, 1);
                if (r) {
                        mempool_destroy(_io_pool);
                        _io_pool = NULL;
                }
        }

        if (!r)
                _num_ios = new_ios;

        return r;
}
int dm_io_get(unsigned int num_pages)
{
        return resize_pool(_num_ios + pages_to_ios(num_pages));
}

void dm_io_put(unsigned int num_pages)
{
        resize_pool(_num_ios - pages_to_ios(num_pages));
}
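/*
 * Illustrative usage sketch (not part of the original file): a
 * hypothetical client reserves io contexts for the pages it expects
 * to have in flight, does its io, then drops the reservation.  With
 * pages_to_ios() above, dm_io_get(64) asks for 4 * 64 = 256 extra
 * struct io entries in _io_pool.
 */
#if 0
static int example_client(void)
{
        int r = dm_io_get(64);          /* grow the pools before use */

        if (r)
                return r;

        /* ... submit io with dm_io_sync()/dm_io_async() ... */

        dm_io_put(64);                  /* shrink them again when done */
        return 0;
}
#endif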
/*-----------------------------------------------------------------
 * We need to keep track of which region a bio is doing io for.
 * In order to save a memory allocation we store this in the last
 * bvec, which we know is unused (blech).
 *---------------------------------------------------------------*/
static inline void bio_set_region(struct bio *bio, unsigned region)
{
        bio->bi_io_vec[bio->bi_max_vecs - 1].bv_len = region;
}

static inline unsigned bio_get_region(struct bio *bio)
{
        return bio->bi_io_vec[bio->bi_max_vecs - 1].bv_len;
}
/*-----------------------------------------------------------------
 * We need an io object to keep track of the number of bios that
 * have been dispatched for a particular io.
 *---------------------------------------------------------------*/
static void dec_count(struct io *io, unsigned int region, int error)
{
        if (error)
                set_bit(region, &io->error);

        if (atomic_dec_and_test(&io->count)) {
                if (io->sleeper)
                        wake_up_process(io->sleeper);

                else {
                        int r = io->error;
                        io_notify_fn fn = io->callback;
                        void *context = io->context;

                        mempool_free(io, _io_pool);
                        fn(r, context);
                }
        }
}
/* FIXME Move this to bio.h? */
static void zero_fill_bio(struct bio *bio)
{
        unsigned long flags;
        struct bio_vec *bv;
        int i;

        bio_for_each_segment(bv, bio, i) {
                char *data = bvec_kmap_irq(bv, &flags);
                memset(data, 0, bv->bv_len);
                flush_dcache_page(bv->bv_page);
                bvec_kunmap_irq(data, &flags);
        }
}
static int endio(struct bio *bio, unsigned int done, int error)
{
        struct io *io = (struct io *) bio->bi_private;

        /* keep going until we've finished */
        if (bio->bi_size)
                return 1;

        if (error && bio_data_dir(bio) == READ)
                zero_fill_bio(bio);

        dec_count(io, bio_get_region(bio), error);
        bio_put(bio);

        return 0;
}

static void bio_dtr(struct bio *bio)
{
        _bio_count--;
        bio_set_free(&_bios, bio);
}
/*-----------------------------------------------------------------
 * These little objects provide an abstraction for getting a new
 * destination page for io.
 *---------------------------------------------------------------*/
struct dpages {
        void (*get_page)(struct dpages *dp,
                         struct page **p, unsigned long *len, unsigned *offset);
        void (*next_page)(struct dpages *dp);

        unsigned context_u;
        void *context_ptr;
};
/*
 * Functions for getting the pages from a list.
 */
static void list_get_page(struct dpages *dp,
                struct page **p, unsigned long *len, unsigned *offset)
{
        unsigned o = dp->context_u;
        struct page_list *pl = (struct page_list *) dp->context_ptr;

        *p = pl->page;
        *len = PAGE_SIZE - o;
        *offset = o;
}

static void list_next_page(struct dpages *dp)
{
        struct page_list *pl = (struct page_list *) dp->context_ptr;
        dp->context_ptr = pl->next;
        dp->context_u = 0;
}

static void list_dp_init(struct dpages *dp, struct page_list *pl, unsigned offset)
{
        dp->get_page = list_get_page;
        dp->next_page = list_next_page;
        dp->context_u = offset;
        dp->context_ptr = pl;
}
/*
 * Functions for getting the pages from a bvec.
 */
static void bvec_get_page(struct dpages *dp,
                struct page **p, unsigned long *len, unsigned *offset)
{
        struct bio_vec *bvec = (struct bio_vec *) dp->context_ptr;
        *p = bvec->bv_page;
        *len = bvec->bv_len;
        *offset = bvec->bv_offset;
}

static void bvec_next_page(struct dpages *dp)
{
        struct bio_vec *bvec = (struct bio_vec *) dp->context_ptr;
        dp->context_ptr = bvec + 1;
}

static void bvec_dp_init(struct dpages *dp, struct bio_vec *bvec)
{
        dp->get_page = bvec_get_page;
        dp->next_page = bvec_next_page;
        dp->context_ptr = bvec;
}
static void vm_get_page(struct dpages *dp,
                struct page **p, unsigned long *len, unsigned *offset)
{
        *p = vmalloc_to_page(dp->context_ptr);
        *offset = dp->context_u;
        *len = PAGE_SIZE - dp->context_u;
}

static void vm_next_page(struct dpages *dp)
{
        dp->context_ptr += PAGE_SIZE - dp->context_u;
        dp->context_u = 0;
}

static void vm_dp_init(struct dpages *dp, void *data)
{
        dp->get_page = vm_get_page;
        dp->next_page = vm_next_page;
        dp->context_u = ((unsigned long) data) & (PAGE_SIZE - 1);
        dp->context_ptr = data;
}
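/*
 * Illustrative sketch (not part of the original file) of how a dpages
 * iterator is consumed: pick one of the *_dp_init() helpers above,
 * then alternate get_page()/next_page() exactly as do_region() does
 * below.  The buffer and length here are hypothetical; buf is assumed
 * to come from vmalloc().
 */
#if 0
static void example_walk_vm_buffer(void *buf, size_t size)
{
        struct dpages dp;
        struct page *page;
        unsigned long len;
        unsigned offset;
        size_t done = 0;

        vm_dp_init(&dp, buf);
        while (done < size) {
                dp.get_page(&dp, &page, &len, &offset); /* current page, usable bytes */
                len = min(len, (unsigned long) (size - done));
                /* ... add (page, offset, len) to a bio ... */
                done += len;
                dp.next_page(&dp);                      /* advance to the next page */
        }
}
#endif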
/*-----------------------------------------------------------------
 * IO routines that accept a list of pages.
 *---------------------------------------------------------------*/
static void do_region(int rw, unsigned int region, struct io_region *where,
                      struct dpages *dp, struct io *io)
{
        struct bio *bio;
        struct page *page;
        unsigned long len;
        unsigned offset;
        unsigned num_bvecs;
        sector_t remaining = where->count;

        while (remaining) {
                /*
                 * Allocate a suitably sized bio; we add an extra
                 * bvec for bio_get/set_region().
                 */
                num_bvecs = (remaining / (PAGE_SIZE >> 9)) + 2;
                _bio_count++;
                bio = bio_set_alloc(&_bios, GFP_NOIO, num_bvecs);
                bio->bi_sector = where->sector + (where->count - remaining);
                bio->bi_bdev = where->bdev;
                bio->bi_end_io = endio;
                bio->bi_private = io;
                bio->bi_destructor = bio_dtr;
                bio_set_region(bio, region);

                /*
                 * Try to add as many pages as possible.
                 */
                while (remaining) {
                        dp->get_page(dp, &page, &len, &offset);
                        len = min(len, to_bytes(remaining));
                        if (!bio_add_page(bio, page, len, offset))
                                break;

                        offset = 0;
                        remaining -= to_sector(len);
                        dp->next_page(dp);
                }

                atomic_inc(&io->count);
                submit_bio(rw, bio);
        }
}
static void dispatch_io(int rw, unsigned int num_regions,
                        struct io_region *where, struct dpages *dp,
                        struct io *io, int sync)
{
        int i;
        struct dpages old_pages = *dp;

        if (sync)
                rw |= (1 << BIO_RW_SYNC);

        /*
         * For multiple regions we need to be careful to rewind
         * the dp object for each call to do_region.
         */
        for (i = 0; i < num_regions; i++) {
                *dp = old_pages;
                if (where[i].count)
                        do_region(rw, i, where + i, dp, io);
        }

        /*
         * Drop the extra reference that we were holding to avoid
         * the io being completed too early.
         */
        dec_count(io, 0, 0);
}
static int sync_io(unsigned int num_regions, struct io_region *where,
                   int rw, struct dpages *dp, unsigned long *error_bits)
{
        struct io io;

        if (num_regions > 1 && rw != WRITE) {
                WARN_ON(1);
                return -EIO;
        }

        io.error = 0;
        atomic_set(&io.count, 1); /* see dispatch_io() */
        io.sleeper = current;

        dispatch_io(rw, num_regions, where, dp, &io, 1);

        while (1) {
                set_current_state(TASK_UNINTERRUPTIBLE);

                if (!atomic_read(&io.count) || signal_pending(current))
                        break;

                io_schedule();
        }
        set_current_state(TASK_RUNNING);

        if (atomic_read(&io.count))
                return -EINTR;

        *error_bits = io.error;
        return io.error ? -EIO : 0;
}
static int async_io(unsigned int num_regions, struct io_region *where, int rw,
                    struct dpages *dp, io_notify_fn fn, void *context)
{
        struct io *io;

        if (num_regions > 1 && rw != WRITE) {
                WARN_ON(1);
                fn(1, context);
                return -EIO;
        }

        io = mempool_alloc(_io_pool, GFP_NOIO);
        io->error = 0;
        atomic_set(&io->count, 1); /* see dispatch_io() */
        io->sleeper = NULL;
        io->callback = fn;
        io->context = context;

        dispatch_io(rw, num_regions, where, dp, io, 0);
        return 0;
}
int dm_io_sync(unsigned int num_regions, struct io_region *where, int rw,
               struct page_list *pl, unsigned int offset,
               unsigned long *error_bits)
{
        struct dpages dp;
        list_dp_init(&dp, pl, offset);
        return sync_io(num_regions, where, rw, &dp, error_bits);
}

int dm_io_sync_bvec(unsigned int num_regions, struct io_region *where, int rw,
                    struct bio_vec *bvec, unsigned long *error_bits)
{
        struct dpages dp;
        bvec_dp_init(&dp, bvec);
        return sync_io(num_regions, where, rw, &dp, error_bits);
}

int dm_io_sync_vm(unsigned int num_regions, struct io_region *where, int rw,
                  void *data, unsigned long *error_bits)
{
        struct dpages dp;
        vm_dp_init(&dp, data);
        return sync_io(num_regions, where, rw, &dp, error_bits);
}
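/*
 * Illustrative caller of the synchronous interface (not part of the
 * original file).  The device, sector range and buffer are
 * hypothetical; buf is assumed to come from vmalloc(), and error_bits
 * gets one bit set per failed region.
 */
#if 0
static int example_read_header(struct block_device *bdev, void *buf)
{
        struct io_region where = {
                .bdev = bdev,
                .sector = 0,
                .count = 8,             /* 4k at the start of the device */
        };
        unsigned long error_bits;

        return dm_io_sync_vm(1, &where, READ, buf, &error_bits);
}
#endif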
int dm_io_async(unsigned int num_regions, struct io_region *where, int rw,
                struct page_list *pl, unsigned int offset,
                io_notify_fn fn, void *context)
{
        struct dpages dp;
        list_dp_init(&dp, pl, offset);
        return async_io(num_regions, where, rw, &dp, fn, context);
}

int dm_io_async_bvec(unsigned int num_regions, struct io_region *where, int rw,
                     struct bio_vec *bvec, io_notify_fn fn, void *context)
{
        struct dpages dp;
        bvec_dp_init(&dp, bvec);
        return async_io(num_regions, where, rw, &dp, fn, context);
}

int dm_io_async_vm(unsigned int num_regions, struct io_region *where, int rw,
                   void *data, io_notify_fn fn, void *context)
{
        struct dpages dp;
        vm_dp_init(&dp, data);
        return async_io(num_regions, where, rw, &dp, fn, context);
}
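/*
 * Illustrative caller of the asynchronous interface (not part of the
 * original file).  The callback and page list are hypothetical; the
 * io_notify_fn is invoked from dec_count() once every region's bios
 * have completed, with the accumulated error bits as its first
 * argument.
 */
#if 0
static void example_write_done(unsigned long error, void *context)
{
        if (error)
                DMWARN("example write failed");
}

static int example_write_pages(struct io_region *where, unsigned num_regions,
                               struct page_list *pl)
{
        return dm_io_async(num_regions, where, WRITE, pl, 0,
                           example_write_done, NULL);
}
#endif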
EXPORT_SYMBOL(dm_io_get);
EXPORT_SYMBOL(dm_io_put);
EXPORT_SYMBOL(dm_io_sync);
EXPORT_SYMBOL(dm_io_async);
EXPORT_SYMBOL(dm_io_sync_bvec);
EXPORT_SYMBOL(dm_io_async_bvec);
EXPORT_SYMBOL(dm_io_sync_vm);
EXPORT_SYMBOL(dm_io_async_vm);