return ret;
}
+EXPORT_SYMBOL(blk_get_backing_dev_info);
+
void blk_queue_activity_fn(request_queue_t *q, activity_fn *fn, void *data)
{
q->activity_fn = fn;
EXPORT_SYMBOL(blk_queue_make_request);
+/**
+ * blk_queue_ordered - set whether this queue supports ordered writes
+ * @q: the request queue
+ * @flag: non-zero to enable ordered write support, zero to disable it
+ *
+ * Description:
+ * For journalled file systems, doing ordered writes on a commit
+ * block instead of explicitly doing wait_on_buffer (which is bad
+ * for performance) can be a big win. Block drivers supporting this
+ * feature should call this function and indicate so.
+ *
+ **/
+void blk_queue_ordered(request_queue_t *q, int flag)
+{
+ if (flag)
+ set_bit(QUEUE_FLAG_ORDERED, &q->queue_flags);
+ else
+ clear_bit(QUEUE_FLAG_ORDERED, &q->queue_flags);
+}
+
+EXPORT_SYMBOL(blk_queue_ordered);
+
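
A minimal sketch of how a driver would advertise this support; my_driver_init_queue() is an illustrative name, only blk_queue_ordered() comes from this patch:

	static void my_driver_init_queue(request_queue_t *q)
	{
		/* tell the block layer this queue handles ordered (barrier) writes */
		blk_queue_ordered(q, 1);
	}
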
+/**
+ * blk_queue_issue_flush_fn - set function for issuing a flush
+ * @q: the request queue
+ * @iff: the function to be called when issuing a flush
+ *
+ * Description:
+ * If a driver supports issuing a flush command, it notifies the block
+ * layer of that support by registering its flush function through this call.
+ *
+ **/
+void blk_queue_issue_flush_fn(request_queue_t *q, issue_flush_fn *iff)
+{
+ q->issue_flush_fn = iff;
+}
+
+EXPORT_SYMBOL(blk_queue_issue_flush_fn);
+
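
A hedged sketch of the registration a driver might do; my_issue_flush() and its body are assumptions, the hook signature and blk_queue_issue_flush_fn() are from this patch:

	static int my_issue_flush(request_queue_t *q, struct gendisk *disk,
				  sector_t *error_sector)
	{
		/* send a cache-flush command to the hardware; on failure, store
		 * the failing offset if the caller supplied error_sector */
		return 0;
	}

	static void my_driver_init_queue(request_queue_t *q)
	{
		blk_queue_issue_flush_fn(q, my_issue_flush);
	}
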
/**
* blk_queue_bounce_limit - set bounce buffer limit for queue
* @q: the request queue for the device
printk("%s: set to minimum %d\n", __FUNCTION__, max_sectors);
}
- q->max_sectors = max_sectors;
+ q->max_sectors = q->max_hw_sectors = max_sectors;
}
EXPORT_SYMBOL(blk_queue_max_sectors);
void blk_queue_stack_limits(request_queue_t *t, request_queue_t *b)
{
/* zero is "infinity" */
- t->max_sectors = min_not_zero(t->max_sectors,b->max_sectors);
+ t->max_sectors = t->max_hw_sectors =
+ min_not_zero(t->max_sectors,b->max_sectors);
t->max_phys_segments = min(t->max_phys_segments,b->max_phys_segments);
t->max_hw_segments = min(t->max_hw_segments,b->max_hw_segments);
EXPORT_SYMBOL(blk_queue_find_tag);
/**
- * blk_queue_free_tags - release tag maintenance info
+ * __blk_queue_free_tags - release tag maintenance info
* @q: the request queue for the device
*
* Notes:
* blk_cleanup_queue() will take care of calling this function, if tagging
- * has been used. So there's usually no need to call this directly, unless
- * tagging is just being disabled but the queue remains in function.
+ * has been used. So there's no need to call this directly.
**/
-void blk_queue_free_tags(request_queue_t *q)
+static void __blk_queue_free_tags(request_queue_t *q)
{
struct blk_queue_tag *bqt = q->queue_tags;
q->queue_flags &= ~(1 << QUEUE_FLAG_QUEUED);
}
+/**
+ * blk_queue_free_tags - release tag maintenance info
+ * @q: the request queue for the device
+ *
+ * Notes:
+ * This is used to disable tagged queuing on a device, while leaving the
+ * queue itself functional.
+ **/
+void blk_queue_free_tags(request_queue_t *q)
+{
+ clear_bit(QUEUE_FLAG_QUEUED, &q->queue_flags);
+}
+
EXPORT_SYMBOL(blk_queue_free_tags);
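
With this split, toggling tagged queuing on a live queue is cheap. A sketch under the assumption of hypothetical my_enable_tcq()/my_disable_tcq() helpers:

	static int my_enable_tcq(request_queue_t *q, int depth)
	{
		/* reuses (and resizes, if needed) an existing tag map */
		return blk_queue_init_tags(q, depth, NULL);
	}

	static void my_disable_tcq(request_queue_t *q)
	{
		/* clears QUEUE_FLAG_QUEUED only; the queue keeps working */
		blk_queue_free_tags(q);
	}
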
static int
init_tag_map(request_queue_t *q, struct blk_queue_tag *tags, int depth)
{
int bits, i;
+ struct request **tag_index;
+ unsigned long *tag_map;
if (depth > q->nr_requests * 2) {
depth = q->nr_requests * 2;
__FUNCTION__, depth);
}
- tags->tag_index = kmalloc(depth * sizeof(struct request *), GFP_ATOMIC);
- if (!tags->tag_index)
+ tag_index = kmalloc(depth * sizeof(struct request *), GFP_ATOMIC);
+ if (!tag_index)
goto fail;
bits = (depth / BLK_TAGS_PER_LONG) + 1;
- tags->tag_map = kmalloc(bits * sizeof(unsigned long), GFP_ATOMIC);
- if (!tags->tag_map)
+ tag_map = kmalloc(bits * sizeof(unsigned long), GFP_ATOMIC);
+ if (!tag_map)
goto fail;
- memset(tags->tag_index, 0, depth * sizeof(struct request *));
- memset(tags->tag_map, 0, bits * sizeof(unsigned long));
+ memset(tag_index, 0, depth * sizeof(struct request *));
+ memset(tag_map, 0, bits * sizeof(unsigned long));
tags->max_depth = depth;
tags->real_max_depth = bits * BITS_PER_LONG;
+ tags->tag_index = tag_index;
+ tags->tag_map = tag_map;
/*
* set the upper bits if the depth isn't a multiple of the word size
*/
for (i = depth; i < bits * BLK_TAGS_PER_LONG; i++)
- __set_bit(i, tags->tag_map);
+ __set_bit(i, tag_map);
- INIT_LIST_HEAD(&tags->busy_list);
- tags->busy = 0;
- atomic_set(&tags->refcnt, 1);
return 0;
fail:
- kfree(tags->tag_index);
+ kfree(tag_index);
return -ENOMEM;
}
int blk_queue_init_tags(request_queue_t *q, int depth,
struct blk_queue_tag *tags)
{
- if (!tags) {
+ int rc;
+
+ BUG_ON(tags && q->queue_tags && tags != q->queue_tags);
+
+ if (!tags && !q->queue_tags) {
tags = kmalloc(sizeof(struct blk_queue_tag), GFP_ATOMIC);
if (!tags)
goto fail;
if (init_tag_map(q, tags, depth))
goto fail;
+
+ INIT_LIST_HEAD(&tags->busy_list);
+ tags->busy = 0;
+ atomic_set(&tags->refcnt, 1);
+ } else if (q->queue_tags) {
+ if ((rc = blk_queue_resize_tags(q, depth)))
+ return rc;
+ set_bit(QUEUE_FLAG_QUEUED, &q->queue_flags);
+ return 0;
} else
atomic_inc(&tags->refcnt);
if (rl->rq_pool)
mempool_destroy(rl->rq_pool);
- if (blk_queue_tagged(q))
- blk_queue_free_tags(q);
+ if (q->queue_tags)
+ __blk_queue_free_tags(q);
kmem_cache_free(requestq_cachep, q);
}
}
rq->flags |= REQ_NOMERGE;
- rq->waiting = &wait;
+ if (!rq->waiting)
+ rq->waiting = &wait;
elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 1);
generic_unplug_device(q);
- wait_for_completion(&wait);
+ wait_for_completion(rq->waiting);
rq->waiting = NULL;
if (rq->errors)
EXPORT_SYMBOL(blk_execute_rq);
+/**
+ * blkdev_issue_flush - queue a flush
+ * @bdev: blockdev to issue flush for
+ * @error_sector: optional location to store the offset of a flush error
+ *
+ * Description:
+ * Issue a flush for the block device in question. Caller can supply
+ * room for storing the error offset in case of a flush error, if they
+ * wish to. Caller must run wait_for_completion() on its own.
+ */
+int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector)
+{
+ request_queue_t *q;
+
+ if (bdev->bd_disk == NULL)
+ return -ENXIO;
+
+ q = bdev_get_queue(bdev);
+ if (!q)
+ return -ENXIO;
+ if (!q->issue_flush_fn)
+ return -EOPNOTSUPP;
+
+ return q->issue_flush_fn(q, bdev->bd_disk, error_sector);
+}
+
+EXPORT_SYMBOL(blkdev_issue_flush);
+
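
An illustrative caller, e.g. a filesystem flushing its backing device before declaring data stable; my_flush_backing_dev() is a made-up name:

	static int my_flush_backing_dev(struct block_device *bdev)
	{
		sector_t bad_sector;
		int err = blkdev_issue_flush(bdev, &bad_sector);

		/* a device with no flush support is not an error for us */
		if (err == -EOPNOTSUPP)
			err = 0;
		return err;
	}
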
+/**
+ * blkdev_scsi_issue_flush_fn - issue flush for SCSI devices
+ * @q: device queue
+ * @disk: gendisk
+ * @error_sector: error offset
+ *
+ * Description:
+ * Devices understanding the SCSI command set can use this function as
+ * a helper for issuing a cache flush. Note: the driver is required to
+ * store the error offset (in case of a flush error) in ->sector of the
+ * struct request.
+ */
+int blkdev_scsi_issue_flush_fn(request_queue_t *q, struct gendisk *disk,
+ sector_t *error_sector)
+{
+ struct request *rq = blk_get_request(q, WRITE, __GFP_WAIT);
+ int ret;
+
+ rq->flags |= REQ_BLOCK_PC | REQ_SOFTBARRIER;
+ rq->sector = 0;
+ memset(rq->cmd, 0, sizeof(rq->cmd));
+	rq->cmd[0] = 0x35;		/* SYNCHRONIZE_CACHE */
+ rq->cmd_len = 12;
+ rq->data = NULL;
+ rq->data_len = 0;
+ rq->timeout = 60 * HZ;
+
+ ret = blk_execute_rq(q, disk, rq);
+
+ if (ret && error_sector)
+ *error_sector = rq->sector;
+
+ blk_put_request(rq);
+ return ret;
+}
+
+EXPORT_SYMBOL(blkdev_scsi_issue_flush_fn);
+
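
A driver for a device speaking the SCSI command set can wire the generic helper straight into its queue at init time; a sketch with an illustrative function name:

	static void my_scsi_init_queue(request_queue_t *q)
	{
		blk_queue_ordered(q, 1);
		blk_queue_issue_flush_fn(q, blkdev_scsi_issue_flush_fn);
	}
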
void drive_stat_acct(struct request *rq, int nr_sectors, int new_io)
{
int rw = rq_data_dir(rq);
static int __make_request(request_queue_t *q, struct bio *bio)
{
struct request *req, *freereq = NULL;
- int el_ret, rw, nr_sectors, cur_nr_sectors, barrier, ra;
+ int el_ret, rw, nr_sectors, cur_nr_sectors, barrier, err;
sector_t sector;
sector = bio->bi_sector;
spin_lock_prefetch(q->queue_lock);
- barrier = test_bit(BIO_RW_BARRIER, &bio->bi_rw);
-
- ra = bio->bi_rw & (1 << BIO_RW_AHEAD);
+ barrier = bio_barrier(bio);
+ if (barrier && !(q->queue_flags & (1 << QUEUE_FLAG_ORDERED))) {
+ err = -EOPNOTSUPP;
+ goto end_io;
+ }
again:
spin_lock_irq(q->queue_lock);
/*
* READA bit set
*/
- if (ra)
+ err = -EWOULDBLOCK;
+ if (bio_rw_ahead(bio))
goto end_io;
freereq = get_request_wait(q, rw);
req->flags |= REQ_CMD;
/*
- * inherit FAILFAST from bio and don't stack up
- * retries for read ahead
+ * inherit FAILFAST from bio (for read-ahead, and explicit FAILFAST)
*/
- if (ra || test_bit(BIO_RW_FAILFAST, &bio->bi_rw))
+ if (bio_rw_ahead(bio) || bio_failfast(bio))
req->flags |= REQ_FAILFAST;
/*
return 0;
end_io:
- bio_endio(bio, nr_sectors << 9, -EWOULDBLOCK);
+ bio_endio(bio, nr_sectors << 9, err);
return 0;
}
sector_t maxsector;
int ret, nr_sectors = bio_sectors(bio);
+ might_sleep();
/* Test device or partition size, when known. */
maxsector = bio->bi_bdev->bd_inode->i_size >> 9;
if (maxsector) {
break;
}
- if (unlikely(bio_sectors(bio) > q->max_sectors)) {
+ if (unlikely(bio_sectors(bio) > q->max_hw_sectors)) {
printk("bio too big device %s (%u > %u)\n",
bdevname(bio->bi_bdev, b),
bio_sectors(bio),
- q->max_sectors);
+ q->max_hw_sectors);
goto end_io;
}
static int __end_that_request_first(struct request *req, int uptodate,
int nr_bytes)
{
- int total_bytes, bio_nbytes, error = 0, next_idx = 0;
+ int total_bytes, bio_nbytes, error, next_idx = 0;
struct bio *bio;
+ /*
+ * extend uptodate bool to allow < 0 value to be direct io error
+ */
+ error = 0;
+ if (end_io_error(uptodate))
+ error = !uptodate ? -EIO : uptodate;
+
/*
* for a REQ_BLOCK_PC request, we want to carry any eventual
* sense key with us all the way through
req->errors = 0;
if (!uptodate) {
- error = -EIO;
if (blk_fs_request(req) && !(req->flags & REQ_QUIET))
printk("end_request: I/O error, dev %s, sector %llu\n",
req->rq_disk ? req->rq_disk->disk_name : "?",
/**
* end_that_request_first - end I/O on a request
* @req: the request being processed
- * @uptodate: 0 for I/O error
+ * @uptodate: 1 for success, 0 for I/O error, < 0 for specific error
* @nr_sectors: number of sectors to end I/O on
*
* Description:
/**
* end_that_request_chunk - end I/O on a request
* @req: the request being processed
- * @uptodate: 0 for I/O error
+ * @uptodate: 1 for success, 0 for I/O error, < 0 for specific error
* @nr_bytes: number of bytes to complete
*
* Description:
unsigned long ra_kb;
ssize_t ret = queue_var_store(&ra_kb, page, count);
+ spin_lock_irq(q->queue_lock);
if (ra_kb > (q->max_sectors >> 1))
ra_kb = (q->max_sectors >> 1);
q->backing_dev_info.ra_pages = ra_kb >> (PAGE_CACHE_SHIFT - 10);
+ spin_unlock_irq(q->queue_lock);
+
return ret;
}
+static ssize_t queue_max_sectors_show(struct request_queue *q, char *page)
+{
+ int max_sectors_kb = q->max_sectors >> 1;
+
+ return queue_var_show(max_sectors_kb, (page));
+}
+
+static ssize_t
+queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)
+{
+ unsigned long max_sectors_kb,
+ max_hw_sectors_kb = q->max_hw_sectors >> 1,
+ page_kb = 1 << (PAGE_CACHE_SHIFT - 10);
+ ssize_t ret = queue_var_store(&max_sectors_kb, page, count);
+ int ra_kb;
+
+ if (max_sectors_kb > max_hw_sectors_kb || max_sectors_kb < page_kb)
+ return -EINVAL;
+ /*
+ * Take the queue lock to update the readahead and max_sectors
+ * values synchronously:
+ */
+ spin_lock_irq(q->queue_lock);
+ /*
+ * Trim readahead window as well, if necessary:
+ */
+ ra_kb = q->backing_dev_info.ra_pages << (PAGE_CACHE_SHIFT - 10);
+ if (ra_kb > max_sectors_kb)
+ q->backing_dev_info.ra_pages =
+ max_sectors_kb >> (PAGE_CACHE_SHIFT - 10);
+
+ q->max_sectors = max_sectors_kb << 1;
+ spin_unlock_irq(q->queue_lock);
+
+ return ret;
+}
+
+static ssize_t queue_max_hw_sectors_show(struct request_queue *q, char *page)
+{
+ int max_hw_sectors_kb = q->max_hw_sectors >> 1;
+
+ return queue_var_show(max_hw_sectors_kb, (page));
+}
+
static struct queue_sysfs_entry queue_requests_entry = {
.attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR },
.show = queue_requests_show,
.store = queue_ra_store,
};
+static struct queue_sysfs_entry queue_max_sectors_entry = {
+ .attr = {.name = "max_sectors_kb", .mode = S_IRUGO | S_IWUSR },
+ .show = queue_max_sectors_show,
+ .store = queue_max_sectors_store,
+};
+
+static struct queue_sysfs_entry queue_max_hw_sectors_entry = {
+ .attr = {.name = "max_hw_sectors_kb", .mode = S_IRUGO },
+ .show = queue_max_hw_sectors_show,
+};
+
static struct attribute *default_attrs[] = {
&queue_requests_entry.attr,
&queue_ra_entry.attr,
+ &queue_max_hw_sectors_entry.attr,
+ &queue_max_sectors_entry.attr,
NULL,
};
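
From userspace the new attribute behaves like any other queue sysfs file; a rough C sketch (device path and value are placeholders, and the value must stay between one page and max_hw_sectors_kb):

	#include <stdio.h>

	int main(void)
	{
		FILE *f = fopen("/sys/block/sda/queue/max_sectors_kb", "w");

		if (!f)
			return 1;
		fprintf(f, "128\n");	/* clamp the software limit to 128 KB */
		return fclose(f) ? 1 : 0;
	}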