vserver 2.0 rc7

diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c
index 3994af7..11ef9d9 100644
--- a/drivers/block/ll_rw_blk.c
+++ b/drivers/block/ll_rw_blk.c
@@ -238,7 +238,7 @@ void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn)
        q->make_request_fn = mfn;
        q->backing_dev_info.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
        q->backing_dev_info.state = 0;
-       q->backing_dev_info.memory_backed = 0;
+       q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY;
        blk_queue_max_sectors(q, MAX_SECTORS);
        blk_queue_hardsect_size(q, 512);
        blk_queue_dma_alignment(q, 511);
@@ -267,6 +267,25 @@ void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn)
 
 EXPORT_SYMBOL(blk_queue_make_request);
 
+static inline void rq_init(request_queue_t *q, struct request *rq)
+{
+       INIT_LIST_HEAD(&rq->queuelist);
+
+       rq->errors = 0;
+       rq->rq_status = RQ_ACTIVE;
+       rq->bio = rq->biotail = NULL;
+       rq->buffer = NULL;
+       rq->ref_count = 1;
+       rq->q = q;
+       rq->waiting = NULL;
+       rq->special = NULL;
+       rq->data_len = 0;
+       rq->data = NULL;
+       rq->sense = NULL;
+       rq->end_io = NULL;
+       rq->end_io_data = NULL;
+}
+
 /**
  * blk_queue_ordered - does this queue support ordered writes
  * @q:     the request queue
@@ -281,10 +300,26 @@ EXPORT_SYMBOL(blk_queue_make_request);
  **/
 void blk_queue_ordered(request_queue_t *q, int flag)
 {
-       if (flag)
-               set_bit(QUEUE_FLAG_ORDERED, &q->queue_flags);
-       else
-               clear_bit(QUEUE_FLAG_ORDERED, &q->queue_flags);
+       switch (flag) {
+               case QUEUE_ORDERED_NONE:
+                       if (q->flush_rq)
+                               kmem_cache_free(request_cachep, q->flush_rq);
+                       q->flush_rq = NULL;
+                       q->ordered = flag;
+                       break;
+               case QUEUE_ORDERED_TAG:
+                       q->ordered = flag;
+                       break;
+               case QUEUE_ORDERED_FLUSH:
+                       q->ordered = flag;
+                       if (!q->flush_rq)
+                               q->flush_rq = kmem_cache_alloc(request_cachep,
+                                                               GFP_KERNEL);
+                       break;
+               default:
+                       printk(KERN_ERR "blk_queue_ordered: bad value %d\n", flag);
+                       break;
+       }
 }
 
 EXPORT_SYMBOL(blk_queue_ordered);
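
For context, a driver opting into the new flush-based ordering registers its flush hooks and picks a mode at queue setup time. The following is only a sketch: the my_prepare_flush()/my_end_flush() callbacks, and assigning them directly to the queue, are assumptions for illustration rather than anything this patch mandates.

    /* Hypothetical driver setup -- callback names are illustrative only. */
    static int my_prepare_flush(request_queue_t *q, struct request *flush_rq)
    {
            /*
             * Turn flush_rq into whatever cache-flush command the driver's
             * request_fn understands.  Returning 0 means no flush is needed,
             * and the block layer marks both pre and post flush as done.
             */
            flush_rq->special = q->queuedata;   /* driver-private marker (assumed) */
            return 1;
    }

    static void my_end_flush(request_queue_t *q, struct request *flush_rq)
    {
            /*
             * Called when a flush finishes (or the pre flush fails).  The
             * original barrier request is reachable as flush_rq->end_io_data;
             * finishing it here is assumed to be the driver's job.
             */
    }

    static void my_setup_ordering(request_queue_t *q)
    {
            q->prepare_flush_fn = my_prepare_flush;
            q->end_flush_fn = my_end_flush;
            /* QUEUE_ORDERED_FLUSH allocates q->flush_rq; _TAG and _NONE do not */
            blk_queue_ordered(q, QUEUE_ORDERED_FLUSH);
    }
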
@@ -306,6 +341,169 @@ void blk_queue_issue_flush_fn(request_queue_t *q, issue_flush_fn *iff)
 
 EXPORT_SYMBOL(blk_queue_issue_flush_fn);
 
+/*
+ * Cache flush handling for ordered writes
+ */
+static void blk_pre_flush_end_io(struct request *flush_rq)
+{
+       struct request *rq = flush_rq->end_io_data;
+       request_queue_t *q = rq->q;
+
+       rq->flags |= REQ_BAR_PREFLUSH;
+
+       if (!flush_rq->errors)
+               elv_requeue_request(q, rq);
+       else {
+               q->end_flush_fn(q, flush_rq);
+               clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags);
+               q->request_fn(q);
+       }
+}
+
+static void blk_post_flush_end_io(struct request *flush_rq)
+{
+       struct request *rq = flush_rq->end_io_data;
+       request_queue_t *q = rq->q;
+
+       rq->flags |= REQ_BAR_POSTFLUSH;
+
+       q->end_flush_fn(q, flush_rq);
+       clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags);
+       q->request_fn(q);
+}
+
+struct request *blk_start_pre_flush(request_queue_t *q, struct request *rq)
+{
+       struct request *flush_rq = q->flush_rq;
+
+       BUG_ON(!blk_barrier_rq(rq));
+
+       if (test_and_set_bit(QUEUE_FLAG_FLUSH, &q->queue_flags))
+               return NULL;
+
+       rq_init(q, flush_rq);
+       flush_rq->elevator_private = NULL;
+       flush_rq->flags = REQ_BAR_FLUSH;
+       flush_rq->rq_disk = rq->rq_disk;
+       flush_rq->rl = NULL;
+
+       /*
+        * if prepare_flush returns 0, no flush is needed; just mark both
+        * the pre and post flush as done in that case
+        */
+       if (!q->prepare_flush_fn(q, flush_rq)) {
+               rq->flags |= REQ_BAR_PREFLUSH | REQ_BAR_POSTFLUSH;
+               clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags);
+               return rq;
+       }
+
+       /*
+        * some drivers dequeue requests right away, some only after io
+        * completion. make sure the request is dequeued.
+        */
+       if (!list_empty(&rq->queuelist))
+               blkdev_dequeue_request(rq);
+
+       elv_deactivate_request(q, rq);
+
+       flush_rq->end_io_data = rq;
+       flush_rq->end_io = blk_pre_flush_end_io;
+
+       __elv_add_request(q, flush_rq, ELEVATOR_INSERT_FRONT, 0);
+       return flush_rq;
+}
+
+static void blk_start_post_flush(request_queue_t *q, struct request *rq)
+{
+       struct request *flush_rq = q->flush_rq;
+
+       BUG_ON(!blk_barrier_rq(rq));
+
+       rq_init(q, flush_rq);
+       flush_rq->elevator_private = NULL;
+       flush_rq->flags = REQ_BAR_FLUSH;
+       flush_rq->rq_disk = rq->rq_disk;
+       flush_rq->rl = NULL;
+
+       if (q->prepare_flush_fn(q, flush_rq)) {
+               flush_rq->end_io_data = rq;
+               flush_rq->end_io = blk_post_flush_end_io;
+
+               __elv_add_request(q, flush_rq, ELEVATOR_INSERT_FRONT, 0);
+               q->request_fn(q);
+       }
+}
+
+static inline int blk_check_end_barrier(request_queue_t *q, struct request *rq,
+                                       int sectors)
+{
+       if (sectors > rq->nr_sectors)
+               sectors = rq->nr_sectors;
+
+       rq->nr_sectors -= sectors;
+       return rq->nr_sectors;
+}
+
+static int __blk_complete_barrier_rq(request_queue_t *q, struct request *rq,
+                                    int sectors, int queue_locked)
+{
+       if (q->ordered != QUEUE_ORDERED_FLUSH)
+               return 0;
+       if (!blk_fs_request(rq) || !blk_barrier_rq(rq))
+               return 0;
+       if (blk_barrier_postflush(rq))
+               return 0;
+
+       if (!blk_check_end_barrier(q, rq, sectors)) {
+               unsigned long flags = 0;
+
+               if (!queue_locked)
+                       spin_lock_irqsave(q->queue_lock, flags);
+
+               blk_start_post_flush(q, rq);
+
+               if (!queue_locked)
+                       spin_unlock_irqrestore(q->queue_lock, flags);
+       }
+
+       return 1;
+}
+
+/**
+ * blk_complete_barrier_rq - complete possible barrier request
+ * @q:  the request queue for the device
+ * @rq:  the request
+ * @sectors:  number of sectors to complete
+ *
+ * Description:
+ *   Used in driver end_io handling to determine whether to postpone
+ *   completion of a barrier request until a post flush has been done. This
+ *   is the unlocked variant, used if the caller doesn't already hold the
+ *   queue lock.
+ **/
+int blk_complete_barrier_rq(request_queue_t *q, struct request *rq, int sectors)
+{
+       return __blk_complete_barrier_rq(q, rq, sectors, 0);
+}
+EXPORT_SYMBOL(blk_complete_barrier_rq);
+
+/**
+ * blk_complete_barrier_rq_locked - complete possible barrier request
+ * @q:  the request queue for the device
+ * @rq:  the request
+ * @sectors:  number of sectors to complete
+ *
+ * Description:
+ *   See blk_complete_barrier_rq(). This variant must be used if the caller
+ *   holds the queue lock.
+ **/
+int blk_complete_barrier_rq_locked(request_queue_t *q, struct request *rq,
+                                  int sectors)
+{
+       return __blk_complete_barrier_rq(q, rq, sectors, 1);
+}
+EXPORT_SYMBOL(blk_complete_barrier_rq_locked);
+
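
Putting the two exports together, a driver's completion path asks the block layer whether a finished chunk belongs to a barrier write whose completion must wait for the post flush; the _locked variant is for callers that already hold the queue lock. A rough sketch, with my_end_request() standing in for a real driver's completion handler and queue-lock handling omitted:

    /* Hypothetical completion handler -- not taken from this patch. */
    static void my_end_request(request_queue_t *q, struct request *rq,
                               int uptodate, int nr_sectors)
    {
            /*
             * For a barrier write on a QUEUE_ORDERED_FLUSH queue this returns
             * 1: the block layer accounts the completed sectors itself and,
             * once they are all done, queues the post flush.  Final completion
             * of the barrier request then happens from the driver's
             * end_flush_fn, not here.
             */
            if (blk_complete_barrier_rq(q, rq, nr_sectors))
                    return;

            /* normal, non-barrier completion */
            if (!end_that_request_first(rq, uptodate, nr_sectors)) {
                    blkdev_dequeue_request(rq);
                    end_that_request_last(rq);
            }
    }
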
 /**
  * blk_queue_bounce_limit - set bounce buffer limit for queue
  * @q:  the request queue for the device
@@ -618,6 +816,7 @@ fail:
  * blk_queue_init_tags - initialize the queue tag info
  * @q:  the request queue for the device
  * @depth:  the maximum queue depth supported
+ * @tags: the existing tag map to use, or NULL to allocate a new one
  **/
 int blk_queue_init_tags(request_queue_t *q, int depth,
                        struct blk_queue_tag *tags)
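
The new @tags argument lets a second queue reuse an existing tag map instead of always allocating a private one; passing NULL keeps the old behaviour. A minimal sketch (the depth of 64 and the sharing pattern are assumptions for illustration):

    /* Hypothetical: two queues on the same controller sharing one tag map. */
    static int my_setup_tags(request_queue_t *q1, request_queue_t *q2)
    {
            if (blk_queue_init_tags(q1, 64, NULL))          /* private map */
                    return -ENOMEM;

            /* second queue piggybacks on q1's map (assumed usage) */
            return blk_queue_init_tags(q2, 64, q1->queue_tags);
    }
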
@@ -1390,7 +1589,8 @@ void blk_run_queue(struct request_queue *q)
 
        spin_lock_irqsave(q->queue_lock, flags);
        blk_remove_plug(q);
-       q->request_fn(q);
+       if (!elv_queue_empty(q))
+               q->request_fn(q);
        spin_unlock_irqrestore(q->queue_lock, flags);
 }
 EXPORT_SYMBOL(blk_run_queue);
@@ -1428,6 +1628,8 @@ void blk_cleanup_queue(request_queue_t * q)
        if (q->queue_tags)
                __blk_queue_free_tags(q);
 
+       blk_queue_ordered(q, QUEUE_ORDERED_NONE);
+
        kmem_cache_free(requestq_cachep, q);
 }
 
@@ -1513,6 +1715,15 @@ request_queue_t *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock)
        if (blk_init_free_list(q))
                goto out_init;
 
+       /*
+        * if caller didn't supply a lock, they get per-queue locking with
+        * our embedded lock
+        */
+       if (!lock) {
+               spin_lock_init(&q->__queue_lock);
+               lock = &q->__queue_lock;
+       }
+
        q->request_fn           = rfn;
        q->back_merge_fn        = ll_back_merge_fn;
        q->front_merge_fn       = ll_front_merge_fn;
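
Callers of blk_init_queue() can now pass a NULL lock and let the queue fall back to its embedded spinlock. A sketch, with my_request_fn() standing in for a real driver's request function:

    /* Hypothetical request function and queue setup. */
    static void my_request_fn(request_queue_t *q)
    {
            struct request *rq;

            while ((rq = elv_next_request(q)) != NULL) {
                    blkdev_dequeue_request(rq);
                    /* hand rq to the hardware here */
            }
    }

    static request_queue_t *my_alloc_queue(void)
    {
            /* NULL lock: the queue uses its own embedded __queue_lock */
            return blk_init_queue(my_request_fn, NULL);
    }
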
@@ -1739,21 +1950,8 @@ rq_starved:
        if (ioc_batching(q, ioc))
                ioc->nr_batch_requests--;
        
-       INIT_LIST_HEAD(&rq->queuelist);
-
-       rq->errors = 0;
-       rq->rq_status = RQ_ACTIVE;
-       rq->bio = rq->biotail = NULL;
-       rq->buffer = NULL;
-       rq->ref_count = 1;
-       rq->q = q;
+       rq_init(q, rq);
        rq->rl = rl;
-       rq->waiting = NULL;
-       rq->special = NULL;
-       rq->data_len = 0;
-       rq->data = NULL;
-       rq->sense = NULL;
-
 out:
        put_io_context(ioc);
        return rq;
@@ -1963,7 +2161,7 @@ EXPORT_SYMBOL(blk_rq_map_user);
 /**
  * blk_rq_unmap_user - unmap a request with user data
  * @rq:                request to be unmapped
- * @ubuf:      user buffer
+ * @bio:       bio for the request
  * @ulen:      length of user buffer
  *
  * Description:
@@ -2018,11 +2216,11 @@ int blk_execute_rq(request_queue_t *q, struct gendisk *bd_disk,
        }
 
        rq->flags |= REQ_NOMERGE;
-       if (!rq->waiting)
-               rq->waiting = &wait;
+       rq->waiting = &wait;
+       rq->end_io = blk_end_sync_rq;
        elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 1);
        generic_unplug_device(q);
-       wait_for_completion(rq->waiting);
+       wait_for_completion(&wait);
        rq->waiting = NULL;
 
        if (rq->errors)
@@ -2171,7 +2369,7 @@ void disk_round_stats(struct gendisk *disk)
 /*
  * queue lock must be held
  */
-void __blk_put_request(request_queue_t *q, struct request *req)
+static void __blk_put_request(request_queue_t *q, struct request *req)
 {
        struct request_list *rl = req->rl;
 
@@ -2218,6 +2416,25 @@ void blk_put_request(struct request *req)
 
 EXPORT_SYMBOL(blk_put_request);
 
+/**
+ * blk_end_sync_rq - executes a completion event on a request
+ * @rq: request to complete
+ */
+void blk_end_sync_rq(struct request *rq)
+{
+       struct completion *waiting = rq->waiting;
+
+       rq->waiting = NULL;
+       __blk_put_request(rq->q, rq);
+
+       /*
+        * complete last: if this is a stack request, the process (and thus
+        * the rq pointer) could be invalid right after this complete()
+        */
+       complete(waiting);
+}
+EXPORT_SYMBOL(blk_end_sync_rq);
+
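
blk_execute_rq() above now sets this up itself, but the same pattern can be open-coded for a hand-inserted request: take an extra reference, point rq->waiting at a completion, set rq->end_io, and wait. A hedged sketch (request preparation and error handling are left out):

    /* Hypothetical synchronous execution of an already prepared request. */
    static int my_execute_rq(request_queue_t *q, struct request *rq)
    {
            DECLARE_COMPLETION(wait);

            /* extra reference so rq can still be looked at after completion */
            rq->ref_count++;
            rq->waiting = &wait;
            rq->end_io = blk_end_sync_rq;   /* drops one reference when done */

            elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 1);
            generic_unplug_device(q);
            wait_for_completion(&wait);
            rq->waiting = NULL;

            return rq->errors ? -EIO : 0;   /* caller still owns a reference */
    }
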
 /**
  * blk_congestion_wait - wait for a queue to become uncongested
  * @rw: READ or WRITE
@@ -2352,7 +2569,7 @@ EXPORT_SYMBOL(__blk_attempt_remerge);
 static int __make_request(request_queue_t *q, struct bio *bio)
 {
        struct request *req, *freereq = NULL;
-       int el_ret, rw, nr_sectors, cur_nr_sectors, barrier, err;
+       int el_ret, rw, nr_sectors, cur_nr_sectors, barrier, err, sync;
        sector_t sector;
 
        sector = bio->bi_sector;
@@ -2360,6 +2577,7 @@ static int __make_request(request_queue_t *q, struct bio *bio)
        cur_nr_sectors = bio_cur_sectors(bio);
 
        rw = bio_data_dir(bio);
+       sync = bio_sync(bio);
 
        /*
         * low level driver can indicate that it wants pages above a
@@ -2371,7 +2589,7 @@ static int __make_request(request_queue_t *q, struct bio *bio)
        spin_lock_prefetch(q->queue_lock);
 
        barrier = bio_barrier(bio);
-       if (barrier && !(q->queue_flags & (1 << QUEUE_FLAG_ORDERED))) {
+       if (barrier && (q->ordered == QUEUE_ORDERED_NONE)) {
                err = -EOPNOTSUPP;
                goto end_io;
        }
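
From a submitter's point of view, the stricter check means a barrier bio on a queue left at QUEUE_ORDERED_NONE now fails with -EOPNOTSUPP in its bi_end_io. A sketch of how that might be observed (my_end_io(), the completion in bi_private, and the fallback policy are illustrative assumptions):

    /* Hypothetical barrier write and its completion handler. */
    static int my_end_io(struct bio *bio, unsigned int bytes_done, int error)
    {
            if (bio->bi_size)
                    return 1;               /* not done yet */

            if (error == -EOPNOTSUPP) {
                    /* queue has no ordering mode set; the caller would have
                     * to fall back to a plain write */
            }

            complete((struct completion *) bio->bi_private);
            return 0;
    }

    static void my_submit_barrier(struct block_device *bdev, struct bio *bio,
                                  struct completion *done)
    {
            bio->bi_bdev = bdev;
            bio->bi_private = done;
            bio->bi_end_io = my_end_io;
            submit_bio(WRITE | (1 << BIO_RW_BARRIER), bio);
    }
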
@@ -2491,7 +2709,7 @@ get_rq:
 out:
        if (freereq)
                __blk_put_request(q, freereq);
-       if (bio_sync(bio))
+       if (sync)
                __generic_unplug_device(q);
 
        spin_unlock_irq(q->queue_lock);
@@ -2978,7 +3196,6 @@ EXPORT_SYMBOL(end_that_request_chunk);
 void end_that_request_last(struct request *req)
 {
        struct gendisk *disk = req->rq_disk;
-       struct completion *waiting = req->waiting;
 
        if (unlikely(laptop_mode) && blk_fs_request(req))
                laptop_io_completion();
@@ -2998,10 +3215,10 @@ void end_that_request_last(struct request *req)
                disk_round_stats(disk);
                disk->in_flight--;
        }
-       __blk_put_request(req->q, req);
-       /* Do this LAST! The structure may be freed immediately afterwards */
-       if (waiting)
-               complete(waiting);
+       if (req->end_io)
+               req->end_io(req);
+       else
+               __blk_put_request(req->q, req);
 }
 
 EXPORT_SYMBOL(end_that_request_last);