X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=include%2Flinux%2Fblkdev.h;h=aafe82788b4efcaebf6d7513d98a3fc038124c4c;hb=16c70f8c1b54b61c3b951b6fb220df250fe09b32;hp=eb4d10be7e6003d34674f812ec3a2e3b7174f8a6;hpb=9bf4aaab3e101692164d49b7ca357651eb691cb6;p=linux-2.6.git

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index eb4d10be7..aafe82788 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1,7 +1,6 @@
 #ifndef _LINUX_BLKDEV_H
 #define _LINUX_BLKDEV_H
 
-#include <linux/config.h>
 #include <linux/major.h>
 #include <linux/genhd.h>
 #include <linux/list.h>
@@ -17,11 +16,14 @@
 #include <asm/scatterlist.h>
 
+struct scsi_ioctl_command;
+
 struct request_queue;
 typedef struct request_queue request_queue_t;
-struct elevator_s;
-typedef struct elevator_s elevator_t;
+struct elevator_queue;
+typedef struct elevator_queue elevator_t;
 struct request_pm_state;
+struct blk_trace;
 
 #define BLKDEV_MIN_RQ	4
 #define BLKDEV_MAX_RQ	128	/* Default maximum */
@@ -52,6 +54,33 @@ struct as_io_context {
 	sector_t seek_mean;
 };
 
+struct cfq_queue;
+struct cfq_io_context {
+	struct rb_node rb_node;
+	void *key;
+
+	struct cfq_queue *cfqq[2];
+
+	struct io_context *ioc;
+
+	unsigned long last_end_request;
+	sector_t last_request_pos;
+	unsigned long last_queue;
+
+	unsigned long ttime_total;
+	unsigned long ttime_samples;
+	unsigned long ttime_mean;
+
+	unsigned int seek_samples;
+	u64 seek_total;
+	sector_t seek_mean;
+
+	struct list_head queue_list;
+
+	void (*dtor)(struct io_context *); /* destructor */
+	void (*exit)(struct io_context *); /* called on task exit */
+};
+
 /*
  * This is the per-process I/O subsystem state. It is refcounted and
  * kmalloc'ed. Currently all fields are modified in process io context
@@ -59,7 +88,9 @@ struct as_io_context {
  */
 struct io_context {
 	atomic_t refcount;
-	pid_t pid;
+	struct task_struct *task;
+
+	int (*set_ioprio)(struct io_context *, unsigned int);
 
 	/*
 	 * For request batching
@@ -68,16 +99,23 @@ struct io_context {
 	int nr_batch_requests;     /* Number of requests left in the batch */
 
 	struct as_io_context *aic;
+	struct rb_root cic_root;
 };
 
 void put_io_context(struct io_context *ioc);
 void exit_io_context(void);
-struct io_context *get_io_context(int gfp_flags);
+struct io_context *current_io_context(gfp_t gfp_flags);
+struct io_context *get_io_context(gfp_t gfp_flags);
 void copy_io_context(struct io_context **pdst, struct io_context **psrc);
 void swap_io_context(struct io_context **ioc1, struct io_context **ioc2);
 
+struct request;
+typedef void (rq_end_io_fn)(struct request *, int);
+
 struct request_list {
 	int count[2];
+	int starved[2];
+	int elvpriv;
 	mempool_t *rq_pool;
 	wait_queue_head_t wait[2];
 };
@@ -88,9 +126,9 @@ struct request_list {
  * try to put the fields that are referenced together in the same cacheline
 */
 struct request {
-	struct list_head queuelist; /* looking for ->queue? you must _not_
-				     * access it directly, use
-				     * blkdev_dequeue_request! */
+	struct list_head queuelist;
+	struct list_head donelist;
+
 	unsigned long flags;		/* see REQ_ bits below */
 
 	/* Maintain bio traversal state for part by part I/O submission.
@@ -107,20 +145,15 @@ struct request {
 	/* no. of sectors left to complete in the current segment */
 	unsigned int hard_cur_sectors;
 
-	/* no. of segments left to submit in the current bio */
-	unsigned short nr_cbio_segments;
-	/* no. of sectors left to submit in the current bio */
-	unsigned long nr_cbio_sectors;
-
-	struct bio *cbio;		/* next bio to submit */
-	struct bio *bio;		/* next unfinished bio to complete */
+	struct bio *bio;
 	struct bio *biotail;
 
 	void *elevator_private;
+	void *completion_data;
 
 	int rq_status;	/* should split this into a few status bits */
-	struct gendisk *rq_disk;
 	int errors;
+	struct gendisk *rq_disk;
 	unsigned long start_time;
 
 	/* Number of scatter-gather DMA addr+len pairs after
@@ -135,8 +168,9 @@ struct request {
 	 */
 	unsigned short nr_hw_segments;
 
+	unsigned short ioprio;
+
 	int tag;
-	char *buffer;
 
 	int ref_count;
 	request_queue_t *q;
@@ -144,6 +178,7 @@ struct request {
 
 	struct completion *waiting;
 	void *special;
+	char *buffer;
 
 	/*
	 * when request is used as a packet command carrier
@@ -152,17 +187,18 @@ struct request {
 	unsigned char cmd[BLK_MAX_CDB];
 
 	unsigned int data_len;
-	void *data;
-
 	unsigned int sense_len;
+	void *data;
 	void *sense;
 
 	unsigned int timeout;
+	int retries;
 
 	/*
-	 * For Power Management requests
+	 * completion callback. end_io_data should be folded in with waiting
 	 */
-	struct request_pm_state *pm;
+	rq_end_io_fn *end_io;
+	void *end_io_data;
 };
 
 /*
@@ -171,13 +207,16 @@ struct request {
 enum rq_flag_bits {
 	__REQ_RW,		/* not set, read. set, write */
 	__REQ_FAILFAST,		/* no low level driver retries */
+	__REQ_SORTED,		/* elevator knows about this request */
 	__REQ_SOFTBARRIER,	/* may not be passed by ioscheduler */
 	__REQ_HARDBARRIER,	/* may not be passed by drive either */
+	__REQ_FUA,		/* forced unit access */
 	__REQ_CMD,		/* is a regular fs rw request */
 	__REQ_NOMERGE,		/* don't touch this for merging */
 	__REQ_STARTED,		/* drive already may have started this one */
 	__REQ_DONTPREP,		/* don't call prep for this one */
 	__REQ_QUEUED,		/* uses queueing */
+	__REQ_ELVPRIV,		/* elevator private data attached */
 	/*
 	 * for ATA/ATAPI devices
 	 */
@@ -195,18 +234,23 @@ enum rq_flag_bits {
 	__REQ_PM_SUSPEND,	/* suspend request */
 	__REQ_PM_RESUME,	/* resume request */
 	__REQ_PM_SHUTDOWN,	/* shutdown request */
+	__REQ_ORDERED_COLOR,	/* is before or after barrier */
+	__REQ_RW_SYNC,		/* request is sync (O_DIRECT) */
 	__REQ_NR_BITS,		/* stops here */
 };
 
 #define REQ_RW		(1 << __REQ_RW)
 #define REQ_FAILFAST	(1 << __REQ_FAILFAST)
+#define REQ_SORTED	(1 << __REQ_SORTED)
 #define REQ_SOFTBARRIER	(1 << __REQ_SOFTBARRIER)
 #define REQ_HARDBARRIER	(1 << __REQ_HARDBARRIER)
+#define REQ_FUA		(1 << __REQ_FUA)
 #define REQ_CMD		(1 << __REQ_CMD)
 #define REQ_NOMERGE	(1 << __REQ_NOMERGE)
 #define REQ_STARTED	(1 << __REQ_STARTED)
 #define REQ_DONTPREP	(1 << __REQ_DONTPREP)
 #define REQ_QUEUED	(1 << __REQ_QUEUED)
+#define REQ_ELVPRIV	(1 << __REQ_ELVPRIV)
 #define REQ_PC		(1 << __REQ_PC)
 #define REQ_BLOCK_PC	(1 << __REQ_BLOCK_PC)
 #define REQ_SENSE	(1 << __REQ_SENSE)
@@ -220,6 +264,8 @@ enum rq_flag_bits {
 #define REQ_PM_SUSPEND	(1 << __REQ_PM_SUSPEND)
 #define REQ_PM_RESUME	(1 << __REQ_PM_RESUME)
 #define REQ_PM_SHUTDOWN	(1 << __REQ_PM_SHUTDOWN)
+#define REQ_ORDERED_COLOR	(1 << __REQ_ORDERED_COLOR)
+#define REQ_RW_SYNC	(1 << __REQ_RW_SYNC)
 
 /*
  * State information carried for REQ_PM_SUSPEND and REQ_PM_RESUME
@@ -248,15 +294,15 @@ typedef void (unplug_fn) (request_queue_t *);
 struct bio_vec;
 typedef int (merge_bvec_fn) (request_queue_t *, struct bio *, struct bio_vec *);
 typedef void (activity_fn) (void *data, int rw);
+typedef int (issue_flush_fn) (request_queue_t *, struct gendisk *, sector_t *);
+typedef void (prepare_flush_fn) (request_queue_t *, struct request *);
+typedef void (softirq_done_fn)(struct request *);
 
 enum blk_queue_state {
Queue_down, Queue_up, }; -#define BLK_TAGS_PER_LONG (sizeof(unsigned long) * 8) -#define BLK_TAGS_MASK (BLK_TAGS_PER_LONG - 1) - struct blk_queue_tag { struct request **tag_index; /* map of busy tags */ unsigned long *tag_map; /* bit map of free/busy tags */ @@ -274,7 +320,7 @@ struct request_queue */ struct list_head queue_head; struct request *last_merge; - elevator_t elevator; + elevator_t *elevator; /* * the queue request freelist, one for reads and one for writes @@ -290,6 +336,15 @@ struct request_queue unplug_fn *unplug_fn; merge_bvec_fn *merge_bvec_fn; activity_fn *activity_fn; + issue_flush_fn *issue_flush_fn; + prepare_flush_fn *prepare_flush_fn; + softirq_done_fn *softirq_done_fn; + + /* + * Dispatch queue sorting + */ + sector_t end_sector; + struct request *boundary_rq; /* * Auto-unplugging state @@ -313,7 +368,7 @@ struct request_queue * queue needs bounce pages for pages above this limit */ unsigned long bounce_pfn; - int bounce_gfp; + gfp_t bounce_gfp; /* * various queue flags, see QUEUE_* below @@ -321,8 +376,11 @@ struct request_queue unsigned long queue_flags; /* - * protects queue structures from reentrancy + * protects queue structures from reentrancy. ->__queue_lock should + * _never_ be used directly, it is queue private. always use + * ->queue_lock. */ + spinlock_t __queue_lock; spinlock_t *queue_lock; /* @@ -336,8 +394,10 @@ struct request_queue unsigned long nr_requests; /* Max # of requests */ unsigned int nr_congestion_on; unsigned int nr_congestion_off; + unsigned int nr_batching; - unsigned short max_sectors; + unsigned int max_sectors; + unsigned int max_hw_sectors; unsigned short max_phys_segments; unsigned short max_hw_segments; unsigned short hardsect_size; @@ -348,8 +408,7 @@ struct request_queue struct blk_queue_tag *queue_tags; - atomic_t refcnt; - + unsigned int nr_sorted; unsigned int in_flight; /* @@ -357,13 +416,24 @@ struct request_queue */ unsigned int sg_timeout; unsigned int sg_reserved_size; + int node; + + struct blk_trace *blk_trace; + + /* + * reserved for flush operations + */ + unsigned int ordered, next_ordered, ordseq; + int orderr, ordcolor; + struct request pre_flush_rq, bar_rq, post_flush_rq; + struct request *orig_bar_rq; + unsigned int bi_size; + + struct mutex sysfs_lock; }; #define RQ_INACTIVE (-1) #define RQ_ACTIVE 1 -#define RQ_SCSI_BUSY 0xffff -#define RQ_SCSI_DONE 0xfffe -#define RQ_SCSI_DISCONNECTING 0xffe0 #define QUEUE_FLAG_CLUSTER 0 /* cluster several segments into 1 */ #define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */ @@ -373,10 +443,52 @@ struct request_queue #define QUEUE_FLAG_DEAD 5 /* queue being torn down */ #define QUEUE_FLAG_REENTER 6 /* Re-entrancy avoidance */ #define QUEUE_FLAG_PLUGGED 7 /* queue is plugged */ +#define QUEUE_FLAG_ELVSWITCH 8 /* don't use elevator, just do FIFO */ + +enum { + /* + * Hardbarrier is supported with one of the following methods. 
+ * + * NONE : hardbarrier unsupported + * DRAIN : ordering by draining is enough + * DRAIN_FLUSH : ordering by draining w/ pre and post flushes + * DRAIN_FUA : ordering by draining w/ pre flush and FUA write + * TAG : ordering by tag is enough + * TAG_FLUSH : ordering by tag w/ pre and post flushes + * TAG_FUA : ordering by tag w/ pre flush and FUA write + */ + QUEUE_ORDERED_NONE = 0x00, + QUEUE_ORDERED_DRAIN = 0x01, + QUEUE_ORDERED_TAG = 0x02, + + QUEUE_ORDERED_PREFLUSH = 0x10, + QUEUE_ORDERED_POSTFLUSH = 0x20, + QUEUE_ORDERED_FUA = 0x40, + + QUEUE_ORDERED_DRAIN_FLUSH = QUEUE_ORDERED_DRAIN | + QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH, + QUEUE_ORDERED_DRAIN_FUA = QUEUE_ORDERED_DRAIN | + QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_FUA, + QUEUE_ORDERED_TAG_FLUSH = QUEUE_ORDERED_TAG | + QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH, + QUEUE_ORDERED_TAG_FUA = QUEUE_ORDERED_TAG | + QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_FUA, + + /* + * Ordered operation sequence + */ + QUEUE_ORDSEQ_STARTED = 0x01, /* flushing in progress */ + QUEUE_ORDSEQ_DRAIN = 0x02, /* waiting for the queue to be drained */ + QUEUE_ORDSEQ_PREFLUSH = 0x04, /* pre-flushing in progress */ + QUEUE_ORDSEQ_BAR = 0x08, /* original barrier req in progress */ + QUEUE_ORDSEQ_POSTFLUSH = 0x10, /* post-flushing in progress */ + QUEUE_ORDSEQ_DONE = 0x20, +}; #define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags) #define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags) #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) +#define blk_queue_flushing(q) ((q)->ordseq) #define blk_fs_request(rq) ((rq)->flags & REQ_CMD) #define blk_pc_request(rq) ((rq)->flags & REQ_BLOCK_PC) @@ -390,6 +502,10 @@ struct request_queue #define blk_pm_request(rq) \ ((rq)->flags & (REQ_PM_SUSPEND | REQ_PM_RESUME)) +#define blk_sorted_rq(rq) ((rq)->flags & REQ_SORTED) +#define blk_barrier_rq(rq) ((rq)->flags & REQ_HARDBARRIER) +#define blk_fua_rq(rq) ((rq)->flags & REQ_FUA) + #define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist) #define rq_data_dir(rq) ((rq)->flags & 1) @@ -433,32 +549,6 @@ static inline void blk_clear_queue_full(struct request_queue *q, int rw) */ #define blk_queue_headactive(q, head_active) -/* current index into bio being processed for submission */ -#define blk_rq_idx(rq) ((rq)->cbio->bi_vcnt - (rq)->nr_cbio_segments) - -/* current bio vector being processed */ -#define blk_rq_vec(rq) (bio_iovec_idx((rq)->cbio, blk_rq_idx(rq))) - -/* current offset with respect to start of the segment being submitted */ -#define blk_rq_offset(rq) \ - (((rq)->hard_cur_sectors - (rq)->current_nr_sectors) << 9) - -/* - * temporarily mapping a (possible) highmem bio (typically for PIO transfer) - */ - -/* Assumes rq->cbio != NULL */ -static inline char * rq_map_buffer(struct request *rq, unsigned long *flags) -{ - return (__bio_kmap_irq(rq->cbio, blk_rq_idx(rq), flags) - + blk_rq_offset(rq)); -} - -static inline void rq_unmap_buffer(char *buffer, unsigned long *flags) -{ - __bio_kunmap_irq(buffer, flags); -} - /* * q->prep_rq_fn return values */ @@ -506,26 +596,31 @@ extern void blk_unregister_queue(struct gendisk *disk); extern void register_disk(struct gendisk *dev); extern void generic_make_request(struct bio *bio); extern void blk_put_request(struct request *); -extern void blk_attempt_remerge(request_queue_t *, struct request *); -extern void __blk_attempt_remerge(request_queue_t *, struct request *); -extern struct request *blk_get_request(request_queue_t *, int, int); 
-extern void blk_put_request(struct request *);
-extern void blk_insert_request(request_queue_t *, struct request *, int, void *, int);
+extern void __blk_put_request(request_queue_t *, struct request *);
+extern void blk_end_sync_rq(struct request *rq, int error);
+extern struct request *blk_get_request(request_queue_t *, int, gfp_t);
+extern void blk_insert_request(request_queue_t *, struct request *, int, void *);
 extern void blk_requeue_request(request_queue_t *, struct request *);
 extern void blk_plug_device(request_queue_t *);
 extern int blk_remove_plug(request_queue_t *);
 extern void blk_recount_segments(request_queue_t *, struct bio *);
-extern int blk_phys_contig_segment(request_queue_t *q, struct bio *, struct bio *);
-extern int blk_hw_contig_segment(request_queue_t *q, struct bio *, struct bio *);
 extern int scsi_cmd_ioctl(struct file *, struct gendisk *, unsigned int, void __user *);
+extern int sg_scsi_ioctl(struct file *, struct request_queue *,
+		struct gendisk *, struct scsi_ioctl_command __user *);
 extern void blk_start_queue(request_queue_t *q);
 extern void blk_stop_queue(request_queue_t *q);
+extern void blk_sync_queue(struct request_queue *q);
 extern void __blk_stop_queue(request_queue_t *q);
 extern void blk_run_queue(request_queue_t *);
 extern void blk_queue_activity_fn(request_queue_t *, activity_fn *, void *);
-extern struct request *blk_rq_map_user(request_queue_t *, int, void __user *, unsigned int);
-extern int blk_rq_unmap_user(struct request *, struct bio *, unsigned int);
-extern int blk_execute_rq(request_queue_t *, struct gendisk *, struct request *);
+extern int blk_rq_map_user(request_queue_t *, struct request *, void __user *, unsigned int);
+extern int blk_rq_unmap_user(struct bio *, unsigned int);
+extern int blk_rq_map_kern(request_queue_t *, struct request *, void *, unsigned int, gfp_t);
+extern int blk_rq_map_user_iov(request_queue_t *, struct request *, struct sg_iovec *, int);
+extern int blk_execute_rq(request_queue_t *, struct gendisk *,
+			  struct request *, int);
+extern void blk_execute_rq_nowait(request_queue_t *, struct gendisk *,
+				  struct request *, int, rq_end_io_fn *);
 
 static inline request_queue_t *bdev_get_queue(struct block_device *bdev)
 {
@@ -556,28 +651,58 @@ static inline void blk_run_address_space(struct address_space *mapping)
  */
 extern int end_that_request_first(struct request *, int, int);
 extern int end_that_request_chunk(struct request *, int, int);
-extern void end_that_request_last(struct request *);
-extern int process_that_request_first(struct request *, unsigned int);
+extern void end_that_request_last(struct request *, int);
 extern void end_request(struct request *req, int uptodate);
+extern void blk_complete_request(struct request *);
+
+static inline int rq_all_done(struct request *rq, unsigned int nr_bytes)
+{
+	if (blk_fs_request(rq))
+		return (nr_bytes >= (rq->hard_nr_sectors << 9));
+	else if (blk_pc_request(rq))
+		return nr_bytes >= rq->data_len;
+
+	return 0;
+}
+
+/*
+ * end_that_request_first/chunk() takes an uptodate argument. we account
+ * any value <= 0 as an io error. 0 means -EIO for compatibility reasons,
+ * any other < 0 value is the direct error type. An uptodate value of
+ * 1 indicates successful io completion.
+ */
+#define end_io_error(uptodate)	(unlikely((uptodate) <= 0))
 
 static inline void blkdev_dequeue_request(struct request *req)
 {
-	BUG_ON(list_empty(&req->queuelist));
+	elv_dequeue_request(req->q, req);
+}
 
-	list_del_init(&req->queuelist);
+/*
+ * This should be in elevator.h, but that requires pulling in rq and q
+ */
+static inline void elv_dispatch_add_tail(struct request_queue *q,
+					 struct request *rq)
+{
+	if (q->last_merge == rq)
+		q->last_merge = NULL;
+	q->nr_sorted--;
 
-	if (req->rl)
-		elv_remove_request(req->q, req);
+	q->end_sector = rq_end_sector(rq);
+	q->boundary_rq = rq;
+	list_add_tail(&rq->queuelist, &q->queue_head);
 }
 
 /*
 * Access functions for manipulating queue properties
 */
+extern request_queue_t *blk_init_queue_node(request_fn_proc *rfn,
+					spinlock_t *lock, int node_id);
 extern request_queue_t *blk_init_queue(request_fn_proc *, spinlock_t *);
 extern void blk_cleanup_queue(request_queue_t *);
 extern void blk_queue_make_request(request_queue_t *, make_request_fn *);
 extern void blk_queue_bounce_limit(request_queue_t *, u64);
-extern void blk_queue_max_sectors(request_queue_t *, unsigned short);
+extern void blk_queue_max_sectors(request_queue_t *, unsigned int);
 extern void blk_queue_max_phys_segments(request_queue_t *, unsigned short);
 extern void blk_queue_max_hw_segments(request_queue_t *, unsigned short);
 extern void blk_queue_max_segment_size(request_queue_t *, unsigned int);
@@ -587,7 +712,14 @@ extern void blk_queue_segment_boundary(request_queue_t *, unsigned long);
 extern void blk_queue_prep_rq(request_queue_t *, prep_rq_fn *pfn);
 extern void blk_queue_merge_bvec(request_queue_t *, merge_bvec_fn *);
 extern void blk_queue_dma_alignment(request_queue_t *, int);
+extern void blk_queue_softirq_done(request_queue_t *, softirq_done_fn *);
 extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev);
+extern int blk_queue_ordered(request_queue_t *, unsigned, prepare_flush_fn *);
+extern void blk_queue_issue_flush_fn(request_queue_t *, issue_flush_fn *);
+extern int blk_do_ordered(request_queue_t *, struct request **);
+extern unsigned blk_ordered_cur_seq(request_queue_t *);
+extern unsigned blk_ordered_req_seq(struct request *);
+extern void blk_ordered_complete_seq(request_queue_t *, unsigned, int);
 
 extern int blk_rq_map_sg(request_queue_t *, struct request *, struct scatterlist *);
 extern void blk_dump_rq_flags(struct request *, char *);
@@ -596,8 +728,9 @@ extern void __generic_unplug_device(request_queue_t *);
 extern long nr_blockdev_pages(void);
 
 int blk_get_queue(request_queue_t *);
-request_queue_t *blk_alloc_queue(int);
-#define blk_put_queue(q) blk_cleanup_queue((q))
+request_queue_t *blk_alloc_queue(gfp_t);
+request_queue_t *blk_alloc_queue_node(gfp_t, int);
+extern void blk_put_queue(request_queue_t *);
 
 /*
 * tag stuff
@@ -615,18 +748,17 @@ extern void blk_queue_invalidate_tags(request_queue_t *);
 extern long blk_congestion_wait(int rw, long timeout);
 
 extern void blk_rq_bio_prep(request_queue_t *, struct request *, struct bio *);
-extern void blk_rq_prep_restart(struct request *);
+extern int blkdev_issue_flush(struct block_device *, sector_t *);
 
 #define MAX_PHYS_SEGMENTS 128
 #define MAX_HW_SEGMENTS 128
-#define MAX_SECTORS 255
+#define SAFE_MAX_SECTORS 255
+#define BLK_DEF_MAX_SECTORS 1024
 
 #define MAX_SEGMENT_SIZE	65536
 
 #define blkdev_entry_to_request(entry) list_entry((entry), struct request, queuelist)
 
-extern void drive_stat_acct(struct request *, int, int);
-
 static inline int queue_hardsect_size(request_queue_t *q)
 {
 	int retval = 512;
@@ -671,7 +803,7 @@ static inline unsigned int blksize_bits(unsigned int size)
 	return bits;
 }
 
-extern inline unsigned int block_size(struct block_device *bdev)
+static inline unsigned int block_size(struct block_device *bdev)
 {
 	return bdev->bd_block_size;
 }
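
Three of the interface changes above are easier to review with usage sketches. First, the completion rework: struct request loses the cbio bookkeeping and the ->pm pointer and gains the generic ->end_io/->end_io_data pair, with blk_execute_rq_nowait() as the asynchronous submission path. A minimal caller sketch, assuming a hypothetical driver (the my_* names and the TEST UNIT READY payload are illustrative, not part of the patch):

	#include <linux/blkdev.h>
	#include <linux/completion.h>
	#include <linux/fs.h>		/* READ/WRITE */

	/* runs when the request is ended; mirrors what blk_end_sync_rq() does */
	static void my_probe_end_io(struct request *rq, int error)
	{
		struct completion *waiting = rq->end_io_data;

		rq->end_io_data = NULL;
		__blk_put_request(rq->q, rq);
		complete(waiting);
	}

	static int my_send_probe(request_queue_t *q, struct gendisk *disk)
	{
		DECLARE_COMPLETION(wait);
		struct request *rq;

		rq = blk_get_request(q, READ, GFP_KERNEL);	/* gfp_t now, not int */
		if (!rq)
			return -ENOMEM;

		rq->flags |= REQ_BLOCK_PC;
		rq->cmd[0] = 0x00;	/* e.g. TEST UNIT READY */
		rq->timeout = 10 * HZ;
		rq->end_io_data = &wait;

		/* at_head = 1, as blk_execute_rq() does for passthrough commands */
		blk_execute_rq_nowait(q, disk, rq, 1, my_probe_end_io);
		wait_for_completion(&wait);
		return 0;
	}

blk_end_sync_rq(), declared above, is essentially this callback prepackaged for the synchronous blk_execute_rq() path.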
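
Second, the barrier machinery: the QUEUE_ORDERED_* and QUEUE_ORDSEQ_* constants plus the pre_flush_rq/bar_rq/post_flush_rq requests embedded in struct request_queue replace per-driver barrier sequencing. A driver now only declares what its hardware supports and how to build a flush command; blk_do_ordered() walks the drain/pre-flush/barrier/post-flush sequence. A sketch under the same caveat (the my_* names and the SYNCHRONIZE CACHE opcode are illustrative):

	#include <linux/blkdev.h>

	/* fill @rq with the device's cache-flush command */
	static void my_prepare_flush(request_queue_t *q, struct request *rq)
	{
		memset(rq->cmd, 0, sizeof(rq->cmd));
		rq->flags |= REQ_BLOCK_PC;
		rq->timeout = 30 * HZ;
		rq->cmd[0] = 0x35;	/* e.g. SCSI SYNCHRONIZE CACHE */
	}

	static int my_setup_barriers(request_queue_t *q)
	{
		/*
		 * Write-back cache, no tagged ordering: drain the queue and
		 * bracket the barrier write with explicit flushes.
		 */
		return blk_queue_ordered(q, QUEUE_ORDERED_DRAIN_FLUSH,
					 my_prepare_flush);
	}

A write-through device would pass QUEUE_ORDERED_DRAIN with no prepare_flush_fn; FUA-capable hardware would pick QUEUE_ORDERED_DRAIN_FUA so the barrier write itself carries REQ_FUA in place of the post-flush.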
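
Third, split completions: the new donelist member, blk_complete_request() and blk_queue_softirq_done() let a driver end requests from a cheap hard-IRQ half and a per-CPU BLOCK_SOFTIRQ half. Roughly, following the locking of the in-tree users (again a hedged sketch, not from the patch):

	#include <linux/blkdev.h>
	#include <linux/interrupt.h>

	/* hard-IRQ half: stay cheap, just hand the request to the softirq */
	static irqreturn_t my_isr(int irq, void *dev_id, struct pt_regs *regs)
	{
		struct request *rq = dev_id;	/* however the driver tracks it */

		blk_complete_request(rq);	/* queues rq on this CPU's done list */
		return IRQ_HANDLED;
	}

	/* softirq half, registered with blk_queue_softirq_done(q, my_softirq_done) */
	static void my_softirq_done(struct request *rq)
	{
		request_queue_t *q = rq->q;
		int uptodate = rq->errors ? 0 : 1;

		/* finish all outstanding bytes of the request */
		end_that_request_chunk(rq, uptodate, rq->hard_nr_sectors << 9);

		spin_lock_irq(q->queue_lock);
		end_that_request_last(rq, uptodate);	/* note the new second argument */
		spin_unlock_irq(q->queue_lock);
	}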