X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=drivers%2Fxen%2Fblkfront%2Fblkfront.c;fp=drivers%2Fxen%2Fblkfront%2Fblkfront.c;h=a911c766a70a61d7992edd1e33955d02582b6d31;hb=4e76c8a9fa413ccc09d3f7f664183dcce3555d57;hp=311b8398ed4d606958bb408462279d506176e4b3;hpb=1db395853d4f30d6120458bd279ede1f882a8525;p=linux-2.6.git diff --git a/drivers/xen/blkfront/blkfront.c b/drivers/xen/blkfront/blkfront.c index 311b8398e..a911c766a 100644 --- a/drivers/xen/blkfront/blkfront.c +++ b/drivers/xen/blkfront/blkfront.c @@ -8,9 +8,13 @@ * Copyright (c) 2004, Christian Limpach * Copyright (c) 2004, Andrew Warfield * Copyright (c) 2005, Christopher Clark + * Copyright (c) 2005, XenSource Ltd * - * This file may be distributed separately from the Linux kernel, or - * incorporated into other software packages, subject to the following license: + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this source file (the "Software"), to deal in the Software without @@ -31,1450 +35,807 @@ * IN THE SOFTWARE. */ -#if 1 -#define ASSERT(_p) \ - if ( !(_p) ) { printk("Assertion '%s' failed, line %d, file %s", #_p , \ - __LINE__, __FILE__); *(int*)0=0; } -#else -#define ASSERT(_p) -#endif - #include - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) #include "block.h" -#else -#include "common.h" -#include -#include -#endif - #include #include #include #include -#include -#include -#ifdef CONFIG_XEN_BLKDEV_GRANT -#include -#include -#endif - -typedef unsigned char byte; /* from linux/ide.h */ +#include +#include +#include +#include +#include -/* Control whether runtime update of vbds is enabled. */ -#define ENABLE_VBD_UPDATE 1 +#define BLKIF_STATE_DISCONNECTED 0 +#define BLKIF_STATE_CONNECTED 1 +#define BLKIF_STATE_SUSPENDED 2 -#if ENABLE_VBD_UPDATE -static void vbd_update(void); -#else -static void vbd_update(void){}; -#endif +#define MAXIMUM_OUTSTANDING_BLOCK_REQS \ + (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE) +#define GRANT_INVALID_REF 0 -#define BLKIF_STATE_CLOSED 0 -#define BLKIF_STATE_DISCONNECTED 1 -#define BLKIF_STATE_CONNECTED 2 +static void connect(struct blkfront_info *); +static void blkfront_closing(struct xenbus_device *); +static int blkfront_remove(struct xenbus_device *); +static int talk_to_backend(struct xenbus_device *, struct blkfront_info *); +static int setup_blkring(struct xenbus_device *, struct blkfront_info *); -#define WPRINTK(fmt, args...) printk(KERN_WARNING "xen_blk: " fmt, ##args) +static void kick_pending_request_queues(struct blkfront_info *); -static int blkif_handle = 0; -static unsigned int blkif_state = BLKIF_STATE_CLOSED; -static unsigned int blkif_evtchn = 0; -static unsigned int blkif_irq = 0; +static irqreturn_t blkif_int(int irq, void *dev_id, struct pt_regs *ptregs); +static void blkif_restart_queue(void *arg); +static void blkif_recover(struct blkfront_info *); +static void blkif_completion(struct blk_shadow *); +static void blkif_free(struct blkfront_info *, int); -static int blkif_control_rsp_valid; -static blkif_response_t blkif_control_rsp; -static blkif_front_ring_t blk_ring; +/** + * Entry point to this code when a new device is created. 
Allocate the basic + * structures and the ring buffer for communication with the backend, and + * inform the backend of the appropriate details for those. Switch to + * Initialised state. + */ +static int blkfront_probe(struct xenbus_device *dev, + const struct xenbus_device_id *id) +{ + int err, vdevice, i; + struct blkfront_info *info; + + /* FIXME: Use dynamic device id if this is not set. */ + err = xenbus_scanf(XBT_NIL, dev->nodename, + "virtual-device", "%i", &vdevice); + if (err != 1) { + xenbus_dev_fatal(dev, err, "reading virtual-device"); + return err; + } + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) { + xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure"); + return -ENOMEM; + } + + info->xbdev = dev; + info->vdevice = vdevice; + info->connected = BLKIF_STATE_DISCONNECTED; + INIT_WORK(&info->work, blkif_restart_queue, (void *)info); + + for (i = 0; i < BLK_RING_SIZE; i++) + info->shadow[i].req.id = i+1; + info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff; + + /* Front end dir is a number, which is used as the id. */ + info->handle = simple_strtoul(strrchr(dev->nodename,'/')+1, NULL, 0); + dev->dev.driver_data = info; + + err = talk_to_backend(dev, info); + if (err) { + kfree(info); + dev->dev.driver_data = NULL; + return err; + } + + return 0; +} + + +/** + * We are reconnecting to the backend, due to a suspend/resume, or a backend + * driver restart. We tear down our blkif structure and recreate it, but + * leave the device-layer structures intact so that this is transparent to the + * rest of the kernel. + */ +static int blkfront_resume(struct xenbus_device *dev) +{ + struct blkfront_info *info = dev->dev.driver_data; + int err; + + DPRINTK("blkfront_resume: %s\n", dev->nodename); + + blkif_free(info, 1); + + err = talk_to_backend(dev, info); + if (!err) + blkif_recover(info); + + return err; +} + + +/* Common code used when first setting up, and when resuming. */ +static int talk_to_backend(struct xenbus_device *dev, + struct blkfront_info *info) +{ + const char *message = NULL; + struct xenbus_transaction xbt; + int err; + + /* Create shared ring, alloc event channel. 
*/ + err = setup_blkring(dev, info); + if (err) + goto out; + +again: + err = xenbus_transaction_start(&xbt); + if (err) { + xenbus_dev_fatal(dev, err, "starting transaction"); + goto destroy_blkring; + } + + err = xenbus_printf(xbt, dev->nodename, + "ring-ref","%u", info->ring_ref); + if (err) { + message = "writing ring-ref"; + goto abort_transaction; + } + err = xenbus_printf(xbt, dev->nodename, + "event-channel", "%u", info->evtchn); + if (err) { + message = "writing event-channel"; + goto abort_transaction; + } + + err = xenbus_transaction_end(xbt, 0); + if (err) { + if (err == -EAGAIN) + goto again; + xenbus_dev_fatal(dev, err, "completing transaction"); + goto destroy_blkring; + } + + xenbus_switch_state(dev, XenbusStateInitialised); + + return 0; + + abort_transaction: + xenbus_transaction_end(xbt, 1); + if (message) + xenbus_dev_fatal(dev, err, "%s", message); + destroy_blkring: + blkif_free(info, 0); + out: + return err; +} -#define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, PAGE_SIZE) -#ifdef CONFIG_XEN_BLKDEV_GRANT -static domid_t rdomid = 0; -static grant_ref_t gref_head, gref_terminal; -#define MAXIMUM_OUTSTANDING_BLOCK_REQS \ - (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLKIF_RING_SIZE) -#define GRANTREF_INVALID (1<<15) -#endif +static int setup_blkring(struct xenbus_device *dev, + struct blkfront_info *info) +{ + blkif_sring_t *sring; + int err; -static struct blk_shadow { - blkif_request_t req; - unsigned long request; - unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST]; -} blk_shadow[BLK_RING_SIZE]; -unsigned long blk_shadow_free; + info->ring_ref = GRANT_INVALID_REF; -static int recovery = 0; /* Recovery in progress: protected by blkif_io_lock */ + sring = (blkif_sring_t *)__get_free_page(GFP_KERNEL); + if (!sring) { + xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring"); + return -ENOMEM; + } + SHARED_RING_INIT(sring); + FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE); -static void kick_pending_request_queues(void); + err = xenbus_grant_ring(dev, virt_to_mfn(info->ring.sring)); + if (err < 0) { + free_page((unsigned long)sring); + info->ring.sring = NULL; + goto fail; + } + info->ring_ref = err; -int __init xlblk_init(void); + err = xenbus_alloc_evtchn(dev, &info->evtchn); + if (err) + goto fail; -static void blkif_completion(struct blk_shadow *s); + err = bind_evtchn_to_irqhandler( + info->evtchn, blkif_int, SA_SAMPLE_RANDOM, "blkif", info); + if (err <= 0) { + xenbus_dev_fatal(dev, err, + "bind_evtchn_to_irqhandler failed"); + goto fail; + } + info->irq = err; -static inline int GET_ID_FROM_FREELIST(void) -{ - unsigned long free = blk_shadow_free; - BUG_ON(free > BLK_RING_SIZE); - blk_shadow_free = blk_shadow[free].req.id; - blk_shadow[free].req.id = 0x0fffffee; /* debug */ - return free; + return 0; +fail: + blkif_free(info, 0); + return err; } -static inline void ADD_ID_TO_FREELIST(unsigned long id) + +/** + * Callback received when the backend's state changes. 
+ */ +static void backend_changed(struct xenbus_device *dev, + enum xenbus_state backend_state) { - blk_shadow[id].req.id = blk_shadow_free; - blk_shadow[id].request = 0; - blk_shadow_free = id; -} + struct blkfront_info *info = dev->dev.driver_data; + struct block_device *bd; + DPRINTK("blkfront:backend_changed.\n"); -/************************ COMMON CODE (inlined) ************************/ + switch (backend_state) { + case XenbusStateUnknown: + case XenbusStateInitialising: + case XenbusStateInitWait: + case XenbusStateInitialised: + case XenbusStateClosed: + break; -/* Kernel-specific definitions used in the common code */ -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) -#define DISABLE_SCATTERGATHER() -#else -static int sg_operation = -1; -#define DISABLE_SCATTERGATHER() (sg_operation = -1) -#endif + case XenbusStateConnected: + connect(info); + break; -static inline void pickle_request(struct blk_shadow *s, blkif_request_t *r) -{ -#ifndef CONFIG_XEN_BLKDEV_GRANT - int i; -#endif + case XenbusStateClosing: + bd = bdget(info->dev); + if (bd == NULL) + xenbus_dev_fatal(dev, -ENODEV, "bdget failed"); - s->req = *r; - -#ifndef CONFIG_XEN_BLKDEV_GRANT - for ( i = 0; i < r->nr_segments; i++ ) - s->req.frame_and_sects[i] = machine_to_phys(r->frame_and_sects[i]); -#endif + mutex_lock(&bd->bd_mutex); + if (info->users > 0) + xenbus_dev_error(dev, -EBUSY, + "Device in use; refusing to close"); + else + blkfront_closing(dev); + mutex_unlock(&bd->bd_mutex); + bdput(bd); + break; + } } -static inline void unpickle_request(blkif_request_t *r, struct blk_shadow *s) -{ -#ifndef CONFIG_XEN_BLKDEV_GRANT - int i; -#endif - - *r = s->req; -#ifndef CONFIG_XEN_BLKDEV_GRANT - for ( i = 0; i < s->req.nr_segments; i++ ) - r->frame_and_sects[i] = phys_to_machine(s->req.frame_and_sects[i]); -#endif -} +/* ** Connection ** */ -static inline void flush_requests(void) +/* + * Invoked when the backend is finally 'ready' (and has told produced + * the details about the physical device - #sectors, size, etc). + */ +static void connect(struct blkfront_info *info) +{ + unsigned long sectors, sector_size; + unsigned int binfo; + int err; + + if ((info->connected == BLKIF_STATE_CONNECTED) || + (info->connected == BLKIF_STATE_SUSPENDED) ) + return; + + DPRINTK("blkfront.c:connect:%s.\n", info->xbdev->otherend); + + err = xenbus_gather(XBT_NIL, info->xbdev->otherend, + "sectors", "%lu", §ors, + "info", "%u", &binfo, + "sector-size", "%lu", §or_size, + NULL); + if (err) { + xenbus_dev_fatal(info->xbdev, err, + "reading backend fields at %s", + info->xbdev->otherend); + return; + } + + err = xlvbd_add(sectors, info->vdevice, binfo, sector_size, info); + if (err) { + xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s", + info->xbdev->otherend); + return; + } + + (void)xenbus_switch_state(info->xbdev, XenbusStateConnected); + + /* Kick pending requests. */ + spin_lock_irq(&blkif_io_lock); + info->connected = BLKIF_STATE_CONNECTED; + kick_pending_request_queues(info); + spin_unlock_irq(&blkif_io_lock); + + add_disk(info->gd); +} + +/** + * Handle the change of state of the backend to Closing. We must delete our + * device-layer structures now, to ensure that writes are flushed through to + * the backend. Once is this done, we can switch to Closed in + * acknowledgement. 
+ */ +static void blkfront_closing(struct xenbus_device *dev) { - DISABLE_SCATTERGATHER(); - RING_PUSH_REQUESTS(&blk_ring); - notify_via_evtchn(blkif_evtchn); -} + struct blkfront_info *info = dev->dev.driver_data; + unsigned long flags; + DPRINTK("blkfront_closing: %s removed\n", dev->nodename); -/************************** KERNEL VERSION 2.6 **************************/ + if (info->rq == NULL) + return; -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) + spin_lock_irqsave(&blkif_io_lock, flags); + /* No more blkif_request(). */ + blk_stop_queue(info->rq); + /* No more gnttab callback work. */ + gnttab_cancel_free_callback(&info->callback); + flush_scheduled_work(); + spin_unlock_irqrestore(&blkif_io_lock, flags); -module_init(xlblk_init); + xlvbd_del(info); -#if ENABLE_VBD_UPDATE -static void update_vbds_task(void *unused) -{ - xlvbd_update_vbds(); + xenbus_switch_state(dev, XenbusStateClosed); } -static void vbd_update(void) -{ - static DECLARE_WORK(update_tq, update_vbds_task, NULL); - schedule_work(&update_tq); -} -#endif /* ENABLE_VBD_UPDATE */ -static void kick_pending_request_queues(void) +static int blkfront_remove(struct xenbus_device *dev) { - if ( (xlbd_blk_queue != NULL) && - test_bit(QUEUE_FLAG_STOPPED, &xlbd_blk_queue->queue_flags) ) - { - blk_start_queue(xlbd_blk_queue); - /* XXXcl call to request_fn should not be needed but - * we get stuck without... needs investigating - */ - xlbd_blk_queue->request_fn(xlbd_blk_queue); - } -} + struct blkfront_info *info = dev->dev.driver_data; + DPRINTK("blkfront_remove: %s removed\n", dev->nodename); -int blkif_open(struct inode *inode, struct file *filep) -{ - struct gendisk *gd = inode->i_bdev->bd_disk; - struct xlbd_disk_info *di = (struct xlbd_disk_info *)gd->private_data; + blkif_free(info, 0); - /* Update of usage count is protected by per-device semaphore. */ - di->mi->usage++; - - return 0; -} + kfree(info); - -int blkif_release(struct inode *inode, struct file *filep) -{ - struct gendisk *gd = inode->i_bdev->bd_disk; - struct xlbd_disk_info *di = (struct xlbd_disk_info *)gd->private_data; - - /* - * When usage drops to zero it may allow more VBD updates to occur. - * Update of usage count is protected by a per-device semaphore. - */ - if ( --di->mi->usage == 0 ) - vbd_update(); - - return 0; + return 0; } -int blkif_ioctl(struct inode *inode, struct file *filep, - unsigned command, unsigned long argument) +static inline int GET_ID_FROM_FREELIST( + struct blkfront_info *info) { - int i; - - DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n", - command, (long)argument, inode->i_rdev); - - switch ( command ) - { - case HDIO_GETGEO: - /* return ENOSYS to use defaults */ - return -ENOSYS; - - case CDROMMULTISESSION: - DPRINTK("FIXME: support multisession CDs later\n"); - for ( i = 0; i < sizeof(struct cdrom_multisession); i++ ) - if ( put_user(0, (byte *)(argument + i)) ) return -EFAULT; - return 0; - - default: - printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n", - command); - return -ENOSYS; - } - - return 0; + unsigned long free = info->shadow_free; + BUG_ON(free > BLK_RING_SIZE); + info->shadow_free = info->shadow[free].req.id; + info->shadow[free].req.id = 0x0fffffee; /* debug */ + return free; } - -/* - * blkif_queue_request - * - * request block io - * - * id: for guest use only. - * operation: BLKIF_OP_{READ,WRITE,PROBE} - * buffer: buffer to read/write into. this should be a - * virtual address in the guest os. 
- */ -static int blkif_queue_request(struct request *req) +static inline void ADD_ID_TO_FREELIST( + struct blkfront_info *info, unsigned long id) { - struct xlbd_disk_info *di = - (struct xlbd_disk_info *)req->rq_disk->private_data; - unsigned long buffer_ma; - blkif_request_t *ring_req; - struct bio *bio; - struct bio_vec *bvec; - int idx; - unsigned long id; - unsigned int fsect, lsect; -#ifdef CONFIG_XEN_BLKDEV_GRANT - int ref; -#endif - - if ( unlikely(blkif_state != BLKIF_STATE_CONNECTED) ) - return 1; - - /* Fill out a communications ring structure. */ - ring_req = RING_GET_REQUEST(&blk_ring, blk_ring.req_prod_pvt); - id = GET_ID_FROM_FREELIST(); - blk_shadow[id].request = (unsigned long)req; - - ring_req->id = id; - ring_req->operation = rq_data_dir(req) ? BLKIF_OP_WRITE : - BLKIF_OP_READ; - ring_req->sector_number = (blkif_sector_t)req->sector; - ring_req->device = di->xd_device; - - ring_req->nr_segments = 0; - rq_for_each_bio(bio, req) - { - bio_for_each_segment(bvec, bio, idx) - { - if ( ring_req->nr_segments == BLKIF_MAX_SEGMENTS_PER_REQUEST ) - BUG(); - buffer_ma = page_to_phys(bvec->bv_page); - fsect = bvec->bv_offset >> 9; - lsect = fsect + (bvec->bv_len >> 9) - 1; -#ifdef CONFIG_XEN_BLKDEV_GRANT - /* install a grant reference. */ - ref = gnttab_claim_grant_reference(&gref_head, gref_terminal); - ASSERT( ref != -ENOSPC ); - - gnttab_grant_foreign_access_ref( - ref, - rdomid, - buffer_ma >> PAGE_SHIFT, - rq_data_dir(req) ); - - blk_shadow[id].frame[ring_req->nr_segments] = - buffer_ma >> PAGE_SHIFT; - - ring_req->frame_and_sects[ring_req->nr_segments++] = - (((u32) ref) << 16) | (fsect << 3) | lsect; - -#else - ring_req->frame_and_sects[ring_req->nr_segments++] = - buffer_ma | (fsect << 3) | lsect; -#endif - } - } - - blk_ring.req_prod_pvt++; - - /* Keep a private copy so we can reissue requests when recovering. */ - pickle_request(&blk_shadow[id], ring_req); - - return 0; + info->shadow[id].req.id = info->shadow_free; + info->shadow[id].request = 0; + info->shadow_free = id; } - -/* - * do_blkif_request - * read a block; request is in a request queue - */ -void do_blkif_request(request_queue_t *rq) +static inline void flush_requests(struct blkfront_info *info) { - struct request *req; - int queued; - - DPRINTK("Entered do_blkif_request\n"); - - queued = 0; - - while ( (req = elv_next_request(rq)) != NULL ) - { - if ( !blk_fs_request(req) ) - { - end_request(req, 0); - continue; - } - - if ( RING_FULL(&blk_ring) ) - { - blk_stop_queue(rq); - break; - } - - DPRINTK("do_blk_req %p: cmd %p, sec %lx, (%u/%li) buffer:%p [%s]\n", - req, req->cmd, req->sector, req->current_nr_sectors, - req->nr_sectors, req->buffer, - rq_data_dir(req) ? "write" : "read"); - - blkdev_dequeue_request(req); - if ( blkif_queue_request(req) ) - { - blk_stop_queue(rq); - break; - } - - queued++; - } - - if ( queued != 0 ) - flush_requests(); -} + int notify; + RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->ring, notify); -static irqreturn_t blkif_int(int irq, void *dev_id, struct pt_regs *ptregs) -{ - struct request *req; - blkif_response_t *bret; - RING_IDX i, rp; - unsigned long flags; - - spin_lock_irqsave(&blkif_io_lock, flags); - - if ( unlikely(blkif_state == BLKIF_STATE_CLOSED) || - unlikely(recovery) ) - { - spin_unlock_irqrestore(&blkif_io_lock, flags); - return IRQ_HANDLED; - } - - rp = blk_ring.sring->rsp_prod; - rmb(); /* Ensure we see queued responses up to 'rp'. 
*/ - - for ( i = blk_ring.rsp_cons; i != rp; i++ ) - { - unsigned long id; - - bret = RING_GET_RESPONSE(&blk_ring, i); - id = bret->id; - req = (struct request *)blk_shadow[id].request; - - blkif_completion(&blk_shadow[id]); - - ADD_ID_TO_FREELIST(id); - - switch ( bret->operation ) - { - case BLKIF_OP_READ: - case BLKIF_OP_WRITE: - if ( unlikely(bret->status != BLKIF_RSP_OKAY) ) - DPRINTK("Bad return from blkdev data request: %x\n", - bret->status); - - if ( unlikely(end_that_request_first - (req, - (bret->status == BLKIF_RSP_OKAY), - req->hard_nr_sectors)) ) - BUG(); - end_that_request_last(req); - - break; - case BLKIF_OP_PROBE: - memcpy(&blkif_control_rsp, bret, sizeof(*bret)); - blkif_control_rsp_valid = 1; - break; - default: - BUG(); - } - } - - blk_ring.rsp_cons = i; - - kick_pending_request_queues(); - - spin_unlock_irqrestore(&blkif_io_lock, flags); - - return IRQ_HANDLED; + if (notify) + notify_remote_via_irq(info->irq); } -#else -/************************** KERNEL VERSION 2.4 **************************/ - -static kdev_t sg_dev; -static unsigned long sg_next_sect; - -/* - * Request queues with outstanding work, but ring is currently full. - * We need no special lock here, as we always access this with the - * blkif_io_lock held. We only need a small maximum list. - */ -#define MAX_PENDING 8 -static request_queue_t *pending_queues[MAX_PENDING]; -static int nr_pending; - - -#define blkif_io_lock io_request_lock - -/*============================================================================*/ -#if ENABLE_VBD_UPDATE - -/* - * blkif_update_int/update-vbds_task - handle VBD update events. - * Schedule a task for keventd to run, which will update the VBDs and perform - * the corresponding updates to our view of VBD state. - */ -static void update_vbds_task(void *unused) -{ - xlvbd_update_vbds(); +static void kick_pending_request_queues(struct blkfront_info *info) +{ + if (!RING_FULL(&info->ring)) { + /* Re-enable calldowns. */ + blk_start_queue(info->rq); + /* Kick things off immediately. */ + do_blkif_request(info->rq); + } } -static void vbd_update(void) +static void blkif_restart_queue(void *arg) { - static struct tq_struct update_tq; - update_tq.routine = update_vbds_task; - schedule_task(&update_tq); + struct blkfront_info *info = (struct blkfront_info *)arg; + spin_lock_irq(&blkif_io_lock); + if (info->connected == BLKIF_STATE_CONNECTED) + kick_pending_request_queues(info); + spin_unlock_irq(&blkif_io_lock); } -#endif /* ENABLE_VBD_UPDATE */ -/*============================================================================*/ - -static void kick_pending_request_queues(void) +static void blkif_restart_queue_callback(void *arg) { - /* We kick pending request queues if the ring is reasonably empty. */ - if ( (nr_pending != 0) && - (RING_PENDING_REQUESTS(&blk_ring) < (BLK_RING_SIZE >> 1)) ) - { - /* Attempt to drain the queue, but bail if the ring becomes full. 
*/ - while ( (nr_pending != 0) && !RING_FULL(&blk_ring) ) - do_blkif_request(pending_queues[--nr_pending]); - } + struct blkfront_info *info = (struct blkfront_info *)arg; + schedule_work(&info->work); } int blkif_open(struct inode *inode, struct file *filep) { - short xldev = inode->i_rdev; - struct gendisk *gd = get_gendisk(xldev); - xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev); - short minor = MINOR(xldev); - - if ( gd->part[minor].nr_sects == 0 ) - { - /* - * Device either doesn't exist, or has zero capacity; we use a few - * cheesy heuristics to return the relevant error code - */ - if ( (gd->sizes[minor >> gd->minor_shift] != 0) || - ((minor & (gd->max_p - 1)) != 0) ) - { - /* - * We have a real device, but no such partition, or we just have a - * partition number so guess this is the problem. - */ - return -ENXIO; /* no such device or address */ - } - else if ( gd->flags[minor >> gd->minor_shift] & GENHD_FL_REMOVABLE ) - { - /* This is a removable device => assume that media is missing. */ - return -ENOMEDIUM; /* media not present (this is a guess) */ - } - else - { - /* Just go for the general 'no such device' error. */ - return -ENODEV; /* no such device */ - } - } - - /* Update of usage count is protected by per-device semaphore. */ - disk->usage++; - - return 0; + struct blkfront_info *info = inode->i_bdev->bd_disk->private_data; + info->users++; + return 0; } int blkif_release(struct inode *inode, struct file *filep) { - xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev); + struct blkfront_info *info = inode->i_bdev->bd_disk->private_data; + info->users--; + if (info->users == 0) { + /* Check whether we have been instructed to close. We will + have ignored this request initially, as the device was + still mounted. */ + struct xenbus_device * dev = info->xbdev; + enum xenbus_state state = xenbus_read_driver_state(dev->otherend); - /* - * When usage drops to zero it may allow more VBD updates to occur. - * Update of usage count is protected by a per-device semaphore. - */ - if ( --disk->usage == 0 ) { - vbd_update(); - } - - return 0; + if (state == XenbusStateClosing) + blkfront_closing(dev); + } + return 0; } int blkif_ioctl(struct inode *inode, struct file *filep, - unsigned command, unsigned long argument) + unsigned command, unsigned long argument) { - kdev_t dev = inode->i_rdev; - struct hd_geometry *geo = (struct hd_geometry *)argument; - struct gendisk *gd; - struct hd_struct *part; - int i; - unsigned short cylinders; - byte heads, sectors; - - /* NB. No need to check permissions. That is done for us. 
*/ - - DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n", - command, (long) argument, dev); - - gd = get_gendisk(dev); - part = &gd->part[MINOR(dev)]; - - switch ( command ) - { - case BLKGETSIZE: - DPRINTK_IOCTL(" BLKGETSIZE: %x %lx\n", BLKGETSIZE, part->nr_sects); - return put_user(part->nr_sects, (unsigned long *) argument); - - case BLKGETSIZE64: - DPRINTK_IOCTL(" BLKGETSIZE64: %x %llx\n", BLKGETSIZE64, - (u64)part->nr_sects * 512); - return put_user((u64)part->nr_sects * 512, (u64 *) argument); - - case BLKRRPART: /* re-read partition table */ - DPRINTK_IOCTL(" BLKRRPART: %x\n", BLKRRPART); - return blkif_revalidate(dev); - - case BLKSSZGET: - return hardsect_size[MAJOR(dev)][MINOR(dev)]; - - case BLKBSZGET: /* get block size */ - DPRINTK_IOCTL(" BLKBSZGET: %x\n", BLKBSZGET); - break; - - case BLKBSZSET: /* set block size */ - DPRINTK_IOCTL(" BLKBSZSET: %x\n", BLKBSZSET); - break; - - case BLKRASET: /* set read-ahead */ - DPRINTK_IOCTL(" BLKRASET: %x\n", BLKRASET); - break; - - case BLKRAGET: /* get read-ahead */ - DPRINTK_IOCTL(" BLKRAFET: %x\n", BLKRAGET); - break; - - case HDIO_GETGEO: - DPRINTK_IOCTL(" HDIO_GETGEO: %x\n", HDIO_GETGEO); - if (!argument) return -EINVAL; - - /* We don't have real geometry info, but let's at least return - values consistent with the size of the device */ - - heads = 0xff; - sectors = 0x3f; - cylinders = part->nr_sects / (heads * sectors); - - if (put_user(0x00, (unsigned long *) &geo->start)) return -EFAULT; - if (put_user(heads, (byte *)&geo->heads)) return -EFAULT; - if (put_user(sectors, (byte *)&geo->sectors)) return -EFAULT; - if (put_user(cylinders, (unsigned short *)&geo->cylinders)) return -EFAULT; - - return 0; - - case HDIO_GETGEO_BIG: - DPRINTK_IOCTL(" HDIO_GETGEO_BIG: %x\n", HDIO_GETGEO_BIG); - if (!argument) return -EINVAL; - - /* We don't have real geometry info, but let's at least return - values consistent with the size of the device */ - - heads = 0xff; - sectors = 0x3f; - cylinders = part->nr_sects / (heads * sectors); - - if (put_user(0x00, (unsigned long *) &geo->start)) return -EFAULT; - if (put_user(heads, (byte *)&geo->heads)) return -EFAULT; - if (put_user(sectors, (byte *)&geo->sectors)) return -EFAULT; - if (put_user(cylinders, (unsigned int *) &geo->cylinders)) return -EFAULT; - - return 0; - - case CDROMMULTISESSION: - DPRINTK("FIXME: support multisession CDs later\n"); - for ( i = 0; i < sizeof(struct cdrom_multisession); i++ ) - if ( put_user(0, (byte *)(argument + i)) ) return -EFAULT; - return 0; - - case SCSI_IOCTL_GET_BUS_NUMBER: - DPRINTK("FIXME: SCSI_IOCTL_GET_BUS_NUMBER ioctl in XL blkif"); - return -ENOSYS; - - default: - printk(KERN_ALERT "ioctl %08x not supported by XL blkif\n", command); - return -ENOSYS; - } - - return 0; -} + int i; + DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n", + command, (long)argument, inode->i_rdev); + switch (command) { + case CDROMMULTISESSION: + DPRINTK("FIXME: support multisession CDs later\n"); + for (i = 0; i < sizeof(struct cdrom_multisession); i++) + if (put_user(0, (char __user *)(argument + i))) + return -EFAULT; + return 0; -/* check media change: should probably do something here in some cases :-) */ -int blkif_check(kdev_t dev) -{ - DPRINTK("blkif_check\n"); - return 0; + default: + /*printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n", + command);*/ + return -EINVAL; /* same return as native Linux */ + } + + return 0; } -int blkif_revalidate(kdev_t dev) + +int blkif_getgeo(struct block_device *bd, struct hd_geometry *hg) { - struct 
block_device *bd; - struct gendisk *gd; - xl_disk_t *disk; - unsigned long capacity; - int i, rc = 0; - - if ( (bd = bdget(dev)) == NULL ) - return -EINVAL; - - /* - * Update of partition info, and check of usage count, is protected - * by the per-block-device semaphore. - */ - down(&bd->bd_sem); - - if ( ((gd = get_gendisk(dev)) == NULL) || - ((disk = xldev_to_xldisk(dev)) == NULL) || - ((capacity = gd->part[MINOR(dev)].nr_sects) == 0) ) - { - rc = -EINVAL; - goto out; - } - - if ( disk->usage > 1 ) - { - rc = -EBUSY; - goto out; - } - - /* Only reread partition table if VBDs aren't mapped to partitions. */ - if ( !(gd->flags[MINOR(dev) >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) ) - { - for ( i = gd->max_p - 1; i >= 0; i-- ) - { - invalidate_device(dev+i, 1); - gd->part[MINOR(dev+i)].start_sect = 0; - gd->part[MINOR(dev+i)].nr_sects = 0; - gd->sizes[MINOR(dev+i)] = 0; - } - - grok_partitions(gd, MINOR(dev)>>gd->minor_shift, gd->max_p, capacity); - } + /* We don't have real geometry info, but let's at least return + values consistent with the size of the device */ + sector_t nsect = get_capacity(bd->bd_disk); + sector_t cylinders = nsect; - out: - up(&bd->bd_sem); - bdput(bd); - return rc; + hg->heads = 0xff; + hg->sectors = 0x3f; + sector_div(cylinders, hg->heads * hg->sectors); + hg->cylinders = cylinders; + if ((sector_t)(hg->cylinders + 1) * hg->heads * hg->sectors < nsect) + hg->cylinders = 0xffff; + return 0; } /* * blkif_queue_request * - * request block io - * + * request block io + * * id: for guest use only. * operation: BLKIF_OP_{READ,WRITE,PROBE} * buffer: buffer to read/write into. this should be a * virtual address in the guest os. */ -static int blkif_queue_request(unsigned long id, - int operation, - char * buffer, - unsigned long sector_number, - unsigned short nr_sectors, - kdev_t device) +static int blkif_queue_request(struct request *req) { - unsigned long buffer_ma = virt_to_bus(buffer); - unsigned long xid; - struct gendisk *gd; - blkif_request_t *req; - struct buffer_head *bh; - unsigned int fsect, lsect; -#ifdef CONFIG_XEN_BLKDEV_GRANT - int ref; -#endif - - fsect = (buffer_ma & ~PAGE_MASK) >> 9; - lsect = fsect + nr_sectors - 1; - - /* Buffer must be sector-aligned. Extent mustn't cross a page boundary. */ - if ( unlikely((buffer_ma & ((1<<9)-1)) != 0) ) - BUG(); - if ( lsect > 7 ) - BUG(); - - buffer_ma &= PAGE_MASK; - - if ( unlikely(blkif_state != BLKIF_STATE_CONNECTED) ) - return 1; - - switch ( operation ) - { - - case BLKIF_OP_READ: - case BLKIF_OP_WRITE: - gd = get_gendisk(device); - - /* - * Update the sector_number we'll pass down as appropriate; note that - * we could sanity check that resulting sector will be in this - * partition, but this will happen in driver backend anyhow. - */ - sector_number += gd->part[MINOR(device)].start_sect; - - /* - * If this unit doesn't consist of virtual partitions then we clear - * the partn bits from the device number. - */ - if ( !(gd->flags[MINOR(device)>>gd->minor_shift] & - GENHD_FL_VIRT_PARTNS) ) - device &= ~(gd->max_p - 1); - - if ( (sg_operation == operation) && - (sg_dev == device) && - (sg_next_sect == sector_number) ) - { - req = RING_GET_REQUEST(&blk_ring, - blk_ring.req_prod_pvt - 1); - bh = (struct buffer_head *)id; - - bh->b_reqnext = (struct buffer_head *)blk_shadow[req->id].request; - blk_shadow[req->id].request = (unsigned long)id; - -#ifdef CONFIG_XEN_BLKDEV_GRANT - /* install a grant reference. 
*/ - ref = gnttab_claim_grant_reference(&gref_head, gref_terminal); - ASSERT( ref != -ENOSPC ); - - gnttab_grant_foreign_access_ref( - ref, - rdomid, - buffer_ma >> PAGE_SHIFT, - ( operation == BLKIF_OP_WRITE ? 1 : 0 ) ); - - blk_shadow[id].frame[req->nr_segments] = - buffer_ma >> PAGE_SHIFT; - - req->frame_and_sects[req->nr_segments] = - (((u32) ref ) << 16) | (fsect << 3) | lsect; -#else - req->frame_and_sects[req->nr_segments] = - buffer_ma | (fsect << 3) | lsect; -#endif - if ( ++req->nr_segments < BLKIF_MAX_SEGMENTS_PER_REQUEST ) - sg_next_sect += nr_sectors; - else - DISABLE_SCATTERGATHER(); - - /* Update the copy of the request in the recovery ring. */ - pickle_request(&blk_shadow[req->id], req ); - - return 0; - } - else if ( RING_FULL(&blk_ring) ) - { - return 1; - } - else - { - sg_operation = operation; - sg_dev = device; - sg_next_sect = sector_number + nr_sectors; - } - break; - - default: - panic("unknown op %d\n", operation); - } - - /* Fill out a communications ring structure. */ - req = RING_GET_REQUEST(&blk_ring, blk_ring.req_prod_pvt); - - xid = GET_ID_FROM_FREELIST(); - blk_shadow[xid].request = (unsigned long)id; - - req->id = xid; - req->operation = operation; - req->sector_number = (blkif_sector_t)sector_number; - req->device = device; - req->nr_segments = 1; -#ifdef CONFIG_XEN_BLKDEV_GRANT - /* install a grant reference. */ - ref = gnttab_claim_grant_reference(&gref_head, gref_terminal); - ASSERT( ref != -ENOSPC ); - - gnttab_grant_foreign_access_ref( - ref, - rdomid, - buffer_ma >> PAGE_SHIFT, - ( operation == BLKIF_OP_WRITE ? 1 : 0 ) ); - - blk_shadow[xid].frame[0] = buffer_ma >> PAGE_SHIFT; - - req->frame_and_sects[0] = (((u32) ref)<<16) | (fsect<<3) | lsect; -#else - req->frame_and_sects[0] = buffer_ma | (fsect<<3) | lsect; -#endif - - /* Keep a private copy so we can reissue requests when recovering. */ - pickle_request(&blk_shadow[xid], req); - - blk_ring.req_prod_pvt++; - - return 0; + struct blkfront_info *info = req->rq_disk->private_data; + unsigned long buffer_mfn; + blkif_request_t *ring_req; + struct bio *bio; + struct bio_vec *bvec; + int idx; + unsigned long id; + unsigned int fsect, lsect; + int ref; + grant_ref_t gref_head; + + if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) + return 1; + + if (gnttab_alloc_grant_references( + BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) { + gnttab_request_free_callback( + &info->callback, + blkif_restart_queue_callback, + info, + BLKIF_MAX_SEGMENTS_PER_REQUEST); + return 1; + } + + /* Fill out a communications ring structure. */ + ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt); + id = GET_ID_FROM_FREELIST(info); + info->shadow[id].request = (unsigned long)req; + + ring_req->id = id; + ring_req->operation = rq_data_dir(req) ? + BLKIF_OP_WRITE : BLKIF_OP_READ; + ring_req->sector_number = (blkif_sector_t)req->sector; + ring_req->handle = info->handle; + + ring_req->nr_segments = 0; + rq_for_each_bio (bio, req) { + bio_for_each_segment (bvec, bio, idx) { + BUG_ON(ring_req->nr_segments + == BLKIF_MAX_SEGMENTS_PER_REQUEST); + buffer_mfn = page_to_phys(bvec->bv_page) >> PAGE_SHIFT; + fsect = bvec->bv_offset >> 9; + lsect = fsect + (bvec->bv_len >> 9) - 1; + /* install a grant reference. 
*/ + ref = gnttab_claim_grant_reference(&gref_head); + BUG_ON(ref == -ENOSPC); + + gnttab_grant_foreign_access_ref( + ref, + info->xbdev->otherend_id, + buffer_mfn, + rq_data_dir(req) ); + + info->shadow[id].frame[ring_req->nr_segments] = + mfn_to_pfn(buffer_mfn); + + ring_req->seg[ring_req->nr_segments] = + (struct blkif_request_segment) { + .gref = ref, + .first_sect = fsect, + .last_sect = lsect }; + + ring_req->nr_segments++; + } + } + + info->ring.req_prod_pvt++; + + /* Keep a private copy so we can reissue requests when recovering. */ + info->shadow[id].req = *ring_req; + + gnttab_free_grant_references(gref_head); + + return 0; } - /* * do_blkif_request * read a block; request is in a request queue */ void do_blkif_request(request_queue_t *rq) { - struct request *req; - struct buffer_head *bh, *next_bh; - int rw, nsect, full, queued = 0; - - DPRINTK("Entered do_blkif_request\n"); - - while ( !rq->plugged && !list_empty(&rq->queue_head)) - { - if ( (req = blkdev_entry_next_request(&rq->queue_head)) == NULL ) - goto out; - - DPRINTK("do_blkif_request %p: cmd %i, sec %lx, (%li/%li) bh:%p\n", - req, req->cmd, req->sector, - req->current_nr_sectors, req->nr_sectors, req->bh); - - rw = req->cmd; - if ( rw == READA ) - rw = READ; - if ( unlikely((rw != READ) && (rw != WRITE)) ) - panic("XenoLinux Virtual Block Device: bad cmd: %d\n", rw); - - req->errors = 0; - - bh = req->bh; - while ( bh != NULL ) - { - next_bh = bh->b_reqnext; - bh->b_reqnext = NULL; - - full = blkif_queue_request( - (unsigned long)bh, - (rw == READ) ? BLKIF_OP_READ : BLKIF_OP_WRITE, - bh->b_data, bh->b_rsector, bh->b_size>>9, bh->b_rdev); - - if ( full ) - { - bh->b_reqnext = next_bh; - pending_queues[nr_pending++] = rq; - if ( unlikely(nr_pending >= MAX_PENDING) ) - BUG(); - goto out; - } - - queued++; - - /* Dequeue the buffer head from the request. */ - nsect = bh->b_size >> 9; - bh = req->bh = next_bh; - - if ( bh != NULL ) - { - /* There's another buffer head to do. Update the request. */ - req->hard_sector += nsect; - req->hard_nr_sectors -= nsect; - req->sector = req->hard_sector; - req->nr_sectors = req->hard_nr_sectors; - req->current_nr_sectors = bh->b_size >> 9; - req->buffer = bh->b_data; - } - else - { - /* That was the last buffer head. Finalise the request. */ - if ( unlikely(end_that_request_first(req, 1, "XenBlk")) ) - BUG(); - blkdev_dequeue_request(req); - end_that_request_last(req); - } - } - } + struct blkfront_info *info = NULL; + struct request *req; + int queued; - out: - if ( queued != 0 ) - flush_requests(); -} + DPRINTK("Entered do_blkif_request\n"); + queued = 0; -static void blkif_int(int irq, void *dev_id, struct pt_regs *ptregs) -{ - RING_IDX i, rp; - unsigned long flags; - struct buffer_head *bh, *next_bh; - - spin_lock_irqsave(&io_request_lock, flags); - - if ( unlikely(blkif_state == BLKIF_STATE_CLOSED || recovery) ) - { - spin_unlock_irqrestore(&io_request_lock, flags); - return; - } - - rp = blk_ring.sring->rsp_prod; - rmb(); /* Ensure we see queued responses up to 'rp'. 
*/ - - for ( i = blk_ring.rsp_cons; i != rp; i++ ) - { - unsigned long id; - blkif_response_t *bret; - - bret = RING_GET_RESPONSE(&blk_ring, i); - id = bret->id; - bh = (struct buffer_head *)blk_shadow[id].request; - - blkif_completion(&blk_shadow[id]); - - ADD_ID_TO_FREELIST(id); - - switch ( bret->operation ) - { - case BLKIF_OP_READ: - case BLKIF_OP_WRITE: - if ( unlikely(bret->status != BLKIF_RSP_OKAY) ) - DPRINTK("Bad return from blkdev data request: %lx\n", - bret->status); - for ( ; bh != NULL; bh = next_bh ) - { - next_bh = bh->b_reqnext; - bh->b_reqnext = NULL; - bh->b_end_io(bh, bret->status == BLKIF_RSP_OKAY); - } - - break; - case BLKIF_OP_PROBE: - memcpy(&blkif_control_rsp, bret, sizeof(*bret)); - blkif_control_rsp_valid = 1; - break; - default: - BUG(); - } - - } - blk_ring.rsp_cons = i; - - kick_pending_request_queues(); - - spin_unlock_irqrestore(&io_request_lock, flags); -} + while ((req = elv_next_request(rq)) != NULL) { + info = req->rq_disk->private_data; + if (!blk_fs_request(req)) { + end_request(req, 0); + continue; + } -#endif + if (RING_FULL(&info->ring)) + goto wait; -/***************************** COMMON CODE *******************************/ + DPRINTK("do_blk_req %p: cmd %p, sec %lx, " + "(%u/%li) buffer:%p [%s]\n", + req, req->cmd, req->sector, req->current_nr_sectors, + req->nr_sectors, req->buffer, + rq_data_dir(req) ? "write" : "read"); -#ifdef CONFIG_XEN_BLKDEV_GRANT -void blkif_control_probe_send(blkif_request_t *req, blkif_response_t *rsp, - unsigned long address) -{ - int ref = gnttab_claim_grant_reference(&gref_head, gref_terminal); - ASSERT( ref != -ENOSPC ); - gnttab_grant_foreign_access_ref( ref, rdomid, address >> PAGE_SHIFT, 0 ); + blkdev_dequeue_request(req); + if (blkif_queue_request(req)) { + blk_requeue_request(rq, req); + wait: + /* Avoid pointless unplugs. */ + blk_stop_queue(rq); + break; + } - req->frame_and_sects[0] = (((u32) ref) << 16) | 7; + queued++; + } - blkif_control_send(req, rsp); + if (queued != 0) + flush_requests(info); } -#endif -void blkif_control_send(blkif_request_t *req, blkif_response_t *rsp) -{ - unsigned long flags, id; - blkif_request_t *req_d; - - retry: - while ( RING_FULL(&blk_ring) ) - { - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(1); - } - - spin_lock_irqsave(&blkif_io_lock, flags); - if ( RING_FULL(&blk_ring) ) - { - spin_unlock_irqrestore(&blkif_io_lock, flags); - goto retry; - } - - DISABLE_SCATTERGATHER(); - req_d = RING_GET_REQUEST(&blk_ring, blk_ring.req_prod_pvt); - *req_d = *req; - - id = GET_ID_FROM_FREELIST(); - req_d->id = id; - blk_shadow[id].request = (unsigned long)req; - - pickle_request(&blk_shadow[id], req); - - blk_ring.req_prod_pvt++; - flush_requests(); - - spin_unlock_irqrestore(&blkif_io_lock, flags); - - while ( !blkif_control_rsp_valid ) - { - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(1); - } - - memcpy(rsp, &blkif_control_rsp, sizeof(*rsp)); - blkif_control_rsp_valid = 0; -} - -/* Send a driver status notification to the domain controller. */ -static void send_driver_status(int ok) +static irqreturn_t blkif_int(int irq, void *dev_id, struct pt_regs *ptregs) { - ctrl_msg_t cmsg = { - .type = CMSG_BLKIF_FE, - .subtype = CMSG_BLKIF_FE_DRIVER_STATUS, - .length = sizeof(blkif_fe_driver_status_t), - }; - blkif_fe_driver_status_t *msg = (void*)cmsg.msg; - - msg->status = (ok ? 
BLKIF_DRIVER_STATUS_UP : BLKIF_DRIVER_STATUS_DOWN); - - ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE); -} + struct request *req; + blkif_response_t *bret; + RING_IDX i, rp; + unsigned long flags; + struct blkfront_info *info = (struct blkfront_info *)dev_id; + + spin_lock_irqsave(&blkif_io_lock, flags); + + if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) { + spin_unlock_irqrestore(&blkif_io_lock, flags); + return IRQ_HANDLED; + } + + again: + rp = info->ring.sring->rsp_prod; + rmb(); /* Ensure we see queued responses up to 'rp'. */ + + for (i = info->ring.rsp_cons; i != rp; i++) { + unsigned long id; + int ret; + + bret = RING_GET_RESPONSE(&info->ring, i); + id = bret->id; + req = (struct request *)info->shadow[id].request; + + blkif_completion(&info->shadow[id]); + + ADD_ID_TO_FREELIST(info, id); + + switch (bret->operation) { + case BLKIF_OP_READ: + case BLKIF_OP_WRITE: + if (unlikely(bret->status != BLKIF_RSP_OKAY)) + DPRINTK("Bad return from blkdev data " + "request: %x\n", bret->status); + + ret = end_that_request_first( + req, (bret->status == BLKIF_RSP_OKAY), + req->hard_nr_sectors); + BUG_ON(ret); + end_that_request_last( + req, (bret->status == BLKIF_RSP_OKAY)); + break; + default: + BUG(); + } + } + + info->ring.rsp_cons = i; + + if (i != info->ring.req_prod_pvt) { + int more_to_do; + RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, more_to_do); + if (more_to_do) + goto again; + } else + info->ring.sring->rsp_event = i + 1; + + kick_pending_request_queues(info); + + spin_unlock_irqrestore(&blkif_io_lock, flags); + + return IRQ_HANDLED; +} + +static void blkif_free(struct blkfront_info *info, int suspend) +{ + /* Prevent new requests being issued until we fix things up. */ + spin_lock_irq(&blkif_io_lock); + info->connected = suspend ? + BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED; + /* No more blkif_request(). */ + if (info->rq) + blk_stop_queue(info->rq); + /* No more gnttab callback work. */ + gnttab_cancel_free_callback(&info->callback); + flush_scheduled_work(); + spin_unlock_irq(&blkif_io_lock); + + /* Free resources associated with old device channel. */ + if (info->ring_ref != GRANT_INVALID_REF) { + gnttab_end_foreign_access(info->ring_ref, 0, + (unsigned long)info->ring.sring); + info->ring_ref = GRANT_INVALID_REF; + info->ring.sring = NULL; + } + if (info->irq) + unbind_from_irqhandler(info->irq, info); + info->evtchn = info->irq = 0; -/* Tell the controller to bring up the interface. */ -static void blkif_send_interface_connect(void) -{ - ctrl_msg_t cmsg = { - .type = CMSG_BLKIF_FE, - .subtype = CMSG_BLKIF_FE_INTERFACE_CONNECT, - .length = sizeof(blkif_fe_interface_connect_t), - }; - blkif_fe_interface_connect_t *msg = (void*)cmsg.msg; - - msg->handle = 0; - msg->shmem_frame = (virt_to_machine(blk_ring.sring) >> PAGE_SHIFT); - - ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE); } -static void blkif_free(void) +static void blkif_completion(struct blk_shadow *s) { - /* Prevent new requests being issued until we fix things up. */ - spin_lock_irq(&blkif_io_lock); - recovery = 1; - blkif_state = BLKIF_STATE_DISCONNECTED; - spin_unlock_irq(&blkif_io_lock); - - /* Free resources associated with old device channel. 
*/ - if ( blk_ring.sring != NULL ) - { - free_page((unsigned long)blk_ring.sring); - blk_ring.sring = NULL; - } - free_irq(blkif_irq, NULL); - blkif_irq = 0; - - unbind_evtchn_from_irq(blkif_evtchn); - blkif_evtchn = 0; + int i; + for (i = 0; i < s->req.nr_segments; i++) + gnttab_end_foreign_access(s->req.seg[i].gref, 0, 0UL); } -static void blkif_close(void) +static void blkif_recover(struct blkfront_info *info) { -} + int i; + blkif_request_t *req; + struct blk_shadow *copy; + int j; -/* Move from CLOSED to DISCONNECTED state. */ -static void blkif_disconnect(void) -{ - blkif_sring_t *sring; - - if ( blk_ring.sring != NULL ) - free_page((unsigned long)blk_ring.sring); - - sring = (blkif_sring_t *)__get_free_page(GFP_KERNEL); - SHARED_RING_INIT(sring); - FRONT_RING_INIT(&blk_ring, sring, PAGE_SIZE); - blkif_state = BLKIF_STATE_DISCONNECTED; - blkif_send_interface_connect(); -} + /* Stage 1: Make a safe copy of the shadow state. */ + copy = kmalloc(sizeof(info->shadow), GFP_KERNEL | __GFP_NOFAIL); + memcpy(copy, info->shadow, sizeof(info->shadow)); -static void blkif_reset(void) -{ - blkif_free(); - blkif_disconnect(); -} + /* Stage 2: Set up free list. */ + memset(&info->shadow, 0, sizeof(info->shadow)); + for (i = 0; i < BLK_RING_SIZE; i++) + info->shadow[i].req.id = i+1; + info->shadow_free = info->ring.req_prod_pvt; + info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff; -static void blkif_recover(void) -{ - int i; - blkif_request_t *req; - struct blk_shadow *copy; -#ifdef CONFIG_XEN_BLKDEV_GRANT - int j; -#endif - - /* Stage 1: Make a safe copy of the shadow state. */ - copy = (struct blk_shadow *)kmalloc(sizeof(blk_shadow), GFP_KERNEL); - BUG_ON(copy == NULL); - memcpy(copy, blk_shadow, sizeof(blk_shadow)); - - /* Stage 2: Set up free list. */ - memset(&blk_shadow, 0, sizeof(blk_shadow)); - for ( i = 0; i < BLK_RING_SIZE; i++ ) - blk_shadow[i].req.id = i+1; - blk_shadow_free = blk_ring.req_prod_pvt; - blk_shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff; - - /* Stage 3: Find pending requests and requeue them. */ - for ( i = 0; i < BLK_RING_SIZE; i++ ) - { - /* Not in use? */ - if ( copy[i].request == 0 ) - continue; - - /* Grab a request slot and unpickle shadow state into it. */ - req = RING_GET_REQUEST( - &blk_ring, blk_ring.req_prod_pvt); - unpickle_request(req, ©[i]); - - /* We get a new request id, and must reset the shadow state. */ - req->id = GET_ID_FROM_FREELIST(); - memcpy(&blk_shadow[req->id], ©[i], sizeof(copy[i])); - -#ifdef CONFIG_XEN_BLKDEV_GRANT - /* Rewrite any grant references invalidated by suspend/resume. */ - for ( j = 0; j < req->nr_segments; j++ ) - { - if ( req->frame_and_sects[j] & GRANTREF_INVALID ) - gnttab_grant_foreign_access_ref( - blkif_gref_from_fas(req->frame_and_sects[j]), - rdomid, - blk_shadow[req->id].frame[j], - rq_data_dir((struct request *) - blk_shadow[req->id].request)); - req->frame_and_sects[j] &= ~GRANTREF_INVALID; - } - blk_shadow[req->id].req = *req; -#endif - - blk_ring.req_prod_pvt++; - } - - kfree(copy); - - recovery = 0; - - /* blk_ring->req_prod will be set when we flush_requests().*/ - wmb(); - - /* Kicks things back into life. */ - flush_requests(); - - /* Now safe to left other people use the interface. */ - blkif_state = BLKIF_STATE_CONNECTED; -} + /* Stage 3: Find pending requests and requeue them. */ + for (i = 0; i < BLK_RING_SIZE; i++) { + /* Not in use? 
*/ + if (copy[i].request == 0) + continue; -static void blkif_connect(blkif_fe_interface_status_t *status) -{ - int err = 0; - - blkif_evtchn = status->evtchn; - blkif_irq = bind_evtchn_to_irq(blkif_evtchn); -#ifdef CONFIG_XEN_BLKDEV_GRANT - rdomid = status->domid; -#endif - - err = request_irq(blkif_irq, blkif_int, SA_SAMPLE_RANDOM, "blkif", NULL); - if ( err ) - { - printk(KERN_ALERT "xen_blk: request_irq failed (err=%d)\n", err); - return; - } - - if ( recovery ) - { - blkif_recover(); - } - else - { - /* Transition to connected in case we need to do - * a partition probe on a whole disk. */ - blkif_state = BLKIF_STATE_CONNECTED; - - /* Probe for discs attached to the interface. */ - xlvbd_init(); - } - - /* Kick pending requests. */ - spin_lock_irq(&blkif_io_lock); - kick_pending_request_queues(); - spin_unlock_irq(&blkif_io_lock); -} + /* Grab a request slot and copy shadow state into it. */ + req = RING_GET_REQUEST( + &info->ring, info->ring.req_prod_pvt); + *req = copy[i].req; -static void unexpected(blkif_fe_interface_status_t *status) -{ - DPRINTK(" Unexpected blkif status %u in state %u\n", - status->status, blkif_state); -} + /* We get a new request id, and must reset the shadow state. */ + req->id = GET_ID_FROM_FREELIST(info); + memcpy(&info->shadow[req->id], ©[i], sizeof(copy[i])); -static void blkif_status(blkif_fe_interface_status_t *status) -{ - if ( status->handle != blkif_handle ) - { - WPRINTK(" Invalid blkif: handle=%u\n", status->handle); - unexpected(status); - return; - } - - switch ( status->status ) - { - case BLKIF_INTERFACE_STATUS_CLOSED: - switch ( blkif_state ) - { - case BLKIF_STATE_CLOSED: - unexpected(status); - break; - case BLKIF_STATE_DISCONNECTED: - case BLKIF_STATE_CONNECTED: - unexpected(status); - blkif_close(); - break; - } - break; - - case BLKIF_INTERFACE_STATUS_DISCONNECTED: - switch ( blkif_state ) - { - case BLKIF_STATE_CLOSED: - blkif_disconnect(); - break; - case BLKIF_STATE_DISCONNECTED: - case BLKIF_STATE_CONNECTED: - /* unexpected(status); */ /* occurs during suspend/resume */ - blkif_reset(); - break; - } - break; - - case BLKIF_INTERFACE_STATUS_CONNECTED: - switch ( blkif_state ) - { - case BLKIF_STATE_CLOSED: - unexpected(status); - blkif_disconnect(); - blkif_connect(status); - break; - case BLKIF_STATE_DISCONNECTED: - blkif_connect(status); - break; - case BLKIF_STATE_CONNECTED: - unexpected(status); - blkif_connect(status); - break; - } - break; - - case BLKIF_INTERFACE_STATUS_CHANGED: - switch ( blkif_state ) - { - case BLKIF_STATE_CLOSED: - case BLKIF_STATE_DISCONNECTED: - unexpected(status); - break; - case BLKIF_STATE_CONNECTED: - vbd_update(); - break; - } - break; - - default: - WPRINTK(" Invalid blkif status: %d\n", status->status); - break; - } -} + /* Rewrite any grant references invalidated by susp/resume. 
*/ + for (j = 0; j < req->nr_segments; j++) + gnttab_grant_foreign_access_ref( + req->seg[j].gref, + info->xbdev->otherend_id, + pfn_to_mfn(info->shadow[req->id].frame[j]), + rq_data_dir( + (struct request *) + info->shadow[req->id].request)); + info->shadow[req->id].req = *req; + info->ring.req_prod_pvt++; + } -static void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id) -{ - switch ( msg->subtype ) - { - case CMSG_BLKIF_FE_INTERFACE_STATUS: - blkif_status((blkif_fe_interface_status_t *) - &msg->msg[0]); - break; - default: - msg->length = 0; - break; - } - - ctrl_if_send_response(msg); -} + kfree(copy); -int wait_for_blkif(void) -{ - int err = 0; - int i; - send_driver_status(1); - - /* - * We should read 'nr_interfaces' from response message and wait - * for notifications before proceeding. For now we assume that we - * will be notified of exactly one interface. - */ - for ( i=0; (blkif_state != BLKIF_STATE_CONNECTED) && (i < 10*HZ); i++ ) - { - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(1); - } - - if ( blkif_state != BLKIF_STATE_CONNECTED ) - { - printk(KERN_INFO "xen_blk: Timeout connecting to device!\n"); - err = -ENOSYS; - } - return err; -} + (void)xenbus_switch_state(info->xbdev, XenbusStateConnected); -int __init xlblk_init(void) -{ - int i; + spin_lock_irq(&blkif_io_lock); + + /* Now safe for us to use the shared ring */ + info->connected = BLKIF_STATE_CONNECTED; -#ifdef CONFIG_XEN_BLKDEV_GRANT - if ( 0 > gnttab_alloc_grant_references( MAXIMUM_OUTSTANDING_BLOCK_REQS, - &gref_head, &gref_terminal )) - return 1; - printk(KERN_ALERT "Blkif frontend is using grant tables.\n"); -#endif + /* Send off requeued requests */ + flush_requests(info); - if ( (xen_start_info.flags & SIF_INITDOMAIN) || - (xen_start_info.flags & SIF_BLK_BE_DOMAIN) ) - return 0; + /* Kick any other new requests queued since we resumed */ + kick_pending_request_queues(info); - printk(KERN_INFO "xen_blk: Initialising virtual block device driver\n"); + spin_unlock_irq(&blkif_io_lock); +} - blk_shadow_free = 0; - memset(blk_shadow, 0, sizeof(blk_shadow)); - for ( i = 0; i < BLK_RING_SIZE; i++ ) - blk_shadow[i].req.id = i+1; - blk_shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff; - (void)ctrl_if_register_receiver(CMSG_BLKIF_FE, blkif_ctrlif_rx, - CALLBACK_IN_BLOCKING_CONTEXT); +/* ** Driver Registration ** */ - wait_for_blkif(); - return 0; -} +static struct xenbus_device_id blkfront_ids[] = { + { "vbd" }, + { "" } +}; -void blkdev_suspend(void) -{ -} -void blkdev_resume(void) +static struct xenbus_driver blkfront = { + .name = "vbd", + .owner = THIS_MODULE, + .ids = blkfront_ids, + .probe = blkfront_probe, + .remove = blkfront_remove, + .resume = blkfront_resume, + .otherend_changed = backend_changed, +}; + + +static int __init xlblk_init(void) { -#ifdef CONFIG_XEN_BLKDEV_GRANT - int i, j; - for ( i = 0; i < BLK_RING_SIZE; i++ ) - for ( j = 0; j < BLKIF_MAX_SEGMENTS_PER_REQUEST; j++ ) - blk_shadow[i].req.frame_and_sects[j] |= GRANTREF_INVALID; -#endif - send_driver_status(1); + if (!is_running_on_xen()) + return -ENODEV; + + return xenbus_register_frontend(&blkfront); } +module_init(xlblk_init); -static void blkif_completion(struct blk_shadow *s) + +static void xlblk_exit(void) { - int i; -#ifdef CONFIG_XEN_BLKDEV_GRANT - for ( i = 0; i < s->req.nr_segments; i++ ) - gnttab_release_grant_reference( - &gref_head, blkif_gref_from_fas(s->req.frame_and_sects[i])); -#else - /* This is a hack to get the dirty logging bits set */ - if ( s->req.operation == BLKIF_OP_READ ) - { - for ( i = 0; i < 
s->req.nr_segments; i++ ) - { - unsigned long pfn = s->req.frame_and_sects[i] >> PAGE_SHIFT; - unsigned long mfn = phys_to_machine_mapping[pfn]; - xen_machphys_update(mfn, pfn); - } - } -#endif + return xenbus_unregister_driver(&blkfront); } +module_exit(xlblk_exit); + +MODULE_LICENSE("Dual BSD/GPL");
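
A short note on the shadow free-list that GET_ID_FROM_FREELIST() and ADD_ID_TO_FREELIST() implement above: the fixed-size shadow array doubles as the table of in-flight requests (so blkif_recover() can reissue them after suspend/resume) and as a free list, with each free slot's otherwise unused req.id field holding the index of the next free slot. The standalone C sketch below shows just that pattern, assuming hypothetical names (RING_SIZE, struct demo_shadow, demo_init, demo_get_id, demo_put_id); it is illustrative only and not part of the patch.

#include <assert.h>
#include <stdio.h>

#define RING_SIZE 32	/* stands in for BLK_RING_SIZE in this sketch */

struct demo_shadow {
	unsigned long id;      /* while free: index of the next free slot */
	unsigned long request; /* while in use: opaque handle for the request */
};

static struct demo_shadow shadow[RING_SIZE];
static unsigned long shadow_free;

static void demo_init(void)
{
	int i;
	for (i = 0; i < RING_SIZE; i++)
		shadow[i].id = i + 1;              /* slot i points at slot i+1 */
	shadow[RING_SIZE - 1].id = 0x0fffffff;     /* list terminator, as in the driver */
	shadow_free = 0;                           /* head of the free list */
}

static unsigned long demo_get_id(void)
{
	unsigned long free = shadow_free;
	assert(free < RING_SIZE);                  /* never more slots than ring entries */
	shadow_free = shadow[free].id;             /* unlink: successor becomes the new head */
	return free;
}

static void demo_put_id(unsigned long id)
{
	shadow[id].id = shadow_free;               /* relink the slot at the head */
	shadow[id].request = 0;
	shadow_free = id;
}

int main(void)
{
	demo_init();
	unsigned long a = demo_get_id();           /* returns 0 */
	unsigned long b = demo_get_id();           /* returns 1 */
	demo_put_id(a);                            /* slot 0 is free again and is the new head */
	printf("got %lu then %lu; next allocation returns %lu\n", a, b, demo_get_id());
	return 0;
}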