* Copyright (c) 2004, Christian Limpach
* Copyright (c) 2004, Andrew Warfield
* Copyright (c) 2005, Christopher Clark
+ * Copyright (c) 2005, XenSource Ltd
*
- * This file may be distributed separately from the Linux kernel, or
- * incorporated into other software packages, subject to the following license:
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this source file (the "Software"), to deal in the Software without
* IN THE SOFTWARE.
*/
-#if 1
-#define ASSERT(_p) \
- if ( !(_p) ) { printk("Assertion '%s' failed, line %d, file %s", #_p , \
- __LINE__, __FILE__); *(int*)0=0; }
-#else
-#define ASSERT(_p)
-#endif
-
#include <linux/version.h>
-
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
#include "block.h"
-#else
-#include "common.h"
-#include <linux/blk.h>
-#include <linux/tqueue.h>
-#endif
-
#include <linux/cdrom.h>
#include <linux/sched.h>
#include <linux/interrupt.h>
#include <scsi/scsi.h>
-#include <asm-xen/ctrl_if.h>
-#include <asm-xen/evtchn.h>
-#ifdef CONFIG_XEN_BLKDEV_GRANT
-#include <asm-xen/xen-public/grant_table.h>
-#include <asm-xen/gnttab.h>
-#endif
-
-typedef unsigned char byte; /* from linux/ide.h */
+#include <xen/evtchn.h>
+#include <xen/xenbus.h>
+#include <xen/interface/grant_table.h>
+#include <xen/gnttab.h>
+#include <asm/hypervisor.h>
-/* Control whether runtime update of vbds is enabled. */
-#define ENABLE_VBD_UPDATE 1
+#define BLKIF_STATE_DISCONNECTED 0
+#define BLKIF_STATE_CONNECTED 1
+#define BLKIF_STATE_SUSPENDED 2
-#if ENABLE_VBD_UPDATE
-static void vbd_update(void);
-#else
-static void vbd_update(void){};
-#endif
+#define MAXIMUM_OUTSTANDING_BLOCK_REQS \
+ (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE)
+#define GRANT_INVALID_REF 0
-#define BLKIF_STATE_CLOSED 0
-#define BLKIF_STATE_DISCONNECTED 1
-#define BLKIF_STATE_CONNECTED 2
+static void connect(struct blkfront_info *);
+static void blkfront_closing(struct xenbus_device *);
+static int blkfront_remove(struct xenbus_device *);
+static int talk_to_backend(struct xenbus_device *, struct blkfront_info *);
+static int setup_blkring(struct xenbus_device *, struct blkfront_info *);
-#define WPRINTK(fmt, args...) printk(KERN_WARNING "xen_blk: " fmt, ##args)
+static void kick_pending_request_queues(struct blkfront_info *);
-static int blkif_handle = 0;
-static unsigned int blkif_state = BLKIF_STATE_CLOSED;
-static unsigned int blkif_evtchn = 0;
-static unsigned int blkif_irq = 0;
+static irqreturn_t blkif_int(int irq, void *dev_id, struct pt_regs *ptregs);
+static void blkif_restart_queue(void *arg);
+static void blkif_recover(struct blkfront_info *);
+static void blkif_completion(struct blk_shadow *);
+static void blkif_free(struct blkfront_info *, int);
-static int blkif_control_rsp_valid;
-static blkif_response_t blkif_control_rsp;
-static blkif_front_ring_t blk_ring;
+/**
+ * Entry point to this code when a new device is created. Allocate the basic
+ * structures and the ring buffer for communication with the backend, and
+ * inform the backend of the appropriate details for those. Switch to
+ * Initialised state.
+ */
+static int blkfront_probe(struct xenbus_device *dev,
+ const struct xenbus_device_id *id)
+{
+ int err, vdevice, i;
+ struct blkfront_info *info;
+
+ /* FIXME: Use dynamic device id if this is not set. */
+ err = xenbus_scanf(XBT_NIL, dev->nodename,
+ "virtual-device", "%i", &vdevice);
+ if (err != 1) {
+ xenbus_dev_fatal(dev, err, "reading virtual-device");
+ return err;
+ }
+
+ info = kzalloc(sizeof(*info), GFP_KERNEL);
+ if (!info) {
+ xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure");
+ return -ENOMEM;
+ }
+
+ info->xbdev = dev;
+ info->vdevice = vdevice;
+ info->connected = BLKIF_STATE_DISCONNECTED;
+ INIT_WORK(&info->work, blkif_restart_queue, (void *)info);
+
+ for (i = 0; i < BLK_RING_SIZE; i++)
+ info->shadow[i].req.id = i+1;
+ info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;
+
+ /* Front end dir is a number, which is used as the id. */
+ info->handle = simple_strtoul(strrchr(dev->nodename,'/')+1, NULL, 0);
+ dev->dev.driver_data = info;
+
+ err = talk_to_backend(dev, info);
+ if (err) {
+ kfree(info);
+ dev->dev.driver_data = NULL;
+ return err;
+ }
+
+ return 0;
+}
+
+
+/**
+ * We are reconnecting to the backend, due to a suspend/resume, or a backend
+ * driver restart. We tear down our blkif structure and recreate it, but
+ * leave the device-layer structures intact so that this is transparent to the
+ * rest of the kernel.
+ */
+static int blkfront_resume(struct xenbus_device *dev)
+{
+ struct blkfront_info *info = dev->dev.driver_data;
+ int err;
+
+ DPRINTK("blkfront_resume: %s\n", dev->nodename);
+
+ blkif_free(info, 1);
+
+ err = talk_to_backend(dev, info);
+ if (!err)
+ blkif_recover(info);
+
+ return err;
+}
+
+
+/* Common code used when first setting up, and when resuming. */
+static int talk_to_backend(struct xenbus_device *dev,
+ struct blkfront_info *info)
+{
+ const char *message = NULL;
+ struct xenbus_transaction xbt;
+ int err;
+
+ /* Create shared ring, alloc event channel. */
+ err = setup_blkring(dev, info);
+ if (err)
+ goto out;
+
+again:
+ err = xenbus_transaction_start(&xbt);
+ if (err) {
+ xenbus_dev_fatal(dev, err, "starting transaction");
+ goto destroy_blkring;
+ }
+
+ err = xenbus_printf(xbt, dev->nodename,
+ "ring-ref","%u", info->ring_ref);
+ if (err) {
+ message = "writing ring-ref";
+ goto abort_transaction;
+ }
+ err = xenbus_printf(xbt, dev->nodename,
+ "event-channel", "%u", info->evtchn);
+ if (err) {
+ message = "writing event-channel";
+ goto abort_transaction;
+ }
+
+ err = xenbus_transaction_end(xbt, 0);
+ if (err) {
+ if (err == -EAGAIN)
+ goto again;
+ xenbus_dev_fatal(dev, err, "completing transaction");
+ goto destroy_blkring;
+ }
+
+ xenbus_switch_state(dev, XenbusStateInitialised);
+
+ return 0;
+
+ abort_transaction:
+ xenbus_transaction_end(xbt, 1);
+ if (message)
+ xenbus_dev_fatal(dev, err, "%s", message);
+ destroy_blkring:
+ blkif_free(info, 0);
+ out:
+ return err;
+}
-#define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, PAGE_SIZE)
-#ifdef CONFIG_XEN_BLKDEV_GRANT
-static domid_t rdomid = 0;
-static grant_ref_t gref_head, gref_terminal;
-#define MAXIMUM_OUTSTANDING_BLOCK_REQS \
- (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLKIF_RING_SIZE)
-#define GRANTREF_INVALID (1<<15)
-#endif
+static int setup_blkring(struct xenbus_device *dev,
+ struct blkfront_info *info)
+{
+ blkif_sring_t *sring;
+ int err;
-static struct blk_shadow {
- blkif_request_t req;
- unsigned long request;
- unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST];
-} blk_shadow[BLK_RING_SIZE];
-unsigned long blk_shadow_free;
+ info->ring_ref = GRANT_INVALID_REF;
-static int recovery = 0; /* Recovery in progress: protected by blkif_io_lock */
+ sring = (blkif_sring_t *)__get_free_page(GFP_KERNEL);
+ if (!sring) {
+ xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring");
+ return -ENOMEM;
+ }
+ SHARED_RING_INIT(sring);
+ FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);
-static void kick_pending_request_queues(void);
+ err = xenbus_grant_ring(dev, virt_to_mfn(info->ring.sring));
+ if (err < 0) {
+ free_page((unsigned long)sring);
+ info->ring.sring = NULL;
+ goto fail;
+ }
+ info->ring_ref = err;
-int __init xlblk_init(void);
+ err = xenbus_alloc_evtchn(dev, &info->evtchn);
+ if (err)
+ goto fail;
-static void blkif_completion(struct blk_shadow *s);
+ err = bind_evtchn_to_irqhandler(
+ info->evtchn, blkif_int, SA_SAMPLE_RANDOM, "blkif", info);
+ if (err <= 0) {
+ xenbus_dev_fatal(dev, err,
+ "bind_evtchn_to_irqhandler failed");
+ goto fail;
+ }
+ info->irq = err;
-static inline int GET_ID_FROM_FREELIST(void)
-{
- unsigned long free = blk_shadow_free;
- BUG_ON(free > BLK_RING_SIZE);
- blk_shadow_free = blk_shadow[free].req.id;
- blk_shadow[free].req.id = 0x0fffffee; /* debug */
- return free;
+ return 0;
+fail:
+ blkif_free(info, 0);
+ return err;
}
-static inline void ADD_ID_TO_FREELIST(unsigned long id)
+
+/**
+ * Callback received when the backend's state changes.
+ */
+static void backend_changed(struct xenbus_device *dev,
+ enum xenbus_state backend_state)
{
- blk_shadow[id].req.id = blk_shadow_free;
- blk_shadow[id].request = 0;
- blk_shadow_free = id;
-}
+ struct blkfront_info *info = dev->dev.driver_data;
+ struct block_device *bd;
+ DPRINTK("blkfront:backend_changed.\n");
-/************************ COMMON CODE (inlined) ************************/
+ switch (backend_state) {
+ case XenbusStateUnknown:
+ case XenbusStateInitialising:
+ case XenbusStateInitWait:
+ case XenbusStateInitialised:
+ case XenbusStateClosed:
+ break;
-/* Kernel-specific definitions used in the common code */
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-#define DISABLE_SCATTERGATHER()
-#else
-static int sg_operation = -1;
-#define DISABLE_SCATTERGATHER() (sg_operation = -1)
-#endif
+ case XenbusStateConnected:
+ connect(info);
+ break;
-static inline void pickle_request(struct blk_shadow *s, blkif_request_t *r)
-{
-#ifndef CONFIG_XEN_BLKDEV_GRANT
- int i;
-#endif
+ case XenbusStateClosing:
+ bd = bdget(info->dev);
+ if (bd == NULL)
+ xenbus_dev_fatal(dev, -ENODEV, "bdget failed");
- s->req = *r;
-
-#ifndef CONFIG_XEN_BLKDEV_GRANT
- for ( i = 0; i < r->nr_segments; i++ )
- s->req.frame_and_sects[i] = machine_to_phys(r->frame_and_sects[i]);
-#endif
+ mutex_lock(&bd->bd_mutex);
+ if (info->users > 0)
+ xenbus_dev_error(dev, -EBUSY,
+ "Device in use; refusing to close");
+ else
+ blkfront_closing(dev);
+ mutex_unlock(&bd->bd_mutex);
+ bdput(bd);
+ break;
+ }
}
-static inline void unpickle_request(blkif_request_t *r, struct blk_shadow *s)
-{
-#ifndef CONFIG_XEN_BLKDEV_GRANT
- int i;
-#endif
-
- *r = s->req;
-#ifndef CONFIG_XEN_BLKDEV_GRANT
- for ( i = 0; i < s->req.nr_segments; i++ )
- r->frame_and_sects[i] = phys_to_machine(s->req.frame_and_sects[i]);
-#endif
-}
+/* ** Connection ** */
-static inline void flush_requests(void)
+/*
+ * Invoked when the backend is finally 'ready' (and has told produced
+ * the details about the physical device - #sectors, size, etc).
+ */
+static void connect(struct blkfront_info *info)
+{
+ unsigned long sectors, sector_size;
+ unsigned int binfo;
+ int err;
+
+ if ((info->connected == BLKIF_STATE_CONNECTED) ||
+ (info->connected == BLKIF_STATE_SUSPENDED) )
+ return;
+
+ DPRINTK("blkfront.c:connect:%s.\n", info->xbdev->otherend);
+
+ err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
+ "sectors", "%lu", §ors,
+ "info", "%u", &binfo,
+ "sector-size", "%lu", §or_size,
+ NULL);
+ if (err) {
+ xenbus_dev_fatal(info->xbdev, err,
+ "reading backend fields at %s",
+ info->xbdev->otherend);
+ return;
+ }
+
+ err = xlvbd_add(sectors, info->vdevice, binfo, sector_size, info);
+ if (err) {
+ xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",
+ info->xbdev->otherend);
+ return;
+ }
+
+ (void)xenbus_switch_state(info->xbdev, XenbusStateConnected);
+
+ /* Kick pending requests. */
+ spin_lock_irq(&blkif_io_lock);
+ info->connected = BLKIF_STATE_CONNECTED;
+ kick_pending_request_queues(info);
+ spin_unlock_irq(&blkif_io_lock);
+
+ add_disk(info->gd);
+}
+
+/**
+ * Handle the change of state of the backend to Closing. We must delete our
+ * device-layer structures now, to ensure that writes are flushed through to
+ * the backend. Once is this done, we can switch to Closed in
+ * acknowledgement.
+ */
+static void blkfront_closing(struct xenbus_device *dev)
{
- DISABLE_SCATTERGATHER();
- RING_PUSH_REQUESTS(&blk_ring);
- notify_via_evtchn(blkif_evtchn);
-}
+ struct blkfront_info *info = dev->dev.driver_data;
+ unsigned long flags;
+ DPRINTK("blkfront_closing: %s removed\n", dev->nodename);
-/************************** KERNEL VERSION 2.6 **************************/
+ if (info->rq == NULL)
+ return;
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+ spin_lock_irqsave(&blkif_io_lock, flags);
+ /* No more blkif_request(). */
+ blk_stop_queue(info->rq);
+ /* No more gnttab callback work. */
+ gnttab_cancel_free_callback(&info->callback);
+ flush_scheduled_work();
+ spin_unlock_irqrestore(&blkif_io_lock, flags);
-module_init(xlblk_init);
+ xlvbd_del(info);
-#if ENABLE_VBD_UPDATE
-static void update_vbds_task(void *unused)
-{
- xlvbd_update_vbds();
+ xenbus_switch_state(dev, XenbusStateClosed);
}
-static void vbd_update(void)
-{
- static DECLARE_WORK(update_tq, update_vbds_task, NULL);
- schedule_work(&update_tq);
-}
-#endif /* ENABLE_VBD_UPDATE */
-static void kick_pending_request_queues(void)
+static int blkfront_remove(struct xenbus_device *dev)
{
- if ( (xlbd_blk_queue != NULL) &&
- test_bit(QUEUE_FLAG_STOPPED, &xlbd_blk_queue->queue_flags) )
- {
- blk_start_queue(xlbd_blk_queue);
- /* XXXcl call to request_fn should not be needed but
- * we get stuck without... needs investigating
- */
- xlbd_blk_queue->request_fn(xlbd_blk_queue);
- }
-}
+ struct blkfront_info *info = dev->dev.driver_data;
+ DPRINTK("blkfront_remove: %s removed\n", dev->nodename);
-int blkif_open(struct inode *inode, struct file *filep)
-{
- struct gendisk *gd = inode->i_bdev->bd_disk;
- struct xlbd_disk_info *di = (struct xlbd_disk_info *)gd->private_data;
+ blkif_free(info, 0);
- /* Update of usage count is protected by per-device semaphore. */
- di->mi->usage++;
-
- return 0;
-}
+ kfree(info);
-
-int blkif_release(struct inode *inode, struct file *filep)
-{
- struct gendisk *gd = inode->i_bdev->bd_disk;
- struct xlbd_disk_info *di = (struct xlbd_disk_info *)gd->private_data;
-
- /*
- * When usage drops to zero it may allow more VBD updates to occur.
- * Update of usage count is protected by a per-device semaphore.
- */
- if ( --di->mi->usage == 0 )
- vbd_update();
-
- return 0;
+ return 0;
}
-int blkif_ioctl(struct inode *inode, struct file *filep,
- unsigned command, unsigned long argument)
+static inline int GET_ID_FROM_FREELIST(
+ struct blkfront_info *info)
{
- int i;
-
- DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n",
- command, (long)argument, inode->i_rdev);
-
- switch ( command )
- {
- case HDIO_GETGEO:
- /* return ENOSYS to use defaults */
- return -ENOSYS;
-
- case CDROMMULTISESSION:
- DPRINTK("FIXME: support multisession CDs later\n");
- for ( i = 0; i < sizeof(struct cdrom_multisession); i++ )
- if ( put_user(0, (byte *)(argument + i)) ) return -EFAULT;
- return 0;
-
- default:
- printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n",
- command);
- return -ENOSYS;
- }
-
- return 0;
+ unsigned long free = info->shadow_free;
+ BUG_ON(free > BLK_RING_SIZE);
+ info->shadow_free = info->shadow[free].req.id;
+ info->shadow[free].req.id = 0x0fffffee; /* debug */
+ return free;
}
-
-/*
- * blkif_queue_request
- *
- * request block io
- *
- * id: for guest use only.
- * operation: BLKIF_OP_{READ,WRITE,PROBE}
- * buffer: buffer to read/write into. this should be a
- * virtual address in the guest os.
- */
-static int blkif_queue_request(struct request *req)
+static inline void ADD_ID_TO_FREELIST(
+ struct blkfront_info *info, unsigned long id)
{
- struct xlbd_disk_info *di =
- (struct xlbd_disk_info *)req->rq_disk->private_data;
- unsigned long buffer_ma;
- blkif_request_t *ring_req;
- struct bio *bio;
- struct bio_vec *bvec;
- int idx;
- unsigned long id;
- unsigned int fsect, lsect;
-#ifdef CONFIG_XEN_BLKDEV_GRANT
- int ref;
-#endif
-
- if ( unlikely(blkif_state != BLKIF_STATE_CONNECTED) )
- return 1;
-
- /* Fill out a communications ring structure. */
- ring_req = RING_GET_REQUEST(&blk_ring, blk_ring.req_prod_pvt);
- id = GET_ID_FROM_FREELIST();
- blk_shadow[id].request = (unsigned long)req;
-
- ring_req->id = id;
- ring_req->operation = rq_data_dir(req) ? BLKIF_OP_WRITE :
- BLKIF_OP_READ;
- ring_req->sector_number = (blkif_sector_t)req->sector;
- ring_req->device = di->xd_device;
-
- ring_req->nr_segments = 0;
- rq_for_each_bio(bio, req)
- {
- bio_for_each_segment(bvec, bio, idx)
- {
- if ( ring_req->nr_segments == BLKIF_MAX_SEGMENTS_PER_REQUEST )
- BUG();
- buffer_ma = page_to_phys(bvec->bv_page);
- fsect = bvec->bv_offset >> 9;
- lsect = fsect + (bvec->bv_len >> 9) - 1;
-#ifdef CONFIG_XEN_BLKDEV_GRANT
- /* install a grant reference. */
- ref = gnttab_claim_grant_reference(&gref_head, gref_terminal);
- ASSERT( ref != -ENOSPC );
-
- gnttab_grant_foreign_access_ref(
- ref,
- rdomid,
- buffer_ma >> PAGE_SHIFT,
- rq_data_dir(req) );
-
- blk_shadow[id].frame[ring_req->nr_segments] =
- buffer_ma >> PAGE_SHIFT;
-
- ring_req->frame_and_sects[ring_req->nr_segments++] =
- (((u32) ref) << 16) | (fsect << 3) | lsect;
-
-#else
- ring_req->frame_and_sects[ring_req->nr_segments++] =
- buffer_ma | (fsect << 3) | lsect;
-#endif
- }
- }
-
- blk_ring.req_prod_pvt++;
-
- /* Keep a private copy so we can reissue requests when recovering. */
- pickle_request(&blk_shadow[id], ring_req);
-
- return 0;
+ info->shadow[id].req.id = info->shadow_free;
+ info->shadow[id].request = 0;
+ info->shadow_free = id;
}
-
-/*
- * do_blkif_request
- * read a block; request is in a request queue
- */
-void do_blkif_request(request_queue_t *rq)
+static inline void flush_requests(struct blkfront_info *info)
{
- struct request *req;
- int queued;
-
- DPRINTK("Entered do_blkif_request\n");
-
- queued = 0;
-
- while ( (req = elv_next_request(rq)) != NULL )
- {
- if ( !blk_fs_request(req) )
- {
- end_request(req, 0);
- continue;
- }
-
- if ( RING_FULL(&blk_ring) )
- {
- blk_stop_queue(rq);
- break;
- }
-
- DPRINTK("do_blk_req %p: cmd %p, sec %lx, (%u/%li) buffer:%p [%s]\n",
- req, req->cmd, req->sector, req->current_nr_sectors,
- req->nr_sectors, req->buffer,
- rq_data_dir(req) ? "write" : "read");
-
- blkdev_dequeue_request(req);
- if ( blkif_queue_request(req) )
- {
- blk_stop_queue(rq);
- break;
- }
-
- queued++;
- }
-
- if ( queued != 0 )
- flush_requests();
-}
+ int notify;
+ RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->ring, notify);
-static irqreturn_t blkif_int(int irq, void *dev_id, struct pt_regs *ptregs)
-{
- struct request *req;
- blkif_response_t *bret;
- RING_IDX i, rp;
- unsigned long flags;
-
- spin_lock_irqsave(&blkif_io_lock, flags);
-
- if ( unlikely(blkif_state == BLKIF_STATE_CLOSED) ||
- unlikely(recovery) )
- {
- spin_unlock_irqrestore(&blkif_io_lock, flags);
- return IRQ_HANDLED;
- }
-
- rp = blk_ring.sring->rsp_prod;
- rmb(); /* Ensure we see queued responses up to 'rp'. */
-
- for ( i = blk_ring.rsp_cons; i != rp; i++ )
- {
- unsigned long id;
-
- bret = RING_GET_RESPONSE(&blk_ring, i);
- id = bret->id;
- req = (struct request *)blk_shadow[id].request;
-
- blkif_completion(&blk_shadow[id]);
-
- ADD_ID_TO_FREELIST(id);
-
- switch ( bret->operation )
- {
- case BLKIF_OP_READ:
- case BLKIF_OP_WRITE:
- if ( unlikely(bret->status != BLKIF_RSP_OKAY) )
- DPRINTK("Bad return from blkdev data request: %x\n",
- bret->status);
-
- if ( unlikely(end_that_request_first
- (req,
- (bret->status == BLKIF_RSP_OKAY),
- req->hard_nr_sectors)) )
- BUG();
- end_that_request_last(req);
-
- break;
- case BLKIF_OP_PROBE:
- memcpy(&blkif_control_rsp, bret, sizeof(*bret));
- blkif_control_rsp_valid = 1;
- break;
- default:
- BUG();
- }
- }
-
- blk_ring.rsp_cons = i;
-
- kick_pending_request_queues();
-
- spin_unlock_irqrestore(&blkif_io_lock, flags);
-
- return IRQ_HANDLED;
+ if (notify)
+ notify_remote_via_irq(info->irq);
}
-#else
-/************************** KERNEL VERSION 2.4 **************************/
-
-static kdev_t sg_dev;
-static unsigned long sg_next_sect;
-
-/*
- * Request queues with outstanding work, but ring is currently full.
- * We need no special lock here, as we always access this with the
- * blkif_io_lock held. We only need a small maximum list.
- */
-#define MAX_PENDING 8
-static request_queue_t *pending_queues[MAX_PENDING];
-static int nr_pending;
-
-
-#define blkif_io_lock io_request_lock
-
-/*============================================================================*/
-#if ENABLE_VBD_UPDATE
-
-/*
- * blkif_update_int/update-vbds_task - handle VBD update events.
- * Schedule a task for keventd to run, which will update the VBDs and perform
- * the corresponding updates to our view of VBD state.
- */
-static void update_vbds_task(void *unused)
-{
- xlvbd_update_vbds();
+static void kick_pending_request_queues(struct blkfront_info *info)
+{
+ if (!RING_FULL(&info->ring)) {
+ /* Re-enable calldowns. */
+ blk_start_queue(info->rq);
+ /* Kick things off immediately. */
+ do_blkif_request(info->rq);
+ }
}
-static void vbd_update(void)
+static void blkif_restart_queue(void *arg)
{
- static struct tq_struct update_tq;
- update_tq.routine = update_vbds_task;
- schedule_task(&update_tq);
+ struct blkfront_info *info = (struct blkfront_info *)arg;
+ spin_lock_irq(&blkif_io_lock);
+ if (info->connected == BLKIF_STATE_CONNECTED)
+ kick_pending_request_queues(info);
+ spin_unlock_irq(&blkif_io_lock);
}
-#endif /* ENABLE_VBD_UPDATE */
-/*============================================================================*/
-
-static void kick_pending_request_queues(void)
+static void blkif_restart_queue_callback(void *arg)
{
- /* We kick pending request queues if the ring is reasonably empty. */
- if ( (nr_pending != 0) &&
- (RING_PENDING_REQUESTS(&blk_ring) < (BLK_RING_SIZE >> 1)) )
- {
- /* Attempt to drain the queue, but bail if the ring becomes full. */
- while ( (nr_pending != 0) && !RING_FULL(&blk_ring) )
- do_blkif_request(pending_queues[--nr_pending]);
- }
+ struct blkfront_info *info = (struct blkfront_info *)arg;
+ schedule_work(&info->work);
}
int blkif_open(struct inode *inode, struct file *filep)
{
- short xldev = inode->i_rdev;
- struct gendisk *gd = get_gendisk(xldev);
- xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev);
- short minor = MINOR(xldev);
-
- if ( gd->part[minor].nr_sects == 0 )
- {
- /*
- * Device either doesn't exist, or has zero capacity; we use a few
- * cheesy heuristics to return the relevant error code
- */
- if ( (gd->sizes[minor >> gd->minor_shift] != 0) ||
- ((minor & (gd->max_p - 1)) != 0) )
- {
- /*
- * We have a real device, but no such partition, or we just have a
- * partition number so guess this is the problem.
- */
- return -ENXIO; /* no such device or address */
- }
- else if ( gd->flags[minor >> gd->minor_shift] & GENHD_FL_REMOVABLE )
- {
- /* This is a removable device => assume that media is missing. */
- return -ENOMEDIUM; /* media not present (this is a guess) */
- }
- else
- {
- /* Just go for the general 'no such device' error. */
- return -ENODEV; /* no such device */
- }
- }
-
- /* Update of usage count is protected by per-device semaphore. */
- disk->usage++;
-
- return 0;
+ struct blkfront_info *info = inode->i_bdev->bd_disk->private_data;
+ info->users++;
+ return 0;
}
int blkif_release(struct inode *inode, struct file *filep)
{
- xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev);
+ struct blkfront_info *info = inode->i_bdev->bd_disk->private_data;
+ info->users--;
+ if (info->users == 0) {
+ /* Check whether we have been instructed to close. We will
+ have ignored this request initially, as the device was
+ still mounted. */
+ struct xenbus_device * dev = info->xbdev;
+ enum xenbus_state state = xenbus_read_driver_state(dev->otherend);
- /*
- * When usage drops to zero it may allow more VBD updates to occur.
- * Update of usage count is protected by a per-device semaphore.
- */
- if ( --disk->usage == 0 ) {
- vbd_update();
- }
-
- return 0;
+ if (state == XenbusStateClosing)
+ blkfront_closing(dev);
+ }
+ return 0;
}
int blkif_ioctl(struct inode *inode, struct file *filep,
- unsigned command, unsigned long argument)
+ unsigned command, unsigned long argument)
{
- kdev_t dev = inode->i_rdev;
- struct hd_geometry *geo = (struct hd_geometry *)argument;
- struct gendisk *gd;
- struct hd_struct *part;
- int i;
- unsigned short cylinders;
- byte heads, sectors;
-
- /* NB. No need to check permissions. That is done for us. */
-
- DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n",
- command, (long) argument, dev);
-
- gd = get_gendisk(dev);
- part = &gd->part[MINOR(dev)];
-
- switch ( command )
- {
- case BLKGETSIZE:
- DPRINTK_IOCTL(" BLKGETSIZE: %x %lx\n", BLKGETSIZE, part->nr_sects);
- return put_user(part->nr_sects, (unsigned long *) argument);
-
- case BLKGETSIZE64:
- DPRINTK_IOCTL(" BLKGETSIZE64: %x %llx\n", BLKGETSIZE64,
- (u64)part->nr_sects * 512);
- return put_user((u64)part->nr_sects * 512, (u64 *) argument);
-
- case BLKRRPART: /* re-read partition table */
- DPRINTK_IOCTL(" BLKRRPART: %x\n", BLKRRPART);
- return blkif_revalidate(dev);
-
- case BLKSSZGET:
- return hardsect_size[MAJOR(dev)][MINOR(dev)];
-
- case BLKBSZGET: /* get block size */
- DPRINTK_IOCTL(" BLKBSZGET: %x\n", BLKBSZGET);
- break;
-
- case BLKBSZSET: /* set block size */
- DPRINTK_IOCTL(" BLKBSZSET: %x\n", BLKBSZSET);
- break;
-
- case BLKRASET: /* set read-ahead */
- DPRINTK_IOCTL(" BLKRASET: %x\n", BLKRASET);
- break;
-
- case BLKRAGET: /* get read-ahead */
- DPRINTK_IOCTL(" BLKRAFET: %x\n", BLKRAGET);
- break;
-
- case HDIO_GETGEO:
- DPRINTK_IOCTL(" HDIO_GETGEO: %x\n", HDIO_GETGEO);
- if (!argument) return -EINVAL;
-
- /* We don't have real geometry info, but let's at least return
- values consistent with the size of the device */
-
- heads = 0xff;
- sectors = 0x3f;
- cylinders = part->nr_sects / (heads * sectors);
-
- if (put_user(0x00, (unsigned long *) &geo->start)) return -EFAULT;
- if (put_user(heads, (byte *)&geo->heads)) return -EFAULT;
- if (put_user(sectors, (byte *)&geo->sectors)) return -EFAULT;
- if (put_user(cylinders, (unsigned short *)&geo->cylinders)) return -EFAULT;
-
- return 0;
-
- case HDIO_GETGEO_BIG:
- DPRINTK_IOCTL(" HDIO_GETGEO_BIG: %x\n", HDIO_GETGEO_BIG);
- if (!argument) return -EINVAL;
-
- /* We don't have real geometry info, but let's at least return
- values consistent with the size of the device */
-
- heads = 0xff;
- sectors = 0x3f;
- cylinders = part->nr_sects / (heads * sectors);
-
- if (put_user(0x00, (unsigned long *) &geo->start)) return -EFAULT;
- if (put_user(heads, (byte *)&geo->heads)) return -EFAULT;
- if (put_user(sectors, (byte *)&geo->sectors)) return -EFAULT;
- if (put_user(cylinders, (unsigned int *) &geo->cylinders)) return -EFAULT;
-
- return 0;
-
- case CDROMMULTISESSION:
- DPRINTK("FIXME: support multisession CDs later\n");
- for ( i = 0; i < sizeof(struct cdrom_multisession); i++ )
- if ( put_user(0, (byte *)(argument + i)) ) return -EFAULT;
- return 0;
-
- case SCSI_IOCTL_GET_BUS_NUMBER:
- DPRINTK("FIXME: SCSI_IOCTL_GET_BUS_NUMBER ioctl in XL blkif");
- return -ENOSYS;
-
- default:
- printk(KERN_ALERT "ioctl %08x not supported by XL blkif\n", command);
- return -ENOSYS;
- }
-
- return 0;
-}
+ int i;
+ DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n",
+ command, (long)argument, inode->i_rdev);
+ switch (command) {
+ case CDROMMULTISESSION:
+ DPRINTK("FIXME: support multisession CDs later\n");
+ for (i = 0; i < sizeof(struct cdrom_multisession); i++)
+ if (put_user(0, (char __user *)(argument + i)))
+ return -EFAULT;
+ return 0;
-/* check media change: should probably do something here in some cases :-) */
-int blkif_check(kdev_t dev)
-{
- DPRINTK("blkif_check\n");
- return 0;
+ default:
+ /*printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n",
+ command);*/
+ return -EINVAL; /* same return as native Linux */
+ }
+
+ return 0;
}
-int blkif_revalidate(kdev_t dev)
+
+int blkif_getgeo(struct block_device *bd, struct hd_geometry *hg)
{
- struct block_device *bd;
- struct gendisk *gd;
- xl_disk_t *disk;
- unsigned long capacity;
- int i, rc = 0;
-
- if ( (bd = bdget(dev)) == NULL )
- return -EINVAL;
-
- /*
- * Update of partition info, and check of usage count, is protected
- * by the per-block-device semaphore.
- */
- down(&bd->bd_sem);
-
- if ( ((gd = get_gendisk(dev)) == NULL) ||
- ((disk = xldev_to_xldisk(dev)) == NULL) ||
- ((capacity = gd->part[MINOR(dev)].nr_sects) == 0) )
- {
- rc = -EINVAL;
- goto out;
- }
-
- if ( disk->usage > 1 )
- {
- rc = -EBUSY;
- goto out;
- }
-
- /* Only reread partition table if VBDs aren't mapped to partitions. */
- if ( !(gd->flags[MINOR(dev) >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) )
- {
- for ( i = gd->max_p - 1; i >= 0; i-- )
- {
- invalidate_device(dev+i, 1);
- gd->part[MINOR(dev+i)].start_sect = 0;
- gd->part[MINOR(dev+i)].nr_sects = 0;
- gd->sizes[MINOR(dev+i)] = 0;
- }
-
- grok_partitions(gd, MINOR(dev)>>gd->minor_shift, gd->max_p, capacity);
- }
+ /* We don't have real geometry info, but let's at least return
+ values consistent with the size of the device */
+ sector_t nsect = get_capacity(bd->bd_disk);
+ sector_t cylinders = nsect;
- out:
- up(&bd->bd_sem);
- bdput(bd);
- return rc;
+ hg->heads = 0xff;
+ hg->sectors = 0x3f;
+ sector_div(cylinders, hg->heads * hg->sectors);
+ hg->cylinders = cylinders;
+ if ((sector_t)(hg->cylinders + 1) * hg->heads * hg->sectors < nsect)
+ hg->cylinders = 0xffff;
+ return 0;
}
/*
* blkif_queue_request
*
- * request block io
- *
+ * request block io
+ *
* id: for guest use only.
* operation: BLKIF_OP_{READ,WRITE,PROBE}
* buffer: buffer to read/write into. this should be a
* virtual address in the guest os.
*/
-static int blkif_queue_request(unsigned long id,
- int operation,
- char * buffer,
- unsigned long sector_number,
- unsigned short nr_sectors,
- kdev_t device)
+static int blkif_queue_request(struct request *req)
{
- unsigned long buffer_ma = virt_to_bus(buffer);
- unsigned long xid;
- struct gendisk *gd;
- blkif_request_t *req;
- struct buffer_head *bh;
- unsigned int fsect, lsect;
-#ifdef CONFIG_XEN_BLKDEV_GRANT
- int ref;
-#endif
-
- fsect = (buffer_ma & ~PAGE_MASK) >> 9;
- lsect = fsect + nr_sectors - 1;
-
- /* Buffer must be sector-aligned. Extent mustn't cross a page boundary. */
- if ( unlikely((buffer_ma & ((1<<9)-1)) != 0) )
- BUG();
- if ( lsect > 7 )
- BUG();
-
- buffer_ma &= PAGE_MASK;
-
- if ( unlikely(blkif_state != BLKIF_STATE_CONNECTED) )
- return 1;
-
- switch ( operation )
- {
-
- case BLKIF_OP_READ:
- case BLKIF_OP_WRITE:
- gd = get_gendisk(device);
-
- /*
- * Update the sector_number we'll pass down as appropriate; note that
- * we could sanity check that resulting sector will be in this
- * partition, but this will happen in driver backend anyhow.
- */
- sector_number += gd->part[MINOR(device)].start_sect;
-
- /*
- * If this unit doesn't consist of virtual partitions then we clear
- * the partn bits from the device number.
- */
- if ( !(gd->flags[MINOR(device)>>gd->minor_shift] &
- GENHD_FL_VIRT_PARTNS) )
- device &= ~(gd->max_p - 1);
-
- if ( (sg_operation == operation) &&
- (sg_dev == device) &&
- (sg_next_sect == sector_number) )
- {
- req = RING_GET_REQUEST(&blk_ring,
- blk_ring.req_prod_pvt - 1);
- bh = (struct buffer_head *)id;
-
- bh->b_reqnext = (struct buffer_head *)blk_shadow[req->id].request;
- blk_shadow[req->id].request = (unsigned long)id;
-
-#ifdef CONFIG_XEN_BLKDEV_GRANT
- /* install a grant reference. */
- ref = gnttab_claim_grant_reference(&gref_head, gref_terminal);
- ASSERT( ref != -ENOSPC );
-
- gnttab_grant_foreign_access_ref(
- ref,
- rdomid,
- buffer_ma >> PAGE_SHIFT,
- ( operation == BLKIF_OP_WRITE ? 1 : 0 ) );
-
- blk_shadow[id].frame[req->nr_segments] =
- buffer_ma >> PAGE_SHIFT;
-
- req->frame_and_sects[req->nr_segments] =
- (((u32) ref ) << 16) | (fsect << 3) | lsect;
-#else
- req->frame_and_sects[req->nr_segments] =
- buffer_ma | (fsect << 3) | lsect;
-#endif
- if ( ++req->nr_segments < BLKIF_MAX_SEGMENTS_PER_REQUEST )
- sg_next_sect += nr_sectors;
- else
- DISABLE_SCATTERGATHER();
-
- /* Update the copy of the request in the recovery ring. */
- pickle_request(&blk_shadow[req->id], req );
-
- return 0;
- }
- else if ( RING_FULL(&blk_ring) )
- {
- return 1;
- }
- else
- {
- sg_operation = operation;
- sg_dev = device;
- sg_next_sect = sector_number + nr_sectors;
- }
- break;
-
- default:
- panic("unknown op %d\n", operation);
- }
-
- /* Fill out a communications ring structure. */
- req = RING_GET_REQUEST(&blk_ring, blk_ring.req_prod_pvt);
-
- xid = GET_ID_FROM_FREELIST();
- blk_shadow[xid].request = (unsigned long)id;
-
- req->id = xid;
- req->operation = operation;
- req->sector_number = (blkif_sector_t)sector_number;
- req->device = device;
- req->nr_segments = 1;
-#ifdef CONFIG_XEN_BLKDEV_GRANT
- /* install a grant reference. */
- ref = gnttab_claim_grant_reference(&gref_head, gref_terminal);
- ASSERT( ref != -ENOSPC );
-
- gnttab_grant_foreign_access_ref(
- ref,
- rdomid,
- buffer_ma >> PAGE_SHIFT,
- ( operation == BLKIF_OP_WRITE ? 1 : 0 ) );
-
- blk_shadow[xid].frame[0] = buffer_ma >> PAGE_SHIFT;
-
- req->frame_and_sects[0] = (((u32) ref)<<16) | (fsect<<3) | lsect;
-#else
- req->frame_and_sects[0] = buffer_ma | (fsect<<3) | lsect;
-#endif
-
- /* Keep a private copy so we can reissue requests when recovering. */
- pickle_request(&blk_shadow[xid], req);
-
- blk_ring.req_prod_pvt++;
-
- return 0;
+ struct blkfront_info *info = req->rq_disk->private_data;
+ unsigned long buffer_mfn;
+ blkif_request_t *ring_req;
+ struct bio *bio;
+ struct bio_vec *bvec;
+ int idx;
+ unsigned long id;
+ unsigned int fsect, lsect;
+ int ref;
+ grant_ref_t gref_head;
+
+ if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
+ return 1;
+
+ if (gnttab_alloc_grant_references(
+ BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) {
+ gnttab_request_free_callback(
+ &info->callback,
+ blkif_restart_queue_callback,
+ info,
+ BLKIF_MAX_SEGMENTS_PER_REQUEST);
+ return 1;
+ }
+
+ /* Fill out a communications ring structure. */
+ ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
+ id = GET_ID_FROM_FREELIST(info);
+ info->shadow[id].request = (unsigned long)req;
+
+ ring_req->id = id;
+ ring_req->operation = rq_data_dir(req) ?
+ BLKIF_OP_WRITE : BLKIF_OP_READ;
+ ring_req->sector_number = (blkif_sector_t)req->sector;
+ ring_req->handle = info->handle;
+
+ ring_req->nr_segments = 0;
+ rq_for_each_bio (bio, req) {
+ bio_for_each_segment (bvec, bio, idx) {
+ BUG_ON(ring_req->nr_segments
+ == BLKIF_MAX_SEGMENTS_PER_REQUEST);
+ buffer_mfn = page_to_phys(bvec->bv_page) >> PAGE_SHIFT;
+ fsect = bvec->bv_offset >> 9;
+ lsect = fsect + (bvec->bv_len >> 9) - 1;
+ /* install a grant reference. */
+ ref = gnttab_claim_grant_reference(&gref_head);
+ BUG_ON(ref == -ENOSPC);
+
+ gnttab_grant_foreign_access_ref(
+ ref,
+ info->xbdev->otherend_id,
+ buffer_mfn,
+ rq_data_dir(req) );
+
+ info->shadow[id].frame[ring_req->nr_segments] =
+ mfn_to_pfn(buffer_mfn);
+
+ ring_req->seg[ring_req->nr_segments] =
+ (struct blkif_request_segment) {
+ .gref = ref,
+ .first_sect = fsect,
+ .last_sect = lsect };
+
+ ring_req->nr_segments++;
+ }
+ }
+
+ info->ring.req_prod_pvt++;
+
+ /* Keep a private copy so we can reissue requests when recovering. */
+ info->shadow[id].req = *ring_req;
+
+ gnttab_free_grant_references(gref_head);
+
+ return 0;
}
-
/*
* do_blkif_request
* read a block; request is in a request queue
*/
void do_blkif_request(request_queue_t *rq)
{
- struct request *req;
- struct buffer_head *bh, *next_bh;
- int rw, nsect, full, queued = 0;
-
- DPRINTK("Entered do_blkif_request\n");
-
- while ( !rq->plugged && !list_empty(&rq->queue_head))
- {
- if ( (req = blkdev_entry_next_request(&rq->queue_head)) == NULL )
- goto out;
-
- DPRINTK("do_blkif_request %p: cmd %i, sec %lx, (%li/%li) bh:%p\n",
- req, req->cmd, req->sector,
- req->current_nr_sectors, req->nr_sectors, req->bh);
-
- rw = req->cmd;
- if ( rw == READA )
- rw = READ;
- if ( unlikely((rw != READ) && (rw != WRITE)) )
- panic("XenoLinux Virtual Block Device: bad cmd: %d\n", rw);
-
- req->errors = 0;
-
- bh = req->bh;
- while ( bh != NULL )
- {
- next_bh = bh->b_reqnext;
- bh->b_reqnext = NULL;
-
- full = blkif_queue_request(
- (unsigned long)bh,
- (rw == READ) ? BLKIF_OP_READ : BLKIF_OP_WRITE,
- bh->b_data, bh->b_rsector, bh->b_size>>9, bh->b_rdev);
-
- if ( full )
- {
- bh->b_reqnext = next_bh;
- pending_queues[nr_pending++] = rq;
- if ( unlikely(nr_pending >= MAX_PENDING) )
- BUG();
- goto out;
- }
-
- queued++;
-
- /* Dequeue the buffer head from the request. */
- nsect = bh->b_size >> 9;
- bh = req->bh = next_bh;
-
- if ( bh != NULL )
- {
- /* There's another buffer head to do. Update the request. */
- req->hard_sector += nsect;
- req->hard_nr_sectors -= nsect;
- req->sector = req->hard_sector;
- req->nr_sectors = req->hard_nr_sectors;
- req->current_nr_sectors = bh->b_size >> 9;
- req->buffer = bh->b_data;
- }
- else
- {
- /* That was the last buffer head. Finalise the request. */
- if ( unlikely(end_that_request_first(req, 1, "XenBlk")) )
- BUG();
- blkdev_dequeue_request(req);
- end_that_request_last(req);
- }
- }
- }
+ struct blkfront_info *info = NULL;
+ struct request *req;
+ int queued;
- out:
- if ( queued != 0 )
- flush_requests();
-}
+ DPRINTK("Entered do_blkif_request\n");
+ queued = 0;
-static void blkif_int(int irq, void *dev_id, struct pt_regs *ptregs)
-{
- RING_IDX i, rp;
- unsigned long flags;
- struct buffer_head *bh, *next_bh;
-
- spin_lock_irqsave(&io_request_lock, flags);
-
- if ( unlikely(blkif_state == BLKIF_STATE_CLOSED || recovery) )
- {
- spin_unlock_irqrestore(&io_request_lock, flags);
- return;
- }
-
- rp = blk_ring.sring->rsp_prod;
- rmb(); /* Ensure we see queued responses up to 'rp'. */
-
- for ( i = blk_ring.rsp_cons; i != rp; i++ )
- {
- unsigned long id;
- blkif_response_t *bret;
-
- bret = RING_GET_RESPONSE(&blk_ring, i);
- id = bret->id;
- bh = (struct buffer_head *)blk_shadow[id].request;
-
- blkif_completion(&blk_shadow[id]);
-
- ADD_ID_TO_FREELIST(id);
-
- switch ( bret->operation )
- {
- case BLKIF_OP_READ:
- case BLKIF_OP_WRITE:
- if ( unlikely(bret->status != BLKIF_RSP_OKAY) )
- DPRINTK("Bad return from blkdev data request: %lx\n",
- bret->status);
- for ( ; bh != NULL; bh = next_bh )
- {
- next_bh = bh->b_reqnext;
- bh->b_reqnext = NULL;
- bh->b_end_io(bh, bret->status == BLKIF_RSP_OKAY);
- }
-
- break;
- case BLKIF_OP_PROBE:
- memcpy(&blkif_control_rsp, bret, sizeof(*bret));
- blkif_control_rsp_valid = 1;
- break;
- default:
- BUG();
- }
-
- }
- blk_ring.rsp_cons = i;
-
- kick_pending_request_queues();
-
- spin_unlock_irqrestore(&io_request_lock, flags);
-}
+ while ((req = elv_next_request(rq)) != NULL) {
+ info = req->rq_disk->private_data;
+ if (!blk_fs_request(req)) {
+ end_request(req, 0);
+ continue;
+ }
-#endif
+ if (RING_FULL(&info->ring))
+ goto wait;
-/***************************** COMMON CODE *******************************/
+ DPRINTK("do_blk_req %p: cmd %p, sec %lx, "
+ "(%u/%li) buffer:%p [%s]\n",
+ req, req->cmd, req->sector, req->current_nr_sectors,
+ req->nr_sectors, req->buffer,
+ rq_data_dir(req) ? "write" : "read");
-#ifdef CONFIG_XEN_BLKDEV_GRANT
-void blkif_control_probe_send(blkif_request_t *req, blkif_response_t *rsp,
- unsigned long address)
-{
- int ref = gnttab_claim_grant_reference(&gref_head, gref_terminal);
- ASSERT( ref != -ENOSPC );
- gnttab_grant_foreign_access_ref( ref, rdomid, address >> PAGE_SHIFT, 0 );
+ blkdev_dequeue_request(req);
+ if (blkif_queue_request(req)) {
+ blk_requeue_request(rq, req);
+ wait:
+ /* Avoid pointless unplugs. */
+ blk_stop_queue(rq);
+ break;
+ }
- req->frame_and_sects[0] = (((u32) ref) << 16) | 7;
+ queued++;
+ }
- blkif_control_send(req, rsp);
+ if (queued != 0)
+ flush_requests(info);
}
-#endif
-void blkif_control_send(blkif_request_t *req, blkif_response_t *rsp)
-{
- unsigned long flags, id;
- blkif_request_t *req_d;
-
- retry:
- while ( RING_FULL(&blk_ring) )
- {
- set_current_state(TASK_INTERRUPTIBLE);
- schedule_timeout(1);
- }
-
- spin_lock_irqsave(&blkif_io_lock, flags);
- if ( RING_FULL(&blk_ring) )
- {
- spin_unlock_irqrestore(&blkif_io_lock, flags);
- goto retry;
- }
-
- DISABLE_SCATTERGATHER();
- req_d = RING_GET_REQUEST(&blk_ring, blk_ring.req_prod_pvt);
- *req_d = *req;
-
- id = GET_ID_FROM_FREELIST();
- req_d->id = id;
- blk_shadow[id].request = (unsigned long)req;
-
- pickle_request(&blk_shadow[id], req);
-
- blk_ring.req_prod_pvt++;
- flush_requests();
-
- spin_unlock_irqrestore(&blkif_io_lock, flags);
-
- while ( !blkif_control_rsp_valid )
- {
- set_current_state(TASK_INTERRUPTIBLE);
- schedule_timeout(1);
- }
-
- memcpy(rsp, &blkif_control_rsp, sizeof(*rsp));
- blkif_control_rsp_valid = 0;
-}
-
-/* Send a driver status notification to the domain controller. */
-static void send_driver_status(int ok)
+static irqreturn_t blkif_int(int irq, void *dev_id, struct pt_regs *ptregs)
{
- ctrl_msg_t cmsg = {
- .type = CMSG_BLKIF_FE,
- .subtype = CMSG_BLKIF_FE_DRIVER_STATUS,
- .length = sizeof(blkif_fe_driver_status_t),
- };
- blkif_fe_driver_status_t *msg = (void*)cmsg.msg;
-
- msg->status = (ok ? BLKIF_DRIVER_STATUS_UP : BLKIF_DRIVER_STATUS_DOWN);
-
- ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
-}
+ struct request *req;
+ blkif_response_t *bret;
+ RING_IDX i, rp;
+ unsigned long flags;
+ struct blkfront_info *info = (struct blkfront_info *)dev_id;
+
+ spin_lock_irqsave(&blkif_io_lock, flags);
+
+ if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) {
+ spin_unlock_irqrestore(&blkif_io_lock, flags);
+ return IRQ_HANDLED;
+ }
+
+ again:
+ rp = info->ring.sring->rsp_prod;
+ rmb(); /* Ensure we see queued responses up to 'rp'. */
+
+ for (i = info->ring.rsp_cons; i != rp; i++) {
+ unsigned long id;
+ int ret;
+
+ bret = RING_GET_RESPONSE(&info->ring, i);
+ id = bret->id;
+ req = (struct request *)info->shadow[id].request;
+
+ blkif_completion(&info->shadow[id]);
+
+ ADD_ID_TO_FREELIST(info, id);
+
+ switch (bret->operation) {
+ case BLKIF_OP_READ:
+ case BLKIF_OP_WRITE:
+ if (unlikely(bret->status != BLKIF_RSP_OKAY))
+ DPRINTK("Bad return from blkdev data "
+ "request: %x\n", bret->status);
+
+ ret = end_that_request_first(
+ req, (bret->status == BLKIF_RSP_OKAY),
+ req->hard_nr_sectors);
+ BUG_ON(ret);
+ end_that_request_last(
+ req, (bret->status == BLKIF_RSP_OKAY));
+ break;
+ default:
+ BUG();
+ }
+ }
+
+ info->ring.rsp_cons = i;
+
+ if (i != info->ring.req_prod_pvt) {
+ int more_to_do;
+ RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, more_to_do);
+ if (more_to_do)
+ goto again;
+ } else
+ info->ring.sring->rsp_event = i + 1;
+
+ kick_pending_request_queues(info);
+
+ spin_unlock_irqrestore(&blkif_io_lock, flags);
+
+ return IRQ_HANDLED;
+}
+
+static void blkif_free(struct blkfront_info *info, int suspend)
+{
+ /* Prevent new requests being issued until we fix things up. */
+ spin_lock_irq(&blkif_io_lock);
+ info->connected = suspend ?
+ BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED;
+ /* No more blkif_request(). */
+ if (info->rq)
+ blk_stop_queue(info->rq);
+ /* No more gnttab callback work. */
+ gnttab_cancel_free_callback(&info->callback);
+ flush_scheduled_work();
+ spin_unlock_irq(&blkif_io_lock);
+
+ /* Free resources associated with old device channel. */
+ if (info->ring_ref != GRANT_INVALID_REF) {
+ gnttab_end_foreign_access(info->ring_ref, 0,
+ (unsigned long)info->ring.sring);
+ info->ring_ref = GRANT_INVALID_REF;
+ info->ring.sring = NULL;
+ }
+ if (info->irq)
+ unbind_from_irqhandler(info->irq, info);
+ info->evtchn = info->irq = 0;
-/* Tell the controller to bring up the interface. */
-static void blkif_send_interface_connect(void)
-{
- ctrl_msg_t cmsg = {
- .type = CMSG_BLKIF_FE,
- .subtype = CMSG_BLKIF_FE_INTERFACE_CONNECT,
- .length = sizeof(blkif_fe_interface_connect_t),
- };
- blkif_fe_interface_connect_t *msg = (void*)cmsg.msg;
-
- msg->handle = 0;
- msg->shmem_frame = (virt_to_machine(blk_ring.sring) >> PAGE_SHIFT);
-
- ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
}
-static void blkif_free(void)
+static void blkif_completion(struct blk_shadow *s)
{
- /* Prevent new requests being issued until we fix things up. */
- spin_lock_irq(&blkif_io_lock);
- recovery = 1;
- blkif_state = BLKIF_STATE_DISCONNECTED;
- spin_unlock_irq(&blkif_io_lock);
-
- /* Free resources associated with old device channel. */
- if ( blk_ring.sring != NULL )
- {
- free_page((unsigned long)blk_ring.sring);
- blk_ring.sring = NULL;
- }
- free_irq(blkif_irq, NULL);
- blkif_irq = 0;
-
- unbind_evtchn_from_irq(blkif_evtchn);
- blkif_evtchn = 0;
+ int i;
+ for (i = 0; i < s->req.nr_segments; i++)
+ gnttab_end_foreign_access(s->req.seg[i].gref, 0, 0UL);
}
-static void blkif_close(void)
+static void blkif_recover(struct blkfront_info *info)
{
-}
+ int i;
+ blkif_request_t *req;
+ struct blk_shadow *copy;
+ int j;
-/* Move from CLOSED to DISCONNECTED state. */
-static void blkif_disconnect(void)
-{
- blkif_sring_t *sring;
-
- if ( blk_ring.sring != NULL )
- free_page((unsigned long)blk_ring.sring);
-
- sring = (blkif_sring_t *)__get_free_page(GFP_KERNEL);
- SHARED_RING_INIT(sring);
- FRONT_RING_INIT(&blk_ring, sring, PAGE_SIZE);
- blkif_state = BLKIF_STATE_DISCONNECTED;
- blkif_send_interface_connect();
-}
+ /* Stage 1: Make a safe copy of the shadow state. */
+ copy = kmalloc(sizeof(info->shadow), GFP_KERNEL | __GFP_NOFAIL);
+ memcpy(copy, info->shadow, sizeof(info->shadow));
-static void blkif_reset(void)
-{
- blkif_free();
- blkif_disconnect();
-}
+ /* Stage 2: Set up free list. */
+ memset(&info->shadow, 0, sizeof(info->shadow));
+ for (i = 0; i < BLK_RING_SIZE; i++)
+ info->shadow[i].req.id = i+1;
+ info->shadow_free = info->ring.req_prod_pvt;
+ info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;
-static void blkif_recover(void)
-{
- int i;
- blkif_request_t *req;
- struct blk_shadow *copy;
-#ifdef CONFIG_XEN_BLKDEV_GRANT
- int j;
-#endif
-
- /* Stage 1: Make a safe copy of the shadow state. */
- copy = (struct blk_shadow *)kmalloc(sizeof(blk_shadow), GFP_KERNEL);
- BUG_ON(copy == NULL);
- memcpy(copy, blk_shadow, sizeof(blk_shadow));
-
- /* Stage 2: Set up free list. */
- memset(&blk_shadow, 0, sizeof(blk_shadow));
- for ( i = 0; i < BLK_RING_SIZE; i++ )
- blk_shadow[i].req.id = i+1;
- blk_shadow_free = blk_ring.req_prod_pvt;
- blk_shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;
-
- /* Stage 3: Find pending requests and requeue them. */
- for ( i = 0; i < BLK_RING_SIZE; i++ )
- {
- /* Not in use? */
- if ( copy[i].request == 0 )
- continue;
-
- /* Grab a request slot and unpickle shadow state into it. */
- req = RING_GET_REQUEST(
- &blk_ring, blk_ring.req_prod_pvt);
- unpickle_request(req, ©[i]);
-
- /* We get a new request id, and must reset the shadow state. */
- req->id = GET_ID_FROM_FREELIST();
- memcpy(&blk_shadow[req->id], ©[i], sizeof(copy[i]));
-
-#ifdef CONFIG_XEN_BLKDEV_GRANT
- /* Rewrite any grant references invalidated by suspend/resume. */
- for ( j = 0; j < req->nr_segments; j++ )
- {
- if ( req->frame_and_sects[j] & GRANTREF_INVALID )
- gnttab_grant_foreign_access_ref(
- blkif_gref_from_fas(req->frame_and_sects[j]),
- rdomid,
- blk_shadow[req->id].frame[j],
- rq_data_dir((struct request *)
- blk_shadow[req->id].request));
- req->frame_and_sects[j] &= ~GRANTREF_INVALID;
- }
- blk_shadow[req->id].req = *req;
-#endif
-
- blk_ring.req_prod_pvt++;
- }
-
- kfree(copy);
-
- recovery = 0;
-
- /* blk_ring->req_prod will be set when we flush_requests().*/
- wmb();
-
- /* Kicks things back into life. */
- flush_requests();
-
- /* Now safe to left other people use the interface. */
- blkif_state = BLKIF_STATE_CONNECTED;
-}
+ /* Stage 3: Find pending requests and requeue them. */
+ for (i = 0; i < BLK_RING_SIZE; i++) {
+ /* Not in use? */
+ if (copy[i].request == 0)
+ continue;
-static void blkif_connect(blkif_fe_interface_status_t *status)
-{
- int err = 0;
-
- blkif_evtchn = status->evtchn;
- blkif_irq = bind_evtchn_to_irq(blkif_evtchn);
-#ifdef CONFIG_XEN_BLKDEV_GRANT
- rdomid = status->domid;
-#endif
-
- err = request_irq(blkif_irq, blkif_int, SA_SAMPLE_RANDOM, "blkif", NULL);
- if ( err )
- {
- printk(KERN_ALERT "xen_blk: request_irq failed (err=%d)\n", err);
- return;
- }
-
- if ( recovery )
- {
- blkif_recover();
- }
- else
- {
- /* Transition to connected in case we need to do
- * a partition probe on a whole disk. */
- blkif_state = BLKIF_STATE_CONNECTED;
-
- /* Probe for discs attached to the interface. */
- xlvbd_init();
- }
-
- /* Kick pending requests. */
- spin_lock_irq(&blkif_io_lock);
- kick_pending_request_queues();
- spin_unlock_irq(&blkif_io_lock);
-}
+ /* Grab a request slot and copy shadow state into it. */
+ req = RING_GET_REQUEST(
+ &info->ring, info->ring.req_prod_pvt);
+ *req = copy[i].req;
-static void unexpected(blkif_fe_interface_status_t *status)
-{
- DPRINTK(" Unexpected blkif status %u in state %u\n",
- status->status, blkif_state);
-}
+ /* We get a new request id, and must reset the shadow state. */
+ req->id = GET_ID_FROM_FREELIST(info);
+ memcpy(&info->shadow[req->id], ©[i], sizeof(copy[i]));
-static void blkif_status(blkif_fe_interface_status_t *status)
-{
- if ( status->handle != blkif_handle )
- {
- WPRINTK(" Invalid blkif: handle=%u\n", status->handle);
- unexpected(status);
- return;
- }
-
- switch ( status->status )
- {
- case BLKIF_INTERFACE_STATUS_CLOSED:
- switch ( blkif_state )
- {
- case BLKIF_STATE_CLOSED:
- unexpected(status);
- break;
- case BLKIF_STATE_DISCONNECTED:
- case BLKIF_STATE_CONNECTED:
- unexpected(status);
- blkif_close();
- break;
- }
- break;
-
- case BLKIF_INTERFACE_STATUS_DISCONNECTED:
- switch ( blkif_state )
- {
- case BLKIF_STATE_CLOSED:
- blkif_disconnect();
- break;
- case BLKIF_STATE_DISCONNECTED:
- case BLKIF_STATE_CONNECTED:
- /* unexpected(status); */ /* occurs during suspend/resume */
- blkif_reset();
- break;
- }
- break;
-
- case BLKIF_INTERFACE_STATUS_CONNECTED:
- switch ( blkif_state )
- {
- case BLKIF_STATE_CLOSED:
- unexpected(status);
- blkif_disconnect();
- blkif_connect(status);
- break;
- case BLKIF_STATE_DISCONNECTED:
- blkif_connect(status);
- break;
- case BLKIF_STATE_CONNECTED:
- unexpected(status);
- blkif_connect(status);
- break;
- }
- break;
-
- case BLKIF_INTERFACE_STATUS_CHANGED:
- switch ( blkif_state )
- {
- case BLKIF_STATE_CLOSED:
- case BLKIF_STATE_DISCONNECTED:
- unexpected(status);
- break;
- case BLKIF_STATE_CONNECTED:
- vbd_update();
- break;
- }
- break;
-
- default:
- WPRINTK(" Invalid blkif status: %d\n", status->status);
- break;
- }
-}
+ /* Rewrite any grant references invalidated by susp/resume. */
+ for (j = 0; j < req->nr_segments; j++)
+ gnttab_grant_foreign_access_ref(
+ req->seg[j].gref,
+ info->xbdev->otherend_id,
+ pfn_to_mfn(info->shadow[req->id].frame[j]),
+ rq_data_dir(
+ (struct request *)
+ info->shadow[req->id].request));
+ info->shadow[req->id].req = *req;
+ info->ring.req_prod_pvt++;
+ }
-static void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
-{
- switch ( msg->subtype )
- {
- case CMSG_BLKIF_FE_INTERFACE_STATUS:
- blkif_status((blkif_fe_interface_status_t *)
- &msg->msg[0]);
- break;
- default:
- msg->length = 0;
- break;
- }
-
- ctrl_if_send_response(msg);
-}
+ kfree(copy);
-int wait_for_blkif(void)
-{
- int err = 0;
- int i;
- send_driver_status(1);
-
- /*
- * We should read 'nr_interfaces' from response message and wait
- * for notifications before proceeding. For now we assume that we
- * will be notified of exactly one interface.
- */
- for ( i=0; (blkif_state != BLKIF_STATE_CONNECTED) && (i < 10*HZ); i++ )
- {
- set_current_state(TASK_INTERRUPTIBLE);
- schedule_timeout(1);
- }
-
- if ( blkif_state != BLKIF_STATE_CONNECTED )
- {
- printk(KERN_INFO "xen_blk: Timeout connecting to device!\n");
- err = -ENOSYS;
- }
- return err;
-}
+ (void)xenbus_switch_state(info->xbdev, XenbusStateConnected);
-int __init xlblk_init(void)
-{
- int i;
+ spin_lock_irq(&blkif_io_lock);
+
+ /* Now safe for us to use the shared ring */
+ info->connected = BLKIF_STATE_CONNECTED;
-#ifdef CONFIG_XEN_BLKDEV_GRANT
- if ( 0 > gnttab_alloc_grant_references( MAXIMUM_OUTSTANDING_BLOCK_REQS,
- &gref_head, &gref_terminal ))
- return 1;
- printk(KERN_ALERT "Blkif frontend is using grant tables.\n");
-#endif
+ /* Send off requeued requests */
+ flush_requests(info);
- if ( (xen_start_info.flags & SIF_INITDOMAIN) ||
- (xen_start_info.flags & SIF_BLK_BE_DOMAIN) )
- return 0;
+ /* Kick any other new requests queued since we resumed */
+ kick_pending_request_queues(info);
- printk(KERN_INFO "xen_blk: Initialising virtual block device driver\n");
+ spin_unlock_irq(&blkif_io_lock);
+}
- blk_shadow_free = 0;
- memset(blk_shadow, 0, sizeof(blk_shadow));
- for ( i = 0; i < BLK_RING_SIZE; i++ )
- blk_shadow[i].req.id = i+1;
- blk_shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;
- (void)ctrl_if_register_receiver(CMSG_BLKIF_FE, blkif_ctrlif_rx,
- CALLBACK_IN_BLOCKING_CONTEXT);
+/* ** Driver Registration ** */
- wait_for_blkif();
- return 0;
-}
+static struct xenbus_device_id blkfront_ids[] = {
+ { "vbd" },
+ { "" }
+};
-void blkdev_suspend(void)
-{
-}
-void blkdev_resume(void)
+static struct xenbus_driver blkfront = {
+ .name = "vbd",
+ .owner = THIS_MODULE,
+ .ids = blkfront_ids,
+ .probe = blkfront_probe,
+ .remove = blkfront_remove,
+ .resume = blkfront_resume,
+ .otherend_changed = backend_changed,
+};
+
+
+static int __init xlblk_init(void)
{
-#ifdef CONFIG_XEN_BLKDEV_GRANT
- int i, j;
- for ( i = 0; i < BLK_RING_SIZE; i++ )
- for ( j = 0; j < BLKIF_MAX_SEGMENTS_PER_REQUEST; j++ )
- blk_shadow[i].req.frame_and_sects[j] |= GRANTREF_INVALID;
-#endif
- send_driver_status(1);
+ if (!is_running_on_xen())
+ return -ENODEV;
+
+ return xenbus_register_frontend(&blkfront);
}
+module_init(xlblk_init);
-static void blkif_completion(struct blk_shadow *s)
+
+static void xlblk_exit(void)
{
- int i;
-#ifdef CONFIG_XEN_BLKDEV_GRANT
- for ( i = 0; i < s->req.nr_segments; i++ )
- gnttab_release_grant_reference(
- &gref_head, blkif_gref_from_fas(s->req.frame_and_sects[i]));
-#else
- /* This is a hack to get the dirty logging bits set */
- if ( s->req.operation == BLKIF_OP_READ )
- {
- for ( i = 0; i < s->req.nr_segments; i++ )
- {
- unsigned long pfn = s->req.frame_and_sects[i] >> PAGE_SHIFT;
- unsigned long mfn = phys_to_machine_mapping[pfn];
- xen_machphys_update(mfn, pfn);
- }
- }
-#endif
+ return xenbus_unregister_driver(&blkfront);
}
+module_exit(xlblk_exit);
+
+MODULE_LICENSE("Dual BSD/GPL");