/******************************************************************************
 * blkfront.c
 * 
 * XenLinux virtual block-device driver.
 * 
 * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
 * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
 * Copyright (c) 2004, Christian Limpach
 * Copyright (c) 2004, Andrew Warfield
 * Copyright (c) 2005, Christopher Clark
 * Copyright (c) 2005, XenSource Ltd
 * 
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 * 
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 * 
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 * 
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <linux/version.h>
#include "block.h"
#include <linux/cdrom.h>
#include <linux/sched.h>
#include <linux/interrupt.h>
#include <scsi/scsi.h>
#include <xen/evtchn.h>
#include <xen/xenbus.h>
#include <xen/interface/grant_table.h>
#include <xen/gnttab.h>
#include <asm/hypervisor.h>

#define BLKIF_STATE_DISCONNECTED 0
#define BLKIF_STATE_CONNECTED    1
#define BLKIF_STATE_SUSPENDED    2

#define MAXIMUM_OUTSTANDING_BLOCK_REQS \
    (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE)
#define GRANT_INVALID_REF       0

static void connect(struct blkfront_info *);
static void blkfront_closing(struct xenbus_device *);
static int blkfront_remove(struct xenbus_device *);
static int talk_to_backend(struct xenbus_device *, struct blkfront_info *);
static int setup_blkring(struct xenbus_device *, struct blkfront_info *);

static void kick_pending_request_queues(struct blkfront_info *);

static irqreturn_t blkif_int(int irq, void *dev_id, struct pt_regs *ptregs);
static void blkif_restart_queue(void *arg);
static void blkif_recover(struct blkfront_info *);
static void blkif_completion(struct blk_shadow *);
static void blkif_free(struct blkfront_info *, int);


/**
 * Entry point to this code when a new device is created.  Allocate the basic
 * structures and the ring buffer for communication with the backend, and
 * inform the backend of the appropriate details for those.  Switch to
 * Initialised state.
 */
static int blkfront_probe(struct xenbus_device *dev,
                          const struct xenbus_device_id *id)
{
        int err, vdevice, i;
        struct blkfront_info *info;

        /* FIXME: Use dynamic device id if this is not set. */
        err = xenbus_scanf(XBT_NIL, dev->nodename,
                           "virtual-device", "%i", &vdevice);
        if (err != 1) {
                xenbus_dev_fatal(dev, err, "reading virtual-device");
                return err;
        }

        info = kzalloc(sizeof(*info), GFP_KERNEL);
        if (!info) {
                xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure");
                return -ENOMEM;
        }

        info->xbdev = dev;
        info->vdevice = vdevice;
        info->connected = BLKIF_STATE_DISCONNECTED;
        INIT_WORK(&info->work, blkif_restart_queue, (void *)info);

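        /*
         * Chain the shadow request slots into a free list: each slot's
         * req.id holds the index of the next free slot, and the last
         * slot is terminated with the sentinel 0x0fffffff.
         */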
        for (i = 0; i < BLK_RING_SIZE; i++)
                info->shadow[i].req.id = i+1;
        info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;

        /* Front end dir is a number, which is used as the id. */
        info->handle = simple_strtoul(strrchr(dev->nodename,'/')+1, NULL, 0);
        dev->dev.driver_data = info;

        err = talk_to_backend(dev, info);
        if (err) {
                kfree(info);
                dev->dev.driver_data = NULL;
                return err;
        }

        return 0;
}


/**
 * We are reconnecting to the backend, due to a suspend/resume, or a backend
 * driver restart.  We tear down our blkif structure and recreate it, but
 * leave the device-layer structures intact so that this is transparent to the
 * rest of the kernel.
 */
static int blkfront_resume(struct xenbus_device *dev)
{
        struct blkfront_info *info = dev->dev.driver_data;
        int err;

        DPRINTK("blkfront_resume: %s\n", dev->nodename);

        blkif_free(info, 1);

        err = talk_to_backend(dev, info);
        if (!err)
                blkif_recover(info);

        return err;
}


/* Common code used when first setting up, and when resuming. */
static int talk_to_backend(struct xenbus_device *dev,
                           struct blkfront_info *info)
{
        const char *message = NULL;
        struct xenbus_transaction xbt;
        int err;

        /* Create shared ring, alloc event channel. */
        err = setup_blkring(dev, info);
        if (err)
                goto out;

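        /*
         * Publish ring-ref and event-channel in a xenbus transaction.
         * xenbus_transaction_end() returns -EAGAIN if the transaction
         * raced with another xenstore update, in which case the writes
         * are simply redone from the 'again' label.
         */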
again:
        err = xenbus_transaction_start(&xbt);
        if (err) {
                xenbus_dev_fatal(dev, err, "starting transaction");
                goto destroy_blkring;
        }

        err = xenbus_printf(xbt, dev->nodename,
                            "ring-ref","%u", info->ring_ref);
        if (err) {
                message = "writing ring-ref";
                goto abort_transaction;
        }
        err = xenbus_printf(xbt, dev->nodename,
                            "event-channel", "%u", info->evtchn);
        if (err) {
                message = "writing event-channel";
                goto abort_transaction;
        }

        err = xenbus_transaction_end(xbt, 0);
        if (err) {
                if (err == -EAGAIN)
                        goto again;
                xenbus_dev_fatal(dev, err, "completing transaction");
                goto destroy_blkring;
        }

        xenbus_switch_state(dev, XenbusStateInitialised);

        return 0;

 abort_transaction:
        xenbus_transaction_end(xbt, 1);
        if (message)
                xenbus_dev_fatal(dev, err, "%s", message);
 destroy_blkring:
        blkif_free(info, 0);
 out:
        return err;
}


static int setup_blkring(struct xenbus_device *dev,
                         struct blkfront_info *info)
{
        blkif_sring_t *sring;
        int err;

        info->ring_ref = GRANT_INVALID_REF;

        sring = (blkif_sring_t *)__get_free_page(GFP_KERNEL);
        if (!sring) {
                xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring");
                return -ENOMEM;
        }
        SHARED_RING_INIT(sring);
        FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);

        err = xenbus_grant_ring(dev, virt_to_mfn(info->ring.sring));
        if (err < 0) {
                free_page((unsigned long)sring);
                info->ring.sring = NULL;
                goto fail;
        }
        info->ring_ref = err;

        err = xenbus_alloc_evtchn(dev, &info->evtchn);
        if (err)
                goto fail;

        err = bind_evtchn_to_irqhandler(
                info->evtchn, blkif_int, SA_SAMPLE_RANDOM, "blkif", info);
        if (err <= 0) {
                xenbus_dev_fatal(dev, err,
                                 "bind_evtchn_to_irqhandler failed");
                goto fail;
        }
        info->irq = err;

        return 0;
fail:
        blkif_free(info, 0);
        return err;
}


/**
 * Callback received when the backend's state changes.
 */
static void backend_changed(struct xenbus_device *dev,
                            enum xenbus_state backend_state)
{
        struct blkfront_info *info = dev->dev.driver_data;
        struct block_device *bd;

        DPRINTK("blkfront:backend_changed.\n");

        switch (backend_state) {
        case XenbusStateUnknown:
        case XenbusStateInitialising:
        case XenbusStateInitWait:
        case XenbusStateInitialised:
        case XenbusStateClosed:
                break;

        case XenbusStateConnected:
                connect(info);
                break;

        case XenbusStateClosing:
                bd = bdget(info->dev);
                if (bd == NULL) {
                        xenbus_dev_fatal(dev, -ENODEV, "bdget failed");
                        break;
                }

                mutex_lock(&bd->bd_mutex);
                if (info->users > 0)
                        xenbus_dev_error(dev, -EBUSY,
                                         "Device in use; refusing to close");
                else
                        blkfront_closing(dev);
                mutex_unlock(&bd->bd_mutex);
                bdput(bd);
                break;
        }
}


/* ** Connection ** */


/*
 * Invoked when the backend is finally 'ready' (and has produced the
 * details about the physical device - #sectors, size, etc).
 */
static void connect(struct blkfront_info *info)
{
        unsigned long sectors, sector_size;
        unsigned int binfo;
        int err;

        if ((info->connected == BLKIF_STATE_CONNECTED) ||
            (info->connected == BLKIF_STATE_SUSPENDED) )
                return;

        DPRINTK("blkfront.c:connect:%s.\n", info->xbdev->otherend);

        err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
                            "sectors", "%lu", &sectors,
                            "info", "%u", &binfo,
                            "sector-size", "%lu", &sector_size,
                            NULL);
        if (err) {
                xenbus_dev_fatal(info->xbdev, err,
                                 "reading backend fields at %s",
                                 info->xbdev->otherend);
                return;
        }

        err = xlvbd_add(sectors, info->vdevice, binfo, sector_size, info);
        if (err) {
                xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",
                                 info->xbdev->otherend);
                return;
        }

        (void)xenbus_switch_state(info->xbdev, XenbusStateConnected);

        /* Kick pending requests. */
        spin_lock_irq(&blkif_io_lock);
        info->connected = BLKIF_STATE_CONNECTED;
        kick_pending_request_queues(info);
        spin_unlock_irq(&blkif_io_lock);

        add_disk(info->gd);
}

/**
 * Handle the change of state of the backend to Closing.  We must delete our
 * device-layer structures now, to ensure that writes are flushed through to
 * the backend.  Once this is done, we can switch to Closed in
 * acknowledgement.
 */
static void blkfront_closing(struct xenbus_device *dev)
{
        struct blkfront_info *info = dev->dev.driver_data;
        unsigned long flags;

        DPRINTK("blkfront_closing: %s removed\n", dev->nodename);

        if (info->rq == NULL)
                return;

        spin_lock_irqsave(&blkif_io_lock, flags);
        /* No more blkif_request(). */
        blk_stop_queue(info->rq);
        /* No more gnttab callback work. */
        gnttab_cancel_free_callback(&info->callback);
        flush_scheduled_work();
        spin_unlock_irqrestore(&blkif_io_lock, flags);

        xlvbd_del(info);

        xenbus_switch_state(dev, XenbusStateClosed);
}


static int blkfront_remove(struct xenbus_device *dev)
{
        struct blkfront_info *info = dev->dev.driver_data;

        DPRINTK("blkfront_remove: %s removed\n", dev->nodename);

        blkif_free(info, 0);

        kfree(info);

        return 0;
}


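/*
 * The shadow free list is threaded through the otherwise unused req.id
 * fields: shadow_free indexes the first free slot, each free slot's
 * req.id points at the next, and slots are pushed back onto the front
 * of the list when their response arrives.
 */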
static inline int GET_ID_FROM_FREELIST(
        struct blkfront_info *info)
{
        unsigned long free = info->shadow_free;
        BUG_ON(free > BLK_RING_SIZE);
        info->shadow_free = info->shadow[free].req.id;
        info->shadow[free].req.id = 0x0fffffee; /* debug */
        return free;
}

static inline void ADD_ID_TO_FREELIST(
        struct blkfront_info *info, unsigned long id)
{
        info->shadow[id].req.id  = info->shadow_free;
        info->shadow[id].request = 0;
        info->shadow_free = id;
}

static inline void flush_requests(struct blkfront_info *info)
{
        int notify;

        RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->ring, notify);

        if (notify)
                notify_remote_via_irq(info->irq);
}

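/* Restart I/O once there is ring space; called with blkif_io_lock held. */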
static void kick_pending_request_queues(struct blkfront_info *info)
{
        if (!RING_FULL(&info->ring)) {
                /* Re-enable calldowns. */
                blk_start_queue(info->rq);
                /* Kick things off immediately. */
                do_blkif_request(info->rq);
        }
}

static void blkif_restart_queue(void *arg)
{
        struct blkfront_info *info = (struct blkfront_info *)arg;
        spin_lock_irq(&blkif_io_lock);
        if (info->connected == BLKIF_STATE_CONNECTED)
                kick_pending_request_queues(info);
        spin_unlock_irq(&blkif_io_lock);
}

static void blkif_restart_queue_callback(void *arg)
{
        struct blkfront_info *info = (struct blkfront_info *)arg;
        schedule_work(&info->work);
}

int blkif_open(struct inode *inode, struct file *filep)
{
        struct blkfront_info *info = inode->i_bdev->bd_disk->private_data;
        info->users++;
        return 0;
}


int blkif_release(struct inode *inode, struct file *filep)
{
        struct blkfront_info *info = inode->i_bdev->bd_disk->private_data;
        info->users--;
        if (info->users == 0) {
                /* Check whether we have been instructed to close.  We will
                   have ignored this request initially, as the device was
                   still mounted. */
                struct xenbus_device * dev = info->xbdev;
                enum xenbus_state state = xenbus_read_driver_state(dev->otherend);

                if (state == XenbusStateClosing)
                        blkfront_closing(dev);
        }
        return 0;
}


int blkif_ioctl(struct inode *inode, struct file *filep,
                unsigned command, unsigned long argument)
{
        int i;

        DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n",
                      command, (long)argument, inode->i_rdev);

        switch (command) {
        case CDROMMULTISESSION:
                DPRINTK("FIXME: support multisession CDs later\n");
                for (i = 0; i < sizeof(struct cdrom_multisession); i++)
                        if (put_user(0, (char __user *)(argument + i)))
                                return -EFAULT;
                return 0;

        default:
                /*printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n",
                  command);*/
                return -EINVAL; /* same return as native Linux */
        }

        return 0;
}


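/*
 * Report a conventional fake geometry (255 heads, 63 sectors/track),
 * with the cylinder count derived from the disk's capacity.
 */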
int blkif_getgeo(struct block_device *bd, struct hd_geometry *hg)
{
        /* We don't have real geometry info, but let's at least return
           values consistent with the size of the device */
        sector_t nsect = get_capacity(bd->bd_disk);
        sector_t cylinders = nsect;

        hg->heads = 0xff;
        hg->sectors = 0x3f;
        sector_div(cylinders, hg->heads * hg->sectors);
        hg->cylinders = cylinders;
        if ((sector_t)(hg->cylinders + 1) * hg->heads * hg->sectors < nsect)
                hg->cylinders = 0xffff;
        return 0;
}


/*
 * blkif_queue_request
 *
 * request block io
 *
 * id: for guest use only.
 * operation: BLKIF_OP_{READ,WRITE,PROBE}
 * buffer: buffer to read/write into. this should be a
 *   virtual address in the guest os.
 */
static int blkif_queue_request(struct request *req)
{
        struct blkfront_info *info = req->rq_disk->private_data;
        unsigned long buffer_mfn;
        blkif_request_t *ring_req;
        struct bio *bio;
        struct bio_vec *bvec;
        int idx;
        unsigned long id;
        unsigned int fsect, lsect;
        int ref;
        grant_ref_t gref_head;

        if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
                return 1;

        if (gnttab_alloc_grant_references(
                BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) {
                gnttab_request_free_callback(
                        &info->callback,
                        blkif_restart_queue_callback,
                        info,
                        BLKIF_MAX_SEGMENTS_PER_REQUEST);
                return 1;
        }

        /* Fill out a communications ring structure. */
        ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
        id = GET_ID_FROM_FREELIST(info);
        info->shadow[id].request = (unsigned long)req;

        ring_req->id = id;
        ring_req->operation = rq_data_dir(req) ?
                BLKIF_OP_WRITE : BLKIF_OP_READ;
        ring_req->sector_number = (blkif_sector_t)req->sector;
        ring_req->handle = info->handle;

        ring_req->nr_segments = 0;
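        /*
         * Grant the backend access to each page backing this request:
         * every bio segment gets its own grant reference, recorded both
         * in the ring request and in the shadow copy used for recovery.
         */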
        rq_for_each_bio (bio, req) {
                bio_for_each_segment (bvec, bio, idx) {
                        BUG_ON(ring_req->nr_segments
                               == BLKIF_MAX_SEGMENTS_PER_REQUEST);
                        buffer_mfn = page_to_phys(bvec->bv_page) >> PAGE_SHIFT;
                        fsect = bvec->bv_offset >> 9;
                        lsect = fsect + (bvec->bv_len >> 9) - 1;
                        /* install a grant reference. */
                        ref = gnttab_claim_grant_reference(&gref_head);
                        BUG_ON(ref == -ENOSPC);

                        gnttab_grant_foreign_access_ref(
                                ref,
                                info->xbdev->otherend_id,
                                buffer_mfn,
                                rq_data_dir(req) );

                        info->shadow[id].frame[ring_req->nr_segments] =
                                mfn_to_pfn(buffer_mfn);

                        ring_req->seg[ring_req->nr_segments] =
                                (struct blkif_request_segment) {
                                        .gref       = ref,
                                        .first_sect = fsect,
                                        .last_sect  = lsect };

                        ring_req->nr_segments++;
                }
        }

        info->ring.req_prod_pvt++;

        /* Keep a private copy so we can reissue requests when recovering. */
        info->shadow[id].req = *ring_req;

        gnttab_free_grant_references(gref_head);

        return 0;
}

/*
 * do_blkif_request
 *  read a block; request is in a request queue
 */
void do_blkif_request(request_queue_t *rq)
{
        struct blkfront_info *info = NULL;
        struct request *req;
        int queued;

        DPRINTK("Entered do_blkif_request\n");

        queued = 0;

        while ((req = elv_next_request(rq)) != NULL) {
                info = req->rq_disk->private_data;
                if (!blk_fs_request(req)) {
                        end_request(req, 0);
                        continue;
                }

                if (RING_FULL(&info->ring))
                        goto wait;

                DPRINTK("do_blk_req %p: cmd %p, sec %lx, "
                        "(%u/%li) buffer:%p [%s]\n",
                        req, req->cmd, req->sector, req->current_nr_sectors,
                        req->nr_sectors, req->buffer,
                        rq_data_dir(req) ? "write" : "read");


                blkdev_dequeue_request(req);
                if (blkif_queue_request(req)) {
                        blk_requeue_request(rq, req);
                wait:
                        /* Avoid pointless unplugs. */
                        blk_stop_queue(rq);
                        break;
                }

                queued++;
        }

        if (queued != 0)
                flush_requests(info);
}


static irqreturn_t blkif_int(int irq, void *dev_id, struct pt_regs *ptregs)
{
        struct request *req;
        blkif_response_t *bret;
        RING_IDX i, rp;
        unsigned long flags;
        struct blkfront_info *info = (struct blkfront_info *)dev_id;

        spin_lock_irqsave(&blkif_io_lock, flags);

        if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) {
                spin_unlock_irqrestore(&blkif_io_lock, flags);
                return IRQ_HANDLED;
        }

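        /*
         * Consume all responses published by the backend, completing the
         * corresponding block-layer requests and returning their shadow
         * slots and grant references.
         */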
 again:
        rp = info->ring.sring->rsp_prod;
        rmb(); /* Ensure we see queued responses up to 'rp'. */

        for (i = info->ring.rsp_cons; i != rp; i++) {
                unsigned long id;
                int ret;

                bret = RING_GET_RESPONSE(&info->ring, i);
                id   = bret->id;
                req  = (struct request *)info->shadow[id].request;

                blkif_completion(&info->shadow[id]);

                ADD_ID_TO_FREELIST(info, id);

                switch (bret->operation) {
                case BLKIF_OP_READ:
                case BLKIF_OP_WRITE:
                        if (unlikely(bret->status != BLKIF_RSP_OKAY))
                                DPRINTK("Bad return from blkdev data "
                                        "request: %x\n", bret->status);

                        ret = end_that_request_first(
                                req, (bret->status == BLKIF_RSP_OKAY),
                                req->hard_nr_sectors);
                        BUG_ON(ret);
                        end_that_request_last(
                                req, (bret->status == BLKIF_RSP_OKAY));
                        break;
                default:
                        BUG();
                }
        }

        info->ring.rsp_cons = i;

        if (i != info->ring.req_prod_pvt) {
                int more_to_do;
                RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, more_to_do);
                if (more_to_do)
                        goto again;
        } else
                info->ring.sring->rsp_event = i + 1;

        kick_pending_request_queues(info);

        spin_unlock_irqrestore(&blkif_io_lock, flags);

        return IRQ_HANDLED;
}

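/*
 * Tear down the ring grant and event channel.  'suspend' selects whether
 * the device enters SUSPENDED (resume path, shadow state kept for
 * blkif_recover()) or plain DISCONNECTED state.
 */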
static void blkif_free(struct blkfront_info *info, int suspend)
{
        /* Prevent new requests being issued until we fix things up. */
        spin_lock_irq(&blkif_io_lock);
        info->connected = suspend ?
                BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED;
        /* No more blkif_request(). */
        if (info->rq)
                blk_stop_queue(info->rq);
        /* No more gnttab callback work. */
        gnttab_cancel_free_callback(&info->callback);
        flush_scheduled_work();
        spin_unlock_irq(&blkif_io_lock);

        /* Free resources associated with old device channel. */
        if (info->ring_ref != GRANT_INVALID_REF) {
                gnttab_end_foreign_access(info->ring_ref, 0,
                                          (unsigned long)info->ring.sring);
                info->ring_ref = GRANT_INVALID_REF;
                info->ring.sring = NULL;
        }
        if (info->irq)
                unbind_from_irqhandler(info->irq, info);
        info->evtchn = info->irq = 0;

}

static void blkif_completion(struct blk_shadow *s)
{
        int i;
        for (i = 0; i < s->req.nr_segments; i++)
                gnttab_end_foreign_access(s->req.seg[i].gref, 0, 0UL);
}

static void blkif_recover(struct blkfront_info *info)
{
        int i;
        blkif_request_t *req;
        struct blk_shadow *copy;
        int j;

        /* Stage 1: Make a safe copy of the shadow state. */
        copy = kmalloc(sizeof(info->shadow), GFP_KERNEL | __GFP_NOFAIL);
        memcpy(copy, info->shadow, sizeof(info->shadow));

        /* Stage 2: Set up free list. */
        memset(&info->shadow, 0, sizeof(info->shadow));
        for (i = 0; i < BLK_RING_SIZE; i++)
                info->shadow[i].req.id = i+1;
        info->shadow_free = info->ring.req_prod_pvt;
        info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;

        /* Stage 3: Find pending requests and requeue them. */
        for (i = 0; i < BLK_RING_SIZE; i++) {
                /* Not in use? */
                if (copy[i].request == 0)
                        continue;

                /* Grab a request slot and copy shadow state into it. */
                req = RING_GET_REQUEST(
                        &info->ring, info->ring.req_prod_pvt);
                *req = copy[i].req;

                /* We get a new request id, and must reset the shadow state. */
                req->id = GET_ID_FROM_FREELIST(info);
                memcpy(&info->shadow[req->id], &copy[i], sizeof(copy[i]));

                /* Rewrite any grant references invalidated by susp/resume. */
                for (j = 0; j < req->nr_segments; j++)
                        gnttab_grant_foreign_access_ref(
                                req->seg[j].gref,
                                info->xbdev->otherend_id,
                                pfn_to_mfn(info->shadow[req->id].frame[j]),
                                rq_data_dir(
                                        (struct request *)
                                        info->shadow[req->id].request));
                info->shadow[req->id].req = *req;

                info->ring.req_prod_pvt++;
        }

        kfree(copy);

        (void)xenbus_switch_state(info->xbdev, XenbusStateConnected);

        spin_lock_irq(&blkif_io_lock);

        /* Now safe for us to use the shared ring */
        info->connected = BLKIF_STATE_CONNECTED;

        /* Send off requeued requests */
        flush_requests(info);

        /* Kick any other new requests queued since we resumed */
        kick_pending_request_queues(info);

        spin_unlock_irq(&blkif_io_lock);
}


/* ** Driver Registration ** */


static struct xenbus_device_id blkfront_ids[] = {
        { "vbd" },
        { "" }
};


static struct xenbus_driver blkfront = {
        .name = "vbd",
        .owner = THIS_MODULE,
        .ids = blkfront_ids,
        .probe = blkfront_probe,
        .remove = blkfront_remove,
        .resume = blkfront_resume,
        .otherend_changed = backend_changed,
};


static int __init xlblk_init(void)
{
        if (!is_running_on_xen())
                return -ENODEV;

        return xenbus_register_frontend(&blkfront);
}
module_init(xlblk_init);


static void xlblk_exit(void)
{
        return xenbus_unregister_driver(&blkfront);
}
module_exit(xlblk_exit);

MODULE_LICENSE("Dual BSD/GPL");