/*
 * Authors: Dave Boutcher <boutcher@us.ibm.com>
 *          Ryan Arnold <ryanarn@us.ibm.com>
 *          Colin Devilbiss <devilbis@us.ibm.com>
 *          Stephen Rothwell <sfr@au1.ibm.com>
 *
 * (C) Copyright 2000-2004 IBM Corporation
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 * This routine provides access to disk space (termed "DASD" in historical
 * IBM terms) owned and managed by an OS/400 partition running on the
 * same box as this Linux partition.
 *
 * All disk operations are performed by sending messages back and forth to
 * the OS/400 partition.
 */
#include <linux/major.h>
#include <linux/fs.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/blkdev.h>
#include <linux/genhd.h>
#include <linux/hdreg.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/string.h>
#include <linux/dma-mapping.h>
#include <linux/completion.h>

#include <asm/uaccess.h>
#include <asm/iSeries/HvTypes.h>
#include <asm/iSeries/HvLpEvent.h>
#include <asm/iSeries/HvLpConfig.h>
#include <asm/iSeries/vio.h>

MODULE_DESCRIPTION("iSeries Virtual DASD");
MODULE_AUTHOR("Dave Boutcher");
MODULE_LICENSE("GPL");

/*
 * We only support 7 partitions per physical disk....so with minor
 * numbers 0-255 we get a maximum of 32 disks.
 */
#define VIOD_GENHD_NAME		"iseries/vd"
#define VIOD_GENHD_DEVFS_NAME	"iseries/disc"

#define VIOD_VERS		"1.64"

#define VIOD_KERN_WARNING	KERN_WARNING "viod: "
#define VIOD_KERN_INFO		KERN_INFO "viod: "

enum {
	PARTITION_SHIFT = 3,	/* 8 minors per disk: 1 whole disk + 7 partitions */
	MAX_DISKNO = 32,	/* 256 minors / 8 minors per disk */
	MAX_DISK_NAME = sizeof(((struct gendisk *)0)->disk_name)
};

static spinlock_t viodasd_spinlock = SPIN_LOCK_UNLOCKED;

#define VIOMAXBLOCKDMA		12

#define DEVICE_NO(cell)	((struct viodasd_device *)(cell) - &viodasd_devices[0])
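
/*
 * Layout of the block I/O LP events exchanged with the hosting OS/400
 * partition: an open response carries the disk size and geometry, while
 * read/write requests carry up to VIOMAXBLOCKDMA DMA token/length pairs.
 */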
struct open_data {
	u64	disk_size;
	u16	max_disk;
	u16	cylinders;
	u16	tracks;
	u16	sectors;
	u16	bytes_per_sector;
};

struct rw_data {
	u64	offset;
	struct {
		u32	token;
		u32	reserved;
		u64	len;
	} dma_info[VIOMAXBLOCKDMA];
};

struct vioblocklpevent {
	struct HvLpEvent	event;
	u32			reserved;
	u16			version;
	u16			sub_result;
	u16			disk;
	u16			flags;
	union {
		struct open_data	open_data;
		struct rw_data		rw_data;
	} u;
};

#define vioblockflags_ro	0x0001
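
/* Block I/O event subtypes, OR'd with viomajorsubtype_blockio when sent */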
enum vioblocksubtype {
	vioblockopen = 0x0001,
	vioblockclose = 0x0002,
	vioblockread = 0x0003,
	vioblockwrite = 0x0004,
	vioblockflush = 0x0005,
	vioblockcheck = 0x0007
};
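
/*
 * Synchronous callers (open and probe) wait on this structure; the event
 * handler copies the return codes into it and completes "com" to wake them.
 */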
struct viodasd_waitevent {
	struct completion	com;
	int			rc;
	u16			sub_result;
	int			max_disk;	/* open */
};
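
/* Translation of OS/400 sub_result codes into errnos and log messages */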
static const struct vio_error_entry viodasd_err_table[] = {
	{ 0x0201, EINVAL, "Invalid Range" },
	{ 0x0202, EINVAL, "Invalid Token" },
	{ 0x0203, EIO, "DMA Error" },
	{ 0x0204, EIO, "Use Error" },
	{ 0x0205, EIO, "Release Error" },
	{ 0x0206, EINVAL, "Invalid Disk" },
	{ 0x0207, EBUSY, "Cant Lock" },
	{ 0x0208, EIO, "Already Locked" },
	{ 0x0209, EIO, "Already Unlocked" },
	{ 0x020A, EIO, "Invalid Arg" },
	{ 0x020B, EIO, "Bad IFS File" },
	{ 0x020C, EROFS, "Read Only Device" },
	{ 0x02FF, EIO, "Internal Error" },
};

/*
 * Figure out the biggest I/O request (in sectors) we can accept
 */
#define VIODASD_MAXSECTORS (4096 / 512 * VIOMAXBLOCKDMA)
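/* 12 DMA elements * (4096 / 512) sectors each = 96 sectors per request */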

/*
 * Number of disk I/O requests we've sent to OS/400
 */
static int num_req_outstanding;

/*
 * This is our internal structure for keeping track of disk devices
 */
struct viodasd_device {
	u16		cylinders;
	u16		tracks;
	u16		sectors;
	u16		bytes_per_sector;
	u64		size;
	int		read_only;
	spinlock_t	q_lock;
	struct gendisk	*disk;
} viodasd_devices[MAX_DISKNO];
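/* Indexed by the OS/400 disk number; DEVICE_NO() maps a pointer back to it */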

/*
 * External open entry point.
 */
static int viodasd_open(struct inode *ino, struct file *fil)
{
	struct viodasd_device *d = ino->i_bdev->bd_disk->private_data;
	HvLpEvent_Rc hvrc;
	struct viodasd_waitevent we;

	init_completion(&we.com);
	/* Send the open event to OS/400 */
	hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp,
			HvLpEvent_Type_VirtualIo,
			viomajorsubtype_blockio | vioblockopen,
			HvLpEvent_AckInd_DoAck, HvLpEvent_AckType_ImmediateAck,
			viopath_sourceinst(viopath_hostLp),
			viopath_targetinst(viopath_hostLp),
			(u64)(unsigned long)&we, VIOVERSION << 16,
			((u64)DEVICE_NO(d) << 48) /* | ((u64)flags << 32) */,
			0, 0, 0);
	if (hvrc != 0) {
		printk(VIOD_KERN_WARNING "HV open failed %d\n", (int)hvrc);
		return -EIO;
	}

	wait_for_completion(&we.com);

	/* Check the return code */
	if (we.rc != 0) {
		const struct vio_error_entry *err =
			vio_lookup_rc(viodasd_err_table, we.sub_result);

		printk(VIOD_KERN_WARNING
				"bad rc opening disk: %d:0x%04x (%s)\n",
				(int)we.rc, we.sub_result, err->msg);
		return -EIO;
	}

	return 0;
}

/*
 * External release entry point.
 */
static int viodasd_release(struct inode *ino, struct file *fil)
{
	struct viodasd_device *d = ino->i_bdev->bd_disk->private_data;
	HvLpEvent_Rc hvrc;

	/* Send the event to OS/400.  We DON'T expect a response */
	hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp,
			HvLpEvent_Type_VirtualIo,
			viomajorsubtype_blockio | vioblockclose,
			HvLpEvent_AckInd_NoAck, HvLpEvent_AckType_ImmediateAck,
			viopath_sourceinst(viopath_hostLp),
			viopath_targetinst(viopath_hostLp),
			0, VIOVERSION << 16,
			((u64)DEVICE_NO(d) << 48) /* | ((u64)flags << 32) */,
			0, 0, 0);
	if (hvrc != 0)
		printk(VIOD_KERN_WARNING "HV close call failed %d\n",
				(int)hvrc);
	return 0;
}

/* External ioctl entry point.
 */
static int viodasd_ioctl(struct inode *ino, struct file *fil,
		unsigned int cmd, unsigned long arg)
{
	int err;
	unsigned char sectors;
	unsigned char heads;
	unsigned short cylinders;
	struct hd_geometry *geo;
	struct gendisk *gendisk;
	struct viodasd_device *d;

	switch (cmd) {
	case HDIO_GETGEO:
		geo = (struct hd_geometry *)arg;
		err = verify_area(VERIFY_WRITE, geo, sizeof(*geo));
		if (err)
			return err;
		gendisk = ino->i_bdev->bd_disk;
		d = gendisk->private_data;
		sectors = d->sectors;
		heads = d->tracks;
		cylinders = d->cylinders;
		if (cylinders == 0)
			cylinders = get_capacity(gendisk) / (sectors * heads);
		if (__put_user(sectors, &geo->sectors) ||
		    __put_user(heads, &geo->heads) ||
		    __put_user(cylinders, &geo->cylinders) ||
		    __put_user(get_start_sect(ino->i_bdev), &geo->start))
			return -EFAULT;
		return 0;
	}

	return -EINVAL;
}

/*
 * Our file operations table
 */
static struct block_device_operations viodasd_fops = {
	.owner = THIS_MODULE,
	.open = viodasd_open,
	.release = viodasd_release,
	.ioctl = viodasd_ioctl,
};
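
/*
 * Complete (part of) a request: feed the transferred sectors back to the
 * block layer and finish the request once all of it has been accounted for.
 */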
static void viodasd_end_request(struct request *req, int uptodate,
		int num_sectors)
{
	if (end_that_request_first(req, uptodate, num_sectors))
		return;
	add_disk_randomness(req->rq_disk);
	end_that_request_last(req);
}

/*
 * Send an actual I/O request to OS/400
 */
static int send_request(struct request *req)
{
	u64 start;
	int direction;
	int nsg;
	u16 viocmd;
	HvLpEvent_Rc hvrc;
	struct vioblocklpevent *bevent;
	struct scatterlist sg[VIOMAXBLOCKDMA];
	int sgindex;
	struct viodasd_device *d;
	unsigned long flags;

	start = (u64)req->sector << 9;

	if (rq_data_dir(req) == READ) {
		direction = DMA_FROM_DEVICE;
		viocmd = viomajorsubtype_blockio | vioblockread;
	} else {
		direction = DMA_TO_DEVICE;
		viocmd = viomajorsubtype_blockio | vioblockwrite;
	}

	d = req->rq_disk->private_data;

	/* Now build the scatter-gather list */
	nsg = blk_rq_map_sg(req->q, req, sg);
	nsg = dma_map_sg(iSeries_vio_dev, sg, nsg, direction);
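	/* sg[] now holds DMA tokens/lengths that OS/400 uses to move the data */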

	spin_lock_irqsave(&viodasd_spinlock, flags);
	num_req_outstanding++;

	/* This optimization handles a single DMA block */
	if (nsg == 1)
		hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp,
				HvLpEvent_Type_VirtualIo, viocmd,
				HvLpEvent_AckInd_DoAck,
				HvLpEvent_AckType_ImmediateAck,
				viopath_sourceinst(viopath_hostLp),
				viopath_targetinst(viopath_hostLp),
				(u64)(unsigned long)req, VIOVERSION << 16,
				((u64)DEVICE_NO(d) << 48), start,
				((u64)sg_dma_address(&sg[0])) << 32,
				sg_dma_len(&sg[0]));
	else {
		bevent = (struct vioblocklpevent *)
			vio_get_event_buffer(viomajorsubtype_blockio);
		if (bevent == NULL) {
			printk(VIOD_KERN_WARNING
			       "error allocating disk event buffer\n");
			goto error_ret;
		}

		/*
		 * Now build up the actual request.  Note that we store
		 * the pointer to the request in the correlation
		 * token so we can match the response up later
		 */
		memset(bevent, 0, sizeof(struct vioblocklpevent));
		bevent->event.xFlags.xValid = 1;
		bevent->event.xFlags.xFunction = HvLpEvent_Function_Int;
		bevent->event.xFlags.xAckInd = HvLpEvent_AckInd_DoAck;
		bevent->event.xFlags.xAckType = HvLpEvent_AckType_ImmediateAck;
		bevent->event.xType = HvLpEvent_Type_VirtualIo;
		bevent->event.xSubtype = viocmd;
		bevent->event.xSourceLp = HvLpConfig_getLpIndex();
		bevent->event.xTargetLp = viopath_hostLp;
		bevent->event.xSizeMinus1 =
			offsetof(struct vioblocklpevent, u.rw_data.dma_info) +
			(sizeof(bevent->u.rw_data.dma_info[0]) * nsg) - 1;
		bevent->event.xSourceInstanceId =
			viopath_sourceinst(viopath_hostLp);
		bevent->event.xTargetInstanceId =
			viopath_targetinst(viopath_hostLp);
		bevent->event.xCorrelationToken = (u64)req;
		bevent->version = VIOVERSION;
		bevent->disk = DEVICE_NO(d);
		bevent->u.rw_data.offset = start;

		/*
		 * Copy just the dma information from the sg list
		 */
		for (sgindex = 0; sgindex < nsg; sgindex++) {
			bevent->u.rw_data.dma_info[sgindex].token =
				sg_dma_address(&sg[sgindex]);
			bevent->u.rw_data.dma_info[sgindex].len =
				sg_dma_len(&sg[sgindex]);
		}

		/* Send the request */
		hvrc = HvCallEvent_signalLpEvent(&bevent->event);
		vio_free_event_buffer(viomajorsubtype_blockio, bevent);
	}

	if (hvrc != HvLpEvent_Rc_Good) {
		printk(VIOD_KERN_WARNING
		       "error sending disk event to OS/400 (rc %d)\n",
		       (int)hvrc);
		goto error_ret;
	}
	spin_unlock_irqrestore(&viodasd_spinlock, flags);
	return 0;

error_ret:
	num_req_outstanding--;
	spin_unlock_irqrestore(&viodasd_spinlock, flags);
	dma_unmap_sg(iSeries_vio_dev, sg, nsg, direction);
	return -1;
}

/*
 * This is the external request processing routine
 */
static void do_viodasd_request(request_queue_t *q)
{
	struct request *req;

	/*
	 * If we already have the maximum number of requests
	 * outstanding to OS/400 just bail out. We'll come
	 * back later.
	 */
	while (num_req_outstanding < VIOMAXREQ) {
		req = elv_next_request(q);
		if (req == NULL)
			return;
		/* dequeue the current request from the queue */
		blkdev_dequeue_request(req);
		/* check that request contains a valid command */
		if (!blk_fs_request(req)) {
			viodasd_end_request(req, 0, req->hard_nr_sectors);
			continue;
		}
		/* Try sending the request */
		if (send_request(req) != 0)
			viodasd_end_request(req, 0, req->hard_nr_sectors);
	}
}

/*
 * Probe a single disk and fill in the viodasd_device structure
 */
static void probe_disk(struct viodasd_device *d)
{
	HvLpEvent_Rc hvrc;
	struct viodasd_waitevent we;
	int dev_no = DEVICE_NO(d);
	struct gendisk *g;
	struct request_queue *q;

	init_completion(&we.com);

	/* Send the open event to OS/400 */
	hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp,
			HvLpEvent_Type_VirtualIo,
			viomajorsubtype_blockio | vioblockopen,
			HvLpEvent_AckInd_DoAck, HvLpEvent_AckType_ImmediateAck,
			viopath_sourceinst(viopath_hostLp),
			viopath_targetinst(viopath_hostLp),
			(u64)(unsigned long)&we, VIOVERSION << 16,
			((u64)dev_no << 48) | ((u64)vioblockflags_ro << 32),
			0, 0, 0);
	if (hvrc != 0) {
		printk(VIOD_KERN_WARNING "bad rc on HV open %d\n", (int)hvrc);
		return;
	}

	wait_for_completion(&we.com);

	if (we.rc != 0)
		return;

	if (we.max_disk > (MAX_DISKNO - 1)) {
		printk(VIOD_KERN_INFO
		       "Only examining the first %d "
		       "of %d disks connected\n",
		       MAX_DISKNO, we.max_disk + 1);
	}

	/* Send the close event to OS/400.  We DON'T expect a response */
	hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp,
			HvLpEvent_Type_VirtualIo,
			viomajorsubtype_blockio | vioblockclose,
			HvLpEvent_AckInd_NoAck, HvLpEvent_AckType_ImmediateAck,
			viopath_sourceinst(viopath_hostLp),
			viopath_targetinst(viopath_hostLp),
			0, VIOVERSION << 16,
			((u64)dev_no << 48) | ((u64)vioblockflags_ro << 32),
			0, 0, 0);
	if (hvrc != 0) {
		printk(VIOD_KERN_WARNING
		       "bad rc sending event to OS/400 %d\n", (int)hvrc);
		return;
	}

	printk(VIOD_KERN_INFO "disk %d: %lu sectors (%lu MB) "
	       "CHS=%d/%d/%d sector size %d\n",
	       dev_no, (unsigned long)(d->size >> 9),
	       (unsigned long)(d->size >> 20),
	       (int)d->cylinders, (int)d->tracks,
	       (int)d->sectors, (int)d->bytes_per_sector);

	/* create the request queue for the disk */
	spin_lock_init(&d->q_lock);
	q = blk_init_queue(do_viodasd_request, &d->q_lock);
	if (q == NULL) {
		printk(VIOD_KERN_WARNING "cannot allocate queue for disk %d\n",
				dev_no);
		return;
	}
	g = alloc_disk(1 << PARTITION_SHIFT);
	if (g == NULL) {
		printk(VIOD_KERN_WARNING
		       "cannot allocate disk structure for disk %d\n",
		       dev_no);
		blk_cleanup_queue(q);
		return;
	}

	d->disk = g;
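	/* Keep each request within what a single LP event's dma_info[] can map */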
	blk_queue_max_hw_segments(q, VIOMAXBLOCKDMA);
	blk_queue_max_phys_segments(q, VIOMAXBLOCKDMA);
	blk_queue_max_sectors(q, VIODASD_MAXSECTORS);
	g->major = VIODASD_MAJOR;
	g->first_minor = dev_no << PARTITION_SHIFT;
	if (dev_no >= 26)
		snprintf(g->disk_name, sizeof(g->disk_name),
				VIOD_GENHD_NAME "%c%c",
				'a' + (dev_no / 26) - 1, 'a' + (dev_no % 26));
	else
		snprintf(g->disk_name, sizeof(g->disk_name),
				VIOD_GENHD_NAME "%c", 'a' + (dev_no % 26));
	snprintf(g->devfs_name, sizeof(g->devfs_name),
			"%s%d", VIOD_GENHD_DEVFS_NAME, dev_no);
	g->fops = &viodasd_fops;
	g->queue = q;
	g->private_data = d;
	set_capacity(g, d->size >> 9);

	/* register us in the global list */
	add_disk(g);
}

/* returns the total number of scatterlist elements converted */
static int block_event_to_scatterlist(const struct vioblocklpevent *bevent,
		struct scatterlist *sg, int *total_len)
{
	int i, numsg;
	const struct rw_data *rw_data = &bevent->u.rw_data;
	static const int offset =
		offsetof(struct vioblocklpevent, u.rw_data.dma_info);
	static const int element_size = sizeof(rw_data->dma_info[0]);

	numsg = ((bevent->event.xSizeMinus1 + 1) - offset) / element_size;
	if (numsg > VIOMAXBLOCKDMA)
		numsg = VIOMAXBLOCKDMA;

	*total_len = 0;
	memset(sg, 0, sizeof(sg[0]) * VIOMAXBLOCKDMA);

	for (i = 0; (i < numsg) && (rw_data->dma_info[i].len > 0); ++i) {
		sg_dma_address(&sg[i]) = rw_data->dma_info[i].token;
		sg_dma_len(&sg[i]) = rw_data->dma_info[i].len;
		*total_len += rw_data->dma_info[i].len;
	}
	return i;
}

/*
 * Restart all queues, starting with the one _after_ the disk given,
 * thus reducing the chance of starvation of higher numbered disks.
 */
static void viodasd_restart_all_queues_starting_from(int first_index)
{
	int i;

	for (i = first_index + 1; i < MAX_DISKNO; ++i)
		if (viodasd_devices[i].disk)
			blk_run_queue(viodasd_devices[i].disk->queue);
	for (i = 0; i <= first_index; ++i)
		if (viodasd_devices[i].disk)
			blk_run_queue(viodasd_devices[i].disk->queue);
}

/*
 * For read and write requests, decrement the number of outstanding requests,
 * Free the DMA buffers we allocated.
 */
static int viodasd_handle_read_write(struct vioblocklpevent *bevent)
{
	int num_sg, num_sect, pci_direction, total_len;
	struct request *req;
	struct scatterlist sg[VIOMAXBLOCKDMA];
	struct HvLpEvent *event = &bevent->event;
	unsigned long irq_flags;
	int device_no;
	int error;
	spinlock_t *qlock;

	num_sg = block_event_to_scatterlist(bevent, sg, &total_len);
	num_sect = total_len >> 9;
	if (event->xSubtype == (viomajorsubtype_blockio | vioblockread))
		pci_direction = DMA_FROM_DEVICE;
	else
		pci_direction = DMA_TO_DEVICE;
	dma_unmap_sg(iSeries_vio_dev, sg, num_sg, pci_direction);

	/*
	 * Since this is running in interrupt mode, we need to make sure
	 * we're not stepping on any global I/O operations
	 */
	spin_lock_irqsave(&viodasd_spinlock, irq_flags);
	num_req_outstanding--;
	spin_unlock_irqrestore(&viodasd_spinlock, irq_flags);

	req = (struct request *)bevent->event.xCorrelationToken;
	device_no = DEVICE_NO(req->rq_disk->private_data);

	error = event->xRc != HvLpEvent_Rc_Good;
	if (error) {
		const struct vio_error_entry *err;

		err = vio_lookup_rc(viodasd_err_table, bevent->sub_result);
		printk(VIOD_KERN_WARNING "read/write error %d:0x%04x (%s)\n",
				event->xRc, bevent->sub_result, err->msg);
		num_sect = req->hard_nr_sectors;
	}
	qlock = req->q->queue_lock;
	spin_lock_irqsave(qlock, irq_flags);
	viodasd_end_request(req, !error, num_sect);
	spin_unlock_irqrestore(qlock, irq_flags);

	/* Finally, try to get more requests off of this device's queue */
	viodasd_restart_all_queues_starting_from(device_no);

	return 0;
}

/* This routine handles incoming block LP events */
static void handle_block_event(struct HvLpEvent *event)
{
	struct vioblocklpevent *bevent = (struct vioblocklpevent *)event;
	struct viodasd_waitevent *pwe;

	if (event == NULL)
		/* Notification that a partition went away! */
		return;
	/* First, we should NEVER get an int here...only acks */
	if (event->xFlags.xFunction == HvLpEvent_Function_Int) {
		printk(VIOD_KERN_WARNING
		       "Yikes! got an int in viodasd event handler!\n");
		if (event->xFlags.xAckInd == HvLpEvent_AckInd_DoAck) {
			event->xRc = HvLpEvent_Rc_InvalidSubtype;
			HvCallEvent_ackLpEvent(event);
		}
		return;
	}

	switch (event->xSubtype & VIOMINOR_SUBTYPE_MASK) {
	case vioblockopen:
		/*
		 * Handle a response to an open request.  We get all the
		 * disk information in the response, so update it.  The
		 * correlation token contains a pointer to a waitevent
		 * structure that has a completion in it.  update the
		 * return code in the waitevent structure and post the
		 * completion to wake up the guy who sent the request
		 */
		pwe = (struct viodasd_waitevent *)event->xCorrelationToken;
		pwe->rc = event->xRc;
		pwe->sub_result = bevent->sub_result;
		if (event->xRc == HvLpEvent_Rc_Good) {
			const struct open_data *data = &bevent->u.open_data;
			struct viodasd_device *device =
				&viodasd_devices[bevent->disk];

			device->read_only =
				bevent->flags & vioblockflags_ro;
			device->size = data->disk_size;
			device->cylinders = data->cylinders;
			device->tracks = data->tracks;
			device->sectors = data->sectors;
			device->bytes_per_sector = data->bytes_per_sector;
			pwe->max_disk = data->max_disk;
		}
		complete(&pwe->com);
		break;
	case vioblockclose:
		break;
	case vioblockread:
	case vioblockwrite:
		viodasd_handle_read_write(bevent);
		break;
	default:
		printk(VIOD_KERN_WARNING "invalid subtype!");
		if (event->xFlags.xAckInd == HvLpEvent_AckInd_DoAck) {
			event->xRc = HvLpEvent_Rc_InvalidSubtype;
			HvCallEvent_ackLpEvent(event);
		}
	}
}

/*
 * Initialize the whole device driver.  Handle module and non-module
 * versions
 */
static int __init viodasd_init(void)
{
	int i;

	/* Try to open to our host lp */
	if (viopath_hostLp == HvLpIndexInvalid)
		vio_set_hostlp();

	if (viopath_hostLp == HvLpIndexInvalid) {
		printk(VIOD_KERN_WARNING "invalid hosting partition\n");
		return -EIO;
	}

	printk(VIOD_KERN_INFO "vers " VIOD_VERS ", hosting partition %d\n",
			viopath_hostLp);

	/* register the block device */
	if (register_blkdev(VIODASD_MAJOR, VIOD_GENHD_NAME)) {
		printk(VIOD_KERN_WARNING
				"Unable to get major number %d for %s\n",
				VIODASD_MAJOR, VIOD_GENHD_NAME);
		return -EIO;
	}
	/* Actually open the path to the hosting partition */
	if (viopath_open(viopath_hostLp, viomajorsubtype_blockio,
				VIOMAXREQ + 2)) {
		printk(VIOD_KERN_WARNING
				"error opening path to host partition %d\n",
				viopath_hostLp);
		unregister_blkdev(VIODASD_MAJOR, VIOD_GENHD_NAME);
		return -EIO;
	}

	/* Initialize our request handler */
	vio_setHandler(viomajorsubtype_blockio, handle_block_event);

	for (i = 0; i < MAX_DISKNO; i++)
		probe_disk(&viodasd_devices[i]);

	return 0;
}
module_init(viodasd_init);
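
/*
 * Module exit: unregister the disks and detach from the hosting partition.
 */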
void viodasd_exit(void)
{
	int i;
	struct viodasd_device *d;

	for (i = 0; i < MAX_DISKNO; i++) {
		d = &viodasd_devices[i];
		if (d->disk) {
			del_gendisk(d->disk);
			blk_cleanup_queue(d->disk->queue);
			put_disk(d->disk);
		}
	}
	vio_clearHandler(viomajorsubtype_blockio);
	unregister_blkdev(VIODASD_MAJOR, VIOD_GENHD_NAME);
	viopath_close(viopath_hostLp, viomajorsubtype_blockio, VIOMAXREQ + 2);
}
module_exit(viodasd_exit);