2 * I2O Random Block Storage Class OSM
4 * (C) Copyright 1999-2002 Red Hat
6 * Written by Alan Cox, Building Number Three Ltd
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
13 * This program is distributed in the hope that it will be useful, but
14 * WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * General Public License for more details.
18 * For the purpose of avoiding doubt the preferred form of the work
19 * for making modifications shall be a standards compliant form such
20 * gzipped tar and not one requiring a proprietary or patent encumbered
23 * This is a beta test release. Most of the good code was taken
24 * from the nbd driver by Pavel Machek, who in turn took some of it
25 * from loop.c. Isn't free software great for reusability 8)
29 * Multiple device handling error fixes,
30 * Added a queue depth.
32 * FC920 has an rmw bug. Don't or in the end marker.
33 * Removed queue walk, fixed for 64bitness.
34 * Rewrote much of the code over time
35 * Added indirect block lists
36 * Handle 64K limits on many controllers
37 * Don't use indirects on the Promise (breaks)
38 * Heavily chop down the queue depths
40 * Independent queues per IOP
41 * Support for dynamic device creation/deletion
43 * Support for larger I/Os through merge* functions
44 * (taken from DAC960 driver)
45 * Boji T Kannanthanam:
46 * Set the I2O Block devices to be detected in increasing
47 * order of TIDs during boot.
48 * Search and set the I2O block device that we boot off from as
49 * the first device to be claimed (as /dev/i2o/hda)
50 * Properly attach/detach I2O gendisk structure from the system
51 * gendisk list. The I2O block devices now appear in
53 * Markus Lidel <Markus.Lidel@shadowconnect.com>:
54 * Minor bugfixes for 2.6.
57 * Serial number scanning to find duplicates for FC multipathing
60 #include <linux/major.h>
62 #include <linux/module.h>
63 #include <linux/init.h>
64 #include <linux/sched.h>
66 #include <linux/stat.h>
67 #include <linux/pci.h>
68 #include <linux/errno.h>
69 #include <linux/file.h>
70 #include <linux/ioctl.h>
71 #include <linux/i2o.h>
72 #include <linux/blkdev.h>
73 #include <linux/blkpg.h>
74 #include <linux/slab.h>
75 #include <linux/hdreg.h>
76 #include <linux/spinlock.h>
77 #include <linux/bio.h>
79 #include <linux/notifier.h>
80 #include <linux/reboot.h>
82 #include <asm/uaccess.h>
83 #include <asm/semaphore.h>
84 #include <linux/completion.h>
86 #include <linux/smp_lock.h>
87 #include <linux/wait.h>
89 #define MAJOR_NR I2O_MAJOR
/* Per-IOP cap on outstanding block requests, and retry limit per request. */
93 #define MAX_I2OB_DEPTH 8
94 #define MAX_I2OB_RETRIES 4
/* NOTE(review): DEBUG() expands to an unconditional printk here -- presumably
 * gated by a debug #ifdef elsewhere in the file; confirm against full source. */
98 #define DEBUG( s ) printk( s )
104 * Events that this OSM is interested in
/* BSA event indications we register for: media load/unload (and unload
 * requests), capacity changes, and SCSI SMART notifications. */
106 #define I2OB_EVENT_MASK (I2O_EVT_IND_BSA_VOLUME_LOAD | \
107 I2O_EVT_IND_BSA_VOLUME_UNLOAD | \
108 I2O_EVT_IND_BSA_VOLUME_UNLOAD_REQ | \
109 I2O_EVT_IND_BSA_CAPACITY_CHANGE | \
110 I2O_EVT_IND_BSA_SCSI_SMART )
114 * Some of these can be made smaller later
/* Context value handed to the IOP so replies can be routed back to this OSM. */
117 static int i2ob_context;
118 static struct block_device_operations i2ob_fops;
121 * I2O Block device descriptor
/* Per-disk state: owning controller/device, request bookkeeping and the
 * per-IOP request queue this disk feeds into. */
125 struct i2o_controller *controller;
126 struct i2o_device *i2odev;
131 struct request *head, *tail;
132 request_queue_t *req_queue;
134 int max_direct; /* Not yet used properly */
/* Set when media was changed/removed; reported via i2ob_media_change(). */
141 int media_change_flag;
148 * We should cache align these to avoid ping-ponging lines on SMP
149 * boxes under heavy I/O load...
/* In-flight request bookkeeping: free-list link, DMA direction and the
 * scatter/gather table mapped for this request. */
154 struct i2ob_request *next;
157 int sg_dma_direction;
159 struct scatterlist sg_table[16];
163 * Per IOP request queue information
165 * We have a separate request_queue_t per IOP so that a heavily
166 * loaded I2O block device on an IOP does not starve block devices
167 * across all I2O controllers.
170 struct i2ob_iop_queue
172 unsigned int queue_depth;
173 struct i2ob_request request_queue[MAX_I2OB_DEPTH];
174 struct i2ob_request *i2ob_qhead;
175 request_queue_t *req_queue;
/* One queue structure per IOP, allocated lazily in i2ob_init_iop(). */
178 static struct i2ob_iop_queue *i2ob_queues[MAX_I2O_CONTROLLERS];
181 * Each I2O disk is one of these.
184 static struct i2ob_device i2ob_dev[MAX_I2OB];
185 static int i2ob_dev_count = 0;
188 * Mutex and spin lock for event handling synchronization
189 * evt_msg contains the last event.
191 static DECLARE_MUTEX_LOCKED(i2ob_evt_sem);
192 static DECLARE_COMPLETION(i2ob_thread_dead);
193 static spinlock_t i2ob_evt_lock = SPIN_LOCK_UNLOCKED;
/* Copy of the most recent event reply frame, filled in i2o_block_reply(). */
194 static u32 evt_msg[MSG_FRAME_SIZE];
/* Forward declarations for the OSM entry points and internal helpers. */
196 static void i2o_block_reply(struct i2o_handler *, struct i2o_controller *,
197 struct i2o_message *);
198 static void i2ob_new_device(struct i2o_controller *, struct i2o_device *);
199 static void i2ob_del_device(struct i2o_controller *, struct i2o_device *);
200 static void i2ob_reboot_event(void);
201 static int i2ob_install_device(struct i2o_controller *, struct i2o_device *, int);
202 static void i2ob_end_request(struct request *);
203 static void i2ob_request(request_queue_t *);
204 static int i2ob_init_iop(unsigned int);
205 static int i2ob_query_device(struct i2ob_device *, int, int, void*, int);
206 static int i2ob_evt(void *);
/* Event-thread pid/run flag, and the next unit number handed out by i2ob_scan(). */
208 static int evt_pid = 0;
209 static int evt_running = 0;
210 static int scan_unit = 0;
213 * I2O OSM registration structure...keeps getting bigger and bigger :)
/* Registered with the I2O core; claims the random block storage class. */
215 static struct i2o_handler i2o_block_handler =
223 I2O_CLASS_RANDOM_BLOCK_STORAGE
227 * i2ob_get - Get an I2O message
228 * @dev: I2O block device
230 * Get a message from the FIFO used for this block device. The message is returned
231 * or the I2O 'no message' value of 0xFFFFFFFF if nothing is available.
234 static u32 i2ob_get(struct i2ob_device *dev)
/* Pull a free inbound message frame offset from the controller FIFO;
 * the documented 'no message' value is 0xFFFFFFFF. */
236 struct i2o_controller *c=dev->controller;
237 return I2O_POST_READ32(c);
/* Build the scatter/gather table for ireq's block request and map it for
 * PCI DMA in the direction implied by the request. Returns the number of
 * mapped entries (what the hardware SGL must describe). */
240 static int i2ob_build_sglist(struct i2ob_device *dev, struct i2ob_request *ireq)
242 struct scatterlist *sg = ireq->sg_table;
245 nents = blk_rq_map_sg(dev->req_queue, ireq->req, ireq->sg_table);
247 if (rq_data_dir(ireq->req) == READ)
248 ireq->sg_dma_direction = PCI_DMA_FROMDEVICE;
250 ireq->sg_dma_direction = PCI_DMA_TODEVICE;
/* pci_map_sg may coalesce; use its return value, not nents, from here on. */
252 ireq->sg_nents = pci_map_sg(dev->controller->pdev, sg, nents, ireq->sg_dma_direction);
253 return ireq->sg_nents;
/* Undo i2ob_build_sglist(): unmap the request's DMA scatter/gather table. */
256 void i2ob_free_sglist(struct i2ob_device *dev, struct i2ob_request *ireq)
258 struct pci_dev *pdev = dev->controller->pdev;
259 struct scatterlist *sg = ireq->sg_table;
260 int nents = ireq->sg_nents;
261 pci_unmap_sg(pdev, sg, nents, ireq->sg_dma_direction);
265 * i2ob_send - Turn a request into a message and send it
268 * @ireq: Request structure
269 * @unit: Device identity
271 * Generate an I2O BSAREAD request. This interface function is called for devices that
272 * appear to explode when they are fed indirect chain pointers (notably right now this
273 * appears to afflict Promise hardware, so be careful what you feed the hardware
275 * No cleanup is done by this interface. It is done on the interrupt side when the
/* Translate a block-layer request into an I2O BSA BlockRead/BlockWrite
 * message frame (frame offset m on controller c) and post it. The per-IOP
 * queue depth is bumped on success; completion/cleanup happens in
 * i2o_block_reply() on the interrupt side. */
279 static int i2ob_send(u32 m, struct i2ob_device *dev, struct i2ob_request *ireq, int unit)
281 struct i2o_controller *c = dev->controller;
286 struct request *req = ireq->req;
287 int count = req->nr_sectors<<9;
288 struct scatterlist *sg;
292 // printk(KERN_INFO "i2ob_send called\n");
293 /* Map the message to a virtual address */
294 msg = c->msg_virt + m;
/* Map the request for DMA; sgnum is the number of hardware SG entries. */
296 sgnum = i2ob_build_sglist(dev, ireq);
298 /* FIXME: if we have no resources how should we get out of this */
303 * Build the message based on the request.
/* InitiatorContext (+unit in the high byte) and TransactionContext carry
 * enough to find this ireq again in the reply handler. */
305 i2o_raw_writel(i2ob_context|(unit<<8), msg+8);
306 i2o_raw_writel(ireq->num, msg+12);
307 i2o_raw_writel(req->nr_sectors << 9, msg+20);
310 * Mask out partitions from now on
313 /* This can be optimised later - just want to be sure its right for
315 offset = ((u64)req->sector) << 9;
316 i2o_raw_writel( offset & 0xFFFFFFFF, msg+24);
317 i2o_raw_writel(offset>>32, msg+28);
321 if(rq_data_dir(req) == READ)
324 i2o_raw_writel(I2O_CMD_BLOCK_READ<<24|HOST_TID<<12|tid, msg+4);
/* Emit one SG element per mapped entry; the last element carries the
 * end-of-list flag bits (0xD0... vs 0x10...). */
325 for(i = sgnum; i > 0; i--)
328 i2o_raw_writel(0x10000000|sg_dma_len(sg), mptr);
330 i2o_raw_writel(0xD0000000|sg_dma_len(sg), mptr);
331 i2o_raw_writel(sg_dma_address(sg), mptr+4);
333 count -= sg_dma_len(sg);
/* Read-cache control word chosen from the device's rcache policy. */
339 i2o_raw_writel(0, msg+16);break;
341 i2o_raw_writel(0x201F0008, msg+16);break;
342 case CACHE_SMARTFETCH:
343 if(req->nr_sectors > 16)
344 i2o_raw_writel(0x201F0008, msg+16);
346 i2o_raw_writel(0x001F0000, msg+16);
350 // printk("Reading %d entries %d bytes.\n",
351 // mptr-msg-8, req->nr_sectors<<9);
353 else if(rq_data_dir(req) == WRITE)
356 i2o_raw_writel(I2O_CMD_BLOCK_WRITE<<24|HOST_TID<<12|tid, msg+4);
357 for(i = sgnum; i > 0; i--)
360 i2o_raw_writel(0x14000000|sg_dma_len(sg), mptr);
362 i2o_raw_writel(0xD4000000|sg_dma_len(sg), mptr);
363 i2o_raw_writel(sg_dma_address(sg), mptr+4);
365 count -= sg_dma_len(sg);
/* Write-cache control word from the wcache policy (through/back/smart). */
372 i2o_raw_writel(0, msg+16);break;
373 case CACHE_WRITETHROUGH:
374 i2o_raw_writel(0x001F0008, msg+16);break;
375 case CACHE_WRITEBACK:
376 i2o_raw_writel(0x001F0010, msg+16);break;
377 case CACHE_SMARTBACK:
378 if(req->nr_sectors > 16)
379 i2o_raw_writel(0x001F0004, msg+16);
381 i2o_raw_writel(0x001F0010, msg+16);
383 case CACHE_SMARTTHROUGH:
384 if(req->nr_sectors > 16)
385 i2o_raw_writel(0x001F0004, msg+16);
387 i2o_raw_writel(0x001F0010, msg+16);
390 // printk("Writing %d entries %d bytes.\n",
391 // mptr-msg-8, req->nr_sectors<<9);
/* Finalise the header with the actual frame length and SGL offset. */
393 i2o_raw_writel(I2O_MESSAGE_SIZE(mptr-msg)>>2 | SGL_OFFSET_8, msg);
/* count should have been fully consumed by the SG walk; anything left
 * means the SG list and request byte count disagree. */
397 printk(KERN_ERR "Request count botched by %d.\n", count);
400 i2o_post_message(c,m);
401 i2ob_queues[c->unit]->queue_depth ++;
407 * Remove a request from the _locked_ request list. We update both the
408 * list chain and if this is the last item the tail pointer. Caller
409 * must hold the lock.
412 static inline void i2ob_unhook_request(struct i2ob_request *ireq,
/* Return the completed ireq to the head of the per-IOP free list.
 * Caller must already hold the queue lock. */
415 ireq->next = i2ob_queues[iop]->i2ob_qhead;
416 i2ob_queues[iop]->i2ob_qhead = ireq;
420 * Request completion handler
/* Complete a block-layer request: retire all its buffers (success unless
 * req->errors is set), then finish the request itself. */
423 static inline void i2ob_end_request(struct request *req)
425 /* FIXME - pci unmap the request */
428 * Loop until all of the buffers that are linked
429 * to this request have been marked updated and
433 while (end_that_request_first( req, !req->errors, req->hard_cur_sectors ));
436 * It is now ok to complete the request.
438 end_that_request_last( req );
439 DEBUG("IO COMPLETED\n");
443 * OSM reply handler. This gets all the message replies
/* Reply handler called by the I2O core for every message addressed to this
 * OSM: handles FAILed frames, event-registration replies (which wake the
 * event thread), error status words, and normal I/O completion. */
446 static void i2o_block_reply(struct i2o_handler *h, struct i2o_controller *c, struct i2o_message *msg)
449 struct i2ob_request *ireq = NULL;
/* unit was stashed in the InitiatorContext by i2ob_send(). */
453 struct i2ob_device *dev = &i2ob_dev[unit];
462 * FAILed message from controller
463 * We increment the error count and abort it
465 * In theory this will never happen. The I2O block class
466 * specification states that block devices never return
467 * FAILs but instead use the REQ status field...but
468 * better be on the safe side since no one really follows
469 * the spec to the book :)
/* TransactionContext (m[3]) indexes our per-IOP request table. */
471 ireq=&i2ob_queues[c->unit]->request_queue[m[3]];
474 spin_lock_irqsave(dev->req_queue->queue_lock, flags);
475 i2ob_unhook_request(ireq, c->unit);
476 i2ob_end_request(ireq->req);
477 spin_unlock_irqrestore(dev->req_queue->queue_lock, flags);
479 /* Now flush the message by making it a NOP */
481 m[0]|=(I2O_CMD_UTIL_NOP)<<24;
482 i2o_post_message(c, (unsigned long) m - (unsigned long) c->msg_virt);
/* Event notification: copy the frame for the event thread and wake it. */
487 if(msg->function == I2O_CMD_UTIL_EVT_REGISTER)
489 spin_lock(&i2ob_evt_lock);
490 memcpy(evt_msg, msg, (m[0]>>16)<<2);
491 spin_unlock(&i2ob_evt_lock);
499 * This is HACK, but Intel Integrated RAID allows user
500 * to delete a volume that is claimed, locked, and in use
501 * by the OS. We have to check for a reply from a
502 * non-existent device and flag it as an error or the system
505 ireq=&i2ob_queues[c->unit]->request_queue[m[3]];
507 printk(KERN_WARNING "I2O Block: Data transfer to deleted device!\n");
508 spin_lock_irqsave(dev->req_queue->queue_lock, flags);
509 i2ob_unhook_request(ireq, c->unit);
510 i2ob_end_request(ireq->req);
511 spin_unlock_irqrestore(dev->req_queue->queue_lock, flags);
516 * Lets see what is cooking. We stuffed the
517 * request in the context.
520 ireq=&i2ob_queues[c->unit]->request_queue[m[3]];
/* Human-readable BSA detailed-status strings, indexed by m[4]&0xFFFF. */
530 "Failure communicating to device",
532 "Device is not ready",
534 "Media is locked by another user",
536 "Failure communicating to device",
537 "Device bus failure",
538 "Device is locked by another user",
539 "Device is write protected",
541 "Volume has changed, waiting for acknowledgement"
547 * Device not ready means two things. One is that the
548 * the thing went offline (but not a removal media)
550 * The second is that you have a SuperTrak 100 and the
551 * firmware got constipated. Unlike standard i2o card
552 * setups the supertrak returns an error rather than
553 * blocking for the timeout in these cases.
555 * Don't stick a supertrak100 into cache aggressive modes
559 printk(KERN_ERR "\n/dev/%s error: %s", dev->i2odev->dev_name,
560 bsa_errors[m[4]&0XFFFF]);
562 printk(" - DDM attempted %d retries", (m[4]>>16)&0x00FF );
/* Successful path: clear any error mark before completing. */
567 ireq->req->errors = 0;
570 * Dequeue the request. We use irqsave locks as one day we
571 * may be running polled controllers from a BH...
574 i2ob_free_sglist(dev, ireq);
575 spin_lock_irqsave(dev->req_queue->queue_lock, flags);
576 i2ob_unhook_request(ireq, c->unit);
577 i2ob_end_request(ireq->req);
578 i2ob_queues[c->unit]->queue_depth --;
581 * We may be able to do more I/O
/* A slot just freed up, so kick the request function again. */
584 i2ob_request(dev->gd->queue);
585 spin_unlock_irqrestore(dev->req_queue->queue_lock, flags);
589 * Event handler. Needs to be a separate thread b/c we may have
590 * to do things like scan a partition table, or query parameters
591 * which cannot be done from an interrupt or from a bottom half.
/* Kernel-thread body for BSA event handling. Sleeps on i2ob_evt_sem until
 * i2o_block_reply() copies an event frame into evt_msg, then reacts:
 * reinstall on media load, zero out on unload, rescan capacity, log SMART
 * data, and panic on a known-fatal Promise firmware event. Runs as a thread
 * because some reactions (parameter queries, partition rescan) cannot be
 * done from interrupt context. */
593 static int i2ob_evt(void *dummy)
597 struct i2ob_device *dev;
599 //The only event that has data is the SCSI_SMART event.
609 daemonize("i2oblock");
610 allow_signal(SIGKILL);
/* Interrupted sleep (e.g. SIGKILL at shutdown) terminates the thread. */
616 if(down_interruptible(&i2ob_evt_sem))
619 printk("exiting...");
624 * Keep another CPU/interrupt from overwriting the
625 * message while we're reading it
627 * We stuffed the unit in the TxContext and grab the event mask
628 * None of the BSA we care about events have EventData
630 spin_lock_irqsave(&i2ob_evt_lock, flags);
631 evt_local = (struct i2o_reply *)evt_msg;
632 spin_unlock_irqrestore(&i2ob_evt_lock, flags);
634 unit = le32_to_cpu(evt_local->header[3]);
635 evt = le32_to_cpu(evt_local->evt_indicator);
637 dev = &i2ob_dev[unit];
641 * New volume loaded on same TID, so we just re-install.
642 * The TID/controller don't change as it is the same
643 * I2O device. It's just new media that we have to
646 case I2O_EVT_IND_BSA_VOLUME_LOAD:
648 i2ob_install_device(dev->i2odev->controller,
655 * No media, so set all parameters to 0 and set the media
656 * change flag. The I2O device is still valid, just doesn't
657 * have media, so we don't want to clear the controller or
660 case I2O_EVT_IND_BSA_VOLUME_UNLOAD:
662 struct gendisk *p = dev->gd;
663 blk_queue_max_sectors(dev->gd->queue, 0);
667 dev->media_change_flag = 1;
671 case I2O_EVT_IND_BSA_VOLUME_UNLOAD_REQ:
672 printk(KERN_WARNING "%s: Attempt to eject locked media\n",
673 dev->i2odev->dev_name);
677 * The capacity has changed and we are going to be
678 * updating the max_sectors and other information
679 * about this disk. We try a revalidate first. If
680 * the block device is in use, we don't want to
681 * do that as there may be I/Os bound for the disk
682 * at the moment. In that case we read the size
683 * from the device and update the information ourselves
684 * and the user can later force a partition table
685 * update through an ioctl.
687 case I2O_EVT_IND_BSA_CAPACITY_CHANGE:
/* Prefer current media size (group 0x0004); fall back to device size. */
691 if(i2ob_query_device(dev, 0x0004, 0, &size, 8) !=0 )
692 i2ob_query_device(dev, 0x0000, 4, &size, 8);
694 spin_lock_irqsave(dev->req_queue->queue_lock, flags);
695 set_capacity(dev->gd, size>>9);
696 spin_unlock_irqrestore(dev->req_queue->queue_lock, flags);
701 * We got a SCSI SMART event, we just log the relevant
702 * information and let the user decide what they want
703 * to do with the information.
705 case I2O_EVT_IND_BSA_SCSI_SMART:
708 printk(KERN_INFO "I2O Block: %s received a SCSI SMART Event\n",dev->i2odev->dev_name);
709 evt_local->data[16]='\0';
710 sprintf(buf,"%s",&evt_local->data[0]);
711 printk(KERN_INFO " Disk Serial#:%s\n",buf);
712 printk(KERN_INFO " ASC 0x%02x \n",evt_local->ASC);
713 printk(KERN_INFO " ASCQ 0x%02x \n",evt_local->ASCQ);
725 * An event we didn't ask for. Call the card manufacturer
726 * and tell them to fix their firmware :)
731 * If a promise card reports 0x20 event then the brown stuff
732 * hit the fan big time. The card seems to recover but loses
733 * the pending writes. Deeply ungood except for testing fsck
735 if(dev->i2odev->controller->promise)
736 panic("I2O controller firmware failed. Reboot and force a filesystem check.\n")
738 printk(KERN_INFO "%s: Received event 0x%X we didn't register for\n"
739 KERN_INFO " Blame the I2O card manufacturer 8)\n",
740 dev->i2odev->dev_name, evt);
/* Let the module-unload path know we are gone. */
745 complete_and_exit(&i2ob_thread_dead,0);
750 * The I2O block driver is listed as one of those that pulls the
751 * front entry off the queue before processing it. This is important
752 * to remember here. If we drop the io lock then CURRENT will change
753 * on us. We must unlink CURRENT in this routine before we return, if
/* Block-layer request function: drain the elevator queue, and for each
 * request (while the per-IOP depth allows) grab a free ireq slot and hand
 * it to i2ob_send(). Called with the queue lock held. */
757 static void i2ob_request(request_queue_t *q)
760 struct i2ob_request *ireq;
761 struct i2ob_device *dev;
764 while ((req = elv_next_request(q)) != NULL) {
765 dev = req->rq_disk->private_data;
768 * Queue depths probably belong with some kind of
769 * generic IOP commit control. Certainly it's not right
/* IOP already has its fill of outstanding requests: stop feeding it. */
772 if(i2ob_queues[dev->unit]->queue_depth >= dev->depth)
/* No free ireq slot although nothing is in flight -- bookkeeping bug. */
780 if(i2ob_queues[dev->unit]->queue_depth == 0)
781 printk(KERN_ERR "i2o_block: message queue and request queue empty!!\n");
785 * Everything ok, so pull from kernel queue onto our queue
788 blkdev_dequeue_request(req);
/* Pop a free ireq from the per-IOP free list. */
790 ireq = i2ob_queues[dev->unit]->i2ob_qhead;
791 i2ob_queues[dev->unit]->i2ob_qhead = ireq->next;
794 i2ob_send(m, dev, ireq, dev->index);
800 * SCSI-CAM for ioctl geometry mapping
801 * Duplicated with SCSI - this should be moved into somewhere common
804 * LBA -> CHS mapping table taken from:
806 * "Incorporating the I2O Architecture into BIOS for Intel Architecture
809 * This is an I2O document that is only available to I2O members,
812 * From my understanding, this is how all the I2O cards do this
814 * Disk Size | Sectors | Heads | Cylinders
815 * ---------------+---------+-------+-------------------
816 * 1 < X <= 528M | 63 | 16 | X/(63 * 16 * 512)
817 * 528M < X <= 1G | 63 | 32 | X/(63 * 32 * 512)
818 * 1 < X <528M | 63 | 16 | X/(63 * 16 * 512)
819 * 1 < X <528M | 63 | 16 | X/(63 * 16 * 512)
/* Capacity thresholds (in 512-byte sectors) for the CHS mapping table. */
822 #define BLOCK_SIZE_528M 1081344
823 #define BLOCK_SIZE_1G 2097152
824 #define BLOCK_SIZE_21G 4403200
825 #define BLOCK_SIZE_42G 8806400
826 #define BLOCK_SIZE_84G 17612800
/* SCSI-CAM style LBA->CHS fabrication for HDIO_GETGEO: fix sectors at 63,
 * pick a head count from the capacity bracket, derive cylinders. */
828 static void i2o_block_biosparam(
829 unsigned long capacity,
830 unsigned short *cyls,
834 unsigned long heads, sectors, cylinders;
836 sectors = 63L; /* Maximize sectors per track */
837 if(capacity <= BLOCK_SIZE_528M)
839 else if(capacity <= BLOCK_SIZE_1G)
841 else if(capacity <= BLOCK_SIZE_21G)
843 else if(capacity <= BLOCK_SIZE_42G)
848 cylinders = (unsigned long)capacity / (heads * sectors);
850 *cyls = (unsigned short) cylinders; /* Stuff return values */
851 *secs = (unsigned char) sectors;
852 *hds = (unsigned char) heads;
856 * Issue device specific ioctl calls.
/* Device ioctls: geometry (HDIO_GETGEO via fabricated CHS), plus get/set of
 * the per-device read/write cache policies. All require CAP_SYS_ADMIN. */
859 static int i2ob_ioctl(struct inode *inode, struct file *file,
860 unsigned int cmd, unsigned long arg)
862 struct gendisk *disk = inode->i_bdev->bd_disk;
863 struct i2ob_device *dev = disk->private_data;
865 /* Anyone capable of this syscall can do *real bad* things */
867 if (!capable(CAP_SYS_ADMIN))
/* Geometry: synthesize CHS from capacity and copy to userspace. */
872 struct hd_geometry g;
873 i2o_block_biosparam(get_capacity(disk),
874 &g.cylinders, &g.heads, &g.sectors);
875 g.start = get_start_sect(inode->i_bdev);
876 return copy_to_user((void *)arg,&g, sizeof(g))?-EFAULT:0;
/* Report current read/write cache policies. */
880 return put_user(dev->rcache, (int *)arg);
882 return put_user(dev->wcache, (int *)arg);
/* Range-check requested cache policies before applying them. */
884 if(arg<0||arg>CACHE_SMARTFETCH)
889 if(arg!=0 && (arg<CACHE_WRITETHROUGH || arg>CACHE_SMARTBACK))
898 * Close the block device down
/* Last-close path: flush the controller's onboard cache, unlock removable
 * media, power the device down where appropriate, and unclaim it. */
901 static int i2ob_release(struct inode *inode, struct file *file)
903 struct gendisk *disk = inode->i_bdev->bd_disk;
904 struct i2ob_device *dev = disk->private_data;
907 * This is to deal with the case of an application
908 * opening a device and then the device disappears while
909 * it's in use, and then the application tries to release
910 * it. ex: Unmounting a deleted RAID volume at reboot.
911 * If we send messages, it will just cause FAILs since
912 * the TID no longer exists.
917 if (dev->refcnt <= 0)
918 printk(KERN_ALERT "i2ob_release: refcount(%d) <= 0\n", dev->refcnt);
923 * Flush the onboard cache on unmount
926 int *query_done = &dev->done_flag;
927 msg[0] = (FIVE_WORD_MSG_SIZE|SGL_OFFSET_0);
928 msg[1] = I2O_CMD_BLOCK_CFLUSH<<24|HOST_TID<<12|dev->tid;
929 msg[2] = i2ob_context|0x40000000;
930 msg[3] = (u32)query_done;
932 DEBUG("Flushing...");
/* 60s timeout: cache flush can legitimately take a while. */
933 i2o_post_wait(dev->controller, msg, 20, 60);
/* Unlock the media so removable devices can be ejected. */
938 msg[0] = FIVE_WORD_MSG_SIZE|SGL_OFFSET_0;
939 msg[1] = I2O_CMD_BLOCK_MUNLOCK<<24|HOST_TID<<12|dev->tid;
940 msg[2] = i2ob_context|0x40000000;
941 msg[3] = (u32)query_done;
943 DEBUG("Unlocking...");
944 i2o_post_wait(dev->controller, msg, 20, 2);
945 DEBUG("Unlocked.\n");
/* Request a power-state change; removable devices get special handling. */
947 msg[0] = FOUR_WORD_MSG_SIZE|SGL_OFFSET_0;
948 msg[1] = I2O_CMD_BLOCK_POWER<<24 | HOST_TID << 12 | dev->tid;
949 if(dev->flags & (1<<3|1<<4)) /* Removable */
954 if(i2o_post_wait(dev->controller, msg, 20, 60)==0)
958 * Now unclaim the device.
961 if (i2o_release_device(dev->i2odev, &i2o_block_handler))
962 printk(KERN_ERR "i2ob_release: controller rejected unclaim.\n");
970 * Open the block device.
/* First-open path: claim the device from the I2O core, power it up if it
 * was powered down, then mount and lock the media. */
973 static int i2ob_open(struct inode *inode, struct file *file)
975 struct gendisk *disk = inode->i_bdev->bd_disk;
976 struct i2ob_device *dev = disk->private_data;
986 if(i2o_claim_device(dev->i2odev, &i2o_block_handler))
989 printk(KERN_INFO "I2O Block: Could not open device\n");
/* Power state above 0x1f means the device is powered down -- wake it. */
997 if(dev->power > 0x1f)
999 msg[0] = FOUR_WORD_MSG_SIZE|SGL_OFFSET_0;
1000 msg[1] = I2O_CMD_BLOCK_POWER<<24 | HOST_TID << 12 | dev->tid;
1001 msg[4] = 0x02 << 24;
1002 if(i2o_post_wait(dev->controller, msg, 20, 60) == 0)
1007 * Mount the media if needed. Note that we don't use
1008 * the lock bit. Since we have to issue a lock if it
1009 * refuses a mount (quite possible) then we might as
1010 * well just send two messages out.
1012 msg[0] = FIVE_WORD_MSG_SIZE|SGL_OFFSET_0;
1013 msg[1] = I2O_CMD_BLOCK_MMOUNT<<24|HOST_TID<<12|dev->tid;
1017 i2o_post_wait(dev->controller, msg, 24, 2);
/* Lock the media against eject while we hold it open. */
1022 msg[0] = FIVE_WORD_MSG_SIZE|SGL_OFFSET_0;
1023 msg[1] = I2O_CMD_BLOCK_MLOCK<<24|HOST_TID<<12|dev->tid;
1026 i2o_post_wait(dev->controller, msg, 20, 2);
1033 * Issue a device query
/* Thin wrapper: read one scalar parameter (group/field) from the device's
 * parameter tables into buf. Returns the i2o_query_scalar() status. */
1036 static int i2ob_query_device(struct i2ob_device *dev, int table,
1037 int field, void *buf, int buflen)
1039 return i2o_query_scalar(dev->controller, dev->tid,
1040 table, field, buf, buflen);
1045 * Install the I2O block device we found.
/* Bring up one BSA device as unit `unit`: set up per-IOP queues on first
 * use, allocate and populate the gendisk, query media/device parameters
 * (size, block size, power, flags, status), tune queue limits for the
 * controller's quirks, pick cache policies, and register for events. */
1048 static int i2ob_install_device(struct i2o_controller *c, struct i2o_device *d, int unit)
1055 struct i2ob_device *dev=&i2ob_dev[unit];
1056 struct gendisk *disk;
1062 * For logging purposes...
1064 printk(KERN_INFO "i2ob: Installing tid %d device at unit %d\n",
1065 d->lct_data.tid, unit);
1068 * If this is the first I2O block device found on this IOP,
1069 * we need to initialize all the queue data structures
1070 * before any I/O can be performed. If it fails, this
1071 * device is useless.
1073 if(!i2ob_queues[c->unit]) {
1074 if(i2ob_init_iop(c->unit))
1078 q = i2ob_queues[c->unit]->req_queue;
1081 * This will save one level of lookup/indirection in critical
1082 * code so that we can directly get the queue ptr from the
1083 * device instead of having to go the IOP data structure.
1088 * Allocate a gendisk structure and initialize it
/* 16 minors per disk: whole disk plus up to 15 partitions. */
1090 disk = alloc_disk(16);
1095 /* initialize gendisk structure */
1096 disk->major = MAJOR_NR;
1097 disk->first_minor = unit<<4;
1099 disk->fops = &i2ob_fops;
1100 sprintf(disk->disk_name, "i2o/hd%c", 'a' + unit);
1101 disk->private_data = dev;
1104 * Ask for the current media data. If that isn't supported
1105 * then we ask for the device capacity data
1107 if(i2ob_query_device(dev, 0x0004, 1, &blocksize, 4) != 0
1108 || i2ob_query_device(dev, 0x0004, 0, &size, 8) !=0 )
1110 i2ob_query_device(dev, 0x0000, 3, &blocksize, 4);
1111 i2ob_query_device(dev, 0x0000, 4, &size, 8);
1114 if(i2ob_query_device(dev, 0x0000, 2, &power, 2)!=0)
1116 i2ob_query_device(dev, 0x0000, 5, &flags, 4);
1117 i2ob_query_device(dev, 0x0000, 6, &status, 4);
1118 set_capacity(disk, size>>9);
1121 * Max number of Scatter-Gather Elements
1124 dev->power = power; /* Save power state in device proper */
/* SG entries that fit in an inbound frame after the 7-word header. */
1127 segments = (d->controller->status_block->inbound_frame_size - 7) / 2;
1132 dev->power = power; /* Save power state */
1133 dev->flags = flags; /* Keep the type info */
1135 blk_queue_max_sectors(q, 96); /* 256 might be nicer but many controllers
1136 explode on 65536 or higher */
1137 blk_queue_max_phys_segments(q, segments);
1138 blk_queue_max_hw_segments(q, segments);
/* Default cache policies; write-back is avoided below where unsafe. */
1140 dev->rcache = CACHE_SMARTFETCH;
1141 dev->wcache = CACHE_WRITETHROUGH;
/* No battery backup -> write-back caching would risk data loss. */
1143 if(d->controller->battery == 0)
1144 dev->wcache = CACHE_WRITETHROUGH;
1146 if(d->controller->promise)
1147 dev->wcache = CACHE_WRITETHROUGH;
/* Controllers with short request frames get much smaller limits. */
1149 if(d->controller->short_req)
1151 blk_queue_max_sectors(q, 8);
1152 blk_queue_max_phys_segments(q, 8);
1153 blk_queue_max_hw_segments(q, 8);
1156 strcpy(d->dev_name, disk->disk_name);
1157 strcpy(disk->devfs_name, disk->disk_name);
1159 printk(KERN_INFO "%s: Max segments %d, queue depth %d, byte limit %d.\n",
1160 d->dev_name, dev->max_segments, dev->depth, dev->max_sectors<<9);
/* Device-type field drives the human-readable banner below. */
1162 i2ob_query_device(dev, 0x0000, 0, &type, 1);
1164 printk(KERN_INFO "%s: ", d->dev_name);
1167 case 0: printk("Disk Storage");break;
1168 case 4: printk("WORM");break;
1169 case 5: printk("CD-ROM");break;
1170 case 7: printk("Optical device");break;
1172 printk("Type %d", type);
1177 if((flags^status)&(1<<4|1<<3)) /* Missing media or device */
1179 printk(KERN_INFO " Not loaded.\n");
1180 /* Device missing ? */
1181 if((flags^status)&(1<<4))
1186 printk(": %dMB, %d byte sectors",
1187 (int)(size>>20), blocksize);
1192 i2ob_query_device(dev, 0x0003, 0, &cachesize, 4);
1195 printk(", %dMb cache", cachesize>>10);
1197 printk(", %dKb cache", cachesize);
1200 printk(KERN_INFO "%s: Maximum sectors/read set to %d.\n",
1201 d->dev_name, dev->max_sectors);
1204 * Register for the events we're interested in and that the
1205 * device actually supports.
1208 i2o_event_register(c, d->lct_data.tid, i2ob_context, unit,
1209 (I2OB_EVENT_MASK & d->lct_data.event_capabilities));
1214 * Initialize IOP specific queue structures. This is called
1215 * once for each IOP that has a block device sitting behind it.
/* One-time setup for an IOP: allocate its queue structure, thread the
 * ireq slots into a free list, and create the block request queue. */
1217 static int i2ob_init_iop(unsigned int unit)
/* GFP_ATOMIC: may be reached from a context that cannot sleep. */
1221 i2ob_queues[unit] = (struct i2ob_iop_queue *) kmalloc(sizeof(struct i2ob_iop_queue), GFP_ATOMIC);
1222 if(!i2ob_queues[unit])
1224 printk(KERN_WARNING "Could not allocate request queue for I2O block device!\n");
1228 for(i = 0; i< MAX_I2OB_DEPTH; i++)
1230 i2ob_queues[unit]->request_queue[i].next = &i2ob_queues[unit]->request_queue[i+1];
1231 i2ob_queues[unit]->request_queue[i].num = i;
1234 /* Queue is MAX_I2OB + 1... */
/* NOTE(review): after the loop i == MAX_I2OB_DEPTH; this write is only in
 * bounds if request_queue[] really has MAX_I2OB_DEPTH+1 slots -- the array
 * declaration visible above says MAX_I2OB_DEPTH. Verify against full file. */
1235 i2ob_queues[unit]->request_queue[i].next = NULL;
1236 i2ob_queues[unit]->i2ob_qhead = &i2ob_queues[unit]->request_queue[0];
1237 i2ob_queues[unit]->queue_depth = 0;
1239 i2ob_queues[unit]->lock = SPIN_LOCK_UNLOCKED;
1240 i2ob_queues[unit]->req_queue = blk_init_queue(i2ob_request, &i2ob_queues[unit]->lock);
1241 if (!i2ob_queues[unit]->req_queue) {
1242 kfree(i2ob_queues[unit]);
/* NOTE(review): this stores the address of the pointer slot, not of the
 * i2ob_iop_queue itself -- looks suspicious; confirm intended queuedata. */
1246 i2ob_queues[unit]->req_queue->queuedata = &i2ob_queues[unit];
1252 * Probe the I2O subsystem for block class devices
/* Walk every controller's device list and claim BSA devices. Called twice
 * from i2ob_probe(): first with bios set to grab the boot volume (so it
 * becomes /dev/i2o/hda), then again for everything else. */
1254 static void i2ob_scan(int bios)
1259 struct i2o_device *d, *b=NULL;
1260 struct i2o_controller *c;
1262 for(i=0; i< MAX_I2O_CONTROLLERS; i++)
1264 c=i2o_find_controller(i);
1270 * The device list connected to the I2O Controller is doubly linked
1271 * Here we traverse the end of the list , and start claiming devices
1272 * from that end. This assures that within an I2O controller at least
1273 * the newly created volumes get claimed after the older ones, thus
1274 * mapping to same major/minor (and hence device file name) after
1276 * The exception being:
1277 * 1. If there was a TID reuse.
1278 * 2. There was more than one I2O controller.
1283 for (d=c->devices;d!=NULL;d=d->next)
/* Only unclaimed random-block-storage devices interest us. */
1298 if(d->lct_data.class_id!=I2O_CLASS_RANDOM_BLOCK_STORAGE)
1301 if(d->lct_data.user_tid != 0xFFF)
/* bios_info == 0x80 marks the volume the BIOS booted from. */
1306 if(d->lct_data.bios_info != 0x80)
1308 printk(KERN_INFO "Claiming as Boot device: Controller %d, TID %d\n", c->unit, d->lct_data.tid);
1312 if(d->lct_data.bios_info == 0x80)
1313 continue; /*Already claimed on pass 1 */
1316 if(scan_unit<MAX_I2OB)
1317 i2ob_new_device(c, d);
1321 printk(KERN_WARNING "i2o_block: too many device, registering only %d.\n", scan_unit);
1324 i2o_unlock_controller(c);
/* Two-pass device probe: first claim the BIOS boot volume, then all
 * remaining block devices (see i2ob_scan for the ordering rationale). */
1328 static void i2ob_probe(void)
1331 * Some overhead/redundancy involved here, while trying to
1332 * claim the first boot volume encountered as /dev/i2o/hda
1333 * everytime. All the i2o_controllers are searched and the
1334 * first i2o block device marked as bootable is claimed
1335 * If an I2O block device was booted off , the bios sets
1336 * its bios_info field to 0x80, this what we search for.
1337 * Assuming that the bootable volume is /dev/i2o/hda
1338 * everytime will prevent any kernel panic while mounting
1342 printk(KERN_INFO "i2o_block: Checking for Boot device...\n");
1346 * Now the remainder.
1348 printk(KERN_INFO "i2o_block: Checking for I2O Block devices...\n");
1354 * New device notification handler. Called whenever a new
1355 * I2O block storage device is added to the system.
1357 * Should we spin lock around this to keep multiple devs from
1358 * getting updated at the same time?
/* Hotplug/new-device notification: find a free unit slot, claim the device,
 * install it, and arm delete notifications. On install failure the claim
 * is released again. */
1361 void i2ob_new_device(struct i2o_controller *c, struct i2o_device *d)
1363 struct i2ob_device *dev;
1366 printk(KERN_INFO "i2o_block: New device detected\n");
1367 printk(KERN_INFO " Controller %d Tid %d\n",c->unit, d->lct_data.tid);
1369 /* Check for available space */
1370 if(i2ob_dev_count>=MAX_I2OB)
1372 printk(KERN_ERR "i2o_block: No more devices allowed!\n");
/* First unit slot with no i2odev attached is free. */
1375 for(unit = 0; unit < MAX_I2OB; unit ++)
1377 if(!i2ob_dev[unit].i2odev)
1381 if(i2o_claim_device(d, &i2o_block_handler))
1383 printk(KERN_INFO "i2o_block: Unable to claim device. Installation aborted\n");
1387 dev = &i2ob_dev[unit];
1389 dev->controller = c;
1390 dev->tid = d->lct_data.tid;
1391 dev->unit = c->unit;
1393 if(i2ob_install_device(c,d,unit)) {
1394 i2o_release_device(d, &i2o_block_handler);
1395 printk(KERN_ERR "i2o_block: Could not install new device\n");
/* Claim was only needed for installation; drop it, keep notifications. */
1399 i2o_release_device(d, &i2o_block_handler);
1402 i2o_device_notify_on(d, &i2o_block_handler);
1409 * Deleted device notification handler. Called when a device we
1410 * are talking to has been deleted by the user or some other
1411 * mysterious force outside the kernel.
/* Device-deleted notification: locate the matching unit, tear down its
 * queue association so further I/O errors out, and mark a media change. */
1413 void i2ob_del_device(struct i2o_controller *c, struct i2o_device *d)
1416 unsigned long flags;
1417 struct i2ob_device *dev;
1419 for(unit = 0; unit < MAX_I2OB; unit ++)
1421 dev = &i2ob_dev[unit];
1422 if(dev->i2odev == d)
1424 printk(KERN_INFO " /dev/%s: Controller %d Tid %d\n",
1425 d->dev_name, c->unit, d->lct_data.tid);
1430 printk(KERN_INFO "I2O Block Device Deleted\n");
1432 if(unit >= MAX_I2OB)
1434 printk(KERN_ERR "i2ob_del_device called, but not in dev table!\n");
1438 spin_lock_irqsave(dev->req_queue->queue_lock, flags);
1441 * Need to do this...we sometimes get two events from the IRTOS
1442 * in a row and that causes lots of problems.
1444 i2o_device_notify_off(d, &i2o_block_handler);
1447 * This will force errors when i2ob_get_queue() is called
1451 struct gendisk *gd = dev->gd;
1457 spin_unlock_irqrestore(dev->req_queue->queue_lock, flags);
1458 dev->req_queue = NULL;
1465 * The media didn't really change...the device is just gone
1467 dev->media_change_flag = 1;
/*
 * Have we seen a media change?
 */
1475 static int i2ob_media_change(struct gendisk *disk)
1477 struct i2ob_device *p = disk->private_data;
1478 if(p->media_change_flag)
1480 p->media_change_flag=0;
1486 static int i2ob_revalidate(struct gendisk *disk)
1488 struct i2ob_device *p = disk->private_data;
1489 return i2ob_install_device(p->controller, p->i2odev, p->index);
/*
 * Reboot notifier.  This is called by i2o_core when the system is going
 * down so that cached data gets flushed and removable media unlocked.
 */
1496 static void i2ob_reboot_event(void)
1500 for(i=0;i<MAX_I2OB;i++)
1502 struct i2ob_device *dev=&i2ob_dev[i];
1507 * Flush the onboard cache
1510 int *query_done = &dev->done_flag;
1511 msg[0] = FIVE_WORD_MSG_SIZE|SGL_OFFSET_0;
1512 msg[1] = I2O_CMD_BLOCK_CFLUSH<<24|HOST_TID<<12|dev->tid;
1513 msg[2] = i2ob_context|0x40000000;
1514 msg[3] = (u32)query_done;
1517 DEBUG("Flushing...");
1518 i2o_post_wait(dev->controller, msg, 20, 60);
1520 DEBUG("Unlocking...");
1524 msg[0] = FIVE_WORD_MSG_SIZE|SGL_OFFSET_0;
1525 msg[1] = I2O_CMD_BLOCK_MUNLOCK<<24|HOST_TID<<12|dev->tid;
1526 msg[2] = i2ob_context|0x40000000;
1527 msg[3] = (u32)query_done;
1529 i2o_post_wait(dev->controller, msg, 20, 2);
1531 DEBUG("Unlocked.\n");
1536 static struct block_device_operations i2ob_fops =
1538 .owner = THIS_MODULE,
1540 .release = i2ob_release,
1541 .ioctl = i2ob_ioctl,
1542 .media_changed = i2ob_media_change,
1543 .revalidate_disk= i2ob_revalidate,
/*
 * And here should be modules and kernel interface
 *  (Just smiley confuses emacs :-)
 */
1551 static int i2o_block_init(void)
1555 printk(KERN_INFO "I2O Block Storage OSM v0.9\n");
1556 printk(KERN_INFO " (c) Copyright 1999-2001 Red Hat Software.\n");
1559 * Register the block device interfaces
1561 if (register_blkdev(MAJOR_NR, "i2o_block"))
1565 printk(KERN_INFO "i2o_block: registered device at major %d\n", MAJOR_NR);
1571 for(i = 0; i < MAX_I2O_CONTROLLERS; i++)
1572 i2ob_queues[i] = NULL;
1575 * Now fill in the boiler plate
1578 for (i = 0; i < MAX_I2OB; i++) {
1579 struct i2ob_device *dev = &i2ob_dev[i];
1583 dev->controller = NULL;
1588 dev->depth = MAX_I2OB_DEPTH;
1589 dev->max_sectors = 2;
1594 * Register the OSM handler as we will need this to probe for
1595 * drives, geometry and other goodies.
1598 if(i2o_install_handler(&i2o_block_handler)<0)
1600 unregister_blkdev(MAJOR_NR, "i2o_block");
1601 printk(KERN_ERR "i2o_block: unable to register OSM.\n");
1604 i2ob_context = i2o_block_handler.context;
1607 * Initialize event handling thread
1609 init_MUTEX_LOCKED(&i2ob_evt_sem);
1610 evt_pid = kernel_thread(i2ob_evt, NULL, CLONE_SIGHAND);
1613 printk(KERN_ERR "i2o_block: Could not initialize event thread. Aborting\n");
1614 i2o_remove_handler(&i2o_block_handler);
1622 unregister_blkdev(MAJOR_NR, "i2o_block");
1627 static void i2o_block_exit(void)
1632 printk(KERN_INFO "Killing I2O block threads...");
1633 i = kill_proc(evt_pid, SIGKILL, 1);
1635 printk("waiting...\n");
1637 /* Be sure it died */
1638 wait_for_completion(&i2ob_thread_dead);
1643 * Unregister for updates from any devices..otherwise we still
1644 * get them and the core jumps to random memory :O
1646 if(i2ob_dev_count) {
1647 struct i2o_device *d;
1648 for(i = 0; i < MAX_I2OB; i++)
1649 if((d = i2ob_dev[i].i2odev))
1650 i2ob_del_device(d->controller, d);
1654 * We may get further callbacks for ourself. The i2o_core
1655 * code handles this case reasonably sanely. The problem here
1656 * is we shouldn't get them .. but a couple of cards feel
1657 * obliged to tell us stuff we don't care about.
1659 * This isnt ideal at all but will do for now.
1662 set_current_state(TASK_UNINTERRUPTIBLE);
1663 schedule_timeout(HZ);
1669 i2o_remove_handler(&i2o_block_handler);
1672 * Return the block device
1674 if (unregister_blkdev(MAJOR_NR, "i2o_block") != 0)
1675 printk("i2o_block: cleanup_module failed\n");
1678 * release request queue
1680 for (i = 0; i < MAX_I2O_CONTROLLERS; i ++)
1681 if(i2ob_queues[i]) {
1682 blk_cleanup_queue(i2ob_queues[i]->req_queue);
1683 kfree(i2ob_queues[i]);
1687 MODULE_AUTHOR("Red Hat");
1688 MODULE_DESCRIPTION("I2O Block Device OSM");
1689 MODULE_LICENSE("GPL");
1691 module_init(i2o_block_init);
1692 module_exit(i2o_block_exit);