2 * I2O Random Block Storage Class OSM
4 * (C) Copyright 1999-2002 Red Hat
6 * Written by Alan Cox, Building Number Three Ltd
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
13 * This program is distributed in the hope that it will be useful, but
14 * WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * General Public License for more details.
18 * For the purpose of avoiding doubt the preferred form of the work
19 * for making modifications shall be a standards compliant form such as
20 * gzipped tar and not one requiring a proprietary or patent encumbered
23 * This is a beta test release. Most of the good code was taken
24 * from the nbd driver by Pavel Machek, who in turn took some of it
25 * from loop.c. Isn't free software great for reusability 8)
29 * Multiple device handling error fixes,
30 * Added a queue depth.
32 * FC920 has an RMW bug. Don't OR in the end marker.
33 * Removed queue walk, fixed for 64bitness.
34 * Rewrote much of the code over time
35 * Added indirect block lists
36 * Handle 64K limits on many controllers
37 * Don't use indirects on the Promise (breaks)
38 * Heavily chop down the queue depths
40 * Independent queues per IOP
41 * Support for dynamic device creation/deletion
43 * Support for larger I/Os through merge* functions
44 * (taken from DAC960 driver)
45 * Boji T Kannanthanam:
46 * Set the I2O Block devices to be detected in increasing
47 * order of TIDs during boot.
48 * Search and set the I2O block device that we boot off from as
49 * the first device to be claimed (as /dev/i2o/hda)
50 * Properly attach/detach I2O gendisk structure from the system
51 * gendisk list. The I2O block devices now appear in
53 * Markus Lidel <Markus.Lidel@shadowconnect.com>:
54 * Minor bugfixes for 2.6.
57 * Serial number scanning to find duplicates for FC multipathing
60 #include <linux/major.h>
62 #include <linux/module.h>
63 #include <linux/init.h>
64 #include <linux/sched.h>
66 #include <linux/stat.h>
67 #include <linux/pci.h>
68 #include <linux/errno.h>
69 #include <linux/file.h>
70 #include <linux/ioctl.h>
71 #include <linux/i2o.h>
72 #include <linux/blkdev.h>
73 #include <linux/blkpg.h>
74 #include <linux/slab.h>
75 #include <linux/hdreg.h>
76 #include <linux/spinlock.h>
77 #include <linux/bio.h>
79 #include <linux/notifier.h>
80 #include <linux/reboot.h>
82 #include <asm/uaccess.h>
83 #include <asm/semaphore.h>
84 #include <linux/completion.h>
86 #include <asm/atomic.h>
87 #include <linux/smp_lock.h>
88 #include <linux/wait.h>
90 #define MAJOR_NR I2O_MAJOR
94 #define MAX_I2OB_DEPTH 8
95 #define MAX_I2OB_RETRIES 4
99 #define DEBUG( s ) printk( s )
105 * Events that this OSM is interested in
107 #define I2OB_EVENT_MASK (I2O_EVT_IND_BSA_VOLUME_LOAD | \
108 I2O_EVT_IND_BSA_VOLUME_UNLOAD | \
109 I2O_EVT_IND_BSA_VOLUME_UNLOAD_REQ | \
110 I2O_EVT_IND_BSA_CAPACITY_CHANGE | \
111 I2O_EVT_IND_BSA_SCSI_SMART )
114 #define I2O_LOCK(unit) (i2ob_dev[(unit)].req_queue->queue_lock)
117 * Some of these can be made smaller later
120 static int i2ob_media_change_flag[MAX_I2OB];
121 static u32 i2ob_max_sectors[MAX_I2OB<<4];
123 static int i2ob_context;
126 * I2O Block device descriptor
130 struct i2o_controller *controller;
131 struct i2o_device *i2odev;
136 struct request *head, *tail;
137 request_queue_t *req_queue;
139 int max_direct; /* Not yet used properly */
150 * We should cache align these to avoid ping-ponging lines on SMP
151 * boxes under heavy I/O load...
156 struct i2ob_request *next;
159 int sg_dma_direction;
161 struct scatterlist sg_table[16];
165 * Per IOP request queue information
167 * We have a separate request_queue_t per IOP so that a heavily
168 * loaded I2O block device on an IOP does not starve block devices
169 * across all I2O controllers.
172 struct i2ob_iop_queue
174 atomic_t queue_depth;
175 struct i2ob_request request_queue[MAX_I2OB_DEPTH];
176 struct i2ob_request *i2ob_qhead;
177 request_queue_t *req_queue;
180 static struct i2ob_iop_queue *i2ob_queues[MAX_I2O_CONTROLLERS];
183 * Each I2O disk is one of these.
186 static struct i2ob_device i2ob_dev[MAX_I2OB<<4];
187 static int i2ob_dev_count = 0;
188 static struct gendisk *i2ob_disk[MAX_I2OB];
191 * Mutex and spin lock for event handling synchronization
192 * evt_msg contains the last event.
194 static DECLARE_MUTEX_LOCKED(i2ob_evt_sem);
195 static DECLARE_COMPLETION(i2ob_thread_dead);
196 static spinlock_t i2ob_evt_lock = SPIN_LOCK_UNLOCKED;
197 static u32 evt_msg[MSG_FRAME_SIZE];
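/*
 * Editor's note (flow sketch, not original driver text): the reply
 * handler copies an I2O_CMD_UTIL_EVT_REGISTER reply into evt_msg
 * while holding i2ob_evt_lock, then wakes the i2ob_evt() kernel
 * thread, which sleeps in down_interruptible(&i2ob_evt_sem) and
 * reads the event back out of evt_msg.
 */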
199 static void i2o_block_reply(struct i2o_handler *, struct i2o_controller *,
200 struct i2o_message *);
201 static void i2ob_new_device(struct i2o_controller *, struct i2o_device *);
202 static void i2ob_del_device(struct i2o_controller *, struct i2o_device *);
203 static void i2ob_reboot_event(void);
204 static int i2ob_install_device(struct i2o_controller *, struct i2o_device *, int);
205 static void i2ob_end_request(struct request *);
206 static void i2ob_request(request_queue_t *);
207 static int i2ob_init_iop(unsigned int);
208 static int i2ob_query_device(struct i2ob_device *, int, int, void*, int);
209 static int i2ob_evt(void *);
211 static int evt_pid = 0;
212 static int evt_running = 0;
213 static int scan_unit = 0;
216 * I2O OSM registration structure...keeps getting bigger and bigger :)
218 static struct i2o_handler i2o_block_handler =
226 I2O_CLASS_RANDOM_BLOCK_STORAGE
230 * i2ob_get - Get an I2O message
231 * @dev: I2O block device
233 * Get a message from the FIFO used for this block device. The message is returned
234 * or the I2O 'no message' value of 0xFFFFFFFF if nothing is available.
237 static u32 i2ob_get(struct i2ob_device *dev)
239 struct i2o_controller *c=dev->controller;
240 return I2O_POST_READ32(c);
243 static int i2ob_build_sglist(struct i2ob_device *dev, struct i2ob_request *ireq)
245 struct scatterlist *sg = ireq->sg_table;
248 nents = blk_rq_map_sg(dev->req_queue, ireq->req, ireq->sg_table);
250 if (rq_data_dir(ireq->req) == READ)
251 ireq->sg_dma_direction = PCI_DMA_FROMDEVICE;
253 ireq->sg_dma_direction = PCI_DMA_TODEVICE;
255 ireq->sg_nents = pci_map_sg(dev->controller->pdev, sg, nents, ireq->sg_dma_direction);
256 return ireq->sg_nents;
259 void i2ob_free_sglist(struct i2ob_device *dev, struct i2ob_request *ireq)
261 struct pci_dev *pdev = dev->controller->pdev;
262 struct scatterlist *sg = ireq->sg_table;
263 int nents = ireq->sg_nents;
264 pci_unmap_sg(pdev, sg, nents, ireq->sg_dma_direction);
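/*
 * Editor's sketch (illustrative, not part of the original source):
 * the scatter/gather table built by i2ob_build_sglist() lives for
 * exactly one request. i2ob_send() maps it while building the
 * message and i2o_block_reply() unmaps it when the reply arrives:
 *
 *	if (i2ob_build_sglist(dev, ireq) == 0)
 *		;	// no mapping resources - request cannot be sent
 *	// ... post the message, controller completes the transfer ...
 *	i2ob_free_sglist(dev, ireq);
 */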
268 * i2ob_send - Turn a request into a message and send it
271 * @ireq: Request structure
272 * @unit: Device identity
274 * Generate an I2O BSAREAD request. This interface function is called for devices that
275 * appear to explode when they are fed indirect chain pointers (notably right now this
276 * appears to afflict Promise hardware, so be careful what you feed the hardware).
278 * No cleanup is done by this interface. It is done on the interrupt side when the
282 static int i2ob_send(u32 m, struct i2ob_device *dev, struct i2ob_request *ireq, int unit)
284 struct i2o_controller *c = dev->controller;
289 struct request *req = ireq->req;
290 int count = req->nr_sectors<<9;
291 struct scatterlist *sg;
295 // printk(KERN_INFO "i2ob_send called\n");
296 /* Map the message to a virtual address */
297 msg = c->mem_offset + m;
299 sgnum = i2ob_build_sglist(dev, ireq);
301 /* FIXME: if we have no resources how should we get out of this */
306 * Build the message based on the request.
308 i2o_raw_writel(i2ob_context|(unit<<8), msg+8);
309 i2o_raw_writel(ireq->num, msg+12);
310 i2o_raw_writel(req->nr_sectors << 9, msg+20);
313 * Mask out partitions from now on
317 /* This can be optimised later - just want to be sure its right for
319 offset = ((u64)req->sector) << 9;
320 i2o_raw_writel( offset & 0xFFFFFFFF, msg+24);
321 i2o_raw_writel(offset>>32, msg+28);
325 if(rq_data_dir(req) == READ)
328 i2o_raw_writel(I2O_CMD_BLOCK_READ<<24|HOST_TID<<12|tid, msg+4);
329 for(i = sgnum; i > 0; i--)
332 i2o_raw_writel(0x10000000|sg_dma_len(sg), mptr);
334 i2o_raw_writel(0xD0000000|sg_dma_len(sg), mptr);
335 i2o_raw_writel(sg_dma_address(sg), mptr+4);
337 count -= sg_dma_len(sg);
343 i2o_raw_writel(0, msg+16);break;
345 i2o_raw_writel(0x201F0008, msg+16);break;
346 case CACHE_SMARTFETCH:
347 if(req->nr_sectors > 16)
348 i2o_raw_writel(0x201F0008, msg+16);
350 i2o_raw_writel(0x001F0000, msg+16);
354 // printk("Reading %d entries %d bytes.\n",
355 // mptr-msg-8, req->nr_sectors<<9);
357 else if(rq_data_dir(req) == WRITE)
360 i2o_raw_writel(I2O_CMD_BLOCK_WRITE<<24|HOST_TID<<12|tid, msg+4);
361 for(i = sgnum; i > 0; i--)
364 i2o_raw_writel(0x14000000|sg_dma_len(sg), mptr);
366 i2o_raw_writel(0xD4000000|sg_dma_len(sg), mptr);
367 i2o_raw_writel(sg_dma_address(sg), mptr+4);
369 count -= sg_dma_len(sg);
376 i2o_raw_writel(0, msg+16);break;
377 case CACHE_WRITETHROUGH:
378 i2o_raw_writel(0x001F0008, msg+16);break;
379 case CACHE_WRITEBACK:
380 i2o_raw_writel(0x001F0010, msg+16);break;
381 case CACHE_SMARTBACK:
382 if(req->nr_sectors > 16)
383 i2o_raw_writel(0x001F0004, msg+16);
385 i2o_raw_writel(0x001F0010, msg+16);
387 case CACHE_SMARTTHROUGH:
388 if(req->nr_sectors > 16)
389 i2o_raw_writel(0x001F0004, msg+16);
391 i2o_raw_writel(0x001F0010, msg+16);
394 // printk("Writing %d entries %d bytes.\n",
395 // mptr-msg-8, req->nr_sectors<<9);
397 i2o_raw_writel(I2O_MESSAGE_SIZE(mptr-msg)>>2 | SGL_OFFSET_8, msg);
401 printk(KERN_ERR "Request count botched by %d.\n", count);
404 i2o_post_message(c,m);
405 atomic_inc(&i2ob_queues[c->unit]->queue_depth);
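/*
 * Editor's note: layout of the BSA READ/WRITE frame as built above
 * (byte offsets into the inbound frame, reconstructed from the
 * i2o_raw_writel() calls in this function, not from the I2O spec):
 *
 *	msg+0	message size | SGL_OFFSET_8
 *	msg+4	I2O_CMD_BLOCK_READ/WRITE<<24 | HOST_TID<<12 | tid
 *	msg+8	initiator context (i2ob_context) | unit<<8
 *	msg+12	transaction context (ireq->num)
 *	msg+16	control flags (cache hints)
 *	msg+20	transfer length in bytes (nr_sectors << 9)
 *	msg+24	low 32 bits of the byte offset on the device
 *	msg+28	high 32 bits of the byte offset
 *	msg+32	first scatter/gather element
 */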
411 * Remove a request from the _locked_ request list. We update both the
412 * list chain and if this is the last item the tail pointer. Caller
413 * must hold the lock.
416 static inline void i2ob_unhook_request(struct i2ob_request *ireq,
419 ireq->next = i2ob_queues[iop]->i2ob_qhead;
420 i2ob_queues[iop]->i2ob_qhead = ireq;
424 * Request completion handler
427 static inline void i2ob_end_request(struct request *req)
429 /* FIXME - pci unmap the request */
432 * Loop until all of the buffers that are linked
433 * to this request have been marked updated and
437 while (end_that_request_first( req, !req->errors, req->hard_cur_sectors ));
440 * It is now ok to complete the request.
442 end_that_request_last( req );
443 DEBUG("IO COMPLETED\n");
447 * OSM reply handler. This gets all the message replies
450 static void i2o_block_reply(struct i2o_handler *h, struct i2o_controller *c, struct i2o_message *msg)
453 struct i2ob_request *ireq = NULL;
456 u8 unit = (m[2]>>8)&0xF0; /* low 4 bits are partition */
457 struct i2ob_device *dev = &i2ob_dev[(unit&0xF0)];
466 * FAILed message from controller
467 * We increment the error count and abort it
469 * In theory this will never happen. The I2O block class
470 * specification states that block devices never return
471 * FAILs but instead use the REQ status field...but
472 * better be on the safe side since no one really follows
473 * the spec to the book :)
475 ireq=&i2ob_queues[c->unit]->request_queue[m[3]];
478 spin_lock_irqsave(I2O_LOCK(c->unit), flags);
479 i2ob_unhook_request(ireq, c->unit);
480 i2ob_end_request(ireq->req);
481 spin_unlock_irqrestore(I2O_LOCK(c->unit), flags);
483 /* Now flush the message by making it a NOP */
485 m[0]|=(I2O_CMD_UTIL_NOP)<<24;
486 i2o_post_message(c, ((unsigned long)m) - c->mem_offset);
491 if(msg->function == I2O_CMD_UTIL_EVT_REGISTER)
493 spin_lock(&i2ob_evt_lock);
494 memcpy(evt_msg, msg, (m[0]>>16)<<2);
495 spin_unlock(&i2ob_evt_lock);
503 * This is a HACK, but Intel Integrated RAID allows the user
504 * to delete a volume that is claimed, locked, and in use
505 * by the OS. We have to check for a reply from a
506 * non-existent device and flag it as an error or the system
509 ireq=&i2ob_queues[c->unit]->request_queue[m[3]];
511 printk(KERN_WARNING "I2O Block: Data transfer to deleted device!\n");
512 spin_lock_irqsave(I2O_LOCK(c->unit), flags);
513 i2ob_unhook_request(ireq, c->unit);
514 i2ob_end_request(ireq->req);
515 spin_unlock_irqrestore(I2O_LOCK(c->unit), flags);
520 * Lets see what is cooking. We stuffed the
521 * request in the context.
524 ireq=&i2ob_queues[c->unit]->request_queue[m[3]];
534 "Failure communicating to device",
536 "Device is not ready",
538 "Media is locked by another user",
540 "Failure communicating to device",
541 "Device bus failure",
542 "Device is locked by another user",
543 "Device is write protected",
545 "Volume has changed, waiting for acknowledgement"
551 * Device not ready means two things. One is that the
552 * thing went offline (but it is not removable media)
554 * The second is that you have a SuperTrak 100 and the
555 * firmware got constipated. Unlike standard i2o card
556 * setups the supertrak returns an error rather than
557 * blocking for the timeout in these cases.
559 * Don't stick a supertrak100 into cache aggressive modes
563 printk(KERN_ERR "\n/dev/%s error: %s", dev->i2odev->dev_name,
564 bsa_errors[m[4]&0XFFFF]);
566 printk(" - DDM attempted %d retries", (m[4]>>16)&0x00FF );
571 ireq->req->errors = 0;
574 * Dequeue the request. We use irqsave locks as one day we
575 * may be running polled controllers from a BH...
578 i2ob_free_sglist(dev, ireq);
579 spin_lock_irqsave(I2O_LOCK(c->unit), flags);
580 i2ob_unhook_request(ireq, c->unit);
581 i2ob_end_request(ireq->req);
582 atomic_dec(&i2ob_queues[c->unit]->queue_depth);
585 * We may be able to do more I/O
588 i2ob_request(dev->req_queue);
589 spin_unlock_irqrestore(I2O_LOCK(c->unit), flags);
593 * Event handler. Needs to be a separate thread b/c we may have
594 * to do things like scan a partition table, or query parameters
595 * which cannot be done from an interrupt or from a bottom half.
597 static int i2ob_evt(void *dummy)
603 //The only event that has data is the SCSI_SMART event.
613 daemonize("i2oblock");
614 allow_signal(SIGKILL);
620 if(down_interruptible(&i2ob_evt_sem))
623 printk("exiting...");
628 * Keep another CPU/interrupt from overwriting the
629 * message while we're reading it
631 * We stuffed the unit in the TxContext and grab the event mask
632 * None of the BSA events we care about have EventData
634 spin_lock_irqsave(&i2ob_evt_lock, flags);
635 evt_local = (struct i2o_reply *)evt_msg;
636 spin_unlock_irqrestore(&i2ob_evt_lock, flags);
638 unit = le32_to_cpu(evt_local->header[3]);
639 evt = le32_to_cpu(evt_local->evt_indicator);
644 * New volume loaded on same TID, so we just re-install.
645 * The TID/controller don't change as it is the same
646 * I2O device. It's just new media that we have to
649 case I2O_EVT_IND_BSA_VOLUME_LOAD:
651 struct gendisk *p = i2ob_disk[unit>>4];
652 i2ob_install_device(i2ob_dev[unit].i2odev->controller,
653 i2ob_dev[unit].i2odev, unit);
659 * No media, so set all parameters to 0 and set the media
660 * change flag. The I2O device is still valid, just doesn't
661 * have media, so we don't want to clear the controller or
664 case I2O_EVT_IND_BSA_VOLUME_UNLOAD:
666 struct gendisk *p = i2ob_disk[unit>>4];
668 for(i = unit; i <= unit+15; i++)
669 blk_queue_max_sectors(i2ob_dev[i].req_queue, 0);
670 i2ob_media_change_flag[unit] = 1;
674 case I2O_EVT_IND_BSA_VOLUME_UNLOAD_REQ:
675 printk(KERN_WARNING "%s: Attempt to eject locked media\n",
676 i2ob_dev[unit].i2odev->dev_name);
680 * The capacity has changed and we are going to be
681 * updating the max_sectors and other information
682 * about this disk. We try a revalidate first. If
683 * the block device is in use, we don't want to
684 * do that as there may be I/Os bound for the disk
685 * at the moment. In that case we read the size
686 * from the device and update the information ourselves
687 * and the user can later force a partition table
688 * update through an ioctl.
690 case I2O_EVT_IND_BSA_CAPACITY_CHANGE:
694 if(i2ob_query_device(&i2ob_dev[unit], 0x0004, 0, &size, 8) !=0 )
695 i2ob_query_device(&i2ob_dev[unit], 0x0000, 4, &size, 8);
697 spin_lock_irqsave(I2O_LOCK(unit), flags);
698 set_capacity(i2ob_disk[unit>>4], size>>9);
699 spin_unlock_irqrestore(I2O_LOCK(unit), flags);
704 * We got a SCSI SMART event, we just log the relevant
705 * information and let the user decide what they want
706 * to do with the information.
708 case I2O_EVT_IND_BSA_SCSI_SMART:
711 printk(KERN_INFO "I2O Block: %s received a SCSI SMART Event\n",i2ob_dev[unit].i2odev->dev_name);
712 evt_local->data[16]='\0';
713 sprintf(buf,"%s",&evt_local->data[0]);
714 printk(KERN_INFO " Disk Serial#:%s\n",buf);
715 printk(KERN_INFO " ASC 0x%02x \n",evt_local->ASC);
716 printk(KERN_INFO " ASCQ 0x%02x \n",evt_local->ASCQ);
728 * An event we didn't ask for. Call the card manufacturer
729 * and tell them to fix their firmware :)
734 * If a Promise card reports a 0x20 event then the brown stuff
735 * hit the fan big time. The card seems to recover but loses
736 * the pending writes. Deeply ungood except for testing fsck
738 if(i2ob_dev[unit].i2odev->controller->promise)
739 panic("I2O controller firmware failed. Reboot and force a filesystem check.\n");
741 printk(KERN_INFO "%s: Received event 0x%X we didn't register for\n"
742 KERN_INFO " Blame the I2O card manufacturer 8)\n",
743 i2ob_dev[unit].i2odev->dev_name, evt);
748 complete_and_exit(&i2ob_thread_dead,0);
753 * The I2O block driver is listed as one of those that pulls the
754 * front entry off the queue before processing it. This is important
755 * to remember here. If we drop the io lock then CURRENT will change
756 * on us. We must unlink CURRENT in this routine before we return, if
760 static void i2ob_request(request_queue_t *q)
763 struct i2ob_request *ireq;
764 struct i2ob_device *dev;
767 while ((req = elv_next_request(q)) != NULL) {
769 * On an IRQ completion if there is an inactive
770 * request on the queue head it means it isn't yet
773 if(req->rq_status == RQ_INACTIVE)
776 dev = req->rq_disk->private_data;
779 * Queue depths probably belong with some kind of
780 * generic IOP commit control. Certainly it's not right
783 if(atomic_read(&i2ob_queues[dev->unit]->queue_depth) >= dev->depth)
791 if(atomic_read(&i2ob_queues[dev->unit]->queue_depth) == 0)
792 printk(KERN_ERR "i2o_block: message queue and request queue empty!!\n");
796 * Everything ok, so pull from kernel queue onto our queue
799 blkdev_dequeue_request(req);
802 ireq = i2ob_queues[dev->unit]->i2ob_qhead;
803 i2ob_queues[dev->unit]->i2ob_qhead = ireq->next;
806 i2ob_send(m, dev, ireq, (dev->unit&0xF0));
812 * SCSI-CAM for ioctl geometry mapping
813 * Duplicated with SCSI - this should be moved into somewhere common
816 * LBA -> CHS mapping table taken from:
818 * "Incorporating the I2O Architecture into BIOS for Intel Architecture
821 * This is an I2O document that is only available to I2O members,
824 * From my understanding, this is how all the I2O cards do this
826 * Disk Size | Sectors | Heads | Cylinders
827 * ---------------+---------+-------+-------------------
828 * 1 < X <= 528M | 63 | 16 | X/(63 * 16 * 512)
829 * 528M < X <= 1G | 63 | 32 | X/(63 * 32 * 512)
834 #define BLOCK_SIZE_528M 1081344
835 #define BLOCK_SIZE_1G 2097152
836 #define BLOCK_SIZE_21G 4403200
837 #define BLOCK_SIZE_42G 8806400
838 #define BLOCK_SIZE_84G 17612800
840 static void i2o_block_biosparam(
841 unsigned long capacity,
842 unsigned short *cyls,
846 unsigned long heads, sectors, cylinders;
848 sectors = 63L; /* Maximize sectors per track */
849 if(capacity <= BLOCK_SIZE_528M)
851 else if(capacity <= BLOCK_SIZE_1G)
853 else if(capacity <= BLOCK_SIZE_21G)
855 else if(capacity <= BLOCK_SIZE_42G)
860 cylinders = (unsigned long)capacity / (heads * sectors);
862 *cyls = (unsigned short) cylinders; /* Stuff return values */
863 *secs = (unsigned char) sectors;
864 *hds = (unsigned char) heads;
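/*
 * Worked example (editor's addition): capacity is counted in 512-byte
 * sectors, so BLOCK_SIZE_528M == 1081344 sectors == 1081344 * 512
 * bytes == 528MB. A 1GB volume (2097152 sectors) falls in the
 * "528M < X <= 1G" row and gets sectors = 63, heads = 32,
 * cylinders = 2097152 / (32 * 63) = 1040:
 *
 *	unsigned short cyls; unsigned char hds, secs;
 *	i2o_block_biosparam(2097152, &cyls, &hds, &secs);
 *	// cyls == 1040, hds == 32, secs == 63
 */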
868 * Issue device specific ioctl calls.
871 static int i2ob_ioctl(struct inode *inode, struct file *file,
872 unsigned int cmd, unsigned long arg)
874 struct gendisk *disk = inode->i_bdev->bd_disk;
875 struct i2ob_device *dev = disk->private_data;
877 /* Anyone capable of this syscall can do *real bad* things */
879 if (!capable(CAP_SYS_ADMIN))
884 struct hd_geometry g;
885 i2o_block_biosparam(get_capacity(disk),
886 &g.cylinders, &g.heads, &g.sectors);
887 g.start = get_start_sect(inode->i_bdev);
888 return copy_to_user((void *)arg,&g, sizeof(g))?-EFAULT:0;
892 return put_user(dev->rcache, (int *)arg);
894 return put_user(dev->wcache, (int *)arg);
896 if(arg<0||arg>CACHE_SMARTFETCH)
901 if(arg!=0 && (arg<CACHE_WRITETHROUGH || arg>CACHE_SMARTBACK))
910 * Close the block device down
913 static int i2ob_release(struct inode *inode, struct file *file)
915 struct gendisk *disk = inode->i_bdev->bd_disk;
916 struct i2ob_device *dev = disk->private_data;
919 * This is to deal with the case of an application
920 * opening a device and then the device disappears while
921 * it's in use, and then the application tries to release
922 * it. ex: Unmounting a deleted RAID volume at reboot.
923 * If we send messages, it will just cause FAILs since
924 * the TID no longer exists.
929 if (dev->refcnt <= 0)
930 printk(KERN_ALERT "i2ob_release: refcount(%d) <= 0\n", dev->refcnt);
935 * Flush the onboard cache on unmount
938 int *query_done = &dev->done_flag;
939 msg[0] = (FIVE_WORD_MSG_SIZE|SGL_OFFSET_0);
940 msg[1] = I2O_CMD_BLOCK_CFLUSH<<24|HOST_TID<<12|dev->tid;
941 msg[2] = i2ob_context|0x40000000;
942 msg[3] = (u32)query_done;
944 DEBUG("Flushing...");
945 i2o_post_wait(dev->controller, msg, 20, 60);
950 msg[0] = FIVE_WORD_MSG_SIZE|SGL_OFFSET_0;
951 msg[1] = I2O_CMD_BLOCK_MUNLOCK<<24|HOST_TID<<12|dev->tid;
952 msg[2] = i2ob_context|0x40000000;
953 msg[3] = (u32)query_done;
955 DEBUG("Unlocking...");
956 i2o_post_wait(dev->controller, msg, 20, 2);
957 DEBUG("Unlocked.\n");
959 msg[0] = FOUR_WORD_MSG_SIZE|SGL_OFFSET_0;
960 msg[1] = I2O_CMD_BLOCK_POWER<<24 | HOST_TID << 12 | dev->tid;
961 if(dev->flags & (1<<3|1<<4)) /* Removable */
966 if(i2o_post_wait(dev->controller, msg, 20, 60)==0)
970 * Now unclaim the device.
973 if (i2o_release_device(dev->i2odev, &i2o_block_handler))
974 printk(KERN_ERR "i2ob_release: controller rejected unclaim.\n");
982 * Open the block device.
985 static int i2ob_open(struct inode *inode, struct file *file)
987 struct gendisk *disk = inode->i_bdev->bd_disk;
988 struct i2ob_device *dev = disk->private_data;
998 if(i2o_claim_device(dev->i2odev, &i2o_block_handler))
1001 printk(KERN_INFO "I2O Block: Could not open device\n");
1006 * Power up if needed
1009 if(dev->power > 0x1f)
1011 msg[0] = FOUR_WORD_MSG_SIZE|SGL_OFFSET_0;
1012 msg[1] = I2O_CMD_BLOCK_POWER<<24 | HOST_TID << 12 | dev->tid;
1013 msg[4] = 0x02 << 24;
1014 if(i2o_post_wait(dev->controller, msg, 20, 60) == 0)
1019 * Mount the media if needed. Note that we don't use
1020 * the lock bit. Since we have to issue a lock if it
1021 * refuses a mount (quite possible) then we might as
1022 * well just send two messages out.
1024 msg[0] = FIVE_WORD_MSG_SIZE|SGL_OFFSET_0;
1025 msg[1] = I2O_CMD_BLOCK_MMOUNT<<24|HOST_TID<<12|dev->tid;
1029 i2o_post_wait(dev->controller, msg, 24, 2);
1034 msg[0] = FIVE_WORD_MSG_SIZE|SGL_OFFSET_0;
1035 msg[1] = I2O_CMD_BLOCK_MLOCK<<24|HOST_TID<<12|dev->tid;
1038 i2o_post_wait(dev->controller, msg, 20, 2);
1045 * Issue a device query
1048 static int i2ob_query_device(struct i2ob_device *dev, int table,
1049 int field, void *buf, int buflen)
1051 return i2o_query_scalar(dev->controller, dev->tid,
1052 table, field, buf, buflen);
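/*
 * Usage sketch (editor's note): callers in this file use the wrapper
 * to read single scalar parameters, e.g. the 64-bit size, falling
 * back from parameter group 0x0004 to group 0x0000 as the install
 * and capacity-change paths do:
 *
 *	u64 size;
 *	if (i2ob_query_device(dev, 0x0004, 0, &size, 8) != 0)
 *		i2ob_query_device(dev, 0x0000, 4, &size, 8);
 */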
1057 * Install the I2O block device we found.
1060 static int i2ob_install_device(struct i2o_controller *c, struct i2o_device *d, int unit)
1067 struct i2ob_device *dev=&i2ob_dev[unit];
1071 * For logging purposes...
1073 printk(KERN_INFO "i2ob: Installing tid %d device at unit %d\n",
1074 d->lct_data.tid, unit);
1077 * If this is the first I2O block device found on this IOP,
1078 * we need to initialize all the queue data structures
1079 * before any I/O can be performed. If it fails, this
1080 * device is useless.
1082 if(!i2ob_queues[unit]) {
1083 if(i2ob_init_iop(unit))
1088 * This will save one level of lookup/indirection in critical
1089 * code so that we can directly get the queue ptr from the
1090 * device instead of having to go through the IOP data structure.
1092 dev->req_queue = i2ob_queues[unit]->req_queue;
1094 /* initialize gendisk structure */
1095 i2ob_disk[unit>>4]->private_data = dev;
1096 i2ob_disk[unit>>4]->queue = dev->req_queue;
1099 * Ask for the current media data. If that isn't supported
1100 * then we ask for the device capacity data
1102 if(i2ob_query_device(dev, 0x0004, 1, &blocksize, 4) != 0
1103 || i2ob_query_device(dev, 0x0004, 0, &size, 8) !=0 )
1105 i2ob_query_device(dev, 0x0000, 3, &blocksize, 4);
1106 i2ob_query_device(dev, 0x0000, 4, &size, 8);
1109 if(i2ob_query_device(dev, 0x0000, 2, &power, 2)!=0)
1111 i2ob_query_device(dev, 0x0000, 5, &flags, 4);
1112 i2ob_query_device(dev, 0x0000, 6, &status, 4);
1113 set_capacity(i2ob_disk[unit>>4], size>>9);
1116 * Max number of Scatter-Gather Elements
1119 i2ob_dev[unit].power = power; /* Save power state in device proper */
1120 i2ob_dev[unit].flags = flags;
1122 for(i=unit;i<=unit+15;i++)
1124 request_queue_t *q = i2ob_dev[unit].req_queue;
1125 int segments = (d->controller->status_block->inbound_frame_size - 7) / 2;
1130 i2ob_dev[i].power = power; /* Save power state */
1131 i2ob_dev[unit].flags = flags; /* Keep the type info */
1133 blk_queue_max_sectors(q, 96); /* 256 might be nicer but many controllers
1134 explode on 65536 or higher */
1135 blk_queue_max_phys_segments(q, segments);
1136 blk_queue_max_hw_segments(q, segments);
1138 i2ob_dev[i].rcache = CACHE_SMARTFETCH;
1139 i2ob_dev[i].wcache = CACHE_WRITETHROUGH;
1141 if(d->controller->battery == 0)
1142 i2ob_dev[i].wcache = CACHE_WRITETHROUGH;
1144 if(d->controller->promise)
1145 i2ob_dev[i].wcache = CACHE_WRITETHROUGH;
1147 if(d->controller->short_req)
1149 blk_queue_max_sectors(q, 8);
1150 blk_queue_max_phys_segments(q, 8);
1151 blk_queue_max_hw_segments(q, 8);
1155 strcpy(d->dev_name, i2ob_disk[unit>>4]->disk_name);
1156 strcpy(i2ob_disk[unit>>4]->devfs_name, i2ob_disk[unit>>4]->disk_name);
1158 printk(KERN_INFO "%s: Max segments %d, queue depth %d, byte limit %d.\n",
1159 d->dev_name, i2ob_dev[unit].max_segments, i2ob_dev[unit].depth, i2ob_max_sectors[unit]<<9);
1161 i2ob_query_device(dev, 0x0000, 0, &type, 1);
1163 printk(KERN_INFO "%s: ", d->dev_name);
1166 case 0: printk("Disk Storage");break;
1167 case 4: printk("WORM");break;
1168 case 5: printk("CD-ROM");break;
1169 case 7: printk("Optical device");break;
1171 printk("Type %d", type);
1176 if((flags^status)&(1<<4|1<<3)) /* Missing media or device */
1178 printk(KERN_INFO " Not loaded.\n");
1179 /* Device missing ? */
1180 if((flags^status)&(1<<4))
1185 printk(": %dMB, %d byte sectors",
1186 (int)(size>>20), blocksize);
1191 i2ob_query_device(dev, 0x0003, 0, &cachesize, 4);
1194 printk(", %dMb cache", cachesize>>10);
1196 printk(", %dKb cache", cachesize);
1199 printk(KERN_INFO "%s: Maximum sectors/read set to %d.\n",
1200 d->dev_name, i2ob_max_sectors[unit]);
1203 * Register for the events we're interested in and that the
1204 * device actually supports.
1207 i2o_event_register(c, d->lct_data.tid, i2ob_context, unit,
1208 (I2OB_EVENT_MASK & d->lct_data.event_capabilities));
1213 * Initialize IOP specific queue structures. This is called
1214 * once for each IOP that has a block device sitting behind it.
1216 static int i2ob_init_iop(unsigned int unit)
1220 i2ob_queues[unit] = (struct i2ob_iop_queue *) kmalloc(sizeof(struct i2ob_iop_queue), GFP_ATOMIC);
1221 if(!i2ob_queues[unit])
1223 printk(KERN_WARNING "Could not allocate request queue for I2O block device!\n");
1227 for(i = 0; i< MAX_I2OB_DEPTH; i++)
1229 i2ob_queues[unit]->request_queue[i].next = &i2ob_queues[unit]->request_queue[i+1];
1230 i2ob_queues[unit]->request_queue[i].num = i;
1233 /* Terminate the free list at its last valid element (request_queue has MAX_I2OB_DEPTH entries) */
1234 i2ob_queues[unit]->request_queue[MAX_I2OB_DEPTH-1].next = NULL;
1235 i2ob_queues[unit]->i2ob_qhead = &i2ob_queues[unit]->request_queue[0];
1236 atomic_set(&i2ob_queues[unit]->queue_depth, 0);
1238 i2ob_queues[unit]->lock = SPIN_LOCK_UNLOCKED;
1239 i2ob_queues[unit]->req_queue = blk_init_queue(i2ob_request, &i2ob_queues[unit]->lock);
1240 if (!i2ob_queues[unit]->req_queue) {
1241 kfree(i2ob_queues[unit]);
1245 i2ob_queues[unit]->req_queue->queuedata = &i2ob_queues[unit];
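/*
 * Editor's note on the free list built above: request_queue[] is a
 * fixed pool of i2ob_request slots chained through ->next.
 * i2ob_request() pops a slot from i2ob_qhead before sending and
 * i2ob_unhook_request() pushes it back at completion time, so the
 * I/O path never allocates memory:
 *
 *	ireq = i2ob_queues[unit]->i2ob_qhead;		// pop
 *	i2ob_queues[unit]->i2ob_qhead = ireq->next;
 *	...
 *	ireq->next = i2ob_queues[unit]->i2ob_qhead;	// push back
 *	i2ob_queues[unit]->i2ob_qhead = ireq;
 */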
1251 * Probe the I2O subsystem for block class devices
1253 static void i2ob_scan(int bios)
1258 struct i2o_device *d, *b=NULL;
1259 struct i2o_controller *c;
1260 struct i2ob_device *dev;
1262 for(i=0; i< MAX_I2O_CONTROLLERS; i++)
1264 c=i2o_find_controller(i);
1270 * The device list connected to the I2O Controller is doubly linked
1271 * Here we traverse to the end of the list, and start claiming devices
1272 * from that end. This ensures that within an I2O controller at least
1273 * the newly created volumes get claimed after the older ones, thus
1274 * mapping to same major/minor (and hence device file name) after
1276 * The exception being:
1277 * 1. If there was a TID reuse.
1278 * 2. There was more than one I2O controller.
1283 for (d=c->devices;d!=NULL;d=d->next)
1298 if(d->lct_data.class_id!=I2O_CLASS_RANDOM_BLOCK_STORAGE)
1301 if(d->lct_data.user_tid != 0xFFF)
1306 if(d->lct_data.bios_info != 0x80)
1308 printk(KERN_INFO "Claiming as Boot device: Controller %d, TID %d\n", c->unit, d->lct_data.tid);
1312 if(d->lct_data.bios_info == 0x80)
1313 continue; /*Already claimed on pass 1 */
1316 if(i2o_claim_device(d, &i2o_block_handler))
1318 printk(KERN_WARNING "i2o_block: Controller %d, TID %d\n", c->unit,
1320 printk(KERN_WARNING "\t%sevice refused claim! Skipping installation\n", bios?"Boot d":"D");
1324 i2o_release_device(d, &i2o_block_handler);
1326 if(scan_unit<MAX_I2OB<<4)
1329 * Get the device and fill in the
1330 * Tid and controller.
1332 dev=&i2ob_dev[scan_unit];
1334 dev->controller = c;
1335 dev->unit = c->unit;
1336 dev->tid = d->lct_data.tid;
1338 if(i2ob_install_device(c,d,scan_unit))
1339 printk(KERN_WARNING "Could not install I2O block device\n");
1342 add_disk(i2ob_disk[scan_unit>>4]);
1346 /* We want to know when device goes away */
1347 i2o_device_notify_on(d, &i2o_block_handler);
1353 printk(KERN_WARNING "i2o_block: too many devices, registering only %d.\n", scan_unit>>4);
1356 i2o_unlock_controller(c);
1360 static void i2ob_probe(void)
1363 * Some overhead/redundancy involved here, while trying to
1364 * claim the first boot volume encountered as /dev/i2o/hda
1365 * every time. All the i2o_controllers are searched and the
1366 * first I2O block device marked as bootable is claimed.
1367 * If an I2O block device was booted off, the BIOS sets
1368 * its bios_info field to 0x80; this is what we search for.
1369 * Assuming that the bootable volume is /dev/i2o/hda
1370 * every time will prevent any kernel panic while mounting
1374 printk(KERN_INFO "i2o_block: Checking for Boot device...\n");
1378 * Now the remainder.
1380 printk(KERN_INFO "i2o_block: Checking for I2O Block devices...\n");
1386 * New device notification handler. Called whenever a new
1387 * I2O block storage device is added to the system.
1389 * Should we spin lock around this to keep multiple devs from
1390 * getting updated at the same time?
1393 void i2ob_new_device(struct i2o_controller *c, struct i2o_device *d)
1395 struct i2ob_device *dev;
1398 printk(KERN_INFO "i2o_block: New device detected\n");
1399 printk(KERN_INFO " Controller %d Tid %d\n",c->unit, d->lct_data.tid);
1401 /* Check for available space */
1402 if(i2ob_dev_count>=MAX_I2OB<<4)
1404 printk(KERN_ERR "i2o_block: No more devices allowed!\n");
1407 for(unit = 0; unit < (MAX_I2OB<<4); unit += 16)
1409 if(!i2ob_dev[unit].i2odev)
1413 if(i2o_claim_device(d, &i2o_block_handler))
1415 printk(KERN_INFO "i2o_block: Unable to claim device. Installation aborted\n");
1419 dev = &i2ob_dev[unit];
1421 dev->controller = c;
1422 dev->tid = d->lct_data.tid;
1424 if(i2ob_install_device(c,d,unit))
1425 printk(KERN_ERR "i2o_block: Could not install new device\n");
1428 add_disk(i2ob_disk[unit>>4]);
1430 i2o_device_notify_on(d, &i2o_block_handler);
1433 i2o_release_device(d, &i2o_block_handler);
1439 * Deleted device notification handler. Called when a device we
1440 * are talking to has been deleted by the user or some other
1441 * mysterious force outside the kernel.
1443 void i2ob_del_device(struct i2o_controller *c, struct i2o_device *d)
1447 unsigned long flags;
1449 spin_lock_irqsave(I2O_LOCK(c->unit), flags);
1452 * Need to do this...we sometimes get two events from the IRTOS
1453 * in a row and that causes lots of problems.
1455 i2o_device_notify_off(d, &i2o_block_handler);
1457 printk(KERN_INFO "I2O Block Device Deleted\n");
1459 for(unit = 0; unit < MAX_I2OB<<4; unit += 16)
1461 if(i2ob_dev[unit].i2odev == d)
1463 printk(KERN_INFO " /dev/%s: Controller %d Tid %d\n",
1464 d->dev_name, c->unit, d->lct_data.tid);
1468 if(unit >= MAX_I2OB<<4)
1470 printk(KERN_ERR "i2ob_del_device called, but not in dev table!\n");
1471 spin_unlock_irqrestore(I2O_LOCK(c->unit), flags);
1476 * This will force errors when i2ob_get_queue() is called
1479 del_gendisk(i2ob_disk[unit>>4]);
1480 i2ob_dev[unit].req_queue = NULL;
1481 for(i = unit; i <= unit+15; i++)
1483 i2ob_dev[i].i2odev = NULL;
1484 blk_queue_max_sectors(i2ob_dev[i].req_queue, 0);
1486 spin_unlock_irqrestore(I2O_LOCK(c->unit), flags);
1489 * Decrease usage count for module
1492 while(i2ob_dev[unit].refcnt--)
1495 i2ob_dev[unit].refcnt = 0;
1497 i2ob_dev[i].tid = 0;
1501 * The media didn't really change...the device is just gone
1503 i2ob_media_change_flag[unit] = 1;
1509 * Have we seen a media change?
1511 static int i2ob_media_change(struct gendisk *disk)
1513 struct i2ob_device *p = disk->private_data;
1515 if(i2ob_media_change_flag[i])
1517 i2ob_media_change_flag[i]=0;
1523 static int i2ob_revalidate(struct gendisk *disk)
1525 struct i2ob_device *p = disk->private_data;
1526 return i2ob_install_device(p->controller, p->i2odev, p->index<<4);
1530 * Reboot notifier. This is called by i2o_core when the system
1533 static void i2ob_reboot_event(void)
1537 for(i=0;i<MAX_I2OB;i++)
1539 struct i2ob_device *dev=&i2ob_dev[(i<<4)];
1544 * Flush the onboard cache
1547 int *query_done = &dev->done_flag;
1548 msg[0] = FIVE_WORD_MSG_SIZE|SGL_OFFSET_0;
1549 msg[1] = I2O_CMD_BLOCK_CFLUSH<<24|HOST_TID<<12|dev->tid;
1550 msg[2] = i2ob_context|0x40000000;
1551 msg[3] = (u32)query_done;
1554 DEBUG("Flushing...");
1555 i2o_post_wait(dev->controller, msg, 20, 60);
1557 DEBUG("Unlocking...");
1561 msg[0] = FIVE_WORD_MSG_SIZE|SGL_OFFSET_0;
1562 msg[1] = I2O_CMD_BLOCK_MUNLOCK<<24|HOST_TID<<12|dev->tid;
1563 msg[2] = i2ob_context|0x40000000;
1564 msg[3] = (u32)query_done;
1566 i2o_post_wait(dev->controller, msg, 20, 2);
1568 DEBUG("Unlocked.\n");
1573 static struct block_device_operations i2ob_fops =
1575 .owner = THIS_MODULE,
1577 .release = i2ob_release,
1578 .ioctl = i2ob_ioctl,
1579 .media_changed = i2ob_media_change,
1580 .revalidate_disk= i2ob_revalidate,
1584 * And here should be modules and kernel interface
1585 * (Just smiley confuses emacs :-)
1588 static int i2o_block_init(void)
1592 printk(KERN_INFO "I2O Block Storage OSM v0.9\n");
1593 printk(KERN_INFO " (c) Copyright 1999-2001 Red Hat Software.\n");
1596 * Register the block device interfaces
1598 if (register_blkdev(MAJOR_NR, "i2o_block"))
1601 for (i = 0; i < MAX_I2OB; i++) {
1602 struct gendisk *disk = alloc_disk(16);
1605 i2ob_dev[i<<4].index = i;
1606 disk->queue = i2ob_dev[i<<4].req_queue;
1607 i2ob_disk[i] = disk;
1610 printk(KERN_INFO "i2o_block: registered device at major %d\n", MAJOR_NR);
1614 * Now fill in the boiler plate
1617 for (i = 0; i < MAX_I2OB << 4; i++) {
1618 i2ob_dev[i].refcnt = 0;
1619 i2ob_dev[i].flags = 0;
1620 i2ob_dev[i].controller = NULL;
1621 i2ob_dev[i].i2odev = NULL;
1622 i2ob_dev[i].tid = 0;
1623 i2ob_dev[i].head = NULL;
1624 i2ob_dev[i].tail = NULL;
1625 i2ob_dev[i].depth = MAX_I2OB_DEPTH;
1626 i2ob_max_sectors[i] = 2;
1629 for (i = 0; i < MAX_I2OB; i++) {
1630 struct gendisk *disk = i2ob_disk[i];
1631 disk->major = MAJOR_NR;
1632 disk->first_minor = i<<4;
1633 disk->fops = &i2ob_fops;
1634 sprintf(disk->disk_name, "i2o/hd%c", 'a' + i);
1640 for(i = 0; i < MAX_I2O_CONTROLLERS; i++)
1642 i2ob_queues[i] = NULL;
1646 * Register the OSM handler as we will need this to probe for
1647 * drives, geometry and other goodies.
1650 if(i2o_install_handler(&i2o_block_handler)<0)
1652 unregister_blkdev(MAJOR_NR, "i2o_block");
1653 printk(KERN_ERR "i2o_block: unable to register OSM.\n");
1656 i2ob_context = i2o_block_handler.context;
1659 * Initialize event handling thread
1661 init_MUTEX_LOCKED(&i2ob_evt_sem);
1662 evt_pid = kernel_thread(i2ob_evt, NULL, CLONE_SIGHAND);
1665 printk(KERN_ERR "i2o_block: Could not initialize event thread. Aborting\n");
1666 i2o_remove_handler(&i2o_block_handler);
1676 put_disk(i2ob_disk[i]);
1677 unregister_blkdev(MAJOR_NR, "i2o_block");
1682 static void i2o_block_exit(void)
1687 printk(KERN_INFO "Killing I2O block threads...");
1688 i = kill_proc(evt_pid, SIGKILL, 1);
1690 printk("waiting...\n");
1692 /* Be sure it died */
1693 wait_for_completion(&i2ob_thread_dead);
1698 * Unregister for updates from any devices...otherwise we still
1699 * get them and the core jumps to random memory :O
1701 if(i2ob_dev_count) {
1702 struct i2o_device *d;
1703 for(i = 0; i < MAX_I2OB; i++)
1704 if((d=i2ob_dev[i<<4].i2odev)) {
1705 i2o_device_notify_off(d, &i2o_block_handler);
1706 i2o_event_register(d->controller, d->lct_data.tid,
1707 i2ob_context, i<<4, 0);
1712 * We may get further callbacks for ourselves. The i2o_core
1713 * code handles this case reasonably sanely. The problem here
1714 * is we shouldn't get them... but a couple of cards feel
1715 * obliged to tell us stuff we don't care about.
1717 * This isn't ideal at all but will do for now.
1720 set_current_state(TASK_UNINTERRUPTIBLE);
1721 schedule_timeout(HZ);
1727 i2o_remove_handler(&i2o_block_handler);
1729 for (i = 0; i < MAX_I2OB; i++)
1730 put_disk(i2ob_disk[i]);
1733 * Return the block device
1735 if (unregister_blkdev(MAJOR_NR, "i2o_block") != 0)
1736 printk("i2o_block: cleanup_module failed\n");
1739 MODULE_AUTHOR("Red Hat");
1740 MODULE_DESCRIPTION("I2O Block Device OSM");
1741 MODULE_LICENSE("GPL");
1743 module_init(i2o_block_init);
1744 module_exit(i2o_block_exit);