/*
 * I2O Random Block Storage Class OSM
 *
 * (C) Copyright 1999-2002 Red Hat
 *
 * Written by Alan Cox, Building Number Three Ltd
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * For the purpose of avoiding doubt the preferred form of the work
 * for making modifications shall be a standards compliant form such
 * as a gzipped tar and not one requiring a proprietary or patent
 * encumbered tool to unpack.
 *
 * This is a beta test release. Most of the good code was taken
 * from the nbd driver by Pavel Machek, who in turn took some of it
 * from loop.c. Isn't free software great for reusability 8)
 *
 * Fixes/additions:
 *      Steve Ralston:
 *              Multiple device handling error fixes,
 *              Added a queue depth.
 *      Alan Cox:
 *              FC920 has an rmw bug. Don't OR in the end marker.
 *              Removed queue walk, fixed for 64bitness.
 *              Rewrote much of the code over time
 *              Added indirect block lists
 *              Handle 64K limits on many controllers
 *              Don't use indirects on the Promise (breaks)
 *              Heavily chop down the queue depths
 *      Deepak Saxena:
 *              Independent queues per IOP
 *              Support for dynamic device creation/deletion
 *              Code cleanup
 *              Support for larger I/Os through merge* functions
 *              (taken from DAC960 driver)
 *      Boji T Kannanthanam:
 *              Set the I2O Block devices to be detected in increasing
 *              order of TIDs during boot.
 *              Search for and set the I2O block device that we boot from as
 *              the first device to be claimed (as /dev/i2o/hda).
 *              Properly attach/detach I2O gendisk structure from the system
 *              gendisk list. The I2O block devices now appear in
 *              /proc/partitions.
 *      Markus Lidel <Markus.Lidel@shadowconnect.com>:
 *              Minor bugfixes for 2.6.
 *
 * To do:
 *      Serial number scanning to find duplicates for FC multipathing
 */

#include <linux/major.h>

#include <linux/module.h>
#include <linux/init.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/stat.h>
#include <linux/pci.h>
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/ioctl.h>
#include <linux/i2o.h>
#include <linux/blkdev.h>
#include <linux/blkpg.h>
#include <linux/slab.h>
#include <linux/hdreg.h>
#include <linux/spinlock.h>
#include <linux/bio.h>

#include <linux/notifier.h>
#include <linux/reboot.h>

#include <asm/uaccess.h>
#include <asm/semaphore.h>
#include <linux/completion.h>
#include <asm/io.h>
#include <asm/atomic.h>
#include <linux/smp_lock.h>
#include <linux/wait.h>

#define MAJOR_NR I2O_MAJOR

#define MAX_I2OB        16

#define MAX_I2OB_DEPTH  8
#define MAX_I2OB_RETRIES 4

//#define DRIVERDEBUG
#ifdef DRIVERDEBUG
#define DEBUG( s ) printk( s )
#else
#define DEBUG( s )
#endif

/*
 * Events that this OSM is interested in
 */
#define I2OB_EVENT_MASK         (I2O_EVT_IND_BSA_VOLUME_LOAD |  \
                                 I2O_EVT_IND_BSA_VOLUME_UNLOAD | \
                                 I2O_EVT_IND_BSA_VOLUME_UNLOAD_REQ | \
                                 I2O_EVT_IND_BSA_CAPACITY_CHANGE | \
                                 I2O_EVT_IND_BSA_SCSI_SMART )
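
/*
 * Note (inferred from the event handler thread below): of the BSA events
 * in this mask only I2O_EVT_IND_BSA_SCSI_SMART appears to carry any
 * EventData (an ASC/ASCQ pair plus a serial number string); the volume
 * load/unload and capacity change indications are bare notifications,
 * which is why i2ob_evt() only looks at the header and the event
 * indicator word for those cases.
 */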

#define I2O_LOCK(unit)  (i2ob_dev[(unit)].req_queue->queue_lock)

/*
 *      Some of these can be made smaller later
 */

static int i2ob_media_change_flag[MAX_I2OB];
static u32 i2ob_max_sectors[MAX_I2OB<<4];

static int i2ob_context;

/*
 * I2O Block device descriptor
 */
struct i2ob_device
{
        struct i2o_controller *controller;
        struct i2o_device *i2odev;
        int unit;
        int tid;
        int flags;
        int refcnt;
        struct request *head, *tail;
        request_queue_t *req_queue;
        int max_segments;
        int max_direct;         /* Not yet used properly */
        int done_flag;
        int depth;
        int rcache;
        int wcache;
        int power;
        int index;
};

/*
 *      FIXME:
 *      We should cache align these to avoid ping-ponging lines on SMP
 *      boxes under heavy I/O load...
 */

struct i2ob_request
{
        struct i2ob_request *next;
        struct request *req;
        int num;
        int sg_dma_direction;
        int sg_nents;
        struct scatterlist sg_table[16];
};

/*
 * Per-IOP request queue information
 *
 * We have a separate request_queue_t per IOP so that a heavily
 * loaded I2O block device on an IOP does not starve block devices
 * across all I2O controllers.
 */
struct i2ob_iop_queue
{
        atomic_t queue_depth;
        struct i2ob_request request_queue[MAX_I2OB_DEPTH];
        struct i2ob_request *i2ob_qhead;
        request_queue_t *req_queue;
        spinlock_t lock;
};
static struct i2ob_iop_queue *i2ob_queues[MAX_I2O_CONTROLLERS];

/*
 *      Each I2O disk is one of these.
 */

static struct i2ob_device i2ob_dev[MAX_I2OB<<4];
static int i2ob_dev_count = 0;
static struct gendisk *i2ob_disk[MAX_I2OB];

/*
 * Mutex and spin lock for event handling synchronization
 * evt_msg contains the last event.
 */
static DECLARE_MUTEX_LOCKED(i2ob_evt_sem);
static DECLARE_COMPLETION(i2ob_thread_dead);
static spinlock_t i2ob_evt_lock = SPIN_LOCK_UNLOCKED;
static u32 evt_msg[MSG_FRAME_SIZE];

static void i2o_block_reply(struct i2o_handler *, struct i2o_controller *,
         struct i2o_message *);
static void i2ob_new_device(struct i2o_controller *, struct i2o_device *);
static void i2ob_del_device(struct i2o_controller *, struct i2o_device *);
static void i2ob_reboot_event(void);
static int i2ob_install_device(struct i2o_controller *, struct i2o_device *, int);
static void i2ob_end_request(struct request *);
static void i2ob_request(request_queue_t *);
static int i2ob_init_iop(unsigned int);
static int i2ob_query_device(struct i2ob_device *, int, int, void*, int);
static int i2ob_evt(void *);

static int evt_pid = 0;
static int evt_running = 0;
static int scan_unit = 0;

/*
 * I2O OSM registration structure...keeps getting bigger and bigger :)
 */
static struct i2o_handler i2o_block_handler =
{
        i2o_block_reply,
        i2ob_new_device,
        i2ob_del_device,
        i2ob_reboot_event,
        "I2O Block OSM",
        0,
        I2O_CLASS_RANDOM_BLOCK_STORAGE
};

/**
 *      i2ob_get        -       Get an I2O message
 *      @dev:  I2O block device
 *
 *      Get a message frame from the inbound FIFO used for this block device.
 *      The message offset is returned, or the I2O 'no message' value of
 *      0xFFFFFFFF if nothing is available.
 */

static u32 i2ob_get(struct i2ob_device *dev)
{
        struct i2o_controller *c=dev->controller;
        return I2O_POST_READ32(c);
}
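
/*
 * Usage sketch (inferred from i2ob_send() and i2ob_request() below, not a
 * new interface): the value returned by i2ob_get() is an offset into the
 * controller's shared memory window, not a kernel pointer.  A caller is
 * expected to test for the 'no message' sentinel before building a
 * request in the frame:
 *
 *      u32 m = i2ob_get(dev);
 *      if (m == 0xFFFFFFFF)
 *              return;                          // FIFO empty, try later
 *      msg = dev->controller->mem_offset + m;   // virtual address of frame
 */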

static int i2ob_build_sglist(struct i2ob_device *dev, struct i2ob_request *ireq)
{
        struct scatterlist *sg = ireq->sg_table;
        int nents;

        nents = blk_rq_map_sg(dev->req_queue, ireq->req, ireq->sg_table);

        if (rq_data_dir(ireq->req) == READ)
                ireq->sg_dma_direction = PCI_DMA_FROMDEVICE;
        else
                ireq->sg_dma_direction = PCI_DMA_TODEVICE;

        ireq->sg_nents = pci_map_sg(dev->controller->pdev, sg, nents, ireq->sg_dma_direction);
        return ireq->sg_nents;
}

void i2ob_free_sglist(struct i2ob_device *dev, struct i2ob_request *ireq)
{
        struct pci_dev *pdev = dev->controller->pdev;
        struct scatterlist *sg = ireq->sg_table;
        int nents = ireq->sg_nents;
        pci_unmap_sg(pdev, sg, nents, ireq->sg_dma_direction);
}
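
/*
 * Mapping lifecycle (a summary of the flow in this file, not new
 * behaviour): i2ob_send() calls i2ob_build_sglist() to map the request's
 * buffers for DMA before the message is posted, and the reply handler
 * i2o_block_reply() calls i2ob_free_sglist() once the controller is done
 * with them.  The pci_map_sg()/pci_unmap_sg() pair must see the same
 * scatterlist, entry count and direction, which is why all three are
 * carried in struct i2ob_request across the round trip.
 */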

/**
 *      i2ob_send               -       Turn a request into a message and send it
 *      @m: Message offset
 *      @dev: I2O device
 *      @ireq: Request structure
 *      @unit: Device identity
 *
 *      Generate an I2O BSAREAD request. This interface function is called for devices that
 *      appear to explode when they are fed indirect chain pointers (notably right now this
 *      appears to afflict Promise hardware, so be careful what you feed the hardware).
 *
 *      No cleanup is done by this interface. It is done on the interrupt side when the
 *      reply arrives.
 */

static int i2ob_send(u32 m, struct i2ob_device *dev, struct i2ob_request *ireq, int unit)
{
        struct i2o_controller *c = dev->controller;
        int tid = dev->tid;
        unsigned long msg;
        unsigned long mptr;
        u64 offset;
        struct request *req = ireq->req;
        int count = req->nr_sectors<<9;
        struct scatterlist *sg;
        int sgnum;
        int i;

        // printk(KERN_INFO "i2ob_send called\n");
        /* Map the message to a virtual address */
        msg = c->mem_offset + m;

        sgnum = i2ob_build_sglist(dev, ireq);

        /* FIXME: if we have no resources how should we get out of this */
        if(sgnum == 0)
                BUG();

        /*
         * Build the message based on the request.
         */
        i2o_raw_writel(i2ob_context|(unit<<8), msg+8);
        i2o_raw_writel(ireq->num, msg+12);
        i2o_raw_writel(req->nr_sectors << 9, msg+20);

        /*
         * Mask out partitions from now on
         */
        unit &= 0xF0;

        /* This can be optimised later - just want to be sure it's right for
           starters */
        offset = ((u64)req->sector) << 9;
        i2o_raw_writel( offset & 0xFFFFFFFF, msg+24);
        i2o_raw_writel(offset>>32, msg+28);
        mptr=msg+32;

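        /*
         * Scatter-gather element format as used below (my reading of the
         * I2O simple SGL encoding; treat the bit names as informal): each
         * element is two words, a flags|length word followed by the 32-bit
         * DMA address.  0x10000000 marks a simple data-in element and
         * 0x14000000 a simple data-out element; the final element of the
         * list additionally carries the last-element/end-of-buffer bits
         * (OR 0xC0000000), which yields the 0xD0000000/0xD4000000 values
         * written in the loops below.
         */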
        sg = ireq->sg_table;
        if(rq_data_dir(req) == READ)
        {
                DEBUG("READ\n");
                i2o_raw_writel(I2O_CMD_BLOCK_READ<<24|HOST_TID<<12|tid, msg+4);
                for(i = sgnum; i > 0; i--)
                {
                        if(i != 1)
                                i2o_raw_writel(0x10000000|sg_dma_len(sg), mptr);
                        else
                                i2o_raw_writel(0xD0000000|sg_dma_len(sg), mptr);
                        i2o_raw_writel(sg_dma_address(sg), mptr+4);
                        mptr += 8;
                        count -= sg_dma_len(sg);
                        sg++;
                }
                switch(dev->rcache)
                {
                        case CACHE_NULL:
                                i2o_raw_writel(0, msg+16);break;
                        case CACHE_PREFETCH:
                                i2o_raw_writel(0x201F0008, msg+16);break;
                        case CACHE_SMARTFETCH:
                                if(req->nr_sectors > 16)
                                        i2o_raw_writel(0x201F0008, msg+16);
                                else
                                        i2o_raw_writel(0x001F0000, msg+16);
                                break;
                }

//              printk("Reading %d entries %d bytes.\n",
//                      mptr-msg-8, req->nr_sectors<<9);
        }
        else if(rq_data_dir(req) == WRITE)
        {
                DEBUG("WRITE\n");
                i2o_raw_writel(I2O_CMD_BLOCK_WRITE<<24|HOST_TID<<12|tid, msg+4);
                for(i = sgnum; i > 0; i--)
                {
                        if(i != 1)
                                i2o_raw_writel(0x14000000|sg_dma_len(sg), mptr);
                        else
                                i2o_raw_writel(0xD4000000|sg_dma_len(sg), mptr);
                        i2o_raw_writel(sg_dma_address(sg), mptr+4);
                        mptr += 8;
                        count -= sg_dma_len(sg);
                        sg++;
                }

                switch(dev->wcache)
                {
                        case CACHE_NULL:
                                i2o_raw_writel(0, msg+16);break;
                        case CACHE_WRITETHROUGH:
                                i2o_raw_writel(0x001F0008, msg+16);break;
                        case CACHE_WRITEBACK:
                                i2o_raw_writel(0x001F0010, msg+16);break;
                        case CACHE_SMARTBACK:
                                if(req->nr_sectors > 16)
                                        i2o_raw_writel(0x001F0004, msg+16);
                                else
                                        i2o_raw_writel(0x001F0010, msg+16);
                                break;
                        case CACHE_SMARTTHROUGH:
                                if(req->nr_sectors > 16)
                                        i2o_raw_writel(0x001F0004, msg+16);
                                else
                                        i2o_raw_writel(0x001F0010, msg+16);
                }

//              printk("Writing %d entries %d bytes.\n",
//                      mptr-msg-8, req->nr_sectors<<9);
        }
        i2o_raw_writel(I2O_MESSAGE_SIZE(mptr-msg)>>2 | SGL_OFFSET_8, msg);

        if(count != 0)
        {
                printk(KERN_ERR "Request count botched by %d.\n", count);
        }

        i2o_post_message(c,m);
        atomic_inc(&i2ob_queues[c->unit]->queue_depth);

        return 0;
}

/*
 *      Return a request slot to the head of the _locked_ per-IOP free
 *      list. This is the complement of the dequeue in i2ob_request().
 *      Caller must hold the queue lock.
 */

static inline void i2ob_unhook_request(struct i2ob_request *ireq,
        unsigned int iop)
{
        ireq->next = i2ob_queues[iop]->i2ob_qhead;
        i2ob_queues[iop]->i2ob_qhead = ireq;
}

/*
 *      Request completion handler
 */

static inline void i2ob_end_request(struct request *req)
{
        /* FIXME  - pci unmap the request */

        /*
         * Loop until all of the buffers that are linked
         * to this request have been marked updated and
         * unlocked.
         */

        while (end_that_request_first( req, !req->errors, req->hard_cur_sectors ));

        /*
         * It is now ok to complete the request.
         */
        end_that_request_last( req );
        DEBUG("IO COMPLETED\n");
}
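
/*
 * Completion protocol note (how the 2.6-era block layer calls above
 * behave, stated informally): end_that_request_first() completes up to
 * the given number of sectors and returns non-zero while buffers remain
 * attached to the request, hence the empty while loop; only once it
 * returns zero may end_that_request_last() be called to release the
 * request itself.
 */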

/*
 *      OSM reply handler. This gets all the message replies
 */

static void i2o_block_reply(struct i2o_handler *h, struct i2o_controller *c, struct i2o_message *msg)
{
        unsigned long flags;
        struct i2ob_request *ireq = NULL;
        u8 st;
        u32 *m = (u32 *)msg;
        u8 unit = (m[2]>>8)&0xF0;       /* low 4 bits are partition */
        struct i2ob_device *dev = &i2ob_dev[(unit&0xF0)];

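        /*
         * Reply frame layout, as this handler reads it (matching what
         * i2ob_send() wrote into the request): m[0] carries the message
         * size and flag bits (bit 13 = FAILed), m[2] is our transaction
         * context (i2ob_context | unit << 8), m[3] is the i2ob_request
         * slot number we stuffed in at msg+12, and the top byte of m[4]
         * is the BSA request status.
         */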
        /*
         * FAILed message
         */
        if(m[0] & (1<<13))
        {
                DEBUG("FAIL");
                /*
                 * FAILed message from controller
                 * We increment the error count and abort it
                 *
                 * In theory this will never happen.  The I2O block class
                 * specification states that block devices never return
                 * FAILs but instead use the REQ status field...but
                 * better be on the safe side since no one really follows
                 * the spec to the book :)
                 */
                ireq=&i2ob_queues[c->unit]->request_queue[m[3]];
                ireq->req->errors++;

                spin_lock_irqsave(I2O_LOCK(c->unit), flags);
                i2ob_unhook_request(ireq, c->unit);
                i2ob_end_request(ireq->req);
                spin_unlock_irqrestore(I2O_LOCK(c->unit), flags);

                /* Now flush the message by making it a NOP */
                m[0]&=0x00FFFFFF;
                m[0]|=(I2O_CMD_UTIL_NOP)<<24;
                i2o_post_message(c, ((unsigned long)m) - c->mem_offset);

                return;
        }

        if(msg->function == I2O_CMD_UTIL_EVT_REGISTER)
        {
                spin_lock(&i2ob_evt_lock);
                memcpy(evt_msg, msg, (m[0]>>16)<<2);
                spin_unlock(&i2ob_evt_lock);
                up(&i2ob_evt_sem);
                return;
        }

        if(!dev->i2odev)
        {
                /*
                 * This is a HACK, but Intel Integrated RAID allows the
                 * user to delete a volume that is claimed, locked, and in
                 * use by the OS. We have to check for a reply from a
                 * non-existent device and flag it as an error or the system
                 * goes kaput...
                 */
                ireq=&i2ob_queues[c->unit]->request_queue[m[3]];
                ireq->req->errors++;
                printk(KERN_WARNING "I2O Block: Data transfer to deleted device!\n");
                spin_lock_irqsave(I2O_LOCK(c->unit), flags);
                i2ob_unhook_request(ireq, c->unit);
                i2ob_end_request(ireq->req);
                spin_unlock_irqrestore(I2O_LOCK(c->unit), flags);
                return;
        }

        /*
         *      Let's see what is cooking. We stuffed the
         *      request in the context.
         */

        ireq=&i2ob_queues[c->unit]->request_queue[m[3]];
        st=m[4]>>24;

        if(st!=0)
        {
                int err;
                char *bsa_errors[] =
                {
                        "Success",
                        "Media Error",
                        "Failure communicating to device",
                        "Device Failure",
                        "Device is not ready",
                        "Media not present",
                        "Media is locked by another user",
                        "Media has failed",
                        "Failure communicating to device",
                        "Device bus failure",
                        "Device is locked by another user",
                        "Device is write protected",
                        "Device has reset",
                        "Volume has changed, waiting for acknowledgement"
                };

                err = m[4]&0xFFFF;

                /*
                 *      "Device is not ready" means one of two things. One
                 *      is that the device went offline (but not a
                 *      removable-media eject).
                 *
                 *      The second is that you have a SuperTrak 100 and the
                 *      firmware got constipated. Unlike standard i2o card
                 *      setups the supertrak returns an error rather than
                 *      blocking for the timeout in these cases.
                 *
                 *      Don't stick a supertrak100 into cache aggressive modes
                 */

                printk(KERN_ERR "\n/dev/%s error: %s", dev->i2odev->dev_name,
                        bsa_errors[err]);
                if(m[4]&0x00FF0000)
                        printk(" - DDM attempted %d retries", (m[4]>>16)&0x00FF );
                printk(".\n");
                ireq->req->errors++;
        }
        else
                ireq->req->errors = 0;

        /*
         *      Dequeue the request. We use irqsave locks as one day we
         *      may be running polled controllers from a BH...
         */

        i2ob_free_sglist(dev, ireq);
        spin_lock_irqsave(I2O_LOCK(c->unit), flags);
        i2ob_unhook_request(ireq, c->unit);
        i2ob_end_request(ireq->req);
        atomic_dec(&i2ob_queues[c->unit]->queue_depth);

        /*
         *      We may be able to do more I/O
         */

        i2ob_request(dev->req_queue);
        spin_unlock_irqrestore(I2O_LOCK(c->unit), flags);
}

/*
 * Event handler.  Needs to be a separate thread because we may have
 * to do things like scan a partition table, or query parameters,
 * which cannot be done from an interrupt or from a bottom half.
 */
static int i2ob_evt(void *dummy)
{
        unsigned int evt;
        unsigned long flags;
        int unit;
        int i;
        /* The only event that has data is the SCSI_SMART event. */
        struct i2o_reply {
                u32 header[4];
                u32 evt_indicator;
                u8 ASC;
                u8 ASCQ;
                u16 pad;
                u8 data[16];
        } *evt_local;

        daemonize("i2oblock");
        allow_signal(SIGKILL);

        evt_running = 1;

        while(1)
        {
                if(down_interruptible(&i2ob_evt_sem))
                {
                        evt_running = 0;
                        printk("exiting...");
                        break;
                }

                /*
                 * Keep another CPU/interrupt from overwriting the
                 * message while we're reading it
                 *
                 * We stuffed the unit in the TxContext and grab the event
                 * mask. None of the BSA events we care about have EventData.
                 */
                spin_lock_irqsave(&i2ob_evt_lock, flags);
                evt_local = (struct i2o_reply *)evt_msg;
                spin_unlock_irqrestore(&i2ob_evt_lock, flags);

                unit = le32_to_cpu(evt_local->header[3]);
                evt = le32_to_cpu(evt_local->evt_indicator);

                switch(evt)
                {
                        /*
                         * New volume loaded on same TID, so we just re-install.
                         * The TID/controller don't change as it is the same
                         * I2O device.  It's just new media that we have to
                         * rescan.
                         */
                        case I2O_EVT_IND_BSA_VOLUME_LOAD:
                        {
                                struct gendisk *p = i2ob_disk[unit>>4];
                                i2ob_install_device(i2ob_dev[unit].i2odev->controller,
                                        i2ob_dev[unit].i2odev, unit);
                                add_disk(p);
                                break;
                        }

                        /*
                         * No media, so set all parameters to 0 and set the media
                         * change flag. The I2O device is still valid, just doesn't
                         * have media, so we don't want to clear the controller or
                         * device pointer.
                         */
                        case I2O_EVT_IND_BSA_VOLUME_UNLOAD:
                        {
                                struct gendisk *p = i2ob_disk[unit>>4];
                                del_gendisk(p);
                                for(i = unit; i <= unit+15; i++)
                                        blk_queue_max_sectors(i2ob_dev[i].req_queue, 0);
                                i2ob_media_change_flag[unit] = 1;
                                break;
                        }

                        case I2O_EVT_IND_BSA_VOLUME_UNLOAD_REQ:
                                printk(KERN_WARNING "%s: Attempt to eject locked media\n",
                                        i2ob_dev[unit].i2odev->dev_name);
                                break;

                        /*
                         * The capacity has changed and we are going to be
                         * updating the max_sectors and other information
                         * about this disk.  We try a revalidate first. If
                         * the block device is in use, we don't want to
                         * do that as there may be I/Os bound for the disk
                         * at the moment.  In that case we read the size
                         * from the device and update the information ourselves
                         * and the user can later force a partition table
                         * update through an ioctl.
                         */
                        case I2O_EVT_IND_BSA_CAPACITY_CHANGE:
                        {
                                u64 size;

                                if(i2ob_query_device(&i2ob_dev[unit], 0x0004, 0, &size, 8) !=0 )
                                        i2ob_query_device(&i2ob_dev[unit], 0x0000, 4, &size, 8);

                                spin_lock_irqsave(I2O_LOCK(unit), flags);
                                set_capacity(i2ob_disk[unit>>4], size>>9);
                                spin_unlock_irqrestore(I2O_LOCK(unit), flags);
                                break;
                        }

                        /*
                         * We got a SCSI SMART event, we just log the relevant
                         * information and let the user decide what they want
                         * to do with the information.
                         */
                        case I2O_EVT_IND_BSA_SCSI_SMART:
                        {
                                char buf[17];
                                printk(KERN_INFO "I2O Block: %s received a SCSI SMART Event\n",i2ob_dev[unit].i2odev->dev_name);
                                /* The serial number in the message is not
                                   NUL terminated, so copy it out and
                                   terminate it here rather than writing
                                   past the end of data[]. */
                                memcpy(buf, evt_local->data, 16);
                                buf[16] = '\0';
                                printk(KERN_INFO "      Disk Serial#:%s\n",buf);
                                printk(KERN_INFO "      ASC 0x%02x \n",evt_local->ASC);
                                printk(KERN_INFO "      ASCQ 0x%02x \n",evt_local->ASCQ);
                                break;
                        }

                        /*
                         *      Non event
                         */

                        case 0:
                                break;

                        /*
                         * An event we didn't ask for.  Call the card manufacturer
                         * and tell them to fix their firmware :)
                         */

                        case 0x20:
                                /*
                                 * If a promise card reports 0x20 event then the brown stuff
                                 * hit the fan big time. The card seems to recover but loses
                                 * the pending writes. Deeply ungood except for testing fsck
                                 */
                                if(i2ob_dev[unit].i2odev->controller->promise)
                                        panic("I2O controller firmware failed. Reboot and force a filesystem check.\n");
                                /* otherwise fall through and log it */
                        default:
                                printk(KERN_INFO "%s: Received event 0x%X we didn't register for\n"
                                        KERN_INFO "   Blame the I2O card manufacturer 8)\n",
                                        i2ob_dev[unit].i2odev->dev_name, evt);
                                break;
                }
        }

        complete_and_exit(&i2ob_thread_dead,0);
        return 0;
}

/*
 *      The I2O block driver is listed as one of those that pulls the
 *      front entry off the queue before processing it. This is important
 *      to remember here. If we drop the io lock then CURRENT will change
 *      on us. We must unlink CURRENT in this routine before we return, if
 *      we use it.
 */

static void i2ob_request(request_queue_t *q)
{
        struct request *req;
        struct i2ob_request *ireq;
        struct i2ob_device *dev;
        u32 m;

        while ((req = elv_next_request(q)) != NULL) {
                /*
                 *      On an IRQ completion if there is an inactive
                 *      request on the queue head it means it isn't yet
                 *      ready to dispatch.
                 */
                if(req->rq_status == RQ_INACTIVE)
                        return;

                dev = req->rq_disk->private_data;

                /*
                 *      Queue depths probably belong with some kind of
                 *      generic IOP commit control. Certainly it's not
                 *      right that it's global!
                 */
                if(atomic_read(&i2ob_queues[dev->unit]->queue_depth) >= dev->depth)
                        break;

                /* Get a message */
                m = i2ob_get(dev);

                if(m==0xFFFFFFFF)
                {
                        if(atomic_read(&i2ob_queues[dev->unit]->queue_depth) == 0)
                                printk(KERN_ERR "i2o_block: message queue and request queue empty!!\n");
                        break;
                }
                /*
                 * Everything ok, so pull from kernel queue onto our queue
                 */
                req->errors = 0;
                blkdev_dequeue_request(req);
                req->waiting = NULL;

                ireq = i2ob_queues[dev->unit]->i2ob_qhead;
                i2ob_queues[dev->unit]->i2ob_qhead = ireq->next;
                ireq->req = req;

                i2ob_send(m, dev, ireq, (dev->unit&0xF0));
        }
}
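
/*
 * Flow control in this driver is two-level (a summary of the code above,
 * not new behaviour): dispatch stops when the per-IOP queue_depth reaches
 * dev->depth or when the controller's inbound FIFO runs dry, and it is
 * restarted from i2o_block_reply(), which calls i2ob_request() again
 * after every completion.  The ireq free list popped from i2ob_qhead
 * here is refilled by i2ob_unhook_request() on the completion path.
 */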

/*
 *      SCSI-CAM for ioctl geometry mapping
 *      Duplicated with SCSI - this should be moved into somewhere common
 *      perhaps genhd ?
 *
 * LBA -> CHS mapping table taken from:
 *
 * "Incorporating the I2O Architecture into BIOS for Intel Architecture
 *  Platforms"
 *
 * This is an I2O document that is only available to I2O members,
 * not developers.
 *
 * From my understanding, this is how all the I2O cards do this
 * (X is the capacity in 512-byte sectors, as in the defines below):
 *
 * Disk Size        | Sectors | Heads | Cylinders
 * -----------------+---------+-------+-------------
 * X <= 528M        | 63      | 16    | X/(63 * 16)
 * 528M < X <= 1G   | 63      | 32    | X/(63 * 32)
 * 1G < X <= 21G    | 63      | 64    | X/(63 * 64)
 * 21G < X <= 42G   | 63      | 128   | X/(63 * 128)
 * 42G < X          | 63      | 255   | X/(63 * 255)
 *
 */
#define BLOCK_SIZE_528M         1081344
#define BLOCK_SIZE_1G           2097152
#define BLOCK_SIZE_21G          4403200
#define BLOCK_SIZE_42G          8806400
#define BLOCK_SIZE_84G          17612800

static void i2o_block_biosparam(
        unsigned long capacity,
        unsigned short *cyls,
        unsigned char *hds,
        unsigned char *secs)
{
        unsigned long heads, sectors, cylinders;

        sectors = 63L;                          /* Maximize sectors per track */
        if(capacity <= BLOCK_SIZE_528M)
                heads = 16;
        else if(capacity <= BLOCK_SIZE_1G)
                heads = 32;
        else if(capacity <= BLOCK_SIZE_21G)
                heads = 64;
        else if(capacity <= BLOCK_SIZE_42G)
                heads = 128;
        else
                heads = 255;

        cylinders = (unsigned long)capacity / (heads * sectors);

        *cyls = (unsigned short) cylinders;     /* Stuff return values */
        *secs = (unsigned char) sectors;
        *hds  = (unsigned char) heads;
}
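
/*
 * Worked example (illustrative numbers, not from the spec): a volume of
 * 2,000,000 sectors (~1 GB) is below BLOCK_SIZE_1G (2,097,152), so it
 * maps to 63 sectors/track and 32 heads, giving
 *
 *      cylinders = 2000000 / (32 * 63) = 2000000 / 2016 = 992
 *
 * i.e. a reported geometry of 992/32/63.  Capacity here is a count of
 * 512-byte sectors, and the integer division simply drops any partial
 * final cylinder.
 */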

/*
 *      Issue device specific ioctl calls.
 */

static int i2ob_ioctl(struct inode *inode, struct file *file,
                     unsigned int cmd, unsigned long arg)
{
        struct gendisk *disk = inode->i_bdev->bd_disk;
        struct i2ob_device *dev = disk->private_data;

        /* Anyone capable of this syscall can do *real bad* things */

        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
        switch (cmd) {
                case HDIO_GETGEO:
                {
                        struct hd_geometry g;
                        i2o_block_biosparam(get_capacity(disk),
                                        &g.cylinders, &g.heads, &g.sectors);
                        g.start = get_start_sect(inode->i_bdev);
                        return copy_to_user((void *)arg, &g, sizeof(g))?-EFAULT:0;
                }

                case BLKI2OGRSTRAT:
                        return put_user(dev->rcache, (int *)arg);
                case BLKI2OGWSTRAT:
                        return put_user(dev->wcache, (int *)arg);
                case BLKI2OSRSTRAT:
                        if(arg<0||arg>CACHE_SMARTFETCH)
                                return -EINVAL;
                        dev->rcache = arg;
                        return 0;
                case BLKI2OSWSTRAT:
                        if(arg!=0 && (arg<CACHE_WRITETHROUGH || arg>CACHE_SMARTBACK))
                                return -EINVAL;
                        dev->wcache = arg;
                        return 0;
        }
        return -ENOTTY;
}
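
/*
 * Usage sketch from user space (a hypothetical program, shown only to
 * make the cache-strategy interface concrete; error handling omitted):
 *
 *      int fd = open("/dev/i2o/hda", O_RDONLY);
 *      int strategy;
 *      ioctl(fd, BLKI2OGWSTRAT, &strategy);            // read current mode
 *      ioctl(fd, BLKI2OSWSTRAT, CACHE_WRITETHROUGH);   // set by value
 *
 * Note the asymmetry above: the get ioctls copy the strategy out through
 * the pointer passed in arg, while the set ioctls take the new strategy
 * directly as the value of arg.
 */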

/*
 *      Close the block device down
 */

static int i2ob_release(struct inode *inode, struct file *file)
{
        struct gendisk *disk = inode->i_bdev->bd_disk;
        struct i2ob_device *dev = disk->private_data;

        /*
         * This is to deal with the case of an application
         * opening a device and then the device disappears while
         * it's in use, and then the application tries to release
         * it.  ex: Unmounting a deleted RAID volume at reboot.
         * If we send messages, it will just cause FAILs since
         * the TID no longer exists.
         */
        if(!dev->i2odev)
                return 0;

        if (dev->refcnt <= 0)
                printk(KERN_ALERT "i2ob_release: refcount(%d) <= 0\n", dev->refcnt);
        dev->refcnt--;
        if(dev->refcnt==0)
        {
                /*
                 *      Flush the onboard cache on unmount
                 */
                u32 msg[5];
                int *query_done = &dev->done_flag;
                msg[0] = (FIVE_WORD_MSG_SIZE|SGL_OFFSET_0);
                msg[1] = I2O_CMD_BLOCK_CFLUSH<<24|HOST_TID<<12|dev->tid;
                msg[2] = i2ob_context|0x40000000;
                msg[3] = (u32)query_done;
                msg[4] = 60<<16;
                DEBUG("Flushing...");
                i2o_post_wait(dev->controller, msg, 20, 60);

                /*
                 *      Unlock the media
                 */
                msg[0] = FIVE_WORD_MSG_SIZE|SGL_OFFSET_0;
                msg[1] = I2O_CMD_BLOCK_MUNLOCK<<24|HOST_TID<<12|dev->tid;
                msg[2] = i2ob_context|0x40000000;
                msg[3] = (u32)query_done;
                msg[4] = -1;
                DEBUG("Unlocking...");
                i2o_post_wait(dev->controller, msg, 20, 2);
                DEBUG("Unlocked.\n");

                msg[0] = FOUR_WORD_MSG_SIZE|SGL_OFFSET_0;
                msg[1] = I2O_CMD_BLOCK_POWER<<24 | HOST_TID << 12 | dev->tid;
                if(dev->flags & (1<<3|1<<4))    /* Removable */
                        msg[4] = 0x21 << 24;
                else
                        msg[4] = 0x24 << 24;

                if(i2o_post_wait(dev->controller, msg, 20, 60)==0)
                        dev->power = 0x24;

                /*
                 * Now unclaim the device.
                 */

                if (i2o_release_device(dev->i2odev, &i2o_block_handler))
                        printk(KERN_ERR "i2ob_release: controller rejected unclaim.\n");

                DEBUG("Unclaim\n");
        }
        return 0;
}

/*
 *      Open the block device.
 */

static int i2ob_open(struct inode *inode, struct file *file)
{
        struct gendisk *disk = inode->i_bdev->bd_disk;
        struct i2ob_device *dev = disk->private_data;

        if(!dev->i2odev)
                return -ENODEV;

        if(dev->refcnt++==0)
        {
                u32 msg[6];

                DEBUG("Claim ");
                if(i2o_claim_device(dev->i2odev, &i2o_block_handler))
                {
                        dev->refcnt--;
                        printk(KERN_INFO "I2O Block: Could not open device\n");
                        return -EBUSY;
                }
                DEBUG("Claimed ");
                /*
                 *      Power up if needed
                 */

                if(dev->power > 0x1f)
                {
                        msg[0] = FOUR_WORD_MSG_SIZE|SGL_OFFSET_0;
                        msg[1] = I2O_CMD_BLOCK_POWER<<24 | HOST_TID << 12 | dev->tid;
                        msg[4] = 0x02 << 24;
                        if(i2o_post_wait(dev->controller, msg, 20, 60) == 0)
                                dev->power = 0x02;
                }

                /*
                 *      Mount the media if needed. Note that we don't use
                 *      the lock bit. Since we have to issue a lock if it
                 *      refuses a mount (quite possible) then we might as
                 *      well just send two messages out.
                 */
                msg[0] = FIVE_WORD_MSG_SIZE|SGL_OFFSET_0;
                msg[1] = I2O_CMD_BLOCK_MMOUNT<<24|HOST_TID<<12|dev->tid;
                msg[4] = -1;
                msg[5] = 0;
                DEBUG("Mount ");
                i2o_post_wait(dev->controller, msg, 24, 2);

                /*
                 *      Lock the media
                 */
                msg[0] = FIVE_WORD_MSG_SIZE|SGL_OFFSET_0;
                msg[1] = I2O_CMD_BLOCK_MLOCK<<24|HOST_TID<<12|dev->tid;
                msg[4] = -1;
                DEBUG("Lock ");
                i2o_post_wait(dev->controller, msg, 20, 2);
                DEBUG("Ready.\n");
        }
        return 0;
}

/*
 *      Issue a device query
 */

static int i2ob_query_device(struct i2ob_device *dev, int table,
        int field, void *buf, int buflen)
{
        return i2o_query_scalar(dev->controller, dev->tid,
                table, field, buf, buflen);
}
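
/*
 * Parameter groups used with i2ob_query_device() in this file (the names
 * are my shorthand for the BSA parameter groups, inferred from how the
 * fields are used below): group 0x0000 is the device information group
 * (field 0 device type, field 2 power state, field 3 block size, field 4
 * capacity, field 5 flags, field 6 status), group 0x0003 holds the cache
 * size, and group 0x0004 is the current-media group (field 0 capacity,
 * field 1 block size), which is preferred when the device supports it.
 */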

/*
 *      Install the I2O block device we found.
 */

static int i2ob_install_device(struct i2o_controller *c, struct i2o_device *d, int unit)
{
        u64 size;
        u32 blocksize;
        u8 type;
        u16 power;
        u32 flags, status;
        struct i2ob_device *dev=&i2ob_dev[unit];
        int i;

        /*
         * For logging purposes...
         */
        printk(KERN_INFO "i2ob: Installing tid %d device at unit %d\n",
                        d->lct_data.tid, unit);

        /*
         * If this is the first I2O block device found on this IOP,
         * we need to initialize all the queue data structures
         * before any I/O can be performed. If it fails, this
         * device is useless.
         */
        if(!i2ob_queues[unit]) {
                if(i2ob_init_iop(unit))
                        return 1;
        }

        /*
         * This will save one level of lookup/indirection in critical
         * code so that we can directly get the queue ptr from the
         * device instead of having to go through the IOP data structure.
         */
        dev->req_queue = i2ob_queues[unit]->req_queue;

        /* initialize gendisk structure */
        i2ob_disk[unit>>4]->private_data = dev;
        i2ob_disk[unit>>4]->queue = dev->req_queue;

        /*
         *      Ask for the current media data. If that isn't supported
         *      then we ask for the device capacity data
         */
        if(i2ob_query_device(dev, 0x0004, 1, &blocksize, 4) != 0
          || i2ob_query_device(dev, 0x0004, 0, &size, 8) !=0 )
        {
                i2ob_query_device(dev, 0x0000, 3, &blocksize, 4);
                i2ob_query_device(dev, 0x0000, 4, &size, 8);
        }

        if(i2ob_query_device(dev, 0x0000, 2, &power, 2)!=0)
                power = 0;
        i2ob_query_device(dev, 0x0000, 5, &flags, 4);
        i2ob_query_device(dev, 0x0000, 6, &status, 4);
        set_capacity(i2ob_disk[unit>>4], size>>9);

        /*
         * Max number of Scatter-Gather Elements
         */

        i2ob_dev[unit].power = power;   /* Save power state in device proper */
        i2ob_dev[unit].flags = flags;

        for(i=unit;i<=unit+15;i++)
        {
                request_queue_t *q = i2ob_dev[unit].req_queue;
                int segments = (d->controller->status_block->inbound_frame_size - 7) / 2;

                if(segments > 16)
                        segments = 16;

                i2ob_dev[i].power = power;      /* Save power state */
                i2ob_dev[unit].flags = flags;   /* Keep the type info */

                blk_queue_max_sectors(q, 96);   /* 256 might be nicer but many controllers
                                                   explode on 65536 or higher */
                blk_queue_max_phys_segments(q, segments);
                blk_queue_max_hw_segments(q, segments);

                i2ob_dev[i].rcache = CACHE_SMARTFETCH;
                i2ob_dev[i].wcache = CACHE_WRITETHROUGH;

                if(d->controller->battery == 0)
                        i2ob_dev[i].wcache = CACHE_WRITETHROUGH;

                if(d->controller->promise)
                        i2ob_dev[i].wcache = CACHE_WRITETHROUGH;

                if(d->controller->short_req)
                {
                        blk_queue_max_sectors(q, 8);
                        blk_queue_max_phys_segments(q, 8);
                        blk_queue_max_hw_segments(q, 8);
                }
        }

        strcpy(d->dev_name, i2ob_disk[unit>>4]->disk_name);
        strcpy(i2ob_disk[unit>>4]->devfs_name, i2ob_disk[unit>>4]->disk_name);

        printk(KERN_INFO "%s: Max segments %d, queue depth %d, byte limit %d.\n",
                 d->dev_name, i2ob_dev[unit].max_segments, i2ob_dev[unit].depth, i2ob_max_sectors[unit]<<9);

        i2ob_query_device(dev, 0x0000, 0, &type, 1);

        printk(KERN_INFO "%s: ", d->dev_name);
        switch(type)
        {
                case 0: printk("Disk Storage");break;
                case 4: printk("WORM");break;
                case 5: printk("CD-ROM");break;
                case 7: printk("Optical device");break;
                default:
                        printk("Type %d", type);
        }
        if(status&(1<<10))
                printk("(RAID)");

        if((flags^status)&(1<<4|1<<3))  /* Missing media or device */
        {
                printk(KERN_INFO " Not loaded.\n");
                /* Device missing ? */
                if((flags^status)&(1<<4))
                        return 1;
        }
        else
        {
                printk(": %dMB, %d byte sectors",
                        (int)(size>>20), blocksize);
        }
        if(status&(1<<0))
        {
                u32 cachesize;
                i2ob_query_device(dev, 0x0003, 0, &cachesize, 4);
                cachesize>>=10;
                if(cachesize>4095)
                        printk(", %dMb cache", cachesize>>10);
                else
                        printk(", %dKb cache", cachesize);
        }
        printk(".\n");
        printk(KERN_INFO "%s: Maximum sectors/read set to %d.\n",
                d->dev_name, i2ob_max_sectors[unit]);

        /*
         * Register for the events we're interested in and that the
         * device actually supports.
         */

        i2o_event_register(c, d->lct_data.tid, i2ob_context, unit,
                (I2OB_EVENT_MASK & d->lct_data.event_capabilities));
        return 0;
}

/*
 * Initialize IOP specific queue structures.  This is called
 * once for each IOP that has a block device sitting behind it.
 */
static int i2ob_init_iop(unsigned int unit)
{
        int i;

        i2ob_queues[unit] = (struct i2ob_iop_queue *) kmalloc(sizeof(struct i2ob_iop_queue), GFP_ATOMIC);
        if(!i2ob_queues[unit])
        {
                printk(KERN_WARNING "Could not allocate request queue for I2O block device!\n");
                return -1;
        }

        /*
         * Chain the slots into a free list, terminating the list at
         * the final slot of request_queue[].
         */
        for(i = 0; i < MAX_I2OB_DEPTH - 1; i++)
        {
                i2ob_queues[unit]->request_queue[i].next = &i2ob_queues[unit]->request_queue[i+1];
                i2ob_queues[unit]->request_queue[i].num = i;
        }
        i2ob_queues[unit]->request_queue[i].next = NULL;
        i2ob_queues[unit]->request_queue[i].num = i;

        i2ob_queues[unit]->i2ob_qhead = &i2ob_queues[unit]->request_queue[0];
        atomic_set(&i2ob_queues[unit]->queue_depth, 0);

        i2ob_queues[unit]->lock = SPIN_LOCK_UNLOCKED;
        i2ob_queues[unit]->req_queue = blk_init_queue(i2ob_request, &i2ob_queues[unit]->lock);
        if (!i2ob_queues[unit]->req_queue) {
                kfree(i2ob_queues[unit]);
                return -1;
        }

        i2ob_queues[unit]->req_queue->queuedata = &i2ob_queues[unit];

        return 0;
}

/*
 * Probe the I2O subsystem for block class devices
 */
static void i2ob_scan(int bios)
{
        int i;
        int warned = 0;

        struct i2o_device *d, *b=NULL;
        struct i2o_controller *c;
        struct i2ob_device *dev;

        for(i=0; i< MAX_I2O_CONTROLLERS; i++)
        {
                c=i2o_find_controller(i);

                if(c==NULL)
                        continue;

                /*
                 * The device list connected to the I2O Controller is
                 * doubly linked. Here we traverse to the end of the list,
                 * and start claiming devices from that end. This assures
                 * that within an I2O controller at least the newly created
                 * volumes get claimed after the older ones, thus mapping
                 * to the same major/minor (and hence device file name)
                 * after every reboot.
                 * The exceptions being:
                 * 1. If there was a TID reuse.
                 * 2. There was more than one I2O controller.
                 */

                if(!bios)
                {
                        for (d=c->devices;d!=NULL;d=d->next)
                                if(d->next == NULL)
                                        b = d;
                }
                else
                        b = c->devices;

                while(b != NULL)
                {
                        d=b;
                        if(bios)
                                b = b->next;
                        else
                                b = b->prev;

                        if(d->lct_data.class_id!=I2O_CLASS_RANDOM_BLOCK_STORAGE)
                                continue;

                        if(d->lct_data.user_tid != 0xFFF)
                                continue;

                        if(bios)
                        {
                                if(d->lct_data.bios_info != 0x80)
                                        continue;
                                printk(KERN_INFO "Claiming as Boot device: Controller %d, TID %d\n", c->unit, d->lct_data.tid);
                        }
                        else
                        {
                                if(d->lct_data.bios_info == 0x80)
                                        continue; /* Already claimed on pass 1 */
                        }

                        if(i2o_claim_device(d, &i2o_block_handler))
                        {
                                printk(KERN_WARNING "i2o_block: Controller %d, TID %d\n", c->unit,
                                        d->lct_data.tid);
                                printk(KERN_WARNING "\t%sevice refused claim! Skipping installation\n", bios?"Boot d":"D");
                                continue;
                        }

                        i2o_release_device(d, &i2o_block_handler);

                        if(scan_unit<MAX_I2OB<<4)
                        {
                                /*
                                 * Get the device and fill in the
                                 * Tid and controller.
                                 */
                                dev=&i2ob_dev[scan_unit];
                                dev->i2odev = d;
                                dev->controller = c;
                                dev->unit = c->unit;
                                dev->tid = d->lct_data.tid;

                                if(i2ob_install_device(c,d,scan_unit))
                                        printk(KERN_WARNING "Could not install I2O block device\n");
                                else
                                {
                                        add_disk(i2ob_disk[scan_unit>>4]);
                                        scan_unit+=16;
                                        i2ob_dev_count++;

                                        /* We want to know when device goes away */
                                        i2o_device_notify_on(d, &i2o_block_handler);
                                }
                        }
                        else
                        {
                                if(!warned++)
                                        printk(KERN_WARNING "i2o_block: too many devices, registering only %d.\n", scan_unit>>4);
                        }
                }
                i2o_unlock_controller(c);
        }
}

static void i2ob_probe(void)
{
        /*
         *      Some overhead/redundancy is involved here, while trying to
         *      claim the first boot volume encountered as /dev/i2o/hda
         *      every time. All the i2o_controllers are searched and the
         *      first i2o block device marked as bootable is claimed.
         *      If an I2O block device was booted from, the BIOS sets
         *      its bios_info field to 0x80; this is what we search for.
         *      Assuming that the bootable volume is /dev/i2o/hda
         *      every time will prevent any kernel panic while mounting
         *      the root partition.
         */

        printk(KERN_INFO "i2o_block: Checking for Boot device...\n");
        i2ob_scan(1);

        /*
         *      Now the remainder.
         */
        printk(KERN_INFO "i2o_block: Checking for I2O Block devices...\n");
        i2ob_scan(0);
}

/*
 * New device notification handler.  Called whenever a new
 * I2O block storage device is added to the system.
 *
 * Should we spin lock around this to keep multiple devs from
 * getting updated at the same time?
 */
void i2ob_new_device(struct i2o_controller *c, struct i2o_device *d)
{
        struct i2ob_device *dev;
        int unit = 0;

        printk(KERN_INFO "i2o_block: New device detected\n");
        printk(KERN_INFO "   Controller %d Tid %d\n",c->unit, d->lct_data.tid);

        /* Check for available space */
        if(i2ob_dev_count>=MAX_I2OB<<4)
        {
                printk(KERN_ERR "i2o_block: No more devices allowed!\n");
                return;
        }
        for(unit = 0; unit < (MAX_I2OB<<4); unit += 16)
        {
                if(!i2ob_dev[unit].i2odev)
                        break;
        }

        if(i2o_claim_device(d, &i2o_block_handler))
        {
                printk(KERN_INFO "i2o_block: Unable to claim device. Installation aborted\n");
                return;
        }

        dev = &i2ob_dev[unit];
        dev->i2odev = d;
        dev->controller = c;
        dev->tid = d->lct_data.tid;

        if(i2ob_install_device(c,d,unit))
                printk(KERN_ERR "i2o_block: Could not install new device\n");
        else
        {
                add_disk(i2ob_disk[unit>>4]);
                i2ob_dev_count++;
                i2o_device_notify_on(d, &i2o_block_handler);
        }

        i2o_release_device(d, &i2o_block_handler);

        return;
}
1437
1438 /*
1439  * Deleted device notification handler.  Called when a device we
1440  * are talking to has been deleted by the user or some other
1441  * mysterious force outside the kernel.
1442  */
1443 void i2ob_del_device(struct i2o_controller *c, struct i2o_device *d)
1444 {       
1445         int unit = 0;
1446         int i = 0;
1447         unsigned long flags;
1448
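             /*
              *      Take the per-controller lock so the request path cannot
              *      race with the teardown below.
              */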
1449         spin_lock_irqsave(I2O_LOCK(c->unit), flags);
1450
1451         /*
1452          * Need to do this... we sometimes get two events from the IRTOS
1453          * in a row and that causes lots of problems.
1454          */
1455         i2o_device_notify_off(d, &i2o_block_handler);
1456
1457         printk(KERN_INFO "I2O Block Device Deleted\n");
1458
1459         for(unit = 0; unit < MAX_I2OB<<4; unit += 16)
1460         {
1461                 if(i2ob_dev[unit].i2odev == d)
1462                 {
1463                         printk(KERN_INFO "  /dev/%s: Controller %d Tid %d\n", 
1464                                 d->dev_name, c->unit, d->lct_data.tid);
1465                         break;
1466                 }
1467         }
1468         if(unit >= MAX_I2OB<<4)
1469         {
1470                 printk(KERN_ERR "i2ob_del_device called, but not in dev table!\n");
1471                 spin_unlock_irqrestore(I2O_LOCK(c->unit), flags);
1472                 return;
1473         }
1474
1475         /* 
1476          * This will force errors when i2ob_get_queue() is called
1477          * by the kernel.
1478          */
1479         del_gendisk(i2ob_disk[unit>>4]);
1480         for(i = unit; i <= unit+15; i++)
1481         {
1482                 i2ob_dev[i].i2odev = NULL;
1483                 if(i2ob_dev[i].req_queue) blk_queue_max_sectors(i2ob_dev[i].req_queue, 0);
1484         }
1485         i2ob_dev[unit].req_queue = NULL;
1486         spin_unlock_irqrestore(I2O_LOCK(c->unit), flags);
1487
1488         /*
1489          * Decrease usage count for module
1490          */     
1491
1492         while(i2ob_dev[unit].refcnt--)
1493                 MOD_DEC_USE_COUNT;
1494
1495         i2ob_dev[unit].refcnt = 0;
1496         
1497         i2ob_dev[unit].tid = 0;
1498
1499         /* 
1500          * Do we need this?
1501          * The media didn't really change...the device is just gone
1502          * The media didn't really change... the device is just gone
1503         i2ob_media_change_flag[unit] = 1;
1504
1505         i2ob_dev_count--;       
1506 }
1507
1508 /*
1509  *      Have we seen a media change?
1510  */
1511 static int i2ob_media_change(struct gendisk *disk)
1512 {
1513         struct i2ob_device *p = disk->private_data;
1514         int i = p->index;
1515         if(i2ob_media_change_flag[i])
1516         {
1517                 i2ob_media_change_flag[i]=0;
1518                 return 1;
1519         }
1520         return 0;
1521 }
1522
1523 static int i2ob_revalidate(struct gendisk *disk)
1524 {
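             /*
              *      Revalidate by re-running the install path, which re-reads
              *      the device's size and parameters.
              */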
1525         struct i2ob_device *p = disk->private_data;
1526         return i2ob_install_device(p->controller, p->i2odev, p->index<<4);
1527 }
1528
1529 /*
1530  * Reboot notifier.  This is called by i2o_core when the system
1531  * shuts down.
1532  */
1533 static void i2ob_reboot_event(void)
1534 {
1535         int i;
1536         
1537         for(i=0;i<MAX_I2OB;i++)
1538         {
1539                 struct i2ob_device *dev=&i2ob_dev[(i<<4)];
1540                 
1541                 if(dev->refcnt!=0)
1542                 {
1543                         /*
1544                          *      Flush the onboard cache
1545                          */
1546                         u32 msg[5];
1547                         int *query_done = &dev->done_flag;
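                             /*
                              *      Build a five-word BLOCK_CFLUSH request by
                              *      hand: word 0 holds the message size and
                              *      SGL offset, word 1 the command plus
                              *      initiator/target TIDs, words 2-3 context
                              *      values, and word 4 presumably carries the
                              *      flush timeout (60) in its upper half-word.
                              */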
1548                         msg[0] = FIVE_WORD_MSG_SIZE|SGL_OFFSET_0;
1549                         msg[1] = I2O_CMD_BLOCK_CFLUSH<<24|HOST_TID<<12|dev->tid;
1550                         msg[2] = i2ob_context|0x40000000;
1551                         msg[3] = (u32)query_done;
1552                         msg[4] = 60<<16;
1553                         
1554                         DEBUG("Flushing...");
1555                         i2o_post_wait(dev->controller, msg, 20, 60);
1556
1557                         DEBUG("Unlocking...");
1558                         /*
1559                          *      Unlock the media
1560                          */
1561                         msg[0] = FIVE_WORD_MSG_SIZE|SGL_OFFSET_0;
1562                         msg[1] = I2O_CMD_BLOCK_MUNLOCK<<24|HOST_TID<<12|dev->tid;
1563                         msg[2] = i2ob_context|0x40000000;
1564                         msg[3] = (u32)query_done;
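                             /* word 4 appears to be a media identifier; -1 = all media */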
1565                         msg[4] = -1;
1566                         i2o_post_wait(dev->controller, msg, 20, 2);
1567                         
1568                         DEBUG("Unlocked.\n");
1569                 }
1570         }       
1571 }
1572
1573 static struct block_device_operations i2ob_fops =
1574 {
1575         .owner          = THIS_MODULE,
1576         .open           = i2ob_open,
1577         .release        = i2ob_release,
1578         .ioctl          = i2ob_ioctl,
1579         .media_changed  = i2ob_media_change,
1580         .revalidate_disk= i2ob_revalidate,
1581 };
1582
1583 /*
1584  * The module and kernel interface code follows
1585  *  (a bare smiley here confuses emacs :-)
1586  */
1587
1588 static int i2o_block_init(void)
1589 {
1590         int i;
1591
1592         printk(KERN_INFO "I2O Block Storage OSM v0.9\n");
1593         printk(KERN_INFO "   (c) Copyright 1999-2001 Red Hat Software.\n");
1594         
1595         /*
1596          *      Register the block device interfaces
1597          */
1598         if (register_blkdev(MAJOR_NR, "i2o_block"))
1599                 return -EIO;
1600
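             /*
              *      One gendisk per device, each spanning 16 minors (the
              *      whole disk plus 15 partitions).
              */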
1601         for (i = 0; i < MAX_I2OB; i++) {
1602                 struct gendisk *disk = alloc_disk(16);
1603                 if (!disk)
1604                         goto oom;
1605                 i2ob_dev[i<<4].index = i;
1606                 disk->queue = i2ob_dev[i<<4].req_queue;
1607                 i2ob_disk[i] = disk;
1608         }
1609 #ifdef MODULE
1610         printk(KERN_INFO "i2o_block: registered device at major %d\n", MAJOR_NR);
1611 #endif
1612
1613         /*
1614          *      Now fill in the boilerplate
1615          */
1616          
1617         for (i = 0; i < MAX_I2OB << 4; i++) {
1618                 i2ob_dev[i].refcnt = 0;
1619                 i2ob_dev[i].flags = 0;
1620                 i2ob_dev[i].controller = NULL;
1621                 i2ob_dev[i].i2odev = NULL;
1622                 i2ob_dev[i].tid = 0;
1623                 i2ob_dev[i].head = NULL;
1624                 i2ob_dev[i].tail = NULL;
1625                 i2ob_dev[i].depth = MAX_I2OB_DEPTH;
1626                 i2ob_max_sectors[i] = 2;        /* tiny default; presumably raised once the device is installed */
1627         }
1628         
1629         for (i = 0; i < MAX_I2OB; i++) {
1630                 struct gendisk *disk = i2ob_disk[i];
1631                 disk->major = MAJOR_NR;
1632                 disk->first_minor = i<<4;
1633                 disk->fops = &i2ob_fops;
1634                 sprintf(disk->disk_name, "i2o/hd%c", 'a' + i);
1635         }
1636         
1637         /*
1638          *      Per-IOP queues are created later; start them all empty
1639          */
1640         for(i = 0; i < MAX_I2O_CONTROLLERS; i++)
1641         {
1642                 i2ob_queues[i] = NULL;
1643         }
1644
1645         /*
1646          *      Register the OSM handler as we will need this to probe for
1647          *      drives, geometry and other goodies.
1648          */
1649
1650         if(i2o_install_handler(&i2o_block_handler)<0)
1651         {
1652                 unregister_blkdev(MAJOR_NR, "i2o_block");
1653                 printk(KERN_ERR "i2o_block: unable to register OSM.\n");
1654                 return -EINVAL;
1655         }
1656         i2ob_context = i2o_block_handler.context;        
1657
1658         /*
1659          * Initialize event handling thread
1660          */
1661         init_MUTEX_LOCKED(&i2ob_evt_sem);
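             /*
              *      The semaphore starts locked, so the event thread
              *      presumably sleeps in down() until an event is posted
              *      for it.
              */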
1662         evt_pid = kernel_thread(i2ob_evt, NULL, CLONE_SIGHAND);
1663         if(evt_pid < 0)
1664         {
1665                 printk(KERN_ERR "i2o_block: Could not initialize event thread.  Aborting\n");
1666                 i2o_remove_handler(&i2o_block_handler);
1667                 i = MAX_I2OB;   /* unwind every allocated disk, then the major */
1668                 goto oom;
1669         }
1670         i2ob_probe();
1671
1672         return 0;
1673
1674 oom:
1675         while (i--)
1676                 put_disk(i2ob_disk[i]);
1677         unregister_blkdev(MAJOR_NR, "i2o_block");
1678         return -ENOMEM;
1679 }
1680
1681
1682 static void i2o_block_exit(void)
1683 {
1684         int i;
1685         
1686         if(evt_running) {
1687                 printk(KERN_INFO "Killing I2O block threads...");
1688                 i = kill_proc(evt_pid, SIGKILL, 1);
1689                 if(!i) {
1690                         printk("waiting...\n");
1691                 }
1692                 /* Be sure it died */
1693                 wait_for_completion(&i2ob_thread_dead);
1694                 printk("done.\n");
1695         }
1696
1697         /*
1698          * Unregister for updates from any devices... otherwise we still
1699          * get them and the core jumps to random memory :O
1700          */
1701         if(i2ob_dev_count) {
1702                 struct i2o_device *d;
1703                 for(i = 0; i < MAX_I2OB; i++)
1704                         if((d=i2ob_dev[i<<4].i2odev)) {
1705                                 i2o_device_notify_off(d, &i2o_block_handler);
1706                                 i2o_event_register(d->controller, d->lct_data.tid,
1707                                         i2ob_context, i<<4, 0);
1708                         }
1709         }
1710         
1711         /*
1712          *      We may get further callbacks for ourselves. The i2o_core
1713          *      code handles this case reasonably sanely. The problem here
1714          *      is we shouldn't get them... but a couple of cards feel
1715          *      obliged to tell us stuff we don't care about.
1716          *
1717          *      This isn't ideal at all but will do for now.
1718          */
1719          
1720         set_current_state(TASK_UNINTERRUPTIBLE);
1721         schedule_timeout(HZ);
1722         
1723         /*
1724          *      Flush the OSM
1725          */
1726
1727         i2o_remove_handler(&i2o_block_handler);
1728                  
1729         for (i = 0; i < MAX_I2OB; i++)
1730                 put_disk(i2ob_disk[i]);
1731
1732         /*
1733          *      Release the block device major
1734          */
1735         if (unregister_blkdev(MAJOR_NR, "i2o_block") != 0)
1736                 printk(KERN_ERR "i2o_block: cleanup_module failed\n");
1737 }
1738
1739 MODULE_AUTHOR("Red Hat");
1740 MODULE_DESCRIPTION("I2O Block Device OSM");
1741 MODULE_LICENSE("GPL");
1742
1743 module_init(i2o_block_init);
1744 module_exit(i2o_block_exit);