fedora core 6 1.2949 + vserver 2.2.0
[linux-2.6.git] / arch / sparc64 / kernel / irq.c
index b7f6a1e..c3d068c 100644 (file)
@@ -6,7 +6,6 @@
  * Copyright (C) 1998  Jakub Jelinek    (jj@ultra.linux.cz)
  */
 
-#include <linux/config.h>
 #include <linux/module.h>
 #include <linux/sched.h>
 #include <linux/ptrace.h>
 #include <linux/delay.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
+#include <linux/bootmem.h>
+#include <linux/irq.h>
 
 #include <asm/ptrace.h>
 #include <asm/processor.h>
 #include <asm/atomic.h>
 #include <asm/system.h>
 #include <asm/irq.h>
+#include <asm/io.h>
 #include <asm/sbus.h>
 #include <asm/iommu.h>
 #include <asm/upa.h>
 #include <asm/oplib.h>
+#include <asm/prom.h>
 #include <asm/timer.h>
 #include <asm/smp.h>
-#include <asm/hardirq.h>
 #include <asm/starfire.h>
 #include <asm/uaccess.h>
 #include <asm/cache.h>
 #include <asm/cpudata.h>
-
-#ifdef CONFIG_SMP
-static void distribute_irqs(void);
-#endif
+#include <asm/auxio.h>
+#include <asm/head.h>
 
 /* UPA nodes send interrupt packet to UltraSparc with first data reg
  * value low 5 (7 on Starfire) bits holding the IRQ identifier being
@@ -54,10 +54,29 @@ static void distribute_irqs(void);
  * The IVEC handler does not need to act atomically, the PIL dispatch
  * code uses CAS to get an atomic snapshot of the list and clear it
  * at the same time.
+ *
+ * If you make changes to ino_bucket, please update hand coded assembler
+ * of the vectored interrupt trap handler(s) in entry.S and sun4v_ivec.S
  */
+/* One table entry per interrupt vector; see ivector_table[] below.
+ * The leading 0x00/0x04 markers give each field's byte offset.
+ */
+struct ino_bucket {
+       /* Next handler in per-CPU IRQ worklist.  We know that
+        * bucket pointers have the high 32-bits clear, so to
+        * save space we only store the bits we need.
+        */
+/*0x00*/unsigned int irq_chain;
 
+       /* Virtual interrupt number assigned to this INO.  */
+/*0x04*/unsigned int virt_irq;
+};
+
+/* ivector_table[] holds NUM_IVECS buckets.  __irq_ino() below recovers a
+ * bucket's index in this table; the sun4v code in this file uses that
+ * index as the INO.
+ */
+#define NUM_IVECS      (IMAP_INR + 1)
 struct ino_bucket ivector_table[NUM_IVECS] __attribute__ ((aligned (SMP_CACHE_BYTES)));
 
+/* Conversions between a bucket pointer and its 32-bit integer form:
+ *   __irq(bucket)  - bucket pointer -> 32-bit value
+ *   __bucket(irq)  - 32-bit value -> bucket pointer
+ *   __irq_ino(irq) - 32-bit value -> index of that bucket in ivector_table[]
+ */
+#define __irq_ino(irq) \
+        (((struct ino_bucket *)(unsigned long)(irq)) - &ivector_table[0])
+#define __bucket(irq) ((struct ino_bucket *)(unsigned long)(irq))
+#define __irq(bucket) ((unsigned int)(unsigned long)(bucket))
+
 /* This has to be in the main kernel image, it cannot be
  * turned into per-cpu data.  The reason is that the main
  * kernel image is locked into the TLB and this structure
@@ -65,999 +84,513 @@ struct ino_bucket ivector_table[NUM_IVECS] __attribute__ ((aligned (SMP_CACHE_BY
  * access to this structure takes a TLB miss it could cause
  * the 5-level sparc v9 trap stack to overflow.
  */
-struct irq_work_struct {
-       unsigned int    irq_worklists[16];
-};
-struct irq_work_struct __irq_work[NR_CPUS];
-#define irq_work(__cpu, __pil) &(__irq_work[(__cpu)].irq_worklists[(__pil)])
+/* Pointer to the irq_worklist field of the given CPU's trap_block[] entry.
+ * The whole expansion is parenthesized so that it binds as one operand
+ * whatever operators surround it at the use site.
+ */
+#define irq_work(__cpu)        (&(trap_block[(__cpu)].irq_worklist))
 
-#ifdef CONFIG_PCI
-/* This is a table of physical addresses used to deal with IBF_DMA_SYNC.
- * It is used for PCI only to synchronize DMA transfers with IRQ delivery
- * for devices behind busses other than APB on Sabre systems.
- *
- * Currently these physical addresses are just config space accesses
- * to the command register for that device.
- */
-unsigned long pci_dma_wsync;
-unsigned long dma_sync_reg_table[256];
-unsigned char dma_sync_reg_table_entry = 0;
-#endif
+/* virt_to_real_irq_table[] maps a virtual IRQ number to the real IRQ value
+ * recorded for it.  virt_irq_cur is the next virtual IRQ to hand out; it
+ * starts at 1, so slot 0 is never allocated and virt_irq_alloc() can use
+ * 0 as its failure return.
+ */
+static unsigned int virt_to_real_irq_table[NR_IRQS];
+static unsigned char virt_irq_cur = 1;
 
-/* This is based upon code in the 32-bit Sparc kernel written mostly by
- * David Redman (djhr@tadpole.co.uk).
- */
-#define MAX_STATIC_ALLOC       4
-static struct irqaction static_irqaction[MAX_STATIC_ALLOC];
-static int static_irq_count;
-
-/* This is exported so that fast IRQ handlers can get at it... -DaveM */
-struct irqaction *irq_action[NR_IRQS+1] = {
-         NULL, NULL, NULL, NULL, NULL, NULL , NULL, NULL,
-         NULL, NULL, NULL, NULL, NULL, NULL , NULL, NULL
-};
+/* Hand out the next unused virtual IRQ number and record @real_irq for it
+ * in virt_to_real_irq_table[].
+ *
+ * Returns the new virtual IRQ, or 0 after logging an error once
+ * virt_irq_cur has reached NR_IRQS.  Numbers are handed out sequentially.
+ * NOTE(review): no locking is visible around virt_irq_cur here - confirm
+ * that callers serialize.
+ */
+static unsigned char virt_irq_alloc(unsigned int real_irq)
+{
+       unsigned char ent;
 
-/* This only synchronizes entities which modify IRQ handler
- * state and some selected user-level spots that want to
- * read things in the table.  IRQ handler processing orders
- * its' accesses such that no locking is needed.
- */
-static spinlock_t irq_action_lock = SPIN_LOCK_UNLOCKED;
+       /* Virtual IRQs are carried in an unsigned char. */
+       BUILD_BUG_ON(NR_IRQS >= 256);
 
-static void register_irq_proc (unsigned int irq);
+       ent = virt_irq_cur;
+       if (ent >= NR_IRQS) {
+               printk(KERN_ERR "IRQ: Out of virtual IRQs.\n");
+               return 0;
+       }
 
-/*
- * Upper 2b of irqaction->flags holds the ino.
- * irqaction->mask holds the smp affinity information.
- */
-#define put_ino_in_irqaction(action, irq) \
-       action->flags &= 0xffffffffffffUL; \
-       if (__bucket(irq) == &pil0_dummy_bucket) \
-               action->flags |= 0xdeadUL << 48;  \
-       else \
-               action->flags |= __irq_ino(irq) << 48;
-#define get_ino_in_irqaction(action)   (action->flags >> 48)
-
-#if NR_CPUS > 64
-#error irqaction embedded smp affinity does not work with > 64 cpus, FIXME
+       virt_irq_cur = ent + 1;
+       virt_to_real_irq_table[ent] = real_irq;
+
+       return ent;
+}
+
+#if 0 /* Currently unused. */
+/* Reverse lookup: interpret @real_irq as a bucket pointer via __bucket()
+ * and return the virt_irq stored in that bucket.  The bucket field is an
+ * unsigned int; the return type narrows it to unsigned char.
+ */
+static unsigned char real_to_virt_irq(unsigned int real_irq)
+{
+       struct ino_bucket *bucket = __bucket(real_irq);
+
+       return bucket->virt_irq;
+}
 #endif
 
-#define put_smpaff_in_irqaction(action, smpaff)        (action)->mask = (smpaff)
-#define get_smpaff_in_irqaction(action)        ((action)->mask)
+/* Look up the real IRQ value recorded for @virt_irq by virt_irq_alloc().
+ *
+ * Returns 0 for a slot that was never filled in (the table has static
+ * storage, so it starts out zeroed) and for any @virt_irq at or beyond
+ * NR_IRQS.  An unsigned char can hold values up to 255, which may exceed
+ * the table size, so the index is range-checked instead of being read
+ * out of bounds.
+ */
+static unsigned int virt_to_real_irq(unsigned char virt_irq)
+{
+       if (virt_irq >= NR_IRQS)
+               return 0;
+
+       return virt_to_real_irq_table[virt_irq];
+}
+
+/*
+ * /proc/interrupts printing:
+ */
 
+/* Emit the /proc/interrupts output for one index.
+ *
+ * @p: seq_file to print into.
+ * @v: points at a loff_t holding the IRQ index to print.
+ *
+ * Index 0 first prints the "CPUn" column header for every online CPU.
+ * For an index below NR_IRQS that has at least one action, prints the
+ * IRQ number, its count (per online CPU on SMP, one total otherwise),
+ * the chip typename and the names of all chained actions, all while
+ * holding that descriptor's lock.  Always returns 0.
+ */
 int show_interrupts(struct seq_file *p, void *v)
 {
+       int i = *(loff_t *) v, j;
+       struct irqaction * action;
        unsigned long flags;
-       int i = *(loff_t *) v;
-       struct irqaction *action;
-#ifdef CONFIG_SMP
-       int j;
-#endif
 
-       spin_lock_irqsave(&irq_action_lock, flags);
-       if (i <= NR_IRQS) {
-               if (!(action = *(i + irq_action)))
-                       goto out_unlock;
-               seq_printf(p, "%3d: ", i);
+       if (i == 0) {
+               seq_printf(p, "           ");
+               for_each_online_cpu(j)
+                       seq_printf(p, "CPU%d       ",j);
+               seq_putc(p, '\n');
+       }
+
+       if (i < NR_IRQS) {
+               spin_lock_irqsave(&irq_desc[i].lock, flags);
+               action = irq_desc[i].action;
+               if (!action)
+                       goto skip;
+               seq_printf(p, "%3d: ",i);
 #ifndef CONFIG_SMP
                seq_printf(p, "%10u ", kstat_irqs(i));
 #else
-               for (j = 0; j < NR_CPUS; j++) {
-                       if (!cpu_online(j))
-                               continue;
-                       seq_printf(p, "%10u ",
-                                  kstat_cpu(j).irqs[i]);
-               }
+               for_each_online_cpu(j)
+                       seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
 #endif
-               seq_printf(p, " %s:%lx", action->name,
-                          get_ino_in_irqaction(action));
-               for (action = action->next; action; action = action->next) {
-                       seq_printf(p, ", %s:%lx", action->name,
-                                  get_ino_in_irqaction(action));
-               }
+               seq_printf(p, " %9s", irq_desc[i].chip->typename);
+               seq_printf(p, "  %s", action->name);
+
+               for (action=action->next; action; action = action->next)
+                       seq_printf(p, ", %s", action->name);
+
                seq_putc(p, '\n');
+skip:
+               spin_unlock_irqrestore(&irq_desc[i].lock, flags);
        }
-out_unlock:
-       spin_unlock_irqrestore(&irq_action_lock, flags);
-
        return 0;
 }
 
-/* Now these are always passed a true fully specified sun4u INO. */
-void enable_irq(unsigned int irq)
-{
-       struct ino_bucket *bucket = __bucket(irq);
-       unsigned long imap;
-       unsigned long tid;
+extern unsigned long real_hard_smp_processor_id(void);
 
-       imap = bucket->imap;
-       if (imap == 0UL)
-               return;
-
-       preempt_disable();
+/* Build the target-ID bits of an IMAP value that steer an interrupt to
+ * @cpuid.  The caller ORs in IMAP_VALID.
+ *
+ *  - Starfire: starfire_translate(@imap, @cpuid), shifted by
+ *    IMAP_TID_SHIFT and masked with IMAP_TID_UPA.
+ *  - cheetah/cheetah_plus whose %ver upper 32 bits match __JALAPENO_ID
+ *    or __SERRANO_ID: @cpuid shifted by IMAP_TID_SHIFT, masked with
+ *    IMAP_TID_JBUS.
+ *  - other cheetah/cheetah_plus: bits 0-4 of @cpuid go to the
+ *    IMAP_AID_SAFARI field and bits 5-9 to the IMAP_NID_SAFARI field.
+ *  - anything else: @cpuid shifted by IMAP_TID_SHIFT, masked with
+ *    IMAP_TID_UPA.
+ */
+static unsigned int sun4u_compute_tid(unsigned long imap, unsigned long cpuid)
+{
+       unsigned int tid;
 
-       if (tlb_type == cheetah || tlb_type == cheetah_plus) {
-               unsigned long ver;
+       if (this_is_starfire) {
+               tid = starfire_translate(imap, cpuid);
+               tid <<= IMAP_TID_SHIFT;
+               tid &= IMAP_TID_UPA;
+       } else {
+               if (tlb_type == cheetah || tlb_type == cheetah_plus) {
+                       unsigned long ver;
+
+                       __asm__ ("rdpr %%ver, %0" : "=r" (ver));
+                       if ((ver >> 32UL) == __JALAPENO_ID ||
+                           (ver >> 32UL) == __SERRANO_ID) {
+                               tid = cpuid << IMAP_TID_SHIFT;
+                               tid &= IMAP_TID_JBUS;
+                       } else {
+                               unsigned int a = cpuid & 0x1f;
+                               unsigned int n = (cpuid >> 5) & 0x1f;
 
-               __asm__ ("rdpr %%ver, %0" : "=r" (ver));
-               if ((ver >> 32) == 0x003e0016) {
-                       /* We set it to our JBUS ID. */
-                       __asm__ __volatile__("ldxa [%%g0] %1, %0"
-                                            : "=r" (tid)
-                                            : "i" (ASI_JBUS_CONFIG));
-                       tid = ((tid & (0x1fUL<<17)) << 9);
-                       tid &= IMAP_TID_JBUS;
+                               tid = ((a << IMAP_AID_SHIFT) |
+                                      (n << IMAP_NID_SHIFT));
+                               tid &= (IMAP_AID_SAFARI |
+                                       IMAP_NID_SAFARI);
+                       }
                } else {
-                       /* We set it to our Safari AID. */
-                       __asm__ __volatile__("ldxa [%%g0] %1, %0"
-                                            : "=r" (tid)
-                                            : "i" (ASI_SAFARI_CONFIG));
-                       tid = ((tid & (0x3ffUL<<17)) << 9);
-                       tid &= IMAP_AID_SAFARI;
+                       tid = cpuid << IMAP_TID_SHIFT;
+                       tid &= IMAP_TID_UPA;
                }
-       } else if (this_is_starfire == 0) {
-               /* We set it to our UPA MID. */
-               __asm__ __volatile__("ldxa [%%g0] %1, %0"
-                                    : "=r" (tid)
-                                    : "i" (ASI_UPA_CONFIG));
-               tid = ((tid & UPA_CONFIG_MID) << 9);
-               tid &= IMAP_TID_UPA;
-       } else {
-               tid = (starfire_translate(imap, smp_processor_id()) << 26);
-               tid &= IMAP_TID_UPA;
        }
 
-       /* NOTE NOTE NOTE, IGN and INO are read-only, IGN is a product
-        * of this SYSIO's preconfigured IGN in the SYSIO Control
-        * Register, the hardware just mirrors that value here.
-        * However for Graphics and UPA Slave devices the full
-        * IMAP_INR field can be set by the programmer here.
-        *
-        * Things like FFB can now be handled via the new IRQ mechanism.
-        */
-       upa_writel(tid | IMAP_VALID, imap);
-
-       preempt_enable();
+       return tid;
 }
 
-/* This now gets passed true ino's as well. */
-void disable_irq(unsigned int irq)
-{
-       struct ino_bucket *bucket = __bucket(irq);
-       unsigned long imap;
-
-       imap = bucket->imap;
-       if (imap != 0UL) {
-               u32 tmp;
-
-               /* NOTE: We do not want to futz with the IRQ clear registers
-                *       and move the state to IDLE, the SCSI code does call
-                *       disable_irq() to assure atomicity in the queue cmd
-                *       SCSI adapter driver code.  Thus we'd lose interrupts.
-                */
-               tmp = upa_readl(imap);
-               tmp &= ~IMAP_VALID;
-               upa_writel(tmp, imap);
-       }
-}
+/* Per-IRQ data that the sun4u_irq_*() routines in this file fetch from
+ * the descriptor's handler_data pointer.
+ */
+struct irq_handler_data {
+       /* Address that sun4u_irq_end() writes ICLR_IDLE to. */
+       unsigned long   iclr;
+       /* Address that sun4u_irq_enable()/sun4u_irq_disable() access
+        * with upa_readl()/upa_writel() to set or clear IMAP_VALID.
+        */
+       unsigned long   imap;
 
-/* The timer is the one "weird" interrupt which is generated by
- * the CPU %tick register and not by some normal vectored interrupt
- * source.  To handle this special case, we use this dummy INO bucket.
- */
-static struct ino_bucket pil0_dummy_bucket = {
-       0,      /* irq_chain */
-       0,      /* pil */
-       0,      /* pending */
-       0,      /* flags */
-       0,      /* __unused */
-       NULL,   /* irq_info */
-       0UL,    /* iclr */
-       0UL,    /* imap */
+       /* Optional hook and its two opaque arguments.  NOTE(review):
+        * not used in the part of the file shown here; presumably it is
+        * called before the handler runs - confirm against the caller.
+        */
+       void            (*pre_handler)(unsigned int, void *, void *);
+       void            *pre_handler_arg1;
+       void            *pre_handler_arg2;
 };
 
-unsigned int build_irq(int pil, int inofixup, unsigned long iclr, unsigned long imap)
+/* Translate @virt_irq to its ino_bucket: look up the real IRQ value with
+ * virt_to_real_irq() and, if it is non-zero, convert it to a bucket
+ * pointer with __bucket().  Returns NULL when the real IRQ value is 0.
+ *
+ * NOTE(review): @virt_irq is an unsigned int here but virt_to_real_irq()
+ * takes an unsigned char, so values above 255 are truncated in the call.
+ */
+static inline struct ino_bucket *virt_irq_to_bucket(unsigned int virt_irq)
 {
-       struct ino_bucket *bucket;
-       int ino;
-
-       if (pil == 0) {
-               if (iclr != 0UL || imap != 0UL) {
-                       prom_printf("Invalid dummy bucket for PIL0 (%lx:%lx)\n",
-                                   iclr, imap);
-                       prom_halt();
-               }
-               return __irq(&pil0_dummy_bucket);
-       }
+       unsigned int real_irq = virt_to_real_irq(virt_irq);
+       struct ino_bucket *bucket = NULL;
 
-       /* RULE: Both must be specified in all other cases. */
-       if (iclr == 0UL || imap == 0UL) {
-               prom_printf("Invalid build_irq %d %d %016lx %016lx\n",
-                           pil, inofixup, iclr, imap);
-               prom_halt();
-       }
-       
-       ino = (upa_readl(imap) & (IMAP_IGN | IMAP_INO)) + inofixup;
-       if (ino > NUM_IVECS) {
-               prom_printf("Invalid INO %04x (%d:%d:%016lx:%016lx)\n",
-                           ino, pil, inofixup, iclr, imap);
-               prom_halt();
-       }
-
-       /* Ok, looks good, set it up.  Don't touch the irq_chain or
-        * the pending flag.
-        */
-       bucket = &ivector_table[ino];
-       if ((bucket->flags & IBF_ACTIVE) ||
-           (bucket->irq_info != NULL)) {
-               /* This is a gross fatal error if it happens here. */
-               prom_printf("IRQ: Trying to reinit INO bucket, fatal error.\n");
-               prom_printf("IRQ: Request INO %04x (%d:%d:%016lx:%016lx)\n",
-                           ino, pil, inofixup, iclr, imap);
-               prom_printf("IRQ: Existing (%d:%016lx:%016lx)\n",
-                           bucket->pil, bucket->iclr, bucket->imap);
-               prom_printf("IRQ: Cannot continue, halting...\n");
-               prom_halt();
-       }
-       bucket->imap  = imap;
-       bucket->iclr  = iclr;
-       bucket->pil   = pil;
-       bucket->flags = 0;
-
-       bucket->irq_info = NULL;
+       if (likely(real_irq))
+               bucket = __bucket(real_irq);
 
-       return __irq(bucket);
+       return bucket;
 }
 
-static void atomic_bucket_insert(struct ino_bucket *bucket)
+#ifdef CONFIG_SMP
+/* Pick the CPU that should receive @virt_irq.
+ *
+ * If the descriptor's affinity mask equals CPU_MASK_ALL, or contains no
+ * online CPU, CPUs are handed out round-robin: a static rover, guarded
+ * by its own spinlock, supplies the result and is then advanced to the
+ * next online CPU.  Otherwise the first online CPU in the affinity mask
+ * is returned.
+ */
+static int irq_choose_cpu(unsigned int virt_irq)
 {
-       unsigned long pstate;
-       unsigned int *ent;
-
-       __asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate));
-       __asm__ __volatile__("wrpr %0, %1, %%pstate"
-                            : : "r" (pstate), "i" (PSTATE_IE));
-       ent = irq_work(smp_processor_id(), bucket->pil);
-       bucket->irq_chain = *ent;
-       *ent = __irq(bucket);
-       __asm__ __volatile__("wrpr %0, 0x0, %%pstate" : : "r" (pstate));
-}
+       cpumask_t mask = irq_desc[virt_irq].affinity;
+       int cpuid;
 
-int request_irq(unsigned int irq, irqreturn_t (*handler)(int, void *, struct pt_regs *),
-               unsigned long irqflags, const char *name, void *dev_id)
-{
-       struct irqaction *action, *tmp = NULL;
-       struct ino_bucket *bucket = __bucket(irq);
-       unsigned long flags;
-       int pending = 0;
-
-       if ((bucket != &pil0_dummy_bucket) &&
-           (bucket < &ivector_table[0] ||
-            bucket >= &ivector_table[NUM_IVECS])) {
-               unsigned int *caller;
-
-               __asm__ __volatile__("mov %%i7, %0" : "=r" (caller));
-               printk(KERN_CRIT "request_irq: Old style IRQ registry attempt "
-                      "from %p, irq %08x.\n", caller, irq);
-               return -EINVAL;
-       }       
-       if (!handler)
-           return -EINVAL;
-
-       if ((bucket != &pil0_dummy_bucket) && (irqflags & SA_SAMPLE_RANDOM)) {
-               /*
-                * This function might sleep, we want to call it first,
-                * outside of the atomic block. In SA_STATIC_ALLOC case,
-                * random driver's kmalloc will fail, but it is safe.
-                * If already initialized, random driver will not reinit.
-                * Yes, this might clear the entropy pool if the wrong
-                * driver is attempted to be loaded, without actually
-                * installing a new handler, but is this really a problem,
-                * only the sysadmin is able to do this.
-                */
-               rand_initialize_irq(irq);
-       }
+       if (cpus_equal(mask, CPU_MASK_ALL)) {
+               static int irq_rover;
+               static DEFINE_SPINLOCK(irq_rover_lock);
+               unsigned long flags;
 
-       spin_lock_irqsave(&irq_action_lock, flags);
+               /* Round-robin distribution... */
+       do_round_robin:
+               spin_lock_irqsave(&irq_rover_lock, flags);
 
-       action = *(bucket->pil + irq_action);
-       if (action) {
-               if ((action->flags & SA_SHIRQ) && (irqflags & SA_SHIRQ))
-                       for (tmp = action; tmp->next; tmp = tmp->next)
-                               ;
-               else {
-                       spin_unlock_irqrestore(&irq_action_lock, flags);
-                       return -EBUSY;
+               while (!cpu_online(irq_rover)) {
+                       if (++irq_rover >= NR_CPUS)
+                               irq_rover = 0;
                }
-               action = NULL;          /* Or else! */
-       }
+               cpuid = irq_rover;
+               do {
+                       if (++irq_rover >= NR_CPUS)
+                               irq_rover = 0;
+               } while (!cpu_online(irq_rover));
 
-       /* If this is flagged as statically allocated then we use our
-        * private struct which is never freed.
-        */
-       if (irqflags & SA_STATIC_ALLOC) {
-           if (static_irq_count < MAX_STATIC_ALLOC)
-               action = &static_irqaction[static_irq_count++];
-           else
-               printk("Request for IRQ%d (%s) SA_STATIC_ALLOC failed "
-                      "using kmalloc\n", irq, name);
-       }       
-       if (action == NULL)
-           action = (struct irqaction *)kmalloc(sizeof(struct irqaction),
-                                                GFP_ATOMIC);
-       
-       if (!action) { 
-               spin_unlock_irqrestore(&irq_action_lock, flags);
-               return -ENOMEM;
-       }
-
-       if (bucket == &pil0_dummy_bucket) {
-               bucket->irq_info = action;
-               bucket->flags |= IBF_ACTIVE;
+               spin_unlock_irqrestore(&irq_rover_lock, flags);
        } else {
-               if ((bucket->flags & IBF_ACTIVE) != 0) {
-                       void *orig = bucket->irq_info;
-                       void **vector = NULL;
-
-                       if ((bucket->flags & IBF_PCI) == 0) {
-                               printk("IRQ: Trying to share non-PCI bucket.\n");
-                               goto free_and_ebusy;
-                       }
-                       if ((bucket->flags & IBF_MULTI) == 0) {
-                               vector = kmalloc(sizeof(void *) * 4, GFP_ATOMIC);
-                               if (vector == NULL)
-                                       goto free_and_enomem;
-
-                               /* We might have slept. */
-                               if ((bucket->flags & IBF_MULTI) != 0) {
-                                       int ent;
-
-                                       kfree(vector);
-                                       vector = (void **)bucket->irq_info;
-                                       for(ent = 0; ent < 4; ent++) {
-                                               if (vector[ent] == NULL) {
-                                                       vector[ent] = action;
-                                                       break;
-                                               }
-                                       }
-                                       if (ent == 4)
-                                               goto free_and_ebusy;
-                               } else {
-                                       vector[0] = orig;
-                                       vector[1] = action;
-                                       vector[2] = NULL;
-                                       vector[3] = NULL;
-                                       bucket->irq_info = vector;
-                                       bucket->flags |= IBF_MULTI;
-                               }
-                       } else {
-                               int ent;
-
-                               vector = (void **)orig;
-                               for (ent = 0; ent < 4; ent++) {
-                                       if (vector[ent] == NULL) {
-                                               vector[ent] = action;
-                                               break;
-                                       }
-                               }
-                               if (ent == 4)
-                                       goto free_and_ebusy;
-                       }
-               } else {
-                       bucket->irq_info = action;
-                       bucket->flags |= IBF_ACTIVE;
-               }
-               pending = bucket->pending;
-               if (pending)
-                       bucket->pending = 0;
-       }
+               cpumask_t tmp;
 
-       action->handler = handler;
-       action->flags = irqflags;
-       action->name = name;
-       action->next = NULL;
-       action->dev_id = dev_id;
-       put_ino_in_irqaction(action, irq);
-       put_smpaff_in_irqaction(action, 0);
+               cpus_and(tmp, cpu_online_map, mask);
 
-       if (tmp)
-               tmp->next = action;
-       else
-               *(bucket->pil + irq_action) = action;
+               if (cpus_empty(tmp))
+                       goto do_round_robin;
 
-       enable_irq(irq);
-
-       /* We ate the IVEC already, this makes sure it does not get lost. */
-       if (pending) {
-               atomic_bucket_insert(bucket);
-               set_softint(1 << bucket->pil);
+               cpuid = first_cpu(tmp);
        }
-       spin_unlock_irqrestore(&irq_action_lock, flags);
-       if ((bucket != &pil0_dummy_bucket) && (!(irqflags & SA_STATIC_ALLOC)))
-               register_irq_proc(__irq_ino(irq));
-
-#ifdef CONFIG_SMP
-       distribute_irqs();
-#endif
-       return 0;
 
-free_and_ebusy:
-       kfree(action);
-       spin_unlock_irqrestore(&irq_action_lock, flags);
-       return -EBUSY;
-
-free_and_enomem:
-       kfree(action);
-       spin_unlock_irqrestore(&irq_action_lock, flags);
-       return -ENOMEM;
+       return cpuid;
 }
+#else
+/* !CONFIG_SMP variant: always returns real_hard_smp_processor_id(). */
+static int irq_choose_cpu(unsigned int virt_irq)
+{
+       return real_hard_smp_processor_id();
+}
+#endif
 
-EXPORT_SYMBOL(request_irq);
-
-void free_irq(unsigned int irq, void *dev_id)
+/* Enable @virt_irq: pick a CPU with irq_choose_cpu(), turn it into
+ * target-ID bits with sun4u_compute_tid(), and write those bits together
+ * with IMAP_VALID to the address held in handler_data->imap.
+ * Does nothing when the descriptor has no handler_data.
+ */
+static void sun4u_irq_enable(unsigned int virt_irq)
 {
-       struct irqaction *action;
-       struct irqaction *tmp = NULL;
-       unsigned long flags;
-       struct ino_bucket *bucket = __bucket(irq), *bp;
+       irq_desc_t *desc = irq_desc + virt_irq;
+       struct irq_handler_data *data = desc->handler_data;
 
-       if ((bucket != &pil0_dummy_bucket) &&
-           (bucket < &ivector_table[0] ||
-            bucket >= &ivector_table[NUM_IVECS])) {
-               unsigned int *caller;
+       if (likely(data)) {
+               unsigned long cpuid, imap;
+               unsigned int tid;
 
-               __asm__ __volatile__("mov %%i7, %0" : "=r" (caller));
-               printk(KERN_CRIT "free_irq: Old style IRQ removal attempt "
-                      "from %p, irq %08x.\n", caller, irq);
-               return;
-       }
-       
-       spin_lock_irqsave(&irq_action_lock, flags);
+               cpuid = irq_choose_cpu(virt_irq);
+               imap = data->imap;
 
-       action = *(bucket->pil + irq_action);
-       if (!action->handler) {
-               printk("Freeing free IRQ %d\n", bucket->pil);
-               return;
-       }
-       if (dev_id) {
-               for ( ; action; action = action->next) {
-                       if (action->dev_id == dev_id)
-                               break;
-                       tmp = action;
-               }
-               if (!action) {
-                       printk("Trying to free free shared IRQ %d\n", bucket->pil);
-                       spin_unlock_irqrestore(&irq_action_lock, flags);
-                       return;
-               }
-       } else if (action->flags & SA_SHIRQ) {
-               printk("Trying to free shared IRQ %d with NULL device ID\n", bucket->pil);
-               spin_unlock_irqrestore(&irq_action_lock, flags);
-               return;
-       }
+               tid = sun4u_compute_tid(imap, cpuid);
 
-       if (action->flags & SA_STATIC_ALLOC) {
-               printk("Attempt to free statically allocated IRQ %d (%s)\n",
-                      bucket->pil, action->name);
-               spin_unlock_irqrestore(&irq_action_lock, flags);
-               return;
+               upa_writel(tid | IMAP_VALID, imap);
        }
+}
 
-       if (action && tmp)
-               tmp->next = action->next;
-       else
-               *(bucket->pil + irq_action) = action->next;
-
-       spin_unlock_irqrestore(&irq_action_lock, flags);
-
-       synchronize_irq(irq);
-
-       spin_lock_irqsave(&irq_action_lock, flags);
-
-       if (bucket != &pil0_dummy_bucket) {
-               unsigned long imap = bucket->imap;
-               void **vector, *orig;
-               int ent;
-
-               orig = bucket->irq_info;
-               vector = (void **)orig;
-
-               if ((bucket->flags & IBF_MULTI) != 0) {
-                       int other = 0;
-                       void *orphan = NULL;
-                       for (ent = 0; ent < 4; ent++) {
-                               if (vector[ent] == action)
-                                       vector[ent] = NULL;
-                               else if (vector[ent] != NULL) {
-                                       orphan = vector[ent];
-                                       other++;
-                               }
-                       }
-
-                       /* Only free when no other shared irq
-                        * uses this bucket.
-                        */
-                       if (other) {
-                               if (other == 1) {
-                                       /* Convert back to non-shared bucket. */
-                                       bucket->irq_info = orphan;
-                                       bucket->flags &= ~(IBF_MULTI);
-                                       kfree(vector);
-                               }
-                               goto out;
-                       }
-               } else {
-                       bucket->irq_info = NULL;
-               }
+/* Disable @virt_irq: read the value at handler_data->imap, clear
+ * IMAP_VALID and write it back.  The other bits are written back
+ * unchanged.  Does nothing when the descriptor has no handler_data.
+ */
+static void sun4u_irq_disable(unsigned int virt_irq)
+{
+       irq_desc_t *desc = irq_desc + virt_irq;
+       struct irq_handler_data *data = desc->handler_data;
 
-               /* This unique interrupt source is now inactive. */
-               bucket->flags &= ~IBF_ACTIVE;
-
-               /* See if any other buckets share this bucket's IMAP
-                * and are still active.
-                */
-               for (ent = 0; ent < NUM_IVECS; ent++) {
-                       bp = &ivector_table[ent];
-                       if (bp != bucket        &&
-                           bp->imap == imap    &&
-                           (bp->flags & IBF_ACTIVE) != 0)
-                               break;
-               }
+       if (likely(data)) {
+               unsigned long imap = data->imap;
+               u32 tmp = upa_readl(imap);
 
-               /* Only disable when no other sub-irq levels of
-                * the same IMAP are active.
-                */
-               if (ent == NUM_IVECS)
-                       disable_irq(irq);
+               tmp &= ~IMAP_VALID;
+               upa_writel(tmp, imap);
        }
-
-out:
-       kfree(action);
-       spin_unlock_irqrestore(&irq_action_lock, flags);
 }
 
-EXPORT_SYMBOL(free_irq);
-
-#ifdef CONFIG_SMP
-void synchronize_irq(unsigned int irq)
+/* End-of-interrupt hook for @virt_irq: write ICLR_IDLE to the address
+ * held in handler_data->iclr.  Does nothing when the descriptor has no
+ * handler_data.
+ */
+static void sun4u_irq_end(unsigned int virt_irq)
 {
-       struct ino_bucket *bucket = __bucket(irq);
-
-#if 0
-       /* The following is how I wish I could implement this.
-        * Unfortunately the ICLR registers are read-only, you can
-        * only write ICLR_foo values to them.  To get the current
-        * IRQ status you would need to get at the IRQ diag registers
-        * in the PCI/SBUS controller and the layout of those vary
-        * from one controller to the next, sigh... -DaveM
-        */
-       unsigned long iclr = bucket->iclr;
-
-       while (1) {
-               u32 tmp = upa_readl(iclr);
-               
-               if (tmp == ICLR_TRANSMIT ||
-                   tmp == ICLR_PENDING) {
-                       cpu_relax();
-                       continue;
-               }
-               break;
-       }
-#else
-       /* So we have to do this with a INPROGRESS bit just like x86.  */
-       while (bucket->flags & IBF_INPROGRESS)
-               cpu_relax();
-#endif
-}
-#endif /* CONFIG_SMP */
+       irq_desc_t *desc = irq_desc + virt_irq;
+       struct irq_handler_data *data = desc->handler_data;
 
-void catch_disabled_ivec(struct pt_regs *regs)
-{
-       int cpu = smp_processor_id();
-       struct ino_bucket *bucket = __bucket(*irq_work(cpu, 0));
-
-       /* We can actually see this on Ultra/PCI PCI cards, which are bridges
-        * to other devices.  Here a single IMAP enabled potentially multiple
-        * unique interrupt sources (which each do have a unique ICLR register.
-        *
-        * So what we do is just register that the IVEC arrived, when registered
-        * for real the request_irq() code will check the bit and signal
-        * a local CPU interrupt for it.
-        */
-#if 0
-       printk("IVEC: Spurious interrupt vector (%x) received at (%016lx)\n",
-              bucket - &ivector_table[0], regs->tpc);
-#endif
-       *irq_work(cpu, 0) = 0;
-       bucket->pending = 1;
+       if (likely(data))
+               upa_writel(ICLR_IDLE, data->iclr);
 }
 
-/* Tune this... */
-#define FORWARD_VOLUME         12
-
-#ifdef CONFIG_SMP
-
-static inline void redirect_intr(int cpu, struct ino_bucket *bp)
+/* Enable @virt_irq via the sun4v_intr_*() calls: route its INO to the
+ * CPU picked by irq_choose_cpu() with sun4v_intr_settarget(), then mark
+ * it HV_INTR_ENABLED with sun4v_intr_setenabled().  A call that does not
+ * return HV_EOK is logged and otherwise ignored.  Does nothing when
+ * @virt_irq has no bucket.
+ */
+static void sun4v_irq_enable(unsigned int virt_irq)
 {
-       /* Ok, here is what is going on:
-        * 1) Retargeting IRQs on Starfire is very
-        *    expensive so just forget about it on them.
-        * 2) Moving around very high priority interrupts
-        *    is a losing game.
-        * 3) If the current cpu is idle, interrupts are
-        *    useful work, so keep them here.  But do not
-        *    pass to our neighbour if he is not very idle.
-        * 4) If sysadmin explicitly asks for directed intrs,
-        *    Just Do It.
-        */
-       struct irqaction *ap = bp->irq_info;
-       cpumask_t cpu_mask = get_smpaff_in_irqaction(ap);
-       unsigned int buddy, ticks;
+       struct ino_bucket *bucket = virt_irq_to_bucket(virt_irq);
 
-       cpus_and(cpu_mask, cpu_mask, cpu_online_map);
-       if (cpus_empty(cpu_mask))
-               cpu_mask = cpu_online_map;
+       if (likely(bucket)) {
+               /* The INO is the bucket's index in ivector_table[].  It is
+                * derived only here, once the bucket is known to be
+                * non-NULL, so no pointer arithmetic is done on NULL.
+                */
+               unsigned int ino = bucket - &ivector_table[0];
+               unsigned long cpuid;
+               int err;
 
-       if (this_is_starfire != 0 ||
-           bp->pil >= 10 || current->pid == 0)
-               goto out;
+               cpuid = irq_choose_cpu(virt_irq);
 
-       /* 'cpu' is the MID (ie. UPAID), calculate the MID
-        * of our buddy.
-        */
-       buddy = cpu + 1;
-       if (buddy >= NR_CPUS)
-               buddy = 0;
-
-       ticks = 0;
-       while (!cpu_isset(buddy, cpu_mask)) {
-               if (++buddy >= NR_CPUS)
-                       buddy = 0;
-               if (++ticks > NR_CPUS) {
-                       put_smpaff_in_irqaction(ap, 0);
-                       goto out;
-               }
+               err = sun4v_intr_settarget(ino, cpuid);
+               if (err != HV_EOK)
+                       printk("sun4v_intr_settarget(%x,%lu): err(%d)\n",
+                              ino, cpuid, err);
+               err = sun4v_intr_setenabled(ino, HV_INTR_ENABLED);
+               if (err != HV_EOK)
+                       printk("sun4v_intr_setenabled(%x): err(%d)\n",
+                              ino, err);
        }
+}
 
-       if (buddy == cpu)
-               goto out;
+static void sun4v_irq_disable(unsigned int virt_irq)
+{
+       struct ino_bucket *bucket = virt_irq_to_bucket(virt_irq);
+       unsigned int ino = bucket - &ivector_table[0];
 
-       /* Voo-doo programming. */
-       if (cpu_data(buddy).idle_volume < FORWARD_VOLUME)
-               goto out;
+       if (likely(bucket)) {
+               int err;
 
-       /* This just so happens to be correct on Cheetah
-        * at the moment.
-        */
-       buddy <<= 26;
+               err = sun4v_intr_setenabled(ino, HV_INTR_DISABLED);
+               if (err != HV_EOK)
+                       printk("sun4v_intr_setenabled(%x): "
+                              "err(%d)\n", ino, err);
+       }
+}
 
-       /* Push it to our buddy. */
-       upa_writel(buddy | IMAP_VALID, bp->imap);
+static void sun4v_irq_end(unsigned int virt_irq)
+{
+       struct ino_bucket *bucket = virt_irq_to_bucket(virt_irq);
+       unsigned int ino = bucket - &ivector_table[0];
 
-out:
-       return;
-}
+       if (likely(bucket)) {
+               int err;
 
-#endif
+               err = sun4v_intr_setstate(ino, HV_INTR_STATE_IDLE);
+               if (err != HV_EOK)
+                       printk("sun4v_intr_setstate(%x): "
+                              "err(%d)\n", ino, err);
+       }
+}
 
-void handler_irq(int irq, struct pt_regs *regs)
+static void run_pre_handler(unsigned int virt_irq)
 {
-       struct ino_bucket *bp, *nbp;
-       int cpu = smp_processor_id();
-
-#ifndef CONFIG_SMP
-       /*
-        * Check for TICK_INT on level 14 softint.
-        */
-       {
-               unsigned long clr_mask = 1 << irq;
-               unsigned long tick_mask = tick_ops->softint_mask;
+       struct ino_bucket *bucket = virt_irq_to_bucket(virt_irq);
+       irq_desc_t *desc = irq_desc + virt_irq;
+       struct irq_handler_data *data = desc->handler_data;
 
-               if ((irq == 14) && (get_softint() & tick_mask)) {
-                       irq = 0;
-                       clr_mask = tick_mask;
-               }
-               clear_softint(clr_mask);
+       if (likely(data->pre_handler)) {
+               data->pre_handler(__irq_ino(__irq(bucket)),
+                                 data->pre_handler_arg1,
+                                 data->pre_handler_arg2);
        }
-#else
-       int should_forward = 1;
+}
 
-       clear_softint(1 << irq);
-#endif
+static struct irq_chip sun4u_irq = {
+       .typename       = "sun4u",
+       .enable         = sun4u_irq_enable,
+       .disable        = sun4u_irq_disable,
+       .end            = sun4u_irq_end,
+};
 
-       irq_enter();
-       kstat_this_cpu.irqs[irq]++;
+static struct irq_chip sun4u_irq_ack = {
+       .typename       = "sun4u+ack",
+       .enable         = sun4u_irq_enable,
+       .disable        = sun4u_irq_disable,
+       .ack            = run_pre_handler,
+       .end            = sun4u_irq_end,
+};
 
-       /* Sliiiick... */
-#ifndef CONFIG_SMP
-       bp = ((irq != 0) ?
-             __bucket(xchg32(irq_work(cpu, irq), 0)) :
-             &pil0_dummy_bucket);
-#else
-       bp = __bucket(xchg32(irq_work(cpu, irq), 0));
-#endif
-       for ( ; bp != NULL; bp = nbp) {
-               unsigned char flags = bp->flags;
-               unsigned char random = 0;
+static struct irq_chip sun4v_irq = {
+       .typename       = "sun4v",
+       .enable         = sun4v_irq_enable,
+       .disable        = sun4v_irq_disable,
+       .end            = sun4v_irq_end,
+};
 
-               nbp = __bucket(bp->irq_chain);
-               bp->irq_chain = 0;
+static struct irq_chip sun4v_irq_ack = {
+       .typename       = "sun4v+ack",
+       .enable         = sun4v_irq_enable,
+       .disable        = sun4v_irq_disable,
+       .ack            = run_pre_handler,
+       .end            = sun4v_irq_end,
+};
 
-               bp->flags |= IBF_INPROGRESS;
+void irq_install_pre_handler(int virt_irq,
+                            void (*func)(unsigned int, void *, void *),
+                            void *arg1, void *arg2)
+{
+       irq_desc_t *desc = irq_desc + virt_irq;
+       struct irq_handler_data *data = desc->handler_data;
 
-               if ((flags & IBF_ACTIVE) != 0) {
-#ifdef CONFIG_PCI
-                       if ((flags & IBF_DMA_SYNC) != 0) {
-                               upa_readl(dma_sync_reg_table[bp->synctab_ent]);
-                               upa_readq(pci_dma_wsync);
-                       }
-#endif
-                       if ((flags & IBF_MULTI) == 0) {
-                               struct irqaction *ap = bp->irq_info;
-                               ap->handler(__irq(bp), ap->dev_id, regs);
-                               random |= ap->flags & SA_SAMPLE_RANDOM;
-                       } else {
-                               void **vector = (void **)bp->irq_info;
-                               int ent;
-                               for (ent = 0; ent < 4; ent++) {
-                                       struct irqaction *ap = vector[ent];
-                                       if (ap != NULL) {
-                                               ap->handler(__irq(bp), ap->dev_id, regs);
-                                               random |= ap->flags & SA_SAMPLE_RANDOM;
-                                       }
-                               }
-                       }
-                       /* Only the dummy bucket lacks IMAP/ICLR. */
-                       if (bp->pil != 0) {
-#ifdef CONFIG_SMP
-                               if (should_forward) {
-                                       redirect_intr(cpu, bp);
-                                       should_forward = 0;
-                               }
-#endif
-                               upa_writel(ICLR_IDLE, bp->iclr);
-                               /* Test and add entropy */
-                               if (random)
-                                       add_interrupt_randomness(irq);
-                       }
-               } else
-                       bp->pending = 1;
+       data->pre_handler = func;
+       data->pre_handler_arg1 = arg1;
+       data->pre_handler_arg2 = arg2;
 
-               bp->flags &= ~IBF_INPROGRESS;
-       }
-       irq_exit();
-}
+       if (desc->chip == &sun4u_irq_ack ||
+           desc->chip == &sun4v_irq_ack)
+               return;
 
-#ifdef CONFIG_BLK_DEV_FD
-extern void floppy_interrupt(int irq, void *dev_cookie, struct pt_regs *regs);
+       desc->chip = (desc->chip == &sun4u_irq ?
+                     &sun4u_irq_ack : &sun4v_irq_ack);
+}
 
-void sparc_floppy_irq(int irq, void *dev_cookie, struct pt_regs *regs)
+unsigned int build_irq(int inofixup, unsigned long iclr, unsigned long imap)
 {
-       struct irqaction *action = *(irq + irq_action);
        struct ino_bucket *bucket;
-       int cpu = smp_processor_id();
+       struct irq_handler_data *data;
+       irq_desc_t *desc;
+       int ino;
 
-       irq_enter();
-       kstat_this_cpu.irqs[irq]++;
+       BUG_ON(tlb_type == hypervisor);
 
-       *(irq_work(cpu, irq)) = 0;
-       bucket = get_ino_in_irqaction(action) + ivector_table;
+       ino = (upa_readl(imap) & (IMAP_IGN | IMAP_INO)) + inofixup;
+       bucket = &ivector_table[ino];
+       if (!bucket->virt_irq) {
+               bucket->virt_irq = virt_irq_alloc(__irq(bucket));
+               irq_desc[bucket->virt_irq].chip = &sun4u_irq;
+       }
 
-       bucket->flags |= IBF_INPROGRESS;
+       desc = irq_desc + bucket->virt_irq;
+       if (unlikely(desc->handler_data))
+               goto out;
 
-       floppy_interrupt(irq, dev_cookie, regs);
-       upa_writel(ICLR_IDLE, bucket->iclr);
+       data = kzalloc(sizeof(struct irq_handler_data), GFP_ATOMIC);
+       if (unlikely(!data)) {
+               prom_printf("IRQ: kzalloc(irq_handler_data) failed.\n");
+               prom_halt();
+       }
+       desc->handler_data = data;
 
-       bucket->flags &= ~IBF_INPROGRESS;
+       data->imap  = imap;
+       data->iclr  = iclr;
 
-       irq_exit();
+out:
+       return bucket->virt_irq;
 }
-#endif
-
-/* The following assumes that the branch lies before the place we
- * are branching to.  This is the case for a trap vector...
- * You have been warned.
- */
-#define SPARC_BRANCH(dest_addr, inst_addr) \
-          (0x10800000 | ((((dest_addr)-(inst_addr))>>2)&0x3fffff))
 
-#define SPARC_NOP (0x01000000)
-
-static void install_fast_irq(unsigned int cpu_irq,
-                            irqreturn_t (*handler)(int, void *, struct pt_regs *))
+unsigned int sun4v_build_irq(u32 devhandle, unsigned int devino)
 {
-       extern unsigned long sparc64_ttable_tl0;
-       unsigned long ttent = (unsigned long) &sparc64_ttable_tl0;
-       unsigned int *insns;
-
-       ttent += 0x820;
-       ttent += (cpu_irq - 1) << 5;
-       insns = (unsigned int *) ttent;
-       insns[0] = SPARC_BRANCH(((unsigned long) handler),
-                               ((unsigned long)&insns[0]));
-       insns[1] = SPARC_NOP;
-       __asm__ __volatile__("membar #StoreStore; flush %0" : : "r" (ttent));
-}
+       struct ino_bucket *bucket;
+       struct irq_handler_data *data;
+       unsigned long sysino;
+       irq_desc_t *desc;
 
-int request_fast_irq(unsigned int irq,
-                    irqreturn_t (*handler)(int, void *, struct pt_regs *),
-                    unsigned long irqflags, const char *name, void *dev_id)
-{
-       struct irqaction *action;
-       struct ino_bucket *bucket = __bucket(irq);
-       unsigned long flags;
+       BUG_ON(tlb_type != hypervisor);
 
-       /* No pil0 dummy buckets allowed here. */
-       if (bucket < &ivector_table[0] ||
-           bucket >= &ivector_table[NUM_IVECS]) {
-               unsigned int *caller;
-
-               __asm__ __volatile__("mov %%i7, %0" : "=r" (caller));
-               printk(KERN_CRIT "request_fast_irq: Old style IRQ registry attempt "
-                      "from %p, irq %08x.\n", caller, irq);
-               return -EINVAL;
-       }       
-       
-       if (!handler)
-               return -EINVAL;
-
-       if ((bucket->pil == 0) || (bucket->pil == 14)) {
-               printk("request_fast_irq: Trying to register shared IRQ 0 or 14.\n");
-               return -EBUSY;
+       sysino = sun4v_devino_to_sysino(devhandle, devino);
+       bucket = &ivector_table[sysino];
+       if (!bucket->virt_irq) {
+               bucket->virt_irq = virt_irq_alloc(__irq(bucket));
+               irq_desc[bucket->virt_irq].chip = &sun4v_irq;
        }
 
-       spin_lock_irqsave(&irq_action_lock, flags);
-
-       action = *(bucket->pil + irq_action);
-       if (action) {
-               if (action->flags & SA_SHIRQ)
-                       panic("Trying to register fast irq when already shared.\n");
-               if (irqflags & SA_SHIRQ)
-                       panic("Trying to register fast irq as shared.\n");
-               printk("request_fast_irq: Trying to register yet already owned.\n");
-               spin_unlock_irqrestore(&irq_action_lock, flags);
-               return -EBUSY;
-       }
+       desc = irq_desc + bucket->virt_irq;
+       if (unlikely(desc->handler_data))
+               goto out;
 
-       /*
-        * We do not check for SA_SAMPLE_RANDOM in this path. Neither do we
-        * support smp intr affinity in this path.
-        */
-       if (irqflags & SA_STATIC_ALLOC) {
-               if (static_irq_count < MAX_STATIC_ALLOC)
-                       action = &static_irqaction[static_irq_count++];
-               else
-                       printk("Request for IRQ%d (%s) SA_STATIC_ALLOC failed "
-                              "using kmalloc\n", bucket->pil, name);
-       }
-       if (action == NULL)
-               action = (struct irqaction *)kmalloc(sizeof(struct irqaction),
-                                                    GFP_ATOMIC);
-       if (!action) {
-               spin_unlock_irqrestore(&irq_action_lock, flags);
-               return -ENOMEM;
+       data = kzalloc(sizeof(struct irq_handler_data), GFP_ATOMIC);
+       if (unlikely(!data)) {
+               prom_printf("IRQ: kzalloc(irq_handler_data) failed.\n");
+               prom_halt();
        }
-       install_fast_irq(bucket->pil, handler);
-
-       bucket->irq_info = action;
-       bucket->flags |= IBF_ACTIVE;
+       desc->handler_data = data;
 
-       action->handler = handler;
-       action->flags = irqflags;
-       action->dev_id = NULL;
-       action->name = name;
-       action->next = NULL;
-       put_ino_in_irqaction(action, irq);
-       put_smpaff_in_irqaction(action, 0);
-
-       *(bucket->pil + irq_action) = action;
-       enable_irq(irq);
-
-       spin_unlock_irqrestore(&irq_action_lock, flags);
+       /* Catch accidental accesses to these things.  IMAP/ICLR handling
+        * is done by hypervisor calls on sun4v platforms, not by direct
+        * register accesses.
+        */
+       data->imap = ~0UL;
+       data->iclr = ~0UL;
 
-#ifdef CONFIG_SMP
-       distribute_irqs();
-#endif
-       return 0;
+out:
+       return bucket->virt_irq;
 }
 
-/* We really don't need these at all on the Sparc.  We only have
- * stubs here because they are exported to modules.
- */
-unsigned long probe_irq_on(void)
+void ack_bad_irq(unsigned int virt_irq)
 {
-       return 0;
-}
+       struct ino_bucket *bucket = virt_irq_to_bucket(virt_irq);
+       unsigned int ino = 0xdeadbeef;
 
-EXPORT_SYMBOL(probe_irq_on);
+       if (bucket)
+               ino = bucket - &ivector_table[0];
 
-int probe_irq_off(unsigned long mask)
-{
-       return 0;
+       printk(KERN_CRIT "Unexpected IRQ from ino[%x] virt_irq[%u]\n",
+              ino, virt_irq);
 }
 
-EXPORT_SYMBOL(probe_irq_off);
+#ifndef CONFIG_SMP
+extern irqreturn_t timer_interrupt(int, void *);
 
-#ifdef CONFIG_SMP
-static int retarget_one_irq(struct irqaction *p, int goal_cpu)
+void timer_irq(int irq, struct pt_regs *regs)
 {
-       struct ino_bucket *bucket = get_ino_in_irqaction(p) + ivector_table;
-       unsigned long imap = bucket->imap;
-       unsigned int tid;
+       unsigned long clr_mask = 1 << irq;
+       unsigned long tick_mask = tick_ops->softint_mask;
+       struct pt_regs *old_regs;
 
-       while (!cpu_online(goal_cpu)) {
-               if (++goal_cpu >= NR_CPUS)
-                       goal_cpu = 0;
+       if (get_softint() & tick_mask) {
+               irq = 0;
+               clr_mask = tick_mask;
        }
+       clear_softint(clr_mask);
 
-       if (tlb_type == cheetah || tlb_type == cheetah_plus) {
-               tid = goal_cpu << 26;
-               tid &= IMAP_AID_SAFARI;
-       } else if (this_is_starfire == 0) {
-               tid = goal_cpu << 26;
-               tid &= IMAP_TID_UPA;
-       } else {
-               tid = (starfire_translate(imap, goal_cpu) << 26);
-               tid &= IMAP_TID_UPA;
-       }
-       upa_writel(tid | IMAP_VALID, imap);
+       old_regs = set_irq_regs(regs);
+       irq_enter();
 
-       while (!cpu_online(goal_cpu)) {
-               if (++goal_cpu >= NR_CPUS)
-                       goal_cpu = 0;
-       }
+       kstat_this_cpu.irqs[0]++;
+       timer_interrupt(irq, NULL);
 
-       return goal_cpu;
+       irq_exit();
+       set_irq_regs(old_regs);
 }
+#endif
 
-/* Called from request_irq. */
-static void distribute_irqs(void)
+void handler_irq(int irq, struct pt_regs *regs)
 {
-       unsigned long flags;
-       int cpu, level;
+       struct ino_bucket *bucket;
+       struct pt_regs *old_regs;
 
-       spin_lock_irqsave(&irq_action_lock, flags);
-       cpu = 0;
+       clear_softint(1 << irq);
 
-       /*
-        * Skip the timer at [0], and very rare error/power intrs at [15].
-        * Also level [12], it causes problems on Ex000 systems.
-        */
-       for (level = 1; level < NR_IRQS; level++) {
-               struct irqaction *p = irq_action[level];
-               if (level == 12) continue;
-               while(p) {
-                       cpu = retarget_one_irq(p, cpu);
-                       p = p->next;
-               }
+       old_regs = set_irq_regs(regs);
+       irq_enter();
+
+       /* Sliiiick... swap this cpu's worklist head with 0, walk old chain. */
+       bucket = __bucket(xchg32(irq_work(smp_processor_id()), 0));
+       while (bucket) {
+               struct ino_bucket *next = __bucket(bucket->irq_chain);
+
+               bucket->irq_chain = 0;
+               __do_IRQ(bucket->virt_irq);
+
+               bucket = next;
        }
-       spin_unlock_irqrestore(&irq_action_lock, flags);
+
+       irq_exit();
+       set_irq_regs(old_regs);
 }
-#endif
 
+struct sun5_timer {
+       u64     count0;
+       u64     limit0;
+       u64     count1;
+       u64     limit1;
+};
 
-struct sun5_timer *prom_timers;
+static struct sun5_timer *prom_timers;
 static u64 prom_limit0, prom_limit1;
 
 static void map_prom_timers(void)
 {
-       unsigned int addr[3];
-       int tnode, err;
+       struct device_node *dp;
+       unsigned int *addr;
 
        /* PROM timer node hangs out in the top level of device siblings... */
-       tnode = prom_finddevice("/counter-timer");
+       dp = of_find_node_by_path("/");
+       dp = dp->child;
+       while (dp) {
+               if (!strcmp(dp->name, "counter-timer"))
+                       break;
+               dp = dp->sibling;
+       }
 
        /* Assume if node is not present, PROM uses different tick mechanism
         * which we should not care about.
         */
-       if (tnode == 0 || tnode == -1) {
+       if (!dp) {
                prom_timers = (struct sun5_timer *) 0;
                return;
        }
 
        /* If PROM is really using this, it must be mapped by him. */
-       err = prom_getproperty(tnode, "address", (char *)addr, sizeof(addr));
-       if (err == -1) {
+       addr = of_get_property(dp, "address", NULL);
+       if (!addr) {
                prom_printf("PROM does not have timer mapped, trying to continue.\n");
                prom_timers = (struct sun5_timer *) 0;
                return;
@@ -1092,216 +625,149 @@ static void kill_prom_timer(void)
        : "g1", "g2");
 }
 
-void enable_prom_timer(void)
+void init_irqwork_curcpu(void)
 {
-       if (!prom_timers)
-               return;
+       int cpu = hard_smp_processor_id();
 
-       /* Set it to whatever was there before. */
-       prom_timers->limit1 = prom_limit1;
-       prom_timers->count1 = 0;
-       prom_timers->limit0 = prom_limit0;
-       prom_timers->count0 = 0;
+       trap_block[cpu].irq_worklist = 0;
 }
 
-void init_irqwork_curcpu(void)
+static void __cpuinit register_one_mondo(unsigned long paddr, unsigned long type)
 {
-       register struct irq_work_struct *workp asm("o2");
-       unsigned long tmp;
-       int cpu = hard_smp_processor_id();
+       unsigned long num_entries = 128;
+       unsigned long status;
 
-       memset(__irq_work + cpu, 0, sizeof(*workp));
-
-       /* Make sure we are called with PSTATE_IE disabled.  */
-       __asm__ __volatile__("rdpr      %%pstate, %0\n\t"
-                            : "=r" (tmp));
-       if (tmp & PSTATE_IE) {
-               prom_printf("BUG: init_irqwork_curcpu() called with "
-                           "PSTATE_IE enabled, bailing.\n");
-               __asm__ __volatile__("mov       %%i7, %0\n\t"
-                                    : "=r" (tmp));
-               prom_printf("BUG: Called from %lx\n", tmp);
+       status = sun4v_cpu_qconf(type, paddr, num_entries);
+       if (status != HV_EOK) {
+               prom_printf("SUN4V: sun4v_cpu_qconf(%lu:%lx:%lu) failed, "
+                           "err %lu\n", type, paddr, num_entries, status);
                prom_halt();
        }
-
-       /* Set interrupt globals.  */
-       workp = &__irq_work[cpu];
-       __asm__ __volatile__(
-       "rdpr   %%pstate, %0\n\t"
-       "wrpr   %0, %1, %%pstate\n\t"
-       "mov    %2, %%g6\n\t"
-       "wrpr   %0, 0x0, %%pstate\n\t"
-       : "=&r" (tmp)
-       : "i" (PSTATE_IG), "r" (workp));
 }
 
-/* Only invoked on boot processor. */
-void __init init_IRQ(void)
+static void __cpuinit sun4v_register_mondo_queues(int this_cpu)
 {
-       map_prom_timers();
-       kill_prom_timer();
-       memset(&ivector_table[0], 0, sizeof(ivector_table));
-
-       /* We need to clear any IRQ's pending in the soft interrupt
-        * registers, a spurious one could be left around from the
-        * PROM timer which we just disabled.
-        */
-       clear_softint(get_softint());
+       struct trap_per_cpu *tb = &trap_block[this_cpu];
 
-       /* Now that ivector table is initialized, it is safe
-        * to receive IRQ vector traps.  We will normally take
-        * one or two right now, in case some device PROM used
-        * to boot us wants to speak to us.  We just ignore them.
-        */
-       __asm__ __volatile__("rdpr      %%pstate, %%g1\n\t"
-                            "or        %%g1, %0, %%g1\n\t"
-                            "wrpr      %%g1, 0x0, %%pstate"
-                            : /* No outputs */
-                            : "i" (PSTATE_IE)
-                            : "g1");
+       register_one_mondo(tb->cpu_mondo_pa, HV_CPU_QUEUE_CPU_MONDO);
+       register_one_mondo(tb->dev_mondo_pa, HV_CPU_QUEUE_DEVICE_MONDO);
+       register_one_mondo(tb->resum_mondo_pa, HV_CPU_QUEUE_RES_ERROR);
+       register_one_mondo(tb->nonresum_mondo_pa, HV_CPU_QUEUE_NONRES_ERROR);
 }
 
-static struct proc_dir_entry * root_irq_dir;
-static struct proc_dir_entry * irq_dir [NUM_IVECS];
-
-#ifdef CONFIG_SMP
-
-#define HEX_DIGITS 16
-
-static unsigned int parse_hex_value (const char __user *buffer,
-               unsigned long count, unsigned long *ret)
+static void __cpuinit alloc_one_mondo(unsigned long *pa_ptr, int use_bootmem)
 {
-       unsigned char hexnum [HEX_DIGITS];
-       unsigned long value;
-       int i;
-
-       if (!count)
-               return -EINVAL;
-       if (count > HEX_DIGITS)
-               count = HEX_DIGITS;
-       if (copy_from_user(hexnum, buffer, count))
-               return -EFAULT;
-
-       /*
-        * Parse the first 8 characters as a hex string, any non-hex char
-        * is end-of-string. '00e1', 'e1', '00E1', 'E1' are all the same.
-        */
-       value = 0;
+       void *page;
 
-       for (i = 0; i < count; i++) {
-               unsigned int c = hexnum[i];
+       if (use_bootmem)
+               page = alloc_bootmem_low_pages(PAGE_SIZE);
+       else
+               page = (void *) get_zeroed_page(GFP_ATOMIC);
 
-               switch (c) {
-                       case '0' ... '9': c -= '0'; break;
-                       case 'a' ... 'f': c -= 'a'-10; break;
-                       case 'A' ... 'F': c -= 'A'-10; break;
-               default:
-                       goto out;
-               }
-               value = (value << 4) | c;
+       if (!page) {
+               prom_printf("SUN4V: Error, cannot allocate mondo queue.\n");
+               prom_halt();
        }
-out:
-       *ret = value;
-       return 0;
-}
 
-static int irq_affinity_read_proc (char *page, char **start, off_t off,
-                       int count, int *eof, void *data)
-{
-       struct ino_bucket *bp = ivector_table + (long)data;
-       struct irqaction *ap = bp->irq_info;
-       cpumask_t mask = get_smpaff_in_irqaction(ap);
-       int len;
-
-       if (cpus_empty(mask))
-               mask = cpu_online_map;
-
-       len = cpumask_scnprintf(page, count, mask);
-       if (count - len < 2)
-               return -EINVAL;
-       len += sprintf(page + len, "\n");
-       return len;
+       *pa_ptr = __pa(page);
 }
 
-static inline void set_intr_affinity(int irq, unsigned long hw_aff)
+static void __cpuinit alloc_one_kbuf(unsigned long *pa_ptr, int use_bootmem)
 {
-       struct ino_bucket *bp = ivector_table + irq;
+       void *page;
 
-       /* Users specify affinity in terms of hw cpu ids.
-        * As soon as we do this, handler_irq() might see and take action.
-        */
-       put_smpaff_in_irqaction((struct irqaction *)bp->irq_info, hw_aff);
+       if (use_bootmem)
+               page = alloc_bootmem_low_pages(PAGE_SIZE);
+       else
+               page = (void *) get_zeroed_page(GFP_ATOMIC);
 
-       /* Migration is simply done by the next cpu to service this
-        * interrupt.
-        */
+       if (!page) {
+               prom_printf("SUN4V: Error, cannot allocate kbuf page.\n");
+               prom_halt();
+       }
+
+       *pa_ptr = __pa(page);
 }
 
-static int irq_affinity_write_proc (struct file *file, const char __user *buffer,
-                                       unsigned long count, void *data)
+static void __cpuinit init_cpu_send_mondo_info(struct trap_per_cpu *tb, int use_bootmem)
 {
-       int irq = (long) data, full_count = count, err;
-       unsigned long new_value, i;
-
-       err = parse_hex_value(buffer, count, &new_value);
-
-       /*
-        * Do not allow disabling IRQs completely - it's a too easy
-        * way to make the system unusable accidentally :-) At least
-        * one online CPU still has to be targeted.
-        */
-       for (i = 0; i < NR_CPUS; i++) {
-               if ((new_value & (1UL << i)) != 0 &&
-                   !cpu_online(i))
-                       new_value &= ~(1UL << i);
-       }
+#ifdef CONFIG_SMP
+       void *page;
 
-       if (!new_value)
-               return -EINVAL;
+       BUILD_BUG_ON((NR_CPUS * sizeof(u16)) > (PAGE_SIZE - 64));
 
-       set_intr_affinity(irq, new_value);
+       if (use_bootmem)
+               page = alloc_bootmem_low_pages(PAGE_SIZE);
+       else
+               page = (void *) get_zeroed_page(GFP_ATOMIC);
 
-       return full_count;
-}
+       if (!page) {
+               prom_printf("SUN4V: Error, cannot allocate cpu mondo page.\n");
+               prom_halt();
+       }
 
+       tb->cpu_mondo_block_pa = __pa(page);
+       tb->cpu_list_pa = __pa(page + 64);
 #endif
+}
 
-#define MAX_NAMELEN 10
-
-static void register_irq_proc (unsigned int irq)
+/* Allocate and register the mondo and error queues for this cpu.  */
+void __cpuinit sun4v_init_mondo_queues(int use_bootmem, int cpu, int alloc, int load)
 {
-       char name [MAX_NAMELEN];
-
-       if (!root_irq_dir || irq_dir[irq])
-               return;
+       struct trap_per_cpu *tb = &trap_block[cpu];
 
-       memset(name, 0, MAX_NAMELEN);
-       sprintf(name, "%x", irq);
+       if (alloc) {
+               alloc_one_mondo(&tb->cpu_mondo_pa, use_bootmem);
+               alloc_one_mondo(&tb->dev_mondo_pa, use_bootmem);
+               alloc_one_mondo(&tb->resum_mondo_pa, use_bootmem);
+               alloc_one_kbuf(&tb->resum_kernel_buf_pa, use_bootmem);
+               alloc_one_mondo(&tb->nonresum_mondo_pa, use_bootmem);
+               alloc_one_kbuf(&tb->nonresum_kernel_buf_pa, use_bootmem);
 
-       /* create /proc/irq/1234 */
-       irq_dir[irq] = proc_mkdir(name, root_irq_dir);
+               init_cpu_send_mondo_info(tb, use_bootmem);
+       }
 
-#ifdef CONFIG_SMP
-       /* XXX SMP affinity not supported on starfire yet. */
-       if (this_is_starfire == 0) {
-               struct proc_dir_entry *entry;
-
-               /* create /proc/irq/1234/smp_affinity */
-               entry = create_proc_entry("smp_affinity", 0600, irq_dir[irq]);
-
-               if (entry) {
-                       entry->nlink = 1;
-                       entry->data = (void *)(long)irq;
-                       entry->read_proc = irq_affinity_read_proc;
-                       entry->write_proc = irq_affinity_write_proc;
+       if (load) {
+               if (cpu != hard_smp_processor_id()) {
+                       prom_printf("SUN4V: init mondo on cpu %d not %d\n",
+                                   cpu, hard_smp_processor_id());
+                       prom_halt();
                }
+               sun4v_register_mondo_queues(cpu);
        }
-#endif
 }
 
-void init_irq_proc (void)
+static struct irqaction timer_irq_action = {
+       .name = "timer",
+};
+
+/* Only invoked on boot processor. */
+void __init init_IRQ(void)
 {
-       /* create /proc/irq */
-       root_irq_dir = proc_mkdir("irq", 0);
-}
+       map_prom_timers();
+       kill_prom_timer();
+       memset(&ivector_table[0], 0, sizeof(ivector_table));
+
+       if (tlb_type == hypervisor)
+               sun4v_init_mondo_queues(1, hard_smp_processor_id(), 1, 1);
+
+       /* We need to clear any IRQs pending in the soft interrupt
+        * registers, a spurious one could be left around from the
+        * PROM timer which we just disabled.
+        */
+       clear_softint(get_softint());
+
+       /* Now that ivector table is initialized, it is safe
+        * to receive IRQ vector traps.  We will normally take
+        * one or two right now, in case some device PROM used
+        * to boot us wants to speak to us.  We just ignore them.
+        */
+       __asm__ __volatile__("rdpr      %%pstate, %%g1\n\t"
+                            "or        %%g1, %0, %%g1\n\t"
+                            "wrpr      %%g1, 0x0, %%pstate"
+                            : /* No outputs */
+                            : "i" (PSTATE_IE)
+                            : "g1");
 
+       irq_desc[0].action = &timer_irq_action;
+}