fedora core 6 1.2949 + vserver 2.2.0
diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c
index 6c4d59f..247c272 100644
--- a/arch/ia64/kernel/irq_ia64.c
+++ b/arch/ia64/kernel/irq_ia64.c
@@ -1,5 +1,5 @@
 /*
- * linux/arch/ia64/kernel/irq.c
+ * linux/arch/ia64/kernel/irq_ia64.c
  *
  * Copyright (C) 1998-2001 Hewlett-Packard Co
  *     Stephane Eranian <eranian@hpl.hp.com>
@@ -14,7 +14,6 @@
  *                                             Added CPU Hotplug handling for IPF.
  */
 
-#include <linux/config.h>
 #include <linux/module.h>
 
 #include <linux/jiffies.h>
 #include <linux/smp_lock.h>
 #include <linux/threads.h>
 #include <linux/bitops.h>
+#include <linux/irq.h>
+#ifdef CONFIG_XEN
+#include <linux/cpu.h>
+#endif
 
 #include <asm/delay.h>
 #include <asm/intrinsics.h>
 
 #define IRQ_DEBUG      0
 
+/* These can be overridden in platform_irq_init */
+int ia64_first_device_vector = IA64_DEF_FIRST_DEVICE_VECTOR;
+int ia64_last_device_vector = IA64_DEF_LAST_DEVICE_VECTOR;
+
 /* default base addr of IPI table */
 void __iomem *ipi_base_addr = ((void __iomem *)
                               (__IA64_UNCACHED_OFFSET | IA64_IPI_DEFAULT_BASE_ADDR));
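
The two vector bounds above are deliberately plain ints so that platform setup
code can shrink or shift the device-vector window before the allocator below
hands anything out. A minimal sketch of such an override; the function name
and the vector values are illustrative, not from this patch:

    /* Hypothetical platform_irq_init(): claim 0xb0..0xdf for device
     * vectors, keeping the remainder for platform-private interrupts.
     * Must run before the first assign_irq_vector() call. */
    void __init my_platform_irq_init(void)
    {
            ia64_first_device_vector = 0xb0;
            ia64_last_device_vector  = 0xdf;
    }
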
@@ -60,12 +68,19 @@ __u8 isa_irq_to_vector_map[16] = {
 };
 EXPORT_SYMBOL(isa_irq_to_vector_map);
 
-static unsigned long ia64_vector_mask[BITS_TO_LONGS(IA64_NUM_DEVICE_VECTORS)];
+static unsigned long ia64_vector_mask[BITS_TO_LONGS(IA64_MAX_DEVICE_VECTORS)];
 
 int
 assign_irq_vector (int irq)
 {
        int pos, vector;
+
+#ifdef CONFIG_XEN
+       if (is_running_on_xen()) {
+               extern int xen_assign_irq_vector(int);
+               return xen_assign_irq_vector(irq);
+       }
+#endif
  again:
        pos = find_first_zero_bit(ia64_vector_mask, IA64_NUM_DEVICE_VECTORS);
        vector = IA64_FIRST_DEVICE_VECTOR + pos;
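
Note that the bitmap is now sized by the compile-time maximum
(IA64_MAX_DEVICE_VECTORS) while the search stays bounded by the potentially
runtime-narrowed IA64_NUM_DEVICE_VECTORS, so a platform that shrinks the
window never indexes past the mask. The pos-to-vector arithmetic is easy to
model in isolation; a small self-contained user-space sketch of the
allocator's behaviour (the constants are stand-ins, not the kernel's values):

    #include <limits.h>
    #include <stdio.h>

    #define FIRST_DEVICE_VECTOR 0x30        /* assumed for illustration */
    #define NUM_DEVICE_VECTORS  64
    #define BITS_PER_LONG       (CHAR_BIT * sizeof(unsigned long))

    static unsigned long mask[(NUM_DEVICE_VECTORS + BITS_PER_LONG - 1) / BITS_PER_LONG];

    /* Model of assign_irq_vector(): the first free bit maps onto the
     * device-vector window; -1 plays the role of -ENOSPC. */
    static int model_assign_vector(void)
    {
            for (int pos = 0; pos < NUM_DEVICE_VECTORS; pos++) {
                    unsigned long bit = 1UL << (pos % BITS_PER_LONG);

                    if (!(mask[pos / BITS_PER_LONG] & bit)) {
                            mask[pos / BITS_PER_LONG] |= bit;
                            return FIRST_DEVICE_VECTOR + pos;
                    }
            }
            return -1;
    }

    int main(void)
    {
            printf("%#x %#x\n", model_assign_vector(), model_assign_vector());
            return 0;                       /* prints 0x30 0x31 */
    }
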
@@ -89,6 +104,38 @@ free_irq_vector (int vector)
                printk(KERN_WARNING "%s: double free!\n", __FUNCTION__);
 }
 
+int
+reserve_irq_vector (int vector)
+{
+       int pos;
+
+       if (vector < IA64_FIRST_DEVICE_VECTOR ||
+           vector > IA64_LAST_DEVICE_VECTOR)
+               return -EINVAL;
+
+       pos = vector - IA64_FIRST_DEVICE_VECTOR;
+       return test_and_set_bit(pos, ia64_vector_mask);
+}
+
+/*
+ * Dynamic irq allocate and deallocation for MSI
+ */
+int create_irq(void)
+{
+       int vector = assign_irq_vector(AUTO_ASSIGN);
+
+       if (vector >= 0)
+               dynamic_irq_init(vector);
+
+       return vector;
+}
+
+void destroy_irq(unsigned int irq)
+{
+       dynamic_irq_cleanup(irq);
+       free_irq_vector(irq);
+}
+
 #ifdef CONFIG_SMP
 #      define IS_RESCHEDULE(vec)       (vec == IA64_IPI_RESCHEDULE)
 #else
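
create_irq()/destroy_irq() give MSI code a dynamic irq on ia64, where the irq
number and the vector coincide, and reserve_irq_vector() returns nonzero if
the requested bit was already taken. A hedged usage sketch, assuming the
2.6.19-era two-argument request_irq() handler type; the handler and names are
illustrative:

    /* Sketch: allocate a dynamic irq for an MSI source and wire a
     * handler to it; tear down in reverse order on failure. */
    static irqreturn_t my_msi_handler(int irq, void *dev_id)
    {
            return IRQ_HANDLED;
    }

    static int my_msi_setup(void *dev)
    {
            int irq = create_irq();

            if (irq < 0)
                    return irq;             /* no free device vector */

            if (request_irq(irq, my_msi_handler, IRQF_DISABLED,
                            "my-msi", dev)) {
                    destroy_irq(irq);
                    return -EBUSY;
            }
            return irq;
    }
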
@@ -102,6 +149,7 @@ free_irq_vector (int vector)
 void
 ia64_handle_irq (ia64_vector vector, struct pt_regs *regs)
 {
+       struct pt_regs *old_regs = set_irq_regs(regs);
        unsigned long saved_tpr;
 
 #if IRQ_DEBUG
@@ -143,11 +191,13 @@ ia64_handle_irq (ia64_vector vector, struct pt_regs *regs)
        saved_tpr = ia64_getreg(_IA64_REG_CR_TPR);
        ia64_srlz_d();
        while (vector != IA64_SPURIOUS_INT_VECTOR) {
-               if (!IS_RESCHEDULE(vector)) {
+               if (unlikely(IS_RESCHEDULE(vector)))
+                       kstat_this_cpu.irqs[vector]++;
+               else {
                        ia64_setreg(_IA64_REG_CR_TPR, vector);
                        ia64_srlz_d();
 
-                       __do_IRQ(local_vector_to_irq(vector), regs);
+                       generic_handle_irq(local_vector_to_irq(vector));
 
                        /*
                         * Disable interrupts and send EOI:
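
Switching from __do_IRQ() to generic_handle_irq() means dispatch now goes
through the flow handler installed on each irq descriptor rather than the
one-size-fits-all legacy path. Roughly, and simplified from the 2.6.19-era
<linux/irq.h> (a model, not the exact kernel code):

    /* Simplified model of generic_handle_irq(): every descriptor
     * carries its own flow handler (handle_level_irq(), a percpu
     * handler, ...), chosen when the irq was set up. */
    static inline void model_generic_handle_irq(unsigned int irq)
    {
            struct irq_desc *desc = irq_desc + irq;

            desc->handle_irq(irq, desc);
    }
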
@@ -164,6 +214,7 @@ ia64_handle_irq (ia64_vector vector, struct pt_regs *regs)
         * come through until ia64_eoi() has been done.
         */
        irq_exit();
+       set_irq_regs(old_regs);
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
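
The set_irq_regs() pair added above follows the standard conversion idiom:
instead of threading struct pt_regs through every handler, the register
snapshot is parked in a per-cpu slot for the duration of the interrupt and
restored on exit so nested entries unwind correctly. The idiom in isolation
(a sketch; the function name is made up):

    void my_arch_irq_entry(struct pt_regs *regs)
    {
            /* Publish the trapped registers for get_irq_regs() users. */
            struct pt_regs *old_regs = set_irq_regs(regs);

            irq_enter();
            /* ... dispatch via generic_handle_irq() ... */
            irq_exit();

            set_irq_regs(old_regs);         /* restore for nesting */
    }
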
@@ -187,7 +238,11 @@ void ia64_process_pending_intr(void)
          * Perform normal interrupt style processing
          */
        while (vector != IA64_SPURIOUS_INT_VECTOR) {
-               if (!IS_RESCHEDULE(vector)) {
+               if (unlikely(IS_RESCHEDULE(vector)))
+                       kstat_this_cpu.irqs[vector]++;
+               else {
+                       struct pt_regs *old_regs = set_irq_regs(NULL);
+
                        ia64_setreg(_IA64_REG_CR_TPR, vector);
                        ia64_srlz_d();
 
@@ -198,7 +253,8 @@ void ia64_process_pending_intr(void)
                         * Probably could shared code.
                         */
                        vectors_in_migration[local_vector_to_irq(vector)]=0;
-                       __do_IRQ(local_vector_to_irq(vector), NULL);
+                       generic_handle_irq(local_vector_to_irq(vector));
+                       set_irq_regs(old_regs);
 
                        /*
                         * Disable interrupts and send EOI
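
One difference from the live-interrupt path above: this drain runs from the
CPU-offline path with no trapped register frame, hence set_irq_regs(NULL). A
handler that cares can tell the two apart (a sketch; the handler is
hypothetical):

    static irqreturn_t my_handler(int irq, void *dev_id)
    {
            /* NULL means we were replayed from
             * ia64_process_pending_intr(), not from a live trap. */
            if (!get_irq_regs())
                    pr_debug("irq %d drained during cpu teardown\n", irq);

            return IRQ_HANDLED;
    }
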
@@ -215,26 +271,238 @@ void ia64_process_pending_intr(void)
 
 
 #ifdef CONFIG_SMP
-extern irqreturn_t handle_IPI (int irq, void *dev_id, struct pt_regs *regs);
+extern irqreturn_t handle_IPI (int irq, void *dev_id);
+
+static irqreturn_t dummy_handler (int irq, void *dev_id)
+{
+       BUG();
+}
 
 static struct irqaction ipi_irqaction = {
        .handler =      handle_IPI,
-       .flags =        SA_INTERRUPT,
+       .flags =        IRQF_DISABLED,
        .name =         "IPI"
 };
+
+#ifndef CONFIG_XEN
+static struct irqaction resched_irqaction = {
+       .handler =      dummy_handler,
+       .flags =        IRQF_DISABLED,
+       .name =         "resched"
+};
+#endif
+#endif
+
+#ifdef CONFIG_XEN
+#include <xen/evtchn.h>
+#include <xen/interface/callback.h>
+
+static DEFINE_PER_CPU(int, timer_irq) = -1;
+static DEFINE_PER_CPU(int, ipi_irq) = -1;
+static DEFINE_PER_CPU(int, resched_irq) = -1;
+static char timer_name[NR_CPUS][15];
+static char ipi_name[NR_CPUS][15];
+static char resched_name[NR_CPUS][15];
+
+struct saved_irq {
+       unsigned int irq;
+       struct irqaction *action;
+};
+/* 16 should be a generous upper bound, since only a handful of percpu
+ * irqs are registered this early.
+ */
+#define MAX_LATE_IRQ   16
+static struct saved_irq saved_percpu_irqs[MAX_LATE_IRQ];
+static unsigned short late_irq_cnt = 0;
+static unsigned short saved_irq_cnt = 0;
+static int xen_slab_ready = 0;
+
+#ifdef CONFIG_SMP
+/* Dummy stub. Though we could check RESCHEDULE_VECTOR before __do_IRQ,
+ * doing so would issue several extra memory accesses on percpu data and
+ * thus add unnecessary traffic to other paths.
+ */
+static irqreturn_t
+handle_reschedule(int irq, void *dev_id)
+{
+       return IRQ_HANDLED;
+}
+
+static struct irqaction resched_irqaction = {
+       .handler =      handle_reschedule,
+       .flags =        IRQF_DISABLED,
+       .name =         "RESCHED"
+};
+#endif
+
+/*
+ * This is the Xen version of percpu irq registration, which binds to
+ * the Xen-specific evtchn subsystem. One trick here is that the evtchn
+ * binding interface depends on kmalloc, because the bound port must be
+ * freed again at device/cpu teardown. So registrations made on the BSP
+ * before the slab allocator is ready are cached and dealt with later;
+ * any registration that happens after slab is ready is hooked into the
+ * Xen evtchn immediately.
+ *
+ * FIXME: MCA is not supported so far, hence the "nomca" boot param is
+ * required.
+ */
+static void
+xen_register_percpu_irq (unsigned int irq, struct irqaction *action, int save)
+{
+       unsigned int cpu = smp_processor_id();
+       int ret = 0;
+
+       if (xen_slab_ready) {
+               switch (irq) {
+               case IA64_TIMER_VECTOR:
+                       sprintf(timer_name[cpu], "%s%d", action->name, cpu);
+                       ret = bind_virq_to_irqhandler(VIRQ_ITC, cpu,
+                               action->handler, action->flags,
+                               timer_name[cpu], action->dev_id);
+                       per_cpu(timer_irq,cpu) = ret;
+                       printk(KERN_INFO "register VIRQ_ITC (%s) to xen irq (%d)\n", timer_name[cpu], ret);
+                       break;
+               case IA64_IPI_RESCHEDULE:
+                       sprintf(resched_name[cpu], "%s%d", action->name, cpu);
+                       ret = bind_ipi_to_irqhandler(RESCHEDULE_VECTOR, cpu,
+                               action->handler, action->flags,
+                               resched_name[cpu], action->dev_id);
+                       per_cpu(resched_irq,cpu) = ret;
+                       printk(KERN_INFO "register RESCHEDULE_VECTOR (%s) to xen irq (%d)\n", resched_name[cpu], ret);
+                       break;
+               case IA64_IPI_VECTOR:
+                       sprintf(ipi_name[cpu], "%s%d", action->name, cpu);
+                       ret = bind_ipi_to_irqhandler(IPI_VECTOR, cpu,
+                               action->handler, action->flags,
+                               ipi_name[cpu], action->dev_id);
+                       per_cpu(ipi_irq,cpu) = ret;
+                       printk(KERN_INFO "register IPI_VECTOR (%s) to xen irq (%d)\n", ipi_name[cpu], ret);
+                       break;
+               case IA64_SPURIOUS_INT_VECTOR:
+                       break;
+               default:
+                       printk(KERN_WARNING "Percpu irq %d is unsupported by xen!\n", irq);
+                       break;
+               }
+               BUG_ON(ret < 0);
+       }
+
+       /* On the BSP we cache registered percpu irqs so that they can
+        * be re-walked when initializing the APs.
+        */
+       if (!cpu && save) {
+               BUG_ON(saved_irq_cnt == MAX_LATE_IRQ);
+               saved_percpu_irqs[saved_irq_cnt].irq = irq;
+               saved_percpu_irqs[saved_irq_cnt].action = action;
+               saved_irq_cnt++;
+               if (!xen_slab_ready)
+                       late_irq_cnt++;
+       }
+}
+
+static void
+xen_bind_early_percpu_irq (void)
+{
+       int i;
+
+       xen_slab_ready = 1;
+       /* There's no race when accessing this cached array, since only
+        * the BSP walks it, and only during early bring-up.
+        */
+       for (i = 0; i < late_irq_cnt; i++)
+               xen_register_percpu_irq(saved_percpu_irqs[i].irq,
+                                       saved_percpu_irqs[i].action, 0);
+}
+
+/* FIXME: There's no obvious hook for noticing that slab is ready, so
+ * we resort to a hack and reuse the late_time_init hook.
+ */
+extern void (*late_time_init)(void);
+extern char xen_event_callback;
+extern void xen_init_IRQ(void);
+
+#ifdef CONFIG_HOTPLUG_CPU
+static int __devinit
+unbind_evtchn_callback(struct notifier_block *nfb,
+                       unsigned long action, void *hcpu)
+{
+       unsigned int cpu = (unsigned long)hcpu;
+
+       if (action == CPU_DEAD) {
+               /* Unregister evtchn.  */
+               if (per_cpu(ipi_irq,cpu) >= 0) {
+                       unbind_from_irqhandler (per_cpu(ipi_irq, cpu), NULL);
+                       per_cpu(ipi_irq, cpu) = -1;
+               }
+               if (per_cpu(resched_irq,cpu) >= 0) {
+                       unbind_from_irqhandler (per_cpu(resched_irq, cpu),
+                                               NULL);
+                       per_cpu(resched_irq, cpu) = -1;
+               }
+               if (per_cpu(timer_irq,cpu) >= 0) {
+                       unbind_from_irqhandler (per_cpu(timer_irq, cpu), NULL);
+                       per_cpu(timer_irq, cpu) = -1;
+               }
+       }
+       return NOTIFY_OK;
+}
+
+static struct notifier_block unbind_evtchn_notifier = {
+       .notifier_call = unbind_evtchn_callback,
+       .priority = 0
+};
 #endif
 
+DECLARE_PER_CPU(int, ipi_to_irq[NR_IPIS]);
+void xen_smp_intr_init(void)
+{
+#ifdef CONFIG_SMP
+       unsigned int cpu = smp_processor_id();
+       unsigned int i = 0;
+       struct callback_register event = {
+               .type = CALLBACKTYPE_event,
+               .address = (unsigned long)&xen_event_callback,
+       };
+
+       if (cpu == 0) {
+               /* Initialization was already done for boot cpu.  */
+#ifdef CONFIG_HOTPLUG_CPU
+               /* Register the notifier only once.  */
+               register_cpu_notifier(&unbind_evtchn_notifier);
+#endif
+               return;
+       }
+
+       /* This should piggyback on the vcpu guest-context setup. */
+       BUG_ON(HYPERVISOR_callback_op(CALLBACKOP_register, &event));
+
+       for (i = 0; i < saved_irq_cnt; i++)
+               xen_register_percpu_irq(saved_percpu_irqs[i].irq,
+                                       saved_percpu_irqs[i].action, 0);
+#endif /* CONFIG_SMP */
+}
+#endif /* CONFIG_XEN */
+
 void
 register_percpu_irq (ia64_vector vec, struct irqaction *action)
 {
        irq_desc_t *desc;
        unsigned int irq;
 
+#ifdef CONFIG_XEN
+       if (is_running_on_xen())
+               return xen_register_percpu_irq(vec, action, 1);
+#endif
+
        for (irq = 0; irq < NR_IRQS; ++irq)
                if (irq_to_vector(irq) == vec) {
-                       desc = irq_descp(irq);
+                       desc = irq_desc + irq;
                        desc->status |= IRQ_PER_CPU;
-                       desc->handler = &irq_type_ia64_lsapic;
+                       desc->chip = &irq_type_ia64_lsapic;
                        if (action)
                                setup_irq(irq, action);
                }
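
The park-and-replay scheme used by xen_register_percpu_irq() is the subtle
part of this hunk, so here is a stripped-down user-space model of just that
logic; all names are illustrative, and the kernel version additionally keys
off smp_processor_id() and replays once more per AP:

    #include <assert.h>
    #include <stdio.h>

    #define MAX_LATE 16

    struct pending { int irq; const char *name; };

    static struct pending pending[MAX_LATE];
    static int pending_cnt;
    static int allocator_ready;

    static void bind_now(int irq, const char *name)
    {
            printf("bound irq %d (%s)\n", irq, name);
    }

    /* Model of xen_register_percpu_irq(): park requests that arrive
     * before the allocator is usable, bind the rest directly. */
    static void register_irq(int irq, const char *name)
    {
            if (!allocator_ready) {
                    assert(pending_cnt < MAX_LATE);
                    pending[pending_cnt++] = (struct pending){ irq, name };
                    return;
            }
            bind_now(irq, name);
    }

    /* Model of xen_bind_early_percpu_irq(): flip the flag first so the
     * replay takes the direct path. */
    static void allocator_came_up(void)
    {
            allocator_ready = 1;
            for (int i = 0; i < pending_cnt; i++)
                    bind_now(pending[i].irq, pending[i].name);
    }

    int main(void)
    {
            register_irq(238, "timer");     /* too early: parked */
            allocator_came_up();            /* replayed here */
            register_irq(254, "ipi");       /* bound immediately */
            return 0;
    }
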
@@ -243,9 +511,25 @@ register_percpu_irq (ia64_vector vec, struct irqaction *action)
 void __init
 init_IRQ (void)
 {
+#ifdef CONFIG_XEN
+       /* Maybe put into platform_irq_init later */
+       if (is_running_on_xen()) {
+               struct callback_register event = {
+                       .type = CALLBACKTYPE_event,
+                       .address = (unsigned long)&xen_event_callback,
+               };
+               xen_init_IRQ();
+               BUG_ON(HYPERVISOR_callback_op(CALLBACKOP_register, &event));
+               late_time_init = xen_bind_early_percpu_irq;
+       }
+#endif /* CONFIG_XEN */
        register_percpu_irq(IA64_SPURIOUS_INT_VECTOR, NULL);
 #ifdef CONFIG_SMP
        register_percpu_irq(IA64_IPI_VECTOR, &ipi_irqaction);
+       register_percpu_irq(IA64_IPI_RESCHEDULE, &resched_irqaction);
 #endif
 #ifdef CONFIG_PERFMON
        pfm_init_percpu();
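
The late_time_init trick in the hunk above works because start_kernel()
invokes that hook only after kmem_cache_init(), i.e. once kmalloc() is
usable, and xen_bind_early_percpu_irq() uses it as the "slab is ready"
signal. The shape of the hook-up, reduced to its essentials (names other
than late_time_init are made up):

    /* Sketch: defer work that needs kmalloc() until late_time_init,
     * which start_kernel() calls after the slab allocator is live. */
    static void __init my_late_setup(void)
    {
            /* safe to call bind_virq_to_irqhandler() etc. from here */
    }

    void __init my_init_IRQ(void)
    {
            late_time_init = my_late_setup;
    }
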
@@ -260,6 +544,39 @@ ia64_send_ipi (int cpu, int vector, int delivery_mode, int redirect)
        unsigned long ipi_data;
        unsigned long phys_cpu_id;
 
+#ifdef CONFIG_XEN
+       if (is_running_on_xen()) {
+               int irq = -1;
+
+#ifdef CONFIG_SMP
+               /* TODO: we need to call vcpu_up here */
+               if (unlikely(vector == ap_wakeup_vector)) {
+                       extern void xen_send_ipi(int cpu, int vec);
+                       xen_send_ipi(cpu, vector);
+                       /* vcpu_prepare_and_up(cpu); */
+                       return;
+               }
+#endif
+
+               switch (vector) {
+               case IA64_IPI_VECTOR:
+                       irq = per_cpu(ipi_to_irq, cpu)[IPI_VECTOR];
+                       break;
+               case IA64_IPI_RESCHEDULE:
+                       irq = per_cpu(ipi_to_irq, cpu)[RESCHEDULE_VECTOR];
+                       break;
+               default:
+                       printk(KERN_WARNING "Unsupported IPI type 0x%x\n", vector);
+                       irq = 0;
+                       break;
+               }
+
+               BUG_ON(irq < 0);
+               notify_remote_via_irq(irq);
+               return;
+       }
+#endif /* CONFIG_XEN */
+
 #ifdef CONFIG_SMP
        phys_cpu_id = cpu_physical_id(cpu);
 #else
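
On Xen there is no physical inter-processor interrupt to program: the sender
translates the hardware vector into the event-channel irq that
xen_smp_intr_init() bound for the target cpu and simply notifies it. A
user-space model of that lookup-and-kick shape (names and numbers
illustrative):

    #include <stdio.h>

    #define NR_CPUS 4
    enum { IPI_VECTOR, RESCHEDULE_VECTOR, NR_IPIS };

    /* Mirrors per_cpu(ipi_to_irq, cpu)[...], filled in at bind time. */
    static int ipi_to_irq[NR_CPUS][NR_IPIS] = {
            { -1, -1 }, { -1, -1 }, { -1, -1 }, { -1, -1 },
    };

    static void notify_remote(int irq)
    {
            printf("kick event channel behind irq %d\n", irq);
    }

    /* Model of the Xen branch of ia64_send_ipi(): vector -> pre-bound
     * event-channel irq for that cpu, then one notification. */
    static void send_ipi(int cpu, int which)
    {
            int irq = ipi_to_irq[cpu][which];

            if (irq < 0)
                    return;                 /* not bound yet */
            notify_remote(irq);
    }

    int main(void)
    {
            ipi_to_irq[1][RESCHEDULE_VECTOR] = 42;  /* pretend bind */
            send_ipi(1, RESCHEDULE_VECTOR);
            return 0;
    }
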