X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=arch%2Fia64%2Fkernel%2Fiosapic.c;h=60ff56dddcf3a4243df79751886055b85b5127b0;hb=refs%2Fheads%2Fvserver;hp=111dad9175ba10d3291ad3a64275eb56e75b8e29;hpb=6a77f38946aaee1cd85eeec6cf4229b204c15071;p=linux-2.6.git diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c index 111dad917..60ff56ddd 100644 --- a/arch/ia64/kernel/iosapic.c +++ b/arch/ia64/kernel/iosapic.c @@ -9,54 +9,65 @@ * Copyright (C) 1999 VA Linux Systems * Copyright (C) 1999,2000 Walt Drummond * - * 00/04/19 D. Mosberger Rewritten to mirror more closely the x86 I/O APIC code. - * In particular, we now have separate handlers for edge - * and level triggered interrupts. - * 00/10/27 Asit Mallick, Goutham Rao IRQ vector allocation - * PCI to vector mapping, shared PCI interrupts. - * 00/10/27 D. Mosberger Document things a bit more to make them more understandable. - * Clean up much of the old IOSAPIC cruft. - * 01/07/27 J.I. Lee PCI irq routing, Platform/Legacy interrupts and fixes for - * ACPI S5(SoftOff) support. + * 00/04/19 D. Mosberger Rewritten to mirror more closely the x86 I/O + * APIC code. In particular, we now have separate + * handlers for edge and level triggered + * interrupts. + * 00/10/27 Asit Mallick, Goutham Rao IRQ vector + * allocation PCI to vector mapping, shared PCI + * interrupts. + * 00/10/27 D. Mosberger Document things a bit more to make them more + * understandable. Clean up much of the old + * IOSAPIC cruft. + * 01/07/27 J.I. Lee PCI irq routing, Platform/Legacy interrupts + * and fixes for ACPI S5(SoftOff) support. * 02/01/23 J.I. Lee iosapic pgm fixes for PCI irq routing from _PRT - * 02/01/07 E. Focht Redirectable interrupt vectors in - * iosapic_set_affinity(), initializations for - * /proc/irq/#/smp_affinity + * 02/01/07 E. Focht Redirectable interrupt + * vectors in iosapic_set_affinity(), + * initializations for /proc/irq/#/smp_affinity * 02/04/02 P. Diefenbaugh Cleaned up ACPI PCI IRQ routing. * 02/04/18 J.I. Lee bug fix in iosapic_init_pci_irq - * 02/04/30 J.I. Lee bug fix in find_iosapic to fix ACPI PCI IRQ to IOSAPIC mapping - * error + * 02/04/30 J.I. Lee bug fix in find_iosapic to fix ACPI PCI IRQ to + * IOSAPIC mapping error * 02/07/29 T. Kochi Allocate interrupt vectors dynamically - * 02/08/04 T. Kochi Cleaned up terminology (irq, global system interrupt, vector, etc.) - * 02/09/20 D. Mosberger Simplified by taking advantage of ACPI's pci_irq code. + * 02/08/04 T. Kochi Cleaned up terminology (irq, global system + * interrupt, vector, etc.) + * 02/09/20 D. Mosberger Simplified by taking advantage of ACPI's + * pci_irq code. * 03/02/19 B. Helgaas Make pcat_compat system-wide, not per-IOSAPIC. - * Remove iosapic_address & gsi_base from external interfaces. - * Rationalize __init/__devinit attributes. + * Remove iosapic_address & gsi_base from + * external interfaces. Rationalize + * __init/__devinit attributes. * 04/12/04 Ashok Raj Intel Corporation 2004 - * Updated to work with irq migration necessary for CPU Hotplug + * Updated to work with irq migration necessary + * for CPU Hotplug */ /* - * Here is what the interrupt logic between a PCI device and the kernel looks like: + * Here is what the interrupt logic between a PCI device and the kernel looks + * like: * - * (1) A PCI device raises one of the four interrupt pins (INTA, INTB, INTC, INTD). The - * device is uniquely identified by its bus--, and slot-number (the function - * number does not matter here because all functions share the same interrupt - * lines). + * (1) A PCI device raises one of the four interrupt pins (INTA, INTB, INTC, + * INTD). The device is uniquely identified by its bus-, and slot-number + * (the function number does not matter here because all functions share + * the same interrupt lines). * - * (2) The motherboard routes the interrupt line to a pin on a IOSAPIC controller. - * Multiple interrupt lines may have to share the same IOSAPIC pin (if they're level - * triggered and use the same polarity). Each interrupt line has a unique Global - * System Interrupt (GSI) number which can be calculated as the sum of the controller's - * base GSI number and the IOSAPIC pin number to which the line connects. + * (2) The motherboard routes the interrupt line to a pin on a IOSAPIC + * controller. Multiple interrupt lines may have to share the same + * IOSAPIC pin (if they're level triggered and use the same polarity). + * Each interrupt line has a unique Global System Interrupt (GSI) number + * which can be calculated as the sum of the controller's base GSI number + * and the IOSAPIC pin number to which the line connects. * - * (3) The IOSAPIC uses an internal routing table entries (RTEs) to map the IOSAPIC pin - * into the IA-64 interrupt vector. This interrupt vector is then sent to the CPU. + * (3) The IOSAPIC uses an internal routing table entries (RTEs) to map the + * IOSAPIC pin into the IA-64 interrupt vector. This interrupt vector is then + * sent to the CPU. * - * (4) The kernel recognizes an interrupt as an IRQ. The IRQ interface is used as - * architecture-independent interrupt handling mechanism in Linux. As an - * IRQ is a number, we have to have IA-64 interrupt vector number <-> IRQ number - * mapping. On smaller systems, we use one-to-one mapping between IA-64 vector and - * IRQ. A platform can implement platform_irq_to_vector(irq) and + * (4) The kernel recognizes an interrupt as an IRQ. The IRQ interface is + * used as architecture-independent interrupt handling mechanism in Linux. + * As an IRQ is a number, we have to have + * IA-64 interrupt vector number <-> IRQ number mapping. On smaller + * systems, we use one-to-one mapping between IA-64 vector and IRQ. A + * platform can implement platform_irq_to_vector(irq) and * platform_local_vector_to_irq(vector) APIs to differentiate the mapping. * Please see also include/asm-ia64/hw_irq.h for those APIs. * @@ -64,11 +75,10 @@ * * PCI pin -> global system interrupt (GSI) -> IA-64 vector <-> IRQ * - * Note: The term "IRQ" is loosely used everywhere in Linux kernel to describe interrupts. - * Now we use "IRQ" only for Linux IRQ's. ISA IRQ (isa_irq) is the only exception in this - * source code. + * Note: The term "IRQ" is loosely used everywhere in Linux kernel to + * describeinterrupts. Now we use "IRQ" only for Linux IRQ's. ISA IRQ + * (isa_irq) is the only exception in this source code. */ -#include #include #include @@ -79,6 +89,7 @@ #include #include #include +#include #include #include @@ -89,9 +100,7 @@ #include #include - #undef DEBUG_INTERRUPT_ROUTING -#undef OVERRIDE_DEBUG #ifdef DEBUG_INTERRUPT_ROUTING #define DBG(fmt...) printk(fmt) @@ -99,34 +108,115 @@ #define DBG(fmt...) #endif +#define NR_PREALLOCATE_RTE_ENTRIES \ + (PAGE_SIZE / sizeof(struct iosapic_rte_info)) +#define RTE_PREALLOCATED (1) + static DEFINE_SPINLOCK(iosapic_lock); -/* These tables map IA-64 vectors to the IOSAPIC pin that generates this vector. */ +/* + * These tables map IA-64 vectors to the IOSAPIC pin that generates this + * vector. + */ -static struct iosapic_intr_info { +struct iosapic_rte_info { + struct list_head rte_list; /* node in list of RTEs sharing the + * same vector */ char __iomem *addr; /* base address of IOSAPIC */ - u32 low32; /* current value of low word of Redirection table entry */ - unsigned int gsi_base; /* first GSI assigned to this IOSAPIC */ - char rte_index; /* IOSAPIC RTE index (-1 => not an IOSAPIC interrupt) */ + unsigned int gsi_base; /* first GSI assigned to this + * IOSAPIC */ + char rte_index; /* IOSAPIC RTE index */ + int refcnt; /* reference counter */ + unsigned int flags; /* flags */ +} ____cacheline_aligned; + +static struct iosapic_intr_info { + struct list_head rtes; /* RTEs using this vector (empty => + * not an IOSAPIC interrupt) */ + int count; /* # of RTEs that shares this vector */ + u32 low32; /* current value of low word of + * Redirection table entry */ + unsigned int dest; /* destination CPU physical ID */ unsigned char dmode : 3; /* delivery mode (see iosapic.h) */ - unsigned char polarity: 1; /* interrupt polarity (see iosapic.h) */ + unsigned char polarity: 1; /* interrupt polarity + * (see iosapic.h) */ unsigned char trigger : 1; /* trigger mode (see iosapic.h) */ - int refcnt; /* reference counter */ } iosapic_intr_info[IA64_NUM_VECTORS]; static struct iosapic { char __iomem *addr; /* base address of IOSAPIC */ - unsigned int gsi_base; /* first GSI assigned to this IOSAPIC */ - unsigned short num_rte; /* number of RTE in this IOSAPIC */ + unsigned int gsi_base; /* first GSI assigned to this + * IOSAPIC */ + unsigned short num_rte; /* # of RTEs on this IOSAPIC */ + int rtes_inuse; /* # of RTEs in use on this IOSAPIC */ #ifdef CONFIG_NUMA unsigned short node; /* numa node association via pxm */ #endif } iosapic_lists[NR_IOSAPICS]; -static int num_iosapic; +static unsigned char pcat_compat __devinitdata; /* 8259 compatibility flag */ + +static int iosapic_kmalloc_ok; +static LIST_HEAD(free_rte_list); -static unsigned char pcat_compat __initdata; /* 8259 compatibility flag */ +#ifdef CONFIG_XEN +#include +#include +#include +static inline unsigned int xen_iosapic_read(char __iomem *iosapic, unsigned int reg) +{ + struct physdev_apic apic_op; + int ret; + + apic_op.apic_physbase = (unsigned long)iosapic - + __IA64_UNCACHED_OFFSET; + apic_op.reg = reg; + ret = HYPERVISOR_physdev_op(PHYSDEVOP_apic_read, &apic_op); + if (ret) + return ret; + return apic_op.value; +} +static inline void xen_iosapic_write(char __iomem *iosapic, unsigned int reg, u32 val) +{ + struct physdev_apic apic_op; + + apic_op.apic_physbase = (unsigned long)iosapic - + __IA64_UNCACHED_OFFSET; + apic_op.reg = reg; + apic_op.value = val; + HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic_op); +} + +static inline unsigned int iosapic_read(char __iomem *iosapic, unsigned int reg) +{ + if (!is_running_on_xen()) { + writel(reg, iosapic + IOSAPIC_REG_SELECT); + return readl(iosapic + IOSAPIC_WINDOW); + } else + return xen_iosapic_read(iosapic, reg); +} + +static inline void iosapic_write(char __iomem *iosapic, unsigned int reg, u32 val) +{ + if (!is_running_on_xen()) { + writel(reg, iosapic + IOSAPIC_REG_SELECT); + writel(val, iosapic + IOSAPIC_WINDOW); + } else + xen_iosapic_write(iosapic, reg, val); +} + +int xen_assign_irq_vector(int irq) +{ + struct physdev_irq irq_op; + + irq_op.irq = irq; + if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) + return -ENOSPC; + + return irq_op.vector; +} +#endif /* XEN */ /* * Find an IOSAPIC associated with a GSI @@ -136,8 +226,9 @@ find_iosapic (unsigned int gsi) { int i; - for (i = 0; i < num_iosapic; i++) { - if ((unsigned) (gsi - iosapic_lists[i].gsi_base) < iosapic_lists[i].num_rte) + for (i = 0; i < NR_IOSAPICS; i++) { + if ((unsigned) (gsi - iosapic_lists[i].gsi_base) < + iosapic_lists[i].num_rte) return i; } @@ -148,10 +239,13 @@ static inline int _gsi_to_vector (unsigned int gsi) { struct iosapic_intr_info *info; + struct iosapic_rte_info *rte; - for (info = iosapic_intr_info; info < iosapic_intr_info + IA64_NUM_VECTORS; ++info) - if (info->gsi_base + info->rte_index == gsi) - return info - iosapic_intr_info; + for (info = iosapic_intr_info; info < + iosapic_intr_info + IA64_NUM_VECTORS; ++info) + list_for_each_entry(rte, &info->rtes, rte_list) + if (rte->gsi_base + rte->rte_index == gsi) + return info - iosapic_intr_info; return -1; } @@ -168,33 +262,53 @@ gsi_to_vector (unsigned int gsi) int gsi_to_irq (unsigned int gsi) { + unsigned long flags; + int irq; /* - * XXX fix me: this assumes an identity mapping vetween IA-64 vector and Linux irq - * numbers... + * XXX fix me: this assumes an identity mapping between IA-64 vector + * and Linux irq numbers... */ - return _gsi_to_vector(gsi); + spin_lock_irqsave(&iosapic_lock, flags); + { + irq = _gsi_to_vector(gsi); + } + spin_unlock_irqrestore(&iosapic_lock, flags); + + return irq; +} + +static struct iosapic_rte_info *gsi_vector_to_rte(unsigned int gsi, + unsigned int vec) +{ + struct iosapic_rte_info *rte; + + list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, rte_list) + if (rte->gsi_base + rte->rte_index == gsi) + return rte; + return NULL; } static void -set_rte (unsigned int vector, unsigned int dest, int mask) +set_rte (unsigned int gsi, unsigned int vector, unsigned int dest, int mask) { unsigned long pol, trigger, dmode; u32 low32, high32; char __iomem *addr; int rte_index; char redir; + struct iosapic_rte_info *rte; DBG(KERN_DEBUG"IOSAPIC: routing vector %d to 0x%x\n", vector, dest); - rte_index = iosapic_intr_info[vector].rte_index; - if (rte_index < 0) + rte = gsi_vector_to_rte(gsi, vector); + if (!rte) return; /* not an IOSAPIC interrupt */ - addr = iosapic_intr_info[vector].addr; + rte_index = rte->rte_index; + addr = rte->addr; pol = iosapic_intr_info[vector].polarity; trigger = iosapic_intr_info[vector].trigger; dmode = iosapic_intr_info[vector].dmode; - vector &= (~IA64_IRQ_REDIRECTED); redir = (dmode == IOSAPIC_LOWEST_PRIORITY) ? 1 : 0; @@ -204,7 +318,9 @@ set_rte (unsigned int vector, unsigned int dest, int mask) for (irq = 0; irq < NR_IRQS; ++irq) if (irq_to_vector(irq) == vector) { - set_irq_affinity_info(irq, (int)(dest & 0xffff), redir); + set_irq_affinity_info(irq, + (int)(dest & 0xffff), + redir); break; } } @@ -222,14 +338,36 @@ set_rte (unsigned int vector, unsigned int dest, int mask) iosapic_write(addr, IOSAPIC_RTE_HIGH(rte_index), high32); iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32); iosapic_intr_info[vector].low32 = low32; + iosapic_intr_info[vector].dest = dest; } static void -nop (unsigned int vector) +nop (unsigned int irq) { /* do nothing... */ } + +#ifdef CONFIG_KEXEC +void +kexec_disable_iosapic(void) +{ + struct iosapic_intr_info *info; + struct iosapic_rte_info *rte; + u8 vec = 0; + for (info = iosapic_intr_info; info < + iosapic_intr_info + IA64_NUM_VECTORS; ++info, ++vec) { + list_for_each_entry(rte, &info->rtes, + rte_list) { + iosapic_write(rte->addr, + IOSAPIC_RTE_LOW(rte->rte_index), + IOSAPIC_MASK|vec); + iosapic_eoi(rte->addr, vec); + } + } +} +#endif + static void mask_irq (unsigned int irq) { @@ -238,18 +376,21 @@ mask_irq (unsigned int irq) u32 low32; int rte_index; ia64_vector vec = irq_to_vector(irq); + struct iosapic_rte_info *rte; - addr = iosapic_intr_info[vec].addr; - rte_index = iosapic_intr_info[vec].rte_index; - - if (rte_index < 0) + if (list_empty(&iosapic_intr_info[vec].rtes)) return; /* not an IOSAPIC interrupt! */ spin_lock_irqsave(&iosapic_lock, flags); { /* set only the mask bit */ low32 = iosapic_intr_info[vec].low32 |= IOSAPIC_MASK; - iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32); + list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, + rte_list) { + addr = rte->addr; + rte_index = rte->rte_index; + iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32); + } } spin_unlock_irqrestore(&iosapic_lock, flags); } @@ -262,16 +403,20 @@ unmask_irq (unsigned int irq) u32 low32; int rte_index; ia64_vector vec = irq_to_vector(irq); + struct iosapic_rte_info *rte; - addr = iosapic_intr_info[vec].addr; - rte_index = iosapic_intr_info[vec].rte_index; - if (rte_index < 0) + if (list_empty(&iosapic_intr_info[vec].rtes)) return; /* not an IOSAPIC interrupt! */ spin_lock_irqsave(&iosapic_lock, flags); { low32 = iosapic_intr_info[vec].low32 &= ~IOSAPIC_MASK; - iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32); + list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, + rte_list) { + addr = rte->addr; + rte_index = rte->rte_index; + iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32); + } } spin_unlock_irqrestore(&iosapic_lock, flags); } @@ -287,6 +432,7 @@ iosapic_set_affinity (unsigned int irq, cpumask_t mask) char __iomem *addr; int redir = (irq & IA64_IRQ_REDIRECTED) ? 1 : 0; ia64_vector vec; + struct iosapic_rte_info *rte; irq &= (~IA64_IRQ_REDIRECTED); vec = irq_to_vector(irq); @@ -296,10 +442,7 @@ iosapic_set_affinity (unsigned int irq, cpumask_t mask) dest = cpu_physical_id(first_cpu(mask)); - rte_index = iosapic_intr_info[vec].rte_index; - addr = iosapic_intr_info[vec].addr; - - if (rte_index < 0) + if (list_empty(&iosapic_intr_info[vec].rtes)) return; /* not an IOSAPIC interrupt */ set_irq_affinity_info(irq, dest, redir); @@ -309,18 +452,27 @@ iosapic_set_affinity (unsigned int irq, cpumask_t mask) spin_lock_irqsave(&iosapic_lock, flags); { - low32 = iosapic_intr_info[vec].low32 & ~(7 << IOSAPIC_DELIVERY_SHIFT); + low32 = iosapic_intr_info[vec].low32 & + ~(7 << IOSAPIC_DELIVERY_SHIFT); if (redir) /* change delivery mode to lowest priority */ - low32 |= (IOSAPIC_LOWEST_PRIORITY << IOSAPIC_DELIVERY_SHIFT); + low32 |= (IOSAPIC_LOWEST_PRIORITY << + IOSAPIC_DELIVERY_SHIFT); else /* change delivery mode to fixed */ low32 |= (IOSAPIC_FIXED << IOSAPIC_DELIVERY_SHIFT); iosapic_intr_info[vec].low32 = low32; - iosapic_write(addr, IOSAPIC_RTE_HIGH(rte_index), high32); - iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32); + iosapic_intr_info[vec].dest = dest; + list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, + rte_list) { + addr = rte->addr; + rte_index = rte->rte_index; + iosapic_write(addr, IOSAPIC_RTE_HIGH(rte_index), + high32); + iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32); + } } spin_unlock_irqrestore(&iosapic_lock, flags); #endif @@ -341,9 +493,11 @@ static void iosapic_end_level_irq (unsigned int irq) { ia64_vector vec = irq_to_vector(irq); + struct iosapic_rte_info *rte; - move_irq(irq); - iosapic_eoi(iosapic_intr_info[vec].addr, vec); + move_native_irq(irq); + list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, rte_list) + iosapic_eoi(rte->addr, vec); } #define iosapic_shutdown_level_irq mask_irq @@ -351,14 +505,16 @@ iosapic_end_level_irq (unsigned int irq) #define iosapic_disable_level_irq mask_irq #define iosapic_ack_level_irq nop -struct hw_interrupt_type irq_type_iosapic_level = { - .typename = "IO-SAPIC-level", +struct irq_chip irq_type_iosapic_level = { + .name = "IO-SAPIC-level", .startup = iosapic_startup_level_irq, .shutdown = iosapic_shutdown_level_irq, .enable = iosapic_enable_level_irq, .disable = iosapic_disable_level_irq, .ack = iosapic_ack_level_irq, .end = iosapic_end_level_irq, + .mask = mask_irq, + .unmask = unmask_irq, .set_affinity = iosapic_set_affinity }; @@ -381,15 +537,16 @@ iosapic_startup_edge_irq (unsigned int irq) static void iosapic_ack_edge_irq (unsigned int irq) { - irq_desc_t *idesc = irq_descp(irq); + irq_desc_t *idesc = irq_desc + irq; - move_irq(irq); + move_native_irq(irq); /* * Once we have recorded IRQ_PENDING already, we can mask the * interrupt for real. This prevents IRQ storms from unhandled * devices. */ - if ((idesc->status & (IRQ_PENDING|IRQ_DISABLED)) == (IRQ_PENDING|IRQ_DISABLED)) + if ((idesc->status & (IRQ_PENDING|IRQ_DISABLED)) == + (IRQ_PENDING|IRQ_DISABLED)) mask_irq(irq); } @@ -397,14 +554,16 @@ iosapic_ack_edge_irq (unsigned int irq) #define iosapic_disable_edge_irq nop #define iosapic_end_edge_irq nop -struct hw_interrupt_type irq_type_iosapic_edge = { - .typename = "IO-SAPIC-edge", +struct irq_chip irq_type_iosapic_edge = { + .name = "IO-SAPIC-edge", .startup = iosapic_startup_edge_irq, .shutdown = iosapic_disable_edge_irq, .enable = iosapic_enable_edge_irq, .disable = iosapic_disable_edge_irq, .ack = iosapic_ack_edge_irq, .end = iosapic_end_edge_irq, + .mask = mask_irq, + .unmask = unmask_irq, .set_affinity = iosapic_set_affinity }; @@ -423,6 +582,34 @@ iosapic_version (char __iomem *addr) return iosapic_read(addr, IOSAPIC_VERSION); } +static int iosapic_find_sharable_vector (unsigned long trigger, + unsigned long pol) +{ + int i, vector = -1, min_count = -1; + struct iosapic_intr_info *info; + + /* + * shared vectors for edge-triggered interrupts are not + * supported yet + */ + if (trigger == IOSAPIC_EDGE) + return -1; + + for (i = IA64_FIRST_DEVICE_VECTOR; i <= IA64_LAST_DEVICE_VECTOR; i++) { + info = &iosapic_intr_info[i]; + if (info->trigger == trigger && info->polarity == pol && + (info->dmode == IOSAPIC_FIXED || info->dmode == + IOSAPIC_LOWEST_PRIORITY)) { + if (min_count == -1 || info->count < min_count) { + vector = i; + min_count = info->count; + } + } + } + + return vector; +} + /* * if the given vector is already owned by other, * assign a new vector for the other and make the vector available @@ -432,20 +619,71 @@ iosapic_reassign_vector (int vector) { int new_vector; - if (iosapic_intr_info[vector].rte_index >= 0 || iosapic_intr_info[vector].addr - || iosapic_intr_info[vector].gsi_base || iosapic_intr_info[vector].dmode - || iosapic_intr_info[vector].polarity || iosapic_intr_info[vector].trigger) - { + if (!list_empty(&iosapic_intr_info[vector].rtes)) { new_vector = assign_irq_vector(AUTO_ASSIGN); - printk(KERN_INFO "Reassigning vector %d to %d\n", vector, new_vector); + if (new_vector < 0) + panic("%s: out of interrupt vectors!\n", __FUNCTION__); + printk(KERN_INFO "Reassigning vector %d to %d\n", + vector, new_vector); memcpy(&iosapic_intr_info[new_vector], &iosapic_intr_info[vector], sizeof(struct iosapic_intr_info)); - memset(&iosapic_intr_info[vector], 0, sizeof(struct iosapic_intr_info)); - iosapic_intr_info[vector].rte_index = -1; + INIT_LIST_HEAD(&iosapic_intr_info[new_vector].rtes); + list_move(iosapic_intr_info[vector].rtes.next, + &iosapic_intr_info[new_vector].rtes); + memset(&iosapic_intr_info[vector], 0, + sizeof(struct iosapic_intr_info)); + iosapic_intr_info[vector].low32 = IOSAPIC_MASK; + INIT_LIST_HEAD(&iosapic_intr_info[vector].rtes); } } -static void +static struct iosapic_rte_info *iosapic_alloc_rte (void) +{ + int i; + struct iosapic_rte_info *rte; + int preallocated = 0; + + if (!iosapic_kmalloc_ok && list_empty(&free_rte_list)) { + rte = alloc_bootmem(sizeof(struct iosapic_rte_info) * + NR_PREALLOCATE_RTE_ENTRIES); + if (!rte) + return NULL; + for (i = 0; i < NR_PREALLOCATE_RTE_ENTRIES; i++, rte++) + list_add(&rte->rte_list, &free_rte_list); + } + + if (!list_empty(&free_rte_list)) { + rte = list_entry(free_rte_list.next, struct iosapic_rte_info, + rte_list); + list_del(&rte->rte_list); + preallocated++; + } else { + rte = kmalloc(sizeof(struct iosapic_rte_info), GFP_ATOMIC); + if (!rte) + return NULL; + } + + memset(rte, 0, sizeof(struct iosapic_rte_info)); + if (preallocated) + rte->flags |= RTE_PREALLOCATED; + + return rte; +} + +static void iosapic_free_rte (struct iosapic_rte_info *rte) +{ + if (rte->flags & RTE_PREALLOCATED) + list_add_tail(&rte->rte_list, &free_rte_list); + else + kfree(rte); +} + +static inline int vector_is_shared (int vector) +{ + return (iosapic_intr_info[vector].count > 1); +} + +static int register_intr (unsigned int gsi, int vector, unsigned char delivery, unsigned long polarity, unsigned long trigger) { @@ -455,37 +693,68 @@ register_intr (unsigned int gsi, int vector, unsigned char delivery, int index; unsigned long gsi_base; void __iomem *iosapic_address; + struct iosapic_rte_info *rte; index = find_iosapic(gsi); if (index < 0) { - printk(KERN_WARNING "%s: No IOSAPIC for GSI %u\n", __FUNCTION__, gsi); - return; + printk(KERN_WARNING "%s: No IOSAPIC for GSI %u\n", + __FUNCTION__, gsi); + return -ENODEV; } iosapic_address = iosapic_lists[index].addr; gsi_base = iosapic_lists[index].gsi_base; - rte_index = gsi - gsi_base; - iosapic_intr_info[vector].rte_index = rte_index; + rte = gsi_vector_to_rte(gsi, vector); + if (!rte) { + rte = iosapic_alloc_rte(); + if (!rte) { + printk(KERN_WARNING "%s: cannot allocate memory\n", + __FUNCTION__); + return -ENOMEM; + } + + rte_index = gsi - gsi_base; + rte->rte_index = rte_index; + rte->addr = iosapic_address; + rte->gsi_base = gsi_base; + rte->refcnt++; + list_add_tail(&rte->rte_list, &iosapic_intr_info[vector].rtes); + iosapic_intr_info[vector].count++; + iosapic_lists[index].rtes_inuse++; + } + else if (vector_is_shared(vector)) { + struct iosapic_intr_info *info = &iosapic_intr_info[vector]; + if (info->trigger != trigger || info->polarity != polarity) { + printk (KERN_WARNING + "%s: cannot override the interrupt\n", + __FUNCTION__); + return -EINVAL; + } + } + iosapic_intr_info[vector].polarity = polarity; iosapic_intr_info[vector].dmode = delivery; - iosapic_intr_info[vector].addr = iosapic_address; - iosapic_intr_info[vector].gsi_base = gsi_base; iosapic_intr_info[vector].trigger = trigger; - iosapic_intr_info[vector].refcnt++; + + if (is_running_on_xen()) + return 0; if (trigger == IOSAPIC_EDGE) irq_type = &irq_type_iosapic_edge; else irq_type = &irq_type_iosapic_level; - idesc = irq_descp(vector); - if (idesc->handler != irq_type) { - if (idesc->handler != &no_irq_type) - printk(KERN_WARNING "%s: changing vector %d from %s to %s\n", - __FUNCTION__, vector, idesc->handler->typename, irq_type->typename); - idesc->handler = irq_type; + idesc = irq_desc + vector; + if (idesc->chip != irq_type) { + if (idesc->chip != &no_irq_type) + printk(KERN_WARNING + "%s: changing vector %d from %s to %s\n", + __FUNCTION__, vector, + idesc->chip->name, irq_type->name); + idesc->chip = irq_type; } + return 0; } static unsigned int @@ -493,20 +762,33 @@ get_target_cpu (unsigned int gsi, int vector) { #ifdef CONFIG_SMP static int cpu = -1; + extern int cpe_vector; + + /* + * In case of vector shared by multiple RTEs, all RTEs that + * share the vector need to use the same destination CPU. + */ + if (!list_empty(&iosapic_intr_info[vector].rtes)) + return iosapic_intr_info[vector].dest; /* * If the platform supports redirection via XTP, let it * distribute interrupts. */ if (smp_int_redirect & SMP_IRQ_REDIRECTION) - return hard_smp_processor_id(); + return cpu_physical_id(smp_processor_id()); /* * Some interrupts (ACPI SCI, for instance) are registered * before the BSP is marked as online. */ if (!cpu_online(smp_processor_id())) - return hard_smp_processor_id(); + return cpu_physical_id(smp_processor_id()); + +#ifdef CONFIG_ACPI + if (cpe_vector > 0 && vector == IA64_CPEP_VECTOR) + return get_cpei_target_cpu(); +#endif #ifdef CONFIG_NUMA { @@ -530,7 +812,7 @@ get_target_cpu (unsigned int gsi, int vector) if (!num_cpus) goto skip_numa_setup; - /* Use vector assigment to distribute across cpus in node */ + /* Use vector assignment to distribute across cpus in node */ cpu_index = vector % num_cpus; for (numa_cpu = first_cpu(cpu_mask) ; i < cpu_index ; i++) @@ -552,8 +834,8 @@ skip_numa_setup: } while (!cpu_online(cpu)); return cpu_physical_id(cpu); -#else - return hard_smp_processor_id(); +#else /* CONFIG_SMP */ + return cpu_physical_id(smp_processor_id()); #endif } @@ -566,10 +848,12 @@ int iosapic_register_intr (unsigned int gsi, unsigned long polarity, unsigned long trigger) { - int vector; + int vector, mask = 1, err; unsigned int dest; unsigned long flags; - + struct iosapic_rte_info *rte; + u32 low32; +again: /* * If this GSI has already been registered (i.e., it's a * shared interrupt, or we lost a race to register it), @@ -579,19 +863,55 @@ iosapic_register_intr (unsigned int gsi, { vector = gsi_to_vector(gsi); if (vector > 0) { - iosapic_intr_info[vector].refcnt++; + rte = gsi_vector_to_rte(gsi, vector); + rte->refcnt++; spin_unlock_irqrestore(&iosapic_lock, flags); return vector; } + } + spin_unlock_irqrestore(&iosapic_lock, flags); + + /* If vector is running out, we try to find a sharable vector */ + vector = assign_irq_vector(AUTO_ASSIGN); + if (vector < 0) { + vector = iosapic_find_sharable_vector(trigger, polarity); + if (vector < 0) + return -ENOSPC; + } + + spin_lock_irqsave(&irq_desc[vector].lock, flags); + spin_lock(&iosapic_lock); + { + if (gsi_to_vector(gsi) > 0) { + if (list_empty(&iosapic_intr_info[vector].rtes)) + free_irq_vector(vector); + spin_unlock(&iosapic_lock); + spin_unlock_irqrestore(&irq_desc[vector].lock, + flags); + goto again; + } - vector = assign_irq_vector(AUTO_ASSIGN); dest = get_target_cpu(gsi, vector); - register_intr(gsi, vector, IOSAPIC_LOWEST_PRIORITY, - polarity, trigger); + err = register_intr(gsi, vector, IOSAPIC_LOWEST_PRIORITY, + polarity, trigger); + if (err < 0) { + spin_unlock(&iosapic_lock); + spin_unlock_irqrestore(&irq_desc[vector].lock, + flags); + return err; + } - set_rte(vector, dest, 1); + /* + * If the vector is shared and already unmasked for + * other interrupt sources, don't mask it. + */ + low32 = iosapic_intr_info[vector].low32; + if (vector_is_shared(vector) && !(low32 & IOSAPIC_MASK)) + mask = 0; + set_rte(gsi, vector, dest, mask); } - spin_unlock_irqrestore(&iosapic_lock, flags); + spin_unlock(&iosapic_lock); + spin_unlock_irqrestore(&irq_desc[vector].lock, flags); printk(KERN_INFO "GSI %u (%s, %s) -> CPU %d (0x%04x) vector %d\n", gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"), @@ -601,15 +921,16 @@ iosapic_register_intr (unsigned int gsi, return vector; } -#ifdef CONFIG_ACPI_DEALLOCATE_IRQ void iosapic_unregister_intr (unsigned int gsi) { unsigned long flags; - int irq, vector; + int irq, vector, index; irq_desc_t *idesc; - int rte_index; + u32 low32; unsigned long trigger, polarity; + unsigned int dest; + struct iosapic_rte_info *rte; /* * If the irq associated with the gsi is not found, @@ -618,70 +939,82 @@ iosapic_unregister_intr (unsigned int gsi) */ irq = gsi_to_irq(gsi); if (irq < 0) { - printk(KERN_ERR "iosapic_unregister_intr(%u) unbalanced\n", gsi); + printk(KERN_ERR "iosapic_unregister_intr(%u) unbalanced\n", + gsi); WARN_ON(1); return; } vector = irq_to_vector(irq); - idesc = irq_descp(irq); + idesc = irq_desc + irq; spin_lock_irqsave(&idesc->lock, flags); spin_lock(&iosapic_lock); { - rte_index = iosapic_intr_info[vector].rte_index; - if (rte_index < 0) { - spin_unlock(&iosapic_lock); - spin_unlock_irqrestore(&idesc->lock, flags); - printk(KERN_ERR "iosapic_unregister_intr(%u) unbalanced\n", gsi); + if ((rte = gsi_vector_to_rte(gsi, vector)) == NULL) { + printk(KERN_ERR + "iosapic_unregister_intr(%u) unbalanced\n", + gsi); WARN_ON(1); - return; + goto out; } - if (--iosapic_intr_info[vector].refcnt > 0) { - spin_unlock(&iosapic_lock); - spin_unlock_irqrestore(&idesc->lock, flags); - return; - } + if (--rte->refcnt > 0) + goto out; - /* - * If interrupt handlers still exist on the irq - * associated with the gsi, don't unregister the - * interrupt. - */ - if (idesc->action) { - iosapic_intr_info[vector].refcnt++; - spin_unlock(&iosapic_lock); - spin_unlock_irqrestore(&idesc->lock, flags); - printk(KERN_WARNING "Cannot unregister GSI. IRQ %u is still in use.\n", irq); - return; - } + /* Mask the interrupt */ + low32 = iosapic_intr_info[vector].low32 | IOSAPIC_MASK; + iosapic_write(rte->addr, IOSAPIC_RTE_LOW(rte->rte_index), + low32); - /* Clear the interrupt controller descriptor. */ - idesc->handler = &no_irq_type; + /* Remove the rte entry from the list */ + list_del(&rte->rte_list); + iosapic_intr_info[vector].count--; + iosapic_free_rte(rte); + index = find_iosapic(gsi); + iosapic_lists[index].rtes_inuse--; + WARN_ON(iosapic_lists[index].rtes_inuse < 0); - trigger = iosapic_intr_info[vector].trigger; + trigger = iosapic_intr_info[vector].trigger; polarity = iosapic_intr_info[vector].polarity; + dest = iosapic_intr_info[vector].dest; + printk(KERN_INFO + "GSI %u (%s, %s) -> CPU %d (0x%04x)" + " vector %d unregistered\n", + gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"), + (polarity == IOSAPIC_POL_HIGH ? "high" : "low"), + cpu_logical_id(dest), dest, vector); + + if (list_empty(&iosapic_intr_info[vector].rtes)) { + /* Sanity check */ + BUG_ON(iosapic_intr_info[vector].count); + + /* Clear the interrupt controller descriptor */ + idesc->chip = &no_irq_type; + + /* Clear the interrupt information */ + memset(&iosapic_intr_info[vector], 0, + sizeof(struct iosapic_intr_info)); + iosapic_intr_info[vector].low32 |= IOSAPIC_MASK; + INIT_LIST_HEAD(&iosapic_intr_info[vector].rtes); + + if (idesc->action) { + printk(KERN_ERR + "interrupt handlers still exist on" + "IRQ %u\n", irq); + WARN_ON(1); + } - /* Clear the interrupt information. */ - memset(&iosapic_intr_info[vector], 0, sizeof(struct iosapic_intr_info)); - iosapic_intr_info[vector].rte_index = -1; /* mark as unused */ + /* Free the interrupt vector */ + free_irq_vector(vector); + } } + out: spin_unlock(&iosapic_lock); spin_unlock_irqrestore(&idesc->lock, flags); - - /* Free the interrupt vector */ - free_irq_vector(vector); - - printk(KERN_INFO "GSI %u (%s, %s) -> vector %d unregisterd.\n", - gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"), - (polarity == IOSAPIC_POL_HIGH ? "high" : "low"), - vector); } -#endif /* CONFIG_ACPI_DEALLOCATE_IRQ */ /* * ACPI calls this when it finds an entry for a platform interrupt. - * Note that the irq_base and IOSAPIC address must be set in iosapic_init(). */ int __init iosapic_register_platform_intr (u32 int_type, unsigned int gsi, @@ -705,6 +1038,8 @@ iosapic_register_platform_intr (u32 int_type, unsigned int gsi, break; case ACPI_INTERRUPT_INIT: vector = assign_irq_vector(AUTO_ASSIGN); + if (vector < 0) + panic("%s: out of interrupt vectors!\n", __FUNCTION__); delivery = IOSAPIC_INIT; break; case ACPI_INTERRUPT_CPEI: @@ -713,26 +1048,27 @@ iosapic_register_platform_intr (u32 int_type, unsigned int gsi, mask = 1; break; default: - printk(KERN_ERR "iosapic_register_platform_irq(): invalid int type 0x%x\n", int_type); + printk(KERN_ERR "%s: invalid int type 0x%x\n", __FUNCTION__, + int_type); return -1; } register_intr(gsi, vector, delivery, polarity, trigger); - printk(KERN_INFO "PLATFORM int %s (0x%x): GSI %u (%s, %s) -> CPU %d (0x%04x) vector %d\n", + printk(KERN_INFO + "PLATFORM int %s (0x%x): GSI %u (%s, %s) -> CPU %d (0x%04x)" + " vector %d\n", int_type < ARRAY_SIZE(name) ? name[int_type] : "unknown", int_type, gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"), (polarity == IOSAPIC_POL_HIGH ? "high" : "low"), cpu_logical_id(dest), dest, vector); - set_rte(vector, dest, mask); + set_rte(gsi, vector, dest, mask); return vector; } - /* * ACPI calls this when it finds an entry for a legacy ISA IRQ override. - * Note that the gsi_base and IOSAPIC address must be set in iosapic_init(). */ void __init iosapic_override_isa_irq (unsigned int isa_irq, unsigned int gsi, @@ -740,7 +1076,7 @@ iosapic_override_isa_irq (unsigned int isa_irq, unsigned int gsi, unsigned long trigger) { int vector; - unsigned int dest = hard_smp_processor_id(); + unsigned int dest = cpu_physical_id(smp_processor_id()); vector = isa_irq_to_vector(isa_irq); @@ -751,7 +1087,7 @@ iosapic_override_isa_irq (unsigned int isa_irq, unsigned int gsi, polarity == IOSAPIC_POL_HIGH ? "high" : "low", cpu_logical_id(dest), dest, vector); - set_rte(vector, dest, 1); + set_rte(gsi, vector, dest, 1); } void __init @@ -759,59 +1095,156 @@ iosapic_system_init (int system_pcat_compat) { int vector; - for (vector = 0; vector < IA64_NUM_VECTORS; ++vector) - iosapic_intr_info[vector].rte_index = -1; /* mark as unused */ + for (vector = 0; vector < IA64_NUM_VECTORS; ++vector) { + iosapic_intr_info[vector].low32 = IOSAPIC_MASK; + /* mark as unused */ + INIT_LIST_HEAD(&iosapic_intr_info[vector].rtes); + } pcat_compat = system_pcat_compat; + if (is_running_on_xen()) + return; + if (pcat_compat) { /* - * Disable the compatibility mode interrupts (8259 style), needs IN/OUT support - * enabled. + * Disable the compatibility mode interrupts (8259 style), + * needs IN/OUT support enabled. */ - printk(KERN_INFO "%s: Disabling PC-AT compatible 8259 interrupts\n", __FUNCTION__); + printk(KERN_INFO + "%s: Disabling PC-AT compatible 8259 interrupts\n", + __FUNCTION__); outb(0xff, 0xA1); outb(0xff, 0x21); } } -void __init +static inline int +iosapic_alloc (void) +{ + int index; + + for (index = 0; index < NR_IOSAPICS; index++) + if (!iosapic_lists[index].addr) + return index; + + printk(KERN_WARNING "%s: failed to allocate iosapic\n", __FUNCTION__); + return -1; +} + +static inline void +iosapic_free (int index) +{ + memset(&iosapic_lists[index], 0, sizeof(iosapic_lists[0])); +} + +static inline int +iosapic_check_gsi_range (unsigned int gsi_base, unsigned int ver) +{ + int index; + unsigned int gsi_end, base, end; + + /* check gsi range */ + gsi_end = gsi_base + ((ver >> 16) & 0xff); + for (index = 0; index < NR_IOSAPICS; index++) { + if (!iosapic_lists[index].addr) + continue; + + base = iosapic_lists[index].gsi_base; + end = base + iosapic_lists[index].num_rte - 1; + + if (gsi_end < base || end < gsi_base) + continue; /* OK */ + + return -EBUSY; + } + return 0; +} + +int __devinit iosapic_init (unsigned long phys_addr, unsigned int gsi_base) { - int num_rte; + int num_rte, err, index; unsigned int isa_irq, ver; char __iomem *addr; + unsigned long flags; + + spin_lock_irqsave(&iosapic_lock, flags); + { + addr = ioremap(phys_addr, 0); + ver = iosapic_version(addr); - addr = ioremap(phys_addr, 0); - ver = iosapic_version(addr); + if ((err = iosapic_check_gsi_range(gsi_base, ver))) { + iounmap(addr); + spin_unlock_irqrestore(&iosapic_lock, flags); + return err; + } - /* - * The MAX_REDIR register holds the highest input pin - * number (starting from 0). - * We add 1 so that we can use it for number of pins (= RTEs) - */ - num_rte = ((ver >> 16) & 0xff) + 1; + /* + * The MAX_REDIR register holds the highest input pin + * number (starting from 0). + * We add 1 so that we can use it for number of pins (= RTEs) + */ + num_rte = ((ver >> 16) & 0xff) + 1; - iosapic_lists[num_iosapic].addr = addr; - iosapic_lists[num_iosapic].gsi_base = gsi_base; - iosapic_lists[num_iosapic].num_rte = num_rte; + index = iosapic_alloc(); + iosapic_lists[index].addr = addr; + iosapic_lists[index].gsi_base = gsi_base; + iosapic_lists[index].num_rte = num_rte; #ifdef CONFIG_NUMA - iosapic_lists[num_iosapic].node = MAX_NUMNODES; + iosapic_lists[index].node = MAX_NUMNODES; #endif - num_iosapic++; + } + spin_unlock_irqrestore(&iosapic_lock, flags); if ((gsi_base == 0) && pcat_compat) { /* - * Map the legacy ISA devices into the IOSAPIC data. Some of these may - * get reprogrammed later on with data from the ACPI Interrupt Source - * Override table. + * Map the legacy ISA devices into the IOSAPIC data. Some of + * these may get reprogrammed later on with data from the ACPI + * Interrupt Source Override table. */ for (isa_irq = 0; isa_irq < 16; ++isa_irq) - iosapic_override_isa_irq(isa_irq, isa_irq, IOSAPIC_POL_HIGH, IOSAPIC_EDGE); + iosapic_override_isa_irq(isa_irq, isa_irq, + IOSAPIC_POL_HIGH, + IOSAPIC_EDGE); + } + return 0; +} + +#ifdef CONFIG_HOTPLUG +int +iosapic_remove (unsigned int gsi_base) +{ + int index, err = 0; + unsigned long flags; + + spin_lock_irqsave(&iosapic_lock, flags); + { + index = find_iosapic(gsi_base); + if (index < 0) { + printk(KERN_WARNING "%s: No IOSAPIC for GSI base %u\n", + __FUNCTION__, gsi_base); + goto out; + } + + if (iosapic_lists[index].rtes_inuse) { + err = -EBUSY; + printk(KERN_WARNING + "%s: IOSAPIC for GSI base %u is busy\n", + __FUNCTION__, gsi_base); + goto out; + } + + iounmap(iosapic_lists[index].addr); + iosapic_free(index); } + out: + spin_unlock_irqrestore(&iosapic_lock, flags); + return err; } +#endif /* CONFIG_HOTPLUG */ #ifdef CONFIG_NUMA -void __init +void __devinit map_iosapic_to_node(unsigned int gsi_base, int node) { int index; @@ -826,3 +1259,10 @@ map_iosapic_to_node(unsigned int gsi_base, int node) return; } #endif + +static int __init iosapic_enable_kmalloc (void) +{ + iosapic_kmalloc_ok = 1; + return 0; +} +core_initcall (iosapic_enable_kmalloc);