Merge to Fedora kernel-2.6.18-1.2224_FC5 patched with stable patch-2.6.18.1-vs2.0...
[linux-2.6.git] / arch / x86_64 / kernel / io_apic.c
index 2fceed5..924a4a3 100644 (file)
  */
 
 #include <linux/mm.h>
-#include <linux/irq.h>
 #include <linux/interrupt.h>
 #include <linux/init.h>
 #include <linux/delay.h>
 #include <linux/sched.h>
-#include <linux/config.h>
 #include <linux/smp_lock.h>
 #include <linux/mc146818rtc.h>
 #include <linux/acpi.h>
+#include <linux/sysdev.h>
+#ifdef CONFIG_ACPI
+#include <acpi/acpi_bus.h>
+#endif
 
 #include <asm/io.h>
 #include <asm/smp.h>
 #include <asm/desc.h>
 #include <asm/proto.h>
+#include <asm/mach_apic.h>
+#include <asm/acpi.h>
+#include <asm/dma.h>
+#include <asm/nmi.h>
+
+#define __apicdebuginit  __init
 
 int sis_apic_bug; /* not actually supported, dummy for compile */
 
-#undef APIC_LOCKUP_DEBUG
+static int no_timer_check;
+
+int disable_timer_pin_1 __initdata;
 
-#define APIC_LOCKUP_DEBUG
+int timer_over_8254 __initdata = 0;
 
-static spinlock_t ioapic_lock = SPIN_LOCK_UNLOCKED;
+/* Where, if anywhere, the i8259 is connected in external int mode */
+static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
+
+static DEFINE_SPINLOCK(ioapic_lock);
+static DEFINE_SPINLOCK(vector_lock);
 
 /*
  * # of IRQ routing registers
@@ -53,7 +67,7 @@ int nr_ioapic_registers[MAX_IO_APICS];
  * Rough estimation of how many shared IRQs there are, can
  * be changed anytime.
  */
-#define MAX_PLUS_SHARED_IRQS NR_IRQS
+#define MAX_PLUS_SHARED_IRQS NR_IRQ_VECTORS
 #define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
 
 /*
@@ -67,24 +81,76 @@ static struct irq_pin_list {
        short apic, pin, next;
 } irq_2_pin[PIN_MAP_SIZE];
 
-#ifdef CONFIG_PCI_USE_VECTOR
-int vector_irq[NR_IRQS] = { [0 ... NR_IRQS -1] = -1};
+int vector_irq[NR_VECTORS] __read_mostly = { [0 ... NR_VECTORS - 1] = -1};
+#ifdef CONFIG_PCI_MSI
 #define vector_to_irq(vector)  \
        (platform_legacy_irq(vector) ? vector : vector_irq[vector])
 #else
 #define vector_to_irq(vector)  (vector)
 #endif
 
+#define __DO_ACTION(R, ACTION, FINAL)                                  \
+/* Needs "irq" in the caller's scope; edits reg 0x10+R+2*pin of each pin. */ \
+{                                                                      \
+       int pin;                                                        \
+       struct irq_pin_list *entry = irq_2_pin + irq;                   \
+       /* pin == -1 stops the walk (presumably unused); next == 0 ends it */ \
+       BUG_ON(irq >= NR_IRQS);                                         \
+       for (;;) {                                                      \
+               unsigned int reg;                                       \
+               pin = entry->pin;                                       \
+               if (pin == -1)                                          \
+                       break;                                          \
+               reg = io_apic_read(entry->apic, 0x10 + R + pin*2);      \
+               reg ACTION;                                             \
+               io_apic_modify(entry->apic, reg);                       \
+               if (!entry->next)                                       \
+                       break;                                          \
+               entry = irq_2_pin + entry->next;                        \
+       }                                                               \
+       FINAL;                                                          \
+}
+
+#ifdef CONFIG_SMP
+static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
+{      /* Point every IO-APIC pin listed for irq at the CPUs selected by mask. */
+       unsigned long flags;
+       unsigned int dest;
+       cpumask_t tmp;
+       /* AND the request with cpu_online_map; if empty, use TARGET_CPUS. */
+       cpus_and(tmp, mask, cpu_online_map);
+       if (cpus_empty(tmp))
+               tmp = TARGET_CPUS;
+
+       cpus_and(mask, tmp, CPU_MASK_ALL);
+
+       dest = cpu_mask_to_apicid(mask);
+
+       /*
+        * Only the high 8 bits are valid.
+        */
+       dest = SET_APIC_LOGICAL_ID(dest);
+       /* Rewrite the pins and call set_irq_info() while holding ioapic_lock. */
+       spin_lock_irqsave(&ioapic_lock, flags);
+       __DO_ACTION(1, = dest, )        /* reg 0x11 + 2*pin of each pin = dest */
+       set_irq_info(irq, mask);
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+#endif
+
+static u8 gsi_2_irq[NR_IRQ_VECTORS] = { [0 ... NR_IRQ_VECTORS-1] = 0xFF };
+
 /*
  * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
  * shared ISA-space IRQs, so we have to support them. We are super
  * fast in the common case, and fast for shared ISA-space IRQs.
  */
-static void __init add_pin_to_irq(unsigned int irq, int apic, int pin)
+static void add_pin_to_irq(unsigned int irq, int apic, int pin)
 {
        static int first_free_entry = NR_IRQS;
        struct irq_pin_list *entry = irq_2_pin + irq;
 
+       BUG_ON(irq >= NR_IRQS);
        while (entry->next)
                entry = irq_2_pin + entry->next;
 
@@ -92,32 +158,12 @@ static void __init add_pin_to_irq(unsigned int irq, int apic, int pin)
                entry->next = first_free_entry;
                entry = irq_2_pin + entry->next;
                if (++first_free_entry >= PIN_MAP_SIZE)
-                       panic("io_apic.c: whoops");
+                       panic("io_apic.c: ran out of irq_2_pin entries!");
        }
        entry->apic = apic;
        entry->pin = pin;
 }
 
-#define __DO_ACTION(R, ACTION, FINAL)                                  \
-                                                                       \
-{                                                                      \
-       int pin;                                                        \
-       struct irq_pin_list *entry = irq_2_pin + irq;                   \
-                                                                       \
-       for (;;) {                                                      \
-               unsigned int reg;                                       \
-               pin = entry->pin;                                       \
-               if (pin == -1)                                          \
-                       break;                                          \
-               reg = io_apic_read(entry->apic, 0x10 + R + pin*2);      \
-               reg ACTION;                                             \
-               io_apic_modify(entry->apic, reg);                       \
-               if (!entry->next)                                       \
-                       break;                                          \
-               entry = irq_2_pin + entry->next;                        \
-       }                                                               \
-       FINAL;                                                          \
-}
 
 #define DO_ACTION(name,R,ACTION, FINAL)                                        \
                                                                        \
@@ -128,10 +174,6 @@ DO_ACTION( __mask,             0, |= 0x00010000, io_apic_sync(entry->apic) )
                                                /* mask = 1 */
 DO_ACTION( __unmask,           0, &= 0xfffeffff, )
                                                /* mask = 0 */
-DO_ACTION( __mask_and_edge,    0, = (reg & 0xffff7fff) | 0x00010000, )
-                                               /* mask = 1, trigger = 0 */
-DO_ACTION( __unmask_and_level, 0, = (reg & 0xfffeffff) | 0x00008000, )
-                                               /* mask = 0, trigger = 1 */
 
 static void mask_IO_APIC_irq (unsigned int irq)
 {
@@ -151,7 +193,7 @@ static void unmask_IO_APIC_irq (unsigned int irq)
        spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
-void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
+static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
 {
        struct IO_APIC_route_entry entry;
        unsigned long flags;
@@ -189,8 +231,8 @@ static void clear_IO_APIC (void)
  */
 
 #define MAX_PIRQS 8
-int pirq_entries [MAX_PIRQS];
-int pirqs_enabled;
+static int pirq_entries [MAX_PIRQS];
+static int pirqs_enabled;
 int skip_ioapic_setup;
 int ioapic_force;
 
@@ -212,10 +254,36 @@ static int __init enable_ioapic_setup(char *str)
 __setup("noapic", disable_ioapic_setup);
 __setup("apic", enable_ioapic_setup);
 
+static int __init setup_disable_8254_timer(char *s)
+{      /* Handler for the "disable_8254_timer" boot option; s is not used. */
+       timer_over_8254 = -1;   /* NOTE(review): meaning of -1 is defined by the readers of this flag */
+       return 1;
+}
+static int __init setup_enable_8254_timer(char *s)
+{      /* Handler for the "enable_8254_timer" boot option; s is not used. */
+       timer_over_8254 = 2;    /* not 1, so the ATI check in check_ioapic() (== 1) does not reset it */
+       return 1;
+}
+
+__setup("disable_8254_timer", setup_disable_8254_timer);
+__setup("enable_8254_timer", setup_enable_8254_timer);
+
 #include <asm/pci-direct.h>
 #include <linux/pci_ids.h>
 #include <linux/pci.h>
 
+
+#ifdef CONFIG_ACPI
+
+static int nvidia_hpet_detected __initdata;
+
+static int __init nvidia_hpet_check(unsigned long phys, unsigned long size)
+{      /* Callback passed to acpi_table_parse(ACPI_HPET, ...) in check_ioapic(). */
+       nvidia_hpet_detected = 1;       /* record that the callback ran; phys and size are unused */
+       return 0;
+}
+#endif
+
 /* Temporary Hack. Nvidia and VIA boards currently only work with IO-APIC
    off. Check for an Nvidia or VIA PCI bridge and turn it off.
    Use pci direct infrastructure because this runs before the PCI subsystem. 
@@ -224,19 +292,19 @@ __setup("apic", enable_ioapic_setup);
 
    And another hack to disable the IOMMU on VIA chipsets.
 
+   ... and others. Really should move this somewhere else.
+
    Kludge-O-Rama. */
 void __init check_ioapic(void) 
 { 
        int num,slot,func; 
-       if (ioapic_force) 
-               return; 
-
        /* Poor man's PCI discovery */
        for (num = 0; num < 32; num++) { 
                for (slot = 0; slot < 32; slot++) { 
                        for (func = 0; func < 8; func++) { 
                                u32 class;
                                u32 vendor;
+                               u8 type;
                                class = read_pci_config(num,slot,func,
                                                        PCI_CLASS_REVISION);
                                if (class == 0xffffffff)
@@ -250,28 +318,51 @@ void __init check_ioapic(void)
                                vendor &= 0xffff;
                                switch (vendor) { 
                                case PCI_VENDOR_ID_VIA:
-#ifdef CONFIG_GART_IOMMU
-                                       if (end_pfn >= (0xffffffff>>PAGE_SHIFT) &&
+#ifdef CONFIG_IOMMU
+                                       if ((end_pfn > MAX_DMA32_PFN ||
+                                            force_iommu) &&
                                            !iommu_aperture_allowed) {
                                                printk(KERN_INFO
-    "Looks like a VIA chipset. Disabling IOMMU. Overwrite with \"iommu=allowed\"\n");
+    "Looks like a VIA chipset. Disabling IOMMU. Override with \"iommu=allowed\"\n");
                                                iommu_aperture_disabled = 1;
                                        }
 #endif
-                                       /* FALL THROUGH */
+                                       return;
                                case PCI_VENDOR_ID_NVIDIA:
-#ifndef CONFIG_SMP
-                                       printk(KERN_INFO 
-     "PCI bridge %02x:%02x from %x found. Setting \"noapic\". Overwrite with \"apic\"\n",
-                                              num,slot,vendor); 
-                                       skip_ioapic_setup = 1;
+#ifdef CONFIG_ACPI
+                                       /*
+                                        * All timer overrides on Nvidia are
+                                        * wrong unless HPET is enabled.
+                                        */
+                                       nvidia_hpet_detected = 0;
+                                       acpi_table_parse(ACPI_HPET,
+                                                       nvidia_hpet_check);
+                                       if (nvidia_hpet_detected == 0) {
+                                               acpi_skip_timer_override = 1;
+                                               printk(KERN_INFO "Nvidia board "
+                                                   "detected. Ignoring ACPI "
+                                                   "timer override.\n");
+                                       }
 #endif
+                                       /* RED-PEN skip them on mptables too? */
+                                       return;
+
+                               /* This should actually be the default, but
+                                  for 2.6.16 let's do it for ATI only where
+                                  it's really needed. */
+                               case PCI_VENDOR_ID_ATI:
+                                       if (timer_over_8254 == 1) {     
+                                               timer_over_8254 = 0;    
+                                       printk(KERN_INFO
+               "ATI board detected. Disabling timer routing over 8254.\n");
+                                       }       
                                        return;
                                } 
 
+
                                /* No multi-function device? */
-                               u8 type = read_pci_config_byte(num,slot,func,
-                                                              PCI_HEADER_TYPE);
+                               type = read_pci_config_byte(num,slot,func,
+                                                           PCI_HEADER_TYPE);
                                if (!(type & 0x80))
                                        break;
                        } 
@@ -290,13 +381,13 @@ static int __init ioapic_pirq_setup(char *str)
                pirq_entries[i] = -1;
 
        pirqs_enabled = 1;
-       printk(KERN_INFO "PIRQ redirection, working around broken MP-BIOS.\n");
+       apic_printk(APIC_VERBOSE, "PIRQ redirection, working around broken MP-BIOS.\n");
        max = MAX_PIRQS;
        if (ints[0] < MAX_PIRQS)
                max = ints[0];
 
        for (i = 0; i < max; i++) {
-               printk(KERN_DEBUG "... PIRQ%d -> IRQ %d\n", i, ints[i+1]);
+               apic_printk(APIC_VERBOSE, "... PIRQ%d -> IRQ %d\n", i, ints[i+1]);
                /*
                 * PIRQs are mapped upside down, usually.
                 */
@@ -310,7 +401,7 @@ __setup("pirq=", ioapic_pirq_setup);
 /*
  * Find the IRQ entry number of a certain pin.
  */
-static int __init find_irq_entry(int apic, int pin, int type)
+static int find_irq_entry(int apic, int pin, int type)
 {
        int i;
 
@@ -345,6 +436,31 @@ static int __init find_isa_irq_pin(int irq, int type)
        return -1;
 }
 
+static int __init find_isa_irq_apic(int irq, int type)
+{      /* Returns the mp_ioapics[] index the matching MP IRQ entry targets, or -1. */
+       int i;
+       /* Look for an ISA/EISA/MCA-bus entry in mp_irqs[] matching irq and type. */
+       for (i = 0; i < mp_irq_entries; i++) {
+               int lbus = mp_irqs[i].mpc_srcbus;
+
+               if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
+                    mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
+                    mp_bus_id_to_type[lbus] == MP_BUS_MCA) &&
+                   (mp_irqs[i].mpc_irqtype == type) &&
+                   (mp_irqs[i].mpc_srcbusirq == irq))
+                       break;
+       }
+       if (i < mp_irq_entries) {       /* found: map its mpc_dstapic to an IO-APIC index */
+               int apic;
+               for(apic = 0; apic < nr_ioapics; apic++) {
+                       if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic)
+                               return apic;
+               }
+       }
+       /* No matching entry, or no IO-APIC carries that destination APIC id. */
+       return -1;
+}
+
 /*
  * Find a specific PCI IRQ entry.
  * Not an __init, possibly needed by modules
@@ -355,10 +471,10 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
 {
        int apic, i, best_guess = -1;
 
-       Dprintk("querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n",
+       apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n",
                bus, slot, pin);
        if (mp_bus_id_to_pci_bus[bus] == -1) {
-               printk(KERN_WARNING "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
+               apic_printk(APIC_VERBOSE, "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
                return -1;
        }
        for (i = 0; i < mp_irq_entries; i++) {
@@ -388,19 +504,20 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
                                best_guess = irq;
                }
        }
+       BUG_ON(best_guess >= NR_IRQS);
        return best_guess;
 }
 
 /*
  * EISA Edge/Level control register, ELCR
  */
-static int __init EISA_ELCR(unsigned int irq)
+static int EISA_ELCR(unsigned int irq)
 {
        if (irq < 16) {
                unsigned int port = 0x4d0 + (irq >> 3);
                return (inb(port) >> (irq & 7)) & 1;
        }
-       printk(KERN_INFO "Broken MPtable reports ISA irq %d\n", irq);
+       apic_printk(APIC_VERBOSE, "Broken MPtable reports ISA irq %d\n", irq);
        return 0;
 }
 
@@ -499,7 +616,7 @@ static int __init MPBIOS_polarity(int idx)
        return polarity;
 }
 
-static int __init MPBIOS_trigger(int idx)
+static int MPBIOS_trigger(int idx)
 {
        int bus = mp_irqs[idx].mpc_srcbus;
        int trigger;
@@ -578,6 +695,64 @@ static inline int irq_trigger(int idx)
        return MPBIOS_trigger(idx);
 }
 
+static int next_irq = 16;
+
+/*
+ * gsi_irq_sharing -- Name overload!  "irq" can be either a legacy IRQ
+ * in the range 0-15, a linux IRQ in the range 0-223, or a GSI number
+ * from ACPI, which can reach 800 in large boxen.
+ *
+ * Compact the sparse GSI space into a sequential IRQ series and reuse
+ * vectors if possible.
+ */
+int gsi_irq_sharing(int gsi)
+{      /* Returns the Linux IRQ number to use for gsi. */
+       int i, tries, vector;
+
+       BUG_ON(gsi >= NR_IRQ_VECTORS);
+       /* Legacy IRQs are handed back unchanged. */
+       if (platform_legacy_irq(gsi))
+               return gsi;
+       /* 0xFF means "no IRQ recorded yet"; anything else is a cached result. */
+       if (gsi_2_irq[gsi] != 0xFF)
+               return (int)gsi_2_irq[gsi];
+
+       tries = NR_IRQS;        /* upper bound on the retries taken below */
+  try_again:
+       vector = assign_irq_vector(gsi);
+
+       /*
+        * Sharing vectors means sharing IRQs, so scan irq_vectors for previous
+        * use of vector and if found, return that IRQ.  However, we never want
+        * to share legacy IRQs, which usually have a different trigger mode
+        * than PCI.
+        */
+       for (i = 0; i < NR_IRQS; i++)
+               if (IO_APIC_VECTOR(i) == vector)
+                       break;
+       if (platform_legacy_irq(i)) {
+               if (--tries >= 0) {
+                       IO_APIC_VECTOR(i) = 0;  /* NOTE(review): clears entry i, not gsi's; confirm the retry can yield a new vector */
+                       goto try_again;
+               }
+               panic("gsi_irq_sharing: didn't find an IRQ using vector 0x%02X for GSI %d", vector, gsi);
+       }
+       if (i < NR_IRQS) {      /* a non-legacy IRQ already has this vector: share it */
+               gsi_2_irq[gsi] = i;
+               printk(KERN_INFO "GSI %d sharing vector 0x%02X and IRQ %d\n",
+                               gsi, vector, i);
+               return i;
+       }
+       /* No IRQ has this vector: take the next sequential IRQ number. */
+       i = next_irq++;
+       BUG_ON(i >= NR_IRQS);
+       gsi_2_irq[gsi] = i;
+       IO_APIC_VECTOR(i) = vector;
+       printk(KERN_INFO "GSI %d assigned vector 0x%02X and IRQ %d\n",
+                       gsi, vector, i);
+       return i;
+}
+
 static int pin_2_irq(int idx, int apic, int pin)
 {
        int irq, i;
@@ -607,6 +782,7 @@ static int pin_2_irq(int idx, int apic, int pin)
                        while (i < apic)
                                irq += nr_ioapic_registers[i++];
                        irq += pin;
+                       irq = gsi_irq_sharing(irq);
                        break;
                }
                default:
@@ -616,6 +792,7 @@ static int pin_2_irq(int idx, int apic, int pin)
                        break;
                }
        }
+       BUG_ON(irq >= NR_IRQS);
 
        /*
         * PCI IRQ command line redirection. Yes, limits are hardcoded.
@@ -623,14 +800,15 @@ static int pin_2_irq(int idx, int apic, int pin)
        if ((pin >= 16) && (pin <= 23)) {
                if (pirq_entries[pin-16] != -1) {
                        if (!pirq_entries[pin-16]) {
-                               printk(KERN_DEBUG "disabling PIRQ%d\n", pin-16);
+                               apic_printk(APIC_VERBOSE, "disabling PIRQ%d\n", pin-16);
                        } else {
                                irq = pirq_entries[pin-16];
-                               printk(KERN_DEBUG "using PIRQ%d -> IRQ %d\n",
+                               apic_printk(APIC_VERBOSE, "using PIRQ%d -> IRQ %d\n",
                                                pin-16, irq);
                        }
                }
        }
+       BUG_ON(irq >= NR_IRQS);
        return irq;
 }
 
@@ -652,32 +830,42 @@ static inline int IO_APIC_irq_trigger(int irq)
 }
 
 /* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */
-u8 irq_vector[NR_IRQ_VECTORS] = { FIRST_DEVICE_VECTOR , 0 };
+u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_DEVICE_VECTOR , 0 };
 
-#ifndef CONFIG_PCI_USE_VECTOR
-int __init assign_irq_vector(int irq)
+int assign_irq_vector(int irq)
 {
        static int current_vector = FIRST_DEVICE_VECTOR, offset = 0;
-       BUG_ON(irq >= NR_IRQ_VECTORS);
-       if (IO_APIC_VECTOR(irq) > 0)
+       unsigned long flags;
+       int vector;
+
+       BUG_ON(irq != AUTO_ASSIGN && (unsigned)irq >= NR_IRQ_VECTORS);
+
+       spin_lock_irqsave(&vector_lock, flags);
+
+       if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0) {
+               spin_unlock_irqrestore(&vector_lock, flags);
                return IO_APIC_VECTOR(irq);
+       }
 next:
        current_vector += 8;
        if (current_vector == IA32_SYSCALL_VECTOR)
                goto next;
 
-       if (current_vector > FIRST_SYSTEM_VECTOR) {
-               offset++;
+       if (current_vector >= FIRST_SYSTEM_VECTOR) {
+               /* If we run out of vectors on large boxen, must share them. */
+               offset = (offset + 1) % 8;
                current_vector = FIRST_DEVICE_VECTOR + offset;
        }
 
-       if (current_vector == FIRST_SYSTEM_VECTOR)
-               panic("ran out of interrupt sources!");
+       vector = current_vector;
+       vector_irq[vector] = irq;
+       if (irq != AUTO_ASSIGN)
+               IO_APIC_VECTOR(irq) = vector;
 
-       IO_APIC_VECTOR(irq) = current_vector;
-       return current_vector;
+       spin_unlock_irqrestore(&vector_lock, flags);
+
+       return vector;
 }
-#endif
 
 extern void (*interrupt[NR_IRQS])(void);
 static struct hw_interrupt_type ioapic_level_type;
@@ -687,32 +875,27 @@ static struct hw_interrupt_type ioapic_edge_type;
 #define IOAPIC_EDGE    0
 #define IOAPIC_LEVEL   1
 
-static inline void ioapic_register_intr(int irq, int vector, unsigned long trigger)
+static void ioapic_register_intr(int irq, int vector, unsigned long trigger)
 {
-       if (use_pci_vector() && !platform_legacy_irq(irq)) {
-               if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
-                               trigger == IOAPIC_LEVEL)
-                       irq_desc[vector].handler = &ioapic_level_type;
-               else
-                       irq_desc[vector].handler = &ioapic_edge_type;
-               set_intr_gate(vector, interrupt[vector]);
-       } else  {
-               if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
-                               trigger == IOAPIC_LEVEL)
-                       irq_desc[irq].handler = &ioapic_level_type;
-               else
-                       irq_desc[irq].handler = &ioapic_edge_type;
-               set_intr_gate(vector, interrupt[irq]);
-       }
+       unsigned idx;
+
+       idx = use_pci_vector() && !platform_legacy_irq(irq) ? vector : irq;
+
+       if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
+                       trigger == IOAPIC_LEVEL)
+               irq_desc[idx].chip = &ioapic_level_type;
+       else
+               irq_desc[idx].chip = &ioapic_edge_type;
+       set_intr_gate(vector, interrupt[idx]);
 }
 
-void __init setup_IO_APIC_irqs(void)
+static void __init setup_IO_APIC_irqs(void)
 {
        struct IO_APIC_route_entry entry;
        int apic, pin, idx, irq, first_notcon = 1, vector;
        unsigned long flags;
 
-       printk(KERN_DEBUG "init IO_APIC IRQs\n");
+       apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
 
        for (apic = 0; apic < nr_ioapics; apic++) {
        for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
@@ -722,18 +905,18 @@ void __init setup_IO_APIC_irqs(void)
                 */
                memset(&entry,0,sizeof(entry));
 
-               entry.delivery_mode = dest_LowestPrio;
-               entry.dest_mode = INT_DELIVERY_MODE;
+               entry.delivery_mode = INT_DELIVERY_MODE;
+               entry.dest_mode = INT_DEST_MODE;
                entry.mask = 0;                         /* enable IRQ */
-               entry.dest.logical.logical_dest = TARGET_CPUS;
+               entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
 
                idx = find_irq_entry(apic,pin,mp_INT);
                if (idx == -1) {
                        if (first_notcon) {
-                               printk(KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mpc_apicid, pin);
+                               apic_printk(APIC_VERBOSE, KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mpc_apicid, pin);
                                first_notcon = 0;
                        } else
-                               printk(", %d-%d", mp_ioapics[apic].mpc_apicid, pin);
+                               apic_printk(APIC_VERBOSE, ", %d-%d", mp_ioapics[apic].mpc_apicid, pin);
                        continue;
                }
 
@@ -743,7 +926,7 @@ void __init setup_IO_APIC_irqs(void)
                if (irq_trigger(idx)) {
                        entry.trigger = 1;
                        entry.mask = 1;
-                       entry.dest.logical.logical_dest = TARGET_CPUS;
+                       entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
                }
 
                irq = pin_2_irq(idx, apic, pin);
@@ -763,19 +946,20 @@ void __init setup_IO_APIC_irqs(void)
                spin_lock_irqsave(&ioapic_lock, flags);
                io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
                io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
+               set_native_irq_info(irq, TARGET_CPUS);
                spin_unlock_irqrestore(&ioapic_lock, flags);
        }
        }
 
        if (!first_notcon)
-               printk(" not connected.\n");
+               apic_printk(APIC_VERBOSE," not connected.\n");
 }
 
 /*
  * Set up the 8259A-master output pin as broadcast to all
  * CPUs.
  */
-void __init setup_ExtINT_IRQ0_pin(unsigned int pin, int vector)
+static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, int vector)
 {
        struct IO_APIC_route_entry entry;
        unsigned long flags;
@@ -785,16 +969,16 @@ void __init setup_ExtINT_IRQ0_pin(unsigned int pin, int vector)
        disable_8259A_irq(0);
 
        /* mask LVT0 */
-       apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
+       apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
 
        /*
         * We use logical delivery to get the timer IRQ
         * to the first CPU.
         */
-       entry.dest_mode = INT_DELIVERY_MODE;
+       entry.dest_mode = INT_DEST_MODE;
        entry.mask = 0;                                 /* unmask IRQ now */
-       entry.dest.logical.logical_dest = TARGET_CPUS;
-       entry.delivery_mode = dest_LowestPrio;
+       entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
+       entry.delivery_mode = INT_DELIVERY_MODE;
        entry.polarity = 0;
        entry.trigger = 0;
        entry.vector = vector;
@@ -803,14 +987,14 @@ void __init setup_ExtINT_IRQ0_pin(unsigned int pin, int vector)
         * The timer IRQ doesn't have to know that behind the
         * scene we have a 8259A-master in AEOI mode ...
         */
-       irq_desc[0].handler = &ioapic_edge_type;
+       irq_desc[0].chip = &ioapic_edge_type;
 
        /*
         * Add it to the IO-APIC irq-routing table:
         */
        spin_lock_irqsave(&ioapic_lock, flags);
-       io_apic_write(0, 0x11+2*pin, *(((int *)&entry)+1));
-       io_apic_write(0, 0x10+2*pin, *(((int *)&entry)+0));
+       io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
+       io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
        spin_unlock_irqrestore(&ioapic_lock, flags);
 
        enable_8259A_irq(0);
@@ -818,13 +1002,9 @@ void __init setup_ExtINT_IRQ0_pin(unsigned int pin, int vector)
 
 void __init UNEXPECTED_IO_APIC(void)
 {
-#if 0
-       printk(KERN_WARNING " WARNING: unexpected IO-APIC, please mail\n");
-       printk(KERN_WARNING "          to linux-smp@vger.kernel.org\n");
-#endif
 }
 
-void __init print_IO_APIC(void)
+void __apicdebuginit print_IO_APIC(void)
 {
        int apic, i;
        union IO_APIC_reg_00 reg_00;
@@ -832,7 +1012,10 @@ void __init print_IO_APIC(void)
        union IO_APIC_reg_02 reg_02;
        unsigned long flags;
 
-       printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
+       if (apic_verbosity == APIC_QUIET)
+               return;
+
+       printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
        for (i = 0; i < nr_ioapics; i++)
                printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
                       mp_ioapics[i].mpc_apicid, nr_ioapic_registers[i]);
@@ -923,12 +1106,17 @@ void __init print_IO_APIC(void)
                );
        }
        }
+       if (use_pci_vector())
+               printk(KERN_INFO "Using vector-based indexing\n");
        printk(KERN_DEBUG "IRQ to pin mappings:\n");
        for (i = 0; i < NR_IRQS; i++) {
                struct irq_pin_list *entry = irq_2_pin + i;
                if (entry->pin < 0)
                        continue;
-               printk(KERN_DEBUG "IRQ%d ", i);
+               if (use_pci_vector() && !platform_legacy_irq(i))
+                       printk(KERN_DEBUG "IRQ%d ", IO_APIC_VECTOR(i));
+               else
+                       printk(KERN_DEBUG "IRQ%d ", i);
                for (;;) {
                        printk("-> %d:%d", entry->apic, entry->pin);
                        if (!entry->next)
@@ -943,11 +1131,16 @@ void __init print_IO_APIC(void)
        return;
 }
 
-static void print_APIC_bitfield (int base)
+#if 0
+
+static __apicdebuginit void print_APIC_bitfield (int base)
 {
        unsigned int v;
        int i, j;
 
+       if (apic_verbosity == APIC_QUIET)
+               return;
+
        printk(KERN_DEBUG "0123456789abcdef0123456789abcdef\n" KERN_DEBUG);
        for (i = 0; i < 8; i++) {
                v = apic_read(base + i*0x10);
@@ -961,10 +1154,13 @@ static void print_APIC_bitfield (int base)
        }
 }
 
-void /*__init*/ print_local_APIC(void * dummy)
+void __apicdebuginit print_local_APIC(void * dummy)
 {
        unsigned int v, ver, maxlvt;
 
+       if (apic_verbosity == APIC_QUIET)
+               return;
+
        printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
                smp_processor_id(), hard_smp_processor_id());
        v = apic_read(APIC_ID);
@@ -977,13 +1173,11 @@ void /*__init*/ print_local_APIC(void * dummy)
        v = apic_read(APIC_TASKPRI);
        printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);
 
-       if (APIC_INTEGRATED(ver)) {                     /* !82489DX */
-               v = apic_read(APIC_ARBPRI);
-               printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v,
-                       v & APIC_ARBPRI_MASK);
-               v = apic_read(APIC_PROCPRI);
-               printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v);
-       }
+       v = apic_read(APIC_ARBPRI);
+       printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v,
+               v & APIC_ARBPRI_MASK);
+       v = apic_read(APIC_PROCPRI);
+       printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v);
 
        v = apic_read(APIC_EOI);
        printk(KERN_DEBUG "... APIC EOI: %08x\n", v);
@@ -1003,12 +1197,8 @@ void /*__init*/ print_local_APIC(void * dummy)
        printk(KERN_DEBUG "... APIC IRR field:\n");
        print_APIC_bitfield(APIC_IRR);
 
-       if (APIC_INTEGRATED(ver)) {             /* !82489DX */
-               if (maxlvt > 3)         /* Due to the Pentium erratum 3AP. */
-                       apic_write(APIC_ESR, 0);
-               v = apic_read(APIC_ESR);
-               printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
-       }
+       v = apic_read(APIC_ESR);
+       printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
 
        v = apic_read(APIC_ICR);
        printk(KERN_DEBUG "... APIC ICR: %08x\n", v);
@@ -1046,12 +1236,14 @@ void print_all_local_APICs (void)
        on_each_cpu(print_local_APIC, NULL, 1, 1);
 }
 
-void /*__init*/ print_PIC(void)
+void __apicdebuginit print_PIC(void)
 {
-       extern spinlock_t i8259A_lock;
        unsigned int v;
        unsigned long flags;
 
+       if (apic_verbosity == APIC_QUIET)
+               return;
+
        printk(KERN_DEBUG "\nprinting PIC contents\n");
 
        spin_lock_irqsave(&i8259A_lock, flags);
@@ -1076,10 +1268,13 @@ void /*__init*/ print_PIC(void)
        printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
 }
 
+#endif  /*  0  */
+
 static void __init enable_IO_APIC(void)
 {
        union IO_APIC_reg_01 reg_01;
-       int i;
+       int i8259_apic, i8259_pin;
+       int i, apic;
        unsigned long flags;
 
        for (i = 0; i < PIN_MAP_SIZE; i++) {
@@ -1093,11 +1288,48 @@ static void __init enable_IO_APIC(void)
        /*
         * The number of IO-APIC IRQ registers (== #pins):
         */
-       for (i = 0; i < nr_ioapics; i++) {
+       for (apic = 0; apic < nr_ioapics; apic++) {
                spin_lock_irqsave(&ioapic_lock, flags);
-               reg_01.raw = io_apic_read(i, 1);
+               reg_01.raw = io_apic_read(apic, 1);
                spin_unlock_irqrestore(&ioapic_lock, flags);
-               nr_ioapic_registers[i] = reg_01.bits.entries+1;
+               nr_ioapic_registers[apic] = reg_01.bits.entries+1;
+       }
+       for(apic = 0; apic < nr_ioapics; apic++) {
+               int pin;
+               /* See if any of the pins is in ExtINT mode */
+               for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+                       struct IO_APIC_route_entry entry;
+                       spin_lock_irqsave(&ioapic_lock, flags);
+                       *(((int *)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
+                       *(((int *)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
+                       spin_unlock_irqrestore(&ioapic_lock, flags);
+
+
+                       /* If the interrupt line is enabled and in ExtInt mode
+                        * I have found the pin where the i8259 is connected.
+                        */
+                       if ((entry.mask == 0) && (entry.delivery_mode == dest_ExtINT)) {
+                               ioapic_i8259.apic = apic;
+                               ioapic_i8259.pin  = pin;
+                               goto found_i8259;
+                       }
+               }
+       }
+ found_i8259:
+       /* Look to see whether the MP table has reported the ExtINT */
+       i8259_pin  = find_isa_irq_pin(0, mp_ExtINT);
+       i8259_apic = find_isa_irq_apic(0, mp_ExtINT);
+       /* Trust the MP table if nothing is set up in the hardware */
+       if ((ioapic_i8259.pin == -1) && (i8259_pin >= 0)) {
+               printk(KERN_WARNING "ExtINT not setup in hardware but reported by MP table\n");
+               ioapic_i8259.pin  = i8259_pin;
+               ioapic_i8259.apic = i8259_apic;
+       }
+       /* Complain if the MP table and the hardware disagree */
+       if (((ioapic_i8259.apic != i8259_apic) || (ioapic_i8259.pin != i8259_pin)) &&
+               (i8259_pin >= 0) && (ioapic_i8259.pin >= 0))
+       {
+               printk(KERN_WARNING "ExtINT in hardware and MP table differ\n");
        }
 
        /*
@@ -1116,7 +1348,39 @@ void disable_IO_APIC(void)
         */
        clear_IO_APIC();
 
-       disconnect_bsp_APIC();
+       /*
+        * If the i8259 is routed through an IOAPIC,
+        * put that IOAPIC in virtual wire mode
+        * so legacy interrupts can be delivered.
+        */
+       if (ioapic_i8259.pin != -1) {
+               struct IO_APIC_route_entry entry;
+               unsigned long flags;
+
+               memset(&entry, 0, sizeof(entry));
+               entry.mask            = 0; /* Enabled */
+               entry.trigger         = 0; /* Edge */
+               entry.irr             = 0;
+               entry.polarity        = 0; /* High */
+               entry.delivery_status = 0;
+               entry.dest_mode       = 0; /* Physical */
+               entry.delivery_mode   = dest_ExtINT; /* ExtInt */
+               entry.vector          = 0;
+               entry.dest.physical.physical_dest =
+                                       GET_APIC_ID(apic_read(APIC_ID));
+
+               /*
+                * Add it to the IO-APIC irq-routing table:
+                */
+               spin_lock_irqsave(&ioapic_lock, flags);
+               io_apic_write(ioapic_i8259.apic, 0x11+2*ioapic_i8259.pin,
+                       *(((int *)&entry)+1));
+               io_apic_write(ioapic_i8259.apic, 0x10+2*ioapic_i8259.pin,
+                       *(((int *)&entry)+0));
+               spin_unlock_irqrestore(&ioapic_lock, flags);
+       }
+
+       disconnect_bsp_APIC(ioapic_i8259.pin != -1);
 }
 
 /*
@@ -1129,7 +1393,6 @@ void disable_IO_APIC(void)
 static void __init setup_ioapic_ids_from_mpc (void)
 {
        union IO_APIC_reg_00 reg_00;
-       physid_mask_t phys_id_present_map = phys_cpu_present_map;
        int apic;
        int i;
        unsigned char old_id;
@@ -1147,36 +1410,8 @@ static void __init setup_ioapic_ids_from_mpc (void)
                
                old_id = mp_ioapics[apic].mpc_apicid;
 
-               if (mp_ioapics[apic].mpc_apicid >= 0xf) {
-                       printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
-                               apic, mp_ioapics[apic].mpc_apicid);
-                       printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
-                               reg_00.bits.ID);
-                       mp_ioapics[apic].mpc_apicid = reg_00.bits.ID;
-               }
 
-               /*
-                * Sanity check, is the ID really free? Every APIC in a
-                * system must have a unique ID or we get lots of nice
-                * 'stuck on smp_invalidate_needed IPI wait' messages.
-                */
-               if (physid_isset(mp_ioapics[apic].mpc_apicid, phys_id_present_map)) {
-                       printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
-                               apic, mp_ioapics[apic].mpc_apicid);
-                       for (i = 0; i < 0xf; i++)
-                               if (!physid_isset(i, phys_id_present_map))
-                                       break;
-                       if (i >= 0xf)
-                               panic("Max APIC ID exceeded!\n");
-                       printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
-                               i);
-                       physid_set(i, phys_id_present_map);
-                       mp_ioapics[apic].mpc_apicid = i;
-               } else {
-                       printk(KERN_INFO 
-                              "Using IO-APIC %d\n", mp_ioapics[apic].mpc_apicid);
-                       physid_set(mp_ioapics[apic].mpc_apicid, phys_id_present_map);
-               }
+               printk(KERN_INFO "Using IO-APIC %d\n", mp_ioapics[apic].mpc_apicid);
 
 
                /*
@@ -1193,7 +1428,7 @@ static void __init setup_ioapic_ids_from_mpc (void)
                 * Read the right value from the MPC table and
                 * write it into the ID register.
                 */
-               printk(KERN_INFO "...changing IO-APIC physical APIC ID to %d ...",
+               apic_printk(APIC_VERBOSE,KERN_INFO "...changing IO-APIC physical APIC ID to %d ...",
                                mp_ioapics[apic].mpc_apicid);
 
                reg_00.bits.ID = mp_ioapics[apic].mpc_apicid;
@@ -1208,9 +1443,9 @@ static void __init setup_ioapic_ids_from_mpc (void)
                reg_00.raw = io_apic_read(apic, 0);
                spin_unlock_irqrestore(&ioapic_lock, flags);
                if (reg_00.bits.ID != mp_ioapics[apic].mpc_apicid)
-                       panic("could not set ID!\n");
+                       printk("could not set ID!\n");
                else
-                       printk(" ok.\n");
+                       apic_printk(APIC_VERBOSE," ok.\n");
        }
 }
 
@@ -1291,6 +1526,7 @@ static unsigned int startup_edge_ioapic_irq(unsigned int irq)
  */
 static void ack_edge_ioapic_irq(unsigned int irq)
 {
+       move_irq(irq);
        if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED))
                                        == (IRQ_PENDING | IRQ_DISABLED))
                mask_IO_APIC_irq(irq);
@@ -1320,81 +1556,11 @@ static unsigned int startup_level_ioapic_irq (unsigned int irq)
 
 static void end_level_ioapic_irq (unsigned int irq)
 {
-       unsigned long v;
-       int i;
-
-/*
- * It appears there is an erratum which affects at least version 0x11
- * of I/O APIC (that's the 82093AA and cores integrated into various
- * chipsets).  Under certain conditions a level-triggered interrupt is
- * erroneously delivered as edge-triggered one but the respective IRR
- * bit gets set nevertheless.  As a result the I/O unit expects an EOI
- * message but it will never arrive and further interrupts are blocked
- * from the source.  The exact reason is so far unknown, but the
- * phenomenon was observed when two consecutive interrupt requests
- * from a given source get delivered to the same CPU and the source is
- * temporarily disabled in between.
- *
- * A workaround is to simulate an EOI message manually.  We achieve it
- * by setting the trigger mode to edge and then to level when the edge
- * trigger mode gets detected in the TMR of a local APIC for a
- * level-triggered interrupt.  We mask the source for the time of the
- * operation to prevent an edge-triggered interrupt escaping meanwhile.
- * The idea is from Manfred Spraul.  --macro
- */
-       i = IO_APIC_VECTOR(irq);
-       v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
-
+       move_irq(irq);
        ack_APIC_irq();
-
-       if (!(v & (1 << (i & 0x1f)))) {
-#ifdef APIC_LOCKUP_DEBUG
-               struct irq_pin_list *entry;
-#endif
-
-#ifdef APIC_MISMATCH_DEBUG
-               atomic_inc(&irq_mis_count);
-#endif
-               spin_lock(&ioapic_lock);
-               __mask_and_edge_IO_APIC_irq(irq);
-#ifdef APIC_LOCKUP_DEBUG
-               for (entry = irq_2_pin + irq;;) {
-                       unsigned int reg;
-
-                       if (entry->pin == -1)
-                               break;
-                       reg = io_apic_read(entry->apic, 0x10 + entry->pin * 2);
-                       if (reg & 0x00004000)
-                               printk(KERN_CRIT "Aieee!!!  Remote IRR"
-                                       " still set after unlock!\n");
-                       if (!entry->next)
-                               break;
-                       entry = irq_2_pin + entry->next;
-               }
-#endif
-               __unmask_and_level_IO_APIC_irq(irq);
-               spin_unlock(&ioapic_lock);
-       }
-}
-
-static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
-{
-       unsigned long flags;
-       unsigned int dest;
-
-       dest = cpu_mask_to_apicid(mk_cpumask_const(mask));
-
-       /*
-        * Only the first 8 bits are valid.
-        */
-       dest = dest << 24;
-
-       spin_lock_irqsave(&ioapic_lock, flags);
-       __DO_ACTION(1, = dest, )
-       spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
-#ifdef CONFIG_PCI_USE_VECTOR
+#ifdef CONFIG_PCI_MSI
 static unsigned int startup_edge_ioapic_vector(unsigned int vector)
 {
        int irq = vector_to_irq(vector);
@@ -1406,6 +1572,7 @@ static void ack_edge_ioapic_vector(unsigned int vector)
 {
        int irq = vector_to_irq(vector);
 
+       move_native_irq(vector);
        ack_edge_ioapic_irq(irq);
 }
 
@@ -1420,6 +1587,7 @@ static void end_level_ioapic_vector (unsigned int vector)
 {
        int irq = vector_to_irq(vector);
 
+       move_native_irq(vector);
        end_level_ioapic_irq(irq);
 }
 
@@ -1437,14 +1605,24 @@ static void unmask_IO_APIC_vector (unsigned int vector)
        unmask_IO_APIC_irq(irq);
 }
 
+#ifdef CONFIG_SMP
 static void set_ioapic_affinity_vector (unsigned int vector,
                                        cpumask_t cpu_mask)
 {
        int irq = vector_to_irq(vector);
 
+       set_native_irq_info(vector, cpu_mask);
        set_ioapic_affinity_irq(irq, cpu_mask);
 }
-#endif
+#endif // CONFIG_SMP
+#endif // CONFIG_PCI_MSI
+
+static int ioapic_retrigger(unsigned int irq)
+{
+       send_IPI_self(IO_APIC_VECTOR(irq));
+
+       return 1;
+}
 
 /*
  * Level and edge triggered IO-APIC interrupts need different handling,
@@ -1455,7 +1633,7 @@ static void set_ioapic_affinity_vector (unsigned int vector,
  * races.
  */
 
-static struct hw_interrupt_type ioapic_edge_type = {
+static struct hw_interrupt_type ioapic_edge_type __read_mostly = {
        .typename = "IO-APIC-edge",
        .startup        = startup_edge_ioapic,
        .shutdown       = shutdown_edge_ioapic,
@@ -1463,10 +1641,13 @@ static struct hw_interrupt_type ioapic_edge_type = {
        .disable        = disable_edge_ioapic,
        .ack            = ack_edge_ioapic,
        .end            = end_edge_ioapic,
+#ifdef CONFIG_SMP
        .set_affinity = set_ioapic_affinity,
+#endif
+       .retrigger      = ioapic_retrigger,
 };
 
-static struct hw_interrupt_type ioapic_level_type = {
+static struct hw_interrupt_type ioapic_level_type __read_mostly = {
        .typename = "IO-APIC-level",
        .startup        = startup_level_ioapic,
        .shutdown       = shutdown_level_ioapic,
@@ -1474,7 +1655,10 @@ static struct hw_interrupt_type ioapic_level_type = {
        .disable        = disable_level_ioapic,
        .ack            = mask_and_ack_level_ioapic,
        .end            = end_level_ioapic,
+#ifdef CONFIG_SMP
        .set_affinity = set_ioapic_affinity,
+#endif
+       .retrigger      = ioapic_retrigger,
 };
 
 static inline void init_IO_APIC_traps(void)
@@ -1509,7 +1693,7 @@ static inline void init_IO_APIC_traps(void)
                                make_8259A_irq(irq);
                        else
                                /* Strange. Oh, well.. */
-                               irq_desc[irq].handler = &no_irq_type;
+                               irq_desc[irq].chip = &no_irq_type;
                }
        }
 }
@@ -1519,7 +1703,7 @@ static void enable_lapic_irq (unsigned int irq)
        unsigned long v;
 
        v = apic_read(APIC_LVT0);
-       apic_write_around(APIC_LVT0, v & ~APIC_LVT_MASKED);
+       apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
 }
 
 static void disable_lapic_irq (unsigned int irq)
@@ -1527,7 +1711,7 @@ static void disable_lapic_irq (unsigned int irq)
        unsigned long v;
 
        v = apic_read(APIC_LVT0);
-       apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED);
+       apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
 }
 
 static void ack_lapic_irq (unsigned int irq)
@@ -1537,7 +1721,7 @@ static void ack_lapic_irq (unsigned int irq)
 
 static void end_lapic_irq (unsigned int i) { /* nothing */ }
 
-static struct hw_interrupt_type lapic_irq_type = {
+static struct hw_interrupt_type lapic_irq_type __read_mostly = {
        .typename = "local-APIC-edge",
        .startup = NULL, /* startup_irq() not used for IRQ0 */
        .shutdown = NULL, /* shutdown_irq() not used for IRQ0 */
@@ -1574,20 +1758,21 @@ static void setup_nmi (void)
  */
 static inline void unlock_ExtINT_logic(void)
 {
-       int pin, i;
+       int apic, pin, i;
        struct IO_APIC_route_entry entry0, entry1;
        unsigned char save_control, save_freq_select;
        unsigned long flags;
 
-       pin = find_isa_irq_pin(8, mp_INT);
+       pin  = find_isa_irq_pin(8, mp_INT);
+       apic = find_isa_irq_apic(8, mp_INT);
        if (pin == -1)
                return;
 
        spin_lock_irqsave(&ioapic_lock, flags);
-       *(((int *)&entry0) + 1) = io_apic_read(0, 0x11 + 2 * pin);
-       *(((int *)&entry0) + 0) = io_apic_read(0, 0x10 + 2 * pin);
+       *(((int *)&entry0) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
+       *(((int *)&entry0) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
        spin_unlock_irqrestore(&ioapic_lock, flags);
-       clear_IO_APIC_pin(0, pin);
+       clear_IO_APIC_pin(apic, pin);
 
        memset(&entry1, 0, sizeof(entry1));
 
@@ -1600,8 +1785,8 @@ static inline void unlock_ExtINT_logic(void)
        entry1.vector = 0;
 
        spin_lock_irqsave(&ioapic_lock, flags);
-       io_apic_write(0, 0x11 + 2 * pin, *(((int *)&entry1) + 1));
-       io_apic_write(0, 0x10 + 2 * pin, *(((int *)&entry1) + 0));
+       io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry1) + 1));
+       io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry1) + 0));
        spin_unlock_irqrestore(&ioapic_lock, flags);
 
        save_control = CMOS_READ(RTC_CONTROL);
@@ -1619,23 +1804,27 @@ static inline void unlock_ExtINT_logic(void)
 
        CMOS_WRITE(save_control, RTC_CONTROL);
        CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
-       clear_IO_APIC_pin(0, pin);
+       clear_IO_APIC_pin(apic, pin);
 
        spin_lock_irqsave(&ioapic_lock, flags);
-       io_apic_write(0, 0x11 + 2 * pin, *(((int *)&entry0) + 1));
-       io_apic_write(0, 0x10 + 2 * pin, *(((int *)&entry0) + 0));
+       io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry0) + 1));
+       io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry0) + 0));
        spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
+int timer_uses_ioapic_pin_0;
+
 /*
  * This code may look a bit paranoid, but it's supposed to cooperate with
  * a wide range of boards and BIOS bugs.  Fortunately only the timer IRQ
  * is so screwy.  Thanks to Brian Perkins for testing/hacking this beast
  * fanatically on his truly buggy board.
+ *
+ * FIXME: really need to revamp this for modern platforms only.
  */
 static inline void check_timer(void)
 {
-       int pin1, pin2;
+       int apic1, pin1, apic2, pin2;
        int vector;
 
        /*
@@ -1652,91 +1841,108 @@ static inline void check_timer(void)
         * the 8259A which implies the virtual wire has to be
         * disabled in the local APIC.
         */
-       apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
+       apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
        init_8259A(1);
-       enable_8259A_irq(0);
+       if (timer_over_8254 > 0)
+               enable_8259A_irq(0);
+
+       pin1  = find_isa_irq_pin(0, mp_INT);
+       apic1 = find_isa_irq_apic(0, mp_INT);
+       pin2  = ioapic_i8259.pin;
+       apic2 = ioapic_i8259.apic;
 
-       pin1 = find_isa_irq_pin(0, mp_INT);
-       pin2 = find_isa_irq_pin(0, mp_ExtINT);
+       if (pin1 == 0)
+               timer_uses_ioapic_pin_0 = 1;
 
-       printk(KERN_INFO "..TIMER: vector=0x%02X pin1=%d pin2=%d\n", vector, pin1, pin2);
+       apic_printk(APIC_VERBOSE,KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n",
+               vector, apic1, pin1, apic2, pin2);
 
        if (pin1 != -1) {
                /*
                 * Ok, does IRQ0 through the IOAPIC work?
                 */
                unmask_IO_APIC_irq(0);
-               if (timer_irq_works()) {
+               if (!no_timer_check && timer_irq_works()) {
                        nmi_watchdog_default();
                        if (nmi_watchdog == NMI_IO_APIC) {
                                disable_8259A_irq(0);
                                setup_nmi();
                                enable_8259A_irq(0);
-                               check_nmi_watchdog();
                        }
+                       if (disable_timer_pin_1 > 0)
+                               clear_IO_APIC_pin(0, pin1);
                        return;
                }
-               clear_IO_APIC_pin(0, pin1);
-               printk(KERN_ERR "..MP-BIOS bug: 8254 timer not connected to IO-APIC\n");
+               clear_IO_APIC_pin(apic1, pin1);
+               apic_printk(APIC_QUIET,KERN_ERR "..MP-BIOS bug: 8254 timer not "
+                               "connected to IO-APIC\n");
        }
 
-       printk(KERN_INFO "...trying to set up timer (IRQ0) through the 8259A ... ");
+       apic_printk(APIC_VERBOSE,KERN_INFO "...trying to set up timer (IRQ0) "
+                               "through the 8259A ... ");
        if (pin2 != -1) {
-               printk("\n..... (found pin %d) ...", pin2);
+               apic_printk(APIC_VERBOSE,"\n..... (found apic %d pin %d) ...",
+                       apic2, pin2);
                /*
                 * legacy devices should be connected to IO APIC #0
                 */
-               setup_ExtINT_IRQ0_pin(pin2, vector);
+               setup_ExtINT_IRQ0_pin(apic2, pin2, vector);
                if (timer_irq_works()) {
-                       printk("works.\n");
+                       apic_printk(APIC_VERBOSE," works.\n");
                        nmi_watchdog_default();
                        if (nmi_watchdog == NMI_IO_APIC) {
                                setup_nmi();
-                               check_nmi_watchdog();
                        }
                        return;
                }
                /*
                 * Cleanup, just in case ...
                 */
-               clear_IO_APIC_pin(0, pin2);
+               clear_IO_APIC_pin(apic2, pin2);
        }
-       printk(" failed.\n");
+       apic_printk(APIC_VERBOSE," failed.\n");
 
-       if (nmi_watchdog) {
+       if (nmi_watchdog == NMI_IO_APIC) {
                printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n");
                nmi_watchdog = 0;
        }
 
-       printk(KERN_INFO "...trying to set up timer as Virtual Wire IRQ...");
+       apic_printk(APIC_VERBOSE, KERN_INFO "...trying to set up timer as Virtual Wire IRQ...");
 
        disable_8259A_irq(0);
-       irq_desc[0].handler = &lapic_irq_type;
-       apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector);   /* Fixed mode */
+       irq_desc[0].chip = &lapic_irq_type;
+       apic_write(APIC_LVT0, APIC_DM_FIXED | vector);  /* Fixed mode */
        enable_8259A_irq(0);
 
        if (timer_irq_works()) {
-               printk(" works.\n");
+               apic_printk(APIC_VERBOSE," works.\n");
                return;
        }
-       apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector);
-       printk(" failed.\n");
+       apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector);
+       apic_printk(APIC_VERBOSE," failed.\n");
 
-       printk(KERN_INFO "...trying to set up timer as ExtINT IRQ...");
+       apic_printk(APIC_VERBOSE, KERN_INFO "...trying to set up timer as ExtINT IRQ...");
 
        init_8259A(0);
        make_8259A_irq(0);
-       apic_write_around(APIC_LVT0, APIC_DM_EXTINT);
+       apic_write(APIC_LVT0, APIC_DM_EXTINT);
 
        unlock_ExtINT_logic();
 
        if (timer_irq_works()) {
-               printk(" works.\n");
+               apic_printk(APIC_VERBOSE," works.\n");
                return;
        }
-       printk(" failed :(.\n");
-       panic("IO-APIC + timer doesn't work! pester mingo@redhat.com");
+       apic_printk(APIC_VERBOSE," failed :(.\n");
+       panic("IO-APIC + timer doesn't work! Try using the 'noapic' kernel parameter\n");
+}
+
+static int __init notimercheck(char *s)
+{
+       no_timer_check = 1;
+       return 1;
 }
+__setup("no_timer_check", notimercheck);
 
 /*
  *
@@ -1756,7 +1962,7 @@ void __init setup_IO_APIC(void)
        else
                io_apic_irqs = ~PIC_IRQS;
 
-       printk("ENABLING IO-APIC IRQs\n");
+       apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n");
 
        /*
         * Set up the IO-APIC IRQ routing table.
@@ -1771,83 +1977,105 @@ void __init setup_IO_APIC(void)
                print_IO_APIC();
 }
 
-/* --------------------------------------------------------------------------
-                          ACPI-based IOAPIC Configuration
-   -------------------------------------------------------------------------- */
-
-#ifdef CONFIG_ACPI_BOOT
-
-#define IO_APIC_MAX_ID         15
+struct sysfs_ioapic_data {
+       struct sys_device dev;
+       struct IO_APIC_route_entry entry[0];
+};
+static struct sysfs_ioapic_data * mp_ioapic_data[MAX_IO_APICS];
 
-int __init io_apic_get_unique_id (int ioapic, int apic_id)
+static int ioapic_suspend(struct sys_device *dev, pm_message_t state)
 {
-       union IO_APIC_reg_00 reg_00;
-       static physid_mask_t apic_id_map;
+       struct IO_APIC_route_entry *entry;
+       struct sysfs_ioapic_data *data;
        unsigned long flags;
-       int i = 0;
-
-       /*
-        * The P4 platform supports up to 256 APIC IDs on two separate APIC 
-        * buses (one for LAPICs, one for IOAPICs), where predecessors only 
-        * supports up to 16 on one shared APIC bus.
-        * 
-        * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full
-        *      advantage of new APIC bus architecture.
-        */
-
-       if (physids_empty(apic_id_map))
-               apic_id_map = phys_cpu_present_map;
+       int i;
 
+       data = container_of(dev, struct sysfs_ioapic_data, dev);
+       entry = data->entry;
        spin_lock_irqsave(&ioapic_lock, flags);
-       reg_00.raw = io_apic_read(ioapic, 0);
-       spin_unlock_irqrestore(&ioapic_lock, flags);
-
-       if (apic_id >= IO_APIC_MAX_ID) {
-               printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying "
-                       "%d\n", ioapic, apic_id, reg_00.bits.ID);
-               apic_id = reg_00.bits.ID;
+       for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) {
+               *(((int *)entry) + 1) = io_apic_read(dev->id, 0x11 + 2 * i);
+               *(((int *)entry) + 0) = io_apic_read(dev->id, 0x10 + 2 * i);
        }
+       spin_unlock_irqrestore(&ioapic_lock, flags);
 
-       /*
-        * Every APIC in a system must have a unique ID or we get lots of nice 
-        * 'stuck on smp_invalidate_needed IPI wait' messages.
-        */
-       if (physid_isset(apic_id, apic_id_map)) {
+       return 0;
+}
 
-               for (i = 0; i < IO_APIC_MAX_ID; i++) {
-                       if (!physid_isset(i, apic_id_map))
-                               break;
-               }
+static int ioapic_resume(struct sys_device *dev)
+{
+       struct IO_APIC_route_entry *entry;
+       struct sysfs_ioapic_data *data;
+       unsigned long flags;
+       union IO_APIC_reg_00 reg_00;
+       int i;
 
-               if (i == IO_APIC_MAX_ID)
-                       panic("Max apic_id exceeded!\n");
+       data = container_of(dev, struct sysfs_ioapic_data, dev);
+       entry = data->entry;
 
-               printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, "
-                       "trying %d\n", ioapic, apic_id, i);
+       spin_lock_irqsave(&ioapic_lock, flags);
+       reg_00.raw = io_apic_read(dev->id, 0);
+       if (reg_00.bits.ID != mp_ioapics[dev->id].mpc_apicid) {
+               reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid;
+               io_apic_write(dev->id, 0, reg_00.raw);
+       }
+       for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) {
+               io_apic_write(dev->id, 0x11+2*i, *(((int *)entry)+1));
+               io_apic_write(dev->id, 0x10+2*i, *(((int *)entry)+0));
+       }
+       spin_unlock_irqrestore(&ioapic_lock, flags);
 
-               apic_id = i;
-       } 
+       return 0;
+}
 
-       physid_set(apic_id, apic_id_map);
+static struct sysdev_class ioapic_sysdev_class = {
+       set_kset_name("ioapic"),
+       .suspend = ioapic_suspend,
+       .resume = ioapic_resume,
+};
 
-       if (reg_00.bits.ID != apic_id) {
-               reg_00.bits.ID = apic_id;
+static int __init ioapic_init_sysfs(void)
+{
+       struct sys_device * dev;
+       int i, size, error = 0;
+
+       error = sysdev_class_register(&ioapic_sysdev_class);
+       if (error)
+               return error;
+
+       for (i = 0; i < nr_ioapics; i++ ) {
+               size = sizeof(struct sys_device) + nr_ioapic_registers[i]
+                       * sizeof(struct IO_APIC_route_entry);
+               mp_ioapic_data[i] = kmalloc(size, GFP_KERNEL);
+               if (!mp_ioapic_data[i]) {
+                       printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
+                       continue;
+               }
+               memset(mp_ioapic_data[i], 0, size);
+               dev = &mp_ioapic_data[i]->dev;
+               dev->id = i;
+               dev->cls = &ioapic_sysdev_class;
+               error = sysdev_register(dev);
+               if (error) {
+                       kfree(mp_ioapic_data[i]);
+                       mp_ioapic_data[i] = NULL;
+                       printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
+                       continue;
+               }
+       }
 
-               spin_lock_irqsave(&ioapic_lock, flags);
-               io_apic_write(ioapic, 0, reg_00.raw);
-               reg_00.raw = io_apic_read(ioapic, 0);
-               spin_unlock_irqrestore(&ioapic_lock, flags);
+       return 0;
+}
 
-               /* Sanity check */
-               if (reg_00.bits.ID != apic_id)
-                       panic("IOAPIC[%d]: Unable change apic_id!\n", ioapic);
-       }
+device_initcall(ioapic_init_sysfs);
 
-       printk(KERN_INFO "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id);
+/* --------------------------------------------------------------------------
+                          ACPI-based IOAPIC Configuration
+   -------------------------------------------------------------------------- */
 
-       return apic_id;
-}
+#ifdef CONFIG_ACPI
 
+#define IO_APIC_MAX_ID         0xFE
 
 int __init io_apic_get_version (int ioapic)
 {
@@ -1875,13 +2103,13 @@ int __init io_apic_get_redir_entries (int ioapic)
 }
 
 
-int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int active_high_low)
+int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity)
 {
        struct IO_APIC_route_entry entry;
        unsigned long flags;
 
        if (!IO_APIC_IRQ(irq)) {
-               printk(KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
+               apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
                        ioapic);
                return -EINVAL;
        }
@@ -1894,13 +2122,14 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a
 
        memset(&entry,0,sizeof(entry));
 
-       entry.delivery_mode = dest_LowestPrio;
-       entry.dest_mode = INT_DELIVERY_MODE;
-       entry.dest.logical.logical_dest = TARGET_CPUS;
-       entry.trigger = edge_level;
-       entry.polarity = active_high_low;
+       entry.delivery_mode = INT_DELIVERY_MODE;
+       entry.dest_mode = INT_DEST_MODE;
+       entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
+       entry.trigger = triggering;
+       entry.polarity = polarity;
        entry.mask = 1;                                  /* Disabled (masked) */
 
+       irq = gsi_irq_sharing(irq);
        /*
         * IRQs < 16 are already in the irq_2_pin[] map
         */
@@ -1909,20 +2138,12 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a
 
        entry.vector = assign_irq_vector(irq);
 
-       printk(KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry (%d-%d -> 0x%x -> "
+       apic_printk(APIC_VERBOSE,KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry (%d-%d -> 0x%x -> "
                "IRQ %d Mode:%i Active:%i)\n", ioapic, 
               mp_ioapics[ioapic].mpc_apicid, pin, entry.vector, irq,
-              edge_level, active_high_low);
-
-       if (use_pci_vector() && !platform_legacy_irq(irq))
-               irq = IO_APIC_VECTOR(irq);
-       if (edge_level) {
-               irq_desc[irq].handler = &ioapic_level_type;
-       } else {
-               irq_desc[irq].handler = &ioapic_edge_type;
-       }
+              triggering, polarity);
 
-       set_intr_gate(entry.vector, interrupt[irq]);
+       ioapic_register_intr(irq, entry.vector, triggering);
 
        if (!ioapic && (irq < 16))
                disable_8259A_irq(irq);
@@ -1930,27 +2151,37 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a
        spin_lock_irqsave(&ioapic_lock, flags);
        io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1));
        io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0));
+       set_native_irq_info(use_pci_vector() ?  entry.vector : irq, TARGET_CPUS);
        spin_unlock_irqrestore(&ioapic_lock, flags);
 
        return 0;
 }
 
-#endif /*CONFIG_ACPI_BOOT*/
+#endif /* CONFIG_ACPI */
+
 
-#ifndef CONFIG_SMP
-void send_IPI_self(int vector)
+/*
+ * This function is currently only a helper for the i386 SMP boot process,
+ * where the ioredtbls must be reprogrammed to cater for the CPUs that have
+ * come online; the mask should therefore simply be TARGET_CPUS in all cases.
+ */
+#ifdef CONFIG_SMP
+void __init setup_ioapic_dest(void)
 {
-       unsigned int cfg;
+       int pin, ioapic, irq, irq_entry;
 
-       /*
-        * Wait for idle.
-        */
-       apic_wait_icr_idle();
-       cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | APIC_DEST_LOGICAL;
+       if (skip_ioapic_setup == 1)
+               return;
 
-       /*
-        * Send the IPI. The write to APIC_ICR fires this off.
-        */
-       apic_write_around(APIC_ICR, cfg);
+       for (ioapic = 0; ioapic < nr_ioapics; ioapic++) {
+               for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) {
+                       irq_entry = find_irq_entry(ioapic, pin, mp_INT);
+                       if (irq_entry == -1)
+                               continue;
+                       irq = pin_2_irq(irq_entry, ioapic, pin);
+                       set_ioapic_affinity_irq(irq, TARGET_CPUS);
+               }
+
+       }
 }
 #endif