fedora core 6 1.2949 + vserver 2.2.0
[linux-2.6.git] / arch / i386 / kernel / io_apic.c
index 92bf30a..2fcd448 100644 (file)
  */
 
 #include <linux/mm.h>
-#include <linux/irq.h>
 #include <linux/interrupt.h>
 #include <linux/init.h>
 #include <linux/delay.h>
 #include <linux/sched.h>
-#include <linux/config.h>
 #include <linux/smp_lock.h>
 #include <linux/mc146818rtc.h>
 #include <linux/compiler.h>
 #include <linux/acpi.h>
+#include <linux/module.h>
+#include <linux/sysdev.h>
+#include <linux/pci.h>
+#include <linux/msi.h>
+#include <linux/htirq.h>
+#include <linux/freezer.h>
 
 #include <asm/io.h>
 #include <asm/smp.h>
 #include <asm/desc.h>
 #include <asm/timer.h>
+#include <asm/i8259.h>
+#include <asm/nmi.h>
+#include <asm/msidef.h>
+#include <asm/hypertransport.h>
 
 #include <mach_apic.h>
+#include <mach_apicdef.h>
 
 #include "io_ports.h"
 
-#undef APIC_LOCKUP_DEBUG
+int (*ioapic_renumber_irq)(int ioapic, int irq);
+atomic_t irq_mis_count;
 
-#define APIC_LOCKUP_DEBUG
+/* Where if anywhere is the i8259 connect in external int mode */
+static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
 
-static spinlock_t ioapic_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(ioapic_lock);
+static DEFINE_SPINLOCK(vector_lock);
+
+int timer_over_8254 __initdata = 1;
 
 /*
  *     Is the SiS APIC rmw bug present ?
@@ -58,6 +72,8 @@ int sis_apic_bug = -1;
  */
 int nr_ioapic_registers[MAX_IO_APICS];
 
+static int disable_timer_pin_1 __initdata;
+
 /*
  * Rough estimation of how many shared IRQs there are, can
  * be changed anytime.
@@ -76,20 +92,107 @@ static struct irq_pin_list {
        int apic, pin, next;
 } irq_2_pin[PIN_MAP_SIZE];
 
-int vector_irq[NR_VECTORS] = { [0 ... NR_VECTORS - 1] = -1};
-#ifdef CONFIG_PCI_USE_VECTOR
-#define vector_to_irq(vector)  \
-       (platform_legacy_irq(vector) ? vector : vector_irq[vector])
-#else
-#define vector_to_irq(vector)  (vector)
-#endif
+struct io_apic {
+       unsigned int index;
+       unsigned int unused[3];
+       unsigned int data;
+};
+
+static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
+{
+       return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
+               + (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK);
+}
+
+static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
+{
+       struct io_apic __iomem *io_apic = io_apic_base(apic);
+       writel(reg, &io_apic->index);
+       return readl(&io_apic->data);
+}
+
+static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
+{
+       struct io_apic __iomem *io_apic = io_apic_base(apic);
+       writel(reg, &io_apic->index);
+       writel(value, &io_apic->data);
+}
+
+/*
+ * Re-write a value: to be used for read-modify-write
+ * cycles where the read already set up the index register.
+ *
+ * Older SiS APIC requires we rewrite the index register
+ */
+static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
+{
+       volatile struct io_apic *io_apic = io_apic_base(apic);
+       if (sis_apic_bug)
+               writel(reg, &io_apic->index);
+       writel(value, &io_apic->data);
+}
+
+union entry_union {
+       struct { u32 w1, w2; };
+       struct IO_APIC_route_entry entry;
+};
+
+static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
+{
+       union entry_union eu;
+       unsigned long flags;
+       spin_lock_irqsave(&ioapic_lock, flags);
+       eu.w1 = io_apic_read(apic, 0x10 + 2 * pin);
+       eu.w2 = io_apic_read(apic, 0x11 + 2 * pin);
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+       return eu.entry;
+}
+
+/*
+ * When we write a new IO APIC routing entry, we need to write the high
+ * word first! If the mask bit in the low word is clear, we will enable
+ * the interrupt, and we need to make sure the entry is fully populated
+ * before that happens.
+ */
+static void
+__ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
+{
+       union entry_union eu;
+       eu.entry = e;
+       io_apic_write(apic, 0x11 + 2*pin, eu.w2);
+       io_apic_write(apic, 0x10 + 2*pin, eu.w1);
+}
+
+static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
+{
+       unsigned long flags;
+       spin_lock_irqsave(&ioapic_lock, flags);
+       __ioapic_write_entry(apic, pin, e);
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+/*
+ * When we mask an IO APIC routing entry, we need to write the low
+ * word first, in order to set the mask bit before we change the
+ * high bits!
+ */
+static void ioapic_mask_entry(int apic, int pin)
+{
+       unsigned long flags;
+       union entry_union eu = { .entry.mask = 1 };
+
+       spin_lock_irqsave(&ioapic_lock, flags);
+       io_apic_write(apic, 0x10 + 2*pin, eu.w1);
+       io_apic_write(apic, 0x11 + 2*pin, eu.w2);
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+}
 
 /*
  * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
  * shared ISA-space IRQs, so we have to support them. We are super
  * fast in the common case, and fast for shared ISA-space IRQs.
  */
-static void __init add_pin_to_irq(unsigned int irq, int apic, int pin)
+static void add_pin_to_irq(unsigned int irq, int apic, int pin)
 {
        static int first_free_entry = NR_IRQS;
        struct irq_pin_list *entry = irq_2_pin + irq;
@@ -127,83 +230,47 @@ static void __init replace_pin_at_irq(unsigned int irq,
        }
 }
 
-/* mask = 1 */
-static void __mask_IO_APIC_irq (unsigned int irq)
+static void __modify_IO_APIC_irq (unsigned int irq, unsigned long enable, unsigned long disable)
 {
-       int pin;
        struct irq_pin_list *entry = irq_2_pin + irq;
+       unsigned int pin, reg;
 
        for (;;) {
-               unsigned int reg;
                pin = entry->pin;
                if (pin == -1)
                        break;
                reg = io_apic_read(entry->apic, 0x10 + pin*2);
-               io_apic_modify(entry->apic, 0x10 + pin*2, reg |= 0x00010000);
+               reg &= ~disable;
+               reg |= enable;
+               io_apic_modify(entry->apic, 0x10 + pin*2, reg);
                if (!entry->next)
                        break;
                entry = irq_2_pin + entry->next;
        }
-       io_apic_sync(entry->apic);
+}
+
+/* mask = 1 */
+static void __mask_IO_APIC_irq (unsigned int irq)
+{
+       __modify_IO_APIC_irq(irq, 0x00010000, 0);
 }
 
 /* mask = 0 */
 static void __unmask_IO_APIC_irq (unsigned int irq)
 {
-       int pin;
-       struct irq_pin_list *entry = irq_2_pin + irq;
-
-       for (;;) {
-               unsigned int reg;
-               pin = entry->pin;
-               if (pin == -1)
-                       break;
-               reg = io_apic_read(entry->apic, 0x10 + pin*2);
-               io_apic_modify(entry->apic, 0x10 + pin*2, reg &= 0xfffeffff);
-               if (!entry->next)
-                       break;
-               entry = irq_2_pin + entry->next;
-       }
+       __modify_IO_APIC_irq(irq, 0, 0x00010000);
 }
 
 /* mask = 1, trigger = 0 */
 static void __mask_and_edge_IO_APIC_irq (unsigned int irq)
 {
-       int pin;
-       struct irq_pin_list *entry = irq_2_pin + irq;
-
-       for (;;) {
-               unsigned int reg;
-               pin = entry->pin;
-               if (pin == -1)
-                       break;
-               reg = io_apic_read(entry->apic, 0x10 + pin*2);
-               reg = (reg & 0xffff7fff) | 0x00010000;
-               io_apic_modify(entry->apic, 0x10 + pin*2, reg);
-               if (!entry->next)
-                       break;
-               entry = irq_2_pin + entry->next;
-       }
+       __modify_IO_APIC_irq(irq, 0x00010000, 0x00008000);
 }
 
 /* mask = 0, trigger = 1 */
 static void __unmask_and_level_IO_APIC_irq (unsigned int irq)
 {
-       int pin;
-       struct irq_pin_list *entry = irq_2_pin + irq;
-
-       for (;;) {
-               unsigned int reg;
-               pin = entry->pin;
-               if (pin == -1)
-                       break;
-               reg = io_apic_read(entry->apic, 0x10 + pin*2);
-               reg = (reg & 0xfffeffff) | 0x00008000;
-               io_apic_modify(entry->apic, 0x10 + pin*2, reg);
-               if (!entry->next)
-                       break;
-               entry = irq_2_pin + entry->next;
-       }
+       __modify_IO_APIC_irq(irq, 0x00008000, 0x00010000);
 }
 
 static void mask_IO_APIC_irq (unsigned int irq)
@@ -224,28 +291,19 @@ static void unmask_IO_APIC_irq (unsigned int irq)
        spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
-void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
+static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
 {
        struct IO_APIC_route_entry entry;
-       unsigned long flags;
        
        /* Check delivery_mode to be sure we're not clearing an SMI pin */
-       spin_lock_irqsave(&ioapic_lock, flags);
-       *(((int*)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
-       *(((int*)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
-       spin_unlock_irqrestore(&ioapic_lock, flags);
+       entry = ioapic_read_entry(apic, pin);
        if (entry.delivery_mode == dest_SMI)
                return;
 
        /*
         * Disable it in the IO-APIC irq-routing table:
         */
-       memset(&entry, 0, sizeof(entry));
-       entry.mask = 1;
-       spin_lock_irqsave(&ioapic_lock, flags);
-       io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry) + 0));
-       io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry) + 1));
-       spin_unlock_irqrestore(&ioapic_lock, flags);
+       ioapic_mask_entry(apic, pin);
 }
 
 static void clear_IO_APIC (void)
@@ -257,14 +315,22 @@ static void clear_IO_APIC (void)
                        clear_IO_APIC_pin(apic, pin);
 }
 
+#ifdef CONFIG_SMP
 static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
 {
        unsigned long flags;
        int pin;
        struct irq_pin_list *entry = irq_2_pin + irq;
        unsigned int apicid_value;
+       cpumask_t tmp;
        
-       apicid_value = cpu_mask_to_apicid(mk_cpumask_const(cpumask));
+       cpus_and(tmp, cpumask, cpu_online_map);
+       if (cpus_empty(tmp))
+               tmp = TARGET_CPUS;
+
+       cpus_and(cpumask, tmp, CPU_MASK_ALL);
+
+       apicid_value = cpu_mask_to_apicid(cpumask);
        /* Prepare to do the io_apic_write */
        apicid_value = apicid_value << 24;
        spin_lock_irqsave(&ioapic_lock, flags);
@@ -277,6 +343,7 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
                        break;
                entry = irq_2_pin + entry->next;
        }
+       set_native_irq_info(irq, cpumask);
        spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
@@ -286,7 +353,7 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
 # include <linux/slab.h>               /* kmalloc() */
 # include <linux/timer.h>      /* time_after() */
  
-# ifdef CONFIG_BALANCED_IRQ_DEBUG
+#ifdef CONFIG_BALANCED_IRQ_DEBUG
 #  define TDprintk(x...) do { printk("<%ld:%s:%d>: ", jiffies, __FILE__, __LINE__); printk(x); } while (0)
 #  define Dprintk(x...) do { TDprintk(x); } while (0)
 # else
@@ -294,15 +361,17 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
 #  define Dprintk(x...) 
 # endif
 
-extern cpumask_t irq_affinity[NR_IRQS];
-
-cpumask_t __cacheline_aligned pending_irq_balance_cpumask[NR_IRQS];
-
 #define IRQBALANCE_CHECK_ARCH -999
-static int irqbalance_disabled = IRQBALANCE_CHECK_ARCH;
-static int physical_balance = 0;
+#define MAX_BALANCED_IRQ_INTERVAL      (5*HZ)
+#define MIN_BALANCED_IRQ_INTERVAL      (HZ/2)
+#define BALANCED_IRQ_MORE_DELTA                (HZ/10)
+#define BALANCED_IRQ_LESS_DELTA                (HZ)
 
-struct irq_cpu_info {
+static int irqbalance_disabled __read_mostly = IRQBALANCE_CHECK_ARCH;
+static int physical_balance __read_mostly;
+static long balanced_irq_interval __read_mostly = MAX_BALANCED_IRQ_INTERVAL;
+
+static struct irq_cpu_info {
        unsigned long * last_irq;
        unsigned long * irq_delta;
        unsigned long irq;
@@ -313,19 +382,20 @@ struct irq_cpu_info {
 #define IRQ_DELTA(cpu,irq)     (irq_cpu_data[cpu].irq_delta[irq])
 
 #define IDLE_ENOUGH(cpu,now) \
-               (idle_cpu(cpu) && ((now) - irq_stat[(cpu)].idle_timestamp > 1))
+       (idle_cpu(cpu) && ((now) - per_cpu(irq_stat, (cpu)).idle_timestamp > 1))
 
 #define IRQ_ALLOWED(cpu, allowed_mask) cpu_isset(cpu, allowed_mask)
 
-#define CPU_TO_PACKAGEINDEX(i) \
-               ((physical_balance && i > cpu_sibling_map[i]) ? cpu_sibling_map[i] : i)
+#define CPU_TO_PACKAGEINDEX(i) (first_cpu(cpu_sibling_map[i]))
 
-#define MAX_BALANCED_IRQ_INTERVAL      (5*HZ)
-#define MIN_BALANCED_IRQ_INTERVAL      (HZ/2)
-#define BALANCED_IRQ_MORE_DELTA                (HZ/10)
-#define BALANCED_IRQ_LESS_DELTA                (HZ)
+static cpumask_t balance_irq_affinity[NR_IRQS] = {
+       [0 ... NR_IRQS-1] = CPU_MASK_ALL
+};
 
-long balanced_irq_interval = MAX_BALANCED_IRQ_INTERVAL;
+void set_balance_irq_affinity(unsigned int irq, cpumask_t mask)
+{
+       balance_irq_affinity[irq] = mask;
+}
 
 static unsigned long move(int curr_cpu, cpumask_t allowed_mask,
                        unsigned long now, int direction)
@@ -363,15 +433,10 @@ static inline void balance_irq(int cpu, int irq)
        if (irqbalance_disabled)
                return; 
 
-       cpus_and(allowed_mask, cpu_online_map, irq_affinity[irq]);
+       cpus_and(allowed_mask, cpu_online_map, balance_irq_affinity[irq]);
        new_cpu = move(cpu, allowed_mask, now, 1);
        if (cpu != new_cpu) {
-               irq_desc_t *desc = irq_desc + irq;
-               unsigned long flags;
-
-               spin_lock_irqsave(&desc->lock, flags);
-               pending_irq_balance_cpumask[irq] = cpumask_of_cpu(new_cpu);
-               spin_unlock_irqrestore(&desc->lock, flags);
+               set_pending_irq(irq, cpumask_of_cpu(new_cpu));
        }
 }
 
@@ -379,8 +444,8 @@ static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold)
 {
        int i, j;
        Dprintk("Rotating IRQs among CPUs.\n");
-       for (i = 0; i < NR_CPUS; i++) {
-               for (j = 0; cpu_online(i) && (j < NR_IRQS); j++) {
+       for_each_online_cpu(i) {
+               for (j = 0; j < NR_IRQS; j++) {
                        if (!irq_desc[j].action)
                                continue;
                        /* Is it a significant load ?  */
@@ -401,6 +466,7 @@ static void do_irq_balance(void)
        unsigned long max_cpu_irq = 0, min_cpu_irq = (~0);
        unsigned long move_this_load = 0;
        int max_loaded = 0, min_loaded = 0;
+       int load;
        unsigned long useful_load_threshold = balanced_irq_interval + 10;
        int selected_irq;
        int tmp_loaded, first_attempt = 1;
@@ -408,7 +474,7 @@ static void do_irq_balance(void)
        unsigned long imbalance = 0;
        cpumask_t allowed_mask, target_cpu_mask, tmp;
 
-       for (i = 0; i < NR_CPUS; i++) {
+       for_each_possible_cpu(i) {
                int package_index;
                CPU_IRQ(i) = 0;
                if (!cpu_online(i))
@@ -449,10 +515,8 @@ static void do_irq_balance(void)
                }
        }
        /* Find the least loaded processor package */
-       for (i = 0; i < NR_CPUS; i++) {
-               if (!cpu_online(i))
-                       continue;
-               if (physical_balance && i > cpu_sibling_map[i])
+       for_each_online_cpu(i) {
+               if (i != CPU_TO_PACKAGEINDEX(i))
                        continue;
                if (min_cpu_irq > CPU_IRQ(i)) {
                        min_cpu_irq = CPU_IRQ(i);
@@ -468,10 +532,8 @@ tryanothercpu:
         */
        tmp_cpu_irq = 0;
        tmp_loaded = -1;
-       for (i = 0; i < NR_CPUS; i++) {
-               if (!cpu_online(i))
-                       continue;
-               if (physical_balance && i > cpu_sibling_map[i])
+       for_each_online_cpu(i) {
+               if (i != CPU_TO_PACKAGEINDEX(i))
                        continue;
                if (max_cpu_irq <= CPU_IRQ(i)) 
                        continue;
@@ -551,25 +613,28 @@ tryanotherirq:
         * We seek the least loaded sibling by making the comparison
         * (A+B)/2 vs B
         */
-       if (physical_balance && (CPU_IRQ(min_loaded) >> 1) >
-                                       CPU_IRQ(cpu_sibling_map[min_loaded]))
-               min_loaded = cpu_sibling_map[min_loaded];
+       load = CPU_IRQ(min_loaded) >> 1;
+       for_each_cpu_mask(j, cpu_sibling_map[min_loaded]) {
+               if (load > CPU_IRQ(j)) {
+                       /* This won't change cpu_sibling_map[min_loaded] */
+                       load = CPU_IRQ(j);
+                       min_loaded = j;
+               }
+       }
 
-       cpus_and(allowed_mask, cpu_online_map, irq_affinity[selected_irq]);
+       cpus_and(allowed_mask,
+               cpu_online_map,
+               balance_irq_affinity[selected_irq]);
        target_cpu_mask = cpumask_of_cpu(min_loaded);
        cpus_and(tmp, target_cpu_mask, allowed_mask);
 
        if (!cpus_empty(tmp)) {
-               irq_desc_t *desc = irq_desc + selected_irq;
-               unsigned long flags;
 
                Dprintk("irq = %d moved to cpu = %d\n",
                                selected_irq, min_loaded);
                /* mark for change destination */
-               spin_lock_irqsave(&desc->lock, flags);
-               pending_irq_balance_cpumask[selected_irq] =
-                                       cpumask_of_cpu(min_loaded);
-               spin_unlock_irqrestore(&desc->lock, flags);
+               set_pending_irq(selected_irq, cpumask_of_cpu(min_loaded));
+
                /* Since we made a change, come back sooner to 
                 * check for more variation.
                 */
@@ -590,7 +655,7 @@ not_worth_the_effort:
        return;
 }
 
-int balanced_irq(void *unused)
+static int balanced_irq(void *unused)
 {
        int i;
        unsigned long prev_balance_time = jiffies;
@@ -600,20 +665,23 @@ int balanced_irq(void *unused)
        
        /* push everything to CPU 0 to give us a starting point.  */
        for (i = 0 ; i < NR_IRQS ; i++) {
-               pending_irq_balance_cpumask[i] = cpumask_of_cpu(0);
-       }
-
-repeat:
-       set_current_state(TASK_INTERRUPTIBLE);
-       time_remaining = schedule_timeout(time_remaining);
-       if (time_after(jiffies, prev_balance_time+balanced_irq_interval)) {
-               Dprintk("balanced_irq: calling do_irq_balance() %lu\n",
-                                       jiffies);
-               do_irq_balance();
-               prev_balance_time = jiffies;
-               time_remaining = balanced_irq_interval;
+               irq_desc[i].pending_mask = cpumask_of_cpu(0);
+               set_pending_irq(i, cpumask_of_cpu(0));
+       }
+
+       for ( ; ; ) {
+               time_remaining = schedule_timeout_interruptible(time_remaining);
+               try_to_freeze();
+               if (time_after(jiffies,
+                               prev_balance_time+balanced_irq_interval)) {
+                       preempt_disable();
+                       do_irq_balance();
+                       prev_balance_time = jiffies;
+                       time_remaining = balanced_irq_interval;
+                       preempt_enable();
+               }
        }
-       goto repeat;
+       return 0;
 }
 
 static int __init balanced_irq_init(void)
@@ -642,9 +710,7 @@ static int __init balanced_irq_init(void)
        if (smp_num_siblings > 1 && !cpus_empty(tmp))
                physical_balance = 1;
 
-       for (i = 0; i < NR_CPUS; i++) {
-               if (!cpu_online(i))
-                       continue;
+       for_each_online_cpu(i) {
                irq_cpu_data[i].irq_delta = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL);
                irq_cpu_data[i].last_irq = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL);
                if (irq_cpu_data[i].irq_delta == NULL || irq_cpu_data[i].last_irq == NULL) {
@@ -661,37 +727,26 @@ static int __init balanced_irq_init(void)
        else 
                printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq");
 failed:
-       for (i = 0; i < NR_CPUS; i++) {
-               if(irq_cpu_data[i].irq_delta)
-                       kfree(irq_cpu_data[i].irq_delta);
-               if(irq_cpu_data[i].last_irq)
-                       kfree(irq_cpu_data[i].last_irq);
+       for_each_possible_cpu(i) {
+               kfree(irq_cpu_data[i].irq_delta);
+               irq_cpu_data[i].irq_delta = NULL;
+               kfree(irq_cpu_data[i].last_irq);
+               irq_cpu_data[i].last_irq = NULL;
        }
        return 0;
 }
 
-static int __init irqbalance_disable(char *str)
+int __init irqbalance_disable(char *str)
 {
        irqbalance_disabled = 1;
-       return 0;
+       return 1;
 }
 
 __setup("noirqbalance", irqbalance_disable);
 
-static inline void move_irq(int irq)
-{
-       /* note - we hold the desc->lock */
-       if (unlikely(!cpus_empty(pending_irq_balance_cpumask[irq]))) {
-               set_ioapic_affinity_irq(irq, pending_irq_balance_cpumask[irq]);
-               cpus_clear(pending_irq_balance_cpumask[irq]);
-       }
-}
-
-__initcall(balanced_irq_init);
-
-#else /* !CONFIG_IRQBALANCE */
-static inline void move_irq(int irq) { }
+late_initcall(balanced_irq_init);
 #endif /* CONFIG_IRQBALANCE */
+#endif /* CONFIG_SMP */
 
 #ifndef CONFIG_SMP
 void fastcall send_IPI_self(int vector)
@@ -717,8 +772,8 @@ void fastcall send_IPI_self(int vector)
  */
 
 #define MAX_PIRQS 8
-int pirq_entries [MAX_PIRQS];
-int pirqs_enabled;
+static int pirq_entries [MAX_PIRQS];
+static int pirqs_enabled;
 int skip_ioapic_setup;
 
 static int __init ioapic_setup(char *str)
@@ -740,13 +795,15 @@ static int __init ioapic_pirq_setup(char *str)
                pirq_entries[i] = -1;
 
        pirqs_enabled = 1;
-       printk(KERN_INFO "PIRQ redirection, working around broken MP-BIOS.\n");
+       apic_printk(APIC_VERBOSE, KERN_INFO
+                       "PIRQ redirection, working around broken MP-BIOS.\n");
        max = MAX_PIRQS;
        if (ints[0] < MAX_PIRQS)
                max = ints[0];
 
        for (i = 0; i < max; i++) {
-               printk(KERN_DEBUG "... PIRQ%d -> IRQ %d\n", i, ints[i+1]);
+               apic_printk(APIC_VERBOSE, KERN_DEBUG
+                               "... PIRQ%d -> IRQ %d\n", i, ints[i+1]);
                /*
                 * PIRQs are mapped upside down, usually.
                 */
@@ -760,7 +817,7 @@ __setup("pirq=", ioapic_pirq_setup);
 /*
  * Find the IRQ entry number of a certain pin.
  */
-static int __init find_irq_entry(int apic, int pin, int type)
+static int find_irq_entry(int apic, int pin, int type)
 {
        int i;
 
@@ -786,8 +843,7 @@ static int __init find_isa_irq_pin(int irq, int type)
 
                if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
                     mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
-                    mp_bus_id_to_type[lbus] == MP_BUS_MCA ||
-                    mp_bus_id_to_type[lbus] == MP_BUS_NEC98
+                    mp_bus_id_to_type[lbus] == MP_BUS_MCA
                    ) &&
                    (mp_irqs[i].mpc_irqtype == type) &&
                    (mp_irqs[i].mpc_srcbusirq == irq))
@@ -797,6 +853,32 @@ static int __init find_isa_irq_pin(int irq, int type)
        return -1;
 }
 
+static int __init find_isa_irq_apic(int irq, int type)
+{
+       int i;
+
+       for (i = 0; i < mp_irq_entries; i++) {
+               int lbus = mp_irqs[i].mpc_srcbus;
+
+               if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
+                    mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
+                    mp_bus_id_to_type[lbus] == MP_BUS_MCA
+                   ) &&
+                   (mp_irqs[i].mpc_irqtype == type) &&
+                   (mp_irqs[i].mpc_srcbusirq == irq))
+                       break;
+       }
+       if (i < mp_irq_entries) {
+               int apic;
+               for(apic = 0; apic < nr_ioapics; apic++) {
+                       if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic)
+                               return apic;
+               }
+       }
+
+       return -1;
+}
+
 /*
  * Find a specific PCI IRQ entry.
  * Not an __init, possibly needed by modules
@@ -807,8 +889,8 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
 {
        int apic, i, best_guess = -1;
 
-       Dprintk("querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n",
-               bus, slot, pin);
+       apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, "
+               "slot:%d, pin:%d.\n", bus, slot, pin);
        if (mp_bus_id_to_pci_bus[bus] == -1) {
                printk(KERN_WARNING "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
                return -1;
@@ -842,13 +924,15 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
        }
        return best_guess;
 }
+EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
 
 /*
  * This function currently is only a helper for the i386 smp boot process where 
  * we need to reprogram the ioredtbls to cater for the cpus which have come online
  * so mask in all cases should simply be TARGET_CPUS
  */
-void __init setup_ioapic_dest(cpumask_t mask)
+#ifdef CONFIG_SMP
+void __init setup_ioapic_dest(void)
 {
        int pin, ioapic, irq, irq_entry;
 
@@ -861,22 +945,24 @@ void __init setup_ioapic_dest(cpumask_t mask)
                        if (irq_entry == -1)
                                continue;
                        irq = pin_2_irq(irq_entry, ioapic, pin);
-                       set_ioapic_affinity_irq(irq, mask);
+                       set_ioapic_affinity_irq(irq, TARGET_CPUS);
                }
 
        }
 }
+#endif
 
 /*
  * EISA Edge/Level control register, ELCR
  */
-static int __init EISA_ELCR(unsigned int irq)
+static int EISA_ELCR(unsigned int irq)
 {
        if (irq < 16) {
                unsigned int port = 0x4d0 + (irq >> 3);
                return (inb(port) >> (irq & 7)) & 1;
        }
-       printk(KERN_INFO "Broken MPtable reports ISA irq %d\n", irq);
+       apic_printk(APIC_VERBOSE, KERN_INFO
+                       "Broken MPtable reports ISA irq %d\n", irq);
        return 0;
 }
 
@@ -906,12 +992,6 @@ static int __init EISA_ELCR(unsigned int irq)
 #define default_MCA_trigger(idx)       (1)
 #define default_MCA_polarity(idx)      (0)
 
-/* NEC98 interrupts are always polarity zero edge triggered,
- * when listed as conforming in the MP table. */
-
-#define default_NEC98_trigger(idx)     (0)
-#define default_NEC98_polarity(idx)    (0)
-
 static int __init MPBIOS_polarity(int idx)
 {
        int bus = mp_irqs[idx].mpc_srcbus;
@@ -946,11 +1026,6 @@ static int __init MPBIOS_polarity(int idx)
                                        polarity = default_MCA_polarity(idx);
                                        break;
                                }
-                               case MP_BUS_NEC98: /* NEC 98 pin */
-                               {
-                                       polarity = default_NEC98_polarity(idx);
-                                       break;
-                               }
                                default:
                                {
                                        printk(KERN_WARNING "broken BIOS!!\n");
@@ -986,7 +1061,7 @@ static int __init MPBIOS_polarity(int idx)
        return polarity;
 }
 
-static int __init MPBIOS_trigger(int idx)
+static int MPBIOS_trigger(int idx)
 {
        int bus = mp_irqs[idx].mpc_srcbus;
        int trigger;
@@ -1020,11 +1095,6 @@ static int __init MPBIOS_trigger(int idx)
                                        trigger = default_MCA_trigger(idx);
                                        break;
                                }
-                               case MP_BUS_NEC98: /* NEC 98 pin */
-                               {
-                                       trigger = default_NEC98_trigger(idx);
-                                       break;
-                               }
                                default:
                                {
                                        printk(KERN_WARNING "broken BIOS!!\n");
@@ -1086,7 +1156,6 @@ static int pin_2_irq(int idx, int apic, int pin)
                case MP_BUS_ISA: /* ISA pin */
                case MP_BUS_EISA:
                case MP_BUS_MCA:
-               case MP_BUS_NEC98:
                {
                        irq = mp_irqs[idx].mpc_srcbusirq;
                        break;
@@ -1100,8 +1169,13 @@ static int pin_2_irq(int idx, int apic, int pin)
                        while (i < apic)
                                irq += nr_ioapic_registers[i++];
                        irq += pin;
-                       if ((!apic) && (irq < 16)) 
-                               irq += 16;
+
+                       /*
+                        * For MPS mode, so far only needed by ES7000 platform
+                        */
+                       if (ioapic_renumber_irq)
+                               irq = ioapic_renumber_irq(apic, irq);
+
                        break;
                }
                default:
@@ -1118,10 +1192,12 @@ static int pin_2_irq(int idx, int apic, int pin)
        if ((pin >= 16) && (pin <= 23)) {
                if (pirq_entries[pin-16] != -1) {
                        if (!pirq_entries[pin-16]) {
-                               printk(KERN_DEBUG "disabling PIRQ%d\n", pin-16);
+                               apic_printk(APIC_VERBOSE, KERN_DEBUG
+                                               "disabling PIRQ%d\n", pin-16);
                        } else {
                                irq = pirq_entries[pin-16];
-                               printk(KERN_DEBUG "using PIRQ%d -> IRQ %d\n",
+                               apic_printk(APIC_VERBOSE, KERN_DEBUG
+                                               "using PIRQ%d -> IRQ %d\n",
                                                pin-16, irq);
                        }
                }
@@ -1147,71 +1223,79 @@ static inline int IO_APIC_irq_trigger(int irq)
 }
 
 /* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */
-u8 irq_vector[NR_IRQ_VECTORS] = { FIRST_DEVICE_VECTOR , 0 };
+static u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_DEVICE_VECTOR , 0 };
 
-#ifdef CONFIG_PCI_USE_VECTOR
-int assign_irq_vector(int irq)
-#else
-int __init assign_irq_vector(int irq)
-#endif
+static int __assign_irq_vector(int irq)
 {
-       static int current_vector = FIRST_DEVICE_VECTOR, offset = 0;
+       static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0;
+       int vector, offset, i;
+
+       BUG_ON((unsigned)irq >= NR_IRQ_VECTORS);
+
+       if (irq_vector[irq] > 0)
+               return irq_vector[irq];
 
-       BUG_ON(irq >= NR_IRQ_VECTORS);
-       if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0)
-               return IO_APIC_VECTOR(irq);
+       vector = current_vector;
+       offset = current_offset;
 next:
-       current_vector += 8;
-       if (current_vector == SYSCALL_VECTOR)
+       vector += 8;
+       if (vector >= FIRST_SYSTEM_VECTOR) {
+               offset = (offset + 1) % 8;
+               vector = FIRST_DEVICE_VECTOR + offset;
+       }
+       if (vector == current_vector)
+               return -ENOSPC;
+       if (vector == SYSCALL_VECTOR)
                goto next;
+       for (i = 0; i < NR_IRQ_VECTORS; i++)
+               if (irq_vector[i] == vector)
+                       goto next;
 
-       if (current_vector >= FIRST_SYSTEM_VECTOR) {
-               offset++;
-               if (!(offset%8))
-                       return -ENOSPC;
-               current_vector = FIRST_DEVICE_VECTOR + offset;
-       }
-
-       vector_irq[current_vector] = irq;
-       if (irq != AUTO_ASSIGN)
-               IO_APIC_VECTOR(irq) = current_vector;
+       current_vector = vector;
+       current_offset = offset;
+       irq_vector[irq] = vector;
 
-       return current_vector;
+       return vector;
 }
 
-static struct hw_interrupt_type ioapic_level_type;
-static struct hw_interrupt_type ioapic_edge_type;
+static int assign_irq_vector(int irq)
+{
+       unsigned long flags;
+       int vector;
+
+       spin_lock_irqsave(&vector_lock, flags);
+       vector = __assign_irq_vector(irq);
+       spin_unlock_irqrestore(&vector_lock, flags);
+
+       return vector;
+}
+static struct irq_chip ioapic_chip;
 
 #define IOAPIC_AUTO    -1
 #define IOAPIC_EDGE    0
 #define IOAPIC_LEVEL   1
 
-static inline void ioapic_register_intr(int irq, int vector, unsigned long trigger)
+static void ioapic_register_intr(int irq, int vector, unsigned long trigger)
 {
-       if (use_pci_vector() && !platform_legacy_irq(irq)) {
-               if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
-                               trigger == IOAPIC_LEVEL)
-                       irq_desc[vector].handler = &ioapic_level_type;
-               else
-                       irq_desc[vector].handler = &ioapic_edge_type;
-               set_intr_gate(vector, interrupt[vector]);
-       } else  {
-               if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
-                               trigger == IOAPIC_LEVEL)
-                       irq_desc[irq].handler = &ioapic_level_type;
-               else
-                       irq_desc[irq].handler = &ioapic_edge_type;
-               set_intr_gate(vector, interrupt[irq]);
+       if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
+                       trigger == IOAPIC_LEVEL)
+               set_irq_chip_and_handler_name(irq, &ioapic_chip,
+                                        handle_fasteoi_irq, "fasteoi");
+       else {
+               irq_desc[irq].status |= IRQ_DELAYED_DISABLE;
+               set_irq_chip_and_handler_name(irq, &ioapic_chip,
+                                        handle_edge_irq, "edge");
        }
+       set_intr_gate(vector, interrupt[irq]);
 }
 
-void __init setup_IO_APIC_irqs(void)
+static void __init setup_IO_APIC_irqs(void)
 {
        struct IO_APIC_route_entry entry;
        int apic, pin, idx, irq, first_notcon = 1, vector;
        unsigned long flags;
 
-       printk(KERN_DEBUG "init IO_APIC IRQs\n");
+       apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
 
        for (apic = 0; apic < nr_ioapics; apic++) {
        for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
@@ -1230,10 +1314,14 @@ void __init setup_IO_APIC_irqs(void)
                idx = find_irq_entry(apic,pin,mp_INT);
                if (idx == -1) {
                        if (first_notcon) {
-                               printk(KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mpc_apicid, pin);
+                               apic_printk(APIC_VERBOSE, KERN_DEBUG
+                                               " IO-APIC (apicid-pin) %d-%d",
+                                               mp_ioapics[apic].mpc_apicid,
+                                               pin);
                                first_notcon = 0;
                        } else
-                               printk(", %d-%d", mp_ioapics[apic].mpc_apicid, pin);
+                               apic_printk(APIC_VERBOSE, ", %d-%d",
+                                       mp_ioapics[apic].mpc_apicid, pin);
                        continue;
                }
 
@@ -1267,23 +1355,22 @@ void __init setup_IO_APIC_irqs(void)
                                disable_8259A_irq(irq);
                }
                spin_lock_irqsave(&ioapic_lock, flags);
-               io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
-               io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
+               __ioapic_write_entry(apic, pin, entry);
+               set_native_irq_info(irq, TARGET_CPUS);
                spin_unlock_irqrestore(&ioapic_lock, flags);
        }
        }
 
        if (!first_notcon)
-               printk(" not connected.\n");
+               apic_printk(APIC_VERBOSE, " not connected.\n");
 }
 
 /*
  * Set up the 8259A-master output pin:
  */
-void __init setup_ExtINT_IRQ0_pin(unsigned int pin, int vector)
+static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, int vector)
 {
        struct IO_APIC_route_entry entry;
-       unsigned long flags;
 
        memset(&entry,0,sizeof(entry));
 
@@ -1308,15 +1395,13 @@ void __init setup_ExtINT_IRQ0_pin(unsigned int pin, int vector)
         * The timer IRQ doesn't have to know that behind the
         * scene we have a 8259A-master in AEOI mode ...
         */
-       irq_desc[0].handler = &ioapic_edge_type;
+       irq_desc[0].chip = &ioapic_chip;
+       set_irq_handler(0, handle_edge_irq);
 
        /*
         * Add it to the IO-APIC irq-routing table:
         */
-       spin_lock_irqsave(&ioapic_lock, flags);
-       io_apic_write(0, 0x11+2*pin, *(((int *)&entry)+1));
-       io_apic_write(0, 0x10+2*pin, *(((int *)&entry)+0));
-       spin_unlock_irqrestore(&ioapic_lock, flags);
+       ioapic_write_entry(apic, pin, entry);
 
        enable_8259A_irq(0);
 }
@@ -1334,6 +1419,9 @@ void __init print_IO_APIC(void)
        union IO_APIC_reg_03 reg_03;
        unsigned long flags;
 
+       if (apic_verbosity == APIC_QUIET)
+               return;
+
        printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
        for (i = 0; i < nr_ioapics; i++)
                printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
@@ -1361,7 +1449,7 @@ void __init print_IO_APIC(void)
        printk(KERN_DEBUG ".......    : physical APIC id: %02X\n", reg_00.bits.ID);
        printk(KERN_DEBUG ".......    : Delivery Type: %X\n", reg_00.bits.delivery_type);
        printk(KERN_DEBUG ".......    : LTS          : %X\n", reg_00.bits.LTS);
-       if (reg_00.bits.ID >= APIC_BROADCAST_ID)
+       if (reg_00.bits.ID >= get_physical_broadcast())
                UNEXPECTED_IO_APIC();
        if (reg_00.bits.__reserved_1 || reg_00.bits.__reserved_2)
                UNEXPECTED_IO_APIC();
@@ -1423,10 +1511,7 @@ void __init print_IO_APIC(void)
        for (i = 0; i <= reg_01.bits.entries; i++) {
                struct IO_APIC_route_entry entry;
 
-               spin_lock_irqsave(&ioapic_lock, flags);
-               *(((int *)&entry)+0) = io_apic_read(apic, 0x10+i*2);
-               *(((int *)&entry)+1) = io_apic_read(apic, 0x11+i*2);
-               spin_unlock_irqrestore(&ioapic_lock, flags);
+               entry = ioapic_read_entry(apic, i);
 
                printk(KERN_DEBUG " %02x %03X %02X  ",
                        i,
@@ -1466,11 +1551,16 @@ void __init print_IO_APIC(void)
        return;
 }
 
+#if 0
+
 static void print_APIC_bitfield (int base)
 {
        unsigned int v;
        int i, j;
 
+       if (apic_verbosity == APIC_QUIET)
+               return;
+
        printk(KERN_DEBUG "0123456789abcdef0123456789abcdef\n" KERN_DEBUG);
        for (i = 0; i < 8; i++) {
                v = apic_read(base + i*0x10);
@@ -1488,6 +1578,9 @@ void /*__init*/ print_local_APIC(void * dummy)
 {
        unsigned int v, ver, maxlvt;
 
+       if (apic_verbosity == APIC_QUIET)
+               return;
+
        printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
                smp_processor_id(), hard_smp_processor_id());
        v = apic_read(APIC_ID);
@@ -1571,10 +1664,12 @@ void print_all_local_APICs (void)
 
 void /*__init*/ print_PIC(void)
 {
-       extern spinlock_t i8259A_lock;
        unsigned int v;
        unsigned long flags;
 
+       if (apic_verbosity == APIC_QUIET)
+               return;
+
        printk(KERN_DEBUG "\nprinting PIC contents\n");
 
        spin_lock_irqsave(&i8259A_lock, flags);
@@ -1599,10 +1694,13 @@ void /*__init*/ print_PIC(void)
        printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
 }
 
+#endif  /*  0  */
+
 static void __init enable_IO_APIC(void)
 {
        union IO_APIC_reg_01 reg_01;
-       int i;
+       int i8259_apic, i8259_pin;
+       int i, apic;
        unsigned long flags;
 
        for (i = 0; i < PIN_MAP_SIZE; i++) {
@@ -1616,11 +1714,49 @@ static void __init enable_IO_APIC(void)
        /*
         * The number of IO-APIC IRQ registers (== #pins):
         */
-       for (i = 0; i < nr_ioapics; i++) {
+       for (apic = 0; apic < nr_ioapics; apic++) {
                spin_lock_irqsave(&ioapic_lock, flags);
-               reg_01.raw = io_apic_read(i, 1);
+               reg_01.raw = io_apic_read(apic, 1);
                spin_unlock_irqrestore(&ioapic_lock, flags);
-               nr_ioapic_registers[i] = reg_01.bits.entries+1;
+               nr_ioapic_registers[apic] = reg_01.bits.entries+1;
+       }
+       for(apic = 0; apic < nr_ioapics; apic++) {
+               int pin;
+               /* See if any of the pins is in ExtINT mode */
+               for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+                       struct IO_APIC_route_entry entry;
+                       entry = ioapic_read_entry(apic, pin);
+
+
+                       /* If the interrupt line is enabled and in ExtInt mode
+                        * I have found the pin where the i8259 is connected.
+                        */
+                       if ((entry.mask == 0) && (entry.delivery_mode == dest_ExtINT)) {
+                               ioapic_i8259.apic = apic;
+                               ioapic_i8259.pin  = pin;
+                               goto found_i8259;
+                       }
+               }
+       }
+ found_i8259:
+       /* Look to see what if the MP table has reported the ExtINT */
+       /* If we could not find the appropriate pin by looking at the ioapic
+        * the i8259 probably is not connected the ioapic but give the
+        * mptable a chance anyway.
+        */
+       i8259_pin  = find_isa_irq_pin(0, mp_ExtINT);
+       i8259_apic = find_isa_irq_apic(0, mp_ExtINT);
+       /* Trust the MP table if nothing is setup in the hardware */
+       if ((ioapic_i8259.pin == -1) && (i8259_pin >= 0)) {
+               printk(KERN_WARNING "ExtINT not setup in hardware but reported by MP table\n");
+               ioapic_i8259.pin  = i8259_pin;
+               ioapic_i8259.apic = i8259_apic;
+       }
+       /* Complain if the MP table and the hardware disagree */
+       if (((ioapic_i8259.apic != i8259_apic) || (ioapic_i8259.pin != i8259_pin)) &&
+               (i8259_pin >= 0) && (ioapic_i8259.pin >= 0))
+       {
+               printk(KERN_WARNING "ExtINT in hardware and MP table differ\n");
        }
 
        /*
@@ -1639,7 +1775,32 @@ void disable_IO_APIC(void)
         */
        clear_IO_APIC();
 
-       disconnect_bsp_APIC();
+       /*
+        * If the i8259 is routed through an IOAPIC
+        * Put that IOAPIC in virtual wire mode
+        * so legacy interrupts can be delivered.
+        */
+       if (ioapic_i8259.pin != -1) {
+               struct IO_APIC_route_entry entry;
+
+               memset(&entry, 0, sizeof(entry));
+               entry.mask            = 0; /* Enabled */
+               entry.trigger         = 0; /* Edge */
+               entry.irr             = 0;
+               entry.polarity        = 0; /* High */
+               entry.delivery_status = 0;
+               entry.dest_mode       = 0; /* Physical */
+               entry.delivery_mode   = dest_ExtINT; /* ExtInt */
+               entry.vector          = 0;
+               entry.dest.physical.physical_dest =
+                                       GET_APIC_ID(apic_read(APIC_ID));
+
+               /*
+                * Add it to the IO-APIC irq-routing table:
+                */
+               ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
+       }
+       disconnect_bsp_APIC(ioapic_i8259.pin != -1);
 }
 
 /*
@@ -1659,6 +1820,13 @@ static void __init setup_ioapic_ids_from_mpc(void)
        unsigned char old_id;
        unsigned long flags;
 
+       /*
+        * Don't check I/O APIC IDs for xAPIC systems.  They have
+        * no meaning without the serial APIC bus.
+        */
+       if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+               || APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
+               return;
        /*
         * This is broken; anything with a real cpu count has to
         * circumvent this idiocy regardless.
@@ -1677,7 +1845,7 @@ static void __init setup_ioapic_ids_from_mpc(void)
                
                old_id = mp_ioapics[apic].mpc_apicid;
 
-               if (mp_ioapics[apic].mpc_apicid >= APIC_BROADCAST_ID) {
+               if (mp_ioapics[apic].mpc_apicid >= get_physical_broadcast()) {
                        printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
                                apic, mp_ioapics[apic].mpc_apicid);
                        printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
@@ -1685,10 +1853,6 @@ static void __init setup_ioapic_ids_from_mpc(void)
                        mp_ioapics[apic].mpc_apicid = reg_00.bits.ID;
                }
 
-               /* Don't check I/O APIC IDs for some xAPIC systems.  They have
-                * no meaning without the serial APIC bus. */
-               if (NO_IOAPIC_CHECK)
-                       continue;
                /*
                 * Sanity check, is the ID really free? Every APIC in a
                 * system must have a unique ID or we get lots of nice
@@ -1698,10 +1862,10 @@ static void __init setup_ioapic_ids_from_mpc(void)
                                        mp_ioapics[apic].mpc_apicid)) {
                        printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
                                apic, mp_ioapics[apic].mpc_apicid);
-                       for (i = 0; i < APIC_BROADCAST_ID; i++)
+                       for (i = 0; i < get_physical_broadcast(); i++)
                                if (!physid_isset(i, phys_id_present_map))
                                        break;
-                       if (i >= APIC_BROADCAST_ID)
+                       if (i >= get_physical_broadcast())
                                panic("Max APIC ID exceeded!\n");
                        printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
                                i);
@@ -1710,7 +1874,9 @@ static void __init setup_ioapic_ids_from_mpc(void)
                } else {
                        physid_mask_t tmp;
                        tmp = apicid_to_cpu_present(mp_ioapics[apic].mpc_apicid);
-                       printk("Setting %d in the phys_id_present_map\n", mp_ioapics[apic].mpc_apicid);
+                       apic_printk(APIC_VERBOSE, "Setting %d in the "
+                                       "phys_id_present_map\n",
+                                       mp_ioapics[apic].mpc_apicid);
                        physids_or(phys_id_present_map, phys_id_present_map, tmp);
                }
 
@@ -1729,8 +1895,9 @@ static void __init setup_ioapic_ids_from_mpc(void)
                 * Read the right value from the MPC table and
                 * write it into the ID register.
                 */
-               printk(KERN_INFO "...changing IO-APIC physical APIC ID to %d ...",
-                                       mp_ioapics[apic].mpc_apicid);
+               apic_printk(APIC_VERBOSE, KERN_INFO
+                       "...changing IO-APIC physical APIC ID to %d ...",
+                       mp_ioapics[apic].mpc_apicid);
 
                reg_00.bits.ID = mp_ioapics[apic].mpc_apicid;
                spin_lock_irqsave(&ioapic_lock, flags);
@@ -1744,15 +1911,24 @@ static void __init setup_ioapic_ids_from_mpc(void)
                reg_00.raw = io_apic_read(apic, 0);
                spin_unlock_irqrestore(&ioapic_lock, flags);
                if (reg_00.bits.ID != mp_ioapics[apic].mpc_apicid)
-                       panic("could not set ID!\n");
+                       printk("could not set ID!\n");
                else
-                       printk(" ok.\n");
+                       apic_printk(APIC_VERBOSE, " ok.\n");
        }
 }
 #else
 static void __init setup_ioapic_ids_from_mpc(void) { }
 #endif
 
+static int no_timer_check __initdata;
+
+static int __init notimercheck(char *s)
+{
+       no_timer_check = 1;
+       return 1;
+}
+__setup("no_timer_check", notimercheck);
+
 /*
  * There is a nasty bug in some older SMP boards, their mptable lies
  * about the timer IRQ. We do the following to work around the situation:
@@ -1761,10 +1937,13 @@ static void __init setup_ioapic_ids_from_mpc(void) { }
  *     - if this function detects that timer IRQs are defunct, then we fall
  *       back to ISA timer IRQs
  */
-static int __init timer_irq_works(void)
+int __init timer_irq_works(void)
 {
        unsigned long t1 = jiffies;
 
+       if (no_timer_check)
+               return 1;
+
        local_irq_enable();
        /* Let ten ticks pass... */
        mdelay((10 * 1000) / HZ);
@@ -1796,6 +1975,8 @@ static int __init timer_irq_works(void)
  */
 
 /*
+ * Startup quirk:
+ *
  * Starting up a edge-triggered IO-APIC interrupt is
  * nasty - we need to make sure that we get the edge.
  * If it is already asserted for some reason, we need
@@ -1803,8 +1984,10 @@ static int __init timer_irq_works(void)
  *
  * This is not complete - we should be able to fake
  * an edge even if it isn't on the 8259A...
+ *
+ * (We do this for level-triggered IRQs too - it cannot hurt.)
  */
-static unsigned int startup_edge_ioapic_irq(unsigned int irq)
+static unsigned int startup_ioapic_irq(unsigned int irq)
 {
        int was_pending = 0;
        unsigned long flags;
@@ -1821,47 +2004,18 @@ static unsigned int startup_edge_ioapic_irq(unsigned int irq)
        return was_pending;
 }
 
-/*
- * Once we have recorded IRQ_PENDING already, we can mask the
- * interrupt for real. This prevents IRQ storms from unhandled
- * devices.
- */
-static void ack_edge_ioapic_irq(unsigned int irq)
+static void ack_ioapic_irq(unsigned int irq)
 {
-       move_irq(irq);
-       if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED))
-                                       == (IRQ_PENDING | IRQ_DISABLED))
-               mask_IO_APIC_irq(irq);
+       move_native_irq(irq);
        ack_APIC_irq();
 }
 
-/*
- * Level triggered interrupts can just be masked,
- * and shutting down and starting up the interrupt
- * is the same as enabling and disabling them -- except
- * with a startup need to return a "was pending" value.
- *
- * Level triggered interrupts are special because we
- * do not touch any IO-APIC register while handling
- * them. We ack the APIC in the end-IRQ handler, not
- * in the start-IRQ-handler. Protection against reentrance
- * from the same interrupt is still provided, both by the
- * generic IRQ layer and by the fact that an unacked local
- * APIC does not accept IRQs.
- */
-static unsigned int startup_level_ioapic_irq (unsigned int irq)
-{
-       unmask_IO_APIC_irq(irq);
-
-       return 0; /* don't check for pending */
-}
-
-static void end_level_ioapic_irq (unsigned int irq)
+static void ack_ioapic_quirk_irq(unsigned int irq)
 {
        unsigned long v;
        int i;
 
-       move_irq(irq);
+       move_native_irq(irq);
 /*
  * It appears there is an erratum which affects at least version 0x11
  * of I/O APIC (that's the 82093AA and cores integrated into various
@@ -1881,123 +2035,41 @@ static void end_level_ioapic_irq (unsigned int irq)
  * operation to prevent an edge-triggered interrupt escaping meanwhile.
  * The idea is from Manfred Spraul.  --macro
  */
-       i = IO_APIC_VECTOR(irq);
+       i = irq_vector[irq];
 
        v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
 
        ack_APIC_irq();
 
        if (!(v & (1 << (i & 0x1f)))) {
-#ifdef APIC_LOCKUP_DEBUG
-               struct irq_pin_list *entry;
-#endif
-
-#ifdef APIC_MISMATCH_DEBUG
                atomic_inc(&irq_mis_count);
-#endif
                spin_lock(&ioapic_lock);
                __mask_and_edge_IO_APIC_irq(irq);
-#ifdef APIC_LOCKUP_DEBUG
-               for (entry = irq_2_pin + irq;;) {
-                       unsigned int reg;
-
-                       if (entry->pin == -1)
-                               break;
-                       reg = io_apic_read(entry->apic, 0x10 + entry->pin * 2);
-                       if (reg & 0x00004000)
-                               printk(KERN_CRIT "Aieee!!!  Remote IRR"
-                                       " still set after unlock!\n");
-                       if (!entry->next)
-                               break;
-                       entry = irq_2_pin + entry->next;
-               }
-#endif
                __unmask_and_level_IO_APIC_irq(irq);
                spin_unlock(&ioapic_lock);
        }
 }
 
-#ifdef CONFIG_PCI_USE_VECTOR
-static unsigned int startup_edge_ioapic_vector(unsigned int vector)
-{
-       int irq = vector_to_irq(vector);
-
-       return startup_edge_ioapic_irq(irq);
-}
-
-static void ack_edge_ioapic_vector(unsigned int vector)
-{
-       int irq = vector_to_irq(vector);
-
-       ack_edge_ioapic_irq(irq);
-}
-
-static unsigned int startup_level_ioapic_vector (unsigned int vector)
+static int ioapic_retrigger_irq(unsigned int irq)
 {
-       int irq = vector_to_irq(vector);
-
-       return startup_level_ioapic_irq (irq);
-}
+       send_IPI_self(irq_vector[irq]);
 
-static void end_level_ioapic_vector (unsigned int vector)
-{
-       int irq = vector_to_irq(vector);
-
-       end_level_ioapic_irq(irq);
-}
-
-static void mask_IO_APIC_vector (unsigned int vector)
-{
-       int irq = vector_to_irq(vector);
-
-       mask_IO_APIC_irq(irq);
-}
-
-static void unmask_IO_APIC_vector (unsigned int vector)
-{
-       int irq = vector_to_irq(vector);
-
-       unmask_IO_APIC_irq(irq);
+       return 1;
 }
 
-static void set_ioapic_affinity_vector (unsigned int vector,
-                                       cpumask_t cpu_mask)
-{
-       int irq = vector_to_irq(vector);
-
-       set_ioapic_affinity_irq(irq, cpu_mask);
-}
+static struct irq_chip ioapic_chip __read_mostly = {
+       .name           = "IO-APIC",
+       .startup        = startup_ioapic_irq,
+       .mask           = mask_IO_APIC_irq,
+       .unmask         = unmask_IO_APIC_irq,
+       .ack            = ack_ioapic_irq,
+       .eoi            = ack_ioapic_quirk_irq,
+#ifdef CONFIG_SMP
+       .set_affinity   = set_ioapic_affinity_irq,
 #endif
-
-/*
- * Level and edge triggered IO-APIC interrupts need different handling,
- * so we use two separate IRQ descriptors. Edge triggered IRQs can be
- * handled with the level-triggered descriptor, but that one has slightly
- * more overhead. Level-triggered interrupts cannot be handled with the
- * edge-triggered handler, without risking IRQ storms and other ugly
- * races.
- */
-static struct hw_interrupt_type ioapic_edge_type = {
-       .typename       = "IO-APIC-edge",
-       .startup        = startup_edge_ioapic,
-       .shutdown       = shutdown_edge_ioapic,
-       .enable         = enable_edge_ioapic,
-       .disable        = disable_edge_ioapic,
-       .ack            = ack_edge_ioapic,
-       .end            = end_edge_ioapic,
-       .set_affinity   = set_ioapic_affinity,
+       .retrigger      = ioapic_retrigger_irq,
 };
 
-static struct hw_interrupt_type ioapic_level_type = {
-       .typename       = "IO-APIC-level",
-       .startup        = startup_level_ioapic,
-       .shutdown       = shutdown_level_ioapic,
-       .enable         = enable_level_ioapic,
-       .disable        = disable_level_ioapic,
-       .ack            = mask_and_ack_level_ioapic,
-       .end            = end_level_ioapic,
-       .set_affinity   = set_ioapic_affinity,
-};
 
 static inline void init_IO_APIC_traps(void)
 {
@@ -2016,12 +2088,7 @@ static inline void init_IO_APIC_traps(void)
         */
        for (irq = 0; irq < NR_IRQS ; irq++) {
                int tmp = irq;
-               if (use_pci_vector()) {
-                       if (!platform_legacy_irq(tmp))
-                               if ((tmp = vector_to_irq(tmp)) == -1)
-                                       continue;
-               }
-               if (IO_APIC_IRQ(tmp) && !IO_APIC_VECTOR(tmp)) {
+               if (IO_APIC_IRQ(tmp) && !irq_vector[tmp]) {
                        /*
                         * Hmm.. We don't have an entry for this,
                         * so default to an old-fashioned 8259
@@ -2031,20 +2098,21 @@ static inline void init_IO_APIC_traps(void)
                                make_8259A_irq(irq);
                        else
                                /* Strange. Oh, well.. */
-                               irq_desc[irq].handler = &no_irq_type;
+                               irq_desc[irq].chip = &no_irq_chip;
                }
        }
 }
 
-static void enable_lapic_irq (unsigned int irq)
-{
-       unsigned long v;
+/*
+ * The local APIC irq-chip implementation:
+ */
 
-       v = apic_read(APIC_LVT0);
-       apic_write_around(APIC_LVT0, v & ~APIC_LVT_MASKED);
+static void ack_apic(unsigned int irq)
+{
+       ack_APIC_irq();
 }
 
-static void disable_lapic_irq (unsigned int irq)
+static void mask_lapic_irq (unsigned int irq)
 {
        unsigned long v;
 
@@ -2052,21 +2120,19 @@ static void disable_lapic_irq (unsigned int irq)
        apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED);
 }
 
-static void ack_lapic_irq (unsigned int irq)
+static void unmask_lapic_irq (unsigned int irq)
 {
-       ack_APIC_irq();
-}
+       unsigned long v;
 
-static void end_lapic_irq (unsigned int i) { /* nothing */ }
+       v = apic_read(APIC_LVT0);
+       apic_write_around(APIC_LVT0, v & ~APIC_LVT_MASKED);
+}
 
-static struct hw_interrupt_type lapic_irq_type = {
-       .typename       = "local-APIC-edge",
-       .startup        = NULL, /* startup_irq() not used for IRQ0 */
-       .shutdown       = NULL, /* shutdown_irq() not used for IRQ0 */
-       .enable         = enable_lapic_irq,
-       .disable        = disable_lapic_irq,
-       .ack            = ack_lapic_irq,
-       .end            = end_lapic_irq
+static struct irq_chip lapic_chip __read_mostly = {
+       .name           = "local-APIC-edge",
+       .mask           = mask_lapic_irq,
+       .unmask         = unmask_lapic_irq,
+       .eoi            = ack_apic,
 };
 
 static void setup_nmi (void)
@@ -2080,11 +2146,11 @@ static void setup_nmi (void)
         * is from Maciej W. Rozycki - so we do not have to EOI from
         * the NMI handler or the timer interrupt.
         */ 
-       printk(KERN_INFO "activating NMI Watchdog ...");
+       apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ...");
 
        on_each_cpu(enable_NMI_through_LVT0, NULL, 1, 1);
 
-       printk(" done.\n");
+       apic_printk(APIC_VERBOSE, " done.\n");
 }
 
 /*
@@ -2096,20 +2162,23 @@ static void setup_nmi (void)
  */
 static inline void unlock_ExtINT_logic(void)
 {
-       int pin, i;
+       int apic, pin, i;
        struct IO_APIC_route_entry entry0, entry1;
        unsigned char save_control, save_freq_select;
-       unsigned long flags;
 
-       pin = find_isa_irq_pin(8, mp_INT);
-       if (pin == -1)
+       pin  = find_isa_irq_pin(8, mp_INT);
+       if (pin == -1) {
+               WARN_ON_ONCE(1);
+               return;
+       }
+       apic = find_isa_irq_apic(8, mp_INT);
+       if (apic == -1) {
+               WARN_ON_ONCE(1);
                return;
+       }
 
-       spin_lock_irqsave(&ioapic_lock, flags);
-       *(((int *)&entry0) + 1) = io_apic_read(0, 0x11 + 2 * pin);
-       *(((int *)&entry0) + 0) = io_apic_read(0, 0x10 + 2 * pin);
-       spin_unlock_irqrestore(&ioapic_lock, flags);
-       clear_IO_APIC_pin(0, pin);
+       entry0 = ioapic_read_entry(apic, pin);
+       clear_IO_APIC_pin(apic, pin);
 
        memset(&entry1, 0, sizeof(entry1));
 
@@ -2121,10 +2190,7 @@ static inline void unlock_ExtINT_logic(void)
        entry1.trigger = 0;
        entry1.vector = 0;
 
-       spin_lock_irqsave(&ioapic_lock, flags);
-       io_apic_write(0, 0x11 + 2 * pin, *(((int *)&entry1) + 1));
-       io_apic_write(0, 0x10 + 2 * pin, *(((int *)&entry1) + 0));
-       spin_unlock_irqrestore(&ioapic_lock, flags);
+       ioapic_write_entry(apic, pin, entry1);
 
        save_control = CMOS_READ(RTC_CONTROL);
        save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
@@ -2141,23 +2207,22 @@ static inline void unlock_ExtINT_logic(void)
 
        CMOS_WRITE(save_control, RTC_CONTROL);
        CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
-       clear_IO_APIC_pin(0, pin);
+       clear_IO_APIC_pin(apic, pin);
 
-       spin_lock_irqsave(&ioapic_lock, flags);
-       io_apic_write(0, 0x11 + 2 * pin, *(((int *)&entry0) + 1));
-       io_apic_write(0, 0x10 + 2 * pin, *(((int *)&entry0) + 0));
-       spin_unlock_irqrestore(&ioapic_lock, flags);
+       ioapic_write_entry(apic, pin, entry0);
 }
 
+int timer_uses_ioapic_pin_0;
+
 /*
  * This code may look a bit paranoid, but it's supposed to cooperate with
  * a wide range of boards and BIOS bugs.  Fortunately only the timer IRQ
  * is so screwy.  Thanks to Brian Perkins for testing/hacking this beast
  * fanatically on his truly buggy board.
  */
-static inline void check_timer(void)
+static inline void __init check_timer(void)
 {
-       int pin1, pin2;
+       int apic1, pin1, apic2, pin2;
        int vector;
 
        /*
@@ -2177,12 +2242,19 @@ static inline void check_timer(void)
        apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
        init_8259A(1);
        timer_ack = 1;
-       enable_8259A_irq(0);
+       if (timer_over_8254 > 0)
+               enable_8259A_irq(0);
 
-       pin1 = find_isa_irq_pin(0, mp_INT);
-       pin2 = find_isa_irq_pin(0, mp_ExtINT);
+       pin1  = find_isa_irq_pin(0, mp_INT);
+       apic1 = find_isa_irq_apic(0, mp_INT);
+       pin2  = ioapic_i8259.pin;
+       apic2 = ioapic_i8259.apic;
 
-       printk(KERN_INFO "..TIMER: vector=0x%02X pin1=%d pin2=%d\n", vector, pin1, pin2);
+       if (pin1 == 0)
+               timer_uses_ioapic_pin_0 = 1;
+
+       printk(KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n",
+               vector, apic1, pin1, apic2, pin2);
 
        if (pin1 != -1) {
                /*
@@ -2194,12 +2266,14 @@ static inline void check_timer(void)
                                disable_8259A_irq(0);
                                setup_nmi();
                                enable_8259A_irq(0);
-                               check_nmi_watchdog();
                        }
+                       if (disable_timer_pin_1 > 0)
+                               clear_IO_APIC_pin(0, pin1);
                        return;
                }
-               clear_IO_APIC_pin(0, pin1);
-               printk(KERN_ERR "..MP-BIOS bug: 8254 timer not connected to IO-APIC\n");
+               clear_IO_APIC_pin(apic1, pin1);
+               printk(KERN_ERR "..MP-BIOS bug: 8254 timer not connected to "
+                               "IO-APIC\n");
        }
 
        printk(KERN_INFO "...trying to set up timer (IRQ0) through the 8259A ... ");
@@ -2208,23 +2282,22 @@ static inline void check_timer(void)
                /*
                 * legacy devices should be connected to IO APIC #0
                 */
-               setup_ExtINT_IRQ0_pin(pin2, vector);
+               setup_ExtINT_IRQ0_pin(apic2, pin2, vector);
                if (timer_irq_works()) {
                        printk("works.\n");
                        if (pin1 != -1)
-                               replace_pin_at_irq(0, 0, pin1, 0, pin2);
+                               replace_pin_at_irq(0, apic1, pin1, apic2, pin2);
                        else
-                               add_pin_to_irq(0, 0, pin2);
+                               add_pin_to_irq(0, apic2, pin2);
                        if (nmi_watchdog == NMI_IO_APIC) {
                                setup_nmi();
-                               check_nmi_watchdog();
                        }
                        return;
                }
                /*
                 * Cleanup, just in case ...
                 */
-               clear_IO_APIC_pin(0, pin2);
+               clear_IO_APIC_pin(apic2, pin2);
        }
        printk(" failed.\n");
 
@@ -2236,7 +2309,8 @@ static inline void check_timer(void)
        printk(KERN_INFO "...trying to set up timer as Virtual Wire IRQ...");
 
        disable_8259A_irq(0);
-       irq_desc[0].handler = &lapic_irq_type;
+       set_irq_chip_and_handler_name(0, &lapic_chip, handle_fasteoi_irq,
+                                     "fasteio");
        apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector);   /* Fixed mode */
        enable_8259A_irq(0);
 
@@ -2261,7 +2335,8 @@ static inline void check_timer(void)
                return;
        }
        printk(" failed :(.\n");
-       panic("IO-APIC + timer doesn't work! pester mingo@redhat.com");
+       panic("IO-APIC + timer doesn't work!  Boot with apic=debug and send a "
+               "report.  Then try booting with the 'noapic' option");
 }
 
 /*
@@ -2297,6 +2372,20 @@ void __init setup_IO_APIC(void)
                print_IO_APIC();
 }
 
+static int __init setup_disable_8254_timer(char *s)
+{
+       timer_over_8254 = -1;
+       return 1;
+}
+static int __init setup_enable_8254_timer(char *s)
+{
+       timer_over_8254 = 2;
+       return 1;
+}
+
+__setup("disable_8254_timer", setup_disable_8254_timer);
+__setup("enable_8254_timer", setup_enable_8254_timer);
+
 /*
  *     Called after all the initialization is done. If we didnt find any
  *     APIC bugs then we can allow the modify fast path
@@ -2311,13 +2400,330 @@ static int __init io_apic_bug_finalize(void)
 
 late_initcall(io_apic_bug_finalize);
 
+struct sysfs_ioapic_data {
+       struct sys_device dev;
+       struct IO_APIC_route_entry entry[0];
+};
+static struct sysfs_ioapic_data * mp_ioapic_data[MAX_IO_APICS];
+
+static int ioapic_suspend(struct sys_device *dev, pm_message_t state)
+{
+       struct IO_APIC_route_entry *entry;
+       struct sysfs_ioapic_data *data;
+       int i;
+       
+       data = container_of(dev, struct sysfs_ioapic_data, dev);
+       entry = data->entry;
+       for (i = 0; i < nr_ioapic_registers[dev->id]; i ++)
+               entry[i] = ioapic_read_entry(dev->id, i);
+
+       return 0;
+}
+
+static int ioapic_resume(struct sys_device *dev)
+{
+       struct IO_APIC_route_entry *entry;
+       struct sysfs_ioapic_data *data;
+       unsigned long flags;
+       union IO_APIC_reg_00 reg_00;
+       int i;
+       
+       data = container_of(dev, struct sysfs_ioapic_data, dev);
+       entry = data->entry;
+
+       spin_lock_irqsave(&ioapic_lock, flags);
+       reg_00.raw = io_apic_read(dev->id, 0);
+       if (reg_00.bits.ID != mp_ioapics[dev->id].mpc_apicid) {
+               reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid;
+               io_apic_write(dev->id, 0, reg_00.raw);
+       }
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+       for (i = 0; i < nr_ioapic_registers[dev->id]; i ++)
+               ioapic_write_entry(dev->id, i, entry[i]);
+
+       return 0;
+}
+
+static struct sysdev_class ioapic_sysdev_class = {
+       set_kset_name("ioapic"),
+       .suspend = ioapic_suspend,
+       .resume = ioapic_resume,
+};
+
+static int __init ioapic_init_sysfs(void)
+{
+       struct sys_device * dev;
+       int i, size, error = 0;
+
+       error = sysdev_class_register(&ioapic_sysdev_class);
+       if (error)
+               return error;
+
+       for (i = 0; i < nr_ioapics; i++ ) {
+               size = sizeof(struct sys_device) + nr_ioapic_registers[i] 
+                       * sizeof(struct IO_APIC_route_entry);
+               mp_ioapic_data[i] = kmalloc(size, GFP_KERNEL);
+               if (!mp_ioapic_data[i]) {
+                       printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
+                       continue;
+               }
+               memset(mp_ioapic_data[i], 0, size);
+               dev = &mp_ioapic_data[i]->dev;
+               dev->id = i; 
+               dev->cls = &ioapic_sysdev_class;
+               error = sysdev_register(dev);
+               if (error) {
+                       kfree(mp_ioapic_data[i]);
+                       mp_ioapic_data[i] = NULL;
+                       printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
+                       continue;
+               }
+       }
+
+       return 0;
+}
+
+device_initcall(ioapic_init_sysfs);
+
+/*
+ * Dynamic irq allocate and deallocation
+ */
+int create_irq(void)
+{
+       /* Allocate an unused irq */
+       int irq, new, vector = 0;
+       unsigned long flags;
+
+       irq = -ENOSPC;
+       spin_lock_irqsave(&vector_lock, flags);
+       for (new = (NR_IRQS - 1); new >= 0; new--) {
+               if (platform_legacy_irq(new))
+                       continue;
+               if (irq_vector[new] != 0)
+                       continue;
+               vector = __assign_irq_vector(new);
+               if (likely(vector > 0))
+                       irq = new;
+               break;
+       }
+       spin_unlock_irqrestore(&vector_lock, flags);
+
+       if (irq >= 0) {
+               set_intr_gate(vector, interrupt[irq]);
+               dynamic_irq_init(irq);
+       }
+       return irq;
+}
+
+void destroy_irq(unsigned int irq)
+{
+       unsigned long flags;
+
+       dynamic_irq_cleanup(irq);
+
+       spin_lock_irqsave(&vector_lock, flags);
+       irq_vector[irq] = 0;
+       spin_unlock_irqrestore(&vector_lock, flags);
+}
+
+/*
+ * MSI mesage composition
+ */
+#ifdef CONFIG_PCI_MSI
+static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
+{
+       int vector;
+       unsigned dest;
+
+       vector = assign_irq_vector(irq);
+       if (vector >= 0) {
+               dest = cpu_mask_to_apicid(TARGET_CPUS);
+
+               msg->address_hi = MSI_ADDR_BASE_HI;
+               msg->address_lo =
+                       MSI_ADDR_BASE_LO |
+                       ((INT_DEST_MODE == 0) ?
+                               MSI_ADDR_DEST_MODE_PHYSICAL:
+                               MSI_ADDR_DEST_MODE_LOGICAL) |
+                       ((INT_DELIVERY_MODE != dest_LowestPrio) ?
+                               MSI_ADDR_REDIRECTION_CPU:
+                               MSI_ADDR_REDIRECTION_LOWPRI) |
+                       MSI_ADDR_DEST_ID(dest);
+
+               msg->data =
+                       MSI_DATA_TRIGGER_EDGE |
+                       MSI_DATA_LEVEL_ASSERT |
+                       ((INT_DELIVERY_MODE != dest_LowestPrio) ?
+                               MSI_DATA_DELIVERY_FIXED:
+                               MSI_DATA_DELIVERY_LOWPRI) |
+                       MSI_DATA_VECTOR(vector);
+       }
+       return vector;
+}
+
+#ifdef CONFIG_SMP
+static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
+{
+       struct msi_msg msg;
+       unsigned int dest;
+       cpumask_t tmp;
+       int vector;
+
+       cpus_and(tmp, mask, cpu_online_map);
+       if (cpus_empty(tmp))
+               tmp = TARGET_CPUS;
+
+       vector = assign_irq_vector(irq);
+       if (vector < 0)
+               return;
+
+       dest = cpu_mask_to_apicid(mask);
+
+       read_msi_msg(irq, &msg);
+
+       msg.data &= ~MSI_DATA_VECTOR_MASK;
+       msg.data |= MSI_DATA_VECTOR(vector);
+       msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
+       msg.address_lo |= MSI_ADDR_DEST_ID(dest);
+
+       write_msi_msg(irq, &msg);
+       set_native_irq_info(irq, mask);
+}
+#endif /* CONFIG_SMP */
+
+/*
+ * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
+ * which implement the MSI or MSI-X Capability Structure.
+ */
+static struct irq_chip msi_chip = {
+       .name           = "PCI-MSI",
+       .enable         = enable_msi_irq,
+       .disable        = disable_msi_irq,
+       .unmask         = unmask_msi_irq,
+       .mask           = mask_msi_irq,
+       .ack            = ack_ioapic_irq,
+#ifdef CONFIG_SMP
+       .set_affinity   = set_msi_irq_affinity,
+#endif
+       .retrigger      = ioapic_retrigger_irq,
+};
+
+int arch_setup_msi_irq(unsigned int irq, struct pci_dev *dev)
+{
+       struct msi_msg msg;
+       int ret;
+       ret = msi_compose_msg(dev, irq, &msg);
+       if (ret < 0)
+               return ret;
+
+       write_msi_msg(irq, &msg);
+
+       set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq,
+                                     "edge");
+
+       return 0;
+}
+
+void arch_teardown_msi_irq(unsigned int irq)
+{
+       return;
+}
+
+#endif /* CONFIG_PCI_MSI */
+
+/*
+ * Hypertransport interrupt support
+ */
+#ifdef CONFIG_HT_IRQ
+
+#ifdef CONFIG_SMP
+
+static void target_ht_irq(unsigned int irq, unsigned int dest)
+{
+       struct ht_irq_msg msg;
+       fetch_ht_irq_msg(irq, &msg);
+
+       msg.address_lo &= ~(HT_IRQ_LOW_DEST_ID_MASK);
+       msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK);
+
+       msg.address_lo |= HT_IRQ_LOW_DEST_ID(dest);
+       msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest);
+
+       write_ht_irq_msg(irq, &msg);
+}
+
+static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
+{
+       unsigned int dest;
+       cpumask_t tmp;
+
+       cpus_and(tmp, mask, cpu_online_map);
+       if (cpus_empty(tmp))
+               tmp = TARGET_CPUS;
+
+       cpus_and(mask, tmp, CPU_MASK_ALL);
+
+       dest = cpu_mask_to_apicid(mask);
+
+       target_ht_irq(irq, dest);
+       set_native_irq_info(irq, mask);
+}
+#endif
+
+static struct irq_chip ht_irq_chip = {
+       .name           = "PCI-HT",
+       .mask           = mask_ht_irq,
+       .unmask         = unmask_ht_irq,
+       .ack            = ack_ioapic_irq,
+#ifdef CONFIG_SMP
+       .set_affinity   = set_ht_irq_affinity,
+#endif
+       .retrigger      = ioapic_retrigger_irq,
+};
+
+int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
+{
+       int vector;
+
+       vector = assign_irq_vector(irq);
+       if (vector >= 0) {
+               struct ht_irq_msg msg;
+               unsigned dest;
+               cpumask_t tmp;
+
+               cpus_clear(tmp);
+               cpu_set(vector >> 8, tmp);
+               dest = cpu_mask_to_apicid(tmp);
+
+               msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
+
+               msg.address_lo =
+                       HT_IRQ_LOW_BASE |
+                       HT_IRQ_LOW_DEST_ID(dest) |
+                       HT_IRQ_LOW_VECTOR(vector) |
+                       ((INT_DEST_MODE == 0) ?
+                               HT_IRQ_LOW_DM_PHYSICAL :
+                               HT_IRQ_LOW_DM_LOGICAL) |
+                       HT_IRQ_LOW_RQEOI_EDGE |
+                       ((INT_DELIVERY_MODE != dest_LowestPrio) ?
+                               HT_IRQ_LOW_MT_FIXED :
+                               HT_IRQ_LOW_MT_ARBITRATED) |
+                       HT_IRQ_LOW_IRQ_MASKED;
+
+               write_ht_irq_msg(irq, &msg);
+
+               set_irq_chip_and_handler_name(irq, &ht_irq_chip,
+                                             handle_edge_irq, "edge");
+       }
+       return vector;
+}
+#endif /* CONFIG_HT_IRQ */
+
 /* --------------------------------------------------------------------------
                           ACPI-based IOAPIC Configuration
    -------------------------------------------------------------------------- */
 
-#ifdef CONFIG_ACPI_BOOT
-
-#define IO_APIC_MAX_ID APIC_BROADCAST_ID
+#ifdef CONFIG_ACPI
 
 int __init io_apic_get_unique_id (int ioapic, int apic_id)
 {
@@ -2343,7 +2749,7 @@ int __init io_apic_get_unique_id (int ioapic, int apic_id)
        reg_00.raw = io_apic_read(ioapic, 0);
        spin_unlock_irqrestore(&ioapic_lock, flags);
 
-       if (apic_id >= IO_APIC_MAX_ID) {
+       if (apic_id >= get_physical_broadcast()) {
                printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying "
                        "%d\n", ioapic, apic_id, reg_00.bits.ID);
                apic_id = reg_00.bits.ID;
@@ -2355,12 +2761,12 @@ int __init io_apic_get_unique_id (int ioapic, int apic_id)
         */
        if (check_apicid_used(apic_id_map, apic_id)) {
 
-               for (i = 0; i < IO_APIC_MAX_ID; i++) {
+               for (i = 0; i < get_physical_broadcast(); i++) {
                        if (!check_apicid_used(apic_id_map, i))
                                break;
                }
 
-               if (i == IO_APIC_MAX_ID)
+               if (i == get_physical_broadcast())
                        panic("Max apic_id exceeded!\n");
 
                printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, "
@@ -2381,11 +2787,14 @@ int __init io_apic_get_unique_id (int ioapic, int apic_id)
                spin_unlock_irqrestore(&ioapic_lock, flags);
 
                /* Sanity check */
-               if (reg_00.bits.ID != apic_id)
-                       panic("IOAPIC[%d]: Unable change apic_id!\n", ioapic);
+               if (reg_00.bits.ID != apic_id) {
+                       printk("IOAPIC[%d]: Unable to change apic_id!\n", ioapic);
+                       return -1;
+               }
        }
 
-       printk(KERN_INFO "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id);
+       apic_printk(APIC_VERBOSE, KERN_INFO
+                       "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id);
 
        return apic_id;
 }
@@ -2451,29 +2860,44 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a
 
        entry.vector = assign_irq_vector(irq);
 
-       Dprintk(KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry (%d-%d -> 0x%x -> "
-               "IRQ %d Mode:%i Active:%i)\n", ioapic, 
-               mp_ioapics[ioapic].mpc_apicid, pin, entry.vector, irq, edge_level, active_high_low);
+       apic_printk(APIC_DEBUG, KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry "
+               "(%d-%d -> 0x%x -> IRQ %d Mode:%i Active:%i)\n", ioapic,
+               mp_ioapics[ioapic].mpc_apicid, pin, entry.vector, irq,
+               edge_level, active_high_low);
 
-       if (use_pci_vector() && !platform_legacy_irq(irq))
-               irq = IO_APIC_VECTOR(irq);
-       if (edge_level) {
-               irq_desc[irq].handler = &ioapic_level_type;
-       } else {
-               irq_desc[irq].handler = &ioapic_edge_type;
-       }
-
-       set_intr_gate(entry.vector, interrupt[irq]);
+       ioapic_register_intr(irq, entry.vector, edge_level);
 
        if (!ioapic && (irq < 16))
                disable_8259A_irq(irq);
 
        spin_lock_irqsave(&ioapic_lock, flags);
-       io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1));
-       io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0));
+       __ioapic_write_entry(ioapic, pin, entry);
+       set_native_irq_info(irq, TARGET_CPUS);
        spin_unlock_irqrestore(&ioapic_lock, flags);
 
        return 0;
 }
 
-#endif /*CONFIG_ACPI_BOOT*/
+#endif /* CONFIG_ACPI */
+
+static int __init parse_disable_timer_pin_1(char *arg)
+{
+       disable_timer_pin_1 = 1;
+       return 0;
+}
+early_param("disable_timer_pin_1", parse_disable_timer_pin_1);
+
+static int __init parse_enable_timer_pin_1(char *arg)
+{
+       disable_timer_pin_1 = -1;
+       return 0;
+}
+early_param("enable_timer_pin_1", parse_enable_timer_pin_1);
+
+static int __init parse_noapic(char *arg)
+{
+       /* disable IO-APIC */
+       disable_ioapic_setup();
+       return 0;
+}
+early_param("noapic", parse_noapic);