Fedora kernel-2.6.17-1.2142_FC4 patched with stable patch-2.6.17.4-vs2.0.2-rc26.diff
[linux-2.6.git] / arch / i386 / kernel / smpboot.c
index 1ce7180..825b2b4 100644 (file)
 #include <linux/sched.h>
 #include <linux/kernel_stat.h>
 #include <linux/smp_lock.h>
-#include <linux/irq.h>
 #include <linux/bootmem.h>
+#include <linux/notifier.h>
+#include <linux/cpu.h>
+#include <linux/percpu.h>
 
 #include <linux/delay.h>
 #include <linux/mc146818rtc.h>
 #include <smpboot_hooks.h>
 
 /* Set if we find a B stepping CPU */
-static int __initdata smp_b_stepping;
+static int __devinitdata smp_b_stepping;
 
 /* Number of siblings per CPU package */
 int smp_num_siblings = 1;
-int phys_proc_id[NR_CPUS]; /* Package ID of each logical CPU */
-EXPORT_SYMBOL(phys_proc_id);
-int cpu_core_id[NR_CPUS]; /* Core ID of each logical CPU */
-EXPORT_SYMBOL(cpu_core_id);
+#ifdef CONFIG_X86_HT
+EXPORT_SYMBOL(smp_num_siblings);
+#endif
+
+/* Package ID of each logical CPU */
+int phys_proc_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID};
+
+/* Core ID of each logical CPU */
+int cpu_core_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID};
+
+/* Last level cache ID of each logical CPU */
+int cpu_llc_id[NR_CPUS] __cpuinitdata = {[0 ... NR_CPUS-1] = BAD_APICID};
+
+/* representing HT siblings of each logical CPU */
+cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly;
+EXPORT_SYMBOL(cpu_sibling_map);
+
+/* representing HT and core siblings of each logical CPU */
+cpumask_t cpu_core_map[NR_CPUS] __read_mostly;
+EXPORT_SYMBOL(cpu_core_map);
 
 /* bitmap of online cpus */
-cpumask_t cpu_online_map;
+cpumask_t cpu_online_map __read_mostly;
+EXPORT_SYMBOL(cpu_online_map);
 
 cpumask_t cpu_callin_map;
 cpumask_t cpu_callout_map;
+EXPORT_SYMBOL(cpu_callout_map);
+cpumask_t cpu_possible_map;
+EXPORT_SYMBOL(cpu_possible_map);
 static cpumask_t smp_commenced_mask;
 
+/* TSC's upper 32 bits can't be written in eariler CPU (before prescott), there
+ * is no way to resync one AP against BP. TBD: for prescott and above, we
+ * should use IA64's algorithm
+ */
+static int __devinitdata tsc_sync_disabled;
+
 /* Per CPU bogomips and other parameters */
 struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
+EXPORT_SYMBOL(cpu_data);
 
-u8 x86_cpu_to_apicid[NR_CPUS] =
+u8 x86_cpu_to_apicid[NR_CPUS] __read_mostly =
                        { [0 ... NR_CPUS-1] = 0xff };
 EXPORT_SYMBOL(x86_cpu_to_apicid);
 
@@ -90,13 +119,16 @@ static int trampoline_exec;
 
 static void map_cpu_to_logical_apicid(void);
 
+/* State of each CPU. */
+DEFINE_PER_CPU(int, cpu_state) = { 0 };
+
 /*
  * Currently trivial. Write the real->protected mode
  * bootstrap into the page concerned. The caller
  * has made sure it's suitably aligned.
  */
 
-static unsigned long __init setup_trampoline(void)
+static unsigned long __devinit setup_trampoline(void)
 {
        memcpy(trampoline_base, trampoline_data, trampoline_end - trampoline_data);
        return virt_to_phys(trampoline_base);
@@ -126,7 +158,7 @@ void __init smp_alloc_memory(void)
  * a given CPU
  */
 
-static void __init smp_store_cpu_info(int id)
+static void __devinit smp_store_cpu_info(int id)
 {
        struct cpuinfo_x86 *c = cpu_data + id;
 
@@ -172,7 +204,7 @@ static void __init smp_store_cpu_info(int id)
                                goto valid_k7;
 
                /* If we get here, it's not a certified SMP capable AMD system. */
-               tainted |= TAINT_UNSAFE_SMP;
+               add_taint(TAINT_UNSAFE_SMP);
        }
 
 valid_k7:
@@ -199,7 +231,7 @@ static void __init synchronize_tsc_bp (void)
        unsigned long long t0;
        unsigned long long sum, avg;
        long long delta;
-       unsigned long one_usec;
+       unsigned int one_usec;
        int buggy = 0;
 
        printk(KERN_INFO "checking TSC synchronization across %u CPUs: ", num_booting_cpus());
@@ -281,7 +313,9 @@ static void __init synchronize_tsc_bp (void)
                        if (tsc_values[i] < avg)
                                realdelta = -realdelta;
 
-                       printk(KERN_INFO "CPU#%d had %ld usecs TSC skew, fixed it up.\n", i, realdelta);
+                       if (realdelta > 0)
+                               printk(KERN_INFO "CPU#%d had %ld usecs TSC "
+                                       "skew, fixed it up.\n", i, realdelta);
                }
 
                sum += delta;
@@ -320,7 +354,7 @@ extern void calibrate_delay(void);
 
 static atomic_t init_deasserted;
 
-static void __init smp_callin(void)
+static void __devinit smp_callin(void)
 {
        int cpuid, phys_id;
        unsigned long timeout;
@@ -405,16 +439,94 @@ static void __init smp_callin(void)
        /*
         *      Synchronize the TSC with the BP
         */
-       if (cpu_has_tsc && cpu_khz)
+       if (cpu_has_tsc && cpu_khz && !tsc_sync_disabled)
                synchronize_tsc_ap();
 }
 
 static int cpucount;
 
+/* maps the cpu to the sched domain representing multi-core */
+cpumask_t cpu_coregroup_map(int cpu)
+{
+       struct cpuinfo_x86 *c = cpu_data + cpu;
+       /*
+        * For perf, we return last level cache shared map.
+        * TBD: when power saving sched policy is added, we will return
+        *      cpu_core_map when power saving policy is enabled
+        */
+       return c->llc_shared_map;
+}
+
+/* representing cpus for which sibling maps can be computed */
+static cpumask_t cpu_sibling_setup_map;
+
+static inline void
+set_cpu_sibling_map(int cpu)
+{
+       int i;
+       struct cpuinfo_x86 *c = cpu_data;
+
+       cpu_set(cpu, cpu_sibling_setup_map);
+
+       if (smp_num_siblings > 1) {
+               for_each_cpu_mask(i, cpu_sibling_setup_map) {
+                       if (phys_proc_id[cpu] == phys_proc_id[i] &&
+                           cpu_core_id[cpu] == cpu_core_id[i]) {
+                               cpu_set(i, cpu_sibling_map[cpu]);
+                               cpu_set(cpu, cpu_sibling_map[i]);
+                               cpu_set(i, cpu_core_map[cpu]);
+                               cpu_set(cpu, cpu_core_map[i]);
+                               cpu_set(i, c[cpu].llc_shared_map);
+                               cpu_set(cpu, c[i].llc_shared_map);
+                       }
+               }
+       } else {
+               cpu_set(cpu, cpu_sibling_map[cpu]);
+       }
+
+       cpu_set(cpu, c[cpu].llc_shared_map);
+
+       if (current_cpu_data.x86_max_cores == 1) {
+               cpu_core_map[cpu] = cpu_sibling_map[cpu];
+               c[cpu].booted_cores = 1;
+               return;
+       }
+
+       for_each_cpu_mask(i, cpu_sibling_setup_map) {
+               if (cpu_llc_id[cpu] != BAD_APICID &&
+                   cpu_llc_id[cpu] == cpu_llc_id[i]) {
+                       cpu_set(i, c[cpu].llc_shared_map);
+                       cpu_set(cpu, c[i].llc_shared_map);
+               }
+               if (phys_proc_id[cpu] == phys_proc_id[i]) {
+                       cpu_set(i, cpu_core_map[cpu]);
+                       cpu_set(cpu, cpu_core_map[i]);
+                       /*
+                        *  Does this new cpu bringup a new core?
+                        */
+                       if (cpus_weight(cpu_sibling_map[cpu]) == 1) {
+                               /*
+                                * for each core in package, increment
+                                * the booted_cores for this new cpu
+                                */
+                               if (first_cpu(cpu_sibling_map[i]) == i)
+                                       c[cpu].booted_cores++;
+                               /*
+                                * increment the core count for all
+                                * the other cpus in this package
+                                */
+                               if (i != cpu)
+                                       c[i].booted_cores++;
+                       } else if (i != cpu && !c[cpu].booted_cores)
+                               c[cpu].booted_cores = c[i].booted_cores;
+               }
+       }
+}
+
 /*
  * Activate a secondary processor.
  */
-static void __init start_secondary(void *unused)
+static void __devinit start_secondary(void *unused)
 {
        /*
         * Dont put anything before smp_callin(), SMP
@@ -422,6 +534,7 @@ static void __init start_secondary(void *unused)
         * things done here to the most necessary things.
         */
        cpu_init();
+       preempt_disable();
        smp_callin();
        while (!cpu_isset(smp_processor_id(), smp_commenced_mask))
                rep_nop();
@@ -437,7 +550,23 @@ static void __init start_secondary(void *unused)
         * the local TLBs too.
         */
        local_flush_tlb();
+
+       /* This must be done before setting cpu_online_map */
+       set_cpu_sibling_map(raw_smp_processor_id());
+       wmb();
+
+       /*
+        * We need to hold call_lock, so there is no inconsistency
+        * between the time smp_call_function() determines number of
+        * IPI receipients, and the time when the determination is made
+        * for which cpus receive the IPI. Holding this
+        * lock helps us to not include this cpu in a currently in progress
+        * smp_call_function().
+        */
+       lock_ipi_call_lock();
        cpu_set(smp_processor_id(), cpu_online_map);
+       unlock_ipi_call_lock();
+       per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
 
        /* We can take interrupts now: we're officially "up". */
        local_irq_enable();
@@ -452,7 +581,7 @@ static void __init start_secondary(void *unused)
  * from the task structure
  * This function must not return.
  */
-void __init initialize_secondary(void)
+void __devinit initialize_secondary(void)
 {
        /*
         * We don't actually need to load the full TSS,
@@ -474,10 +603,10 @@ extern struct {
 #ifdef CONFIG_NUMA
 
 /* which logical CPUs are on which nodes */
-cpumask_t node_2_cpu_mask[MAX_NUMNODES] =
+cpumask_t node_2_cpu_mask[MAX_NUMNODES] __read_mostly =
                                { [0 ... MAX_NUMNODES-1] = CPU_MASK_NONE };
 /* which node each logical CPU is on */
-int cpu_2_node[NR_CPUS] = { [0 ... NR_CPUS-1] = 0 };
+int cpu_2_node[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = 0 };
 EXPORT_SYMBOL(cpu_2_node);
 
 /* set up a mapping between cpu and node. */
@@ -505,7 +634,7 @@ static inline void unmap_cpu_to_node(int cpu)
 
 #endif /* CONFIG_NUMA */
 
-u8 cpu_2_logical_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
+u8 cpu_2_logical_apicid[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID };
 
 static void map_cpu_to_logical_apicid(void)
 {
@@ -531,7 +660,7 @@ static inline void __inquire_remote_apic(int apicid)
 
        printk("Inquiring remote APIC #%d...\n", apicid);
 
-       for (i = 0; i < sizeof(regs) / sizeof(*regs); i++) {
+       for (i = 0; i < ARRAY_SIZE(regs); i++) {
                printk("... APIC #%d %s: ", apicid, names[i]);
 
                /*
@@ -566,7 +695,7 @@ static inline void __inquire_remote_apic(int apicid)
  * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this
  * won't ... remember to clear down the APIC, etc later.
  */
-static int __init
+static int __devinit
 wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
 {
        unsigned long send_status = 0, accept_status = 0;
@@ -612,7 +741,7 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
 #endif /* WAKE_SECONDARY_VIA_NMI */
 
 #ifdef WAKE_SECONDARY_VIA_INIT
-static int __init
+static int __devinit
 wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
 {
        unsigned long send_status = 0, accept_status = 0;
@@ -747,8 +876,42 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
 #endif /* WAKE_SECONDARY_VIA_INIT */
 
 extern cpumask_t cpu_initialized;
+static inline int alloc_cpu_id(void)
+{
+       cpumask_t       tmp_map;
+       int cpu;
+       cpus_complement(tmp_map, cpu_present_map);
+       cpu = first_cpu(tmp_map);
+       if (cpu >= NR_CPUS)
+               return -ENODEV;
+       return cpu;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static struct task_struct * __devinitdata cpu_idle_tasks[NR_CPUS];
+static inline struct task_struct * alloc_idle_task(int cpu)
+{
+       struct task_struct *idle;
 
-static int __init do_boot_cpu(int apicid)
+       if ((idle = cpu_idle_tasks[cpu]) != NULL) {
+               /* initialize thread_struct.  we really want to avoid destroy
+                * idle tread
+                */
+               idle->thread.esp = (unsigned long)task_pt_regs(idle);
+               init_idle(idle, cpu);
+               return idle;
+       }
+       idle = fork_idle(cpu);
+
+       if (!IS_ERR(idle))
+               cpu_idle_tasks[cpu] = idle;
+       return idle;
+}
+#else
+#define alloc_idle_task(cpu) fork_idle(cpu)
+#endif
+
+static int __devinit do_boot_cpu(int apicid, int cpu)
 /*
  * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
  * (ie clustered apic addressing mode), this is a LOGICAL apic ID.
@@ -757,16 +920,18 @@ static int __init do_boot_cpu(int apicid)
 {
        struct task_struct *idle;
        unsigned long boot_error;
-       int timeout, cpu;
+       int timeout;
        unsigned long start_eip;
        unsigned short nmi_high = 0, nmi_low = 0;
 
-       cpu = ++cpucount;
+       ++cpucount;
+       alternatives_smp_switch(1);
+
        /*
         * We can't use kernel_thread since we must avoid to
         * reschedule the child.
         */
-       idle = fork_idle(cpu);
+       idle = alloc_idle_task(cpu);
        if (IS_ERR(idle))
                panic("failed fork for CPU %d", cpu);
        idle->thread.eip = (unsigned long) start_secondary;
@@ -833,13 +998,16 @@ static int __init do_boot_cpu(int apicid)
                        inquire_remote_apic(apicid);
                }
        }
-       x86_cpu_to_apicid[cpu] = apicid;
+
        if (boot_error) {
                /* Try to put things back the way they were before ... */
                unmap_cpu_to_logical_apicid(cpu);
                cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */
                cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */
                cpucount--;
+       } else {
+               x86_cpu_to_apicid[cpu] = apicid;
+               cpu_set(cpu, cpu_present_map);
        }
 
        /* mark "stuck" area as not stuck */
@@ -848,6 +1016,72 @@ static int __init do_boot_cpu(int apicid)
        return boot_error;
 }
 
+#ifdef CONFIG_HOTPLUG_CPU
+void cpu_exit_clear(void)
+{
+       int cpu = raw_smp_processor_id();
+
+       idle_task_exit();
+
+       cpucount --;
+       cpu_uninit();
+       irq_ctx_exit(cpu);
+
+       cpu_clear(cpu, cpu_callout_map);
+       cpu_clear(cpu, cpu_callin_map);
+
+       cpu_clear(cpu, smp_commenced_mask);
+       unmap_cpu_to_logical_apicid(cpu);
+}
+
+struct warm_boot_cpu_info {
+       struct completion *complete;
+       int apicid;
+       int cpu;
+};
+
+static void __cpuinit do_warm_boot_cpu(void *p)
+{
+       struct warm_boot_cpu_info *info = p;
+       do_boot_cpu(info->apicid, info->cpu);
+       complete(info->complete);
+}
+
+static int __cpuinit __smp_prepare_cpu(int cpu)
+{
+       DECLARE_COMPLETION(done);
+       struct warm_boot_cpu_info info;
+       struct work_struct task;
+       int     apicid, ret;
+
+       apicid = x86_cpu_to_apicid[cpu];
+       if (apicid == BAD_APICID) {
+               ret = -ENODEV;
+               goto exit;
+       }
+
+       info.complete = &done;
+       info.apicid = apicid;
+       info.cpu = cpu;
+       INIT_WORK(&task, do_warm_boot_cpu, &info);
+
+       tsc_sync_disabled = 1;
+
+       /* init low mem mapping */
+       clone_pgd_range(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS,
+                       KERNEL_PGD_PTRS);
+       flush_tlb_all();
+       schedule_work(&task);
+       wait_for_completion(&done);
+
+       tsc_sync_disabled = 0;
+       zap_low_mappings();
+       ret = 0;
+exit:
+       return ret;
+}
+#endif
+
 static void smp_tune_scheduling (void)
 {
        unsigned long cachesize;       /* kB   */
@@ -875,6 +1109,7 @@ static void smp_tune_scheduling (void)
                        cachesize = 16; /* Pentiums, 2x8kB cache */
                        bandwidth = 100;
                }
+               max_cache_size = cachesize * 1024;
        }
 }
 
@@ -885,10 +1120,9 @@ static void smp_tune_scheduling (void)
 static int boot_cpu_logical_apicid;
 /* Where the IO area was mapped on multiquad, always 0 otherwise */
 void *xquad_portio;
-
-cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned;
-cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned;
-EXPORT_SYMBOL(cpu_core_map);
+#ifdef CONFIG_X86_NUMAQ
+EXPORT_SYMBOL(xquad_portio);
+#endif
 
 static void __init smp_boot_cpus(unsigned int max_cpus)
 {
@@ -908,11 +1142,8 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
 
        current_thread_info()->cpu = 0;
        smp_tune_scheduling();
-       cpus_clear(cpu_sibling_map[0]);
-       cpu_set(0, cpu_sibling_map[0]);
 
-       cpus_clear(cpu_core_map[0]);
-       cpu_set(0, cpu_core_map[0]);
+       set_cpu_sibling_map(0);
 
        /*
         * If we couldn't find an SMP configuration at boot time,
@@ -1001,7 +1232,7 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
                if (max_cpus <= cpucount+1)
                        continue;
 
-               if (do_boot_cpu(apicid))
+               if (((cpu = alloc_cpu_id()) <= 0) || do_boot_cpu(apicid, cpu))
                        printk("CPU #%d not responding - cannot use it.\n",
                                                                apicid);
                else
@@ -1023,8 +1254,8 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
        printk(KERN_INFO
                "Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
                cpucount+1,
-               HZ*(bogosum >> 3)/62500,
-               (HZ*(bogosum >> 3)/625) % 100);
+               bogosum/(500000/HZ),
+               (bogosum/(5000/HZ))%100);
        
        Dprintk("Before bogocount - setting activated=1.\n");
 
@@ -1053,44 +1284,8 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
                cpus_clear(cpu_core_map[cpu]);
        }
 
-       for (cpu = 0; cpu < NR_CPUS; cpu++) {
-               struct cpuinfo_x86 *c = cpu_data + cpu;
-               int siblings = 0;
-               int i;
-               if (!cpu_isset(cpu, cpu_callout_map))
-                       continue;
-
-               if (smp_num_siblings > 1) {
-                       for (i = 0; i < NR_CPUS; i++) {
-                               if (!cpu_isset(i, cpu_callout_map))
-                                       continue;
-                               if (cpu_core_id[cpu] == cpu_core_id[i]) {
-                                       siblings++;
-                                       cpu_set(i, cpu_sibling_map[cpu]);
-                               }
-                       }
-               } else {
-                       siblings++;
-                       cpu_set(cpu, cpu_sibling_map[cpu]);
-               }
-
-               if (siblings != smp_num_siblings) {
-                       printk(KERN_WARNING "WARNING: %d siblings found for CPU%d, should be %d\n", siblings, cpu, smp_num_siblings);
-                       smp_num_siblings = siblings;
-               }
-
-               if (c->x86_num_cores > 1) {
-                       for (i = 0; i < NR_CPUS; i++) {
-                               if (!cpu_isset(i, cpu_callout_map))
-                                       continue;
-                               if (phys_proc_id[cpu] == phys_proc_id[i]) {
-                                       cpu_set(i, cpu_core_map[cpu]);
-                               }
-                       }
-               } else {
-                       cpu_core_map[cpu] = cpu_sibling_map[cpu];
-               }
-       }
+       cpu_set(0, cpu_sibling_map[0]);
+       cpu_set(0, cpu_core_map[0]);
 
        smpboot_setup_io_apic();
 
@@ -1107,6 +1302,9 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
    who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */
 void __init smp_prepare_cpus(unsigned int max_cpus)
 {
+       smp_commenced_mask = cpumask_of_cpu(0);
+       cpu_callin_map = cpumask_of_cpu(0);
+       mb();
        smp_boot_cpus(max_cpus);
 }
 
@@ -1114,23 +1312,124 @@ void __devinit smp_prepare_boot_cpu(void)
 {
        cpu_set(smp_processor_id(), cpu_online_map);
        cpu_set(smp_processor_id(), cpu_callout_map);
+       cpu_set(smp_processor_id(), cpu_present_map);
+       cpu_set(smp_processor_id(), cpu_possible_map);
+       per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
 }
 
-int __devinit __cpu_up(unsigned int cpu)
+#ifdef CONFIG_HOTPLUG_CPU
+static void
+remove_siblinginfo(int cpu)
 {
-       /* This only works at boot for x86.  See "rewrite" above. */
-       if (cpu_isset(cpu, smp_commenced_mask)) {
-               local_irq_enable();
-               return -ENOSYS;
+       int sibling;
+       struct cpuinfo_x86 *c = cpu_data;
+
+       for_each_cpu_mask(sibling, cpu_core_map[cpu]) {
+               cpu_clear(cpu, cpu_core_map[sibling]);
+               /*
+                * last thread sibling in this cpu core going down
+                */
+               if (cpus_weight(cpu_sibling_map[cpu]) == 1)
+                       c[sibling].booted_cores--;
        }
+                       
+       for_each_cpu_mask(sibling, cpu_sibling_map[cpu])
+               cpu_clear(cpu, cpu_sibling_map[sibling]);
+       cpus_clear(cpu_sibling_map[cpu]);
+       cpus_clear(cpu_core_map[cpu]);
+       phys_proc_id[cpu] = BAD_APICID;
+       cpu_core_id[cpu] = BAD_APICID;
+       cpu_clear(cpu, cpu_sibling_setup_map);
+}
+
+int __cpu_disable(void)
+{
+       cpumask_t map = cpu_online_map;
+       int cpu = smp_processor_id();
+
+       /*
+        * Perhaps use cpufreq to drop frequency, but that could go
+        * into generic code.
+        *
+        * We won't take down the boot processor on i386 due to some
+        * interrupts only being able to be serviced by the BSP.
+        * Especially so if we're not using an IOAPIC   -zwane
+        */
+       if (cpu == 0)
+               return -EBUSY;
+
+       clear_local_APIC();
+       /* Allow any queued timer interrupts to get serviced */
+       local_irq_enable();
+       mdelay(1);
+       local_irq_disable();
+
+       remove_siblinginfo(cpu);
+
+       cpu_clear(cpu, map);
+       fixup_irqs(map);
+       /* It's now safe to remove this processor from the online map */
+       cpu_clear(cpu, cpu_online_map);
+       return 0;
+}
+
+void __cpu_die(unsigned int cpu)
+{
+       /* We don't do anything here: idle task is faking death itself. */
+       unsigned int i;
+
+       for (i = 0; i < 10; i++) {
+               /* They ack this in play_dead by setting CPU_DEAD */
+               if (per_cpu(cpu_state, cpu) == CPU_DEAD) {
+                       printk ("CPU %d is now offline\n", cpu);
+                       if (1 == num_online_cpus())
+                               alternatives_smp_switch(0);
+                       return;
+               }
+               msleep(100);
+       }
+       printk(KERN_ERR "CPU %u didn't die...\n", cpu);
+}
+#else /* ... !CONFIG_HOTPLUG_CPU */
+int __cpu_disable(void)
+{
+       return -ENOSYS;
+}
+
+void __cpu_die(unsigned int cpu)
+{
+       /* We said "no" in __cpu_disable */
+       BUG();
+}
+#endif /* CONFIG_HOTPLUG_CPU */
+
+int __devinit __cpu_up(unsigned int cpu)
+{
+#ifdef CONFIG_HOTPLUG_CPU
+       int ret=0;
+
+       /*
+        * We do warm boot only on cpus that had booted earlier
+        * Otherwise cold boot is all handled from smp_boot_cpus().
+        * cpu_callin_map is set during AP kickstart process. Its reset
+        * when a cpu is taken offline from cpu_exit_clear().
+        */
+       if (!cpu_isset(cpu, cpu_callin_map))
+               ret = __smp_prepare_cpu(cpu);
+
+       if (ret)
+               return -EIO;
+#endif
 
        /* In case one didn't come up */
        if (!cpu_isset(cpu, cpu_callin_map)) {
+               printk(KERN_DEBUG "skipping cpu%d, didn't come online\n", cpu);
                local_irq_enable();
                return -EIO;
        }
 
        local_irq_enable();
+       per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
        /* Unleash the CPU! */
        cpu_set(cpu, smp_commenced_mask);
        while (!cpu_isset(cpu, cpu_online_map))
@@ -1144,10 +1443,12 @@ void __init smp_cpus_done(unsigned int max_cpus)
        setup_ioapic_dest();
 #endif
        zap_low_mappings();
+#ifndef CONFIG_HOTPLUG_CPU
        /*
         * Disable executability of the SMP trampoline:
         */
        set_kernel_exec((unsigned long)trampoline_base, trampoline_exec);
+#endif
 }
 
 void __init smp_intr_init(void)