X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=arch%2Fi386%2Fkernel%2Fsmpboot.c;h=8c6c8c52b95c0b574b837c279f84647d3b4ba867;hb=97bf2856c6014879bd04983a3e9dfcdac1e7fe85;hp=42f7799e32ae3a2e457134c8c4dc8383c25db135;hpb=16c70f8c1b54b61c3b951b6fb220df250fe09b32;p=linux-2.6.git

diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
index 42f7799e3..8c6c8c52b 100644
--- a/arch/i386/kernel/smpboot.c
+++ b/arch/i386/kernel/smpboot.c
@@ -33,6 +33,11 @@
  *		Dave Jones	:	Report invalid combinations of Athlon CPUs.
  *		Rusty Russell	:	Hacked into shape for new "hotplug" boot process.
  */
+
+/* SMP boot always wants to use real time delay to allow sufficient time for
+ * the APs to come online */
+#define USE_REAL_TIME_DELAY
+
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
@@ -52,6 +57,8 @@
 #include <asm/desc.h>
 #include <asm/arch_hooks.h>
 #include <asm/nmi.h>
+#include <asm/pda.h>
+#include <asm/genapic.h>
 
 #include <mach_apic.h>
 #include <mach_wakecpu.h>
@@ -62,9 +69,7 @@ static int __devinitdata smp_b_stepping;
 
 /* Number of siblings per CPU package */
 int smp_num_siblings = 1;
-#ifdef CONFIG_X86_HT
 EXPORT_SYMBOL(smp_num_siblings);
-#endif
 
 /* Last level cache ID of each logical CPU */
 int cpu_llc_id[NR_CPUS] __cpuinitdata = {[0 ... NR_CPUS-1] = BAD_APICID};
@@ -102,6 +107,8 @@ u8 x86_cpu_to_apicid[NR_CPUS] __read_mostly =
 				{ [0 ... NR_CPUS-1] = 0xff };
 EXPORT_SYMBOL(x86_cpu_to_apicid);
 
+u8 apicid_2_node[MAX_APICID];
+
 /*
  * Trampoline 80x86 program as an array.
  */
@@ -152,7 +159,7 @@ void __init smp_alloc_memory(void)
  * a given CPU
  */
 
-static void __devinit smp_store_cpu_info(int id)
+static void __cpuinit smp_store_cpu_info(int id)
 {
 	struct cpuinfo_x86 *c = cpu_data + id;
 
@@ -177,6 +184,9 @@ static void __devinit smp_store_cpu_info(int id)
 	 */
 	if ((c->x86_vendor == X86_VENDOR_AMD) && (c->x86 == 6)) {
 
+		if (num_possible_cpus() == 1)
+			goto valid_k7;
+
 		/* Athlon 660/661 is valid. */
 		if ((c->x86_model==6) && ((c->x86_mask==0) || (c->x86_mask==1)))
 			goto valid_k7;
@@ -217,7 +227,7 @@ static struct {
 	atomic_t count_start;
 	atomic_t count_stop;
 	unsigned long long values[NR_CPUS];
-} tsc __initdata = {
+} tsc __cpuinitdata = {
 	.start_flag = ATOMIC_INIT(0),
 	.count_start = ATOMIC_INIT(0),
 	.count_stop = ATOMIC_INIT(0),
@@ -322,7 +332,7 @@ static void __init synchronize_tsc_bp(void)
 		printk("passed.\n");
 }
 
-static void __init synchronize_tsc_ap(void)
+static void __cpuinit synchronize_tsc_ap(void)
 {
 	int i;
 
@@ -354,7 +364,7 @@ extern void calibrate_delay(void);
 
 static atomic_t init_deasserted;
 
-static void __devinit smp_callin(void)
+static void __cpuinit smp_callin(void)
 {
 	int cpuid, phys_id;
 	unsigned long timeout;
@@ -528,14 +538,14 @@ set_cpu_sibling_map(int cpu)
 /*
  * Activate a secondary processor.
  */
-static void __devinit start_secondary(void *unused)
+static void __cpuinit start_secondary(void *unused)
 {
 	/*
-	 * Dont put anything before smp_callin(), SMP
+	 * Don't put *anything* before secondary_cpu_init(), SMP
 	 * booting is too fragile that we want to limit the
 	 * things done here to the most necessary things.
 	 */
-	cpu_init();
+	secondary_cpu_init();
 	preempt_disable();
 	smp_callin();
 	while (!cpu_isset(smp_processor_id(), smp_commenced_mask))
@@ -585,6 +595,12 @@ static void __devinit start_secondary(void *unused)
  */
void __devinit initialize_secondary(void)
 {
+	/*
+	 * switch to the per CPU GDT we already set up
+	 * in do_boot_cpu()
+	 */
+	cpu_set_gdt(current_thread_info()->cpu);
+
 	/*
 	 * We don't actually need to load the full TSS,
 	 * basically just the stack pointer and the eip.
@@ -594,19 +610,23 @@ void __devinit initialize_secondary(void)
 		"movl %0,%%esp\n\t"
 		"jmp *%1"
 		:
-		:"r" (current->thread.esp),"r" (current->thread.eip));
+		:"m" (current->thread.esp),"m" (current->thread.eip));
 }
 
+/* Static state in head.S used to set up a CPU */
 extern struct {
 	void * esp;
 	unsigned short ss;
 } stack_start;
+extern struct i386_pda *start_pda;
+extern struct Xgt_desc_struct cpu_gdt_descr;
 
 #ifdef CONFIG_NUMA
 
 /* which logical CPUs are on which nodes */
 cpumask_t node_2_cpu_mask[MAX_NUMNODES] __read_mostly =
 				{ [0 ... MAX_NUMNODES-1] = CPU_MASK_NONE };
+EXPORT_SYMBOL(node_2_cpu_mask);
 /* which node each logical CPU is on */
 int cpu_2_node[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = 0 };
 EXPORT_SYMBOL(cpu_2_node);
@@ -917,7 +937,7 @@ static inline struct task_struct * alloc_idle_task(int cpu)
 #define alloc_idle_task(cpu) fork_idle(cpu)
 #endif
 
-static int __devinit do_boot_cpu(int apicid, int cpu)
+static int __cpuinit do_boot_cpu(int apicid, int cpu)
 /*
  * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
  * (ie clustered apic addressing mode), this is a LOGICAL apic ID.
@@ -930,9 +950,6 @@ static int __devinit do_boot_cpu(int apicid, int cpu)
 	unsigned long start_eip;
 	unsigned short nmi_high = 0, nmi_low = 0;
 
-	++cpucount;
-	alternatives_smp_switch(1);
-
 	/*
 	 * We can't use kernel_thread since we must avoid to
 	 * reschedule the child.
@@ -940,10 +957,22 @@ static int __devinit do_boot_cpu(int apicid, int cpu)
 	idle = alloc_idle_task(cpu);
 	if (IS_ERR(idle))
 		panic("failed fork for CPU %d", cpu);
+
+	/* Pre-allocate and initialize the CPU's GDT and PDA so it
+	   doesn't have to do any memory allocation during the
+	   delicate CPU-bringup phase. */
+	if (!init_gdt(cpu, idle)) {
+		printk(KERN_INFO "Couldn't allocate GDT/PDA for CPU %d\n", cpu);
+		return -1;	/* ? */
+	}
+
 	idle->thread.eip = (unsigned long) start_secondary;
 	/* start_eip had better be page-aligned! */
 	start_eip = setup_trampoline();
 
+	++cpucount;
+	alternatives_smp_switch(1);
+
 	/* So we see what's up */
 	printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip);
 	/* Stack for startup_32 can be just as for start_secondary onwards */
@@ -951,6 +980,7 @@ static int __devinit do_boot_cpu(int apicid, int cpu)
 
 	irq_ctx_init(cpu);
 
+	x86_cpu_to_apicid[cpu] = apicid;
 	/*
 	 * This grunge runs the startup process for
 	 * the targeted processor.
@@ -1042,13 +1072,15 @@ void cpu_exit_clear(void)
 
 struct warm_boot_cpu_info {
 	struct completion *complete;
+	struct work_struct task;
 	int apicid;
 	int cpu;
 };
 
-static void __cpuinit do_warm_boot_cpu(void *p)
+static void __cpuinit do_warm_boot_cpu(struct work_struct *work)
 {
-	struct warm_boot_cpu_info *info = p;
+	struct warm_boot_cpu_info *info =
+		container_of(work, struct warm_boot_cpu_info, task);
 	do_boot_cpu(info->apicid, info->cpu);
 	complete(info->complete);
 }
@@ -1057,7 +1089,6 @@ static int __cpuinit __smp_prepare_cpu(int cpu)
 {
 	DECLARE_COMPLETION_ONSTACK(done);
 	struct warm_boot_cpu_info info;
-	struct work_struct task;
 	int apicid, ret;
 	struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
 
@@ -1082,15 +1113,15 @@ static int __cpuinit __smp_prepare_cpu(int cpu)
 	info.complete = &done;
 	info.apicid = apicid;
 	info.cpu = cpu;
-	INIT_WORK(&task, do_warm_boot_cpu, &info);
+	INIT_WORK(&info.task, do_warm_boot_cpu);
 
 	tsc_sync_disabled = 1;
 
 	/* init low mem mapping */
 	clone_pgd_range(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS,
-			KERNEL_PGD_PTRS);
+			min_t(unsigned long, KERNEL_PGD_PTRS, USER_PGD_PTRS));
 	flush_tlb_all();
-	schedule_work(&task);
+	schedule_work(&info.task);
 	wait_for_completion(&done);
 
 	tsc_sync_disabled = 0;
@@ -1101,34 +1132,15 @@ exit:
 }
 #endif
 
-static void smp_tune_scheduling (void)
+static void smp_tune_scheduling(void)
 {
 	unsigned long cachesize;       /* kB   */
-	unsigned long bandwidth = 350; /* MB/s */
-	/*
-	 * Rough estimation for SMP scheduling, this is the number of
-	 * cycles it takes for a fully memory-limited process to flush
-	 * the SMP-local cache.
-	 *
-	 * (For a P5 this pretty much means we will choose another idle
-	 *  CPU almost always at wakeup time (this is due to the small
-	 *  L1 cache), on PIIs it's around 50-100 usecs, depending on
-	 *  the cache size)
-	 */
-
-	if (!cpu_khz) {
-		/*
-		 * this basically disables processor-affinity
-		 * scheduling on SMP without a TSC.
-		 */
-		return;
-	} else {
+
+	if (cpu_khz) {
 		cachesize = boot_cpu_data.x86_cache_size;
-		if (cachesize == -1) {
-			cachesize = 16; /* Pentiums, 2x8kB cache */
-			bandwidth = 100;
-		}
-		max_cache_size = cachesize * 1024;
+
+		if (cachesize > 0)
+			max_cache_size = cachesize * 1024;
 	}
 }
 
@@ -1376,7 +1388,8 @@ int __cpu_disable(void)
 	 */
 	if (cpu == 0)
 		return -EBUSY;
-
+	if (nmi_watchdog == NMI_LOCAL_APIC)
+		stop_apic_nmi_watchdog(NULL);
 	clear_local_APIC();
 	/* Allow any queued timer interrupts to get serviced */
 	local_irq_enable();
@@ -1422,7 +1435,7 @@ void __cpu_die(unsigned int cpu)
 }
 #endif /* CONFIG_HOTPLUG_CPU */
 
-int __devinit __cpu_up(unsigned int cpu)
+int __cpuinit __cpu_up(unsigned int cpu)
 {
 #ifdef CONFIG_HOTPLUG_CPU
 	int ret=0;
@@ -1453,6 +1466,12 @@ int __devinit __cpu_up(unsigned int cpu)
 	cpu_set(cpu, smp_commenced_mask);
 	while (!cpu_isset(cpu, cpu_online_map))
 		cpu_relax();
+
+#ifdef CONFIG_X86_GENERICARCH
+	if (num_online_cpus() > 8 && genapic == &apic_default)
+		panic("Default flat APIC routing can't be used with > 8 cpus\n");
+#endif
+
 	return 0;
 }
 
@@ -1490,3 +1509,16 @@ void __init smp_intr_init(void)
 	/* IPI for generic function call */
 	set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
 }
+
+/*
+ * If the BIOS enumerates physical processors before logical,
+ * maxcpus=N at enumeration-time can be used to disable HT.
+ */
+static int __init parse_maxcpus(char *arg)
+{
+	extern unsigned int maxcpus;
+
+	maxcpus = simple_strtoul(arg, NULL, 0);
+	return 0;
+}
+early_param("maxcpus", parse_maxcpus);