X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=arch%2Fi386%2Fkernel%2Fcpu%2Fcommon.c;h=968f156740159e4fc518f9f11707bb4519e92711;hb=refs%2Fheads%2Fvserver;hp=e6bd095ae1085e75234d10aebdec2ac3d2190e43;hpb=76828883507a47dae78837ab5dec5a5b4513c667;p=linux-2.6.git diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index e6bd095ae..968f15674 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c @@ -11,29 +11,33 @@ #include #include #include +#include +#include #ifdef CONFIG_X86_LOCAL_APIC #include #include #include #endif +#include #include "cpu.h" DEFINE_PER_CPU(struct Xgt_desc_struct, cpu_gdt_descr); EXPORT_PER_CPU_SYMBOL(cpu_gdt_descr); -DEFINE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]); -EXPORT_PER_CPU_SYMBOL(cpu_16bit_stack); +struct i386_pda *_cpu_pda[NR_CPUS] __read_mostly; +EXPORT_SYMBOL(_cpu_pda); -static int cachesize_override __devinitdata = -1; -static int disable_x86_fxsr __devinitdata = 0; -static int disable_x86_serial_nr __devinitdata = 1; +static int cachesize_override __cpuinitdata = -1; +static int disable_x86_fxsr __cpuinitdata; +static int disable_x86_serial_nr __cpuinitdata = 1; +static int disable_x86_sep __cpuinitdata; struct cpu_dev * cpu_devs[X86_VENDOR_NUM] = {}; extern int disable_pse; -static void default_init(struct cpuinfo_x86 * c) +static void __cpuinit default_init(struct cpuinfo_x86 * c) { /* Not much we can do here... */ /* Check if at least it has cpuid */ @@ -46,11 +50,11 @@ static void default_init(struct cpuinfo_x86 * c) } } -static struct cpu_dev default_cpu = { +static struct cpu_dev __cpuinitdata default_cpu = { .c_init = default_init, .c_vendor = "Unknown", }; -static struct cpu_dev * this_cpu = &default_cpu; +static struct cpu_dev * this_cpu __cpuinitdata = &default_cpu; static int __init cachesize_setup(char *str) { @@ -59,7 +63,7 @@ static int __init cachesize_setup(char *str) } __setup("cachesize=", cachesize_setup); -int __devinit get_model_name(struct cpuinfo_x86 *c) +int __cpuinit get_model_name(struct cpuinfo_x86 *c) { unsigned int *v; char *p, *q; @@ -89,7 +93,7 @@ int __devinit get_model_name(struct cpuinfo_x86 *c) } -void __devinit display_cacheinfo(struct cpuinfo_x86 *c) +void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) { unsigned int n, dummy, ecx, edx, l2size; @@ -130,7 +134,7 @@ void __devinit display_cacheinfo(struct cpuinfo_x86 *c) /* in particular, if CPUID levels 0x80000002..4 are supported, this isn't used */ /* Look up CPU names by table lookup. */ -static char __devinit *table_lookup_model(struct cpuinfo_x86 *c) +static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c) { struct cpu_model_info *info; @@ -151,7 +155,7 @@ static char __devinit *table_lookup_model(struct cpuinfo_x86 *c) } -static void __devinit get_cpu_vendor(struct cpuinfo_x86 *c, int early) +static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c, int early) { char *v = c->x86_vendor_id; int i; @@ -181,12 +185,29 @@ static void __devinit get_cpu_vendor(struct cpuinfo_x86 *c, int early) static int __init x86_fxsr_setup(char * s) { + /* Tell all the other CPU's to not use it... */ disable_x86_fxsr = 1; + + /* + * ... and clear the bits early in the boot_cpu_data + * so that the bootup process doesn't try to do this + * either. 
+ */ + clear_bit(X86_FEATURE_FXSR, boot_cpu_data.x86_capability); + clear_bit(X86_FEATURE_XMM, boot_cpu_data.x86_capability); return 1; } __setup("nofxsr", x86_fxsr_setup); +static int __init x86_sep_setup(char * s) +{ + disable_x86_sep = 1; + return 1; +} +__setup("nosep", x86_sep_setup); + + /* Standard macro to see if a specific flag is changeable */ static inline int flag_is_changeable_p(u32 flag) { @@ -210,34 +231,19 @@ static inline int flag_is_changeable_p(u32 flag) /* Probe for the CPUID instruction */ -static int __devinit have_cpuid_p(void) +static int __cpuinit have_cpuid_p(void) { return flag_is_changeable_p(X86_EFLAGS_ID); } -/* Do minimum CPU detection early. - Fields really needed: vendor, cpuid_level, family, model, mask, cache alignment. - The others are not touched to avoid unwanted side effects. - - WARNING: this function is only called on the BP. Don't add code here - that is supposed to run on all CPUs. */ -static void __init early_cpu_detect(void) +void __init cpu_detect(struct cpuinfo_x86 *c) { - struct cpuinfo_x86 *c = &boot_cpu_data; - - c->x86_cache_alignment = 32; - - if (!have_cpuid_p()) - return; - /* Get vendor name */ cpuid(0x00000000, &c->cpuid_level, (int *)&c->x86_vendor_id[0], (int *)&c->x86_vendor_id[8], (int *)&c->x86_vendor_id[4]); - get_cpu_vendor(c, 1); - c->x86 = 4; if (c->cpuid_level >= 0x00000001) { u32 junk, tfms, cap0, misc; @@ -254,10 +260,30 @@ static void __init early_cpu_detect(void) } } -void __devinit generic_identify(struct cpuinfo_x86 * c) +/* Do minimum CPU detection early. + Fields really needed: vendor, cpuid_level, family, model, mask, cache alignment. + The others are not touched to avoid unwanted side effects. + + WARNING: this function is only called on the BP. Don't add code here + that is supposed to run on all CPUs. 
*/ +static void __init early_cpu_detect(void) +{ + struct cpuinfo_x86 *c = &boot_cpu_data; + + c->x86_cache_alignment = 32; + + if (!have_cpuid_p()) + return; + + cpu_detect(c); + + get_cpu_vendor(c, 1); +} + +static void __cpuinit generic_identify(struct cpuinfo_x86 * c) { u32 tfms, xlvl; - int junk; + int ebx; if (have_cpuid_p()) { /* Get vendor name */ @@ -273,7 +299,7 @@ void __devinit generic_identify(struct cpuinfo_x86 * c) /* Intel-defined flags: level 0x00000001 */ if ( c->cpuid_level >= 0x00000001 ) { u32 capability, excap; - cpuid(0x00000001, &tfms, &junk, &excap, &capability); + cpuid(0x00000001, &tfms, &ebx, &excap, &capability); c->x86_capability[0] = capability; c->x86_capability[4] = excap; c->x86 = (tfms >> 8) & 15; @@ -283,6 +309,13 @@ void __devinit generic_identify(struct cpuinfo_x86 * c) if (c->x86 >= 0x6) c->x86_model += ((tfms >> 16) & 0xF) << 4; c->x86_mask = tfms & 15; +#ifdef CONFIG_X86_HT + c->apicid = phys_pkg_id((ebx >> 24) & 0xFF, 0); +#else + c->apicid = (ebx >> 24) & 0xFF; +#endif + if (c->x86_capability[0] & (1<<19)) + c->x86_clflush_size = ((ebx >> 8) & 0xff) * 8; } else { /* Have CPUID level 0 only - unheard of */ c->x86 = 4; @@ -303,11 +336,11 @@ void __devinit generic_identify(struct cpuinfo_x86 * c) early_intel_workaround(c); #ifdef CONFIG_X86_HT - phys_proc_id[smp_processor_id()] = (cpuid_ebx(1) >> 24) & 0xff; + c->phys_proc_id = (cpuid_ebx(1) >> 24) & 0xff; #endif } -static void __devinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) +static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) { if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr ) { /* Disable processor serial number */ @@ -335,7 +368,7 @@ __setup("serialnumber", x86_serial_nr_setup); /* * This does the hard work of actually picking apart the CPU stuff... */ -void __devinit identify_cpu(struct cpuinfo_x86 *c) +void __cpuinit identify_cpu(struct cpuinfo_x86 *c) { int i; @@ -347,6 +380,7 @@ void __devinit identify_cpu(struct cpuinfo_x86 *c) c->x86_vendor_id[0] = '\0'; /* Unset */ c->x86_model_id[0] = '\0'; /* Unset */ c->x86_max_cores = 1; + c->x86_clflush_size = 32; memset(&c->x86_capability, 0, sizeof c->x86_capability); if (!have_cpuid_p()) { @@ -405,9 +439,20 @@ void __devinit identify_cpu(struct cpuinfo_x86 *c) clear_bit(X86_FEATURE_XMM, c->x86_capability); } + /* SEP disabled? */ + if (disable_x86_sep) + clear_bit(X86_FEATURE_SEP, c->x86_capability); + if (disable_pse) clear_bit(X86_FEATURE_PSE, c->x86_capability); + if (exec_shield != 0) { +#ifdef CONFIG_HIGHMEM64G /* NX implies PAE */ + if (!test_bit(X86_FEATURE_NX, c->x86_capability)) +#endif + clear_bit(X86_FEATURE_SEP, c->x86_capability); + } + /* If the model name is still unset, do table lookup. */ if ( !c->x86_model_id[0] ) { char *p; @@ -417,7 +462,7 @@ void __devinit identify_cpu(struct cpuinfo_x86 *c) else /* Last resort... */ sprintf(c->x86_model_id, "%02x/%02x", - c->x86_vendor, c->x86_model); + c->x86, c->x86_model); } /* Now the feature flags better reflect actual CPU features! 
*/ @@ -453,16 +498,13 @@ void __devinit identify_cpu(struct cpuinfo_x86 *c) } #ifdef CONFIG_X86_HT -void __devinit detect_ht(struct cpuinfo_x86 *c) +void __cpuinit detect_ht(struct cpuinfo_x86 *c) { u32 eax, ebx, ecx, edx; int index_msb, core_bits; - int cpu = smp_processor_id(); cpuid(1, &eax, &ebx, &ecx, &edx); - c->apicid = phys_pkg_id((ebx >> 24) & 0xFF, 0); - if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY)) return; @@ -473,16 +515,17 @@ void __devinit detect_ht(struct cpuinfo_x86 *c) } else if (smp_num_siblings > 1 ) { if (smp_num_siblings > NR_CPUS) { - printk(KERN_WARNING "CPU: Unsupported number of the siblings %d", smp_num_siblings); + printk(KERN_WARNING "CPU: Unsupported number of the " + "siblings %d", smp_num_siblings); smp_num_siblings = 1; return; } index_msb = get_count_order(smp_num_siblings); - phys_proc_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb); + c->phys_proc_id = phys_pkg_id((ebx >> 24) & 0xFF, index_msb); printk(KERN_INFO "CPU: Physical Processor ID: %d\n", - phys_proc_id[cpu]); + c->phys_proc_id); smp_num_siblings = smp_num_siblings / c->x86_max_cores; @@ -490,17 +533,17 @@ void __devinit detect_ht(struct cpuinfo_x86 *c) core_bits = get_count_order(c->x86_max_cores); - cpu_core_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb) & + c->cpu_core_id = phys_pkg_id((ebx >> 24) & 0xFF, index_msb) & ((1 << core_bits) - 1); if (c->x86_max_cores > 1) printk(KERN_INFO "CPU: Processor Core ID: %d\n", - cpu_core_id[cpu]); + c->cpu_core_id); } } #endif -void __devinit print_cpu_info(struct cpuinfo_x86 *c) +void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) { char *vendor = NULL; @@ -523,7 +566,7 @@ void __devinit print_cpu_info(struct cpuinfo_x86 *c) printk("\n"); } -cpumask_t cpu_initialized __devinitdata = CPU_MASK_NONE; +cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; /* This is hacky. :) * We're emulating future behavior. @@ -564,35 +607,23 @@ void __init early_cpu_init(void) disable_pse = 1; #endif } -/* - * cpu_init() initializes state that is per-CPU. Some data is already - * initialized (naturally) in the bootstrap process, such as the GDT - * and IDT. We reload them nevertheless, this function acts as a - * 'CPU state barrier', nothing should get across. - */ -void __devinit cpu_init(void) + +/* Make sure %gs is initialized properly in idle threads */ +struct pt_regs * __devinit idle_regs(struct pt_regs *regs) { - int cpu = smp_processor_id(); - struct tss_struct * t = &per_cpu(init_tss, cpu); - struct thread_struct *thread = ¤t->thread; - struct desc_struct *gdt; - __u32 stk16_off = (__u32)&per_cpu(cpu_16bit_stack, cpu); - struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu); + memset(regs, 0, sizeof(struct pt_regs)); + regs->xgs = __KERNEL_PDA; + return regs; +} - if (cpu_test_and_set(cpu, cpu_initialized)) { - printk(KERN_WARNING "CPU#%d already initialized!\n", cpu); - for (;;) local_irq_enable(); - } - printk(KERN_INFO "Initializing CPU#%d\n", cpu); +static __cpuinit int alloc_gdt(int cpu) +{ + struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu); + struct desc_struct *gdt; + struct i386_pda *pda; - if (cpu_has_vme || cpu_has_tsc || cpu_has_de) - clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); - if (tsc_disable && cpu_has_tsc) { - printk(KERN_NOTICE "Disabling TSC...\n"); - /**** FIX-HPA: DOES THIS REALLY BELONG HERE? 
****/ - clear_bit(X86_FEATURE_TSC, boot_cpu_data.x86_capability); - set_in_cr4(X86_CR4_TSD); - } + gdt = (struct desc_struct *)cpu_gdt_descr->address; + pda = cpu_pda(cpu); /* * This is a horrible hack to allocate the GDT. The problem @@ -601,44 +632,134 @@ void __devinit cpu_init(void) * CPUs, when bootmem will have gone away */ if (NODE_DATA(0)->bdata->node_bootmem_map) { - gdt = (struct desc_struct *)alloc_bootmem_pages(PAGE_SIZE); - /* alloc_bootmem_pages panics on failure, so no check */ + BUG_ON(gdt != NULL || pda != NULL); + + gdt = alloc_bootmem_pages(PAGE_SIZE); + pda = alloc_bootmem(sizeof(*pda)); + /* alloc_bootmem(_pages) panics on failure, so no check */ + memset(gdt, 0, PAGE_SIZE); + memset(pda, 0, sizeof(*pda)); } else { - gdt = (struct desc_struct *)get_zeroed_page(GFP_KERNEL); - if (unlikely(!gdt)) { - printk(KERN_CRIT "CPU%d failed to allocate GDT\n", cpu); - for (;;) - local_irq_enable(); + /* GDT and PDA might already have been allocated if + this is a CPU hotplug re-insertion. */ + if (gdt == NULL) + gdt = (struct desc_struct *)get_zeroed_page(GFP_KERNEL); + + if (pda == NULL) + pda = kmalloc_node(sizeof(*pda), GFP_KERNEL, cpu_to_node(cpu)); + + if (unlikely(!gdt || !pda)) { + free_pages((unsigned long)gdt, 0); + kfree(pda); + return 0; } } + cpu_gdt_descr->address = (unsigned long)gdt; + cpu_pda(cpu) = pda; + + return 1; +} + +/* Initial PDA used by boot CPU */ +struct i386_pda boot_pda = { + ._pda = &boot_pda, + .cpu_number = 0, + .pcurrent = &init_task, +}; + +static inline void set_kernel_gs(void) +{ + /* Set %gs for this CPU's PDA. Memory clobber is to create a + barrier with respect to any PDA operations, so the compiler + doesn't move any before here. */ + asm volatile ("mov %0, %%gs" : : "r" (__KERNEL_PDA) : "memory"); +} + +/* Initialize the CPU's GDT and PDA. The boot CPU does this for + itself, but secondaries find this done for them. */ +__cpuinit int init_gdt(int cpu, struct task_struct *idle) +{ + struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu); + struct desc_struct *gdt; + struct i386_pda *pda; + + /* For non-boot CPUs, the GDT and PDA should already have been + allocated. */ + if (!alloc_gdt(cpu)) { + printk(KERN_CRIT "CPU%d failed to allocate GDT or PDA\n", cpu); + return 0; + } + + gdt = (struct desc_struct *)cpu_gdt_descr->address; + pda = cpu_pda(cpu); + + BUG_ON(gdt == NULL || pda == NULL); + /* * Initialize the per-CPU GDT with the boot GDT, * and set up the GDT descriptor: */ memcpy(gdt, cpu_gdt_table, GDT_SIZE); + cpu_gdt_descr->size = GDT_SIZE - 1; - /* Set up GDT entry for 16bit stack */ - *(__u64 *)(&gdt[GDT_ENTRY_ESPFIX_SS]) |= - ((((__u64)stk16_off) << 16) & 0x000000ffffff0000ULL) | - ((((__u64)stk16_off) << 32) & 0xff00000000000000ULL) | - (CPU_16BIT_STACK_SIZE - 1); + pack_descriptor((u32 *)&gdt[GDT_ENTRY_PDA].a, + (u32 *)&gdt[GDT_ENTRY_PDA].b, + (unsigned long)pda, sizeof(*pda) - 1, + 0x80 | DESCTYPE_S | 0x2, 0); /* present read-write data segment */ - cpu_gdt_descr->size = GDT_SIZE - 1; - cpu_gdt_descr->address = (unsigned long)gdt; + memset(pda, 0, sizeof(*pda)); + pda->_pda = pda; + pda->cpu_number = cpu; + pda->pcurrent = idle; + return 1; +} + +void __cpuinit cpu_set_gdt(int cpu) +{ + struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu); + + /* Reinit these anyway, even if they've already been done (on + the boot CPU, this will transition from the boot gdt+pda to + the real ones). 
*/ load_gdt(cpu_gdt_descr); + set_kernel_gs(); +} + +/* Common CPU init for both boot and secondary CPUs */ +static void __cpuinit _cpu_init(int cpu, struct task_struct *curr) +{ + struct tss_struct * t = &per_cpu(init_tss, cpu); + struct thread_struct *thread = &curr->thread; + + if (cpu_test_and_set(cpu, cpu_initialized)) { + printk(KERN_WARNING "CPU#%d already initialized!\n", cpu); + for (;;) local_irq_enable(); + } + + printk(KERN_INFO "Initializing CPU#%d\n", cpu); + + if (cpu_has_vme || cpu_has_tsc || cpu_has_de) + clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); + if (tsc_disable && cpu_has_tsc) { + printk(KERN_NOTICE "Disabling TSC...\n"); + /**** FIX-HPA: DOES THIS REALLY BELONG HERE? ****/ + clear_bit(X86_FEATURE_TSC, boot_cpu_data.x86_capability); + set_in_cr4(X86_CR4_TSD); + } + load_idt(&idt_descr); /* * Set up and load the per-CPU TSS and LDT */ atomic_inc(&init_mm.mm_count); - current->active_mm = &init_mm; - if (current->mm) + curr->active_mm = &init_mm; + if (curr->mm) BUG(); - enter_lazy_tlb(&init_mm, current); + enter_lazy_tlb(&init_mm, curr); load_esp0(t, thread); set_tss_desc(cpu,t); @@ -650,8 +771,8 @@ void __devinit cpu_init(void) __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); #endif - /* Clear %fs and %gs. */ - asm volatile ("xorl %eax, %eax; movl %eax, %fs; movl %eax, %gs"); + /* Clear %fs. */ + asm volatile ("mov %0, %%fs" : : "r" (0)); /* Clear all 6 debug registers: */ set_debugreg(0, 0); @@ -669,8 +790,40 @@ void __devinit cpu_init(void) mxcsr_feature_mask_init(); } +/* Entrypoint to initialize secondary CPU */ +void __cpuinit secondary_cpu_init(void) +{ + int cpu = smp_processor_id(); + struct task_struct *curr = current; + + _cpu_init(cpu, curr); +} + +/* + * cpu_init() initializes state that is per-CPU. Some data is already + * initialized (naturally) in the bootstrap process, such as the GDT + * and IDT. We reload them nevertheless, this function acts as a + * 'CPU state barrier', nothing should get across. + */ +void __cpuinit cpu_init(void) +{ + int cpu = smp_processor_id(); + struct task_struct *curr = current; + + /* Set up the real GDT and PDA, so we can transition from the + boot versions. */ + if (!init_gdt(cpu, curr)) { + /* failed to allocate something; not much we can do... */ + for (;;) + local_irq_enable(); + } + + cpu_set_gdt(cpu); + _cpu_init(cpu, curr); +} + #ifdef CONFIG_HOTPLUG_CPU -void __devinit cpu_uninit(void) +void __cpuinit cpu_uninit(void) { int cpu = raw_smp_processor_id(); cpu_clear(cpu, cpu_initialized);
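
In the init_gdt() hunk above, the old open-coded setup of the 16-bit-stack (GDT_ENTRY_ESPFIX_SS) descriptor is dropped and the per-CPU PDA is installed at GDT_ENTRY_PDA with pack_descriptor() instead: base = the PDA's address, limit = sizeof(*pda) - 1, and an access byte of 0x80 | DESCTYPE_S | 0x2 (a present, read-write data segment). As a rough, standalone illustration of the descriptor layout such a helper has to produce (base and limit scattered across both 32-bit words, access byte in the middle), here is a user-space sketch. It is not the kernel's asm/desc.h interface; the struct, helper and DESC_* names below are invented for the example.

/*
 * pda_descriptor_sketch.c - illustration only, not kernel code.
 *
 * Packs base/limit/access/flags into the two 32-bit words of an i386
 * segment descriptor, using the same argument values the patch passes
 * for the GDT_ENTRY_PDA slot, then decodes the words again to show
 * where each field landed.
 */
#include <stdio.h>
#include <stdint.h>

#define DESC_PRESENT 0x80   /* P bit of the access byte          */
#define DESC_TYPE_S  0x10   /* S = 1: ordinary code/data segment */
#define DESC_DATA_RW 0x02   /* type: read/write data             */

struct fake_pda {               /* stand-in for struct i386_pda  */
	struct fake_pda *_pda;  /* self-pointer, as in boot_pda  */
	int cpu_number;
};

/* Scatter the fields into descriptor words a (low) and b (high). */
static void pack_descriptor_sketch(uint32_t *a, uint32_t *b,
				   uint32_t base, uint32_t limit,
				   uint8_t access, uint8_t flags)
{
	*a = ((base & 0xffff) << 16) | (limit & 0xffff);
	*b = (base & 0xff000000)                /* base[31:24]     */
	   | ((base >> 16) & 0xff)              /* base[23:16]     */
	   | ((uint32_t)access << 8)            /* P, DPL, S, type */
	   | (limit & 0x000f0000)               /* limit[19:16]    */
	   | ((uint32_t)(flags & 0xf) << 20);   /* G, D/B, L, AVL  */
}

int main(void)
{
	static struct fake_pda pda;
	uint32_t a, b;

	pack_descriptor_sketch(&a, &b, (uint32_t)(uintptr_t)&pda,
			       sizeof(pda) - 1,
			       DESC_PRESENT | DESC_TYPE_S | DESC_DATA_RW, 0);

	/* Reassemble base and limit from the packed words as a check. */
	uint32_t base  = (a >> 16) | ((b & 0xff) << 16) | (b & 0xff000000);
	uint32_t limit = (a & 0xffff) | (b & 0x000f0000);

	printf("descriptor words: %08x %08x\n", b, a);
	printf("base  = %#x (low 32 bits of &pda)\n", base);
	printf("limit = %#x (sizeof(pda) - 1 = %#zx)\n", limit, sizeof(pda) - 1);
	return 0;
}

Spelling the layout out once in a helper along these lines is what lets the patch delete the manual shift-and-mask expression that previously built the GDT_ENTRY_ESPFIX_SS stack descriptor by hand.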