X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=arch%2Fx86_64%2Fkernel%2Fsetup64.c;h=7ba429b355f7bd7347decdc460c79f384fdb2dc1;hb=refs%2Fheads%2Fvserver;hp=b83680afe01a95c542abf544ee0618b33296bcfc;hpb=c7b5ebbddf7bcd3651947760f423e3783bbe6573;p=linux-2.6.git

diff --git a/arch/x86_64/kernel/setup64.c b/arch/x86_64/kernel/setup64.c
index b83680afe..7ba429b35 100644
--- a/arch/x86_64/kernel/setup64.c
+++ b/arch/x86_64/kernel/setup64.c
@@ -3,123 +3,46 @@
  * Copyright (C) 1995 Linus Torvalds
  * Copyright 2001, 2002, 2003 SuSE Labs / Andi Kleen.
  * See setup.c for older changelog.
- * $Id: setup64.c,v 1.12 2002/03/21 10:09:17 ak Exp $
  */
-#include
 #include
 #include
 #include
 #include
 #include
+#include
+#include
+#include
 #include
 #include
 #include
 #include
-#include
 #include
 #include
 #include
 #include
 #include
-#include
 #include
-#include
-#include
+#include

-char x86_boot_params[2048] __initdata = {0,};
+char x86_boot_params[BOOT_PARAM_SIZE] __initdata;

-unsigned long cpu_initialized __initdata = 0;
+cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;

-struct x8664_pda cpu_pda[NR_CPUS] __cacheline_aligned;
+struct x8664_pda *_cpu_pda[NR_CPUS] __read_mostly;
+EXPORT_SYMBOL(_cpu_pda);
+struct x8664_pda boot_cpu_pda[NR_CPUS] __cacheline_aligned;

-extern struct task_struct init_task;
-
-extern unsigned char __per_cpu_start[], __per_cpu_end[];
-
-extern struct desc_ptr cpu_gdt_descr[];
-struct desc_ptr idt_descr = { 256 * 16, (unsigned long) idt_table };
+struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };

 char boot_cpu_stack[IRQSTACKSIZE] __attribute__((section(".bss.page_aligned")));

-unsigned long __supported_pte_mask = ~0UL;
-static int do_not_nx __initdata = 0;
-unsigned long vm_stack_flags = __VM_STACK_FLAGS;
-unsigned long vm_stack_flags32 = __VM_STACK_FLAGS;
-unsigned long vm_data_default_flags = __VM_DATA_DEFAULT_FLAGS;
-unsigned long vm_data_default_flags32 = __VM_DATA_DEFAULT_FLAGS;
-unsigned long vm_force_exec32 = PROT_EXEC;
-
-/* noexec=on|off
-Control non executable mappings for 64bit processes.
-
-on      Enable
-off     Disable
-noforce (default) Don't enable by default for heap/stack/data,
-        but allow PROT_EXEC to be effective
-
-*/
-static int __init nonx_setup(char *str)
-{
-        if (!strcmp(str, "on")) {
-                __supported_pte_mask |= _PAGE_NX;
-                do_not_nx = 0;
-                vm_data_default_flags &= ~VM_EXEC;
-                vm_stack_flags &= ~VM_EXEC;
-        } else if (!strcmp(str, "noforce") || !strcmp(str, "off")) {
-                do_not_nx = (str[0] == 'o');
-                if (do_not_nx)
-                        __supported_pte_mask &= ~_PAGE_NX;
-                vm_data_default_flags |= VM_EXEC;
-                vm_stack_flags |= VM_EXEC;
-        }
-        return 1;
-}
-
-__setup("noexec=", nonx_setup);
-
-/* noexec32=opt{,opt}
-
-Control the no exec default for 32bit processes. Can be also overwritten
-per executable using ELF header flags (e.g. needed for the X server)
-Requires noexec=on or noexec=noforce to be effective.
-
-Valid options:
-   all,on   Heap,stack,data is non executable.
-   off      (default) Heap,stack,data is executable
-   stack    Stack is non executable, heap/data is.
-   force    Don't imply PROT_EXEC for PROT_READ
-   compat   (default) Imply PROT_EXEC for PROT_READ
-
-*/
-static int __init nonx32_setup(char *s)
-{
-        while (*s) {
-                if (!strncmp(s, "all", 3) || !strncmp(s,"on",2)) {
-                        vm_data_default_flags32 &= ~VM_EXEC;
-                        vm_stack_flags32 &= ~VM_EXEC;
-                } else if (!strncmp(s, "off",3)) {
-                        vm_data_default_flags32 |= VM_EXEC;
-                        vm_stack_flags32 |= VM_EXEC;
-                } else if (!strncmp(s, "stack", 5)) {
-                        vm_data_default_flags32 |= VM_EXEC;
-                        vm_stack_flags32 &= ~VM_EXEC;
-                } else if (!strncmp(s, "force",5)) {
-                        vm_force_exec32 = 0;
-                } else if (!strncmp(s, "compat",5)) {
-                        vm_force_exec32 = PROT_EXEC;
-                }
-                s += strcspn(s, ",");
-                if (*s == ',')
-                        ++s;
-        }
-        return 1;
-}
-
-__setup("noexec32=", nonx32_setup);
+unsigned long __supported_pte_mask __read_mostly = ~0UL;
+EXPORT_SYMBOL(__supported_pte_mask);
+static int do_not_nx __cpuinitdata = 0;
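The removed nonx32_setup() above is a standard __setup() handler: it scans a comma-separated option string with strncmp() and hops to the next keyword with strcspn(). A minimal userspace-compilable sketch of that same scan follows; the exec_data/exec_stack flags are hypothetical stand-ins for the kernel's vm_*_flags32 variables, not the kernel code itself.

#include <stdio.h>
#include <string.h>

/* Hypothetical stand-ins for the kernel's vm_*_flags32 bits. */
static int exec_data = 1, exec_stack = 1;

/* Same scan as the removed nonx32_setup(): match a keyword, then
 * hop past the next ',' with strcspn(). Unknown keywords are
 * silently skipped, exactly as in the original. */
static void parse_noexec32(char *s)
{
        while (*s) {
                if (!strncmp(s, "all", 3) || !strncmp(s, "on", 2)) {
                        exec_data = 0;
                        exec_stack = 0;
                } else if (!strncmp(s, "off", 3)) {
                        exec_data = 1;
                        exec_stack = 1;
                } else if (!strncmp(s, "stack", 5)) {
                        exec_data = 1;
                        exec_stack = 0;
                }
                s += strcspn(s, ",");   /* jump to next option */
                if (*s == ',')
                        ++s;
        }
}

int main(void)
{
        char arg[] = "stack,off";

        parse_noexec32(arg);
        printf("data exec=%d, stack exec=%d\n", exec_data, exec_stack);
        return 0;
}

Note that later options override earlier ones ("stack,off" ends with everything executable), which is why the ordering of kernel command-line arguments matters here.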
 /*
  * Great future plan:
- * Declare PDA itself and support (irqstack,tss,pml4) as per cpu data.
+ * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data.
  * Always point %gs to its beginning
  */
 void __init setup_per_cpu_areas(void)
@@ -127,40 +50,42 @@ void __init setup_per_cpu_areas(void)
        int i;
        unsigned long size;

-       /* Copy section for each CPU (we discard the original) */
-       size = ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES);
-#ifdef CONFIG_MODULES
-       if (size < PERCPU_ENOUGH_ROOM)
-               size = PERCPU_ENOUGH_ROOM;
+#ifdef CONFIG_HOTPLUG_CPU
+       prefill_possible_map();
 #endif

-       for (i = 0; i < NR_CPUS; i++) {
-               unsigned char *ptr;
-
-               /* If possible allocate on the node of the CPU.
-                  In case it doesn't exist round-robin nodes. */
-               if (!NODE_DATA(i % numnodes)) {
-                       printk("cpu with no node %d, numnodes %d\n", i, numnodes);
+       /* Copy section for each CPU (we discard the original) */
+       size = PERCPU_ENOUGH_ROOM;
+
+       printk(KERN_INFO "PERCPU: Allocating %lu bytes of per cpu data\n", size);
+       for_each_cpu_mask (i, cpu_possible_map) {
+               char *ptr;
+
+               if (!NODE_DATA(cpu_to_node(i))) {
+                       printk("cpu with no node %d, num_online_nodes %d\n",
+                              i, num_online_nodes());
                        ptr = alloc_bootmem(size);
                } else {
-                       ptr = alloc_bootmem_node(NODE_DATA(i % numnodes), size);
+                       ptr = alloc_bootmem_node(NODE_DATA(cpu_to_node(i)), size);
                }
                if (!ptr)
                        panic("Cannot allocate cpu data for CPU %d\n", i);
-               cpu_pda[i].data_offset = ptr - __per_cpu_start;
+               cpu_pda(i)->data_offset = ptr - __per_cpu_start;
                memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
        }
 }

 void pda_init(int cpu)
 {
-       pml4_t *level4;
-       struct x8664_pda *pda = &cpu_pda[cpu];
+       struct x8664_pda *pda = cpu_pda(cpu);

        /* Setup up data that may be needed in __get_free_pages early */
        asm volatile("movl %0,%%fs ; movl %0,%%gs" :: "r" (0));
-       wrmsrl(MSR_GS_BASE, cpu_pda + cpu);
+       /* Memory clobbers used to order PDA accessed */
+       mb();
+       wrmsrl(MSR_GS_BASE, pda);
+       mb();

-       pda->me = pda;
        pda->cpunumber = cpu;
        pda->irqcount = -1;
        pda->kernelstack =
@@ -172,27 +97,18 @@ void pda_init(int cpu)
                /* others are initialized in smpboot.c */
                pda->pcurrent = &init_task;
                pda->irqstackptr = boot_cpu_stack;
-               level4 = init_level4_pgt;
        } else {
-               level4 = (pml4_t *)__get_free_pages(GFP_ATOMIC, 0);
-               if (!level4)
-                       panic("Cannot allocate top level page for cpu %d", cpu);
                pda->irqstackptr = (char *)
                        __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER);
                if (!pda->irqstackptr)
                        panic("cannot allocate irqstack for cpu %d", cpu);
        }

-       pda->level4_pgt = (unsigned long *)level4;
-       if (level4 != init_level4_pgt)
-               memcpy(level4, &init_level4_pgt, PAGE_SIZE);
-       set_pml4(level4 + 510, mk_kernel_pml4(__pa_symbol(boot_vmalloc_pgt)));
-       asm volatile("movq %0,%%cr3" :: "r" (__pa(level4)));

        pda->irqstackptr += IRQSTACKSIZE-64;
 }
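The cpu_pda(i)->data_offset assignment above is the heart of the per-CPU scheme: each possible CPU gets a private copy of the section between __per_cpu_start and __per_cpu_end, and a per-CPU variable is then reached by adding the owning CPU's offset to the variable's link-time address. A minimal sketch of that arithmetic, with malloc() standing in for the bootmem allocator and a hypothetical per_cpu_sketch() macro (not the kernel's per_cpu()):

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define NCPUS 2

/* Pretend this is the linker's per-CPU template section, and that
 * an "int counter" was placed at its start (both hypothetical). */
static char __per_cpu_start[64];
static int *const counter = (int *)__per_cpu_start;

static ptrdiff_t data_offset[NCPUS];    /* like cpu_pda(i)->data_offset */

static void setup_per_cpu_areas_sketch(void)
{
        int i;

        for (i = 0; i < NCPUS; i++) {
                char *ptr = malloc(sizeof(__per_cpu_start));

                /* the key line: offset from template to CPU i's copy */
                data_offset[i] = ptr - __per_cpu_start;
                memcpy(ptr, __per_cpu_start, sizeof(__per_cpu_start));
        }
}

/* hypothetical per_cpu(): template address plus the CPU's offset */
#define per_cpu_sketch(var, cpu) \
        (*(int *)((char *)(var) + data_offset[cpu]))

int main(void)
{
        setup_per_cpu_areas_sketch();
        per_cpu_sketch(counter, 0) = 7;
        per_cpu_sketch(counter, 1) = 42;
        printf("%d %d\n", per_cpu_sketch(counter, 0),
               per_cpu_sketch(counter, 1));
        return 0;
}

Storing the offset in the PDA, which %gs always points at after the wrmsrl(MSR_GS_BASE, pda) above, is what lets the real kernel resolve a per-CPU variable for "this CPU" without first asking which CPU it is running on.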
-char boot_exception_stacks[N_EXCEPTION_STACKS * EXCEPTION_STKSZ]
+char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]
 __attribute__((section(".bss.page_aligned")));

 /* May not be marked __init: used by software suspend */
@@ -214,7 +130,7 @@ void syscall_init(void)
        wrmsrl(MSR_SYSCALL_MASK, EF_TF|EF_DF|EF_IE|0x3000);
 }

-void __init check_efer(void)
+void __cpuinit check_efer(void)
 {
        unsigned long efer;

@@ -224,6 +140,8 @@ void __init check_efer(void)
        }
 }

+unsigned long kernel_eflags;
+
 /*
  * cpu_init() initializes state that is per-CPU. Some data is already
  * initialized (naturally) in the bootstrap process, such as the GDT
@@ -231,14 +149,11 @@
  * 'CPU state barrier', nothing should get across.
  * A lot of state is already set up in PDA init.
  */
-void __init cpu_init (void)
+void __cpuinit cpu_init (void)
 {
-#ifdef CONFIG_SMP
        int cpu = stack_smp_processor_id();
-#else
-       int cpu = smp_processor_id();
-#endif
        struct tss_struct *t = &per_cpu(init_tss, cpu);
+       struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu);
        unsigned long v;
        char *estacks = NULL;
        struct task_struct *me;
@@ -247,42 +162,31 @@
        /* CPU 0 is initialised in head64.c */
        if (cpu != 0) {
                pda_init(cpu);
+               zap_low_mappings(cpu);
        } else
                estacks = boot_exception_stacks;

        me = current;

-       if (test_and_set_bit(cpu, &cpu_initialized))
+       if (cpu_test_and_set(cpu, cpu_initialized))
                panic("CPU#%d already initialized!\n", cpu);

        printk("Initializing CPU#%d\n", cpu);

-       clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
+       clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);

        /*
         * Initialize the per-CPU GDT with the boot GDT,
         * and set up the GDT descriptor:
         */
-       if (cpu) {
-               memcpy(cpu_gdt_table[cpu], cpu_gdt_table[0], GDT_SIZE);
-       }
+       if (cpu)
+               memcpy(cpu_gdt(cpu), cpu_gdt_table, GDT_SIZE);

        cpu_gdt_descr[cpu].size = GDT_SIZE;
-       cpu_gdt_descr[cpu].address = (unsigned long)cpu_gdt_table[cpu];
-       __asm__ __volatile__("lgdt %0": "=m" (cpu_gdt_descr[cpu]));
-       __asm__ __volatile__("lidt %0": "=m" (idt_descr));
-
-       memcpy(me->thread.tls_array, cpu_gdt_table[cpu], GDT_ENTRY_TLS_ENTRIES * 8);
-
-       /*
-        * Delete NT
-        */
-
-       asm volatile("pushfq ; popq %%rax ; btr $14,%%rax ; pushq %%rax ; popfq" ::: "eax");
-
-       if (cpu == 0)
-               early_identify_cpu(&boot_cpu_data);
+       asm volatile("lgdt %0" :: "m" (cpu_gdt_descr[cpu]));
+       asm volatile("lidt %0" :: "m" (idt_descr));

+       memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8);
        syscall_init();

        wrmsrl(MSR_FS_BASE, 0);
@@ -295,15 +199,18 @@
         * set up and load the per-CPU TSS
         */
        for (v = 0; v < N_EXCEPTION_STACKS; v++) {
+               static const unsigned int order[N_EXCEPTION_STACKS] = {
+                       [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER,
+                       [DEBUG_STACK - 1] = DEBUG_STACK_ORDER
+               };
                if (cpu) {
-                       estacks = (char *)__get_free_pages(GFP_ATOMIC,
-                                               EXCEPTION_STACK_ORDER);
+                       estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]);
                        if (!estacks)
                                panic("Cannot allocate exception stack %ld %d\n",
                                      v, cpu);
                }
-               estacks += EXCEPTION_STKSZ;
-               t->ist[v] = (unsigned long)estacks;
+               estacks += PAGE_SIZE << order[v];
+               orig_ist->ist[v] = t->ist[v] = (unsigned long)estacks;
        }

        t->io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
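The order[] table added in the hunk above leans on GNU C range designators: [0 ... N-1] fills the whole array with a default, and the later [DEBUG_STACK - 1] entry overrides that single slot, which is how the debug stack gets its larger allocation order. A standalone illustration with made-up values (compiles with gcc; the override draws only a -Woverride-init warning):

#include <stdio.h>

#define N_STACKS      5  /* made-up values for illustration */
#define DEFAULT_ORDER 0
#define DEBUG_SLOT    3
#define DEBUG_ORDER   1

/* GNU C range designator: fill every slot with the default, then
 * let the later, more specific entry win for the debug slot. */
static const unsigned int order[N_STACKS] = {
        [0 ... N_STACKS - 1] = DEFAULT_ORDER,
        [DEBUG_SLOT] = DEBUG_ORDER,
};

int main(void)
{
        int i;

        for (i = 0; i < N_STACKS; i++)
                printf("order[%d] = %u\n", i, order[i]);
        return 0;
}

This prints DEFAULT_ORDER for every index except DEBUG_SLOT, matching how only the DEBUG_STACK entry ends up with DEBUG_STACK_ORDER in the kernel table.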
@@ -328,16 +235,14 @@
        /*
         * Clear all 6 debug registers:
         */

-       set_debug(0UL, 0);
-       set_debug(0UL, 1);
-       set_debug(0UL, 2);
-       set_debug(0UL, 3);
-       set_debug(0UL, 6);
-       set_debug(0UL, 7);
+       set_debugreg(0UL, 0);
+       set_debugreg(0UL, 1);
+       set_debugreg(0UL, 2);
+       set_debugreg(0UL, 3);
+       set_debugreg(0UL, 6);
+       set_debugreg(0UL, 7);

        fpu_init();

-#ifdef CONFIG_NUMA
-       numa_add_cpu(cpu);
-#endif
+       raw_local_save_flags(kernel_eflags);
 }
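The new raw_local_save_flags(kernel_eflags) at the end of cpu_init() captures the CPU's RFLAGS into the kernel_eflags variable declared earlier in this diff. On x86-64 that primitive boils down to a pushfq/popq pair; a userspace-testable sketch of the same idea follows (GCC inline asm assumed; the kernel's actual macro differs in detail).

#include <stdio.h>

/* Sketch of the idea behind raw_local_save_flags() on x86-64:
 * push RFLAGS onto the stack, then pop it into the output operand.
 * Illustration only, not the kernel's exact implementation. */
static inline unsigned long read_rflags(void)
{
        unsigned long flags;

        asm volatile("pushfq ; popq %0"
                     : "=rm" (flags)
                     : /* no inputs */
                     : "memory");
        return flags;
}

int main(void)
{
        unsigned long f = read_rflags();

        printf("RFLAGS = %#lx (IF=%lu)\n", f, (f >> 9) & 1);
        return 0;
}

Compare the removed "Delete NT" sequence earlier in this diff, which used the same pushfq/popq pattern in reverse: read the flags, clear bit 14 (NT) with btr, and write them back with popfq.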