X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=arch%2Fx86_64%2Fkernel%2Fsetup64.c;h=8a691fa6d3938e0d6473e208d477995b6838055c;hb=43bc926fffd92024b46cafaf7350d669ba9ca884;hp=9d3137a553fffb4fb1176c33e90aefeb83f6d756;hpb=9213980e6a70d8473e0ffd4b39ab5b6caaba9ff5;p=linux-2.6.git
diff --git a/arch/x86_64/kernel/setup64.c b/arch/x86_64/kernel/setup64.c
index 9d3137a55..8a691fa6d 100644
--- a/arch/x86_64/kernel/setup64.c
+++ b/arch/x86_64/kernel/setup64.c
@@ -11,113 +11,76 @@
 #include
 #include
 #include
+#include
+#include
+#include
 #include
 #include
 #include
 #include
-#include
 #include
 #include
 #include
 #include
 #include
-#include
 #include
-#include
-#include
+#include

-char x86_boot_params[2048] __initdata = {0,};
+char x86_boot_params[BOOT_PARAM_SIZE] __initdata = {0,};

-unsigned long cpu_initialized __initdata = 0;
+cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;

-struct x8664_pda cpu_pda[NR_CPUS] __cacheline_aligned;
+struct x8664_pda *_cpu_pda[NR_CPUS] __read_mostly;
+struct x8664_pda boot_cpu_pda[NR_CPUS] __cacheline_aligned;

-extern struct task_struct init_task;
-
-extern unsigned char __per_cpu_start[], __per_cpu_end[];
-
-extern struct desc_ptr cpu_gdt_descr[];
-struct desc_ptr idt_descr = { 256 * 16, (unsigned long) idt_table };
+struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };

 char boot_cpu_stack[IRQSTACKSIZE] __attribute__((section(".bss.page_aligned")));

-unsigned long __supported_pte_mask = ~0UL;
-static int do_not_nx __initdata = 0;
-unsigned long vm_stack_flags = __VM_STACK_FLAGS;
-unsigned long vm_stack_flags32 = __VM_STACK_FLAGS;
-unsigned long vm_data_default_flags = __VM_DATA_DEFAULT_FLAGS;
-unsigned long vm_data_default_flags32 = __VM_DATA_DEFAULT_FLAGS;
-unsigned long vm_force_exec32 = PROT_EXEC;
+unsigned long __supported_pte_mask __read_mostly = ~0UL;
+static int do_not_nx __cpuinitdata = 0;

 /* noexec=on|off
 Control non executable mappings for 64bit processes.

-on	Enable
+on	Enable(default)
 off	Disable
-noforce	(default) Don't enable by default for heap/stack/data,
-	but allow PROT_EXEC to be effective
-
 */
-static int __init nonx_setup(char *str)
+int __init nonx_setup(char *str)
 {
-	if (!strncmp(str, "on",3)) {
+	if (!strncmp(str, "on", 2)) {
 		__supported_pte_mask |= _PAGE_NX;
 		do_not_nx = 0;
-		vm_data_default_flags &= ~VM_EXEC;
-		vm_stack_flags &= ~VM_EXEC;
-	} else if (!strncmp(str, "noforce",7) || !strncmp(str,"off",3)) {
-		do_not_nx = (str[0] == 'o');
-		if (do_not_nx)
-			__supported_pte_mask &= ~_PAGE_NX;
-		vm_data_default_flags |= VM_EXEC;
-		vm_stack_flags |= VM_EXEC;
-	}
-	return 1;
+	} else if (!strncmp(str, "off", 3)) {
+		do_not_nx = 1;
+		__supported_pte_mask &= ~_PAGE_NX;
+	}
+	return 1;
 }
+__setup("noexec=", nonx_setup);	/* parsed early actually */

-__setup("noexec=", nonx_setup);
-
-/* noexec32=opt{,opt}
+int force_personality32 = 0;

-Control the no exec default for 32bit processes. Can be also overwritten
-per executable using ELF header flags (e.g. needed for the X server)
-Requires noexec=on or noexec=noforce to be effective.
-
-Valid options:
- all,on	Heap,stack,data is non executable.
- off	(default) Heap,stack,data is executable
- stack	Stack is non executable, heap/data is.
- force	Don't imply PROT_EXEC for PROT_READ
- compat	(default) Imply PROT_EXEC for PROT_READ
+/* noexec32=on|off
+Control non executable heap for 32bit processes.
+To control the stack too use noexec=off
+on	PROT_READ does not imply PROT_EXEC for 32bit processes
+off	PROT_READ implies PROT_EXEC (default)
 */
-static int __init nonx32_setup(char *str)
-{
-	char *s;
-	while ((s = strsep(&str, ",")) != NULL) {
-		if (!strcmp(s, "all") || !strcmp(s,"on")) {
-			vm_data_default_flags32 &= ~VM_EXEC;
-			vm_stack_flags32 &= ~VM_EXEC;
-		} else if (!strcmp(s, "off")) {
-			vm_data_default_flags32 |= VM_EXEC;
-			vm_stack_flags32 |= VM_EXEC;
-		} else if (!strcmp(s, "stack")) {
-			vm_data_default_flags32 |= VM_EXEC;
-			vm_stack_flags32 &= ~VM_EXEC;
-		} else if (!strcmp(s, "force")) {
-			vm_force_exec32 = 0;
-		} else if (!strcmp(s, "compat")) {
-			vm_force_exec32 = PROT_EXEC;
-		}
-	}
-	return 1;
-}
-
-__setup("noexec32=", nonx32_setup);
+static int __init nonx32_setup(char *str)
+{
+	if (!strcmp(str, "on"))
+		force_personality32 &= ~READ_IMPLIES_EXEC;
+	else if (!strcmp(str, "off"))
+		force_personality32 |= READ_IMPLIES_EXEC;
+	return 1;
+}
+__setup("noexec32=", nonx32_setup);

 /*
  * Great future plan:
- * Declare PDA itself and support (irqstack,tss,pml4) as per cpu data.
+ * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data.
  * Always point %gs to its beginning
  */
 void __init setup_per_cpu_areas(void)
@@ -125,6 +88,10 @@ void __init setup_per_cpu_areas(void)
 	int i;
 	unsigned long size;

+#ifdef CONFIG_HOTPLUG_CPU
+	prefill_possible_map();
+#endif
+
 	/* Copy section for each CPU (we discard the original) */
 	size = ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES);
 #ifdef CONFIG_MODULES
@@ -132,33 +99,31 @@
 	size = PERCPU_ENOUGH_ROOM;
 #endif

-	for (i = 0; i < NR_CPUS; i++) {
-		unsigned char *ptr;
-		/* If possible allocate on the node of the CPU.
-		   In case it doesn't exist round-robin nodes.
-		 */
-		if (!NODE_DATA(i % numnodes)) {
-			printk("cpu with no node %d, numnodes %d\n", i, numnodes);
+	for_each_cpu_mask (i, cpu_possible_map) {
+		char *ptr;
+
+		if (!NODE_DATA(cpu_to_node(i))) {
+			printk("cpu with no node %d, num_online_nodes %d\n",
+				i, num_online_nodes());
 			ptr = alloc_bootmem(size);
 		} else {
-			ptr = alloc_bootmem_node(NODE_DATA(i % numnodes), size);
+			ptr = alloc_bootmem_node(NODE_DATA(cpu_to_node(i)), size);
 		}
 		if (!ptr)
 			panic("Cannot allocate cpu data for CPU %d\n", i);
-		cpu_pda[i].data_offset = ptr - __per_cpu_start;
+		cpu_pda(i)->data_offset = ptr - __per_cpu_start;
 		memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
 	}
 }

 void pda_init(int cpu)
 {
-	pml4_t *level4;
-	struct x8664_pda *pda = &cpu_pda[cpu];
+	struct x8664_pda *pda = cpu_pda(cpu);

 	/* Setup up data that may be needed in __get_free_pages early */
 	asm volatile("movl %0,%%fs ; movl %0,%%gs" :: "r" (0));
-	wrmsrl(MSR_GS_BASE, cpu_pda + cpu);
+	wrmsrl(MSR_GS_BASE, pda);

-	pda->me = pda;
 	pda->cpunumber = cpu;
 	pda->irqcount = -1;
 	pda->kernelstack =
@@ -170,30 +135,22 @@ void pda_init(int cpu)
 	/* others are initialized in smpboot.c */
 	pda->pcurrent = &init_task;
 	pda->irqstackptr = boot_cpu_stack;
-	level4 = init_level4_pgt;
 	} else {
-		level4 = (pml4_t *)__get_free_pages(GFP_ATOMIC, 0);
-		if (!level4)
-			panic("Cannot allocate top level page for cpu %d", cpu);
 		pda->irqstackptr = (char *)
 			__get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER);
 		if (!pda->irqstackptr)
 			panic("cannot allocate irqstack for cpu %d", cpu);
 	}

-	pda->level4_pgt = (unsigned long *)level4;
-	if (level4 != init_level4_pgt)
-		memcpy(level4, &init_level4_pgt, PAGE_SIZE);
-	set_pml4(level4 + 510, mk_kernel_pml4(__pa_symbol(boot_vmalloc_pgt)));
-	asm volatile("movq %0,%%cr3" :: "r" (__pa(level4)));
 	pda->irqstackptr += IRQSTACKSIZE-64;
 }

-char boot_exception_stacks[N_EXCEPTION_STACKS * EXCEPTION_STKSZ]
+char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]
 __attribute__((section(".bss.page_aligned")));

-void __init syscall_init(void)
+/* May not be marked __init: used by software suspend */
+void syscall_init(void)
 {
 	/*
 	 * LSTAR and STAR live in a bit strange symbiosis.
@@ -211,7 +168,7 @@ void __init syscall_init(void)
 	wrmsrl(MSR_SYSCALL_MASK, EF_TF|EF_DF|EF_IE|0x3000);
 }

-void __init check_efer(void)
+void __cpuinit check_efer(void)
 {
 	unsigned long efer;

@@ -228,57 +185,43 @@
  * 'CPU state barrier', nothing should get across.
  * A lot of state is already set up in PDA init.
*/ -void __init cpu_init (void) +void __cpuinit cpu_init (void) { -#ifdef CONFIG_SMP int cpu = stack_smp_processor_id(); -#else - int cpu = smp_processor_id(); -#endif - struct tss_struct * t = &init_tss[cpu]; + struct tss_struct *t = &per_cpu(init_tss, cpu); unsigned long v; char *estacks = NULL; struct task_struct *me; + int i; /* CPU 0 is initialised in head64.c */ if (cpu != 0) { pda_init(cpu); + zap_low_mappings(cpu); } else estacks = boot_exception_stacks; me = current; - if (test_and_set_bit(cpu, &cpu_initialized)) + if (cpu_test_and_set(cpu, cpu_initialized)) panic("CPU#%d already initialized!\n", cpu); printk("Initializing CPU#%d\n", cpu); - clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); + clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); /* * Initialize the per-CPU GDT with the boot GDT, * and set up the GDT descriptor: */ - if (cpu) { - memcpy(cpu_gdt_table[cpu], cpu_gdt_table[0], GDT_SIZE); - } + if (cpu) + memcpy(cpu_gdt(cpu), cpu_gdt_table, GDT_SIZE); cpu_gdt_descr[cpu].size = GDT_SIZE; - cpu_gdt_descr[cpu].address = (unsigned long)cpu_gdt_table[cpu]; - __asm__ __volatile__("lgdt %0": "=m" (cpu_gdt_descr[cpu])); - __asm__ __volatile__("lidt %0": "=m" (idt_descr)); - - memcpy(me->thread.tls_array, cpu_gdt_table[cpu], GDT_ENTRY_TLS_ENTRIES * 8); - - /* - * Delete NT - */ - - asm volatile("pushfq ; popq %%rax ; btr $14,%%rax ; pushq %%rax ; popfq" ::: "eax"); - - if (cpu == 0) - early_identify_cpu(&boot_cpu_data); + asm volatile("lgdt %0" :: "m" (cpu_gdt_descr[cpu])); + asm volatile("lidt %0" :: "m" (idt_descr)); + memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8); syscall_init(); wrmsrl(MSR_FS_BASE, 0); @@ -292,22 +235,37 @@ void __init cpu_init (void) */ for (v = 0; v < N_EXCEPTION_STACKS; v++) { if (cpu) { - estacks = (char *)__get_free_pages(GFP_ATOMIC, - EXCEPTION_STACK_ORDER); + static const unsigned int order[N_EXCEPTION_STACKS] = { + [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER, + [DEBUG_STACK - 1] = DEBUG_STACK_ORDER + }; + + estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]); if (!estacks) panic("Cannot allocate exception stack %ld %d\n", v, cpu); } - estacks += EXCEPTION_STKSZ; + switch (v + 1) { +#if DEBUG_STKSZ > EXCEPTION_STKSZ + case DEBUG_STACK: + cpu_pda(cpu)->debugstack = (unsigned long)estacks; + estacks += DEBUG_STKSZ; + break; +#endif + default: + estacks += EXCEPTION_STKSZ; + break; + } t->ist[v] = (unsigned long)estacks; } - t->io_bitmap_base = INVALID_IO_BITMAP_OFFSET; + t->io_bitmap_base = offsetof(struct tss_struct, io_bitmap); /* - * This is required because the CPU will access up to + * <= is required because the CPU will access up to * 8 bits beyond the end of the IO permission bitmap. */ - t->io_bitmap[IO_BITMAP_LONGS] = ~0UL; + for (i = 0; i <= IO_BITMAP_LONGS; i++) + t->io_bitmap[i] = ~0UL; atomic_inc(&init_mm.mm_count); me->active_mm = &init_mm; @@ -323,16 +281,12 @@ void __init cpu_init (void) * Clear all 6 debug registers: */ - set_debug(0UL, 0); - set_debug(0UL, 1); - set_debug(0UL, 2); - set_debug(0UL, 3); - set_debug(0UL, 6); - set_debug(0UL, 7); + set_debugreg(0UL, 0); + set_debugreg(0UL, 1); + set_debugreg(0UL, 2); + set_debugreg(0UL, 3); + set_debugreg(0UL, 6); + set_debugreg(0UL, 7); fpu_init(); - -#ifdef CONFIG_NUMA - numa_add_cpu(cpu); -#endif }
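
Some context on the mechanisms this patch touches. The sketches below are written against the 2.6.16-era tree from memory; they are illustrations, not lines taken from the patched files.

On the nonx_setup() hunk: clearing _PAGE_NX from __supported_pte_mask is all "noexec=off" has to do, because the PTE constructors filter every protection value through that mask. Roughly, modeled on the era's pfn_pte() in include/asm-x86_64/pgtable.h:

	/* Sketch: how __supported_pte_mask gates NX globally.
	 * Modeled on pfn_pte(); this lives in pgtable.h, not here. */
	static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot)
	{
		pte_t pte;
		pte_val(pte) = page_nr << PAGE_SHIFT;	/* physical frame */
		pte_val(pte) |= pgprot_val(pgprot);	/* requested bits */
		/* once noexec=off (or check_efer() on a CPU without NX)
		   clears _PAGE_NX here, no mapping can carry the NX bit */
		pte_val(pte) &= __supported_pte_mask;
		return pte;
	}

This is also why check_efer() only has to touch the mask: on hardware without EFER.NX the same constructors then never set a reserved bit.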
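On the reworked nonx32_setup(): it no longer flips VM flags itself; it only seeds force_personality32, which the 32-bit ELF loader ORs into the task's personality. Enforcement then happens in the generic mmap path. A self-contained sketch of that check (the real test is inlined in this era's do_mmap_pgoff(); mmap_prot_fixup() is a hypothetical name for illustration):

	#include <linux/personality.h>	/* READ_IMPLIES_EXEC */
	#include <linux/sched.h>	/* current */
	#include <asm/mman.h>		/* PROT_READ, PROT_EXEC */

	/* With noexec32=off (the default), 32-bit tasks carry
	 * READ_IMPLIES_EXEC, so readable mappings become executable. */
	static unsigned long mmap_prot_fixup(unsigned long prot)
	{
		if ((prot & PROT_READ) &&
		    (current->personality & READ_IMPLIES_EXEC))
			prot |= PROT_EXEC;
		return prot;
	}

Routing everything through the personality bit lets one mechanism serve both this boot option and ELF binaries that request an executable stack.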
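On setup_per_cpu_areas(): the data_offset written into each PDA is the whole trick behind per-CPU variables here. A variable is laid out once in the prototype .data.percpu section; each CPU's copy sits at the prototype address plus that CPU's offset. Paraphrasing the era's include/asm-x86_64/percpu.h (RELOC_HIDE just hides the offset arithmetic from the compiler):

	#define __per_cpu_offset(cpu)	(cpu_pda(cpu)->data_offset)

	/* this CPU's copy = prototype address + per-CPU offset */
	#define per_cpu(var, cpu) \
		(*RELOC_HIDE(&per_cpu__##var, __per_cpu_offset(cpu)))

	/* on the local CPU the offset comes out of the PDA via %gs */
	#define __get_cpu_var(var) \
		(*RELOC_HIDE(&per_cpu__##var, read_pda(data_offset)))

That is why the loop memcpy()s from __per_cpu_start into every allocation: each possible CPU needs its own initialized copy of the prototype section.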
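On pda_init(): MSR_GS_BASE is programmed before anything else because, from then on, all per-CPU kernel state is one %gs-relative access away. A simplified sketch of the read_pda() idea from the era's include/asm-x86_64/pda.h (the real macro dispatches on field size):

	#include <linux/stddef.h>	/* offsetof */
	#include <asm/pda.h>		/* struct x8664_pda */

	/* Fetch this CPU's number straight out of %gs:PDA. */
	static inline int pda_cpunumber(void)
	{
		int cpu;
		asm("movl %%gs:%c1,%0"
		    : "=r" (cpu)
		    : "i" (offsetof(struct x8664_pda, cpunumber)));
		return cpu;
	}

Note the ordering in the hunk itself: the null selector is loaded into %gs first, because a segment-register load clobbers the hidden base; the wrmsrl() must come after it.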
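On the ist[] loop in cpu_init(): those slots are consumed by the IDT. An x86-64 gate carries a 3-bit IST index, and a non-zero index makes the CPU switch to tss->ist[n-1] unconditionally on delivery; that is what makes the dedicated DEBUG_STKSZ sizing above meaningful. The wiring lives elsewhere (traps.c in this era); along these lines, with the handler declarations and the set_intr_gate_ist() signature hedged from memory:

	extern void double_fault(void);
	extern void debug(void);

	/* #DF needs a known-good stack even if the kernel stack is
	 * gone; #DB gets the larger DEBUG_STKSZ stack set up above. */
	static void __init wire_ist_stacks(void)
	{
		set_intr_gate_ist(8, &double_fault, DOUBLEFAULT_STACK);
		set_intr_gate_ist(1, &debug, DEBUG_STACK);
	}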
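Finally, the io_bitmap hunk: IO_BITMAP_LONGS is 65536 port bits / 64 = 1024 words, so the "<=" loop writes 1025 words of all-ones. Indices 0..1023 deny every I/O port; the extra word at index 1024 is the tail the comment refers to, since the CPU may read up to 8 bits past the last byte of the bitmap when checking a port range that ends exactly at the boundary. The old code pointed io_bitmap_base at INVALID_IO_BITMAP_OFFSET and set only that sentinel; the new code installs a real, fully populated deny-all bitmap.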