X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=arch%2Fx86_64%2Fkernel%2Fhead.S;h=1e6f80870679506482ef7d2714621e8ad4df4d08;hb=refs%2Fheads%2Fvserver;hp=30e92ff8b3c07315d7738685af53f3f0c25aa84c;hpb=6a77f38946aaee1cd85eeec6cf4229b204c15071;p=linux-2.6.git

diff --git a/arch/x86_64/kernel/head.S b/arch/x86_64/kernel/head.S
index 30e92ff8b..1e6f80870 100644
--- a/arch/x86_64/kernel/head.S
+++ b/arch/x86_64/kernel/head.S
@@ -5,13 +5,12 @@
  * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
  * Copyright (C) 2000 Karsten Keil <kkeil@suse.de>
  * Copyright (C) 2001,2002 Andi Kleen <ak@suse.de>
- *
- * $Id: head.S,v 1.49 2002/03/19 17:39:25 ak Exp $
  */

 #include <linux/linkage.h>
 #include <linux/threads.h>
+#include <linux/init.h>
 #include <asm/desc.h>
 #include <asm/segment.h>
 #include <asm/page.h>
 #include <asm/msr.h>
@@ -25,7 +24,9 @@
  */

 	.text
+	.section .bootstrap.text
 	.code32
+	.globl startup_32
 /* %bx:	1 if coming from smp trampoline on secondary cpu */
 startup_32:

@@ -37,11 +38,13 @@ startup_32:
 	 * There is no stack until we set one up.
 	 */

-	movl %ebx,%ebp	/* Save trampoline flag */
-
+	/* Initialize the %ds segment register */
 	movl $__KERNEL_DS,%eax
 	movl %eax,%ds
-
+
+	/* Load new GDT with the 64bit segments using 32bit descriptor */
+	lgdt	pGDT32 - __START_KERNEL_map
+
 	/* If the CPU doesn't support CPUID this will double fault.
 	 * Unfortunately it is hard to check for CPUID without a stack.
 	 */
@@ -57,20 +60,17 @@ startup_32:
 	btl $29, %edx
 	jnc no_long_mode

-	movl %edx,%edi
-
 	/*
 	 * Prepare for entering 64bits mode
 	 */

-	/* Enable PAE mode and PGE */
+	/* Enable PAE mode */
 	xorl %eax, %eax
 	btsl $5, %eax
-	btsl $7, %eax
 	movl %eax, %cr4

 	/* Setup early boot stage 4 level pagetables */
-	movl $(init_level4_pgt - __START_KERNEL_map), %eax
+	movl $(boot_level4_pgt - __START_KERNEL_map), %eax
 	movl %eax, %cr3

 	/* Setup EFER (Extended Feature Enable Register) */
@@ -79,14 +79,6 @@ startup_32:
 	/* Enable Long Mode */
 	btsl $_EFER_LME, %eax

-	/* Enable System Call */
-	btsl $_EFER_SCE, %eax
-
-	/* No Execute supported? */
-	btl $20,%edi
-	jnc 1f
-	btsl $_EFER_NX, %eax
-1:
 	/* Make changes effective */
 	wrmsr

@@ -94,38 +86,69 @@ startup_32:
 	xorl %eax, %eax
 	btsl $31, %eax			/* Enable paging and in turn activate Long Mode */
 	btsl $0, %eax			/* Enable protected mode */
-	btsl $1, %eax			/* Enable MP */
-	btsl $4, %eax			/* Enable ET */
-	btsl $5, %eax			/* Enable NE */
-	btsl $16, %eax			/* Enable WP */
-	btsl $18, %eax			/* Enable AM */
 	/* Make changes effective */
 	movl %eax, %cr0
-	jmp reach_compatibility_mode
-reach_compatibility_mode:
-
 	/*
 	 * At this point we're in long mode but in 32bit compatibility mode
 	 * with EFER.LME = 1, CS.L = 0, CS.D = 1 (and in turn
-	 * EFER.LMA = 1). Now we want to jump in 64bit mode, to do that we load
+	 * EFER.LMA = 1). Now we want to jump in 64bit mode, to do that we use
 	 * the new gdt/idt that has __KERNEL_CS with CS.L = 1.
 	 */
-
-	testw %bp,%bp	/* secondary CPU? */
-	jnz second
-
-	/* Load new GDT with the 64bit segment using 32bit descriptor */
-	movl $(pGDT32 - __START_KERNEL_map), %eax
-	lgdt (%eax)
-
-second:
-	movl $(ljumpvector - __START_KERNEL_map), %eax
-	/* Finally jump in 64bit mode */
-	ljmp *(%eax)
+	ljmp $__KERNEL_CS, $(startup_64 - __START_KERNEL_map)

 	.code64
 	.org 0x100
-reach_long64:
+	.globl startup_64
+startup_64:
+	/* We come here either from startup_32
+	 * or directly from a 64bit bootloader.
+	 * Since we may have come directly from a bootloader we
+	 * reload the page tables here.
+	 */
+
+	/* Enable PAE mode and PGE */
+	xorq	%rax, %rax
+	btsq	$5, %rax
+	btsq	$7, %rax
+	movq	%rax, %cr4
+
+	/* Setup early boot stage 4 level pagetables. */
+	movq	$(boot_level4_pgt - __START_KERNEL_map), %rax
+	movq	%rax, %cr3
+
+	/* Check if nx is implemented */
+	movl	$0x80000001, %eax
+	cpuid
+	movl	%edx,%edi
+
+	/* Setup EFER (Extended Feature Enable Register) */
+	movl	$MSR_EFER, %ecx
+	rdmsr
+
+	/* Enable System Call */
+	btsl	$_EFER_SCE, %eax
+
+	/* No Execute supported? */
+	btl	$20,%edi
+	jnc	1f
+	btsl	$_EFER_NX, %eax
+1:
+	/* Make changes effective */
+	wrmsr
+
+	/* Setup cr0 */
+#define CR0_PM				1		/* protected mode */
+#define CR0_MP				(1<<1)
+#define CR0_ET				(1<<4)
+#define CR0_NE				(1<<5)
+#define CR0_WP				(1<<16)
+#define CR0_AM				(1<<18)
+#define CR0_PAGING			(1<<31)
+	movl $CR0_PM|CR0_MP|CR0_ET|CR0_NE|CR0_WP|CR0_AM|CR0_PAGING,%eax
+	/* Make changes effective */
+	movq	%rax, %cr0
+
+	/* Setup a boot time stack */
 	movq init_rsp(%rip),%rsp

 	/* zero EFLAGS after setting rsp */
@@ -162,13 +185,18 @@ reach_long64:
 	/* Finally jump to run C code and to be on real kernel address
 	 * Since we are running on identity-mapped space we have to jump
-	 * to the full 64bit address , this is only possible as indirect
-	 * jump
+	 * to the full 64bit address, this is only possible as indirect
+	 * jump. In addition we need to ensure %cs is set so we make this
+	 * a far return.
 	 */
 	movq	initial_code(%rip),%rax
-	jmp	*%rax
+	pushq	$0		# fake return address to stop unwinder
+	pushq	$__KERNEL_CS	# set correct cs
+	pushq	%rax		# target address in negative space
+	lretq

-	/* SMP bootup changes these two */
+	/* SMP bootup changes these two */
+	.align	8
 	.globl	initial_code
 initial_code:
 	.quad	x86_64_start_kernel

@@ -177,17 +205,32 @@ init_rsp:
 	.quad	init_thread_union+THREAD_SIZE-8

 ENTRY(early_idt_handler)
+	cmpl $2,early_recursion_flag(%rip)
+	jz 1f
+	incl early_recursion_flag(%rip)
 	xorl %eax,%eax
 	movq 8(%rsp),%rsi	# get rip
 	movq (%rsp),%rdx
 	movq %cr2,%rcx
 	leaq early_idt_msg(%rip),%rdi
 	call early_printk
+	cmpl $2,early_recursion_flag(%rip)
+	jz 1f
+	call dump_stack
+#ifdef CONFIG_KALLSYMS
+	leaq early_idt_ripmsg(%rip),%rdi
+	movq 8(%rsp),%rsi	# get rip again
+	call __print_symbol
+#endif
 1:	hlt
 	jmp 1b
+early_recursion_flag:
+	.long 0

 early_idt_msg:
 	.asciz "PANIC: early exception rip %lx error %lx cr2 %lx\n"
+early_idt_ripmsg:
+	.asciz "RIP %s\n"

 .code32
 ENTRY(no_long_mode)
@@ -198,133 +241,105 @@ ENTRY(no_long_mode)
 	.org 0xf00
 	.globl pGDT32
 pGDT32:
-	.word	gdt32_end-gdt_table32
-	.long	gdt_table32-__START_KERNEL_map
+	.word	gdt_end-cpu_gdt_table-1
+	.long	cpu_gdt_table-__START_KERNEL_map

 	.org 0xf10
 ljumpvector:
-	.long	reach_long64-__START_KERNEL_map
+	.long	startup_64-__START_KERNEL_map
 	.word	__KERNEL_CS

 ENTRY(stext)
 ENTRY(_stext)

-	/*
-	 * This default setting generates an ident mapping at address 0x100000
-	 * and a mapping for the kernel that precisely maps virtual address
-	 * 0xffffffff80000000 to physical address 0x000000. (always using
-	 * 2Mbyte large pages provided by PAE mode)
-	 */
-.org 0x1000
-ENTRY(init_level4_pgt)
-	.quad	0x0000000000102007		/* -> level3_ident_pgt */
-	.fill	255,8,0
-	.quad	0x000000000010a007
-	.fill	254,8,0
-	/* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
-	.quad	0x0000000000103007		/* -> level3_kernel_pgt */
+	$page = 0
+#define NEXT_PAGE(name) \
+	$page = $page + 1; \
+	.org $page * 0x1000; \
+	phys_/**/name = $page * 0x1000 + __PHYSICAL_START; \
+ENTRY(name)

-.org 0x2000
-ENTRY(level3_ident_pgt)
-	.quad	0x0000000000104007
+NEXT_PAGE(init_level4_pgt)
+	/* This gets initialized in x86_64_start_kernel */
+	.fill	512,8,0
+
+NEXT_PAGE(level3_ident_pgt)
+	.quad	phys_level2_ident_pgt | 0x007
 	.fill	511,8,0

-.org 0x3000
-ENTRY(level3_kernel_pgt)
+NEXT_PAGE(level3_kernel_pgt)
 	.fill	510,8,0
 	/* (2^48-(2*1024*1024*1024)-((2^39)*511))/(2^30) = 510 */
-	.quad	0x0000000000105007		/* -> level2_kernel_pgt */
+	.quad	phys_level2_kernel_pgt | 0x007
 	.fill	1,8,0

-.org 0x4000
-ENTRY(level2_ident_pgt)
+NEXT_PAGE(level2_ident_pgt)
 	/* 40MB for bootup. */
-	.quad	0x0000000000000283
-	.quad	0x0000000000200183
-	.quad	0x0000000000400183
-	.quad	0x0000000000600183
-	.quad	0x0000000000800183
-	.quad	0x0000000000A00183
-	.quad	0x0000000000C00183
-	.quad	0x0000000000E00183
-	.quad	0x0000000001000183
-	.quad	0x0000000001200183
-	.quad	0x0000000001400183
-	.quad	0x0000000001600183
-	.quad	0x0000000001800183
-	.quad	0x0000000001A00183
-	.quad	0x0000000001C00183
-	.quad	0x0000000001E00183
-	.quad	0x0000000002000183
-	.quad	0x0000000002200183
-	.quad	0x0000000002400183
-	.quad	0x0000000002600183
+	i = 0
+	.rept 20
+	.quad	i << 21 | 0x083
+	i = i + 1
+	.endr
 	/* Temporary mappings for the super early allocator in arch/x86_64/mm/init.c */
 	.globl temp_boot_pmds
temp_boot_pmds:
 	.fill	492,8,0

-.org 0x5000
-ENTRY(level2_kernel_pgt)
+NEXT_PAGE(level2_kernel_pgt)
 	/* 40MB kernel mapping. The kernel code cannot be bigger than that.
 	   When you change this change KERNEL_TEXT_SIZE in page.h too. */
 	/* (2^48-(2*1024*1024*1024)-((2^39)*511)-((2^30)*510)) = 0 */
-	.quad	0x0000000000000183
-	.quad	0x0000000000200183
-	.quad	0x0000000000400183
-	.quad	0x0000000000600183
-	.quad	0x0000000000800183
-	.quad	0x0000000000A00183
-	.quad	0x0000000000C00183
-	.quad	0x0000000000E00183
-	.quad	0x0000000001000183
-	.quad	0x0000000001200183
-	.quad	0x0000000001400183
-	.quad	0x0000000001600183
-	.quad	0x0000000001800183
-	.quad	0x0000000001A00183
-	.quad	0x0000000001C00183
-	.quad	0x0000000001E00183
-	.quad	0x0000000002000183
-	.quad	0x0000000002200183
-	.quad	0x0000000002400183
-	.quad	0x0000000002600183
+	i = 0
+	.rept 20
+	.quad	i << 21 | 0x183
+	i = i + 1
+	.endr
 	/* Module mapping starts here */
 	.fill	492,8,0

-.org 0x6000
-ENTRY(empty_zero_page)
-
-.org 0x7000
-ENTRY(empty_bad_page)
-
-.org 0x8000
-ENTRY(empty_bad_pte_table)
+NEXT_PAGE(level3_physmem_pgt)
+	.quad	phys_level2_kernel_pgt | 0x007	/* so that __va works even before pagetable_init */
+	.fill	511,8,0

-.org 0x9000
-ENTRY(empty_bad_pmd_table)
+#undef NEXT_PAGE

-.org 0xa000
-ENTRY(level3_physmem_pgt)
-	.quad	0x0000000000105007	/* -> level2_kernel_pgt (so that __va works even before pagetable_init) */
+	.data

-	.org 0xb000
 #ifdef CONFIG_ACPI_SLEEP
+	.align PAGE_SIZE
 ENTRY(wakeup_level4_pgt)
-	.quad	0x0000000000102007		/* -> level3_ident_pgt */
+	.quad	phys_level3_ident_pgt | 0x007
 	.fill	255,8,0
-	.quad	0x000000000010a007
+	.quad	phys_level3_physmem_pgt | 0x007
 	.fill	254,8,0
 	/* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
-	.quad	0x0000000000103007		/* -> level3_kernel_pgt */
+	.quad	phys_level3_kernel_pgt | 0x007
 #endif

+#ifndef CONFIG_HOTPLUG_CPU
+	__INITDATA
+#endif
+	/*
+	 * This default setting generates an ident mapping at address 0x100000
+	 * and a mapping for the kernel that precisely maps virtual address
+	 * 0xffffffff80000000 to physical address 0x000000. (always using
+	 * 2Mbyte large pages provided by PAE mode)
+	 */
+	.align PAGE_SIZE
+ENTRY(boot_level4_pgt)
+	.quad	phys_level3_ident_pgt | 0x007
+	.fill	255,8,0
+	.quad	phys_level3_physmem_pgt | 0x007
+	.fill	254,8,0
+	/* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
+	.quad	phys_level3_kernel_pgt | 0x007
+
 	.data

 	.align 16
 	.globl cpu_gdt_descr
cpu_gdt_descr:
-	.word	gdt_end-cpu_gdt_table
+	.word	gdt_end-cpu_gdt_table-1
gdt:
 	.quad	cpu_gdt_table
 #ifdef CONFIG_SMP
@@ -334,25 +349,20 @@ gdt:
 	.endr
 #endif

-ENTRY(gdt_table32)
-	.quad	0x0000000000000000	/* This one is magic */
-	.quad	0x0000000000000000	/* unused */
-	.quad	0x00af9a000000ffff	/* __KERNEL_CS */
-gdt32_end:
-
 /* We need valid kernel segments for data and code in long mode too
  * IRET will check the segment types kkeil 2000/10/28
  * Also sysret mandates a special GDT layout
  */

-.align L1_CACHE_BYTES
+	.section .data.page_aligned, "aw"
+	.align PAGE_SIZE

 /* The TLS descriptors are currently at a different place compared to i386.
    Hopefully nobody expects them at a fixed place (Wine?) */

 ENTRY(cpu_gdt_table)
 	.quad	0x0000000000000000	/* NULL descriptor */
-	.quad	0x008f9a000000ffff	/* __KERNEL_COMPAT32_CS */
+	.quad	0x0			/* unused */
 	.quad	0x00af9a000000ffff	/* __KERNEL_CS */
 	.quad	0x00cf92000000ffff	/* __KERNEL_DS */
 	.quad	0x00cffa000000ffff	/* __USER32_CS */
@@ -360,21 +370,23 @@ ENTRY(cpu_gdt_table)
 	.quad	0x00affa000000ffff	/* __USER_CS */
 	.quad	0x00cf9a000000ffff	/* __KERNEL32_CS */
 	.quad	0,0			/* TSS */
-	.quad	0			/* LDT */
+	.quad	0,0			/* LDT */
 	.quad	0,0,0			/* three TLS descriptors */
-	.quad	0			/* unused now */
-	.quad	0x00009a000000ffff	/* __KERNEL16_CS - 16bit PM for S3 wakeup. */
-				  /* base must be patched for real base address. */
+	.quad	0x0000f40000000000	/* node/CPU stored in limit */
gdt_end:
 	/* asm/segment.h:GDT_ENTRIES must match this */
 	/* This should be a multiple of the cache line size */
-	/* GDTs of other CPUs: */
-	.fill (GDT_SIZE * NR_CPUS) - (gdt_end - cpu_gdt_table)
-
-	.align L1_CACHE_BYTES
-ENTRY(idt_table)
-	.rept 256
-	.quad 0
-	.quad 0
-	.endr
+	/* GDTs of other CPUs are now dynamically allocated */
+
+	/* zero the remaining page */
+	.fill PAGE_SIZE / 8 - GDT_ENTRIES,8,0
+
+	.section .bss, "aw", @nobits
+	.align L1_CACHE_BYTES
+ENTRY(idt_table)
+	.skip 256 * 16
+
+	.section .bss.page_aligned, "aw", @nobits
+	.align PAGE_SIZE
+ENTRY(empty_zero_page)
+	.skip PAGE_SIZE
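
The NX probe in the new startup_64 path (cpuid with eax=0x80000001, then btl $20,%edi) tests an architectural feature bit that user space can query the same way. A minimal C sketch of the same check, assuming GCC/Clang's <cpuid.h> helper (the helper is this sketch's assumption; the leaf and bit number come from the diff):

/* nx_probe.c - user-space rerun of the NX check startup_64 performs.
 * Illustrative sketch, not kernel code; needs GCC or Clang on x86. */
#include <stdio.h>
#include <cpuid.h>

int main(void)
{
    unsigned int eax, ebx, ecx, edx;

    /* Same leaf the diff loads into %eax before cpuid */
    if (!__get_cpuid(0x80000001, &eax, &ebx, &ecx, &edx)) {
        puts("extended leaf 0x80000001 not supported");
        return 1;
    }
    /* The diff copies %edx to %edi and tests bit 20 of it */
    printf("NX (EDX bit 20): %s\n", (edx >> 20) & 1 ? "supported" : "absent");
    return 0;
}

Only when that bit is set does the diff set _EFER_NX before the wrmsr, which is why the check has to happen before EFER is written.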
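The arithmetic quoted in the page-table comments ("(2^48-(2*1024*1024*1024))/(2^39) = 511", "... = 510") and the flag words 0x007/0x083/0x183 can be re-derived with a short C sketch; the only inputs are the __START_KERNEL_map value 0xffffffff80000000 and the standard x86-64 paging bit positions:

/* head_math.c - verify the page-table arithmetic quoted in the head.S
 * comments above. Illustrative user-space sketch, not kernel code. */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
    uint64_t va = 0xffffffff80000000ULL;   /* __START_KERNEL_map in the diff */

    /* 4-level paging: 9 index bits per level, 512 entries per table */
    printf("level4 index: %llu\n", (unsigned long long)((va >> 39) & 511)); /* 511 */
    printf("level3 index: %llu\n", (unsigned long long)((va >> 30) & 511)); /* 510 */

    /* Entry flag bits used by the tables in the diff */
    uint64_t present = 1ULL << 0, rw = 1ULL << 1, user = 1ULL << 2;
    uint64_t largepage = 1ULL << 7, global = 1ULL << 8;

    printf("directory entry: 0x%03llx\n",
           (unsigned long long)(present | rw | user));               /* 0x007 */
    printf("ident 2MB pmd:   0x%03llx\n",
           (unsigned long long)(present | rw | largepage));          /* 0x083 */
    printf("kernel 2MB pmd:  0x%03llx\n",
           (unsigned long long)(present | rw | largepage | global)); /* 0x183 */
    return 0;
}

The .rept blocks then map entry i to physical address i << 21, i.e. the i-th 2MB frame, so 20 entries produce exactly the 40MB bootup mapping the comments mention; 0x183 differs from 0x083 only by the global bit, used for the kernel mapping.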
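Likewise, the mode-switch comment ("we're in long mode but in 32bit compatibility mode ... __KERNEL_CS with CS.L = 1") can be checked against the descriptor words in cpu_gdt_table. This sketch unpacks the architectural flag nibble (bits 52-55: AVL, L, D/B, G) of the values copied from the diff:

/* gdt_decode.c - unpack the flag bits of the GDT entries listed in head.S.
 * Illustrative sketch; the descriptor values are copied from the diff. */
#include <stdio.h>
#include <stdint.h>

static void decode(const char *name, uint64_t d)
{
    unsigned int access = (unsigned int)((d >> 40) & 0xff); /* P, DPL, S, type */
    unsigned int flags  = (unsigned int)((d >> 52) & 0xf);  /* AVL, L, D/B, G */

    printf("%-14s access=0x%02x G=%u D=%u L=%u\n", name, access,
           (flags >> 3) & 1,   /* G: 4K granularity */
           (flags >> 2) & 1,   /* D/B: 32bit default operand size */
           (flags >> 1) & 1);  /* L: 64bit code segment */
}

int main(void)
{
    decode("__KERNEL32_CS", 0x00cf9a000000ffffULL); /* L=0, D=1: compat mode */
    decode("__KERNEL_CS",   0x00af9a000000ffffULL); /* L=1, D=0: long mode */
    decode("__KERNEL_DS",   0x00cf92000000ffffULL);
    return 0;
}

__KERNEL32_CS comes out with L=0, D=1, exactly the compatibility-mode state described before the ljmp, while __KERNEL_CS has L=1, D=0; loading it, whether via the ljmp in startup_32 or the pushq $__KERNEL_CS / lretq pair before x86_64_start_kernel, is what completes the switch to 64bit execution.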