X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=arch%2Fx86_64%2Fia32%2Fsyscall32.c;h=d8ea6d9fcb2976ae8fbb16ea459cf8a831ad5eea;hb=refs%2Fheads%2Fvserver;hp=1806bff6687ac7ed71bd0e08a6902eeb9e2b4599;hpb=9213980e6a70d8473e0ffd4b39ab5b6caaba9ff5;p=linux-2.6.git diff --git a/arch/x86_64/ia32/syscall32.c b/arch/x86_64/ia32/syscall32.c index 1806bff66..d8ea6d9fc 100644 --- a/arch/x86_64/ia32/syscall32.c +++ b/arch/x86_64/ia32/syscall32.c @@ -9,53 +9,93 @@ #include #include #include +#include +#include #include #include #include -/* 32bit VDSOs mapped into user space. */ -asm(".section \".init.data\",\"aw\"\n" - "syscall32_syscall:\n" - ".incbin \"arch/x86_64/ia32/vsyscall-syscall.so\"\n" - "syscall32_syscall_end:\n" - "syscall32_sysenter:\n" - ".incbin \"arch/x86_64/ia32/vsyscall-sysenter.so\"\n" - "syscall32_sysenter_end:\n" - ".previous"); - +#ifdef USE_INT80 +extern unsigned char syscall32_int80[], syscall32_int80_end[]; +#endif extern unsigned char syscall32_syscall[], syscall32_syscall_end[]; extern unsigned char syscall32_sysenter[], syscall32_sysenter_end[]; extern int sysctl_vsyscall32; char *syscall32_page; -static int use_sysenter __initdata = -1; +#ifndef USE_INT80 +static int use_sysenter = -1; +#endif -/* RED-PEN: This knows too much about high level VM */ -/* Alternative would be to generate a vma with appropriate backing options - and let it be handled by generic VM */ -int map_syscall32(struct mm_struct *mm, unsigned long address) -{ - pte_t *pte; - pmd_t *pmd; - int err = 0; - - down_read(&mm->mmap_sem); - spin_lock(&mm->page_table_lock); - pmd = pmd_alloc(mm, pgd_offset(mm, address), address); - if (pmd && (pte = pte_alloc_map(mm, pmd, address)) != NULL) { - if (pte_none(*pte)) { - set_pte(pte, - mk_pte(virt_to_page(syscall32_page), - PAGE_KERNEL_VSYSCALL)); - } - /* Flush only the local CPU. Other CPUs taking a fault - will just end up here again */ - __flush_tlb_one(address); - } else - err = -ENOMEM; - spin_unlock(&mm->page_table_lock); - up_read(&mm->mmap_sem); - return err; +static struct page * +syscall32_nopage(struct vm_area_struct *vma, unsigned long adr, int *type) +{ + struct page *p = virt_to_page(adr - vma->vm_start + syscall32_page); + get_page(p); + return p; +} + +/* Prevent VMA merging */ +static void syscall32_vma_close(struct vm_area_struct *vma) +{ +} + +static struct vm_operations_struct syscall32_vm_ops = { + .close = syscall32_vma_close, + .nopage = syscall32_nopage, +}; + +struct linux_binprm; + +/* Setup a VMA at program startup for the vsyscall page */ +int syscall32_setup_pages(struct linux_binprm *bprm, int exstack, + unsigned long start_code, + unsigned long interp_map_address) +{ + int npages = (VSYSCALL32_END - VSYSCALL32_BASE) >> PAGE_SHIFT; + struct vm_area_struct *vma; + struct mm_struct *mm = current->mm; + int ret; + + vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); + if (!vma) + return -ENOMEM; + + memset(vma, 0, sizeof(struct vm_area_struct)); + /* Could randomize here */ + vma->vm_start = VSYSCALL32_BASE; + vma->vm_end = VSYSCALL32_END; + /* MAYWRITE to allow gdb to COW and set breakpoints */ + vma->vm_flags = VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC|VM_MAYWRITE; + /* + * Make sure the vDSO gets into every core dump. + * Dumping its contents makes post-mortem fully interpretable later + * without matching up the same kernel and hardware config to see + * what PC values meant. + */ + vma->vm_flags |= VM_ALWAYSDUMP; + vma->vm_flags |= mm->def_flags; + vma->vm_page_prot = protection_map[vma->vm_flags & 7]; + vma->vm_ops = &syscall32_vm_ops; + vma->vm_mm = mm; + + down_write(&mm->mmap_sem); + if ((ret = insert_vm_struct(mm, vma))) { + up_write(&mm->mmap_sem); + kmem_cache_free(vm_area_cachep, vma); + return ret; + } + vx_vmpages_add(mm, npages); + up_write(&mm->mmap_sem); + return 0; +} + +const char *arch_vma_name(struct vm_area_struct *vma) +{ + if (vma->vm_start == VSYSCALL32_BASE && + vma->vm_mm && vma->vm_mm->task_size == IA32_PAGE_OFFSET) + return "[vdso]"; + return NULL; } static int __init init_syscall32(void) @@ -63,7 +103,14 @@ static int __init init_syscall32(void) syscall32_page = (void *)get_zeroed_page(GFP_KERNEL); if (!syscall32_page) panic("Cannot allocate syscall32 page"); - SetPageReserved(virt_to_page(syscall32_page)); + +#ifdef USE_INT80 + /* + * At this point we use int 0x80. + */ + memcpy(syscall32_page, syscall32_int80, + syscall32_int80_end - syscall32_int80); +#else if (use_sysenter > 0) { memcpy(syscall32_page, syscall32_sysenter, syscall32_sysenter_end - syscall32_sysenter); @@ -71,13 +118,20 @@ static int __init init_syscall32(void) memcpy(syscall32_page, syscall32_syscall, syscall32_syscall_end - syscall32_syscall); } +#endif return 0; } - -__initcall(init_syscall32); -void __init syscall32_cpu_init(void) +/* + * This must be done early in case we have an initrd containing 32-bit + * binaries (e.g., hotplug). This could be pushed upstream to arch/x86_64. + */ +core_initcall(init_syscall32); + +/* May not be __init: called during resume */ +void syscall32_cpu_init(void) { +#ifndef USE_INT80 if (use_sysenter < 0) use_sysenter = (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL); @@ -88,4 +142,5 @@ void __init syscall32_cpu_init(void) checking_wrmsrl(MSR_IA32_SYSENTER_EIP, (u64)ia32_sysenter_target); wrmsrl(MSR_CSTAR, ia32_cstar_target); +#endif }