* linux/arch/i386/kernel/sysenter.c
*
* (C) Copyright 2002 Linus Torvalds
+ * Portions based on the vdso-randomization code from exec-shield:
+ * Copyright(C) 2005-2006, Red Hat, Inc., Ingo Molnar
*
* This file contains the needed initializations to support sysenter.
*/
#include <linux/gfp.h>
#include <linux/string.h>
#include <linux/elf.h>
+#include <linux/mm.h>
#include <linux/mman.h>
+#include <linux/module.h>
+#include <linux/vs_memory.h>
#include <asm/cpufeature.h>
#include <asm/msr.h>
#include <asm/pgtable.h>
#include <asm/unistd.h>
-#include <linux/highmem.h>
+
+#ifdef CONFIG_XEN
+#include <xen/interface/callback.h>
+#endif
+
+/*
+ * Should the kernel map a VDSO page into processes and pass its
+ * address down to glibc upon exec()?
+ */
+#ifdef CONFIG_PARAVIRT
+unsigned int __read_mostly vdso_enabled = 0;
+#else
+unsigned int __read_mostly vdso_enabled = 1;
+#endif
+
+EXPORT_SYMBOL_GPL(vdso_enabled);
+
+static int __init vdso_setup(char *s)
+{
+ vdso_enabled = simple_strtoul(s, NULL, 0);
+
+ return 1;
+}
+
+__setup("vdso=", vdso_setup);
extern asmlinkage void sysenter_entry(void);
-void enable_sep_cpu(void *info)
+void enable_sep_cpu(void)
{
+#ifndef CONFIG_X86_NO_TSS
int cpu = get_cpu();
-#ifdef CONFIG_X86_HIGH_ENTRY
- struct tss_struct *tss = (struct tss_struct *) __fix_to_virt(FIX_TSS_0) + cpu;
-#else
- struct tss_struct *tss = init_tss + cpu;
-#endif
+ struct tss_struct *tss = &per_cpu(init_tss, cpu);
+
+ if (!boot_cpu_has(X86_FEATURE_SEP)) {
+ put_cpu();
+ return;
+ }
tss->ss1 = __KERNEL_CS;
tss->esp1 = sizeof(struct tss_struct) + (unsigned long) tss;
wrmsr(MSR_IA32_SYSENTER_ESP, tss->esp1, 0);
wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long) sysenter_entry, 0);
put_cpu();
+#endif
}
/*
*/
extern const char vsyscall_int80_start, vsyscall_int80_end;
extern const char vsyscall_sysenter_start, vsyscall_sysenter_end;
+static void *syscall_page;
-struct page *sysenter_page;
-
-static int __init sysenter_setup(void)
+int __init sysenter_setup(void)
{
- unsigned long page = get_zeroed_page(GFP_ATOMIC);
+ syscall_page = (void *)get_zeroed_page(GFP_ATOMIC);
+
+#ifdef CONFIG_XEN
+ if (boot_cpu_has(X86_FEATURE_SEP)) {
+ struct callback_register sysenter = {
+ .type = CALLBACKTYPE_sysenter,
+ .address = { __KERNEL_CS, (unsigned long)sysenter_entry },
+ };
- __set_fixmap(FIX_VSYSCALL, __pa(page), PAGE_KERNEL_RO);
- sysenter_page = virt_to_page(page);
+ if (HYPERVISOR_callback_op(CALLBACKOP_register, &sysenter) < 0)
+ clear_bit(X86_FEATURE_SEP, boot_cpu_data.x86_capability);
+ }
+#endif
+
+#ifdef CONFIG_COMPAT_VDSO
+ __set_fixmap(FIX_VDSO, __pa(syscall_page), PAGE_READONLY_EXEC);
+ printk("Compat vDSO mapped to %08lx.\n", __fix_to_virt(FIX_VDSO));
+#endif
if (!boot_cpu_has(X86_FEATURE_SEP)) {
- memcpy((void *) page,
+ memcpy(syscall_page,
&vsyscall_int80_start,
&vsyscall_int80_end - &vsyscall_int80_start);
return 0;
}
- memcpy((void *) page,
+ memcpy(syscall_page,
&vsyscall_sysenter_start,
&vsyscall_sysenter_end - &vsyscall_sysenter_start);
- on_each_cpu(enable_sep_cpu, NULL, 1, 1);
-
return 0;
}
-__initcall(sysenter_setup);
+#ifndef CONFIG_COMPAT_VDSO
+static struct page *syscall_nopage(struct vm_area_struct *vma,
+ unsigned long adr, int *type)
+{
+ struct page *p = virt_to_page(adr - vma->vm_start + syscall_page);
+ get_page(p);
+ return p;
+}
+
+/* Prevent VMA merging */
+static void syscall_vma_close(struct vm_area_struct *vma)
+{
+}
-extern void SYSENTER_RETURN_OFFSET;
+static struct vm_operations_struct syscall_vm_ops = {
+ .close = syscall_vma_close,
+ .nopage = syscall_nopage,
+};
-unsigned int vdso_enabled = 1;
+/* Defined in vsyscall-sysenter.S */
+extern void SYSENTER_RETURN;
-void map_vsyscall(void)
+/* Setup a VMA at program startup for the vsyscall page */
+int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack,
+ unsigned long start_code, unsigned long interp_map_address)
{
- struct thread_info *ti = current_thread_info();
struct vm_area_struct *vma;
+ struct mm_struct *mm = current->mm;
unsigned long addr;
+ int ret;
- if (unlikely(!vdso_enabled)) {
- current->mm->context.vdso = NULL;
- return;
+ down_write(&mm->mmap_sem);
+ addr = get_unmapped_area_prot(NULL, 0, PAGE_SIZE, 0, 0, 1);
+ if (IS_ERR_VALUE(addr)) {
+ ret = addr;
+ goto up_fail;
+ }
+
+ vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
+ if (!vma) {
+ ret = -ENOMEM;
+ goto up_fail;
}
+ vma->vm_start = addr;
+ vma->vm_end = addr + PAGE_SIZE;
+ /* MAYWRITE to allow gdb to COW and set breakpoints */
+ vma->vm_flags = VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC|VM_MAYWRITE;
/*
- * Map the vDSO (it will be randomized):
+ * Make sure the vDSO gets into every core dump.
+ * Dumping its contents makes post-mortem fully interpretable later
+ * without matching up the same kernel and hardware config to see
+ * what PC values meant.
*/
- down_write(&current->mm->mmap_sem);
- addr = do_mmap(NULL, 0, 4096, PROT_READ | PROT_EXEC, MAP_PRIVATE, 0);
- current->mm->context.vdso = (void *)addr;
- ti->sysenter_return = (void *)addr + (long)&SYSENTER_RETURN_OFFSET;
- if (addr != -1) {
- vma = find_vma(current->mm, addr);
- if (vma) {
- pgprot_val(vma->vm_page_prot) &= ~_PAGE_RW;
- get_page(sysenter_page);
- install_page(current->mm, vma, addr,
- sysenter_page, vma->vm_page_prot);
-
- }
+ vma->vm_flags |= VM_ALWAYSDUMP;
+ vma->vm_flags |= mm->def_flags;
+ vma->vm_page_prot = protection_map[vma->vm_flags & 7];
+ vma->vm_ops = &syscall_vm_ops;
+ vma->vm_mm = mm;
+
+ ret = insert_vm_struct(mm, vma);
+ if (unlikely(ret)) {
+ kmem_cache_free(vm_area_cachep, vma);
+ goto up_fail;
}
- up_write(&current->mm->mmap_sem);
+
+ current->mm->context.vdso = (void *)addr;
+ current_thread_info()->sysenter_return =
+ (void *)VDSO_SYM(&SYSENTER_RETURN);
+ vx_vmpages_inc(mm);
+up_fail:
+ up_write(&mm->mmap_sem);
+ return ret;
}
-static int __init vdso_setup(char *str)
+const char *arch_vma_name(struct vm_area_struct *vma)
{
- vdso_enabled = simple_strtoul(str, NULL, 0);
- return 1;
+ if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso)
+ return "[vdso]";
+ return NULL;
+}
+
+struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
+{
+ return NULL;
+}
+
+int in_gate_area(struct task_struct *task, unsigned long addr)
+{
+ return 0;
}
-__setup("vdso=", vdso_setup);
+int in_gate_area_no_task(unsigned long addr)
+{
+ return 0;
+}
+#endif