Merge to Fedora kernel-2.6.18-1.2224_FC5 patched with stable patch-2.6.18.1-vs2.0...
[linux-2.6.git] / arch / i386 / kernel / sysenter.c
1 /*
2  * linux/arch/i386/kernel/sysenter.c
3  *
4  * (C) Copyright 2002 Linus Torvalds
5  * Portions based on the vdso-randomization code from exec-shield:
6  * Copyright(C) 2005-2006, Red Hat, Inc., Ingo Molnar
7  *
8  * This file contains the needed initializations to support sysenter.
9  */
10
11 #include <linux/init.h>
12 #include <linux/smp.h>
13 #include <linux/thread_info.h>
14 #include <linux/sched.h>
15 #include <linux/gfp.h>
16 #include <linux/string.h>
17 #include <linux/elf.h>
18 #include <linux/mm.h>
19 #include <linux/module.h>
20 #include <linux/vs_memory.h>
21
22 #include <asm/cpufeature.h>
23 #include <asm/msr.h>
24 #include <asm/pgtable.h>
25 #include <asm/unistd.h>
26
27 #ifdef CONFIG_XEN
28 #include <xen/interface/callback.h>
29 #endif
30
31 /*
32  * Should the kernel map a VDSO page into processes and pass its
33  * address down to glibc upon exec()?
34  */
35 unsigned int __read_mostly vdso_enabled = 1;
36
37 EXPORT_SYMBOL_GPL(vdso_enabled);
38
39 static int __init vdso_setup(char *s)
40 {
41         vdso_enabled = simple_strtoul(s, NULL, 0);
42
43         return 1;
44 }
45
46 __setup("vdso=", vdso_setup);
47
48 extern asmlinkage void sysenter_entry(void);
49
50 void enable_sep_cpu(void)
51 {
52 #ifndef CONFIG_X86_NO_TSS
53         int cpu = get_cpu();
54         struct tss_struct *tss = &per_cpu(init_tss, cpu);
55
56         if (!boot_cpu_has(X86_FEATURE_SEP)) {
57                 put_cpu();
58                 return;
59         }
60
61         tss->ss1 = __KERNEL_CS;
62         tss->esp1 = sizeof(struct tss_struct) + (unsigned long) tss;
63         wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);
64         wrmsr(MSR_IA32_SYSENTER_ESP, tss->esp1, 0);
65         wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long) sysenter_entry, 0);
66         put_cpu();      
67 #endif
68 }
69
70 /*
71  * These symbols are defined by vsyscall.o to mark the bounds
72  * of the ELF DSO images included therein.
73  */
74 extern const char vsyscall_int80_start, vsyscall_int80_end;
75 extern const char vsyscall_sysenter_start, vsyscall_sysenter_end;
76 static void *syscall_page;
77
78 int __init sysenter_setup(void)
79 {
80         syscall_page = (void *)get_zeroed_page(GFP_ATOMIC);
81
82 #ifdef CONFIG_XEN
83         if (boot_cpu_has(X86_FEATURE_SEP)) {
84                 struct callback_register sysenter = {
85                         .type = CALLBACKTYPE_sysenter,
86                         .address = { __KERNEL_CS, (unsigned long)sysenter_entry },
87                 };
88
89                 if (HYPERVISOR_callback_op(CALLBACKOP_register, &sysenter) < 0)
90                         clear_bit(X86_FEATURE_SEP, boot_cpu_data.x86_capability);
91         }
92 #endif
93
94 #ifdef CONFIG_COMPAT_VDSO
95         __set_fixmap(FIX_VDSO, __pa(syscall_page), PAGE_READONLY);
96         printk("Compat vDSO mapped to %08lx.\n", __fix_to_virt(FIX_VDSO));
97 #else
98         /*
99          * In the non-compat case the ELF coredumping code needs the fixmap:
100          */
101 #ifdef CONFIG_XEN
102         __set_fixmap(FIX_VDSO, virt_to_machine(syscall_page), PAGE_KERNEL_RO);
103 #else
104         __set_fixmap(FIX_VDSO, __pa(syscall_page), PAGE_KERNEL_RO);
105 #endif
106 #endif
107
108         if (!boot_cpu_has(X86_FEATURE_SEP)) {
109                 memcpy(syscall_page,
110                        &vsyscall_int80_start,
111                        &vsyscall_int80_end - &vsyscall_int80_start);
112                 return 0;
113         }
114
115         memcpy(syscall_page,
116                &vsyscall_sysenter_start,
117                &vsyscall_sysenter_end - &vsyscall_sysenter_start);
118
119         return 0;
120 }
121
122 static struct page *syscall_nopage(struct vm_area_struct *vma,
123                                 unsigned long adr, int *type)
124 {
125         struct page *p = virt_to_page(adr - vma->vm_start + syscall_page);
126         get_page(p);
127         return p;
128 }
129
/*
 * Intentionally empty ->close hook.  It exists only to prevent VMA
 * merging of the vsyscall mapping; there is nothing to tear down.
 * NOTE(review): presumably the mm core refuses to merge VMAs that have
 * a ->close operation - confirm against the VMA merge code.
 */
static void syscall_vma_close(struct vm_area_struct *vma)
{
	/* nothing to do */
}
134
135 static struct vm_operations_struct syscall_vm_ops = {
136         .close = syscall_vma_close,
137         .nopage = syscall_nopage,
138 };
139
140 /* Defined in vsyscall-sysenter.S */
141 extern void SYSENTER_RETURN;
142
143 /* Setup a VMA at program startup for the vsyscall page */
144 int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack,
145                                 unsigned long start_code, unsigned long interp_map_address)
146 {
147         struct vm_area_struct *vma;
148         struct mm_struct *mm = current->mm;
149         unsigned long addr;
150         int ret;
151
152         down_write(&mm->mmap_sem);
153         addr = get_unmapped_area_prot(NULL, 0, PAGE_SIZE, 0, 0, 1);
154         if (IS_ERR_VALUE(addr)) {
155                 ret = addr;
156                 goto up_fail;
157         }
158
159         vma = kmem_cache_zalloc(vm_area_cachep, SLAB_KERNEL);
160         if (!vma) {
161                 ret = -ENOMEM;
162                 goto up_fail;
163         }
164
165         vma->vm_start = addr;
166         vma->vm_end = addr + PAGE_SIZE;
167         /* MAYWRITE to allow gdb to COW and set breakpoints */
168         vma->vm_flags = VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC|VM_MAYWRITE;
169         vma->vm_flags |= mm->def_flags;
170         vma->vm_page_prot = protection_map[vma->vm_flags & 7];
171         vma->vm_ops = &syscall_vm_ops;
172         vma->vm_mm = mm;
173
174         ret = insert_vm_struct(mm, vma);
175         if (unlikely(ret)) {
176                 kmem_cache_free(vm_area_cachep, vma);
177                 goto up_fail;
178         }
179
180         current->mm->context.vdso = (void *)addr;
181         current_thread_info()->sysenter_return =
182                                     (void *)VDSO_SYM(&SYSENTER_RETURN);
183         vx_vmpages_inc(mm);
184 up_fail:
185         up_write(&mm->mmap_sem);
186         return ret;
187 }
188
189 const char *arch_vma_name(struct vm_area_struct *vma)
190 {
191         if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso)
192                 return "[vdso]";
193         return NULL;
194 }
195
/*
 * Gate VMA lookup stub: this file provides no gate VMA, so the answer
 * is NULL for every task.  @tsk is not examined.
 */
struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
{
	struct vm_area_struct *no_gate_vma = NULL;

	return no_gate_vma;
}
200
/*
 * Gate-area membership stub: reports 0 ("not in a gate area") for
 * every task and address.  Neither argument is examined.
 */
int in_gate_area(struct task_struct *task, unsigned long addr)
{
	const int inside_gate = 0;

	return inside_gate;
}
205
/*
 * Task-less gate-area membership stub: reports 0 ("not in a gate
 * area") for every address.  @addr is not examined.
 */
int in_gate_area_no_task(unsigned long addr)
{
	const int inside_gate = 0;

	return inside_gate;
}