Merge to Fedora kernel-2.6.18-1.2260_FC5 patched with stable patch-2.6.18.5-vs2.0...
[linux-2.6.git] / arch / i386 / kernel / sysenter.c
1 /*
2  * linux/arch/i386/kernel/sysenter.c
3  *
4  * (C) Copyright 2002 Linus Torvalds
5  * Portions based on the vdso-randomization code from exec-shield:
6  * Copyright(C) 2005-2006, Red Hat, Inc., Ingo Molnar
7  *
8  * This file contains the needed initializations to support sysenter.
9  */
10
11 #include <linux/init.h>
12 #include <linux/smp.h>
13 #include <linux/thread_info.h>
14 #include <linux/sched.h>
15 #include <linux/gfp.h>
16 #include <linux/string.h>
17 #include <linux/elf.h>
18 #include <linux/mm.h>
19 #include <linux/mman.h>
20 #include <linux/module.h>
21 #include <linux/vs_memory.h>
22
23 #include <asm/cpufeature.h>
24 #include <asm/msr.h>
25 #include <asm/pgtable.h>
26 #include <asm/unistd.h>
27
28 #ifdef CONFIG_XEN
29 #include <xen/interface/callback.h>
30 #endif
31
32 /*
33  * Should the kernel map a VDSO page into processes and pass its
34  * address down to glibc upon exec()?
35  */
36 unsigned int __read_mostly vdso_enabled = 1;
37
38 EXPORT_SYMBOL_GPL(vdso_enabled);
39
40 static int __init vdso_setup(char *s)
41 {
42         vdso_enabled = simple_strtoul(s, NULL, 0);
43
44         return 1;
45 }
46
47 __setup("vdso=", vdso_setup);
48
49 extern asmlinkage void sysenter_entry(void);
50
51 void enable_sep_cpu(void)
52 {
53 #ifndef CONFIG_X86_NO_TSS
54         int cpu = get_cpu();
55         struct tss_struct *tss = &per_cpu(init_tss, cpu);
56
57         if (!boot_cpu_has(X86_FEATURE_SEP)) {
58                 put_cpu();
59                 return;
60         }
61
62         tss->ss1 = __KERNEL_CS;
63         tss->esp1 = sizeof(struct tss_struct) + (unsigned long) tss;
64         wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);
65         wrmsr(MSR_IA32_SYSENTER_ESP, tss->esp1, 0);
66         wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long) sysenter_entry, 0);
67         put_cpu();      
68 #endif
69 }
70
71 /*
72  * These symbols are defined by vsyscall.o to mark the bounds
73  * of the ELF DSO images included therein.
74  */
75 extern const char vsyscall_int80_start, vsyscall_int80_end;
76 extern const char vsyscall_sysenter_start, vsyscall_sysenter_end;
77 static void *syscall_page;
78
79 int __init sysenter_setup(void)
80 {
81         syscall_page = (void *)get_zeroed_page(GFP_ATOMIC);
82
83 #ifdef CONFIG_XEN
84         if (boot_cpu_has(X86_FEATURE_SEP)) {
85                 struct callback_register sysenter = {
86                         .type = CALLBACKTYPE_sysenter,
87                         .address = { __KERNEL_CS, (unsigned long)sysenter_entry },
88                 };
89
90                 if (HYPERVISOR_callback_op(CALLBACKOP_register, &sysenter) < 0)
91                         clear_bit(X86_FEATURE_SEP, boot_cpu_data.x86_capability);
92         }
93 #endif
94
95 #ifdef CONFIG_COMPAT_VDSO
96         __set_fixmap(FIX_VDSO, __pa(syscall_page), PAGE_READONLY);
97         printk("Compat vDSO mapped to %08lx.\n", __fix_to_virt(FIX_VDSO));
98 #else
99         /*
100          * In the non-compat case the ELF coredumping code needs the fixmap:
101          */
102 #ifdef CONFIG_XEN
103         __set_fixmap(FIX_VDSO, virt_to_machine(syscall_page), PAGE_KERNEL_RO);
104 #else
105         __set_fixmap(FIX_VDSO, __pa(syscall_page), PAGE_KERNEL_RO);
106 #endif
107 #endif
108
109         if (!boot_cpu_has(X86_FEATURE_SEP)) {
110                 memcpy(syscall_page,
111                        &vsyscall_int80_start,
112                        &vsyscall_int80_end - &vsyscall_int80_start);
113                 return 0;
114         }
115
116         memcpy(syscall_page,
117                &vsyscall_sysenter_start,
118                &vsyscall_sysenter_end - &vsyscall_sysenter_start);
119
120         return 0;
121 }
122
123 static struct page *syscall_nopage(struct vm_area_struct *vma,
124                                 unsigned long adr, int *type)
125 {
126         struct page *p = virt_to_page(adr - vma->vm_start + syscall_page);
127         get_page(p);
128         return p;
129 }
130
/*
 * Intentionally empty ->close handler.  It is installed in
 * syscall_vm_ops solely to prevent VMA merging for the vsyscall mapping.
 */
static void syscall_vma_close(struct vm_area_struct *vma)
{
	/* nothing to release */
}
135
136 static struct vm_operations_struct syscall_vm_ops = {
137         .close = syscall_vma_close,
138         .nopage = syscall_nopage,
139 };
140
141 /* Defined in vsyscall-sysenter.S */
142 extern void SYSENTER_RETURN;
143
144 /* Setup a VMA at program startup for the vsyscall page */
145 int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack,
146                                 unsigned long start_code, unsigned long interp_map_address)
147 {
148         struct vm_area_struct *vma;
149         struct mm_struct *mm = current->mm;
150         unsigned long addr;
151         int ret;
152
153         down_write(&mm->mmap_sem);
154         addr = get_unmapped_area_prot(NULL, 0, PAGE_SIZE, 0, 0, 1);
155         if (IS_ERR_VALUE(addr)) {
156                 ret = addr;
157                 goto up_fail;
158         }
159
160         vma = kmem_cache_zalloc(vm_area_cachep, SLAB_KERNEL);
161         if (!vma) {
162                 ret = -ENOMEM;
163                 goto up_fail;
164         }
165
166         vma->vm_start = addr;
167         vma->vm_end = addr + PAGE_SIZE;
168         /* MAYWRITE to allow gdb to COW and set breakpoints */
169         vma->vm_flags = VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC|VM_MAYWRITE;
170         vma->vm_flags |= mm->def_flags;
171         vma->vm_page_prot = protection_map[vma->vm_flags & 7];
172         vma->vm_ops = &syscall_vm_ops;
173         vma->vm_mm = mm;
174
175         ret = insert_vm_struct(mm, vma);
176         if (unlikely(ret)) {
177                 kmem_cache_free(vm_area_cachep, vma);
178                 goto up_fail;
179         }
180
181         current->mm->context.vdso = (void *)addr;
182         current_thread_info()->sysenter_return =
183                                     (void *)VDSO_SYM(&SYSENTER_RETURN);
184         vx_vmpages_inc(mm);
185 up_fail:
186         up_write(&mm->mmap_sem);
187         return ret;
188 }
189
190 const char *arch_vma_name(struct vm_area_struct *vma)
191 {
192         if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso)
193                 return "[vdso]";
194         return NULL;
195 }
196
/*
 * This implementation has no gate VMA: NULL is returned for every task.
 */
struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
{
	return NULL;
}
201
/*
 * Reports whether 'addr' lies in a gate area of 'task'.  This
 * implementation has none, so the answer is always 0.
 */
int in_gate_area(struct task_struct *task, unsigned long addr)
{
	return 0;
}
206
/*
 * Task-less variant of the gate area check.  This implementation has no
 * gate area, so 0 is returned for every address.
 */
int in_gate_area_no_task(unsigned long addr)
{
	return 0;
}