Merge to Fedora kernel-2.6.18-1.2260_FC5 patched with stable patch-2.6.18.5-vs2.0...
[linux-2.6.git] / arch / i386 / kernel / sysenter.c
1 /*
2  * linux/arch/i386/kernel/sysenter.c
3  *
4  * (C) Copyright 2002 Linus Torvalds
5  * Portions based on the vdso-randomization code from exec-shield:
6  * Copyright(C) 2005-2006, Red Hat, Inc., Ingo Molnar
7  *
8  * This file contains the needed initializations to support sysenter.
9  */
10
11 #include <linux/init.h>
12 #include <linux/smp.h>
13 #include <linux/thread_info.h>
14 #include <linux/sched.h>
15 #include <linux/gfp.h>
16 #include <linux/string.h>
17 #include <linux/elf.h>
18 #include <linux/mm.h>
19 #include <linux/mman.h>
20 #include <linux/module.h>
21 #include <linux/vs_base.h>
22 #include <linux/vs_memory.h>
23
24 #include <asm/cpufeature.h>
25 #include <asm/msr.h>
26 #include <asm/pgtable.h>
27 #include <asm/unistd.h>
28
29 #ifdef CONFIG_XEN
30 #include <xen/interface/callback.h>
31 #endif
32
33 /*
34  * Should the kernel map a VDSO page into processes and pass its
35  * address down to glibc upon exec()?
36  */
37 unsigned int __read_mostly vdso_enabled = 1;
38
39 EXPORT_SYMBOL_GPL(vdso_enabled);
40
41 static int __init vdso_setup(char *s)
42 {
43         vdso_enabled = simple_strtoul(s, NULL, 0);
44
45         return 1;
46 }
47
48 __setup("vdso=", vdso_setup);
49
50 extern asmlinkage void sysenter_entry(void);
51
/*
 * Enable sysenter-based system calls on the current CPU: point the
 * SYSENTER MSRs at the kernel entry stub (sysenter_entry) and at this
 * CPU's TSS ring-0 stack.  Compiled out when CONFIG_X86_NO_TSS is set
 * (presumably the Xen case, where sysenter_setup() registers a
 * hypervisor callback instead — confirm against the config).
 */
void enable_sep_cpu(void)
{
#ifndef CONFIG_X86_NO_TSS
	int cpu = get_cpu();	/* pin to this CPU while writing per-CPU MSRs */
	struct tss_struct *tss = &per_cpu(init_tss, cpu);

	/* CPUs without the SEP feature cannot use sysenter at all. */
	if (!boot_cpu_has(X86_FEATURE_SEP)) {
		put_cpu();
		return;
	}

	/* Kernel stack for sysenter = top of this CPU's TSS structure. */
	tss->ss1 = __KERNEL_CS;
	tss->esp1 = sizeof(struct tss_struct) + (unsigned long) tss;
	wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);
	wrmsr(MSR_IA32_SYSENTER_ESP, tss->esp1, 0);
	wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long) sysenter_entry, 0);
	put_cpu();
#endif
}
71
72 /*
73  * These symbols are defined by vsyscall.o to mark the bounds
74  * of the ELF DSO images included therein.
75  */
76 extern const char vsyscall_int80_start, vsyscall_int80_end;
77 extern const char vsyscall_sysenter_start, vsyscall_sysenter_end;
78 static void *syscall_page;
79
80 int __init sysenter_setup(void)
81 {
82         syscall_page = (void *)get_zeroed_page(GFP_ATOMIC);
83
84 #ifdef CONFIG_XEN
85         if (boot_cpu_has(X86_FEATURE_SEP)) {
86                 struct callback_register sysenter = {
87                         .type = CALLBACKTYPE_sysenter,
88                         .address = { __KERNEL_CS, (unsigned long)sysenter_entry },
89                 };
90
91                 if (HYPERVISOR_callback_op(CALLBACKOP_register, &sysenter) < 0)
92                         clear_bit(X86_FEATURE_SEP, boot_cpu_data.x86_capability);
93         }
94 #endif
95
96 #ifdef CONFIG_COMPAT_VDSO
97         __set_fixmap(FIX_VDSO, __pa(syscall_page), PAGE_READONLY);
98         printk("Compat vDSO mapped to %08lx.\n", __fix_to_virt(FIX_VDSO));
99 #else
100         /*
101          * In the non-compat case the ELF coredumping code needs the fixmap:
102          */
103 #ifdef CONFIG_XEN
104         __set_fixmap(FIX_VDSO, virt_to_machine(syscall_page), PAGE_KERNEL_RO);
105 #else
106         __set_fixmap(FIX_VDSO, __pa(syscall_page), PAGE_KERNEL_RO);
107 #endif
108 #endif
109
110         if (!boot_cpu_has(X86_FEATURE_SEP)) {
111                 memcpy(syscall_page,
112                        &vsyscall_int80_start,
113                        &vsyscall_int80_end - &vsyscall_int80_start);
114                 return 0;
115         }
116
117         memcpy(syscall_page,
118                &vsyscall_sysenter_start,
119                &vsyscall_sysenter_end - &vsyscall_sysenter_start);
120
121         return 0;
122 }
123
124 static struct page *syscall_nopage(struct vm_area_struct *vma,
125                                 unsigned long adr, int *type)
126 {
127         struct page *p = virt_to_page(adr - vma->vm_start + syscall_page);
128         get_page(p);
129         return p;
130 }
131
/*
 * Intentionally empty ->close handler.  Its mere presence in
 * syscall_vm_ops prevents this VMA from being merged with adjacent
 * mappings (VMA merging is skipped for VMAs with special vm_ops).
 */
static void syscall_vma_close(struct vm_area_struct *vma)
{
}
136
/* Operations for the vsyscall VMA: fault in the shared syscall_page,
 * and carry a no-op ->close purely to defeat VMA merging. */
static struct vm_operations_struct syscall_vm_ops = {
	.close = syscall_vma_close,
	.nopage = syscall_nopage,
};
141
142 /* Defined in vsyscall-sysenter.S */
143 extern void SYSENTER_RETURN;
144
145 /* Setup a VMA at program startup for the vsyscall page */
/*
 * Setup a VMA at program startup for the vsyscall page.
 *
 * Called from the ELF loader during exec().  Picks an unmapped
 * page-sized slot, hand-builds a VMA backed by syscall_vm_ops, inserts
 * it into the new mm, and records its address plus the sysenter return
 * address for the user-side vsyscall stub.
 *
 * Returns 0 on success or a negative errno (address-space exhaustion
 * or VMA allocation/insertion failure).
 */
int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack,
				unsigned long start_code, unsigned long interp_map_address)
{
	struct vm_area_struct *vma;
	struct mm_struct *mm = current->mm;
	unsigned long addr;
	int ret;

	/* All exit paths go through up_fail to release mmap_sem. */
	down_write(&mm->mmap_sem);
	/* Find a free one-page slot; the trailing arguments select the
	 * exec-shield variant's placement policy — NOTE(review): confirm
	 * against get_unmapped_area_prot()'s signature. */
	addr = get_unmapped_area_prot(NULL, 0, PAGE_SIZE, 0, 0, 1);
	if (IS_ERR_VALUE(addr)) {
		ret = addr;
		goto up_fail;
	}

	vma = kmem_cache_zalloc(vm_area_cachep, SLAB_KERNEL);
	if (!vma) {
		ret = -ENOMEM;
		goto up_fail;
	}

	vma->vm_start = addr;
	vma->vm_end = addr + PAGE_SIZE;
	/* MAYWRITE to allow gdb to COW and set breakpoints */
	vma->vm_flags = VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC|VM_MAYWRITE;
	vma->vm_flags |= mm->def_flags;
	/* Derive the page protection from the low three (rwx) flag bits. */
	vma->vm_page_prot = protection_map[vma->vm_flags & 7];
	vma->vm_ops = &syscall_vm_ops;
	vma->vm_mm = mm;

	ret = insert_vm_struct(mm, vma);
	if (unlikely(ret)) {
		/* VMA never became visible; safe to free it directly. */
		kmem_cache_free(vm_area_cachep, vma);
		goto up_fail;
	}

	/* Publish the vDSO address (exported to userspace via AT_SYSINFO)
	 * and the address the kernel returns to after sysenter. */
	current->mm->context.vdso = (void *)addr;
	current_thread_info()->sysenter_return =
				    (void *)VDSO_SYM(&SYSENTER_RETURN);
	/* vserver accounting: charge the new page to this mm. */
	vx_vmpages_inc(mm);
up_fail:
	up_write(&mm->mmap_sem);
	return ret;
}
190
191 const char *arch_vma_name(struct vm_area_struct *vma)
192 {
193         if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso)
194                 return "[vdso]";
195         return NULL;
196 }
197
/*
 * No fixed-address gate VMA on this configuration: the vsyscall page
 * lives in a real, per-process VMA (set up above), so there is no
 * synthetic gate area to report.
 */
struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
{
	return NULL;
}
202
/* No gate area exists (see get_gate_vma), so no address is ever in it. */
int in_gate_area(struct task_struct *task, unsigned long addr)
{
	return 0;
}
207
/* Task-less variant of in_gate_area(): likewise always false here. */
int in_gate_area_no_task(unsigned long addr)
{
	return 0;
}