This commit was manufactured by cvs2svn to create branch 'vserver'.
[linux-2.6.git] / arch / x86_64 / kernel / ldt-xen.c
1 /*
2  * linux/arch/x86_64/kernel/ldt.c
3  *
4  * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds
5  * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
6  * Copyright (C) 2002 Andi Kleen
7  * 
8  * This handles calls from both 32bit and 64bit mode.
9  */
10
11 #include <linux/errno.h>
12 #include <linux/sched.h>
13 #include <linux/string.h>
14 #include <linux/mm.h>
15 #include <linux/smp.h>
16 #include <linux/smp_lock.h>
17 #include <linux/vmalloc.h>
18 #include <linux/slab.h>
19
20 #include <asm/uaccess.h>
21 #include <asm/system.h>
22 #include <asm/ldt.h>
23 #include <asm/desc.h>
24 #include <asm/proto.h>
25 #include <asm/pgalloc.h>
26
#ifdef CONFIG_SMP /* avoids "defined but not used" warning */
/* IPI callback: reload the LDT of whatever mm is active on this CPU. */
static void flush_ldt(void *null)
{
	struct mm_struct *active = current->active_mm;

	if (!active)
		return;
	load_LDT(&active->context);
}
#endif
34
/*
 * Grow the LDT of @pc to hold at least @mincount entries.
 *
 * The requested count is rounded up to a multiple of 512 entries;
 * allocations larger than a page come from vmalloc, smaller ones from
 * kmalloc.  Existing entries are copied over and the tail is zeroed.
 *
 * If @reload is set the new table is made read-only for the hypervisor
 * (Xen rejects writable descriptor tables unless the
 * XENFEAT_writable_descriptor_tables feature is present) and loaded on
 * this CPU; other CPUs sharing the mm are kicked via IPI to reload.
 *
 * Returns 0 on success (including the no-op case where the table is
 * already big enough) or -ENOMEM.
 */
static int alloc_ldt(mm_context_t *pc, unsigned mincount, int reload)
{
	void *oldldt;
	void *newldt;
	unsigned oldsize;

	if (mincount <= (unsigned)pc->size)
		return 0;
	oldsize = pc->size;
	/* Round up to the next multiple of 512 entries. */
	mincount = (mincount+511)&(~511);
	if (mincount*LDT_ENTRY_SIZE > PAGE_SIZE)
		newldt = vmalloc(mincount*LDT_ENTRY_SIZE);
	else
		newldt = kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL);

	if (!newldt)
		return -ENOMEM;

	if (oldsize)
		memcpy(newldt, pc->ldt, oldsize*LDT_ENTRY_SIZE);
	oldldt = pc->ldt;
	memset(newldt+oldsize*LDT_ENTRY_SIZE, 0, (mincount-oldsize)*LDT_ENTRY_SIZE);
	/*
	 * Publish the new table before the new size: a concurrent reader
	 * (e.g. load_LDT from an IPI) must never see the larger size paired
	 * with the old, smaller table.  Hence the write barriers between
	 * each store.
	 */
	wmb();
	pc->ldt = newldt;
	wmb();
	pc->size = mincount;
	wmb();
	if (reload) {
#ifdef CONFIG_SMP
		cpumask_t mask;

		/* Stay on this CPU while deciding whom to IPI. */
		preempt_disable();
#endif
		/* Xen: descriptor-table pages must be read-only before use. */
		make_pages_readonly(
			pc->ldt,
			(pc->size * LDT_ENTRY_SIZE) / PAGE_SIZE,
			XENFEAT_writable_descriptor_tables);
		load_LDT(pc);
#ifdef CONFIG_SMP
		/* If the mm is active elsewhere, make those CPUs reload too. */
		mask = cpumask_of_cpu(smp_processor_id());
		if (!cpus_equal(current->mm->cpu_vm_mask, mask))
			smp_call_function(flush_ldt, NULL, 1, 1);
		preempt_enable();
#endif
	}
	if (oldsize) {
		/* Restore write access before handing pages back. */
		make_pages_writable(
			oldldt,
			(oldsize * LDT_ENTRY_SIZE) / PAGE_SIZE,
			XENFEAT_writable_descriptor_tables);
		/* Free with the allocator that matches the old size. */
		if (oldsize*LDT_ENTRY_SIZE > PAGE_SIZE)
			vfree(oldldt);
		else
			kfree(oldldt);
	}
	return 0;
}
92
93 static inline int copy_ldt(mm_context_t *new, mm_context_t *old)
94 {
95         int err = alloc_ldt(new, old->size, 0);
96         if (err < 0)
97                 return err;
98         memcpy(new->ldt, old->ldt, old->size*LDT_ENTRY_SIZE);
99         make_pages_readonly(
100                 new->ldt,
101                 (new->size * LDT_ENTRY_SIZE) / PAGE_SIZE,
102                 XENFEAT_writable_descriptor_tables);
103         return 0;
104 }
105
106 /*
107  * we do not have to muck with descriptors here, that is
108  * done in switch_mm() as needed.
109  */
110 int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
111 {
112         struct mm_struct * old_mm;
113         int retval = 0;
114
115         memset(&mm->context, 0, sizeof(mm->context));
116         init_MUTEX(&mm->context.sem);
117         old_mm = current->mm;
118         if (old_mm && old_mm->context.size > 0) {
119                 down(&old_mm->context.sem);
120                 retval = copy_ldt(&mm->context, &old_mm->context);
121                 up(&old_mm->context.sem);
122         }
123         if (retval == 0) {
124                 spin_lock(&mm_unpinned_lock);
125                 list_add(&mm->context.unpinned, &mm_unpinned);
126                 spin_unlock(&mm_unpinned_lock);
127         }
128         return retval;
129 }
130
/*
 * Tear down the per-mm LDT and unpin bookkeeping.
 *
 * Don't touch the LDT register - we're already in the next thread.
 */
void destroy_context(struct mm_struct *mm)
{
	if (mm->context.size) {
		/* If this mm is still the active one, drop its LDT first. */
		if (mm == current->active_mm)
			clear_LDT();
		/* Xen made these pages read-only; restore write access
		 * before returning them to the allocator. */
		make_pages_writable(
			mm->context.ldt,
			(mm->context.size * LDT_ENTRY_SIZE) / PAGE_SIZE,
			XENFEAT_writable_descriptor_tables);
		/* Free with the allocator that matches alloc_ldt()'s choice. */
		if (mm->context.size*LDT_ENTRY_SIZE > PAGE_SIZE)
			vfree(mm->context.ldt);
		else
			kfree(mm->context.ldt);
		mm->context.size = 0;
	}
	/* Still on the unpinned list?  Take it off under the lock. */
	if (!mm->context.pinned) {
		spin_lock(&mm_unpinned_lock);
		list_del(&mm->context.unpinned);
		spin_unlock(&mm_unpinned_lock);
	}
}
156
157 static int read_ldt(void __user * ptr, unsigned long bytecount)
158 {
159         int err;
160         unsigned long size;
161         struct mm_struct * mm = current->mm;
162
163         if (!mm->context.size)
164                 return 0;
165         if (bytecount > LDT_ENTRY_SIZE*LDT_ENTRIES)
166                 bytecount = LDT_ENTRY_SIZE*LDT_ENTRIES;
167
168         down(&mm->context.sem);
169         size = mm->context.size*LDT_ENTRY_SIZE;
170         if (size > bytecount)
171                 size = bytecount;
172
173         err = 0;
174         if (copy_to_user(ptr, mm->context.ldt, size))
175                 err = -EFAULT;
176         up(&mm->context.sem);
177         if (err < 0)
178                 goto error_return;
179         if (size != bytecount) {
180                 /* zero-fill the rest */
181                 if (clear_user(ptr+size, bytecount-size) != 0) {
182                         err = -EFAULT;
183                         goto error_return;
184                 }
185         }
186         return bytecount;
187 error_return:
188         return err;
189 }
190
191 static int read_default_ldt(void __user * ptr, unsigned long bytecount)
192 {
193         /* Arbitrary number */ 
194         /* x86-64 default LDT is all zeros */
195         if (bytecount > 128) 
196                 bytecount = 128;        
197         if (clear_user(ptr, bytecount))
198                 return -EFAULT;
199         return bytecount; 
200 }
201
202 static int write_ldt(void __user * ptr, unsigned long bytecount, int oldmode)
203 {
204         struct task_struct *me = current;
205         struct mm_struct * mm = me->mm;
206         __u32 entry_1, entry_2, *lp;
207         unsigned long mach_lp;
208         int error;
209         struct user_desc ldt_info;
210
211         error = -EINVAL;
212
213         if (bytecount != sizeof(ldt_info))
214                 goto out;
215         error = -EFAULT;        
216         if (copy_from_user(&ldt_info, ptr, bytecount))
217                 goto out;
218
219         error = -EINVAL;
220         if (ldt_info.entry_number >= LDT_ENTRIES)
221                 goto out;
222         if (ldt_info.contents == 3) {
223                 if (oldmode)
224                         goto out;
225                 if (ldt_info.seg_not_present == 0)
226                         goto out;
227         }
228
229         down(&mm->context.sem);
230         if (ldt_info.entry_number >= (unsigned)mm->context.size) {
231                 error = alloc_ldt(&current->mm->context, ldt_info.entry_number+1, 1);
232                 if (error < 0)
233                         goto out_unlock;
234         }
235
236         lp = (__u32 *) ((ldt_info.entry_number << 3) + (char *) mm->context.ldt);
237         mach_lp = arbitrary_virt_to_machine(lp);
238
239         /* Allow LDTs to be cleared by the user. */
240         if (ldt_info.base_addr == 0 && ldt_info.limit == 0) {
241                 if (oldmode || LDT_empty(&ldt_info)) {
242                         entry_1 = 0;
243                         entry_2 = 0;
244                         goto install;
245                 }
246         }
247
248         entry_1 = LDT_entry_a(&ldt_info);
249         entry_2 = LDT_entry_b(&ldt_info);
250         if (oldmode)
251                 entry_2 &= ~(1 << 20);
252
253         /* Install the new entry ...  */
254 install:
255         error = HYPERVISOR_update_descriptor(mach_lp, (unsigned long)((entry_1 | (unsigned long) entry_2 << 32)));
256
257 out_unlock:
258         up(&mm->context.sem);
259 out:
260         return error;
261 }
262
263 asmlinkage int sys_modify_ldt(int func, void __user *ptr, unsigned long bytecount)
264 {
265         int ret = -ENOSYS;
266
267         switch (func) {
268         case 0:
269                 ret = read_ldt(ptr, bytecount);
270                 break;
271         case 1:
272                 ret = write_ldt(ptr, bytecount, 1);
273                 break;
274         case 2:
275                 ret = read_default_ldt(ptr, bytecount);
276                 break;
277         case 0x11:
278                 ret = write_ldt(ptr, bytecount, 0);
279                 break;
280         }
281         return ret;
282 }