/******************************************************************************
 * mm/hypervisor.c
 *
 * Update page tables via the hypervisor.
 *
 * Copyright (c) 2002-2004, K A Fraser
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/hypervisor.h>
#include <asm/tlbflush.h>
#include <xen/balloon.h>
#include <xen/features.h>
#include <xen/interface/memory.h>

#ifdef CONFIG_X86_64
#define pmd_val_ma(v) ((v).pmd)
#else
#ifdef CONFIG_X86_PAE
# define pmd_val_ma(v) ((v).pmd)
# define pud_val_ma(v) ((v).pgd.pgd)
#else
# define pmd_val_ma(v) ((v).pud.pgd.pgd)
#endif
#endif
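
/*
 * The *_val_ma() accessors return the raw entry value, which already
 * contains a machine frame number, rather than the pseudo-physical value
 * that the generic p??_val() helpers would hand back; which field is
 * extracted simply depends on how many pagetable levels are configured.
 */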

void xen_l1_entry_update(pte_t *ptr, pte_t val)
{
        mmu_update_t u;
        u.ptr = virt_to_machine(ptr);
        u.val = pte_val_ma(val);
        BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
}

void xen_l2_entry_update(pmd_t *ptr, pmd_t val)
{
        mmu_update_t u;
        u.ptr = virt_to_machine(ptr);
        u.val = pmd_val_ma(val);
        BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
}

#ifdef CONFIG_X86_PAE
void xen_l3_entry_update(pud_t *ptr, pud_t val)
{
        mmu_update_t u;
        u.ptr = virt_to_machine(ptr);
        u.val = pud_val_ma(val);
        BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
}
#endif

#ifdef CONFIG_X86_64
void xen_l3_entry_update(pud_t *ptr, pud_t val)
{
        mmu_update_t u;
        u.ptr = virt_to_machine(ptr);
        u.val = val.pud;
        BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
}

void xen_l4_entry_update(pgd_t *ptr, pgd_t val)
{
        mmu_update_t u;
        u.ptr = virt_to_machine(ptr);
        u.val = val.pgd;
        BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
}
#endif /* CONFIG_X86_64 */

void xen_machphys_update(unsigned long mfn, unsigned long pfn)
{
        mmu_update_t u;
        if (xen_feature(XENFEAT_auto_translated_physmap)) {
                BUG_ON(pfn != mfn);
                return;
        }
        u.ptr = ((unsigned long long)mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
        u.val = pfn;
        BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
}
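
/*
 * The wrappers above each submit a single request, but
 * HYPERVISOR_mmu_update() takes an array, so related updates can be
 * batched into one hypercall. A minimal sketch (identifiers such as
 * "reqs", "ptep", "pte", "mfn" and "pfn" are hypothetical):
 *
 *      mmu_update_t reqs[2];
 *
 *      reqs[0].ptr = virt_to_machine(ptep);
 *      reqs[0].val = pte_val_ma(pte);
 *      reqs[1].ptr = ((unsigned long long)mfn << PAGE_SHIFT) |
 *                    MMU_MACHPHYS_UPDATE;
 *      reqs[1].val = pfn;
 *      BUG_ON(HYPERVISOR_mmu_update(reqs, 2, NULL, DOMID_SELF) < 0);
 *
 * Each request folded into a batch saves one guest/hypervisor transition.
 */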

void xen_pt_switch(unsigned long ptr)
{
        struct mmuext_op op;
        op.cmd = MMUEXT_NEW_BASEPTR;
        op.arg1.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
        BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void xen_new_user_pt(unsigned long ptr)
{
        struct mmuext_op op;
        op.cmd = MMUEXT_NEW_USER_BASEPTR;
        op.arg1.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
        BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void xen_tlb_flush(void)
{
        struct mmuext_op op;
        op.cmd = MMUEXT_TLB_FLUSH_LOCAL;
        BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}
EXPORT_SYMBOL(xen_tlb_flush);

void xen_invlpg(unsigned long ptr)
{
        struct mmuext_op op;
        op.cmd = MMUEXT_INVLPG_LOCAL;
        op.arg1.linear_addr = ptr & PAGE_MASK;
        BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}
EXPORT_SYMBOL(xen_invlpg);
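
/*
 * xen_tlb_flush() and xen_invlpg() act on the calling vCPU only. The
 * CONFIG_SMP variants below ask the hypervisor to perform the flush on all
 * vCPUs, or on a caller-supplied vcpumask, instead of relying on an
 * IPI-based shootdown inside the guest.
 */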

#ifdef CONFIG_SMP

void xen_tlb_flush_all(void)
{
        struct mmuext_op op;
        op.cmd = MMUEXT_TLB_FLUSH_ALL;
        BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void xen_tlb_flush_mask(cpumask_t *mask)
{
        struct mmuext_op op;
        if (cpus_empty(*mask))
                return;
        op.cmd = MMUEXT_TLB_FLUSH_MULTI;
        op.arg2.vcpumask = mask->bits;
        BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void xen_invlpg_all(unsigned long ptr)
{
        struct mmuext_op op;
        op.cmd = MMUEXT_INVLPG_ALL;
        op.arg1.linear_addr = ptr & PAGE_MASK;
        BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void xen_invlpg_mask(cpumask_t *mask, unsigned long ptr)
{
        struct mmuext_op op;
        if (cpus_empty(*mask))
                return;
        op.cmd = MMUEXT_INVLPG_MULTI;
        op.arg1.linear_addr = ptr & PAGE_MASK;
        op.arg2.vcpumask    = mask->bits;
        BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

#endif /* CONFIG_SMP */

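/*
 * Pinning asks Xen to validate a complete pagetable tree once and treat it
 * as a pagetable from then on, so it can later be installed with
 * MMUEXT_NEW_BASEPTR without being re-validated, while the hypervisor keeps
 * the pinned frames protected against unchecked modification. The level
 * pinned below matches the pagetable depth the kernel was configured with.
 */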
void xen_pgd_pin(unsigned long ptr)
{
        struct mmuext_op op;
#ifdef CONFIG_X86_64
        op.cmd = MMUEXT_PIN_L4_TABLE;
#elif defined(CONFIG_X86_PAE)
        op.cmd = MMUEXT_PIN_L3_TABLE;
#else
        op.cmd = MMUEXT_PIN_L2_TABLE;
#endif
        op.arg1.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
        BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void xen_pgd_unpin(unsigned long ptr)
{
        struct mmuext_op op;
        op.cmd = MMUEXT_UNPIN_TABLE;
        op.arg1.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
        BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void xen_set_ldt(unsigned long ptr, unsigned long len)
{
        struct mmuext_op op;
        op.cmd = MMUEXT_SET_LDT;
        op.arg1.linear_addr = ptr;
        op.arg2.nr_ents     = len;
        BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

/*
 * Bitmap is indexed by page number. If bit is set, the page is part of a
 * xen_create_contiguous_region() area of memory.
 */
unsigned long *contiguous_bitmap;

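/*
 * Callers can check whether a page lies inside such a region with an
 * ordinary bitmap test, as xen_destroy_contiguous_region() does below:
 *
 *      if (test_bit(__pa(vaddr) >> PAGE_SHIFT, contiguous_bitmap))
 *              ...   (the page belongs to a machine-contiguous extent)
 */
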
static void contiguous_bitmap_set(
        unsigned long first_page, unsigned long nr_pages)
{
        unsigned long start_off, end_off, curr_idx, end_idx;

        curr_idx  = first_page / BITS_PER_LONG;
        start_off = first_page & (BITS_PER_LONG-1);
        end_idx   = (first_page + nr_pages) / BITS_PER_LONG;
        end_off   = (first_page + nr_pages) & (BITS_PER_LONG-1);

        if (curr_idx == end_idx) {
                contiguous_bitmap[curr_idx] |=
                        ((1UL<<end_off)-1) & -(1UL<<start_off);
        } else {
                contiguous_bitmap[curr_idx] |= -(1UL<<start_off);
                while (++curr_idx < end_idx)
                        contiguous_bitmap[curr_idx] = ~0UL;
                contiguous_bitmap[curr_idx] |= (1UL<<end_off)-1;
        }
}
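
/*
 * Worked example, assuming BITS_PER_LONG == 64: for first_page = 3 and
 * nr_pages = 4 both indices land in word 0, start_off = 3 and end_off = 7,
 * so the mask is ((1UL<<7)-1) & -(1UL<<3) = 0x7f & ~0x7 = 0x78, i.e. bits
 * 3..6 are set, covering exactly pages 3, 4, 5 and 6.
 */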

static void contiguous_bitmap_clear(
        unsigned long first_page, unsigned long nr_pages)
{
        unsigned long start_off, end_off, curr_idx, end_idx;

        curr_idx  = first_page / BITS_PER_LONG;
        start_off = first_page & (BITS_PER_LONG-1);
        end_idx   = (first_page + nr_pages) / BITS_PER_LONG;
        end_off   = (first_page + nr_pages) & (BITS_PER_LONG-1);

        if (curr_idx == end_idx) {
                contiguous_bitmap[curr_idx] &=
                        -(1UL<<end_off) | ((1UL<<start_off)-1);
        } else {
                contiguous_bitmap[curr_idx] &= (1UL<<start_off)-1;
                while (++curr_idx != end_idx)
                        contiguous_bitmap[curr_idx] = 0;
                contiguous_bitmap[curr_idx] &= -(1UL<<end_off);
        }
}

/* Protected by balloon_lock. */
#define MAX_CONTIG_ORDER 9 /* 2MB */
static unsigned long discontig_frames[1<<MAX_CONTIG_ORDER];
static multicall_entry_t cr_mcl[1<<MAX_CONTIG_ORDER];
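
/*
 * With 4KB pages, MAX_CONTIG_ORDER 9 permits extents of up to
 * 1 << 9 = 512 pages (the 2MB noted above); discontig_frames[] and cr_mcl[]
 * are sized so that a worst-case exchange needs one frame slot and one
 * multicall entry per page.
 */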

/* Ensure multi-page extents are contiguous in machine memory. */
int xen_create_contiguous_region(
        unsigned long vstart, unsigned int order, unsigned int address_bits)
{
        unsigned long *in_frames = discontig_frames, out_frame;
        unsigned long  frame, i, flags;
        long           rc;
        int            success;
        struct xen_memory_exchange exchange = {
                .in = {
                        .nr_extents   = 1UL << order,
                        .extent_order = 0,
                        .domid        = DOMID_SELF
                },
                .out = {
                        .nr_extents   = 1,
                        .extent_order = order,
                        .address_bits = address_bits,
                        .domid        = DOMID_SELF
                }
        };

        /*
         * Currently an auto-translated guest will not perform I/O, nor will
         * it require PAE page directories below 4GB. Therefore any calls to
         * this function are redundant and can be ignored.
         */
        if (xen_feature(XENFEAT_auto_translated_physmap))
                return 0;

        if (unlikely(order > MAX_CONTIG_ORDER))
                return -ENOMEM;

        set_xen_guest_handle(exchange.in.extent_start, in_frames);
        set_xen_guest_handle(exchange.out.extent_start, &out_frame);

        scrub_pages(vstart, 1 << order);

        balloon_lock(flags);

        /* 1. Zap current PTEs, remembering MFNs. */
        for (i = 0; i < (1UL<<order); i++) {
                in_frames[i] = pfn_to_mfn((__pa(vstart) >> PAGE_SHIFT) + i);
                MULTI_update_va_mapping(cr_mcl + i, vstart + (i*PAGE_SIZE),
                                        __pte_ma(0), 0);
                set_phys_to_machine((__pa(vstart)>>PAGE_SHIFT)+i,
                        INVALID_P2M_ENTRY);
        }
        if (HYPERVISOR_multicall(cr_mcl, i))
                BUG();

        /* 2. Get a new contiguous memory extent. */
        out_frame = __pa(vstart) >> PAGE_SHIFT;
        rc = HYPERVISOR_memory_op(XENMEM_exchange, &exchange);
        success = (exchange.nr_exchanged == (1UL << order));
        BUG_ON(!success && ((exchange.nr_exchanged != 0) || (rc == 0)));
        BUG_ON(success && (rc != 0));
        if (unlikely(rc == -ENOSYS)) {
                /* Compatibility when XENMEM_exchange is unsupported. */
                if (HYPERVISOR_memory_op(XENMEM_decrease_reservation,
                                         &exchange.in) != (1UL << order))
                        BUG();
                success = (HYPERVISOR_memory_op(XENMEM_populate_physmap,
                                                &exchange.out) == 1);
                if (!success) {
                        /* Couldn't get special memory: fall back to normal. */
                        for (i = 0; i < (1UL<<order); i++)
                                in_frames[i] = (__pa(vstart)>>PAGE_SHIFT) + i;
                        if (HYPERVISOR_memory_op(XENMEM_populate_physmap,
                                                 &exchange.in) != (1UL<<order))
                                BUG();
                }
        }

        /* 3. Map the new extent in place of old pages. */
        for (i = 0; i < (1UL<<order); i++) {
                frame = success ? (out_frame + i) : in_frames[i];
                MULTI_update_va_mapping(cr_mcl + i, vstart + (i*PAGE_SIZE),
                                        pfn_pte_ma(frame, PAGE_KERNEL), 0);
                set_phys_to_machine((__pa(vstart)>>PAGE_SHIFT)+i, frame);
        }

        cr_mcl[i - 1].args[MULTI_UVMFLAGS_INDEX] = order
                                                   ? UVMF_TLB_FLUSH|UVMF_ALL
                                                   : UVMF_INVLPG|UVMF_ALL;
        if (HYPERVISOR_multicall(cr_mcl, i))
                BUG();

        if (success)
                contiguous_bitmap_set(__pa(vstart) >> PAGE_SHIFT,
                                      1UL << order);

        balloon_unlock(flags);

        return success ? 0 : -ENOMEM;
}
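
/*
 * Usage sketch: a driver needing a machine-contiguous, sub-4GB buffer for
 * DMA could allocate ordinary pages and then exchange them; the variables
 * below belong to the hypothetical caller.
 *
 *      unsigned int order = 2;         (4 pages, 16KB with 4KB pages)
 *      unsigned long vstart = __get_free_pages(GFP_KERNEL, order);
 *
 *      if (vstart && xen_create_contiguous_region(vstart, order, 32) == 0) {
 *              ...use the buffer; it is now contiguous in machine
 *                 memory and below 4GB...
 *              xen_destroy_contiguous_region(vstart, order);
 *      }
 *      free_pages(vstart, order);
 *
 * This is essentially the pattern the Xen dma_alloc_coherent() path uses
 * for coherent DMA allocations.
 */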

void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order)
{
        unsigned long *out_frames = discontig_frames, in_frame;
        unsigned long  frame, i, flags;
        long           rc;
        int            success;
        struct xen_memory_exchange exchange = {
                .in = {
                        .nr_extents   = 1,
                        .extent_order = order,
                        .domid        = DOMID_SELF
                },
                .out = {
                        .nr_extents   = 1UL << order,
                        .extent_order = 0,
                        .domid        = DOMID_SELF
                }
        };

        if (xen_feature(XENFEAT_auto_translated_physmap) ||
            !test_bit(__pa(vstart) >> PAGE_SHIFT, contiguous_bitmap))
                return;

        if (unlikely(order > MAX_CONTIG_ORDER))
                return;

        set_xen_guest_handle(exchange.in.extent_start, &in_frame);
        set_xen_guest_handle(exchange.out.extent_start, out_frames);

        scrub_pages(vstart, 1 << order);

        balloon_lock(flags);

        contiguous_bitmap_clear(__pa(vstart) >> PAGE_SHIFT, 1UL << order);

        /* 1. Find start MFN of contiguous extent. */
        in_frame = pfn_to_mfn(__pa(vstart) >> PAGE_SHIFT);

        /* 2. Zap current PTEs. */
        for (i = 0; i < (1UL<<order); i++) {
                MULTI_update_va_mapping(cr_mcl + i, vstart + (i*PAGE_SIZE),
                                        __pte_ma(0), 0);
                set_phys_to_machine((__pa(vstart)>>PAGE_SHIFT)+i,
                        INVALID_P2M_ENTRY);
                out_frames[i] = (__pa(vstart) >> PAGE_SHIFT) + i;
        }
        if (HYPERVISOR_multicall(cr_mcl, i))
                BUG();

        /* 3. Do the exchange for non-contiguous MFNs. */
        rc = HYPERVISOR_memory_op(XENMEM_exchange, &exchange);
        success = (exchange.nr_exchanged == 1);
        BUG_ON(!success && ((exchange.nr_exchanged != 0) || (rc == 0)));
        BUG_ON(success && (rc != 0));
        if (unlikely(rc == -ENOSYS)) {
                /* Compatibility when XENMEM_exchange is unsupported. */
                if (HYPERVISOR_memory_op(XENMEM_decrease_reservation,
                                         &exchange.in) != 1)
                        BUG();
                if (HYPERVISOR_memory_op(XENMEM_populate_physmap,
                                         &exchange.out) != (1UL << order))
                        BUG();
                success = 1;
        }

        /* 4. Map new pages in place of old pages. */
        for (i = 0; i < (1UL<<order); i++) {
                frame = success ? out_frames[i] : (in_frame + i);
                MULTI_update_va_mapping(cr_mcl + i, vstart + (i*PAGE_SIZE),
                                        pfn_pte_ma(frame, PAGE_KERNEL), 0);
                set_phys_to_machine((__pa(vstart)>>PAGE_SHIFT)+i, frame);
        }

        cr_mcl[i - 1].args[MULTI_UVMFLAGS_INDEX] = order
                                                   ? UVMF_TLB_FLUSH|UVMF_ALL
                                                   : UVMF_INVLPG|UVMF_ALL;
        if (HYPERVISOR_multicall(cr_mcl, i))
                BUG();

        balloon_unlock(flags);
}

#ifdef __i386__
int write_ldt_entry(void *ldt, int entry, __u32 entry_a, __u32 entry_b)
{
        __u32 *lp = (__u32 *)((char *)ldt + entry * 8);
        maddr_t mach_lp = arbitrary_virt_to_machine(lp);
        return HYPERVISOR_update_descriptor(
                mach_lp, (u64)entry_a | ((u64)entry_b<<32));
}
#endif
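
/*
 * Each LDT entry is an 8-byte descriptor, so the slot lives at
 * ldt + entry * 8 and the two 32-bit halves are recombined into the 64-bit
 * value passed to the hypervisor: entry_a forms bits 0-31 and entry_b bits
 * 32-63. The update goes through HYPERVISOR_update_descriptor() because Xen
 * must validate the descriptor before it is installed in guest-visible
 * descriptor tables.
 */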