/******************************************************************************
 * mm/hypervisor.c
 *
 * Update page tables via the hypervisor.
 *
 * Copyright (c) 2002-2004, K A Fraser
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <linux/config.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/hypervisor.h>
#include <xen/balloon.h>
#include <xen/features.h>
#include <xen/interface/memory.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <asm/tlbflush.h>

/*
 * Extract the raw (machine-format) value from a page-table entry of the
 * appropriate level; the struct nesting differs between the non-PAE, PAE
 * and x86-64 pagetable layouts.
 */
#ifdef CONFIG_X86_64
#define pmd_val_ma(v) ((v).pmd)
#else
#ifdef CONFIG_X86_PAE
# define pmd_val_ma(v) ((v).pmd)
# define pud_val_ma(v) ((v).pgd.pgd)
#else
# define pmd_val_ma(v) ((v).pud.pgd.pgd)
#endif
#endif

void xen_l1_entry_update(pte_t *ptr, pte_t val)
{
	mmu_update_t u;
	u.ptr = virt_to_machine(ptr);
	u.val = pte_val_ma(val);
	BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
}

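/*
 * Illustrative sketch (not in the original source): HYPERVISOR_mmu_update
 * accepts an array of requests, so callers touching several PTEs can batch
 * them into a single hypercall rather than issuing one per entry.  A
 * hypothetical helper, reusing the same mmu_update_t layout as above:
 *
 *	static void example_update_ptes(pte_t *ptep[], pte_t val[], int n)
 *	{
 *		mmu_update_t u[16];
 *		int i;
 *
 *		BUG_ON(n > 16);
 *		for (i = 0; i < n; i++) {
 *			u[i].ptr = virt_to_machine(ptep[i]);
 *			u[i].val = pte_val_ma(val[i]);
 *		}
 *		BUG_ON(HYPERVISOR_mmu_update(u, n, NULL, DOMID_SELF) < 0);
 *	}
 */
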
void xen_l2_entry_update(pmd_t *ptr, pmd_t val)
{
	mmu_update_t u;
	u.ptr = virt_to_machine(ptr);
	u.val = pmd_val_ma(val);
	BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
}

#ifdef CONFIG_X86_PAE
void xen_l3_entry_update(pud_t *ptr, pud_t val)
{
	mmu_update_t u;
	u.ptr = virt_to_machine(ptr);
	u.val = pud_val_ma(val);
	BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
}
#endif

#ifdef CONFIG_X86_64
void xen_l3_entry_update(pud_t *ptr, pud_t val)
{
	mmu_update_t u;
	u.ptr = virt_to_machine(ptr);
	u.val = val.pud;
	BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
}

void xen_l4_entry_update(pgd_t *ptr, pgd_t val)
{
	mmu_update_t u;
	u.ptr = virt_to_machine(ptr);
	u.val = val.pgd;
	BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
}
#endif /* CONFIG_X86_64 */

void xen_machphys_update(unsigned long mfn, unsigned long pfn)
{
	mmu_update_t u;
	if (xen_feature(XENFEAT_auto_translated_physmap)) {
		BUG_ON(pfn != mfn);
		return;
	}
	u.ptr = ((unsigned long long)mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
	u.val = pfn;
	BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
}

void xen_pt_switch(unsigned long ptr)
{
	struct mmuext_op op;
	op.cmd = MMUEXT_NEW_BASEPTR;
	op.arg1.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
	BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void xen_new_user_pt(unsigned long ptr)
{
	struct mmuext_op op;
	op.cmd = MMUEXT_NEW_USER_BASEPTR;
	op.arg1.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
	BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void xen_tlb_flush(void)
{
	struct mmuext_op op;
	op.cmd = MMUEXT_TLB_FLUSH_LOCAL;
	BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}
EXPORT_SYMBOL(xen_tlb_flush);

void xen_invlpg(unsigned long ptr)
{
	struct mmuext_op op;
	op.cmd = MMUEXT_INVLPG_LOCAL;
	op.arg1.linear_addr = ptr & PAGE_MASK;
	BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}
EXPORT_SYMBOL(xen_invlpg);

#ifdef CONFIG_SMP

void xen_tlb_flush_all(void)
{
	struct mmuext_op op;
	op.cmd = MMUEXT_TLB_FLUSH_ALL;
	BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void xen_tlb_flush_mask(cpumask_t *mask)
{
	struct mmuext_op op;
	if (cpus_empty(*mask))
		return;
	op.cmd = MMUEXT_TLB_FLUSH_MULTI;
	op.arg2.vcpumask = mask->bits;
	BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void xen_invlpg_all(unsigned long ptr)
{
	struct mmuext_op op;
	op.cmd = MMUEXT_INVLPG_ALL;
	op.arg1.linear_addr = ptr & PAGE_MASK;
	BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void xen_invlpg_mask(cpumask_t *mask, unsigned long ptr)
{
	struct mmuext_op op;
	if (cpus_empty(*mask))
		return;
	op.cmd = MMUEXT_INVLPG_MULTI;
	op.arg1.linear_addr = ptr & PAGE_MASK;
	op.arg2.vcpumask    = mask->bits;
	BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

#endif /* CONFIG_SMP */

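/*
 * Illustrative sketch (not in the original source): the *_mask variants above
 * take a cpumask of target vCPUs.  A hypothetical cross-CPU shootdown of a
 * single address might exclude the local CPU and flush it separately:
 *
 *	cpumask_t others = cpu_online_map;
 *
 *	cpu_clear(smp_processor_id(), others);
 *	xen_invlpg_mask(&others, addr);
 *	xen_invlpg(addr);
 */
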
void xen_pgd_pin(unsigned long ptr)
{
	struct mmuext_op op;
#ifdef CONFIG_X86_64
	op.cmd = MMUEXT_PIN_L4_TABLE;
#elif defined(CONFIG_X86_PAE)
	op.cmd = MMUEXT_PIN_L3_TABLE;
#else
	op.cmd = MMUEXT_PIN_L2_TABLE;
#endif
	op.arg1.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
	BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void xen_pgd_unpin(unsigned long ptr)
{
	struct mmuext_op op;
	op.cmd = MMUEXT_UNPIN_TABLE;
	op.arg1.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
	BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void xen_set_ldt(unsigned long ptr, unsigned long len)
{
	struct mmuext_op op;
	op.cmd = MMUEXT_SET_LDT;
	op.arg1.linear_addr = ptr;
	op.arg2.nr_ents     = len;
	BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

/*
 * Bitmap is indexed by page number. If bit is set, the page is part of a
 * xen_create_contiguous_region() area of memory.
 */
unsigned long *contiguous_bitmap;

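/*
 * Illustrative sketch (not in the original source): consumers such as DMA
 * mapping code can test whether a pseudo-physical page belongs to a region
 * built by xen_create_contiguous_region() with an ordinary bit test, e.g.
 *
 *	static int example_page_is_contig(unsigned long paddr)
 *	{
 *		return test_bit(paddr >> PAGE_SHIFT, contiguous_bitmap);
 *	}
 *
 * The same test_bit() check is used by xen_destroy_contiguous_region() below.
 */
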
static void contiguous_bitmap_set(
	unsigned long first_page, unsigned long nr_pages)
{
	unsigned long start_off, end_off, curr_idx, end_idx;

	curr_idx  = first_page / BITS_PER_LONG;
	start_off = first_page & (BITS_PER_LONG-1);
	end_idx   = (first_page + nr_pages) / BITS_PER_LONG;
	end_off   = (first_page + nr_pages) & (BITS_PER_LONG-1);

	if (curr_idx == end_idx) {
		contiguous_bitmap[curr_idx] |=
			((1UL<<end_off)-1) & -(1UL<<start_off);
	} else {
		contiguous_bitmap[curr_idx] |= -(1UL<<start_off);
		while (++curr_idx < end_idx)
			contiguous_bitmap[curr_idx] = ~0UL;
		contiguous_bitmap[curr_idx] |= (1UL<<end_off)-1;
	}
}

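/*
 * Worked example (added for clarity, not in the original source): with 64-bit
 * longs, contiguous_bitmap_set(3, 4) stays within word 0, so the single-word
 * path applies:
 *
 *	((1UL << 7) - 1) & -(1UL << 3)  ==  0x7f & ~0x07  ==  0x78
 *
 * i.e. bits 3..6 are set, marking exactly pages 3, 4, 5 and 6.
 * contiguous_bitmap_clear() below undoes this with the complementary mask.
 */
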
static void contiguous_bitmap_clear(
	unsigned long first_page, unsigned long nr_pages)
{
	unsigned long start_off, end_off, curr_idx, end_idx;

	curr_idx  = first_page / BITS_PER_LONG;
	start_off = first_page & (BITS_PER_LONG-1);
	end_idx   = (first_page + nr_pages) / BITS_PER_LONG;
	end_off   = (first_page + nr_pages) & (BITS_PER_LONG-1);

	if (curr_idx == end_idx) {
		contiguous_bitmap[curr_idx] &=
			-(1UL<<end_off) | ((1UL<<start_off)-1);
	} else {
		contiguous_bitmap[curr_idx] &= (1UL<<start_off)-1;
		while (++curr_idx != end_idx)
			contiguous_bitmap[curr_idx] = 0;
		contiguous_bitmap[curr_idx] &= -(1UL<<end_off);
	}
}

/* Protected by balloon_lock. */
#define MAX_CONTIG_ORDER 9 /* 2MB */
static unsigned long discontig_frames[1<<MAX_CONTIG_ORDER];
static multicall_entry_t cr_mcl[1<<MAX_CONTIG_ORDER];

/* Ensure multi-page extents are contiguous in machine memory. */
int xen_create_contiguous_region(
	unsigned long vstart, unsigned int order, unsigned int address_bits)
{
	unsigned long *in_frames = discontig_frames, out_frame;
	unsigned long  frame, i, flags;
	long           rc;
	int            success;
	struct xen_memory_exchange exchange = {
		.in = {
			.nr_extents   = 1UL << order,
			.extent_order = 0,
			.domid        = DOMID_SELF
		},
		.out = {
			.nr_extents   = 1,
			.extent_order = order,
			.address_bits = address_bits,
			.domid        = DOMID_SELF
		}
	};

	/*
	 * Currently an auto-translated guest will not perform I/O, nor will
	 * it require PAE page directories below 4GB. Therefore any calls to
	 * this function are redundant and can be ignored.
	 */
	if (xen_feature(XENFEAT_auto_translated_physmap))
		return 0;

	if (unlikely(order > MAX_CONTIG_ORDER))
		return -ENOMEM;

	set_xen_guest_handle(exchange.in.extent_start, in_frames);
	set_xen_guest_handle(exchange.out.extent_start, &out_frame);

	scrub_pages(vstart, 1 << order);

	balloon_lock(flags);

	/* 1. Zap current PTEs, remembering MFNs. */
	for (i = 0; i < (1UL<<order); i++) {
		in_frames[i] = pfn_to_mfn((__pa(vstart) >> PAGE_SHIFT) + i);
		MULTI_update_va_mapping(cr_mcl + i, vstart + (i*PAGE_SIZE),
					__pte_ma(0), 0);
		set_phys_to_machine((__pa(vstart)>>PAGE_SHIFT)+i,
			INVALID_P2M_ENTRY);
	}
	if (HYPERVISOR_multicall(cr_mcl, i))
		BUG();

	/* 2. Get a new contiguous memory extent. */
	out_frame = __pa(vstart) >> PAGE_SHIFT;
	rc = HYPERVISOR_memory_op(XENMEM_exchange, &exchange);
	success = (exchange.nr_exchanged == (1UL << order));
	BUG_ON(!success && ((exchange.nr_exchanged != 0) || (rc == 0)));
	BUG_ON(success && (rc != 0));
	if (unlikely(rc == -ENOSYS)) {
		/* Compatibility when XENMEM_exchange is unsupported. */
		if (HYPERVISOR_memory_op(XENMEM_decrease_reservation,
					 &exchange.in) != (1UL << order))
			BUG();
		success = (HYPERVISOR_memory_op(XENMEM_populate_physmap,
						&exchange.out) == 1);
		if (!success) {
			/* Couldn't get special memory: fall back to normal. */
			for (i = 0; i < (1UL<<order); i++)
				in_frames[i] = (__pa(vstart)>>PAGE_SHIFT) + i;
			if (HYPERVISOR_memory_op(XENMEM_populate_physmap,
						 &exchange.in) != (1UL<<order))
				BUG();
		}
	}

	/* 3. Map the new extent in place of old pages. */
	for (i = 0; i < (1UL<<order); i++) {
		frame = success ? (out_frame + i) : in_frames[i];
		MULTI_update_va_mapping(cr_mcl + i, vstart + (i*PAGE_SIZE),
					pfn_pte_ma(frame, PAGE_KERNEL), 0);
		set_phys_to_machine((__pa(vstart)>>PAGE_SHIFT)+i, frame);
	}

	cr_mcl[i - 1].args[MULTI_UVMFLAGS_INDEX] = order
						   ? UVMF_TLB_FLUSH|UVMF_ALL
						   : UVMF_INVLPG|UVMF_ALL;
	if (HYPERVISOR_multicall(cr_mcl, i))
		BUG();

	if (success)
		contiguous_bitmap_set(__pa(vstart) >> PAGE_SHIFT,
				      1UL << order);

	balloon_unlock(flags);

	return success ? 0 : -ENOMEM;
}

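/*
 * Illustrative usage sketch (not in the original source): a driver needing a
 * machine-contiguous buffer suitable for DMA below 4GB might allocate pages
 * normally and then exchange the backing frames (error handling elided;
 * address_bits == 32 corresponds to a 32-bit DMA limit):
 *
 *	unsigned int order = 2;
 *	unsigned long vstart = __get_free_pages(GFP_KERNEL, order);
 *
 *	if (vstart == 0 ||
 *	    xen_create_contiguous_region(vstart, order, 32) != 0)
 *		goto fail;
 */
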
void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order)
{
	unsigned long *out_frames = discontig_frames, in_frame;
	unsigned long  frame, i, flags;
	long           rc;
	int            success;
	struct xen_memory_exchange exchange = {
		.in = {
			.nr_extents   = 1,
			.extent_order = order,
			.domid        = DOMID_SELF
		},
		.out = {
			.nr_extents   = 1UL << order,
			.extent_order = 0,
			.domid        = DOMID_SELF
		}
	};

	if (xen_feature(XENFEAT_auto_translated_physmap) ||
	    !test_bit(__pa(vstart) >> PAGE_SHIFT, contiguous_bitmap))
		return;

	if (unlikely(order > MAX_CONTIG_ORDER))
		return;

	set_xen_guest_handle(exchange.in.extent_start, &in_frame);
	set_xen_guest_handle(exchange.out.extent_start, out_frames);

	scrub_pages(vstart, 1 << order);

	balloon_lock(flags);

	contiguous_bitmap_clear(__pa(vstart) >> PAGE_SHIFT, 1UL << order);

	/* 1. Find start MFN of contiguous extent. */
	in_frame = pfn_to_mfn(__pa(vstart) >> PAGE_SHIFT);

	/* 2. Zap current PTEs. */
	for (i = 0; i < (1UL<<order); i++) {
		MULTI_update_va_mapping(cr_mcl + i, vstart + (i*PAGE_SIZE),
					__pte_ma(0), 0);
		set_phys_to_machine((__pa(vstart)>>PAGE_SHIFT)+i,
			INVALID_P2M_ENTRY);
		out_frames[i] = (__pa(vstart) >> PAGE_SHIFT) + i;
	}
	if (HYPERVISOR_multicall(cr_mcl, i))
		BUG();

	/* 3. Do the exchange for non-contiguous MFNs. */
	rc = HYPERVISOR_memory_op(XENMEM_exchange, &exchange);
	success = (exchange.nr_exchanged == 1);
	BUG_ON(!success && ((exchange.nr_exchanged != 0) || (rc == 0)));
	BUG_ON(success && (rc != 0));
	if (unlikely(rc == -ENOSYS)) {
		/* Compatibility when XENMEM_exchange is unsupported. */
		if (HYPERVISOR_memory_op(XENMEM_decrease_reservation,
					 &exchange.in) != 1)
			BUG();
		if (HYPERVISOR_memory_op(XENMEM_populate_physmap,
					 &exchange.out) != (1UL << order))
			BUG();
		success = 1;
	}

	/* 4. Map new pages in place of old pages. */
	for (i = 0; i < (1UL<<order); i++) {
		frame = success ? out_frames[i] : (in_frame + i);
		MULTI_update_va_mapping(cr_mcl + i, vstart + (i*PAGE_SIZE),
					pfn_pte_ma(frame, PAGE_KERNEL), 0);
		set_phys_to_machine((__pa(vstart)>>PAGE_SHIFT)+i, frame);
	}

	cr_mcl[i - 1].args[MULTI_UVMFLAGS_INDEX] = order
						   ? UVMF_TLB_FLUSH|UVMF_ALL
						   : UVMF_INVLPG|UVMF_ALL;
	if (HYPERVISOR_multicall(cr_mcl, i))
		BUG();

	balloon_unlock(flags);
}

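/*
 * Matching teardown sketch (not in the original source): a buffer obtained as
 * in the allocation example above xen_destroy_contiguous_region() would be
 * released by reversing the exchange before freeing the pages:
 *
 *	xen_destroy_contiguous_region(vstart, order);
 *	free_pages(vstart, order);
 */
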
#ifdef __i386__
int write_ldt_entry(void *ldt, int entry, __u32 entry_a, __u32 entry_b)
{
	__u32 *lp = (__u32 *)((char *)ldt + entry * 8);
	maddr_t mach_lp = arbitrary_virt_to_machine(lp);
	return HYPERVISOR_update_descriptor(
		mach_lp, (u64)entry_a | ((u64)entry_b<<32));
}
#endif