/******************************************************************************
 *
 * Update page tables via the hypervisor.
 *
 * Copyright (c) 2002-2004, K A Fraser
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#include <linux/config.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/hypervisor.h>
#include <xen/balloon.h>
#include <xen/features.h>
#include <xen/interface/memory.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <asm/tlbflush.h>
#ifdef CONFIG_X86_64
#define pmd_val_ma(v) (v).pmd
#else
#ifdef CONFIG_X86_PAE
# define pmd_val_ma(v) ((v).pmd)
# define pud_val_ma(v) ((v).pgd.pgd)
#else
# define pmd_val_ma(v) ((v).pud.pgd.pgd)
#endif
#endif
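
/*
 * Note: page-table entries handed to the hypervisor contain raw machine
 * frame numbers. The *_val_ma() ("machine address") helpers extract an
 * entry's raw value, without the machine-to-pseudophysical translation
 * that the generic p?d_val() accessors may apply under Xen.
 */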

void xen_l1_entry_update(pte_t *ptr, pte_t val)
{
	mmu_update_t u;
	u.ptr = virt_to_machine(ptr);
	u.val = pte_val_ma(val);
	BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
}
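
/*
 * These update helpers share one pattern: u.ptr names the entry to write
 * by its machine address, u.val supplies the complete new entry, and the
 * hypervisor validates the write before applying it. Several updates can
 * be batched into one hypercall; a sketch, illustrative only and not used
 * in this file (ptep0/ptep1 and pte0/pte1 are hypothetical):
 *
 *	mmu_update_t u[2];
 *	u[0].ptr = virt_to_machine(ptep0); u[0].val = pte_val_ma(pte0);
 *	u[1].ptr = virt_to_machine(ptep1); u[1].val = pte_val_ma(pte1);
 *	BUG_ON(HYPERVISOR_mmu_update(u, 2, NULL, DOMID_SELF) < 0);
 */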

void xen_l2_entry_update(pmd_t *ptr, pmd_t val)
{
	mmu_update_t u;
	u.ptr = virt_to_machine(ptr);
	u.val = pmd_val_ma(val);
	BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
}

#ifdef CONFIG_X86_PAE
void xen_l3_entry_update(pud_t *ptr, pud_t val)
{
	mmu_update_t u;
	u.ptr = virt_to_machine(ptr);
	u.val = pud_val_ma(val);
	BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
}
#endif

#ifdef CONFIG_X86_64
void xen_l3_entry_update(pud_t *ptr, pud_t val)
{
	mmu_update_t u;
	u.ptr = virt_to_machine(ptr);
	u.val = val.pud;
	BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
}

void xen_l4_entry_update(pgd_t *ptr, pgd_t val)
{
	mmu_update_t u;
	u.ptr = virt_to_machine(ptr);
	u.val = val.pgd;
	BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
}
#endif /* CONFIG_X86_64 */

void xen_machphys_update(unsigned long mfn, unsigned long pfn)
{
	mmu_update_t u;
	if (xen_feature(XENFEAT_auto_translated_physmap)) {
		BUG_ON(pfn != mfn);
		return;
	}
	u.ptr = ((unsigned long long)mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
	u.val = pfn;
	BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
}
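
/*
 * MMU_MACHPHYS_UPDATE ORed into u.ptr selects the machine-to-physical
 * table rather than a page-table entry: the hypervisor records that
 * machine frame 'mfn' now backs guest pseudo-physical frame 'pfn'.
 * An auto-translated guest has no M2P mapping of its own to maintain,
 * hence the early return above.
 */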

void xen_pt_switch(unsigned long ptr)
{
	struct mmuext_op op;
	op.cmd = MMUEXT_NEW_BASEPTR;
	op.arg1.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
	BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void xen_new_user_pt(unsigned long ptr)
{
	struct mmuext_op op;
	op.cmd = MMUEXT_NEW_USER_BASEPTR;
	op.arg1.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
	BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void xen_tlb_flush(void)
{
	struct mmuext_op op;
	op.cmd = MMUEXT_TLB_FLUSH_LOCAL;
	BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}
EXPORT_SYMBOL(xen_tlb_flush);

void xen_invlpg(unsigned long ptr)
{
	struct mmuext_op op;
	op.cmd = MMUEXT_INVLPG_LOCAL;
	op.arg1.linear_addr = ptr & PAGE_MASK;
	BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}
EXPORT_SYMBOL(xen_invlpg);

#ifdef CONFIG_SMP

void xen_tlb_flush_all(void)
{
	struct mmuext_op op;
	op.cmd = MMUEXT_TLB_FLUSH_ALL;
	BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void xen_tlb_flush_mask(cpumask_t *mask)
{
	struct mmuext_op op;
	if ( cpus_empty(*mask) )
		return;
	op.cmd = MMUEXT_TLB_FLUSH_MULTI;
	op.arg2.vcpumask = mask->bits;
	BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void xen_invlpg_all(unsigned long ptr)
{
	struct mmuext_op op;
	op.cmd = MMUEXT_INVLPG_ALL;
	op.arg1.linear_addr = ptr & PAGE_MASK;
	BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void xen_invlpg_mask(cpumask_t *mask, unsigned long ptr)
{
	struct mmuext_op op;
	if ( cpus_empty(*mask) )
		return;
	op.cmd = MMUEXT_INVLPG_MULTI;
	op.arg1.linear_addr = ptr & PAGE_MASK;
	op.arg2.vcpumask = mask->bits;
	BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

#endif /* CONFIG_SMP */
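
/*
 * The flush helpers above come in three strengths: _LOCAL acts on the
 * calling VCPU only, _ALL on every VCPU of the domain, and _MULTI on the
 * VCPUs named in a bitmap. The hypervisor performs any cross-CPU work,
 * so the guest raises no IPIs for the _ALL/_MULTI variants.
 */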

void xen_pgd_pin(unsigned long ptr)
{
	struct mmuext_op op;
#ifdef CONFIG_X86_64
	op.cmd = MMUEXT_PIN_L4_TABLE;
#elif defined(CONFIG_X86_PAE)
	op.cmd = MMUEXT_PIN_L3_TABLE;
#else
	op.cmd = MMUEXT_PIN_L2_TABLE;
#endif
	op.arg1.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
	BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void xen_pgd_unpin(unsigned long ptr)
{
	struct mmuext_op op;
	op.cmd = MMUEXT_UNPIN_TABLE;
	op.arg1.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
	BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void xen_set_ldt(unsigned long ptr, unsigned long len)
{
	struct mmuext_op op;
	op.cmd = MMUEXT_SET_LDT;
	op.arg1.linear_addr = ptr;
	op.arg2.nr_ents = len;
	BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

/*
 * Bitmap is indexed by page number. If bit is set, the page is part of a
 * xen_create_contiguous_region() area of memory.
 */
unsigned long *contiguous_bitmap;

static void contiguous_bitmap_set(
	unsigned long first_page, unsigned long nr_pages)
{
	unsigned long start_off, end_off, curr_idx, end_idx;

	curr_idx  = first_page / BITS_PER_LONG;
	start_off = first_page & (BITS_PER_LONG-1);
	end_idx   = (first_page + nr_pages) / BITS_PER_LONG;
	end_off   = (first_page + nr_pages) & (BITS_PER_LONG-1);

	if (curr_idx == end_idx) {
		contiguous_bitmap[curr_idx] |=
			((1UL<<end_off)-1) & -(1UL<<start_off);
	} else {
		contiguous_bitmap[curr_idx] |= -(1UL<<start_off);
		while ( ++curr_idx < end_idx )
			contiguous_bitmap[curr_idx] = ~0UL;
		contiguous_bitmap[curr_idx] |= (1UL<<end_off)-1;
	}
}
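
/*
 * Worked example: with first_page = 3 and nr_pages = 2 on a 64-bit
 * build, curr_idx == end_idx == 0, start_off = 3, end_off = 5, and the
 * single-word case ORs in ((1UL<<5)-1) & -(1UL<<3) = 0x18, i.e. bits
 * 3 and 4 -- exactly the two pages being marked.
 */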

static void contiguous_bitmap_clear(
	unsigned long first_page, unsigned long nr_pages)
{
	unsigned long start_off, end_off, curr_idx, end_idx;

	curr_idx  = first_page / BITS_PER_LONG;
	start_off = first_page & (BITS_PER_LONG-1);
	end_idx   = (first_page + nr_pages) / BITS_PER_LONG;
	end_off   = (first_page + nr_pages) & (BITS_PER_LONG-1);

	if (curr_idx == end_idx) {
		contiguous_bitmap[curr_idx] &=
			-(1UL<<end_off) | ((1UL<<start_off)-1);
	} else {
		contiguous_bitmap[curr_idx] &= (1UL<<start_off)-1;
		while ( ++curr_idx != end_idx )
			contiguous_bitmap[curr_idx] = 0;
		contiguous_bitmap[curr_idx] &= -(1UL<<end_off);
	}
}

/* Protected by balloon_lock. */
#define MAX_CONTIG_ORDER 9 /* 2MB */
static unsigned long discontig_frames[1<<MAX_CONTIG_ORDER];
static multicall_entry_t cr_mcl[1<<MAX_CONTIG_ORDER];
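
/*
 * Both arrays are scratch space for the exchange operations below; the
 * balloon lock serializes callers, so a single static copy suffices.
 * MAX_CONTIG_ORDER caps an exchange at 1<<9 pages (2MB with 4kB pages).
 */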

/* Ensure multi-page extents are contiguous in machine memory. */
int xen_create_contiguous_region(
	unsigned long vstart, unsigned int order, unsigned int address_bits)
{
	unsigned long *in_frames = discontig_frames, out_frame;
	unsigned long  frame, i, flags;
	long           rc;
	int            success;
	struct xen_memory_exchange exchange = {
		.in = {
			.nr_extents   = 1UL << order,
			.extent_order = 0,
			.domid        = DOMID_SELF
		},
		.out = {
			.nr_extents   = 1,
			.extent_order = order,
			.address_bits = address_bits,
			.domid        = DOMID_SELF
		}
	};

	/*
	 * Currently an auto-translated guest will not perform I/O, nor will
	 * it require PAE page directories below 4GB. Therefore any calls to
	 * this function are redundant and can be ignored.
	 */
	if (xen_feature(XENFEAT_auto_translated_physmap))
		return 0;

	if (unlikely(order > MAX_CONTIG_ORDER))
		return -ENOMEM;

	set_xen_guest_handle(exchange.in.extent_start, in_frames);
	set_xen_guest_handle(exchange.out.extent_start, &out_frame);

	scrub_pages(vstart, 1 << order);

	balloon_lock(flags);

	/* 1. Zap current PTEs, remembering MFNs. */
	for (i = 0; i < (1UL<<order); i++) {
		in_frames[i] = pfn_to_mfn((__pa(vstart) >> PAGE_SHIFT) + i);
		MULTI_update_va_mapping(cr_mcl + i, vstart + (i*PAGE_SIZE),
					__pte_ma(0), 0);
		set_phys_to_machine((__pa(vstart)>>PAGE_SHIFT)+i,
			INVALID_P2M_ENTRY);
	}
	if (HYPERVISOR_multicall(cr_mcl, i))
		BUG();

	/* 2. Get a new contiguous memory extent. */
	out_frame = __pa(vstart) >> PAGE_SHIFT;
	rc = HYPERVISOR_memory_op(XENMEM_exchange, &exchange);
	success = (exchange.nr_exchanged == (1UL << order));
	BUG_ON(!success && ((exchange.nr_exchanged != 0) || (rc == 0)));
	BUG_ON(success && (rc != 0));
	if (unlikely(rc == -ENOSYS)) {
		/* Compatibility when XENMEM_exchange is unsupported. */
		if (HYPERVISOR_memory_op(XENMEM_decrease_reservation,
					 &exchange.in) != (1UL << order))
			BUG();
		success = (HYPERVISOR_memory_op(XENMEM_populate_physmap,
						&exchange.out) == 1);
		if (!success) {
			/* Couldn't get special memory: fall back to normal. */
			for (i = 0; i < (1UL<<order); i++)
				in_frames[i] = (__pa(vstart)>>PAGE_SHIFT) + i;
			if (HYPERVISOR_memory_op(XENMEM_populate_physmap,
						 &exchange.in) != (1UL<<order))
				BUG();
		}
	}

	/* 3. Map the new extent in place of old pages. */
	for (i = 0; i < (1UL<<order); i++) {
		frame = success ? (out_frame + i) : in_frames[i];
		MULTI_update_va_mapping(cr_mcl + i, vstart + (i*PAGE_SIZE),
					pfn_pte_ma(frame, PAGE_KERNEL), 0);
		set_phys_to_machine((__pa(vstart)>>PAGE_SHIFT)+i, frame);
	}
	cr_mcl[i - 1].args[MULTI_UVMFLAGS_INDEX] = order
						   ? UVMF_TLB_FLUSH|UVMF_ALL
						   : UVMF_INVLPG|UVMF_ALL;
	if (HYPERVISOR_multicall(cr_mcl, i))
		BUG();

	if (success)
		contiguous_bitmap_set(__pa(vstart) >> PAGE_SHIFT,
				      1UL << order);

	balloon_unlock(flags);

	return success ? 0 : -ENOMEM;
}
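
/*
 * Usage sketch (illustrative only): a driver needing a machine-contiguous
 * buffer below 4GB for DMA might write, assuming 'vaddr' came from
 * __get_free_pages(GFP_KERNEL, 2):
 *
 *	if (xen_create_contiguous_region(vaddr, 2, 32) == 0) {
 *		// the 16kB at vaddr are now contiguous in machine memory
 *		...
 *		xen_destroy_contiguous_region(vaddr, 2);
 *	}
 */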

void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order)
{
	unsigned long *out_frames = discontig_frames, in_frame;
	unsigned long  frame, i, flags;
	long           rc;
	int            success;
	struct xen_memory_exchange exchange = {
		.in = {
			.nr_extents   = 1,
			.extent_order = order,
			.domid        = DOMID_SELF
		},
		.out = {
			.nr_extents   = 1UL << order,
			.extent_order = 0,
			.domid        = DOMID_SELF
		}
	};

	if (xen_feature(XENFEAT_auto_translated_physmap) ||
	    !test_bit(__pa(vstart) >> PAGE_SHIFT, contiguous_bitmap))
		return;

	if (unlikely(order > MAX_CONTIG_ORDER))
		return;

	set_xen_guest_handle(exchange.in.extent_start, &in_frame);
	set_xen_guest_handle(exchange.out.extent_start, out_frames);

	scrub_pages(vstart, 1 << order);

	balloon_lock(flags);

	contiguous_bitmap_clear(__pa(vstart) >> PAGE_SHIFT, 1UL << order);

	/* 1. Find start MFN of contiguous extent. */
	in_frame = pfn_to_mfn(__pa(vstart) >> PAGE_SHIFT);

	/* 2. Zap current PTEs. */
	for (i = 0; i < (1UL<<order); i++) {
		MULTI_update_va_mapping(cr_mcl + i, vstart + (i*PAGE_SIZE),
					__pte_ma(0), 0);
		set_phys_to_machine((__pa(vstart)>>PAGE_SHIFT)+i,
			INVALID_P2M_ENTRY);
		out_frames[i] = (__pa(vstart) >> PAGE_SHIFT) + i;
	}
	if (HYPERVISOR_multicall(cr_mcl, i))
		BUG();

	/* 3. Do the exchange for non-contiguous MFNs. */
	rc = HYPERVISOR_memory_op(XENMEM_exchange, &exchange);
	success = (exchange.nr_exchanged == 1);
	BUG_ON(!success && ((exchange.nr_exchanged != 0) || (rc == 0)));
	BUG_ON(success && (rc != 0));
	if (unlikely(rc == -ENOSYS)) {
		/* Compatibility when XENMEM_exchange is unsupported. */
		if (HYPERVISOR_memory_op(XENMEM_decrease_reservation,
					 &exchange.in) != 1)
			BUG();
		if (HYPERVISOR_memory_op(XENMEM_populate_physmap,
					 &exchange.out) != (1UL << order))
			BUG();
		success = 1;
	}

	/* 4. Map new pages in place of old pages. */
	for (i = 0; i < (1UL<<order); i++) {
		frame = success ? out_frames[i] : (in_frame + i);
		MULTI_update_va_mapping(cr_mcl + i, vstart + (i*PAGE_SIZE),
					pfn_pte_ma(frame, PAGE_KERNEL), 0);
		set_phys_to_machine((__pa(vstart)>>PAGE_SHIFT)+i, frame);
	}
	cr_mcl[i - 1].args[MULTI_UVMFLAGS_INDEX] = order
						   ? UVMF_TLB_FLUSH|UVMF_ALL
						   : UVMF_INVLPG|UVMF_ALL;
	if (HYPERVISOR_multicall(cr_mcl, i))
		BUG();

	balloon_unlock(flags);
}

int write_ldt_entry(void *ldt, int entry, __u32 entry_a, __u32 entry_b)
{
	__u32 *lp = (__u32 *)((char *)ldt + entry * 8);
	maddr_t mach_lp = arbitrary_virt_to_machine(lp);
	return HYPERVISOR_update_descriptor(
		mach_lp, (u64)entry_a | ((u64)entry_b<<32));
}