/******************************************************************************
 * mm/hypervisor.c
 *
 * Update page tables via the hypervisor.
 *
 * Copyright (c) 2002-2004, K A Fraser
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/hypervisor.h>
#include <xen/balloon.h>
#include <xen/features.h>
#include <xen/interface/memory.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <asm/tlbflush.h>

#ifdef CONFIG_X86_64
#define pmd_val_ma(v) (v).pmd
#else
#ifdef CONFIG_X86_PAE
# define pmd_val_ma(v) ((v).pmd)
# define pud_val_ma(v) ((v).pgd.pgd)
#else
# define pmd_val_ma(v) ((v).pud.pgd.pgd)
#endif
#endif

void xen_l1_entry_update(pte_t *ptr, pte_t val)
{
	mmu_update_t u;
	u.ptr = virt_to_machine(ptr);
	u.val = pte_val_ma(val);
	BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
}

void xen_l2_entry_update(pmd_t *ptr, pmd_t val)
{
	mmu_update_t u;
	u.ptr = virt_to_machine(ptr);
	u.val = pmd_val_ma(val);
	BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
}

#ifdef CONFIG_X86_PAE
void xen_l3_entry_update(pud_t *ptr, pud_t val)
{
	mmu_update_t u;
	u.ptr = virt_to_machine(ptr);
	u.val = pud_val_ma(val);
	BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
}
#endif

#ifdef CONFIG_X86_64
void xen_l3_entry_update(pud_t *ptr, pud_t val)
{
	mmu_update_t u;
	u.ptr = virt_to_machine(ptr);
	u.val = val.pud;
	BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
}

void xen_l4_entry_update(pgd_t *ptr, pgd_t val)
{
	mmu_update_t u;
	u.ptr = virt_to_machine(ptr);
	u.val = val.pgd;
	BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
}
#endif /* CONFIG_X86_64 */

void xen_machphys_update(unsigned long mfn, unsigned long pfn)
{
	mmu_update_t u;
	if (xen_feature(XENFEAT_auto_translated_physmap)) {
		BUG_ON(pfn != mfn);
		return;
	}
	u.ptr = ((unsigned long long)mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
	u.val = pfn;
	BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
}

void xen_pt_switch(unsigned long ptr)
{
	struct mmuext_op op;
	op.cmd = MMUEXT_NEW_BASEPTR;
	op.arg1.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
	BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void xen_new_user_pt(unsigned long ptr)
{
	struct mmuext_op op;
	op.cmd = MMUEXT_NEW_USER_BASEPTR;
	op.arg1.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
	BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}
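/*
 * Illustrative sketch only (kept out of the build with #if 0): the
 * single-entry helpers above each issue one hypercall per update, but
 * HYPERVISOR_mmu_update() accepts an array of mmu_update_t requests, so
 * several L1 updates can be batched into one hypercall.  The helper name
 * example_l1_entry_update_batch() and the fixed batch size are hypothetical.
 */
#if 0
static void example_l1_entry_update_batch(pte_t *ptes[], pte_t vals[], int nr)
{
	mmu_update_t u[16];
	int i;

	BUG_ON(nr > 16);
	for (i = 0; i < nr; i++) {
		u[i].ptr = virt_to_machine(ptes[i]);	/* machine address of the PTE */
		u[i].val = pte_val_ma(vals[i]);		/* new PTE value (machine frame) */
	}
	/* One hypercall validates and applies all nr updates. */
	BUG_ON(HYPERVISOR_mmu_update(u, nr, NULL, DOMID_SELF) < 0);
}
#endif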
void xen_tlb_flush(void)
{
	struct mmuext_op op;
	op.cmd = MMUEXT_TLB_FLUSH_LOCAL;
	BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}
EXPORT_SYMBOL(xen_tlb_flush);

void xen_invlpg(unsigned long ptr)
{
	struct mmuext_op op;
	op.cmd = MMUEXT_INVLPG_LOCAL;
	op.arg1.linear_addr = ptr & PAGE_MASK;
	BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}
EXPORT_SYMBOL(xen_invlpg);

#ifdef CONFIG_SMP

void xen_tlb_flush_all(void)
{
	struct mmuext_op op;
	op.cmd = MMUEXT_TLB_FLUSH_ALL;
	BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void xen_tlb_flush_mask(cpumask_t *mask)
{
	struct mmuext_op op;
	if ( cpus_empty(*mask) )
		return;
	op.cmd = MMUEXT_TLB_FLUSH_MULTI;
	op.arg2.vcpumask = mask->bits;
	BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void xen_invlpg_all(unsigned long ptr)
{
	struct mmuext_op op;
	op.cmd = MMUEXT_INVLPG_ALL;
	op.arg1.linear_addr = ptr & PAGE_MASK;
	BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void xen_invlpg_mask(cpumask_t *mask, unsigned long ptr)
{
	struct mmuext_op op;
	if ( cpus_empty(*mask) )
		return;
	op.cmd = MMUEXT_INVLPG_MULTI;
	op.arg1.linear_addr = ptr & PAGE_MASK;
	op.arg2.vcpumask = mask->bits;
	BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

#endif /* CONFIG_SMP */

void xen_pgd_pin(unsigned long ptr)
{
	struct mmuext_op op;
#ifdef CONFIG_X86_64
	op.cmd = MMUEXT_PIN_L4_TABLE;
#elif defined(CONFIG_X86_PAE)
	op.cmd = MMUEXT_PIN_L3_TABLE;
#else
	op.cmd = MMUEXT_PIN_L2_TABLE;
#endif
	op.arg1.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
	BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void xen_pgd_unpin(unsigned long ptr)
{
	struct mmuext_op op;
	op.cmd = MMUEXT_UNPIN_TABLE;
	op.arg1.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
	BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

void xen_set_ldt(unsigned long ptr, unsigned long len)
{
	struct mmuext_op op;
	op.cmd = MMUEXT_SET_LDT;
	op.arg1.linear_addr = ptr;
	op.arg2.nr_ents = len;
	BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

/*
 * Bitmap is indexed by page number. If bit is set, the page is part of a
 * xen_create_contiguous_region() area of memory.
 */
unsigned long *contiguous_bitmap;

static void contiguous_bitmap_set(
	unsigned long first_page, unsigned long nr_pages)
{
	unsigned long start_off, end_off, curr_idx, end_idx;

	curr_idx  = first_page / BITS_PER_LONG;
	start_off = first_page & (BITS_PER_LONG-1);
	end_idx   = (first_page + nr_pages) / BITS_PER_LONG;
	end_off   = (first_page + nr_pages) & (BITS_PER_LONG-1);

	if (curr_idx == end_idx) {
		contiguous_bitmap[curr_idx] |=
			((1UL<<end_off)-1) & -(1UL<<start_off);
	} else {
		contiguous_bitmap[curr_idx] |= -(1UL<<start_off);
		while ( ++curr_idx < end_idx )
			contiguous_bitmap[curr_idx] = ~0UL;
		contiguous_bitmap[curr_idx] |= (1UL<<end_off)-1;
	}
}

static void contiguous_bitmap_clear(
	unsigned long first_page, unsigned long nr_pages)
{
	unsigned long start_off, end_off, curr_idx, end_idx;

	curr_idx  = first_page / BITS_PER_LONG;
	start_off = first_page & (BITS_PER_LONG-1);
	end_idx   = (first_page + nr_pages) / BITS_PER_LONG;
	end_off   = (first_page + nr_pages) & (BITS_PER_LONG-1);

	if (curr_idx == end_idx) {
		contiguous_bitmap[curr_idx] &=
			-(1UL<<end_off) | ((1UL<<start_off)-1);
	} else {
		contiguous_bitmap[curr_idx] &= (1UL<<start_off)-1;
		while ( ++curr_idx != end_idx )
			contiguous_bitmap[curr_idx] = 0;
		contiguous_bitmap[curr_idx] &= -(1UL<<end_off);
	}
}

/* Protected by balloon_lock. */
#define MAX_CONTIG_ORDER 9 /* 2MB */
static unsigned long discontig_frames[1<<MAX_CONTIG_ORDER];
static multicall_entry_t cr_mcl[1<<MAX_CONTIG_ORDER];

/* Ensure multi-page extents are contiguous in machine memory. */
int xen_create_contiguous_region(
	unsigned long vstart, unsigned int order, unsigned int address_bits)
{
	unsigned long *in_frames = discontig_frames, out_frame;
	unsigned long  frame, i, flags;
	long           rc;
	int            success;
	struct xen_memory_exchange exchange = {
		.in = {
			.nr_extents   = 1UL << order,
			.extent_order = 0,
			.domid        = DOMID_SELF
		},
		.out = {
			.nr_extents   = 1,
			.extent_order = order,
			.address_bits = address_bits,
			.domid        = DOMID_SELF
		}
	};

	/*
	 * Currently an auto-translated guest will not perform I/O, nor will
	 * it require PAE page directories below 4GB. Therefore any calls to
	 * this function are redundant and can be ignored.
	 */
	if (xen_feature(XENFEAT_auto_translated_physmap))
		return 0;

	if (unlikely(order > MAX_CONTIG_ORDER))
		return -ENOMEM;

	set_xen_guest_handle(exchange.in.extent_start, in_frames);
	set_xen_guest_handle(exchange.out.extent_start, &out_frame);

	scrub_pages(vstart, 1 << order);

	balloon_lock(flags);

	/* 1. Zap current PTEs, remembering MFNs. */
	for (i = 0; i < (1UL<<order); i++) {
		in_frames[i] = pfn_to_mfn((__pa(vstart) >> PAGE_SHIFT) + i);
		MULTI_update_va_mapping(cr_mcl + i, vstart + (i*PAGE_SIZE),
					__pte_ma(0), 0);
		set_phys_to_machine((__pa(vstart)>>PAGE_SHIFT)+i,
			INVALID_P2M_ENTRY);
	}
	if (HYPERVISOR_multicall(cr_mcl, i))
		BUG();

	/* 2. Get a new contiguous memory extent. */
	out_frame = __pa(vstart) >> PAGE_SHIFT;
	rc = HYPERVISOR_memory_op(XENMEM_exchange, &exchange);
	success = (exchange.nr_exchanged == (1UL << order));
	BUG_ON(!success && ((exchange.nr_exchanged != 0) || (rc == 0)));
	BUG_ON(success && (rc != 0));
	if (unlikely(rc == -ENOSYS)) {
		/* Compatibility when XENMEM_exchange is unsupported. */
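		/*
		 * Older hypervisors lack XENMEM_exchange, so the swap is
		 * emulated in two steps: XENMEM_decrease_reservation hands
		 * the scattered input frames back to Xen, then
		 * XENMEM_populate_physmap requests a single order-'order'
		 * extent in their place.  If that request cannot be met,
		 * the region is repopulated with ordinary frames and the
		 * function reports failure via -ENOMEM.
		 */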
		if (HYPERVISOR_memory_op(XENMEM_decrease_reservation,
					 &exchange.in) != (1UL << order))
			BUG();
		success = (HYPERVISOR_memory_op(XENMEM_populate_physmap,
						&exchange.out) == 1);
		if (!success) {
			/* Couldn't get special memory: fall back to normal. */
			for (i = 0; i < (1UL<<order); i++)
				in_frames[i] = (__pa(vstart)>>PAGE_SHIFT) + i;
			if (HYPERVISOR_memory_op(XENMEM_populate_physmap,
						 &exchange.in) != (1UL<<order))
				BUG();
		}
	}

	/* 3. Map the new extent in place of old pages. */
	for (i = 0; i < (1UL<<order); i++) {
		frame = success ? (out_frame + i) : in_frames[i];
		MULTI_update_va_mapping(cr_mcl + i, vstart + (i*PAGE_SIZE),
					pfn_pte_ma(frame, PAGE_KERNEL), 0);
		set_phys_to_machine((__pa(vstart)>>PAGE_SHIFT)+i, frame);
	}

	cr_mcl[i - 1].args[MULTI_UVMFLAGS_INDEX] = order
						   ? UVMF_TLB_FLUSH|UVMF_ALL
						   : UVMF_INVLPG|UVMF_ALL;
	if (HYPERVISOR_multicall(cr_mcl, i))
		BUG();

	if (success)
		contiguous_bitmap_set(__pa(vstart) >> PAGE_SHIFT,
				      1UL << order);

	balloon_unlock(flags);

	return success ? 0 : -ENOMEM;
}

void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order)
{
	unsigned long *out_frames = discontig_frames, in_frame;
	unsigned long  frame, i, flags;
	long           rc;
	int            success;
	struct xen_memory_exchange exchange = {
		.in = {
			.nr_extents   = 1,
			.extent_order = order,
			.domid        = DOMID_SELF
		},
		.out = {
			.nr_extents   = 1UL << order,
			.extent_order = 0,
			.domid        = DOMID_SELF
		}
	};

	if (xen_feature(XENFEAT_auto_translated_physmap) ||
	    !test_bit(__pa(vstart) >> PAGE_SHIFT, contiguous_bitmap))
		return;

	if (unlikely(order > MAX_CONTIG_ORDER))
		return;

	set_xen_guest_handle(exchange.in.extent_start, &in_frame);
	set_xen_guest_handle(exchange.out.extent_start, out_frames);

	scrub_pages(vstart, 1 << order);

	balloon_lock(flags);

	contiguous_bitmap_clear(__pa(vstart) >> PAGE_SHIFT, 1UL << order);

	/* 1. Find start MFN of contiguous extent. */
	in_frame = pfn_to_mfn(__pa(vstart) >> PAGE_SHIFT);

	/* 2. Zap current PTEs. */
	for (i = 0; i < (1UL<<order); i++) {
		MULTI_update_va_mapping(cr_mcl + i, vstart + (i*PAGE_SIZE),
					__pte_ma(0), 0);
		set_phys_to_machine((__pa(vstart)>>PAGE_SHIFT)+i,
			INVALID_P2M_ENTRY);
		out_frames[i] = (__pa(vstart) >> PAGE_SHIFT) + i;
	}
	if (HYPERVISOR_multicall(cr_mcl, i))
		BUG();

	/* 3. Do the exchange for non-contiguous MFNs. */
	rc = HYPERVISOR_memory_op(XENMEM_exchange, &exchange);
	success = (exchange.nr_exchanged == 1);
	BUG_ON(!success && ((exchange.nr_exchanged != 0) || (rc == 0)));
	BUG_ON(success && (rc != 0));
	if (unlikely(rc == -ENOSYS)) {
		/* Compatibility when XENMEM_exchange is unsupported. */
		if (HYPERVISOR_memory_op(XENMEM_decrease_reservation,
					 &exchange.in) != 1)
			BUG();
		if (HYPERVISOR_memory_op(XENMEM_populate_physmap,
					 &exchange.out) != (1UL << order))
			BUG();
		success = 1;
	}

	/* 4. Map new pages in place of old pages. */
	for (i = 0; i < (1UL<<order); i++) {
		frame = success ? out_frames[i] : (in_frame + i);
		MULTI_update_va_mapping(cr_mcl + i, vstart + (i*PAGE_SIZE),
					pfn_pte_ma(frame, PAGE_KERNEL), 0);
		set_phys_to_machine((__pa(vstart)>>PAGE_SHIFT)+i, frame);
	}

	cr_mcl[i - 1].args[MULTI_UVMFLAGS_INDEX] = order
						   ? UVMF_TLB_FLUSH|UVMF_ALL
						   : UVMF_INVLPG|UVMF_ALL;
	if (HYPERVISOR_multicall(cr_mcl, i))
		BUG();

	balloon_unlock(flags);
}

#ifdef __i386__
int write_ldt_entry(void *ldt, int entry, __u32 entry_a, __u32 entry_b)
{
	__u32 *lp = (__u32 *)((char *)ldt + entry * 8);
	maddr_t mach_lp = arbitrary_virt_to_machine(lp);
	return HYPERVISOR_update_descriptor(
		mach_lp, (u64)entry_a | ((u64)entry_b<<32));
}
#endif
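
/*
 * Illustrative usage sketch only (kept out of the build with #if 0): how a
 * caller might obtain a machine-contiguous, 32-bit-addressable buffer by
 * exchanging an ordinary page-order allocation.  The helper names
 * example_alloc_dma_buffer()/example_free_dma_buffer() and the choice of
 * 32 address bits are hypothetical, not part of this interface.
 */
#if 0
static void *example_alloc_dma_buffer(unsigned int order)
{
	unsigned long vstart = __get_free_pages(GFP_KERNEL, order);

	if (vstart == 0)
		return NULL;

	/* Make the 2^order pages contiguous in machine memory, below 4GB. */
	if (xen_create_contiguous_region(vstart, order, 32) != 0) {
		free_pages(vstart, order);
		return NULL;
	}
	return (void *)vstart;
}

static void example_free_dma_buffer(void *vaddr, unsigned int order)
{
	/* Return the contiguous machine extent before freeing the pages. */
	xen_destroy_contiguous_region((unsigned long)vaddr, order);
	free_pages((unsigned long)vaddr, order);
}
#endif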