This commit was manufactured by cvs2svn to create branch 'vserver'.
diff --git a/drivers/xen/balloon/balloon.c b/drivers/xen/balloon/balloon.c
new file mode 100644
index 0000000..649f64c
--- /dev/null
+++ b/drivers/xen/balloon/balloon.c
@@ -0,0 +1,438 @@
+/******************************************************************************
+ * balloon.c
+ *
+ * Xen balloon driver - enables returning/claiming memory to/from Xen.
+ *
+ * Copyright (c) 2003, B Dragovic
+ * Copyright (c) 2003-2004, M Williamson, K Fraser
+ * 
+ * This file may be distributed separately from the Linux kernel, or
+ * incorporated into other software packages, subject to the following license:
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/errno.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/smp_lock.h>
+#include <linux/pagemap.h>
+#include <linux/bootmem.h>
+#include <linux/highmem.h>
+#include <linux/vmalloc.h>
+#include <asm-xen/xen_proc.h>
+#include <asm-xen/hypervisor.h>
+#include <asm-xen/ctrl_if.h>
+#include <asm-xen/balloon.h>
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+#include <asm/uaccess.h>
+#include <asm/tlb.h>
+#include <linux/list.h>
+
+static struct proc_dir_entry *balloon_pde;
+
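+/*
+ * balloon_lock protects the page counters and brackets the hypervisor
+ * reservation calls; balloon_mutex ensures that only one balloon_process()
+ * pass runs at a time.
+ */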
+static DECLARE_MUTEX(balloon_mutex);
+spinlock_t balloon_lock = SPIN_LOCK_UNLOCKED;
+
+/* We aim for 'current allocation' == 'target allocation'. */
+static unsigned long current_pages;
+static unsigned long target_pages;
+
+/* We may hit the hard limit in Xen. If we do then we remember it. */
+static unsigned long hard_limit;
+
+/*
+ * Drivers may alter the memory reservation independently, but they must
+ * inform the balloon driver so that we can avoid hitting the hard limit.
+ */
+static unsigned long driver_pages;
+
+/* List of ballooned pages, threaded through the mem_map array. */
+static LIST_HEAD(ballooned_pages);
+static unsigned long balloon_low, balloon_high;
+
+/* Main work function, always executed in process context. */
+static void balloon_process(void *unused);
+static DECLARE_WORK(balloon_worker, balloon_process, NULL);
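+/* Re-kicks the worker when a pass ends short of the target (see balloon_process()). */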
+static struct timer_list balloon_timer;
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+/* Use the private and mapping fields of struct page as a list. */
+#define PAGE_TO_LIST(p) ( (struct list_head *)&p->private )
+#define LIST_TO_PAGE(l) ( list_entry( ((unsigned long *)l),   \
+                                     struct page, private ) )
+#define UNLIST_PAGE(p)  do { list_del(PAGE_TO_LIST(p));       \
+                             p->mapping = NULL;               \
+                             p->private = 0; } while(0)
+#else
+/* There's a dedicated list field in struct page we can use.    */
+#define PAGE_TO_LIST(p) ( &p->list )
+#define LIST_TO_PAGE(l) ( list_entry(l, struct page, list) )
+#define UNLIST_PAGE(p)  ( list_del(&p->list) )
+#define pte_offset_kernel pte_offset
+#define pud_t pgd_t
+#define pud_offset(d, va) d
+#define pud_none(d) 0
+#define pud_bad(d) 0
+#define subsys_initcall(_fn) __initcall(_fn)
+#define pfn_to_page(_pfn) (mem_map + (_pfn))
+#endif
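+/*
+ * The compatibility block above supports both the 2.6 kernels this driver
+ * targets, where struct page no longer carries a dedicated list field (so
+ * its private and mapping fields are pressed into service as a list node),
+ * and older kernels, where the pud level is folded back into the pgd.
+ */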
+
+#define IPRINTK(fmt, args...) \
+    printk(KERN_INFO "xen_mem: " fmt, ##args)
+#define WPRINTK(fmt, args...) \
+    printk(KERN_WARNING "xen_mem: " fmt, ##args)
+
+/* balloon_append: add the given page to the balloon. */
+static void balloon_append(struct page *page)
+{
+    /* Low memory is re-populated first, so highmem pages go at list tail. */
+    if ( PageHighMem(page) )
+    {
+        list_add_tail(PAGE_TO_LIST(page), &ballooned_pages);
+        balloon_high++;
+    }
+    else
+    {
+        list_add(PAGE_TO_LIST(page), &ballooned_pages);
+        balloon_low++;
+    }
+}
+
+/* balloon_retrieve: rescue a page from the balloon, if the balloon is not empty. */
+static struct page *balloon_retrieve(void)
+{
+    struct page *page;
+
+    if ( list_empty(&ballooned_pages) )
+        return NULL;
+
+    page = LIST_TO_PAGE(ballooned_pages.next);
+    UNLIST_PAGE(page);
+
+    if ( PageHighMem(page) )
+        balloon_high--;
+    else
+        balloon_low--;
+
+    return page;
+}
+
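+/* Timer callback: defer all real work to process context via the workqueue. */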
+static void balloon_alarm(unsigned long unused)
+{
+    schedule_work(&balloon_worker);
+}
+
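+/*
+ * The target we actually chase: the requested target, clamped to the Xen
+ * hard limit and to the number of pages we could ever hand back (current
+ * allocation plus everything already in the balloon).
+ */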
+static unsigned long current_target(void)
+{
+    unsigned long target = min(target_pages, hard_limit);
+    if ( target > (current_pages + balloon_low + balloon_high) )
+        target = current_pages + balloon_low + balloon_high;
+    return target;
+}
+
+/*
+ * The balloon mutex prevents concurrent invocations of the worker from
+ * conflicting with one another. We may still race against updates of the
+ * target counts (which are protected by the balloon lock) or against
+ * changes to the Xen hard limit, but we will recover from these in time.
+ */
+static void balloon_process(void *unused)
+{
+    unsigned long *mfn_list, pfn, i, flags;
+    struct page   *page;
+    long           credit, debt, rc;
+    void          *v;
+
+    down(&balloon_mutex);
+
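+    /*
+     * credit > 0: we are below target -- ask Xen for more machine frames,
+     * wire them into ballooned pages and release them to the allocator.
+     * credit < 0: we are above target -- allocate pages, scrub and unmap
+     * them, then hand the underlying frames back to Xen.
+     */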
+ retry:
+    mfn_list = NULL;
+
+    if ( (credit = current_target() - current_pages) > 0 )
+    {
+        mfn_list = (unsigned long *)vmalloc(credit * sizeof(*mfn_list));
+        if ( mfn_list == NULL )
+            goto out;
+
+        balloon_lock(flags);
+        rc = HYPERVISOR_dom_mem_op(
+            MEMOP_increase_reservation, mfn_list, credit, 0);
+        balloon_unlock(flags);
+        if ( rc < credit )
+        {
+            /* We hit the Xen hard limit: reprobe. */
+            if ( HYPERVISOR_dom_mem_op(
+                MEMOP_decrease_reservation, mfn_list, rc, 0) != rc )
+                BUG();
+            hard_limit = current_pages + rc - driver_pages;
+            vfree(mfn_list);
+            goto retry;
+        }
+
+        for ( i = 0; i < credit; i++ )
+        {
+            if ( (page = balloon_retrieve()) == NULL )
+                BUG();
+
+            pfn = page - mem_map;
+            if ( phys_to_machine_mapping[pfn] != INVALID_P2M_ENTRY )
+                BUG();
+
+            /* Update P->M and M->P tables. */
+            phys_to_machine_mapping[pfn] = mfn_list[i];
+            xen_machphys_update(mfn_list[i], pfn);
+            
+            /* Link back into the page tables if it's not a highmem page. */
+            if ( pfn < max_low_pfn )
+            {
+                HYPERVISOR_update_va_mapping(
+                    (unsigned long)__va(pfn << PAGE_SHIFT),
+                    __pte_ma((mfn_list[i] << PAGE_SHIFT) |
+                             pgprot_val(PAGE_KERNEL)),
+                    0);
+            }
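+            /* Highmem pages have no permanent kernel mapping to fix up. */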
+
+            /* Finally, relinquish the memory back to the system allocator. */
+            ClearPageReserved(page);
+            set_page_count(page, 1);
+            __free_page(page);
+        }
+
+        current_pages += credit;
+    }
+    else if ( credit < 0 )
+    {
+        debt = -credit;
+
+        mfn_list = (unsigned long *)vmalloc(debt * sizeof(*mfn_list));
+        if ( mfn_list == NULL )
+            goto out;
+
+        for ( i = 0; i < debt; i++ )
+        {
+            if ( (page = alloc_page(GFP_HIGHUSER)) == NULL )
+            {
+                debt = i;
+                break;
+            }
+
+            pfn = page - mem_map;
+            mfn_list[i] = phys_to_machine_mapping[pfn];
+
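+            /*
+             * Low-memory pages are scrubbed through the direct mapping and
+             * then unmapped from the kernel page tables; highmem pages have
+             * no persistent mapping to tear down and are scrubbed through a
+             * transient kmap() when page scrubbing is configured.
+             */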
+            if ( !PageHighMem(page) )
+            {
+                v = phys_to_virt(pfn << PAGE_SHIFT);
+                scrub_pages(v, 1);
+                HYPERVISOR_update_va_mapping(
+                    (unsigned long)v, __pte_ma(0), 0);
+            }
+#ifdef CONFIG_XEN_SCRUB_PAGES
+            else
+            {
+                v = kmap(page);
+                scrub_pages(v, 1);
+                kunmap(page);
+            }
+#endif
+        }
+
+        /* Ensure that ballooned highmem pages don't have cached mappings. */
+        kmap_flush_unused();
+        flush_tlb_all();
+
+        /* No more mappings: invalidate pages in P2M and add to balloon. */
+        for ( i = 0; i < debt; i++ )
+        {
+            pfn = mfn_to_pfn(mfn_list[i]);
+            phys_to_machine_mapping[pfn] = INVALID_P2M_ENTRY;
+            balloon_append(pfn_to_page(pfn));
+        }
+
+        if ( HYPERVISOR_dom_mem_op(
+            MEMOP_decrease_reservation, mfn_list, debt, 0) != debt )
+            BUG();
+
+        current_pages -= debt;
+    }
+
+ out:
+    if ( mfn_list != NULL )
+        vfree(mfn_list);
+
+    /* Schedule more work if there is some still to be done. */
+    if ( current_target() != current_pages )
+        mod_timer(&balloon_timer, jiffies + HZ);
+
+    up(&balloon_mutex);
+}
+
+/* Resets the Xen limit, sets new target, and kicks off processing. */
+static void set_new_target(unsigned long target)
+{
+    /* No lock needed: these are plain stores, not read-modify-write updates. */
+    hard_limit   = ~0UL;
+    target_pages = target;
+    schedule_work(&balloon_worker);
+}
+
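+/*
+ * Control-interface handler: a new memory target arrives as a
+ * CMSG_MEM_REQUEST_SET message (typically sent by the management tools);
+ * any other subtype gets an empty response.
+ */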
+static void balloon_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
+{
+    switch ( msg->subtype )
+    {
+    case CMSG_MEM_REQUEST_SET:
+    {
+        mem_request_t *req = (mem_request_t *)&msg->msg[0];
+        set_new_target(req->target);
+        req->status = 0;
+    }
+    break;        
+
+    default:
+        msg->length = 0;
+        break;
+    }
+
+    ctrl_if_send_response(msg);
+}
+
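+/*
+ * /proc/xen/balloon write handler. The new target is parsed with memparse(),
+ * so plain byte counts and K/M/G suffixes are both accepted -- for example,
+ * "echo 512M > /proc/xen/balloon" (an illustrative value) requests a target
+ * of 512 MB.
+ */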
+static int balloon_write(struct file *file, const char __user *buffer,
+                         unsigned long count, void *data)
+{
+    char memstring[64], *endchar;
+    unsigned long long target_bytes;
+
+    if ( !capable(CAP_SYS_ADMIN) )
+        return -EPERM;
+
+    if ( count <= 1 )
+        return -EBADMSG; /* runt */
+    if ( count >= sizeof(memstring) )
+        return -EFBIG;   /* too long */
+
+    if ( copy_from_user(memstring, buffer, count) )
+        return -EFAULT;
+    memstring[count] = '\0';
+
+    target_bytes = memparse(memstring, &endchar);
+    set_new_target(target_bytes >> PAGE_SHIFT);
+
+    return count;
+}
+
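+/*
+ * /proc/xen/balloon read handler: reports the current and requested
+ * allocation and the low/high balloon sizes in kB. The hard-limit line
+ * shows "???" until an increase request is refused and the limit probed.
+ */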
+static int balloon_read(char *page, char **start, off_t off,
+                        int count, int *eof, void *data)
+{
+    int len;
+
+#define K(_p) ((_p)<<(PAGE_SHIFT-10))
+    len = sprintf(
+        page,
+        "Current allocation: %8lu kB\n"
+        "Requested target:   %8lu kB\n"
+        "Low-mem balloon:    %8lu kB\n"
+        "High-mem balloon:   %8lu kB\n"
+        "Xen hard limit:     ",
+        K(current_pages), K(target_pages), K(balloon_low), K(balloon_high));
+
+    if ( hard_limit != ~0UL )
+        len += sprintf(
+            page + len, 
+            "%8lu kB (inc. %8lu kB driver headroom)\n",
+            K(hard_limit), K(driver_pages));
+    else
+        len += sprintf(
+            page + len,
+            "     ??? kB\n");
+
+    *eof = 1;
+    return len;
+}
+
+static int __init balloon_init(void)
+{
+    unsigned long pfn;
+    struct page *page;
+
+    IPRINTK("Initialising balloon driver.\n");
+
+    current_pages = min(xen_start_info.nr_pages, max_pfn);
+    target_pages  = current_pages;
+    balloon_low   = 0;
+    balloon_high  = 0;
+    driver_pages  = 0UL;
+    hard_limit    = ~0UL;
+
+    init_timer(&balloon_timer);
+    balloon_timer.data = 0;
+    balloon_timer.function = balloon_alarm;
+    
+    if ( (balloon_pde = create_xen_proc_entry("balloon", 0644)) == NULL )
+    {
+        WPRINTK("Unable to create /proc/xen/balloon.\n");
+        return -1;
+    }
+
+    balloon_pde->read_proc  = balloon_read;
+    balloon_pde->write_proc = balloon_write;
+
+    (void)ctrl_if_register_receiver(CMSG_MEM_REQUEST, balloon_ctrlif_rx, 0);
+
+    /* Initialise the balloon with excess memory space. */
+    for ( pfn = xen_start_info.nr_pages; pfn < max_pfn; pfn++ )
+    {
+        page = &mem_map[pfn];
+        if ( !PageReserved(page) )
+            balloon_append(page);
+    }
+
+    return 0;
+}
+
+subsys_initcall(balloon_init);
+
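+/*
+ * Exported for other Xen drivers that grow or shrink the domain's
+ * reservation on their own: keeping driver_pages up to date lets
+ * balloon_process() compute the hard limit correctly when Xen refuses
+ * an increase.
+ */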
+void balloon_update_driver_allowance(long delta)
+{
+    unsigned long flags;
+    balloon_lock(flags);
+    driver_pages += delta; /* non-atomic update */
+    balloon_unlock(flags);
+}
+
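+/*
+ * Exported helper: hand a caller-supplied list of machine frames straight
+ * back to Xen, debit current_pages accordingly, and kick the worker so the
+ * balloon can grow back towards the target.
+ */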
+void balloon_put_pages(unsigned long *mfn_list, unsigned long nr_mfns)
+{
+    unsigned long flags;
+
+    balloon_lock(flags);
+    if ( HYPERVISOR_dom_mem_op(MEMOP_decrease_reservation, 
+                               mfn_list, nr_mfns, 0) != nr_mfns )
+        BUG();
+    current_pages -= nr_mfns; /* non-atomic update */
+    balloon_unlock(flags);
+
+    schedule_work(&balloon_worker);
+}
+
+EXPORT_SYMBOL(balloon_update_driver_allowance);
+EXPORT_SYMBOL(balloon_put_pages);