Merge to Fedora kernel-2.6.17-1.2187_FC5 patched with stable patch-2.6.17.13-vs2...
diff --git a/drivers/xen/balloon/balloon.c b/drivers/xen/balloon/balloon.c
index 649f64c..3226753 100644
--- a/drivers/xen/balloon/balloon.c
+++ b/drivers/xen/balloon/balloon.c
@@ -5,9 +5,13 @@
  *
  * Copyright (c) 2003, B Dragovic
  * Copyright (c) 2003-2004, M Williamson, K Fraser
+ * Copyright (c) 2005 Dan M. Smith, IBM Corporation
  * 
- * This file may be distributed separately from the Linux kernel, or
- * incorporated into other software packages, subject to the following license:
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
  * 
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this source file (the "Software"), to deal in the Software without
 #include <linux/bootmem.h>
 #include <linux/highmem.h>
 #include <linux/vmalloc.h>
-#include <asm-xen/xen_proc.h>
-#include <asm-xen/hypervisor.h>
-#include <asm-xen/ctrl_if.h>
-#include <asm-xen/balloon.h>
+#include <xen/xen_proc.h>
+#include <asm/hypervisor.h>
+#include <xen/balloon.h>
+#include <xen/interface/memory.h>
 #include <asm/pgalloc.h>
 #include <asm/pgtable.h>
 #include <asm/uaccess.h>
 #include <asm/tlb.h>
 #include <linux/list.h>
 
+#include <xen/xenbus.h>
+
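+/* Convert a count of pages to KiB: shifting by PAGE_SHIFT would give bytes,
+   so shifting by PAGE_SHIFT-10 gives kilobytes. */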
+#define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10))
+
+#ifdef CONFIG_PROC_FS
 static struct proc_dir_entry *balloon_pde;
+#endif
 
 static DECLARE_MUTEX(balloon_mutex);
-spinlock_t balloon_lock = SPIN_LOCK_UNLOCKED;
+
+/*
+ * Protects atomic reservation decrease/increase against concurrent increases.
+ * Also protects non-atomic updates of current_pages and driver_pages, and
+ * balloon lists.
+ */
+DEFINE_SPINLOCK(balloon_lock);
 
 /* We aim for 'current allocation' == 'target allocation'. */
 static unsigned long current_pages;
 static unsigned long target_pages;
 
+/* We increase/decrease in batches which fit in a page */
+static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)]; 
+
+/* VM /proc information for memory */
+extern unsigned long totalram_pages;
+
 /* We may hit the hard limit in Xen. If we do then we remember it. */
 static unsigned long hard_limit;
 
@@ -77,79 +99,230 @@ static void balloon_process(void *unused);
 static DECLARE_WORK(balloon_worker, balloon_process, NULL);
 static struct timer_list balloon_timer;
 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-/* Use the private and mapping fields of struct page as a list. */
-#define PAGE_TO_LIST(p) ( (struct list_head *)&p->private )
-#define LIST_TO_PAGE(l) ( list_entry( ((unsigned long *)l),   \
-                                     struct page, private ) )
-#define UNLIST_PAGE(p)  do { list_del(PAGE_TO_LIST(p));       \
-                             p->mapping = NULL;               \
-                             p->private = 0; } while(0)
-#else
-/* There's a dedicated list field in struct page we can use.    */
-#define PAGE_TO_LIST(p) ( &p->list )
-#define LIST_TO_PAGE(l) ( list_entry(l, struct page, list) )
-#define UNLIST_PAGE(p)  ( list_del(&p->list) )
-#define pte_offset_kernel pte_offset
-#define pud_t pgd_t
-#define pud_offset(d, va) d
-#define pud_none(d) 0
-#define pud_bad(d) 0
-#define subsys_initcall(_fn) __initcall(_fn)
-#define pfn_to_page(_pfn) (mem_map + (_pfn))
-#endif
+/* When ballooning out (allocating memory to return to Xen) we don't really 
+   want the kernel to try too hard since that can trigger the oom killer. */
+#define GFP_BALLOON \
+       (GFP_HIGHUSER | __GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC)
+
+#define PAGE_TO_LIST(p) (&(p)->lru)
+#define LIST_TO_PAGE(l) list_entry((l), struct page, lru)
+#define UNLIST_PAGE(p)                         \
+       do {                                    \
+               list_del(PAGE_TO_LIST(p));      \
+               PAGE_TO_LIST(p)->next = NULL;   \
+               PAGE_TO_LIST(p)->prev = NULL;   \
+       } while(0)
 
 #define IPRINTK(fmt, args...) \
-    printk(KERN_INFO "xen_mem: " fmt, ##args)
+       printk(KERN_INFO "xen_mem: " fmt, ##args)
 #define WPRINTK(fmt, args...) \
-    printk(KERN_WARNING "xen_mem: " fmt, ##args)
+       printk(KERN_WARNING "xen_mem: " fmt, ##args)
 
 /* balloon_append: add the given page to the balloon. */
 static void balloon_append(struct page *page)
 {
-    /* Low memory is re-populated first, so highmem pages go at list tail. */
-    if ( PageHighMem(page) )
-    {
-        list_add_tail(PAGE_TO_LIST(page), &ballooned_pages);
-        balloon_high++;
-    }
-    else
-    {
-        list_add(PAGE_TO_LIST(page), &ballooned_pages);
-        balloon_low++;
-    }
+       /* Lowmem is re-populated first, so highmem pages go at list tail. */
+       if (PageHighMem(page)) {
+               list_add_tail(PAGE_TO_LIST(page), &ballooned_pages);
+               balloon_high++;
+       } else {
+               list_add(PAGE_TO_LIST(page), &ballooned_pages);
+               balloon_low++;
+       }
 }
 
 /* balloon_retrieve: rescue a page from the balloon, if it is not empty. */
 static struct page *balloon_retrieve(void)
 {
-    struct page *page;
+       struct page *page;
+
+       if (list_empty(&ballooned_pages))
+               return NULL;
+
+       page = LIST_TO_PAGE(ballooned_pages.next);
+       UNLIST_PAGE(page);
 
-    if ( list_empty(&ballooned_pages) )
-        return NULL;
+       if (PageHighMem(page))
+               balloon_high--;
+       else
+               balloon_low--;
 
-    page = LIST_TO_PAGE(ballooned_pages.next);
-    UNLIST_PAGE(page);
+       return page;
+}
 
-    if ( PageHighMem(page) )
-        balloon_high--;
-    else
-        balloon_low--;
+static struct page *balloon_first_page(void)
+{
+       if (list_empty(&ballooned_pages))
+               return NULL;
+       return LIST_TO_PAGE(ballooned_pages.next);
+}
 
-    return page;
+static struct page *balloon_next_page(struct page *page)
+{
+       struct list_head *next = PAGE_TO_LIST(page)->next;
+       if (next == &ballooned_pages)
+               return NULL;
+       return LIST_TO_PAGE(next);
 }
 
 static void balloon_alarm(unsigned long unused)
 {
-    schedule_work(&balloon_worker);
+       schedule_work(&balloon_worker);
 }
 
 static unsigned long current_target(void)
 {
-    unsigned long target = min(target_pages, hard_limit);
-    if ( target > (current_pages + balloon_low + balloon_high) )
-        target = current_pages + balloon_low + balloon_high;
-    return target;
+       unsigned long target = min(target_pages, hard_limit);
+       if (target > (current_pages + balloon_low + balloon_high))
+               target = current_pages + balloon_low + balloon_high;
+       return target;
+}
+
+static int increase_reservation(unsigned long nr_pages)
+{
+       unsigned long  pfn, i, flags;
+       struct page   *page;
+       long           rc;
+       struct xen_memory_reservation reservation = {
+               .address_bits = 0,
+               .extent_order = 0,
+               .domid        = DOMID_SELF
+       };
+
+       if (nr_pages > ARRAY_SIZE(frame_list))
+               nr_pages = ARRAY_SIZE(frame_list);
+
+       balloon_lock(flags);
+
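+       /* Walk the balloon list to collect candidate frames without unlinking
+          them yet, so a failed or partial hypercall leaves the list intact. */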
+       page = balloon_first_page();
+       for (i = 0; i < nr_pages; i++) {
+               BUG_ON(page == NULL);
+               frame_list[i] = page_to_pfn(page);
+               page = balloon_next_page(page);
+       }
+
+       set_xen_guest_handle(reservation.extent_start, frame_list);
+       reservation.nr_extents   = nr_pages;
+       rc = HYPERVISOR_memory_op(
+               XENMEM_populate_physmap, &reservation);
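+       /* rc is the number of extents actually populated; fewer than nr_pages
+          means Xen could not satisfy the whole request. */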
+       if (rc < nr_pages) {
+               if (rc > 0) {
+                       int ret;
+
+                       /* We hit the Xen hard limit: reprobe. */
+                       reservation.nr_extents = rc;
+                       ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
+                                       &reservation);
+                       BUG_ON(ret != rc);
+               }
+               if (rc >= 0)
+                       hard_limit = current_pages + rc - driver_pages;
+               goto out;
+       }
+
+       for (i = 0; i < nr_pages; i++) {
+               page = balloon_retrieve();
+               BUG_ON(page == NULL);
+
+               pfn = page_to_pfn(page);
+               BUG_ON(!xen_feature(XENFEAT_auto_translated_physmap) &&
+                      phys_to_machine_mapping_valid(pfn));
+
+               /* Update P->M and M->P tables. */
+               set_phys_to_machine(pfn, frame_list[i]);
+               xen_machphys_update(frame_list[i], pfn);
+
+               /* Link back into the page tables if not highmem. */
+               if (pfn < max_low_pfn) {
+                       int ret;
+                       ret = HYPERVISOR_update_va_mapping(
+                               (unsigned long)__va(pfn << PAGE_SHIFT),
+                               pfn_pte_ma(frame_list[i], PAGE_KERNEL),
+                               0);
+                       BUG_ON(ret);
+               }
+
+               /* Relinquish the page back to the allocator. */
+               ClearPageReserved(page);
+               init_page_count(page);
+               __free_page(page);
+       }
+
+       current_pages += nr_pages;
+       totalram_pages = current_pages;
+
+ out:
+       balloon_unlock(flags);
+
+       return 0;
+}
+
+static int decrease_reservation(unsigned long nr_pages)
+{
+       unsigned long  pfn, i, flags;
+       struct page   *page;
+       void          *v;
+       int            need_sleep = 0;
+       int ret;
+       struct xen_memory_reservation reservation = {
+               .address_bits = 0,
+               .extent_order = 0,
+               .domid        = DOMID_SELF
+       };
+
+       if (nr_pages > ARRAY_SIZE(frame_list))
+               nr_pages = ARRAY_SIZE(frame_list);
+
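+       /* Allocate and scrub the pages, and tear down any lowmem mapping,
+          before the frames are handed back to Xen below. */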
+       for (i = 0; i < nr_pages; i++) {
+               if ((page = alloc_page(GFP_BALLOON)) == NULL) {
+                       nr_pages = i;
+                       need_sleep = 1;
+                       break;
+               }
+
+               pfn = page_to_pfn(page);
+               frame_list[i] = pfn_to_mfn(pfn);
+
+               if (!PageHighMem(page)) {
+                       v = phys_to_virt(pfn << PAGE_SHIFT);
+                       scrub_pages(v, 1);
+                       ret = HYPERVISOR_update_va_mapping(
+                               (unsigned long)v, __pte_ma(0), 0);
+                       BUG_ON(ret);
+               }
+#ifdef CONFIG_XEN_SCRUB_PAGES
+               else {
+                       v = kmap(page);
+                       scrub_pages(v, 1);
+                       kunmap(page);
+               }
+#endif
+       }
+
+       /* Ensure that ballooned highmem pages don't have kmaps. */
+       kmap_flush_unused();
+       flush_tlb_all();
+
+       balloon_lock(flags);
+
+       /* No more mappings: invalidate P2M and add to balloon. */
+       for (i = 0; i < nr_pages; i++) {
+               pfn = mfn_to_pfn(frame_list[i]);
+               set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
+               balloon_append(pfn_to_page(pfn));
+       }
+
+       set_xen_guest_handle(reservation.extent_start, frame_list);
+       reservation.nr_extents   = nr_pages;
+       ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
+       BUG_ON(ret != nr_pages);
+
+       current_pages -= nr_pages;
+       totalram_pages = current_pages;
+
+       balloon_unlock(flags);
+
+       return need_sleep;
 }
 
 /*
@@ -160,279 +333,285 @@ static unsigned long current_target(void)
  */
 static void balloon_process(void *unused)
 {
-    unsigned long *mfn_list, pfn, i, flags;
-    struct page   *page;
-    long           credit, debt, rc;
-    void          *v;
-
-    down(&balloon_mutex);
-
- retry:
-    mfn_list = NULL;
-
-    if ( (credit = current_target() - current_pages) > 0 )
-    {
-        mfn_list = (unsigned long *)vmalloc(credit * sizeof(*mfn_list));
-        if ( mfn_list == NULL )
-            goto out;
-
-        balloon_lock(flags);
-        rc = HYPERVISOR_dom_mem_op(
-            MEMOP_increase_reservation, mfn_list, credit, 0);
-        balloon_unlock(flags);
-        if ( rc < credit )
-        {
-            /* We hit the Xen hard limit: reprobe. */
-            if ( HYPERVISOR_dom_mem_op(
-                MEMOP_decrease_reservation, mfn_list, rc, 0) != rc )
-                BUG();
-            hard_limit = current_pages + rc - driver_pages;
-            vfree(mfn_list);
-            goto retry;
-        }
-
-        for ( i = 0; i < credit; i++ )
-        {
-            if ( (page = balloon_retrieve()) == NULL )
-                BUG();
-
-            pfn = page - mem_map;
-            if ( phys_to_machine_mapping[pfn] != INVALID_P2M_ENTRY )
-                BUG();
-
-            /* Update P->M and M->P tables. */
-            phys_to_machine_mapping[pfn] = mfn_list[i];
-            xen_machphys_update(mfn_list[i], pfn);
-            
-            /* Link back into the page tables if it's not a highmem page. */
-            if ( pfn < max_low_pfn )
-            {
-                HYPERVISOR_update_va_mapping(
-                    (unsigned long)__va(pfn << PAGE_SHIFT),
-                    __pte_ma((mfn_list[i] << PAGE_SHIFT) |
-                             pgprot_val(PAGE_KERNEL)),
-                    0);
-            }
-
-            /* Finally, relinquish the memory back to the system allocator. */
-            ClearPageReserved(page);
-            set_page_count(page, 1);
-            __free_page(page);
-        }
-
-        current_pages += credit;
-    }
-    else if ( credit < 0 )
-    {
-        debt = -credit;
-
-        mfn_list = (unsigned long *)vmalloc(debt * sizeof(*mfn_list));
-        if ( mfn_list == NULL )
-            goto out;
-
-        for ( i = 0; i < debt; i++ )
-        {
-            if ( (page = alloc_page(GFP_HIGHUSER)) == NULL )
-            {
-                debt = i;
-                break;
-            }
-
-            pfn = page - mem_map;
-            mfn_list[i] = phys_to_machine_mapping[pfn];
-
-            if ( !PageHighMem(page) )
-            {
-                v = phys_to_virt(pfn << PAGE_SHIFT);
-                scrub_pages(v, 1);
-                HYPERVISOR_update_va_mapping(
-                    (unsigned long)v, __pte_ma(0), 0);
-            }
-#ifdef CONFIG_XEN_SCRUB_PAGES
-            else
-            {
-                v = kmap(page);
-                scrub_pages(v, 1);
-                kunmap(page);
-            }
-#endif
-        }
+       int need_sleep = 0;
+       long credit;
 
-        /* Ensure that ballooned highmem pages don't have cached mappings. */
-        kmap_flush_unused();
-        flush_tlb_all();
+       down(&balloon_mutex);
 
-        /* No more mappings: invalidate pages in P2M and add to balloon. */
-        for ( i = 0; i < debt; i++ )
-        {
-            pfn = mfn_to_pfn(mfn_list[i]);
-            phys_to_machine_mapping[pfn] = INVALID_P2M_ENTRY;
-            balloon_append(pfn_to_page(pfn));
-        }
+       do {
+               credit = current_target() - current_pages;
+               if (credit > 0)
+                       need_sleep = (increase_reservation(credit) != 0);
+               if (credit < 0)
+                       need_sleep = (decrease_reservation(-credit) != 0);
 
-        if ( HYPERVISOR_dom_mem_op(
-            MEMOP_decrease_reservation, mfn_list, debt, 0) != debt )
-            BUG();
-
-        current_pages -= debt;
-    }
-
- out:
-    if ( mfn_list != NULL )
-        vfree(mfn_list);
+#ifndef CONFIG_PREEMPT
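+               /* On non-preemptible kernels, give other tasks a chance to run
+                  between batches. */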
+               if (need_resched())
+                       schedule();
+#endif
+       } while ((credit != 0) && !need_sleep);
 
-    /* Schedule more work if there is some still to be done. */
-    if ( current_target() != current_pages )
-        mod_timer(&balloon_timer, jiffies + HZ);
+       /* Schedule more work if there is some still to be done. */
+       if (current_target() != current_pages)
+               mod_timer(&balloon_timer, jiffies + HZ);
 
-    up(&balloon_mutex);
+       up(&balloon_mutex);
 }
 
 /* Resets the Xen limit, sets new target, and kicks off processing. */
 static void set_new_target(unsigned long target)
 {
-    /* No need for lock. Not read-modify-write updates. */
-    hard_limit   = ~0UL;
-    target_pages = target;
-    schedule_work(&balloon_worker);
+       /* No need for lock. Not read-modify-write updates. */
+       hard_limit   = ~0UL;
+       target_pages = target;
+       schedule_work(&balloon_worker);
 }
 
-static void balloon_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
+static struct xenbus_watch target_watch =
 {
-    switch ( msg->subtype )
-    {
-    case CMSG_MEM_REQUEST_SET:
-    {
-        mem_request_t *req = (mem_request_t *)&msg->msg[0];
-        set_new_target(req->target);
-        req->status = 0;
-    }
-    break;        
-
-    default:
-        msg->length = 0;
-        break;
-    }
-
-    ctrl_if_send_response(msg);
+       .node = "memory/target"
+};
+
+/* React to a change in the target key */
+static void watch_target(struct xenbus_watch *watch,
+                        const char **vec, unsigned int len)
+{
+       unsigned long long new_target;
+       int err;
+
+       err = xenbus_scanf(XBT_NIL, "memory", "target", "%llu", &new_target);
+       if (err != 1) {
+               /* This is ok (for domain0 at least) - so just return */
+               return;
+       }
+
+       /* The given memory/target value is in KiB, so it needs converting to
+        * pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10.
+        */
+       set_new_target(new_target >> (PAGE_SHIFT - 10));
+}
+
+static int balloon_init_watcher(struct notifier_block *notifier,
+                               unsigned long event,
+                               void *data)
+{
+       int err;
+
+       err = register_xenbus_watch(&target_watch);
+       if (err)
+               printk(KERN_ERR "Failed to set balloon watcher\n");
+
+       return NOTIFY_DONE;
 }
 
+#ifdef CONFIG_PROC_FS
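+/* Handle writes to /proc/xen/balloon such as "echo 512M > /proc/xen/balloon":
+   memparse() accepts an optional K/M/G suffix and the result is converted to
+   a page target. */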
 static int balloon_write(struct file *file, const char __user *buffer,
-                         unsigned long count, void *data)
+                        unsigned long count, void *data)
 {
-    char memstring[64], *endchar;
-    unsigned long long target_bytes;
+       char memstring[64], *endchar;
+       unsigned long long target_bytes;
 
-    if ( !capable(CAP_SYS_ADMIN) )
-        return -EPERM;
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
 
-    if ( count <= 1 )
-        return -EBADMSG; /* runt */
-    if ( count > sizeof(memstring) )
-        return -EFBIG;   /* too long */
+       if (count <= 1)
+               return -EBADMSG; /* runt */
+       if (count > sizeof(memstring))
+               return -EFBIG;   /* too long */
 
-    if ( copy_from_user(memstring, buffer, count) )
-        return -EFAULT;
-    memstring[sizeof(memstring)-1] = '\0';
+       if (copy_from_user(memstring, buffer, count))
+               return -EFAULT;
+       memstring[sizeof(memstring)-1] = '\0';
 
-    target_bytes = memparse(memstring, &endchar);
-    set_new_target(target_bytes >> PAGE_SHIFT);
+       target_bytes = memparse(memstring, &endchar);
+       set_new_target(target_bytes >> PAGE_SHIFT);
 
-    return count;
+       return count;
 }
 
 static int balloon_read(char *page, char **start, off_t off,
-                        int count, int *eof, void *data)
+                       int count, int *eof, void *data)
 {
-    int len;
-
-#define K(_p) ((_p)<<(PAGE_SHIFT-10))
-    len = sprintf(
-        page,
-        "Current allocation: %8lu kB\n"
-        "Requested target:   %8lu kB\n"
-        "Low-mem balloon:    %8lu kB\n"
-        "High-mem balloon:   %8lu kB\n"
-        "Xen hard limit:     ",
-        K(current_pages), K(target_pages), K(balloon_low), K(balloon_high));
-
-    if ( hard_limit != ~0UL )
-        len += sprintf(
-            page + len, 
-            "%8lu kB (inc. %8lu kB driver headroom)\n",
-            K(hard_limit), K(driver_pages));
-    else
-        len += sprintf(
-            page + len,
-            "     ??? kB\n");
-
-    *eof = 1;
-    return len;
+       int len;
+
+       len = sprintf(
+               page,
+               "Current allocation: %8lu kB\n"
+               "Requested target:   %8lu kB\n"
+               "Low-mem balloon:    %8lu kB\n"
+               "High-mem balloon:   %8lu kB\n"
+               "Xen hard limit:     ",
+               PAGES2KB(current_pages), PAGES2KB(target_pages), 
+               PAGES2KB(balloon_low), PAGES2KB(balloon_high));
+
+       if (hard_limit != ~0UL) {
+               len += sprintf(
+                       page + len, 
+                       "%8lu kB (inc. %8lu kB driver headroom)\n",
+                       PAGES2KB(hard_limit), PAGES2KB(driver_pages));
+       } else {
+               len += sprintf(
+                       page + len,
+                       "     ??? kB\n");
+       }
+
+       *eof = 1;
+       return len;
 }
+#endif
+
+static struct notifier_block xenstore_notifier;
 
 static int __init balloon_init(void)
 {
-    unsigned long pfn;
-    struct page *page;
+       unsigned long pfn;
+       struct page *page;
+
+       if (!is_running_on_xen())
+               return -ENODEV;
+
+       IPRINTK("Initialising balloon driver.\n");
+
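+       /* Start from the memory the domain was booted with; pages between
+          nr_pages and max_pfn are added to the balloon further down. */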
+       current_pages = min(xen_start_info->nr_pages, max_pfn);
+       totalram_pages = current_pages;
+       target_pages  = current_pages;
+       balloon_low   = 0;
+       balloon_high  = 0;
+       driver_pages  = 0UL;
+       hard_limit    = ~0UL;
 
-    IPRINTK("Initialising balloon driver.\n");
+       init_timer(&balloon_timer);
+       balloon_timer.data = 0;
+       balloon_timer.function = balloon_alarm;
+    
+#ifdef CONFIG_PROC_FS
+       if ((balloon_pde = create_xen_proc_entry("balloon", 0644)) == NULL) {
+               WPRINTK("Unable to create /proc/xen/balloon.\n");
+               return -1;
+       }
+
+       balloon_pde->read_proc  = balloon_read;
+       balloon_pde->write_proc = balloon_write;
+#endif
+    
+       /* Initialise the balloon with excess memory space. */
+       for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) {
+               page = pfn_to_page(pfn);
+               if (!PageReserved(page))
+                       balloon_append(page);
+       }
 
-    current_pages = min(xen_start_info.nr_pages, max_pfn);
-    target_pages  = current_pages;
-    balloon_low   = 0;
-    balloon_high  = 0;
-    driver_pages  = 0UL;
-    hard_limit    = ~0UL;
+       target_watch.callback = watch_target;
+       xenstore_notifier.notifier_call = balloon_init_watcher;
 
-    init_timer(&balloon_timer);
-    balloon_timer.data = 0;
-    balloon_timer.function = balloon_alarm;
+       register_xenstore_notifier(&xenstore_notifier);
     
-    if ( (balloon_pde = create_xen_proc_entry("balloon", 0644)) == NULL )
-    {
-        WPRINTK("Unable to create /proc/xen/balloon.\n");
-        return -1;
-    }
-
-    balloon_pde->read_proc  = balloon_read;
-    balloon_pde->write_proc = balloon_write;
-
-    (void)ctrl_if_register_receiver(CMSG_MEM_REQUEST, balloon_ctrlif_rx, 0);
-
-    /* Initialise the balloon with excess memory space. */
-    for ( pfn = xen_start_info.nr_pages; pfn < max_pfn; pfn++ )
-    {
-        page = &mem_map[pfn];
-        if ( !PageReserved(page) )
-            balloon_append(page);
-    }
-
-    return 0;
+       return 0;
 }
 
 subsys_initcall(balloon_init);
 
 void balloon_update_driver_allowance(long delta)
 {
-    unsigned long flags;
-    balloon_lock(flags);
-    driver_pages += delta; /* non-atomic update */
-    balloon_unlock(flags);
+       unsigned long flags;
+
+       balloon_lock(flags);
+       driver_pages += delta;
+       balloon_unlock(flags);
 }
 
-void balloon_put_pages(unsigned long *mfn_list, unsigned long nr_mfns)
+static int dealloc_pte_fn(
+       pte_t *pte, struct page *pmd_page, unsigned long addr, void *data)
 {
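+       /* Called for each pte by apply_to_page_range(): clear the mapping,
+          invalidate the P2M entry and return the frame to Xen. */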
-    unsigned long flags;
+       unsigned long mfn = pte_mfn(*pte);
+       int ret;
+       struct xen_memory_reservation reservation = {
+               .nr_extents   = 1,
+               .extent_order = 0,
+               .domid        = DOMID_SELF
+       };
+       set_xen_guest_handle(reservation.extent_start, &mfn);
+       set_pte_at(&init_mm, addr, pte, __pte_ma(0));
+       set_phys_to_machine(__pa(addr) >> PAGE_SHIFT, INVALID_P2M_ENTRY);
+       ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
+       BUG_ON(ret != 1);
+       return 0;
+}
 
-    balloon_lock(flags);
-    if ( HYPERVISOR_dom_mem_op(MEMOP_decrease_reservation, 
-                               mfn_list, nr_mfns, 0) != nr_mfns )
-        BUG();
-    current_pages -= nr_mfns; /* non-atomic update */
-    balloon_unlock(flags);
+struct page *balloon_alloc_empty_page_range(unsigned long nr_pages)
+{
+       unsigned long vstart, flags;
+       unsigned int  order = get_order(nr_pages * PAGE_SIZE);
+       int ret;
+       unsigned long i;
+       struct page *page;
+
+       vstart = __get_free_pages(GFP_KERNEL, order);
+       if (vstart == 0)
+               return NULL;
+
+       scrub_pages(vstart, 1 << order);
+
+       balloon_lock(flags);
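+       /* Auto-translated guests can return the whole contiguous extent with a
+          single hypercall; PV guests must clear each pte and give back the
+          frames one at a time via dealloc_pte_fn(). */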
+       if (xen_feature(XENFEAT_auto_translated_physmap)) {
+               unsigned long gmfn = __pa(vstart) >> PAGE_SHIFT;
+               struct xen_memory_reservation reservation = {
+                       .nr_extents   = 1,
+                       .extent_order = order,
+                       .domid        = DOMID_SELF
+               };
+               set_xen_guest_handle(reservation.extent_start, &gmfn);
+               ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
+                                          &reservation);
+               if (ret == -ENOSYS)
+                       goto err;
+               BUG_ON(ret != 1);
+       } else {
+               ret = apply_to_page_range(&init_mm, vstart, PAGE_SIZE << order,
+                                         dealloc_pte_fn, NULL);
+               if (ret == -ENOSYS)
+                       goto err;
+               BUG_ON(ret);
+       }
+       current_pages -= 1UL << order;
+       totalram_pages = current_pages;
+       balloon_unlock(flags);
+
+       schedule_work(&balloon_worker);
+
+       flush_tlb_all();
+
+       page = virt_to_page(vstart);
+
+       for (i = 0; i < (1UL << order); i++)
+               init_page_count(page + i);
+
+       return page;
+
+ err:
+       free_pages(vstart, order);
+       balloon_unlock(flags);
+       return NULL;
+}
 
-    schedule_work(&balloon_worker);
+void balloon_dealloc_empty_page_range(
+       struct page *page, unsigned long nr_pages)
+{
+       unsigned long i, flags;
+       unsigned int  order = get_order(nr_pages * PAGE_SIZE);
+
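+       /* The frames behind these pages were already returned to Xen when the
+          range was allocated, so just queue the struct pages on the balloon
+          list and let the worker repopulate them when the target allows. */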
+       balloon_lock(flags);
+       for (i = 0; i < (1UL << order); i++) {
+               BUG_ON(page_count(page + i) != 1);
+               balloon_append(page + i);
+       }
+       balloon_unlock(flags);
+
+       schedule_work(&balloon_worker);
 }
 
-EXPORT_SYMBOL(balloon_update_driver_allowance);
-EXPORT_SYMBOL(balloon_put_pages);
+EXPORT_SYMBOL_GPL(balloon_update_driver_allowance);
+EXPORT_SYMBOL_GPL(balloon_alloc_empty_page_range);
+EXPORT_SYMBOL_GPL(balloon_dealloc_empty_page_range);
+
+MODULE_LICENSE("Dual BSD/GPL");