1 /******************************************************************************
4 * Xen balloon driver - enables returning/claiming memory to/from Xen.
6 * Copyright (c) 2003, B Dragovic
7 * Copyright (c) 2003-2004, M Williamson, K Fraser
8 * Copyright (c) 2005 Dan M. Smith, IBM Corporation
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License version 2
12 * as published by the Free Software Foundation; or, when distributed
13 * separately from the Linux kernel or incorporated into other
14 * software packages, subject to the following license:
16 * Permission is hereby granted, free of charge, to any person obtaining a copy
17 * of this source file (the "Software"), to deal in the Software without
18 * restriction, including without limitation the rights to use, copy, modify,
19 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
20 * and to permit persons to whom the Software is furnished to do so, subject to
21 * the following conditions:
23 * The above copyright notice and this permission notice shall be included in
24 * all copies or substantial portions of the Software.
26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
27 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
28 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
29 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
30 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
31 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
35 #include <linux/kernel.h>
36 #include <linux/module.h>
37 #include <linux/sched.h>
38 #include <linux/errno.h>
40 #include <linux/mman.h>
41 #include <linux/smp_lock.h>
42 #include <linux/pagemap.h>
43 #include <linux/bootmem.h>
44 #include <linux/highmem.h>
45 #include <linux/vmalloc.h>
46 #include <xen/xen_proc.h>
47 #include <asm/hypervisor.h>
48 #include <xen/balloon.h>
49 #include <xen/interface/memory.h>
50 #include <asm/pgalloc.h>
51 #include <asm/pgtable.h>
52 #include <asm/uaccess.h>
54 #include <linux/list.h>
56 #include <xen/xenbus.h>
58 #define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10))
/* /proc/xen/balloon entry; created in balloon_init() under CONFIG_PROC_FS. */
61 static struct proc_dir_entry *balloon_pde;
/* Serialises balloon_process() workers against each other. */
64 static DECLARE_MUTEX(balloon_mutex);
67 * Protects atomic reservation decrease/increase against concurrent increases.
68 * Also protects non-atomic updates of current_pages and driver_pages, and
71 DEFINE_SPINLOCK(balloon_lock);
73 /* We aim for 'current allocation' == 'target allocation'. */
74 static unsigned long current_pages;
75 static unsigned long target_pages;
77 /* We increase/decrease in batches which fit in a page */
78 static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)];
80 /* VM /proc information for memory */
81 extern unsigned long totalram_pages;
83 /* We may hit the hard limit in Xen. If we do then we remember it. */
84 static unsigned long hard_limit;
87 * Drivers may alter the memory reservation independently, but they must
88 * inform the balloon driver so that we can avoid hitting the hard limit.
90 static unsigned long driver_pages;
92 /* List of ballooned pages, threaded through the mem_map array. */
93 static LIST_HEAD(ballooned_pages);
/* Page counts for the ballooned list, apparently split by low/high zone
 * (reported separately by balloon_read below). */
94 static unsigned long balloon_low, balloon_high;
96 /* Main work function, always executed in process context. */
97 static void balloon_process(void *unused);
98 static DECLARE_WORK(balloon_worker, balloon_process, NULL);
/* Re-arms balloon_worker when a pass could not reach the target. */
99 static struct timer_list balloon_timer;
101 /* When ballooning out (allocating memory to return to Xen) we don't really
102 want the kernel to try too hard since that can trigger the oom killer. */
103 #define GFP_BALLOON \
104 (GFP_HIGHUSER | __GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC)
/* Ballooned pages are threaded through page->lru, unused while ballooned. */
106 #define PAGE_TO_LIST(p) (&(p)->lru)
107 #define LIST_TO_PAGE(l) list_entry((l), struct page, lru)
108 #define UNLIST_PAGE(p) \
110 list_del(PAGE_TO_LIST(p)); \
111 PAGE_TO_LIST(p)->next = NULL; \
112 PAGE_TO_LIST(p)->prev = NULL; \
115 #define IPRINTK(fmt, args...) \
116 printk(KERN_INFO "xen_mem: " fmt, ##args)
117 #define WPRINTK(fmt, args...) \
118 printk(KERN_WARNING "xen_mem: " fmt, ##args)
120 /* balloon_append: add the given page to the balloon. */
121 static void balloon_append(struct page *page)
123 /* Lowmem is re-populated first, so highmem pages go at list tail. */
124 if (PageHighMem(page)) {
125 list_add_tail(PAGE_TO_LIST(page), &ballooned_pages);
/* NOTE(review): this excerpt is truncated -- the highmem branch presumably
 * also increments balloon_high, and an else branch around the list_add
 * below increments balloon_low; confirm against the full source. */
128 list_add(PAGE_TO_LIST(page), &ballooned_pages);
133 /* balloon_retrieve: rescue a page from the balloon, if it is not empty. */
134 static struct page *balloon_retrieve(void)
/* Empty balloon: bail out (the early-return line is missing from this
 * excerpt). */
138 if (list_empty(&ballooned_pages))
/* Take the list head: lowmem pages (added at the head) come out first. */
141 page = LIST_TO_PAGE(ballooned_pages.next);
/* NOTE(review): body truncated here -- the UNLIST_PAGE() call and the
 * balloon_high/balloon_low bookkeeping keyed on PageHighMem() are not
 * visible in this excerpt; confirm against the full source. */
144 if (PageHighMem(page))
/* Peek at the head of the ballooned list without removing it.
 * NOTE(review): the empty-list return value is outside this excerpt --
 * presumably NULL; confirm. */
152 static struct page *balloon_first_page(void)
154 if (list_empty(&ballooned_pages))
156 return LIST_TO_PAGE(ballooned_pages.next);
/* Successor of @page on the ballooned list; when @page is the last entry
 * the list head sentinel is detected. NOTE(review): the sentinel-case
 * return line (presumably NULL) is missing from this excerpt. */
159 static struct page *balloon_next_page(struct page *page)
161 struct list_head *next = PAGE_TO_LIST(page)->next;
162 if (next == &ballooned_pages)
164 return LIST_TO_PAGE(next);
/* balloon_timer callback: defer all real work to process context by
 * kicking the balloon_worker workqueue item. */
167 static void balloon_alarm(unsigned long unused)
169 schedule_work(&balloon_worker);
/* Effective target: target_pages clamped to the Xen hard limit and to the
 * maximum we could possibly reach (current allocation plus every page
 * currently held in the balloon). Return statement is outside this excerpt. */
172 static unsigned long current_target(void)
174 unsigned long target = min(target_pages, hard_limit);
175 if (target > (current_pages + balloon_low + balloon_high))
176 target = current_pages + balloon_low + balloon_high;
/*
 * increase_reservation: balloon in up to @nr_pages pages.
 * Asks Xen (XENMEM_populate_physmap) to back the PFNs of pages currently
 * held in the balloon, rewires the P2M/M2P mappings and, for lowmem, the
 * kernel linear mapping, then releases the pages to the page allocator.
 * On partial success the Xen hard limit is reprobed and recorded.
 * Fix: removed a stray double semicolon in the frame_list fill loop.
 */
180 static int increase_reservation(unsigned long nr_pages)
182 unsigned long pfn, i, flags;
185 struct xen_memory_reservation reservation = {
/* Batch size is capped by the static one-page frame_list. */
191 if (nr_pages > ARRAY_SIZE(frame_list))
192 nr_pages = ARRAY_SIZE(frame_list);
/* Collect the PFNs we intend to re-populate, in balloon-list order
 * (lowmem first, since it sits at the head of the list). */
196 page = balloon_first_page();
197 for (i = 0; i < nr_pages; i++) {
198 BUG_ON(page == NULL);
199 frame_list[i] = page_to_pfn(page);
200 page = balloon_next_page(page);
203 set_xen_guest_handle(reservation.extent_start, frame_list);
204 reservation.nr_extents = nr_pages;
205 rc = HYPERVISOR_memory_op(
206 XENMEM_populate_physmap, &reservation);
211 /* We hit the Xen hard limit: reprobe. */
/* Hand back the frames Xen did grant, then remember the limit. */
212 reservation.nr_extents = rc;
213 ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
218 hard_limit = current_pages + rc - driver_pages;
/* Full success: pull each page out of the balloon and wire it back up. */
222 for (i = 0; i < nr_pages; i++) {
223 page = balloon_retrieve();
224 BUG_ON(page == NULL);
226 pfn = page_to_pfn(page);
/* A PV (non-auto-translated) page must not already have a machine
 * mapping before we install the freshly populated frame. */
227 BUG_ON(!xen_feature(XENFEAT_auto_translated_physmap) &&
228 phys_to_machine_mapping_valid(pfn));
230 /* Update P->M and M->P tables. */
231 set_phys_to_machine(pfn, frame_list[i]);
232 xen_machphys_update(frame_list[i], pfn);
234 /* Link back into the page tables if not highmem. */
235 if (pfn < max_low_pfn) {
237 ret = HYPERVISOR_update_va_mapping(
238 (unsigned long)__va(pfn << PAGE_SHIFT),
239 pfn_pte_ma(frame_list[i], PAGE_KERNEL),
244 /* Relinquish the page back to the allocator. */
245 ClearPageReserved(page);
246 init_page_count(page);
/* Account for the newly populated pages (under balloon_lock). */
250 current_pages += nr_pages;
251 totalram_pages = current_pages;
254 balloon_unlock(flags);
/*
 * decrease_reservation: balloon out up to @nr_pages pages -- allocate them
 * from the kernel, tear down their mappings, and hand the underlying
 * machine frames back to Xen via XENMEM_decrease_reservation.
 * Error/early-exit paths are partly missing from this excerpt.
 */
259 static int decrease_reservation(unsigned long nr_pages)
261 unsigned long pfn, i, flags;
266 struct xen_memory_reservation reservation = {
/* Batch size is capped by the static one-page frame_list. */
272 if (nr_pages > ARRAY_SIZE(frame_list))
273 nr_pages = ARRAY_SIZE(frame_list);
275 for (i = 0; i < nr_pages; i++) {
/* GFP_BALLOON: no-retry allocation so we never trigger the OOM killer. */
276 if ((page = alloc_page(GFP_BALLOON)) == NULL) {
282 pfn = page_to_pfn(page);
283 frame_list[i] = pfn_to_mfn(pfn);
285 if (!PageHighMem(page)) {
286 v = phys_to_virt(pfn << PAGE_SHIFT);
/* Zap the kernel linear mapping before giving the frame away. */
288 ret = HYPERVISOR_update_va_mapping(
289 (unsigned long)v, __pte_ma(0), 0);
292 #ifdef CONFIG_XEN_SCRUB_PAGES
301 /* Ensure that ballooned highmem pages don't have kmaps. */
307 /* No more mappings: invalidate P2M and add to balloon. */
308 for (i = 0; i < nr_pages; i++) {
309 pfn = mfn_to_pfn(frame_list[i]);
310 set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
311 balloon_append(pfn_to_page(pfn));
314 set_xen_guest_handle(reservation.extent_start, frame_list);
315 reservation.nr_extents = nr_pages;
316 ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
/* Xen must accept every frame we queued; anything else is fatal. */
317 BUG_ON(ret != nr_pages);
/* Account for the released pages (under balloon_lock). */
319 current_pages -= nr_pages;
320 totalram_pages = current_pages;
322 balloon_unlock(flags);
328 * We avoid multiple worker processes conflicting via the balloon mutex.
329 * We may of course race updates of the target counts (which are protected
330 * by the balloon lock), or with changes to the Xen hard limit, but we will
331 * recover from these in time.
/* Workqueue handler: repeatedly step current_pages toward current_target(). */
333 static void balloon_process(void *unused)
/* One worker at a time; held for the whole adjustment loop. */
338 down(&balloon_mutex);
/* Positive credit => balloon in (grow); negative => balloon out (shrink). */
341 credit = current_target() - current_pages;
343 need_sleep = (increase_reservation(credit) != 0);
345 need_sleep = (decrease_reservation(-credit) != 0);
347 #ifndef CONFIG_PREEMPT
351 } while ((credit != 0) && !need_sleep);
353 /* Schedule more work if there is some still to be done. */
/* Back off for roughly a second via balloon_timer instead of spinning. */
354 if (current_target() != current_pages)
355 mod_timer(&balloon_timer, jiffies + HZ);
360 /* Resets the Xen limit, sets new target, and kicks off processing. */
361 static void set_new_target(unsigned long target)
363 /* No need for lock. Not read-modify-write updates. */
/* NOTE(review): the hard_limit reset implied by the comment above is not
 * visible in this excerpt -- confirm against the full source. */
365 target_pages = target;
366 schedule_work(&balloon_worker);
/* Xenstore watch on this domain's memory/target key; callback is wired
 * up in balloon_init(). */
369 static struct xenbus_watch target_watch =
371 .node = "memory/target"
374 /* React to a change in the target key */
/* Callback for target_watch: re-read memory/target from xenstore and make
 * it the new balloon target. */
375 static void watch_target(struct xenbus_watch *watch,
376 const char **vec, unsigned int len)
378 unsigned long long new_target;
381 err = xenbus_scanf(XBT_NIL, "memory", "target", "%llu", &new_target);
383 /* This is ok (for domain0 at least) - so just return */
387 /* The given memory/target value is in KiB, so it needs converting to
388 * pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10.
390 set_new_target(new_target >> (PAGE_SHIFT - 10));
/* Xenstore-ready notifier: install the memory/target watch once xenstore
 * comes up; a failure is logged (return value handling is outside this
 * excerpt). */
393 static int balloon_init_watcher(struct notifier_block *notifier,
399 err = register_xenbus_watch(&target_watch);
401 printk(KERN_ERR "Failed to set balloon watcher\n");
406 #ifdef CONFIG_PROC_FS
/*
 * /proc/xen/balloon write handler: parse a human-readable memory size
 * (via memparse, which accepts k/m/g suffixes) and make it the new
 * balloon target. Requires CAP_SYS_ADMIN; count is bounds-checked
 * against the local buffer before copy_from_user.
 */
407 static int balloon_write(struct file *file, const char __user *buffer,
408 unsigned long count, void *data)
410 char memstring[64], *endchar;
411 unsigned long long target_bytes;
413 if (!capable(CAP_SYS_ADMIN))
417 return -EBADMSG; /* runt */
418 if (count > sizeof(memstring))
419 return -EFBIG; /* too long */
421 if (copy_from_user(memstring, buffer, count))
/* Guarantee NUL-termination before handing the buffer to memparse(). */
423 memstring[sizeof(memstring)-1] = '\0';
425 target_bytes = memparse(memstring, &endchar);
426 set_new_target(target_bytes >> PAGE_SHIFT);
/* /proc/xen/balloon read handler: report current/target allocation, the
 * low/high balloon sizes and driver pages, all in kB. */
431 static int balloon_read(char *page, char **start, off_t off,
432 int count, int *eof, void *data)
438 "Current allocation: %8lu kB\n"
439 "Requested target: %8lu kB\n"
440 "Low-mem balloon: %8lu kB\n"
441 "High-mem balloon: %8lu kB\n"
442 "Driver pages: %8lu kB\n"
444 PAGES2KB(current_pages), PAGES2KB(target_pages),
445 PAGES2KB(balloon_low), PAGES2KB(balloon_high),
446 PAGES2KB(driver_pages));
/* ~0UL in hard_limit means "no Xen hard limit observed yet". */
448 if (hard_limit != ~0UL)
449 len += sprintf(page + len, "%8lu kB\n", PAGES2KB(hard_limit));
451 len += sprintf(page + len, " ??? kB\n");
458 static struct notifier_block xenstore_notifier;
/*
 * Module init (subsys_initcall): seed the page counts from start-of-day
 * info, set up the retry timer, create the /proc interface, stock the
 * balloon with any page frames above the initial allocation, and hook
 * the xenstore notifier that installs the memory/target watch.
 */
460 static int __init balloon_init(void)
465 if (!is_running_on_xen())
468 IPRINTK("Initialising balloon driver.\n");
/* Start fully inflated-to-current: target equals what Xen gave us. */
470 current_pages = min(xen_start_info->nr_pages, max_pfn);
471 totalram_pages = current_pages;
472 target_pages = current_pages;
478 init_timer(&balloon_timer);
479 balloon_timer.data = 0;
480 balloon_timer.function = balloon_alarm;
482 #ifdef CONFIG_PROC_FS
483 if ((balloon_pde = create_xen_proc_entry("balloon", 0644)) == NULL) {
484 WPRINTK("Unable to create /proc/xen/balloon.\n");
488 balloon_pde->read_proc = balloon_read;
489 balloon_pde->write_proc = balloon_write;
492 /* Initialise the balloon with excess memory space. */
493 for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) {
494 page = pfn_to_page(pfn);
495 if (!PageReserved(page))
496 balloon_append(page);
499 target_watch.callback = watch_target;
500 xenstore_notifier.notifier_call = balloon_init_watcher;
502 register_xenstore_notifier(&xenstore_notifier);
507 subsys_initcall(balloon_init);
/* Exported hook: a driver that alters the reservation on its own accounts
 * for it here (under balloon_lock) so the hard-limit arithmetic in
 * increase_reservation() stays accurate. @delta may be negative. */
509 void balloon_update_driver_allowance(long delta)
514 driver_pages += delta;
515 balloon_unlock(flags);
/* apply_to_page_range() callback: surrender the machine frame behind one
 * kernel PTE -- clear the PTE, invalidate the P2M slot for the address,
 * then return the MFN to Xen via a one-extent decrease_reservation. */
518 static int dealloc_pte_fn(
519 pte_t *pte, struct page *pmd_page, unsigned long addr, void *data)
521 unsigned long mfn = pte_mfn(*pte);
523 struct xen_memory_reservation reservation = {
528 set_xen_guest_handle(reservation.extent_start, &mfn);
529 set_pte_at(&init_mm, addr, pte, __pte_ma(0));
530 set_phys_to_machine(__pa(addr) >> PAGE_SHIFT, INVALID_P2M_ENTRY);
531 ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
/*
 * Allocate @nr_pages kernel pages whose backing machine frames are handed
 * back to Xen, leaving empty pseudo-physical slots that a driver can map
 * foreign/grant frames into. Returns the page vector; the allocation
 * failure and cleanup paths are partly missing from this excerpt.
 */
536 struct page **alloc_empty_pages_and_pagevec(int nr_pages)
538 unsigned long vaddr, flags;
539 struct page *page, **pagevec;
542 pagevec = kmalloc(sizeof(page) * nr_pages, GFP_KERNEL);
546 for (i = 0; i < nr_pages; i++) {
547 page = pagevec[i] = alloc_page(GFP_KERNEL);
551 vaddr = (unsigned long)page_address(page);
/* Scrub before releasing the frame so no kernel data leaks to Xen. */
553 scrub_pages(vaddr, 1);
/* Auto-translated guests just decrease the reservation by GMFN; PV
 * guests must also rip the frame out of their own page tables via
 * dealloc_pte_fn. */
557 if (xen_feature(XENFEAT_auto_translated_physmap)) {
558 unsigned long gmfn = page_to_pfn(page);
559 struct xen_memory_reservation reservation = {
564 set_xen_guest_handle(reservation.extent_start, &gmfn);
565 ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
568 ret = 0; /* success */
570 ret = apply_to_page_range(&init_mm, vaddr, PAGE_SIZE,
571 dealloc_pte_fn, NULL);
575 balloon_unlock(flags);
/* Each surrendered frame shrinks the current allocation by one. */
580 totalram_pages = --current_pages;
582 balloon_unlock(flags);
/* Kick the worker so the balloon can rebalance toward its target. */
586 schedule_work(&balloon_worker);
/* NOTE(review): this tail appears to be an error-unwind path (pages pushed
 * back into the balloon) -- the surrounding control flow is missing from
 * this excerpt. */
593 balloon_append(pagevec[i]);
594 balloon_unlock(flags);
/* Return a vector obtained from alloc_empty_pages_and_pagevec() to the
 * balloon; the vector's own kfree is outside this excerpt. */
600 void free_empty_pages_and_pagevec(struct page **pagevec, int nr_pages)
609 for (i = 0; i < nr_pages; i++) {
/* Pages must have no other users when handed back to the balloon. */
610 BUG_ON(page_count(pagevec[i]) != 1);
611 balloon_append(pagevec[i]);
613 balloon_unlock(flags);
/* Let the worker repopulate/rebalance with the returned slots. */
617 schedule_work(&balloon_worker);
/* Exported hook: a driver donates a single page straight into the balloon
 * (under balloon_lock), then the worker is kicked to rebalance. */
620 void balloon_release_driver_page(struct page *page)
625 balloon_append(page);
627 balloon_unlock(flags);
629 schedule_work(&balloon_worker);
/* Public (GPL-only) interface offered to other Xen drivers. */
632 EXPORT_SYMBOL_GPL(balloon_update_driver_allowance);
633 EXPORT_SYMBOL_GPL(alloc_empty_pages_and_pagevec);
634 EXPORT_SYMBOL_GPL(free_empty_pages_and_pagevec);
635 EXPORT_SYMBOL_GPL(balloon_release_driver_page);
637 MODULE_LICENSE("Dual BSD/GPL");