#include <linux/config.h>
#include <linux/mm.h>
+#include <linux/hugetlb.h>
#include <linux/mman.h>
#include <linux/slab.h>
#include <linux/kernel_stat.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <linux/swapops.h>
+#include <linux/vs_base.h>
spinlock_t swaplock = SPIN_LOCK_UNLOCKED;
unsigned int nr_swapfiles;
static DECLARE_MUTEX(swapon_sem);
/*
- * Array of backing blockdevs, for swap_unplug_fn. We need this because the
- * bdev->unplug_fn can sleep and we cannot hold swap_list_lock while calling
- * the unplug_fn. And swap_list_lock cannot be turned into a semaphore.
+ * We need this because the bdev->unplug_fn can sleep and we cannot
+ * hold swap_list_lock while calling the unplug_fn. And swap_list_lock
+ * cannot be turned into a semaphore.
*/
-static DECLARE_MUTEX(swap_bdevs_sem);
-static struct block_device *swap_bdevs[MAX_SWAPFILES];
+static DECLARE_RWSEM(swap_unplug_sem);
#define SWAPFILE_CLUSTER 256
-/*
- * Caller holds swap_bdevs_sem
- */
-static void install_swap_bdev(struct block_device *bdev)
-{
- int i;
-
- for (i = 0; i < MAX_SWAPFILES; i++) {
- if (swap_bdevs[i] == NULL) {
- swap_bdevs[i] = bdev;
- return;
- }
- }
- BUG();
-}
-
-static void remove_swap_bdev(struct block_device *bdev)
-{
- int i;
-
- for (i = 0; i < MAX_SWAPFILES; i++) {
- if (swap_bdevs[i] == bdev) {
- memcpy(&swap_bdevs[i], &swap_bdevs[i + 1],
- (MAX_SWAPFILES - i - 1) * sizeof(*swap_bdevs));
- swap_bdevs[MAX_SWAPFILES - 1] = NULL;
- return;
- }
- }
- BUG();
-}
-
-/*
- * Unlike a standard unplug_io_fn, swap_unplug_io_fn is never called
- * through swap's backing_dev_info (which is only used by shrink_list),
- * but directly from sync_page when PageSwapCache: and takes the page
- * as argument, so that it can find the right device from swp_entry_t.
- */
-void swap_unplug_io_fn(struct page *page)
+void swap_unplug_io_fn(struct backing_dev_info *unused_bdi, struct page *page)
{
swp_entry_t entry;
- down(&swap_bdevs_sem);
+ down_read(&swap_unplug_sem);
entry.val = page->private;
if (PageSwapCache(page)) {
- struct block_device *bdev = swap_bdevs[swp_type(entry)];
+ struct block_device *bdev = swap_info[swp_type(entry)].bdev;
struct backing_dev_info *bdi;
- if (bdev) {
- bdi = bdev->bd_inode->i_mapping->backing_dev_info;
- (*bdi->unplug_io_fn)(bdi);
- }
+ /*
+ * If the page is removed from swapcache from under us (with a
+ * racy try_to_unuse/swapoff) we need an additional reference
+ * count to avoid reading garbage from page->private above. If
+ * the WARN_ON triggers during a swapoff it may be the race
+ * condition and it's harmless. However if it triggers without
+ * swapoff it signals a problem.
+ */
+ WARN_ON(page_count(page) <= 1);
+
+ bdi = bdev->bd_inode->i_mapping->backing_dev_info;
+ bdi->unplug_io_fn(bdi, page);
}
- up(&swap_bdevs_sem);
+ up_read(&swap_unplug_sem);
}
static inline int scan_swap_map(struct swap_info_struct *si)
/* Is the only swap cache user the cache itself? */
if (p->swap_map[swp_offset(entry)] == 1) {
/* Recheck the page count with the swapcache lock held.. */
- spin_lock(&swapper_space.tree_lock);
+ spin_lock_irq(&swapper_space.tree_lock);
if (page_count(page) == 2)
retval = 1;
- spin_unlock(&swapper_space.tree_lock);
+ spin_unlock_irq(&swapper_space.tree_lock);
}
swap_info_put(p);
}
retval = 0;
if (p->swap_map[swp_offset(entry)] == 1) {
/* Recheck the page count with the swapcache lock held.. */
- spin_lock(&swapper_space.tree_lock);
+ spin_lock_irq(&swapper_space.tree_lock);
if ((page_count(page) == 2) && !PageWriteback(page)) {
__delete_from_swap_cache(page);
SetPageDirty(page);
retval = 1;
}
- spin_unlock(&swapper_space.tree_lock);
+ spin_unlock_irq(&swapper_space.tree_lock);
}
swap_info_put(p);
p = swap_info_get(entry);
if (p) {
if (swap_entry_free(p, swp_offset(entry)) == 1) {
- spin_lock(&swapper_space.tree_lock);
+ spin_lock_irq(&swapper_space.tree_lock);
page = radix_tree_lookup(&swapper_space.page_tree,
entry.val);
if (page && TestSetPageLocked(page))
page = NULL;
- spin_unlock(&swapper_space.tree_lock);
+ spin_unlock_irq(&swapper_space.tree_lock);
}
swap_info_put(p);
}
/* vma->vm_mm->page_table_lock is held */
static void
unuse_pte(struct vm_area_struct *vma, unsigned long address, pte_t *dir,
- swp_entry_t entry, struct page *page, struct pte_chain **pte_chainp)
+ swp_entry_t entry, struct page *page)
{
// vma->vm_mm->rss++;
vx_rsspages_inc(vma->vm_mm);
get_page(page);
set_pte(dir, pte_mkold(mk_pte(page, vma->vm_page_prot)));
- *pte_chainp = page_add_rmap(page, dir, *pte_chainp);
+ page_add_anon_rmap(page, vma, address);
swap_free(entry);
}
/* vma->vm_mm->page_table_lock is held */
-static int unuse_pmd(struct vm_area_struct * vma, pmd_t *dir,
+static unsigned long unuse_pmd(struct vm_area_struct * vma, pmd_t *dir,
unsigned long address, unsigned long size, unsigned long offset,
- swp_entry_t entry, struct page *page, struct pte_chain **pte_chainp)
+ swp_entry_t entry, struct page *page)
{
pte_t * pte;
unsigned long end;
* Test inline before going to call unuse_pte.
*/
if (unlikely(pte_same(*pte, swp_pte))) {
- unuse_pte(vma, offset + address, pte,
- entry, page, pte_chainp);
+ unuse_pte(vma, offset + address, pte, entry, page);
pte_unmap(pte);
- return 1;
+ /* add 1 since address may be 0 */
+ return 1 + offset + address;
}
address += PAGE_SIZE;
pte++;
}
/* vma->vm_mm->page_table_lock is held */
-static int unuse_pgd(struct vm_area_struct * vma, pgd_t *dir,
+static unsigned long unuse_pgd(struct vm_area_struct * vma, pgd_t *dir,
unsigned long address, unsigned long size,
- swp_entry_t entry, struct page *page, struct pte_chain **pte_chainp)
+ swp_entry_t entry, struct page *page)
{
pmd_t * pmd;
unsigned long offset, end;
+ unsigned long foundaddr;
if (pgd_none(*dir))
return 0;
if (address >= end)
BUG();
do {
- if (unuse_pmd(vma, pmd, address, end - address,
- offset, entry, page, pte_chainp))
- return 1;
+ foundaddr = unuse_pmd(vma, pmd, address, end - address,
+ offset, entry, page);
+ if (foundaddr)
+ return foundaddr;
address = (address + PMD_SIZE) & PMD_MASK;
pmd++;
} while (address && (address < end));
}
/* vma->vm_mm->page_table_lock is held */
-static int unuse_vma(struct vm_area_struct * vma, pgd_t *pgdir,
- swp_entry_t entry, struct page *page, struct pte_chain **pte_chainp)
+static unsigned long unuse_vma(struct vm_area_struct * vma, pgd_t *pgdir,
+ swp_entry_t entry, struct page *page)
{
unsigned long start = vma->vm_start, end = vma->vm_end;
+ unsigned long foundaddr;
if (start >= end)
BUG();
do {
- if (unuse_pgd(vma, pgdir, start, end - start,
- entry, page, pte_chainp))
- return 1;
+ foundaddr = unuse_pgd(vma, pgdir, start, end - start,
+ entry, page);
+ if (foundaddr)
+ return foundaddr;
start = (start + PGDIR_SIZE) & PGDIR_MASK;
pgdir++;
} while (start && (start < end));
swp_entry_t entry, struct page* page)
{
struct vm_area_struct* vma;
- struct pte_chain *pte_chain;
-
- pte_chain = pte_chain_alloc(GFP_KERNEL);
- if (!pte_chain)
- return -ENOMEM;
+ unsigned long foundaddr = 0;
/*
* Go through process' page directory.
*/
+ down_read(&mm->mmap_sem);
spin_lock(&mm->page_table_lock);
for (vma = mm->mmap; vma; vma = vma->vm_next) {
- pgd_t * pgd = pgd_offset(mm, vma->vm_start);
- if (unuse_vma(vma, pgd, entry, page, &pte_chain))
- break;
+ if (!is_vm_hugetlb_page(vma)) {
+ pgd_t * pgd = pgd_offset(mm, vma->vm_start);
+ foundaddr = unuse_vma(vma, pgd, entry, page);
+ if (foundaddr)
+ break;
+ }
}
spin_unlock(&mm->page_table_lock);
- pte_chain_free(pte_chain);
+ up_read(&mm->mmap_sem);
+ /*
+ * Currently unuse_process cannot fail, but leave error handling
+ * at call sites for now, since we change it from time to time.
+ */
return 0;
}
*/
swap_map = &si->swap_map[i];
entry = swp_entry(type, i);
- page = read_swap_cache_async(entry);
+ page = read_swap_cache_async(entry, NULL, 0);
if (!page) {
/*
* Either swap_duplicate() failed because entry
current->flags |= PF_SWAPOFF;
err = try_to_unuse(type);
current->flags &= ~PF_SWAPOFF;
+
+ /* wait for any unplug function to finish */
+ down_write(&swap_unplug_sem);
+ up_write(&swap_unplug_sem);
+
if (err) {
/* re-insert swap space back into swap_list */
swap_list_lock();
goto out_dput;
}
down(&swapon_sem);
- down(&swap_bdevs_sem);
swap_list_lock();
swap_device_lock(p);
swap_file = p->swap_file;
destroy_swap_extents(p);
swap_device_unlock(p);
swap_list_unlock();
- remove_swap_bdev(p->bdev);
- up(&swap_bdevs_sem);
up(&swapon_sem);
vfree(swap_map);
if (S_ISBLK(mapping->host->i_mode)) {
goto bad_swap;
down(&swapon_sem);
- down(&swap_bdevs_sem);
swap_list_lock();
swap_device_lock(p);
p->flags = SWP_ACTIVE;
}
swap_device_unlock(p);
swap_list_unlock();
- install_swap_bdev(p->bdev);
- up(&swap_bdevs_sem);
up(&swapon_sem);
error = 0;
goto out;