vserver 2.0 rc7

diff --git a/mm/swapfile.c b/mm/swapfile.c
index 2519963..e905448 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -24,7 +24,6 @@
 #include <linux/module.h>
 #include <linux/rmap.h>
 #include <linux/security.h>
-#include <linux/acct.h>
 #include <linux/backing-dev.h>
 #include <linux/syscalls.h>
 
@@ -81,7 +80,7 @@ void swap_unplug_io_fn(struct backing_dev_info *unused_bdi, struct page *page)
                WARN_ON(page_count(page) <= 1);
 
                bdi = bdev->bd_inode->i_mapping->backing_dev_info;
-               bdi->unplug_io_fn(bdi, page);
+               blk_run_backing_dev(bdi, page);
        }
        up_read(&swap_unplug_sem);
 }
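
[The open-coded unplug above now goes through blk_run_backing_dev(). In kernels of this vintage that helper is a thin inline in include/linux/blkdev.h that adds a NULL check before calling through; roughly:

	static inline void blk_run_backing_dev(struct backing_dev_info *bdi,
					       struct page *page)
	{
		if (bdi && bdi->unplug_io_fn)
			bdi->unplug_io_fn(bdi, page);
	}
]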
@@ -293,10 +292,10 @@ static int exclusive_swap_page(struct page *page)
                /* Is the only swap cache user the cache itself? */
                if (p->swap_map[swp_offset(entry)] == 1) {
                        /* Recheck the page count with the swapcache lock held.. */
-                       spin_lock_irq(&swapper_space.tree_lock);
+                       write_lock_irq(&swapper_space.tree_lock);
                        if (page_count(page) == 2)
                                retval = 1;
-                       spin_unlock_irq(&swapper_space.tree_lock);
+                       write_unlock_irq(&swapper_space.tree_lock);
                }
                swap_info_put(p);
        }
@@ -364,13 +363,13 @@ int remove_exclusive_swap_page(struct page *page)
        retval = 0;
        if (p->swap_map[swp_offset(entry)] == 1) {
                /* Recheck the page count with the swapcache lock held.. */
-               spin_lock_irq(&swapper_space.tree_lock);
+               write_lock_irq(&swapper_space.tree_lock);
                if ((page_count(page) == 2) && !PageWriteback(page)) {
                        __delete_from_swap_cache(page);
                        SetPageDirty(page);
                        retval = 1;
                }
-               spin_unlock_irq(&swapper_space.tree_lock);
+               write_unlock_irq(&swapper_space.tree_lock);
        }
        swap_info_put(p);
 
@@ -393,14 +392,8 @@ void free_swap_and_cache(swp_entry_t entry)
 
        p = swap_info_get(entry);
        if (p) {
-               if (swap_entry_free(p, swp_offset(entry)) == 1) {
-                       spin_lock_irq(&swapper_space.tree_lock);
-                       page = radix_tree_lookup(&swapper_space.page_tree,
-                               entry.val);
-                       if (page && TestSetPageLocked(page))
-                               page = NULL;
-                       spin_unlock_irq(&swapper_space.tree_lock);
-               }
+               if (swap_entry_free(p, swp_offset(entry)) == 1)
+                       page = find_trylock_page(&swapper_space, entry.val);
                swap_info_put(p);
        }
        if (page) {
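
[The deleted block is essentially what find_trylock_page() does in mm/filemap.c of this era, except that the helper takes only the read side of the mapping's tree_lock (an rwlock_t in this tree, which is also why the earlier hunks switch from spin_lock_irq to write_lock_irq). Approximately:

	struct page *find_trylock_page(struct address_space *mapping,
				       unsigned long offset)
	{
		struct page *page;

		read_lock_irq(&mapping->tree_lock);
		page = radix_tree_lookup(&mapping->page_tree, offset);
		if (page && TestSetPageLocked(page))
			page = NULL;
		read_unlock_irq(&mapping->tree_lock);
		return page;
	}
]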
@@ -420,170 +413,121 @@ void free_swap_and_cache(swp_entry_t entry)
 }
 
 /*
- * The swap entry has been read in advance, and we return 1 to indicate
- * that the page has been used or is no longer needed.
- *
  * Always set the resulting pte to be nowrite (the same as COW pages
  * after one process has exited).  We don't know just how many PTEs will
  * share this swap entry, so be cautious and let do_wp_page work out
  * what to do if a write is requested later.
+ *
+ * vma->vm_mm->page_table_lock is held.
  */
-/* vma->vm_mm->page_table_lock is held */
-static void
-unuse_pte(struct vm_area_struct *vma, unsigned long address, pte_t *dir,
-       swp_entry_t entry, struct page *page)
+static void unuse_pte(struct vm_area_struct *vma, pte_t *pte,
+               unsigned long addr, swp_entry_t entry, struct page *page)
 {
-       vx_rsspages_inc(vma->vm_mm);
+       inc_mm_counter(vma->vm_mm, rss);
        get_page(page);
-       set_pte(dir, pte_mkold(mk_pte(page, vma->vm_page_prot)));
-       page_add_anon_rmap(page, vma, address);
+       set_pte_at(vma->vm_mm, addr, pte,
+                  pte_mkold(mk_pte(page, vma->vm_page_prot)));
+       page_add_anon_rmap(page, vma, addr);
        swap_free(entry);
-       acct_update_integrals();
-       update_mem_hiwater();
+       /*
+        * Move the page to the active list so it is not
+        * immediately swapped out again after swapon.
+        */
+       activate_page(page);
 }
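
[Two interface updates appear in unuse_pte(): set_pte_at() passes the mm and address down so architectures that need them for cache or TLB maintenance have them, and inc_mm_counter() wraps the rss accounting. On architectures with no extra requirements both collapse to the old forms; a sketch of the generic fallbacks (exact definitions vary per tree):

	/* generic fallback: mm and addr are unused on most architectures */
	#define set_pte_at(mm, addr, ptep, pteval)	set_pte(ptep, pteval)

	/* mm counters such as rss are accessed through wrappers like this */
	#define inc_mm_counter(mm, member)	(mm)->_##member++
]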
 
-/* vma->vm_mm->page_table_lock is held */
-static unsigned long unuse_pmd(struct vm_area_struct *vma, pmd_t *dir,
-       unsigned long address, unsigned long end,
-       swp_entry_t entry, struct page *page)
+static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
+                               unsigned long addr, unsigned long end,
+                               swp_entry_t entry, struct page *page)
 {
        pte_t *pte;
        pte_t swp_pte = swp_entry_to_pte(entry);
 
-       if (pmd_none(*dir))
-               return 0;
-       if (pmd_bad(*dir)) {
-               pmd_ERROR(*dir);
-               pmd_clear(dir);
-               return 0;
-       }
-       pte = pte_offset_map(dir, address);
+       pte = pte_offset_map(pmd, addr);
        do {
                /*
                 * swapoff spends a _lot_ of time in this loop!
                 * Test inline before going to call unuse_pte.
                 */
                if (unlikely(pte_same(*pte, swp_pte))) {
-                       unuse_pte(vma, address, pte, entry, page);
+                       unuse_pte(vma, pte, addr, entry, page);
                        pte_unmap(pte);
-
-                       /*
-                        * Move the page to the active list so it is not
-                        * immediately swapped out again after swapon.
-                        */
-                       activate_page(page);
-
-                       /* add 1 since address may be 0 */
-                       return 1 + address;
+                       return 1;
                }
-               address += PAGE_SIZE;
-               pte++;
-       } while (address < end);
+       } while (pte++, addr += PAGE_SIZE, addr != end);
        pte_unmap(pte - 1);
        return 0;
 }
 
-/* vma->vm_mm->page_table_lock is held */
-static unsigned long unuse_pud(struct vm_area_struct *vma, pud_t *pud,
-        unsigned long address, unsigned long end,
-       swp_entry_t entry, struct page *page)
+static inline int unuse_pmd_range(struct vm_area_struct *vma, pud_t *pud,
+                               unsigned long addr, unsigned long end,
+                               swp_entry_t entry, struct page *page)
 {
        pmd_t *pmd;
        unsigned long next;
-       unsigned long foundaddr;
 
-       if (pud_none(*pud))
-               return 0;
-       if (pud_bad(*pud)) {
-               pud_ERROR(*pud);
-               pud_clear(pud);
-               return 0;
-       }
-       pmd = pmd_offset(pud, address);
+       pmd = pmd_offset(pud, addr);
        do {
-               next = (address + PMD_SIZE) & PMD_MASK;
-               if (next > end || !next)
-                       next = end;
-               foundaddr = unuse_pmd(vma, pmd, address, next, entry, page);
-               if (foundaddr)
-                       return foundaddr;
-               address = next;
-               pmd++;
-       } while (address < end);
+               next = pmd_addr_end(addr, end);
+               if (pmd_none_or_clear_bad(pmd))
+                       continue;
+               if (unuse_pte_range(vma, pmd, addr, next, entry, page))
+                       return 1;
+       } while (pmd++, addr = next, addr != end);
        return 0;
 }
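
[All of the rewritten walkers follow the standard do/while pattern that came in with the four-level page tables: p?d_addr_end() clamps next to the end of the current table while handling wraparound at the top of the address space (the job of the old "if (next > end || !next)" test), and p?d_none_or_clear_bad() folds together the none/bad checks each walker used to open-code. The generic helpers look roughly like:

	#define pmd_addr_end(addr, end)						\
	({	unsigned long __boundary = ((addr) + PMD_SIZE) & PMD_MASK;	\
		(__boundary - 1 < (end) - 1) ? __boundary : (end);		\
	})

	static inline int pmd_none_or_clear_bad(pmd_t *pmd)
	{
		if (pmd_none(*pmd))
			return 1;
		if (unlikely(pmd_bad(*pmd))) {
			pmd_clear_bad(pmd);	/* reports pmd_ERROR, clears the entry */
			return 1;
		}
		return 0;
	}
]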
 
-/* vma->vm_mm->page_table_lock is held */
-static unsigned long unuse_pgd(struct vm_area_struct *vma, pgd_t *pgd,
-       unsigned long address, unsigned long end,
-       swp_entry_t entry, struct page *page)
+static inline int unuse_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
+                               unsigned long addr, unsigned long end,
+                               swp_entry_t entry, struct page *page)
 {
        pud_t *pud;
        unsigned long next;
-       unsigned long foundaddr;
 
-       if (pgd_none(*pgd))
-               return 0;
-       if (pgd_bad(*pgd)) {
-               pgd_ERROR(*pgd);
-               pgd_clear(pgd);
-               return 0;
-       }
-       pud = pud_offset(pgd, address);
+       pud = pud_offset(pgd, addr);
        do {
-               next = (address + PUD_SIZE) & PUD_MASK;
-               if (next > end || !next)
-                       next = end;
-               foundaddr = unuse_pud(vma, pud, address, next, entry, page);
-               if (foundaddr)
-                       return foundaddr;
-               address = next;
-               pud++;
-       } while (address < end);
+               next = pud_addr_end(addr, end);
+               if (pud_none_or_clear_bad(pud))
+                       continue;
+               if (unuse_pmd_range(vma, pud, addr, next, entry, page))
+                       return 1;
+       } while (pud++, addr = next, addr != end);
        return 0;
 }
 
-/* vma->vm_mm->page_table_lock is held */
-static unsigned long unuse_vma(struct vm_area_struct *vma,
-       swp_entry_t entry, struct page *page)
+static int unuse_vma(struct vm_area_struct *vma,
+                               swp_entry_t entry, struct page *page)
 {
        pgd_t *pgd;
-       unsigned long address, next, end;
-       unsigned long foundaddr;
+       unsigned long addr, end, next;
 
        if (page->mapping) {
-               address = page_address_in_vma(page, vma);
-               if (address == -EFAULT)
+               addr = page_address_in_vma(page, vma);
+               if (addr == -EFAULT)
                        return 0;
                else
-                       end = address + PAGE_SIZE;
+                       end = addr + PAGE_SIZE;
        } else {
-               address = vma->vm_start;
+               addr = vma->vm_start;
                end = vma->vm_end;
        }
-       pgd = pgd_offset(vma->vm_mm, address);
+
+       pgd = pgd_offset(vma->vm_mm, addr);
        do {
-               next = (address + PGDIR_SIZE) & PGDIR_MASK;
-               if (next > end || !next)
-                       next = end;
-               foundaddr = unuse_pgd(vma, pgd, address, next, entry, page);
-               if (foundaddr)
-                       return foundaddr;
-               address = next;
-               pgd++;
-       } while (address < end);
+               next = pgd_addr_end(addr, end);
+               if (pgd_none_or_clear_bad(pgd))
+                       continue;
+               if (unuse_pud_range(vma, pgd, addr, next, entry, page))
+                       return 1;
+       } while (pgd++, addr = next, addr != end);
        return 0;
 }
 
-static int unuse_process(struct mm_struct * mm,
-                       swp_entry_t entry, struct page* page)
+static int unuse_mm(struct mm_struct *mm,
+                               swp_entry_t entry, struct page *page)
 {
-       struct vm_area_struct* vma;
-       unsigned long foundaddr = 0;
+       struct vm_area_struct *vma;
 
-       /*
-        * Go through process' page directory.
-        */
        if (!down_read_trylock(&mm->mmap_sem)) {
                /*
                 * Our reference to the page stops try_to_unmap_one from
@@ -595,16 +539,13 @@ static int unuse_process(struct mm_struct * mm,
        }
        spin_lock(&mm->page_table_lock);
        for (vma = mm->mmap; vma; vma = vma->vm_next) {
-               if (vma->anon_vma) {
-                       foundaddr = unuse_vma(vma, entry, page);
-                       if (foundaddr)
-                               break;
-               }
+               if (vma->anon_vma && unuse_vma(vma, entry, page))
+                       break;
        }
        spin_unlock(&mm->page_table_lock);
        up_read(&mm->mmap_sem);
        /*
-        * Currently unuse_process cannot fail, but leave error handling
+        * Currently unuse_mm cannot fail, but leave error handling
         * at call sites for now, since we change it from time to time.
         */
        return 0;
@@ -748,7 +689,7 @@ static int try_to_unuse(unsigned int type)
                        if (start_mm == &init_mm)
                                shmem = shmem_unuse(entry, page);
                        else
-                               retval = unuse_process(start_mm, entry, page);
+                               retval = unuse_mm(start_mm, entry, page);
                }
                if (*swap_map > 1) {
                        int set_start_mm = (*swap_map >= swcount);
@@ -780,7 +721,7 @@ static int try_to_unuse(unsigned int type)
                                        set_start_mm = 1;
                                        shmem = shmem_unuse(entry, page);
                                } else
-                                       retval = unuse_process(mm, entry, page);
+                                       retval = unuse_mm(mm, entry, page);
                                if (set_start_mm && *swap_map < swcount) {
                                        mmput(new_start_mm);
                                        atomic_inc(&mm->mm_users);