This commit was manufactured by cvs2svn to create tag

[linux-2.6.git] / mm / swapfile.c
diff --git a/mm/swapfile.c b/mm/swapfile.c

index 1b4dae6..50c3a77 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -25,11 +25,11 @@
  #include <linux/rmap.h>
  #include <linux/security.h>
  #include <linux/backing-dev.h>
-#include <linux/syscalls.h>
  
  #include <asm/pgtable.h>
  #include <asm/tlbflush.h>
  #include <linux/swapops.h>
+#include <linux/vs_base.h>
  #include <linux/vs_memory.h>
  
  spinlock_t swaplock = SPIN_LOCK_UNLOCKED;
@@ -650,12 +650,11 @@ static int try_to_unuse(unsigned int type)
          *
          * A simpler strategy would be to start at the last mm we
          * freed the previous entry from; but that would take less
-        * advantage of mmlist ordering, which clusters forked mms
-        * together, child after parent.  If we race with dup_mmap(), we
-        * prefer to resolve parent before child, lest we miss entries
-        * duplicated after we scanned child: using last mm would invert
-        * that.  Though it's only a serious concern when an overflowed
-        * swap count is reset from SWAP_MAP_MAX, preventing a rescan.
+        * advantage of mmlist ordering (now preserved by swap_out()),
+        * which clusters forked address spaces together, most recent
+        * child immediately after parent.  If we race with dup_mmap(),
+        * we very much want to resolve parent before child, otherwise
+        * we may miss some entries: using last mm would invert that.
          */
         start_mm = &init_mm;
         atomic_inc(&init_mm.mm_users);
@@ -663,7 +662,15 @@ static int try_to_unuse(unsigned int type)
         /*
          * Keep on scanning until all entries have gone.  Usually,
          * one pass through swap_map is enough, but not necessarily:
-        * there are races when an instance of an entry might be missed.
+        * mmput() removes mm from mmlist before exit_mmap() and its
+        * zap_page_range().  That's not too bad, those entries are
+        * on their way out, and handled faster there than here.
+        * do_munmap() behaves similarly, taking the range out of mm's
+        * vma list before zap_page_range().  But unfortunately, when
+        * unmapping a part of a vma, it takes the whole out first,
+        * then reinserts what's left after (might even reschedule if
+        * open() method called) - so swap entries may be invisible
+        * to swapoff for a while, then reappear - but that is rare.
          */
         while ((i = find_next_to_unuse(si, i)) != 0) {
                 if (signal_pending(current)) {
@@ -715,7 +722,7 @@ static int try_to_unuse(unsigned int type)
                 wait_on_page_writeback(page);
  
                 /*
-                * Remove all references to entry.
+                * Remove all references to entry, without blocking.
                  * Whenever we reach init_mm, there's no address space
                  * to search, but use it as a reminder to search shmem.
                  */
@@ -740,10 +747,7 @@ static int try_to_unuse(unsigned int type)
                         while (*swap_map > 1 && !retval &&
                                         (p = p->next) != &start_mm->mmlist) {
                                 mm = list_entry(p, struct mm_struct, mmlist);
-                               if (atomic_inc_return(&mm->mm_users) == 1) {
-                                       atomic_dec(&mm->mm_users);
-                                       continue;
-                               }
+                               atomic_inc(&mm->mm_users);
                                 spin_unlock(&mmlist_lock);
                                 mmput(prev_mm);
                                 prev_mm = mm;
@@ -856,26 +860,6 @@ static int try_to_unuse(unsigned int type)
         return retval;
  }
  
-/*
- * After a successful try_to_unuse, if no swap is now in use, we know we
- * can empty the mmlist.  swap_list_lock must be held on entry and exit.
- * Note that mmlist_lock nests inside swap_list_lock, and an mm must be
- * added to the mmlist just after page_duplicate - before would be racy.
- */
-static void drain_mmlist(void)
-{
-       struct list_head *p, *next;
-       unsigned int i;
-
-       for (i = 0; i < nr_swapfiles; i++)
-               if (swap_info[i].inuse_pages)
-                       return;
-       spin_lock(&mmlist_lock);
-       list_for_each_safe(p, next, &init_mm.mmlist)
-               list_del_init(p);
-       spin_unlock(&mmlist_lock);
-}
-
  /*
   * Use this swapdev's extent info to locate the (PAGE_SIZE) block which
   * corresponds to page offset `offset'.
@@ -1190,7 +1174,6 @@ asmlinkage long sys_swapoff(const char __user * specialfile)
         }
         down(&swapon_sem);
         swap_list_lock();
-       drain_mmlist();
         swap_device_lock(p);
         swap_file = p->swap_file;
         p->swap_file = NULL;
@@ -1376,7 +1359,7 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags)
         p->highest_bit = 0;
         p->cluster_nr = 0;
         p->inuse_pages = 0;
-       spin_lock_init(&p->sdev_lock);
+       p->sdev_lock = SPIN_LOCK_UNLOCKED;
         p->next = -1;
         if (swap_flags & SWAP_FLAG_PREFER) {
                 p->prio =