fedora core 6 1.2949 + vserver 2.2.0
[linux-2.6.git] / mm / swapfile.c
index 7fab939..8297157 100644 (file)
@@ -5,7 +5,6 @@
  *  Swap reorganised 29.12.95, Stephen Tweedie
  */
 
-#include <linux/config.h>
 #include <linux/mm.h>
 #include <linux/hugetlb.h>
 #include <linux/mman.h>
@@ -32,6 +31,7 @@
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
 #include <linux/swapops.h>
+#include <linux/vs_base.h>
 #include <linux/vs_memory.h>
 
 DEFINE_SPINLOCK(swap_lock);
@@ -46,7 +46,7 @@ static const char Unused_offset[] = "Unused swap offset entry ";
 
 struct swap_list_t swap_list = {-1, -1};
 
-struct swap_info_struct swap_info[MAX_SWAPFILES];
+static struct swap_info_struct swap_info[MAX_SWAPFILES];
 
 static DEFINE_MUTEX(swapon_mutex);
 
@@ -117,7 +117,7 @@ static inline unsigned long scan_swap_map(struct swap_info_struct *si)
                                last_in_cluster = offset + SWAPFILE_CLUSTER;
                        else if (offset == last_in_cluster) {
                                spin_lock(&swap_lock);
-                               si->cluster_next = offset-SWAPFILE_CLUSTER-1;
+                               si->cluster_next = offset-SWAPFILE_CLUSTER+1;
                                goto cluster;
                        }
                        if (unlikely(--latency_ration < 0)) {
@@ -396,20 +396,29 @@ void free_swap_and_cache(swp_entry_t entry)
        struct swap_info_struct * p;
        struct page *page = NULL;
 
+       if (is_migration_entry(entry))
+               return;
+
        p = swap_info_get(entry);
        if (p) {
-               if (swap_entry_free(p, swp_offset(entry)) == 1)
-                       page = find_trylock_page(&swapper_space, entry.val);
+               if (swap_entry_free(p, swp_offset(entry)) == 1) {
+                       page = find_get_page(&swapper_space, entry.val);
+                       if (page && unlikely(TestSetPageLocked(page))) {
+                               page_cache_release(page);
+                               page = NULL;
+                       }
+               }
                spin_unlock(&swap_lock);
        }
        if (page) {
                int one_user;
 
                BUG_ON(PagePrivate(page));
-               page_cache_get(page);
                one_user = (page_count(page) == 2);
                /* Only cache user (+us), or swap space full? Free it! */
-               if (!PageWriteback(page) && (one_user || vm_swap_full())) {
+               /* Also recheck PageSwapCache after page is locked (above) */
+               if (PageSwapCache(page) && !PageWriteback(page) &&
+                                       (one_user || vm_swap_full())) {
                        delete_from_swap_cache(page);
                        SetPageDirty(page);
                }
@@ -418,6 +427,82 @@ void free_swap_and_cache(swp_entry_t entry)
        }
 }
 
+#ifdef CONFIG_SOFTWARE_SUSPEND
+/*
+ * Find the swap type (index into swap_info) matching a device, for the
+ * suspend to disk code (aka swsusp).
+ *
+ * @device - device to look for; if it is 0 (or bdget() yields NULL) the
+ *     first SWP_WRITEOK swap area is chosen.
+ * @offset - number of the PAGE_SIZE-sized block of the device, starting
+ *     from 0, in which the swap header is expected to be located.
+ * @bdev_p - if non-NULL, receives the bdev of the chosen swap area.
+ *
+ * Returns the swap type, or -ENODEV when no area matches.
+ */
+int swap_type_of(dev_t device, sector_t offset, struct block_device **bdev_p)
+{
+       struct block_device *wanted = device ? bdget(device) : NULL;
+       int type;
+
+       spin_lock(&swap_lock);
+       for (type = 0; type < nr_swapfiles; type++) {
+               struct swap_info_struct *si = &swap_info[type];
+               struct swap_extent *first;
+
+               /* Only SWP_WRITEOK areas are candidates. */
+               if (!(si->flags & SWP_WRITEOK))
+                       continue;
+
+               if (wanted) {
+                       /* need the same bdev with first extent at @offset */
+                       if (si->bdev != wanted)
+                               continue;
+                       first = list_entry(si->extent_list.next,
+                                          struct swap_extent, list);
+                       if (first->start_block != offset)
+                               continue;
+               }
+
+               /* Found it: hand back the bdev, bdput() what bdget() gave. */
+               if (bdev_p)
+                       *bdev_p = si->bdev;
+               spin_unlock(&swap_lock);
+               if (wanted)
+                       bdput(wanted);
+               return type;
+       }
+       spin_unlock(&swap_lock);
+       if (wanted)
+               bdput(wanted);
+
+       /* Nothing matched. */
+       return -ENODEV;
+}
+
+/*
+ * Count the pages of swap area @type for software suspend: all of them, or
+ * (if @free) pages minus inuse_pages.  0 for a bad or non-SWP_WRITEOK type.
+ */
+unsigned int count_swap_pages(int type, int free)
+{
+       struct swap_info_struct *si;
+       unsigned int count = 0;
+
+       if (type >= nr_swapfiles)
+               return count;
+       si = &swap_info[type];
+       spin_lock(&swap_lock);
+       if (si->flags & SWP_WRITEOK) {
+               count = si->pages;
+               if (free)
+                       count -= si->inuse_pages;
+       }
+       spin_unlock(&swap_lock);
+       return count;
+}
+#endif
+
 /*
  * No need to decide whether this PTE shares the swap entry with others,
  * just let do_wp_page work it out if a write is requested later - to
@@ -555,15 +640,6 @@ static int unuse_mm(struct mm_struct *mm,
        return 0;
 }
 
-#ifdef CONFIG_MIGRATION
-int remove_vma_swap(struct vm_area_struct *vma, struct page *page)
-{
-       swp_entry_t entry = { .val = page_private(page) };
-
-       return unuse_vma(vma, entry, page);
-}
-#endif
-
 /*
  * Scan swap_map from current position to next entry still in use.
  * Recycle to start on reaching the end, returning 0 when empty.
@@ -656,7 +732,6 @@ static int try_to_unuse(unsigned int type)
                 */
                swap_map = &si->swap_map[i];
                entry = swp_entry(type, i);
-again:
                page = read_swap_cache_async(entry, NULL, 0);
                if (!page) {
                        /*
@@ -691,12 +766,6 @@ again:
                wait_on_page_locked(page);
                wait_on_page_writeback(page);
                lock_page(page);
-               if (!PageSwapCache(page)) {
-                       /* Page migration has occured */
-                       unlock_page(page);
-                       page_cache_release(page);
-                       goto again;
-               }
                wait_on_page_writeback(page);
 
                /*
@@ -725,10 +794,8 @@ again:
                        while (*swap_map > 1 && !retval &&
                                        (p = p->next) != &start_mm->mmlist) {
                                mm = list_entry(p, struct mm_struct, mmlist);
-                               if (atomic_inc_return(&mm->mm_users) == 1) {
-                                       atomic_dec(&mm->mm_users);
+                               if (!atomic_inc_not_zero(&mm->mm_users))
                                        continue;
-                               }
                                spin_unlock(&mmlist_lock);
                                mmput(prev_mm);
                                prev_mm = mm;
@@ -886,6 +953,23 @@ sector_t map_swap_page(struct swap_info_struct *sis, pgoff_t offset)
        }
 }
 
+#ifdef CONFIG_SOFTWARE_SUSPEND
+/*
+ * Translate page @offset within swap area @swap_type (an index into
+ * swap_info) into the (PAGE_SIZE) block of that area's swapdev.
+ * Yields 0 if @swap_type >= nr_swapfiles or the area is not SWP_WRITEOK.
+ */
+sector_t swapdev_block(int swap_type, pgoff_t offset)
+{
+       struct swap_info_struct *si = NULL;
+
+       if (swap_type < nr_swapfiles)
+               si = &swap_info[swap_type];
+       if (!si || !(si->flags & SWP_WRITEOK))
+               return 0;
+       return map_swap_page(si, offset);
+}
+#endif /* CONFIG_SOFTWARE_SUSPEND */
+
 /*
  * Free all of a swapdev's extent information
  */
@@ -1229,10 +1313,13 @@ static void *swap_start(struct seq_file *swap, loff_t *pos)
 
        mutex_lock(&swapon_mutex);
 
+       if (!l)
+               return SEQ_START_TOKEN;
+
        for (i = 0; i < nr_swapfiles; i++, ptr++) {
                if (!(ptr->flags & SWP_USED) || !ptr->swap_map)
                        continue;
-               if (!l--)
+               if (!--l)
                        return ptr;
        }
 
@@ -1241,10 +1328,17 @@ static void *swap_start(struct seq_file *swap, loff_t *pos)
 
 static void *swap_next(struct seq_file *swap, void *v, loff_t *pos)
 {
-       struct swap_info_struct *ptr = v;
+       struct swap_info_struct *ptr;
        struct swap_info_struct *endptr = swap_info + nr_swapfiles;
 
-       for (++ptr; ptr < endptr; ptr++) {
+       if (v == SEQ_START_TOKEN)
+               ptr = swap_info;
+       else {
+               ptr = v;
+               ptr++;
+       }
+
+       for (; ptr < endptr; ptr++) {
                if (!(ptr->flags & SWP_USED) || !ptr->swap_map)
                        continue;
                ++*pos;
@@ -1265,14 +1359,16 @@ static int swap_show(struct seq_file *swap, void *v)
        struct file *file;
        int len;
 
-       if (v == swap_info)
-               seq_puts(swap, "Filename\t\t\t\tType\t\tSize\tUsed\tPriority\n");
+       if (ptr == SEQ_START_TOKEN) {
+               seq_puts(swap,"Filename\t\t\t\tType\t\tSize\tUsed\tPriority\n");
+               return 0;
+       }
 
        file = ptr->swap_file;
-       len = seq_path(swap, file->f_vfsmnt, file->f_dentry, " \t\n\\");
+       len = seq_path(swap, file->f_path.mnt, file->f_path.dentry, " \t\n\\");
        seq_printf(swap, "%*s%s\t%u\t%u\t%d\n",
                       len < 40 ? 40 - len : 1, " ",
-                      S_ISBLK(file->f_dentry->d_inode->i_mode) ?
+                      S_ISBLK(file->f_path.dentry->d_inode->i_mode) ?
                                "partition" : "file\t",
                       ptr->pages << (PAGE_SHIFT - 10),
                       ptr->inuse_pages << (PAGE_SHIFT - 10),
@@ -1280,7 +1376,7 @@ static int swap_show(struct seq_file *swap, void *v)
        return 0;
 }
 
-static struct seq_operations swaps_op = {
+static const struct seq_operations swaps_op = {
        .start =        swap_start,
        .next =         swap_next,
        .stop =         swap_stop,
@@ -1292,7 +1388,7 @@ static int swaps_open(struct inode *inode, struct file *file)
        return seq_open(file, &swaps_op);
 }
 
-static struct file_operations proc_swaps_operations = {
+static const struct file_operations proc_swaps_operations = {
        .open           = swaps_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
@@ -1347,19 +1443,7 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags)
                if (!(p->flags & SWP_USED))
                        break;
        error = -EPERM;
-       /*
-        * Test if adding another swap device is possible. There are
-        * two limiting factors: 1) the number of bits for the swap
-        * type swp_entry_t definition and 2) the number of bits for
-        * the swap type in the swap ptes as defined by the different
-        * architectures. To honor both limitations a swap entry
-        * with swap offset 0 and swap type ~0UL is created, encoded
-        * to a swap pte, decoded to a swp_entry_t again and finally
-        * the swap type part is extracted. This will mask all bits
-        * from the initial ~0UL that can't be encoded in either the
-        * swp_entry_t or the architecture definition of a swap pte.
-        */
-       if (type > swp_type(pte_to_swp_entry(swp_entry_to_pte(swp_entry(~0UL,0))))) {
+       if (type >= MAX_SWAPFILES) {
                spin_unlock(&swap_lock);
                goto out;
        }
@@ -1444,8 +1528,7 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags)
                error = -EINVAL;
                goto bad_swap;
        }
-       page = read_cache_page(mapping, 0,
-                       (filler_t *)mapping->a_ops->readpage, swap_file);
+       page = read_mapping_page(mapping, 0, swap_file);
        if (IS_ERR(page)) {
                error = PTR_ERR(page);
                goto bad_swap;
@@ -1508,6 +1591,11 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags)
                error = -EINVAL;
                if (!maxpages)
                        goto bad_swap;
+               if (swapfilesize && maxpages > swapfilesize) {
+                       printk(KERN_WARNING
+                              "Swap area shorter than signature indicates\n");
+                       goto bad_swap;
+               }
                if (swap_header->info.nr_badpages && S_ISREG(inode->i_mode))
                        goto bad_swap;
                if (swap_header->info.nr_badpages > MAX_SWAP_BADPAGES)
@@ -1535,12 +1623,6 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags)
                        goto bad_swap;
        }
 
-       if (swapfilesize && maxpages > swapfilesize) {
-               printk(KERN_WARNING
-                      "Swap area shorter than signature indicates\n");
-               error = -EINVAL;
-               goto bad_swap;
-       }
        if (nr_good_pages) {
                p->swap_map[0] = SWAP_MAP_BAD;
                p->max = maxpages;
@@ -1651,6 +1733,9 @@ int swap_duplicate(swp_entry_t entry)
        unsigned long offset, type;
        int result = 0;
 
+       if (is_migration_entry(entry))
+               return 1;
+
        type = swp_type(entry);
        if (type >= nr_swapfiles)
                goto bad_file;
@@ -1690,13 +1775,14 @@ get_swap_info_struct(unsigned type)
  */
 int valid_swaphandles(swp_entry_t entry, unsigned long *offset)
 {
-       int ret = 0, i = 1 << page_cluster;
+       int our_page_cluster = page_cluster;
+       int ret = 0, i = 1 << our_page_cluster;
        unsigned long toff;
        struct swap_info_struct *swapdev = swp_type(entry) + swap_info;
 
-       if (!page_cluster)      /* no readahead */
+       if (!our_page_cluster)  /* no readahead */
                return 0;
-       toff = (swp_offset(entry) >> page_cluster) << page_cluster;
+       toff = (swp_offset(entry) >> our_page_cluster) << our_page_cluster;
        if (!toff)              /* first page is swap header */
                toff++, i--;
        *offset = toff;