Merge to Fedora kernel-2.6.17-1.2142_FC4 patched with stable patch-2.6.17.13-vs2.x (Linux-VServer series; exact version truncated in the original page title)
[linux-2.6.git] / mm / fremap.c
index 2c8abe6..414b5bc 100644 (file)
 #include <linux/swapops.h>
 #include <linux/rmap.h>
 #include <linux/module.h>
+#include <linux/syscalls.h>
+#include <linux/vs_memory.h>
 
 #include <asm/mmu_context.h>
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 
-static inline void zap_pte(struct mm_struct *mm, struct vm_area_struct *vma,
+static int zap_pte(struct mm_struct *mm, struct vm_area_struct *vma,
                        unsigned long addr, pte_t *ptep)
 {
        pte_t pte = *ptep;
+       struct page *page = NULL;
 
-       if (pte_none(pte))
-               return;
        if (pte_present(pte)) {
-               unsigned long pfn = pte_pfn(pte);
-
-               flush_cache_page(vma, addr);
+               flush_cache_page(vma, addr, pte_pfn(pte));
                pte = ptep_clear_flush(vma, addr, ptep);
-               if (pfn_valid(pfn)) {
-                       struct page *page = pfn_to_page(pfn);
-                       if (!PageReserved(page)) {
-                               if (pte_dirty(pte))
-                                       set_page_dirty(page);
-                               page_remove_rmap(page, ptep);
-                               page_cache_release(page);
-                               mm->rss--;
-                       }
+               page = vm_normal_page(vma, addr, pte);
+               if (page) {
+                       if (pte_dirty(pte))
+                               set_page_dirty(page);
+                       page_remove_rmap(page);
+                       page_cache_release(page);
                }
        } else {
                if (!pte_file(pte))
                        free_swap_and_cache(pte_to_swp_entry(pte));
-               pte_clear(ptep);
+               pte_clear(mm, addr, ptep);
        }
+       return !!page;
 }
 
 /*
- * Install a page to a given virtual memory address, release any
+ * Install a file page to a given virtual memory address, release any
  * previously existing mapping.
  */
 int install_page(struct mm_struct *mm, struct vm_area_struct *vma,
                unsigned long addr, struct page *page, pgprot_t prot)
 {
+       struct inode *inode;
+       pgoff_t size;
        int err = -ENOMEM;
        pte_t *pte;
-       pgd_t *pgd;
-       pmd_t *pmd;
        pte_t pte_val;
-       struct pte_chain *pte_chain;
-
-       pte_chain = pte_chain_alloc(GFP_KERNEL);
-       if (!pte_chain)
-               goto err;
-       pgd = pgd_offset(mm, addr);
-       spin_lock(&mm->page_table_lock);
+       spinlock_t *ptl;
 
-       pmd = pmd_alloc(mm, pgd, addr);
-       if (!pmd)
-               goto err_unlock;
-
-       pte = pte_alloc_map(mm, pmd, addr);
+       pte = get_locked_pte(mm, addr, &ptl);
        if (!pte)
-               goto err_unlock;
+               goto out;
+
+       /*
+        * This page may have been truncated. Tell the
+        * caller about it.
+        */
+       err = -EINVAL;
+       if (vma->vm_file) {
+               inode = vma->vm_file->f_mapping->host;
+               size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+               if (!page->mapping || page->index >= size)
+                       goto unlock;
+               err = -ENOMEM;
+               if (page_mapcount(page) > INT_MAX/2)
+                       goto unlock;
+               if (!vx_rsspages_avail(mm, 1))
+                       goto unlock;
+       }
 
-       zap_pte(mm, vma, addr, pte);
+       if (pte_none(*pte) || !zap_pte(mm, vma, addr, pte))
+               inc_mm_counter(mm, file_rss);
 
-       mm->rss++;
        flush_icache_page(vma, page);
-       set_pte(pte, mk_pte(page, prot));
-       pte_chain = page_add_rmap(page, pte, pte_chain);
+       set_pte_at(mm, addr, pte, mk_pte(page, prot));
+       page_add_file_rmap(page);
        pte_val = *pte;
-       pte_unmap(pte);
        update_mmu_cache(vma, addr, pte_val);
-       spin_unlock(&mm->page_table_lock);
-       pte_chain_free(pte_chain);
-       return 0;
-
-err_unlock:
-       spin_unlock(&mm->page_table_lock);
-       pte_chain_free(pte_chain);
-err:
+       err = 0;
+unlock:
+       pte_unmap_unlock(pte, ptl);
+out:
        return err;
 }
 EXPORT_SYMBOL(install_page);
 
-
 /*
  * Install a file pte to a given virtual memory address, release any
  * previously existing mapping.
@@ -107,36 +105,27 @@ int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma,
 {
        int err = -ENOMEM;
        pte_t *pte;
-       pgd_t *pgd;
-       pmd_t *pmd;
        pte_t pte_val;
+       spinlock_t *ptl;
 
-       pgd = pgd_offset(mm, addr);
-       spin_lock(&mm->page_table_lock);
-
-       pmd = pmd_alloc(mm, pgd, addr);
-       if (!pmd)
-               goto err_unlock;
-
-       pte = pte_alloc_map(mm, pmd, addr);
+       pte = get_locked_pte(mm, addr, &ptl);
        if (!pte)
-               goto err_unlock;
+               goto out;
 
-       zap_pte(mm, vma, addr, pte);
+       if (!pte_none(*pte) && zap_pte(mm, vma, addr, pte)) {
+               update_hiwater_rss(mm);
+               dec_mm_counter(mm, file_rss);
+       }
 
-       set_pte(pte, pgoff_to_pte(pgoff));
+       set_pte_at(mm, addr, pte, pgoff_to_pte(pgoff));
        pte_val = *pte;
-       pte_unmap(pte);
        update_mmu_cache(vma, addr, pte_val);
-       spin_unlock(&mm->page_table_lock);
-       return 0;
-
-err_unlock:
-       spin_unlock(&mm->page_table_lock);
+       pte_unmap_unlock(pte, ptl);
+       err = 0;
+out:
        return err;
 }
 
-
 /***
  * sys_remap_file_pages - remap arbitrary pages of a shared backing store
  *                        file within an existing vma.
@@ -159,9 +148,11 @@ asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size,
        unsigned long __prot, unsigned long pgoff, unsigned long flags)
 {
        struct mm_struct *mm = current->mm;
+       struct address_space *mapping;
        unsigned long end = start + size;
        struct vm_area_struct *vma;
        int err = -EINVAL;
+       int has_write_lock = 0;
 
        if (__prot)
                return err;
@@ -182,25 +173,41 @@ asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size,
 #endif
 
        /* We need down_write() to change vma->vm_flags. */
-       down_write(&mm->mmap_sem);
+       down_read(&mm->mmap_sem);
+ retry:
        vma = find_vma(mm, start);
 
        /*
         * Make sure the vma is shared, that it supports prefaulting,
         * and that the remapped range is valid and fully within
-        * the single existing vma:
+        * the single existing vma.  vm_private_data is used as a
+        * swapout cursor in a VM_NONLINEAR vma.
         */
        if (vma && (vma->vm_flags & VM_SHARED) &&
+               (!vma->vm_private_data || (vma->vm_flags & VM_NONLINEAR)) &&
                vma->vm_ops && vma->vm_ops->populate &&
                        end > start && start >= vma->vm_start &&
                                end <= vma->vm_end) {
 
                /* Must set VM_NONLINEAR before any pages are populated. */
-               if (pgoff != ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff)
+               if (pgoff != linear_page_index(vma, start) &&
+                   !(vma->vm_flags & VM_NONLINEAR)) {
+                       if (!has_write_lock) {
+                               up_read(&mm->mmap_sem);
+                               down_write(&mm->mmap_sem);
+                               has_write_lock = 1;
+                               goto retry;
+                       }
+                       mapping = vma->vm_file->f_mapping;
+                       spin_lock(&mapping->i_mmap_lock);
+                       flush_dcache_mmap_lock(mapping);
                        vma->vm_flags |= VM_NONLINEAR;
+                       vma_prio_tree_remove(vma, &mapping->i_mmap);
+                       vma_nonlinear_insert(vma, &mapping->i_mmap_nonlinear);
+                       flush_dcache_mmap_unlock(mapping);
+                       spin_unlock(&mapping->i_mmap_lock);
+               }
 
-               /* ->populate can take a long time, so downgrade the lock. */
-               downgrade_write(&mm->mmap_sem);
                err = vma->vm_ops->populate(vma, start, size,
                                            vma->vm_page_prot,
                                            pgoff, flags & MAP_NONBLOCK);
@@ -210,10 +217,11 @@ asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size,
                 * it after ->populate completes, and that would prevent
                 * downgrading the lock.  (Locks can't be upgraded).
                 */
+       }
+       if (likely(!has_write_lock))
                up_read(&mm->mmap_sem);
-       } else {
+       else
                up_write(&mm->mmap_sem);
-       }
 
        return err;
 }