vserver 2.0 rc7
[linux-2.6.git] mm/mremap.c
index 92142f5..1b0b71c 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -16,6 +16,7 @@
 #include <linux/fs.h>
 #include <linux/highmem.h>
 #include <linux/security.h>
+#include <linux/syscalls.h>
 #include <linux/vs_memory.h>
 
 #include <asm/uaccess.h>
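
The new <linux/syscalls.h> include presumably makes the syscall prototype visible in this file; in 2.6-era trees the declaration there looks roughly like the following (quoted for orientation, not part of the patch):

	asmlinkage unsigned long sys_mremap(unsigned long addr,
			unsigned long old_len, unsigned long new_len,
			unsigned long flags, unsigned long new_addr);
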
 static pte_t *get_one_pte_map_nested(struct mm_struct *mm, unsigned long addr)
 {
        pgd_t *pgd;
+       pud_t *pud;
        pmd_t *pmd;
        pte_t *pte = NULL;
 
        pgd = pgd_offset(mm, addr);
-       if (pgd_none(*pgd))
+       if (pgd_none_or_clear_bad(pgd))
                goto end;
-       if (pgd_bad(*pgd)) {
-               pgd_ERROR(*pgd);
-               pgd_clear(pgd);
-               goto end;
-       }
 
-       pmd = pmd_offset(pgd, addr);
-       if (pmd_none(*pmd))
+       pud = pud_offset(pgd, addr);
+       if (pud_none_or_clear_bad(pud))
                goto end;
-       if (pmd_bad(*pmd)) {
-               pmd_ERROR(*pmd);
-               pmd_clear(pmd);
+
+       pmd = pmd_offset(pud, addr);
+       if (pmd_none_or_clear_bad(pmd))
                goto end;
-       }
 
        pte = pte_offset_map_nested(pmd, addr);
        if (pte_none(*pte)) {
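
In get_one_pte_map_nested() the walk gains the pud level and the open-coded none/bad checks collapse into pgd/pud/pmd_none_or_clear_bad(). As a rough illustration (the real helpers live in the generic headers and may differ in detail), the pgd-level helper behaves approximately like the code this hunk removes:

	/* sketch only: report and clear a corrupt entry, tell the caller to stop */
	static inline int pgd_none_or_clear_bad(pgd_t *pgd)
	{
		if (pgd_none(*pgd))
			return 1;		/* nothing mapped at this level */
		if (unlikely(pgd_bad(*pgd))) {
			pgd_ERROR(*pgd);	/* same report/clear the old code did */
			pgd_clear(pgd);
			return 1;
		}
		return 0;			/* sane entry, keep walking */
	}
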
@@ -58,23 +54,37 @@ end:
 static pte_t *get_one_pte_map(struct mm_struct *mm, unsigned long addr)
 {
        pgd_t *pgd;
+       pud_t *pud;
        pmd_t *pmd;
 
        pgd = pgd_offset(mm, addr);
-       if (pgd_none(*pgd))
+       if (pgd_none_or_clear_bad(pgd))
                return NULL;
-       pmd = pmd_offset(pgd, addr);
-       if (!pmd_present(*pmd))
+
+       pud = pud_offset(pgd, addr);
+       if (pud_none_or_clear_bad(pud))
                return NULL;
+
+       pmd = pmd_offset(pud, addr);
+       if (pmd_none_or_clear_bad(pmd))
+               return NULL;
+
        return pte_offset_map(pmd, addr);
 }
 
 static inline pte_t *alloc_one_pte_map(struct mm_struct *mm, unsigned long addr)
 {
+       pgd_t *pgd;
+       pud_t *pud;
        pmd_t *pmd;
        pte_t *pte = NULL;
 
-       pmd = pmd_alloc(mm, pgd_offset(mm, addr), addr);
+       pgd = pgd_offset(mm, addr);
+
+       pud = pud_alloc(mm, pgd, addr);
+       if (!pud)
+               return NULL;
+       pmd = pmd_alloc(mm, pud, addr);
        if (pmd)
                pte = pte_alloc_map(mm, pmd, addr);
        return pte;
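
get_one_pte_map() and alloc_one_pte_map() get the same treatment: a pud step is inserted between pgd and pmd, with pud_offset() for the lookup path and pud_alloc() when the destination hierarchy must be built. For orientation, move_one_page() combines these helpers roughly as follows (a condensed sketch; the locking and kmap re-acquisition details are omitted):

	src = get_one_pte_map_nested(mm, old_addr);	/* atomic kmap of the source pte */
	if (src) {
		dst = get_one_pte_map(mm, new_addr);	/* try the cheap lookup first */
		if (unlikely(!dst)) {
			pte_unmap_nested(src);		/* drop the atomic kmap before allocating */
			dst = alloc_one_pte_map(mm, new_addr);
			src = get_one_pte_map_nested(mm, old_addr);
		}
		/* if both mappings are good, the pte is moved (see the next hunks) */
	}
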
@@ -82,7 +92,7 @@ static inline pte_t *alloc_one_pte_map(struct mm_struct *mm, unsigned long addr)
 
 static int
 move_one_page(struct vm_area_struct *vma, unsigned long old_addr,
-               unsigned long new_addr)
+               struct vm_area_struct *new_vma, unsigned long new_addr)
 {
        struct address_space *mapping = NULL;
        struct mm_struct *mm = vma->vm_mm;
@@ -98,6 +108,9 @@ move_one_page(struct vm_area_struct *vma, unsigned long old_addr,
                 */
                mapping = vma->vm_file->f_mapping;
                spin_lock(&mapping->i_mmap_lock);
+               if (new_vma->vm_truncate_count &&
+                   new_vma->vm_truncate_count != vma->vm_truncate_count)
+                       new_vma->vm_truncate_count = 0;
        }
        spin_lock(&mm->page_table_lock);
 
@@ -129,7 +142,7 @@ move_one_page(struct vm_area_struct *vma, unsigned long old_addr,
                        if (dst) {
                                pte_t pte;
                                pte = ptep_clear_flush(vma, old_addr, src);
-                               set_pte(dst, pte);
+                               set_pte_at(mm, new_addr, dst, pte);
                        } else
                                error = -ENOMEM;
                        pte_unmap_nested(src);
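
set_pte() becomes set_pte_at(), which additionally receives the mm and the virtual address being populated, for architectures whose pte-set hooks need them. On architectures that do not, the fallback is commonly just a pass-through (an assumption for illustration, not taken from this patch):

	/* typical arch fallback when mm/addr are not needed */
	#define set_pte_at(mm, addr, ptep, pteval)	set_pte(ptep, pteval)
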
@@ -144,8 +157,8 @@ move_one_page(struct vm_area_struct *vma, unsigned long old_addr,
 }
 
 static unsigned long move_page_tables(struct vm_area_struct *vma,
-               unsigned long new_addr, unsigned long old_addr,
-               unsigned long len)
+               unsigned long old_addr, struct vm_area_struct *new_vma,
+               unsigned long new_addr, unsigned long len)
 {
        unsigned long offset;
 
@@ -157,7 +170,8 @@ static unsigned long move_page_tables(struct vm_area_struct *vma,
         * only a few pages.. This also makes error recovery easier.
         */
        for (offset = 0; offset < len; offset += PAGE_SIZE) {
-               if (move_one_page(vma, old_addr+offset, new_addr+offset) < 0)
+               if (move_one_page(vma, old_addr + offset,
+                               new_vma, new_addr + offset) < 0)
                        break;
                cond_resched();
        }
@@ -188,14 +202,14 @@ static unsigned long move_vma(struct vm_area_struct *vma,
        if (!new_vma)
                return -ENOMEM;
 
-       moved_len = move_page_tables(vma, new_addr, old_addr, old_len);
+       moved_len = move_page_tables(vma, old_addr, new_vma, new_addr, old_len);
        if (moved_len < old_len) {
                /*
                 * On error, move entries back from new area to old,
                 * which will succeed since page tables still there,
                 * and then proceed to unmap new area instead of old.
                 */
-               move_page_tables(new_vma, old_addr, new_addr, moved_len);
+               move_page_tables(new_vma, new_addr, vma, old_addr, moved_len);
                vma = new_vma;
                old_len = new_len;
                old_addr = new_addr;
@@ -211,6 +225,12 @@ static unsigned long move_vma(struct vm_area_struct *vma,
                        split = 1;
        }
 
+       /*
+        * if we failed to move page tables we still do total_vm increment
+        * since do_munmap() will decrement it by old_len == new_len
+        */
+       vx_vmpages_add(mm, new_len >> PAGE_SHIFT);
+
        if (do_munmap(mm, old_addr, old_len) < 0) {
                /* OOM: unable to split vma, just get accounts right */
                vm_unacct_memory(excess >> PAGE_SHIFT);
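
The total_vm bump (vx_vmpages_add() in this tree, plain mm->total_vm in mainline) now happens before do_munmap(), because unmapping the old range subtracts old_len worth of pages. In page units the intended net effect is:

	/* normal move:  total_vm += new_len>>PAGE_SHIFT, do_munmap() then -= old_len>>PAGE_SHIFT */
	/* failed move:  old_len was set to new_len above, so the two cancel and total_vm is unchanged */
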
@@ -224,11 +244,8 @@ static unsigned long move_vma(struct vm_area_struct *vma,
                        vma->vm_next->vm_flags |= VM_ACCOUNT;
        }
 
-       // mm->total_vm += new_len >> PAGE_SHIFT;
-       vx_vmpages_add(mm, new_len >> PAGE_SHIFT);
        __vm_stat_account(mm, vma->vm_flags, vma->vm_file, new_len>>PAGE_SHIFT);
        if (vm_flags & VM_LOCKED) {
-               // mm->locked_vm += new_len >> PAGE_SHIFT;
                vx_vmlocked_add(mm, new_len >> PAGE_SHIFT);
                if (new_len > old_len)
                        make_pages_present(new_addr + old_len,
@@ -330,23 +347,19 @@ unsigned long do_mremap(unsigned long addr,
        if (vma->vm_flags & VM_LOCKED) {
                unsigned long locked, lock_limit;
                locked = current->mm->locked_vm << PAGE_SHIFT;
-               lock_limit = current->rlim[RLIMIT_MEMLOCK].rlim_cur;
+               lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
                locked += new_len - old_len;
                ret = -EAGAIN;
                if (locked > lock_limit && !capable(CAP_IPC_LOCK))
                        goto out;
-               ret = -ENOMEM;
                if (!vx_vmlocked_avail(current->mm,
                        (new_len - old_len) >> PAGE_SHIFT))
                        goto out;
        }
-       ret = -ENOMEM;
-       if ((current->mm->total_vm << PAGE_SHIFT) + (new_len - old_len)
-           > current->rlim[RLIMIT_AS].rlim_cur)
-               goto out;
-       /* check context space, maybe only Private writable mapping? */
-       if (!vx_vmpages_avail(current->mm, (new_len - old_len) >> PAGE_SHIFT))
+       if (!may_expand_vm(current->mm, (new_len - old_len) >> PAGE_SHIFT)) {
+               ret = -ENOMEM;
                goto out;
+       }
 
        if (vma->vm_flags & VM_ACCOUNT) {
                charged = (new_len - old_len) >> PAGE_SHIFT;
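
may_expand_vm() replaces the open-coded RLIMIT_AS comparison (and, in this tree, the separate vx_vmpages_avail() context check). Roughly, it tests the current total_vm plus the requested growth against the address-space limit in pages; a sketch of its shape, assuming the mainline mm/mmap.c definition (the vserver tree may fold its own context check into it):

	int may_expand_vm(struct mm_struct *mm, unsigned long npages)
	{
		unsigned long cur = mm->total_vm;	/* pages already mapped */
		unsigned long lim;

		lim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;

		if (cur + npages > lim)
			return 0;	/* would exceed RLIMIT_AS */
		return 1;
	}
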
@@ -370,12 +383,10 @@ unsigned long do_mremap(unsigned long addr,
                        vma_adjust(vma, vma->vm_start,
                                addr + new_len, vma->vm_pgoff, NULL);
 
-                       // current->mm->total_vm += pages;
                        vx_vmpages_add(current->mm, pages);
                        __vm_stat_account(vma->vm_mm, vma->vm_flags,
                                                        vma->vm_file, pages);
                        if (vma->vm_flags & VM_LOCKED) {
-                               // current->mm->locked_vm += pages;
                                vx_vmlocked_add(vma->vm_mm, pages);
                                make_pages_present(addr + old_len,
                                                   addr + new_len);