vserver 2.0 rc7

[linux-2.6.git] / mm / memory.c
diff --git a/mm/memory.c b/mm/memory.c

index 5d07c87..994a8e2 100644 (file)
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -46,7 +46,6 @@
  #include <linux/highmem.h>
  #include <linux/pagemap.h>
  #include <linux/rmap.h>
-#include <linux/acct.h>
  #include <linux/module.h>
  #include <linux/init.h>
  
@@ -83,117 +82,206 @@ EXPORT_SYMBOL(num_physpages);
  EXPORT_SYMBOL(high_memory);
  EXPORT_SYMBOL(vmalloc_earlyreserve);
  
+/*
+ * If a p?d_bad entry is found while walking page tables, report
+ * the error, before resetting entry to p?d_none.  Usually (but
+ * very seldom) called out from the p?d_none_or_clear_bad macros.
+ */
+
+void pgd_clear_bad(pgd_t *pgd)
+{
+       pgd_ERROR(*pgd);
+       pgd_clear(pgd);
+}
+
+void pud_clear_bad(pud_t *pud)
+{
+       pud_ERROR(*pud);
+       pud_clear(pud);
+}
+
+void pmd_clear_bad(pmd_t *pmd)
+{
+       pmd_ERROR(*pmd);
+       pmd_clear(pmd);
+}
+
  /*
   * Note: this doesn't free the actual pages themselves. That
   * has been handled earlier when unmapping all the memory regions.
   */
-static inline void clear_pmd_range(struct mmu_gather *tlb, pmd_t *pmd, unsigned long start, unsigned long end)
+static void free_pte_range(struct mmu_gather *tlb, pmd_t *pmd)
  {
-       struct page *page;
-
-       if (pmd_none(*pmd))
-               return;
-       if (unlikely(pmd_bad(*pmd))) {
-               pmd_ERROR(*pmd);
-               pmd_clear(pmd);
-               return;
-       }
-       if (!((start | end) & ~PMD_MASK)) {
-               /* Only clear full, aligned ranges */
-               page = pmd_page(*pmd);
-               pmd_clear(pmd);
-               dec_page_state(nr_page_table_pages);
-               tlb->mm->nr_ptes--;
-               pte_free_tlb(tlb, page);
-       }
+       struct page *page = pmd_page(*pmd);
+       pmd_clear(pmd);
+       pte_free_tlb(tlb, page);
+       dec_page_state(nr_page_table_pages);
+       tlb->mm->nr_ptes--;
  }
  
-static inline void clear_pud_range(struct mmu_gather *tlb, pud_t *pud, unsigned long start, unsigned long end)
+static inline void free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
+                               unsigned long addr, unsigned long end,
+                               unsigned long floor, unsigned long ceiling)
  {
-       unsigned long addr = start, next;
-       pmd_t *pmd, *__pmd;
+       pmd_t *pmd;
+       unsigned long next;
+       unsigned long start;
  
-       if (pud_none(*pud))
-               return;
-       if (unlikely(pud_bad(*pud))) {
-               pud_ERROR(*pud);
-               pud_clear(pud);
+       start = addr;
+       pmd = pmd_offset(pud, addr);
+       do {
+               next = pmd_addr_end(addr, end);
+               if (pmd_none_or_clear_bad(pmd))
+                       continue;
+               free_pte_range(tlb, pmd);
+       } while (pmd++, addr = next, addr != end);
+
+       start &= PUD_MASK;
+       if (start < floor)
                 return;
+       if (ceiling) {
+               ceiling &= PUD_MASK;
+               if (!ceiling)
+                       return;
         }
+       if (end - 1 > ceiling - 1)
+               return;
  
-       pmd = __pmd = pmd_offset(pud, start);
-       do {
-               next = (addr + PMD_SIZE) & PMD_MASK;
-               if (next > end || next <= addr)
-                       next = end;
-               
-               clear_pmd_range(tlb, pmd, addr, next);
-               pmd++;
-               addr = next;
-       } while (addr && (addr < end));
-
-       if (!((start | end) & ~PUD_MASK)) {
-               /* Only clear full, aligned ranges */
-               pud_clear(pud);
-               pmd_free_tlb(tlb, __pmd);
-       }
+       pmd = pmd_offset(pud, start);
+       pud_clear(pud);
+       pmd_free_tlb(tlb, pmd);
  }
  
-
-static inline void clear_pgd_range(struct mmu_gather *tlb, pgd_t *pgd, unsigned long start, unsigned long end)
+static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
+                               unsigned long addr, unsigned long end,
+                               unsigned long floor, unsigned long ceiling)
  {
-       unsigned long addr = start, next;
-       pud_t *pud, *__pud;
+       pud_t *pud;
+       unsigned long next;
+       unsigned long start;
  
-       if (pgd_none(*pgd))
-               return;
-       if (unlikely(pgd_bad(*pgd))) {
-               pgd_ERROR(*pgd);
-               pgd_clear(pgd);
+       start = addr;
+       pud = pud_offset(pgd, addr);
+       do {
+               next = pud_addr_end(addr, end);
+               if (pud_none_or_clear_bad(pud))
+                       continue;
+               free_pmd_range(tlb, pud, addr, next, floor, ceiling);
+       } while (pud++, addr = next, addr != end);
+
+       start &= PGDIR_MASK;
+       if (start < floor)
                 return;
+       if (ceiling) {
+               ceiling &= PGDIR_MASK;
+               if (!ceiling)
+                       return;
         }
+       if (end - 1 > ceiling - 1)
+               return;
  
-       pud = __pud = pud_offset(pgd, start);
-       do {
-               next = (addr + PUD_SIZE) & PUD_MASK;
-               if (next > end || next <= addr)
-                       next = end;
-               
-               clear_pud_range(tlb, pud, addr, next);
-               pud++;
-               addr = next;
-       } while (addr && (addr < end));
-
-       if (!((start | end) & ~PGDIR_MASK)) {
-               /* Only clear full, aligned ranges */
-               pgd_clear(pgd);
-               pud_free_tlb(tlb, __pud);
-       }
+       pud = pud_offset(pgd, start);
+       pgd_clear(pgd);
+       pud_free_tlb(tlb, pud);
  }
  
  /*
- * This function clears user-level page tables of a process.
+ * This function frees user-level page tables of a process.
   *
   * Must be called with pagetable lock held.
   */
-void clear_page_range(struct mmu_gather *tlb, unsigned long start, unsigned long end)
+void free_pgd_range(struct mmu_gather **tlb,
+                       unsigned long addr, unsigned long end,
+                       unsigned long floor, unsigned long ceiling)
  {
-       unsigned long addr = start, next;
-       pgd_t * pgd = pgd_offset(tlb->mm, start);
-       unsigned long i;
-
-       for (i = pgd_index(start); i <= pgd_index(end-1); i++) {
-               next = (addr + PGDIR_SIZE) & PGDIR_MASK;
-               if (next > end || next <= addr)
-                       next = end;
-               
-               clear_pgd_range(tlb, pgd, addr, next);
-               pgd++;
-               addr = next;
+       pgd_t *pgd;
+       unsigned long next;
+       unsigned long start;
+
+       /*
+        * The next few lines have given us lots of grief...
+        *
+        * Why are we testing PMD* at this top level?  Because often
+        * there will be no work to do at all, and we'd prefer not to
+        * go all the way down to the bottom just to discover that.
+        *
+        * Why all these "- 1"s?  Because 0 represents both the bottom
+        * of the address space and the top of it (using -1 for the
+        * top wouldn't help much: the masks would do the wrong thing).
+        * The rule is that addr 0 and floor 0 refer to the bottom of
+        * the address space, but end 0 and ceiling 0 refer to the top.
+        * Comparisons need to use "end - 1" and "ceiling - 1" (though
+        * that end 0 case should be mythical).
+        *
+        * Wherever addr is brought up or ceiling brought down, we must
+        * be careful to reject "the opposite 0" before it confuses the
+        * subsequent tests.  But what about where end is brought down
+        * by PMD_SIZE below? No, end can't go down to 0 there.
+        *
+        * Whereas we round start (addr) and ceiling down, by different
+        * masks at different levels, in order to test whether a table
+        * now has no other vmas using it, so can be freed, we don't
+        * bother to round floor or end up - the tests don't need that.
+        */
+
+       addr &= PMD_MASK;
+       if (addr < floor) {
+               addr += PMD_SIZE;
+               if (!addr)
+                       return;
         }
+       if (ceiling) {
+               ceiling &= PMD_MASK;
+               if (!ceiling)
+                       return;
+       }
+       if (end - 1 > ceiling - 1)
+               end -= PMD_SIZE;
+       if (addr > end - 1)
+               return;
+
+       start = addr;
+       pgd = pgd_offset((*tlb)->mm, addr);
+       do {
+               next = pgd_addr_end(addr, end);
+               if (pgd_none_or_clear_bad(pgd))
+                       continue;
+               free_pud_range(*tlb, pgd, addr, next, floor, ceiling);
+       } while (pgd++, addr = next, addr != end);
+
+       if (!tlb_is_full_mm(*tlb))
+               flush_tlb_pgtables((*tlb)->mm, start, end);
  }
  
-pte_t fastcall * pte_alloc_map(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
+void free_pgtables(struct mmu_gather **tlb, struct vm_area_struct *vma,
+               unsigned long floor, unsigned long ceiling)
+{
+       while (vma) {
+               struct vm_area_struct *next = vma->vm_next;
+               unsigned long addr = vma->vm_start;
+
+               if (is_hugepage_only_range(vma->vm_mm, addr, HPAGE_SIZE)) {
+                       hugetlb_free_pgd_range(tlb, addr, vma->vm_end,
+                               floor, next? next->vm_start: ceiling);
+               } else {
+                       /*
+                        * Optimization: gather nearby vmas into one call down
+                        */
+                       while (next && next->vm_start <= vma->vm_end + PMD_SIZE
+                         && !is_hugepage_only_range(vma->vm_mm, next->vm_start,
+                                                       HPAGE_SIZE)) {
+                               vma = next;
+                               next = vma->vm_next;
+                       }
+                       free_pgd_range(tlb, addr, vma->vm_end,
+                               floor, next? next->vm_start: ceiling);
+               }
+               vma = next;
+       }
+}
+
+pte_t fastcall *pte_alloc_map(struct mm_struct *mm, pmd_t *pmd,
+                               unsigned long address)
  {
         if (!pmd_present(*pmd)) {
                 struct page *new;
@@ -254,20 +342,7 @@ out:
   */
  
  static inline void
-copy_swap_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, pte_t pte)
-{
-       if (pte_file(pte))
-               return;
-       swap_duplicate(pte_to_swp_entry(pte));
-       if (list_empty(&dst_mm->mmlist)) {
-               spin_lock(&mmlist_lock);
-               list_add(&dst_mm->mmlist, &src_mm->mmlist);
-               spin_unlock(&mmlist_lock);
-       }
-}
-
-static inline void
-copy_one_pte(struct mm_struct *dst_mm,  struct mm_struct *src_mm,
+copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
                 pte_t *dst_pte, pte_t *src_pte, unsigned long vm_flags,
                 unsigned long addr)
  {
@@ -275,12 +350,21 @@ copy_one_pte(struct mm_struct *dst_mm,  struct mm_struct *src_mm,
         struct page *page;
         unsigned long pfn;
  
-       /* pte contains position in swap, so copy. */
-       if (!pte_present(pte)) {
-               copy_swap_pte(dst_mm, src_mm, pte);
-               set_pte(dst_pte, pte);
+       /* pte contains position in swap or file, so copy. */
+       if (unlikely(!pte_present(pte))) {
+               if (!pte_file(pte)) {
+                       swap_duplicate(pte_to_swp_entry(pte));
+                       /* make sure dst_mm is on swapoff's mmlist. */
+                       if (unlikely(list_empty(&dst_mm->mmlist))) {
+                               spin_lock(&mmlist_lock);
+                               list_add(&dst_mm->mmlist, &src_mm->mmlist);
+                               spin_unlock(&mmlist_lock);
+                       }
+               }
+               set_pte_at(dst_mm, addr, dst_pte, pte);
                 return;
         }
+
         pfn = pte_pfn(pte);
         /* the pte points outside of valid memory, the
          * mapping is assumed to be good, meaningful
@@ -292,7 +376,7 @@ copy_one_pte(struct mm_struct *dst_mm,  struct mm_struct *src_mm,
                 page = pfn_to_page(pfn);
  
         if (!page || PageReserved(page)) {
-               set_pte(dst_pte, pte);
+               set_pte_at(dst_mm, addr, dst_pte, pte);
                 return;
         }
  
@@ -301,7 +385,7 @@ copy_one_pte(struct mm_struct *dst_mm,  struct mm_struct *src_mm,
          * in the parent and the child
          */
         if ((vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE) {
-               ptep_set_wrprotect(src_pte);
+               ptep_set_wrprotect(src_mm, addr, src_pte);
                 pte = *src_pte;
         }
  
@@ -313,172 +397,137 @@ copy_one_pte(struct mm_struct *dst_mm,  struct mm_struct *src_mm,
                 pte = pte_mkclean(pte);
         pte = pte_mkold(pte);
         get_page(page);
-       vx_rsspages_inc(dst_mm);
+       inc_mm_counter(dst_mm, rss);
         if (PageAnon(page))
-               vx_anonpages_inc(dst_mm);
-       set_pte(dst_pte, pte);
+               inc_mm_counter(dst_mm, anon_rss);
+       set_pte_at(dst_mm, addr, dst_pte, pte);
         page_dup_rmap(page);
  }
  
-static int copy_pte_range(struct mm_struct *dst_mm,  struct mm_struct *src_mm,
+static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
                 pmd_t *dst_pmd, pmd_t *src_pmd, struct vm_area_struct *vma,
                 unsigned long addr, unsigned long end)
  {
         pte_t *src_pte, *dst_pte;
-       pte_t *s, *d;
         unsigned long vm_flags = vma->vm_flags;
+       int progress;
  
-       d = dst_pte = pte_alloc_map(dst_mm, dst_pmd, addr);
+again:
+       dst_pte = pte_alloc_map(dst_mm, dst_pmd, addr);
         if (!dst_pte)
                 return -ENOMEM;
+       src_pte = pte_offset_map_nested(src_pmd, addr);
  
+       progress = 0;
         spin_lock(&src_mm->page_table_lock);
-       s = src_pte = pte_offset_map_nested(src_pmd, addr);
-       for (; addr < end; addr += PAGE_SIZE, s++, d++) {
-               if (pte_none(*s))
+       do {
+               /*
+                * We are holding two locks at this point - either of them
+                * could generate latencies in another task on another CPU.
+                */
+               if (progress >= 32 && (need_resched() ||
+                   need_lockbreak(&src_mm->page_table_lock) ||
+                   need_lockbreak(&dst_mm->page_table_lock)))
+                       break;
+               if (pte_none(*src_pte)) {
+                       progress++;
                         continue;
-               copy_one_pte(dst_mm, src_mm, d, s, vm_flags, addr);
-       }
-       pte_unmap_nested(src_pte);
-       pte_unmap(dst_pte);
+               }
+               copy_one_pte(dst_mm, src_mm, dst_pte, src_pte, vm_flags, addr);
+               progress += 8;
+       } while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end);
         spin_unlock(&src_mm->page_table_lock);
+
+       pte_unmap_nested(src_pte - 1);
+       pte_unmap(dst_pte - 1);
         cond_resched_lock(&dst_mm->page_table_lock);
+       if (addr != end)
+               goto again;
         return 0;
  }
  
-static int copy_pmd_range(struct mm_struct *dst_mm,  struct mm_struct *src_mm,
+static inline int copy_pmd_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
                 pud_t *dst_pud, pud_t *src_pud, struct vm_area_struct *vma,
                 unsigned long addr, unsigned long end)
  {
         pmd_t *src_pmd, *dst_pmd;
-       int err = 0;
         unsigned long next;
  
-       src_pmd = pmd_offset(src_pud, addr);
         dst_pmd = pmd_alloc(dst_mm, dst_pud, addr);
         if (!dst_pmd)
                 return -ENOMEM;
-
-       for (; addr < end; addr = next, src_pmd++, dst_pmd++) {
-               next = (addr + PMD_SIZE) & PMD_MASK;
-               if (next > end || next <= addr)
-                       next = end;
-               if (pmd_none(*src_pmd))
-                       continue;
-               if (pmd_bad(*src_pmd)) {
-                       pmd_ERROR(*src_pmd);
-                       pmd_clear(src_pmd);
+       src_pmd = pmd_offset(src_pud, addr);
+       do {
+               next = pmd_addr_end(addr, end);
+               if (pmd_none_or_clear_bad(src_pmd))
                         continue;
-               }
-               err = copy_pte_range(dst_mm, src_mm, dst_pmd, src_pmd,
-                                                       vma, addr, next);
-               if (err)
-                       break;
-       }
-       return err;
+               if (copy_pte_range(dst_mm, src_mm, dst_pmd, src_pmd,
+                                               vma, addr, next))
+                       return -ENOMEM;
+       } while (dst_pmd++, src_pmd++, addr = next, addr != end);
+       return 0;
  }
  
-static int copy_pud_range(struct mm_struct *dst_mm,  struct mm_struct *src_mm,
+static inline int copy_pud_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
                 pgd_t *dst_pgd, pgd_t *src_pgd, struct vm_area_struct *vma,
                 unsigned long addr, unsigned long end)
  {
         pud_t *src_pud, *dst_pud;
-       int err = 0;
         unsigned long next;
  
-       src_pud = pud_offset(src_pgd, addr);
         dst_pud = pud_alloc(dst_mm, dst_pgd, addr);
         if (!dst_pud)
                 return -ENOMEM;
-
-       for (; addr < end; addr = next, src_pud++, dst_pud++) {
-               next = (addr + PUD_SIZE) & PUD_MASK;
-               if (next > end || next <= addr)
-                       next = end;
-               if (pud_none(*src_pud))
-                       continue;
-               if (pud_bad(*src_pud)) {
-                       pud_ERROR(*src_pud);
-                       pud_clear(src_pud);
+       src_pud = pud_offset(src_pgd, addr);
+       do {
+               next = pud_addr_end(addr, end);
+               if (pud_none_or_clear_bad(src_pud))
                         continue;
-               }
-               err = copy_pmd_range(dst_mm, src_mm, dst_pud, src_pud,
-                                                       vma, addr, next);
-               if (err)
-                       break;
-       }
-       return err;
+               if (copy_pmd_range(dst_mm, src_mm, dst_pud, src_pud,
+                                               vma, addr, next))
+                       return -ENOMEM;
+       } while (dst_pud++, src_pud++, addr = next, addr != end);
+       return 0;
  }
  
-int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
+int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
                 struct vm_area_struct *vma)
  {
         pgd_t *src_pgd, *dst_pgd;
-       unsigned long addr, start, end, next;
-       int err = 0;
+       unsigned long next;
+       unsigned long addr = vma->vm_start;
+       unsigned long end = vma->vm_end;
  
         if (is_vm_hugetlb_page(vma))
-               return copy_hugetlb_page_range(dst, src, vma);
-
-       start = vma->vm_start;
-       src_pgd = pgd_offset(src, start);
-       dst_pgd = pgd_offset(dst, start);
-
-       end = vma->vm_end;
-       addr = start;
-       while (addr && (addr < end-1)) {
-               next = (addr + PGDIR_SIZE) & PGDIR_MASK;
-               if (next > end || next <= addr)
-                       next = end;
-               if (pgd_none(*src_pgd))
-                       goto next_pgd;
-               if (pgd_bad(*src_pgd)) {
-                       pgd_ERROR(*src_pgd);
-                       pgd_clear(src_pgd);
-                       goto next_pgd;
-               }
-               err = copy_pud_range(dst, src, dst_pgd, src_pgd,
-                                                       vma, addr, next);
-               if (err)
-                       break;
+               return copy_hugetlb_page_range(dst_mm, src_mm, vma);
  
-next_pgd:
-               src_pgd++;
-               dst_pgd++;
-               addr = next;
-       }
-
-       return err;
+       dst_pgd = pgd_offset(dst_mm, addr);
+       src_pgd = pgd_offset(src_mm, addr);
+       do {
+               next = pgd_addr_end(addr, end);
+               if (pgd_none_or_clear_bad(src_pgd))
+                       continue;
+               if (copy_pud_range(dst_mm, src_mm, dst_pgd, src_pgd,
+                                               vma, addr, next))
+                       return -ENOMEM;
+       } while (dst_pgd++, src_pgd++, addr = next, addr != end);
+       return 0;
  }
  
-static void zap_pte_range(struct mmu_gather *tlb,
-               pmd_t *pmd, unsigned long address,
-               unsigned long size, struct zap_details *details)
+static void zap_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
+                               unsigned long addr, unsigned long end,
+                               struct zap_details *details)
  {
-       unsigned long offset;
-       pte_t *ptep;
+       pte_t *pte;
  
-       if (pmd_none(*pmd))
-               return;
-       if (unlikely(pmd_bad(*pmd))) {
-               pmd_ERROR(*pmd);
-               pmd_clear(pmd);
-               return;
-       }
-       ptep = pte_offset_map(pmd, address);
-       offset = address & ~PMD_MASK;
-       if (offset + size > PMD_SIZE)
-               size = PMD_SIZE - offset;
-       size &= PAGE_MASK;
-       if (details && !details->check_mapping && !details->nonlinear_vma)
-               details = NULL;
-       for (offset=0; offset < size; ptep++, offset += PAGE_SIZE) {
-               pte_t pte = *ptep;
-               if (pte_none(pte))
+       pte = pte_offset_map(pmd, addr);
+       do {
+               pte_t ptent = *pte;
+               if (pte_none(ptent))
                         continue;
-               if (pte_present(pte)) {
+               if (pte_present(ptent)) {
                         struct page *page = NULL;
-                       unsigned long pfn = pte_pfn(pte);
+                       unsigned long pfn = pte_pfn(ptent);
                         if (pfn_valid(pfn)) {
                                 page = pfn_to_page(pfn);
                                 if (PageReserved(page))
@@ -502,19 +551,20 @@ static void zap_pte_range(struct mmu_gather *tlb,
                                      page->index > details->last_index))
                                         continue;
                         }
-                       pte = ptep_get_and_clear(ptep);
-                       tlb_remove_tlb_entry(tlb, ptep, address+offset);
+                       ptent = ptep_get_and_clear(tlb->mm, addr, pte);
+                       tlb_remove_tlb_entry(tlb, pte, addr);
                         if (unlikely(!page))
                                 continue;
                         if (unlikely(details) && details->nonlinear_vma
                             && linear_page_index(details->nonlinear_vma,
-                                       address+offset) != page->index)
-                               set_pte(ptep, pgoff_to_pte(page->index));
-                       if (pte_dirty(pte))
+                                               addr) != page->index)
+                               set_pte_at(tlb->mm, addr, pte,
+                                          pgoff_to_pte(page->index));
+                       if (pte_dirty(ptent))
                                 set_page_dirty(page);
                         if (PageAnon(page))
-                               vx_anonpages_dec(tlb->mm);
-                       else if (pte_young(pte))
+                               dec_mm_counter(tlb->mm, anon_rss);
+                       else if (pte_young(ptent))
                                 mark_page_accessed(page);
                         tlb->freed++;
                         page_remove_rmap(page);
@@ -527,78 +577,64 @@ static void zap_pte_range(struct mmu_gather *tlb,
                  */
                 if (unlikely(details))
                         continue;
-               if (!pte_file(pte))
-                       free_swap_and_cache(pte_to_swp_entry(pte));
-               pte_clear(ptep);
-       }
-       pte_unmap(ptep-1);
+               if (!pte_file(ptent))
+                       free_swap_and_cache(pte_to_swp_entry(ptent));
+               pte_clear(tlb->mm, addr, pte);
+       } while (pte++, addr += PAGE_SIZE, addr != end);
+       pte_unmap(pte - 1);
  }
  
-static void zap_pmd_range(struct mmu_gather *tlb,
-               pud_t *pud, unsigned long address,
-               unsigned long size, struct zap_details *details)
+static inline void zap_pmd_range(struct mmu_gather *tlb, pud_t *pud,
+                               unsigned long addr, unsigned long end,
+                               struct zap_details *details)
  {
-       pmd_t * pmd;
-       unsigned long end;
+       pmd_t *pmd;
+       unsigned long next;
  
-       if (pud_none(*pud))
-               return;
-       if (unlikely(pud_bad(*pud))) {
-               pud_ERROR(*pud);
-               pud_clear(pud);
-               return;
-       }
-       pmd = pmd_offset(pud, address);
-       end = address + size;
-       if (end > ((address + PUD_SIZE) & PUD_MASK))
-               end = ((address + PUD_SIZE) & PUD_MASK);
+       pmd = pmd_offset(pud, addr);
         do {
-               zap_pte_range(tlb, pmd, address, end - address, details);
-               address = (address + PMD_SIZE) & PMD_MASK; 
-               pmd++;
-       } while (address && (address < end));
+               next = pmd_addr_end(addr, end);
+               if (pmd_none_or_clear_bad(pmd))
+                       continue;
+               zap_pte_range(tlb, pmd, addr, next, details);
+       } while (pmd++, addr = next, addr != end);
  }
  
-static void zap_pud_range(struct mmu_gather *tlb,
-               pgd_t * pgd, unsigned long address,
-               unsigned long end, struct zap_details *details)
+static inline void zap_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
+                               unsigned long addr, unsigned long end,
+                               struct zap_details *details)
  {
-       pud_t * pud;
+       pud_t *pud;
+       unsigned long next;
  
-       if (pgd_none(*pgd))
-               return;
-       if (unlikely(pgd_bad(*pgd))) {
-               pgd_ERROR(*pgd);
-               pgd_clear(pgd);
-               return;
-       }
-       pud = pud_offset(pgd, address);
+       pud = pud_offset(pgd, addr);
         do {
-               zap_pmd_range(tlb, pud, address, end - address, details);
-               address = (address + PUD_SIZE) & PUD_MASK; 
-               pud++;
-       } while (address && (address < end));
+               next = pud_addr_end(addr, end);
+               if (pud_none_or_clear_bad(pud))
+                       continue;
+               zap_pmd_range(tlb, pud, addr, next, details);
+       } while (pud++, addr = next, addr != end);
  }
  
-static void unmap_page_range(struct mmu_gather *tlb,
-               struct vm_area_struct *vma, unsigned long address,
-               unsigned long end, struct zap_details *details)
+static void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
+                               unsigned long addr, unsigned long end,
+                               struct zap_details *details)
  {
-       unsigned long next;
         pgd_t *pgd;
-       int i;
+       unsigned long next;
  
-       BUG_ON(address >= end);
-       pgd = pgd_offset(vma->vm_mm, address);
+       if (details && !details->check_mapping && !details->nonlinear_vma)
+               details = NULL;
+
+       BUG_ON(addr >= end);
         tlb_start_vma(tlb, vma);
-       for (i = pgd_index(address); i <= pgd_index(end-1); i++) {
-               next = (address + PGDIR_SIZE) & PGDIR_MASK;
-               if (next <= address || next > end)
-                       next = end;
-               zap_pud_range(tlb, pgd, address, next, details);
-               address = next;
-               pgd++;
-       }
+       pgd = pgd_offset(vma->vm_mm, addr);
+       do {
+               next = pgd_addr_end(addr, end);
+               if (pgd_none_or_clear_bad(pgd))
+                       continue;
+               zap_pud_range(tlb, pgd, addr, next, details);
+       } while (pgd++, addr = next, addr != end);
         tlb_end_vma(tlb, vma);
  }
  
@@ -619,7 +655,7 @@ static void unmap_page_range(struct mmu_gather *tlb,
   * @nr_accounted: Place number of unmapped pages in vm-accountable vma's here
   * @details: details of nonlinear truncation or shared cache invalidation
   *
- * Returns the number of vma's which were covered by the unmapping.
+ * Returns the end address of the unmapping (restart addr if interrupted).
   *
   * Unmap all pages in the vma list.  Called under page_table_lock.
   *
@@ -636,7 +672,7 @@ static void unmap_page_range(struct mmu_gather *tlb,
   * ensure that any thus-far unmapped pages are flushed before unmap_vmas()
   * drops the lock and schedules.
   */
-int unmap_vmas(struct mmu_gather **tlbp, struct mm_struct *mm,
+unsigned long unmap_vmas(struct mmu_gather **tlbp, struct mm_struct *mm,
                 struct vm_area_struct *vma, unsigned long start_addr,
                 unsigned long end_addr, unsigned long *nr_accounted,
                 struct zap_details *details)
@@ -644,12 +680,11 @@ int unmap_vmas(struct mmu_gather **tlbp, struct mm_struct *mm,
         unsigned long zap_bytes = ZAP_BLOCK_SIZE;
         unsigned long tlb_start = 0;    /* For tlb_finish_mmu */
         int tlb_start_valid = 0;
-       int ret = 0;
+       unsigned long start = start_addr;
         spinlock_t *i_mmap_lock = details? details->i_mmap_lock: NULL;
         int fullmm = tlb_is_full_mm(*tlbp);
  
         for ( ; vma && vma->vm_start < end_addr; vma = vma->vm_next) {
-               unsigned long start;
                 unsigned long end;
  
                 start = max(vma->vm_start, start_addr);
@@ -662,7 +697,6 @@ int unmap_vmas(struct mmu_gather **tlbp, struct mm_struct *mm,
                 if (vma->vm_flags & VM_ACCOUNT)
                         *nr_accounted += (end - start) >> PAGE_SHIFT;
  
-               ret++;
                 while (start != end) {
                         unsigned long block;
  
@@ -693,7 +727,6 @@ int unmap_vmas(struct mmu_gather **tlbp, struct mm_struct *mm,
                                 if (i_mmap_lock) {
                                         /* must reset count of rss freed */
                                         *tlbp = tlb_gather_mmu(mm, fullmm);
-                                       details->break_addr = start;
                                         goto out;
                                 }
                                 spin_unlock(&mm->page_table_lock);
@@ -707,7 +740,7 @@ int unmap_vmas(struct mmu_gather **tlbp, struct mm_struct *mm,
                 }
         }
  out:
-       return ret;
+       return start;   /* which is now the end (or restart) address */
  }
  
  /**
@@ -717,7 +750,7 @@ out:
   * @size: number of bytes to zap
   * @details: details of nonlinear truncation or shared cache invalidation
   */
-void zap_page_range(struct vm_area_struct *vma, unsigned long address,
+unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address,
                 unsigned long size, struct zap_details *details)
  {
         struct mm_struct *mm = vma->vm_mm;
@@ -727,16 +760,16 @@ void zap_page_range(struct vm_area_struct *vma, unsigned long address,
  
         if (is_vm_hugetlb_page(vma)) {
                 zap_hugepage_range(vma, address, size);
-               return;
+               return end;
         }
  
         lru_add_drain();
         spin_lock(&mm->page_table_lock);
         tlb = tlb_gather_mmu(mm, 0);
-       unmap_vmas(&tlb, mm, vma, address, end, &nr_accounted, details);
+       end = unmap_vmas(&tlb, mm, vma, address, end, &nr_accounted, details);
         tlb_finish_mmu(tlb, address, end);
-       acct_update_integrals();
         spin_unlock(&mm->page_table_lock);
+       return end;
  }
  
  /*
@@ -987,111 +1020,78 @@ out:
  
  EXPORT_SYMBOL(get_user_pages);
  
-static void zeromap_pte_range(pte_t * pte, unsigned long address,
-                                     unsigned long size, pgprot_t prot)
+static int zeromap_pte_range(struct mm_struct *mm, pmd_t *pmd,
+                       unsigned long addr, unsigned long end, pgprot_t prot)
  {
-       unsigned long end;
+       pte_t *pte;
  
-       address &= ~PMD_MASK;
-       end = address + size;
-       if (end > PMD_SIZE)
-               end = PMD_SIZE;
+       pte = pte_alloc_map(mm, pmd, addr);
+       if (!pte)
+               return -ENOMEM;
         do {
-               pte_t zero_pte = pte_wrprotect(mk_pte(ZERO_PAGE(address), prot));
+               pte_t zero_pte = pte_wrprotect(mk_pte(ZERO_PAGE(addr), prot));
                 BUG_ON(!pte_none(*pte));
-               set_pte(pte, zero_pte);
-               address += PAGE_SIZE;
-               pte++;
-       } while (address && (address < end));
+               set_pte_at(mm, addr, pte, zero_pte);
+       } while (pte++, addr += PAGE_SIZE, addr != end);
+       pte_unmap(pte - 1);
+       return 0;
  }
  
-static inline int zeromap_pmd_range(struct mm_struct *mm, pmd_t * pmd,
-               unsigned long address, unsigned long size, pgprot_t prot)
+static inline int zeromap_pmd_range(struct mm_struct *mm, pud_t *pud,
+                       unsigned long addr, unsigned long end, pgprot_t prot)
  {
-       unsigned long base, end;
+       pmd_t *pmd;
+       unsigned long next;
  
-       base = address & PUD_MASK;
-       address &= ~PUD_MASK;
-       end = address + size;
-       if (end > PUD_SIZE)
-               end = PUD_SIZE;
+       pmd = pmd_alloc(mm, pud, addr);
+       if (!pmd)
+               return -ENOMEM;
         do {
-               pte_t * pte = pte_alloc_map(mm, pmd, base + address);
-               if (!pte)
+               next = pmd_addr_end(addr, end);
+               if (zeromap_pte_range(mm, pmd, addr, next, prot))
                         return -ENOMEM;
-               zeromap_pte_range(pte, base + address, end - address, prot);
-               pte_unmap(pte);
-               address = (address + PMD_SIZE) & PMD_MASK;
-               pmd++;
-       } while (address && (address < end));
+       } while (pmd++, addr = next, addr != end);
         return 0;
  }
  
-static inline int zeromap_pud_range(struct mm_struct *mm, pud_t * pud,
-                                   unsigned long address,
-                                    unsigned long size, pgprot_t prot)
+static inline int zeromap_pud_range(struct mm_struct *mm, pgd_t *pgd,
+                       unsigned long addr, unsigned long end, pgprot_t prot)
  {
-       unsigned long base, end;
-       int error = 0;
-
-       base = address & PGDIR_MASK;
-       address &= ~PGDIR_MASK;
-       end = address + size;
-       if (end > PGDIR_SIZE)
-               end = PGDIR_SIZE;
+       pud_t *pud;
+       unsigned long next;
+
+       pud = pud_alloc(mm, pgd, addr);
+       if (!pud)
+               return -ENOMEM;
         do {
-               pmd_t * pmd = pmd_alloc(mm, pud, base + address);
-               error = -ENOMEM;
-               if (!pmd)
-                       break;
-               error = zeromap_pmd_range(mm, pmd, base + address,
-                                         end - address, prot);
-               if (error)
-                       break;
-               address = (address + PUD_SIZE) & PUD_MASK;
-               pud++;
-       } while (address && (address < end));
+               next = pud_addr_end(addr, end);
+               if (zeromap_pmd_range(mm, pud, addr, next, prot))
+                       return -ENOMEM;
+       } while (pud++, addr = next, addr != end);
         return 0;
  }
  
-int zeromap_page_range(struct vm_area_struct *vma, unsigned long address,
-                                       unsigned long size, pgprot_t prot)
+int zeromap_page_range(struct vm_area_struct *vma,
+                       unsigned long addr, unsigned long size, pgprot_t prot)
  {
-       int i;
-       int error = 0;
-       pgd_t * pgd;
-       unsigned long beg = address;
-       unsigned long end = address + size;
+       pgd_t *pgd;
         unsigned long next;
+       unsigned long end = addr + size;
         struct mm_struct *mm = vma->vm_mm;
+       int err;
  
-       pgd = pgd_offset(mm, address);
-       flush_cache_range(vma, beg, end);
-       BUG_ON(address >= end);
-       BUG_ON(end > vma->vm_end);
-
+       BUG_ON(addr >= end);
+       pgd = pgd_offset(mm, addr);
+       flush_cache_range(vma, addr, end);
         spin_lock(&mm->page_table_lock);
-       for (i = pgd_index(address); i <= pgd_index(end-1); i++) {
-               pud_t *pud = pud_alloc(mm, pgd, address);
-               error = -ENOMEM;
-               if (!pud)
-                       break;
-               next = (address + PGDIR_SIZE) & PGDIR_MASK;
-               if (next <= beg || next > end)
-                       next = end;
-               error = zeromap_pud_range(mm, pud, address,
-                                               next - address, prot);
-               if (error)
+       do {
+               next = pgd_addr_end(addr, end);
+               err = zeromap_pud_range(mm, pgd, addr, next, prot);
+               if (err)
                         break;
-               address = next;
-               pgd++;
-       }
-       /*
-        * Why flush? zeromap_pte_range has a BUG_ON for !pte_none()
-        */
-       flush_tlb_range(vma, beg, end);
+       } while (pgd++, addr = next, addr != end);
         spin_unlock(&mm->page_table_lock);
-       return error;
+       return err;
  }
  
  /*
@@ -1099,95 +1099,74 @@ int zeromap_page_range(struct vm_area_struct *vma, unsigned long address,
   * mappings are removed. any references to nonexistent pages results
   * in null mappings (currently treated as "copy-on-access")
   */
-static inline void
-remap_pte_range(pte_t * pte, unsigned long address, unsigned long size,
-               unsigned long pfn, pgprot_t prot)
+static int remap_pte_range(struct mm_struct *mm, pmd_t *pmd,
+                       unsigned long addr, unsigned long end,
+                       unsigned long pfn, pgprot_t prot)
  {
-       unsigned long end;
+       pte_t *pte;
  
-       address &= ~PMD_MASK;
-       end = address + size;
-       if (end > PMD_SIZE)
-               end = PMD_SIZE;
+       pte = pte_alloc_map(mm, pmd, addr);
+       if (!pte)
+               return -ENOMEM;
         do {
                 BUG_ON(!pte_none(*pte));
                 if (!pfn_valid(pfn) || PageReserved(pfn_to_page(pfn)))
-                       set_pte(pte, pfn_pte(pfn, prot));
-               address += PAGE_SIZE;
+                       set_pte_at(mm, addr, pte, pfn_pte(pfn, prot));
                 pfn++;
-               pte++;
-       } while (address && (address < end));
+       } while (pte++, addr += PAGE_SIZE, addr != end);
+       pte_unmap(pte - 1);
+       return 0;
  }
  
-static inline int
-remap_pmd_range(struct mm_struct *mm, pmd_t * pmd, unsigned long address,
-               unsigned long size, unsigned long pfn, pgprot_t prot)
+static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud,
+                       unsigned long addr, unsigned long end,
+                       unsigned long pfn, pgprot_t prot)
  {
-       unsigned long base, end;
-
-       base = address & PUD_MASK;
-       address &= ~PUD_MASK;
-       end = address + size;
-       if (end > PUD_SIZE)
-               end = PUD_SIZE;
-       pfn -= (address >> PAGE_SHIFT);
+       pmd_t *pmd;
+       unsigned long next;
+
+       pfn -= addr >> PAGE_SHIFT;
+       pmd = pmd_alloc(mm, pud, addr);
+       if (!pmd)
+               return -ENOMEM;
         do {
-               pte_t * pte = pte_alloc_map(mm, pmd, base + address);
-               if (!pte)
+               next = pmd_addr_end(addr, end);
+               if (remap_pte_range(mm, pmd, addr, next,
+                               pfn + (addr >> PAGE_SHIFT), prot))
                         return -ENOMEM;
-               remap_pte_range(pte, base + address, end - address,
-                               (address >> PAGE_SHIFT) + pfn, prot);
-               pte_unmap(pte);
-               address = (address + PMD_SIZE) & PMD_MASK;
-               pmd++;
-       } while (address && (address < end));
+       } while (pmd++, addr = next, addr != end);
         return 0;
  }
  
-static inline int remap_pud_range(struct mm_struct *mm, pud_t * pud,
-                                 unsigned long address, unsigned long size,
-                                 unsigned long pfn, pgprot_t prot)
+static inline int remap_pud_range(struct mm_struct *mm, pgd_t *pgd,
+                       unsigned long addr, unsigned long end,
+                       unsigned long pfn, pgprot_t prot)
  {
-       unsigned long base, end;
-       int error;
-
-       base = address & PGDIR_MASK;
-       address &= ~PGDIR_MASK;
-       end = address + size;
-       if (end > PGDIR_SIZE)
-               end = PGDIR_SIZE;
-       pfn -= address >> PAGE_SHIFT;
+       pud_t *pud;
+       unsigned long next;
+
+       pfn -= addr >> PAGE_SHIFT;
+       pud = pud_alloc(mm, pgd, addr);
+       if (!pud)
+               return -ENOMEM;
         do {
-               pmd_t *pmd = pmd_alloc(mm, pud, base+address);
-               error = -ENOMEM;
-               if (!pmd)
-                       break;
-               error = remap_pmd_range(mm, pmd, base + address, end - address,
-                               (address >> PAGE_SHIFT) + pfn, prot);
-               if (error)
-                       break;
-               address = (address + PUD_SIZE) & PUD_MASK;
-               pud++;
-       } while (address && (address < end));
-       return error;
+               next = pud_addr_end(addr, end);
+               if (remap_pmd_range(mm, pud, addr, next,
+                               pfn + (addr >> PAGE_SHIFT), prot))
+                       return -ENOMEM;
+       } while (pud++, addr = next, addr != end);
+       return 0;
  }
  
  /*  Note: this is only safe if the mm semaphore is held when called. */
-int remap_pfn_range(struct vm_area_struct *vma, unsigned long from,
+int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
                     unsigned long pfn, unsigned long size, pgprot_t prot)
  {
-       int error = 0;
         pgd_t *pgd;
-       unsigned long beg = from;
-       unsigned long end = from + size;
         unsigned long next;
+       unsigned long end = addr + PAGE_ALIGN(size);
         struct mm_struct *mm = vma->vm_mm;
-       int i;
-
-       pfn -= from >> PAGE_SHIFT;
-       pgd = pgd_offset(mm, from);
-       flush_cache_range(vma, beg, end);
-       BUG_ON(from >= end);
+       int err;
  
         /*
          * Physically remapped pages are special. Tell the
@@ -1199,31 +1178,21 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long from,
          */
         vma->vm_flags |= VM_IO | VM_RESERVED;
  
+       BUG_ON(addr >= end);
+       pfn -= addr >> PAGE_SHIFT;
+       pgd = pgd_offset(mm, addr);
+       flush_cache_range(vma, addr, end);
         spin_lock(&mm->page_table_lock);
-       for (i = pgd_index(beg); i <= pgd_index(end-1); i++) {
-               pud_t *pud = pud_alloc(mm, pgd, from);
-               error = -ENOMEM;
-               if (!pud)
-                       break;
-               next = (from + PGDIR_SIZE) & PGDIR_MASK;
-               if (next > end || next <= from)
-                       next = end;
-               error = remap_pud_range(mm, pud, from, end - from,
-                                       pfn + (from >> PAGE_SHIFT), prot);
-               if (error)
+       do {
+               next = pgd_addr_end(addr, end);
+               err = remap_pud_range(mm, pgd, addr, next,
+                               pfn + (addr >> PAGE_SHIFT), prot);
+               if (err)
                         break;
-               from = next;
-               pgd++;
-       }
-       /*
-        * Why flush? remap_pte_range has a BUG_ON for !pte_none()
-        */
-       flush_tlb_range(vma, beg, end);
+       } while (pgd++, addr = next, addr != end);
         spin_unlock(&mm->page_table_lock);
-
-       return error;
+       return err;
  }
-
  EXPORT_SYMBOL(remap_pfn_range);
  
  /*
@@ -1247,11 +1216,11 @@ static inline void break_cow(struct vm_area_struct * vma, struct page * new_page
  {
         pte_t entry;
  
-       flush_cache_page(vma, address);
         entry = maybe_mkwrite(pte_mkdirty(mk_pte(new_page, vma->vm_page_prot)),
                               vma);
         ptep_establish(vma, address, page_table, entry);
         update_mmu_cache(vma, address, entry);
+       lazy_mmu_prot_update(entry);
  }
  
  /*
@@ -1299,11 +1268,12 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
                 int reuse = can_share_swap_page(old_page);
                 unlock_page(old_page);
                 if (reuse) {
-                       flush_cache_page(vma, address);
+                       flush_cache_page(vma, address, pfn);
                         entry = maybe_mkwrite(pte_mkyoung(pte_mkdirty(pte)),
                                               vma);
                         ptep_set_access_flags(vma, address, page_table, entry, 1);
                         update_mmu_cache(vma, address, entry);
+                       lazy_mmu_prot_update(entry);
                         pte_unmap(page_table);
                         spin_unlock(&mm->page_table_lock);
                         return VM_FAULT_MINOR;
@@ -1337,13 +1307,12 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
         page_table = pte_offset_map(pmd, address);
         if (likely(pte_same(*page_table, pte))) {
                 if (PageAnon(old_page))
-                       vx_anonpages_dec(mm);
-               if (PageReserved(old_page)) {
-                       vx_rsspages_inc(mm);
-                       acct_update_integrals();
-                       update_mem_hiwater();
-               } else
+                       dec_mm_counter(mm, anon_rss);
+               if (PageReserved(old_page))
+                       inc_mm_counter(mm, rss);
+               else
                         page_remove_rmap(old_page);
+               flush_cache_page(vma, address, pfn);
                 break_cow(vma, new_page, address, page_table);
                 lru_cache_add_active(new_page);
                 page_add_anon_rmap(new_page, vma, address);
@@ -1387,7 +1356,7 @@ no_new_page:
   * i_mmap_lock.
   *
   * In order to make forward progress despite repeatedly restarting some
- * large vma, note the break_addr set by unmap_vmas when it breaks out:
+ * large vma, note the restart_addr from unmap_vmas when it breaks out:
   * and restart from that address when we reach that vma again.  It might
   * have been split or merged, shrunk or extended, but never shifted: so
   * restart_addr remains valid so long as it remains in the vma's range.
@@ -1425,8 +1394,8 @@ again:
                 }
         }
  
-       details->break_addr = end_addr;
-       zap_page_range(vma, start_addr, end_addr - start_addr, details);
+       restart_addr = zap_page_range(vma, start_addr,
+                                       end_addr - start_addr, details);
  
         /*
          * We cannot rely on the break test in unmap_vmas:
@@ -1437,14 +1406,14 @@ again:
         need_break = need_resched() ||
                         need_lockbreak(details->i_mmap_lock);
  
-       if (details->break_addr >= end_addr) {
+       if (restart_addr >= end_addr) {
                 /* We have now completed this vma: mark it so */
                 vma->vm_truncate_count = details->truncate_count;
                 if (!need_break)
                         return 0;
         } else {
                 /* Note restart_addr in vma's truncate_count field */
-               vma->vm_truncate_count = details->break_addr;
+               vma->vm_truncate_count = restart_addr;
                 if (!need_break)
                         goto again;
         }
@@ -1736,12 +1705,13 @@ static int do_swap_page(struct mm_struct * mm,
         spin_lock(&mm->page_table_lock);
         page_table = pte_offset_map(pmd, address);
         if (unlikely(!pte_same(*page_table, orig_pte))) {
-               pte_unmap(page_table);
-               spin_unlock(&mm->page_table_lock);
-               unlock_page(page);
-               page_cache_release(page);
                 ret = VM_FAULT_MINOR;
-               goto out;
+               goto out_nomap;
+       }
+
+       if (unlikely(!PageUptodate(page))) {
+               ret = VM_FAULT_SIGBUS;
+               goto out_nomap;
         }
  
         /* The page isn't present yet, go ahead with the fault. */
@@ -1750,10 +1720,7 @@ static int do_swap_page(struct mm_struct * mm,
         if (vm_swap_full())
                 remove_exclusive_swap_page(page);
  
-       vx_rsspages_inc(mm);
-       acct_update_integrals();
-       update_mem_hiwater();
-
+       inc_mm_counter(mm, rss);
         pte = mk_pte(page, vma->vm_page_prot);
         if (write_access && can_share_swap_page(page)) {
                 pte = maybe_mkwrite(pte_mkdirty(pte), vma);
@@ -1762,7 +1729,7 @@ static int do_swap_page(struct mm_struct * mm,
         unlock_page(page);
  
         flush_icache_page(vma, page);
-       set_pte(page_table, pte);
+       set_pte_at(mm, address, page_table, pte);
         page_add_anon_rmap(page, vma, address);
  
         if (write_access) {
@@ -1774,10 +1741,17 @@ static int do_swap_page(struct mm_struct * mm,
  
         /* No need to invalidate - it was non-present before */
         update_mmu_cache(vma, address, pte);
+       lazy_mmu_prot_update(pte);
         pte_unmap(page_table);
         spin_unlock(&mm->page_table_lock);
  out:
         return ret;
+out_nomap:
+       pte_unmap(page_table);
+       spin_unlock(&mm->page_table_lock);
+       unlock_page(page);
+       page_cache_release(page);
+       goto out;
  }
  
  /*
@@ -1819,9 +1793,7 @@ do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
                         spin_unlock(&mm->page_table_lock);
                         goto out;
                 }
-               vx_rsspages_inc(mm);
-               acct_update_integrals();
-               update_mem_hiwater();
+               inc_mm_counter(mm, rss);
                 entry = maybe_mkwrite(pte_mkdirty(mk_pte(page,
                                                          vma->vm_page_prot)),
                                       vma);
@@ -1830,11 +1802,12 @@ do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
                 page_add_anon_rmap(page, vma, addr);
         }
  
-       set_pte(page_table, entry);
+       set_pte_at(mm, addr, page_table, entry);
         pte_unmap(page_table);
  
         /* No need to invalidate - it was non-present before */
         update_mmu_cache(vma, addr, entry);
+       lazy_mmu_prot_update(entry);
         spin_unlock(&mm->page_table_lock);
  out:
         return VM_FAULT_MINOR;
@@ -1940,15 +1913,13 @@ retry:
         /* Only go through if we didn't race with anybody else... */
         if (pte_none(*page_table)) {
                 if (!PageReserved(new_page))
-                       vx_rsspages_inc(mm);
-               acct_update_integrals();
-               update_mem_hiwater();
+                       inc_mm_counter(mm, rss);
  
                 flush_icache_page(vma, new_page);
                 entry = mk_pte(new_page, vma->vm_page_prot);
                 if (write_access)
                         entry = maybe_mkwrite(pte_mkdirty(entry), vma);
-               set_pte(page_table, entry);
+               set_pte_at(mm, address, page_table, entry);
                 if (anon) {
                         lru_cache_add_active(new_page);
                         page_add_anon_rmap(new_page, vma, address);
@@ -1965,6 +1936,7 @@ retry:
  
         /* no need to invalidate: a not-present page shouldn't be cached */
         update_mmu_cache(vma, address, entry);
+       lazy_mmu_prot_update(entry);
         spin_unlock(&mm->page_table_lock);
  out:
         return ret;
@@ -1992,7 +1964,7 @@ static int do_file_page(struct mm_struct * mm, struct vm_area_struct * vma,
          */
         if (!vma->vm_ops || !vma->vm_ops->populate || 
                         (write_access && !(vma->vm_flags & VM_SHARED))) {
-               pte_clear(pte);
+               pte_clear(mm, address, pte);
                 return do_no_page(mm, vma, address, write_access, pte, pmd);
         }
  
@@ -2059,6 +2031,7 @@ static inline int handle_pte_fault(struct mm_struct *mm,
         entry = pte_mkyoung(entry);
         ptep_set_access_flags(vma, address, pte, entry, write_access);
         update_mmu_cache(vma, address, entry);
+       lazy_mmu_prot_update(entry);
         pte_unmap(pte);
         spin_unlock(&mm->page_table_lock);
         return VM_FAULT_MINOR;
@@ -2108,15 +2081,12 @@ int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma,
         return VM_FAULT_OOM;
  }
  
-#ifndef __ARCH_HAS_4LEVEL_HACK
+#ifndef __PAGETABLE_PUD_FOLDED
  /*
   * Allocate page upper directory.
   *
   * We've already handled the fast-path in-line, and we own the
   * page table lock.
- *
- * On a two-level or three-level page table, this ends up actually being
- * entirely optimized away.
   */
  pud_t fastcall *__pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
  {
@@ -2140,15 +2110,14 @@ pud_t fastcall *__pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long addr
   out:
         return pud_offset(pgd, address);
  }
+#endif /* __PAGETABLE_PUD_FOLDED */
  
+#ifndef __PAGETABLE_PMD_FOLDED
  /*
   * Allocate page middle directory.
   *
   * We've already handled the fast-path in-line, and we own the
   * page table lock.
- *
- * On a two-level page table, this ends up actually being entirely
- * optimized away.
   */
  pmd_t fastcall *__pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
  {
@@ -2164,38 +2133,24 @@ pmd_t fastcall *__pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long addr
          * Because we dropped the lock, we should re-check the
          * entry, as somebody else could have populated it..
          */
+#ifndef __ARCH_HAS_4LEVEL_HACK
         if (pud_present(*pud)) {
                 pmd_free(new);
                 goto out;
         }
         pud_populate(mm, pud, new);
- out:
-       return pmd_offset(pud, address);
-}
  #else
-pmd_t fastcall *__pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
-{
-       pmd_t *new;
-
-       spin_unlock(&mm->page_table_lock);
-       new = pmd_alloc_one(mm, address);
-       spin_lock(&mm->page_table_lock);
-       if (!new)
-               return NULL;
-
-       /*
-        * Because we dropped the lock, we should re-check the
-        * entry, as somebody else could have populated it..
-        */
         if (pgd_present(*pud)) {
                 pmd_free(new);
                 goto out;
         }
         pgd_populate(mm, pud, new);
-out:
+#endif /* __ARCH_HAS_4LEVEL_HACK */
+
+ out:
         return pmd_offset(pud, address);
  }
-#endif
+#endif /* __PAGETABLE_PMD_FOLDED */
  
  int make_pages_present(unsigned long addr, unsigned long end)
  {
@@ -2262,13 +2217,13 @@ EXPORT_SYMBOL(vmalloc_to_pfn);
   * update_mem_hiwater
   *     - update per process rss and vm high water data
   */
-void update_mem_hiwater(void)
+void update_mem_hiwater(struct task_struct *tsk)
  {
-       struct task_struct *tsk = current;
-
         if (tsk->mm) {
-               if (tsk->mm->hiwater_rss < tsk->mm->rss)
-                       tsk->mm->hiwater_rss = tsk->mm->rss;
+               unsigned long rss = get_mm_counter(tsk->mm, rss);
+
+               if (tsk->mm->hiwater_rss < rss)
+                       tsk->mm->hiwater_rss = rss;
                 if (tsk->mm->hiwater_vm < tsk->mm->total_vm)
                         tsk->mm->hiwater_vm = tsk->mm->total_vm;
         }