This commit was manufactured by cvs2svn to create tag
diff --git a/mm/memory.c b/mm/memory.c
index 05ff499..9a4f695 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -114,6 +114,7 @@ static inline void free_one_pmd(struct mmu_gather *tlb, pmd_t * dir)
        page = pmd_page(*dir);
        pmd_clear(dir);
        dec_page_state(nr_page_table_pages);
+       tlb->mm->nr_ptes--;
        pte_free_tlb(tlb, page);
 }
 
@@ -163,7 +164,6 @@ pte_t fastcall * pte_alloc_map(struct mm_struct *mm, pmd_t *pmd, unsigned long a
                spin_lock(&mm->page_table_lock);
                if (!new)
                        return NULL;
-
                /*
                 * Because we dropped the lock, we should re-check the
                 * entry, as somebody else could have populated it..
@@ -172,6 +172,7 @@ pte_t fastcall * pte_alloc_map(struct mm_struct *mm, pmd_t *pmd, unsigned long a
                        pte_free(new);
                        goto out;
                }
+               mm->nr_ptes++;
                inc_page_state(nr_page_table_pages);
                pmd_populate(mm, pmd, new);
        }
@@ -282,14 +283,25 @@ skip_copy_pte_range:
                                struct page *page;
                                unsigned long pfn;
 
+                               if (!vx_rsspages_avail(dst, 1)) {
+                                       spin_unlock(&src->page_table_lock);
+                                       goto nomem;
+                               }
                                /* copy_one_pte */
 
                                if (pte_none(pte))
                                        goto cont_copy_pte_range_noset;
                                /* pte contains position in swap, so copy. */
                                if (!pte_present(pte)) {
-                                       if (!pte_file(pte))
+                                       if (!pte_file(pte)) {
                                                swap_duplicate(pte_to_swp_entry(pte));
+                                               if (list_empty(&dst->mmlist)) {
+                                                       spin_lock(&mmlist_lock);
+                                                       list_add(&dst->mmlist,
+                                                                &src->mmlist);
+                                                       spin_unlock(&mmlist_lock);
+                                               }
+                                       }
                                        set_pte(dst_pte, pte);
                                        goto cont_copy_pte_range_noset;
                                }
@@ -325,7 +337,10 @@ skip_copy_pte_range:
                                        pte = pte_mkclean(pte);
                                pte = pte_mkold(pte);
                                get_page(page);
-                               dst->rss++;
+                               // dst->rss++;
+                               vx_rsspages_inc(dst);
+                               if (PageAnon(page))
+                                       dst->anon_rss++;
                                set_pte(dst_pte, pte);
                                page_dup_rmap(page);
 cont_copy_pte_range_noset:
@@ -416,7 +431,9 @@ static void zap_pte_range(struct mmu_gather *tlb,
                                set_pte(ptep, pgoff_to_pte(page->index));
                        if (pte_dirty(pte))
                                set_page_dirty(page);
-                       if (pte_young(pte) && !PageAnon(page))
+                       if (PageAnon(page))
+                               tlb->mm->anon_rss--;
+                       else if (pte_young(pte))
                                mark_page_accessed(page);
                        tlb->freed++;
                        page_remove_rmap(page);
@@ -727,19 +744,15 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                        pte_t *pte;
                        if (write) /* user gate pages are read-only */
                                return i ? : -EFAULT;
-                       pgd = pgd_offset_gate(mm, pg);
-                       if (!pgd)
-                               return i ? : -EFAULT;
+                       if (pg > TASK_SIZE)
+                               pgd = pgd_offset_k(pg);
+                       else
+                               pgd = pgd_offset_gate(mm, pg);
+                       BUG_ON(pgd_none(*pgd));
                        pmd = pmd_offset(pgd, pg);
-                       if (!pmd)
-                               return i ? : -EFAULT;
+                       BUG_ON(pmd_none(*pmd));
                        pte = pte_offset_map(pmd, pg);
-                       if (!pte)
-                               return i ? : -EFAULT;
-                       if (!pte_present(*pte)) {
-                               pte_unmap(pte);
-                               return i ? : -EFAULT;
-                       }
+                       BUG_ON(pte_none(*pte));
                        if (pages) {
                                pages[i] = pte_page(*pte);
                                get_page(pages[i]);
@@ -753,7 +766,7 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                        continue;
                }
 
-               if (!vma || (pages && (vma->vm_flags & VM_IO))
+               if (!vma || (vma->vm_flags & VM_IO)
                                || !(flags & vma->vm_flags))
                        return i ? : -EFAULT;
 
@@ -910,16 +923,14 @@ int zeromap_page_range(struct vm_area_struct *vma, unsigned long address, unsign
  * in null mappings (currently treated as "copy-on-access")
  */
 static inline void remap_pte_range(pte_t * pte, unsigned long address, unsigned long size,
-       unsigned long phys_addr, pgprot_t prot)
+       unsigned long pfn, pgprot_t prot)
 {
        unsigned long end;
-       unsigned long pfn;
 
        address &= ~PMD_MASK;
        end = address + size;
        if (end > PMD_SIZE)
                end = PMD_SIZE;
-       pfn = phys_addr >> PAGE_SHIFT;
        do {
                BUG_ON(!pte_none(*pte));
                if (!pfn_valid(pfn) || PageReserved(pfn_to_page(pfn)))
@@ -931,7 +942,7 @@ static inline void remap_pte_range(pte_t * pte, unsigned long address, unsigned
 }
 
 static inline int remap_pmd_range(struct mm_struct *mm, pmd_t * pmd, unsigned long address, unsigned long size,
-       unsigned long phys_addr, pgprot_t prot)
+       unsigned long pfn, pgprot_t prot)
 {
        unsigned long base, end;
 
@@ -940,12 +951,12 @@ static inline int remap_pmd_range(struct mm_struct *mm, pmd_t * pmd, unsigned lo
        end = address + size;
        if (end > PGDIR_SIZE)
                end = PGDIR_SIZE;
-       phys_addr -= address;
+       pfn -= address >> PAGE_SHIFT;
        do {
                pte_t * pte = pte_alloc_map(mm, pmd, base + address);
                if (!pte)
                        return -ENOMEM;
-               remap_pte_range(pte, base + address, end - address, address + phys_addr, prot);
+               remap_pte_range(pte, base + address, end - address, pfn + (address >> PAGE_SHIFT), prot);
                pte_unmap(pte);
                address = (address + PMD_SIZE) & PMD_MASK;
                pmd++;
@@ -954,7 +965,7 @@ static inline int remap_pmd_range(struct mm_struct *mm, pmd_t * pmd, unsigned lo
 }
 
 /*  Note: this is only safe if the mm semaphore is held when called. */
-int remap_page_range(struct vm_area_struct *vma, unsigned long from, unsigned long phys_addr, unsigned long size, pgprot_t prot)
+int remap_pfn_range(struct vm_area_struct *vma, unsigned long from, unsigned long pfn, unsigned long size, pgprot_t prot)
 {
        int error = 0;
        pgd_t * dir;
@@ -962,19 +973,28 @@ int remap_page_range(struct vm_area_struct *vma, unsigned long from, unsigned lo
        unsigned long end = from + size;
        struct mm_struct *mm = vma->vm_mm;
 
-       phys_addr -= from;
+       pfn -= from >> PAGE_SHIFT;
        dir = pgd_offset(mm, from);
        flush_cache_range(vma, beg, end);
        if (from >= end)
                BUG();
 
+       /*
+        * Physically remapped pages are special. Tell the
+        * rest of the world about it:
+        *   VM_IO tells people not to look at these pages
+        *      (accesses can have side effects).
+        *   VM_RESERVED tells swapout not to try to touch
+        *      this region.
+        */
+       vma->vm_flags |= VM_IO | VM_RESERVED;
        spin_lock(&mm->page_table_lock);
        do {
                pmd_t *pmd = pmd_alloc(mm, dir, from);
                error = -ENOMEM;
                if (!pmd)
                        break;
-               error = remap_pmd_range(mm, pmd, from, end - from, phys_addr + from, prot);
+               error = remap_pmd_range(mm, pmd, from, end - from, pfn + (from >> PAGE_SHIFT), prot);
                if (error)
                        break;
                from = (from + PGDIR_SIZE) & PGDIR_MASK;
@@ -987,8 +1007,7 @@ int remap_page_range(struct vm_area_struct *vma, unsigned long from, unsigned lo
        spin_unlock(&mm->page_table_lock);
        return error;
 }
-
-EXPORT_SYMBOL(remap_page_range);
+EXPORT_SYMBOL(remap_pfn_range);
 
 /*
  * Do pte_mkwrite, but only if the vma says VM_WRITE.  We do this when
@@ -1095,8 +1114,11 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
        spin_lock(&mm->page_table_lock);
        page_table = pte_offset_map(pmd, address);
        if (likely(pte_same(*page_table, pte))) {
+               if (PageAnon(old_page))
+                       mm->anon_rss--;
                if (PageReserved(old_page))
-                       ++mm->rss;
+                       // ++mm->rss;
+                       vx_rsspages_inc(mm);
                else
                        page_remove_rmap(old_page);
                break_cow(vma, new_page, address, page_table);
@@ -1123,12 +1145,12 @@ no_new_page:
 static inline void unmap_mapping_range_list(struct prio_tree_root *root,
                                            struct zap_details *details)
 {
-       struct vm_area_struct *vma = NULL;
+       struct vm_area_struct *vma;
        struct prio_tree_iter iter;
        pgoff_t vba, vea, zba, zea;
 
-       while ((vma = vma_prio_tree_next(vma, root, &iter,
-                       details->first_index, details->last_index)) != NULL) {
+       vma_prio_tree_foreach(vma, &iter, root,
+                       details->first_index, details->last_index) {
                vba = vma->vm_pgoff;
                vea = vba + ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT) - 1;
                /* Assume for now that PAGE_CACHE_SHIFT == PAGE_SHIFT */
@@ -1236,7 +1258,7 @@ int vmtruncate(struct inode * inode, loff_t offset)
        goto out_truncate;
 
 do_expand:
-       limit = current->rlim[RLIMIT_FSIZE].rlim_cur;
+       limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
        if (limit != RLIM_INFINITY && offset > limit)
                goto out_sig;
        if (offset > inode->i_sb->s_maxbytes)
@@ -1351,8 +1373,13 @@ static int do_swap_page(struct mm_struct * mm,
                /* Had to read the page from swap area: Major fault */
                ret = VM_FAULT_MAJOR;
                inc_page_state(pgmajfault);
+               grab_swap_token();
        }
 
+       if (!vx_rsspages_avail(mm, 1)) {
+               ret = VM_FAULT_OOM;
+               goto out;
+       }
        mark_page_accessed(page);
        lock_page(page);
 
@@ -1377,7 +1404,8 @@ static int do_swap_page(struct mm_struct * mm,
        if (vm_swap_full())
                remove_exclusive_swap_page(page);
 
-       mm->rss++;
+       // mm->rss++;
+       vx_rsspages_inc(mm);
        pte = mk_pte(page, vma->vm_page_prot);
        if (write_access && can_share_swap_page(page)) {
                pte = maybe_mkwrite(pte_mkdirty(pte), vma);
@@ -1428,6 +1456,9 @@ do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 
                if (unlikely(anon_vma_prepare(vma)))
                        goto no_mem;
+               if (!vx_rsspages_avail(mm, 1))
+                       goto no_mem;
+
                page = alloc_page_vma(GFP_HIGHUSER, vma, addr);
                if (!page)
                        goto no_mem;
@@ -1442,7 +1473,8 @@ do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
                        spin_unlock(&mm->page_table_lock);
                        goto out;
                }
-               mm->rss++;
+               // mm->rss++;
+               vx_rsspages_inc(mm);
                entry = maybe_mkwrite(pte_mkdirty(mk_pte(page,
                                                         vma->vm_page_prot)),
                                      vma);
@@ -1505,6 +1537,8 @@ retry:
                return VM_FAULT_SIGBUS;
        if (new_page == NOPAGE_OOM)
                return VM_FAULT_OOM;
+       if (!vx_rsspages_avail(mm, 1))
+               return VM_FAULT_OOM;
 
        /*
         * Should we do an early C-O-W break?
@@ -1550,8 +1584,9 @@ retry:
         */
        /* Only go through if we didn't race with anybody else... */
        if (pte_none(*page_table)) {
-               if (!PageReserved(new_page))
-                       ++mm->rss;
+               if (!PageReserved(new_page)) 
+                       //++mm->rss;
+                       vx_rsspages_inc(mm);
                flush_icache_page(vma, new_page);
                entry = mk_pte(new_page, vma->vm_page_prot);
                if (write_access)
@@ -1693,15 +1728,20 @@ int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma,
         * We need the page table lock to synchronize with kswapd
         * and the SMP-safe atomic PTE updates.
         */
+       set_delay_flag(current,PF_MEMIO);
        spin_lock(&mm->page_table_lock);
        pmd = pmd_alloc(mm, pgd, address);
 
        if (pmd) {
                pte_t * pte = pte_alloc_map(mm, pmd, address);
-               if (pte)
-                       return handle_pte_fault(mm, vma, address, write_access, pte, pmd);
+               if (pte) {
+                       int rc = handle_pte_fault(mm, vma, address, write_access, pte, pmd);
+                       clear_delay_flag(current,PF_MEMIO);
+                       return rc;
+               }
        }
        spin_unlock(&mm->page_table_lock);
+       clear_delay_flag(current,PF_MEMIO);
        return VM_FAULT_OOM;
 }
 
@@ -1743,6 +1783,8 @@ int make_pages_present(unsigned long addr, unsigned long end)
        struct vm_area_struct * vma;
 
        vma = find_vma(current->mm, addr);
+       if (!vma)
+               return -1;
        write = (vma->vm_flags & VM_WRITE) != 0;
        if (addr >= end)
                BUG();
@@ -1770,13 +1812,11 @@ struct page * vmalloc_to_page(void * vmalloc_addr)
        if (!pgd_none(*pgd)) {
                pmd = pmd_offset(pgd, addr);
                if (!pmd_none(*pmd)) {
-                       preempt_disable();
                        ptep = pte_offset_map(pmd, addr);
                        pte = *ptep;
                        if (pte_present(pte))
                                page = pte_page(pte);
                        pte_unmap(ptep);
-                       preempt_enable();
                }
        }
        return page;
@@ -1784,6 +1824,16 @@ struct page * vmalloc_to_page(void * vmalloc_addr)
 
 EXPORT_SYMBOL(vmalloc_to_page);
 
+/*
+ * Map a vmalloc()-space virtual address to the physical page frame number.
+ */
+unsigned long vmalloc_to_pfn(void * vmalloc_addr)
+{
+       return page_to_pfn(vmalloc_to_page(vmalloc_addr));
+}
+
+EXPORT_SYMBOL(vmalloc_to_pfn);
+
 #if !defined(CONFIG_ARCH_GATE_AREA)
 
 #if defined(AT_SYSINFO_EHDR)
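
As a usage illustration only (not part of this patch): a minimal sketch of a 2.6-era driver mmap handler calling the renamed remap_pfn_range() from the hunks above, which now takes a page frame number instead of a physical address and sets VM_IO | VM_RESERVED itself. The mydev_* names and mydev_phys_base are hypothetical.

#include <linux/fs.h>
#include <linux/mm.h>

/* Hypothetical device register base (physical address); not from this patch. */
static unsigned long mydev_phys_base;

static int mydev_mmap(struct file *file, struct vm_area_struct *vma)
{
	unsigned long size = vma->vm_end - vma->vm_start;
	unsigned long pfn = mydev_phys_base >> PAGE_SHIFT;	/* phys addr -> pfn */

	/* VM_IO | VM_RESERVED are now set by remap_pfn_range() itself. */
	if (remap_pfn_range(vma, vma->vm_start, pfn, size, vma->vm_page_prot))
		return -EAGAIN;
	return 0;
}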
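
Likewise, a hedged sketch of what the newly exported vmalloc_to_pfn() enables: handing a vmalloc()ed buffer to userspace one page at a time via remap_pfn_range(). The vbuf buffer and handler name are hypothetical, and on this kernel the buffer's pages would additionally need to be marked reserved beforehand, since remap_pte_range() above only installs PTEs for invalid or reserved page frames.

#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>

/* Hypothetical vmalloc()ed buffer owned by the driver; not from this patch. */
static void *vbuf;

static int vbuf_mmap(struct file *file, struct vm_area_struct *vma)
{
	unsigned long uaddr = vma->vm_start;
	unsigned long size  = vma->vm_end - vma->vm_start;
	unsigned long off;

	for (off = 0; off < size; off += PAGE_SIZE) {
		/* New helper: page frame number backing this vmalloc page. */
		unsigned long pfn = vmalloc_to_pfn((char *)vbuf + off);

		if (remap_pfn_range(vma, uaddr + off, pfn,
				    PAGE_SIZE, vma->vm_page_prot))
			return -EAGAIN;
	}
	return 0;
}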