This commit was manufactured by cvs2svn to create tag
diff --git a/mm/memory.c b/mm/memory.c
index 3df1f05..44f80c9 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -108,7 +108,8 @@ static inline void free_one_pmd(struct mmu_gather *tlb, pmd_t * dir)
        pte_free_tlb(tlb, page);
 }
 
-static inline void free_one_pgd(struct mmu_gather *tlb, pgd_t * dir)
+static inline void free_one_pgd(struct mmu_gather *tlb, pgd_t * dir,
+                                                       int pgd_idx)
 {
        int j;
        pmd_t * pmd;
@@ -122,8 +123,11 @@ static inline void free_one_pgd(struct mmu_gather *tlb, pgd_t * dir)
        }
        pmd = pmd_offset(dir, 0);
        pgd_clear(dir);
-       for (j = 0; j < PTRS_PER_PMD ; j++)
+       for (j = 0; j < PTRS_PER_PMD ; j++) {
+               if (pgd_idx * PGDIR_SIZE + j * PMD_SIZE >= TASK_SIZE)
+                       break;
                free_one_pmd(tlb, pmd+j);
+       }
        pmd_free_tlb(tlb, pmd);
 }
 
@@ -136,11 +140,13 @@ static inline void free_one_pgd(struct mmu_gather *tlb, pgd_t * dir)
 void clear_page_tables(struct mmu_gather *tlb, unsigned long first, int nr)
 {
        pgd_t * page_dir = tlb->mm->pgd;
+       int pgd_idx = first;
 
        page_dir += first;
        do {
-               free_one_pgd(tlb, page_dir);
+               free_one_pgd(tlb, page_dir, pgd_idx);
                page_dir++;
+               pgd_idx++;
        } while (--nr);
 }
 
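The first two hunks thread a pgd_idx argument down from clear_page_tables() so that free_one_pgd() can stop once the pmd it is about to free would lie at or above TASK_SIZE; without the bound, the loop over PTRS_PER_PMD could walk into pmds that map kernel space. A minimal sketch of the arithmetic, using i386 PAE numbers as an assumed example; pmd_maps_kernel() is illustrative, not a helper from the patch:

    /*
     * Hedged sketch, assuming i386 PAE (PGDIR_SIZE = 1 GB,
     * PMD_SIZE = 2 MB, TASK_SIZE = 0xC0000000): pmd slot j of
     * pgd slot pgd_idx covers user space only while its start
     * address stays below TASK_SIZE.
     */
    static inline int pmd_maps_kernel(int pgd_idx, int j)
    {
            unsigned long addr = pgd_idx * PGDIR_SIZE + j * PMD_SIZE;

            return addr >= TASK_SIZE;   /* true from pgd_idx 3, j 0 up */
    }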
@@ -273,6 +279,10 @@ skip_copy_pte_range:
                                struct page *page;
                                unsigned long pfn;
 
+                               if (!vx_rsspages_avail(dst, 1)) {
+                                       spin_unlock(&src->page_table_lock);
+                                       goto nomem;
+                               }
                                /* copy_one_pte */
 
                                if (pte_none(pte))
@@ -316,7 +326,8 @@ skip_copy_pte_range:
                                        pte = pte_mkclean(pte);
                                pte = pte_mkold(pte);
                                get_page(page);
-                               dst->rss++;
+                               // dst->rss++;
+                               vx_rsspages_inc(dst);
                                set_pte(dst_pte, pte);
                                page_dup_rmap(page);
 cont_copy_pte_range_noset:
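These copy_page_range() hunks are the Linux-VServer RSS accounting hooks: vx_rsspages_avail() asks whether the destination context may charge one more resident page before the pte is copied, and vx_rsspages_inc() replaces the bare dst->rss++ so the per-context counter stays in step with mm->rss. A hedged sketch of what the helpers plausibly do; the vx_info layout and field names below are assumptions for illustration, not the VServer patch's exact definitions:

    /*
     * Hedged sketch, not the patch's exact code: assume each mm
     * carries an optional struct vx_info with an atomic current-RSS
     * counter and a limit, both indexed by RLIMIT_RSS.
     */
    static inline int vx_rsspages_avail(struct mm_struct *mm, int pages)
    {
            struct vx_info *vxi = mm->mm_vx_info;   /* NULL: no context */

            if (!vxi || vxi->limit.rlim[RLIMIT_RSS] == RLIM_INFINITY)
                    return 1;
            return atomic_read(&vxi->limit.rcur[RLIMIT_RSS]) + pages
                    <= vxi->limit.rlim[RLIMIT_RSS];
    }

    static inline void vx_rsspages_inc(struct mm_struct *mm)
    {
            mm->rss++;                              /* kernel's own count */
            if (mm->mm_vx_info)                     /* charge the context */
                    atomic_inc(&mm->mm_vx_info->limit.rcur[RLIMIT_RSS]);
    }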
@@ -432,7 +443,7 @@ static void zap_pmd_range(struct mmu_gather *tlb,
                unsigned long size, struct zap_details *details)
 {
        pmd_t * pmd;
-       unsigned long end;
+       unsigned long end, pgd_boundary;
 
        if (pgd_none(*dir))
                return;
@@ -443,8 +454,9 @@ static void zap_pmd_range(struct mmu_gather *tlb,
        }
        pmd = pmd_offset(dir, address);
        end = address + size;
-       if (end > ((address + PGDIR_SIZE) & PGDIR_MASK))
-               end = ((address + PGDIR_SIZE) & PGDIR_MASK);
+       pgd_boundary = ((address + PGDIR_SIZE) & PGDIR_MASK);
+       if (pgd_boundary && (end > pgd_boundary))
+               end = pgd_boundary;
        do {
                zap_pte_range(tlb, pmd, address, end - address, details);
                address = (address + PMD_SIZE) & PMD_MASK; 
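The zap_pmd_range() change fixes an unsigned wraparound in the old clamp: for an address inside the top-most pgd slot of the address space, address + PGDIR_SIZE overflows to zero, the mask yields a pgd_boundary of 0, and clamping end to 0 makes end - address wrap, so the first zap_pte_range() call covers everything up to the top of the address space instead of the requested range. Treating a zero boundary as "no clamp" leaves end = address + size. Worked numbers for a 32-bit kernel with 4 MB pgd entries (an assumed configuration for illustration):

    /*
     * Caller asks to zap one page at the top of the address space:
     *
     *      address              = 0xffc00000
     *      size                 = 0x00001000      (one 4 KB page)
     *      address + PGDIR_SIZE = 0x1_0000_0000   -> wraps to 0
     *      pgd_boundary         = 0 & PGDIR_MASK  = 0
     *
     * Old code: end = 0, so end - address wraps to 0x00400000 and the
     * whole last pmd is zapped instead of the single requested page.
     * New code: a zero boundary means "no clamp"; end stays 0xffc01000.
     */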
@@ -637,13 +649,74 @@ follow_page(struct mm_struct *mm, unsigned long address, int write)
        if (pte_present(pte)) {
                if (write && !pte_write(pte))
                        goto out;
+               if (write && !pte_dirty(pte)) {
+                       struct page *page = pte_page(pte);
+                       if (!PageDirty(page))
+                               set_page_dirty(page);
+               }
                pfn = pte_pfn(pte);
                if (pfn_valid(pfn)) {
-                       page = pfn_to_page(pfn);
-                       if (write && !pte_dirty(pte) && !PageDirty(page))
+                       struct page *page = pfn_to_page(pfn);
+
+                       mark_page_accessed(page);
+                       return page;
+               }
+       }
+
+out:
+       return NULL;
+}
+
+struct page *
+follow_page_pfn(struct mm_struct *mm, unsigned long address, int write,
+               unsigned long *pfn_ptr)
+{
+       pgd_t *pgd;
+       pmd_t *pmd;
+       pte_t *ptep, pte;
+       unsigned long pfn;
+       struct page *page;
+
+       *pfn_ptr = 0;
+       page = follow_huge_addr(mm, address, write);
+       if (!IS_ERR(page))
+               return page;
+
+       pgd = pgd_offset(mm, address);
+       if (pgd_none(*pgd) || pgd_bad(*pgd))
+               goto out;
+
+       pmd = pmd_offset(pgd, address);
+       if (pmd_none(*pmd))
+               goto out;
+       if (pmd_huge(*pmd))
+               return follow_huge_pmd(mm, address, pmd, write);
+       if (pmd_bad(*pmd))
+               goto out;
+
+       ptep = pte_offset_map(pmd, address);
+       if (!ptep)
+               goto out;
+
+       pte = *ptep;
+       pte_unmap(ptep);
+       if (pte_present(pte)) {
+               if (write && !pte_write(pte))
+                       goto out;
+               if (write && !pte_dirty(pte)) {
+                       struct page *page = pte_page(pte);
+                       if (!PageDirty(page))
                                set_page_dirty(page);
+               }
+               pfn = pte_pfn(pte);
+               if (pfn_valid(pfn)) {
+                       struct page *page = pfn_to_page(pfn);
+
                        mark_page_accessed(page);
                        return page;
+               } else {
+                       *pfn_ptr = pfn;
+                       return NULL;
                }
        }
 
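This hunk reworks follow_page() so the write-side dirty marking happens via pte_page() before the pfn_valid() test, and adds a follow_page_pfn() variant for mappings whose frames have no struct page (pfn_valid() fails, as with raw device memory): it returns NULL in that case but reports the raw pfn through *pfn_ptr. A hedged usage sketch; the function name, variables, and the get_user_pages()-style locking are assumptions for illustration, not code from the patch:

    /*
     * Hedged sketch (not from the patch): translate a user address in
     * `mm` to a physical address, covering both ordinary RAM pages and
     * pfns that have no struct page behind them.
     */
    static unsigned long user_virt_to_phys(struct mm_struct *mm,
                                           unsigned long addr)
    {
            unsigned long pfn, phys = 0;
            struct page *page;

            spin_lock(&mm->page_table_lock);
            page = follow_page_pfn(mm, addr, 0 /* read */, &pfn);
            spin_unlock(&mm->page_table_lock);

            if (page)
                    phys = page_to_phys(page) + (addr & ~PAGE_MASK);
            else if (pfn)
                    phys = (pfn << PAGE_SHIFT) + (addr & ~PAGE_MASK);
            /* else: not mapped, phys stays 0 */
            return phys;
    }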
@@ -651,6 +724,7 @@ out:
        return NULL;
 }
 
+
 /* 
  * Given a physical address, is there a useful struct page pointing to
  * it?  This may become more complex in the future if we start dealing
@@ -1081,7 +1155,8 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
        page_table = pte_offset_map(pmd, address);
        if (likely(pte_same(*page_table, pte))) {
                if (PageReserved(old_page))
-                       ++mm->rss;
+                       // ++mm->rss;
+                       vx_rsspages_inc(mm);
                else
                        page_remove_rmap(old_page);
                break_cow(vma, new_page, address, page_table);
@@ -1330,6 +1405,10 @@ static int do_swap_page(struct mm_struct * mm,
                inc_page_state(pgmajfault);
        }
 
+       if (!vx_rsspages_avail(mm, 1)) {
+               ret = VM_FAULT_OOM;
+               goto out;
+       }
        mark_page_accessed(page);
        lock_page(page);
 
@@ -1354,7 +1433,8 @@ static int do_swap_page(struct mm_struct * mm,
        if (vm_swap_full())
                remove_exclusive_swap_page(page);
 
-       mm->rss++;
+       // mm->rss++;
+       vx_rsspages_inc(mm);
        pte = mk_pte(page, vma->vm_page_prot);
        if (write_access && can_share_swap_page(page)) {
                pte = maybe_mkwrite(pte_mkdirty(pte), vma);
@@ -1394,6 +1474,11 @@ do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
        pte_t entry;
        struct page * page = ZERO_PAGE(addr);
 
+       if (!vx_rsspages_avail(mm, 1)) {
+               spin_unlock(&mm->page_table_lock);
+               return VM_FAULT_OOM;
+       }
+
        /* Read-only mapping of ZERO_PAGE. */
        entry = pte_wrprotect(mk_pte(ZERO_PAGE(addr), vma->vm_page_prot));
 
@@ -1419,7 +1504,8 @@ do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
                        spin_unlock(&mm->page_table_lock);
                        goto out;
                }
-               mm->rss++;
+               // mm->rss++;
+               vx_rsspages_inc(mm);
                entry = maybe_mkwrite(pte_mkdirty(mk_pte(page,
                                                         vma->vm_page_prot)),
                                      vma);
@@ -1482,6 +1568,8 @@ retry:
                return VM_FAULT_SIGBUS;
        if (new_page == NOPAGE_OOM)
                return VM_FAULT_OOM;
+       if (!vx_rsspages_avail(mm, 1))
+               return VM_FAULT_OOM;
 
        /*
         * Should we do an early C-O-W break?
@@ -1528,7 +1616,8 @@ retry:
        /* Only go through if we didn't race with anybody else... */
        if (pte_none(*page_table)) {
                if (!PageReserved(new_page))
-                       ++mm->rss;
+                       // ++mm->rss;
+                       vx_rsspages_inc(mm);
                flush_icache_page(vma, new_page);
                entry = mk_pte(new_page, vma->vm_page_prot);
                if (write_access)
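The remaining RSS hunks apply the same substitution to every fault path that can instantiate a user page: do_wp_page(), do_swap_page(), do_anonymous_page(), and do_no_page() all charge the context through vx_rsspages_inc() at the point where they previously did mm->rss++, and the paths that allocate a fresh page first check vx_rsspages_avail() (see the sketch after the copy_page_range hunks above), returning VM_FAULT_OOM when the context is already at its limit.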
@@ -1670,15 +1759,20 @@ int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma,
         * We need the page table lock to synchronize with kswapd
         * and the SMP-safe atomic PTE updates.
         */
+       set_delay_flag(current, PF_MEMIO);
        spin_lock(&mm->page_table_lock);
        pmd = pmd_alloc(mm, pgd, address);
 
        if (pmd) {
                pte_t * pte = pte_alloc_map(mm, pmd, address);
-               if (pte)
-                       return handle_pte_fault(mm, vma, address, write_access, pte, pmd);
+               if (pte) {
+                       int rc = handle_pte_fault(mm, vma, address, write_access, pte, pmd);
+                       clear_delay_flag(current, PF_MEMIO);
+                       return rc;
+               }
        }
        spin_unlock(&mm->page_table_lock);
+       clear_delay_flag(current, PF_MEMIO);
        return VM_FAULT_OOM;
 }
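The final hunk brackets handle_mm_fault() with PF_MEMIO so delay accounting can attribute time spent here to memory wait; the early return is restructured into a block so the flag is cleared on every exit path. A hedged sketch of the helpers, assuming they are plain task-flag wrappers as in the delay-accounting patches of this era; the exact definitions may differ:

    /* Hedged sketch; not necessarily the patch's exact definitions. */
    #define set_delay_flag(tsk, flg)        ((tsk)->flags |= (flg))
    #define clear_delay_flag(tsk, flg)      ((tsk)->flags &= ~(flg))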