#endif
unsigned long num_physpages;
+/*
+ * A number of key systems in x86 including ioremap() rely on the assumption
+ * that high_memory defines the upper bound on direct map memory, i.e. the end
+ * of ZONE_NORMAL. Under CONFIG_DISCONTIG this means that max_low_pfn and
+ * highstart_pfn must be the same; there must be no gap between ZONE_NORMAL
+ * and ZONE_HIGHMEM.
+ */
void * high_memory;
struct page *highmem_start_page;
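+/*
+ * Amount of vmalloc address space set aside at early boot; its consumers
+ * live in arch code and are not part of this patch (meaning inferred from
+ * the name).
+ */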
+unsigned long vmalloc_earlyreserve;
EXPORT_SYMBOL(num_physpages);
EXPORT_SYMBOL(highmem_start_page);
EXPORT_SYMBOL(high_memory);
+EXPORT_SYMBOL(vmalloc_earlyreserve);
/*
* We special-case the C-O-W ZERO_PAGE, because it's such
pte_free_tlb(tlb, page);
}
-static inline void free_one_pgd(struct mmu_gather *tlb, pgd_t * dir)
+static inline void free_one_pgd(struct mmu_gather *tlb, pgd_t * dir,
+ int pgd_idx)
{
int j;
pmd_t * pmd;
}
pmd = pmd_offset(dir, 0);
pgd_clear(dir);
- for (j = 0; j < PTRS_PER_PMD ; j++)
+ for (j = 0; j < PTRS_PER_PMD ; j++) {
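+ /*
+ * Entries at or above TASK_SIZE are not user mappings and must not be
+ * freed here (rationale inferred from the surrounding 4G/4G changes).
+ */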
+ if (pgd_idx * PGDIR_SIZE + j * PMD_SIZE >= TASK_SIZE)
+ break;
free_one_pmd(tlb, pmd+j);
+ }
pmd_free_tlb(tlb, pmd);
}
void clear_page_tables(struct mmu_gather *tlb, unsigned long first, int nr)
{
pgd_t * page_dir = tlb->mm->pgd;
+ int pgd_idx = first;
page_dir += first;
do {
- free_one_pgd(tlb, page_dir);
+ free_one_pgd(tlb, page_dir, pgd_idx);
page_dir++;
+ pgd_idx++;
} while (--nr);
}
struct page *page;
unsigned long pfn;
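+ /*
+ * Linux-VServer RSS accounting: refuse the copy if the destination mm
+ * has no pages left under its limit (vx_rsspages_avail() behaviour
+ * assumed from its name).
+ */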
+ if (!vx_rsspages_avail(dst, 1)) {
+ spin_unlock(&src->page_table_lock);
+ goto nomem;
+ }
/* copy_one_pte */
if (pte_none(pte))
pte = pte_mkclean(pte);
pte = pte_mkold(pte);
get_page(page);
- dst->rss++;
+ vx_rsspages_inc(dst);
set_pte(dst_pte, pte);
page_dup_rmap(page);
cont_copy_pte_range_noset:
set_pte(ptep, pgoff_to_pte(page->index));
if (pte_dirty(pte))
set_page_dirty(page);
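+ /*
+ * Propagate the hardware referenced bit only for file-backed pages;
+ * anonymous pages are left to normal LRU aging (intent inferred).
+ */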
- if (pte_young(pte) && page_mapping(page))
+ if (pte_young(pte) && !PageAnon(page))
mark_page_accessed(page);
tlb->freed++;
page_remove_rmap(page);
unsigned long size, struct zap_details *details)
{
pmd_t * pmd;
- unsigned long end;
+ unsigned long end, pgd_boundary;
if (pgd_none(*dir))
return;
}
pmd = pmd_offset(dir, address);
end = address + size;
- if (end > ((address + PGDIR_SIZE) & PGDIR_MASK))
- end = ((address + PGDIR_SIZE) & PGDIR_MASK);
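+ /*
+ * The pgd boundary computation wraps to zero for the topmost pgd;
+ * only clamp 'end' when it did not overflow.
+ */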
+ pgd_boundary = ((address + PGDIR_SIZE) & PGDIR_MASK);
+ if (pgd_boundary && (end > pgd_boundary))
+ end = pgd_boundary;
do {
zap_pte_range(tlb, pmd, address, end - address, details);
address = (address + PMD_SIZE) & PMD_MASK;
tlb_end_vma(tlb, vma);
}
+#ifdef CONFIG_PREEMPT_VOLUNTARY
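+/*
+ * Voluntary preemption: use a small unmap batch so rescheduling points
+ * are reached quickly (rationale inferred).
+ */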
+# define ZAP_BLOCK_SIZE (128 * PAGE_SIZE)
+#else
+
/* Dispose of an entire struct mmu_gather per rescheduling point */
#if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT)
#define ZAP_BLOCK_SIZE (FREE_PTE_NR * PAGE_SIZE)
#else
#define ZAP_BLOCK_SIZE (1024 * PAGE_SIZE)
#endif
+#endif
+
/**
* unmap_vmas - unmap a range of memory covered by a list of vma's
* @tlbp: address of the caller's struct mmu_gather
start += block;
zap_bytes -= block;
- if ((long)zap_bytes > 0)
- continue;
if (!atomic && need_resched()) {
int fullmm = tlb_is_full_mm(*tlbp);
tlb_finish_mmu(*tlbp, tlb_start, start);
*tlbp = tlb_gather_mmu(mm, fullmm);
tlb_start_valid = 0;
}
+ if ((long)zap_bytes > 0)
+ continue;
zap_bytes = ZAP_BLOCK_SIZE;
}
}
return NULL;
}
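+/*
+ * Like follow_page(), but also hands back the raw pfn so callers can cope
+ * with mappings that have no struct page behind them: returns the page for
+ * ordinary mappings, or NULL with *pfn_ptr set when the pte is present but
+ * its pfn is not valid. (Summary inferred from the code below; the callers
+ * are not part of this hunk.)
+ */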
+struct page *
+follow_page_pfn(struct mm_struct *mm, unsigned long address, int write,
+ unsigned long *pfn_ptr)
+{
+ pgd_t *pgd;
+ pmd_t *pmd;
+ pte_t *ptep, pte;
+ unsigned long pfn;
+ struct page *page;
+
+ *pfn_ptr = 0;
+ page = follow_huge_addr(mm, address, write);
+ if (!IS_ERR(page))
+ return page;
+
+ pgd = pgd_offset(mm, address);
+ if (pgd_none(*pgd) || pgd_bad(*pgd))
+ goto out;
+
+ pmd = pmd_offset(pgd, address);
+ if (pmd_none(*pmd))
+ goto out;
+ if (pmd_huge(*pmd))
+ return follow_huge_pmd(mm, address, pmd, write);
+ if (pmd_bad(*pmd))
+ goto out;
+
+ ptep = pte_offset_map(pmd, address);
+ if (!ptep)
+ goto out;
+
+ pte = *ptep;
+ pte_unmap(ptep);
+ if (pte_present(pte)) {
+ if (write && !pte_write(pte))
+ goto out;
+ if (write && !pte_dirty(pte)) {
+ struct page *page = pte_page(pte);
+ if (!PageDirty(page))
+ set_page_dirty(page);
+ }
+ pfn = pte_pfn(pte);
+ if (pfn_valid(pfn)) {
+ struct page *page = pfn_to_page(pfn);
+
+ mark_page_accessed(page);
+ return page;
+ } else {
+ *pfn_ptr = pfn;
+ return NULL;
+ }
+ }
+
+out:
+ return NULL;
+}
+
+
/*
* Given a physical address, is there a useful struct page pointing to
* it? This may become more complex in the future if we start dealing
static inline struct page *get_page_map(struct page *page)
{
if (!pfn_valid(page_to_pfn(page)))
- return 0;
+ return NULL;
return page;
}
+#ifndef CONFIG_X86_4G
static inline int
untouched_anonymous_page(struct mm_struct* mm, struct vm_area_struct *vma,
unsigned long address)
/* There is a pte slot for 'address' in 'mm'. */
return 0;
}
+#endif
int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
pte_t *pte;
if (write) /* user gate pages are read-only */
return i ? : -EFAULT;
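+ /*
+ * Walk the gate area through the mm's own page tables and map the pte
+ * temporarily; with the 4G/4G split these entries need not sit in the
+ * kernel page tables (assumption).
+ */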
- pgd = pgd_offset_k(pg);
+ pgd = pgd_offset_gate(mm, pg);
if (!pgd)
return i ? : -EFAULT;
pmd = pmd_offset(pgd, pg);
if (!pmd)
return i ? : -EFAULT;
- pte = pte_offset_kernel(pmd, pg);
- if (!pte || !pte_present(*pte))
+ pte = pte_offset_map(pmd, pg);
+ if (!pte)
return i ? : -EFAULT;
+ if (!pte_present(*pte)) {
+ pte_unmap(pte);
+ return i ? : -EFAULT;
+ }
if (pages) {
pages[i] = pte_page(*pte);
get_page(pages[i]);
}
+ pte_unmap(pte);
if (vmas)
vmas[i] = gate_vma;
i++;
* insanely big anonymously mapped areas that
* nobody touched so far. This is important
* for doing a core dump for these mappings.
+ *
+ * disable this for 4:4 - it prevents
+ * follow_page() from ever seeing these pages.
+ *
+ * (The 'fix' is dubious anyway; nothing that
+ * this code avoids couldn't be triggered from
+ * userspace as well.)
*/
+#ifndef CONFIG_X86_4G
if (!lookup_write &&
untouched_anonymous_page(mm,vma,start)) {
map = ZERO_PAGE(start);
break;
}
+#endif
spin_unlock(&mm->page_table_lock);
switch (handle_mm_fault(mm,vma,start,write)) {
case VM_FAULT_MINOR:
/*
* Ok, we need to copy. Oh, well..
*/
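+ /*
+ * Reserved pages (such as the zero page) are not refcounted, so only
+ * take a reference on ordinary pages before dropping the lock.
+ */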
- page_cache_get(old_page);
+ if (!PageReserved(old_page))
+ page_cache_get(old_page);
spin_unlock(&mm->page_table_lock);
if (unlikely(anon_vma_prepare(vma)))
page_table = pte_offset_map(pmd, address);
if (likely(pte_same(*page_table, pte))) {
if (PageReserved(old_page))
- ++mm->rss;
+ vx_rsspages_inc(mm);
else
page_remove_rmap(old_page);
break_cow(vma, new_page, address, page_table);
if (inode->i_size < offset)
goto do_expand;
+ /*
+ * truncation of in-use swapfiles is disallowed - it would cause
+ * subsequent swapout to scribble on the now-freed blocks.
+ */
+ if (IS_SWAPFILE(inode))
+ goto out_busy;
i_size_write(inode, offset);
unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
truncate_inode_pages(mapping, offset);
if (limit != RLIM_INFINITY && offset > limit)
goto out_sig;
if (offset > inode->i_sb->s_maxbytes)
- goto out;
+ goto out_big;
i_size_write(inode, offset);
out_truncate:
return 0;
out_sig:
send_sig(SIGXFSZ, current, 0);
-out:
+out_big:
return -EFBIG;
+out_busy:
+ return -ETXTBSY;
}
EXPORT_SYMBOL(vmtruncate);
/* Had to read the page from swap area: Major fault */
ret = VM_FAULT_MAJOR;
inc_page_state(pgmajfault);
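+ /*
+ * Swap token (thrashing control): let the faulting task hold the token
+ * so its pages are less likely to be reclaimed while it swaps in.
+ */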
+ grab_swap_token();
}
+ if (!vx_rsspages_avail(mm, 1)) {
+ ret = VM_FAULT_OOM;
+ goto out;
+ }
mark_page_accessed(page);
lock_page(page);
if (vm_swap_full())
remove_exclusive_swap_page(page);
- mm->rss++;
+ vx_rsspages_inc(mm);
pte = mk_pte(page, vma->vm_page_prot);
if (write_access && can_share_swap_page(page)) {
pte = maybe_mkwrite(pte_mkdirty(pte), vma);
if (unlikely(anon_vma_prepare(vma)))
goto no_mem;
+ if (!vx_rsspages_avail(mm, 1))
+ goto no_mem;
+
page = alloc_page_vma(GFP_HIGHUSER, vma, addr);
if (!page)
goto no_mem;
spin_unlock(&mm->page_table_lock);
goto out;
}
- mm->rss++;
+ vx_rsspages_inc(mm);
entry = maybe_mkwrite(pte_mkdirty(mk_pte(page,
vma->vm_page_prot)),
vma);
return VM_FAULT_SIGBUS;
if (new_page == NOPAGE_OOM)
return VM_FAULT_OOM;
+ if (!vx_rsspages_avail(mm, 1))
+ return VM_FAULT_OOM;
/*
* Should we do an early C-O-W break?
* We need the page table lock to synchronize with kswapd
* and the SMP-safe atomic PTE updates.
*/
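+ /*
+ * Delay accounting: mark the task as blocked on memory I/O for the
+ * duration of the fault (PF_MEMIO semantics inferred).
+ */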
+ set_delay_flag(current, PF_MEMIO);
spin_lock(&mm->page_table_lock);
pmd = pmd_alloc(mm, pgd, address);
if (pmd) {
pte_t * pte = pte_alloc_map(mm, pmd, address);
- if (pte)
- return handle_pte_fault(mm, vma, address, write_access, pte, pmd);
+ if (pte) {
+ int rc = handle_pte_fault(mm, vma, address, write_access, pte, pmd);
+ clear_delay_flag(current, PF_MEMIO);
+ return rc;
+ }
}
spin_unlock(&mm->page_table_lock);
+ clear_delay_flag(current, PF_MEMIO);
return VM_FAULT_OOM;
}
#ifdef AT_SYSINFO_EHDR
return &gate_vma;
#else
- return 0;
+ return NULL;
#endif
}