#include <linux/swap.h>
#include <linux/fs.h>
#include <linux/highmem.h>
-#include <linux/rmap.h>
#include <linux/security.h>
+#include <linux/acct.h>
+#include <linux/syscalls.h>
+#include <linux/vs_memory.h>
#include <asm/uaccess.h>
-#include <asm/pgalloc.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
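/*
* Note: the lookup/alloc helpers below walk all four page-table levels
* (pgd -> pud -> pmd -> pte); on architectures with fewer levels the pud
* step is folded away by the pgtable headers and costs nothing.
*/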
static pte_t *get_one_pte_map_nested(struct mm_struct *mm, unsigned long addr)
{
pgd_t *pgd;
+ pud_t *pud;
pmd_t *pmd;
pte_t *pte = NULL;
pgd = pgd_offset(mm, addr);
if (pgd_none(*pgd))
goto end;
- if (pgd_bad(*pgd)) {
- pgd_ERROR(*pgd);
- pgd_clear(pgd);
+
+ pud = pud_offset(pgd, addr);
+ if (pud_none(*pud))
+ goto end;
+ if (pud_bad(*pud)) {
+ pud_ERROR(*pud);
+ pud_clear(pud);
goto end;
}
- pmd = pmd_offset(pgd, addr);
+ pmd = pmd_offset(pud, addr);
if (pmd_none(*pmd))
goto end;
if (pmd_bad(*pmd)) {
pmd_ERROR(*pmd);
pmd_clear(pmd);
goto end;
}
pte = pte_offset_map_nested(pmd, addr);
if (pte_none(*pte)) {
pte_unmap_nested(pte);
pte = NULL;
}
end:
return pte;
}
-static inline int page_table_present(struct mm_struct *mm, unsigned long addr)
+static pte_t *get_one_pte_map(struct mm_struct *mm, unsigned long addr)
{
pgd_t *pgd;
+ pud_t *pud;
pmd_t *pmd;
pgd = pgd_offset(mm, addr);
if (pgd_none(*pgd))
- return 0;
- pmd = pmd_offset(pgd, addr);
- return pmd_present(*pmd);
+ return NULL;
+
+ pud = pud_offset(pgd, addr);
+ if (pud_none(*pud))
+ return NULL;
+ pmd = pmd_offset(pud, addr);
+ if (!pmd_present(*pmd))
+ return NULL;
+ return pte_offset_map(pmd, addr);
}
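/*
* get_one_pte_map_nested() maps the source pte with pte_offset_map_nested()
* (the second atomic kmap slot), while get_one_pte_map() uses the plain
* pte_offset_map(), so on highmem configurations the source and destination
* pte pages can be mapped at the same time; hence the paired
* pte_unmap_nested()/pte_unmap() in move_one_page().
*/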
static inline pte_t *alloc_one_pte_map(struct mm_struct *mm, unsigned long addr)
{
+ pgd_t *pgd;
+ pud_t *pud;
pmd_t *pmd;
pte_t *pte = NULL;
- pmd = pmd_alloc(mm, pgd_offset(mm, addr), addr);
+ pgd = pgd_offset(mm, addr);
+
+ pud = pud_alloc(mm, pgd, addr);
+ if (!pud)
+ return NULL;
+ pmd = pmd_alloc(mm, pud, addr);
if (pmd)
pte = pte_alloc_map(mm, pmd, addr);
return pte;
}
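/*
* pud_alloc()/pmd_alloc()/pte_alloc_map() may each need to allocate a
* page-table page, dropping and re-taking mm->page_table_lock around the
* allocation; that is why move_one_page() below re-checks the source pte
* after calling alloc_one_pte_map().
*/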
-static void
-copy_one_pte(struct vm_area_struct *vma, unsigned long old_addr,
- pte_t *src, pte_t *dst, struct pte_chain **pte_chainp)
-{
- pte_t pte = ptep_clear_flush(vma, old_addr, src);
- set_pte(dst, pte);
-
- if (pte_present(pte)) {
- unsigned long pfn = pte_pfn(pte);
- if (pfn_valid(pfn)) {
- struct page *page = pfn_to_page(pfn);
- page_remove_rmap(page, src);
- *pte_chainp = page_add_rmap(page, dst, *pte_chainp);
- }
- }
-}
-
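/*
* With the object-based reverse mapping used in this tree, rmap no longer
* records per-pte chains: a pte's location is recomputed from the vma and
* the page's index. Moving a pte therefore needs no page_remove_rmap()/
* page_add_rmap() bookkeeping and no pte_chain allocation; the bare
* ptep_clear_flush() + set_pte() in move_one_page() below is enough.
*/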
static int
move_one_page(struct vm_area_struct *vma, unsigned long old_addr,
- unsigned long new_addr)
+ struct vm_area_struct *new_vma, unsigned long new_addr)
{
+ struct address_space *mapping = NULL;
struct mm_struct *mm = vma->vm_mm;
int error = 0;
pte_t *src, *dst;
- struct pte_chain *pte_chain;
- pte_chain = pte_chain_alloc(GFP_KERNEL);
- if (!pte_chain) {
- error = -ENOMEM;
- goto out;
+ if (vma->vm_file) {
+ /*
+ * Subtle point from Rajesh Venkatasubramanian: before
+ * moving file-based ptes, we must lock vmtruncate out,
+ * since it might clean the dst vma before the src vma,
+ * and we propagate stale pages into the dst afterward.
+ */
+ mapping = vma->vm_file->f_mapping;
+ spin_lock(&mapping->i_mmap_lock);
+ if (new_vma->vm_truncate_count &&
+ new_vma->vm_truncate_count != vma->vm_truncate_count)
+ new_vma->vm_truncate_count = 0;
}
spin_lock(&mm->page_table_lock);
+
src = get_one_pte_map_nested(mm, old_addr);
if (src) {
/*
* Look to see whether alloc_one_pte_map needs to perform a
* memory allocation. If it does then we need to drop the
* atomic kmap
*/
- if (!page_table_present(mm, new_addr)) {
+ dst = get_one_pte_map(mm, new_addr);
+ if (unlikely(!dst)) {
pte_unmap_nested(src);
- src = NULL;
- }
- dst = alloc_one_pte_map(mm, new_addr);
- if (src == NULL)
+ if (mapping)
+ spin_unlock(&mapping->i_mmap_lock);
+ dst = alloc_one_pte_map(mm, new_addr);
+ if (mapping && !spin_trylock(&mapping->i_mmap_lock)) {
+ spin_unlock(&mm->page_table_lock);
+ spin_lock(&mapping->i_mmap_lock);
+ spin_lock(&mm->page_table_lock);
+ }
src = get_one_pte_map_nested(mm, old_addr);
+ }
/*
* Since alloc_one_pte_map can drop and re-acquire
* page_table_lock, we should re-check the src entry...
*/
if (src) {
- if (dst)
- copy_one_pte(vma, old_addr, src,
- dst, &pte_chain);
- else
+ if (dst) {
+ pte_t pte;
+ pte = ptep_clear_flush(vma, old_addr, src);
+ set_pte(dst, pte);
+ } else
error = -ENOMEM;
pte_unmap_nested(src);
}
pte_unmap(dst);
}
spin_unlock(&mm->page_table_lock);
- pte_chain_free(pte_chain);
-out:
+ if (mapping)
+ spin_unlock(&mapping->i_mmap_lock);
return error;
}
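/*
* Lock ordering in move_one_page(), in outline:
*   mapping->i_mmap_lock   (only for file-backed vmas)
*     mm->page_table_lock
* alloc_one_pte_map() may drop page_table_lock and sleep while allocating a
* pte page, so i_mmap_lock is released around it; the spin_trylock()
* afterwards re-takes it without inverting the order, and on failure
* page_table_lock is dropped so both locks can be re-acquired in the
* correct order.
*/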
static unsigned long move_page_tables(struct vm_area_struct *vma,
- unsigned long new_addr, unsigned long old_addr, unsigned long len)
+ unsigned long old_addr, struct vm_area_struct *new_vma,
+ unsigned long new_addr, unsigned long len)
{
unsigned long offset;
/*
* This is not the clever way to do this, but we're taking the
* easy way out on the assumption that most remappings will be
* only a few pages.. This also makes error recovery easier.
*/
for (offset = 0; offset < len; offset += PAGE_SIZE) {
- if (move_one_page(vma, old_addr+offset, new_addr+offset) < 0)
+ if (move_one_page(vma, old_addr + offset,
+ new_vma, new_addr + offset) < 0)
break;
+ cond_resched();
}
return offset;
}
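/*
* Entries are moved one page at a time: simple, and if move_one_page()
* fails partway the caller can undo exactly the pages already moved.
* The cond_resched() keeps latency down on large remaps, since
* move_one_page() releases all spinlocks before returning here.
*/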
static unsigned long move_vma(struct vm_area_struct *vma,
unsigned long addr, unsigned long old_len,
unsigned long new_len, unsigned long new_addr)
{
struct mm_struct *mm = vma->vm_mm;
- struct address_space *mapping = NULL;
struct vm_area_struct *new_vma;
unsigned long vm_flags = vma->vm_flags;
unsigned long new_pgoff;
if (!new_vma)
return -ENOMEM;
- if (vma->vm_file) {
- /*
- * Subtle point from Rajesh Venkatasubramanian: before
- * moving file-based ptes, we must lock vmtruncate out,
- * since it might clean the dst vma before the src vma,
- * and we propagate stale pages into the dst afterward.
- */
- mapping = vma->vm_file->f_mapping;
- down(&mapping->i_shared_sem);
- }
- moved_len = move_page_tables(vma, new_addr, old_addr, old_len);
+ moved_len = move_page_tables(vma, old_addr, new_vma, new_addr, old_len);
if (moved_len < old_len) {
/*
* On error, move entries back from new area to old,
* which will succeed since page tables still there,
* and then proceed to unmap new area instead of old.
*/
- move_page_tables(new_vma, old_addr, new_addr, moved_len);
+ move_page_tables(new_vma, new_addr, vma, old_addr, moved_len);
vma = new_vma;
old_len = new_len;
old_addr = new_addr;
new_addr = -ENOMEM;
}
- if (mapping)
- up(&mapping->i_shared_sem);
/* Conceal VM_ACCOUNT so old reservation is not undone */
if (vm_flags & VM_ACCOUNT) {
// mm->total_vm += new_len >> PAGE_SHIFT;
vx_vmpages_add(mm, new_len >> PAGE_SHIFT);
+ __vm_stat_account(mm, vma->vm_flags, vma->vm_file, new_len>>PAGE_SHIFT);
if (vm_flags & VM_LOCKED) {
// mm->locked_vm += new_len >> PAGE_SHIFT;
vx_vmlocked_add(mm, new_len >> PAGE_SHIFT);
if (new_len > old_len)
make_pages_present(new_addr + old_len,
new_addr + new_len);
}
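/*
* total_vm (and possibly locked_vm) just grew by the size of the new
* mapping, so let the process accounting integrals and the mm RSS/VM
* high-water marks catch up before returning.
*/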
+ acct_update_integrals();
+ update_mem_hiwater();
+
return new_addr;
}
goto out;
}
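/*
* Growing a locked mapping is checked against RLIMIT_MEMLOCK unless the
* task has CAP_IPC_LOCK; vx_vmlocked_avail() is, presumably, the extra
* per-context locked-memory check added by this tree.
*/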
if (vma->vm_flags & VM_LOCKED) {
- unsigned long locked = current->mm->locked_vm << PAGE_SHIFT;
+ unsigned long locked, lock_limit;
+ locked = current->mm->locked_vm << PAGE_SHIFT;
+ lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
locked += new_len - old_len;
ret = -EAGAIN;
- if (locked > current->rlim[RLIMIT_MEMLOCK].rlim_cur)
+ if (locked > lock_limit && !capable(CAP_IPC_LOCK))
+ goto out;
+ ret = -ENOMEM;
+ if (!vx_vmlocked_avail(current->mm,
+ (new_len - old_len) >> PAGE_SHIFT))
goto out;
}
ret = -ENOMEM;
if ((current->mm->total_vm << PAGE_SHIFT) + (new_len - old_len)
- > current->rlim[RLIMIT_AS].rlim_cur)
+ > current->signal->rlim[RLIMIT_AS].rlim_cur)
goto out;
/* check context space, maybe only Private writable mapping? */
if (!vx_vmpages_avail(current->mm, (new_len - old_len) >> PAGE_SHIFT))
goto out;
/* can we just expand the current mapping? */
if (max_addr - addr >= new_len) {
int pages = (new_len - old_len) >> PAGE_SHIFT;
- spin_lock(&vma->vm_mm->page_table_lock);
- vma->vm_end = addr + new_len;
- spin_unlock(&vma->vm_mm->page_table_lock);
+
+ vma_adjust(vma, vma->vm_start,
+ addr + new_len, vma->vm_pgoff, NULL);
+
// current->mm->total_vm += pages;
vx_vmpages_add(current->mm, pages);
+ __vm_stat_account(vma->vm_mm, vma->vm_flags,
+ vma->vm_file, pages);
if (vma->vm_flags & VM_LOCKED) {
// current->mm->locked_vm += pages;
- vx_vmlocked_add(current->mm, pages);
+ vx_vmlocked_add(vma->vm_mm, pages);
make_pages_present(addr + old_len,
addr + new_len);
}
+ acct_update_integrals();
+ update_mem_hiwater();
ret = addr;
goto out;
}