#include <linux/hugetlb.h>
#include <linux/profile.h>
#include <linux/module.h>
-#include <linux/acct.h>
#include <linux/mount.h>
#include <linux/mempolicy.h>
#include <linux/rmap.h>
#include <asm/cacheflush.h>
#include <asm/tlb.h>
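+/* forward declaration: the mmap error path below now calls unmap_region() before its definition */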
+static void unmap_region(struct mm_struct *mm,
+ struct vm_area_struct *vma, struct vm_area_struct *prev,
+ unsigned long start, unsigned long end);
+
/*
* WARNING: the debugging will use recursive algorithms so never enable this
* unless you know what you are doing.
int error;
struct rb_node ** rb_link, * rb_parent;
int accountable = 1;
- unsigned long charged = 0;
+ unsigned long charged = 0, reqprot = prot;
if (file) {
if (is_file_hugepages(file))
prot |= PROT_EXEC;
if (!len)
- return addr;
+ return -EINVAL;
/* Careful about overflows.. */
len = PAGE_ALIGN(len);
if (!len || len > TASK_SIZE)
- return -EINVAL;
+ return -ENOMEM;
/* offset overflow? */
if ((pgoff + (len >> PAGE_SHIFT)) < pgoff)
- return -EINVAL;
+ return -EOVERFLOW;
/* Too many mappings? */
if (mm->map_count > sysctl_max_map_count)
/* mlock MCL_FUTURE? */
if (vm_flags & VM_LOCKED) {
unsigned long locked, lock_limit;
- locked = mm->locked_vm << PAGE_SHIFT;
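+ /* do the limit comparison in units of pages, not bytes */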
+ locked = len >> PAGE_SHIFT;
+ locked += mm->locked_vm;
lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
- locked += len;
+ lock_limit >>= PAGE_SHIFT;
if (locked > lock_limit && !capable(CAP_IPC_LOCK))
return -EAGAIN;
}
}
}
- error = security_file_mmap(file, prot, flags);
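+ /* pass the LSM hook both the caller's original reqprot and the possibly adjusted prot */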
+ error = security_file_mmap(file, reqprot, prot, flags);
if (error)
return error;
}
/* Check against address space limit. */
- if ((mm->total_vm << PAGE_SHIFT) + len
- > current->signal->rlim[RLIMIT_AS].rlim_cur)
- return -ENOMEM;
-
- /* check context space, maybe only Private writable mapping? */
- if (!vx_vmpages_avail(mm, len >> PAGE_SHIFT))
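+ /* may_expand_vm() covers both RLIMIT_AS and the vserver context limit, see below */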
+ if (!may_expand_vm(mm, len >> PAGE_SHIFT))
return -ENOMEM;
if (accountable && (!(flags & MAP_NORESERVE) ||
pgoff, flags & MAP_NONBLOCK);
down_write(&mm->mmap_sem);
}
- acct_update_integrals();
- update_mem_hiwater();
return addr;
unmap_and_free_vma:
fput(file);
/* Undo any partial mapping done by a device driver. */
- zap_page_range(vma, vma->vm_start, vma->vm_end - vma->vm_start, NULL);
+ unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end);
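+ /* the accounted pages have already been released by unmap_region() above */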
+ charged = 0;
free_vma:
kmem_cache_free(vm_area_cachep, vma);
unacct_error:
const unsigned long len, const unsigned long pgoff,
const unsigned long flags)
{
- struct vm_area_struct *vma, *prev_vma;
+ struct vm_area_struct *vma;
struct mm_struct *mm = current->mm;
- unsigned long base = mm->mmap_base, addr = addr0;
- int first_time = 1;
+ unsigned long addr = addr0;
/* requested length too big for entire address space */
if (len > TASK_SIZE)
return -ENOMEM;
- /* dont allow allocations above current base */
- if (mm->free_area_cache > base)
- mm->free_area_cache = base;
-
/* requesting a specific address */
if (addr) {
addr = PAGE_ALIGN(addr);
return addr;
}
-try_again:
+ /* either no address requested or can't fit in requested address hole */
+ addr = mm->free_area_cache;
+
/* make sure it can fit in the remaining address space */
- if (mm->free_area_cache < len)
- goto fail;
+ if (addr > len) {
+ vma = find_vma(mm, addr-len);
+ if (!vma || addr <= vma->vm_start)
+ /* remember the address as a hint for next time */
+ return (mm->free_area_cache = addr-len);
+ }
+
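+ /* the cached hole wasn't big enough: search top-down starting just below mmap_base */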
+ addr = mm->mmap_base-len;
- /* either no address requested or cant fit in requested address hole */
- addr = (mm->free_area_cache - len) & PAGE_MASK;
do {
/*
* Lookup failure means no vma is above this address,
- * i.e. return with success:
+ * else if new region fits below vma->vm_start,
+ * return with success:
*/
- if (!(vma = find_vma_prev(mm, addr, &prev_vma)))
- return addr;
-
- /*
- * new region fits between prev_vma->vm_end and
- * vma->vm_start, use it:
- */
- if (addr+len <= vma->vm_start &&
- (!prev_vma || (addr >= prev_vma->vm_end)))
+ vma = find_vma(mm, addr);
+ if (!vma || addr+len <= vma->vm_start)
/* remember the address as a hint for next time */
return (mm->free_area_cache = addr);
- else
- /* pull free_area_cache down to the first hole */
- if (mm->free_area_cache == vma->vm_end)
- mm->free_area_cache = vma->vm_start;
/* try just below the current vma->vm_start */
addr = vma->vm_start-len;
- } while (len <= vma->vm_start);
+ } while (len < vma->vm_start);
-fail:
- /*
- * if hint left us with no space for the requested
- * mapping then try again:
- */
- if (first_time) {
- mm->free_area_cache = base;
- first_time = 0;
- goto try_again;
- }
/*
* A failed mmap() very likely causes application failure,
* so fall back to the bottom-up function here. This scenario
/*
* Restore the topdown base:
*/
- mm->free_area_cache = base;
+ mm->free_area_cache = mm->mmap_base;
return addr;
}
*/
if (area->vm_end > area->vm_mm->free_area_cache)
area->vm_mm->free_area_cache = area->vm_end;
+
+ /* don't allow allocations above current base */
+ if (area->vm_mm->free_area_cache > area->vm_mm->mmap_base)
+ area->vm_mm->free_area_cache = area->vm_mm->mmap_base;
}
unsigned long
* reserved hugepage range. For some archs like IA-64,
* there is a separate region for hugepages.
*/
- ret = is_hugepage_only_range(addr, len);
+ ret = is_hugepage_only_range(current->mm, addr, len);
}
if (ret)
return -EINVAL;
struct rlimit *rlim = current->signal->rlim;
/* address space limit tests */
- if (mm->total_vm + grow > rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT)
+ if (!may_expand_vm(mm, grow))
return -ENOMEM;
/* Stack limit test */
return -ENOMEM;
}
- if (!vx_vmpages_avail(vma->vm_mm, grow))
- return -ENOMEM;
-
/*
* Overcommit.. This must be the final test, as it will
* update security statistics.
if (vma->vm_flags & VM_LOCKED)
vx_vmlocked_add(mm, grow);
__vm_stat_account(mm, vma->vm_flags, vma->vm_file, grow);
- acct_update_integrals();
- update_mem_hiwater();
return 0;
}
}
#endif
-/*
- * Try to free as many page directory entries as we can,
- * without having to work very hard at actually scanning
- * the page tables themselves.
- *
- * Right now we try to free page tables if we have a nice
- * PGDIR-aligned area that got free'd up. We could be more
- * granular if we want to, but this is fast and simple,
- * and covers the bad cases.
- *
- * "prev", if it exists, points to a vma before the one
- * we just free'd - but there's no telling how much before.
- */
-static void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *prev,
- unsigned long start, unsigned long end)
-{
- unsigned long first = start & PGDIR_MASK;
- unsigned long last = end + PGDIR_SIZE - 1;
- struct mm_struct *mm = tlb->mm;
-
- if (last > MM_VM_SIZE(mm) || last < end)
- last = MM_VM_SIZE(mm);
-
- if (!prev) {
- prev = mm->mmap;
- if (!prev)
- goto no_mmaps;
- if (prev->vm_end > start) {
- if (last > prev->vm_start)
- last = prev->vm_start;
- goto no_mmaps;
- }
- }
- for (;;) {
- struct vm_area_struct *next = prev->vm_next;
-
- if (next) {
- if (next->vm_start < start) {
- prev = next;
- continue;
- }
- if (last > next->vm_start)
- last = next->vm_start;
- }
- if (prev->vm_end > first)
- first = prev->vm_end;
- break;
- }
-no_mmaps:
- if (last < first) /* for arches with discontiguous pgd indices */
- return;
- if (first < FIRST_USER_PGD_NR * PGDIR_SIZE)
- first = FIRST_USER_PGD_NR * PGDIR_SIZE;
- /* No point trying to free anything if we're in the same pte page */
- if ((first & PMD_MASK) < (last & PMD_MASK)) {
- clear_page_range(tlb, first, last);
- flush_tlb_pgtables(mm, first, last);
- }
-}
-
/* Normal function to fix up a mapping
* This function is the default for when an area has no specific
* function. This may be used as part of a more specific routine.
* Ok - we have the memory areas we should free on the 'free' list,
* so release them, and do the vma updates.
*/
-static void unmap_vma_list(struct mm_struct *mm,
- struct vm_area_struct *mpnt)
+static void unmap_vma_list(struct mm_struct *mm, struct vm_area_struct *vma)
{
do {
- struct vm_area_struct *next = mpnt->vm_next;
- unmap_vma(mm, mpnt);
- mpnt = next;
- } while (mpnt != NULL);
+ struct vm_area_struct *next = vma->vm_next;
+ unmap_vma(mm, vma);
+ vma = next;
+ } while (vma);
validate_mm(mm);
}
* Called with the page table lock held.
*/
static void unmap_region(struct mm_struct *mm,
- struct vm_area_struct *vma,
- struct vm_area_struct *prev,
- unsigned long start,
- unsigned long end)
+ struct vm_area_struct *vma, struct vm_area_struct *prev,
+ unsigned long start, unsigned long end)
{
+ struct vm_area_struct *next = prev? prev->vm_next: mm->mmap;
struct mmu_gather *tlb;
unsigned long nr_accounted = 0;
lru_add_drain();
+ spin_lock(&mm->page_table_lock);
tlb = tlb_gather_mmu(mm, 0);
unmap_vmas(&tlb, mm, vma, start, end, &nr_accounted, NULL);
vm_unacct_memory(nr_accounted);
-
- if (is_hugepage_only_range(start, end - start))
- hugetlb_free_pgtables(tlb, prev, start, end);
- else
- free_pgtables(tlb, prev, start, end);
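+ /* free page tables only in the range left open between the neighbouring vmas */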
+ free_pgtables(&tlb, vma, prev? prev->vm_end: FIRST_USER_ADDRESS,
+ next? next->vm_start: 0);
tlb_finish_mmu(tlb, start, end);
+ spin_unlock(&mm->page_table_lock);
}
/*
int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
{
unsigned long end;
- struct vm_area_struct *mpnt, *prev, *last;
+ struct vm_area_struct *vma, *prev, *last;
if ((start & ~PAGE_MASK) || start > TASK_SIZE || len > TASK_SIZE-start)
return -EINVAL;
return -EINVAL;
/* Find the first overlapping VMA */
- mpnt = find_vma_prev(mm, start, &prev);
- if (!mpnt)
+ vma = find_vma_prev(mm, start, &prev);
+ if (!vma)
return 0;
- /* we have start < mpnt->vm_end */
+ /* we have start < vma->vm_end */
/* if it doesn't overlap, we have nothing.. */
end = start + len;
- if (mpnt->vm_start >= end)
+ if (vma->vm_start >= end)
return 0;
/*
* unmapped vm_area_struct will remain in use: so lower split_vma
* places tmp vma above, and higher split_vma places tmp vma below.
*/
- if (start > mpnt->vm_start) {
- int error = split_vma(mm, mpnt, start, 0);
+ if (start > vma->vm_start) {
+ int error = split_vma(mm, vma, start, 0);
if (error)
return error;
- prev = mpnt;
+ prev = vma;
}
/* Does it split the last one? */
if (error)
return error;
}
- mpnt = prev? prev->vm_next: mm->mmap;
+ vma = prev? prev->vm_next: mm->mmap;
/*
* Remove the vma's, and unmap the actual pages
*/
- detach_vmas_to_be_unmapped(mm, mpnt, prev, end);
- spin_lock(&mm->page_table_lock);
- unmap_region(mm, mpnt, prev, start, end);
- spin_unlock(&mm->page_table_lock);
+ detach_vmas_to_be_unmapped(mm, vma, prev, end);
+ unmap_region(mm, vma, prev, start, end);
/* Fix up all other VM information */
- unmap_vma_list(mm, mpnt);
+ unmap_vma_list(mm, vma);
return 0;
}
*/
if (mm->def_flags & VM_LOCKED) {
unsigned long locked, lock_limit;
- locked = mm->locked_vm << PAGE_SHIFT;
+ locked = len >> PAGE_SHIFT;
+ locked += mm->locked_vm;
lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
- locked += len;
+ lock_limit >>= PAGE_SHIFT;
if (locked > lock_limit && !capable(CAP_IPC_LOCK))
return -EAGAIN;
if (!vx_vmlocked_avail(mm, len >> PAGE_SHIFT))
}
/* Check against address space limits *after* clearing old maps... */
- if ((mm->total_vm << PAGE_SHIFT) + len
- > current->signal->rlim[RLIMIT_AS].rlim_cur)
+ if (!may_expand_vm(mm, len >> PAGE_SHIFT))
return -ENOMEM;
if (mm->map_count > sysctl_max_map_count)
return -ENOMEM;
- if (security_vm_enough_memory(len >> PAGE_SHIFT) ||
- !vx_vmpages_avail(mm, len >> PAGE_SHIFT))
+ if (security_vm_enough_memory(len >> PAGE_SHIFT))
return -ENOMEM;
flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
vx_vmlocked_add(mm, len >> PAGE_SHIFT);
make_pages_present(addr, addr + len);
}
- acct_update_integrals();
- update_mem_hiwater();
return addr;
}
void exit_mmap(struct mm_struct *mm)
{
struct mmu_gather *tlb;
- struct vm_area_struct *vma;
+ struct vm_area_struct *vma = mm->mmap;
unsigned long nr_accounted = 0;
+ unsigned long end;
lru_add_drain();
spin_lock(&mm->page_table_lock);
- tlb = tlb_gather_mmu(mm, 1);
flush_cache_mm(mm);
- /* Use ~0UL here to ensure all VMAs in the mm are unmapped */
- mm->map_count -= unmap_vmas(&tlb, mm, mm->mmap, 0,
- ~0UL, &nr_accounted, NULL);
+ tlb = tlb_gather_mmu(mm, 1);
+ /* Use -1 here to ensure all VMAs in the mm are unmapped */
+ end = unmap_vmas(&tlb, mm, vma, 0, -1, &nr_accounted, NULL);
vm_unacct_memory(nr_accounted);
- BUG_ON(mm->map_count); /* This is just debugging */
- clear_page_range(tlb, FIRST_USER_PGD_NR * PGDIR_SIZE, MM_VM_SIZE(mm));
-
- tlb_finish_mmu(tlb, 0, MM_VM_SIZE(mm));
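+ /* a ceiling of 0 means no upper bound: free all the user page tables */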
+ free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, 0);
+ tlb_finish_mmu(tlb, 0, end);
- vma = mm->mmap;
mm->mmap = mm->mmap_cache = NULL;
mm->mm_rb = RB_ROOT;
- vx_rsspages_sub(mm, mm->rss);
+ set_mm_counter(mm, rss, 0);
vx_vmpages_sub(mm, mm->total_vm);
vx_vmlocked_sub(mm, mm->locked_vm);
remove_vm_struct(vma);
vma = next;
}
+
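+ /* by now only page tables covering addresses below FIRST_USER_ADDRESS may remain */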
+ BUG_ON(mm->nr_ptes > (FIRST_USER_ADDRESS+PMD_SIZE-1)>>PMD_SHIFT);
}
/* Insert vm structure into process list sorted by address
}
return new_vma;
}
+
+/*
+ * Return true if the calling process may expand its vm space by the passed
+ * number of pages
+ */
+int may_expand_vm(struct mm_struct *mm, unsigned long npages)
+{
+ unsigned long cur = mm->total_vm; /* pages */
+ unsigned long lim;
+
+ lim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
+
+ if (cur + npages > lim)
+ return 0;
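+ /* also check against the vserver context's vm pages limit */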
+ if (!vx_vmpages_avail(mm, npages))
+ return 0;
+ return 1;
+}