X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;ds=sidebyside;f=mm%2Fnommu.c;h=87e14d6eefa07a95eb9328fbc9d8129652caed41;hb=refs%2Fheads%2Fvserver;hp=e1b88b832bf188c197b62ae2c7ae50f6a88a5f9c;hpb=f7f1b0f1e2fbadeab12d24236000e778aa9b1ead;p=linux-2.6.git

diff --git a/mm/nommu.c b/mm/nommu.c
index e1b88b832..87e14d6ee 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -44,10 +44,6 @@ int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT;
 int heap_stack_gap = 0;
 
 EXPORT_SYMBOL(mem_map);
-EXPORT_SYMBOL(sysctl_max_map_count);
-EXPORT_SYMBOL(sysctl_overcommit_memory);
-EXPORT_SYMBOL(sysctl_overcommit_ratio);
-EXPORT_SYMBOL(vm_committed_space);
 EXPORT_SYMBOL(__vm_enough_memory);
 
 /* list of shareable VMAs */
@@ -57,6 +53,12 @@ DECLARE_RWSEM(nommu_vma_sem);
 struct vm_operations_struct generic_file_vm_ops = {
 };
 
+EXPORT_SYMBOL(vfree);
+EXPORT_SYMBOL(vmalloc_to_page);
+EXPORT_SYMBOL(vmalloc_32);
+EXPORT_SYMBOL(vmap);
+EXPORT_SYMBOL(vunmap);
+
 /*
  * Handle all mappings that got truncated by a "truncate()"
  * system call.
@@ -120,28 +122,54 @@ unsigned int kobjsize(const void *objp)
 }
 
 /*
- * The nommu dodgy version :-)
+ * get a list of pages in an address range belonging to the specified process
+ * and indicate the VMA that covers each page
+ * - this is potentially dodgy as we may end incrementing the page count of a
+ *   slab page or a secondary page from a compound page
+ * - don't permit access to VMAs that don't support it, such as I/O mappings
  */
 int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 	unsigned long start, int len, int write, int force,
 	struct page **pages, struct vm_area_struct **vmas)
 {
+	struct vm_area_struct *vma;
+	unsigned long vm_flags;
 	int i;
-	static struct vm_area_struct dummy_vma;
+
+	/* calculate required read or write permissions.
+	 * - if 'force' is set, we only require the "MAY" flags.
+	 */
+	vm_flags  = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
+	vm_flags &= force ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
 
 	for (i = 0; i < len; i++) {
+		vma = find_vma(mm, start);
+		if (!vma)
+			goto finish_or_fault;
+
+		/* protect what we can, including chardevs */
+		if (vma->vm_flags & (VM_IO | VM_PFNMAP) ||
+		    !(vm_flags & vma->vm_flags))
+			goto finish_or_fault;
+
 		if (pages) {
 			pages[i] = virt_to_page(start);
 			if (pages[i])
 				page_cache_get(pages[i]);
 		}
 		if (vmas)
-			vmas[i] = &dummy_vma;
+			vmas[i] = vma;
 		start += PAGE_SIZE;
 	}
-	return(i);
+
+	return i;
+
+finish_or_fault:
+	return i ? : -EFAULT;
 }
+EXPORT_SYMBOL(get_user_pages);
+
 DEFINE_RWLOCK(vmlist_lock);
 struct vm_struct *vmlist;
 
@@ -150,13 +178,12 @@ void vfree(void *addr)
 	kfree(addr);
 }
 
-void *__vmalloc(unsigned long size, unsigned int __nocast gfp_mask,
-			pgprot_t prot)
+void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot)
 {
 	/*
 	 * kmalloc doesn't like __GFP_HIGHMEM for some reason
 	 */
-	return kmalloc(size, gfp_mask & ~__GFP_HIGHMEM);
+	return kmalloc(size, (gfp_mask | __GFP_COMP) & ~__GFP_HIGHMEM);
 }
 
 struct page * vmalloc_to_page(void *addr)
@@ -194,13 +221,20 @@ long vwrite(char *buf, char *addr, unsigned long count)
  *	Allocate enough pages to cover @size from the page level
  *	allocator and map them into continguos kernel virtual space.
  *
- *	For tight cotrol over page level allocator and protection flags
+ *	For tight control over page level allocator and protection flags
  *	use __vmalloc() instead.
  */
 void *vmalloc(unsigned long size)
 {
	return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL);
 }
+EXPORT_SYMBOL(vmalloc);
+
+void *vmalloc_node(unsigned long size, int node)
+{
+	return vmalloc(size);
+}
+EXPORT_SYMBOL(vmalloc_node);
 
 /*
  *	vmalloc_32  -  allocate virtually continguos memory (32bit addressable)
@@ -276,6 +310,77 @@ static void show_process_blocks(void)
 }
 #endif /* DEBUG */
 
+/*
+ * add a VMA into a process's mm_struct in the appropriate place in the list
+ * - should be called with mm->mmap_sem held writelocked
+ */
+static void add_vma_to_mm(struct mm_struct *mm, struct vm_list_struct *vml)
+{
+	struct vm_list_struct **ppv;
+
+	for (ppv = &current->mm->context.vmlist; *ppv; ppv = &(*ppv)->next)
+		if ((*ppv)->vma->vm_start > vml->vma->vm_start)
+			break;
+
+	vml->next = *ppv;
+	*ppv = vml;
+}
+
+/*
+ * look up the first VMA in which addr resides, NULL if none
+ * - should be called with mm->mmap_sem at least held readlocked
+ */
+struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
+{
+	struct vm_list_struct *loop, *vml;
+
+	/* search the vm_start ordered list */
+	vml = NULL;
+	for (loop = mm->context.vmlist; loop; loop = loop->next) {
+		if (loop->vma->vm_start > addr)
+			break;
+		vml = loop;
+	}
+
+	if (vml && vml->vma->vm_end > addr)
+		return vml->vma;
+
+	return NULL;
+}
+EXPORT_SYMBOL(find_vma);
+
+/*
+ * find a VMA
+ * - we don't extend stack VMAs under NOMMU conditions
+ */
+struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned long addr)
+{
+	return find_vma(mm, addr);
+}
+
+/*
+ * look up the first VMA exactly that exactly matches addr
+ * - should be called with mm->mmap_sem at least held readlocked
+ */
+static inline struct vm_area_struct *find_vma_exact(struct mm_struct *mm,
+						    unsigned long addr)
+{
+	struct vm_list_struct *vml;
+
+	/* search the vm_start ordered list */
+	for (vml = mm->context.vmlist; vml; vml = vml->next) {
+		if (vml->vma->vm_start == addr)
+			return vml->vma;
+		if (vml->vma->vm_start > addr)
+			break;
+	}
+
+	return NULL;
+}
+
+/*
+ * find a VMA in the global tree
+ */
 static inline struct vm_area_struct *find_nommu_vma(unsigned long start)
 {
 	struct vm_area_struct *vma;
@@ -295,6 +400,9 @@ static inline struct vm_area_struct *find_nommu_vma(unsigned long start)
 	return NULL;
 }
 
+/*
+ * add a VMA in the global tree
+ */
 static void add_nommu_vma(struct vm_area_struct *vma)
 {
 	struct vm_area_struct *pvma;
@@ -341,6 +449,9 @@ static void add_nommu_vma(struct vm_area_struct *vma)
 	rb_insert_color(&vma->vm_rb, &nommu_vma_tree);
 }
 
+/*
+ * delete a VMA from the global list
+ */
 static void delete_nommu_vma(struct vm_area_struct *vma)
 {
 	struct address_space *mapping;
@@ -386,15 +497,17 @@ static int validate_mmap_request(struct file *file,
 	    (flags & MAP_TYPE) != MAP_SHARED)
 		return -EINVAL;
 
-	if (PAGE_ALIGN(len) == 0)
-		return addr;
-
-	if (len > TASK_SIZE)
+	if (!len)
 		return -EINVAL;
 
+	/* Careful about overflows.. */
+	len = PAGE_ALIGN(len);
+	if (!len || len > TASK_SIZE)
+		return -ENOMEM;
+
 	/* offset overflow? */
 	if ((pgoff + (len >> PAGE_SHIFT)) < pgoff)
-		return -EINVAL;
+		return -EOVERFLOW;
 
 	if (file) {
 		/* validate file mapping requests */
@@ -410,7 +523,7 @@ static int validate_mmap_request(struct file *file,
 		 */
 		mapping = file->f_mapping;
 		if (!mapping)
-			mapping = file->f_dentry->d_inode->i_mapping;
+			mapping = file->f_path.dentry->d_inode->i_mapping;
 
 		capabilities = 0;
 		if (mapping && mapping->backing_dev_info)
@@ -419,7 +532,7 @@ static int validate_mmap_request(struct file *file,
 		if (!capabilities) {
 			/* no explicit capabilities set, so assume some
 			 * defaults */
-			switch (file->f_dentry->d_inode->i_mode & S_IFMT) {
+			switch (file->f_path.dentry->d_inode->i_mode & S_IFMT) {
 			case S_IFREG:
 			case S_IFBLK:
 				capabilities = BDI_CAP_MAP_COPY;
@@ -450,11 +563,11 @@ static int validate_mmap_request(struct file *file,
 			    !(file->f_mode & FMODE_WRITE))
 				return -EACCES;
 
-			if (IS_APPEND(file->f_dentry->d_inode) &&
+			if (IS_APPEND(file->f_path.dentry->d_inode) &&
 			    (file->f_mode & FMODE_WRITE))
 				return -EACCES;
 
-			if (locks_verify_locked(file->f_dentry->d_inode))
+			if (locks_verify_locked(file->f_path.dentry->d_inode))
 				return -EAGAIN;
 
 			if (!(capabilities & BDI_CAP_MAP_DIRECT))
@@ -485,7 +598,7 @@ static int validate_mmap_request(struct file *file,
 		}
 		/* handle executable mappings and implied executable
 		 * mappings */
-		if (file->f_vfsmnt->mnt_flags & MNT_NOEXEC) {
+		if (file->f_path.mnt->mnt_flags & MNT_NOEXEC) {
 			if (prot & PROT_EXEC)
 				return -EPERM;
 		}
@@ -613,7 +726,7 @@ static int do_mmap_private(struct vm_area_struct *vma, unsigned long len)
 	 * - note that this may not return a page-aligned address if the object
 	 *   we're allocating is smaller than a page
 	 */
-	base = kmalloc(len, GFP_KERNEL);
+	base = kmalloc(len, GFP_KERNEL|__GFP_COMP);
 	if (!base)
 		goto enomem;
 
@@ -695,10 +808,9 @@ unsigned long do_mmap_pgoff(struct file *file,
 	vm_flags = determine_vm_flags(file, prot, flags, capabilities);
 
 	/* we're going to need to record the mapping if it works */
-	vml = kmalloc(sizeof(struct vm_list_struct), GFP_KERNEL);
+	vml = kzalloc(sizeof(struct vm_list_struct), GFP_KERNEL);
 	if (!vml)
 		goto error_getting_vml;
-	memset(vml, 0, sizeof(*vml));
 
 	down_write(&nommu_vma_sem);
 
@@ -721,7 +833,7 @@ unsigned long do_mmap_pgoff(struct file *file,
 			continue;
 
 		/* search for overlapping mappings on the same file */
-		if (vma->vm_file->f_dentry->d_inode != file->f_dentry->d_inode)
+		if (vma->vm_file->f_path.dentry->d_inode != file->f_path.dentry->d_inode)
 			continue;
 
 		if (vma->vm_pgoff >= pgoff + pglen)
 			continue;
@@ -774,11 +886,10 @@ unsigned long do_mmap_pgoff(struct file *file,
 	}
 
 	/* we're going to need a VMA struct as well */
-	vma = kmalloc(sizeof(struct vm_area_struct), GFP_KERNEL);
+	vma = kzalloc(sizeof(struct vm_area_struct), GFP_KERNEL);
 	if (!vma)
 		goto error_getting_vma;
 
-	memset(vma, 0, sizeof(*vma));
 	INIT_LIST_HEAD(&vma->anon_vma_node);
 	atomic_set(&vma->vm_usage, 1);
 	if (file)
@@ -818,8 +929,7 @@ unsigned long do_mmap_pgoff(struct file *file,
 	realalloc += kobjsize(vml);
 	askedalloc += sizeof(*vml);
 
-	vml->next = current->mm->context.vmlist;
-	current->mm->context.vmlist = vml;
+	add_vma_to_mm(current->mm, vml);
 
 	up_write(&nommu_vma_sem);
 
@@ -838,7 +948,8 @@ unsigned long do_mmap_pgoff(struct file *file,
 	up_write(&nommu_vma_sem);
 	kfree(vml);
 	if (vma) {
-		fput(vma->vm_file);
+		if (vma->vm_file)
+			fput(vma->vm_file);
 		kfree(vma);
 	}
 	return ret;
@@ -852,7 +963,7 @@ unsigned long do_mmap_pgoff(struct file *file,
 error_getting_vma:
 	up_write(&nommu_vma_sem);
 	kfree(vml);
-	printk("Allocation of vml for %lu byte allocation from process %d failed\n",
printk("Allocation of vma for %lu byte allocation from process %d failed\n", len, current->pid); show_free_areas(); return -ENOMEM; @@ -898,6 +1009,11 @@ static void put_vma(struct vm_area_struct *vma) } } +/* + * release a mapping + * - under NOMMU conditions the parameters must match exactly to the mapping to + * be removed + */ int do_munmap(struct mm_struct *mm, unsigned long addr, size_t len) { struct vm_list_struct *vml, **parent; @@ -907,10 +1023,13 @@ int do_munmap(struct mm_struct *mm, unsigned long addr, size_t len) printk("do_munmap:\n"); #endif - for (parent = &mm->context.vmlist; *parent; parent = &(*parent)->next) + for (parent = &mm->context.vmlist; *parent; parent = &(*parent)->next) { + if ((*parent)->vma->vm_start > addr) + break; if ((*parent)->vma->vm_start == addr && - (*parent)->vma->vm_end == end) + ((len == 0) || ((*parent)->vma->vm_end == end))) goto found; + } printk("munmap of non-mmaped memory by process %d (%s): %p\n", current->pid, current->comm, (void *) addr); @@ -925,6 +1044,8 @@ int do_munmap(struct mm_struct *mm, unsigned long addr, size_t len) realalloc -= kobjsize(vml); askedalloc -= sizeof(*vml); kfree(vml); + + update_hiwater_vm(mm); vx_vmpages_sub(mm, len >> PAGE_SHIFT); #ifdef DEBUG @@ -934,7 +1055,20 @@ int do_munmap(struct mm_struct *mm, unsigned long addr, size_t len) return 0; } -/* Release all mmaps. */ +asmlinkage long sys_munmap(unsigned long addr, size_t len) +{ + int ret; + struct mm_struct *mm = current->mm; + + down_write(&mm->mmap_sem); + ret = do_munmap(mm, addr, len); + up_write(&mm->mmap_sem); + return ret; +} + +/* + * Release all mappings + */ void exit_mmap(struct mm_struct * mm) { struct vm_list_struct *tmp; @@ -961,37 +1095,26 @@ void exit_mmap(struct mm_struct * mm) } } -asmlinkage long sys_munmap(unsigned long addr, size_t len) -{ - int ret; - struct mm_struct *mm = current->mm; - - down_write(&mm->mmap_sem); - ret = do_munmap(mm, addr, len); - up_write(&mm->mmap_sem); - return ret; -} - unsigned long do_brk(unsigned long addr, unsigned long len) { return -ENOMEM; } /* - * Expand (or shrink) an existing mapping, potentially moving it at the - * same time (controlled by the MREMAP_MAYMOVE flag and available VM space) + * expand (or shrink) an existing mapping, potentially moving it at the same + * time (controlled by the MREMAP_MAYMOVE flag and available VM space) * - * MREMAP_FIXED option added 5-Dec-1999 by Benjamin LaHaise - * This option implies MREMAP_MAYMOVE. 
+ * under NOMMU conditions, we only permit changing a mapping's size, and only
+ * as long as it stays within the hole allocated by the kmalloc() call in
+ * do_mmap_pgoff() and the block is not shareable
  *
- * on uClinux, we only permit changing a mapping's size, and only as long as it stays within the
- * hole allocated by the kmalloc() call in do_mmap_pgoff() and the block is not shareable
+ * MREMAP_FIXED is not supported under NOMMU conditions
  */
 unsigned long do_mremap(unsigned long addr,
 			unsigned long old_len, unsigned long new_len,
 			unsigned long flags, unsigned long new_addr)
 {
-	struct vm_list_struct *vml = NULL;
+	struct vm_area_struct *vma;
 
 	/* insanity checks first */
 	if (new_len == 0)
@@ -1000,53 +1123,42 @@ unsigned long do_mremap(unsigned long addr,
 	if (flags & MREMAP_FIXED && new_addr != addr)
 		return (unsigned long) -EINVAL;
 
-	for (vml = current->mm->context.vmlist; vml; vml = vml->next)
-		if (vml->vma->vm_start == addr)
-			goto found;
-
-	return (unsigned long) -EINVAL;
+	vma = find_vma_exact(current->mm, addr);
+	if (!vma)
+		return (unsigned long) -EINVAL;
 
- found:
-	if (vml->vma->vm_end != vml->vma->vm_start + old_len)
+	if (vma->vm_end != vma->vm_start + old_len)
 		return (unsigned long) -EFAULT;
 
-	if (vml->vma->vm_flags & VM_MAYSHARE)
+	if (vma->vm_flags & VM_MAYSHARE)
 		return (unsigned long) -EPERM;
 
 	if (new_len > kobjsize((void *) addr))
 		return (unsigned long) -ENOMEM;
 
 	/* all checks complete - do it */
-	vml->vma->vm_end = vml->vma->vm_start + new_len;
+	vma->vm_end = vma->vm_start + new_len;
 
 	askedalloc -= old_len;
 	askedalloc += new_len;
 
-	return vml->vma->vm_start;
+	return vma->vm_start;
 }
 
-/*
- * Look up the first VMA which satisfies addr < vm_end, NULL if none
- */
-struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
+asmlinkage unsigned long sys_mremap(unsigned long addr,
+	unsigned long old_len, unsigned long new_len,
+	unsigned long flags, unsigned long new_addr)
 {
-	struct vm_list_struct *vml;
-
-	for (vml = mm->context.vmlist; vml; vml = vml->next)
-		if (addr >= vml->vma->vm_start && addr < vml->vma->vm_end)
-			return vml->vma;
+	unsigned long ret;
 
-	return NULL;
-}
-
-EXPORT_SYMBOL(find_vma);
-
-struct page * follow_page(struct mm_struct *mm, unsigned long addr, int write)
-{
-	return NULL;
+	down_write(&current->mm->mmap_sem);
+	ret = do_mremap(addr, old_len, new_len, flags, new_addr);
+	up_write(&current->mm->mmap_sem);
+	return ret;
 }
 
-struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned long addr)
+struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
+			unsigned int foll_flags)
 {
 	return NULL;
 }
@@ -1054,8 +1166,10 @@ struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned long addr)
 int remap_pfn_range(struct vm_area_struct *vma, unsigned long from,
 		unsigned long to, unsigned long size, pgprot_t prot)
 {
-	return -EPERM;
+	vma->vm_start = vma->vm_pgoff << PAGE_SHIFT;
+	return 0;
 }
+EXPORT_SYMBOL(remap_pfn_range);
 
 void swap_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
 {
@@ -1067,20 +1181,8 @@ unsigned long arch_get_unmapped_area(struct file *file, unsigned long addr,
 	return -ENOMEM;
 }
 
-void arch_unmap_area(struct vm_area_struct *area)
-{
-}
-
-void update_mem_hiwater(struct task_struct *tsk)
+void arch_unmap_area(struct mm_struct *mm, unsigned long addr)
 {
-	unsigned long rss = get_mm_counter(tsk->mm, rss);
-
-	if (likely(tsk->mm)) {
-		if (tsk->mm->hiwater_rss < rss)
-			tsk->mm->hiwater_rss = rss;
-		if (tsk->mm->hiwater_vm < tsk->mm->total_vm)
-			tsk->mm->hiwater_vm = tsk->mm->total_vm;
-	}
 }
 
 void unmap_mapping_range(struct address_space *mapping,
@@ -1088,6 +1190,7 @@
 			  int even_cows)
 {
 }
+EXPORT_SYMBOL(unmap_mapping_range);
 
 /*
  * Check that a process has enough memory to allocate a new virtual
@@ -1120,7 +1223,7 @@ int __vm_enough_memory(long pages, int cap_sys_admin)
 	if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) {
 		unsigned long n;
 
-		free = get_page_cache_size();
+		free = global_page_state(NR_FILE_PAGES);
 		free += nr_swap_pages;
 
 		/*
@@ -1129,7 +1232,7 @@ int __vm_enough_memory(long pages, int cap_sys_admin)
 		 * which are reclaimable, under pressure.  The dentry
 		 * cache and most inode caches should fall into this
 		 */
-		free += atomic_read(&slab_reclaim_pages);
+		free += global_page_state(NR_SLAB_RECLAIMABLE);
 
 		/*
 		 * Leave the last 3% for root
@@ -1145,14 +1248,26 @@ int __vm_enough_memory(long pages, int cap_sys_admin)
 		 * only call if we're about to fail.
 		 */
 		n = nr_free_pages();
+
+		/*
+		 * Leave reserved pages. The pages are not for anonymous pages.
+		 */
+		if (n <= totalreserve_pages)
+			goto error;
+		else
+			n -= totalreserve_pages;
+
+		/*
+		 * Leave the last 3% for root
+		 */
 		if (!cap_sys_admin)
 			n -= n / 32;
 		free += n;
 
 		if (free > pages)
 			return 0;
-		vm_unacct_memory(pages);
-		return -ENOMEM;
+
+		goto error;
 	}
 
 	allowed = totalram_pages * sysctl_overcommit_ratio / 100;
@@ -1167,9 +1282,13 @@ int __vm_enough_memory(long pages, int cap_sys_admin)
 	   leave 3% of the size of this process for other processes */
 	allowed -= current->mm->total_vm / 32;
 
-	if (atomic_read(&vm_committed_space) < allowed)
+	/*
+	 * cast `allowed' as a signed long because vm_committed_space
+	 * sometimes has a negative value
+	 */
+	if (atomic_read(&vm_committed_space) < (long)allowed)
 		return 0;
-
+error:
 	vm_unacct_memory(pages);
 	return -ENOMEM;
 
@@ -1179,3 +1298,51 @@ int in_gate_area_no_task(unsigned long addr)
 {
 	return 0;
 }
+
+struct page *filemap_nopage(struct vm_area_struct *area,
+			unsigned long address, int *type)
+{
+	BUG();
+	return NULL;
+}
+
+/*
+ * Access another process' address space.
+ * - source/target buffer must be kernel space
+ */
+int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
+{
+	struct vm_area_struct *vma;
+	struct mm_struct *mm;
+
+	if (addr + len < addr)
+		return 0;
+
+	mm = get_task_mm(tsk);
+	if (!mm)
+		return 0;
+
+	down_read(&mm->mmap_sem);
+
+	/* the access must start within one of the target process's mappings */
+	vma = find_vma(mm, addr);
+	if (vma) {
+		/* don't overrun this mapping */
+		if (addr + len >= vma->vm_end)
+			len = vma->vm_end - addr;
+
+		/* only read or write mappings where it is permitted */
+		if (write && vma->vm_flags & VM_MAYWRITE)
+			len -= copy_to_user((void *) addr, buf, len);
+		else if (!write && vma->vm_flags & VM_MAYREAD)
+			len -= copy_from_user(buf, (void *) addr, len);
+		else
+			len = 0;
+	} else {
+		len = 0;
+	}
+
+	up_read(&mm->mmap_sem);
+	mmput(mm);
+	return len;
+}