X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=mm%2Fnommu.c;h=87e14d6eefa07a95eb9328fbc9d8129652caed41;hb=refs%2Fheads%2Fvserver;hp=ef55210e550888212c9fba9876942338a2490d14;hpb=6a77f38946aaee1cd85eeec6cf4229b204c15071;p=linux-2.6.git diff --git a/mm/nommu.c b/mm/nommu.c index ef55210e5..87e14d6ee 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -44,10 +44,6 @@ int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT; int heap_stack_gap = 0; EXPORT_SYMBOL(mem_map); -EXPORT_SYMBOL(sysctl_max_map_count); -EXPORT_SYMBOL(sysctl_overcommit_memory); -EXPORT_SYMBOL(sysctl_overcommit_ratio); -EXPORT_SYMBOL(vm_committed_space); EXPORT_SYMBOL(__vm_enough_memory); /* list of shareable VMAs */ @@ -57,6 +53,12 @@ DECLARE_RWSEM(nommu_vma_sem); struct vm_operations_struct generic_file_vm_ops = { }; +EXPORT_SYMBOL(vfree); +EXPORT_SYMBOL(vmalloc_to_page); +EXPORT_SYMBOL(vmalloc_32); +EXPORT_SYMBOL(vmap); +EXPORT_SYMBOL(vunmap); + /* * Handle all mappings that got truncated by a "truncate()" * system call. @@ -120,28 +122,54 @@ unsigned int kobjsize(const void *objp) } /* - * The nommu dodgy version :-) + * get a list of pages in an address range belonging to the specified process + * and indicate the VMA that covers each page + * - this is potentially dodgy as we may end incrementing the page count of a + * slab page or a secondary page from a compound page + * - don't permit access to VMAs that don't support it, such as I/O mappings */ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, int len, int write, int force, struct page **pages, struct vm_area_struct **vmas) { + struct vm_area_struct *vma; + unsigned long vm_flags; int i; - static struct vm_area_struct dummy_vma; + + /* calculate required read or write permissions. + * - if 'force' is set, we only require the "MAY" flags. + */ + vm_flags = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD); + vm_flags &= force ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE); for (i = 0; i < len; i++) { + vma = find_vma(mm, start); + if (!vma) + goto finish_or_fault; + + /* protect what we can, including chardevs */ + if (vma->vm_flags & (VM_IO | VM_PFNMAP) || + !(vm_flags & vma->vm_flags)) + goto finish_or_fault; + if (pages) { pages[i] = virt_to_page(start); if (pages[i]) page_cache_get(pages[i]); } if (vmas) - vmas[i] = &dummy_vma; + vmas[i] = vma; start += PAGE_SIZE; } - return(i); + + return i; + +finish_or_fault: + return i ? : -EFAULT; } +EXPORT_SYMBOL(get_user_pages); + DEFINE_RWLOCK(vmlist_lock); struct vm_struct *vmlist; @@ -150,12 +178,12 @@ void vfree(void *addr) kfree(addr); } -void *__vmalloc(unsigned long size, int gfp_mask, pgprot_t prot) +void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot) { /* * kmalloc doesn't like __GFP_HIGHMEM for some reason */ - return kmalloc(size, gfp_mask & ~__GFP_HIGHMEM); + return kmalloc(size, (gfp_mask | __GFP_COMP) & ~__GFP_HIGHMEM); } struct page * vmalloc_to_page(void *addr) @@ -193,13 +221,20 @@ long vwrite(char *buf, char *addr, unsigned long count) * Allocate enough pages to cover @size from the page level * allocator and map them into continguos kernel virtual space. * - * For tight cotrol over page level allocator and protection flags + * For tight control over page level allocator and protection flags * use __vmalloc() instead. 
*/ void *vmalloc(unsigned long size) { return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL); } +EXPORT_SYMBOL(vmalloc); + +void *vmalloc_node(unsigned long size, int node) +{ + return vmalloc(size); +} +EXPORT_SYMBOL(vmalloc_node); /* * vmalloc_32 - allocate virtually continguos memory (32bit addressable) @@ -256,29 +291,6 @@ asmlinkage unsigned long sys_brk(unsigned long brk) return mm->brk = brk; } -/* - * Combine the mmap "prot" and "flags" argument into one "vm_flags" used - * internally. Essentially, translate the "PROT_xxx" and "MAP_xxx" bits - * into "VM_xxx". - */ -static inline unsigned long calc_vm_flags(unsigned long prot, unsigned long flags) -{ -#define _trans(x,bit1,bit2) \ -((bit1==bit2)?(x&bit1):(x&bit1)?bit2:0) - - unsigned long prot_bits, flag_bits; - prot_bits = - _trans(prot, PROT_READ, VM_READ) | - _trans(prot, PROT_WRITE, VM_WRITE) | - _trans(prot, PROT_EXEC, VM_EXEC); - flag_bits = - _trans(flags, MAP_GROWSDOWN, VM_GROWSDOWN) | - _trans(flags, MAP_DENYWRITE, VM_DENYWRITE) | - _trans(flags, MAP_EXECUTABLE, VM_EXECUTABLE); - return prot_bits | flag_bits; -#undef _trans -} - #ifdef DEBUG static void show_process_blocks(void) { @@ -298,6 +310,77 @@ static void show_process_blocks(void) } #endif /* DEBUG */ +/* + * add a VMA into a process's mm_struct in the appropriate place in the list + * - should be called with mm->mmap_sem held writelocked + */ +static void add_vma_to_mm(struct mm_struct *mm, struct vm_list_struct *vml) +{ + struct vm_list_struct **ppv; + + for (ppv = ¤t->mm->context.vmlist; *ppv; ppv = &(*ppv)->next) + if ((*ppv)->vma->vm_start > vml->vma->vm_start) + break; + + vml->next = *ppv; + *ppv = vml; +} + +/* + * look up the first VMA in which addr resides, NULL if none + * - should be called with mm->mmap_sem at least held readlocked + */ +struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr) +{ + struct vm_list_struct *loop, *vml; + + /* search the vm_start ordered list */ + vml = NULL; + for (loop = mm->context.vmlist; loop; loop = loop->next) { + if (loop->vma->vm_start > addr) + break; + vml = loop; + } + + if (vml && vml->vma->vm_end > addr) + return vml->vma; + + return NULL; +} +EXPORT_SYMBOL(find_vma); + +/* + * find a VMA + * - we don't extend stack VMAs under NOMMU conditions + */ +struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned long addr) +{ + return find_vma(mm, addr); +} + +/* + * look up the first VMA exactly that exactly matches addr + * - should be called with mm->mmap_sem at least held readlocked + */ +static inline struct vm_area_struct *find_vma_exact(struct mm_struct *mm, + unsigned long addr) +{ + struct vm_list_struct *vml; + + /* search the vm_start ordered list */ + for (vml = mm->context.vmlist; vml; vml = vml->next) { + if (vml->vma->vm_start == addr) + return vml->vma; + if (vml->vma->vm_start > addr) + break; + } + + return NULL; +} + +/* + * find a VMA in the global tree + */ static inline struct vm_area_struct *find_nommu_vma(unsigned long start) { struct vm_area_struct *vma; @@ -317,6 +400,9 @@ static inline struct vm_area_struct *find_nommu_vma(unsigned long start) return NULL; } +/* + * add a VMA in the global tree + */ static void add_nommu_vma(struct vm_area_struct *vma) { struct vm_area_struct *pvma; @@ -363,6 +449,9 @@ static void add_nommu_vma(struct vm_area_struct *vma) rb_insert_color(&vma->vm_rb, &nommu_vma_tree); } +/* + * delete a VMA from the global list + */ static void delete_nommu_vma(struct vm_area_struct *vma) { struct address_space *mapping; @@ -381,145 
+470,352 @@ static void delete_nommu_vma(struct vm_area_struct *vma) } /* - * handle mapping creation for uClinux + * determine whether a mapping should be permitted and, if so, what sort of + * mapping we're capable of supporting */ -unsigned long do_mmap_pgoff(struct file *file, - unsigned long addr, - unsigned long len, - unsigned long prot, - unsigned long flags, - unsigned long pgoff) +static int validate_mmap_request(struct file *file, + unsigned long addr, + unsigned long len, + unsigned long prot, + unsigned long flags, + unsigned long pgoff, + unsigned long *_capabilities) { - struct vm_list_struct *vml = NULL; - struct vm_area_struct *vma = NULL; - struct rb_node *rb; - unsigned int vm_flags; - void *result; - int ret, membacked; + unsigned long capabilities; + unsigned long reqprot = prot; + int ret; /* do the simple checks first */ if (flags & MAP_FIXED || addr) { - printk(KERN_DEBUG "%d: Can't do fixed-address/overlay mmap of RAM\n", + printk(KERN_DEBUG + "%d: Can't do fixed-address/overlay mmap of RAM\n", current->pid); return -EINVAL; } - if (PAGE_ALIGN(len) == 0) - return addr; + if ((flags & MAP_TYPE) != MAP_PRIVATE && + (flags & MAP_TYPE) != MAP_SHARED) + return -EINVAL; - if (len > TASK_SIZE) + if (!len) return -EINVAL; + /* Careful about overflows.. */ + len = PAGE_ALIGN(len); + if (!len || len > TASK_SIZE) + return -ENOMEM; + /* offset overflow? */ if ((pgoff + (len >> PAGE_SHIFT)) < pgoff) - return -EINVAL; + return -EOVERFLOW; - /* validate file mapping requests */ - membacked = 0; if (file) { + /* validate file mapping requests */ + struct address_space *mapping; + /* files must support mmap */ if (!file->f_op || !file->f_op->mmap) return -ENODEV; - if ((prot & PROT_EXEC) && - (file->f_vfsmnt->mnt_flags & MNT_NOEXEC)) - return -EPERM; - /* work out if what we've got could possibly be shared * - we support chardevs that provide their own "memory" * - we support files/blockdevs that are memory backed */ - if (S_ISCHR(file->f_dentry->d_inode->i_mode)) { - membacked = 1; - } - else { - struct address_space *mapping = file->f_mapping; - if (!mapping) - mapping = file->f_dentry->d_inode->i_mapping; - if (mapping && mapping->backing_dev_info) - membacked = mapping->backing_dev_info->memory_backed; + mapping = file->f_mapping; + if (!mapping) + mapping = file->f_path.dentry->d_inode->i_mapping; + + capabilities = 0; + if (mapping && mapping->backing_dev_info) + capabilities = mapping->backing_dev_info->capabilities; + + if (!capabilities) { + /* no explicit capabilities set, so assume some + * defaults */ + switch (file->f_path.dentry->d_inode->i_mode & S_IFMT) { + case S_IFREG: + case S_IFBLK: + capabilities = BDI_CAP_MAP_COPY; + break; + + case S_IFCHR: + capabilities = + BDI_CAP_MAP_DIRECT | + BDI_CAP_READ_MAP | + BDI_CAP_WRITE_MAP; + break; + + default: + return -EINVAL; + } } + /* eliminate any capabilities that we can't support on this + * device */ + if (!file->f_op->get_unmapped_area) + capabilities &= ~BDI_CAP_MAP_DIRECT; + if (!file->f_op->read) + capabilities &= ~BDI_CAP_MAP_COPY; + if (flags & MAP_SHARED) { /* do checks for writing, appending and locking */ - if ((prot & PROT_WRITE) && !(file->f_mode & FMODE_WRITE)) + if ((prot & PROT_WRITE) && + !(file->f_mode & FMODE_WRITE)) return -EACCES; - if (IS_APPEND(file->f_dentry->d_inode) && + if (IS_APPEND(file->f_path.dentry->d_inode) && (file->f_mode & FMODE_WRITE)) return -EACCES; - if (locks_verify_locked(file->f_dentry->d_inode)) + if (locks_verify_locked(file->f_path.dentry->d_inode)) return -EAGAIN; - if 
(!membacked) { + if (!(capabilities & BDI_CAP_MAP_DIRECT)) + return -ENODEV; + + if (((prot & PROT_READ) && !(capabilities & BDI_CAP_READ_MAP)) || + ((prot & PROT_WRITE) && !(capabilities & BDI_CAP_WRITE_MAP)) || + ((prot & PROT_EXEC) && !(capabilities & BDI_CAP_EXEC_MAP)) + ) { printk("MAP_SHARED not completely supported on !MMU\n"); return -EINVAL; } - /* we require greater support from the driver or - * filesystem - we ask it to tell us what memory to - * use */ - if (!file->f_op->get_unmapped_area) - return -ENODEV; + /* we mustn't privatise shared mappings */ + capabilities &= ~BDI_CAP_MAP_COPY; } else { - /* we read private files into memory we allocate */ - if (!file->f_op->read) + /* we're going to read the file into private memory we + * allocate */ + if (!(capabilities & BDI_CAP_MAP_COPY)) return -ENODEV; + + /* we don't permit a private writable mapping to be + * shared with the backing device */ + if (prot & PROT_WRITE) + capabilities &= ~BDI_CAP_MAP_DIRECT; + } + + /* handle executable mappings and implied executable + * mappings */ + if (file->f_path.mnt->mnt_flags & MNT_NOEXEC) { + if (prot & PROT_EXEC) + return -EPERM; + } + else if ((prot & PROT_READ) && !(prot & PROT_EXEC)) { + /* handle implication of PROT_EXEC by PROT_READ */ + if (current->personality & READ_IMPLIES_EXEC) { + if (capabilities & BDI_CAP_EXEC_MAP) + prot |= PROT_EXEC; + } + } + else if ((prot & PROT_READ) && + (prot & PROT_EXEC) && + !(capabilities & BDI_CAP_EXEC_MAP) + ) { + /* backing file is not executable, try to copy */ + capabilities &= ~BDI_CAP_MAP_DIRECT; } } + else { + /* anonymous mappings are always memory backed and can be + * privately mapped + */ + capabilities = BDI_CAP_MAP_COPY; - /* handle PROT_EXEC implication by PROT_READ */ - if ((prot & PROT_READ) && (current->personality & READ_IMPLIES_EXEC)) - if (!(file && (file->f_vfsmnt->mnt_flags & MNT_NOEXEC))) + /* handle PROT_EXEC implication by PROT_READ */ + if ((prot & PROT_READ) && + (current->personality & READ_IMPLIES_EXEC)) prot |= PROT_EXEC; + } - /* do simple checking here so the lower-level routines won't have - * to. we assume access permissions have been handled by the open - * of the memory object, so we don't do any here. 
- */ - vm_flags = calc_vm_flags(prot,flags) /* | mm->def_flags */ - | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC; + /* allow the security API to have its say */ + ret = security_file_mmap(file, reqprot, prot, flags); + if (ret < 0) + return ret; - if (!membacked) { - /* share any file segment that's mapped read-only */ - if (((flags & MAP_PRIVATE) && !(prot & PROT_WRITE) && file) || - ((flags & MAP_SHARED) && !(prot & PROT_WRITE) && file)) - vm_flags |= VM_MAYSHARE; + /* looks okay */ + *_capabilities = capabilities; + return 0; +} - /* refuse to let anyone share files with this process if it's being traced - - * otherwise breakpoints set in it may interfere with another untraced process - */ - if (current->ptrace & PT_PTRACED) - vm_flags &= ~(VM_SHARED | VM_MAYSHARE); +/* + * we've determined that we can make the mapping, now translate what we + * now know into VMA flags + */ +static unsigned long determine_vm_flags(struct file *file, + unsigned long prot, + unsigned long flags, + unsigned long capabilities) +{ + unsigned long vm_flags; + + vm_flags = calc_vm_prot_bits(prot) | calc_vm_flag_bits(flags); + vm_flags |= VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC; + /* vm_flags |= mm->def_flags; */ + + if (!(capabilities & BDI_CAP_MAP_DIRECT)) { + /* attempt to share read-only copies of mapped file chunks */ + if (file && !(prot & PROT_WRITE)) + vm_flags |= VM_MAYSHARE; } else { - /* permit sharing of character devices and ramfs files at any time for - * anything other than a privately writable mapping - */ - if (!(flags & MAP_PRIVATE) || !(prot & PROT_WRITE)) { + /* overlay a shareable mapping on the backing device or inode + * if possible - used for chardevs, ramfs/tmpfs/shmfs and + * romfs/cramfs */ + if (flags & MAP_SHARED) + vm_flags |= VM_MAYSHARE | VM_SHARED; + else if ((((vm_flags & capabilities) ^ vm_flags) & BDI_CAP_VMFLAGS) == 0) vm_flags |= VM_MAYSHARE; - if (flags & MAP_SHARED) - vm_flags |= VM_SHARED; + } + + /* refuse to let anyone share private mappings with this process if + * it's being traced - otherwise breakpoints set in it may interfere + * with another untraced process + */ + if ((flags & MAP_PRIVATE) && (current->ptrace & PT_PTRACED)) + vm_flags &= ~VM_MAYSHARE; + + return vm_flags; +} + +/* + * set up a shared mapping on a file + */ +static int do_mmap_shared_file(struct vm_area_struct *vma, unsigned long len) +{ + int ret; + + ret = vma->vm_file->f_op->mmap(vma->vm_file, vma); + if (ret != -ENOSYS) + return ret; + + /* getting an ENOSYS error indicates that direct mmap isn't + * possible (as opposed to tried but failed) so we'll fall + * through to making a private copy of the data and mapping + * that if we can */ + return -ENODEV; +} + +/* + * set up a private mapping or an anonymous shared mapping + */ +static int do_mmap_private(struct vm_area_struct *vma, unsigned long len) +{ + void *base; + int ret; + + /* invoke the file's mapping function so that it can keep track of + * shared mappings on devices or memory + * - VM_MAYSHARE will be set if it may attempt to share + */ + if (vma->vm_file) { + ret = vma->vm_file->f_op->mmap(vma->vm_file, vma); + if (ret != -ENOSYS) { + /* shouldn't return success if we're not sharing */ + BUG_ON(ret == 0 && !(vma->vm_flags & VM_MAYSHARE)); + return ret; /* success or a real error */ } + + /* getting an ENOSYS error indicates that direct mmap isn't + * possible (as opposed to tried but failed) so we'll try to + * make a private copy of the data and map that instead */ } - /* allow the security API to have its say */ - ret = 
security_file_mmap(file, prot, flags); - if (ret) + /* allocate some memory to hold the mapping + * - note that this may not return a page-aligned address if the object + * we're allocating is smaller than a page + */ + base = kmalloc(len, GFP_KERNEL|__GFP_COMP); + if (!base) + goto enomem; + + vma->vm_start = (unsigned long) base; + vma->vm_end = vma->vm_start + len; + vma->vm_flags |= VM_MAPPED_COPY; + +#ifdef WARN_ON_SLACK + if (len + WARN_ON_SLACK <= kobjsize(result)) + printk("Allocation of %lu bytes from process %d has %lu bytes of slack\n", + len, current->pid, kobjsize(result) - len); +#endif + + if (vma->vm_file) { + /* read the contents of a file into the copy */ + mm_segment_t old_fs; + loff_t fpos; + + fpos = vma->vm_pgoff; + fpos <<= PAGE_SHIFT; + + old_fs = get_fs(); + set_fs(KERNEL_DS); + ret = vma->vm_file->f_op->read(vma->vm_file, base, len, &fpos); + set_fs(old_fs); + + if (ret < 0) + goto error_free; + + /* clear the last little bit */ + if (ret < len) + memset(base + ret, 0, len - ret); + + } else { + /* if it's an anonymous mapping, then just clear it */ + memset(base, 0, len); + } + + return 0; + +error_free: + kfree(base); + vma->vm_start = 0; + return ret; + +enomem: + printk("Allocation of length %lu from process %d failed\n", + len, current->pid); + show_free_areas(); + return -ENOMEM; +} + +/* + * handle mapping creation for uClinux + */ +unsigned long do_mmap_pgoff(struct file *file, + unsigned long addr, + unsigned long len, + unsigned long prot, + unsigned long flags, + unsigned long pgoff) +{ + struct vm_list_struct *vml = NULL; + struct vm_area_struct *vma = NULL; + struct rb_node *rb; + unsigned long capabilities, vm_flags; + void *result; + int ret; + + /* decide whether we should attempt the mapping, and if so what sort of + * mapping */ + ret = validate_mmap_request(file, addr, len, prot, flags, pgoff, + &capabilities); + if (ret < 0) return ret; + /* we've determined that we can make the mapping, now translate what we + * now know into VMA flags */ + vm_flags = determine_vm_flags(file, prot, flags, capabilities); + /* we're going to need to record the mapping if it works */ - vml = kmalloc(sizeof(struct vm_list_struct), GFP_KERNEL); + vml = kzalloc(sizeof(struct vm_list_struct), GFP_KERNEL); if (!vml) goto error_getting_vml; - memset(vml, 0, sizeof(*vml)); down_write(&nommu_vma_sem); - /* if we want to share, we need to search for VMAs created by another - * mmap() call that overlap with our proposed mapping + /* if we want to share, we need to check for VMAs created by other + * mmap() calls that overlap with our proposed mapping * - we can only share with an exact match on most regular files * - shared mappings on character devices and memory backed files are * permitted to overlap inexactly as far as we are concerned for in @@ -537,19 +833,20 @@ unsigned long do_mmap_pgoff(struct file *file, continue; /* search for overlapping mappings on the same file */ - if (vma->vm_file->f_dentry->d_inode != file->f_dentry->d_inode) + if (vma->vm_file->f_path.dentry->d_inode != file->f_path.dentry->d_inode) continue; if (vma->vm_pgoff >= pgoff + pglen) continue; - vmpglen = (vma->vm_end - vma->vm_start + PAGE_SIZE - 1) >> PAGE_SHIFT; + vmpglen = vma->vm_end - vma->vm_start + PAGE_SIZE - 1; + vmpglen >>= PAGE_SHIFT; if (pgoff >= vma->vm_pgoff + vmpglen) continue; - /* handle inexact matches between mappings */ - if (vmpglen != pglen || vma->vm_pgoff != pgoff) { - if (!membacked) + /* handle inexactly overlapping matches between mappings */ + if (vma->vm_pgoff != 
pgoff || vmpglen != pglen) { + if (!(capabilities & BDI_CAP_MAP_DIRECT)) goto sharing_violation; continue; } @@ -561,30 +858,38 @@ unsigned long do_mmap_pgoff(struct file *file, result = (void *) vma->vm_start; goto shared; } - } - vma = NULL; + vma = NULL; - /* obtain the address to map to. we verify (or select) it and ensure - * that it represents a valid section of the address space - * - this is the hook for quasi-memory character devices - */ - if (file && file->f_op->get_unmapped_area) { - addr = file->f_op->get_unmapped_area(file, addr, len, pgoff, flags); - if (IS_ERR((void *) addr)) { - ret = addr; - if (ret == (unsigned long) -ENOSYS) + /* obtain the address at which to make a shared mapping + * - this is the hook for quasi-memory character devices to + * tell us the location of a shared mapping + */ + if (file && file->f_op->get_unmapped_area) { + addr = file->f_op->get_unmapped_area(file, addr, len, + pgoff, flags); + if (IS_ERR((void *) addr)) { + ret = addr; + if (ret != (unsigned long) -ENOSYS) + goto error; + + /* the driver refused to tell us where to site + * the mapping so we'll have to attempt to copy + * it */ ret = (unsigned long) -ENODEV; - goto error; + if (!(capabilities & BDI_CAP_MAP_COPY)) + goto error; + + capabilities &= ~BDI_CAP_MAP_DIRECT; + } } } /* we're going to need a VMA struct as well */ - vma = kmalloc(sizeof(struct vm_area_struct), GFP_KERNEL); + vma = kzalloc(sizeof(struct vm_area_struct), GFP_KERNEL); if (!vma) goto error_getting_vma; - memset(vma, 0, sizeof(*vma)); INIT_LIST_HEAD(&vma->anon_vma_node); atomic_set(&vma->vm_usage, 1); if (file) @@ -597,96 +902,18 @@ unsigned long do_mmap_pgoff(struct file *file, vml->vma = vma; - /* determine the object being mapped and call the appropriate specific - * mapper. - */ - if (file) { -#ifdef MAGIC_ROM_PTR - /* First, try simpler routine designed to give us a ROM pointer. */ - if (file->f_op->romptr && !(prot & PROT_WRITE)) { - ret = file->f_op->romptr(file, vma); -#ifdef DEBUG - printk("romptr mmap returned %d (st=%lx)\n", - ret, vma->vm_start); -#endif - result = (void *) vma->vm_start; - if (!ret) - goto done; - else if (ret != -ENOSYS) - goto error; - } else -#endif /* MAGIC_ROM_PTR */ - /* Then try full mmap routine, which might return a RAM - * pointer, or do something truly complicated - */ - if (file->f_op->mmap) { - ret = file->f_op->mmap(file, vma); - -#ifdef DEBUG - printk("f_op->mmap() returned %d (st=%lx)\n", - ret, vma->vm_start); -#endif - result = (void *) vma->vm_start; - if (!ret) - goto done; - else if (ret != -ENOSYS) - goto error; - } else { - ret = -ENODEV; /* No mapping operations defined */ - goto error; - } - - /* An ENOSYS error indicates that mmap isn't possible (as - * opposed to tried but failed) so we'll fall through to the - * copy. 
*/ - } - - /* allocate some memory to hold the mapping - * - note that this may not return a page-aligned address if the object - * we're allocating is smaller than a page - */ - ret = -ENOMEM; - result = kmalloc(len, GFP_KERNEL); - if (!result) { - printk("Allocation of length %lu from process %d failed\n", - len, current->pid); - show_free_areas(); + /* set up the mapping */ + if (file && vma->vm_flags & VM_SHARED) + ret = do_mmap_shared_file(vma, len); + else + ret = do_mmap_private(vma, len); + if (ret < 0) goto error; - } - vma->vm_start = (unsigned long) result; - vma->vm_end = vma->vm_start + len; - -#ifdef WARN_ON_SLACK - if (len + WARN_ON_SLACK <= kobjsize(result)) - printk("Allocation of %lu bytes from process %d has %lu bytes of slack\n", - len, current->pid, kobjsize(result) - len); -#endif - - if (file) { - mm_segment_t old_fs = get_fs(); - loff_t fpos; - - fpos = pgoff; - fpos <<= PAGE_SHIFT; - - set_fs(KERNEL_DS); - ret = file->f_op->read(file, (char *) result, len, &fpos); - set_fs(old_fs); - - if (ret < 0) - goto error2; - if (ret < len) - memset(result + ret, 0, len - ret); - } else { - memset(result, 0, len); - } - - if (prot & PROT_EXEC) - flush_icache_range((unsigned long) result, (unsigned long) result + len); + /* okay... we have a mapping; now we have to register it */ + result = (void *) vma->vm_start; - done: - if (!(vma->vm_flags & VM_SHARED)) { + if (vma->vm_flags & VM_MAPPED_COPY) { realalloc += kobjsize(result); askedalloc += len; } @@ -694,19 +921,22 @@ unsigned long do_mmap_pgoff(struct file *file, realalloc += kobjsize(vma); askedalloc += sizeof(*vma); - // current->mm->total_vm += len >> PAGE_SHIFT; vx_vmpages_add(current->mm, len >> PAGE_SHIFT); add_nommu_vma(vma); + shared: realalloc += kobjsize(vml); askedalloc += sizeof(*vml); - vml->next = current->mm->context.vmlist; - current->mm->context.vmlist = vml; + add_vma_to_mm(current->mm, vml); up_write(&nommu_vma_sem); + if (prot & PROT_EXEC) + flush_icache_range((unsigned long) result, + (unsigned long) result + len); + #ifdef DEBUG printk("do_mmap:\n"); show_process_blocks(); @@ -714,13 +944,12 @@ unsigned long do_mmap_pgoff(struct file *file, return (unsigned long) result; - error2: - kfree(result); error: up_write(&nommu_vma_sem); kfree(vml); if (vma) { - fput(vma->vm_file); + if (vma->vm_file) + fput(vma->vm_file); kfree(vma); } return ret; @@ -734,7 +963,7 @@ unsigned long do_mmap_pgoff(struct file *file, error_getting_vma: up_write(&nommu_vma_sem); kfree(vml); - printk("Allocation of vml for %lu byte allocation from process %d failed\n", + printk("Allocation of vma for %lu byte allocation from process %d failed\n", len, current->pid); show_free_areas(); return -ENOMEM; @@ -762,7 +991,7 @@ static void put_vma(struct vm_area_struct *vma) /* IO memory and memory shared directly out of the pagecache from * ramfs/tmpfs mustn't be released here */ - if (!(vma->vm_flags & (VM_IO | VM_SHARED)) && vma->vm_start) { + if (vma->vm_flags & VM_MAPPED_COPY) { realalloc -= kobjsize((void *) vma->vm_start); askedalloc -= vma->vm_end - vma->vm_start; kfree((void *) vma->vm_start); @@ -780,26 +1009,27 @@ static void put_vma(struct vm_area_struct *vma) } } +/* + * release a mapping + * - under NOMMU conditions the parameters must match exactly to the mapping to + * be removed + */ int do_munmap(struct mm_struct *mm, unsigned long addr, size_t len) { struct vm_list_struct *vml, **parent; unsigned long end = addr + len; -#ifdef MAGIC_ROM_PTR - /* For efficiency's sake, if the pointer is obviously in ROM, - don't bother 
walking the lists to free it */ - if (is_in_rom(addr)) - return 0; -#endif - #ifdef DEBUG printk("do_munmap:\n"); #endif - for (parent = &mm->context.vmlist; *parent; parent = &(*parent)->next) + for (parent = &mm->context.vmlist; *parent; parent = &(*parent)->next) { + if ((*parent)->vma->vm_start > addr) + break; if ((*parent)->vma->vm_start == addr && - (*parent)->vma->vm_end == end) + ((len == 0) || ((*parent)->vma->vm_end == end))) goto found; + } printk("munmap of non-mmaped memory by process %d (%s): %p\n", current->pid, current->comm, (void *) addr); @@ -814,7 +1044,8 @@ int do_munmap(struct mm_struct *mm, unsigned long addr, size_t len) realalloc -= kobjsize(vml); askedalloc -= sizeof(*vml); kfree(vml); - // mm->total_vm -= len >> PAGE_SHIFT; + + update_hiwater_vm(mm); vx_vmpages_sub(mm, len >> PAGE_SHIFT); #ifdef DEBUG @@ -824,7 +1055,20 @@ int do_munmap(struct mm_struct *mm, unsigned long addr, size_t len) return 0; } -/* Release all mmaps. */ +asmlinkage long sys_munmap(unsigned long addr, size_t len) +{ + int ret; + struct mm_struct *mm = current->mm; + + down_write(&mm->mmap_sem); + ret = do_munmap(mm, addr, len); + up_write(&mm->mmap_sem); + return ret; +} + +/* + * Release all mappings + */ void exit_mmap(struct mm_struct * mm) { struct vm_list_struct *tmp; @@ -834,7 +1078,6 @@ void exit_mmap(struct mm_struct * mm) printk("Exit_mmap:\n"); #endif - // mm->total_vm = 0; vx_vmpages_sub(mm, mm->total_vm); while ((tmp = mm->context.vmlist)) { @@ -852,37 +1095,26 @@ void exit_mmap(struct mm_struct * mm) } } -asmlinkage long sys_munmap(unsigned long addr, size_t len) -{ - int ret; - struct mm_struct *mm = current->mm; - - down_write(&mm->mmap_sem); - ret = do_munmap(mm, addr, len); - up_write(&mm->mmap_sem); - return ret; -} - unsigned long do_brk(unsigned long addr, unsigned long len) { return -ENOMEM; } /* - * Expand (or shrink) an existing mapping, potentially moving it at the - * same time (controlled by the MREMAP_MAYMOVE flag and available VM space) + * expand (or shrink) an existing mapping, potentially moving it at the same + * time (controlled by the MREMAP_MAYMOVE flag and available VM space) * - * MREMAP_FIXED option added 5-Dec-1999 by Benjamin LaHaise - * This option implies MREMAP_MAYMOVE. 
+ * under NOMMU conditions, we only permit changing a mapping's size, and only + * as long as it stays within the hole allocated by the kmalloc() call in + * do_mmap_pgoff() and the block is not shareable * - * on uClinux, we only permit changing a mapping's size, and only as long as it stays within the - * hole allocated by the kmalloc() call in do_mmap_pgoff() and the block is not shareable + * MREMAP_FIXED is not supported under NOMMU conditions */ unsigned long do_mremap(unsigned long addr, unsigned long old_len, unsigned long new_len, unsigned long flags, unsigned long new_addr) { - struct vm_list_struct *vml = NULL; + struct vm_area_struct *vma; /* insanity checks first */ if (new_len == 0) @@ -891,53 +1123,42 @@ unsigned long do_mremap(unsigned long addr, if (flags & MREMAP_FIXED && new_addr != addr) return (unsigned long) -EINVAL; - for (vml = current->mm->context.vmlist; vml; vml = vml->next) - if (vml->vma->vm_start == addr) - goto found; - - return (unsigned long) -EINVAL; + vma = find_vma_exact(current->mm, addr); + if (!vma) + return (unsigned long) -EINVAL; - found: - if (vml->vma->vm_end != vml->vma->vm_start + old_len) + if (vma->vm_end != vma->vm_start + old_len) return (unsigned long) -EFAULT; - if (vml->vma->vm_flags & VM_MAYSHARE) + if (vma->vm_flags & VM_MAYSHARE) return (unsigned long) -EPERM; if (new_len > kobjsize((void *) addr)) return (unsigned long) -ENOMEM; /* all checks complete - do it */ - vml->vma->vm_end = vml->vma->vm_start + new_len; + vma->vm_end = vma->vm_start + new_len; askedalloc -= old_len; askedalloc += new_len; - return vml->vma->vm_start; + return vma->vm_start; } -/* - * Look up the first VMA which satisfies addr < vm_end, NULL if none - */ -struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr) +asmlinkage unsigned long sys_mremap(unsigned long addr, + unsigned long old_len, unsigned long new_len, + unsigned long flags, unsigned long new_addr) { - struct vm_list_struct *vml; - - for (vml = mm->context.vmlist; vml; vml = vml->next) - if (addr >= vml->vma->vm_start && addr < vml->vma->vm_end) - return vml->vma; - - return NULL; -} + unsigned long ret; -EXPORT_SYMBOL(find_vma); - -struct page * follow_page(struct mm_struct *mm, unsigned long addr, int write) -{ - return NULL; + down_write(¤t->mm->mmap_sem); + ret = do_mremap(addr, old_len, new_len, flags, new_addr); + up_write(¤t->mm->mmap_sem); + return ret; } -struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned long addr) +struct page *follow_page(struct vm_area_struct *vma, unsigned long address, + unsigned int foll_flags) { return NULL; } @@ -945,8 +1166,10 @@ struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned long addr) int remap_pfn_range(struct vm_area_struct *vma, unsigned long from, unsigned long to, unsigned long size, pgprot_t prot) { - return -EPERM; + vma->vm_start = vma->vm_pgoff << PAGE_SHIFT; + return 0; } +EXPORT_SYMBOL(remap_pfn_range); void swap_unplug_io_fn(struct backing_dev_info *bdi, struct page *page) { @@ -958,20 +1181,8 @@ unsigned long arch_get_unmapped_area(struct file *file, unsigned long addr, return -ENOMEM; } -void arch_unmap_area(struct vm_area_struct *area) -{ -} - -void update_mem_hiwater(void) +void arch_unmap_area(struct mm_struct *mm, unsigned long addr) { - struct task_struct *tsk = current; - - if (likely(tsk->mm)) { - if (tsk->mm->hiwater_rss < tsk->mm->rss) - tsk->mm->hiwater_rss = tsk->mm->rss; - if (tsk->mm->hiwater_vm < tsk->mm->total_vm) - tsk->mm->hiwater_vm = tsk->mm->total_vm; - } } void 
unmap_mapping_range(struct address_space *mapping, @@ -979,6 +1190,7 @@ void unmap_mapping_range(struct address_space *mapping, int even_cows) { } +EXPORT_SYMBOL(unmap_mapping_range); /* * Check that a process has enough memory to allocate a new virtual @@ -1011,7 +1223,7 @@ int __vm_enough_memory(long pages, int cap_sys_admin) if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) { unsigned long n; - free = get_page_cache_size(); + free = global_page_state(NR_FILE_PAGES); free += nr_swap_pages; /* @@ -1020,7 +1232,7 @@ int __vm_enough_memory(long pages, int cap_sys_admin) * which are reclaimable, under pressure. The dentry * cache and most inode caches should fall into this */ - free += atomic_read(&slab_reclaim_pages); + free += global_page_state(NR_SLAB_RECLAIMABLE); /* * Leave the last 3% for root @@ -1036,14 +1248,26 @@ int __vm_enough_memory(long pages, int cap_sys_admin) * only call if we're about to fail. */ n = nr_free_pages(); + + /* + * Leave reserved pages. The pages are not for anonymous pages. + */ + if (n <= totalreserve_pages) + goto error; + else + n -= totalreserve_pages; + + /* + * Leave the last 3% for root + */ if (!cap_sys_admin) n -= n / 32; free += n; if (free > pages) return 0; - vm_unacct_memory(pages); - return -ENOMEM; + + goto error; } allowed = totalram_pages * sysctl_overcommit_ratio / 100; @@ -1058,11 +1282,67 @@ int __vm_enough_memory(long pages, int cap_sys_admin) leave 3% of the size of this process for other processes */ allowed -= current->mm->total_vm / 32; - if (atomic_read(&vm_committed_space) < allowed) + /* + * cast `allowed' as a signed long because vm_committed_space + * sometimes has a negative value + */ + if (atomic_read(&vm_committed_space) < (long)allowed) return 0; - +error: vm_unacct_memory(pages); return -ENOMEM; } +int in_gate_area_no_task(unsigned long addr) +{ + return 0; +} + +struct page *filemap_nopage(struct vm_area_struct *area, + unsigned long address, int *type) +{ + BUG(); + return NULL; +} + +/* + * Access another process' address space. + * - source/target buffer must be kernel space + */ +int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write) +{ + struct vm_area_struct *vma; + struct mm_struct *mm; + + if (addr + len < addr) + return 0; + + mm = get_task_mm(tsk); + if (!mm) + return 0; + + down_read(&mm->mmap_sem); + + /* the access must start within one of the target process's mappings */ + vma = find_vma(mm, addr); + if (vma) { + /* don't overrun this mapping */ + if (addr + len >= vma->vm_end) + len = vma->vm_end - addr; + + /* only read or write mappings where it is permitted */ + if (write && vma->vm_flags & VM_MAYWRITE) + len -= copy_to_user((void *) addr, buf, len); + else if (!write && vma->vm_flags & VM_MAYREAD) + len -= copy_from_user(buf, (void *) addr, len); + else + len = 0; + } else { + len = 0; + } + + up_read(&mm->mmap_sem); + mmput(mm); + return len; +}
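The permission mask built at the top of the rewritten get_user_pages() hunk is terse: the first assignment selects the flags the caller is asking for, and the second narrows the test to the VM_MAY* bits when 'force' is set, so a forced access only requires that the VMA could be granted the permission, not that it currently holds it. The user-space sketch below illustrates that bit logic only - the flag values are stand-ins rather than the definitions from <linux/mm.h>, and gup_required_flags() is a hypothetical helper, not a kernel function.

#include <stdio.h>

/* stand-in flag values for illustration; the real VM_* constants live in
 * <linux/mm.h> */
#define VM_READ     0x01
#define VM_WRITE    0x02
#define VM_MAYREAD  0x10
#define VM_MAYWRITE 0x20

/* mirrors the vm_flags computation in the patched get_user_pages() */
static unsigned long gup_required_flags(int write, int force)
{
	unsigned long vm_flags;

	vm_flags = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
	/* with 'force', only the MAY bits have to be present on the VMA */
	vm_flags &= force ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
	return vm_flags;
}

int main(void)
{
	/* a VMA like a read-only private file mapping: readable now,
	 * writable only potentially (VM_MAYWRITE set, VM_WRITE clear) */
	unsigned long vma_flags = VM_READ | VM_MAYREAD | VM_MAYWRITE;

	/* the kernel check is "fault unless (vm_flags & vma->vm_flags)" */
	printf("write, no force: %s\n",
	       (gup_required_flags(1, 0) & vma_flags) ? "allowed" : "fault");
	printf("write, force:    %s\n",
	       (gup_required_flags(1, 1) & vma_flags) ? "allowed" : "fault");
	return 0;
}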