vserver 2.0-rc4: linux-2.6.git / mm / nommu.c
1 /*
2  *  linux/mm/nommu.c
3  *
4  *  Replacement code for mm functions to support CPUs that don't
5  *  have any form of memory management unit (thus no virtual memory).
6  *
7  *  See Documentation/nommu-mmap.txt
8  *
9  *  Copyright (c) 2004-2005 David Howells <dhowells@redhat.com>
10  *  Copyright (c) 2000-2003 David McCullough <davidm@snapgear.com>
11  *  Copyright (c) 2000-2001 D Jeff Dionne <jeff@uClinux.org>
12  *  Copyright (c) 2002      Greg Ungerer <gerg@snapgear.com>
13  */
14
15 #include <linux/mm.h>
16 #include <linux/mman.h>
17 #include <linux/swap.h>
18 #include <linux/file.h>
19 #include <linux/highmem.h>
20 #include <linux/pagemap.h>
21 #include <linux/slab.h>
22 #include <linux/vmalloc.h>
23 #include <linux/ptrace.h>
24 #include <linux/blkdev.h>
25 #include <linux/backing-dev.h>
26 #include <linux/mount.h>
27 #include <linux/personality.h>
28 #include <linux/security.h>
29 #include <linux/syscalls.h>
30
31 #include <asm/uaccess.h>
32 #include <asm/tlb.h>
33 #include <asm/tlbflush.h>
34
35 void *high_memory;
36 struct page *mem_map;
37 unsigned long max_mapnr;
38 unsigned long num_physpages;
39 unsigned long askedalloc, realalloc;
40 atomic_t vm_committed_space = ATOMIC_INIT(0);
41 int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */
42 int sysctl_overcommit_ratio = 50; /* default is 50% */
43 int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT;
44 int heap_stack_gap = 0;
45
46 EXPORT_SYMBOL(mem_map);
47 EXPORT_SYMBOL(sysctl_max_map_count);
48 EXPORT_SYMBOL(sysctl_overcommit_memory);
49 EXPORT_SYMBOL(sysctl_overcommit_ratio);
50 EXPORT_SYMBOL(vm_committed_space);
51 EXPORT_SYMBOL(__vm_enough_memory);
52
53 /* list of shareable VMAs */
54 struct rb_root nommu_vma_tree = RB_ROOT;
55 DECLARE_RWSEM(nommu_vma_sem);
56
57 struct vm_operations_struct generic_file_vm_ops = {
58 };
59
60 /*
61  * Handle all mappings that got truncated by a "truncate()"
62  * system call.
63  *
64  * NOTE! We have to be ready to update the memory sharing
65  * between the file and the memory map for a potential last
66  * incomplete page.  Ugly, but necessary.
67  */
68 int vmtruncate(struct inode *inode, loff_t offset)
69 {
70         struct address_space *mapping = inode->i_mapping;
71         unsigned long limit;
72
73         if (inode->i_size < offset)
74                 goto do_expand;
75         i_size_write(inode, offset);
76
77         truncate_inode_pages(mapping, offset);
78         goto out_truncate;
79
80 do_expand:
81         limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
82         if (limit != RLIM_INFINITY && offset > limit)
83                 goto out_sig;
84         if (offset > inode->i_sb->s_maxbytes)
85                 goto out;
86         i_size_write(inode, offset);
87
88 out_truncate:
89         if (inode->i_op && inode->i_op->truncate)
90                 inode->i_op->truncate(inode);
91         return 0;
92 out_sig:
93         send_sig(SIGXFSZ, current, 0);
94 out:
95         return -EFBIG;
96 }
97
98 EXPORT_SYMBOL(vmtruncate);
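/*
 * Editorial aside, not part of the original file: a minimal sketch of how a
 * filesystem's setattr path typically reaches vmtruncate() when a size change
 * is requested (the surrounding code and variable names are illustrative
 * only):
 *
 *	if (attr->ia_valid & ATTR_SIZE) {
 *		int err = vmtruncate(inode, attr->ia_size);
 *		if (err)
 *			return err;
 *	}
 */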
99
100 /*
101  * Return the total memory allocated for this pointer, not
102  * just what the caller asked for.
103  *
104  * Doesn't have to be accurate, i.e. may have races.
105  */
106 unsigned int kobjsize(const void *objp)
107 {
108         struct page *page;
109
110         if (!objp || !((page = virt_to_page(objp))))
111                 return 0;
112
113         if (PageSlab(page))
114                 return ksize(objp);
115
116         BUG_ON(page->index < 0);
117         BUG_ON(page->index >= MAX_ORDER);
118
119         return (PAGE_SIZE << page->index);
120 }
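/*
 * Editorial aside, not part of the original file: kobjsize() reports the size
 * of the underlying allocation rather than the length the caller asked for,
 * which is what the askedalloc/realalloc accounting below relies on.  An
 * illustrative sketch (the 1024 figure assumes a standard slab size-1024
 * cache):
 *
 *	char *buf = kmalloc(1000, GFP_KERNEL);
 *	if (buf) {
 *		unsigned int real = kobjsize(buf);	(1024 here, not 1000)
 *		kfree(buf);
 *	}
 */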
121
122 /*
123  * The nommu dodgy version :-)
124  */
125 int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
126         unsigned long start, int len, int write, int force,
127         struct page **pages, struct vm_area_struct **vmas)
128 {
129         int i;
130         static struct vm_area_struct dummy_vma;
131
132         for (i = 0; i < len; i++) {
133                 if (pages) {
134                         pages[i] = virt_to_page(start);
135                         if (pages[i])
136                                 page_cache_get(pages[i]);
137                 }
138                 if (vmas)
139                         vmas[i] = &dummy_vma;
140                 start += PAGE_SIZE;
141         }
142         return(i);
143 }
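/*
 * Editorial aside, not part of the original file: callers use this the same
 * way as on MMU kernels; here each returned page is simply the physical page
 * behind the flat address.  A hedged sketch for a hypothetical user buffer
 * ubuf:
 *
 *	struct page *pages[4];
 *	int n;
 *
 *	down_read(&current->mm->mmap_sem);
 *	n = get_user_pages(current, current->mm, (unsigned long) ubuf,
 *			   4, 1, 0, pages, NULL);
 *	up_read(&current->mm->mmap_sem);
 *	while (n-- > 0)
 *		page_cache_release(pages[n]);
 */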
144
145 DEFINE_RWLOCK(vmlist_lock);
146 struct vm_struct *vmlist;
147
148 void vfree(void *addr)
149 {
150         kfree(addr);
151 }
152
153 void *__vmalloc(unsigned long size, int gfp_mask, pgprot_t prot)
154 {
155         /*
156          * kmalloc cannot allocate from highmem, so strip off __GFP_HIGHMEM
157          */
158         return kmalloc(size, gfp_mask & ~__GFP_HIGHMEM);
159 }
160
161 struct page * vmalloc_to_page(void *addr)
162 {
163         return virt_to_page(addr);
164 }
165
166 unsigned long vmalloc_to_pfn(void *addr)
167 {
168         return page_to_pfn(virt_to_page(addr));
169 }
170
171
172 long vread(char *buf, char *addr, unsigned long count)
173 {
174         memcpy(buf, addr, count);
175         return count;
176 }
177
178 long vwrite(char *buf, char *addr, unsigned long count)
179 {
180         /* Don't allow overflow */
181         if ((unsigned long) addr + count < count)
182                 count = -(unsigned long) addr;
183
184         memcpy(addr, buf, count);
185         return(count);
186 }
187
188 /*
189  *      vmalloc  -  allocate virtually contiguous memory
190  *
191  *      @size:          allocation size
192  *
193  *      Allocate enough pages to cover @size from the page level
194  *      allocator and map them into contiguous kernel virtual space.
195  *
196  *      For tight control over the page level allocator and protection flags
197  *      use __vmalloc() instead.
198  */
199 void *vmalloc(unsigned long size)
200 {
201        return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL);
202 }
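/*
 * Editorial aside, not part of the original file: on nommu, vmalloc() is just
 * kmalloc() in disguise, so the memory is physically contiguous and the
 * largest possible allocation is whatever the slab/page allocator can supply
 * in one piece, far smaller than on MMU kernels.  Illustrative sketch:
 *
 *	void *p = vmalloc(64 * 1024);	(behaves much like kmalloc(64K))
 *	if (p) {
 *		...
 *		vfree(p);		(ends up in kfree())
 *	}
 */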
203
204 /*
205  *      vmalloc_32  -  allocate virtually contiguous memory (32-bit addressable)
206  *
207  *      @size:          allocation size
208  *
209  *      Allocate enough 32-bit physically addressable pages to cover @size from
210  *      the page level allocator and map them into contiguous kernel virtual space.
211  */
212 void *vmalloc_32(unsigned long size)
213 {
214         return __vmalloc(size, GFP_KERNEL, PAGE_KERNEL);
215 }
216
217 void *vmap(struct page **pages, unsigned int count, unsigned long flags, pgprot_t prot)
218 {
219         BUG();
220         return NULL;
221 }
222
223 void vunmap(void *addr)
224 {
225         BUG();
226 }
227
228 /*
229  *  sys_brk() for the most part doesn't need the global kernel
230  *  lock, except when an application is doing something nasty
231  *  like trying to un-brk an area that has already been mapped
232  *  to a regular file.  in this case, the unmapping will need
233  *  to invoke file system routines that need the global lock.
234  */
235 asmlinkage unsigned long sys_brk(unsigned long brk)
236 {
237         struct mm_struct *mm = current->mm;
238
239         if (brk < mm->start_brk || brk > mm->context.end_brk)
240                 return mm->brk;
241
242         if (mm->brk == brk)
243                 return mm->brk;
244
245         /*
246          * Always allow shrinking brk
247          */
248         if (brk <= mm->brk) {
249                 mm->brk = brk;
250                 return brk;
251         }
252
253         /*
254          * Ok, looks good - let it rip.
255          */
256         return mm->brk = brk;
257 }
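/*
 * Editorial aside, not part of the original file: the heap region is carved
 * out once at exec time, so brk() can never grow past mm->context.end_brk;
 * the old break is simply returned instead.  Seen from userspace
 * (illustrative only, huge_amount is a hypothetical value):
 *
 *	void *old = sbrk(0);
 *	if (brk((char *) old + huge_amount) != 0) {
 *		the break did not move; malloc() must fall back to mmap()
 *	}
 */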
258
259 /*
260  * Combine the mmap "prot" and "flags" argument into one "vm_flags" used
261  * internally. Essentially, translate the "PROT_xxx" and "MAP_xxx" bits
262  * into "VM_xxx".
263  */
264 static inline unsigned long calc_vm_flags(unsigned long prot, unsigned long flags)
265 {
266 #define _trans(x,bit1,bit2) \
267 ((bit1==bit2)?(x&bit1):(x&bit1)?bit2:0)
268
269         unsigned long prot_bits, flag_bits;
270         prot_bits =
271                 _trans(prot, PROT_READ, VM_READ) |
272                 _trans(prot, PROT_WRITE, VM_WRITE) |
273                 _trans(prot, PROT_EXEC, VM_EXEC);
274         flag_bits =
275                 _trans(flags, MAP_GROWSDOWN, VM_GROWSDOWN) |
276                 _trans(flags, MAP_DENYWRITE, VM_DENYWRITE) |
277                 _trans(flags, MAP_EXECUTABLE, VM_EXECUTABLE);
278         return prot_bits | flag_bits;
279 #undef _trans
280 }
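/*
 * Editorial worked example, not part of the original file: for a typical
 * private read/write mapping the translation above gives
 *
 *	calc_vm_flags(PROT_READ | PROT_WRITE, MAP_PRIVATE) == VM_READ | VM_WRITE
 *
 * since PROT_READ/PROT_WRITE/PROT_EXEC share their bit values with the
 * corresponding VM_xxx flags, while MAP_PRIVATE has no VM_xxx counterpart and
 * only GROWSDOWN, DENYWRITE and EXECUTABLE are carried over from flags.
 */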
281
282 #ifdef DEBUG
283 static void show_process_blocks(void)
284 {
285         struct vm_list_struct *vml;
286
287         printk("Process blocks %d:", current->pid);
288
289         for (vml = &current->mm->context.vmlist; vml; vml = vml->next) {
290                 printk(" %p: %p", vml, vml->vma);
291                 if (vml->vma)
292                         printk(" (%d @%lx #%d)",
293                                kobjsize((void *) vml->vma->vm_start),
294                                vml->vma->vm_start,
295                                atomic_read(&vml->vma->vm_usage));
296                 printk(vml->next ? " ->" : ".\n");
297         }
298 }
299 #endif /* DEBUG */
300
301 static inline struct vm_area_struct *find_nommu_vma(unsigned long start)
302 {
303         struct vm_area_struct *vma;
304         struct rb_node *n = nommu_vma_tree.rb_node;
305
306         while (n) {
307                 vma = rb_entry(n, struct vm_area_struct, vm_rb);
308
309                 if (start < vma->vm_start)
310                         n = n->rb_left;
311                 else if (start > vma->vm_start)
312                         n = n->rb_right;
313                 else
314                         return vma;
315         }
316
317         return NULL;
318 }
319
320 static void add_nommu_vma(struct vm_area_struct *vma)
321 {
322         struct vm_area_struct *pvma;
323         struct address_space *mapping;
324         struct rb_node **p = &nommu_vma_tree.rb_node;
325         struct rb_node *parent = NULL;
326
327         /* add the VMA to the mapping */
328         if (vma->vm_file) {
329                 mapping = vma->vm_file->f_mapping;
330
331                 flush_dcache_mmap_lock(mapping);
332                 vma_prio_tree_insert(vma, &mapping->i_mmap);
333                 flush_dcache_mmap_unlock(mapping);
334         }
335
336         /* add the VMA to the master list */
337         while (*p) {
338                 parent = *p;
339                 pvma = rb_entry(parent, struct vm_area_struct, vm_rb);
340
341                 if (vma->vm_start < pvma->vm_start) {
342                         p = &(*p)->rb_left;
343                 }
344                 else if (vma->vm_start > pvma->vm_start) {
345                         p = &(*p)->rb_right;
346                 }
347                 else {
348                         /* mappings are at the same address - this can only
349                          * happen for shared-mem chardevs and shared file
350                          * mappings backed by ramfs/tmpfs */
351                         BUG_ON(!(pvma->vm_flags & VM_SHARED));
352
353                         if (vma < pvma)
354                                 p = &(*p)->rb_left;
355                         else if (vma > pvma)
356                                 p = &(*p)->rb_right;
357                         else
358                                 BUG();
359                 }
360         }
361
362         rb_link_node(&vma->vm_rb, parent, p);
363         rb_insert_color(&vma->vm_rb, &nommu_vma_tree);
364 }
365
366 static void delete_nommu_vma(struct vm_area_struct *vma)
367 {
368         struct address_space *mapping;
369
370         /* remove the VMA from the mapping */
371         if (vma->vm_file) {
372                 mapping = vma->vm_file->f_mapping;
373
374                 flush_dcache_mmap_lock(mapping);
375                 vma_prio_tree_remove(vma, &mapping->i_mmap);
376                 flush_dcache_mmap_unlock(mapping);
377         }
378
379         /* remove from the master list */
380         rb_erase(&vma->vm_rb, &nommu_vma_tree);
381 }
382
383 /*
384  * handle mapping creation for uClinux
385  */
386 unsigned long do_mmap_pgoff(struct file *file,
387                             unsigned long addr,
388                             unsigned long len,
389                             unsigned long prot,
390                             unsigned long flags,
391                             unsigned long pgoff)
392 {
393         struct vm_list_struct *vml = NULL;
394         struct vm_area_struct *vma = NULL;
395         struct rb_node *rb;
396         unsigned int vm_flags;
397         void *result;
398         int ret, membacked;
399
400         /* do the simple checks first */
401         if (flags & MAP_FIXED || addr) {
402                 printk(KERN_DEBUG "%d: Can't do fixed-address/overlay mmap of RAM\n",
403                        current->pid);
404                 return -EINVAL;
405         }
406
407         if (PAGE_ALIGN(len) == 0)
408                 return addr;
409
410         if (len > TASK_SIZE)
411                 return -EINVAL;
412
413         /* offset overflow? */
414         if ((pgoff + (len >> PAGE_SHIFT)) < pgoff)
415                 return -EINVAL;
416
417         /* validate file mapping requests */
418         membacked = 0;
419         if (file) {
420                 /* files must support mmap */
421                 if (!file->f_op || !file->f_op->mmap)
422                         return -ENODEV;
423
424                 if ((prot & PROT_EXEC) &&
425                     (file->f_vfsmnt->mnt_flags & MNT_NOEXEC))
426                         return -EPERM;
427
428                 /* work out if what we've got could possibly be shared
429                  * - we support chardevs that provide their own "memory"
430                  * - we support files/blockdevs that are memory backed
431                  */
432                 if (S_ISCHR(file->f_dentry->d_inode->i_mode)) {
433                         membacked = 1;
434                 }
435                 else {
436                         struct address_space *mapping = file->f_mapping;
437                         if (!mapping)
438                                 mapping = file->f_dentry->d_inode->i_mapping;
439                         if (mapping && mapping->backing_dev_info)
440                                 membacked = mapping->backing_dev_info->memory_backed;
441                 }
442
443                 if (flags & MAP_SHARED) {
444                         /* do checks for writing, appending and locking */
445                         if ((prot & PROT_WRITE) && !(file->f_mode & FMODE_WRITE))
446                                 return -EACCES;
447
448                         if (IS_APPEND(file->f_dentry->d_inode) &&
449                             (file->f_mode & FMODE_WRITE))
450                                 return -EACCES;
451
452                         if (locks_verify_locked(file->f_dentry->d_inode))
453                                 return -EAGAIN;
454
455                         if (!membacked) {
456                                 printk("MAP_SHARED not completely supported on !MMU\n");
457                                 return -EINVAL;
458                         }
459
460                         /* we require greater support from the driver or
461                          * filesystem - we ask it to tell us what memory to
462                          * use */
463                         if (!file->f_op->get_unmapped_area)
464                                 return -ENODEV;
465                 }
466                 else {
467                         /* we read private files into memory we allocate */
468                         if (!file->f_op->read)
469                                 return -ENODEV;
470                 }
471         }
472
473         /* handle PROT_EXEC implication by PROT_READ */
474         if ((prot & PROT_READ) && (current->personality & READ_IMPLIES_EXEC))
475                 if (!(file && (file->f_vfsmnt->mnt_flags & MNT_NOEXEC)))
476                         prot |= PROT_EXEC;
477
478         /* do simple checking here so the lower-level routines won't have
479          * to. we assume access permissions have been handled by the open
480          * of the memory object, so we don't do any here.
481          */
482         vm_flags = calc_vm_flags(prot,flags) /* | mm->def_flags */
483                 | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
484
485         if (!membacked) {
486                 /* share any file segment that's mapped read-only */
487                 if (((flags & MAP_PRIVATE) && !(prot & PROT_WRITE) && file) ||
488                     ((flags & MAP_SHARED) && !(prot & PROT_WRITE) && file))
489                         vm_flags |= VM_MAYSHARE;
490
491                 /* refuse to let anyone share files with this process if it's being traced -
492                  * otherwise breakpoints set in it may interfere with another untraced process
493                  */
494                 if (current->ptrace & PT_PTRACED)
495                         vm_flags &= ~(VM_SHARED | VM_MAYSHARE);
496         }
497         else {
498                 /* permit sharing of character devices and ramfs files at any time for
499                  * anything other than a privately writable mapping
500                  */
501                 if (!(flags & MAP_PRIVATE) || !(prot & PROT_WRITE)) {
502                         vm_flags |= VM_MAYSHARE;
503                         if (flags & MAP_SHARED)
504                                 vm_flags |= VM_SHARED;
505                 }
506         }
507
508         /* allow the security API to have its say */
509         ret = security_file_mmap(file, prot, flags);
510         if (ret)
511                 return ret;
512
513         /* we're going to need to record the mapping if it works */
514         vml = kmalloc(sizeof(struct vm_list_struct), GFP_KERNEL);
515         if (!vml)
516                 goto error_getting_vml;
517         memset(vml, 0, sizeof(*vml));
518
519         down_write(&nommu_vma_sem);
520
521         /* if we want to share, we need to search for VMAs created by another
522          * mmap() call that overlap with our proposed mapping
523          * - we can only share with an exact match on most regular files
524          * - shared mappings on character devices and memory backed files are
525  *        permitted to overlap inexactly as far as we are concerned, for in
526  *        these cases sharing is handled in the driver or filesystem rather
527          *   than here
528          */
529         if (vm_flags & VM_MAYSHARE) {
530                 unsigned long pglen = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
531                 unsigned long vmpglen;
532
533                 for (rb = rb_first(&nommu_vma_tree); rb; rb = rb_next(rb)) {
534                         vma = rb_entry(rb, struct vm_area_struct, vm_rb);
535
536                         if (!(vma->vm_flags & VM_MAYSHARE))
537                                 continue;
538
539                         /* search for overlapping mappings on the same file */
540                         if (vma->vm_file->f_dentry->d_inode != file->f_dentry->d_inode)
541                                 continue;
542
543                         if (vma->vm_pgoff >= pgoff + pglen)
544                                 continue;
545
546                         vmpglen = (vma->vm_end - vma->vm_start + PAGE_SIZE - 1) >> PAGE_SHIFT;
547                         if (pgoff >= vma->vm_pgoff + vmpglen)
548                                 continue;
549
550                         /* handle inexact matches between mappings */
551                         if (vmpglen != pglen || vma->vm_pgoff != pgoff) {
552                                 if (!membacked)
553                                         goto sharing_violation;
554                                 continue;
555                         }
556
557                         /* we've found a VMA we can share */
558                         atomic_inc(&vma->vm_usage);
559
560                         vml->vma = vma;
561                         result = (void *) vma->vm_start;
562                         goto shared;
563                 }
564         }
565
566         vma = NULL;
567
568         /* obtain the address to map to. we verify (or select) it and ensure
569          * that it represents a valid section of the address space
570          * - this is the hook for quasi-memory character devices
571          */
572         if (file && file->f_op->get_unmapped_area) {
573                 addr = file->f_op->get_unmapped_area(file, addr, len, pgoff, flags);
574                 if (IS_ERR((void *) addr)) {
575                         ret = addr;
576                         if (ret == (unsigned long) -ENOSYS)
577                                 ret = (unsigned long) -ENODEV;
578                         goto error;
579                 }
580         }
581
582         /* we're going to need a VMA struct as well */
583         vma = kmalloc(sizeof(struct vm_area_struct), GFP_KERNEL);
584         if (!vma)
585                 goto error_getting_vma;
586
587         memset(vma, 0, sizeof(*vma));
588         INIT_LIST_HEAD(&vma->anon_vma_node);
589         atomic_set(&vma->vm_usage, 1);
590         if (file)
591                 get_file(file);
592         vma->vm_file    = file;
593         vma->vm_flags   = vm_flags;
594         vma->vm_start   = addr;
595         vma->vm_end     = addr + len;
596         vma->vm_pgoff   = pgoff;
597
598         vml->vma = vma;
599
600         /* determine the object being mapped and call the appropriate specific
601          * mapper.
602          */
603         if (file) {
604 #ifdef MAGIC_ROM_PTR
605                 /* First, try simpler routine designed to give us a ROM pointer. */
606                 if (file->f_op->romptr && !(prot & PROT_WRITE)) {
607                         ret = file->f_op->romptr(file, vma);
608 #ifdef DEBUG
609                         printk("romptr mmap returned %d (st=%lx)\n",
610                                ret, vma->vm_start);
611 #endif
612                         result = (void *) vma->vm_start;
613                         if (!ret)
614                                 goto done;
615                         else if (ret != -ENOSYS)
616                                 goto error;
617                 } else
618 #endif /* MAGIC_ROM_PTR */
619                 /* Then try full mmap routine, which might return a RAM
620                  * pointer, or do something truly complicated
621                  */
622                 if (file->f_op->mmap) {
623                         ret = file->f_op->mmap(file, vma);
624
625 #ifdef DEBUG
626                         printk("f_op->mmap() returned %d (st=%lx)\n",
627                                ret, vma->vm_start);
628 #endif
629                         result = (void *) vma->vm_start;
630                         if (!ret)
631                                 goto done;
632                         else if (ret != -ENOSYS)
633                                 goto error;
634                 } else {
635                         ret = -ENODEV; /* No mapping operations defined */
636                         goto error;
637                 }
638
639                 /* An ENOSYS error indicates that mmap isn't possible (as
640                  * opposed to tried but failed) so we'll fall through to the
641                  * copy. */
642         }
643
644         /* allocate some memory to hold the mapping
645          * - note that this may not return a page-aligned address if the object
646          *   we're allocating is smaller than a page
647          */
648         ret = -ENOMEM;
649         result = kmalloc(len, GFP_KERNEL);
650         if (!result) {
651                 printk("Allocation of length %lu from process %d failed\n",
652                        len, current->pid);
653                 show_free_areas();
654                 goto error;
655         }
656
657         vma->vm_start = (unsigned long) result;
658         vma->vm_end = vma->vm_start + len;
659
660 #ifdef WARN_ON_SLACK
661         if (len + WARN_ON_SLACK <= kobjsize(result))
662                 printk("Allocation of %lu bytes from process %d has %lu bytes of slack\n",
663                        len, current->pid, kobjsize(result) - len);
664 #endif
665
666         if (file) {
667                 mm_segment_t old_fs = get_fs();
668                 loff_t fpos;
669
670                 fpos = pgoff;
671                 fpos <<= PAGE_SHIFT;
672
673                 set_fs(KERNEL_DS);
674                 ret = file->f_op->read(file, (char *) result, len, &fpos);
675                 set_fs(old_fs);
676
677                 if (ret < 0)
678                         goto error2;
679                 if (ret < len)
680                         memset(result + ret, 0, len - ret);
681         } else {
682                 memset(result, 0, len);
683         }
684
685         if (prot & PROT_EXEC)
686                 flush_icache_range((unsigned long) result, (unsigned long) result + len);
687
688  done:
689         if (!(vma->vm_flags & VM_SHARED)) {
690                 realalloc += kobjsize(result);
691                 askedalloc += len;
692         }
693
694         realalloc += kobjsize(vma);
695         askedalloc += sizeof(*vma);
696
697         vx_vmpages_add(current->mm, len >> PAGE_SHIFT);
698
699         add_nommu_vma(vma);
700  shared:
701         realalloc += kobjsize(vml);
702         askedalloc += sizeof(*vml);
703
704         vml->next = current->mm->context.vmlist;
705         current->mm->context.vmlist = vml;
706
707         up_write(&nommu_vma_sem);
708
709 #ifdef DEBUG
710         printk("do_mmap:\n");
711         show_process_blocks();
712 #endif
713
714         return (unsigned long) result;
715
716  error2:
717         kfree(result);
718  error:
719         up_write(&nommu_vma_sem);
720         kfree(vml);
721         if (vma) {
722                 fput(vma->vm_file);
723                 kfree(vma);
724         }
725         return ret;
726
727  sharing_violation:
728         up_write(&nommu_vma_sem);
729         printk("Attempt to share mismatched mappings\n");
730         kfree(vml);
731         return -EINVAL;
732
733  error_getting_vma:
734         up_write(&nommu_vma_sem);
735         kfree(vml);
736         printk("Allocation of vma for %lu byte allocation from process %d failed\n",
737                len, current->pid);
738         show_free_areas();
739         return -ENOMEM;
740
741  error_getting_vml:
742         printk("Allocation of vml for %lu byte allocation from process %d failed\n",
743                len, current->pid);
744         show_free_areas();
745         return -ENOMEM;
746 }
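/*
 * Editorial sketch, not part of the original file: for MAP_SHARED to work on
 * nommu, the driver or filesystem must hand out the backing memory itself via
 * f_op->get_unmapped_area and then accept the mapping in f_op->mmap.  A
 * hypothetical character device (exdev_buffer and exdev_pages are invented
 * names) might implement that roughly as:
 *
 *	static unsigned long exdev_get_unmapped_area(struct file *file,
 *			unsigned long addr, unsigned long len,
 *			unsigned long pgoff, unsigned long flags)
 *	{
 *		if (pgoff + (len >> PAGE_SHIFT) > exdev_pages)
 *			return (unsigned long) -EINVAL;
 *		return (unsigned long) exdev_buffer + (pgoff << PAGE_SHIFT);
 *	}
 *
 *	static int exdev_mmap(struct file *file, struct vm_area_struct *vma)
 *	{
 *		return 0;	(vma->vm_start already points into exdev_buffer)
 *	}
 */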
747
748 /*
749  * handle mapping disposal for uClinux
750  */
751 static void put_vma(struct vm_area_struct *vma)
752 {
753         if (vma) {
754                 down_write(&nommu_vma_sem);
755
756                 if (atomic_dec_and_test(&vma->vm_usage)) {
757                         delete_nommu_vma(vma);
758
759                         if (vma->vm_ops && vma->vm_ops->close)
760                                 vma->vm_ops->close(vma);
761
762                         /* IO memory and memory shared directly out of the pagecache from
763                          * ramfs/tmpfs mustn't be released here */
764                         if (!(vma->vm_flags & (VM_IO | VM_SHARED)) && vma->vm_start) {
765                                 realalloc -= kobjsize((void *) vma->vm_start);
766                                 askedalloc -= vma->vm_end - vma->vm_start;
767                                 kfree((void *) vma->vm_start);
768                         }
769
770                         realalloc -= kobjsize(vma);
771                         askedalloc -= sizeof(*vma);
772
773                         if (vma->vm_file)
774                                 fput(vma->vm_file);
775                         kfree(vma);
776                 }
777
778                 up_write(&nommu_vma_sem);
779         }
780 }
781
782 int do_munmap(struct mm_struct *mm, unsigned long addr, size_t len)
783 {
784         struct vm_list_struct *vml, **parent;
785         unsigned long end = addr + len;
786
787 #ifdef MAGIC_ROM_PTR
788         /* For efficiency's sake, if the pointer is obviously in ROM,
789            don't bother walking the lists to free it */
790         if (is_in_rom(addr))
791                 return 0;
792 #endif
793
794 #ifdef DEBUG
795         printk("do_munmap:\n");
796 #endif
797
798         for (parent = &mm->context.vmlist; *parent; parent = &(*parent)->next)
799                 if ((*parent)->vma->vm_start == addr &&
800                     (*parent)->vma->vm_end == end)
801                         goto found;
802
803         printk("munmap of non-mmapped memory by process %d (%s): %p\n",
804                current->pid, current->comm, (void *) addr);
805         return -EINVAL;
806
807  found:
808         vml = *parent;
809
810         put_vma(vml->vma);
811
812         *parent = vml->next;
813         realalloc -= kobjsize(vml);
814         askedalloc -= sizeof(*vml);
815         kfree(vml);
816         vx_vmpages_sub(mm, len >> PAGE_SHIFT);
817
818 #ifdef DEBUG
819         show_process_blocks();
820 #endif
821
822         return 0;
823 }
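/*
 * Editorial aside, not part of the original file: unlike the MMU case there
 * is no support for partial unmaps; the (addr, len) pair has to match an
 * existing mapping exactly or the call is rejected.  Illustrative only:
 *
 *	void *p = mmap(NULL, 3 * PAGE_SIZE, PROT_READ, MAP_PRIVATE, fd, 0);
 *	munmap(p, 3 * PAGE_SIZE);			succeeds
 *	munmap((char *) p + PAGE_SIZE, PAGE_SIZE);	rejected with EINVAL
 */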
824
825 /* Release all mmaps. */
826 void exit_mmap(struct mm_struct * mm)
827 {
828         struct vm_list_struct *tmp;
829
830         if (mm) {
831 #ifdef DEBUG
832                 printk("Exit_mmap:\n");
833 #endif
834
835                 vx_vmpages_sub(mm, mm->total_vm);
836
837                 while ((tmp = mm->context.vmlist)) {
838                         mm->context.vmlist = tmp->next;
839                         put_vma(tmp->vma);
840
841                         realalloc -= kobjsize(tmp);
842                         askedalloc -= sizeof(*tmp);
843                         kfree(tmp);
844                 }
845
846 #ifdef DEBUG
847                 show_process_blocks();
848 #endif
849         }
850 }
851
852 asmlinkage long sys_munmap(unsigned long addr, size_t len)
853 {
854         int ret;
855         struct mm_struct *mm = current->mm;
856
857         down_write(&mm->mmap_sem);
858         ret = do_munmap(mm, addr, len);
859         up_write(&mm->mmap_sem);
860         return ret;
861 }
862
863 unsigned long do_brk(unsigned long addr, unsigned long len)
864 {
865         return -ENOMEM;
866 }
867
868 /*
869  * Expand (or shrink) an existing mapping, potentially moving it at the
870  * same time (controlled by the MREMAP_MAYMOVE flag and available VM space)
871  *
872  * MREMAP_FIXED option added 5-Dec-1999 by Benjamin LaHaise
873  * This option implies MREMAP_MAYMOVE.
874  *
875  * on uClinux, we only permit changing a mapping's size, and only as long as it stays within the
876  * hole allocated by the kmalloc() call in do_mmap_pgoff() and the block is not shareable
877  */
878 unsigned long do_mremap(unsigned long addr,
879                         unsigned long old_len, unsigned long new_len,
880                         unsigned long flags, unsigned long new_addr)
881 {
882         struct vm_list_struct *vml = NULL;
883
884         /* insanity checks first */
885         if (new_len == 0)
886                 return (unsigned long) -EINVAL;
887
888         if (flags & MREMAP_FIXED && new_addr != addr)
889                 return (unsigned long) -EINVAL;
890
891         for (vml = current->mm->context.vmlist; vml; vml = vml->next)
892                 if (vml->vma->vm_start == addr)
893                         goto found;
894
895         return (unsigned long) -EINVAL;
896
897  found:
898         if (vml->vma->vm_end != vml->vma->vm_start + old_len)
899                 return (unsigned long) -EFAULT;
900
901         if (vml->vma->vm_flags & VM_MAYSHARE)
902                 return (unsigned long) -EPERM;
903
904         if (new_len > kobjsize((void *) addr))
905                 return (unsigned long) -ENOMEM;
906
907         /* all checks complete - do it */
908         vml->vma->vm_end = vml->vma->vm_start + new_len;
909
910         askedalloc -= old_len;
911         askedalloc += new_len;
912
913         return vml->vma->vm_start;
914 }
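/*
 * Editorial worked example, not part of the original file: since a private
 * mapping is backed by one kmalloc() block, it can only grow into the slack
 * of that block.  A 5000 byte mapping will typically sit in an 8192 byte
 * object, so (addresses and sizes purely illustrative):
 *
 *	mremap(addr, 5000, 8000, 0)	succeeds, still within kobjsize()
 *	mremap(addr, 8000, 12000, 0)	fails with -ENOMEM
 *
 * and regions marked VM_MAYSHARE cannot be resized at all.
 */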
915
916 /*
917  * Look up the VMA containing addr,  NULL if none
918  */
919 struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
920 {
921         struct vm_list_struct *vml;
922
923         for (vml = mm->context.vmlist; vml; vml = vml->next)
924                 if (addr >= vml->vma->vm_start && addr < vml->vma->vm_end)
925                         return vml->vma;
926
927         return NULL;
928 }
929
930 EXPORT_SYMBOL(find_vma);
931
932 struct page * follow_page(struct mm_struct *mm, unsigned long addr, int write)
933 {
934         return NULL;
935 }
936
937 struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned long addr)
938 {
939         return NULL;
940 }
941
942 int remap_pfn_range(struct vm_area_struct *vma, unsigned long from,
943                 unsigned long to, unsigned long size, pgprot_t prot)
944 {
945         return -EPERM;
946 }
947
948 void swap_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
949 {
950 }
951
952 unsigned long arch_get_unmapped_area(struct file *file, unsigned long addr,
953         unsigned long len, unsigned long pgoff, unsigned long flags)
954 {
955         return -ENOMEM;
956 }
957
958 void arch_unmap_area(struct vm_area_struct *area)
959 {
960 }
961
962 void update_mem_hiwater(void)
963 {
964         struct task_struct *tsk = current;
965
966         if (likely(tsk->mm)) {
967                 if (tsk->mm->hiwater_rss < tsk->mm->rss)
968                         tsk->mm->hiwater_rss = tsk->mm->rss;
969                 if (tsk->mm->hiwater_vm < tsk->mm->total_vm)
970                         tsk->mm->hiwater_vm = tsk->mm->total_vm;
971         }
972 }
973
974 void unmap_mapping_range(struct address_space *mapping,
975                          loff_t const holebegin, loff_t const holelen,
976                          int even_cows)
977 {
978 }
979
980 /*
981  * Check that a process has enough memory to allocate a new virtual
982  * mapping. 0 means there is enough memory for the allocation to
983  * succeed and -ENOMEM implies there is not.
984  *
985  * We currently support three overcommit policies, which are set via the
986  * vm.overcommit_memory sysctl.  See Documentation/vm/overcommit-accounting
987  *
988  * Strict overcommit modes added 2002 Feb 26 by Alan Cox.
989  * Additional code 2002 Jul 20 by Robert Love.
990  *
991  * cap_sys_admin is 1 if the process has admin privileges, 0 otherwise.
992  *
993  * Note this is a helper function intended to be used by LSMs which
994  * wish to use this logic.
995  */
996 int __vm_enough_memory(long pages, int cap_sys_admin)
997 {
998         unsigned long free, allowed;
999
1000         vm_acct_memory(pages);
1001
1002         /*
1003          * Sometimes we want to use more memory than we have
1004          */
1005         if (sysctl_overcommit_memory == OVERCOMMIT_ALWAYS)
1006                 return 0;
1007
1008         if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) {
1009                 unsigned long n;
1010
1011                 free = get_page_cache_size();
1012                 free += nr_swap_pages;
1013
1014                 /*
1015                  * Any slabs which are created with the
1016                  * SLAB_RECLAIM_ACCOUNT flag claim to have contents
1017                  * which are reclaimable, under pressure.  The dentry
1018          * cache and most inode caches should fall into this category.
1019                  */
1020                 free += atomic_read(&slab_reclaim_pages);
1021
1022                 /*
1023                  * Leave the last 3% for root
1024                  */
1025                 if (!cap_sys_admin)
1026                         free -= free / 32;
1027
1028                 if (free > pages)
1029                         return 0;
1030
1031                 /*
1032                  * nr_free_pages() is very expensive on large systems,
1033                  * only call if we're about to fail.
1034                  */
1035                 n = nr_free_pages();
1036                 if (!cap_sys_admin)
1037                         n -= n / 32;
1038                 free += n;
1039
1040                 if (free > pages)
1041                         return 0;
1042                 vm_unacct_memory(pages);
1043                 return -ENOMEM;
1044         }
1045
1046         allowed = totalram_pages * sysctl_overcommit_ratio / 100;
1047         /*
1048          * Leave the last 3% for root
1049          */
1050         if (!cap_sys_admin)
1051                 allowed -= allowed / 32;
1052         allowed += total_swap_pages;
1053
1054         /* Don't let a single process grow too big:
1055            leave 3% of the size of this process for other processes */
1056         allowed -= current->mm->total_vm / 32;
1057
1058         if (atomic_read(&vm_committed_space) < allowed)
1059                 return 0;
1060
1061         vm_unacct_memory(pages);
1062
1063         return -ENOMEM;
1064 }
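/*
 * Editorial worked example, not part of the original file: with the strict
 * policy selected (vm.overcommit_memory = OVERCOMMIT_NEVER), the default
 * overcommit ratio of 50, 65536 pages of RAM, no swap and no admin
 * privileges, the limit above works out as
 *
 *	allowed  = 65536 * 50 / 100		-> 32768
 *	allowed -= allowed / 32			-> 31744
 *	allowed -= current->mm->total_vm / 32
 *
 * and the allocation is refused once vm_committed_space reaches that figure.
 */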
1065