vserver 2.0-rc4: linux-2.6.git / mm / nommu.c
1 /*
2  *  linux/mm/nommu.c
3  *
4  *  Replacement code for mm functions to support CPUs that don't
5  *  have any form of memory management unit (thus no virtual memory).
6  *
7  *  See Documentation/nommu-mmap.txt
8  *
9  *  Copyright (c) 2004-2005 David Howells <dhowells@redhat.com>
10  *  Copyright (c) 2000-2003 David McCullough <davidm@snapgear.com>
11  *  Copyright (c) 2000-2001 D Jeff Dionne <jeff@uClinux.org>
12  *  Copyright (c) 2002      Greg Ungerer <gerg@snapgear.com>
13  */
14
15 #include <linux/mm.h>
16 #include <linux/mman.h>
17 #include <linux/swap.h>
18 #include <linux/file.h>
19 #include <linux/highmem.h>
20 #include <linux/pagemap.h>
21 #include <linux/slab.h>
22 #include <linux/vmalloc.h>
23 #include <linux/ptrace.h>
24 #include <linux/blkdev.h>
25 #include <linux/backing-dev.h>
26 #include <linux/mount.h>
27 #include <linux/personality.h>
28 #include <linux/security.h>
29 #include <linux/syscalls.h>
30
31 #include <asm/uaccess.h>
32 #include <asm/tlb.h>
33 #include <asm/tlbflush.h>
34
35 void *high_memory;
36 struct page *mem_map;
37 unsigned long max_mapnr;
38 unsigned long num_physpages;
39 unsigned long askedalloc, realalloc;
40 atomic_t vm_committed_space = ATOMIC_INIT(0);
41 int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */
42 int sysctl_overcommit_ratio = 50; /* default is 50% */
43 int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT;
44 int heap_stack_gap = 0;
45
46 EXPORT_SYMBOL(mem_map);
47 EXPORT_SYMBOL(sysctl_max_map_count);
48 EXPORT_SYMBOL(sysctl_overcommit_memory);
49 EXPORT_SYMBOL(sysctl_overcommit_ratio);
50 EXPORT_SYMBOL(vm_committed_space);
51 EXPORT_SYMBOL(__vm_enough_memory);
52
53 /* list of shareable VMAs */
54 struct rb_root nommu_vma_tree = RB_ROOT;
55 DECLARE_RWSEM(nommu_vma_sem);
56
57 struct vm_operations_struct generic_file_vm_ops = {
58 };
59
60 /*
61  * Handle all mappings that got truncated by a "truncate()"
62  * system call.
63  *
64  * NOTE! We have to be ready to update the memory sharing
65  * between the file and the memory map for a potential last
66  * incomplete page.  Ugly, but necessary.
67  */
68 int vmtruncate(struct inode *inode, loff_t offset)
69 {
70         struct address_space *mapping = inode->i_mapping;
71         unsigned long limit;
72
73         if (inode->i_size < offset)
74                 goto do_expand;
75         i_size_write(inode, offset);
76
77         truncate_inode_pages(mapping, offset);
78         goto out_truncate;
79
80 do_expand:
81         limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
82         if (limit != RLIM_INFINITY && offset > limit)
83                 goto out_sig;
84         if (offset > inode->i_sb->s_maxbytes)
85                 goto out;
86         i_size_write(inode, offset);
87
88 out_truncate:
89         if (inode->i_op && inode->i_op->truncate)
90                 inode->i_op->truncate(inode);
91         return 0;
92 out_sig:
93         send_sig(SIGXFSZ, current, 0);
94 out:
95         return -EFBIG;
96 }
97
98 EXPORT_SYMBOL(vmtruncate);
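/*
 * Editorial aside, not part of the original file: a minimal sketch of how a
 * filesystem's setattr path typically reaches vmtruncate() when a size change
 * is requested (the surrounding code and variable names are illustrative
 * only):
 *
 *	if (attr->ia_valid & ATTR_SIZE) {
 *		int err = vmtruncate(inode, attr->ia_size);
 *		if (err)
 *			return err;
 *	}
 */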
99
100 /*
101  * Return the total memory allocated for this pointer, not
102  * just what the caller asked for.
103  *
104  * Doesn't have to be accurate, i.e. may have races.
105  */
106 unsigned int kobjsize(const void *objp)
107 {
108         struct page *page;
109
110         if (!objp || !((page = virt_to_page(objp))))
111                 return 0;
112
113         if (PageSlab(page))
114                 return ksize(objp);
115
116         BUG_ON(page->index < 0);
117         BUG_ON(page->index >= MAX_ORDER);
118
119         return (PAGE_SIZE << page->index);
120 }
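/*
 * Editorial aside, not part of the original file: kobjsize() reports the size
 * of the underlying allocation rather than the length the caller asked for,
 * which is what the askedalloc/realalloc accounting below relies on.  An
 * illustrative sketch (the 1024 figure assumes a standard slab size-1024
 * cache):
 *
 *	char *buf = kmalloc(1000, GFP_KERNEL);
 *	if (buf) {
 *		unsigned int real = kobjsize(buf);	(1024 here, not 1000)
 *		kfree(buf);
 *	}
 */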
121
122 /*
123  * The nommu dodgy version :-)
124  */
125 int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
126         unsigned long start, int len, int write, int force,
127         struct page **pages, struct vm_area_struct **vmas)
128 {
129         int i;
130         static struct vm_area_struct dummy_vma;
131
132         for (i = 0; i < len; i++) {
133                 if (pages) {
134                         pages[i] = virt_to_page(start);
135                         if (pages[i])
136                                 page_cache_get(pages[i]);
137                 }
138                 if (vmas)
139                         vmas[i] = &dummy_vma;
140                 start += PAGE_SIZE;
141         }
142         return(i);
143 }
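/*
 * Editorial aside, not part of the original file: callers use this the same
 * way as on MMU kernels; here each returned page is simply the physical page
 * behind the flat address.  A hedged sketch for a hypothetical user buffer
 * ubuf:
 *
 *	struct page *pages[4];
 *	int n;
 *
 *	down_read(&current->mm->mmap_sem);
 *	n = get_user_pages(current, current->mm, (unsigned long) ubuf,
 *			   4, 1, 0, pages, NULL);
 *	up_read(&current->mm->mmap_sem);
 *	while (n-- > 0)
 *		page_cache_release(pages[n]);
 */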
144
145 DEFINE_RWLOCK(vmlist_lock);
146 struct vm_struct *vmlist;
147
148 void vfree(void *addr)
149 {
150         kfree(addr);
151 }
152
153 void *__vmalloc(unsigned long size, int gfp_mask, pgprot_t prot)
154 {
155         /*
156          * kmalloc cannot allocate from highmem, so strip off __GFP_HIGHMEM
157          */
158         return kmalloc(size, gfp_mask & ~__GFP_HIGHMEM);
159 }
160
161 struct page * vmalloc_to_page(void *addr)
162 {
163         return virt_to_page(addr);
164 }
165
166 unsigned long vmalloc_to_pfn(void *addr)
167 {
168         return page_to_pfn(virt_to_page(addr));
169 }
170
171
172 long vread(char *buf, char *addr, unsigned long count)
173 {
174         memcpy(buf, addr, count);
175         return count;
176 }
177
178 long vwrite(char *buf, char *addr, unsigned long count)
179 {
180         /* Don't allow overflow */
181         if ((unsigned long) addr + count < count)
182                 count = -(unsigned long) addr;
183
184         memcpy(addr, buf, count);
185         return(count);
186 }
187
188 /*
189  *      vmalloc  -  allocate virtually contiguous memory
190  *
191  *      @size:          allocation size
192  *
193  *      Allocate enough pages to cover @size from the page level
194  *      allocator and map them into contiguous kernel virtual space.
195  *
196  *      For tight control over the page level allocator and protection flags
197  *      use __vmalloc() instead.
198  */
199 void *vmalloc(unsigned long size)
200 {
201        return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL);
202 }
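/*
 * Editorial aside, not part of the original file: on nommu, vmalloc() is just
 * kmalloc() in disguise, so the memory is physically contiguous and the
 * largest possible allocation is whatever the slab/page allocator can supply
 * in one piece, far smaller than on MMU kernels.  Illustrative sketch:
 *
 *	void *p = vmalloc(64 * 1024);	(behaves much like kmalloc(64K))
 *	if (p) {
 *		...
 *		vfree(p);		(ends up in kfree())
 *	}
 */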
203
204 /*
205  *      vmalloc_32  -  allocate virtually contiguous memory (32-bit addressable)
206  *
207  *      @size:          allocation size
208  *
209  *      Allocate enough 32-bit physically addressable pages to cover @size from
210  *      the page level allocator and map them into contiguous kernel virtual space.
211  */
212 void *vmalloc_32(unsigned long size)
213 {
214         return __vmalloc(size, GFP_KERNEL, PAGE_KERNEL);
215 }
216
217 void *vmap(struct page **pages, unsigned int count, unsigned long flags, pgprot_t prot)
218 {
219         BUG();
220         return NULL;
221 }
222
223 void vunmap(void *addr)
224 {
225         BUG();
226 }
227
228 /*
229  *  sys_brk() for the most part doesn't need the global kernel
230  *  lock, except when an application is doing something nasty
231  *  like trying to un-brk an area that has already been mapped
232  *  to a regular file.  in this case, the unmapping will need
233  *  to invoke file system routines that need the global lock.
234  */
235 asmlinkage unsigned long sys_brk(unsigned long brk)
236 {
237         struct mm_struct *mm = current->mm;
238
239         if (brk < mm->start_brk || brk > mm->context.end_brk)
240                 return mm->brk;
241
242         if (mm->brk == brk)
243                 return mm->brk;
244
245         /*
246          * Always allow shrinking brk
247          */
248         if (brk <= mm->brk) {
249                 mm->brk = brk;
250                 return brk;
251         }
252
253         /*
254          * Ok, looks good - let it rip.
255          */
256         return mm->brk = brk;
257 }
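/*
 * Editorial aside, not part of the original file: the heap region is carved
 * out once at exec time, so brk() can never grow past mm->context.end_brk;
 * the old break is simply returned instead.  Seen from userspace
 * (illustrative only, huge_amount is a hypothetical value):
 *
 *	void *old = sbrk(0);
 *	if (brk((char *) old + huge_amount) != 0) {
 *		the break did not move; malloc() must fall back to mmap()
 *	}
 */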
258
259 /*
260  * Combine the mmap "prot" and "flags" argument into one "vm_flags" used
261  * internally. Essentially, translate the "PROT_xxx" and "MAP_xxx" bits
262  * into "VM_xxx".
263  */
264 static inline unsigned long calc_vm_flags(unsigned long prot, unsigned long flags)
265 {
266 #define _trans(x,bit1,bit2) \
267 ((bit1==bit2)?(x&bit1):(x&bit1)?bit2:0)
268
269         unsigned long prot_bits, flag_bits;
270         prot_bits =
271                 _trans(prot, PROT_READ, VM_READ) |
272                 _trans(prot, PROT_WRITE, VM_WRITE) |
273                 _trans(prot, PROT_EXEC, VM_EXEC);
274         flag_bits =
275                 _trans(flags, MAP_GROWSDOWN, VM_GROWSDOWN) |
276                 _trans(flags, MAP_DENYWRITE, VM_DENYWRITE) |
277                 _trans(flags, MAP_EXECUTABLE, VM_EXECUTABLE);
278         return prot_bits | flag_bits;
279 #undef _trans
280 }
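/*
 * Editorial worked example, not part of the original file: for a typical
 * private read/write mapping the translation above gives
 *
 *	calc_vm_flags(PROT_READ | PROT_WRITE, MAP_PRIVATE) == VM_READ | VM_WRITE
 *
 * since PROT_READ/PROT_WRITE/PROT_EXEC share their bit values with the
 * corresponding VM_xxx flags, while MAP_PRIVATE has no VM_xxx counterpart and
 * only GROWSDOWN, DENYWRITE and EXECUTABLE are carried over from flags.
 */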
281
282 #ifdef DEBUG
283 static void show_process_blocks(void)
284 {
285         struct vm_list_struct *vml;
286
287         printk("Process blocks %d:", current->pid);
288
289         for (vml = &current->mm->context.vmlist; vml; vml = vml->next) {
290                 printk(" %p: %p", vml, vml->vma);
291                 if (vml->vma)
292                         printk(" (%d @%lx #%d)",
293                                kobjsize((void *) vml->vma->vm_start),
294                                vml->vma->vm_start,
295                                atomic_read(&vml->vma->vm_usage));
296                 printk(vml->next ? " ->" : ".\n");
297         }
298 }
299 #endif /* DEBUG */
300
301 static inline struct vm_area_struct *find_nommu_vma(unsigned long start)
302 {
303         struct vm_area_struct *vma;
304         struct rb_node *n = nommu_vma_tree.rb_node;
305
306         while (n) {
307                 vma = rb_entry(n, struct vm_area_struct, vm_rb);
308
309                 if (start < vma->vm_start)
310                         n = n->rb_left;
311                 else if (start > vma->vm_start)
312                         n = n->rb_right;
313                 else
314                         return vma;
315         }
316
317         return NULL;
318 }
319
320 static void add_nommu_vma(struct vm_area_struct *vma)
321 {
322         struct vm_area_struct *pvma;
323         struct address_space *mapping;
324         struct rb_node **p = &nommu_vma_tree.rb_node;
325         struct rb_node *parent = NULL;
326
327         /* add the VMA to the mapping */
328         if (vma->vm_file) {
329                 mapping = vma->vm_file->f_mapping;
330
331                 flush_dcache_mmap_lock(mapping);
332                 vma_prio_tree_insert(vma, &mapping->i_mmap);
333                 flush_dcache_mmap_unlock(mapping);
334         }
335
336         /* add the VMA to the master list */
337         while (*p) {
338                 parent = *p;
339                 pvma = rb_entry(parent, struct vm_area_struct, vm_rb);
340
341                 if (vma->vm_start < pvma->vm_start) {
342                         p = &(*p)->rb_left;
343                 }
344                 else if (vma->vm_start > pvma->vm_start) {
345                         p = &(*p)->rb_right;
346                 }
347                 else {
348                         /* mappings are at the same address - this can only
349                          * happen for shared-mem chardevs and shared file
350                          * mappings backed by ramfs/tmpfs */
351                         BUG_ON(!(pvma->vm_flags & VM_SHARED));
352
353                         if (vma < pvma)
354                                 p = &(*p)->rb_left;
355                         else if (vma > pvma)
356                                 p = &(*p)->rb_right;
357                         else
358                                 BUG();
359                 }
360         }
361
362         rb_link_node(&vma->vm_rb, parent, p);
363         rb_insert_color(&vma->vm_rb, &nommu_vma_tree);
364 }
365
366 static void delete_nommu_vma(struct vm_area_struct *vma)
367 {
368         struct address_space *mapping;
369
370         /* remove the VMA from the mapping */
371         if (vma->vm_file) {
372                 mapping = vma->vm_file->f_mapping;
373
374                 flush_dcache_mmap_lock(mapping);
375                 vma_prio_tree_remove(vma, &mapping->i_mmap);
376                 flush_dcache_mmap_unlock(mapping);
377         }
378
379         /* remove from the master list */
380         rb_erase(&vma->vm_rb, &nommu_vma_tree);
381 }
382
383 /*
384  * handle mapping creation for uClinux
385  */
386 unsigned long do_mmap_pgoff(struct file *file,
387                             unsigned long addr,
388                             unsigned long len,
389                             unsigned long prot,
390                             unsigned long flags,
391                             unsigned long pgoff)
392 {
393         struct vm_list_struct *vml = NULL;
394         struct vm_area_struct *vma = NULL;
395         struct rb_node *rb;
396         unsigned int vm_flags;
397         void *result;
398         int ret, membacked;
399
400         /* do the simple checks first */
401         if (flags & MAP_FIXED || addr) {
402                 printk(KERN_DEBUG "%d: Can't do fixed-address/overlay mmap of RAM\n",
403                        current->pid);
404                 return -EINVAL;
405         }
406
407         if (PAGE_ALIGN(len) == 0)
408                 return addr;
409
410         if (len > TASK_SIZE)
411                 return -EINVAL;
412
413         /* offset overflow? */
414         if ((pgoff + (len >> PAGE_SHIFT)) < pgoff)
415                 return -EINVAL;
416
417         /* validate file mapping requests */
418         membacked = 0;
419         if (file) {
420                 /* files must support mmap */
421                 if (!file->f_op || !file->f_op->mmap)
422                         return -ENODEV;
423
424                 if ((prot & PROT_EXEC) &&
425                     (file->f_vfsmnt->mnt_flags & MNT_NOEXEC))
426                         return -EPERM;
427
428                 /* work out if what we've got could possibly be shared
429                  * - we support chardevs that provide their own "memory"
430                  * - we support files/blockdevs that are memory backed
431                  */
432                 if (S_ISCHR(file->f_dentry->d_inode->i_mode)) {
433                         membacked = 1;
434                 }
435                 else {
436                         struct address_space *mapping = file->f_mapping;
437                         if (!mapping)
438                                 mapping = file->f_dentry->d_inode->i_mapping;
439                         if (mapping && mapping->backing_dev_info)
440                                 membacked = mapping->backing_dev_info->memory_backed;
441                 }
442
443                 if (flags & MAP_SHARED) {
444                         /* do checks for writing, appending and locking */
445                         if ((prot & PROT_WRITE) && !(file->f_mode & FMODE_WRITE))
446                                 return -EACCES;
447
448                         if (IS_APPEND(file->f_dentry->d_inode) &&
449                             (file->f_mode & FMODE_WRITE))
450                                 return -EACCES;
451
452                         if (locks_verify_locked(file->f_dentry->d_inode))
453                                 return -EAGAIN;
454
455                         if (!membacked) {
456                                 printk("MAP_SHARED not completely supported on !MMU\n");
457                                 return -EINVAL;
458                         }
459
460                         /* we require greater support from the driver or
461                          * filesystem - we ask it to tell us what memory to
462                          * use */
463                         if (!file->f_op->get_unmapped_area)
464                                 return -ENODEV;
465                 }
466                 else {
467                         /* we read private files into memory we allocate */
468                         if (!file->f_op->read)
469                                 return -ENODEV;
470                 }
471         }
472
473         /* handle PROT_EXEC implication by PROT_READ */
474         if ((prot & PROT_READ) && (current->personality & READ_IMPLIES_EXEC))
475                 if (!(file && (file->f_vfsmnt->mnt_flags & MNT_NOEXEC)))
476                         prot |= PROT_EXEC;
477
478         /* do simple checking here so the lower-level routines won't have
479          * to. we assume access permissions have been handled by the open
480          * of the memory object, so we don't do any here.
481          */
482         vm_flags = calc_vm_flags(prot,flags) /* | mm->def_flags */
483                 | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
484
485         if (!membacked) {
486                 /* share any file segment that's mapped read-only */
487                 if (((flags & MAP_PRIVATE) && !(prot & PROT_WRITE) && file) ||
488                     ((flags & MAP_SHARED) && !(prot & PROT_WRITE) && file))
489                         vm_flags |= VM_MAYSHARE;
490
491                 /* refuse to let anyone share files with this process if it's being traced -
492                  * otherwise breakpoints set in it may interfere with another untraced process
493                  */
494                 if (current->ptrace & PT_PTRACED)
495                         vm_flags &= ~(VM_SHARED | VM_MAYSHARE);
496         }
497         else {
498                 /* permit sharing of character devices and ramfs files at any time for
499                  * anything other than a privately writable mapping
500                  */
501                 if (!(flags & MAP_PRIVATE) || !(prot & PROT_WRITE)) {
502                         vm_flags |= VM_MAYSHARE;
503                         if (flags & MAP_SHARED)
504                                 vm_flags |= VM_SHARED;
505                 }
506         }
507
508         /* allow the security API to have its say */
509         ret = security_file_mmap(file, prot, flags);
510         if (ret)
511                 return ret;
512
513         /* we're going to need to record the mapping if it works */
514         vml = kmalloc(sizeof(struct vm_list_struct), GFP_KERNEL);
515         if (!vml)
516                 goto error_getting_vml;
517         memset(vml, 0, sizeof(*vml));
518
519         down_write(&nommu_vma_sem);
520
521         /* if we want to share, we need to search for VMAs created by another
522          * mmap() call that overlap with our proposed mapping
523          * - we can only share with an exact match on most regular files
524          * - shared mappings on character devices and memory backed files are
525  *        permitted to overlap inexactly as far as we are concerned, for in
526  *        these cases sharing is handled in the driver or filesystem rather
527          *   than here
528          */
529         if (vm_flags & VM_MAYSHARE) {
530                 unsigned long pglen = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
531                 unsigned long vmpglen;
532
533                 for (rb = rb_first(&nommu_vma_tree); rb; rb = rb_next(rb)) {
534                         vma = rb_entry(rb, struct vm_area_struct, vm_rb);
535
536                         if (!(vma->vm_flags & VM_MAYSHARE))
537                                 continue;
538
539                         /* search for overlapping mappings on the same file */
540                         if (vma->vm_file->f_dentry->d_inode != file->f_dentry->d_inode)
541                                 continue;
542
543                         if (vma->vm_pgoff >= pgoff + pglen)
544                                 continue;
545
546                         vmpglen = (vma->vm_end - vma->vm_start + PAGE_SIZE - 1) >> PAGE_SHIFT;
547                         if (pgoff >= vma->vm_pgoff + vmpglen)
548                                 continue;
549
550                         /* handle inexact matches between mappings */
551                         if (vmpglen != pglen || vma->vm_pgoff != pgoff) {
552                                 if (!membacked)
553                                         goto sharing_violation;
554                                 continue;
555                         }
556
557                         /* we've found a VMA we can share */
558                         atomic_inc(&vma->vm_usage);
559
560                         vml->vma = vma;
561                         result = (void *) vma->vm_start;
562                         goto shared;
563                 }
564         }
565
566         vma = NULL;
567
568         /* obtain the address to map to. we verify (or select) it and ensure
569          * that it represents a valid section of the address space
570          * - this is the hook for quasi-memory character devices
571          */
572         if (file && file->f_op->get_unmapped_area) {
573                 addr = file->f_op->get_unmapped_area(file, addr, len, pgoff, flags);
574                 if (IS_ERR((void *) addr)) {
575                         ret = addr;
576                         if (ret == (unsigned long) -ENOSYS)
577                                 ret = (unsigned long) -ENODEV;
578                         goto error;
579                 }
580         }
581
582         /* we're going to need a VMA struct as well */
583         vma = kmalloc(sizeof(struct vm_area_struct), GFP_KERNEL);
584         if (!vma)
585                 goto error_getting_vma;
586
587         memset(vma, 0, sizeof(*vma));
588         INIT_LIST_HEAD(&vma->anon_vma_node);
589         atomic_set(&vma->vm_usage, 1);
590         if (file)
591                 get_file(file);
592         vma->vm_file    = file;
593         vma->vm_flags   = vm_flags;
594         vma->vm_start   = addr;
595         vma->vm_end     = addr + len;
596         vma->vm_pgoff   = pgoff;
597
598         vml->vma = vma;
599
600         /* determine the object being mapped and call the appropriate specific
601          * mapper.
602          */
603         if (file) {
604 #ifdef MAGIC_ROM_PTR
605                 /* First, try simpler routine designed to give us a ROM pointer. */
606                 if (file->f_op->romptr && !(prot & PROT_WRITE)) {
607                         ret = file->f_op->romptr(file, vma);
608 #ifdef DEBUG
609                         printk("romptr mmap returned %d (st=%lx)\n",
610                                ret, vma->vm_start);
611 #endif
612                         result = (void *) vma->vm_start;
613                         if (!ret)
614                                 goto done;
615                         else if (ret != -ENOSYS)
616                                 goto error;
617                 } else
618 #endif /* MAGIC_ROM_PTR */
619                 /* Then try full mmap routine, which might return a RAM
620                  * pointer, or do something truly complicated
621                  */
622                 if (file->f_op->mmap) {
623                         ret = file->f_op->mmap(file, vma);
624
625 #ifdef DEBUG
626                         printk("f_op->mmap() returned %d (st=%lx)\n",
627                                ret, vma->vm_start);
628 #endif
629                         result = (void *) vma->vm_start;
630                         if (!ret)
631                                 goto done;
632                         else if (ret != -ENOSYS)
633                                 goto error;
634                 } else {
635                         ret = -ENODEV; /* No mapping operations defined */
636                         goto error;
637                 }
638
639                 /* An ENOSYS error indicates that mmap isn't possible (as
640                  * opposed to tried but failed) so we'll fall through to the
641                  * copy. */
642         }
643
644         /* allocate some memory to hold the mapping
645          * - note that this may not return a page-aligned address if the object
646          *   we're allocating is smaller than a page
647          */
648         ret = -ENOMEM;
649         result = kmalloc(len, GFP_KERNEL);
650         if (!result) {
651                 printk("Allocation of length %lu from process %d failed\n",
652                        len, current->pid);
653                 show_free_areas();
654                 goto error;
655         }
656
657         vma->vm_start = (unsigned long) result;
658         vma->vm_end = vma->vm_start + len;
659
660 #ifdef WARN_ON_SLACK
661         if (len + WARN_ON_SLACK <= kobjsize(result))
662                 printk("Allocation of %lu bytes from process %d has %lu bytes of slack\n",
663                        len, current->pid, kobjsize(result) - len);
664 #endif
665
666         if (file) {
667                 mm_segment_t old_fs = get_fs();
668                 loff_t fpos;
669
670                 fpos = pgoff;
671                 fpos <<= PAGE_SHIFT;
672
673                 set_fs(KERNEL_DS);
674                 ret = file->f_op->read(file, (char *) result, len, &fpos);
675                 set_fs(old_fs);
676
677                 if (ret < 0)
678                         goto error2;
679                 if (ret < len)
680                         memset(result + ret, 0, len - ret);
681         } else {
682                 memset(result, 0, len);
683         }
684
685         if (prot & PROT_EXEC)
686                 flush_icache_range((unsigned long) result, (unsigned long) result + len);
687
688  done:
689         if (!(vma->vm_flags & VM_SHARED)) {
690                 realalloc += kobjsize(result);
691                 askedalloc += len;
692         }
693
694         realalloc += kobjsize(vma);
695         askedalloc += sizeof(*vma);
696
697         vx_vmpages_add(current->mm, len >> PAGE_SHIFT);
698
699         add_nommu_vma(vma);
700  shared:
701         realalloc += kobjsize(vml);
702         askedalloc += sizeof(*vml);
703
704         vml->next = current->mm->context.vmlist;
705         current->mm->context.vmlist = vml;
706
707         up_write(&nommu_vma_sem);
708
709 #ifdef DEBUG
710         printk("do_mmap:\n");
711         show_process_blocks();
712 #endif
713
714         return (unsigned long) result;
715
716  error2:
717         kfree(result);
718  error:
719         up_write(&nommu_vma_sem);
720         kfree(vml);
721         if (vma) {
722                 fput(vma->vm_file);
723                 kfree(vma);
724         }
725         return ret;
726
727  sharing_violation:
728         up_write(&nommu_vma_sem);
729         printk("Attempt to share mismatched mappings\n");
730         kfree(vml);
731         return -EINVAL;
732
733  error_getting_vma:
734         up_write(&nommu_vma_sem);
735         kfree(vml);
736         printk("Allocation of vma for %lu byte allocation from process %d failed\n",
737                len, current->pid);
738         show_free_areas();
739         return -ENOMEM;
740
741  error_getting_vml:
742         printk("Allocation of vml for %lu byte allocation from process %d failed\n",
743                len, current->pid);
744         show_free_areas();
745         return -ENOMEM;
746 }
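/*
 * Editorial sketch, not part of the original file: for MAP_SHARED to work on
 * nommu, the driver or filesystem must hand out the backing memory itself via
 * f_op->get_unmapped_area and then accept the mapping in f_op->mmap.  A
 * hypothetical character device (exdev_buffer and exdev_pages are invented
 * names) might implement that roughly as:
 *
 *	static unsigned long exdev_get_unmapped_area(struct file *file,
 *			unsigned long addr, unsigned long len,
 *			unsigned long pgoff, unsigned long flags)
 *	{
 *		if (pgoff + (len >> PAGE_SHIFT) > exdev_pages)
 *			return (unsigned long) -EINVAL;
 *		return (unsigned long) exdev_buffer + (pgoff << PAGE_SHIFT);
 *	}
 *
 *	static int exdev_mmap(struct file *file, struct vm_area_struct *vma)
 *	{
 *		return 0;	(vma->vm_start already points into exdev_buffer)
 *	}
 */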
747
748 /*
749  * handle mapping disposal for uClinux
750  */
751 static void put_vma(struct vm_area_struct *vma)
752 {
753         if (vma) {
754                 down_write(&nommu_vma_sem);
755
756                 if (atomic_dec_and_test(&vma->vm_usage)) {
757                         delete_nommu_vma(vma);
758
759                         if (vma->vm_ops && vma->vm_ops->close)
760                                 vma->vm_ops->close(vma);
761
762                         /* IO memory and memory shared directly out of the pagecache from
763                          * ramfs/tmpfs mustn't be released here */
764                         if (!(vma->vm_flags & (VM_IO | VM_SHARED)) && vma->vm_start) {
765                                 realalloc -= kobjsize((void *) vma->vm_start);
766                                 askedalloc -= vma->vm_end - vma->vm_start;
767                                 kfree((void *) vma->vm_start);
768                         }
769
770                         realalloc -= kobjsize(vma);
771                         askedalloc -= sizeof(*vma);
772
773                         if (vma->vm_file)
774                                 fput(vma->vm_file);
775                         kfree(vma);
776                 }
777
778                 up_write(&nommu_vma_sem);
779         }
780 }
781
782 int do_munmap(struct mm_struct *mm, unsigned long addr, size_t len)
783 {
784         struct vm_list_struct *vml, **parent;
785         unsigned long end = addr + len;
786
787 #ifdef MAGIC_ROM_PTR
788         /* For efficiency's sake, if the pointer is obviously in ROM,
789            don't bother walking the lists to free it */
790         if (is_in_rom(addr))
791                 return 0;
792 #endif
793
794 #ifdef DEBUG
795         printk("do_munmap:\n");
796 #endif
797
798         for (parent = &mm->context.vmlist; *parent; parent = &(*parent)->next)
799                 if ((*parent)->vma->vm_start == addr &&
800                     (*parent)->vma->vm_end == end)
801                         goto found;
802
803         printk("munmap of non-mmapped memory by process %d (%s): %p\n",
804                current->pid, current->comm, (void *) addr);
805         return -EINVAL;
806
807  found:
808         vml = *parent;
809
810         put_vma(vml->vma);
811
812         *parent = vml->next;
813         realalloc -= kobjsize(vml);
814         askedalloc -= sizeof(*vml);
815         kfree(vml);
816         vx_vmpages_sub(mm, len >> PAGE_SHIFT);
817
818 #ifdef DEBUG
819         show_process_blocks();
820 #endif
821
822         return 0;
823 }
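/*
 * Editorial aside, not part of the original file: unlike the MMU case there
 * is no support for partial unmaps; the (addr, len) pair has to match an
 * existing mapping exactly or the call is rejected.  Illustrative only:
 *
 *	void *p = mmap(NULL, 3 * PAGE_SIZE, PROT_READ, MAP_PRIVATE, fd, 0);
 *	munmap(p, 3 * PAGE_SIZE);			succeeds
 *	munmap((char *) p + PAGE_SIZE, PAGE_SIZE);	rejected with EINVAL
 */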
824
825 /* Release all mmaps. */
826 void exit_mmap(struct mm_struct * mm)
827 {
828         struct vm_list_struct *tmp;
829
830         if (mm) {
831 #ifdef DEBUG
832                 printk("Exit_mmap:\n");
833 #endif
834
835                 vx_vmpages_sub(mm, mm->total_vm);
836
837                 while ((tmp = mm->context.vmlist)) {
838                         mm->context.vmlist = tmp->next;
839                         put_vma(tmp->vma);
840
841                         realalloc -= kobjsize(tmp);
842                         askedalloc -= sizeof(*tmp);
843                         kfree(tmp);
844                 }
845
846 #ifdef DEBUG
847                 show_process_blocks();
848 #endif
849         }
850 }
851
852 asmlinkage long sys_munmap(unsigned long addr, size_t len)
853 {
854         int ret;
855         struct mm_struct *mm = current->mm;
856
857         down_write(&mm->mmap_sem);
858         ret = do_munmap(mm, addr, len);
859         up_write(&mm->mmap_sem);
860         return ret;
861 }
862
863 unsigned long do_brk(unsigned long addr, unsigned long len)
864 {
865         return -ENOMEM;
866 }
867
868 /*
869  * Expand (or shrink) an existing mapping, potentially moving it at the
870  * same time (controlled by the MREMAP_MAYMOVE flag and available VM space)
871  *
872  * MREMAP_FIXED option added 5-Dec-1999 by Benjamin LaHaise
873  * This option implies MREMAP_MAYMOVE.
874  *
875  * on uClinux, we only permit changing a mapping's size, and only as long as it stays within the
876  * hole allocated by the kmalloc() call in do_mmap_pgoff() and the block is not shareable
877  */
878 unsigned long do_mremap(unsigned long addr,
879                         unsigned long old_len, unsigned long new_len,
880                         unsigned long flags, unsigned long new_addr)
881 {
882         struct vm_list_struct *vml = NULL;
883
884         /* insanity checks first */
885         if (new_len == 0)
886                 return (unsigned long) -EINVAL;
887
888         if (flags & MREMAP_FIXED && new_addr != addr)
889                 return (unsigned long) -EINVAL;
890
891         for (vml = current->mm->context.vmlist; vml; vml = vml->next)
892                 if (vml->vma->vm_start == addr)
893                         goto found;
894
895         return (unsigned long) -EINVAL;
896
897  found:
898         if (vml->vma->vm_end != vml->vma->vm_start + old_len)
899                 return (unsigned long) -EFAULT;
900
901         if (vml->vma->vm_flags & VM_MAYSHARE)
902                 return (unsigned long) -EPERM;
903
904         if (new_len > kobjsize((void *) addr))
905                 return (unsigned long) -ENOMEM;
906
907         /* all checks complete - do it */
908         vml->vma->vm_end = vml->vma->vm_start + new_len;
909
910         askedalloc -= old_len;
911         askedalloc += new_len;
912
913         return vml->vma->vm_start;
914 }
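/*
 * Editorial worked example, not part of the original file: since a private
 * mapping is backed by one kmalloc() block, it can only grow into the slack
 * of that block.  A 5000 byte mapping will typically sit in an 8192 byte
 * object, so (addresses and sizes purely illustrative):
 *
 *	mremap(addr, 5000, 8000, 0)	succeeds, still within kobjsize()
 *	mremap(addr, 8000, 12000, 0)	fails with -ENOMEM
 *
 * and regions marked VM_MAYSHARE cannot be resized at all.
 */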
915
916 /*
917  * Look up the VMA containing addr,  NULL if none
918  */
919 struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
920 {
921         struct vm_list_struct *vml;
922
923         for (vml = mm->context.vmlist; vml; vml = vml->next)
924                 if (addr >= vml->vma->vm_start && addr < vml->vma->vm_end)
925                         return vml->vma;
926
927         return NULL;
928 }
929
930 EXPORT_SYMBOL(find_vma);
931
932 struct page * follow_page(struct mm_struct *mm, unsigned long addr, int write)
933 {
934         return NULL;
935 }
936
937 struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned long addr)
938 {
939         return NULL;
940 }
941
942 int remap_pfn_range(struct vm_area_struct *vma, unsigned long from,
943                 unsigned long to, unsigned long size, pgprot_t prot)
944 {
945         return -EPERM;
946 }
947
948 void swap_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
949 {
950 }
951
952 unsigned long arch_get_unmapped_area(struct file *file, unsigned long addr,
953         unsigned long len, unsigned long pgoff, unsigned long flags)
954 {
955         return -ENOMEM;
956 }
957
958 void arch_unmap_area(struct vm_area_struct *area)
959 {
960 }
961
962 void update_mem_hiwater(void)
963 {
964         struct task_struct *tsk = current;
965
966         if (likely(tsk->mm)) {
967                 if (tsk->mm->hiwater_rss < tsk->mm->rss)
968                         tsk->mm->hiwater_rss = tsk->mm->rss;
969                 if (tsk->mm->hiwater_vm < tsk->mm->total_vm)
970                         tsk->mm->hiwater_vm = tsk->mm->total_vm;
971         }
972 }
973
974 void unmap_mapping_range(struct address_space *mapping,
975                          loff_t const holebegin, loff_t const holelen,
976                          int even_cows)
977 {
978 }
979
980 /*
981  * Check that a process has enough memory to allocate a new virtual
982  * mapping. 0 means there is enough memory for the allocation to
983  * succeed and -ENOMEM implies there is not.
984  *
985  * We currently support three overcommit policies, which are set via the
986  * vm.overcommit_memory sysctl.  See Documentation/vm/overcommit-accounting
987  *
988  * Strict overcommit modes added 2002 Feb 26 by Alan Cox.
989  * Additional code 2002 Jul 20 by Robert Love.
990  *
991  * cap_sys_admin is 1 if the process has admin privileges, 0 otherwise.
992  *
993  * Note this is a helper function intended to be used by LSMs which
994  * wish to use this logic.
995  */
996 int __vm_enough_memory(long pages, int cap_sys_admin)
997 {
998         unsigned long free, allowed;
999
1000         vm_acct_memory(pages);
1001
1002         /*
1003          * Sometimes we want to use more memory than we have
1004          */
1005         if (sysctl_overcommit_memory == OVERCOMMIT_ALWAYS)
1006                 return 0;
1007
1008         if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) {
1009                 unsigned long n;
1010
1011                 free = get_page_cache_size();
1012                 free += nr_swap_pages;
1013
1014                 /*
1015                  * Any slabs which are created with the
1016                  * SLAB_RECLAIM_ACCOUNT flag claim to have contents
1017                  * which are reclaimable, under pressure.  The dentry
1018          * cache and most inode caches should fall into this category.
1019                  */
1020                 free += atomic_read(&slab_reclaim_pages);
1021
1022                 /*
1023                  * Leave the last 3% for root
1024                  */
1025                 if (!cap_sys_admin)
1026                         free -= free / 32;
1027
1028                 if (free > pages)
1029                         return 0;
1030
1031                 /*
1032                  * nr_free_pages() is very expensive on large systems,
1033                  * only call if we're about to fail.
1034                  */
1035                 n = nr_free_pages();
1036                 if (!cap_sys_admin)
1037                         n -= n / 32;
1038                 free += n;
1039
1040                 if (free > pages)
1041                         return 0;
1042                 vm_unacct_memory(pages);
1043                 return -ENOMEM;
1044         }
1045
1046         allowed = totalram_pages * sysctl_overcommit_ratio / 100;
1047         /*
1048          * Leave the last 3% for root
1049          */
1050         if (!cap_sys_admin)
1051                 allowed -= allowed / 32;
1052         allowed += total_swap_pages;
1053
1054         /* Don't let a single process grow too big:
1055            leave 3% of the size of this process for other processes */
1056         allowed -= current->mm->total_vm / 32;
1057
1058         if (atomic_read(&vm_committed_space) < allowed)
1059                 return 0;
1060
1061         vm_unacct_memory(pages);
1062
1063         return -ENOMEM;
1064 }
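/*
 * Editorial worked example, not part of the original file: with the strict
 * policy selected (vm.overcommit_memory = OVERCOMMIT_NEVER), the default
 * overcommit ratio of 50, 65536 pages of RAM, no swap and no admin
 * privileges, the limit above works out as
 *
 *	allowed  = 65536 * 50 / 100		-> 32768
 *	allowed -= allowed / 32			-> 31744
 *	allowed -= current->mm->total_vm / 32
 *
 * and the allocation is refused once vm_committed_space reaches that figure.
 */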
1065