1 /*
2  *  linux/mm/nommu.c
3  *
4  *  Replacement code for mm functions to support CPUs that don't
5  *  have any form of memory management unit (thus no virtual memory).
6  *
7  *  See Documentation/nommu-mmap.txt
8  *
9  *  Copyright (c) 2004-2005 David Howells <dhowells@redhat.com>
10  *  Copyright (c) 2000-2003 David McCullough <davidm@snapgear.com>
11  *  Copyright (c) 2000-2001 D Jeff Dionne <jeff@uClinux.org>
12  *  Copyright (c) 2002      Greg Ungerer <gerg@snapgear.com>
13  */
14
15 #include <linux/mm.h>
16 #include <linux/mman.h>
17 #include <linux/swap.h>
18 #include <linux/file.h>
19 #include <linux/highmem.h>
20 #include <linux/pagemap.h>
21 #include <linux/slab.h>
22 #include <linux/vmalloc.h>
23 #include <linux/ptrace.h>
24 #include <linux/blkdev.h>
25 #include <linux/backing-dev.h>
26 #include <linux/mount.h>
27 #include <linux/personality.h>
28 #include <linux/security.h>
29 #include <linux/syscalls.h>
30 #include <linux/vs_base.h>
31 #include <linux/vs_memory.h>
32
33 #include <asm/uaccess.h>
34 #include <asm/tlb.h>
35 #include <asm/tlbflush.h>
36
37 void *high_memory;
38 struct page *mem_map;
39 unsigned long max_mapnr;
40 unsigned long num_physpages;
41 unsigned long askedalloc, realalloc;
42 atomic_t vm_committed_space = ATOMIC_INIT(0);
43 int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */
44 int sysctl_overcommit_ratio = 50; /* default is 50% */
45 int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT;
46 int heap_stack_gap = 0;
47
48 EXPORT_SYMBOL(mem_map);
49 EXPORT_SYMBOL(__vm_enough_memory);
50
51 /* list of shareable VMAs */
52 struct rb_root nommu_vma_tree = RB_ROOT;
53 DECLARE_RWSEM(nommu_vma_sem);
54
55 struct vm_operations_struct generic_file_vm_ops = {
56 };
57
58 EXPORT_SYMBOL(vfree);
59 EXPORT_SYMBOL(vmalloc_to_page);
60 EXPORT_SYMBOL(vmalloc_32);
61 EXPORT_SYMBOL(vmap);
62 EXPORT_SYMBOL(vunmap);
63
64 /*
65  * Handle all mappings that got truncated by a "truncate()"
66  * system call.
67  *
68  * NOTE! We have to be ready to update the memory sharing
69  * between the file and the memory map for a potential last
70  * incomplete page.  Ugly, but necessary.
71  */
72 int vmtruncate(struct inode *inode, loff_t offset)
73 {
74         struct address_space *mapping = inode->i_mapping;
75         unsigned long limit;
76
77         if (inode->i_size < offset)
78                 goto do_expand;
79         i_size_write(inode, offset);
80
81         truncate_inode_pages(mapping, offset);
82         goto out_truncate;
83
84 do_expand:
85         limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
86         if (limit != RLIM_INFINITY && offset > limit)
87                 goto out_sig;
88         if (offset > inode->i_sb->s_maxbytes)
89                 goto out;
90         i_size_write(inode, offset);
91
92 out_truncate:
93         if (inode->i_op && inode->i_op->truncate)
94                 inode->i_op->truncate(inode);
95         return 0;
96 out_sig:
97         send_sig(SIGXFSZ, current, 0);
98 out:
99         return -EFBIG;
100 }
101
102 EXPORT_SYMBOL(vmtruncate);
103
104 /*
105  * Return the total memory allocated for this pointer, not
106  * just what the caller asked for.
107  *
108  * Doesn't have to be accurate, i.e. may have races.
109  */
110 unsigned int kobjsize(const void *objp)
111 {
112         struct page *page;
113
114         if (!objp || !((page = virt_to_page(objp))))
115                 return 0;
116
117         if (PageSlab(page))
118                 return ksize(objp);
119
120         BUG_ON(page->index < 0);
121         BUG_ON(page->index >= MAX_ORDER);
122
123         return (PAGE_SIZE << page->index);
124 }
125
126 /*
127  * get a list of pages in an address range belonging to the specified process
128  * and indicate the VMA that covers each page
129  * - this is potentially dodgy as we may end up incrementing the page count of a
130  *   slab page or a secondary page from a compound page
131  * - don't permit access to VMAs that don't support it, such as I/O mappings
132  */
133 int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
134         unsigned long start, int len, int write, int force,
135         struct page **pages, struct vm_area_struct **vmas)
136 {
137         struct vm_area_struct *vma;
138         unsigned long vm_flags;
139         int i;
140
141         /* calculate required read or write permissions.
142          * - if 'force' is set, we only require the "MAY" flags.
143          */
144         vm_flags  = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
145         vm_flags &= force ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
146
147         for (i = 0; i < len; i++) {
148                 vma = find_vma(mm, start);
149                 if (!vma)
150                         goto finish_or_fault;
151
152                 /* protect what we can, including chardevs */
153                 if (vma->vm_flags & (VM_IO | VM_PFNMAP) ||
154                     !(vm_flags & vma->vm_flags))
155                         goto finish_or_fault;
156
157                 if (pages) {
158                         pages[i] = virt_to_page(start);
159                         if (pages[i])
160                                 page_cache_get(pages[i]);
161                 }
162                 if (vmas)
163                         vmas[i] = vma;
164                 start += PAGE_SIZE;
165         }
166
167         return i;
168
169 finish_or_fault:
170         return i ? : -EFAULT;
171 }
172
173 EXPORT_SYMBOL(get_user_pages);
174
175 DEFINE_RWLOCK(vmlist_lock);
176 struct vm_struct *vmlist;
177
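/*
 * free memory returned by vmalloc()
 * - the memory was obtained from kmalloc() (see __vmalloc() below), so a
 *   plain kfree() is all that is required
 */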
178 void vfree(void *addr)
179 {
180         kfree(addr);
181 }
182
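/*
 * allocate "virtually contiguous" memory
 * - with no MMU there is no separate vmalloc address space, so the request
 *   is passed straight through to kmalloc()
 */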
183 void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot)
184 {
185         /*
186          * kmalloc doesn't like __GFP_HIGHMEM for some reason
187          */
188         return kmalloc(size, (gfp_mask | __GFP_COMP) & ~__GFP_HIGHMEM);
189 }
190
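/*
 * the allocation is directly addressable, so virt_to_page() can resolve the
 * backing page without walking any page tables
 */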
191 struct page * vmalloc_to_page(void *addr)
192 {
193         return virt_to_page(addr);
194 }
195
196 unsigned long vmalloc_to_pfn(void *addr)
197 {
198         return page_to_pfn(virt_to_page(addr));
199 }
200
201
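/*
 * read from / write to a "vmalloc'd" area
 * - the memory is directly addressable, so a plain memcpy() suffices
 */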
202 long vread(char *buf, char *addr, unsigned long count)
203 {
204         memcpy(buf, addr, count);
205         return count;
206 }
207
208 long vwrite(char *buf, char *addr, unsigned long count)
209 {
210         /* Don't allow overflow */
211         if ((unsigned long) addr + count < count)
212                 count = -(unsigned long) addr;
213
214         memcpy(addr, buf, count);
215         return count;
216 }
217
218 /*
219  *      vmalloc  -  allocate virtually contiguous memory
220  *
221  *      @size:          allocation size
222  *
223  *      Allocate enough pages to cover @size from the page level
224  *      allocator and map them into contiguous kernel virtual space.
225  *
226  *      For tight control over page level allocator and protection flags
227  *      use __vmalloc() instead.
228  */
229 void *vmalloc(unsigned long size)
230 {
231        return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL);
232 }
233 EXPORT_SYMBOL(vmalloc);
234
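/* node-local allocation is not distinguished here; fall back to vmalloc() */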
235 void *vmalloc_node(unsigned long size, int node)
236 {
237         return vmalloc(size);
238 }
239 EXPORT_SYMBOL(vmalloc_node);
240
241 /*
242  *      vmalloc_32  -  allocate virtually contiguous memory (32bit addressable)
243  *
244  *      @size:          allocation size
245  *
246  *      Allocate enough 32bit PA addressable pages to cover @size from the
247  *      page level allocator and map them into contiguous kernel virtual space.
248  */
249 void *vmalloc_32(unsigned long size)
250 {
251         return __vmalloc(size, GFP_KERNEL, PAGE_KERNEL);
252 }
253
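/*
 * mapping an arbitrary list of pages into one contiguous range requires an
 * MMU, so vmap()/vunmap() must never be called here
 */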
254 void *vmap(struct page **pages, unsigned int count, unsigned long flags, pgprot_t prot)
255 {
256         BUG();
257         return NULL;
258 }
259
260 void vunmap(void *addr)
261 {
262         BUG();
263 }
264
265 /*
266  *  sys_brk() for the most part doesn't need the global kernel
267  *  lock, except when an application is doing something nasty
268  *  like trying to un-brk an area that has already been mapped
269  *  to a regular file.  in this case, the unmapping will need
270  *  to invoke file system routines that need the global lock.
271  */
272 asmlinkage unsigned long sys_brk(unsigned long brk)
273 {
274         struct mm_struct *mm = current->mm;
275
276         if (brk < mm->start_brk || brk > mm->context.end_brk)
277                 return mm->brk;
278
279         if (mm->brk == brk)
280                 return mm->brk;
281
282         /*
283          * Always allow shrinking brk
284          */
285         if (brk <= mm->brk) {
286                 mm->brk = brk;
287                 return brk;
288         }
289
290         /*
291          * Ok, looks good - let it rip.
292          */
293         return mm->brk = brk;
294 }
295
296 #ifdef DEBUG
297 static void show_process_blocks(void)
298 {
299         struct vm_list_struct *vml;
300
301         printk("Process blocks %d:", current->pid);
302
303         for (vml = &current->mm->context.vmlist; vml; vml = vml->next) {
304                 printk(" %p: %p", vml, vml->vma);
305                 if (vml->vma)
306                         printk(" (%d @%lx #%d)",
307                                kobjsize((void *) vml->vma->vm_start),
308                                vml->vma->vm_start,
309                                atomic_read(&vml->vma->vm_usage));
310                 printk(vml->next ? " ->" : ".\n");
311         }
312 }
313 #endif /* DEBUG */
314
315 /*
316  * add a VMA into a process's mm_struct in the appropriate place in the list
317  * - should be called with mm->mmap_sem held writelocked
318  */
319 static void add_vma_to_mm(struct mm_struct *mm, struct vm_list_struct *vml)
320 {
321         struct vm_list_struct **ppv;
322
323         for (ppv = &current->mm->context.vmlist; *ppv; ppv = &(*ppv)->next)
324                 if ((*ppv)->vma->vm_start > vml->vma->vm_start)
325                         break;
326
327         vml->next = *ppv;
328         *ppv = vml;
329 }
330
331 /*
332  * look up the first VMA in which addr resides, NULL if none
333  * - should be called with mm->mmap_sem at least held readlocked
334  */
335 struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
336 {
337         struct vm_list_struct *loop, *vml;
338
339         /* search the vm_start ordered list */
340         vml = NULL;
341         for (loop = mm->context.vmlist; loop; loop = loop->next) {
342                 if (loop->vma->vm_start > addr)
343                         break;
344                 vml = loop;
345         }
346
347         if (vml && vml->vma->vm_end > addr)
348                 return vml->vma;
349
350         return NULL;
351 }
352 EXPORT_SYMBOL(find_vma);
353
354 /*
355  * find a VMA
356  * - we don't extend stack VMAs under NOMMU conditions
357  */
358 struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned long addr)
359 {
360         return find_vma(mm, addr);
361 }
362
363 /*
364  * look up the first VMA that exactly matches addr
365  * - should be called with mm->mmap_sem at least held readlocked
366  */
367 static inline struct vm_area_struct *find_vma_exact(struct mm_struct *mm,
368                                                     unsigned long addr)
369 {
370         struct vm_list_struct *vml;
371
372         /* search the vm_start ordered list */
373         for (vml = mm->context.vmlist; vml; vml = vml->next) {
374                 if (vml->vma->vm_start == addr)
375                         return vml->vma;
376                 if (vml->vma->vm_start > addr)
377                         break;
378         }
379
380         return NULL;
381 }
382
383 /*
384  * find a VMA in the global tree
385  */
386 static inline struct vm_area_struct *find_nommu_vma(unsigned long start)
387 {
388         struct vm_area_struct *vma;
389         struct rb_node *n = nommu_vma_tree.rb_node;
390
391         while (n) {
392                 vma = rb_entry(n, struct vm_area_struct, vm_rb);
393
394                 if (start < vma->vm_start)
395                         n = n->rb_left;
396                 else if (start > vma->vm_start)
397                         n = n->rb_right;
398                 else
399                         return vma;
400         }
401
402         return NULL;
403 }
404
405 /*
406  * add a VMA in the global tree
407  */
408 static void add_nommu_vma(struct vm_area_struct *vma)
409 {
410         struct vm_area_struct *pvma;
411         struct address_space *mapping;
412         struct rb_node **p = &nommu_vma_tree.rb_node;
413         struct rb_node *parent = NULL;
414
415         /* add the VMA to the mapping */
416         if (vma->vm_file) {
417                 mapping = vma->vm_file->f_mapping;
418
419                 flush_dcache_mmap_lock(mapping);
420                 vma_prio_tree_insert(vma, &mapping->i_mmap);
421                 flush_dcache_mmap_unlock(mapping);
422         }
423
424         /* add the VMA to the master list */
425         while (*p) {
426                 parent = *p;
427                 pvma = rb_entry(parent, struct vm_area_struct, vm_rb);
428
429                 if (vma->vm_start < pvma->vm_start) {
430                         p = &(*p)->rb_left;
431                 }
432                 else if (vma->vm_start > pvma->vm_start) {
433                         p = &(*p)->rb_right;
434                 }
435                 else {
436                         /* mappings are at the same address - this can only
437                          * happen for shared-mem chardevs and shared file
438                          * mappings backed by ramfs/tmpfs */
439                         BUG_ON(!(pvma->vm_flags & VM_SHARED));
440
441                         if (vma < pvma)
442                                 p = &(*p)->rb_left;
443                         else if (vma > pvma)
444                                 p = &(*p)->rb_right;
445                         else
446                                 BUG();
447                 }
448         }
449
450         rb_link_node(&vma->vm_rb, parent, p);
451         rb_insert_color(&vma->vm_rb, &nommu_vma_tree);
452 }
453
454 /*
455  * delete a VMA from the global list
456  */
457 static void delete_nommu_vma(struct vm_area_struct *vma)
458 {
459         struct address_space *mapping;
460
461         /* remove the VMA from the mapping */
462         if (vma->vm_file) {
463                 mapping = vma->vm_file->f_mapping;
464
465                 flush_dcache_mmap_lock(mapping);
466                 vma_prio_tree_remove(vma, &mapping->i_mmap);
467                 flush_dcache_mmap_unlock(mapping);
468         }
469
470         /* remove from the master list */
471         rb_erase(&vma->vm_rb, &nommu_vma_tree);
472 }
473
474 /*
475  * determine whether a mapping should be permitted and, if so, what sort of
476  * mapping we're capable of supporting
477  */
478 static int validate_mmap_request(struct file *file,
479                                  unsigned long addr,
480                                  unsigned long len,
481                                  unsigned long prot,
482                                  unsigned long flags,
483                                  unsigned long pgoff,
484                                  unsigned long *_capabilities)
485 {
486         unsigned long capabilities;
487         unsigned long reqprot = prot;
488         int ret;
489
490         /* do the simple checks first */
491         if (flags & MAP_FIXED || addr) {
492                 printk(KERN_DEBUG
493                        "%d: Can't do fixed-address/overlay mmap of RAM\n",
494                        current->pid);
495                 return -EINVAL;
496         }
497
498         if ((flags & MAP_TYPE) != MAP_PRIVATE &&
499             (flags & MAP_TYPE) != MAP_SHARED)
500                 return -EINVAL;
501
502         if (!len)
503                 return -EINVAL;
504
505         /* Careful about overflows.. */
506         len = PAGE_ALIGN(len);
507         if (!len || len > TASK_SIZE)
508                 return -ENOMEM;
509
510         /* offset overflow? */
511         if ((pgoff + (len >> PAGE_SHIFT)) < pgoff)
512                 return -EOVERFLOW;
513
514         if (file) {
515                 /* validate file mapping requests */
516                 struct address_space *mapping;
517
518                 /* files must support mmap */
519                 if (!file->f_op || !file->f_op->mmap)
520                         return -ENODEV;
521
522                 /* work out if what we've got could possibly be shared
523                  * - we support chardevs that provide their own "memory"
524                  * - we support files/blockdevs that are memory backed
525                  */
526                 mapping = file->f_mapping;
527                 if (!mapping)
528                         mapping = file->f_path.dentry->d_inode->i_mapping;
529
530                 capabilities = 0;
531                 if (mapping && mapping->backing_dev_info)
532                         capabilities = mapping->backing_dev_info->capabilities;
533
534                 if (!capabilities) {
535                         /* no explicit capabilities set, so assume some
536                          * defaults */
537                         switch (file->f_path.dentry->d_inode->i_mode & S_IFMT) {
538                         case S_IFREG:
539                         case S_IFBLK:
540                                 capabilities = BDI_CAP_MAP_COPY;
541                                 break;
542
543                         case S_IFCHR:
544                                 capabilities =
545                                         BDI_CAP_MAP_DIRECT |
546                                         BDI_CAP_READ_MAP |
547                                         BDI_CAP_WRITE_MAP;
548                                 break;
549
550                         default:
551                                 return -EINVAL;
552                         }
553                 }
554
555                 /* eliminate any capabilities that we can't support on this
556                  * device */
557                 if (!file->f_op->get_unmapped_area)
558                         capabilities &= ~BDI_CAP_MAP_DIRECT;
559                 if (!file->f_op->read)
560                         capabilities &= ~BDI_CAP_MAP_COPY;
561
562                 if (flags & MAP_SHARED) {
563                         /* do checks for writing, appending and locking */
564                         if ((prot & PROT_WRITE) &&
565                             !(file->f_mode & FMODE_WRITE))
566                                 return -EACCES;
567
568                         if (IS_APPEND(file->f_path.dentry->d_inode) &&
569                             (file->f_mode & FMODE_WRITE))
570                                 return -EACCES;
571
572                         if (locks_verify_locked(file->f_path.dentry->d_inode))
573                                 return -EAGAIN;
574
575                         if (!(capabilities & BDI_CAP_MAP_DIRECT))
576                                 return -ENODEV;
577
578                         if (((prot & PROT_READ)  && !(capabilities & BDI_CAP_READ_MAP))  ||
579                             ((prot & PROT_WRITE) && !(capabilities & BDI_CAP_WRITE_MAP)) ||
580                             ((prot & PROT_EXEC)  && !(capabilities & BDI_CAP_EXEC_MAP))
581                             ) {
582                                 printk("MAP_SHARED not completely supported on !MMU\n");
583                                 return -EINVAL;
584                         }
585
586                         /* we mustn't privatise shared mappings */
587                         capabilities &= ~BDI_CAP_MAP_COPY;
588                 }
589                 else {
590                         /* we're going to read the file into private memory we
591                          * allocate */
592                         if (!(capabilities & BDI_CAP_MAP_COPY))
593                                 return -ENODEV;
594
595                         /* we don't permit a private writable mapping to be
596                          * shared with the backing device */
597                         if (prot & PROT_WRITE)
598                                 capabilities &= ~BDI_CAP_MAP_DIRECT;
599                 }
600
601                 /* handle executable mappings and implied executable
602                  * mappings */
603                 if (file->f_path.mnt->mnt_flags & MNT_NOEXEC) {
604                         if (prot & PROT_EXEC)
605                                 return -EPERM;
606                 }
607                 else if ((prot & PROT_READ) && !(prot & PROT_EXEC)) {
608                         /* handle implication of PROT_EXEC by PROT_READ */
609                         if (current->personality & READ_IMPLIES_EXEC) {
610                                 if (capabilities & BDI_CAP_EXEC_MAP)
611                                         prot |= PROT_EXEC;
612                         }
613                 }
614                 else if ((prot & PROT_READ) &&
615                          (prot & PROT_EXEC) &&
616                          !(capabilities & BDI_CAP_EXEC_MAP)
617                          ) {
618                         /* backing file is not executable, try to copy */
619                         capabilities &= ~BDI_CAP_MAP_DIRECT;
620                 }
621         }
622         else {
623                 /* anonymous mappings are always memory backed and can be
624                  * privately mapped
625                  */
626                 capabilities = BDI_CAP_MAP_COPY;
627
628                 /* handle PROT_EXEC implication by PROT_READ */
629                 if ((prot & PROT_READ) &&
630                     (current->personality & READ_IMPLIES_EXEC))
631                         prot |= PROT_EXEC;
632         }
633
634         /* allow the security API to have its say */
635         ret = security_file_mmap(file, reqprot, prot, flags);
636         if (ret < 0)
637                 return ret;
638
639         /* looks okay */
640         *_capabilities = capabilities;
641         return 0;
642 }
643
644 /*
645  * we've determined that we can make the mapping, now translate what we
646  * now know into VMA flags
647  */
648 static unsigned long determine_vm_flags(struct file *file,
649                                         unsigned long prot,
650                                         unsigned long flags,
651                                         unsigned long capabilities)
652 {
653         unsigned long vm_flags;
654
655         vm_flags = calc_vm_prot_bits(prot) | calc_vm_flag_bits(flags);
656         vm_flags |= VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
657         /* vm_flags |= mm->def_flags; */
658
659         if (!(capabilities & BDI_CAP_MAP_DIRECT)) {
660                 /* attempt to share read-only copies of mapped file chunks */
661                 if (file && !(prot & PROT_WRITE))
662                         vm_flags |= VM_MAYSHARE;
663         }
664         else {
665                 /* overlay a shareable mapping on the backing device or inode
666                  * if possible - used for chardevs, ramfs/tmpfs/shmfs and
667                  * romfs/cramfs */
668                 if (flags & MAP_SHARED)
669                         vm_flags |= VM_MAYSHARE | VM_SHARED;
670                 else if ((((vm_flags & capabilities) ^ vm_flags) & BDI_CAP_VMFLAGS) == 0)
671                         vm_flags |= VM_MAYSHARE;
672         }
673
674         /* refuse to let anyone share private mappings with this process if
675          * it's being traced - otherwise breakpoints set in it may interfere
676          * with another untraced process
677          */
678         if ((flags & MAP_PRIVATE) && (current->ptrace & PT_PTRACED))
679                 vm_flags &= ~VM_MAYSHARE;
680
681         return vm_flags;
682 }
683
684 /*
685  * set up a shared mapping on a file
686  */
687 static int do_mmap_shared_file(struct vm_area_struct *vma, unsigned long len)
688 {
689         int ret;
690
691         ret = vma->vm_file->f_op->mmap(vma->vm_file, vma);
692         if (ret != -ENOSYS)
693                 return ret;
694
695         /* getting an ENOSYS error indicates that direct mmap isn't
696          * possible (as opposed to tried but failed) so we'll fall
697          * through to making a private copy of the data and mapping
698          * that if we can */
699         return -ENODEV;
700 }
701
702 /*
703  * set up a private mapping or an anonymous shared mapping
704  */
705 static int do_mmap_private(struct vm_area_struct *vma, unsigned long len)
706 {
707         void *base;
708         int ret;
709
710         /* invoke the file's mapping function so that it can keep track of
711          * shared mappings on devices or memory
712          * - VM_MAYSHARE will be set if it may attempt to share
713          */
714         if (vma->vm_file) {
715                 ret = vma->vm_file->f_op->mmap(vma->vm_file, vma);
716                 if (ret != -ENOSYS) {
717                         /* shouldn't return success if we're not sharing */
718                         BUG_ON(ret == 0 && !(vma->vm_flags & VM_MAYSHARE));
719                         return ret; /* success or a real error */
720                 }
721
722                 /* getting an ENOSYS error indicates that direct mmap isn't
723                  * possible (as opposed to tried but failed) so we'll try to
724                  * make a private copy of the data and map that instead */
725         }
726
727         /* allocate some memory to hold the mapping
728          * - note that this may not return a page-aligned address if the object
729          *   we're allocating is smaller than a page
730          */
731         base = kmalloc(len, GFP_KERNEL|__GFP_COMP);
732         if (!base)
733                 goto enomem;
734
735         vma->vm_start = (unsigned long) base;
736         vma->vm_end = vma->vm_start + len;
737         vma->vm_flags |= VM_MAPPED_COPY;
738
739 #ifdef WARN_ON_SLACK
740         if (len + WARN_ON_SLACK <= kobjsize(base))
741                 printk("Allocation of %lu bytes from process %d has %lu bytes of slack\n",
742                        len, current->pid, kobjsize(base) - len);
743 #endif
744
745         if (vma->vm_file) {
746                 /* read the contents of a file into the copy */
747                 mm_segment_t old_fs;
748                 loff_t fpos;
749
750                 fpos = vma->vm_pgoff;
751                 fpos <<= PAGE_SHIFT;
752
753                 old_fs = get_fs();
754                 set_fs(KERNEL_DS);
755                 ret = vma->vm_file->f_op->read(vma->vm_file, base, len, &fpos);
756                 set_fs(old_fs);
757
758                 if (ret < 0)
759                         goto error_free;
760
761                 /* clear the last little bit */
762                 if (ret < len)
763                         memset(base + ret, 0, len - ret);
764
765         } else {
766                 /* if it's an anonymous mapping, then just clear it */
767                 memset(base, 0, len);
768         }
769
770         return 0;
771
772 error_free:
773         kfree(base);
774         vma->vm_start = 0;
775         return ret;
776
777 enomem:
778         printk("Allocation of length %lu from process %d failed\n",
779                len, current->pid);
780         show_free_areas();
781         return -ENOMEM;
782 }
783
784 /*
785  * handle mapping creation for uClinux
786  */
787 unsigned long do_mmap_pgoff(struct file *file,
788                             unsigned long addr,
789                             unsigned long len,
790                             unsigned long prot,
791                             unsigned long flags,
792                             unsigned long pgoff)
793 {
794         struct vm_list_struct *vml = NULL;
795         struct vm_area_struct *vma = NULL;
796         struct rb_node *rb;
797         unsigned long capabilities, vm_flags;
798         void *result;
799         int ret;
800
801         /* decide whether we should attempt the mapping, and if so what sort of
802          * mapping */
803         ret = validate_mmap_request(file, addr, len, prot, flags, pgoff,
804                                     &capabilities);
805         if (ret < 0)
806                 return ret;
807
808         /* we've determined that we can make the mapping, now translate what we
809          * now know into VMA flags */
810         vm_flags = determine_vm_flags(file, prot, flags, capabilities);
811
812         /* we're going to need to record the mapping if it works */
813         vml = kzalloc(sizeof(struct vm_list_struct), GFP_KERNEL);
814         if (!vml)
815                 goto error_getting_vml;
816
817         down_write(&nommu_vma_sem);
818
819         /* if we want to share, we need to check for VMAs created by other
820          * mmap() calls that overlap with our proposed mapping
821          * - we can only share with an exact match on most regular files
822          * - shared mappings on character devices and memory backed files are
823  *   permitted to overlap inexactly as far as we are concerned, for in
824  *   these cases sharing is handled in the driver or filesystem rather
825          *   than here
826          */
827         if (vm_flags & VM_MAYSHARE) {
828                 unsigned long pglen = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
829                 unsigned long vmpglen;
830
831                 for (rb = rb_first(&nommu_vma_tree); rb; rb = rb_next(rb)) {
832                         vma = rb_entry(rb, struct vm_area_struct, vm_rb);
833
834                         if (!(vma->vm_flags & VM_MAYSHARE))
835                                 continue;
836
837                         /* search for overlapping mappings on the same file */
838                         if (vma->vm_file->f_path.dentry->d_inode != file->f_path.dentry->d_inode)
839                                 continue;
840
841                         if (vma->vm_pgoff >= pgoff + pglen)
842                                 continue;
843
844                         vmpglen = vma->vm_end - vma->vm_start + PAGE_SIZE - 1;
845                         vmpglen >>= PAGE_SHIFT;
846                         if (pgoff >= vma->vm_pgoff + vmpglen)
847                                 continue;
848
849                         /* handle inexactly overlapping matches between mappings */
850                         if (vma->vm_pgoff != pgoff || vmpglen != pglen) {
851                                 if (!(capabilities & BDI_CAP_MAP_DIRECT))
852                                         goto sharing_violation;
853                                 continue;
854                         }
855
856                         /* we've found a VMA we can share */
857                         atomic_inc(&vma->vm_usage);
858
859                         vml->vma = vma;
860                         result = (void *) vma->vm_start;
861                         goto shared;
862                 }
863
864                 vma = NULL;
865
866                 /* obtain the address at which to make a shared mapping
867                  * - this is the hook for quasi-memory character devices to
868                  *   tell us the location of a shared mapping
869                  */
870                 if (file && file->f_op->get_unmapped_area) {
871                         addr = file->f_op->get_unmapped_area(file, addr, len,
872                                                              pgoff, flags);
873                         if (IS_ERR((void *) addr)) {
874                                 ret = addr;
875                                 if (ret != (unsigned long) -ENOSYS)
876                                         goto error;
877
878                                 /* the driver refused to tell us where to site
879                                  * the mapping so we'll have to attempt to copy
880                                  * it */
881                                 ret = (unsigned long) -ENODEV;
882                                 if (!(capabilities & BDI_CAP_MAP_COPY))
883                                         goto error;
884
885                                 capabilities &= ~BDI_CAP_MAP_DIRECT;
886                         }
887                 }
888         }
889
890         /* we're going to need a VMA struct as well */
891         vma = kzalloc(sizeof(struct vm_area_struct), GFP_KERNEL);
892         if (!vma)
893                 goto error_getting_vma;
894
895         INIT_LIST_HEAD(&vma->anon_vma_node);
896         atomic_set(&vma->vm_usage, 1);
897         if (file)
898                 get_file(file);
899         vma->vm_file    = file;
900         vma->vm_flags   = vm_flags;
901         vma->vm_start   = addr;
902         vma->vm_end     = addr + len;
903         vma->vm_pgoff   = pgoff;
904
905         vml->vma = vma;
906
907         /* set up the mapping */
908         if (file && vma->vm_flags & VM_SHARED)
909                 ret = do_mmap_shared_file(vma, len);
910         else
911                 ret = do_mmap_private(vma, len);
912         if (ret < 0)
913                 goto error;
914
915         /* okay... we have a mapping; now we have to register it */
916         result = (void *) vma->vm_start;
917
918         if (vma->vm_flags & VM_MAPPED_COPY) {
919                 realalloc += kobjsize(result);
920                 askedalloc += len;
921         }
922
923         realalloc += kobjsize(vma);
924         askedalloc += sizeof(*vma);
925
926         vx_vmpages_add(current->mm, len >> PAGE_SHIFT);
927
928         add_nommu_vma(vma);
929
930  shared:
931         realalloc += kobjsize(vml);
932         askedalloc += sizeof(*vml);
933
934         add_vma_to_mm(current->mm, vml);
935
936         up_write(&nommu_vma_sem);
937
938         if (prot & PROT_EXEC)
939                 flush_icache_range((unsigned long) result,
940                                    (unsigned long) result + len);
941
942 #ifdef DEBUG
943         printk("do_mmap:\n");
944         show_process_blocks();
945 #endif
946
947         return (unsigned long) result;
948
949  error:
950         up_write(&nommu_vma_sem);
951         kfree(vml);
952         if (vma) {
953                 if (vma->vm_file)
954                         fput(vma->vm_file);
955                 kfree(vma);
956         }
957         return ret;
958
959  sharing_violation:
960         up_write(&nommu_vma_sem);
961         printk("Attempt to share mismatched mappings\n");
962         kfree(vml);
963         return -EINVAL;
964
965  error_getting_vma:
966         up_write(&nommu_vma_sem);
967         kfree(vml);
968         printk("Allocation of vma for %lu byte allocation from process %d failed\n",
969                len, current->pid);
970         show_free_areas();
971         return -ENOMEM;
972
973  error_getting_vml:
974         printk("Allocation of vml for %lu byte allocation from process %d failed\n",
975                len, current->pid);
976         show_free_areas();
977         return -ENOMEM;
978 }
979
980 /*
981  * handle mapping disposal for uClinux
982  */
983 static void put_vma(struct vm_area_struct *vma)
984 {
985         if (vma) {
986                 down_write(&nommu_vma_sem);
987
988                 if (atomic_dec_and_test(&vma->vm_usage)) {
989                         delete_nommu_vma(vma);
990
991                         if (vma->vm_ops && vma->vm_ops->close)
992                                 vma->vm_ops->close(vma);
993
994                         /* IO memory and memory shared directly out of the pagecache from
995                          * ramfs/tmpfs mustn't be released here */
996                         if (vma->vm_flags & VM_MAPPED_COPY) {
997                                 realalloc -= kobjsize((void *) vma->vm_start);
998                                 askedalloc -= vma->vm_end - vma->vm_start;
999                                 kfree((void *) vma->vm_start);
1000                         }
1001
1002                         realalloc -= kobjsize(vma);
1003                         askedalloc -= sizeof(*vma);
1004
1005                         if (vma->vm_file)
1006                                 fput(vma->vm_file);
1007                         kfree(vma);
1008                 }
1009
1010                 up_write(&nommu_vma_sem);
1011         }
1012 }
1013
1014 /*
1015  * release a mapping
1016  * - under NOMMU conditions the parameters must exactly match the mapping to
1017  *   be removed
1018  */
1019 int do_munmap(struct mm_struct *mm, unsigned long addr, size_t len)
1020 {
1021         struct vm_list_struct *vml, **parent;
1022         unsigned long end = addr + len;
1023
1024 #ifdef DEBUG
1025         printk("do_munmap:\n");
1026 #endif
1027
1028         for (parent = &mm->context.vmlist; *parent; parent = &(*parent)->next) {
1029                 if ((*parent)->vma->vm_start > addr)
1030                         break;
1031                 if ((*parent)->vma->vm_start == addr &&
1032                     ((len == 0) || ((*parent)->vma->vm_end == end)))
1033                         goto found;
1034         }
1035
1036         printk("munmap of non-mmapped memory by process %d (%s): %p\n",
1037                current->pid, current->comm, (void *) addr);
1038         return -EINVAL;
1039
1040  found:
1041         vml = *parent;
1042
1043         put_vma(vml->vma);
1044
1045         *parent = vml->next;
1046         realalloc -= kobjsize(vml);
1047         askedalloc -= sizeof(*vml);
1048         kfree(vml);
1049
1050         update_hiwater_vm(mm);
1051         vx_vmpages_sub(mm, len >> PAGE_SHIFT);
1052
1053 #ifdef DEBUG
1054         show_process_blocks();
1055 #endif
1056
1057         return 0;
1058 }
1059
1060 asmlinkage long sys_munmap(unsigned long addr, size_t len)
1061 {
1062         int ret;
1063         struct mm_struct *mm = current->mm;
1064
1065         down_write(&mm->mmap_sem);
1066         ret = do_munmap(mm, addr, len);
1067         up_write(&mm->mmap_sem);
1068         return ret;
1069 }
1070
1071 /*
1072  * Release all mappings
1073  */
1074 void exit_mmap(struct mm_struct * mm)
1075 {
1076         struct vm_list_struct *tmp;
1077
1078         if (mm) {
1079 #ifdef DEBUG
1080                 printk("Exit_mmap:\n");
1081 #endif
1082
1083                 vx_vmpages_sub(mm, mm->total_vm);
1084
1085                 while ((tmp = mm->context.vmlist)) {
1086                         mm->context.vmlist = tmp->next;
1087                         put_vma(tmp->vma);
1088
1089                         realalloc -= kobjsize(tmp);
1090                         askedalloc -= sizeof(*tmp);
1091                         kfree(tmp);
1092                 }
1093
1094 #ifdef DEBUG
1095                 show_process_blocks();
1096 #endif
1097         }
1098 }
1099
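/*
 * expanding the brk area this way is not supported; sys_brk() above adjusts
 * the heap within the mm->start_brk..mm->context.end_brk range instead
 */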
1100 unsigned long do_brk(unsigned long addr, unsigned long len)
1101 {
1102         return -ENOMEM;
1103 }
1104
1105 /*
1106  * expand (or shrink) an existing mapping, potentially moving it at the same
1107  * time (controlled by the MREMAP_MAYMOVE flag and available VM space)
1108  *
1109  * under NOMMU conditions, we only permit changing a mapping's size, and only
1110  * as long as it stays within the hole allocated by the kmalloc() call in
1111  * do_mmap_pgoff() and the block is not shareable
1112  *
1113  * MREMAP_FIXED is not supported under NOMMU conditions
1114  */
1115 unsigned long do_mremap(unsigned long addr,
1116                         unsigned long old_len, unsigned long new_len,
1117                         unsigned long flags, unsigned long new_addr)
1118 {
1119         struct vm_area_struct *vma;
1120
1121         /* insanity checks first */
1122         if (new_len == 0)
1123                 return (unsigned long) -EINVAL;
1124
1125         if (flags & MREMAP_FIXED && new_addr != addr)
1126                 return (unsigned long) -EINVAL;
1127
1128         vma = find_vma_exact(current->mm, addr);
1129         if (!vma)
1130                 return (unsigned long) -EINVAL;
1131
1132         if (vma->vm_end != vma->vm_start + old_len)
1133                 return (unsigned long) -EFAULT;
1134
1135         if (vma->vm_flags & VM_MAYSHARE)
1136                 return (unsigned long) -EPERM;
1137
1138         if (new_len > kobjsize((void *) addr))
1139                 return (unsigned long) -ENOMEM;
1140
1141         /* all checks complete - do it */
1142         vma->vm_end = vma->vm_start + new_len;
1143
1144         askedalloc -= old_len;
1145         askedalloc += new_len;
1146
1147         return vma->vm_start;
1148 }
1149
1150 asmlinkage unsigned long sys_mremap(unsigned long addr,
1151         unsigned long old_len, unsigned long new_len,
1152         unsigned long flags, unsigned long new_addr)
1153 {
1154         unsigned long ret;
1155
1156         down_write(&current->mm->mmap_sem);
1157         ret = do_mremap(addr, old_len, new_len, flags, new_addr);
1158         up_write(&current->mm->mmap_sem);
1159         return ret;
1160 }
1161
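/* there are no page tables to walk, so there is no page to hand back here */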
1162 struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
1163                         unsigned int foll_flags)
1164 {
1165         return NULL;
1166 }
1167
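/*
 * physical memory is directly addressable, so just point the VMA at the
 * physical address encoded in vm_pgoff
 */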
1168 int remap_pfn_range(struct vm_area_struct *vma, unsigned long from,
1169                 unsigned long to, unsigned long size, pgprot_t prot)
1170 {
1171         vma->vm_start = vma->vm_pgoff << PAGE_SHIFT;
1172         return 0;
1173 }
1174 EXPORT_SYMBOL(remap_pfn_range);
1175
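/* the hooks below either need no work or cannot be supported here */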
1176 void swap_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
1177 {
1178 }
1179
1180 unsigned long arch_get_unmapped_area(struct file *file, unsigned long addr,
1181         unsigned long len, unsigned long pgoff, unsigned long flags)
1182 {
1183         return -ENOMEM;
1184 }
1185
1186 void arch_unmap_area(struct mm_struct *mm, unsigned long addr)
1187 {
1188 }
1189
1190 void unmap_mapping_range(struct address_space *mapping,
1191                          loff_t const holebegin, loff_t const holelen,
1192                          int even_cows)
1193 {
1194 }
1195 EXPORT_SYMBOL(unmap_mapping_range);
1196
1197 /*
1198  * Check that a process has enough memory to allocate a new virtual
1199  * mapping. 0 means there is enough memory for the allocation to
1200  * succeed and -ENOMEM implies there is not.
1201  *
1202  * We currently support three overcommit policies, which are set via the
1203  * vm.overcommit_memory sysctl.  See Documentation/vm/overcommit-accounting
1204  *
1205  * Strict overcommit modes added 2002 Feb 26 by Alan Cox.
1206  * Additional code 2002 Jul 20 by Robert Love.
1207  *
1208  * cap_sys_admin is 1 if the process has admin privileges, 0 otherwise.
1209  *
1210  * Note this is a helper function intended to be used by LSMs which
1211  * wish to use this logic.
1212  */
1213 int __vm_enough_memory(long pages, int cap_sys_admin)
1214 {
1215         unsigned long free, allowed;
1216
1217         vm_acct_memory(pages);
1218
1219         /*
1220          * Sometimes we want to use more memory than we have
1221          */
1222         if (sysctl_overcommit_memory == OVERCOMMIT_ALWAYS)
1223                 return 0;
1224
1225         if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) {
1226                 unsigned long n;
1227
1228                 free = global_page_state(NR_FILE_PAGES);
1229                 free += nr_swap_pages;
1230
1231                 /*
1232                  * Any slabs which are created with the
1233                  * SLAB_RECLAIM_ACCOUNT flag claim to have contents
1234                  * which are reclaimable, under pressure.  The dentry
1235                  * cache and most inode caches should fall into this
1236                  */
1237                 free += global_page_state(NR_SLAB_RECLAIMABLE);
1238
1239                 /*
1240                  * Leave the last 3% for root
1241                  */
1242                 if (!cap_sys_admin)
1243                         free -= free / 32;
1244
1245                 if (free > pages)
1246                         return 0;
1247
1248                 /*
1249                  * nr_free_pages() is very expensive on large systems,
1250                  * only call if we're about to fail.
1251                  */
1252                 n = nr_free_pages();
1253
1254                 /*
1255                  * Leave reserved pages; they are not available for anonymous pages.
1256                  */
1257                 if (n <= totalreserve_pages)
1258                         goto error;
1259                 else
1260                         n -= totalreserve_pages;
1261
1262                 /*
1263                  * Leave the last 3% for root
1264                  */
1265                 if (!cap_sys_admin)
1266                         n -= n / 32;
1267                 free += n;
1268
1269                 if (free > pages)
1270                         return 0;
1271
1272                 goto error;
1273         }
1274
1275         allowed = totalram_pages * sysctl_overcommit_ratio / 100;
1276         /*
1277          * Leave the last 3% for root
1278          */
1279         if (!cap_sys_admin)
1280                 allowed -= allowed / 32;
1281         allowed += total_swap_pages;
1282
1283         /* Don't let a single process grow too big:
1284            leave 3% of the size of this process for other processes */
1285         allowed -= current->mm->total_vm / 32;
1286
1287         /*
1288          * cast `allowed' as a signed long because vm_committed_space
1289          * sometimes has a negative value
1290          */
1291         if (atomic_read(&vm_committed_space) < (long)allowed)
1292                 return 0;
1293 error:
1294         vm_unacct_memory(pages);
1295
1296         return -ENOMEM;
1297 }
1298
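/* there is no gate area here, so no address can lie within it */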
1299 int in_gate_area_no_task(unsigned long addr)
1300 {
1301         return 0;
1302 }
1303
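/*
 * demand-paging a file mapping cannot happen without an MMU, so this must
 * never be called
 */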
1304 struct page *filemap_nopage(struct vm_area_struct *area,
1305                         unsigned long address, int *type)
1306 {
1307         BUG();
1308         return NULL;
1309 }
1310
1311 /*
1312  * Access another process' address space.
1313  * - source/target buffer must be kernel space
1314  */
1315 int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
1316 {
1317         struct vm_area_struct *vma;
1318         struct mm_struct *mm;
1319
1320         if (addr + len < addr)
1321                 return 0;
1322
1323         mm = get_task_mm(tsk);
1324         if (!mm)
1325                 return 0;
1326
1327         down_read(&mm->mmap_sem);
1328
1329         /* the access must start within one of the target process's mappings */
1330         vma = find_vma(mm, addr);
1331         if (vma) {
1332                 /* don't overrun this mapping */
1333                 if (addr + len >= vma->vm_end)
1334                         len = vma->vm_end - addr;
1335
1336                 /* only read or write mappings where it is permitted */
1337                 if (write && vma->vm_flags & VM_MAYWRITE)
1338                         len -= copy_to_user((void *) addr, buf, len);
1339                 else if (!write && vma->vm_flags & VM_MAYREAD)
1340                         len -= copy_from_user(buf, (void *) addr, len);
1341                 else
1342                         len = 0;
1343         } else {
1344                 len = 0;
1345         }
1346
1347         up_read(&mm->mmap_sem);
1348         mmput(mm);
1349         return len;
1350 }