VServer 1.9.2 (patch-2.6.8.1-vs1.9.2.diff)
/*
 *  linux/mm/nommu.c
 *
 *  Replacement code for mm functions to support CPUs that don't
 *  have any form of memory management unit (thus no virtual memory).
 *
 *  Copyright (c) 2000-2003 David McCullough <davidm@snapgear.com>
 *  Copyright (c) 2000-2001 D Jeff Dionne <jeff@uClinux.org>
 *  Copyright (c) 2002      Greg Ungerer <gerg@snapgear.com>
 */

#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/swap.h>
#include <linux/smp_lock.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/blkdev.h>
#include <linux/backing-dev.h>

#include <asm/uaccess.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>

void *high_memory;
struct page *mem_map;
unsigned long max_mapnr;
unsigned long num_physpages;
unsigned long askedalloc, realalloc;
atomic_t vm_committed_space = ATOMIC_INIT(0);
int sysctl_overcommit_memory; /* default is heuristic overcommit */
int sysctl_overcommit_ratio = 50; /* default is 50% */

int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT;
EXPORT_SYMBOL(sysctl_max_map_count);

/*
 * Handle all mappings that got truncated by a "truncate()"
 * system call.
 *
 * NOTE! We have to be ready to update the memory sharing
 * between the file and the memory map for a potential last
 * incomplete page.  Ugly, but necessary.
 */
int vmtruncate(struct inode *inode, loff_t offset)
{
        struct address_space *mapping = inode->i_mapping;
        unsigned long limit;

        if (inode->i_size < offset)
                goto do_expand;
        i_size_write(inode, offset);

        truncate_inode_pages(mapping, offset);
        goto out_truncate;

do_expand:
        limit = current->rlim[RLIMIT_FSIZE].rlim_cur;
        if (limit != RLIM_INFINITY && offset > limit)
                goto out_sig;
        if (offset > inode->i_sb->s_maxbytes)
                goto out;
        i_size_write(inode, offset);

out_truncate:
        if (inode->i_op && inode->i_op->truncate)
                inode->i_op->truncate(inode);
        return 0;
out_sig:
        send_sig(SIGXFSZ, current, 0);
out:
        return -EFBIG;
}
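
/*
 * Editorial note (not in the original source): a shrink (offset <=
 * i_size) updates i_size, drops the now-stale pagecache pages and
 * falls through to the filesystem's ->truncate() hook.  An expand is
 * only bounds-checked: exceeding RLIMIT_FSIZE raises SIGXFSZ and
 * exceeding the filesystem's s_maxbytes fails quietly, both with
 * -EFBIG.  E.g. truncating a file to 2^40 bytes on a filesystem with
 * s_maxbytes of 2^31 - 1 (and no RLIMIT_FSIZE cap) returns -EFBIG
 * without signalling.
 */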

/*
 * Return the total memory allocated for this pointer, not
 * just what the caller asked for.
 *
 * Doesn't have to be accurate, i.e. may have races.
 */
unsigned int kobjsize(const void *objp)
{
        struct page *page;

        if (!objp || !((page = virt_to_page(objp))))
                return 0;

        if (PageSlab(page))
                return ksize(objp);

        BUG_ON(page->index < 0);
        BUG_ON(page->index >= MAX_ORDER);

        return (PAGE_SIZE << page->index);
}
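
/*
 * Worked example (editorial note, not in the original source): the
 * BUG_ON() bounds checks above assume that, for non-slab memory, the
 * page allocator has recorded the allocation order in page->index.
 * A single page then reports PAGE_SIZE << 0 and an order-3 block
 * reports PAGE_SIZE << 3 (32KB with 4KB pages), regardless of how
 * many bytes the caller originally asked for.
 */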

/*
 * The nommu dodgy version :-)
 */
int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
        unsigned long start, int len, int write, int force,
        struct page **pages, struct vm_area_struct **vmas)
{
        int i;
        static struct vm_area_struct dummy_vma;

        for (i = 0; i < len; i++) {
                if (pages) {
                        pages[i] = virt_to_page(start);
                        if (pages[i])
                                page_cache_get(pages[i]);
                }
                if (vmas)
                        vmas[i] = &dummy_vma;
                start += PAGE_SIZE;
        }
        return i;
}
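
/*
 * Editorial note (not in the original source): with no MMU the
 * address space is flat, so each "user" page can be translated with
 * a plain virt_to_page() and pinned with page_cache_get().  The
 * write/force arguments are ignored (there is no protection hardware
 * to override) and every caller gets the same static, zero-filled
 * dummy_vma rather than a real VMA lookup.
 */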

rwlock_t vmlist_lock = RW_LOCK_UNLOCKED;
struct vm_struct *vmlist;

void vfree(void *addr)
{
        kfree(addr);
}

void *__vmalloc(unsigned long size, int gfp_mask, pgprot_t prot)
{
        /*
         * kmalloc doesn't like __GFP_HIGHMEM for some reason
         */
        return kmalloc(size, gfp_mask & ~__GFP_HIGHMEM);
}
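
/*
 * Editorial note (not in the original source): the likely explanation
 * for the "some reason" above is that slab objects must live at
 * permanently-mapped kernel virtual addresses, while __GFP_HIGHMEM
 * pages may have no kernel mapping at all; masking the flag off keeps
 * kmalloc() on safe, directly-addressable memory.
 */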

struct page * vmalloc_to_page(void *addr)
{
        return virt_to_page(addr);
}

long vread(char *buf, char *addr, unsigned long count)
{
        memcpy(buf, addr, count);
        return count;
}

long vwrite(char *buf, char *addr, unsigned long count)
{
        /* Don't allow overflow */
        if ((unsigned long) addr + count < count)
                count = -(unsigned long) addr;

        memcpy(addr, buf, count);
        return count;
}
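
/*
 * Worked example (editorial note, not in the original source) for the
 * overflow guard above: on a 32-bit machine with addr = 0xfffff000
 * and count = 0x2000, addr + count wraps to 0x1000, which is < count,
 * so count is clamped to -(unsigned long)addr = 0x1000, exactly the
 * number of bytes left before the top of the address space.
 */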

/*
 *      vmalloc  -  allocate virtually contiguous memory
 *
 *      @size:          allocation size
 *
 *      Allocate enough pages to cover @size from the page level
 *      allocator and map them into contiguous kernel virtual space.
 *
 *      For tight control over page level allocator and protection flags
 *      use __vmalloc() instead.
 */
void *vmalloc(unsigned long size)
{
        return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL);
}

/*
 *      vmalloc_32  -  allocate virtually contiguous memory (32bit addressable)
 *
 *      @size:          allocation size
 *
 *      Allocate enough 32bit PA addressable pages to cover @size from the
 *      page level allocator and map them into contiguous kernel virtual space.
 */
void *vmalloc_32(unsigned long size)
{
        return __vmalloc(size, GFP_KERNEL, PAGE_KERNEL);
}

void *vmap(struct page **pages, unsigned int count, unsigned long flags, pgprot_t prot)
{
        BUG();
        return NULL;
}

void vunmap(void *addr)
{
        BUG();
}

/*
 *  sys_brk() for the most part doesn't need the global kernel
 *  lock, except when an application is doing something nasty
 *  like trying to un-brk an area that has already been mapped
 *  to a regular file.  In this case, the unmapping will need
 *  to invoke file system routines that need the global lock.
 */
asmlinkage unsigned long sys_brk(unsigned long brk)
{
        struct mm_struct *mm = current->mm;

        if (brk < mm->end_code || brk < mm->start_brk || brk > mm->context.end_brk)
                return mm->brk;

        if (mm->brk == brk)
                return mm->brk;

        /*
         * Always allow shrinking brk
         */
        if (brk <= mm->brk) {
                mm->brk = brk;
                return brk;
        }

        /*
         * Ok, looks good - let it rip.
         */
        return mm->brk = brk;
}
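
/*
 * Editorial note (not in the original source): with no MMU the heap
 * cannot grow by faulting in new pages, so the brk region is
 * presumably reserved up front by the binary loader, which records
 * its end in mm->context.end_brk.  sys_brk() therefore only moves
 * the break pointer within [start_brk, context.end_brk] and never
 * allocates or frees memory itself; requests outside that window
 * simply return the current break.
 */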

/*
 * Combine the mmap "prot" and "flags" argument into one "vm_flags" used
 * internally. Essentially, translate the "PROT_xxx" and "MAP_xxx" bits
 * into "VM_xxx".
 */
static inline unsigned long calc_vm_flags(unsigned long prot, unsigned long flags)
{
#define _trans(x,bit1,bit2) \
((bit1==bit2)?(x&bit1):(x&bit1)?bit2:0)

        unsigned long prot_bits, flag_bits;
        prot_bits =
                _trans(prot, PROT_READ, VM_READ) |
                _trans(prot, PROT_WRITE, VM_WRITE) |
                _trans(prot, PROT_EXEC, VM_EXEC);
        flag_bits =
                _trans(flags, MAP_GROWSDOWN, VM_GROWSDOWN) |
                _trans(flags, MAP_DENYWRITE, VM_DENYWRITE) |
                _trans(flags, MAP_EXECUTABLE, VM_EXECUTABLE);
        return prot_bits | flag_bits;
#undef _trans
}
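
/*
 * Worked example (editorial note, not in the original source):
 * _trans(x, bit1, bit2) substitutes bit2 when bit1 is set in x, and
 * collapses to a plain mask when the two flag spaces share an
 * encoding (bit1 == bit2).  Assuming the usual encodings where
 * PROT_READ == VM_READ and PROT_EXEC == VM_EXEC:
 *
 *      calc_vm_flags(PROT_READ | PROT_EXEC, MAP_EXECUTABLE)
 *              == VM_READ | VM_EXEC | VM_EXECUTABLE
 */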

#ifdef DEBUG
static void show_process_blocks(void)
{
        struct mm_tblock_struct *tblock;

        printk("Process blocks %d:", current->pid);

        for (tblock = &current->mm->context.tblock; tblock; tblock = tblock->next) {
                printk(" %p: %p", tblock, tblock->rblock);
                if (tblock->rblock)
                        printk(" (%d @%p #%d)", kobjsize(tblock->rblock->kblock), tblock->rblock->kblock, tblock->rblock->refcount);
                printk(tblock->next ? " ->" : ".\n");
        }
}
#endif /* DEBUG */

unsigned long do_mmap_pgoff(
        struct file * file,
        unsigned long addr,
        unsigned long len,
        unsigned long prot,
        unsigned long flags,
        unsigned long pgoff)
{
        void * result;
        struct mm_tblock_struct * tblock;
        unsigned int vm_flags;

        /*
         * Get the !CONFIG_MMU specific checks done first
         */
        if ((flags & MAP_SHARED) && (prot & PROT_WRITE) && (file)) {
                printk("MAP_SHARED not supported (cannot write mappings to disk)\n");
                return -EINVAL;
        }

        if ((prot & PROT_WRITE) && (flags & MAP_PRIVATE)) {
                printk("Private writable mappings not supported\n");
                return -EINVAL;
        }

        /*
         *      now all the standard checks
         */
        if (file && (!file->f_op || !file->f_op->mmap))
                return -ENODEV;

        if (PAGE_ALIGN(len) == 0)
                return addr;

        if (len > TASK_SIZE)
                return -EINVAL;

        /* offset overflow? */
        if ((pgoff + (len >> PAGE_SHIFT)) < pgoff)
                return -EINVAL;

        /* Do simple checking here so the lower-level routines won't have
         * to. We assume access permissions have been handled by the open
         * of the memory object, so we don't do any here.
         */
        vm_flags = calc_vm_flags(prot,flags) /* | mm->def_flags */ | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;

        /*
         * determine the object being mapped and call the appropriate
         * specific mapper.
         */
        if (file) {
                struct vm_area_struct vma;
                int error;

                if (!file->f_op)
                        return -ENODEV;

                vma.vm_start = addr;
                vma.vm_end = addr + len;
                vma.vm_flags = vm_flags;
                vma.vm_pgoff = pgoff;

#ifdef MAGIC_ROM_PTR
                /* First, try simpler routine designed to give us a ROM pointer. */

                if (file->f_op->romptr && !(prot & PROT_WRITE)) {
                        error = file->f_op->romptr(file, &vma);
#ifdef DEBUG
                        printk("romptr mmap returned %d, start 0x%.8x\n", error,
                                        vma.vm_start);
#endif
                        if (!error)
                                return vma.vm_start;
                        else if (error != -ENOSYS)
                                return error;
                } else
#endif /* MAGIC_ROM_PTR */
                /* Then try full mmap routine, which might return a RAM pointer,
                   or do something truly complicated. */

                if (file->f_op->mmap) {
                        error = file->f_op->mmap(file, &vma);

#ifdef DEBUG
                        printk("f_op->mmap() returned %d/%lx\n", error, vma.vm_start);
#endif
                        if (!error)
                                return vma.vm_start;
                        else if (error != -ENOSYS)
                                return error;
                } else
                        return -ENODEV; /* No mapping operations defined */

                /* An ENOSYS error indicates that mmap isn't possible (as opposed to
                   tried but failed) so we'll fall through to the copy. */
        }

        tblock = (struct mm_tblock_struct *)
                        kmalloc(sizeof(struct mm_tblock_struct), GFP_KERNEL);
        if (!tblock) {
                printk("Allocation of tblock for %lu byte allocation from process %d failed\n", len, current->pid);
                show_free_areas();
                return -ENOMEM;
        }

        tblock->rblock = (struct mm_rblock_struct *)
                        kmalloc(sizeof(struct mm_rblock_struct), GFP_KERNEL);

        if (!tblock->rblock) {
                printk("Allocation of rblock for %lu byte allocation from process %d failed\n", len, current->pid);
                show_free_areas();
                kfree(tblock);
                return -ENOMEM;
        }

        result = kmalloc(len, GFP_KERNEL);
        if (!result) {
                printk("Allocation of length %lu from process %d failed\n", len,
                                current->pid);
                show_free_areas();
                kfree(tblock->rblock);
                kfree(tblock);
                return -ENOMEM;
        }

        tblock->rblock->refcount = 1;
        tblock->rblock->kblock = result;
        tblock->rblock->size = len;

        realalloc += kobjsize(result);
        askedalloc += len;

#ifdef WARN_ON_SLACK
        if ((len+WARN_ON_SLACK) <= kobjsize(result))
                printk("Allocation of %lu bytes from process %d has %lu bytes of slack\n", len, current->pid, kobjsize(result)-len);
#endif

        if (file) {
                int error;
                mm_segment_t old_fs = get_fs();
                set_fs(KERNEL_DS);
                error = file->f_op->read(file, (char *) result, len, &file->f_pos);
                set_fs(old_fs);
                if (error < 0) {
                        kfree(result);
                        kfree(tblock->rblock);
                        kfree(tblock);
                        return error;
                }
                if (error < len)
                        memset(result+error, '\0', len-error);
        } else {
                memset(result, '\0', len);
        }

        realalloc += kobjsize(tblock);
        askedalloc += sizeof(struct mm_tblock_struct);

        realalloc += kobjsize(tblock->rblock);
        askedalloc += sizeof(struct mm_rblock_struct);

        tblock->next = current->mm->context.tblock.next;
        current->mm->context.tblock.next = tblock;

#ifdef DEBUG
        printk("do_mmap:\n");
        show_process_blocks();
#endif

        return (unsigned long)result;
}
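
/*
 * Editorial note (not in the original source): when the ROM/mmap
 * hooks return -ENOSYS, or for anonymous maps, the "mapping" above is
 * emulated: a kmalloc'd buffer of len bytes is filled by reading the
 * file through f_op->read() under KERNEL_DS and is tracked on the
 * per-mm tblock list.  The value returned to userspace is the address
 * of that kernel buffer, so later changes to the file are not
 * reflected in the mapping, and munmap() must be given back exactly
 * this address for do_munmap()'s list lookup to find it.
 */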

int do_munmap(struct mm_struct * mm, unsigned long addr, size_t len)
{
        struct mm_tblock_struct * tblock, *tmp;

#ifdef MAGIC_ROM_PTR
        /*
         * For efficiency's sake, if the pointer is obviously in ROM,
         * don't bother walking the lists to free it.
         */
        if (is_in_rom(addr))
                return 0;
#endif

#ifdef DEBUG
        printk("do_munmap:\n");
#endif

        tmp = &mm->context.tblock; /* dummy head */
        while ((tblock = tmp->next) && tblock->rblock &&
                        tblock->rblock->kblock != (void*)addr)
                tmp = tblock;

        if (!tblock) {
                printk("munmap of non-mmaped memory by process %d (%s): %p\n",
                                current->pid, current->comm, (void*)addr);
                return -EINVAL;
        }
        if (tblock->rblock) {
                if (!--tblock->rblock->refcount) {
                        if (tblock->rblock->kblock) {
                                realalloc -= kobjsize(tblock->rblock->kblock);
                                askedalloc -= tblock->rblock->size;
                                kfree(tblock->rblock->kblock);
                        }

                        realalloc -= kobjsize(tblock->rblock);
                        askedalloc -= sizeof(struct mm_rblock_struct);
                        kfree(tblock->rblock);
                }
        }
        tmp->next = tblock->next;
        realalloc -= kobjsize(tblock);
        askedalloc -= sizeof(struct mm_tblock_struct);
        kfree(tblock);

#ifdef DEBUG
        show_process_blocks();
#endif

        return 0;
}
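
/*
 * Editorial note (not in the original source): len is accepted but
 * never used; whichever block's kernel address matches addr is
 * released in full, so a partial unmap is silently treated as a full
 * unmap of the matching allocation.
 */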

/* Release all mmaps. */
void exit_mmap(struct mm_struct * mm)
{
        struct mm_tblock_struct *tmp;

        if (!mm)
                return;

#ifdef DEBUG
        printk("Exit_mmap:\n");
#endif

        while((tmp = mm->context.tblock.next)) {
                if (tmp->rblock) {
                        if (!--tmp->rblock->refcount) {
                                if (tmp->rblock->kblock) {
                                        realalloc -= kobjsize(tmp->rblock->kblock);
                                        askedalloc -= tmp->rblock->size;
                                        kfree(tmp->rblock->kblock);
                                }
                                realalloc -= kobjsize(tmp->rblock);
                                askedalloc -= sizeof(struct mm_rblock_struct);
                                kfree(tmp->rblock);
                        }
                        tmp->rblock = NULL;
                }
                mm->context.tblock.next = tmp->next;
                realalloc -= kobjsize(tmp);
                askedalloc -= sizeof(struct mm_tblock_struct);
                kfree(tmp);
        }

#ifdef DEBUG
        show_process_blocks();
#endif
}

asmlinkage long sys_munmap(unsigned long addr, size_t len)
{
        int ret;
        struct mm_struct *mm = current->mm;

        down_write(&mm->mmap_sem);
        ret = do_munmap(mm, addr, len);
        up_write(&mm->mmap_sem);
        return ret;
}

unsigned long do_brk(unsigned long addr, unsigned long len)
{
        return -ENOMEM;
}

struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long addr)
{
        return NULL;
}

struct page * follow_page(struct mm_struct *mm, unsigned long addr, int write)
{
        return NULL;
}

struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned long addr)
{
        return NULL;
}

int remap_page_range(struct vm_area_struct *vma, unsigned long from,
                unsigned long to, unsigned long size, pgprot_t prot)
{
        return -EPERM;
}

unsigned long get_unmapped_area(struct file *file, unsigned long addr,
        unsigned long len, unsigned long pgoff, unsigned long flags)
{
        return -ENOMEM;
}

void swap_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
{
}