/*
 *  linux/mm/nommu.c
 *
 *  Replacement code for mm functions to support CPUs that don't
 *  have any form of memory management unit (thus no virtual memory).
 *
 *  Copyright (c) 2000-2003 David McCullough <davidm@snapgear.com>
 *  Copyright (c) 2000-2001 D Jeff Dionne <jeff@uClinux.org>
 *  Copyright (c) 2002      Greg Ungerer <gerg@snapgear.com>
 */

#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/swap.h>
#include <linux/smp_lock.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/blkdev.h>
#include <linux/backing-dev.h>

#include <asm/pgalloc.h>
#include <asm/uaccess.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>

void *high_memory;
struct page *mem_map;
unsigned long max_mapnr;
unsigned long num_physpages;
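/*
 * askedalloc counts the bytes callers requested; realalloc counts the
 * bytes the allocator actually handed out.  The difference is slack
 * lost to kmalloc's power-of-two sizing (see WARN_ON_SLACK below).
 */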
unsigned long askedalloc, realalloc;
atomic_t vm_committed_space = ATOMIC_INIT(0);
int sysctl_overcommit_memory; /* default is heuristic overcommit */
int sysctl_overcommit_ratio = 50; /* default is 50% */

int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT;
EXPORT_SYMBOL(sysctl_max_map_count);

/*
 * Handle all mappings that got truncated by a "truncate()"
 * system call.
 *
 * NOTE! We have to be ready to update the memory sharing
 * between the file and the memory map for a potential last
 * incomplete page.  Ugly, but necessary.
 */
int vmtruncate(struct inode *inode, loff_t offset)
{
        struct address_space *mapping = inode->i_mapping;
        unsigned long limit;

        if (inode->i_size < offset)
                goto do_expand;
        i_size_write(inode, offset);

        truncate_inode_pages(mapping, offset);
        goto out_truncate;

do_expand:
        limit = current->rlim[RLIMIT_FSIZE].rlim_cur;
        if (limit != RLIM_INFINITY && offset > limit)
                goto out_sig;
        if (offset > inode->i_sb->s_maxbytes)
                goto out;
        i_size_write(inode, offset);

out_truncate:
        if (inode->i_op && inode->i_op->truncate)
                inode->i_op->truncate(inode);
        return 0;
out_sig:
        send_sig(SIGXFSZ, current, 0);
out:
        return -EFBIG;
}

/*
 * Return the total memory allocated for this pointer, not
 * just what the caller asked for.
 *
 * Doesn't have to be accurate, i.e. may have races.
 */
unsigned int kobjsize(const void *objp)
{
        struct page *page;

        if (!objp || !(page = virt_to_page(objp)))
                return 0;

        if (PageSlab(page))
                return ksize(objp);

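        /*
         * For non-slab pages this code assumes (a nommu convention) that
         * page->index holds the order of the original page allocation,
         * so the object spans PAGE_SIZE << page->index bytes.
         */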
        BUG_ON(page->index < 0);
        BUG_ON(page->index >= MAX_ORDER);

        return (PAGE_SIZE << page->index);
}

/*
 * The nommu dodgy version :-) without an MMU, user addresses are
 * physical addresses, so the pages can be looked up directly with
 * virt_to_page().
 */
int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
        unsigned long start, int len, int write, int force,
        struct page **pages, struct vm_area_struct **vmas)
{
        int i;
        static struct vm_area_struct dummy_vma;

        for (i = 0; i < len; i++) {
                if (pages) {
                        pages[i] = virt_to_page(start);
                        if (pages[i])
                                page_cache_get(pages[i]);
                }
                if (vmas)
                        vmas[i] = &dummy_vma;
                start += PAGE_SIZE;
        }
        return i;
}

rwlock_t vmlist_lock = RW_LOCK_UNLOCKED;
struct vm_struct *vmlist;

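/*
 * Without an MMU there is no separate kernel virtual address space, so
 * the vmalloc() family below simply degenerates to kmalloc()/kfree().
 */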
void vfree(void *addr)
{
        kfree(addr);
}

void *__vmalloc(unsigned long size, int gfp_mask, pgprot_t prot)
{
        /*
         * kmalloc must return directly addressable memory, so it cannot
         * honour __GFP_HIGHMEM; mask it out.
         */
        return kmalloc(size, gfp_mask & ~__GFP_HIGHMEM);
}

struct page *vmalloc_to_page(void *addr)
{
        return virt_to_page(addr);
}

long vread(char *buf, char *addr, unsigned long count)
{
        /* Don't allow overflow */
        if ((unsigned long) addr + count < count)
                count = -(unsigned long) addr;

        memcpy(buf, addr, count);
        return count;
}

long vwrite(char *buf, char *addr, unsigned long count)
{
        /* Don't allow overflow */
        if ((unsigned long) addr + count < count)
                count = -(unsigned long) addr;

        memcpy(addr, buf, count);
        return count;
}

/*
 *      vmalloc  -  allocate virtually contiguous memory
 *
 *      @size:          allocation size
 *
 *      Allocate enough pages to cover @size from the page level
 *      allocator and map them into contiguous kernel virtual space.
 *
 *      For tight control over page level allocator and protection flags
 *      use __vmalloc() instead.
 */
void *vmalloc(unsigned long size)
{
        return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL);
}

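/*
 * A minimal usage sketch (illustrative only, not part of the original
 * file):
 *
 *      void *buf = vmalloc(16 * 1024);
 *      if (!buf)
 *              return -ENOMEM;
 *      ...
 *      vfree(buf);
 *
 * On nommu this boils down to kmalloc(16384, GFP_KERNEL) plus kfree().
 */
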
/*
 *      vmalloc_32  -  allocate virtually contiguous memory (32bit addressable)
 *
 *      @size:          allocation size
 *
 *      Allocate enough 32bit PA addressable pages to cover @size from the
 *      page level allocator and map them into contiguous kernel virtual space.
 */
void *vmalloc_32(unsigned long size)
{
        return __vmalloc(size, GFP_KERNEL, PAGE_KERNEL);
}

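/*
 * vmap()/vunmap() cannot work without an MMU: there is no way to make
 * an arbitrary set of pages appear virtually contiguous, so any caller
 * reaching these is a hard bug.
 */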
void *vmap(struct page **pages, unsigned int count, unsigned long flags,
        pgprot_t prot)
{
        BUG();
        return NULL;
}

void vunmap(void *addr)
{
        BUG();
}

/*
 * sys_brk() - adjust the program break.
 *
 * Without an MMU the data segment cannot be remapped or grown
 * arbitrarily: the break may only move between mm->start_brk and the
 * limit reserved at exec time (mm->context.end_brk).  Shrinking is
 * always allowed.
 */
asmlinkage unsigned long sys_brk(unsigned long brk)
{
        struct mm_struct *mm = current->mm;

        if (brk < mm->end_code || brk < mm->start_brk || brk > mm->context.end_brk)
                return mm->brk;

        if (mm->brk == brk)
                return mm->brk;

        /*
         * Always allow shrinking brk
         */
        if (brk <= mm->brk) {
                mm->brk = brk;
                return brk;
        }

        /*
         * Ok, looks good - let it rip.
         */
        return mm->brk = brk;
}

/*
 * Combine the mmap "prot" and "flags" argument into one "vm_flags" used
 * internally. Essentially, translate the "PROT_xxx" and "MAP_xxx" bits
 * into "VM_xxx".
 */
static inline unsigned long calc_vm_flags(unsigned long prot, unsigned long flags)
{
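        /*
         * _trans(x, bit1, bit2): if bit1 is set in x, yield bit2.  When
         * the two encodings happen to use the same bit, just mask it out
         * of x directly.
         */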
#define _trans(x,bit1,bit2) \
        ((bit1 == bit2) ? (x & bit1) : (x & bit1) ? bit2 : 0)

        unsigned long prot_bits, flag_bits;
        prot_bits =
                _trans(prot, PROT_READ, VM_READ) |
                _trans(prot, PROT_WRITE, VM_WRITE) |
                _trans(prot, PROT_EXEC, VM_EXEC);
        flag_bits =
                _trans(flags, MAP_GROWSDOWN, VM_GROWSDOWN) |
                _trans(flags, MAP_DENYWRITE, VM_DENYWRITE) |
                _trans(flags, MAP_EXECUTABLE, VM_EXECUTABLE);
        return prot_bits | flag_bits;
#undef _trans
}

#ifdef DEBUG
static void show_process_blocks(void)
{
        struct mm_tblock_struct *tblock;

        printk("Process blocks %d:", current->pid);

        for (tblock = &current->mm->context.tblock; tblock; tblock = tblock->next) {
                printk(" %p: %p", tblock, tblock->rblock);
                if (tblock->rblock)
                        printk(" (%d @%p #%d)", kobjsize(tblock->rblock->kblock),
                                tblock->rblock->kblock, tblock->rblock->refcount);
                printk(tblock->next ? " ->" : ".\n");
        }
}
#endif /* DEBUG */

unsigned long do_mmap_pgoff(
        struct file *file,
        unsigned long addr,
        unsigned long len,
        unsigned long prot,
        unsigned long flags,
        unsigned long pgoff)
{
        void *result;
        struct mm_tblock_struct *tblock;
        unsigned int vm_flags;

        /*
         * Get the !CONFIG_MMU specific checks done first
         */
        if ((flags & MAP_SHARED) && (prot & PROT_WRITE) && (file)) {
                printk("MAP_SHARED not supported (cannot write mappings to disk)\n");
                return -EINVAL;
        }

        if ((prot & PROT_WRITE) && (flags & MAP_PRIVATE)) {
                printk("Private writable mappings not supported\n");
                return -EINVAL;
        }

        /*
         *      now all the standard checks
         */
        if (file && (!file->f_op || !file->f_op->mmap))
                return -ENODEV;

        if (PAGE_ALIGN(len) == 0)
                return addr;

        if (len > TASK_SIZE)
                return -EINVAL;

        /* offset overflow? */
        if ((pgoff + (len >> PAGE_SHIFT)) < pgoff)
                return -EINVAL;

        /* Do simple checking here so the lower-level routines won't have
         * to. We assume access permissions have been handled by the open
         * of the memory object, so we don't do any here.
         */
        vm_flags = calc_vm_flags(prot, flags) /* | mm->def_flags */
                        | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;

        /*
         * determine the object being mapped and call the appropriate
         * specific mapper.
         */
        if (file) {
                struct vm_area_struct vma;
                int error;

                if (!file->f_op)
                        return -ENODEV;

                vma.vm_start = addr;
                vma.vm_end = addr + len;
                vma.vm_flags = vm_flags;
                vma.vm_pgoff = pgoff;

#ifdef MAGIC_ROM_PTR
                /* First, try simpler routine designed to give us a ROM pointer. */

                if (file->f_op->romptr && !(prot & PROT_WRITE)) {
                        error = file->f_op->romptr(file, &vma);
#ifdef DEBUG
                        printk("romptr mmap returned %d, start 0x%.8x\n", error,
                                        vma.vm_start);
#endif
                        if (!error)
                                return vma.vm_start;
                        else if (error != -ENOSYS)
                                return error;
                } else
#endif /* MAGIC_ROM_PTR */
                /* Then try full mmap routine, which might return a RAM pointer,
                   or do something truly complicated. */

                if (file->f_op->mmap) {
                        error = file->f_op->mmap(file, &vma);

#ifdef DEBUG
                        printk("f_op->mmap() returned %d/%lx\n", error, vma.vm_start);
#endif
                        if (!error)
                                return vma.vm_start;
                        else if (error != -ENOSYS)
                                return error;
                } else
                        return -ENODEV; /* No mapping operations defined */

                /* An ENOSYS error indicates that mmap isn't possible (as opposed to
                   tried but failed) so we'll fall through to the copy. */
        }

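        /*
         * No (usable) driver mapping: fall back to a private copy.
         * Allocate a physically contiguous buffer with kmalloc, track it
         * with a tblock/rblock pair, and read the file contents into it.
         */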
        tblock = (struct mm_tblock_struct *)
                        kmalloc(sizeof(struct mm_tblock_struct), GFP_KERNEL);
        if (!tblock) {
                printk("Allocation of tblock for %lu byte allocation from process %d failed\n",
                                len, current->pid);
                show_free_areas();
                return -ENOMEM;
        }

        tblock->rblock = (struct mm_rblock_struct *)
                        kmalloc(sizeof(struct mm_rblock_struct), GFP_KERNEL);

        if (!tblock->rblock) {
                printk("Allocation of rblock for %lu byte allocation from process %d failed\n",
                                len, current->pid);
                show_free_areas();
                kfree(tblock);
                return -ENOMEM;
        }

        result = kmalloc(len, GFP_KERNEL);
        if (!result) {
                printk("Allocation of length %lu from process %d failed\n", len,
                                current->pid);
                show_free_areas();
                kfree(tblock->rblock);
                kfree(tblock);
                return -ENOMEM;
        }

        tblock->rblock->refcount = 1;
        tblock->rblock->kblock = result;
        tblock->rblock->size = len;

        realalloc += kobjsize(result);
        askedalloc += len;

#ifdef WARN_ON_SLACK
        if ((len + WARN_ON_SLACK) <= kobjsize(result))
                printk("Allocation of %lu bytes from process %d has %lu bytes of slack\n",
                                len, current->pid, kobjsize(result) - len);
#endif

        if (file) {
                int error;
                mm_segment_t old_fs = get_fs();
                set_fs(KERNEL_DS);
                error = file->f_op->read(file, (char *) result, len, &file->f_pos);
                set_fs(old_fs);
                if (error < 0) {
                        kfree(result);
                        kfree(tblock->rblock);
                        kfree(tblock);
                        return error;
                }
                if (error < len)
                        memset(result + error, '\0', len - error);
        } else {
                memset(result, '\0', len);
        }

        realalloc += kobjsize(tblock);
        askedalloc += sizeof(struct mm_tblock_struct);

        realalloc += kobjsize(tblock->rblock);
        askedalloc += sizeof(struct mm_rblock_struct);

        tblock->next = current->mm->context.tblock.next;
        current->mm->context.tblock.next = tblock;

#ifdef DEBUG
        printk("do_mmap:\n");
        show_process_blocks();
#endif

        return (unsigned long)result;
}

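/*
 * Undo a mapping made by do_mmap_pgoff(): find the tblock whose kblock
 * matches addr, drop the rblock's refcount and free the storage once it
 * hits zero.  len is ignored, since nommu mappings are freed whole.
 */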
int do_munmap(struct mm_struct *mm, unsigned long addr, size_t len)
{
        struct mm_tblock_struct *tblock, *tmp;

#ifdef MAGIC_ROM_PTR
        /*
         * For efficiency's sake, if the pointer is obviously in ROM,
         * don't bother walking the lists to free it.
         */
        if (is_in_rom(addr))
                return 0;
#endif

#ifdef DEBUG
        printk("do_munmap:\n");
#endif

        tmp = &mm->context.tblock; /* dummy head */
        while ((tblock = tmp->next) && tblock->rblock &&
                        tblock->rblock->kblock != (void *)addr)
                tmp = tblock;

        if (!tblock) {
                printk("munmap of non-mmapped memory by process %d (%s): %p\n",
                                current->pid, current->comm, (void *)addr);
                return -EINVAL;
        }
        if (tblock->rblock) {
                if (!--tblock->rblock->refcount) {
                        if (tblock->rblock->kblock) {
                                realalloc -= kobjsize(tblock->rblock->kblock);
                                askedalloc -= tblock->rblock->size;
                                kfree(tblock->rblock->kblock);
                        }

                        realalloc -= kobjsize(tblock->rblock);
                        askedalloc -= sizeof(struct mm_rblock_struct);
                        kfree(tblock->rblock);
                }
        }
        tmp->next = tblock->next;
        realalloc -= kobjsize(tblock);
        askedalloc -= sizeof(struct mm_tblock_struct);
        kfree(tblock);

#ifdef DEBUG
        show_process_blocks();
#endif

        return 0;
}

/* Release all mmaps. */
void exit_mmap(struct mm_struct *mm)
{
        struct mm_tblock_struct *tmp;

        if (!mm)
                return;

#ifdef DEBUG
        printk("Exit_mmap:\n");
#endif

        while ((tmp = mm->context.tblock.next)) {
                if (tmp->rblock) {
                        if (!--tmp->rblock->refcount) {
                                if (tmp->rblock->kblock) {
                                        realalloc -= kobjsize(tmp->rblock->kblock);
                                        askedalloc -= tmp->rblock->size;
                                        kfree(tmp->rblock->kblock);
                                }
                                realalloc -= kobjsize(tmp->rblock);
                                askedalloc -= sizeof(struct mm_rblock_struct);
                                kfree(tmp->rblock);
                        }
                        tmp->rblock = NULL;
                }
                mm->context.tblock.next = tmp->next;
                realalloc -= kobjsize(tmp);
                askedalloc -= sizeof(struct mm_tblock_struct);
                kfree(tmp);
        }

#ifdef DEBUG
        show_process_blocks();
#endif
}

asmlinkage long sys_munmap(unsigned long addr, size_t len)
{
        int ret;
        struct mm_struct *mm = current->mm;

        down_write(&mm->mmap_sem);
        ret = do_munmap(mm, addr, len);
        up_write(&mm->mmap_sem);
        return ret;
}

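/*
 * The remaining entry points exist only to satisfy the rest of the
 * kernel; without an MMU there are no VMAs to grow, look up, extend
 * or remap, so they all fail or return NULL.
 */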
unsigned long do_brk(unsigned long addr, unsigned long len)
{
        return -ENOMEM;
}

struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
{
        return NULL;
}

struct page *follow_page(struct mm_struct *mm, unsigned long addr, int write)
{
        return NULL;
}

struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned long addr)
{
        return NULL;
}

int remap_page_range(struct vm_area_struct *vma, unsigned long from,
                unsigned long to, unsigned long size, pgprot_t prot)
{
        return -EPERM;
}

unsigned long get_unmapped_area(struct file *file, unsigned long addr,
        unsigned long len, unsigned long pgoff, unsigned long flags)
{
        return -ENOMEM;
}

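/*
 * Swap read-ahead unplugging is a no-op stub here: nommu kernels do not
 * page out to swap.
 */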
void swap_unplug_io_fn(struct backing_dev_info *bdi)
{
}